aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2023-03-09 13:22:05 +0000
committerPeter Maydell <peter.maydell@linaro.org>2023-03-09 13:22:05 +0000
commit15002921e878e6cf485f655d580733b5319ea015 (patch)
tree404375b7acc31ee6b5583905fe817dc4f7799d48 /hw
parentba44caac0741482d2c1921f7f974c205d2d4222b (diff)
parent154eac37190c4d80d29b09c226abd899e397530f (diff)
downloadqemu-15002921e878e6cf485f655d580733b5319ea015.zip
qemu-15002921e878e6cf485f655d580733b5319ea015.tar.gz
qemu-15002921e878e6cf485f655d580733b5319ea015.tar.bz2
Merge tag 'xenfv-2' of git://git.infradead.org/users/dwmw2/qemu into staging
Enable PV backends with Xen/KVM emulation This is phase 2, following on from the basic platform support which was already merged. • Add a simple single-tenant internal XenStore implementation • Indirect Xen gnttab/evtchn/foreignmem/xenstore through operations table • Provide emulated back ends for Xen operations • Header cleanups to allow PV back ends to build without Xen itself • Enable PV back ends in emulated mode • Documentation update Tested-by: Paul Durrant <paul@xen.org> ... on real Xen (master branch, 4.18) with a Debian guest. # -----BEGIN PGP SIGNATURE----- # # iQJGBAABCgAwFiEEMUsIrNDeSBEzpfKGm+mA/QrAFUQFAmQHu3wSHGR3bXdAYW1h # em9uLmNvLnVrAAoJEJvpgP0KwBVE5LYP/0VodDsQdP7Z4L+/IzgBSgEec7qmyQFB # KlBZS/PmvCZKb0DHLI3GhXIyzD+/fnLtGSRl0rYObnKP7im+MpEDGmn97f6nIITk # AzkdsVhNEBQFXCkLgQ9y8kTrTmsod9O4sqn0+naa2TX4FPcRN0MaNmpuLEubvaRS # +JuyHmwy9ZeeAnsU31uJ0nx4F1hW9IDaatNoDeFcFnKCXQp36rtdZUViMowUJvwu # Q+Xyg6dybusznaoiXd485tTPrTt+FK/wEARse3q2gRh9QblLu0r5BFb0rOfhYCTQ # jw+5lBsOX+UlffmB9IDakRpVe4RKhvvRQSkRvYkPCshsqud9zMGhaquKg1vKBgca # I31XSN0LCcon/ahHGtmVAxyZUpWdEnfzO1TbTNpz9oacROklgVgEYdw5Vwca71VD # SURl6uCt9Jb9WmsR4twus4i4qDjQIDOtOF0hcxpl7HGktkxlGxUVI4qVLXARtVCS # OTB6N0LlhJ2woj2wYK5BRTiOj03T2MkJEWaYhDdIrQREKWe2Sn4xTOH5kGbQQnOr # km93odjBZFRHsAUnzXHXW3+yHjMefH7KrHePbmvsO4foGF77bBxosuC2ehFfvNJ0 # VM/H04NDtPYCBwdAr545PSN/q+WzEPQaquLZ0UuTBuPpMMOYd+Ff8YvQWJPyCM18 # 1mq9v6Xe9RQZ # =JGLX # -----END PGP SIGNATURE----- # gpg: Signature made Tue 07 Mar 2023 22:32:28 GMT # gpg: using RSA key 314B08ACD0DE481133A5F2869BE980FD0AC01544 # gpg: issuer "dwmw@amazon.co.uk" # gpg: Good signature from "David Woodhouse <dwmw@amazon.co.uk>" [unknown] # gpg: aka "David Woodhouse <dwmw@amazon.com>" [unknown] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 314B 08AC D0DE 4811 33A5 F286 9BE9 80FD 0AC0 1544 * tag 'xenfv-2' of git://git.infradead.org/users/dwmw2/qemu: (27 commits) docs: Update Xen-on-KVM documentation for PV disk support MAINTAINERS: Add entry for Xen on KVM emulation i386/xen: Initialize Xen backends from pc_basic_device_init() for emulation hw/xen: Implement soft reset for emulated gnttab hw/xen: Map guest XENSTORE_PFN grant in emulated Xenstore hw/xen: Add emulated implementation of XenStore operations hw/xen: Add emulated implementation of grant table operations hw/xen: Hook up emulated implementation for event channel operations hw/xen: Only advertise ring-page-order for xen-block if gnttab supports it hw/xen: Avoid crash when backend watch fires too early hw/xen: Build PV backend drivers for CONFIG_XEN_BUS hw/xen: Rename xen_common.h to xen_native.h hw/xen: Use XEN_PAGE_SIZE in PV backend drivers hw/xen: Move xenstore_store_pv_console_info to xen_console.c hw/xen: Add xenstore operations to allow redirection to internal emulation hw/xen: Add foreignmem operations to allow redirection to internal emulation hw/xen: Pass grant ref to gnttab unmap operation hw/xen: Add gnttab operations to allow redirection to internal emulation hw/xen: Add evtchn operations to allow redirection to internal emulation hw/xen: Create initial XenStore nodes ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r--hw/9pfs/meson.build2
-rw-r--r--hw/9pfs/xen-9p-backend.c32
-rw-r--r--hw/block/dataplane/meson.build2
-rw-r--r--hw/block/dataplane/xen-block.c12
-rw-r--r--hw/block/meson.build2
-rw-r--r--hw/block/xen-block.c12
-rw-r--r--hw/char/meson.build2
-rw-r--r--hw/char/xen_console.c57
-rw-r--r--hw/display/meson.build2
-rw-r--r--hw/display/xenfb.c32
-rw-r--r--hw/i386/kvm/meson.build1
-rw-r--r--hw/i386/kvm/trace-events15
-rw-r--r--hw/i386/kvm/xen_evtchn.c15
-rw-r--r--hw/i386/kvm/xen_gnttab.c325
-rw-r--r--hw/i386/kvm/xen_gnttab.h1
-rw-r--r--hw/i386/kvm/xen_xenstore.c1251
-rw-r--r--hw/i386/kvm/xenstore_impl.c1927
-rw-r--r--hw/i386/kvm/xenstore_impl.h63
-rw-r--r--hw/i386/pc.c7
-rw-r--r--hw/i386/pc_piix.c4
-rw-r--r--hw/i386/xen/xen-hvm.c38
-rw-r--r--hw/i386/xen/xen-mapcache.c2
-rw-r--r--hw/i386/xen/xen_platform.c7
-rw-r--r--hw/net/xen_nic.c25
-rw-r--r--hw/usb/meson.build2
-rw-r--r--hw/usb/xen-usb.c29
-rw-r--r--hw/xen/meson.build6
-rw-r--r--hw/xen/trace-events2
-rw-r--r--hw/xen/xen-bus-helper.c62
-rw-r--r--hw/xen/xen-bus.c411
-rw-r--r--hw/xen/xen-legacy-backend.c254
-rw-r--r--hw/xen/xen-operations.c478
-rw-r--r--hw/xen/xen_devconfig.c4
-rw-r--r--hw/xen/xen_pt.c2
-rw-r--r--hw/xen/xen_pt.h2
-rw-r--r--hw/xen/xen_pt_config_init.c2
-rw-r--r--hw/xen/xen_pt_graphics.c1
-rw-r--r--hw/xen/xen_pt_msi.c4
-rw-r--r--hw/xen/xen_pvdev.c63
39 files changed, 4426 insertions, 732 deletions
diff --git a/hw/9pfs/meson.build b/hw/9pfs/meson.build
index 12443b6..fd37b7a 100644
--- a/hw/9pfs/meson.build
+++ b/hw/9pfs/meson.build
@@ -15,7 +15,7 @@ fs_ss.add(files(
))
fs_ss.add(when: 'CONFIG_LINUX', if_true: files('9p-util-linux.c'))
fs_ss.add(when: 'CONFIG_DARWIN', if_true: files('9p-util-darwin.c'))
-fs_ss.add(when: 'CONFIG_XEN', if_true: files('xen-9p-backend.c'))
+fs_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-9p-backend.c'))
softmmu_ss.add_all(when: 'CONFIG_FSDEV_9P', if_true: fs_ss)
specific_ss.add(when: 'CONFIG_VIRTIO_9P', if_true: files('virtio-9p-device.c'))
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 65c4979..74f3a05 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -22,6 +22,7 @@
#include "qemu/config-file.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
+#include "qemu/iov.h"
#include "fsdev/qemu-fsdev.h"
#define VERSIONS "1"
@@ -241,7 +242,7 @@ static void xen_9pfs_push_and_notify(V9fsPDU *pdu)
xen_wmb();
ring->inprogress = false;
- xenevtchn_notify(ring->evtchndev, ring->local_port);
+ qemu_xen_evtchn_notify(ring->evtchndev, ring->local_port);
qemu_bh_schedule(ring->bh);
}
@@ -324,8 +325,8 @@ static void xen_9pfs_evtchn_event(void *opaque)
Xen9pfsRing *ring = opaque;
evtchn_port_t port;
- port = xenevtchn_pending(ring->evtchndev);
- xenevtchn_unmask(ring->evtchndev, port);
+ port = qemu_xen_evtchn_pending(ring->evtchndev);
+ qemu_xen_evtchn_unmask(ring->evtchndev, port);
qemu_bh_schedule(ring->bh);
}
@@ -337,10 +338,10 @@ static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev)
for (i = 0; i < xen_9pdev->num_rings; i++) {
if (xen_9pdev->rings[i].evtchndev != NULL) {
- qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
- NULL, NULL, NULL);
- xenevtchn_unbind(xen_9pdev->rings[i].evtchndev,
- xen_9pdev->rings[i].local_port);
+ qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev),
+ NULL, NULL, NULL);
+ qemu_xen_evtchn_unbind(xen_9pdev->rings[i].evtchndev,
+ xen_9pdev->rings[i].local_port);
xen_9pdev->rings[i].evtchndev = NULL;
}
}
@@ -359,12 +360,13 @@ static int xen_9pfs_free(struct XenLegacyDevice *xendev)
if (xen_9pdev->rings[i].data != NULL) {
xen_be_unmap_grant_refs(&xen_9pdev->xendev,
xen_9pdev->rings[i].data,
+ xen_9pdev->rings[i].intf->ref,
(1 << xen_9pdev->rings[i].ring_order));
}
if (xen_9pdev->rings[i].intf != NULL) {
- xen_be_unmap_grant_refs(&xen_9pdev->xendev,
- xen_9pdev->rings[i].intf,
- 1);
+ xen_be_unmap_grant_ref(&xen_9pdev->xendev,
+ xen_9pdev->rings[i].intf,
+ xen_9pdev->rings[i].ref);
}
if (xen_9pdev->rings[i].bh != NULL) {
qemu_bh_delete(xen_9pdev->rings[i].bh);
@@ -447,12 +449,12 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
xen_9pdev->rings[i].inprogress = false;
- xen_9pdev->rings[i].evtchndev = xenevtchn_open(NULL, 0);
+ xen_9pdev->rings[i].evtchndev = qemu_xen_evtchn_open();
if (xen_9pdev->rings[i].evtchndev == NULL) {
goto out;
}
- qemu_set_cloexec(xenevtchn_fd(xen_9pdev->rings[i].evtchndev));
- xen_9pdev->rings[i].local_port = xenevtchn_bind_interdomain
+ qemu_set_cloexec(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev));
+ xen_9pdev->rings[i].local_port = qemu_xen_evtchn_bind_interdomain
(xen_9pdev->rings[i].evtchndev,
xendev->dom,
xen_9pdev->rings[i].evtchn);
@@ -463,8 +465,8 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
goto out;
}
xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port);
- qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
- xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]);
+ qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev),
+ xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]);
}
xen_9pdev->security_model = xenstore_read_be_str(xendev, "security_model");
diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build
index 12c6a26..78d7ac1 100644
--- a/hw/block/dataplane/meson.build
+++ b/hw/block/dataplane/meson.build
@@ -1,2 +1,2 @@
specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
-specific_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
+specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c'))
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 2785b9e..734da42 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -23,8 +23,9 @@
#include "qemu/main-loop.h"
#include "qemu/memalign.h"
#include "qapi/error.h"
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen.h"
#include "hw/block/xen_blkif.h"
+#include "hw/xen/interface/io/ring.h"
#include "sysemu/block-backend.h"
#include "sysemu/iothread.h"
#include "xen-block.h"
@@ -101,9 +102,9 @@ static XenBlockRequest *xen_block_start_request(XenBlockDataPlane *dataplane)
* re-use requests, allocate the memory once here. It will be freed
* xen_block_dataplane_destroy() when the request list is freed.
*/
- request->buf = qemu_memalign(XC_PAGE_SIZE,
+ request->buf = qemu_memalign(XEN_PAGE_SIZE,
BLKIF_MAX_SEGMENTS_PER_REQUEST *
- XC_PAGE_SIZE);
+ XEN_PAGE_SIZE);
dataplane->requests_total++;
qemu_iovec_init(&request->v, 1);
} else {
@@ -185,7 +186,7 @@ static int xen_block_parse_request(XenBlockRequest *request)
goto err;
}
if (request->req.seg[i].last_sect * dataplane->sector_size >=
- XC_PAGE_SIZE) {
+ XEN_PAGE_SIZE) {
error_report("error: page crossing");
goto err;
}
@@ -705,6 +706,7 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
Error *local_err = NULL;
xen_device_unmap_grant_refs(xendev, dataplane->sring,
+ dataplane->ring_ref,
dataplane->nr_ring_ref, &local_err);
dataplane->sring = NULL;
@@ -739,7 +741,7 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
dataplane->protocol = protocol;
- ring_size = XC_PAGE_SIZE * dataplane->nr_ring_ref;
+ ring_size = XEN_PAGE_SIZE * dataplane->nr_ring_ref;
switch (dataplane->protocol) {
case BLKIF_PROTOCOL_NATIVE:
{
diff --git a/hw/block/meson.build b/hw/block/meson.build
index b434d56..cc2a75c 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: files('pflash_cfi02.c'))
softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c'))
softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80_sfdp.c'))
softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
+softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c'))
softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c'))
specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c', 'virtio-blk-common.c'))
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 345b284..f5a7445 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -19,7 +19,6 @@
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qstring.h"
#include "qom/object_interfaces.h"
-#include "hw/xen/xen_common.h"
#include "hw/block/xen_blkif.h"
#include "hw/qdev-properties.h"
#include "hw/xen/xen-block.h"
@@ -84,7 +83,8 @@ static void xen_block_connect(XenDevice *xendev, Error **errp)
g_free(ring_ref);
return;
}
- } else if (order <= blockdev->props.max_ring_page_order) {
+ } else if (qemu_xen_gnttab_can_map_multi() &&
+ order <= blockdev->props.max_ring_page_order) {
unsigned int i;
nr_ring_ref = 1 << order;
@@ -256,8 +256,12 @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
}
xen_device_backend_printf(xendev, "feature-flush-cache", "%u", 1);
- xen_device_backend_printf(xendev, "max-ring-page-order", "%u",
- blockdev->props.max_ring_page_order);
+
+ if (qemu_xen_gnttab_can_map_multi()) {
+ xen_device_backend_printf(xendev, "max-ring-page-order", "%u",
+ blockdev->props.max_ring_page_order);
+ }
+
xen_device_backend_printf(xendev, "info", "%u", blockdev->info);
xen_device_frontend_printf(xendev, "virtual-device", "%lu",
diff --git a/hw/char/meson.build b/hw/char/meson.build
index 7b594f5..e02c60d 100644
--- a/hw/char/meson.build
+++ b/hw/char/meson.build
@@ -18,7 +18,7 @@ softmmu_ss.add(when: 'CONFIG_SERIAL_PCI', if_true: files('serial-pci.c'))
softmmu_ss.add(when: 'CONFIG_SERIAL_PCI_MULTI', if_true: files('serial-pci-multi.c'))
softmmu_ss.add(when: 'CONFIG_SHAKTI_UART', if_true: files('shakti_uart.c'))
softmmu_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-console.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen_console.c'))
+softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen_console.c'))
softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_uartlite.c'))
softmmu_ss.add(when: 'CONFIG_AVR_USART', if_true: files('avr_usart.c'))
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index 63153df..c7a19c0 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -173,6 +173,48 @@ static void xencons_send(struct XenConsole *con)
/* -------------------------------------------------------------------- */
+static int store_con_info(struct XenConsole *con)
+{
+ Chardev *cs = qemu_chr_fe_get_driver(&con->chr);
+ char *pts = NULL;
+ char *dom_path;
+ GString *path;
+ int ret = -1;
+
+ /* Only continue if we're talking to a pty. */
+ if (!CHARDEV_IS_PTY(cs)) {
+ return 0;
+ }
+ pts = cs->filename + 4;
+
+ dom_path = qemu_xen_xs_get_domain_path(xenstore, xen_domid);
+ if (!dom_path) {
+ return 0;
+ }
+
+ path = g_string_new(dom_path);
+ free(dom_path);
+
+ if (con->xendev.dev) {
+ g_string_append_printf(path, "/device/console/%d", con->xendev.dev);
+ } else {
+ g_string_append(path, "/console");
+ }
+ g_string_append(path, "/tty");
+
+ if (xenstore_write_str(con->console, path->str, pts)) {
+ fprintf(stderr, "xenstore_write_str for '%s' fail", path->str);
+ goto out;
+ }
+ ret = 0;
+
+out:
+ g_string_free(path, true);
+ free(path);
+
+ return ret;
+}
+
static int con_init(struct XenLegacyDevice *xendev)
{
struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
@@ -181,7 +223,7 @@ static int con_init(struct XenLegacyDevice *xendev)
const char *output;
/* setup */
- dom = xs_get_domain_path(xenstore, con->xendev.dom);
+ dom = qemu_xen_xs_get_domain_path(xenstore, con->xendev.dom);
if (!xendev->dev) {
snprintf(con->console, sizeof(con->console), "%s/console", dom);
} else {
@@ -215,8 +257,7 @@ static int con_init(struct XenLegacyDevice *xendev)
&error_abort);
}
- xenstore_store_pv_console_info(con->xendev.dev,
- qemu_chr_fe_get_driver(&con->chr));
+ store_con_info(con);
out:
g_free(type);
@@ -237,9 +278,9 @@ static int con_initialise(struct XenLegacyDevice *xendev)
if (!xendev->dev) {
xen_pfn_t mfn = con->ring_ref;
- con->sring = xenforeignmemory_map(xen_fmem, con->xendev.dom,
- PROT_READ | PROT_WRITE,
- 1, &mfn, NULL);
+ con->sring = qemu_xen_foreignmem_map(con->xendev.dom, NULL,
+ PROT_READ | PROT_WRITE,
+ 1, &mfn, NULL);
} else {
con->sring = xen_be_map_grant_ref(xendev, con->ring_ref,
PROT_READ | PROT_WRITE);
@@ -269,9 +310,9 @@ static void con_disconnect(struct XenLegacyDevice *xendev)
if (con->sring) {
if (!xendev->dev) {
- xenforeignmemory_unmap(xen_fmem, con->sring, 1);
+ qemu_xen_foreignmem_unmap(con->sring, 1);
} else {
- xen_be_unmap_grant_ref(xendev, con->sring);
+ xen_be_unmap_grant_ref(xendev, con->sring, con->ring_ref);
}
con->sring = NULL;
}
diff --git a/hw/display/meson.build b/hw/display/meson.build
index f470179..4191694 100644
--- a/hw/display/meson.build
+++ b/hw/display/meson.build
@@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PL110', if_true: files('pl110.c'))
softmmu_ss.add(when: 'CONFIG_SII9022', if_true: files('sii9022.c'))
softmmu_ss.add(when: 'CONFIG_SSD0303', if_true: files('ssd0303.c'))
softmmu_ss.add(when: 'CONFIG_SSD0323', if_true: files('ssd0323.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xenfb.c'))
+softmmu_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xenfb.c'))
softmmu_ss.add(when: 'CONFIG_VGA_PCI', if_true: files('vga-pci.c'))
softmmu_ss.add(when: 'CONFIG_VGA_ISA', if_true: files('vga-isa.c'))
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index 260eb38..0074a9b 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -98,8 +98,9 @@ static int common_bind(struct common *c)
if (xenstore_read_fe_int(&c->xendev, "event-channel", &c->xendev.remote_port) == -1)
return -1;
- c->page = xenforeignmemory_map(xen_fmem, c->xendev.dom,
- PROT_READ | PROT_WRITE, 1, &mfn, NULL);
+ c->page = qemu_xen_foreignmem_map(c->xendev.dom, NULL,
+ PROT_READ | PROT_WRITE, 1, &mfn,
+ NULL);
if (c->page == NULL)
return -1;
@@ -115,7 +116,7 @@ static void common_unbind(struct common *c)
{
xen_pv_unbind_evtchn(&c->xendev);
if (c->page) {
- xenforeignmemory_unmap(xen_fmem, c->page, 1);
+ qemu_xen_foreignmem_unmap(c->page, 1);
c->page = NULL;
}
}
@@ -488,27 +489,28 @@ static int xenfb_map_fb(struct XenFB *xenfb)
}
if (xenfb->pixels) {
- munmap(xenfb->pixels, xenfb->fbpages * XC_PAGE_SIZE);
+ munmap(xenfb->pixels, xenfb->fbpages * XEN_PAGE_SIZE);
xenfb->pixels = NULL;
}
- xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XC_PAGE_SIZE);
+ xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XEN_PAGE_SIZE);
n_fbdirs = xenfb->fbpages * mode / 8;
- n_fbdirs = DIV_ROUND_UP(n_fbdirs, XC_PAGE_SIZE);
+ n_fbdirs = DIV_ROUND_UP(n_fbdirs, XEN_PAGE_SIZE);
pgmfns = g_new0(xen_pfn_t, n_fbdirs);
fbmfns = g_new0(xen_pfn_t, xenfb->fbpages);
xenfb_copy_mfns(mode, n_fbdirs, pgmfns, pd);
- map = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom,
- PROT_READ, n_fbdirs, pgmfns, NULL);
+ map = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL, PROT_READ,
+ n_fbdirs, pgmfns, NULL);
if (map == NULL)
goto out;
xenfb_copy_mfns(mode, xenfb->fbpages, fbmfns, map);
- xenforeignmemory_unmap(xen_fmem, map, n_fbdirs);
+ qemu_xen_foreignmem_unmap(map, n_fbdirs);
- xenfb->pixels = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom,
- PROT_READ, xenfb->fbpages, fbmfns, NULL);
+ xenfb->pixels = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL,
+ PROT_READ, xenfb->fbpages,
+ fbmfns, NULL);
if (xenfb->pixels == NULL)
goto out;
@@ -526,8 +528,8 @@ static int xenfb_configure_fb(struct XenFB *xenfb, size_t fb_len_lim,
{
size_t mfn_sz = sizeof_field(struct xenfb_page, pd[0]);
size_t pd_len = sizeof_field(struct xenfb_page, pd) / mfn_sz;
- size_t fb_pages = pd_len * XC_PAGE_SIZE / mfn_sz;
- size_t fb_len_max = fb_pages * XC_PAGE_SIZE;
+ size_t fb_pages = pd_len * XEN_PAGE_SIZE / mfn_sz;
+ size_t fb_len_max = fb_pages * XEN_PAGE_SIZE;
int max_width, max_height;
if (fb_len_lim > fb_len_max) {
@@ -927,8 +929,8 @@ static void fb_disconnect(struct XenLegacyDevice *xendev)
* Replacing the framebuffer with anonymous shared memory
* instead. This releases the guest pages and keeps qemu happy.
*/
- xenforeignmemory_unmap(xen_fmem, fb->pixels, fb->fbpages);
- fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE,
+ qemu_xen_foreignmem_unmap(fb->pixels, fb->fbpages);
+ fb->pixels = mmap(fb->pixels, fb->fbpages * XEN_PAGE_SIZE,
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON,
-1, 0);
if (fb->pixels == MAP_FAILED) {
diff --git a/hw/i386/kvm/meson.build b/hw/i386/kvm/meson.build
index 82dd6ae..6621ba5 100644
--- a/hw/i386/kvm/meson.build
+++ b/hw/i386/kvm/meson.build
@@ -9,6 +9,7 @@ i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files(
'xen_evtchn.c',
'xen_gnttab.c',
'xen_xenstore.c',
+ 'xenstore_impl.c',
))
i386_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss)
diff --git a/hw/i386/kvm/trace-events b/hw/i386/kvm/trace-events
index b83c3eb..e4c82de 100644
--- a/hw/i386/kvm/trace-events
+++ b/hw/i386/kvm/trace-events
@@ -3,3 +3,18 @@ kvm_xen_unmap_pirq(int pirq, int gsi) "pirq %d gsi %d"
kvm_xen_get_free_pirq(int pirq, int type) "pirq %d type %d"
kvm_xen_bind_pirq(int pirq, int port) "pirq %d port %d"
kvm_xen_unmask_pirq(int pirq, char *dev, int vector) "pirq %d dev %s vector %d"
+xenstore_error(unsigned int id, unsigned int tx_id, const char *err) "req %u tx %u err %s"
+xenstore_read(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_write(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_mkdir(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_directory(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_directory_part(unsigned int tx_id, const char *path, unsigned int offset) "tx %u path %s offset %u"
+xenstore_transaction_start(unsigned int new_tx) "new_tx %u"
+xenstore_transaction_end(unsigned int tx_id, bool commit) "tx %u commit %d"
+xenstore_rm(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_get_perms(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_set_perms(unsigned int tx_id, const char *path) "tx %u path %s"
+xenstore_watch(const char *path, const char *token) "path %s token %s"
+xenstore_unwatch(const char *path, const char *token) "path %s token %s"
+xenstore_reset_watches(void) ""
+xenstore_watch_event(const char *path, const char *token) "path %s token %s"
diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index 886fbf6..98a7b85 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -34,6 +34,7 @@
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/irq.h"
+#include "hw/xen/xen_backend_ops.h"
#include "xen_evtchn.h"
#include "xen_overlay.h"
@@ -278,6 +279,17 @@ static const TypeInfo xen_evtchn_info = {
.class_init = xen_evtchn_class_init,
};
+static struct evtchn_backend_ops emu_evtchn_backend_ops = {
+ .open = xen_be_evtchn_open,
+ .bind_interdomain = xen_be_evtchn_bind_interdomain,
+ .unbind = xen_be_evtchn_unbind,
+ .close = xen_be_evtchn_close,
+ .get_fd = xen_be_evtchn_fd,
+ .notify = xen_be_evtchn_notify,
+ .unmask = xen_be_evtchn_unmask,
+ .pending = xen_be_evtchn_pending,
+};
+
static void gsi_assert_bh(void *opaque)
{
struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
@@ -318,6 +330,9 @@ void xen_evtchn_create(void)
s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
s->pirq = g_new0(struct pirq_info, s->nr_pirqs);
+
+ /* Set event channel functions for backend drivers to use */
+ xen_evtchn_ops = &emu_evtchn_backend_ops;
}
void xen_evtchn_connect_gsis(qemu_irq *system_gsis)
diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c
index 1e691de..21c30e3 100644
--- a/hw/i386/kvm/xen_gnttab.c
+++ b/hw/i386/kvm/xen_gnttab.c
@@ -22,6 +22,7 @@
#include "hw/sysbus.h"
#include "hw/xen/xen.h"
+#include "hw/xen/xen_backend_ops.h"
#include "xen_overlay.h"
#include "xen_gnttab.h"
@@ -34,11 +35,10 @@
#define TYPE_XEN_GNTTAB "xen-gnttab"
OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)
-#define XEN_PAGE_SHIFT 12
-#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
-
#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
+static struct gnttab_backend_ops emu_gnttab_backend_ops;
+
struct XenGnttabState {
/*< private >*/
SysBusDevice busdev;
@@ -57,6 +57,8 @@ struct XenGnttabState {
MemoryRegion gnt_frames;
MemoryRegion *gnt_aliases;
uint64_t *gnt_frame_gpas;
+
+ uint8_t *map_track;
};
struct XenGnttabState *xen_gnttab_singleton;
@@ -70,13 +72,11 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp)
error_setg(errp, "Xen grant table support is for Xen emulation");
return;
}
- s->nr_frames = 0;
s->max_frames = kvm_xen_get_gnttab_max_frames();
memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table",
XEN_PAGE_SIZE * s->max_frames, &error_abort);
memory_region_set_enabled(&s->gnt_frames, true);
s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames);
- memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
/* Create individual page-sizes aliases for overlays */
s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames);
@@ -88,9 +88,18 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp)
s->gnt_frame_gpas[i] = INVALID_GPA;
}
+ s->nr_frames = 0;
+ memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
+ s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
+ s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
+
qemu_mutex_init(&s->gnt_lock);
xen_gnttab_singleton = s;
+
+ s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);
+
+ xen_gnttab_ops = &emu_gnttab_backend_ops;
}
static int xen_gnttab_post_load(void *opaque, int version_id)
@@ -230,3 +239,309 @@ int xen_gnttab_query_size_op(struct gnttab_query_size *size)
size->max_nr_frames = s->max_frames;
return 0;
}
+
+/* Track per-open refs, to allow close() to clean up. */
+struct active_ref {
+ MemoryRegionSection mrs;
+ void *virtaddr;
+ uint32_t refcnt;
+ int prot;
+};
+
+static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
+ MemoryRegionSection *mrs, int prot)
+{
+ if (mrs && mrs->mr) {
+ if (prot & PROT_WRITE) {
+ memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
+ XEN_PAGE_SIZE);
+ }
+ memory_region_unref(mrs->mr);
+ mrs->mr = NULL;
+ }
+ assert(s->map_track[ref] != 0);
+
+ if (--s->map_track[ref] == 0) {
+ grant_entry_v1_t *gnt_p = &s->entries.v1[ref];
+ qatomic_and(&gnt_p->flags, (uint16_t)~(GTF_reading | GTF_writing));
+ }
+}
+
+static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
+{
+ uint16_t mask = GTF_type_mask | GTF_sub_page;
+ grant_entry_v1_t gnt, *gnt_p;
+ int retries = 0;
+
+ if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
+ s->map_track[ref] == UINT8_MAX) {
+ return INVALID_GPA;
+ }
+
+ if (prot & PROT_WRITE) {
+ mask |= GTF_readonly;
+ }
+
+ gnt_p = &s->entries.v1[ref];
+
+ /*
+ * The guest can legitimately be changing the GTF_readonly flag. Allow
+ * that, but don't let a malicious guest cause a livelock.
+ */
+ for (retries = 0; retries < 5; retries++) {
+ uint16_t new_flags;
+
+ /* Read the entry before an atomic operation on its flags */
+ gnt = *(volatile grant_entry_v1_t *)gnt_p;
+
+ if ((gnt.flags & mask) != GTF_permit_access ||
+ gnt.domid != DOMID_QEMU) {
+ return INVALID_GPA;
+ }
+
+ new_flags = gnt.flags | GTF_reading;
+ if (prot & PROT_WRITE) {
+ new_flags |= GTF_writing;
+ }
+
+ if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) {
+ return (uint64_t)gnt.frame << XEN_PAGE_SHIFT;
+ }
+ }
+
+ return INVALID_GPA;
+}
+
+struct xengntdev_handle {
+ GHashTable *active_maps;
+};
+
+static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
+ uint32_t nr_grants)
+{
+ return 0;
+}
+
+static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt,
+ uint32_t count, uint32_t domid,
+ uint32_t *refs, int prot)
+{
+ XenGnttabState *s = xen_gnttab_singleton;
+ struct active_ref *act;
+
+ if (!s) {
+ errno = ENOTSUP;
+ return NULL;
+ }
+
+ if (domid != xen_domid) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ if (!count || count > 4096) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ /*
+ * Making a contiguous mapping from potentially discontiguous grant
+ * references would be... distinctly non-trivial. We don't support it.
+ * Even changing the API to return an array of pointers, one per page,
+ * wouldn't be simple to use in PV backends because some structures
+ * actually cross page boundaries (e.g. 32-bit blkif_response ring
+ * entries are 12 bytes).
+ */
+ if (count != 1) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ QEMU_LOCK_GUARD(&s->gnt_lock);
+
+ act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
+ if (act) {
+ if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) {
+ if (gnt_ref(s, refs[0], prot) == INVALID_GPA) {
+ return NULL;
+ }
+ act->prot |= PROT_WRITE;
+ }
+ act->refcnt++;
+ } else {
+ uint64_t gpa = gnt_ref(s, refs[0], prot);
+ if (gpa == INVALID_GPA) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ act = g_new0(struct active_ref, 1);
+ act->prot = prot;
+ act->refcnt = 1;
+ act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE);
+
+ if (act->mrs.mr &&
+ !int128_lt(act->mrs.size, int128_make64(XEN_PAGE_SIZE)) &&
+ memory_region_get_ram_addr(act->mrs.mr) != RAM_ADDR_INVALID) {
+ act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block,
+ act->mrs.offset_within_region);
+ }
+ if (!act->virtaddr) {
+ gnt_unref(s, refs[0], &act->mrs, 0);
+ g_free(act);
+ errno = EINVAL;
+ return NULL;
+ }
+
+ s->map_track[refs[0]]++;
+ g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act);
+ }
+
+ return act->virtaddr;
+}
+
+static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data)
+{
+ XenGnttabState *s = user_data;
+ grant_ref_t gref = GPOINTER_TO_INT(key);
+ struct active_ref *act = value;
+
+ gnt_unref(s, gref, &act->mrs, act->prot);
+ g_free(act);
+ return true;
+}
+
+static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt,
+ void *start_address, uint32_t *refs,
+ uint32_t count)
+{
+ XenGnttabState *s = xen_gnttab_singleton;
+ struct active_ref *act;
+
+ if (!s) {
+ return -ENOTSUP;
+ }
+
+ if (count != 1) {
+ return -EINVAL;
+ }
+
+ QEMU_LOCK_GUARD(&s->gnt_lock);
+
+ act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
+ if (!act) {
+ return -ENOENT;
+ }
+
+ if (act->virtaddr != start_address) {
+ return -EINVAL;
+ }
+
+ if (!--act->refcnt) {
+ do_unmap(GINT_TO_POINTER(refs[0]), act, s);
+ g_hash_table_remove(xgt->active_maps, GINT_TO_POINTER(refs[0]));
+ }
+
+ return 0;
+}
+
+/*
+ * This looks a bit like the one for true Xen in xen-operations.c but
+ * in emulation we don't support multi-page mappings. And under Xen we
+ * *want* the multi-page mappings so we have fewer bounces through the
+ * kernel and the hypervisor. So the code paths end up being similar,
+ * but different.
+ */
+static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain,
+ uint32_t domid, XenGrantCopySegment *segs,
+ uint32_t nr_segs, Error **errp)
+{
+ int prot = to_domain ? PROT_WRITE : PROT_READ;
+ unsigned int i;
+
+ for (i = 0; i < nr_segs; i++) {
+ XenGrantCopySegment *seg = &segs[i];
+ void *page;
+ uint32_t ref = to_domain ? seg->dest.foreign.ref :
+ seg->source.foreign.ref;
+
+ page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot);
+ if (!page) {
+ if (errp) {
+ error_setg_errno(errp, errno,
+ "xen_be_gnttab_map_refs failed");
+ }
+ return -errno;
+ }
+
+ if (to_domain) {
+ memcpy(page + seg->dest.foreign.offset, seg->source.virt,
+ seg->len);
+ } else {
+ memcpy(seg->dest.virt, page + seg->source.foreign.offset,
+ seg->len);
+ }
+
+ if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) {
+ if (errp) {
+ error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed");
+ }
+ return -errno;
+ }
+ }
+
+ return 0;
+}
+
+static struct xengntdev_handle *xen_be_gnttab_open(void)
+{
+ struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1);
+
+ xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal);
+ return xgt;
+}
+
+static int xen_be_gnttab_close(struct xengntdev_handle *xgt)
+{
+ XenGnttabState *s = xen_gnttab_singleton;
+
+ if (!s) {
+ return -ENOTSUP;
+ }
+
+ g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s);
+ g_hash_table_destroy(xgt->active_maps);
+ g_free(xgt);
+ return 0;
+}
+
+static struct gnttab_backend_ops emu_gnttab_backend_ops = {
+ .open = xen_be_gnttab_open,
+ .close = xen_be_gnttab_close,
+ .grant_copy = xen_be_gnttab_copy,
+ .set_max_grants = xen_be_gnttab_set_max_grants,
+ .map_refs = xen_be_gnttab_map_refs,
+ .unmap = xen_be_gnttab_unmap,
+};
+
+int xen_gnttab_reset(void)
+{
+ XenGnttabState *s = xen_gnttab_singleton;
+
+ if (!s) {
+ return -ENOTSUP;
+ }
+
+ QEMU_LOCK_GUARD(&s->gnt_lock);
+
+ s->nr_frames = 0;
+
+ memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
+
+ s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
+ s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
+
+ memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1);
+
+ return 0;
+}
diff --git a/hw/i386/kvm/xen_gnttab.h b/hw/i386/kvm/xen_gnttab.h
index 3bdbe96..ee21523 100644
--- a/hw/i386/kvm/xen_gnttab.h
+++ b/hw/i386/kvm/xen_gnttab.h
@@ -13,6 +13,7 @@
#define QEMU_XEN_GNTTAB_H
void xen_gnttab_create(void);
+int xen_gnttab_reset(void);
int xen_gnttab_map_page(uint64_t idx, uint64_t gfn);
struct gnttab_set_version;
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 14193ef..2cadafd 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -21,6 +21,7 @@
#include "hw/sysbus.h"
#include "hw/xen/xen.h"
+#include "hw/xen/xen_backend_ops.h"
#include "xen_overlay.h"
#include "xen_evtchn.h"
#include "xen_xenstore.h"
@@ -28,15 +29,17 @@
#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"
+#include "trace.h"
+
+#include "xenstore_impl.h"
+
#include "hw/xen/interface/io/xs_wire.h"
#include "hw/xen/interface/event_channel.h"
+#include "hw/xen/interface/grant_table.h"
#define TYPE_XEN_XENSTORE "xen-xenstore"
OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
-#define XEN_PAGE_SHIFT 12
-#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
-
#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
#define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
@@ -47,6 +50,9 @@ struct XenXenstoreState {
SysBusDevice busdev;
/*< public >*/
+ XenstoreImplState *impl;
+ GList *watch_events; /* for the guest */
+
MemoryRegion xenstore_page;
struct xenstore_domain_interface *xs;
uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
@@ -59,15 +65,54 @@ struct XenXenstoreState {
evtchn_port_t guest_port;
evtchn_port_t be_port;
struct xenevtchn_handle *eh;
+
+ uint8_t *impl_state;
+ uint32_t impl_state_size;
+
+ struct xengntdev_handle *gt;
+ void *granted_xs;
};
struct XenXenstoreState *xen_xenstore_singleton;
static void xen_xenstore_event(void *opaque);
+static void fire_watch_cb(void *opaque, const char *path, const char *token);
+
+static struct xenstore_backend_ops emu_xenstore_backend_ops;
+
+static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s,
+ GList *perms,
+ const char *relpath,
+ const char *fmt, ...)
+{
+ gchar *abspath;
+ gchar *value;
+ va_list args;
+ GByteArray *data;
+ int err;
+
+ abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath);
+ va_start(args, fmt);
+ value = g_strdup_vprintf(fmt, args);
+ va_end(args);
+
+ data = g_byte_array_new_take((void *)value, strlen(value));
+
+ err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data);
+ assert(!err);
+
+ g_byte_array_unref(data);
+
+ err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms);
+ assert(!err);
+
+ g_free(abspath);
+}
static void xen_xenstore_realize(DeviceState *dev, Error **errp)
{
XenXenstoreState *s = XEN_XENSTORE(dev);
+ GList *perms;
if (xen_mode != XEN_EMULATE) {
error_setg(errp, "Xen xenstore support is for Xen emulation");
@@ -89,6 +134,50 @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp)
}
aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
xen_xenstore_event, NULL, NULL, NULL, s);
+
+ s->impl = xs_impl_create(xen_domid);
+
+ /* Populate the default nodes */
+
+ /* Nodes owned by 'dom0' but readable by the guest */
+ perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
+ perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));
+
+ relpath_printf(s, perms, "", "%s", "");
+
+ relpath_printf(s, perms, "domid", "%u", xen_domid);
+
+ relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", "%u", 1);
+ relpath_printf(s, perms, "control/platform-feature-multiprocessor-suspend", "%u", 1);
+
+ relpath_printf(s, perms, "platform/acpi", "%u", 1);
+ relpath_printf(s, perms, "platform/acpi_s3", "%u", 1);
+ relpath_printf(s, perms, "platform/acpi_s4", "%u", 1);
+ relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0);
+
+ g_list_free_full(perms, g_free);
+
+ /* Nodes owned by the guest */
+ perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid));
+
+ relpath_printf(s, perms, "attr", "%s", "");
+
+ relpath_printf(s, perms, "control/shutdown", "%s", "");
+ relpath_printf(s, perms, "control/feature-poweroff", "%u", 1);
+ relpath_printf(s, perms, "control/feature-reboot", "%u", 1);
+ relpath_printf(s, perms, "control/feature-suspend", "%u", 1);
+ relpath_printf(s, perms, "control/feature-s3", "%u", 1);
+ relpath_printf(s, perms, "control/feature-s4", "%u", 1);
+
+ relpath_printf(s, perms, "data", "%s", "");
+ relpath_printf(s, perms, "device", "%s", "");
+ relpath_printf(s, perms, "drivers", "%s", "");
+ relpath_printf(s, perms, "error", "%s", "");
+ relpath_printf(s, perms, "feature", "%s", "");
+
+ g_list_free_full(perms, g_free);
+
+ xen_xenstore_ops = &emu_xenstore_backend_ops;
}
static bool xen_xenstore_is_needed(void *opaque)
@@ -99,16 +188,26 @@ static bool xen_xenstore_is_needed(void *opaque)
static int xen_xenstore_pre_save(void *opaque)
{
XenXenstoreState *s = opaque;
+ GByteArray *save;
if (s->eh) {
s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
}
+
+ g_free(s->impl_state);
+ save = xs_impl_serialize(s->impl);
+ s->impl_state = save->data;
+ s->impl_state_size = save->len;
+ g_byte_array_free(save, false);
+
return 0;
}
static int xen_xenstore_post_load(void *opaque, int ver)
{
XenXenstoreState *s = opaque;
+ GByteArray *save;
+ int ret;
/*
* As qemu/dom0, rebind to the guest's port. The Windows drivers may
@@ -125,11 +224,18 @@ static int xen_xenstore_post_load(void *opaque, int ver)
}
s->be_port = be_port;
}
- return 0;
+
+ save = g_byte_array_new_take(s->impl_state, s->impl_state_size);
+ s->impl_state = NULL;
+ s->impl_state_size = 0;
+
+ ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
+ return ret;
}
static const VMStateDescription xen_xenstore_vmstate = {
.name = "xen_xenstore",
+ .unmigratable = 1, /* The PV back ends don't migrate yet */
.version_id = 1,
.minimum_version_id = 1,
.needed = xen_xenstore_is_needed,
@@ -145,6 +251,10 @@ static const VMStateDescription xen_xenstore_vmstate = {
VMSTATE_BOOL(rsp_pending, XenXenstoreState),
VMSTATE_UINT32(guest_port, XenXenstoreState),
VMSTATE_BOOL(fatal_error, XenXenstoreState),
+ VMSTATE_UINT32(impl_state_size, XenXenstoreState),
+ VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState,
+ impl_state_size, 0,
+ vmstate_info_uint8, uint8_t),
VMSTATE_END_OF_LIST()
}
};
@@ -213,20 +323,761 @@ static void reset_rsp(XenXenstoreState *s)
s->rsp_offset = 0;
}
+static void xs_error(XenXenstoreState *s, unsigned int id,
+ xs_transaction_t tx_id, int errnum)
+{
+ struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+ const char *errstr = NULL;
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) {
+ struct xsd_errors *xsd_error = &xsd_errors[i];
+
+ if (xsd_error->errnum == errnum) {
+ errstr = xsd_error->errstring;
+ break;
+ }
+ }
+ assert(errstr);
+
+ trace_xenstore_error(id, tx_id, errstr);
+
+ rsp->type = XS_ERROR;
+ rsp->req_id = id;
+ rsp->tx_id = tx_id;
+ rsp->len = (uint32_t)strlen(errstr) + 1;
+
+ memcpy(&rsp[1], errstr, rsp->len);
+}
+
+static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id,
+ xs_transaction_t tx_id)
+{
+ struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+ const char *okstr = "OK";
+
+ rsp->type = type;
+ rsp->req_id = req_id;
+ rsp->tx_id = tx_id;
+ rsp->len = (uint32_t)strlen(okstr) + 1;
+
+ memcpy(&rsp[1], okstr, rsp->len);
+}
+
+/*
+ * The correct request and response formats are documented in xen.git:
+ * docs/misc/xenstore.txt. A summary is given below for convenience.
+ * The '|' symbol represents a NUL character.
+ *
+ * ---------- Database read, write and permissions operations ----------
+ *
+ * READ <path>| <value|>
+ * WRITE <path>|<value|>
+ * Store and read the octet string <value> at <path>.
+ * WRITE creates any missing parent paths, with empty values.
+ *
+ * MKDIR <path>|
+ * Ensures that the <path> exists, by necessary by creating
+ * it and any missing parents with empty values. If <path>
+ * or any parent already exists, its value is left unchanged.
+ *
+ * RM <path>|
+ * Ensures that the <path> does not exist, by deleting
+ * it and all of its children. It is not an error if <path> does
+ * not exist, but it _is_ an error if <path>'s immediate parent
+ * does not exist either.
+ *
+ * DIRECTORY <path>| <child-leaf-name>|*
+ * Gives a list of the immediate children of <path>, as only the
+ * leafnames. The resulting children are each named
+ * <path>/<child-leaf-name>.
+ *
+ * DIRECTORY_PART <path>|<offset> <gencnt>|<child-leaf-name>|*
+ * Same as DIRECTORY, but to be used for children lists longer than
+ * XENSTORE_PAYLOAD_MAX. Input are <path> and the byte offset into
+ * the list of children to return. Return values are the generation
+ * count <gencnt> of the node (to be used to ensure the node hasn't
+ * changed between two reads: <gencnt> being the same for multiple
+ * reads guarantees the node hasn't changed) and the list of children
+ * starting at the specified <offset> of the complete list.
+ *
+ * GET_PERMS <path>| <perm-as-string>|+
+ * SET_PERMS <path>|<perm-as-string>|+?
+ * <perm-as-string> is one of the following
+ * w<domid> write only
+ * r<domid> read only
+ * b<domid> both read and write
+ * n<domid> no access
+ * See https://wiki.xen.org/wiki/XenBus section
+ * `Permissions' for details of the permissions system.
+ * It is possible to set permissions for the special watch paths
+ * "@introduceDomain" and "@releaseDomain" to enable receiving those
+ * watches in unprivileged domains.
+ *
+ * ---------- Watches ----------
+ *
+ * WATCH <wpath>|<token>|?
+ * Adds a watch.
+ *
+ * When a <path> is modified (including path creation, removal,
+ * contents change or permissions change) this generates an event
+ * on the changed <path>. Changes made in transactions cause an
+ * event only if and when committed. Each occurring event is
+ * matched against all the watches currently set up, and each
+ * matching watch results in a WATCH_EVENT message (see below).
+ *
+ * The event's path matches the watch's <wpath> if it is an child
+ * of <wpath>.
+ *
+ * <wpath> can be a <path> to watch or @<wspecial>. In the
+ * latter case <wspecial> may have any syntax but it matches
+ * (according to the rules above) only the following special
+ * events which are invented by xenstored:
+ * @introduceDomain occurs on INTRODUCE
+ * @releaseDomain occurs on any domain crash or
+ * shutdown, and also on RELEASE
+ * and domain destruction
+ * <wspecial> events are sent to privileged callers or explicitly
+ * via SET_PERMS enabled domains only.
+ *
+ * When a watch is first set up it is triggered once straight
+ * away, with <path> equal to <wpath>. Watches may be triggered
+ * spuriously. The tx_id in a WATCH request is ignored.
+ *
+ * Watches are supposed to be restricted by the permissions
+ * system but in practice the implementation is imperfect.
+ * Applications should not rely on being sent a notification for
+ * paths that they cannot read; however, an application may rely
+ * on being sent a watch when a path which it _is_ able to read
+ * is deleted even if that leaves only a nonexistent unreadable
+ * parent. A notification may omitted if a node's permissions
+ * are changed so as to make it unreadable, in which case future
+ * notifications may be suppressed (and if the node is later made
+ * readable, some notifications may have been lost).
+ *
+ * WATCH_EVENT <epath>|<token>|
+ * Unsolicited `reply' generated for matching modification events
+ * as described above. req_id and tx_id are both 0.
+ *
+ * <epath> is the event's path, ie the actual path that was
+ * modified; however if the event was the recursive removal of an
+ * parent of <wpath>, <epath> is just
+ * <wpath> (rather than the actual path which was removed). So
+ * <epath> is a child of <wpath>, regardless.
+ *
+ * Iff <wpath> for the watch was specified as a relative pathname,
+ * the <epath> path will also be relative (with the same base,
+ * obviously).
+ *
+ * UNWATCH <wpath>|<token>|?
+ *
+ * RESET_WATCHES |
+ * Reset all watches and transactions of the caller.
+ *
+ * ---------- Transactions ----------
+ *
+ * TRANSACTION_START | <transid>|
+ * <transid> is an opaque uint32_t allocated by xenstored
+ * represented as unsigned decimal. After this, transaction may
+ * be referenced by using <transid> (as 32-bit binary) in the
+ * tx_id request header field. When transaction is started whole
+ * db is copied; reads and writes happen on the copy.
+ * It is not legal to send non-0 tx_id in TRANSACTION_START.
+ *
+ * TRANSACTION_END T|
+ * TRANSACTION_END F|
+ * tx_id must refer to existing transaction. After this
+ * request the tx_id is no longer valid and may be reused by
+ * xenstore. If F, the transaction is discarded. If T,
+ * it is committed: if there were any other intervening writes
+ * then our END gets get EAGAIN.
+ *
+ * The plan is that in the future only intervening `conflicting'
+ * writes cause EAGAIN, meaning only writes or other commits
+ * which changed paths which were read or written in the
+ * transaction at hand.
+ *
+ */
+
+static void xs_read(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
+{
+ const char *path = (const char *)req_data;
+ struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+ uint8_t *rsp_data = (uint8_t *)&rsp[1];
+ g_autoptr(GByteArray) data = g_byte_array_new();
+ int err;
+
+ if (len == 0 || req_data[len - 1] != '\0') {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ trace_xenstore_read(tx_id, path);
+ err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
+ if (err) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ rsp->type = XS_READ;
+ rsp->req_id = req_id;
+ rsp->tx_id = tx_id;
+ rsp->len = 0;
+
+ len = data->len;
+ if (len > XENSTORE_PAYLOAD_MAX) {
+ xs_error(s, req_id, tx_id, E2BIG);
+ return;
+ }
+
+ memcpy(&rsp_data[rsp->len], data->data, len);
+ rsp->len += len;
+}
+
+static void xs_write(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data,
+ unsigned int len)
+{
+ g_autoptr(GByteArray) data = g_byte_array_new();
+ const char *path;
+ int err;
+
+ if (len == 0) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ path = (const char *)req_data;
+
+ while (len--) {
+ if (*req_data++ == '\0') {
+ break;
+ }
+ if (len == 0) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+ }
+
+ g_byte_array_append(data, req_data, len);
+
+ trace_xenstore_write(tx_id, path);
+ err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
+ if (err) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ xs_ok(s, XS_WRITE, req_id, tx_id);
+}
+
+static void xs_mkdir(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data,
+ unsigned int len)
+{
+ g_autoptr(GByteArray) data = g_byte_array_new();
+ const char *path;
+ int err;
+
+ if (len == 0 || req_data[len - 1] != '\0') {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ path = (const char *)req_data;
+
+ trace_xenstore_mkdir(tx_id, path);
+ err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
+ if (err == ENOENT) {
+ err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
+ }
+
+ if (!err) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ xs_ok(s, XS_MKDIR, req_id, tx_id);
+}
+
+static void xs_append_strings(XenXenstoreState *s, struct xsd_sockmsg *rsp,
+ GList *strings, unsigned int start, bool truncate)
+{
+ uint8_t *rsp_data = (uint8_t *)&rsp[1];
+ GList *l;
+
+ for (l = strings; l; l = l->next) {
+ size_t len = strlen(l->data) + 1; /* Including the NUL termination */
+ char *str = l->data;
+
+ if (rsp->len + len > XENSTORE_PAYLOAD_MAX) {
+ if (truncate) {
+ len = XENSTORE_PAYLOAD_MAX - rsp->len;
+ if (!len) {
+ return;
+ }
+ } else {
+ xs_error(s, rsp->req_id, rsp->tx_id, E2BIG);
+ return;
+ }
+ }
+
+ if (start) {
+ if (start >= len) {
+ start -= len;
+ continue;
+ }
+
+ str += start;
+ len -= start;
+ start = 0;
+ }
+
+ memcpy(&rsp_data[rsp->len], str, len);
+ rsp->len += len;
+ }
+ /* XS_DIRECTORY_PART wants an extra NUL to indicate the end */
+ if (truncate && rsp->len < XENSTORE_PAYLOAD_MAX) {
+ rsp_data[rsp->len++] = '\0';
+ }
+}
+
+static void xs_directory(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data,
+ unsigned int len)
+{
+ struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+ GList *items = NULL;
+ const char *path;
+ int err;
+
+ if (len == 0 || req_data[len - 1] != '\0') {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ path = (const char *)req_data;
+
+ trace_xenstore_directory(tx_id, path);
+ err = xs_impl_directory(s->impl, xen_domid, tx_id, path, NULL, &items);
+ if (err != 0) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ rsp->type = XS_DIRECTORY;
+ rsp->req_id = req_id;
+ rsp->tx_id = tx_id;
+ rsp->len = 0;
+
+ xs_append_strings(s, rsp, items, 0, false);
+
+ g_list_free_full(items, g_free);
+}
+
+static void xs_directory_part(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data,
+ unsigned int len)
+{
+ const char *offset_str, *path = (const char *)req_data;
+ struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+ char *rsp_data = (char *)&rsp[1];
+ uint64_t gencnt = 0;
+ unsigned int offset;
+ GList *items = NULL;
+ int err;
+
+ if (len == 0) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ while (len--) {
+ if (*req_data++ == '\0') {
+ break;
+ }
+ if (len == 0) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+ }
+
+ offset_str = (const char *)req_data;
+ while (len--) {
+ if (*req_data++ == '\0') {
+ break;
+ }
+ if (len == 0) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+ }
+
+ if (len) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ if (qemu_strtoui(offset_str, NULL, 10, &offset) < 0) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ trace_xenstore_directory_part(tx_id, path, offset);
+ err = xs_impl_directory(s->impl, xen_domid, tx_id, path, &gencnt, &items);
+ if (err != 0) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ rsp->type = XS_DIRECTORY_PART;
+ rsp->req_id = req_id;
+ rsp->tx_id = tx_id;
+ rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%" PRIu64, gencnt) + 1;
+
+ xs_append_strings(s, rsp, items, offset, true);
+
+ g_list_free_full(items, g_free);
+}
+
+static void xs_transaction_start(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data,
+ unsigned int len)
+{
+ struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+ char *rsp_data = (char *)&rsp[1];
+ int err;
+
+ if (len != 1 || req_data[0] != '\0') {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ rsp->type = XS_TRANSACTION_START;
+ rsp->req_id = req_id;
+ rsp->tx_id = tx_id;
+ rsp->len = 0;
+
+ err = xs_impl_transaction_start(s->impl, xen_domid, &tx_id);
+ if (err) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ trace_xenstore_transaction_start(tx_id);
+
+ rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%u", tx_id);
+ assert(rsp->len < XENSTORE_PAYLOAD_MAX);
+ rsp->len++;
+}
+
+static void xs_transaction_end(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data,
+ unsigned int len)
+{
+ bool commit;
+ int err;
+
+ if (len != 2 || req_data[1] != '\0') {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ switch (req_data[0]) {
+ case 'T':
+ commit = true;
+ break;
+ case 'F':
+ commit = false;
+ break;
+ default:
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ trace_xenstore_transaction_end(tx_id, commit);
+ err = xs_impl_transaction_end(s->impl, xen_domid, tx_id, commit);
+ if (err) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ xs_ok(s, XS_TRANSACTION_END, req_id, tx_id);
+}
+
+static void xs_rm(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
+{
+ const char *path = (const char *)req_data;
+ int err;
+
+ if (len == 0 || req_data[len - 1] != '\0') {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ trace_xenstore_rm(tx_id, path);
+ err = xs_impl_rm(s->impl, xen_domid, tx_id, path);
+ if (err) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ xs_ok(s, XS_RM, req_id, tx_id);
+}
+
+static void xs_get_perms(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data,
+ unsigned int len)
+{
+ const char *path = (const char *)req_data;
+ struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+ GList *perms = NULL;
+ int err;
+
+ if (len == 0 || req_data[len - 1] != '\0') {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ trace_xenstore_get_perms(tx_id, path);
+ err = xs_impl_get_perms(s->impl, xen_domid, tx_id, path, &perms);
+ if (err) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ rsp->type = XS_GET_PERMS;
+ rsp->req_id = req_id;
+ rsp->tx_id = tx_id;
+ rsp->len = 0;
+
+ xs_append_strings(s, rsp, perms, 0, false);
+
+ g_list_free_full(perms, g_free);
+}
+
+static void xs_set_perms(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data,
+ unsigned int len)
+{
+ const char *path = (const char *)req_data;
+ uint8_t *perm;
+ GList *perms = NULL;
+ int err;
+
+ if (len == 0) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ while (len--) {
+ if (*req_data++ == '\0') {
+ break;
+ }
+ if (len == 0) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+ }
+
+ perm = req_data;
+ while (len--) {
+ if (*req_data++ == '\0') {
+ perms = g_list_append(perms, perm);
+ perm = req_data;
+ }
+ }
+
+ /*
+ * Note that there may be trailing garbage at the end of the buffer.
+ * This is explicitly permitted by the '?' at the end of the definition:
+ *
+ * SET_PERMS <path>|<perm-as-string>|+?
+ */
+
+ trace_xenstore_set_perms(tx_id, path);
+ err = xs_impl_set_perms(s->impl, xen_domid, tx_id, path, perms);
+ g_list_free(perms);
+ if (err) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ xs_ok(s, XS_SET_PERMS, req_id, tx_id);
+}
+
+static void xs_watch(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data,
+ unsigned int len)
+{
+ const char *token, *path = (const char *)req_data;
+ int err;
+
+ if (len == 0) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ while (len--) {
+ if (*req_data++ == '\0') {
+ break;
+ }
+ if (len == 0) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+ }
+
+ token = (const char *)req_data;
+ while (len--) {
+ if (*req_data++ == '\0') {
+ break;
+ }
+ if (len == 0) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+ }
+
+ /*
+ * Note that there may be trailing garbage at the end of the buffer.
+ * This is explicitly permitted by the '?' at the end of the definition:
+ *
+ * WATCH <wpath>|<token>|?
+ */
+
+ trace_xenstore_watch(path, token);
+ err = xs_impl_watch(s->impl, xen_domid, path, token, fire_watch_cb, s);
+ if (err) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ xs_ok(s, XS_WATCH, req_id, tx_id);
+}
+
+static void xs_unwatch(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data,
+ unsigned int len)
+{
+ const char *token, *path = (const char *)req_data;
+ int err;
+
+ if (len == 0) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ while (len--) {
+ if (*req_data++ == '\0') {
+ break;
+ }
+ if (len == 0) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+ }
+
+ token = (const char *)req_data;
+ while (len--) {
+ if (*req_data++ == '\0') {
+ break;
+ }
+ if (len == 0) {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+ }
+
+ trace_xenstore_unwatch(path, token);
+ err = xs_impl_unwatch(s->impl, xen_domid, path, token, fire_watch_cb, s);
+ if (err) {
+ xs_error(s, req_id, tx_id, err);
+ return;
+ }
+
+ xs_ok(s, XS_UNWATCH, req_id, tx_id);
+}
+
+static void xs_reset_watches(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *req_data,
+ unsigned int len)
+{
+ if (len == 0 || req_data[len - 1] != '\0') {
+ xs_error(s, req_id, tx_id, EINVAL);
+ return;
+ }
+
+ trace_xenstore_reset_watches();
+ xs_impl_reset_watches(s->impl, xen_domid);
+
+ xs_ok(s, XS_RESET_WATCHES, req_id, tx_id);
+}
+
+static void xs_priv(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *data,
+ unsigned int len)
+{
+ xs_error(s, req_id, tx_id, EACCES);
+}
+
+static void xs_unimpl(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *data,
+ unsigned int len)
+{
+ xs_error(s, req_id, tx_id, ENOSYS);
+}
+
+typedef void (*xs_impl)(XenXenstoreState *s, unsigned int req_id,
+ xs_transaction_t tx_id, uint8_t *data,
+ unsigned int len);
+
+struct xsd_req {
+ const char *name;
+ xs_impl fn;
+};
+#define XSD_REQ(_type, _fn) \
+ [_type] = { .name = #_type, .fn = _fn }
+
+struct xsd_req xsd_reqs[] = {
+ XSD_REQ(XS_READ, xs_read),
+ XSD_REQ(XS_WRITE, xs_write),
+ XSD_REQ(XS_MKDIR, xs_mkdir),
+ XSD_REQ(XS_DIRECTORY, xs_directory),
+ XSD_REQ(XS_DIRECTORY_PART, xs_directory_part),
+ XSD_REQ(XS_TRANSACTION_START, xs_transaction_start),
+ XSD_REQ(XS_TRANSACTION_END, xs_transaction_end),
+ XSD_REQ(XS_RM, xs_rm),
+ XSD_REQ(XS_GET_PERMS, xs_get_perms),
+ XSD_REQ(XS_SET_PERMS, xs_set_perms),
+ XSD_REQ(XS_WATCH, xs_watch),
+ XSD_REQ(XS_UNWATCH, xs_unwatch),
+ XSD_REQ(XS_CONTROL, xs_priv),
+ XSD_REQ(XS_INTRODUCE, xs_priv),
+ XSD_REQ(XS_RELEASE, xs_priv),
+ XSD_REQ(XS_IS_DOMAIN_INTRODUCED, xs_priv),
+ XSD_REQ(XS_RESUME, xs_priv),
+ XSD_REQ(XS_SET_TARGET, xs_priv),
+ XSD_REQ(XS_RESET_WATCHES, xs_reset_watches),
+};
+
static void process_req(XenXenstoreState *s)
{
struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
- struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
- const char enosys[] = "ENOSYS";
+ xs_impl handler = NULL;
assert(req_pending(s));
assert(!s->rsp_pending);
- rsp->type = XS_ERROR;
- rsp->req_id = req->req_id;
- rsp->tx_id = req->tx_id;
- rsp->len = sizeof(enosys);
- memcpy((void *)&rsp[1], enosys, sizeof(enosys));
+ if (req->type < ARRAY_SIZE(xsd_reqs)) {
+ handler = xsd_reqs[req->type].fn;
+ }
+ if (!handler) {
+ handler = &xs_unimpl;
+ }
+
+ handler(s, req->req_id, req->tx_id, (uint8_t *)&req[1], req->len);
s->rsp_pending = true;
reset_req(s);
@@ -415,6 +1266,113 @@ static unsigned int put_rsp(XenXenstoreState *s)
return copylen;
}
+static void deliver_watch(XenXenstoreState *s, const char *path,
+ const char *token)
+{
+ struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
+ uint8_t *rsp_data = (uint8_t *)&rsp[1];
+ unsigned int len;
+
+ assert(!s->rsp_pending);
+
+ trace_xenstore_watch_event(path, token);
+
+ rsp->type = XS_WATCH_EVENT;
+ rsp->req_id = 0;
+ rsp->tx_id = 0;
+ rsp->len = 0;
+
+ len = strlen(path);
+
+ /* XENSTORE_ABS/REL_PATH_MAX should ensure there can be no overflow */
+ assert(rsp->len + len < XENSTORE_PAYLOAD_MAX);
+
+ memcpy(&rsp_data[rsp->len], path, len);
+ rsp->len += len;
+ rsp_data[rsp->len] = '\0';
+ rsp->len++;
+
+ len = strlen(token);
+ /*
+ * It is possible for the guest to have chosen a token that will
+ * not fit (along with the patch) into a watch event. We have no
+ * choice but to drop the event if this is the case.
+ */
+ if (rsp->len + len >= XENSTORE_PAYLOAD_MAX) {
+ return;
+ }
+
+ memcpy(&rsp_data[rsp->len], token, len);
+ rsp->len += len;
+ rsp_data[rsp->len] = '\0';
+ rsp->len++;
+
+ s->rsp_pending = true;
+}
+
+struct watch_event {
+ char *path;
+ char *token;
+};
+
+static void free_watch_event(struct watch_event *ev)
+{
+ if (ev) {
+ g_free(ev->path);
+ g_free(ev->token);
+ g_free(ev);
+ }
+}
+
+static void queue_watch(XenXenstoreState *s, const char *path,
+ const char *token)
+{
+ struct watch_event *ev = g_new0(struct watch_event, 1);
+
+ ev->path = g_strdup(path);
+ ev->token = g_strdup(token);
+
+ s->watch_events = g_list_append(s->watch_events, ev);
+}
+
+static void fire_watch_cb(void *opaque, const char *path, const char *token)
+{
+ XenXenstoreState *s = opaque;
+
+ assert(qemu_mutex_iothread_locked());
+
+ /*
+ * If there's a response pending, we obviously can't scribble over
+ * it. But if there's a request pending, it has dibs on the buffer
+ * too.
+ *
+ * In the common case of a watch firing due to backend activity
+ * when the ring was otherwise idle, we should be able to copy the
+ * strings directly into the rsp_data and thence the actual ring,
+ * without needing to perform any allocations and queue them.
+ */
+ if (s->rsp_pending || req_pending(s)) {
+ queue_watch(s, path, token);
+ } else {
+ deliver_watch(s, path, token);
+ /*
+ * If the message was queued because there was already ring activity,
+ * no need to wake the guest. But if not, we need to send the evtchn.
+ */
+ xen_be_evtchn_notify(s->eh, s->be_port);
+ }
+}
+
+static void process_watch_events(XenXenstoreState *s)
+{
+ struct watch_event *ev = s->watch_events->data;
+
+ deliver_watch(s, ev->path, ev->token);
+
+ s->watch_events = g_list_remove(s->watch_events, ev);
+ free_watch_event(ev);
+}
+
static void xen_xenstore_event(void *opaque)
{
XenXenstoreState *s = opaque;
@@ -433,6 +1391,10 @@ static void xen_xenstore_event(void *opaque)
copied_to = copied_from = 0;
processed = false;
+ if (!s->rsp_pending && s->watch_events) {
+ process_watch_events(s);
+ }
+
if (s->rsp_pending) {
copied_to = put_rsp(s);
}
@@ -441,7 +1403,7 @@ static void xen_xenstore_event(void *opaque)
copied_from = get_req(s);
}
- if (req_pending(s) && !s->rsp_pending) {
+ if (req_pending(s) && !s->rsp_pending && !s->watch_events) {
process_req(s);
processed = true;
}
@@ -496,5 +1458,270 @@ int xen_xenstore_reset(void)
}
s->be_port = err;
+ /*
+ * We don't actually access the guest's page through the grant, because
+ * this isn't real Xen, and we can just use the page we gave it in the
+ * first place. Map the grant anyway, mostly for cosmetic purposes so
+ * it *looks* like it's in use in the guest-visible grant table.
+ */
+ s->gt = qemu_xen_gnttab_open();
+ uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE;
+ s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref,
+ PROT_READ | PROT_WRITE);
+
return 0;
}
+
+struct qemu_xs_handle {
+ XenstoreImplState *impl;
+ GList *watches;
+ QEMUBH *watch_bh;
+};
+
+struct qemu_xs_watch {
+ struct qemu_xs_handle *h;
+ char *path;
+ xs_watch_fn fn;
+ void *opaque;
+ GList *events;
+};
+
+static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid)
+{
+ return g_strdup_printf("/local/domain/%u", domid);
+}
+
+static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t,
+ const char *path, unsigned int *num)
+{
+ GList *items = NULL, *l;
+ unsigned int i = 0;
+ char **items_ret;
+ int err;
+
+ err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items);
+ if (err) {
+ errno = err;
+ return NULL;
+ }
+
+ items_ret = g_new0(char *, g_list_length(items) + 1);
+ *num = 0;
+ for (l = items; l; l = l->next) {
+ items_ret[i++] = l->data;
+ (*num)++;
+ }
+ g_list_free(items);
+ return items_ret;
+}
+
+static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t,
+ const char *path, unsigned int *len)
+{
+ GByteArray *data = g_byte_array_new();
+ bool free_segment = false;
+ int err;
+
+ err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
+ if (err) {
+ free_segment = true;
+ errno = err;
+ } else {
+ if (len) {
+ *len = data->len;
+ }
+ /* The xen-bus-helper code expects to get NUL terminated string! */
+ g_byte_array_append(data, (void *)"", 1);
+ }
+
+ return g_byte_array_free(data, free_segment);
+}
+
+static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t,
+ const char *path, const void *data, unsigned int len)
+{
+ GByteArray *gdata = g_byte_array_new();
+ int err;
+
+ g_byte_array_append(gdata, data, len);
+ err = xs_impl_write(h->impl, DOMID_QEMU, t, path, gdata);
+ g_byte_array_unref(gdata);
+ if (err) {
+ errno = err;
+ return false;
+ }
+ return true;
+}
+
+static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t,
+ unsigned int owner, unsigned int domid,
+ unsigned int perms, const char *path)
+{
+ g_autoptr(GByteArray) data = g_byte_array_new();
+ GList *perms_list = NULL;
+ int err;
+
+ /* mkdir does this */
+ err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
+ if (err == ENOENT) {
+ err = xs_impl_write(h->impl, DOMID_QEMU, t, path, data);
+ }
+ if (err) {
+ errno = err;
+ return false;
+ }
+
+ perms_list = g_list_append(perms_list,
+ xs_perm_as_string(XS_PERM_NONE, owner));
+ perms_list = g_list_append(perms_list,
+ xs_perm_as_string(perms, domid));
+
+ err = xs_impl_set_perms(h->impl, DOMID_QEMU, t, path, perms_list);
+ g_list_free_full(perms_list, g_free);
+ if (err) {
+ errno = err;
+ return false;
+ }
+ return true;
+}
+
+static bool xs_be_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
+ const char *path)
+{
+ int err = xs_impl_rm(h->impl, DOMID_QEMU, t, path);
+ if (err) {
+ errno = err;
+ return false;
+ }
+ return true;
+}
+
+static void be_watch_bh(void *_h)
+{
+ struct qemu_xs_handle *h = _h;
+ GList *l;
+
+ for (l = h->watches; l; l = l->next) {
+ struct qemu_xs_watch *w = l->data;
+
+ while (w->events) {
+ struct watch_event *ev = w->events->data;
+
+ w->fn(w->opaque, ev->path);
+
+ w->events = g_list_remove(w->events, ev);
+ free_watch_event(ev);
+ }
+ }
+}
+
+static void xs_be_watch_cb(void *opaque, const char *path, const char *token)
+{
+ struct watch_event *ev = g_new0(struct watch_event, 1);
+ struct qemu_xs_watch *w = opaque;
+
+ /* We don't care about the token */
+ ev->path = g_strdup(path);
+ w->events = g_list_append(w->events, ev);
+
+ qemu_bh_schedule(w->h->watch_bh);
+}
+
+static struct qemu_xs_watch *xs_be_watch(struct qemu_xs_handle *h,
+ const char *path, xs_watch_fn fn,
+ void *opaque)
+{
+ struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1);
+ int err;
+
+ w->h = h;
+ w->fn = fn;
+ w->opaque = opaque;
+
+ err = xs_impl_watch(h->impl, DOMID_QEMU, path, NULL, xs_be_watch_cb, w);
+ if (err) {
+ errno = err;
+ g_free(w);
+ return NULL;
+ }
+
+ w->path = g_strdup(path);
+ h->watches = g_list_append(h->watches, w);
+ return w;
+}
+
+static void xs_be_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
+{
+ xs_impl_unwatch(h->impl, DOMID_QEMU, w->path, NULL, xs_be_watch_cb, w);
+
+ h->watches = g_list_remove(h->watches, w);
+ g_list_free_full(w->events, (GDestroyNotify)free_watch_event);
+ g_free(w->path);
+ g_free(w);
+}
+
+static xs_transaction_t xs_be_transaction_start(struct qemu_xs_handle *h)
+{
+ unsigned int new_tx = XBT_NULL;
+ int err = xs_impl_transaction_start(h->impl, DOMID_QEMU, &new_tx);
+ if (err) {
+ errno = err;
+ return XBT_NULL;
+ }
+ return new_tx;
+}
+
+static bool xs_be_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t,
+ bool abort)
+{
+ int err = xs_impl_transaction_end(h->impl, DOMID_QEMU, t, !abort);
+ if (err) {
+ errno = err;
+ return false;
+ }
+ return true;
+}
+
+static struct qemu_xs_handle *xs_be_open(void)
+{
+ XenXenstoreState *s = xen_xenstore_singleton;
+ struct qemu_xs_handle *h;
+
+ if (!s && !s->impl) {
+ errno = -ENOSYS;
+ return NULL;
+ }
+
+ h = g_new0(struct qemu_xs_handle, 1);
+ h->impl = s->impl;
+
+ h->watch_bh = aio_bh_new(qemu_get_aio_context(), be_watch_bh, h);
+
+ return h;
+}
+
+static void xs_be_close(struct qemu_xs_handle *h)
+{
+ while (h->watches) {
+ struct qemu_xs_watch *w = h->watches->data;
+ xs_be_unwatch(h, w);
+ }
+
+ qemu_bh_delete(h->watch_bh);
+ g_free(h);
+}
+
+static struct xenstore_backend_ops emu_xenstore_backend_ops = {
+ .open = xs_be_open,
+ .close = xs_be_close,
+ .get_domain_path = xs_be_get_domain_path,
+ .directory = xs_be_directory,
+ .read = xs_be_read,
+ .write = xs_be_write,
+ .create = xs_be_create,
+ .destroy = xs_be_destroy,
+ .watch = xs_be_watch,
+ .unwatch = xs_be_unwatch,
+ .transaction_start = xs_be_transaction_start,
+ .transaction_end = xs_be_transaction_end,
+};
diff --git a/hw/i386/kvm/xenstore_impl.c b/hw/i386/kvm/xenstore_impl.c
new file mode 100644
index 0000000..305fe75
--- /dev/null
+++ b/hw/i386/kvm/xenstore_impl.c
@@ -0,0 +1,1927 @@
+/*
+ * QEMU Xen emulation: The actual implementation of XenStore
+ *
+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>, Paul Durrant <paul@xen.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qom/object.h"
+
+#include "hw/xen/xen.h"
+
+#include "xen_xenstore.h"
+#include "xenstore_impl.h"
+
+#include "hw/xen/interface/io/xs_wire.h"
+
+#define XS_MAX_WATCHES 128
+#define XS_MAX_DOMAIN_NODES 1000
+#define XS_MAX_NODE_SIZE 2048
+#define XS_MAX_TRANSACTIONS 10
+#define XS_MAX_PERMS_PER_NODE 5
+
+#define XS_VALID_CHARS "abcdefghijklmnopqrstuvwxyz" \
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
+ "0123456789-/_"
+
+typedef struct XsNode {
+ uint32_t ref;
+ GByteArray *content;
+ GList *perms;
+ GHashTable *children;
+ uint64_t gencnt;
+ bool deleted_in_tx;
+ bool modified_in_tx;
+ unsigned int serialized_tx;
+#ifdef XS_NODE_UNIT_TEST
+ gchar *name; /* debug only */
+#endif
+} XsNode;
+
+typedef struct XsWatch {
+ struct XsWatch *next;
+ xs_impl_watch_fn *cb;
+ void *cb_opaque;
+ char *token;
+ unsigned int dom_id;
+ int rel_prefix;
+} XsWatch;
+
+typedef struct XsTransaction {
+ XsNode *root;
+ unsigned int nr_nodes;
+ unsigned int base_tx;
+ unsigned int tx_id;
+ unsigned int dom_id;
+} XsTransaction;
+
+struct XenstoreImplState {
+ XsNode *root;
+ unsigned int nr_nodes;
+ GHashTable *watches;
+ unsigned int nr_domu_watches;
+ GHashTable *transactions;
+ unsigned int nr_domu_transactions;
+ unsigned int root_tx;
+ unsigned int last_tx;
+ bool serialized;
+};
+
+
+static void nobble_tx(gpointer key, gpointer value, gpointer user_data)
+{
+ unsigned int *new_tx_id = user_data;
+ XsTransaction *tx = value;
+
+ if (tx->base_tx == *new_tx_id) {
+ /* Transactions based on XBT_NULL will always fail */
+ tx->base_tx = XBT_NULL;
+ }
+}
+
+static inline unsigned int next_tx(struct XenstoreImplState *s)
+{
+ unsigned int tx_id;
+
+ /* Find the next TX id which isn't either XBT_NULL or in use. */
+ do {
+ tx_id = ++s->last_tx;
+ } while (tx_id == XBT_NULL || tx_id == s->root_tx ||
+ g_hash_table_lookup(s->transactions, GINT_TO_POINTER(tx_id)));
+
+ /*
+ * It is vanishingly unlikely, but ensure that no outstanding transaction
+ * is based on the (previous incarnation of the) newly-allocated TX id.
+ */
+ g_hash_table_foreach(s->transactions, nobble_tx, &tx_id);
+
+ return tx_id;
+}
+
+static inline XsNode *xs_node_new(void)
+{
+ XsNode *n = g_new0(XsNode, 1);
+ n->ref = 1;
+
+#ifdef XS_NODE_UNIT_TEST
+ nr_xs_nodes++;
+ xs_node_list = g_list_prepend(xs_node_list, n);
+#endif
+ return n;
+}
+
+static inline XsNode *xs_node_ref(XsNode *n)
+{
+ /* With just 10 transactions, it can never get anywhere near this. */
+ g_assert(n->ref < INT_MAX);
+
+ g_assert(n->ref);
+ n->ref++;
+ return n;
+}
+
+static inline void xs_node_unref(XsNode *n)
+{
+ if (!n) {
+ return;
+ }
+ g_assert(n->ref);
+ if (--n->ref) {
+ return;
+ }
+
+ if (n->content) {
+ g_byte_array_unref(n->content);
+ }
+ if (n->perms) {
+ g_list_free_full(n->perms, g_free);
+ }
+ if (n->children) {
+ g_hash_table_unref(n->children);
+ }
+#ifdef XS_NODE_UNIT_TEST
+ g_free(n->name);
+ nr_xs_nodes--;
+ xs_node_list = g_list_remove(xs_node_list, n);
+#endif
+ g_free(n);
+}
+
+char *xs_perm_as_string(unsigned int perm, unsigned int domid)
+{
+ char letter;
+
+ switch (perm) {
+ case XS_PERM_READ | XS_PERM_WRITE:
+ letter = 'b';
+ break;
+ case XS_PERM_READ:
+ letter = 'r';
+ break;
+ case XS_PERM_WRITE:
+ letter = 'w';
+ break;
+ case XS_PERM_NONE:
+ default:
+ letter = 'n';
+ break;
+ }
+
+ return g_strdup_printf("%c%u", letter, domid);
+}
+
+static gpointer do_perm_copy(gconstpointer src, gpointer user_data)
+{
+ return g_strdup(src);
+}
+
+static XsNode *xs_node_create(const char *name, GList *perms)
+{
+ XsNode *n = xs_node_new();
+
+#ifdef XS_NODE_UNIT_TEST
+ if (name) {
+ n->name = g_strdup(name);
+ }
+#endif
+
+ n->perms = g_list_copy_deep(perms, do_perm_copy, NULL);
+
+ return n;
+}
+
+/* For copying from one hash table to another using g_hash_table_foreach() */
+static void do_child_insert(gpointer key, gpointer value, gpointer user_data)
+{
+ g_hash_table_insert(user_data, g_strdup(key), xs_node_ref(value));
+}
+
+static XsNode *xs_node_copy(XsNode *old)
+{
+ XsNode *n = xs_node_new();
+
+ n->gencnt = old->gencnt;
+
+#ifdef XS_NODE_UNIT_TEST
+ if (n->name) {
+ n->name = g_strdup(old->name);
+ }
+#endif
+
+ assert(old);
+ if (old->children) {
+ n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
+ (GDestroyNotify)xs_node_unref);
+ g_hash_table_foreach(old->children, do_child_insert, n->children);
+ }
+ if (old->perms) {
+ n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL);
+ }
+ if (old->content) {
+ n->content = g_byte_array_ref(old->content);
+ }
+ return n;
+}
+
+/* Returns true if it made a change to the hash table */
+static bool xs_node_add_child(XsNode *n, const char *path_elem, XsNode *child)
+{
+ assert(!strchr(path_elem, '/'));
+
+ if (!child) {
+ assert(n->children);
+ return g_hash_table_remove(n->children, path_elem);
+ }
+
+#ifdef XS_NODE_UNIT_TEST
+ g_free(child->name);
+ child->name = g_strdup(path_elem);
+#endif
+ if (!n->children) {
+ n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
+ (GDestroyNotify)xs_node_unref);
+ }
+
+ /*
+ * The documentation for g_hash_table_insert() says that it "returns a
+ * boolean value to indicate whether the newly added value was already
+ * in the hash table or not."
+ *
+ * It could perhaps be clearer that returning TRUE means it wasn't,
+ */
+ return g_hash_table_insert(n->children, g_strdup(path_elem), child);
+}
+
+struct walk_op {
+ struct XenstoreImplState *s;
+ char path[XENSTORE_ABS_PATH_MAX + 2]; /* Two NUL terminators */
+ int (*op_fn)(XsNode **n, struct walk_op *op);
+ void *op_opaque;
+ void *op_opaque2;
+
+ GList *watches;
+ unsigned int dom_id;
+ unsigned int tx_id;
+
+ /* The number of nodes which will exist in the tree if this op succeeds. */
+ unsigned int new_nr_nodes;
+
+ /*
+ * This is maintained on the way *down* the walk to indicate
+ * whether nodes can be modified in place or whether COW is
+ * required. It starts off being true, as we're always going to
+ * replace the root node. If we walk into a shared subtree it
+ * becomes false. If we start *creating* new nodes for a write,
+ * it becomes true again.
+ *
+ * Do not use it on the way back up.
+ */
+ bool inplace;
+ bool mutating;
+ bool create_dirs;
+ bool in_transaction;
+
+ /* Tracking during recursion so we know which is first. */
+ bool deleted_in_tx;
+};
+
+static void fire_watches(struct walk_op *op, bool parents)
+{
+ GList *l = NULL;
+ XsWatch *w;
+
+ if (!op->mutating || op->in_transaction) {
+ return;
+ }
+
+ if (parents) {
+ l = op->watches;
+ }
+
+ w = g_hash_table_lookup(op->s->watches, op->path);
+ while (w || l) {
+ if (!w) {
+ /* Fire the parent nodes from 'op' if asked to */
+ w = l->data;
+ l = l->next;
+ continue;
+ }
+
+ assert(strlen(op->path) > w->rel_prefix);
+ w->cb(w->cb_opaque, op->path + w->rel_prefix, w->token);
+
+ w = w->next;
+ }
+}
+
+static int xs_node_add_content(XsNode **n, struct walk_op *op)
+{
+ GByteArray *data = op->op_opaque;
+
+ if (op->dom_id) {
+ /*
+ * The real XenStored includes permissions and names of child nodes
+ * in the calculated datasize but life's too short. For a single
+ * tenant internal XenStore, we don't have to be quite as pedantic.
+ */
+ if (data->len > XS_MAX_NODE_SIZE) {
+ return E2BIG;
+ }
+ }
+ /* We *are* the node to be written. Either this or a copy. */
+ if (!op->inplace) {
+ XsNode *old = *n;
+ *n = xs_node_copy(old);
+ xs_node_unref(old);
+ }
+
+ if ((*n)->content) {
+ g_byte_array_unref((*n)->content);
+ }
+ (*n)->content = g_byte_array_ref(data);
+ if (op->tx_id != XBT_NULL) {
+ (*n)->modified_in_tx = true;
+ }
+ return 0;
+}
+
+static int xs_node_get_content(XsNode **n, struct walk_op *op)
+{
+ GByteArray *data = op->op_opaque;
+ GByteArray *node_data;
+
+ assert(op->inplace);
+ assert(*n);
+
+ node_data = (*n)->content;
+ if (node_data) {
+ g_byte_array_append(data, node_data->data, node_data->len);
+ }
+
+ return 0;
+}
+
+static int node_rm_recurse(gpointer key, gpointer value, gpointer user_data)
+{
+ struct walk_op *op = user_data;
+ int path_len = strlen(op->path);
+ int key_len = strlen(key);
+ XsNode *n = value;
+ bool this_inplace = op->inplace;
+
+ if (n->ref != 1) {
+ op->inplace = 0;
+ }
+
+ assert(key_len + path_len + 2 <= sizeof(op->path));
+ op->path[path_len] = '/';
+ memcpy(op->path + path_len + 1, key, key_len + 1);
+
+ if (n->children) {
+ g_hash_table_foreach_remove(n->children, node_rm_recurse, op);
+ }
+ op->new_nr_nodes--;
+
+ /*
+ * Fire watches on *this* node but not the parents because they are
+ * going to be deleted too, so the watch will fire for them anyway.
+ */
+ fire_watches(op, false);
+ op->path[path_len] = '\0';
+
+ /*
+ * Actually deleting the child here is just an optimisation; if we
+ * don't then the final unref on the topmost victim will just have
+ * to cascade down again repeating all the g_hash_table_foreach()
+ * calls.
+ */
+ return this_inplace;
+}
+
+static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op);
+static void copy_deleted_recurse(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ struct walk_op *op = user_data;
+ GHashTable *siblings = op->op_opaque2;
+ XsNode *n = xs_node_copy_deleted(value, op);
+
+ /*
+ * Reinsert the deleted_in_tx copy of the node into the parent's
+ * 'children' hash table. Having stashed it from op->op_opaque2
+ * before the recursive call to xs_node_copy_deleted() scribbled
+ * over it.
+ */
+ g_hash_table_insert(siblings, g_strdup(key), n);
+}
+
+static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op)
+{
+ XsNode *n = xs_node_new();
+
+ n->gencnt = old->gencnt;
+
+#ifdef XS_NODE_UNIT_TEST
+ if (old->name) {
+ n->name = g_strdup(old->name);
+ }
+#endif
+
+ if (old->children) {
+ n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
+ (GDestroyNotify)xs_node_unref);
+ op->op_opaque2 = n->children;
+ g_hash_table_foreach(old->children, copy_deleted_recurse, op);
+ }
+ if (old->perms) {
+ n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL);
+ }
+ n->deleted_in_tx = true;
+ /* If it gets resurrected we only fire a watch if it lost its content */
+ if (old->content) {
+ n->modified_in_tx = true;
+ }
+ op->new_nr_nodes--;
+ return n;
+}
+
+static int xs_node_rm(XsNode **n, struct walk_op *op)
+{
+ bool this_inplace = op->inplace;
+
+ if (op->tx_id != XBT_NULL) {
+ /* It's not trivial to do inplace handling for this one */
+ XsNode *old = *n;
+ *n = xs_node_copy_deleted(old, op);
+ xs_node_unref(old);
+ return 0;
+ }
+
+ /* Fire watches for, and count, nodes in the subtree which get deleted */
+ if ((*n)->children) {
+ g_hash_table_foreach_remove((*n)->children, node_rm_recurse, op);
+ }
+ op->new_nr_nodes--;
+
+ if (this_inplace) {
+ xs_node_unref(*n);
+ }
+ *n = NULL;
+ return 0;
+}
+
+static int xs_node_get_perms(XsNode **n, struct walk_op *op)
+{
+ GList **perms = op->op_opaque;
+
+ assert(op->inplace);
+ assert(*n);
+
+ *perms = g_list_copy_deep((*n)->perms, do_perm_copy, NULL);
+ return 0;
+}
+
+static void parse_perm(const char *perm, char *letter, unsigned int *dom_id)
+{
+ unsigned int n = sscanf(perm, "%c%u", letter, dom_id);
+
+ assert(n == 2);
+}
+
+static bool can_access(unsigned int dom_id, GList *perms, const char *letters)
+{
+ unsigned int i, n;
+ char perm_letter;
+ unsigned int perm_dom_id;
+ bool access;
+
+ if (dom_id == 0) {
+ return true;
+ }
+
+ n = g_list_length(perms);
+ assert(n >= 1);
+
+ /*
+ * The dom_id of the first perm is the owner, and the owner always has
+ * read-write access.
+ */
+ parse_perm(g_list_nth_data(perms, 0), &perm_letter, &perm_dom_id);
+ if (dom_id == perm_dom_id) {
+ return true;
+ }
+
+ /*
+ * The letter of the first perm specified the default access for all other
+ * domains.
+ */
+ access = !!strchr(letters, perm_letter);
+ for (i = 1; i < n; i++) {
+ parse_perm(g_list_nth_data(perms, i), &perm_letter, &perm_dom_id);
+ if (dom_id != perm_dom_id) {
+ continue;
+ }
+ access = !!strchr(letters, perm_letter);
+ }
+
+ return access;
+}
+
+static int xs_node_set_perms(XsNode **n, struct walk_op *op)
+{
+ GList *perms = op->op_opaque;
+
+ if (op->dom_id) {
+ unsigned int perm_dom_id;
+ char perm_letter;
+
+ /* A guest may not change permissions on nodes it does not own */
+ if (!can_access(op->dom_id, (*n)->perms, "")) {
+ return EPERM;
+ }
+
+ /* A guest may not change the owner of a node it owns. */
+ parse_perm(perms->data, &perm_letter, &perm_dom_id);
+ if (perm_dom_id != op->dom_id) {
+ return EPERM;
+ }
+
+ if (g_list_length(perms) > XS_MAX_PERMS_PER_NODE) {
+ return ENOSPC;
+ }
+ }
+
+ /* We *are* the node to be written. Either this or a copy. */
+ if (!op->inplace) {
+ XsNode *old = *n;
+ *n = xs_node_copy(old);
+ xs_node_unref(old);
+ }
+
+ if ((*n)->perms) {
+ g_list_free_full((*n)->perms, g_free);
+ }
+ (*n)->perms = g_list_copy_deep(perms, do_perm_copy, NULL);
+ if (op->tx_id != XBT_NULL) {
+ (*n)->modified_in_tx = true;
+ }
+ return 0;
+}
+
+/*
+ * Passed a full reference in *n which it may free if it needs to COW.
+ *
+ * When changing the tree, the op->inplace flag indicates whether this
+ * node may be modified in place (i.e. it and all its parents had a
+ * refcount of one). If walking down the tree we find a node whose
+ * refcount is higher, we must clear op->inplace and COW from there
+ * down. Unless we are creating new nodes as scaffolding for a write
+ * (which works like 'mkdir -p' does). In which case those newly
+ * created nodes can (and must) be modified in place again.
+ */
+static int xs_node_walk(XsNode **n, struct walk_op *op)
+{
+ char *child_name = NULL;
+ size_t namelen;
+ XsNode *old = *n, *child = NULL;
+ bool stole_child = false;
+ bool this_inplace;
+ XsWatch *watch;
+ int err;
+
+ namelen = strlen(op->path);
+ watch = g_hash_table_lookup(op->s->watches, op->path);
+
+ /* Is there a child, or do we hit the double-NUL termination? */
+ if (op->path[namelen + 1]) {
+ char *slash;
+ child_name = op->path + namelen + 1;
+ slash = strchr(child_name, '/');
+ if (slash) {
+ *slash = '\0';
+ }
+ op->path[namelen] = '/';
+ }
+
+ /* If we walk into a subtree which is shared, we must COW */
+ if (op->mutating && old->ref != 1) {
+ op->inplace = false;
+ }
+
+ if (!child_name) {
+ const char *letters = op->mutating ? "wb" : "rb";
+
+ if (!can_access(op->dom_id, old->perms, letters)) {
+ err = EACCES;
+ goto out;
+ }
+
+ /* This is the actual node on which the operation shall be performed */
+ err = op->op_fn(n, op);
+ if (!err) {
+ fire_watches(op, true);
+ }
+ goto out;
+ }
+
+ /* op->inplace will be further modified during the recursion */
+ this_inplace = op->inplace;
+
+ if (old && old->children) {
+ child = g_hash_table_lookup(old->children, child_name);
+ /* This is a *weak* reference to 'child', owned by the hash table */
+ }
+
+ if (child) {
+ if (child->deleted_in_tx) {
+ assert(child->ref == 1);
+ /* Cannot actually set child->deleted_in_tx = false until later */
+ }
+ xs_node_ref(child);
+ /*
+ * Now we own it too. But if we can modify inplace, that's going to
+ * foil the check and force it to COW. We want to be the *only* owner
+ * so that it can be modified in place, so remove it from the hash
+ * table in that case. We'll add it (or its replacement) back later.
+ */
+ if (op->mutating && this_inplace) {
+ g_hash_table_remove(old->children, child_name);
+ stole_child = true;
+ }
+ } else if (op->create_dirs) {
+ assert(op->mutating);
+
+ if (!can_access(op->dom_id, old->perms, "wb")) {
+ err = EACCES;
+ goto out;
+ }
+
+ if (op->dom_id && op->new_nr_nodes >= XS_MAX_DOMAIN_NODES) {
+ err = ENOSPC;
+ goto out;
+ }
+
+ child = xs_node_create(child_name, old->perms);
+ op->new_nr_nodes++;
+
+ /*
+ * If we're creating a new child, we can clearly modify it (and its
+ * children) in place from here on down.
+ */
+ op->inplace = true;
+ } else {
+ err = ENOENT;
+ goto out;
+ }
+
+ /*
+ * If there's a watch on this node, add it to the list to be fired
+ * (with the correct full pathname for the modified node) at the end.
+ */
+ if (watch) {
+ op->watches = g_list_append(op->watches, watch);
+ }
+
+ /*
+ * Except for the temporary child-stealing as noted, our node has not
+ * changed yet. We don't yet know the overall operation will complete.
+ */
+ err = xs_node_walk(&child, op);
+
+ if (watch) {
+ op->watches = g_list_remove(op->watches, watch);
+ }
+
+ if (err || !op->mutating) {
+ if (stole_child) {
+ /* Put it back as it was. */
+ g_hash_table_replace(old->children, g_strdup(child_name), child);
+ } else {
+ xs_node_unref(child);
+ }
+ goto out;
+ }
+
+ /*
+ * Now we know the operation has completed successfully and we're on
+ * the way back up. Make the change, substituting 'child' in the
+ * node at our level.
+ */
+ if (!this_inplace) {
+ *n = xs_node_copy(old);
+ xs_node_unref(old);
+ }
+
+ /*
+ * If we resurrected a deleted_in_tx node, we can mark it as no longer
+ * deleted now that we know the overall operation has succeeded.
+ */
+ if (op->create_dirs && child && child->deleted_in_tx) {
+ op->new_nr_nodes++;
+ child->deleted_in_tx = false;
+ }
+
+ /*
+ * The child may be NULL here, for a remove operation. Either way,
+ * xs_node_add_child() will do the right thing and return a value
+ * indicating whether it changed the parent's hash table or not.
+ *
+ * We bump the parent gencnt if it adds a child that we *didn't*
+ * steal from it in the first place, or if child==NULL and was
+ * thus removed (whether we stole it earlier and didn't put it
+ * back, or xs_node_add_child() actually removed it now).
+ */
+ if ((xs_node_add_child(*n, child_name, child) && !stole_child) || !child) {
+ (*n)->gencnt++;
+ }
+
+ out:
+ op->path[namelen] = '\0';
+ if (!namelen) {
+ assert(!op->watches);
+ /*
+ * On completing the recursion back up the path walk and reaching the
+ * top, assign the new node count if the operation was successful. If
+ * the main tree was changed, bump its tx ID so that outstanding
+ * transactions correctly fail. But don't bump it every time; only
+ * if it makes a difference.
+ */
+ if (!err && op->mutating) {
+ if (!op->in_transaction) {
+ if (op->s->root_tx != op->s->last_tx) {
+ op->s->root_tx = next_tx(op->s);
+ }
+ op->s->nr_nodes = op->new_nr_nodes;
+ } else {
+ XsTransaction *tx = g_hash_table_lookup(op->s->transactions,
+ GINT_TO_POINTER(op->tx_id));
+ assert(tx);
+ tx->nr_nodes = op->new_nr_nodes;
+ }
+ }
+ }
+ return err;
+}
+
+static void append_directory_item(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ GList **items = user_data;
+
+ *items = g_list_insert_sorted(*items, g_strdup(key), (GCompareFunc)strcmp);
+}
+
+/* Populates items with char * names which caller must free. */
+static int xs_node_directory(XsNode **n, struct walk_op *op)
+{
+ GList **items = op->op_opaque;
+
+ assert(op->inplace);
+ assert(*n);
+
+ if ((*n)->children) {
+ g_hash_table_foreach((*n)->children, append_directory_item, items);
+ }
+
+ if (op->op_opaque2) {
+ *(uint64_t *)op->op_opaque2 = (*n)->gencnt;
+ }
+
+ return 0;
+}
+
+static int validate_path(char *outpath, const char *userpath,
+ unsigned int dom_id)
+{
+ size_t i, pathlen = strlen(userpath);
+
+ if (!pathlen || userpath[pathlen] == '/' || strstr(userpath, "//")) {
+ return EINVAL;
+ }
+ for (i = 0; i < pathlen; i++) {
+ if (!strchr(XS_VALID_CHARS, userpath[i])) {
+ return EINVAL;
+ }
+ }
+ if (userpath[0] == '/') {
+ if (pathlen > XENSTORE_ABS_PATH_MAX) {
+ return E2BIG;
+ }
+ memcpy(outpath, userpath, pathlen + 1);
+ } else {
+ if (pathlen > XENSTORE_REL_PATH_MAX) {
+ return E2BIG;
+ }
+ snprintf(outpath, XENSTORE_ABS_PATH_MAX, "/local/domain/%u/%s", dom_id,
+ userpath);
+ }
+ return 0;
+}
+
+
+static int init_walk_op(XenstoreImplState *s, struct walk_op *op,
+ xs_transaction_t tx_id, unsigned int dom_id,
+ const char *path, XsNode ***rootp)
+{
+ int ret = validate_path(op->path, path, dom_id);
+ if (ret) {
+ return ret;
+ }
+
+ /*
+ * We use *two* NUL terminators at the end of the path, as during the walk
+ * we will temporarily turn each '/' into a NUL to allow us to use that
+ * path element for the lookup.
+ */
+ op->path[strlen(op->path) + 1] = '\0';
+ op->watches = NULL;
+ op->path[0] = '\0';
+ op->inplace = true;
+ op->mutating = false;
+ op->create_dirs = false;
+ op->in_transaction = false;
+ op->dom_id = dom_id;
+ op->tx_id = tx_id;
+ op->s = s;
+
+ if (tx_id == XBT_NULL) {
+ *rootp = &s->root;
+ op->new_nr_nodes = s->nr_nodes;
+ } else {
+ XsTransaction *tx = g_hash_table_lookup(s->transactions,
+ GINT_TO_POINTER(tx_id));
+ if (!tx) {
+ return ENOENT;
+ }
+ *rootp = &tx->root;
+ op->new_nr_nodes = tx->nr_nodes;
+ op->in_transaction = true;
+ }
+
+ return 0;
+}
+
+int xs_impl_read(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path, GByteArray *data)
+{
+ /*
+ * The data GByteArray shall exist, and will be freed by caller.
+ * Just g_byte_array_append() to it.
+ */
+ struct walk_op op;
+ XsNode **n;
+ int ret;
+
+ ret = init_walk_op(s, &op, tx_id, dom_id, path, &n);
+ if (ret) {
+ return ret;
+ }
+ op.op_fn = xs_node_get_content;
+ op.op_opaque = data;
+ return xs_node_walk(n, &op);
+}
+
+int xs_impl_write(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path, GByteArray *data)
+{
+ /*
+ * The data GByteArray shall exist, will be freed by caller. You are
+ * free to use g_byte_array_steal() and keep the data. Or just ref it.
+ */
+ struct walk_op op;
+ XsNode **n;
+ int ret;
+
+ ret = init_walk_op(s, &op, tx_id, dom_id, path, &n);
+ if (ret) {
+ return ret;
+ }
+ op.op_fn = xs_node_add_content;
+ op.op_opaque = data;
+ op.mutating = true;
+ op.create_dirs = true;
+ return xs_node_walk(n, &op);
+}
+
+int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path,
+ uint64_t *gencnt, GList **items)
+{
+ /*
+ * The items are (char *) to be freed by caller. Although it's consumed
+ * immediately so if you want to change it to (const char *) and keep
+ * them, go ahead and change the caller.
+ */
+ struct walk_op op;
+ XsNode **n;
+ int ret;
+
+ ret = init_walk_op(s, &op, tx_id, dom_id, path, &n);
+ if (ret) {
+ return ret;
+ }
+ op.op_fn = xs_node_directory;
+ op.op_opaque = items;
+ op.op_opaque2 = gencnt;
+ return xs_node_walk(n, &op);
+}
+
+int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t *tx_id)
+{
+ XsTransaction *tx;
+
+ if (*tx_id != XBT_NULL) {
+ return EINVAL;
+ }
+
+ if (dom_id && s->nr_domu_transactions >= XS_MAX_TRANSACTIONS) {
+ return ENOSPC;
+ }
+
+ tx = g_new0(XsTransaction, 1);
+
+ tx->nr_nodes = s->nr_nodes;
+ tx->tx_id = next_tx(s);
+ tx->base_tx = s->root_tx;
+ tx->root = xs_node_ref(s->root);
+ tx->dom_id = dom_id;
+
+ g_hash_table_insert(s->transactions, GINT_TO_POINTER(tx->tx_id), tx);
+ if (dom_id) {
+ s->nr_domu_transactions++;
+ }
+ *tx_id = tx->tx_id;
+ return 0;
+}
+
+static gboolean tx_commit_walk(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ struct walk_op *op = user_data;
+ int path_len = strlen(op->path);
+ int key_len = strlen(key);
+ bool fire_parents = true;
+ XsWatch *watch;
+ XsNode *n = value;
+
+ if (n->ref != 1) {
+ return false;
+ }
+
+ if (n->deleted_in_tx) {
+ /*
+ * We fire watches on our parents if we are the *first* node
+ * to be deleted (the topmost one). This matches the behaviour
+ * when deleting in the live tree.
+ */
+ fire_parents = !op->deleted_in_tx;
+
+ /* Only used on the way down so no need to clear it later */
+ op->deleted_in_tx = true;
+ }
+
+ assert(key_len + path_len + 2 <= sizeof(op->path));
+ op->path[path_len] = '/';
+ memcpy(op->path + path_len + 1, key, key_len + 1);
+
+ watch = g_hash_table_lookup(op->s->watches, op->path);
+ if (watch) {
+ op->watches = g_list_append(op->watches, watch);
+ }
+
+ if (n->children) {
+ g_hash_table_foreach_remove(n->children, tx_commit_walk, op);
+ }
+
+ if (watch) {
+ op->watches = g_list_remove(op->watches, watch);
+ }
+
+ /*
+ * Don't fire watches if this node was only copied because a
+ * descendent was changed. The modified_in_tx flag indicates the
+ * ones which were really changed.
+ */
+ if (n->modified_in_tx || n->deleted_in_tx) {
+ fire_watches(op, fire_parents);
+ n->modified_in_tx = false;
+ }
+ op->path[path_len] = '\0';
+
+ /* Deleted nodes really do get expunged when we commit */
+ return n->deleted_in_tx;
+}
+
+static int transaction_commit(XenstoreImplState *s, XsTransaction *tx)
+{
+ struct walk_op op;
+ XsNode **n;
+
+ if (s->root_tx != tx->base_tx) {
+ return EAGAIN;
+ }
+ xs_node_unref(s->root);
+ s->root = tx->root;
+ tx->root = NULL;
+ s->root_tx = tx->tx_id;
+ s->nr_nodes = tx->nr_nodes;
+
+ init_walk_op(s, &op, XBT_NULL, tx->dom_id, "/", &n);
+ op.deleted_in_tx = false;
+ op.mutating = true;
+
+ /*
+ * Walk the new root and fire watches on any node which has a
+ * refcount of one (which is therefore unique to this transaction).
+ */
+ if (s->root->children) {
+ g_hash_table_foreach_remove(s->root->children, tx_commit_walk, &op);
+ }
+
+ return 0;
+}
+
+int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, bool commit)
+{
+ int ret = 0;
+ XsTransaction *tx = g_hash_table_lookup(s->transactions,
+ GINT_TO_POINTER(tx_id));
+
+ if (!tx || tx->dom_id != dom_id) {
+ return ENOENT;
+ }
+
+ if (commit) {
+ ret = transaction_commit(s, tx);
+ }
+
+ g_hash_table_remove(s->transactions, GINT_TO_POINTER(tx_id));
+ if (dom_id) {
+ assert(s->nr_domu_transactions);
+ s->nr_domu_transactions--;
+ }
+ return ret;
+}
+
+int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path)
+{
+ struct walk_op op;
+ XsNode **n;
+ int ret;
+
+ ret = init_walk_op(s, &op, tx_id, dom_id, path, &n);
+ if (ret) {
+ return ret;
+ }
+ op.op_fn = xs_node_rm;
+ op.mutating = true;
+ return xs_node_walk(n, &op);
+}
+
+int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path, GList **perms)
+{
+ struct walk_op op;
+ XsNode **n;
+ int ret;
+
+ ret = init_walk_op(s, &op, tx_id, dom_id, path, &n);
+ if (ret) {
+ return ret;
+ }
+ op.op_fn = xs_node_get_perms;
+ op.op_opaque = perms;
+ return xs_node_walk(n, &op);
+}
+
+static void is_valid_perm(gpointer data, gpointer user_data)
+{
+ char *perm = data;
+ bool *valid = user_data;
+ char letter;
+ unsigned int dom_id;
+
+ if (!*valid) {
+ return;
+ }
+
+ if (sscanf(perm, "%c%u", &letter, &dom_id) != 2) {
+ *valid = false;
+ return;
+ }
+
+ switch (letter) {
+ case 'n':
+ case 'r':
+ case 'w':
+ case 'b':
+ break;
+
+ default:
+ *valid = false;
+ break;
+ }
+}
+
+int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path, GList *perms)
+{
+ struct walk_op op;
+ XsNode **n;
+ bool valid = true;
+ int ret;
+
+ if (!g_list_length(perms)) {
+ return EINVAL;
+ }
+
+ g_list_foreach(perms, is_valid_perm, &valid);
+ if (!valid) {
+ return EINVAL;
+ }
+
+ ret = init_walk_op(s, &op, tx_id, dom_id, path, &n);
+ if (ret) {
+ return ret;
+ }
+ op.op_fn = xs_node_set_perms;
+ op.op_opaque = perms;
+ op.mutating = true;
+ return xs_node_walk(n, &op);
+}
+
+static int do_xs_impl_watch(XenstoreImplState *s, unsigned int dom_id,
+ const char *path, const char *token,
+ xs_impl_watch_fn fn, void *opaque)
+
+{
+ char abspath[XENSTORE_ABS_PATH_MAX + 1];
+ XsWatch *w, *l;
+ int ret;
+
+ ret = validate_path(abspath, path, dom_id);
+ if (ret) {
+ return ret;
+ }
+
+ /* Check for duplicates */
+ l = w = g_hash_table_lookup(s->watches, abspath);
+ while (w) {
+ if (!g_strcmp0(token, w->token) && opaque == w->cb_opaque &&
+ fn == w->cb && dom_id == w->dom_id) {
+ return EEXIST;
+ }
+ w = w->next;
+ }
+
+ if (dom_id && s->nr_domu_watches >= XS_MAX_WATCHES) {
+ return E2BIG;
+ }
+
+ w = g_new0(XsWatch, 1);
+ w->token = g_strdup(token);
+ w->cb = fn;
+ w->cb_opaque = opaque;
+ w->dom_id = dom_id;
+ w->rel_prefix = strlen(abspath) - strlen(path);
+
+ /* l was looked up above when checking for duplicates */
+ if (l) {
+ w->next = l->next;
+ l->next = w;
+ } else {
+ g_hash_table_insert(s->watches, g_strdup(abspath), w);
+ }
+ if (dom_id) {
+ s->nr_domu_watches++;
+ }
+
+ return 0;
+}
+
+int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path,
+ const char *token, xs_impl_watch_fn fn, void *opaque)
+{
+ int ret = do_xs_impl_watch(s, dom_id, path, token, fn, opaque);
+
+ if (!ret) {
+ /* A new watch should fire immediately */
+ fn(opaque, path, token);
+ }
+
+ return ret;
+}
+
+static XsWatch *free_watch(XenstoreImplState *s, XsWatch *w)
+{
+ XsWatch *next = w->next;
+
+ if (w->dom_id) {
+ assert(s->nr_domu_watches);
+ s->nr_domu_watches--;
+ }
+
+ g_free(w->token);
+ g_free(w);
+
+ return next;
+}
+
+int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id,
+ const char *path, const char *token,
+ xs_impl_watch_fn fn, void *opaque)
+{
+ char abspath[XENSTORE_ABS_PATH_MAX + 1];
+ XsWatch *w, **l;
+ int ret;
+
+ ret = validate_path(abspath, path, dom_id);
+ if (ret) {
+ return ret;
+ }
+
+ w = g_hash_table_lookup(s->watches, abspath);
+ if (!w) {
+ return ENOENT;
+ }
+
+ /*
+ * The hash table contains the first element of a list of
+ * watches. Removing the first element in the list is a
+ * special case because we have to update the hash table to
+ * point to the next (or remove it if there's nothing left).
+ */
+ if (!g_strcmp0(token, w->token) && fn == w->cb && opaque == w->cb_opaque &&
+ dom_id == w->dom_id) {
+ if (w->next) {
+ /* Insert the previous 'next' into the hash table */
+ g_hash_table_insert(s->watches, g_strdup(abspath), w->next);
+ } else {
+ /* Nothing left; remove from hash table */
+ g_hash_table_remove(s->watches, abspath);
+ }
+ free_watch(s, w);
+ return 0;
+ }
+
+ /*
+ * We're all done messing with the hash table because the element
+ * it points to has survived the cull. Now it's just a simple
+ * linked list removal operation.
+ */
+ for (l = &w->next; *l; l = &w->next) {
+ w = *l;
+
+ if (!g_strcmp0(token, w->token) && fn == w->cb &&
+ opaque != w->cb_opaque && dom_id == w->dom_id) {
+ *l = free_watch(s, w);
+ return 0;
+ }
+ }
+
+ return ENOENT;
+}
+
+int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id)
+{
+ char **watch_paths;
+ guint nr_watch_paths;
+ guint i;
+
+ watch_paths = (char **)g_hash_table_get_keys_as_array(s->watches,
+ &nr_watch_paths);
+
+ for (i = 0; i < nr_watch_paths; i++) {
+ XsWatch *w1 = g_hash_table_lookup(s->watches, watch_paths[i]);
+ XsWatch *w2, *w, **l;
+
+ /*
+ * w1 is the original list. The hash table has this pointer.
+ * w2 is the head of our newly-filtered list.
+ * w and l are temporary for processing. w is somewhat redundant
+ * with *l but makes my eyes bleed less.
+ */
+
+ w = w2 = w1;
+ l = &w;
+ while (w) {
+ if (w->dom_id == dom_id) {
+ /* If we're freeing the head of the list, bump w2 */
+ if (w2 == w) {
+ w2 = w->next;
+ }
+ *l = free_watch(s, w);
+ } else {
+ l = &w->next;
+ }
+ w = *l;
+ }
+ /*
+ * If the head of the list survived the cull, we don't need to
+ * touch the hash table and we're done with this path. Else...
+ */
+ if (w1 != w2) {
+ g_hash_table_steal(s->watches, watch_paths[i]);
+
+ /*
+ * It was already freed. (Don't worry, this whole thing is
+ * single-threaded and nobody saw it in the meantime). And
+ * having *stolen* it, we now own the watch_paths[i] string
+ * so if we don't give it back to the hash table, we need
+ * to free it.
+ */
+ if (w2) {
+ g_hash_table_insert(s->watches, watch_paths[i], w2);
+ } else {
+ g_free(watch_paths[i]);
+ }
+ }
+ }
+ g_free(watch_paths);
+ return 0;
+}
+
+static void xs_tx_free(void *_tx)
+{
+ XsTransaction *tx = _tx;
+ if (tx->root) {
+ xs_node_unref(tx->root);
+ }
+ g_free(tx);
+}
+
+XenstoreImplState *xs_impl_create(unsigned int dom_id)
+{
+ XenstoreImplState *s = g_new0(XenstoreImplState, 1);
+ GList *perms;
+
+ s->watches = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
+ s->transactions = g_hash_table_new_full(g_direct_hash, g_direct_equal,
+ NULL, xs_tx_free);
+
+ perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, 0));
+ s->root = xs_node_create("/", perms);
+ g_list_free_full(perms, g_free);
+ s->nr_nodes = 1;
+
+ s->root_tx = s->last_tx = 1;
+ return s;
+}
+
+
+static void clear_serialized_tx(gpointer key, gpointer value, gpointer opaque)
+{
+ XsNode *n = value;
+
+ n->serialized_tx = XBT_NULL;
+ if (n->children) {
+ g_hash_table_foreach(n->children, clear_serialized_tx, NULL);
+ }
+}
+
+static void clear_tx_serialized_tx(gpointer key, gpointer value,
+ gpointer opaque)
+{
+ XsTransaction *t = value;
+
+ clear_serialized_tx(NULL, t->root, NULL);
+}
+
+static void write_be32(GByteArray *save, uint32_t val)
+{
+ uint32_t be = htonl(val);
+ g_byte_array_append(save, (void *)&be, sizeof(be));
+}
+
+
+struct save_state {
+ GByteArray *bytes;
+ unsigned int tx_id;
+};
+
+#define MODIFIED_IN_TX (1U << 0)
+#define DELETED_IN_TX (1U << 1)
+#define NODE_REF (1U << 2)
+
+static void save_node(gpointer key, gpointer value, gpointer opaque)
+{
+ struct save_state *ss = opaque;
+ XsNode *n = value;
+ char *name = key;
+ uint8_t flag = 0;
+
+ /* Child nodes (i.e. anything but the root) have a name */
+ if (name) {
+ g_byte_array_append(ss->bytes, key, strlen(key) + 1);
+ }
+
+ /*
+ * If we already wrote this node, refer to the previous copy.
+ * There's no rename/move in XenStore, so all we need to find
+ * it is the tx_id of the transation in which it exists. Which
+ * may be the root tx.
+ */
+ if (n->serialized_tx != XBT_NULL) {
+ flag = NODE_REF;
+ g_byte_array_append(ss->bytes, &flag, 1);
+ write_be32(ss->bytes, n->serialized_tx);
+ } else {
+ GList *l;
+ n->serialized_tx = ss->tx_id;
+
+ if (n->modified_in_tx) {
+ flag |= MODIFIED_IN_TX;
+ }
+ if (n->deleted_in_tx) {
+ flag |= DELETED_IN_TX;
+ }
+ g_byte_array_append(ss->bytes, &flag, 1);
+
+ if (n->content) {
+ write_be32(ss->bytes, n->content->len);
+ g_byte_array_append(ss->bytes, n->content->data, n->content->len);
+ } else {
+ write_be32(ss->bytes, 0);
+ }
+
+ for (l = n->perms; l; l = l->next) {
+ g_byte_array_append(ss->bytes, l->data, strlen(l->data) + 1);
+ }
+ /* NUL termination after perms */
+ g_byte_array_append(ss->bytes, (void *)"", 1);
+
+ if (n->children) {
+ g_hash_table_foreach(n->children, save_node, ss);
+ }
+ /* NUL termination after children (child name is NUL) */
+ g_byte_array_append(ss->bytes, (void *)"", 1);
+ }
+}
+
+static void save_tree(struct save_state *ss, uint32_t tx_id, XsNode *root)
+{
+ write_be32(ss->bytes, tx_id);
+ ss->tx_id = tx_id;
+ save_node(NULL, root, ss);
+}
+
+static void save_tx(gpointer key, gpointer value, gpointer opaque)
+{
+ uint32_t tx_id = GPOINTER_TO_INT(key);
+ struct save_state *ss = opaque;
+ XsTransaction *n = value;
+
+ write_be32(ss->bytes, n->base_tx);
+ write_be32(ss->bytes, n->dom_id);
+
+ save_tree(ss, tx_id, n->root);
+}
+
+static void save_watch(gpointer key, gpointer value, gpointer opaque)
+{
+ struct save_state *ss = opaque;
+ XsWatch *w = value;
+
+ /* We only save the *guest* watches. */
+ if (w->dom_id) {
+ gpointer relpath = key + w->rel_prefix;
+ g_byte_array_append(ss->bytes, relpath, strlen(relpath) + 1);
+ g_byte_array_append(ss->bytes, (void *)w->token, strlen(w->token) + 1);
+ }
+}
+
+GByteArray *xs_impl_serialize(XenstoreImplState *s)
+{
+ struct save_state ss;
+
+ ss.bytes = g_byte_array_new();
+
+ /*
+ * node = flags [ real_node / node_ref ]
+ * flags = uint8_t (MODIFIED_IN_TX | DELETED_IN_TX | NODE_REF)
+ * node_ref = tx_id (in which the original version of this node exists)
+ * real_node = content perms child* NUL
+ * content = len data
+ * len = uint32_t
+ * data = uint8_t{len}
+ * perms = perm* NUL
+ * perm = asciiz
+ * child = name node
+ * name = asciiz
+ *
+ * tree = tx_id node
+ * tx_id = uint32_t
+ *
+ * transaction = base_tx_id dom_id tree
+ * base_tx_id = uint32_t
+ * dom_id = uint32_t
+ *
+ * tx_list = tree transaction* XBT_NULL
+ *
+ * watch = path token
+ * path = asciiz
+ * token = asciiz
+ *
+ * watch_list = watch* NUL
+ *
+ * xs_serialize_stream = last_tx tx_list watch_list
+ * last_tx = uint32_t
+ */
+
+ /* Clear serialized_tx in every node. */
+ if (s->serialized) {
+ clear_serialized_tx(NULL, s->root, NULL);
+ g_hash_table_foreach(s->transactions, clear_tx_serialized_tx, NULL);
+ }
+
+ s->serialized = true;
+
+ write_be32(ss.bytes, s->last_tx);
+ save_tree(&ss, s->root_tx, s->root);
+ g_hash_table_foreach(s->transactions, save_tx, &ss);
+
+ write_be32(ss.bytes, XBT_NULL);
+
+ g_hash_table_foreach(s->watches, save_watch, &ss);
+ g_byte_array_append(ss.bytes, (void *)"", 1);
+
+ return ss.bytes;
+}
+
+struct unsave_state {
+ char path[XENSTORE_ABS_PATH_MAX + 1];
+ XenstoreImplState *s;
+ GByteArray *bytes;
+ uint8_t *d;
+ size_t l;
+ bool root_walk;
+};
+
+static int consume_be32(struct unsave_state *us, unsigned int *val)
+{
+ uint32_t d;
+
+ if (us->l < sizeof(d)) {
+ return -EINVAL;
+ }
+ memcpy(&d, us->d, sizeof(d));
+ *val = ntohl(d);
+ us->d += sizeof(d);
+ us->l -= sizeof(d);
+ return 0;
+}
+
+static int consume_string(struct unsave_state *us, char **str, size_t *len)
+{
+ size_t l;
+
+ if (!us->l) {
+ return -EINVAL;
+ }
+
+ l = strnlen((void *)us->d, us->l);
+ if (l == us->l) {
+ return -EINVAL;
+ }
+
+ if (str) {
+ *str = (void *)us->d;
+ }
+ if (len) {
+ *len = l;
+ }
+
+ us->d += l + 1;
+ us->l -= l + 1;
+ return 0;
+}
+
+static XsNode *lookup_node(XsNode *n, char *path)
+{
+ char *slash = strchr(path, '/');
+ XsNode *child;
+
+ if (path[0] == '\0') {
+ return n;
+ }
+
+ if (slash) {
+ *slash = '\0';
+ }
+
+ if (!n->children) {
+ return NULL;
+ }
+ child = g_hash_table_lookup(n->children, path);
+ if (!slash) {
+ return child;
+ }
+
+ *slash = '/';
+ if (!child) {
+ return NULL;
+ }
+ return lookup_node(child, slash + 1);
+}
+
+static XsNode *lookup_tx_node(struct unsave_state *us, unsigned int tx_id)
+{
+ XsTransaction *t;
+ if (tx_id == us->s->root_tx) {
+ return lookup_node(us->s->root, us->path + 1);
+ }
+
+ t = g_hash_table_lookup(us->s->transactions, GINT_TO_POINTER(tx_id));
+ if (!t) {
+ return NULL;
+ }
+ g_assert(t->root);
+ return lookup_node(t->root, us->path + 1);
+}
+
+static void count_child_nodes(gpointer key, gpointer value, gpointer user_data)
+{
+ unsigned int *nr_nodes = user_data;
+ XsNode *n = value;
+
+ (*nr_nodes)++;
+
+ if (n->children) {
+ g_hash_table_foreach(n->children, count_child_nodes, nr_nodes);
+ }
+}
+
+static int consume_node(struct unsave_state *us, XsNode **nodep,
+ unsigned int *nr_nodes)
+{
+ XsNode *n = NULL;
+ uint8_t flags;
+ int ret;
+
+ if (us->l < 1) {
+ return -EINVAL;
+ }
+ flags = us->d[0];
+ us->d++;
+ us->l--;
+
+ if (flags == NODE_REF) {
+ unsigned int tx;
+
+ ret = consume_be32(us, &tx);
+ if (ret) {
+ return ret;
+ }
+
+ n = lookup_tx_node(us, tx);
+ if (!n) {
+ return -EINVAL;
+ }
+ n->ref++;
+ if (n->children) {
+ g_hash_table_foreach(n->children, count_child_nodes, nr_nodes);
+ }
+ } else {
+ uint32_t datalen;
+
+ if (flags & ~(DELETED_IN_TX | MODIFIED_IN_TX)) {
+ return -EINVAL;
+ }
+ n = xs_node_new();
+
+ if (flags & DELETED_IN_TX) {
+ n->deleted_in_tx = true;
+ }
+ if (flags & MODIFIED_IN_TX) {
+ n->modified_in_tx = true;
+ }
+ ret = consume_be32(us, &datalen);
+ if (ret) {
+ xs_node_unref(n);
+ return -EINVAL;
+ }
+ if (datalen) {
+ if (datalen > us->l) {
+ xs_node_unref(n);
+ return -EINVAL;
+ }
+
+ GByteArray *node_data = g_byte_array_new();
+ g_byte_array_append(node_data, us->d, datalen);
+ us->d += datalen;
+ us->l -= datalen;
+ n->content = node_data;
+
+ if (us->root_walk) {
+ n->modified_in_tx = true;
+ }
+ }
+ while (1) {
+ char *perm = NULL;
+ size_t permlen = 0;
+
+ ret = consume_string(us, &perm, &permlen);
+ if (ret) {
+ xs_node_unref(n);
+ return ret;
+ }
+
+ if (!permlen) {
+ break;
+ }
+
+ n->perms = g_list_append(n->perms, g_strdup(perm));
+ }
+
+ /* Now children */
+ while (1) {
+ size_t childlen;
+ char *childname;
+ char *pathend;
+ XsNode *child = NULL;
+
+ ret = consume_string(us, &childname, &childlen);
+ if (ret) {
+ xs_node_unref(n);
+ return ret;
+ }
+
+ if (!childlen) {
+ break;
+ }
+
+ pathend = us->path + strlen(us->path);
+ strncat(us->path, "/", sizeof(us->path) - 1);
+ strncat(us->path, childname, sizeof(us->path) - 1);
+
+ ret = consume_node(us, &child, nr_nodes);
+ *pathend = '\0';
+ if (ret) {
+ xs_node_unref(n);
+ return ret;
+ }
+ g_assert(child);
+ xs_node_add_child(n, childname, child);
+ }
+
+ /*
+ * If the node has no data and no children we still want to fire
+ * a watch on it.
+ */
+ if (us->root_walk && !n->children) {
+ n->modified_in_tx = true;
+ }
+ }
+
+ if (!n->deleted_in_tx) {
+ (*nr_nodes)++;
+ }
+
+ *nodep = n;
+ return 0;
+}
+
+static int consume_tree(struct unsave_state *us, XsTransaction *t)
+{
+ int ret;
+
+ ret = consume_be32(us, &t->tx_id);
+ if (ret) {
+ return ret;
+ }
+
+ if (t->tx_id > us->s->last_tx) {
+ return -EINVAL;
+ }
+
+ us->path[0] = '\0';
+
+ return consume_node(us, &t->root, &t->nr_nodes);
+}
+
+int xs_impl_deserialize(XenstoreImplState *s, GByteArray *bytes,
+ unsigned int dom_id, xs_impl_watch_fn watch_fn,
+ void *watch_opaque)
+{
+ struct unsave_state us;
+ XsTransaction base_t = { 0 };
+ int ret;
+
+ us.s = s;
+ us.bytes = bytes;
+ us.d = bytes->data;
+ us.l = bytes->len;
+
+ xs_impl_reset_watches(s, dom_id);
+ g_hash_table_remove_all(s->transactions);
+
+ xs_node_unref(s->root);
+ s->root = NULL;
+ s->root_tx = s->last_tx = XBT_NULL;
+
+ ret = consume_be32(&us, &s->last_tx);
+ if (ret) {
+ return ret;
+ }
+
+ /*
+ * Consume the base tree into a transaction so that watches can be
+ * fired as we commit it. By setting us.root_walk we cause the nodes
+ * to be marked as 'modified_in_tx' as they are created, so that the
+ * watches are triggered on them.
+ */
+ base_t.dom_id = dom_id;
+ base_t.base_tx = XBT_NULL;
+ us.root_walk = true;
+ ret = consume_tree(&us, &base_t);
+ if (ret) {
+ return ret;
+ }
+ us.root_walk = false;
+
+ /*
+ * Commit the transaction now while the refcount on all nodes is 1.
+ * Note that we haven't yet reinstated the *guest* watches but that's
+ * OK because we don't want the guest to see any changes. Even any
+ * backend nodes which get recreated should be *precisely* as they
+ * were before the migration. Back ends may have been instantiated
+ * already, and will see the frontend magically blink into existence
+ * now (well, from the aio_bh which fires the watches). It's their
+ * responsibility to rebuild everything precisely as it was before.
+ */
+ ret = transaction_commit(s, &base_t);
+ if (ret) {
+ return ret;
+ }
+
+ while (1) {
+ unsigned int base_tx;
+ XsTransaction *t;
+
+ ret = consume_be32(&us, &base_tx);
+ if (ret) {
+ return ret;
+ }
+ if (base_tx == XBT_NULL) {
+ break;
+ }
+
+ t = g_new0(XsTransaction, 1);
+ t->base_tx = base_tx;
+
+ ret = consume_be32(&us, &t->dom_id);
+ if (!ret) {
+ ret = consume_tree(&us, t);
+ }
+ if (ret) {
+ g_free(t);
+ return ret;
+ }
+ g_assert(t->root);
+ if (t->dom_id) {
+ s->nr_domu_transactions++;
+ }
+ g_hash_table_insert(s->transactions, GINT_TO_POINTER(t->tx_id), t);
+ }
+
+ while (1) {
+ char *path, *token;
+ size_t pathlen, toklen;
+
+ ret = consume_string(&us, &path, &pathlen);
+ if (ret) {
+ return ret;
+ }
+ if (!pathlen) {
+ break;
+ }
+
+ ret = consume_string(&us, &token, &toklen);
+ if (ret) {
+ return ret;
+ }
+
+ if (!watch_fn) {
+ continue;
+ }
+
+ ret = do_xs_impl_watch(s, dom_id, path, token, watch_fn, watch_opaque);
+ if (ret) {
+ return ret;
+ }
+ }
+
+ if (us.l) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
diff --git a/hw/i386/kvm/xenstore_impl.h b/hw/i386/kvm/xenstore_impl.h
new file mode 100644
index 0000000..0df2a91
--- /dev/null
+++ b/hw/i386/kvm/xenstore_impl.h
@@ -0,0 +1,63 @@
+/*
+ * QEMU Xen emulation: The actual implementation of XenStore
+ *
+ * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_XENSTORE_IMPL_H
+#define QEMU_XENSTORE_IMPL_H
+
+#include "hw/xen/xen_backend_ops.h"
+
+typedef struct XenstoreImplState XenstoreImplState;
+
+XenstoreImplState *xs_impl_create(unsigned int dom_id);
+
+char *xs_perm_as_string(unsigned int perm, unsigned int domid);
+
+/*
+ * These functions return *positive* error numbers. This is a little
+ * unconventional but it helps to keep us honest because there is
+ * also a very limited set of error numbers that they are permitted
+ * to return (those in xsd_errors).
+ */
+
+int xs_impl_read(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path, GByteArray *data);
+int xs_impl_write(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path, GByteArray *data);
+int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path,
+ uint64_t *gencnt, GList **items);
+int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t *tx_id);
+int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, bool commit);
+int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path);
+int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path, GList **perms);
+int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id,
+ xs_transaction_t tx_id, const char *path, GList *perms);
+
+/* This differs from xs_watch_fn because it has the token */
+typedef void(xs_impl_watch_fn)(void *opaque, const char *path,
+ const char *token);
+int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path,
+ const char *token, xs_impl_watch_fn fn, void *opaque);
+int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id,
+ const char *path, const char *token, xs_impl_watch_fn fn,
+ void *opaque);
+int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id);
+
+GByteArray *xs_impl_serialize(XenstoreImplState *s);
+int xs_impl_deserialize(XenstoreImplState *s, GByteArray *bytes,
+ unsigned int dom_id, xs_impl_watch_fn watch_fn,
+ void *watch_opaque);
+
+#endif /* QEMU_XENSTORE_IMPL_H */
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7bebea5..1489abf 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -102,6 +102,11 @@
#include "trace.h"
#include CONFIG_DEVICES
+#ifdef CONFIG_XEN_EMU
+#include "hw/xen/xen-legacy-backend.h"
+#include "hw/xen/xen-bus.h"
+#endif
+
/*
* Helper for setting model-id for CPU models that changed model-id
* depending on QEMU versions up to QEMU 2.4.
@@ -1318,6 +1323,8 @@ void pc_basic_device_init(struct PCMachineState *pcms,
if (pcms->bus) {
pci_create_simple(pcms->bus, -1, "xen-platform");
}
+ xen_bus_init();
+ xen_be_init();
}
#endif
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 4bf15f9..30eedd6 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -47,8 +47,6 @@
#include "hw/kvm/clock.h"
#include "hw/sysbus.h"
#include "hw/i2c/smbus_eeprom.h"
-#include "hw/xen/xen-x86.h"
-#include "hw/xen/xen.h"
#include "exec/memory.h"
#include "hw/acpi/acpi.h"
#include "hw/acpi/piix4.h"
@@ -60,6 +58,8 @@
#include <xen/hvm/hvm_info_table.h>
#include "hw/xen/xen_pt.h"
#endif
+#include "hw/xen/xen-x86.h"
+#include "hw/xen/xen.h"
#include "migration/global_state.h"
#include "migration/misc.h"
#include "sysemu/numa.h"
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index e5a1dd1..56641a5 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -18,7 +18,7 @@
#include "hw/irq.h"
#include "hw/hw.h"
#include "hw/i386/apic-msidef.h"
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen_native.h"
#include "hw/xen/xen-legacy-backend.h"
#include "hw/xen/xen-bus.h"
#include "hw/xen/xen-x86.h"
@@ -52,10 +52,11 @@ static bool xen_in_migration;
/* Compatibility with older version */
-/* This allows QEMU to build on a system that has Xen 4.5 or earlier
- * installed. This here (not in hw/xen/xen_common.h) because xen/hvm/ioreq.h
- * needs to be included before this block and hw/xen/xen_common.h needs to
- * be included before xen/hvm/ioreq.h
+/*
+ * This allows QEMU to build on a system that has Xen 4.5 or earlier installed.
+ * This is here (not in hw/xen/xen_native.h) because xen/hvm/ioreq.h needs to
+ * be included before this block and hw/xen/xen_native.h needs to be included
+ * before xen/hvm/ioreq.h
*/
#ifndef IOREQ_TYPE_VMWARE_PORT
#define IOREQ_TYPE_VMWARE_PORT 3
@@ -761,7 +762,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state)
int i;
evtchn_port_t port;
- port = xenevtchn_pending(state->xce_handle);
+ port = qemu_xen_evtchn_pending(state->xce_handle);
if (port == state->bufioreq_local_port) {
timer_mod(state->buffered_io_timer,
BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
@@ -780,7 +781,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state)
}
/* unmask the wanted port again */
- xenevtchn_unmask(state->xce_handle, port);
+ qemu_xen_evtchn_unmask(state->xce_handle, port);
/* get the io packet from shared memory */
state->send_vcpu = i;
@@ -1147,7 +1148,7 @@ static void handle_buffered_io(void *opaque)
BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
} else {
timer_del(state->buffered_io_timer);
- xenevtchn_unmask(state->xce_handle, state->bufioreq_local_port);
+ qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port);
}
}
@@ -1196,8 +1197,8 @@ static void cpu_handle_ioreq(void *opaque)
}
req->state = STATE_IORESP_READY;
- xenevtchn_notify(state->xce_handle,
- state->ioreq_local_port[state->send_vcpu]);
+ qemu_xen_evtchn_notify(state->xce_handle,
+ state->ioreq_local_port[state->send_vcpu]);
}
}
@@ -1206,7 +1207,7 @@ static void xen_main_loop_prepare(XenIOState *state)
int evtchn_fd = -1;
if (state->xce_handle != NULL) {
- evtchn_fd = xenevtchn_fd(state->xce_handle);
+ evtchn_fd = qemu_xen_evtchn_fd(state->xce_handle);
}
state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
@@ -1249,7 +1250,7 @@ static void xen_exit_notifier(Notifier *n, void *data)
xenforeignmemory_unmap_resource(xen_fmem, state->fres);
}
- xenevtchn_close(state->xce_handle);
+ qemu_xen_evtchn_close(state->xce_handle);
xs_daemon_close(state->xenstore);
}
@@ -1397,9 +1398,11 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
xen_pfn_t ioreq_pfn;
XenIOState *state;
+ setup_xen_backend_ops();
+
state = g_new0(XenIOState, 1);
- state->xce_handle = xenevtchn_open(NULL, 0);
+ state->xce_handle = qemu_xen_evtchn_open();
if (state->xce_handle == NULL) {
perror("xen: event channel open");
goto err;
@@ -1463,8 +1466,9 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
/* FIXME: how about if we overflow the page here? */
for (i = 0; i < max_cpus; i++) {
- rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
- xen_vcpu_eport(state->shared_page, i));
+ rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
+ xen_vcpu_eport(state->shared_page,
+ i));
if (rc == -1) {
error_report("shared evtchn %d bind error %d", i, errno);
goto err;
@@ -1472,8 +1476,8 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
state->ioreq_local_port[i] = rc;
}
- rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
- state->bufioreq_remote_port);
+ rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
+ state->bufioreq_remote_port);
if (rc == -1) {
error_report("buffered evtchn bind error %d", errno);
goto err;
diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c
index 1d0879d..f7d9746 100644
--- a/hw/i386/xen/xen-mapcache.c
+++ b/hw/i386/xen/xen-mapcache.c
@@ -14,7 +14,7 @@
#include <sys/resource.h>
-#include "hw/xen/xen-legacy-backend.h"
+#include "hw/xen/xen_native.h"
#include "qemu/bitmap.h"
#include "sysemu/runstate.h"
diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c
index 539f7da..57f1d74 100644
--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c
@@ -28,7 +28,6 @@
#include "hw/ide/pci.h"
#include "hw/pci/pci.h"
#include "migration/vmstate.h"
-#include "hw/xen/xen.h"
#include "net/net.h"
#include "trace.h"
#include "sysemu/xen.h"
@@ -38,10 +37,12 @@
#include "qom/object.h"
#ifdef CONFIG_XEN
-#include "hw/xen/xen_common.h"
-#include "hw/xen/xen-legacy-backend.h"
+#include "hw/xen/xen_native.h"
#endif
+/* The rule is that xen_native.h must come first */
+#include "hw/xen/xen.h"
+
//#define DEBUG_PLATFORM
#ifdef DEBUG_PLATFORM
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
index 7d92c2d..9bbf659 100644
--- a/hw/net/xen_nic.c
+++ b/hw/net/xen_nic.c
@@ -145,7 +145,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
continue;
}
- if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) {
+ if ((txreq.offset + txreq.size) > XEN_PAGE_SIZE) {
xen_pv_printf(&netdev->xendev, 0, "error: page crossing\n");
net_tx_error(netdev, &txreq, rc);
continue;
@@ -171,7 +171,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
if (txreq.flags & NETTXF_csum_blank) {
/* have read-only mapping -> can't fill checksum in-place */
if (!tmpbuf) {
- tmpbuf = g_malloc(XC_PAGE_SIZE);
+ tmpbuf = g_malloc(XEN_PAGE_SIZE);
}
memcpy(tmpbuf, page + txreq.offset, txreq.size);
net_checksum_calculate(tmpbuf, txreq.size, CSUM_ALL);
@@ -181,7 +181,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
qemu_send_packet(qemu_get_queue(netdev->nic),
page + txreq.offset, txreq.size);
}
- xen_be_unmap_grant_ref(&netdev->xendev, page);
+ xen_be_unmap_grant_ref(&netdev->xendev, page, txreq.gref);
net_tx_response(netdev, &txreq, NETIF_RSP_OKAY);
}
if (!netdev->tx_work) {
@@ -243,9 +243,9 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size
if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
return 0;
}
- if (size > XC_PAGE_SIZE - NET_IP_ALIGN) {
+ if (size > XEN_PAGE_SIZE - NET_IP_ALIGN) {
xen_pv_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)",
- (unsigned long)size, XC_PAGE_SIZE - NET_IP_ALIGN);
+ (unsigned long)size, XEN_PAGE_SIZE - NET_IP_ALIGN);
return -1;
}
@@ -261,7 +261,7 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size
return -1;
}
memcpy(page + NET_IP_ALIGN, buf, size);
- xen_be_unmap_grant_ref(&netdev->xendev, page);
+ xen_be_unmap_grant_ref(&netdev->xendev, page, rxreq.gref);
net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0);
return size;
@@ -343,12 +343,13 @@ static int net_connect(struct XenLegacyDevice *xendev)
netdev->rx_ring_ref,
PROT_READ | PROT_WRITE);
if (!netdev->rxs) {
- xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs);
+ xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs,
+ netdev->tx_ring_ref);
netdev->txs = NULL;
return -1;
}
- BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE);
- BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XC_PAGE_SIZE);
+ BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XEN_PAGE_SIZE);
+ BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XEN_PAGE_SIZE);
xen_be_bind_evtchn(&netdev->xendev);
@@ -368,11 +369,13 @@ static void net_disconnect(struct XenLegacyDevice *xendev)
xen_pv_unbind_evtchn(&netdev->xendev);
if (netdev->txs) {
- xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs);
+ xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs,
+ netdev->tx_ring_ref);
netdev->txs = NULL;
}
if (netdev->rxs) {
- xen_be_unmap_grant_ref(&netdev->xendev, netdev->rxs);
+ xen_be_unmap_grant_ref(&netdev->xendev, netdev->rxs,
+ netdev->rx_ring_ref);
netdev->rxs = NULL;
}
}
diff --git a/hw/usb/meson.build b/hw/usb/meson.build
index bdf34cb..599dc24 100644
--- a/hw/usb/meson.build
+++ b/hw/usb/meson.build
@@ -84,6 +84,6 @@ if libusb.found()
hw_usb_modules += {'host': usbhost_ss}
endif
-softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN', libusb], if_true: files('xen-usb.c'))
+softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN_BUS', libusb], if_true: files('xen-usb.c'))
modules += { 'hw-usb': hw_usb_modules }
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index 0f7369e..66cb3f7 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -101,6 +101,8 @@ struct usbback_hotplug {
struct usbback_info {
struct XenLegacyDevice xendev; /* must be first */
USBBus bus;
+ uint32_t urb_ring_ref;
+ uint32_t conn_ring_ref;
void *urb_sring;
void *conn_sring;
struct usbif_urb_back_ring urb_ring;
@@ -159,7 +161,7 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req)
for (i = 0; i < nr_segs; i++) {
if ((unsigned)usbback_req->req.seg[i].offset +
- (unsigned)usbback_req->req.seg[i].length > XC_PAGE_SIZE) {
+ (unsigned)usbback_req->req.seg[i].length > XEN_PAGE_SIZE) {
xen_pv_printf(xendev, 0, "segment crosses page boundary\n");
return -EINVAL;
}
@@ -183,7 +185,7 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req)
for (i = 0; i < usbback_req->nr_buffer_segs; i++) {
seg = usbback_req->req.seg + i;
- addr = usbback_req->buffer + i * XC_PAGE_SIZE + seg->offset;
+ addr = usbback_req->buffer + i * XEN_PAGE_SIZE + seg->offset;
qemu_iovec_add(&usbback_req->packet.iov, addr, seg->length);
}
}
@@ -277,10 +279,11 @@ static int usbback_init_packet(struct usbback_req *usbback_req)
static void usbback_do_response(struct usbback_req *usbback_req, int32_t status,
int32_t actual_length, int32_t error_count)
{
+ uint32_t ref[USBIF_MAX_SEGMENTS_PER_REQUEST];
struct usbback_info *usbif;
struct usbif_urb_response *res;
struct XenLegacyDevice *xendev;
- unsigned int notify;
+ unsigned int notify, i;
usbif = usbback_req->usbif;
xendev = &usbif->xendev;
@@ -293,13 +296,19 @@ static void usbback_do_response(struct usbback_req *usbback_req, int32_t status,
}
if (usbback_req->buffer) {
- xen_be_unmap_grant_refs(xendev, usbback_req->buffer,
+ for (i = 0; i < usbback_req->nr_buffer_segs; i++) {
+ ref[i] = usbback_req->req.seg[i].gref;
+ }
+ xen_be_unmap_grant_refs(xendev, usbback_req->buffer, ref,
usbback_req->nr_buffer_segs);
usbback_req->buffer = NULL;
}
if (usbback_req->isoc_buffer) {
- xen_be_unmap_grant_refs(xendev, usbback_req->isoc_buffer,
+ for (i = 0; i < usbback_req->nr_extra_segs; i++) {
+ ref[i] = usbback_req->req.seg[i + usbback_req->req.nr_buffer_segs].gref;
+ }
+ xen_be_unmap_grant_refs(xendev, usbback_req->isoc_buffer, ref,
usbback_req->nr_extra_segs);
usbback_req->isoc_buffer = NULL;
}
@@ -832,11 +841,11 @@ static void usbback_disconnect(struct XenLegacyDevice *xendev)
xen_pv_unbind_evtchn(xendev);
if (usbif->urb_sring) {
- xen_be_unmap_grant_ref(xendev, usbif->urb_sring);
+ xen_be_unmap_grant_ref(xendev, usbif->urb_sring, usbif->urb_ring_ref);
usbif->urb_sring = NULL;
}
if (usbif->conn_sring) {
- xen_be_unmap_grant_ref(xendev, usbif->conn_sring);
+ xen_be_unmap_grant_ref(xendev, usbif->conn_sring, usbif->conn_ring_ref);
usbif->conn_sring = NULL;
}
@@ -889,10 +898,12 @@ static int usbback_connect(struct XenLegacyDevice *xendev)
return -1;
}
+ usbif->urb_ring_ref = urb_ring_ref;
+ usbif->conn_ring_ref = conn_ring_ref;
urb_sring = usbif->urb_sring;
conn_sring = usbif->conn_sring;
- BACK_RING_INIT(&usbif->urb_ring, urb_sring, XC_PAGE_SIZE);
- BACK_RING_INIT(&usbif->conn_ring, conn_sring, XC_PAGE_SIZE);
+ BACK_RING_INIT(&usbif->urb_ring, urb_sring, XEN_PAGE_SIZE);
+ BACK_RING_INIT(&usbif->conn_ring, conn_sring, XEN_PAGE_SIZE);
xen_be_bind_evtchn(xendev);
diff --git a/hw/xen/meson.build b/hw/xen/meson.build
index ae0ace3..19c6aab 100644
--- a/hw/xen/meson.build
+++ b/hw/xen/meson.build
@@ -1,4 +1,4 @@
-softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files(
+softmmu_ss.add(when: ['CONFIG_XEN_BUS'], if_true: files(
'xen-backend.c',
'xen-bus-helper.c',
'xen-bus.c',
@@ -7,6 +7,10 @@ softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files(
'xen_pvdev.c',
))
+softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files(
+ 'xen-operations.c',
+))
+
xen_specific_ss = ss.source_set()
if have_xen_pci_passthrough
xen_specific_ss.add(files(
diff --git a/hw/xen/trace-events b/hw/xen/trace-events
index 3da3fd8..55c9e1d 100644
--- a/hw/xen/trace-events
+++ b/hw/xen/trace-events
@@ -1,6 +1,6 @@
# See docs/devel/tracing.rst for syntax documentation.
-# ../../include/hw/xen/xen_common.h
+# ../../include/hw/xen/xen_native.h
xen_default_ioreq_server(void) ""
xen_ioreq_server_create(uint32_t id) "id: %u"
xen_ioreq_server_destroy(uint32_t id) "id: %u"
diff --git a/hw/xen/xen-bus-helper.c b/hw/xen/xen-bus-helper.c
index 5a1e12b..b2b2cc9 100644
--- a/hw/xen/xen-bus-helper.c
+++ b/hw/xen/xen-bus-helper.c
@@ -10,6 +10,7 @@
#include "hw/xen/xen-bus.h"
#include "hw/xen/xen-bus-helper.h"
#include "qapi/error.h"
+#include "trace.h"
#include <glib/gprintf.h>
@@ -46,34 +47,28 @@ const char *xs_strstate(enum xenbus_state state)
return "INVALID";
}
-void xs_node_create(struct xs_handle *xsh, xs_transaction_t tid,
- const char *node, struct xs_permissions perms[],
- unsigned int nr_perms, Error **errp)
+void xs_node_create(struct qemu_xs_handle *h, xs_transaction_t tid,
+ const char *node, unsigned int owner, unsigned int domid,
+ unsigned int perms, Error **errp)
{
trace_xs_node_create(node);
- if (!xs_write(xsh, tid, node, "", 0)) {
+ if (!qemu_xen_xs_create(h, tid, owner, domid, perms, node)) {
error_setg_errno(errp, errno, "failed to create node '%s'", node);
- return;
- }
-
- if (!xs_set_permissions(xsh, tid, node, perms, nr_perms)) {
- error_setg_errno(errp, errno, "failed to set node '%s' permissions",
- node);
}
}
-void xs_node_destroy(struct xs_handle *xsh, xs_transaction_t tid,
+void xs_node_destroy(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, Error **errp)
{
trace_xs_node_destroy(node);
- if (!xs_rm(xsh, tid, node)) {
+ if (!qemu_xen_xs_destroy(h, tid, node)) {
error_setg_errno(errp, errno, "failed to destroy node '%s'", node);
}
}
-void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid,
+void xs_node_vprintf(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, const char *key, Error **errp,
const char *fmt, va_list ap)
{
@@ -86,7 +81,7 @@ void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid,
trace_xs_node_vprintf(path, value);
- if (!xs_write(xsh, tid, path, value, len)) {
+ if (!qemu_xen_xs_write(h, tid, path, value, len)) {
error_setg_errno(errp, errno, "failed to write '%s' to '%s'",
value, path);
}
@@ -95,18 +90,18 @@ void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid,
g_free(path);
}
-void xs_node_printf(struct xs_handle *xsh, xs_transaction_t tid,
+void xs_node_printf(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, const char *key, Error **errp,
const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
- xs_node_vprintf(xsh, tid, node, key, errp, fmt, ap);
+ xs_node_vprintf(h, tid, node, key, errp, fmt, ap);
va_end(ap);
}
-int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid,
+int xs_node_vscanf(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, const char *key, Error **errp,
const char *fmt, va_list ap)
{
@@ -115,7 +110,7 @@ int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid,
path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) :
g_strdup(key);
- value = xs_read(xsh, tid, path, NULL);
+ value = qemu_xen_xs_read(h, tid, path, NULL);
trace_xs_node_vscanf(path, value);
@@ -133,7 +128,7 @@ int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid,
return rc;
}
-int xs_node_scanf(struct xs_handle *xsh, xs_transaction_t tid,
+int xs_node_scanf(struct qemu_xs_handle *h, xs_transaction_t tid,
const char *node, const char *key, Error **errp,
const char *fmt, ...)
{
@@ -141,42 +136,35 @@ int xs_node_scanf(struct xs_handle *xsh, xs_transaction_t tid,
int rc;
va_start(ap, fmt);
- rc = xs_node_vscanf(xsh, tid, node, key, errp, fmt, ap);
+ rc = xs_node_vscanf(h, tid, node, key, errp, fmt, ap);
va_end(ap);
return rc;
}
-void xs_node_watch(struct xs_handle *xsh, const char *node, const char *key,
- char *token, Error **errp)
+struct qemu_xs_watch *xs_node_watch(struct qemu_xs_handle *h, const char *node,
+ const char *key, xs_watch_fn fn,
+ void *opaque, Error **errp)
{
char *path;
+ struct qemu_xs_watch *w;
path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) :
g_strdup(key);
trace_xs_node_watch(path);
- if (!xs_watch(xsh, path, token)) {
+ w = qemu_xen_xs_watch(h, path, fn, opaque);
+ if (!w) {
error_setg_errno(errp, errno, "failed to watch node '%s'", path);
}
g_free(path);
+
+ return w;
}
-void xs_node_unwatch(struct xs_handle *xsh, const char *node,
- const char *key, const char *token, Error **errp)
+void xs_node_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
{
- char *path;
-
- path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) :
- g_strdup(key);
-
- trace_xs_node_unwatch(path);
-
- if (!xs_unwatch(xsh, path, token)) {
- error_setg_errno(errp, errno, "failed to unwatch node '%s'", path);
- }
-
- g_free(path);
+ qemu_xen_xs_unwatch(h, w);
}
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index df3f6b9..c59850b 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -62,7 +62,7 @@ static void xen_device_unplug(XenDevice *xendev, Error **errp)
/* Mimic the way the Xen toolstack does an unplug */
again:
- tid = xs_transaction_start(xenbus->xsh);
+ tid = qemu_xen_xs_transaction_start(xenbus->xsh);
if (tid == XBT_NULL) {
error_setg_errno(errp, errno, "failed xs_transaction_start");
return;
@@ -80,7 +80,7 @@ again:
goto abort;
}
- if (!xs_transaction_end(xenbus->xsh, tid, false)) {
+ if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, false)) {
if (errno == EAGAIN) {
goto again;
}
@@ -95,7 +95,7 @@ abort:
* We only abort if there is already a failure so ignore any error
* from ending the transaction.
*/
- xs_transaction_end(xenbus->xsh, tid, true);
+ qemu_xen_xs_transaction_end(xenbus->xsh, tid, true);
}
static void xen_bus_print_dev(Monitor *mon, DeviceState *dev, int indent)
@@ -111,143 +111,6 @@ static char *xen_bus_get_dev_path(DeviceState *dev)
return xen_device_get_backend_path(XEN_DEVICE(dev));
}
-struct XenWatch {
- char *node, *key;
- char *token;
- XenWatchHandler handler;
- void *opaque;
- Notifier notifier;
-};
-
-static void watch_notify(Notifier *n, void *data)
-{
- XenWatch *watch = container_of(n, XenWatch, notifier);
- const char *token = data;
-
- if (!strcmp(watch->token, token)) {
- watch->handler(watch->opaque);
- }
-}
-
-static XenWatch *new_watch(const char *node, const char *key,
- XenWatchHandler handler, void *opaque)
-{
- XenWatch *watch = g_new0(XenWatch, 1);
- QemuUUID uuid;
-
- qemu_uuid_generate(&uuid);
-
- watch->token = qemu_uuid_unparse_strdup(&uuid);
- watch->node = g_strdup(node);
- watch->key = g_strdup(key);
- watch->handler = handler;
- watch->opaque = opaque;
- watch->notifier.notify = watch_notify;
-
- return watch;
-}
-
-static void free_watch(XenWatch *watch)
-{
- g_free(watch->token);
- g_free(watch->key);
- g_free(watch->node);
-
- g_free(watch);
-}
-
-struct XenWatchList {
- struct xs_handle *xsh;
- NotifierList notifiers;
-};
-
-static void watch_list_event(void *opaque)
-{
- XenWatchList *watch_list = opaque;
- char **v;
- const char *token;
-
- v = xs_check_watch(watch_list->xsh);
- if (!v) {
- return;
- }
-
- token = v[XS_WATCH_TOKEN];
-
- notifier_list_notify(&watch_list->notifiers, (void *)token);
-
- free(v);
-}
-
-static XenWatchList *watch_list_create(struct xs_handle *xsh)
-{
- XenWatchList *watch_list = g_new0(XenWatchList, 1);
-
- g_assert(xsh);
-
- watch_list->xsh = xsh;
- notifier_list_init(&watch_list->notifiers);
- qemu_set_fd_handler(xs_fileno(watch_list->xsh), watch_list_event, NULL,
- watch_list);
-
- return watch_list;
-}
-
-static void watch_list_destroy(XenWatchList *watch_list)
-{
- g_assert(notifier_list_empty(&watch_list->notifiers));
- qemu_set_fd_handler(xs_fileno(watch_list->xsh), NULL, NULL, NULL);
- g_free(watch_list);
-}
-
-static XenWatch *watch_list_add(XenWatchList *watch_list, const char *node,
- const char *key, XenWatchHandler handler,
- void *opaque, Error **errp)
-{
- ERRP_GUARD();
- XenWatch *watch = new_watch(node, key, handler, opaque);
-
- notifier_list_add(&watch_list->notifiers, &watch->notifier);
-
- xs_node_watch(watch_list->xsh, node, key, watch->token, errp);
- if (*errp) {
- notifier_remove(&watch->notifier);
- free_watch(watch);
-
- return NULL;
- }
-
- return watch;
-}
-
-static void watch_list_remove(XenWatchList *watch_list, XenWatch *watch,
- Error **errp)
-{
- xs_node_unwatch(watch_list->xsh, watch->node, watch->key, watch->token,
- errp);
-
- notifier_remove(&watch->notifier);
- free_watch(watch);
-}
-
-static XenWatch *xen_bus_add_watch(XenBus *xenbus, const char *node,
- const char *key, XenWatchHandler handler,
- Error **errp)
-{
- trace_xen_bus_add_watch(node, key);
-
- return watch_list_add(xenbus->watch_list, node, key, handler, xenbus,
- errp);
-}
-
-static void xen_bus_remove_watch(XenBus *xenbus, XenWatch *watch,
- Error **errp)
-{
- trace_xen_bus_remove_watch(watch->node, watch->key);
-
- watch_list_remove(xenbus->watch_list, watch, errp);
-}
-
static void xen_bus_backend_create(XenBus *xenbus, const char *type,
const char *name, char *path,
Error **errp)
@@ -261,15 +124,15 @@ static void xen_bus_backend_create(XenBus *xenbus, const char *type,
trace_xen_bus_backend_create(type, path);
again:
- tid = xs_transaction_start(xenbus->xsh);
+ tid = qemu_xen_xs_transaction_start(xenbus->xsh);
if (tid == XBT_NULL) {
error_setg(errp, "failed xs_transaction_start");
return;
}
- key = xs_directory(xenbus->xsh, tid, path, &n);
+ key = qemu_xen_xs_directory(xenbus->xsh, tid, path, &n);
if (!key) {
- if (!xs_transaction_end(xenbus->xsh, tid, true)) {
+ if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, true)) {
error_setg_errno(errp, errno, "failed xs_transaction_end");
}
return;
@@ -300,7 +163,7 @@ again:
free(key);
- if (!xs_transaction_end(xenbus->xsh, tid, false)) {
+ if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, false)) {
qobject_unref(opts);
if (errno == EAGAIN) {
@@ -327,7 +190,7 @@ static void xen_bus_type_enumerate(XenBus *xenbus, const char *type)
trace_xen_bus_type_enumerate(type);
- backend = xs_directory(xenbus->xsh, XBT_NULL, domain_path, &n);
+ backend = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, domain_path, &n);
if (!backend) {
goto out;
}
@@ -372,7 +235,7 @@ static void xen_bus_enumerate(XenBus *xenbus)
trace_xen_bus_enumerate();
- type = xs_directory(xenbus->xsh, XBT_NULL, "backend", &n);
+ type = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, "backend", &n);
if (!type) {
return;
}
@@ -415,7 +278,7 @@ static void xen_bus_cleanup(XenBus *xenbus)
}
}
-static void xen_bus_backend_changed(void *opaque)
+static void xen_bus_backend_changed(void *opaque, const char *path)
{
XenBus *xenbus = opaque;
@@ -434,7 +297,7 @@ static void xen_bus_unrealize(BusState *bus)
for (i = 0; i < xenbus->backend_types; i++) {
if (xenbus->backend_watch[i]) {
- xen_bus_remove_watch(xenbus, xenbus->backend_watch[i], NULL);
+ xs_node_unwatch(xenbus->xsh, xenbus->backend_watch[i]);
}
}
@@ -442,13 +305,8 @@ static void xen_bus_unrealize(BusState *bus)
xenbus->backend_watch = NULL;
}
- if (xenbus->watch_list) {
- watch_list_destroy(xenbus->watch_list);
- xenbus->watch_list = NULL;
- }
-
if (xenbus->xsh) {
- xs_close(xenbus->xsh);
+ qemu_xen_xs_close(xenbus->xsh);
}
}
@@ -463,7 +321,7 @@ static void xen_bus_realize(BusState *bus, Error **errp)
trace_xen_bus_realize();
- xenbus->xsh = xs_open(0);
+ xenbus->xsh = qemu_xen_xs_open();
if (!xenbus->xsh) {
error_setg_errno(errp, errno, "failed xs_open");
goto fail;
@@ -476,19 +334,18 @@ static void xen_bus_realize(BusState *bus, Error **errp)
xenbus->backend_id = 0; /* Assume lack of node means dom0 */
}
- xenbus->watch_list = watch_list_create(xenbus->xsh);
-
module_call_init(MODULE_INIT_XEN_BACKEND);
type = xen_backend_get_types(&xenbus->backend_types);
- xenbus->backend_watch = g_new(XenWatch *, xenbus->backend_types);
+ xenbus->backend_watch = g_new(struct qemu_xs_watch *,
+ xenbus->backend_types);
for (i = 0; i < xenbus->backend_types; i++) {
char *node = g_strdup_printf("backend/%s", type[i]);
xenbus->backend_watch[i] =
- xen_bus_add_watch(xenbus, node, key, xen_bus_backend_changed,
- &local_err);
+ xs_node_watch(xenbus->xsh, node, key, xen_bus_backend_changed,
+ xenbus, &local_err);
if (local_err) {
/* This need not be treated as a hard error so don't propagate */
error_reportf_err(local_err,
@@ -631,7 +488,7 @@ static bool xen_device_frontend_is_active(XenDevice *xendev)
}
}
-static void xen_device_backend_changed(void *opaque)
+static void xen_device_backend_changed(void *opaque, const char *path)
{
XenDevice *xendev = opaque;
const char *type = object_get_typename(OBJECT(xendev));
@@ -685,66 +542,35 @@ static void xen_device_backend_changed(void *opaque)
}
}
-static XenWatch *xen_device_add_watch(XenDevice *xendev, const char *node,
- const char *key,
- XenWatchHandler handler,
- Error **errp)
-{
- const char *type = object_get_typename(OBJECT(xendev));
-
- trace_xen_device_add_watch(type, xendev->name, node, key);
-
- return watch_list_add(xendev->watch_list, node, key, handler, xendev,
- errp);
-}
-
-static void xen_device_remove_watch(XenDevice *xendev, XenWatch *watch,
- Error **errp)
-{
- const char *type = object_get_typename(OBJECT(xendev));
-
- trace_xen_device_remove_watch(type, xendev->name, watch->node,
- watch->key);
-
- watch_list_remove(xendev->watch_list, watch, errp);
-}
-
-
static void xen_device_backend_create(XenDevice *xendev, Error **errp)
{
ERRP_GUARD();
XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
- struct xs_permissions perms[2];
xendev->backend_path = xen_device_get_backend_path(xendev);
- perms[0].id = xenbus->backend_id;
- perms[0].perms = XS_PERM_NONE;
- perms[1].id = xendev->frontend_id;
- perms[1].perms = XS_PERM_READ;
-
g_assert(xenbus->xsh);
- xs_node_create(xenbus->xsh, XBT_NULL, xendev->backend_path, perms,
- ARRAY_SIZE(perms), errp);
+ xs_node_create(xenbus->xsh, XBT_NULL, xendev->backend_path,
+ xenbus->backend_id, xendev->frontend_id, XS_PERM_READ, errp);
if (*errp) {
error_prepend(errp, "failed to create backend: ");
return;
}
xendev->backend_state_watch =
- xen_device_add_watch(xendev, xendev->backend_path,
- "state", xen_device_backend_changed,
- errp);
+ xs_node_watch(xendev->xsh, xendev->backend_path,
+ "state", xen_device_backend_changed, xendev,
+ errp);
if (*errp) {
error_prepend(errp, "failed to watch backend state: ");
return;
}
xendev->backend_online_watch =
- xen_device_add_watch(xendev, xendev->backend_path,
- "online", xen_device_backend_changed,
- errp);
+ xs_node_watch(xendev->xsh, xendev->backend_path,
+ "online", xen_device_backend_changed, xendev,
+ errp);
if (*errp) {
error_prepend(errp, "failed to watch backend online: ");
return;
@@ -757,12 +583,12 @@ static void xen_device_backend_destroy(XenDevice *xendev)
Error *local_err = NULL;
if (xendev->backend_online_watch) {
- xen_device_remove_watch(xendev, xendev->backend_online_watch, NULL);
+ xs_node_unwatch(xendev->xsh, xendev->backend_online_watch);
xendev->backend_online_watch = NULL;
}
if (xendev->backend_state_watch) {
- xen_device_remove_watch(xendev, xendev->backend_state_watch, NULL);
+ xs_node_unwatch(xendev->xsh, xendev->backend_state_watch);
xendev->backend_state_watch = NULL;
}
@@ -837,7 +663,7 @@ static void xen_device_frontend_set_state(XenDevice *xendev,
}
}
-static void xen_device_frontend_changed(void *opaque)
+static void xen_device_frontend_changed(void *opaque, const char *path)
{
XenDevice *xendev = opaque;
XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev);
@@ -885,7 +711,6 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp)
{
ERRP_GUARD();
XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
- struct xs_permissions perms[2];
xendev->frontend_path = xen_device_get_frontend_path(xendev);
@@ -894,15 +719,11 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp)
* toolstack.
*/
if (!xen_device_frontend_exists(xendev)) {
- perms[0].id = xendev->frontend_id;
- perms[0].perms = XS_PERM_NONE;
- perms[1].id = xenbus->backend_id;
- perms[1].perms = XS_PERM_READ | XS_PERM_WRITE;
-
g_assert(xenbus->xsh);
- xs_node_create(xenbus->xsh, XBT_NULL, xendev->frontend_path, perms,
- ARRAY_SIZE(perms), errp);
+ xs_node_create(xenbus->xsh, XBT_NULL, xendev->frontend_path,
+ xendev->frontend_id, xenbus->backend_id,
+ XS_PERM_READ | XS_PERM_WRITE, errp);
if (*errp) {
error_prepend(errp, "failed to create frontend: ");
return;
@@ -910,8 +731,8 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp)
}
xendev->frontend_state_watch =
- xen_device_add_watch(xendev, xendev->frontend_path, "state",
- xen_device_frontend_changed, errp);
+ xs_node_watch(xendev->xsh, xendev->frontend_path, "state",
+ xen_device_frontend_changed, xendev, errp);
if (*errp) {
error_prepend(errp, "failed to watch frontend state: ");
}
@@ -923,8 +744,7 @@ static void xen_device_frontend_destroy(XenDevice *xendev)
Error *local_err = NULL;
if (xendev->frontend_state_watch) {
- xen_device_remove_watch(xendev, xendev->frontend_state_watch,
- NULL);
+ xs_node_unwatch(xendev->xsh, xendev->frontend_state_watch);
xendev->frontend_state_watch = NULL;
}
@@ -947,7 +767,7 @@ static void xen_device_frontend_destroy(XenDevice *xendev)
void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs,
Error **errp)
{
- if (xengnttab_set_max_grants(xendev->xgth, nr_refs)) {
+ if (qemu_xen_gnttab_set_max_grants(xendev->xgth, nr_refs)) {
error_setg_errno(errp, errno, "xengnttab_set_max_grants failed");
}
}
@@ -956,9 +776,8 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs,
unsigned int nr_refs, int prot,
Error **errp)
{
- void *map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_refs,
- xendev->frontend_id, refs,
- prot);
+ void *map = qemu_xen_gnttab_map_refs(xendev->xgth, nr_refs,
+ xendev->frontend_id, refs, prot);
if (!map) {
error_setg_errno(errp, errno,
@@ -968,112 +787,20 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs,
return map;
}
-void xen_device_unmap_grant_refs(XenDevice *xendev, void *map,
+void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, uint32_t *refs,
unsigned int nr_refs, Error **errp)
{
- if (xengnttab_unmap(xendev->xgth, map, nr_refs)) {
+ if (qemu_xen_gnttab_unmap(xendev->xgth, map, refs, nr_refs)) {
error_setg_errno(errp, errno, "xengnttab_unmap failed");
}
}
-static void compat_copy_grant_refs(XenDevice *xendev, bool to_domain,
- XenDeviceGrantCopySegment segs[],
- unsigned int nr_segs, Error **errp)
-{
- uint32_t *refs = g_new(uint32_t, nr_segs);
- int prot = to_domain ? PROT_WRITE : PROT_READ;
- void *map;
- unsigned int i;
-
- for (i = 0; i < nr_segs; i++) {
- XenDeviceGrantCopySegment *seg = &segs[i];
-
- refs[i] = to_domain ? seg->dest.foreign.ref :
- seg->source.foreign.ref;
- }
-
- map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_segs,
- xendev->frontend_id, refs,
- prot);
- if (!map) {
- error_setg_errno(errp, errno,
- "xengnttab_map_domain_grant_refs failed");
- goto done;
- }
-
- for (i = 0; i < nr_segs; i++) {
- XenDeviceGrantCopySegment *seg = &segs[i];
- void *page = map + (i * XC_PAGE_SIZE);
-
- if (to_domain) {
- memcpy(page + seg->dest.foreign.offset, seg->source.virt,
- seg->len);
- } else {
- memcpy(seg->dest.virt, page + seg->source.foreign.offset,
- seg->len);
- }
- }
-
- if (xengnttab_unmap(xendev->xgth, map, nr_segs)) {
- error_setg_errno(errp, errno, "xengnttab_unmap failed");
- }
-
-done:
- g_free(refs);
-}
-
void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain,
XenDeviceGrantCopySegment segs[],
unsigned int nr_segs, Error **errp)
{
- xengnttab_grant_copy_segment_t *xengnttab_segs;
- unsigned int i;
-
- if (!xendev->feature_grant_copy) {
- compat_copy_grant_refs(xendev, to_domain, segs, nr_segs, errp);
- return;
- }
-
- xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs);
-
- for (i = 0; i < nr_segs; i++) {
- XenDeviceGrantCopySegment *seg = &segs[i];
- xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
-
- if (to_domain) {
- xengnttab_seg->flags = GNTCOPY_dest_gref;
- xengnttab_seg->dest.foreign.domid = xendev->frontend_id;
- xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref;
- xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset;
- xengnttab_seg->source.virt = seg->source.virt;
- } else {
- xengnttab_seg->flags = GNTCOPY_source_gref;
- xengnttab_seg->source.foreign.domid = xendev->frontend_id;
- xengnttab_seg->source.foreign.ref = seg->source.foreign.ref;
- xengnttab_seg->source.foreign.offset =
- seg->source.foreign.offset;
- xengnttab_seg->dest.virt = seg->dest.virt;
- }
-
- xengnttab_seg->len = seg->len;
- }
-
- if (xengnttab_grant_copy(xendev->xgth, nr_segs, xengnttab_segs)) {
- error_setg_errno(errp, errno, "xengnttab_grant_copy failed");
- goto done;
- }
-
- for (i = 0; i < nr_segs; i++) {
- xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
-
- if (xengnttab_seg->status != GNTST_okay) {
- error_setg(errp, "xengnttab_grant_copy seg[%u] failed", i);
- break;
- }
- }
-
-done:
- g_free(xengnttab_segs);
+ qemu_xen_gnttab_grant_copy(xendev->xgth, to_domain, xendev->frontend_id,
+ (XenGrantCopySegment *)segs, nr_segs, errp);
}
struct XenEventChannel {
@@ -1095,12 +822,12 @@ static bool xen_device_poll(void *opaque)
static void xen_device_event(void *opaque)
{
XenEventChannel *channel = opaque;
- unsigned long port = xenevtchn_pending(channel->xeh);
+ unsigned long port = qemu_xen_evtchn_pending(channel->xeh);
if (port == channel->local_port) {
xen_device_poll(channel);
- xenevtchn_unmask(channel->xeh, port);
+ qemu_xen_evtchn_unmask(channel->xeh, port);
}
}
@@ -1115,11 +842,11 @@ void xen_device_set_event_channel_context(XenDevice *xendev,
}
if (channel->ctx)
- aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+ aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
NULL, NULL, NULL, NULL, NULL);
channel->ctx = ctx;
- aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+ aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
xen_device_event, NULL, xen_device_poll, NULL, channel);
}
@@ -1131,13 +858,13 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
XenEventChannel *channel = g_new0(XenEventChannel, 1);
xenevtchn_port_or_error_t local_port;
- channel->xeh = xenevtchn_open(NULL, 0);
+ channel->xeh = qemu_xen_evtchn_open();
if (!channel->xeh) {
error_setg_errno(errp, errno, "failed xenevtchn_open");
goto fail;
}
- local_port = xenevtchn_bind_interdomain(channel->xeh,
+ local_port = qemu_xen_evtchn_bind_interdomain(channel->xeh,
xendev->frontend_id,
port);
if (local_port < 0) {
@@ -1160,7 +887,7 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
fail:
if (channel->xeh) {
- xenevtchn_close(channel->xeh);
+ qemu_xen_evtchn_close(channel->xeh);
}
g_free(channel);
@@ -1177,7 +904,7 @@ void xen_device_notify_event_channel(XenDevice *xendev,
return;
}
- if (xenevtchn_notify(channel->xeh, channel->local_port) < 0) {
+ if (qemu_xen_evtchn_notify(channel->xeh, channel->local_port) < 0) {
error_setg_errno(errp, errno, "xenevtchn_notify failed");
}
}
@@ -1193,14 +920,14 @@ void xen_device_unbind_event_channel(XenDevice *xendev,
QLIST_REMOVE(channel, list);
- aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+ aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
NULL, NULL, NULL, NULL, NULL);
- if (xenevtchn_unbind(channel->xeh, channel->local_port) < 0) {
+ if (qemu_xen_evtchn_unbind(channel->xeh, channel->local_port) < 0) {
error_setg_errno(errp, errno, "xenevtchn_unbind failed");
}
- xenevtchn_close(channel->xeh);
+ qemu_xen_evtchn_close(channel->xeh);
g_free(channel);
}
@@ -1235,17 +962,12 @@ static void xen_device_unrealize(DeviceState *dev)
xen_device_backend_destroy(xendev);
if (xendev->xgth) {
- xengnttab_close(xendev->xgth);
+ qemu_xen_gnttab_close(xendev->xgth);
xendev->xgth = NULL;
}
- if (xendev->watch_list) {
- watch_list_destroy(xendev->watch_list);
- xendev->watch_list = NULL;
- }
-
if (xendev->xsh) {
- xs_close(xendev->xsh);
+ qemu_xen_xs_close(xendev->xsh);
xendev->xsh = NULL;
}
@@ -1290,23 +1012,18 @@ static void xen_device_realize(DeviceState *dev, Error **errp)
trace_xen_device_realize(type, xendev->name);
- xendev->xsh = xs_open(0);
+ xendev->xsh = qemu_xen_xs_open();
if (!xendev->xsh) {
error_setg_errno(errp, errno, "failed xs_open");
goto unrealize;
}
- xendev->watch_list = watch_list_create(xendev->xsh);
-
- xendev->xgth = xengnttab_open(NULL, 0);
+ xendev->xgth = qemu_xen_gnttab_open();
if (!xendev->xgth) {
error_setg_errno(errp, errno, "failed xengnttab_open");
goto unrealize;
}
- xendev->feature_grant_copy =
- (xengnttab_grant_copy(xendev->xgth, 0, NULL) == 0);
-
xen_device_backend_create(xendev, errp);
if (*errp) {
goto unrealize;
@@ -1317,13 +1034,6 @@ static void xen_device_realize(DeviceState *dev, Error **errp)
goto unrealize;
}
- if (xendev_class->realize) {
- xendev_class->realize(xendev, errp);
- if (*errp) {
- goto unrealize;
- }
- }
-
xen_device_backend_printf(xendev, "frontend", "%s",
xendev->frontend_path);
xen_device_backend_printf(xendev, "frontend-id", "%u",
@@ -1342,6 +1052,13 @@ static void xen_device_realize(DeviceState *dev, Error **errp)
xen_device_frontend_set_state(xendev, XenbusStateInitialising, true);
}
+ if (xendev_class->realize) {
+ xendev_class->realize(xendev, errp);
+ if (*errp) {
+ goto unrealize;
+ }
+ }
+
xendev->exit.notify = xen_device_exit;
qemu_add_exit_notifier(&xendev->exit);
return;
diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c
index afba71f..4ded3ce 100644
--- a/hw/xen/xen-legacy-backend.c
+++ b/hw/xen/xen-legacy-backend.c
@@ -39,11 +39,10 @@ BusState *xen_sysbus;
/* ------------------------------------------------------------- */
/* public */
-struct xs_handle *xenstore;
+struct qemu_xs_handle *xenstore;
const char *xen_protocol;
/* private */
-static bool xen_feature_grant_copy;
static int debug;
int xenstore_write_be_str(struct XenLegacyDevice *xendev, const char *node,
@@ -113,7 +112,7 @@ void xen_be_set_max_grant_refs(struct XenLegacyDevice *xendev,
{
assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV);
- if (xengnttab_set_max_grants(xendev->gnttabdev, nr_refs)) {
+ if (qemu_xen_gnttab_set_max_grants(xendev->gnttabdev, nr_refs)) {
xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n",
strerror(errno));
}
@@ -126,8 +125,8 @@ void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs,
assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV);
- ptr = xengnttab_map_domain_grant_refs(xendev->gnttabdev, nr_refs,
- xen_domid, refs, prot);
+ ptr = qemu_xen_gnttab_map_refs(xendev->gnttabdev, nr_refs, xen_domid, refs,
+ prot);
if (!ptr) {
xen_pv_printf(xendev, 0,
"xengnttab_map_domain_grant_refs failed: %s\n",
@@ -138,123 +137,31 @@ void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs,
}
void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr,
- unsigned int nr_refs)
+ uint32_t *refs, unsigned int nr_refs)
{
assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV);
- if (xengnttab_unmap(xendev->gnttabdev, ptr, nr_refs)) {
+ if (qemu_xen_gnttab_unmap(xendev->gnttabdev, ptr, refs, nr_refs)) {
xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n",
strerror(errno));
}
}
-static int compat_copy_grant_refs(struct XenLegacyDevice *xendev,
- bool to_domain,
- XenGrantCopySegment segs[],
- unsigned int nr_segs)
-{
- uint32_t *refs = g_new(uint32_t, nr_segs);
- int prot = to_domain ? PROT_WRITE : PROT_READ;
- void *pages;
- unsigned int i;
-
- for (i = 0; i < nr_segs; i++) {
- XenGrantCopySegment *seg = &segs[i];
-
- refs[i] = to_domain ?
- seg->dest.foreign.ref : seg->source.foreign.ref;
- }
-
- pages = xengnttab_map_domain_grant_refs(xendev->gnttabdev, nr_segs,
- xen_domid, refs, prot);
- if (!pages) {
- xen_pv_printf(xendev, 0,
- "xengnttab_map_domain_grant_refs failed: %s\n",
- strerror(errno));
- g_free(refs);
- return -1;
- }
-
- for (i = 0; i < nr_segs; i++) {
- XenGrantCopySegment *seg = &segs[i];
- void *page = pages + (i * XC_PAGE_SIZE);
-
- if (to_domain) {
- memcpy(page + seg->dest.foreign.offset, seg->source.virt,
- seg->len);
- } else {
- memcpy(seg->dest.virt, page + seg->source.foreign.offset,
- seg->len);
- }
- }
-
- if (xengnttab_unmap(xendev->gnttabdev, pages, nr_segs)) {
- xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n",
- strerror(errno));
- }
-
- g_free(refs);
- return 0;
-}
-
int xen_be_copy_grant_refs(struct XenLegacyDevice *xendev,
bool to_domain,
XenGrantCopySegment segs[],
unsigned int nr_segs)
{
- xengnttab_grant_copy_segment_t *xengnttab_segs;
- unsigned int i;
int rc;
assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV);
- if (!xen_feature_grant_copy) {
- return compat_copy_grant_refs(xendev, to_domain, segs, nr_segs);
- }
-
- xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs);
-
- for (i = 0; i < nr_segs; i++) {
- XenGrantCopySegment *seg = &segs[i];
- xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
-
- if (to_domain) {
- xengnttab_seg->flags = GNTCOPY_dest_gref;
- xengnttab_seg->dest.foreign.domid = xen_domid;
- xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref;
- xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset;
- xengnttab_seg->source.virt = seg->source.virt;
- } else {
- xengnttab_seg->flags = GNTCOPY_source_gref;
- xengnttab_seg->source.foreign.domid = xen_domid;
- xengnttab_seg->source.foreign.ref = seg->source.foreign.ref;
- xengnttab_seg->source.foreign.offset =
- seg->source.foreign.offset;
- xengnttab_seg->dest.virt = seg->dest.virt;
- }
-
- xengnttab_seg->len = seg->len;
- }
-
- rc = xengnttab_grant_copy(xendev->gnttabdev, nr_segs, xengnttab_segs);
-
+ rc = qemu_xen_gnttab_grant_copy(xendev->gnttabdev, to_domain, xen_domid,
+ segs, nr_segs, NULL);
if (rc) {
- xen_pv_printf(xendev, 0, "xengnttab_copy failed: %s\n",
- strerror(errno));
- }
-
- for (i = 0; i < nr_segs; i++) {
- xengnttab_grant_copy_segment_t *xengnttab_seg =
- &xengnttab_segs[i];
-
- if (xengnttab_seg->status != GNTST_okay) {
- xen_pv_printf(xendev, 0, "segment[%u] status: %d\n", i,
- xengnttab_seg->status);
- rc = -1;
- }
+ xen_pv_printf(xendev, 0, "xengnttab_grant_copy failed: %s\n",
+ strerror(-rc));
}
-
- g_free(xengnttab_segs);
return rc;
}
@@ -294,13 +201,13 @@ static struct XenLegacyDevice *xen_be_get_xendev(const char *type, int dom,
xendev->debug = debug;
xendev->local_port = -1;
- xendev->evtchndev = xenevtchn_open(NULL, 0);
+ xendev->evtchndev = qemu_xen_evtchn_open();
if (xendev->evtchndev == NULL) {
xen_pv_printf(NULL, 0, "can't open evtchn device\n");
qdev_unplug(DEVICE(xendev), NULL);
return NULL;
}
- qemu_set_cloexec(xenevtchn_fd(xendev->evtchndev));
+ qemu_set_cloexec(qemu_xen_evtchn_fd(xendev->evtchndev));
xen_pv_insert_xendev(xendev);
@@ -367,6 +274,25 @@ static void xen_be_frontend_changed(struct XenLegacyDevice *xendev,
}
}
+static void xenstore_update_fe(void *opaque, const char *watch)
+{
+ struct XenLegacyDevice *xendev = opaque;
+ const char *node;
+ unsigned int len;
+
+ len = strlen(xendev->fe);
+ if (strncmp(xendev->fe, watch, len) != 0) {
+ return;
+ }
+ if (watch[len] != '/') {
+ return;
+ }
+ node = watch + len + 1;
+
+ xen_be_frontend_changed(xendev, node);
+ xen_be_check_state(xendev);
+}
+
/* ------------------------------------------------------------- */
/* Check for possible state transitions and perform them. */
@@ -380,7 +306,6 @@ static void xen_be_frontend_changed(struct XenLegacyDevice *xendev,
*/
static int xen_be_try_setup(struct XenLegacyDevice *xendev)
{
- char token[XEN_BUFSIZE];
int be_state;
if (xenstore_read_be_int(xendev, "state", &be_state) == -1) {
@@ -401,8 +326,9 @@ static int xen_be_try_setup(struct XenLegacyDevice *xendev)
}
/* setup frontend watch */
- snprintf(token, sizeof(token), "fe:%p", xendev);
- if (!xs_watch(xenstore, xendev->fe, token)) {
+ xendev->watch = qemu_xen_xs_watch(xenstore, xendev->fe, xenstore_update_fe,
+ xendev);
+ if (!xendev->watch) {
xen_pv_printf(xendev, 0, "watching frontend path (%s) failed\n",
xendev->fe);
return -1;
@@ -466,7 +392,7 @@ static int xen_be_try_initialise(struct XenLegacyDevice *xendev)
}
if (xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV) {
- xendev->gnttabdev = xengnttab_open(NULL, 0);
+ xendev->gnttabdev = qemu_xen_gnttab_open();
if (xendev->gnttabdev == NULL) {
xen_pv_printf(NULL, 0, "can't open gnttab device\n");
return -1;
@@ -524,7 +450,7 @@ static void xen_be_disconnect(struct XenLegacyDevice *xendev,
xendev->ops->disconnect(xendev);
}
if (xendev->gnttabdev) {
- xengnttab_close(xendev->gnttabdev);
+ qemu_xen_gnttab_close(xendev->gnttabdev);
xendev->gnttabdev = NULL;
}
if (xendev->be_state != state) {
@@ -591,46 +517,20 @@ void xen_be_check_state(struct XenLegacyDevice *xendev)
/* ------------------------------------------------------------- */
-static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops)
-{
- struct XenLegacyDevice *xendev;
- char path[XEN_BUFSIZE], token[XEN_BUFSIZE];
- char **dev = NULL;
- unsigned int cdev, j;
-
- /* setup watch */
- snprintf(token, sizeof(token), "be:%p:%d:%p", type, dom, ops);
- snprintf(path, sizeof(path), "backend/%s/%d", type, dom);
- if (!xs_watch(xenstore, path, token)) {
- xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n",
- path);
- return -1;
- }
-
- /* look for backends */
- dev = xs_directory(xenstore, 0, path, &cdev);
- if (!dev) {
- return 0;
- }
- for (j = 0; j < cdev; j++) {
- xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops);
- if (xendev == NULL) {
- continue;
- }
- xen_be_check_state(xendev);
- }
- free(dev);
- return 0;
-}
+struct xenstore_be {
+ const char *type;
+ int dom;
+ struct XenDevOps *ops;
+};
-void xenstore_update_be(char *watch, char *type, int dom,
- struct XenDevOps *ops)
+static void xenstore_update_be(void *opaque, const char *watch)
{
+ struct xenstore_be *be = opaque;
struct XenLegacyDevice *xendev;
char path[XEN_BUFSIZE], *bepath;
unsigned int len, dev;
- len = snprintf(path, sizeof(path), "backend/%s/%d", type, dom);
+ len = snprintf(path, sizeof(path), "backend/%s/%d", be->type, be->dom);
if (strncmp(path, watch, len) != 0) {
return;
}
@@ -644,9 +544,9 @@ void xenstore_update_be(char *watch, char *type, int dom,
return;
}
- xendev = xen_be_get_xendev(type, dom, dev, ops);
+ xendev = xen_be_get_xendev(be->type, be->dom, dev, be->ops);
if (xendev != NULL) {
- bepath = xs_read(xenstore, 0, xendev->be, &len);
+ bepath = qemu_xen_xs_read(xenstore, 0, xendev->be, &len);
if (bepath == NULL) {
xen_pv_del_xendev(xendev);
} else {
@@ -657,23 +557,41 @@ void xenstore_update_be(char *watch, char *type, int dom,
}
}
-void xenstore_update_fe(char *watch, struct XenLegacyDevice *xendev)
+static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops)
{
- char *node;
- unsigned int len;
+ struct XenLegacyDevice *xendev;
+ char path[XEN_BUFSIZE];
+ struct xenstore_be *be = g_new0(struct xenstore_be, 1);
+ char **dev = NULL;
+ unsigned int cdev, j;
- len = strlen(xendev->fe);
- if (strncmp(xendev->fe, watch, len) != 0) {
- return;
- }
- if (watch[len] != '/') {
- return;
+ /* setup watch */
+ be->type = type;
+ be->dom = dom;
+ be->ops = ops;
+ snprintf(path, sizeof(path), "backend/%s/%d", type, dom);
+ if (!qemu_xen_xs_watch(xenstore, path, xenstore_update_be, be)) {
+ xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n",
+ path);
+ return -1;
}
- node = watch + len + 1;
- xen_be_frontend_changed(xendev, node);
- xen_be_check_state(xendev);
+ /* look for backends */
+ dev = qemu_xen_xs_directory(xenstore, 0, path, &cdev);
+ if (!dev) {
+ return 0;
+ }
+ for (j = 0; j < cdev; j++) {
+ xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops);
+ if (xendev == NULL) {
+ continue;
+ }
+ xen_be_check_state(xendev);
+ }
+ free(dev);
+ return 0;
}
+
/* -------------------------------------------------------------------- */
static void xen_set_dynamic_sysbus(void)
@@ -687,29 +605,17 @@ static void xen_set_dynamic_sysbus(void)
void xen_be_init(void)
{
- xengnttab_handle *gnttabdev;
-
- xenstore = xs_daemon_open();
+ xenstore = qemu_xen_xs_open();
if (!xenstore) {
xen_pv_printf(NULL, 0, "can't connect to xenstored\n");
exit(1);
}
- qemu_set_fd_handler(xs_fileno(xenstore), xenstore_update, NULL, NULL);
-
- if (xen_xc == NULL || xen_fmem == NULL) {
+ if (xen_evtchn_ops == NULL || xen_gnttab_ops == NULL) {
xen_pv_printf(NULL, 0, "Xen operations not set up\n");
exit(1);
}
- gnttabdev = xengnttab_open(NULL, 0);
- if (gnttabdev != NULL) {
- if (xengnttab_grant_copy(gnttabdev, 0, NULL) == 0) {
- xen_feature_grant_copy = true;
- }
- xengnttab_close(gnttabdev);
- }
-
xen_sysdev = qdev_new(TYPE_XENSYSDEV);
sysbus_realize_and_unref(SYS_BUS_DEVICE(xen_sysdev), &error_fatal);
xen_sysbus = qbus_new(TYPE_XENSYSBUS, xen_sysdev, "xen-sysbus");
@@ -751,14 +657,14 @@ int xen_be_bind_evtchn(struct XenLegacyDevice *xendev)
if (xendev->local_port != -1) {
return 0;
}
- xendev->local_port = xenevtchn_bind_interdomain
+ xendev->local_port = qemu_xen_evtchn_bind_interdomain
(xendev->evtchndev, xendev->dom, xendev->remote_port);
if (xendev->local_port == -1) {
xen_pv_printf(xendev, 0, "xenevtchn_bind_interdomain failed\n");
return -1;
}
xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port);
- qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev),
+ qemu_set_fd_handler(qemu_xen_evtchn_fd(xendev->evtchndev),
xen_pv_evtchn_event, NULL, xendev);
return 0;
}
diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c
new file mode 100644
index 0000000..4b78fbf
--- /dev/null
+++ b/hw/xen/xen-operations.c
@@ -0,0 +1,478 @@
+/*
+ * QEMU Xen backend support: Operations for true Xen
+ *
+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/uuid.h"
+#include "qapi/error.h"
+
+#include "hw/xen/xen_native.h"
+#include "hw/xen/xen_backend_ops.h"
+
+/*
+ * If we have new enough libxenctrl then we do not want/need these compat
+ * interfaces, despite what the user supplied cflags might say. They
+ * must be undefined before including xenctrl.h
+ */
+#undef XC_WANT_COMPAT_EVTCHN_API
+#undef XC_WANT_COMPAT_GNTTAB_API
+#undef XC_WANT_COMPAT_MAP_FOREIGN_API
+
+#include <xenctrl.h>
+
+/*
+ * We don't support Xen prior to 4.2.0.
+ */
+
+/* Xen 4.2 through 4.6 */
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701
+
+typedef xc_evtchn xenevtchn_handle;
+typedef evtchn_port_or_error_t xenevtchn_port_or_error_t;
+
+#define xenevtchn_open(l, f) xc_evtchn_open(l, f);
+#define xenevtchn_close(h) xc_evtchn_close(h)
+#define xenevtchn_fd(h) xc_evtchn_fd(h)
+#define xenevtchn_pending(h) xc_evtchn_pending(h)
+#define xenevtchn_notify(h, p) xc_evtchn_notify(h, p)
+#define xenevtchn_bind_interdomain(h, d, p) xc_evtchn_bind_interdomain(h, d, p)
+#define xenevtchn_unmask(h, p) xc_evtchn_unmask(h, p)
+#define xenevtchn_unbind(h, p) xc_evtchn_unbind(h, p)
+
+typedef xc_gnttab xengnttab_handle;
+
+#define xengnttab_open(l, f) xc_gnttab_open(l, f)
+#define xengnttab_close(h) xc_gnttab_close(h)
+#define xengnttab_set_max_grants(h, n) xc_gnttab_set_max_grants(h, n)
+#define xengnttab_map_grant_ref(h, d, r, p) xc_gnttab_map_grant_ref(h, d, r, p)
+#define xengnttab_unmap(h, a, n) xc_gnttab_munmap(h, a, n)
+#define xengnttab_map_grant_refs(h, c, d, r, p) \
+ xc_gnttab_map_grant_refs(h, c, d, r, p)
+#define xengnttab_map_domain_grant_refs(h, c, d, r, p) \
+ xc_gnttab_map_domain_grant_refs(h, c, d, r, p)
+
+typedef xc_interface xenforeignmemory_handle;
+
+#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
+
+#include <xenevtchn.h>
+#include <xengnttab.h>
+#include <xenforeignmemory.h>
+
+#endif
+
+/* Xen before 4.8 */
+
+static int libxengnttab_fallback_grant_copy(xengnttab_handle *xgt,
+ bool to_domain, uint32_t domid,
+ XenGrantCopySegment segs[],
+ unsigned int nr_segs, Error **errp)
+{
+ uint32_t *refs = g_new(uint32_t, nr_segs);
+ int prot = to_domain ? PROT_WRITE : PROT_READ;
+ void *map;
+ unsigned int i;
+ int rc = 0;
+
+ for (i = 0; i < nr_segs; i++) {
+ XenGrantCopySegment *seg = &segs[i];
+
+ refs[i] = to_domain ? seg->dest.foreign.ref :
+ seg->source.foreign.ref;
+ }
+ map = xengnttab_map_domain_grant_refs(xgt, nr_segs, domid, refs, prot);
+ if (!map) {
+ if (errp) {
+ error_setg_errno(errp, errno,
+ "xengnttab_map_domain_grant_refs failed");
+ }
+ rc = -errno;
+ goto done;
+ }
+
+ for (i = 0; i < nr_segs; i++) {
+ XenGrantCopySegment *seg = &segs[i];
+ void *page = map + (i * XEN_PAGE_SIZE);
+
+ if (to_domain) {
+ memcpy(page + seg->dest.foreign.offset, seg->source.virt,
+ seg->len);
+ } else {
+ memcpy(seg->dest.virt, page + seg->source.foreign.offset,
+ seg->len);
+ }
+ }
+
+ if (xengnttab_unmap(xgt, map, nr_segs)) {
+ if (errp) {
+ error_setg_errno(errp, errno, "xengnttab_unmap failed");
+ }
+ rc = -errno;
+ }
+
+done:
+ g_free(refs);
+ return rc;
+}
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800
+
+static int libxengnttab_backend_grant_copy(xengnttab_handle *xgt,
+ bool to_domain, uint32_t domid,
+ XenGrantCopySegment *segs,
+ uint32_t nr_segs, Error **errp)
+{
+ xengnttab_grant_copy_segment_t *xengnttab_segs;
+ unsigned int i;
+ int rc;
+
+ xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs);
+
+ for (i = 0; i < nr_segs; i++) {
+ XenGrantCopySegment *seg = &segs[i];
+ xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
+
+ if (to_domain) {
+ xengnttab_seg->flags = GNTCOPY_dest_gref;
+ xengnttab_seg->dest.foreign.domid = domid;
+ xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref;
+ xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset;
+ xengnttab_seg->source.virt = seg->source.virt;
+ } else {
+ xengnttab_seg->flags = GNTCOPY_source_gref;
+ xengnttab_seg->source.foreign.domid = domid;
+ xengnttab_seg->source.foreign.ref = seg->source.foreign.ref;
+ xengnttab_seg->source.foreign.offset =
+ seg->source.foreign.offset;
+ xengnttab_seg->dest.virt = seg->dest.virt;
+ }
+
+ xengnttab_seg->len = seg->len;
+ }
+
+ if (xengnttab_grant_copy(xgt, nr_segs, xengnttab_segs)) {
+ if (errp) {
+ error_setg_errno(errp, errno, "xengnttab_grant_copy failed");
+ }
+ rc = -errno;
+ goto done;
+ }
+
+ rc = 0;
+ for (i = 0; i < nr_segs; i++) {
+ xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
+
+ if (xengnttab_seg->status != GNTST_okay) {
+ if (errp) {
+ error_setg(errp, "xengnttab_grant_copy seg[%u] failed", i);
+ }
+ rc = -EIO;
+ break;
+ }
+ }
+
+done:
+ g_free(xengnttab_segs);
+ return rc;
+}
+#endif
+
+static xenevtchn_handle *libxenevtchn_backend_open(void)
+{
+ return xenevtchn_open(NULL, 0);
+}
+
+struct evtchn_backend_ops libxenevtchn_backend_ops = {
+ .open = libxenevtchn_backend_open,
+ .close = xenevtchn_close,
+ .bind_interdomain = xenevtchn_bind_interdomain,
+ .unbind = xenevtchn_unbind,
+ .get_fd = xenevtchn_fd,
+ .notify = xenevtchn_notify,
+ .unmask = xenevtchn_unmask,
+ .pending = xenevtchn_pending,
+};
+
+static xengnttab_handle *libxengnttab_backend_open(void)
+{
+ return xengnttab_open(NULL, 0);
+}
+
+static int libxengnttab_backend_unmap(xengnttab_handle *xgt,
+ void *start_address, uint32_t *refs,
+ uint32_t count)
+{
+ return xengnttab_unmap(xgt, start_address, count);
+}
+
+
+static struct gnttab_backend_ops libxengnttab_backend_ops = {
+ .features = XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE,
+ .open = libxengnttab_backend_open,
+ .close = xengnttab_close,
+ .grant_copy = libxengnttab_fallback_grant_copy,
+ .set_max_grants = xengnttab_set_max_grants,
+ .map_refs = xengnttab_map_domain_grant_refs,
+ .unmap = libxengnttab_backend_unmap,
+};
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701
+
+static void *libxenforeignmem_backend_map(uint32_t dom, void *addr, int prot,
+ size_t pages, xfn_pfn_t *pfns,
+ int *errs)
+{
+ if (errs) {
+ return xc_map_foreign_bulk(xen_xc, dom, prot, pfns, errs, pages);
+ } else {
+ return xc_map_foreign_pages(xen_xc, dom, prot, pfns, pages);
+ }
+}
+
+static int libxenforeignmem_backend_unmap(void *addr, size_t pages)
+{
+ return munmap(addr, pages * XC_PAGE_SIZE);
+}
+
+#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
+
+static void *libxenforeignmem_backend_map(uint32_t dom, void *addr, int prot,
+ size_t pages, xen_pfn_t *pfns,
+ int *errs)
+{
+ return xenforeignmemory_map2(xen_fmem, dom, addr, prot, 0, pages, pfns,
+ errs);
+}
+
+static int libxenforeignmem_backend_unmap(void *addr, size_t pages)
+{
+ return xenforeignmemory_unmap(xen_fmem, addr, pages);
+}
+
+#endif
+
+struct foreignmem_backend_ops libxenforeignmem_backend_ops = {
+ .map = libxenforeignmem_backend_map,
+ .unmap = libxenforeignmem_backend_unmap,
+};
+
+struct qemu_xs_handle {
+ struct xs_handle *xsh;
+ NotifierList notifiers;
+};
+
+static void watch_event(void *opaque)
+{
+ struct qemu_xs_handle *h = opaque;
+
+ for (;;) {
+ char **v = xs_check_watch(h->xsh);
+
+ if (!v) {
+ break;
+ }
+
+ notifier_list_notify(&h->notifiers, v);
+ free(v);
+ }
+}
+
+static struct qemu_xs_handle *libxenstore_open(void)
+{
+ struct xs_handle *xsh = xs_open(0);
+ struct qemu_xs_handle *h = g_new0(struct qemu_xs_handle, 1);
+
+ if (!xsh) {
+ return NULL;
+ }
+
+ h = g_new0(struct qemu_xs_handle, 1);
+ h->xsh = xsh;
+
+ notifier_list_init(&h->notifiers);
+ qemu_set_fd_handler(xs_fileno(h->xsh), watch_event, NULL, h);
+
+ return h;
+}
+
+static void libxenstore_close(struct qemu_xs_handle *h)
+{
+ g_assert(notifier_list_empty(&h->notifiers));
+ qemu_set_fd_handler(xs_fileno(h->xsh), NULL, NULL, NULL);
+ xs_close(h->xsh);
+ g_free(h);
+}
+
+static char *libxenstore_get_domain_path(struct qemu_xs_handle *h,
+ unsigned int domid)
+{
+ return xs_get_domain_path(h->xsh, domid);
+}
+
+static char **libxenstore_directory(struct qemu_xs_handle *h,
+ xs_transaction_t t, const char *path,
+ unsigned int *num)
+{
+ return xs_directory(h->xsh, t, path, num);
+}
+
+static void *libxenstore_read(struct qemu_xs_handle *h, xs_transaction_t t,
+ const char *path, unsigned int *len)
+{
+ return xs_read(h->xsh, t, path, len);
+}
+
+static bool libxenstore_write(struct qemu_xs_handle *h, xs_transaction_t t,
+ const char *path, const void *data,
+ unsigned int len)
+{
+ return xs_write(h->xsh, t, path, data, len);
+}
+
+static bool libxenstore_create(struct qemu_xs_handle *h, xs_transaction_t t,
+ unsigned int owner, unsigned int domid,
+ unsigned int perms, const char *path)
+{
+ struct xs_permissions perms_list[] = {
+ {
+ .id = owner,
+ .perms = XS_PERM_NONE,
+ },
+ {
+ .id = domid,
+ .perms = perms,
+ },
+ };
+
+ if (!xs_mkdir(h->xsh, t, path)) {
+ return false;
+ }
+
+ return xs_set_permissions(h->xsh, t, path, perms_list,
+ ARRAY_SIZE(perms_list));
+}
+
+static bool libxenstore_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
+ const char *path)
+{
+ return xs_rm(h->xsh, t, path);
+}
+
+struct qemu_xs_watch {
+ char *path;
+ char *token;
+ xs_watch_fn fn;
+ void *opaque;
+ Notifier notifier;
+};
+
+static void watch_notify(Notifier *n, void *data)
+{
+ struct qemu_xs_watch *w = container_of(n, struct qemu_xs_watch, notifier);
+ const char **v = data;
+
+ if (!strcmp(w->token, v[XS_WATCH_TOKEN])) {
+ w->fn(w->opaque, v[XS_WATCH_PATH]);
+ }
+}
+
+static struct qemu_xs_watch *new_watch(const char *path, xs_watch_fn fn,
+ void *opaque)
+{
+ struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1);
+ QemuUUID uuid;
+
+ qemu_uuid_generate(&uuid);
+
+ w->token = qemu_uuid_unparse_strdup(&uuid);
+ w->path = g_strdup(path);
+ w->fn = fn;
+ w->opaque = opaque;
+ w->notifier.notify = watch_notify;
+
+ return w;
+}
+
+static void free_watch(struct qemu_xs_watch *w)
+{
+ g_free(w->token);
+ g_free(w->path);
+
+ g_free(w);
+}
+
+static struct qemu_xs_watch *libxenstore_watch(struct qemu_xs_handle *h,
+ const char *path, xs_watch_fn fn,
+ void *opaque)
+{
+ struct qemu_xs_watch *w = new_watch(path, fn, opaque);
+
+ notifier_list_add(&h->notifiers, &w->notifier);
+
+ if (!xs_watch(h->xsh, path, w->token)) {
+ notifier_remove(&w->notifier);
+ free_watch(w);
+ return NULL;
+ }
+
+ return w;
+}
+
+static void libxenstore_unwatch(struct qemu_xs_handle *h,
+ struct qemu_xs_watch *w)
+{
+ xs_unwatch(h->xsh, w->path, w->token);
+ notifier_remove(&w->notifier);
+ free_watch(w);
+}
+
+static xs_transaction_t libxenstore_transaction_start(struct qemu_xs_handle *h)
+{
+ return xs_transaction_start(h->xsh);
+}
+
+static bool libxenstore_transaction_end(struct qemu_xs_handle *h,
+ xs_transaction_t t, bool abort)
+{
+ return xs_transaction_end(h->xsh, t, abort);
+}
+
+struct xenstore_backend_ops libxenstore_backend_ops = {
+ .open = libxenstore_open,
+ .close = libxenstore_close,
+ .get_domain_path = libxenstore_get_domain_path,
+ .directory = libxenstore_directory,
+ .read = libxenstore_read,
+ .write = libxenstore_write,
+ .create = libxenstore_create,
+ .destroy = libxenstore_destroy,
+ .watch = libxenstore_watch,
+ .unwatch = libxenstore_unwatch,
+ .transaction_start = libxenstore_transaction_start,
+ .transaction_end = libxenstore_transaction_end,
+};
+
+void setup_xen_backend_ops(void)
+{
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800
+ xengnttab_handle *xgt = xengnttab_open(NULL, 0);
+
+ if (xgt) {
+ if (xengnttab_grant_copy(xgt, 0, NULL) == 0) {
+ libxengnttab_backend_ops.grant_copy = libxengnttab_backend_grant_copy;
+ }
+ xengnttab_close(xgt);
+ }
+#endif
+ xen_evtchn_ops = &libxenevtchn_backend_ops;
+ xen_gnttab_ops = &libxengnttab_backend_ops;
+ xen_foreignmem_ops = &libxenforeignmem_backend_ops;
+ xen_xenstore_ops = &libxenstore_backend_ops;
+}
diff --git a/hw/xen/xen_devconfig.c b/hw/xen/xen_devconfig.c
index 46ee4a7..9b7304e 100644
--- a/hw/xen/xen_devconfig.c
+++ b/hw/xen/xen_devconfig.c
@@ -11,11 +11,11 @@ static int xen_config_dev_dirs(const char *ftype, const char *btype, int vdev,
{
char *dom;
- dom = xs_get_domain_path(xenstore, xen_domid);
+ dom = qemu_xen_xs_get_domain_path(xenstore, xen_domid);
snprintf(fe, len, "%s/device/%s/%d", dom, ftype, vdev);
free(dom);
- dom = xs_get_domain_path(xenstore, 0);
+ dom = qemu_xen_xs_get_domain_path(xenstore, 0);
snprintf(be, len, "%s/backend/%s/%d/%d", dom, btype, xen_domid, vdev);
free(dom);
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index 85c93cf..2d33d17 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -60,9 +60,9 @@
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
+#include "xen_pt.h"
#include "hw/xen/xen.h"
#include "hw/xen/xen-legacy-backend.h"
-#include "xen_pt.h"
#include "qemu/range.h"
static bool has_igd_gfx_passthru;
diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
index e184699..b20744f 100644
--- a/hw/xen/xen_pt.h
+++ b/hw/xen/xen_pt.h
@@ -1,7 +1,7 @@
#ifndef XEN_PT_H
#define XEN_PT_H
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen_native.h"
#include "xen-host-pci-device.h"
#include "qom/object.h"
diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 8b9b554..2b8680b 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -15,8 +15,8 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/timer.h"
-#include "hw/xen/xen-legacy-backend.h"
#include "xen_pt.h"
+#include "hw/xen/xen-legacy-backend.h"
#define XEN_PT_MERGE_VALUE(value, data, val_mask) \
(((value) & (val_mask)) | ((data) & ~(val_mask)))
diff --git a/hw/xen/xen_pt_graphics.c b/hw/xen/xen_pt_graphics.c
index f303f67..0aed3bb 100644
--- a/hw/xen/xen_pt_graphics.c
+++ b/hw/xen/xen_pt_graphics.c
@@ -5,7 +5,6 @@
#include "qapi/error.h"
#include "xen_pt.h"
#include "xen-host-pci-device.h"
-#include "hw/xen/xen-legacy-backend.h"
static unsigned long igd_guest_opregion;
static unsigned long igd_host_opregion;
diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c
index b71563f..09cca4e 100644
--- a/hw/xen/xen_pt_msi.c
+++ b/hw/xen/xen_pt_msi.c
@@ -11,9 +11,9 @@
#include "qemu/osdep.h"
-#include "hw/xen/xen-legacy-backend.h"
-#include "xen_pt.h"
#include "hw/i386/apic-msidef.h"
+#include "xen_pt.h"
+#include "hw/xen/xen-legacy-backend.h"
#define XEN_PT_AUTO_ASSIGN -1
diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c
index 1a5177b..be1504b 100644
--- a/hw/xen/xen_pvdev.c
+++ b/hw/xen/xen_pvdev.c
@@ -54,31 +54,17 @@ void xen_config_cleanup(void)
struct xs_dirs *d;
QTAILQ_FOREACH(d, &xs_cleanup, list) {
- xs_rm(xenstore, 0, d->xs_dir);
+ qemu_xen_xs_destroy(xenstore, 0, d->xs_dir);
}
}
int xenstore_mkdir(char *path, int p)
{
- struct xs_permissions perms[2] = {
- {
- .id = 0, /* set owner: dom0 */
- }, {
- .id = xen_domid,
- .perms = p,
- }
- };
-
- if (!xs_mkdir(xenstore, 0, path)) {
+ if (!qemu_xen_xs_create(xenstore, 0, 0, xen_domid, p, path)) {
xen_pv_printf(NULL, 0, "xs_mkdir %s: failed\n", path);
return -1;
}
xenstore_cleanup_dir(g_strdup(path));
-
- if (!xs_set_permissions(xenstore, 0, path, perms, 2)) {
- xen_pv_printf(NULL, 0, "xs_set_permissions %s: failed\n", path);
- return -1;
- }
return 0;
}
@@ -87,7 +73,7 @@ int xenstore_write_str(const char *base, const char *node, const char *val)
char abspath[XEN_BUFSIZE];
snprintf(abspath, sizeof(abspath), "%s/%s", base, node);
- if (!xs_write(xenstore, 0, abspath, val, strlen(val))) {
+ if (!qemu_xen_xs_write(xenstore, 0, abspath, val, strlen(val))) {
return -1;
}
return 0;
@@ -100,7 +86,7 @@ char *xenstore_read_str(const char *base, const char *node)
char *str, *ret = NULL;
snprintf(abspath, sizeof(abspath), "%s/%s", base, node);
- str = xs_read(xenstore, 0, abspath, &len);
+ str = qemu_xen_xs_read(xenstore, 0, abspath, &len);
if (str != NULL) {
/* move to qemu-allocated memory to make sure
* callers can savely g_free() stuff. */
@@ -152,29 +138,6 @@ int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval)
return rc;
}
-void xenstore_update(void *unused)
-{
- char **vec = NULL;
- intptr_t type, ops, ptr;
- unsigned int dom, count;
-
- vec = xs_read_watch(xenstore, &count);
- if (vec == NULL) {
- goto cleanup;
- }
-
- if (sscanf(vec[XS_WATCH_TOKEN], "be:%" PRIxPTR ":%d:%" PRIxPTR,
- &type, &dom, &ops) == 3) {
- xenstore_update_be(vec[XS_WATCH_PATH], (void *)type, dom, (void*)ops);
- }
- if (sscanf(vec[XS_WATCH_TOKEN], "fe:%" PRIxPTR, &ptr) == 1) {
- xenstore_update_fe(vec[XS_WATCH_PATH], (void *)ptr);
- }
-
-cleanup:
- free(vec);
-}
-
const char *xenbus_strstate(enum xenbus_state state)
{
static const char *const name[] = {
@@ -238,14 +201,14 @@ void xen_pv_evtchn_event(void *opaque)
struct XenLegacyDevice *xendev = opaque;
evtchn_port_t port;
- port = xenevtchn_pending(xendev->evtchndev);
+ port = qemu_xen_evtchn_pending(xendev->evtchndev);
if (port != xendev->local_port) {
xen_pv_printf(xendev, 0,
"xenevtchn_pending returned %d (expected %d)\n",
port, xendev->local_port);
return;
}
- xenevtchn_unmask(xendev->evtchndev, port);
+ qemu_xen_evtchn_unmask(xendev->evtchndev, port);
if (xendev->ops->event) {
xendev->ops->event(xendev);
@@ -257,15 +220,15 @@ void xen_pv_unbind_evtchn(struct XenLegacyDevice *xendev)
if (xendev->local_port == -1) {
return;
}
- qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), NULL, NULL, NULL);
- xenevtchn_unbind(xendev->evtchndev, xendev->local_port);
+ qemu_set_fd_handler(qemu_xen_evtchn_fd(xendev->evtchndev), NULL, NULL, NULL);
+ qemu_xen_evtchn_unbind(xendev->evtchndev, xendev->local_port);
xen_pv_printf(xendev, 2, "unbind evtchn port %d\n", xendev->local_port);
xendev->local_port = -1;
}
int xen_pv_send_notify(struct XenLegacyDevice *xendev)
{
- return xenevtchn_notify(xendev->evtchndev, xendev->local_port);
+ return qemu_xen_evtchn_notify(xendev->evtchndev, xendev->local_port);
}
/* ------------------------------------------------------------- */
@@ -299,17 +262,15 @@ void xen_pv_del_xendev(struct XenLegacyDevice *xendev)
}
if (xendev->fe) {
- char token[XEN_BUFSIZE];
- snprintf(token, sizeof(token), "fe:%p", xendev);
- xs_unwatch(xenstore, xendev->fe, token);
+ qemu_xen_xs_unwatch(xenstore, xendev->watch);
g_free(xendev->fe);
}
if (xendev->evtchndev != NULL) {
- xenevtchn_close(xendev->evtchndev);
+ qemu_xen_evtchn_close(xendev->evtchndev);
}
if (xendev->gnttabdev != NULL) {
- xengnttab_close(xendev->gnttabdev);
+ qemu_xen_gnttab_close(xendev->gnttabdev);
}
QTAILQ_REMOVE(&xendevs, xendev, next);