diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2020-06-19 11:44:03 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2020-06-19 11:44:03 +0100 |
commit | 4d285821c5055ed68a6f6b7693fd11a06a1aa426 (patch) | |
tree | 66daa6b31cad2cb3d26204f6e60ca3d1ad0b37eb | |
parent | 292ef18a38270e1cb8e9a3dc06bca589068f293d (diff) | |
parent | 458e056257e67254546e58158f3f74ce040c7ca1 (diff) | |
download | qemu-4d285821c5055ed68a6f6b7693fd11a06a1aa426.zip qemu-4d285821c5055ed68a6f6b7693fd11a06a1aa426.tar.gz qemu-4d285821c5055ed68a6f6b7693fd11a06a1aa426.tar.bz2 |
Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20200618' into staging
s390x update:
- update Linux headers to 5.8-rc1 (for vfio-ccw path handling)
- vfio-ccw: add support for path handling
- documentation fix
# gpg: Signature made Thu 18 Jun 2020 16:36:04 BST
# gpg: using RSA key C3D0D66DC3624FF6A8C018CEDECF6B93C6F02FAF
# gpg: issuer "cohuck@redhat.com"
# gpg: Good signature from "Cornelia Huck <conny@cornelia-huck.de>" [marginal]
# gpg: aka "Cornelia Huck <huckc@linux.vnet.ibm.com>" [full]
# gpg: aka "Cornelia Huck <cornelia.huck@de.ibm.com>" [full]
# gpg: aka "Cornelia Huck <cohuck@kernel.org>" [marginal]
# gpg: aka "Cornelia Huck <cohuck@redhat.com>" [marginal]
# Primary key fingerprint: C3D0 D66D C362 4FF6 A8C0 18CE DECF 6B93 C6F0 2FAF
* remotes/cohuck/tags/s390x-20200618:
docs/s390x: fix vfio-ap device_del description
vfio-ccw: Add support for the CRW region and IRQ
s390x/css: Refactor the css_queue_crw() routine
vfio-ccw: Refactor ccw irq handler
vfio-ccw: Add support for the schib region
vfio-ccw: Refactor cleanup of regions
Linux headers: update
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
32 files changed, 1075 insertions, 78 deletions
diff --git a/docs/system/s390x/vfio-ap.rst b/docs/system/s390x/vfio-ap.rst index 3cd8417..084ba9c 100644 --- a/docs/system/s390x/vfio-ap.rst +++ b/docs/system/s390x/vfio-ap.rst @@ -606,10 +606,11 @@ action. To hot plug a vfio-ap device, use the QEMU ``device_add`` command:: - (qemu) device_add vfio-ap,sysfsdev="$path-to-mdev" + (qemu) device_add vfio-ap,sysfsdev="$path-to-mdev",id="$id" Where the ``$path-to-mdev`` value specifies the absolute path to a mediated device to which AP resources to be used by the guest have been assigned. +``$id`` is the name value for the optional id parameter. Note that on Linux guests, the AP devices will be created in the ``/sys/bus/ap/devices`` directory when the AP bus subsequently performs its periodic @@ -632,10 +633,9 @@ or a prior hot plug action. To hot unplug a vfio-ap device, use the QEMU ``device_del`` command:: - (qemu) device_del vfio-ap,sysfsdev="$path-to-mdev" + (qemu) device_del "$id" -Where ``$path-to-mdev`` is the same as the path specified when the vfio-ap -device was attached to the virtual machine's ap-bus. +Where ``$id`` is the same id that was specified at device creation. On a Linux guest, the AP devices will be removed from the ``/sys/bus/ap/devices`` directory on the guest when the AP bus subsequently performs its periodic scan, diff --git a/hw/s390x/css.c b/hw/s390x/css.c index 5d8e086..d1e365e 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -1335,11 +1335,20 @@ static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src) } } -int css_do_stsch(SubchDev *sch, SCHIB *schib) +IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib) { + int ret; + + /* + * For some subchannels, we may want to update parts of + * the schib (e.g., update path masks from the host device + * for passthrough subchannels). + */ + ret = s390_ccw_store(sch); + /* Use current status. */ copy_schib_to_guest(schib, &sch->curr_status); - return 0; + return ret; } static void copy_pmcw_from_guest(PMCW *dest, const PMCW *src) @@ -2161,30 +2170,23 @@ void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid, } } -void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited, - int chain, uint16_t rsid) +void css_crw_add_to_queue(CRW crw) { CrwContainer *crw_cont; - trace_css_crw(rsc, erc, rsid, chain ? "(chained)" : ""); + trace_css_crw((crw.flags & CRW_FLAGS_MASK_RSC) >> 8, + crw.flags & CRW_FLAGS_MASK_ERC, + crw.rsid, + (crw.flags & CRW_FLAGS_MASK_C) ? "(chained)" : ""); + /* TODO: Maybe use a static crw pool? */ crw_cont = g_try_new0(CrwContainer, 1); if (!crw_cont) { channel_subsys.crws_lost = true; return; } - crw_cont->crw.flags = (rsc << 8) | erc; - if (solicited) { - crw_cont->crw.flags |= CRW_FLAGS_MASK_S; - } - if (chain) { - crw_cont->crw.flags |= CRW_FLAGS_MASK_C; - } - crw_cont->crw.rsid = rsid; - if (channel_subsys.crws_lost) { - crw_cont->crw.flags |= CRW_FLAGS_MASK_R; - channel_subsys.crws_lost = false; - } + + crw_cont->crw = crw; QTAILQ_INSERT_TAIL(&channel_subsys.pending_crws, crw_cont, sibling); @@ -2195,6 +2197,27 @@ void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited, } } +void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited, + int chain, uint16_t rsid) +{ + CRW crw; + + crw.flags = (rsc << 8) | erc; + if (solicited) { + crw.flags |= CRW_FLAGS_MASK_S; + } + if (chain) { + crw.flags |= CRW_FLAGS_MASK_C; + } + crw.rsid = rsid; + if (channel_subsys.crws_lost) { + crw.flags |= CRW_FLAGS_MASK_R; + channel_subsys.crws_lost = false; + } + + css_crw_add_to_queue(crw); +} + void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid, int hotplugged, int add) { diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c index c48510f..b497571 100644 --- a/hw/s390x/s390-ccw.c +++ b/hw/s390x/s390-ccw.c @@ -51,6 +51,27 @@ int s390_ccw_clear(SubchDev *sch) return cdc->handle_clear(sch); } +IOInstEnding s390_ccw_store(SubchDev *sch) +{ + S390CCWDeviceClass *cdc = NULL; + int ret = IOINST_CC_EXPECTED; + + /* + * This code is called for both virtual and passthrough devices, + * but only applies to to the latter. This ugly check makes that + * distinction for us. + */ + if (object_dynamic_cast(OBJECT(sch->driver_data), TYPE_S390_CCW)) { + cdc = S390_CCW_DEVICE_GET_CLASS(sch->driver_data); + } + + if (cdc && cdc->handle_store) { + ret = cdc->handle_store(sch); + } + + return ret; +} + static void s390_ccw_get_dev_info(S390CCWDevice *cdev, char *sysfsdev, Error **errp) diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index 6340618..06e69d7 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -41,7 +41,14 @@ struct VFIOCCWDevice { uint64_t async_cmd_region_size; uint64_t async_cmd_region_offset; struct ccw_cmd_region *async_cmd_region; + uint64_t schib_region_size; + uint64_t schib_region_offset; + struct ccw_schib_region *schib_region; + uint64_t crw_region_size; + uint64_t crw_region_offset; + struct ccw_crw_region *crw_region; EventNotifier io_notifier; + EventNotifier crw_notifier; bool force_orb_pfch; bool warned_orb_pfch; }; @@ -116,6 +123,51 @@ again: } } +static IOInstEnding vfio_ccw_handle_store(SubchDev *sch) +{ + S390CCWDevice *cdev = sch->driver_data; + VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev); + SCHIB *schib = &sch->curr_status; + struct ccw_schib_region *region = vcdev->schib_region; + SCHIB *s; + int ret; + + /* schib region not available so nothing else to do */ + if (!region) { + return IOINST_CC_EXPECTED; + } + + memset(region, 0, sizeof(*region)); + ret = pread(vcdev->vdev.fd, region, vcdev->schib_region_size, + vcdev->schib_region_offset); + + if (ret == -1) { + /* + * Device is probably damaged, but store subchannel does not + * have a nonzero cc defined for this scenario. Log an error, + * and presume things are otherwise fine. + */ + error_report("vfio-ccw: store region read failed with errno=%d", errno); + return IOINST_CC_EXPECTED; + } + + /* + * Selectively copy path-related bits of the SCHIB, + * rather than copying the entire struct. + */ + s = (SCHIB *)region->schib_area; + schib->pmcw.pnom = s->pmcw.pnom; + schib->pmcw.lpum = s->pmcw.lpum; + schib->pmcw.pam = s->pmcw.pam; + schib->pmcw.pom = s->pmcw.pom; + + if (s->scsw.flags & SCSW_FLAGS_MASK_PNO) { + schib->scsw.flags |= SCSW_FLAGS_MASK_PNO; + } + + return IOINST_CC_EXPECTED; +} + static int vfio_ccw_handle_clear(SubchDev *sch) { S390CCWDevice *cdev = sch->driver_data; @@ -206,6 +258,44 @@ static void vfio_ccw_reset(DeviceState *dev) ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET); } +static void vfio_ccw_crw_read(VFIOCCWDevice *vcdev) +{ + struct ccw_crw_region *region = vcdev->crw_region; + CRW crw; + int size; + + /* Keep reading CRWs as long as data is returned */ + do { + memset(region, 0, sizeof(*region)); + size = pread(vcdev->vdev.fd, region, vcdev->crw_region_size, + vcdev->crw_region_offset); + + if (size == -1) { + error_report("vfio-ccw: Read crw region failed with errno=%d", + errno); + break; + } + + if (region->crw == 0) { + /* No more CRWs to queue */ + break; + } + + memcpy(&crw, ®ion->crw, sizeof(CRW)); + + css_crw_add_to_queue(crw); + } while (1); +} + +static void vfio_ccw_crw_notifier_handler(void *opaque) +{ + VFIOCCWDevice *vcdev = opaque; + + while (event_notifier_test_and_clear(&vcdev->crw_notifier)) { + vfio_ccw_crw_read(vcdev); + } +} + static void vfio_ccw_io_notifier_handler(void *opaque) { VFIOCCWDevice *vcdev = opaque; @@ -276,22 +366,40 @@ read_err: css_inject_io_interrupt(sch); } -static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp) +static void vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev, + unsigned int irq, + Error **errp) { VFIODevice *vdev = &vcdev->vdev; struct vfio_irq_info *irq_info; size_t argsz; int fd; + EventNotifier *notifier; + IOHandler *fd_read; + + switch (irq) { + case VFIO_CCW_IO_IRQ_INDEX: + notifier = &vcdev->io_notifier; + fd_read = vfio_ccw_io_notifier_handler; + break; + case VFIO_CCW_CRW_IRQ_INDEX: + notifier = &vcdev->crw_notifier; + fd_read = vfio_ccw_crw_notifier_handler; + break; + default: + error_setg(errp, "vfio: Unsupported device irq(%d)", irq); + return; + } - if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) { - error_setg(errp, "vfio: unexpected number of io irqs %u", + if (vdev->num_irqs < irq + 1) { + error_setg(errp, "vfio: unexpected number of irqs %u", vdev->num_irqs); return; } argsz = sizeof(*irq_info); irq_info = g_malloc0(argsz); - irq_info->index = VFIO_CCW_IO_IRQ_INDEX; + irq_info->index = irq; irq_info->argsz = argsz; if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info) < 0 || irq_info->count < 1) { @@ -299,37 +407,52 @@ static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp) goto out_free_info; } - if (event_notifier_init(&vcdev->io_notifier, 0)) { + if (event_notifier_init(notifier, 0)) { error_setg_errno(errp, errno, - "vfio: Unable to init event notifier for IO"); + "vfio: Unable to init event notifier for irq (%d)", + irq); goto out_free_info; } - fd = event_notifier_get_fd(&vcdev->io_notifier); - qemu_set_fd_handler(fd, vfio_ccw_io_notifier_handler, NULL, vcdev); + fd = event_notifier_get_fd(notifier); + qemu_set_fd_handler(fd, fd_read, NULL, vcdev); - if (vfio_set_irq_signaling(vdev, VFIO_CCW_IO_IRQ_INDEX, 0, + if (vfio_set_irq_signaling(vdev, irq, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) { qemu_set_fd_handler(fd, NULL, NULL, vcdev); - event_notifier_cleanup(&vcdev->io_notifier); + event_notifier_cleanup(notifier); } out_free_info: g_free(irq_info); } -static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev) +static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev, + unsigned int irq) { Error *err = NULL; + EventNotifier *notifier; + + switch (irq) { + case VFIO_CCW_IO_IRQ_INDEX: + notifier = &vcdev->io_notifier; + break; + case VFIO_CCW_CRW_IRQ_INDEX: + notifier = &vcdev->crw_notifier; + break; + default: + error_report("vfio: Unsupported device irq(%d)", irq); + return; + } - if (vfio_set_irq_signaling(&vcdev->vdev, VFIO_CCW_IO_IRQ_INDEX, 0, + if (vfio_set_irq_signaling(&vcdev->vdev, irq, 0, VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name); } - qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier), + qemu_set_fd_handler(event_notifier_get_fd(notifier), NULL, NULL, vcdev); - event_notifier_cleanup(&vcdev->io_notifier); + event_notifier_cleanup(notifier); } static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) @@ -363,8 +486,7 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) vcdev->io_region_size = info->size; if (sizeof(*vcdev->io_region) != vcdev->io_region_size) { error_setg(errp, "vfio: Unexpected size of the I/O region"); - g_free(info); - return; + goto out_err; } vcdev->io_region_offset = info->offset; @@ -377,19 +499,53 @@ static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp) vcdev->async_cmd_region_size = info->size; if (sizeof(*vcdev->async_cmd_region) != vcdev->async_cmd_region_size) { error_setg(errp, "vfio: Unexpected size of the async cmd region"); - g_free(vcdev->io_region); - g_free(info); - return; + goto out_err; } vcdev->async_cmd_region_offset = info->offset; vcdev->async_cmd_region = g_malloc0(info->size); } + ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW, + VFIO_REGION_SUBTYPE_CCW_SCHIB, &info); + if (!ret) { + vcdev->schib_region_size = info->size; + if (sizeof(*vcdev->schib_region) != vcdev->schib_region_size) { + error_setg(errp, "vfio: Unexpected size of the schib region"); + goto out_err; + } + vcdev->schib_region_offset = info->offset; + vcdev->schib_region = g_malloc(info->size); + } + + ret = vfio_get_dev_region_info(vdev, VFIO_REGION_TYPE_CCW, + VFIO_REGION_SUBTYPE_CCW_CRW, &info); + + if (!ret) { + vcdev->crw_region_size = info->size; + if (sizeof(*vcdev->crw_region) != vcdev->crw_region_size) { + error_setg(errp, "vfio: Unexpected size of the CRW region"); + goto out_err; + } + vcdev->crw_region_offset = info->offset; + vcdev->crw_region = g_malloc(info->size); + } + g_free(info); + return; + +out_err: + g_free(vcdev->crw_region); + g_free(vcdev->schib_region); + g_free(vcdev->async_cmd_region); + g_free(vcdev->io_region); + g_free(info); + return; } static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) { + g_free(vcdev->crw_region); + g_free(vcdev->schib_region); g_free(vcdev->async_cmd_region); g_free(vcdev->io_region); } @@ -499,11 +655,19 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) goto out_region_err; } - vfio_ccw_register_io_notifier(vcdev, &err); + vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, &err); if (err) { goto out_notifier_err; } + if (vcdev->crw_region) { + vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX, &err); + if (err) { + vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); + goto out_notifier_err; + } + } + return; out_notifier_err: @@ -528,7 +692,8 @@ static void vfio_ccw_unrealize(DeviceState *dev) S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); VFIOGroup *group = vcdev->vdev.group; - vfio_ccw_unregister_io_notifier(vcdev); + vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX); + vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); vfio_ccw_put_region(vcdev); vfio_ccw_put_device(vcdev); vfio_put_group(group); @@ -565,6 +730,7 @@ static void vfio_ccw_class_init(ObjectClass *klass, void *data) cdc->handle_request = vfio_ccw_handle_request; cdc->handle_halt = vfio_ccw_handle_halt; cdc->handle_clear = vfio_ccw_handle_clear; + cdc->handle_store = vfio_ccw_handle_store; } static const TypeInfo vfio_ccw_info = { diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h index f46bcaf..08c869a 100644 --- a/include/hw/s390x/css.h +++ b/include/hw/s390x/css.h @@ -205,6 +205,7 @@ void copy_scsw_to_guest(SCSW *dest, const SCSW *src); void css_inject_io_interrupt(SubchDev *sch); void css_reset(void); void css_reset_sch(SubchDev *sch); +void css_crw_add_to_queue(CRW crw); void css_queue_crw(uint8_t rsc, uint8_t erc, int solicited, int chain, uint16_t rsid); void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid, @@ -218,6 +219,7 @@ IOInstEnding do_subchannel_work_passthrough(SubchDev *sub); int s390_ccw_halt(SubchDev *sch); int s390_ccw_clear(SubchDev *sch); +IOInstEnding s390_ccw_store(SubchDev *sch); typedef enum { CSS_IO_ADAPTER_VIRTIO = 0, @@ -242,7 +244,7 @@ SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, uint16_t schid); bool css_subch_visible(SubchDev *sch); void css_conditional_io_interrupt(SubchDev *sch); -int css_do_stsch(SubchDev *sch, SCHIB *schib); +IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib); bool css_schid_final(int m, uint8_t cssid, uint8_t ssid, uint16_t schid); IOInstEnding css_do_msch(SubchDev *sch, const SCHIB *schib); IOInstEnding css_do_xsch(SubchDev *sch); diff --git a/include/hw/s390x/s390-ccw.h b/include/hw/s390x/s390-ccw.h index 7f27bc2..d8e08b5 100644 --- a/include/hw/s390x/s390-ccw.h +++ b/include/hw/s390x/s390-ccw.h @@ -37,6 +37,7 @@ typedef struct S390CCWDeviceClass { IOInstEnding (*handle_request) (SubchDev *sch); int (*handle_halt) (SubchDev *sch); int (*handle_clear) (SubchDev *sch); + IOInstEnding (*handle_store) (SubchDev *sch); } S390CCWDeviceClass; #endif diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h index 90604a8..07877d3 100644 --- a/include/standard-headers/asm-x86/kvm_para.h +++ b/include/standard-headers/asm-x86/kvm_para.h @@ -31,6 +31,7 @@ #define KVM_FEATURE_PV_SEND_IPI 11 #define KVM_FEATURE_POLL_CONTROL 12 #define KVM_FEATURE_PV_SCHED_YIELD 13 +#define KVM_FEATURE_ASYNC_PF_INT 14 #define KVM_HINTS_REALTIME 0 @@ -50,6 +51,8 @@ #define MSR_KVM_STEAL_TIME 0x4b564d03 #define MSR_KVM_PV_EOI_EN 0x4b564d04 #define MSR_KVM_POLL_CONTROL 0x4b564d05 +#define MSR_KVM_ASYNC_PF_INT 0x4b564d06 +#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 struct kvm_steal_time { uint64_t steal; @@ -81,6 +84,11 @@ struct kvm_clock_pairing { #define KVM_ASYNC_PF_ENABLED (1 << 0) #define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) #define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2) +#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) + +/* MSR_KVM_ASYNC_PF_INT */ +#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) + /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 @@ -112,8 +120,13 @@ struct kvm_mmu_op_release_pt { #define KVM_PV_REASON_PAGE_READY 2 struct kvm_vcpu_pv_apf_data { - uint32_t reason; - uint8_t pad[60]; + /* Used for 'page not present' events delivered via #PF */ + uint32_t flags; + + /* Used for 'page ready' events delivered via interrupt notification */ + uint32_t token; + + uint8_t pad[56]; uint32_t enabled; }; diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h index 66e8380..909a667 100644 --- a/include/standard-headers/drm/drm_fourcc.h +++ b/include/standard-headers/drm/drm_fourcc.h @@ -353,9 +353,12 @@ extern "C" { * a platform-dependent stride. On top of that the memory can apply * platform-depending swizzling of some higher address bits into bit6. * - * This format is highly platforms specific and not useful for cross-driver - * sharing. It exists since on a given platform it does uniquely identify the - * layout in a simple way for i915-specific userspace. + * Note that this layout is only accurate on intel gen 8+ or valleyview chipsets. + * On earlier platforms the is highly platforms specific and not useful for + * cross-driver sharing. It exists since on a given platform it does uniquely + * identify the layout in a simple way for i915-specific userspace, which + * facilitated conversion of userspace to modifiers. Additionally the exact + * format on some really old platforms is not known. */ #define I915_FORMAT_MOD_X_TILED fourcc_mod_code(INTEL, 1) @@ -368,9 +371,12 @@ extern "C" { * memory can apply platform-depending swizzling of some higher address bits * into bit6. * - * This format is highly platforms specific and not useful for cross-driver - * sharing. It exists since on a given platform it does uniquely identify the - * layout in a simple way for i915-specific userspace. + * Note that this layout is only accurate on intel gen 8+ or valleyview chipsets. + * On earlier platforms the is highly platforms specific and not useful for + * cross-driver sharing. It exists since on a given platform it does uniquely + * identify the layout in a simple way for i915-specific userspace, which + * facilitated conversion of userspace to modifiers. Additionally the exact + * format on some really old platforms is not known. */ #define I915_FORMAT_MOD_Y_TILED fourcc_mod_code(INTEL, 2) @@ -520,7 +526,113 @@ extern "C" { #define DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED fourcc_mod_code(NVIDIA, 1) /* - * 16Bx2 Block Linear layout, used by desktop GPUs, and Tegra K1 and later + * Generalized Block Linear layout, used by desktop GPUs starting with NV50/G80, + * and Tegra GPUs starting with Tegra K1. + * + * Pixels are arranged in Groups of Bytes (GOBs). GOB size and layout varies + * based on the architecture generation. GOBs themselves are then arranged in + * 3D blocks, with the block dimensions (in terms of GOBs) always being a power + * of two, and hence expressible as their log2 equivalent (E.g., "2" represents + * a block depth or height of "4"). + * + * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format + * in full detail. + * + * Macro + * Bits Param Description + * ---- ----- ----------------------------------------------------------------- + * + * 3:0 h log2(height) of each block, in GOBs. Placed here for + * compatibility with the existing + * DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers. + * + * 4:4 - Must be 1, to indicate block-linear layout. Necessary for + * compatibility with the existing + * DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers. + * + * 8:5 - Reserved (To support 3D-surfaces with variable log2(depth) block + * size). Must be zero. + * + * Note there is no log2(width) parameter. Some portions of the + * hardware support a block width of two gobs, but it is impractical + * to use due to lack of support elsewhere, and has no known + * benefits. + * + * 11:9 - Reserved (To support 2D-array textures with variable array stride + * in blocks, specified via log2(tile width in blocks)). Must be + * zero. + * + * 19:12 k Page Kind. This value directly maps to a field in the page + * tables of all GPUs >= NV50. It affects the exact layout of bits + * in memory and can be derived from the tuple + * + * (format, GPU model, compression type, samples per pixel) + * + * Where compression type is defined below. If GPU model were + * implied by the format modifier, format, or memory buffer, page + * kind would not need to be included in the modifier itself, but + * since the modifier should define the layout of the associated + * memory buffer independent from any device or other context, it + * must be included here. + * + * 21:20 g GOB Height and Page Kind Generation. The height of a GOB changed + * starting with Fermi GPUs. Additionally, the mapping between page + * kind and bit layout has changed at various points. + * + * 0 = Gob Height 8, Fermi - Volta, Tegra K1+ Page Kind mapping + * 1 = Gob Height 4, G80 - GT2XX Page Kind mapping + * 2 = Gob Height 8, Turing+ Page Kind mapping + * 3 = Reserved for future use. + * + * 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a further + * bit remapping step that occurs at an even lower level than the + * page kind and block linear swizzles. This causes the layout of + * surfaces mapped in those SOC's GPUs to be incompatible with the + * equivalent mapping on other GPUs in the same system. + * + * 0 = Tegra K1 - Tegra Parker/TX2 Layout. + * 1 = Desktop GPU and Tegra Xavier+ Layout + * + * 25:23 c Lossless Framebuffer Compression type. + * + * 0 = none + * 1 = ROP/3D, layout 1, exact compression format implied by Page + * Kind field + * 2 = ROP/3D, layout 2, exact compression format implied by Page + * Kind field + * 3 = CDE horizontal + * 4 = CDE vertical + * 5 = Reserved for future use + * 6 = Reserved for future use + * 7 = Reserved for future use + * + * 55:25 - Reserved for future use. Must be zero. + */ +#define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \ + fourcc_mod_code(NVIDIA, (0x10 | \ + ((h) & 0xf) | \ + (((k) & 0xff) << 12) | \ + (((g) & 0x3) << 20) | \ + (((s) & 0x1) << 22) | \ + (((c) & 0x7) << 23))) + +/* To grandfather in prior block linear format modifiers to the above layout, + * the page kind "0", which corresponds to "pitch/linear" and hence is unusable + * with block-linear layouts, is remapped within drivers to the value 0xfe, + * which corresponds to the "generic" kind used for simple single-sample + * uncompressed color formats on Fermi - Volta GPUs. + */ +static inline uint64_t +drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) +{ + if (!(modifier & 0x10) || (modifier & (0xff << 12))) + return modifier; + else + return modifier | (0xfe << 12); +} + +/* + * 16Bx2 Block Linear layout, used by Tegra K1 and later * * Pixels are arranged in 64x8 Groups Of Bytes (GOBs). GOBs are then stacked * vertically by a power of 2 (1 to 32 GOBs) to form a block. @@ -541,20 +653,20 @@ extern "C" { * in full detail. */ #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(v) \ - fourcc_mod_code(NVIDIA, 0x10 | ((v) & 0xf)) + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 0, 0, 0, (v)) #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB \ - fourcc_mod_code(NVIDIA, 0x10) + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0) #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_TWO_GOB \ - fourcc_mod_code(NVIDIA, 0x11) + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1) #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_FOUR_GOB \ - fourcc_mod_code(NVIDIA, 0x12) + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2) #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_EIGHT_GOB \ - fourcc_mod_code(NVIDIA, 0x13) + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3) #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB \ - fourcc_mod_code(NVIDIA, 0x14) + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4) #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB \ - fourcc_mod_code(NVIDIA, 0x15) + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5) /* * Some Broadcom modifiers take parameters, for example the number of diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index 1200890..fd8d2cc 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -1666,6 +1666,18 @@ static inline int ethtool_validate_duplex(uint8_t duplex) return 0; } +#define MASTER_SLAVE_CFG_UNSUPPORTED 0 +#define MASTER_SLAVE_CFG_UNKNOWN 1 +#define MASTER_SLAVE_CFG_MASTER_PREFERRED 2 +#define MASTER_SLAVE_CFG_SLAVE_PREFERRED 3 +#define MASTER_SLAVE_CFG_MASTER_FORCE 4 +#define MASTER_SLAVE_CFG_SLAVE_FORCE 5 +#define MASTER_SLAVE_STATE_UNSUPPORTED 0 +#define MASTER_SLAVE_STATE_UNKNOWN 1 +#define MASTER_SLAVE_STATE_MASTER 2 +#define MASTER_SLAVE_STATE_SLAVE 3 +#define MASTER_SLAVE_STATE_ERR 4 + /* Which connector port. */ #define PORT_TP 0x00 #define PORT_AUI 0x01 @@ -1904,7 +1916,9 @@ struct ethtool_link_settings { uint8_t eth_tp_mdix_ctrl; int8_t link_mode_masks_nwords; uint8_t transceiver; - uint8_t reserved1[3]; + uint8_t master_slave_cfg; + uint8_t master_slave_state; + uint8_t reserved1[1]; uint32_t reserved[7]; uint32_t link_mode_masks[0]; /* layout of link_mode_masks fields: diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h index ecc27a1..b052355 100644 --- a/include/standard-headers/linux/virtio_ids.h +++ b/include/standard-headers/linux/virtio_ids.h @@ -44,6 +44,7 @@ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ +#define VIRTIO_ID_MEM 24 /* virtio mem */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ diff --git a/include/standard-headers/linux/virtio_mem.h b/include/standard-headers/linux/virtio_mem.h new file mode 100644 index 0000000..05e5ade --- /dev/null +++ b/include/standard-headers/linux/virtio_mem.h @@ -0,0 +1,211 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Virtio Mem Device + * + * Copyright Red Hat, Inc. 2020 + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This header is BSD licensed so anyone can use the definitions + * to implement compatible drivers/servers: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _LINUX_VIRTIO_MEM_H +#define _LINUX_VIRTIO_MEM_H + +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_types.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" + +/* + * Each virtio-mem device manages a dedicated region in physical address + * space. Each device can belong to a single NUMA node, multiple devices + * for a single NUMA node are possible. A virtio-mem device is like a + * "resizable DIMM" consisting of small memory blocks that can be plugged + * or unplugged. The device driver is responsible for (un)plugging memory + * blocks on demand. + * + * Virtio-mem devices can only operate on their assigned memory region in + * order to (un)plug memory. A device cannot (un)plug memory belonging to + * other devices. + * + * The "region_size" corresponds to the maximum amount of memory that can + * be provided by a device. The "size" corresponds to the amount of memory + * that is currently plugged. "requested_size" corresponds to a request + * from the device to the device driver to (un)plug blocks. The + * device driver should try to (un)plug blocks in order to reach the + * "requested_size". It is impossible to plug more memory than requested. + * + * The "usable_region_size" represents the memory region that can actually + * be used to (un)plug memory. It is always at least as big as the + * "requested_size" and will grow dynamically. It will only shrink when + * explicitly triggered (VIRTIO_MEM_REQ_UNPLUG). + * + * There are no guarantees what will happen if unplugged memory is + * read/written. Such memory should, in general, not be touched. E.g., + * even writing might succeed, but the values will simply be discarded at + * random points in time. + * + * It can happen that the device cannot process a request, because it is + * busy. The device driver has to retry later. + * + * Usually, during system resets all memory will get unplugged, so the + * device driver can start with a clean state. However, in specific + * scenarios (if the device is busy) it can happen that the device still + * has memory plugged. The device driver can request to unplug all memory + * (VIRTIO_MEM_REQ_UNPLUG) - which might take a while to succeed if the + * device is busy. + */ + +/* --- virtio-mem: feature bits --- */ + +/* node_id is an ACPI PXM and is valid */ +#define VIRTIO_MEM_F_ACPI_PXM 0 + + +/* --- virtio-mem: guest -> host requests --- */ + +/* request to plug memory blocks */ +#define VIRTIO_MEM_REQ_PLUG 0 +/* request to unplug memory blocks */ +#define VIRTIO_MEM_REQ_UNPLUG 1 +/* request to unplug all blocks and shrink the usable size */ +#define VIRTIO_MEM_REQ_UNPLUG_ALL 2 +/* request information about the plugged state of memory blocks */ +#define VIRTIO_MEM_REQ_STATE 3 + +struct virtio_mem_req_plug { + __virtio64 addr; + __virtio16 nb_blocks; + __virtio16 padding[3]; +}; + +struct virtio_mem_req_unplug { + __virtio64 addr; + __virtio16 nb_blocks; + __virtio16 padding[3]; +}; + +struct virtio_mem_req_state { + __virtio64 addr; + __virtio16 nb_blocks; + __virtio16 padding[3]; +}; + +struct virtio_mem_req { + __virtio16 type; + __virtio16 padding[3]; + + union { + struct virtio_mem_req_plug plug; + struct virtio_mem_req_unplug unplug; + struct virtio_mem_req_state state; + } u; +}; + + +/* --- virtio-mem: host -> guest response --- */ + +/* + * Request processed successfully, applicable for + * - VIRTIO_MEM_REQ_PLUG + * - VIRTIO_MEM_REQ_UNPLUG + * - VIRTIO_MEM_REQ_UNPLUG_ALL + * - VIRTIO_MEM_REQ_STATE + */ +#define VIRTIO_MEM_RESP_ACK 0 +/* + * Request denied - e.g. trying to plug more than requested, applicable for + * - VIRTIO_MEM_REQ_PLUG + */ +#define VIRTIO_MEM_RESP_NACK 1 +/* + * Request cannot be processed right now, try again later, applicable for + * - VIRTIO_MEM_REQ_PLUG + * - VIRTIO_MEM_REQ_UNPLUG + * - VIRTIO_MEM_REQ_UNPLUG_ALL + */ +#define VIRTIO_MEM_RESP_BUSY 2 +/* + * Error in request (e.g. addresses/alignment), applicable for + * - VIRTIO_MEM_REQ_PLUG + * - VIRTIO_MEM_REQ_UNPLUG + * - VIRTIO_MEM_REQ_STATE + */ +#define VIRTIO_MEM_RESP_ERROR 3 + + +/* State of memory blocks is "plugged" */ +#define VIRTIO_MEM_STATE_PLUGGED 0 +/* State of memory blocks is "unplugged" */ +#define VIRTIO_MEM_STATE_UNPLUGGED 1 +/* State of memory blocks is "mixed" */ +#define VIRTIO_MEM_STATE_MIXED 2 + +struct virtio_mem_resp_state { + __virtio16 state; +}; + +struct virtio_mem_resp { + __virtio16 type; + __virtio16 padding[3]; + + union { + struct virtio_mem_resp_state state; + } u; +}; + +/* --- virtio-mem: configuration --- */ + +struct virtio_mem_config { + /* Block size and alignment. Cannot change. */ + uint64_t block_size; + /* Valid with VIRTIO_MEM_F_ACPI_PXM. Cannot change. */ + uint16_t node_id; + uint8_t padding[6]; + /* Start address of the memory region. Cannot change. */ + uint64_t addr; + /* Region size (maximum). Cannot change. */ + uint64_t region_size; + /* + * Currently usable region size. Can grow up to region_size. Can + * shrink due to VIRTIO_MEM_REQ_UNPLUG_ALL (in which case no config + * update will be sent). + */ + uint64_t usable_region_size; + /* + * Currently used size. Changes due to plug/unplug requests, but no + * config updates will be sent. + */ + uint64_t plugged_size; + /* Requested size. New plug requests cannot exceed it. Can change. */ + uint64_t requested_size; +}; + +#endif /* _LINUX_VIRTIO_MEM_H */ diff --git a/include/standard-headers/linux/virtio_ring.h b/include/standard-headers/linux/virtio_ring.h index f230fed..0fa0e10 100644 --- a/include/standard-headers/linux/virtio_ring.h +++ b/include/standard-headers/linux/virtio_ring.h @@ -84,6 +84,13 @@ * at the end of the used ring. Guest should ignore the used->flags field. */ #define VIRTIO_RING_F_EVENT_IDX 29 +/* Alignment requirements for vring elements. + * When using pre-virtio 1.0 layout, these fall out naturally. + */ +#define VRING_AVAIL_ALIGN_SIZE 2 +#define VRING_USED_ALIGN_SIZE 4 +#define VRING_DESC_ALIGN_SIZE 16 + /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ struct vring_desc { /* Address (guest-physical). */ @@ -110,28 +117,47 @@ struct vring_used_elem { __virtio32 len; }; +typedef struct vring_used_elem __attribute__((aligned(VRING_USED_ALIGN_SIZE))) + vring_used_elem_t; + struct vring_used { __virtio16 flags; __virtio16 idx; - struct vring_used_elem ring[]; + vring_used_elem_t ring[]; }; +/* + * The ring element addresses are passed between components with different + * alignments assumptions. Thus, we might need to decrease the compiler-selected + * alignment, and so must use a typedef to make sure the aligned attribute + * actually takes hold: + * + * https://gcc.gnu.org/onlinedocs//gcc/Common-Type-Attributes.html#Common-Type-Attributes + * + * When used on a struct, or struct member, the aligned attribute can only + * increase the alignment; in order to decrease it, the packed attribute must + * be specified as well. When used as part of a typedef, the aligned attribute + * can both increase and decrease alignment, and specifying the packed + * attribute generates a warning. + */ +typedef struct vring_desc __attribute__((aligned(VRING_DESC_ALIGN_SIZE))) + vring_desc_t; +typedef struct vring_avail __attribute__((aligned(VRING_AVAIL_ALIGN_SIZE))) + vring_avail_t; +typedef struct vring_used __attribute__((aligned(VRING_USED_ALIGN_SIZE))) + vring_used_t; + struct vring { unsigned int num; - struct vring_desc *desc; + vring_desc_t *desc; - struct vring_avail *avail; + vring_avail_t *avail; - struct vring_used *used; + vring_used_t *used; }; -/* Alignment requirements for vring elements. - * When using pre-virtio 1.0 layout, these fall out naturally. - */ -#define VRING_AVAIL_ALIGN_SIZE 2 -#define VRING_USED_ALIGN_SIZE 4 -#define VRING_DESC_ALIGN_SIZE 16 +#ifndef VIRTIO_RING_NO_LEGACY /* The standard layout for the ring is a continuous chunk of memory which looks * like this. We assume num is a power of 2. @@ -179,6 +205,8 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) + sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num; } +#endif /* VIRTIO_RING_NO_LEGACY */ + /* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ /* Assuming a given event_idx value from the other side, if * we have just incremented index from old to new_idx, diff --git a/linux-headers/asm-arm64/mman.h b/linux-headers/asm-arm64/mman.h index 8eebf89..e94b9af 100644 --- a/linux-headers/asm-arm64/mman.h +++ b/linux-headers/asm-arm64/mman.h @@ -1 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_MMAN_H +#define __ASM_MMAN_H + #include <asm-generic/mman.h> + +#define PROT_BTI 0x10 /* BTI guarded page */ + +#endif /* ! _UAPI__ASM_MMAN_H */ diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h index 3a3201e..f4a0130 100644 --- a/linux-headers/asm-generic/unistd.h +++ b/linux-headers/asm-generic/unistd.h @@ -855,9 +855,11 @@ __SYSCALL(__NR_clone3, sys_clone3) __SYSCALL(__NR_openat2, sys_openat2) #define __NR_pidfd_getfd 438 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) +#define __NR_faccessat2 439 +__SYSCALL(__NR_faccessat2, sys_faccessat2) #undef __NR_syscalls -#define __NR_syscalls 439 +#define __NR_syscalls 440 /* * 32 bit systems traditionally used different diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h index aec9f60..3b9eda7 100644 --- a/linux-headers/asm-mips/unistd_n32.h +++ b/linux-headers/asm-mips/unistd_n32.h @@ -367,6 +367,7 @@ #define __NR_clone3 (__NR_Linux + 435) #define __NR_openat2 (__NR_Linux + 437) #define __NR_pidfd_getfd (__NR_Linux + 438) +#define __NR_faccessat2 (__NR_Linux + 439) #endif /* _ASM_MIPS_UNISTD_N32_H */ diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h index 1c75d83..9cdf9b6 100644 --- a/linux-headers/asm-mips/unistd_n64.h +++ b/linux-headers/asm-mips/unistd_n64.h @@ -343,6 +343,7 @@ #define __NR_clone3 (__NR_Linux + 435) #define __NR_openat2 (__NR_Linux + 437) #define __NR_pidfd_getfd (__NR_Linux + 438) +#define __NR_faccessat2 (__NR_Linux + 439) #endif /* _ASM_MIPS_UNISTD_N64_H */ diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h index 660716e..e3e5e23 100644 --- a/linux-headers/asm-mips/unistd_o32.h +++ b/linux-headers/asm-mips/unistd_o32.h @@ -413,6 +413,7 @@ #define __NR_clone3 (__NR_Linux + 435) #define __NR_openat2 (__NR_Linux + 437) #define __NR_pidfd_getfd (__NR_Linux + 438) +#define __NR_faccessat2 (__NR_Linux + 439) #endif /* _ASM_MIPS_UNISTD_O32_H */ diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h index 4ba8e32..862edb7 100644 --- a/linux-headers/asm-powerpc/unistd_32.h +++ b/linux-headers/asm-powerpc/unistd_32.h @@ -420,6 +420,7 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 #endif /* _ASM_POWERPC_UNISTD_32_H */ diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h index ac20bb4..f553224 100644 --- a/linux-headers/asm-powerpc/unistd_64.h +++ b/linux-headers/asm-powerpc/unistd_64.h @@ -392,6 +392,7 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 #endif /* _ASM_POWERPC_UNISTD_64_H */ diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h index e4a6b65..e08233c 100644 --- a/linux-headers/asm-s390/unistd_32.h +++ b/linux-headers/asm-s390/unistd_32.h @@ -410,5 +410,6 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 #endif /* _ASM_S390_UNISTD_32_H */ diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h index 472f732..560e19a 100644 --- a/linux-headers/asm-s390/unistd_64.h +++ b/linux-headers/asm-s390/unistd_64.h @@ -358,5 +358,6 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 #endif /* _ASM_S390_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 3f3f780..17c5a03 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -385,32 +385,48 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) #define KVM_STATE_NESTED_FORMAT_VMX 0 -#define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */ +#define KVM_STATE_NESTED_FORMAT_SVM 1 #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 #define KVM_STATE_NESTED_EVMCS 0x00000004 #define KVM_STATE_NESTED_MTF_PENDING 0x00000008 +#define KVM_STATE_NESTED_GIF_SET 0x00000100 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 #define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000 +#define KVM_STATE_NESTED_SVM_VMCB_SIZE 0x1000 + +#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 + struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; }; struct kvm_vmx_nested_state_hdr { + __u32 flags; __u64 vmxon_pa; __u64 vmcs12_pa; + __u64 preemption_timer_deadline; struct { __u16 flags; } smm; }; +struct kvm_svm_nested_state_data { + /* Save area only used if KVM_STATE_NESTED_RUN_PENDING. */ + __u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE]; +}; + +struct kvm_svm_nested_state_hdr { + __u64 vmcb_pa; +}; + /* for KVM_CAP_NESTED_STATE */ struct kvm_nested_state { __u16 flags; @@ -419,6 +435,7 @@ struct kvm_nested_state { union { struct kvm_vmx_nested_state_hdr vmx; + struct kvm_svm_nested_state_hdr svm; /* Pad the header to 128 bytes. */ __u8 pad[120]; @@ -431,6 +448,7 @@ struct kvm_nested_state { */ union { struct kvm_vmx_nested_state_data vmx[0]; + struct kvm_svm_nested_state_data svm[0]; } data; }; diff --git a/linux-headers/asm-x86/unistd.h b/linux-headers/asm-x86/unistd.h index 498d151..d2af42d 100644 --- a/linux-headers/asm-x86/unistd.h +++ b/linux-headers/asm-x86/unistd.h @@ -2,8 +2,15 @@ #ifndef _ASM_X86_UNISTD_H #define _ASM_X86_UNISTD_H -/* x32 syscall flag bit */ -#define __X32_SYSCALL_BIT 0x40000000UL +/* + * x32 syscall flag bit. Some user programs expect syscall NR macros + * and __X32_SYSCALL_BIT to have type int, even though syscall numbers + * are, for practical purposes, unsigned long. + * + * Fortunately, expressions like (nr & ~__X32_SYSCALL_BIT) do the right + * thing regardless. + */ +#define __X32_SYSCALL_BIT 0x40000000 # ifdef __i386__ # include <asm/unistd_32.h> diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index 1e6c1a5..c727981 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -428,6 +428,7 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 #endif /* _ASM_X86_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index 6daf0ae..843fa62 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -350,6 +350,7 @@ #define __NR_clone3 435 #define __NR_openat2 437 #define __NR_pidfd_getfd 438 +#define __NR_faccessat2 439 #endif /* _ASM_X86_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index e3f17ef..7d63d70 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -303,6 +303,7 @@ #define __NR_clone3 (__X32_SYSCALL_BIT + 435) #define __NR_openat2 (__X32_SYSCALL_BIT + 437) #define __NR_pidfd_getfd (__X32_SYSCALL_BIT + 438) +#define __NR_faccessat2 (__X32_SYSCALL_BIT + 439) #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) #define __NR_ioctl (__X32_SYSCALL_BIT + 514) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 9804495..a28c366 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -116,7 +116,7 @@ struct kvm_irq_level { * ACPI gsi notion of irq. * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. - * For ARM: See Documentation/virt/kvm/api.txt + * For ARM: See Documentation/virt/kvm/api.rst */ union { __u32 irq; @@ -188,10 +188,13 @@ struct kvm_s390_cmma_log { struct kvm_hyperv_exit { #define KVM_EXIT_HYPERV_SYNIC 1 #define KVM_EXIT_HYPERV_HCALL 2 +#define KVM_EXIT_HYPERV_SYNDBG 3 __u32 type; + __u32 pad1; union { struct { __u32 msr; + __u32 pad2; __u64 control; __u64 evt_page; __u64 msg_page; @@ -201,6 +204,15 @@ struct kvm_hyperv_exit { __u64 result; __u64 params[2]; } hcall; + struct { + __u32 msr; + __u32 pad2; + __u64 control; + __u64 status; + __u64 send_page; + __u64 recv_page; + __u64 pending_page; + } syndbg; } u; }; @@ -1017,6 +1029,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_VCPU_RESETS 179 #define KVM_CAP_S390_PROTECTED 180 #define KVM_CAP_PPC_SECURE_GUEST 181 +#define KVM_CAP_HALT_POLL 182 +#define KVM_CAP_ASYNC_PF_INT 183 #ifdef KVM_CAP_IRQ_ROUTING @@ -1107,7 +1121,7 @@ struct kvm_xen_hvm_config { * * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies * the irqfd to operate in resampling mode for level triggered interrupt - * emulation. See Documentation/virt/kvm/api.txt. + * emulation. See Documentation/virt/kvm/api.rst. */ #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h index 31f971e..51d8b39 100644 --- a/linux-headers/linux/psp-sev.h +++ b/linux-headers/linux/psp-sev.h @@ -83,6 +83,8 @@ struct sev_user_data_status { __u32 guest_count; /* Out */ } __attribute__((packed)); +#define SEV_STATUS_FLAGS_CONFIG_ES 0x0100 + /** * struct sev_user_data_pek_csr - PEK_CSR command parameters * diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index a41c452..f09df26 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -305,6 +305,7 @@ struct vfio_region_info_cap_type { #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) #define VFIO_REGION_TYPE_GFX (1) #define VFIO_REGION_TYPE_CCW (2) +#define VFIO_REGION_TYPE_MIGRATION (3) /* sub-types for VFIO_REGION_TYPE_PCI_* */ @@ -378,6 +379,235 @@ struct vfio_region_gfx_edid { /* sub-types for VFIO_REGION_TYPE_CCW */ #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) +#define VFIO_REGION_SUBTYPE_CCW_SCHIB (2) +#define VFIO_REGION_SUBTYPE_CCW_CRW (3) + +/* sub-types for VFIO_REGION_TYPE_MIGRATION */ +#define VFIO_REGION_SUBTYPE_MIGRATION (1) + +/* + * The structure vfio_device_migration_info is placed at the 0th offset of + * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related + * migration information. Field accesses from this structure are only supported + * at their native width and alignment. Otherwise, the result is undefined and + * vendor drivers should return an error. + * + * device_state: (read/write) + * - The user application writes to this field to inform the vendor driver + * about the device state to be transitioned to. + * - The vendor driver should take the necessary actions to change the + * device state. After successful transition to a given state, the + * vendor driver should return success on write(device_state, state) + * system call. If the device state transition fails, the vendor driver + * should return an appropriate -errno for the fault condition. + * - On the user application side, if the device state transition fails, + * that is, if write(device_state, state) returns an error, read + * device_state again to determine the current state of the device from + * the vendor driver. + * - The vendor driver should return previous state of the device unless + * the vendor driver has encountered an internal error, in which case + * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. + * - The user application must use the device reset ioctl to recover the + * device from VFIO_DEVICE_STATE_ERROR state. If the device is + * indicated to be in a valid device state by reading device_state, the + * user application may attempt to transition the device to any valid + * state reachable from the current state or terminate itself. + * + * device_state consists of 3 bits: + * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, + * it indicates the _STOP state. When the device state is changed to + * _STOP, driver should stop the device before write() returns. + * - If bit 1 is set, it indicates the _SAVING state, which means that the + * driver should start gathering device state information that will be + * provided to the VFIO user application to save the device's state. + * - If bit 2 is set, it indicates the _RESUMING state, which means that + * the driver should prepare to resume the device. Data provided through + * the migration region should be used to resume the device. + * Bits 3 - 31 are reserved for future use. To preserve them, the user + * application should perform a read-modify-write operation on this + * field when modifying the specified bits. + * + * +------- _RESUMING + * |+------ _SAVING + * ||+----- _RUNNING + * ||| + * 000b => Device Stopped, not saving or resuming + * 001b => Device running, which is the default state + * 010b => Stop the device & save the device state, stop-and-copy state + * 011b => Device running and save the device state, pre-copy state + * 100b => Device stopped and the device state is resuming + * 101b => Invalid state + * 110b => Error state + * 111b => Invalid state + * + * State transitions: + * + * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP + * (100b) (001b) (011b) (010b) (000b) + * 0. Running or default state + * | + * + * 1. Normal Shutdown (optional) + * |------------------------------------->| + * + * 2. Save the state or suspend + * |------------------------->|---------->| + * + * 3. Save the state during live migration + * |----------->|------------>|---------->| + * + * 4. Resuming + * |<---------| + * + * 5. Resumed + * |--------->| + * + * 0. Default state of VFIO device is _RUNNNG when the user application starts. + * 1. During normal shutdown of the user application, the user application may + * optionally change the VFIO device state from _RUNNING to _STOP. This + * transition is optional. The vendor driver must support this transition but + * must not require it. + * 2. When the user application saves state or suspends the application, the + * device state transitions from _RUNNING to stop-and-copy and then to _STOP. + * On state transition from _RUNNING to stop-and-copy, driver must stop the + * device, save the device state and send it to the application through the + * migration region. The sequence to be followed for such transition is given + * below. + * 3. In live migration of user application, the state transitions from _RUNNING + * to pre-copy, to stop-and-copy, and to _STOP. + * On state transition from _RUNNING to pre-copy, the driver should start + * gathering the device state while the application is still running and send + * the device state data to application through the migration region. + * On state transition from pre-copy to stop-and-copy, the driver must stop + * the device, save the device state and send it to the user application + * through the migration region. + * Vendor drivers must support the pre-copy state even for implementations + * where no data is provided to the user before the stop-and-copy state. The + * user must not be required to consume all migration data before the device + * transitions to a new state, including the stop-and-copy state. + * The sequence to be followed for above two transitions is given below. + * 4. To start the resuming phase, the device state should be transitioned from + * the _RUNNING to the _RESUMING state. + * In the _RESUMING state, the driver should use the device state data + * received through the migration region to resume the device. + * 5. After providing saved device data to the driver, the application should + * change the state from _RESUMING to _RUNNING. + * + * reserved: + * Reads on this field return zero and writes are ignored. + * + * pending_bytes: (read only) + * The number of pending bytes still to be migrated from the vendor driver. + * + * data_offset: (read only) + * The user application should read data_offset field from the migration + * region. The user application should read the device data from this + * offset within the migration region during the _SAVING state or write + * the device data during the _RESUMING state. See below for details of + * sequence to be followed. + * + * data_size: (read/write) + * The user application should read data_size to get the size in bytes of + * the data copied in the migration region during the _SAVING state and + * write the size in bytes of the data copied in the migration region + * during the _RESUMING state. + * + * The format of the migration region is as follows: + * ------------------------------------------------------------------ + * |vfio_device_migration_info| data section | + * | | /////////////////////////////// | + * ------------------------------------------------------------------ + * ^ ^ + * offset 0-trapped part data_offset + * + * The structure vfio_device_migration_info is always followed by the data + * section in the region, so data_offset will always be nonzero. The offset + * from where the data is copied is decided by the kernel driver. The data + * section can be trapped, mmapped, or partitioned, depending on how the kernel + * driver defines the data section. The data section partition can be defined + * as mapped by the sparse mmap capability. If mmapped, data_offset must be + * page aligned, whereas initial section which contains the + * vfio_device_migration_info structure, might not end at the offset, which is + * page aligned. The user is not required to access through mmap regardless + * of the capabilities of the region mmap. + * The vendor driver should determine whether and how to partition the data + * section. The vendor driver should return data_offset accordingly. + * + * The sequence to be followed while in pre-copy state and stop-and-copy state + * is as follows: + * a. Read pending_bytes, indicating the start of a new iteration to get device + * data. Repeated read on pending_bytes at this stage should have no side + * effects. + * If pending_bytes == 0, the user application should not iterate to get data + * for that device. + * If pending_bytes > 0, perform the following steps. + * b. Read data_offset, indicating that the vendor driver should make data + * available through the data section. The vendor driver should return this + * read operation only after data is available from (region + data_offset) + * to (region + data_offset + data_size). + * c. Read data_size, which is the amount of data in bytes available through + * the migration region. + * Read on data_offset and data_size should return the offset and size of + * the current buffer if the user application reads data_offset and + * data_size more than once here. + * d. Read data_size bytes of data from (region + data_offset) from the + * migration region. + * e. Process the data. + * f. Read pending_bytes, which indicates that the data from the previous + * iteration has been read. If pending_bytes > 0, go to step b. + * + * The user application can transition from the _SAVING|_RUNNING + * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the + * number of pending bytes. The user application should iterate in _SAVING + * (stop-and-copy) until pending_bytes is 0. + * + * The sequence to be followed while _RESUMING device state is as follows: + * While data for this device is available, repeat the following steps: + * a. Read data_offset from where the user application should write data. + * b. Write migration data starting at the migration region + data_offset for + * the length determined by data_size from the migration source. + * c. Write data_size, which indicates to the vendor driver that data is + * written in the migration region. Vendor driver must return this write + * operations on consuming data. Vendor driver should apply the + * user-provided migration region data to the device resume state. + * + * If an error occurs during the above sequences, the vendor driver can return + * an error code for next read() or write() operation, which will terminate the + * loop. The user application should then take the next necessary action, for + * example, failing migration or terminating the user application. + * + * For the user application, data is opaque. The user application should write + * data in the same order as the data is received and the data should be of + * same transaction size at the source. + */ + +struct vfio_device_migration_info { + __u32 device_state; /* VFIO device state */ +#define VFIO_DEVICE_STATE_STOP (0) +#define VFIO_DEVICE_STATE_RUNNING (1 << 0) +#define VFIO_DEVICE_STATE_SAVING (1 << 1) +#define VFIO_DEVICE_STATE_RESUMING (1 << 2) +#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \ + VFIO_DEVICE_STATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING) + +#define VFIO_DEVICE_STATE_VALID(state) \ + (state & VFIO_DEVICE_STATE_RESUMING ? \ + (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1) + +#define VFIO_DEVICE_STATE_IS_ERROR(state) \ + ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING)) + +#define VFIO_DEVICE_STATE_SET_ERROR(state) \ + ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_SATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING) + + __u32 reserved; + __u64 pending_bytes; + __u64 data_offset; + __u64 data_size; +}; /* * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped @@ -577,6 +807,7 @@ enum { enum { VFIO_CCW_IO_IRQ_INDEX, + VFIO_CCW_CRW_IRQ_INDEX, VFIO_CCW_NUM_IRQS }; @@ -785,6 +1016,29 @@ struct vfio_iommu_type1_info_cap_iova_range { struct vfio_iova_range iova_ranges[]; }; +/* + * The migration capability allows to report supported features for migration. + * + * The structures below define version 1 of this capability. + * + * The existence of this capability indicates that IOMMU kernel driver supports + * dirty page logging. + * + * pgsize_bitmap: Kernel driver returns bitmap of supported page sizes for dirty + * page logging. + * max_dirty_bitmap_size: Kernel driver returns maximum supported dirty bitmap + * size in bytes that can be used by user applications when getting the dirty + * bitmap. + */ +#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 1 + +struct vfio_iommu_type1_info_cap_migration { + struct vfio_info_cap_header header; + __u32 flags; + __u64 pgsize_bitmap; + __u64 max_dirty_bitmap_size; /* in bytes */ +}; + #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) /** @@ -805,6 +1059,12 @@ struct vfio_iommu_type1_dma_map { #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) +struct vfio_bitmap { + __u64 pgsize; /* page size for bitmap in bytes */ + __u64 size; /* in bytes */ + __u64 *data; /* one bit per page */ +}; + /** * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14, * struct vfio_dma_unmap) @@ -814,12 +1074,23 @@ struct vfio_iommu_type1_dma_map { * field. No guarantee is made to the user that arbitrary unmaps of iova * or size different from those used in the original mapping call will * succeed. + * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap + * before unmapping IO virtual addresses. When this flag is set, the user must + * provide a struct vfio_bitmap in data[]. User must provide zero-allocated + * memory via vfio_bitmap.data and its size in the vfio_bitmap.size field. + * A bit in the bitmap represents one page, of user provided page size in + * vfio_bitmap.pgsize field, consecutively starting from iova offset. Bit set + * indicates that the page at that offset from iova is dirty. A Bitmap of the + * pages in the range of unmapped size is returned in the user-provided + * vfio_bitmap.data. */ struct vfio_iommu_type1_dma_unmap { __u32 argsz; __u32 flags; +#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) __u64 iova; /* IO virtual address */ __u64 size; /* Size of mapping (bytes) */ + __u8 data[]; }; #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) @@ -831,6 +1102,57 @@ struct vfio_iommu_type1_dma_unmap { #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) #define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) +/** + * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17, + * struct vfio_iommu_type1_dirty_bitmap) + * IOCTL is used for dirty pages logging. + * Caller should set flag depending on which operation to perform, details as + * below: + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set, instructs + * the IOMMU driver to log pages that are dirtied or potentially dirtied by + * the device; designed to be used when a migration is in progress. Dirty pages + * are logged until logging is disabled by user application by calling the IOCTL + * with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag. + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set, instructs + * the IOMMU driver to stop logging dirtied pages. + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set + * returns the dirty pages bitmap for IOMMU container for a given IOVA range. + * The user must specify the IOVA range and the pgsize through the structure + * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface + * supports getting a bitmap of the smallest supported pgsize only and can be + * modified in future to get a bitmap of any specified supported pgsize. The + * user must provide a zeroed memory area for the bitmap memory and specify its + * size in bitmap.size. One bit is used to represent one page consecutively + * starting from iova offset. The user should provide page size in bitmap.pgsize + * field. A bit set in the bitmap indicates that the page at that offset from + * iova is dirty. The caller must set argsz to a value including the size of + * structure vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the + * actual bitmap. If dirty pages logging is not enabled, an error will be + * returned. + * + * Only one of the flags _START, _STOP and _GET may be specified at a time. + * + */ +struct vfio_iommu_type1_dirty_bitmap { + __u32 argsz; + __u32 flags; +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) + __u8 data[]; +}; + +struct vfio_iommu_type1_dirty_bitmap_get { + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of iova range */ + struct vfio_bitmap bitmap; +}; + +#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) + /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ /* diff --git a/linux-headers/linux/vfio_ccw.h b/linux-headers/linux/vfio_ccw.h index fcc3e69..516496f 100644 --- a/linux-headers/linux/vfio_ccw.h +++ b/linux-headers/linux/vfio_ccw.h @@ -34,4 +34,23 @@ struct ccw_cmd_region { __u32 ret_code; } __attribute__((packed)); +/* + * Used for processing commands that read the subchannel-information block + * Reading this region triggers a stsch() to hardware + * Note: this is controlled by a capability + */ +struct ccw_schib_region { +#define SCHIB_AREA_SIZE 52 + __u8 schib_area[SCHIB_AREA_SIZE]; +} __attribute__((packed)); + +/* + * Used for returning a Channel Report Word to userspace. + * Note: this is controlled by a capability + */ +struct ccw_crw_region { + __u32 crw; + __u32 pad; +} __attribute__((packed)); + #endif diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index 9fe72e4..0c23496 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -15,6 +15,8 @@ #include <linux/types.h> #include <linux/ioctl.h> +#define VHOST_FILE_UNBIND -1 + /* ioctls */ #define VHOST_VIRTIO 0xAF @@ -140,4 +142,6 @@ /* Get the max ring size. */ #define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16) +/* Set event fd for config interrupt*/ +#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) #endif diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c index 7a14c52..a412926 100644 --- a/target/s390x/ioinst.c +++ b/target/s390x/ioinst.c @@ -292,8 +292,7 @@ void ioinst_handle_stsch(S390CPU *cpu, uint64_t reg1, uint32_t ipb, sch = css_find_subch(m, cssid, ssid, schid); if (sch) { if (css_subch_visible(sch)) { - css_do_stsch(sch, &schib); - cc = 0; + cc = css_do_stsch(sch, &schib); } else { /* Indicate no more subchannels in this css/ss */ cc = 3; |