From 8b645eea16fbbc6976f4d34b8e0db94ccbd0e61e Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Thu, 2 Feb 2023 11:19:44 +0000 Subject: [xen] Update to current Xen headers Signed-off-by: Michael Brown --- src/include/xen/arch-arm.h | 223 +++++--- src/include/xen/arch-x86/xen-x86_32.h | 86 ++-- src/include/xen/arch-x86/xen-x86_64.h | 116 +++-- src/include/xen/arch-x86/xen.h | 167 ++++-- src/include/xen/event_channel.h | 50 +- src/include/xen/features.h | 53 +- src/include/xen/grant_table.h | 83 +-- src/include/xen/hvm/hvm_op.h | 450 ++++++++-------- src/include/xen/hvm/params.h | 249 +++++++-- src/include/xen/io/netif.h | 942 +++++++++++++++++++++++++++++++--- src/include/xen/io/ring.h | 248 +++++++-- src/include/xen/io/xenbus.h | 19 +- src/include/xen/io/xs_wire.h | 55 +- src/include/xen/memory.h | 304 +++++++++-- src/include/xen/physdev.h | 368 +++++++++++++ src/include/xen/trace.h | 32 +- src/include/xen/version.h | 42 +- src/include/xen/xen-compat.h | 23 +- src/include/xen/xen.h | 265 +++++++--- 19 files changed, 2901 insertions(+), 874 deletions(-) create mode 100644 src/include/xen/physdev.h diff --git a/src/include/xen/arch-arm.h b/src/include/xen/arch-arm.h index ebc3aa2..54d19c6 100644 --- a/src/include/xen/arch-arm.h +++ b/src/include/xen/arch-arm.h @@ -1,26 +1,9 @@ +/* SPDX-License-Identifier: MIT */ /****************************************************************************** * arch-arm.h * * Guest OS interface to ARM Xen. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Copyright 2011 (C) Citrix Systems */ @@ -63,15 +46,15 @@ FILE_LICENCE ( MIT ); * * All memory which is shared with other entities in the system * (including the hypervisor and other guests) must reside in memory - * which is mapped as Normal Inner-cacheable. This applies to: + * which is mapped as Normal Inner Write-Back Outer Write-Back Inner-Shareable. + * This applies to: * - hypercall arguments passed via a pointer to guest memory. * - memory shared via the grant table mechanism (including PV I/O * rings etc). * - memory shared with the hypervisor (struct shared_info, struct * vcpu_info, the grant table, etc). * - * Any Inner cache allocation strategy (Write-Back, Write-Through etc) - * is acceptable. There is no restriction on the Outer-cacheability. + * Any cache allocation hints are acceptable. */ /* @@ -89,15 +72,10 @@ FILE_LICENCE ( MIT ); * unavailable/unsupported. * * HYPERVISOR_memory_op - * All generic sub-operations. - * - * In addition the following arch specific sub-ops: - * * XENMEM_add_to_physmap - * * XENMEM_add_to_physmap_batch + * All generic sub-operations * * HYPERVISOR_domctl * All generic sub-operations, with the exception of: - * * XEN_DOMCTL_iomem_permission (not yet implemented) * * XEN_DOMCTL_irq_permission (not yet implemented) * * HYPERVISOR_sched_op @@ -114,7 +92,9 @@ FILE_LICENCE ( MIT ); * All generic sub-operations * * HYPERVISOR_physdev_op - * No sub-operations are currenty supported + * Exactly these sub-operations are supported: + * PHYSDEVOP_pci_device_add + * PHYSDEVOP_pci_device_remove * * HYPERVISOR_sysctl * All generic sub-operations, with the exception of: @@ -135,6 +115,8 @@ FILE_LICENCE ( MIT ); * * VCPUOP_register_vcpu_info * * VCPUOP_register_runstate_memory_area * + * HYPERVISOR_argo_op + * All generic sub-operations * * Other notes on the ARM ABI: * @@ -172,6 +154,7 @@ FILE_LICENCE ( MIT ); #define XEN_HYPERCALL_TAG 0XEA1 +#define int64_aligned_t int64_t __attribute__((aligned(8))) #define uint64_aligned_t uint64_t __attribute__((aligned(8))) #ifndef __ASSEMBLY__ @@ -179,14 +162,14 @@ FILE_LICENCE ( MIT ); typedef union { type *p; unsigned long q; } \ __guest_handle_ ## name; \ typedef union { type *p; uint64_aligned_t q; } \ - __guest_handle_64_ ## name; + __guest_handle_64_ ## name /* * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field * in a struct in memory. On ARM is always 8 bytes sizes and 8 bytes * aligned. - * XEN_GUEST_HANDLE_PARAM represent a guest pointer, when passed as an - * hypercall argument. It is 4 bytes on aarch and 8 bytes on aarch64. + * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as an + * hypercall argument. It is 4 bytes on aarch32 and 8 bytes on aarch64. */ #define __DEFINE_XEN_GUEST_HANDLE(name, type) \ ___DEFINE_XEN_GUEST_HANDLE(name, type); \ @@ -194,19 +177,29 @@ FILE_LICENCE ( MIT ); #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) #define __XEN_GUEST_HANDLE(name) __guest_handle_64_ ## name #define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) -/* this is going to be changed on 64 bit */ #define XEN_GUEST_HANDLE_PARAM(name) __guest_handle_ ## name #define set_xen_guest_handle_raw(hnd, val) \ do { \ - typeof(&(hnd)) _sxghr_tmp = &(hnd); \ + __typeof__(&(hnd)) _sxghr_tmp = &(hnd); \ _sxghr_tmp->q = 0; \ _sxghr_tmp->p = val; \ } while ( 0 ) -#ifdef __XEN_TOOLS__ -#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) -#endif #define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) +typedef uint64_t xen_pfn_t; +#define PRI_xen_pfn PRIx64 +#define PRIu_xen_pfn PRIu64 + +/* + * Maximum number of virtual CPUs in legacy multi-processor guests. + * Only one. All other VCPUS must use VCPUOP_register_vcpu_info. + */ +#define XEN_LEGACY_MAX_VCPUS 1 + +typedef uint64_t xen_ulong_t; +#define PRI_xen_ulong PRIx64 + +#if defined(__XEN__) || defined(__XEN_TOOLS__) #if defined(__GNUC__) && !defined(__STRICT_ANSI__) /* Anonymous union includes both 32- and 64-bit names (e.g., r0/x0). */ # define __DECL_REG(n64, n32) union { \ @@ -263,10 +256,10 @@ struct vcpu_guest_core_regs /* Return address and mode */ __DECL_REG(pc64, pc32); /* ELR_EL2 */ - uint32_t cpsr; /* SPSR_EL2 */ + uint64_t cpsr; /* SPSR_EL2 */ union { - uint32_t spsr_el1; /* AArch64 */ + uint64_t spsr_el1; /* AArch64 */ uint32_t spsr_svc; /* AArch32 */ }; @@ -282,17 +275,6 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_core_regs_t); #undef __DECL_REG -typedef uint64_t xen_pfn_t; -#define PRI_xen_pfn PRIx64 - -/* Maximum number of virtual CPUs in legacy multi-processor guests. */ -/* Only one. All other VCPUS must use VCPUOP_register_vcpu_info */ -#define XEN_LEGACY_MAX_VCPUS 1 - -typedef uint64_t xen_ulong_t; -#define PRI_xen_ulong PRIx64 - -#if defined(__XEN__) || defined(__XEN_TOOLS__) struct vcpu_guest_context { #define _VGCF_online 0 #define VGCF_online (1<<_VGCF_online) @@ -300,12 +282,46 @@ struct vcpu_guest_context { struct vcpu_guest_core_regs user_regs; /* Core CPU registers */ - uint32_t sctlr; + uint64_t sctlr; uint64_t ttbcr, ttbr0, ttbr1; }; typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); -#endif + +/* + * struct xen_arch_domainconfig's ABI is covered by + * XEN_DOMCTL_INTERFACE_VERSION. + */ +#define XEN_DOMCTL_CONFIG_GIC_NATIVE 0 +#define XEN_DOMCTL_CONFIG_GIC_V2 1 +#define XEN_DOMCTL_CONFIG_GIC_V3 2 + +#define XEN_DOMCTL_CONFIG_TEE_NONE 0 +#define XEN_DOMCTL_CONFIG_TEE_OPTEE 1 + +struct xen_arch_domainconfig { + /* IN/OUT */ + uint8_t gic_version; + /* IN */ + uint16_t tee_type; + /* IN */ + uint32_t nr_spis; + /* + * OUT + * Based on the property clock-frequency in the DT timer node. + * The property may be present when the bootloader/firmware doesn't + * set correctly CNTFRQ which hold the timer frequency. + * + * As it's not possible to trap this register, we have to replicate + * the value in the guest DT. + * + * = 0 => property not present + * > 0 => Value of the property + * + */ + uint32_t clock_frequency; +}; +#endif /* __XEN__ || __XEN_TOOLS__ */ struct arch_vcpu_info { }; @@ -320,7 +336,7 @@ typedef uint64_t xen_callback_t; #if defined(__XEN__) || defined(__XEN_TOOLS__) -/* PSR bits (CPSR, SPSR)*/ +/* PSR bits (CPSR, SPSR) */ #define PSR_THUMB (1<<5) /* Thumb Mode enable */ #define PSR_FIQ_MASK (1<<6) /* Fast Interrupt mask */ @@ -330,6 +346,7 @@ typedef uint64_t xen_callback_t; #define PSR_DBG_MASK (1<<9) /* arm64: Debug Exception mask */ #define PSR_IT_MASK (0x0600fc00) /* Thumb If-Then Mask */ #define PSR_JAZELLE (1<<24) /* Jazelle Mode */ +#define PSR_Z (1<<30) /* Zero condition flag */ /* 32 bit modes */ #define PSR_MODE_USR 0x10 @@ -352,10 +369,18 @@ typedef uint64_t xen_callback_t; #define PSR_MODE_EL1t 0x04 #define PSR_MODE_EL0t 0x00 -#define PSR_GUEST32_INIT (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_SVC) +/* + * We set PSR_Z to be able to boot Linux kernel versions with an invalid + * encoding of the first 8 NOP instructions. See commit a92882a4d270 in + * Linux. + * + * Note that PSR_Z is also set by U-Boot and QEMU -kernel when loading + * zImage kernels on aarch32. + */ +#define PSR_GUEST32_INIT (PSR_Z|PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_SVC) #define PSR_GUEST64_INIT (PSR_ABT_MASK|PSR_FIQ_MASK|PSR_IRQ_MASK|PSR_MODE_EL1h) -#define SCTLR_GUEST_INIT 0x00c50078 +#define SCTLR_GUEST_INIT xen_mk_ullong(0x00c50078) /* * Virtual machine platform (memory layout, interrupts) @@ -366,27 +391,78 @@ typedef uint64_t xen_callback_t; */ /* Physical Address Space */ -#define GUEST_GICD_BASE 0x03001000ULL -#define GUEST_GICD_SIZE 0x00001000ULL -#define GUEST_GICC_BASE 0x03002000ULL -#define GUEST_GICC_SIZE 0x00000100ULL -/* 16MB == 4096 pages reserved for guest to use as a region to map its +/* Virtio MMIO mappings */ +#define GUEST_VIRTIO_MMIO_BASE xen_mk_ullong(0x02000000) +#define GUEST_VIRTIO_MMIO_SIZE xen_mk_ullong(0x00100000) + +/* + * vGIC mappings: Only one set of mapping is used by the guest. + * Therefore they can overlap. + */ + +/* vGIC v2 mappings */ +#define GUEST_GICD_BASE xen_mk_ullong(0x03001000) +#define GUEST_GICD_SIZE xen_mk_ullong(0x00001000) +#define GUEST_GICC_BASE xen_mk_ullong(0x03002000) +#define GUEST_GICC_SIZE xen_mk_ullong(0x00002000) + +/* vGIC v3 mappings */ +#define GUEST_GICV3_GICD_BASE xen_mk_ullong(0x03001000) +#define GUEST_GICV3_GICD_SIZE xen_mk_ullong(0x00010000) + +#define GUEST_GICV3_RDIST_REGIONS 1 + +#define GUEST_GICV3_GICR0_BASE xen_mk_ullong(0x03020000) /* vCPU0..127 */ +#define GUEST_GICV3_GICR0_SIZE xen_mk_ullong(0x01000000) + +/* + * 256 MB is reserved for VPCI configuration space based on calculation + * 256 buses x 32 devices x 8 functions x 4 KB = 256 MB + */ +#define GUEST_VPCI_ECAM_BASE xen_mk_ullong(0x10000000) +#define GUEST_VPCI_ECAM_SIZE xen_mk_ullong(0x10000000) + +/* ACPI tables physical address */ +#define GUEST_ACPI_BASE xen_mk_ullong(0x20000000) +#define GUEST_ACPI_SIZE xen_mk_ullong(0x02000000) + +/* PL011 mappings */ +#define GUEST_PL011_BASE xen_mk_ullong(0x22000000) +#define GUEST_PL011_SIZE xen_mk_ullong(0x00001000) + +/* Guest PCI-PCIe memory space where config space and BAR will be available.*/ +#define GUEST_VPCI_ADDR_TYPE_MEM xen_mk_ullong(0x02000000) +#define GUEST_VPCI_MEM_ADDR xen_mk_ullong(0x23000000) +#define GUEST_VPCI_MEM_SIZE xen_mk_ullong(0x10000000) + +/* + * 16MB == 4096 pages reserved for guest to use as a region to map its * grant table in. */ -#define GUEST_GNTTAB_BASE 0x38000000ULL -#define GUEST_GNTTAB_SIZE 0x01000000ULL +#define GUEST_GNTTAB_BASE xen_mk_ullong(0x38000000) +#define GUEST_GNTTAB_SIZE xen_mk_ullong(0x01000000) -#define GUEST_MAGIC_BASE 0x39000000ULL -#define GUEST_MAGIC_SIZE 0x01000000ULL +#define GUEST_MAGIC_BASE xen_mk_ullong(0x39000000) +#define GUEST_MAGIC_SIZE xen_mk_ullong(0x01000000) #define GUEST_RAM_BANKS 2 -#define GUEST_RAM0_BASE 0x40000000ULL /* 3GB of low RAM @ 1GB */ -#define GUEST_RAM0_SIZE 0xc0000000ULL +/* + * The way to find the extended regions (to be exposed to the guest as unused + * address space) relies on the fact that the regions reserved for the RAM + * below are big enough to also accommodate such regions. + */ +#define GUEST_RAM0_BASE xen_mk_ullong(0x40000000) /* 3GB of low RAM @ 1GB */ +#define GUEST_RAM0_SIZE xen_mk_ullong(0xc0000000) -#define GUEST_RAM1_BASE 0x0200000000ULL /* 1016GB of RAM @ 8GB */ -#define GUEST_RAM1_SIZE 0xfe00000000ULL +/* 4GB @ 4GB Prefetch Memory for VPCI */ +#define GUEST_VPCI_ADDR_TYPE_PREFETCH_MEM xen_mk_ullong(0x42000000) +#define GUEST_VPCI_PREFETCH_MEM_ADDR xen_mk_ullong(0x100000000) +#define GUEST_VPCI_PREFETCH_MEM_SIZE xen_mk_ullong(0x100000000) + +#define GUEST_RAM1_BASE xen_mk_ullong(0x0200000000) /* 1016GB of RAM @ 8GB */ +#define GUEST_RAM1_SIZE xen_mk_ullong(0xfe00000000) #define GUEST_RAM_BASE GUEST_RAM0_BASE /* Lowest RAM address */ /* Largest amount of actual RAM, not including holes */ @@ -395,12 +471,20 @@ typedef uint64_t xen_callback_t; #define GUEST_RAM_BANK_BASES { GUEST_RAM0_BASE, GUEST_RAM1_BASE } #define GUEST_RAM_BANK_SIZES { GUEST_RAM0_SIZE, GUEST_RAM1_SIZE } +/* Current supported guest VCPUs */ +#define GUEST_MAX_VCPUS 128 + /* Interrupts */ #define GUEST_TIMER_VIRT_PPI 27 #define GUEST_TIMER_PHYS_S_PPI 29 #define GUEST_TIMER_PHYS_NS_PPI 30 #define GUEST_EVTCHN_PPI 31 +#define GUEST_VPL011_SPI 32 + +#define GUEST_VIRTIO_MMIO_SPI_FIRST 33 +#define GUEST_VIRTIO_MMIO_SPI_LAST 43 + /* PSCI functions */ #define PSCI_cpu_suspend 0 #define PSCI_cpu_off 1 @@ -409,6 +493,11 @@ typedef uint64_t xen_callback_t; #endif +#ifndef __ASSEMBLY__ +/* Stub definition of PMU structure */ +typedef struct xen_pmu_arch { uint8_t dummy; } xen_pmu_arch_t; +#endif + #endif /* __XEN_PUBLIC_ARCH_ARM_H__ */ /* diff --git a/src/include/xen/arch-x86/xen-x86_32.h b/src/include/xen/arch-x86/xen-x86_32.h index 96c8f48..cc0b324 100644 --- a/src/include/xen/arch-x86/xen-x86_32.h +++ b/src/include/xen/arch-x86/xen-x86_32.h @@ -1,26 +1,9 @@ +/* SPDX-License-Identifier: MIT */ /****************************************************************************** * xen-x86_32.h * * Guest OS interface to x86 32-bit Xen. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Copyright (c) 2004-2007, K A Fraser */ @@ -60,34 +43,31 @@ FILE_LICENCE ( MIT ); #define __HYPERVISOR_VIRT_START_PAE 0xF5800000 #define __MACH2PHYS_VIRT_START_PAE 0xF5800000 #define __MACH2PHYS_VIRT_END_PAE 0xF6800000 -#define HYPERVISOR_VIRT_START_PAE \ - mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE) -#define MACH2PHYS_VIRT_START_PAE \ - mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE) -#define MACH2PHYS_VIRT_END_PAE \ - mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE) +#define HYPERVISOR_VIRT_START_PAE xen_mk_ulong(__HYPERVISOR_VIRT_START_PAE) +#define MACH2PHYS_VIRT_START_PAE xen_mk_ulong(__MACH2PHYS_VIRT_START_PAE) +#define MACH2PHYS_VIRT_END_PAE xen_mk_ulong(__MACH2PHYS_VIRT_END_PAE) /* Non-PAE bounds are obsolete. */ #define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000 #define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000 #define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000 #define HYPERVISOR_VIRT_START_NONPAE \ - mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE) + xen_mk_ulong(__HYPERVISOR_VIRT_START_NONPAE) #define MACH2PHYS_VIRT_START_NONPAE \ - mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE) + xen_mk_ulong(__MACH2PHYS_VIRT_START_NONPAE) #define MACH2PHYS_VIRT_END_NONPAE \ - mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE) + xen_mk_ulong(__MACH2PHYS_VIRT_END_NONPAE) #define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE #define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE #define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE #ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#define HYPERVISOR_VIRT_START xen_mk_ulong(__HYPERVISOR_VIRT_START) #endif -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_VIRT_START xen_mk_ulong(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END xen_mk_ulong(__MACH2PHYS_VIRT_END) #define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2) #ifndef machine_to_phys_mapping #define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START) @@ -106,6 +86,7 @@ FILE_LICENCE ( MIT ); do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ (hnd).p = val; \ } while ( 0 ) +#define int64_aligned_t int64_t __attribute__((aligned(8))) #define uint64_aligned_t uint64_t __attribute__((aligned(8))) #define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name #define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name) @@ -113,22 +94,44 @@ FILE_LICENCE ( MIT ); #ifndef __ASSEMBLY__ +#if defined(XEN_GENERATING_COMPAT_HEADERS) +/* nothing */ +#elif defined(__XEN__) || defined(__XEN_TOOLS__) +/* Anonymous unions include all permissible names (e.g., al/ah/ax/eax). */ +#define __DECL_REG_LO8(which) union { \ + uint32_t e ## which ## x; \ + uint16_t which ## x; \ + struct { \ + uint8_t which ## l; \ + uint8_t which ## h; \ + }; \ +} +#define __DECL_REG_LO16(name) union { \ + uint32_t e ## name, _e ## name; \ + uint16_t name; \ +} +#else +/* Other sources must always use the proper 32-bit name (e.g., eax). */ +#define __DECL_REG_LO8(which) uint32_t e ## which ## x +#define __DECL_REG_LO16(name) uint32_t e ## name +#endif + struct cpu_user_regs { - uint32_t ebx; - uint32_t ecx; - uint32_t edx; - uint32_t esi; - uint32_t edi; - uint32_t ebp; - uint32_t eax; + __DECL_REG_LO8(b); + __DECL_REG_LO8(c); + __DECL_REG_LO8(d); + __DECL_REG_LO16(si); + __DECL_REG_LO16(di); + __DECL_REG_LO16(bp); + __DECL_REG_LO8(a); uint16_t error_code; /* private */ uint16_t entry_vector; /* private */ - uint32_t eip; + __DECL_REG_LO16(ip); uint16_t cs; uint8_t saved_upcall_mask; uint8_t _pad0; - uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ - uint32_t esp; + __DECL_REG_LO16(flags); /* eflags.IF == !saved_upcall_mask */ + __DECL_REG_LO16(sp); uint16_t ss, _pad1; uint16_t es, _pad2; uint16_t ds, _pad3; @@ -138,6 +141,9 @@ struct cpu_user_regs { typedef struct cpu_user_regs cpu_user_regs_t; DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); +#undef __DECL_REG_LO8 +#undef __DECL_REG_LO16 + /* * Page-directory addresses above 4GB do not fit into architectural %cr3. * When accessing %cr3, or equivalent field in vcpu_guest_context, guests diff --git a/src/include/xen/arch-x86/xen-x86_64.h b/src/include/xen/arch-x86/xen-x86_64.h index 0e92702..8287fd2 100644 --- a/src/include/xen/arch-x86/xen-x86_64.h +++ b/src/include/xen/arch-x86/xen-x86_64.h @@ -1,26 +1,9 @@ +/* SPDX-License-Identifier: MIT */ /****************************************************************************** * xen-x86_64.h * * Guest OS interface to x86 64-bit Xen. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Copyright (c) 2004-2006, K A Fraser */ @@ -46,11 +29,11 @@ FILE_LICENCE ( MIT ); */ #define FLAT_RING3_CS32 0xe023 /* GDT index 260 */ -#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */ -#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_CS64 0xe033 /* GDT index 262 */ +#define FLAT_RING3_DS32 0xe02b /* GDT index 261 */ #define FLAT_RING3_DS64 0x0000 /* NULL selector */ -#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */ -#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */ +#define FLAT_RING3_SS32 0xe02b /* GDT index 261 */ +#define FLAT_RING3_SS64 0xe02b /* GDT index 261 */ #define FLAT_KERNEL_DS64 FLAT_RING3_DS64 #define FLAT_KERNEL_DS32 FLAT_RING3_DS32 @@ -78,12 +61,12 @@ FILE_LICENCE ( MIT ); #define __MACH2PHYS_VIRT_END 0xFFFF804000000000 #ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) -#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) +#define HYPERVISOR_VIRT_START xen_mk_ulong(__HYPERVISOR_VIRT_START) +#define HYPERVISOR_VIRT_END xen_mk_ulong(__HYPERVISOR_VIRT_END) #endif -#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) -#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_VIRT_START xen_mk_ulong(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END xen_mk_ulong(__MACH2PHYS_VIRT_END) #define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) #ifndef machine_to_phys_mapping #define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) @@ -132,7 +115,35 @@ struct iret_context { /* Bottom of iret stack frame. */ }; -#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +#if defined(__XEN__) || defined(__XEN_TOOLS__) +/* Anonymous unions include all permissible names (e.g., al/ah/ax/eax/rax). */ +#define __DECL_REG_LOHI(which) union { \ + uint64_t r ## which ## x; \ + uint32_t e ## which ## x; \ + uint16_t which ## x; \ + struct { \ + uint8_t which ## l; \ + uint8_t which ## h; \ + }; \ +} +#define __DECL_REG_LO8(name) union { \ + uint64_t r ## name; \ + uint32_t e ## name; \ + uint16_t name; \ + uint8_t name ## l; \ +} +#define __DECL_REG_LO16(name) union { \ + uint64_t r ## name; \ + uint32_t e ## name; \ + uint16_t name; \ +} +#define __DECL_REG_HI(num) union { \ + uint64_t r ## num; \ + uint32_t r ## num ## d; \ + uint16_t r ## num ## w; \ + uint8_t r ## num ## b; \ +} +#elif defined(__GNUC__) && !defined(__STRICT_ANSI__) /* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ #define __DECL_REG(name) union { \ uint64_t r ## name, e ## name; \ @@ -143,40 +154,51 @@ struct iret_context { #define __DECL_REG(name) uint64_t r ## name #endif +#ifndef __DECL_REG_LOHI +#define __DECL_REG_LOHI(name) __DECL_REG(name ## x) +#define __DECL_REG_LO8 __DECL_REG +#define __DECL_REG_LO16 __DECL_REG +#define __DECL_REG_HI(num) uint64_t r ## num +#endif + struct cpu_user_regs { - uint64_t r15; - uint64_t r14; - uint64_t r13; - uint64_t r12; - __DECL_REG(bp); - __DECL_REG(bx); - uint64_t r11; - uint64_t r10; - uint64_t r9; - uint64_t r8; - __DECL_REG(ax); - __DECL_REG(cx); - __DECL_REG(dx); - __DECL_REG(si); - __DECL_REG(di); + __DECL_REG_HI(15); + __DECL_REG_HI(14); + __DECL_REG_HI(13); + __DECL_REG_HI(12); + __DECL_REG_LO8(bp); + __DECL_REG_LOHI(b); + __DECL_REG_HI(11); + __DECL_REG_HI(10); + __DECL_REG_HI(9); + __DECL_REG_HI(8); + __DECL_REG_LOHI(a); + __DECL_REG_LOHI(c); + __DECL_REG_LOHI(d); + __DECL_REG_LO8(si); + __DECL_REG_LO8(di); uint32_t error_code; /* private */ uint32_t entry_vector; /* private */ - __DECL_REG(ip); + __DECL_REG_LO16(ip); uint16_t cs, _pad0[1]; uint8_t saved_upcall_mask; uint8_t _pad1[3]; - __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ - __DECL_REG(sp); + __DECL_REG_LO16(flags); /* rflags.IF == !saved_upcall_mask */ + __DECL_REG_LO8(sp); uint16_t ss, _pad2[3]; uint16_t es, _pad3[3]; uint16_t ds, _pad4[3]; - uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ - uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ + uint16_t fs, _pad5[3]; + uint16_t gs, _pad6[3]; }; typedef struct cpu_user_regs cpu_user_regs_t; DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); #undef __DECL_REG +#undef __DECL_REG_LOHI +#undef __DECL_REG_LO8 +#undef __DECL_REG_LO16 +#undef __DECL_REG_HI #define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12) #define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12) diff --git a/src/include/xen/arch-x86/xen.h b/src/include/xen/arch-x86/xen.h index d75528f..2b7afb2 100644 --- a/src/include/xen/arch-x86/xen.h +++ b/src/include/xen/arch-x86/xen.h @@ -1,26 +1,9 @@ +/* SPDX-License-Identifier: MIT */ /****************************************************************************** * arch-x86/xen.h * * Guest OS interface to x86 Xen. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Copyright (c) 2004-2006, K A Fraser */ @@ -56,13 +39,20 @@ FILE_LICENCE ( MIT ); #define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) #define XEN_GUEST_HANDLE_PARAM(name) XEN_GUEST_HANDLE(name) #define set_xen_guest_handle_raw(hnd, val) do { (hnd).p = val; } while (0) -#ifdef __XEN_TOOLS__ -#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) -#endif #define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) #if defined(__i386__) +# ifdef __XEN__ +__DeFiNe__ __DECL_REG_LO8(which) uint32_t e ## which ## x +__DeFiNe__ __DECL_REG_LO16(name) union { uint32_t e ## name; } +# endif #include "xen-x86_32.h" +# ifdef __XEN__ +__UnDeF__ __DECL_REG_LO8 +__UnDeF__ __DECL_REG_LO16 +__DeFiNe__ __DECL_REG_LO8(which) e ## which ## x +__DeFiNe__ __DECL_REG_LO16(name) e ## name +# endif #elif defined(__x86_64__) #include "xen-x86_64.h" #endif @@ -70,6 +60,7 @@ FILE_LICENCE ( MIT ); #ifndef __ASSEMBLY__ typedef unsigned long xen_pfn_t; #define PRI_xen_pfn "lx" +#define PRIu_xen_pfn "lu" #endif #define XEN_HAVE_PV_GUEST_ENTRY 1 @@ -137,6 +128,12 @@ typedef unsigned long xen_ulong_t; * Level == 1: Kernel may enter * Level == 2: Kernel may enter * Level == 3: Everyone may enter + * + * Note: For compatibility with kernels not setting up exception handlers + * early enough, Xen will avoid trying to inject #GP (and hence crash + * the domain) when an RDMSR would require this, but no handler was + * set yet. The precise conditions are implementation specific, and + * new code may not rely on such behavior anyway. */ #define TI_GET_DPL(_ti) ((_ti)->flags & 3) #define TI_GET_IF(_ti) ((_ti)->flags & 4) @@ -157,14 +154,12 @@ typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ * The following is all CPU context. Note that the fpu_ctxt block is filled * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. * - * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise - * for HVM and PVH guests, not all information in this structure is updated: - * - * - For HVM guests, the structures read include: fpu_ctxt (if - * VGCT_I387_VALID is set), flags, user_regs, debugreg[*] + * Also note that when calling DOMCTL_setvcpucontext for HVM guests, not all + * information in this structure is updated, the fields read include: fpu_ctxt + * (if VGCT_I387_VALID is set), flags, user_regs and debugreg[*]. * - * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to - * set cr3. All other fields not used should be set to 0. + * Note: VCPUOP_initialise for HVM guests is non-symetric with + * DOMCTL_setvcpucontext, and uses struct vcpu_hvm_context from hvm/hvm_vcpu.h */ struct vcpu_guest_context { /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ @@ -222,14 +217,117 @@ typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); struct arch_shared_info { - unsigned long max_pfn; /* max pfn that appears in table */ - /* Frame containing list of mfns containing list of mfns containing p2m. */ + /* + * Number of valid entries in the p2m table(s) anchored at + * pfn_to_mfn_frame_list_list and/or p2m_vaddr. + */ + unsigned long max_pfn; + /* + * Frame containing list of mfns containing list of mfns containing p2m. + * A value of 0 indicates it has not yet been set up, ~0 indicates it has + * been set to invalid e.g. due to the p2m being too large for the 3-level + * p2m tree. In this case the linear mapper p2m list anchored at p2m_vaddr + * is to be used. + */ xen_pfn_t pfn_to_mfn_frame_list_list; unsigned long nmi_reason; - uint64_t pad[32]; + /* + * Following three fields are valid if p2m_cr3 contains a value different + * from 0. + * p2m_cr3 is the root of the address space where p2m_vaddr is valid. + * p2m_cr3 is in the same format as a cr3 value in the vcpu register state + * and holds the folded machine frame number (via xen_pfn_to_cr3) of a + * L3 or L4 page table. + * p2m_vaddr holds the virtual address of the linear p2m list. All entries + * in the range [0...max_pfn[ are accessible via this pointer. + * p2m_generation will be incremented by the guest before and after each + * change of the mappings of the p2m list. p2m_generation starts at 0 and + * a value with the least significant bit set indicates that a mapping + * update is in progress. This allows guest external software (e.g. in Dom0) + * to verify that read mappings are consistent and whether they have changed + * since the last check. + * Modifying a p2m element in the linear p2m list is allowed via an atomic + * write only. + */ + unsigned long p2m_cr3; /* cr3 value of the p2m address space */ + unsigned long p2m_vaddr; /* virtual address of the p2m list */ + unsigned long p2m_generation; /* generation count of p2m mapping */ +#ifdef __i386__ + /* There's no room for this field in the generic structure. */ + uint32_t wc_sec_hi; +#endif }; typedef struct arch_shared_info arch_shared_info_t; +#if defined(__XEN__) || defined(__XEN_TOOLS__) +/* + * struct xen_arch_domainconfig's ABI is covered by + * XEN_DOMCTL_INTERFACE_VERSION. + */ +struct xen_arch_domainconfig { +#define _XEN_X86_EMU_LAPIC 0 +#define XEN_X86_EMU_LAPIC (1U<<_XEN_X86_EMU_LAPIC) +#define _XEN_X86_EMU_HPET 1 +#define XEN_X86_EMU_HPET (1U<<_XEN_X86_EMU_HPET) +#define _XEN_X86_EMU_PM 2 +#define XEN_X86_EMU_PM (1U<<_XEN_X86_EMU_PM) +#define _XEN_X86_EMU_RTC 3 +#define XEN_X86_EMU_RTC (1U<<_XEN_X86_EMU_RTC) +#define _XEN_X86_EMU_IOAPIC 4 +#define XEN_X86_EMU_IOAPIC (1U<<_XEN_X86_EMU_IOAPIC) +#define _XEN_X86_EMU_PIC 5 +#define XEN_X86_EMU_PIC (1U<<_XEN_X86_EMU_PIC) +#define _XEN_X86_EMU_VGA 6 +#define XEN_X86_EMU_VGA (1U<<_XEN_X86_EMU_VGA) +#define _XEN_X86_EMU_IOMMU 7 +#define XEN_X86_EMU_IOMMU (1U<<_XEN_X86_EMU_IOMMU) +#define _XEN_X86_EMU_PIT 8 +#define XEN_X86_EMU_PIT (1U<<_XEN_X86_EMU_PIT) +#define _XEN_X86_EMU_USE_PIRQ 9 +#define XEN_X86_EMU_USE_PIRQ (1U<<_XEN_X86_EMU_USE_PIRQ) +#define _XEN_X86_EMU_VPCI 10 +#define XEN_X86_EMU_VPCI (1U<<_XEN_X86_EMU_VPCI) + +#define XEN_X86_EMU_ALL (XEN_X86_EMU_LAPIC | XEN_X86_EMU_HPET | \ + XEN_X86_EMU_PM | XEN_X86_EMU_RTC | \ + XEN_X86_EMU_IOAPIC | XEN_X86_EMU_PIC | \ + XEN_X86_EMU_VGA | XEN_X86_EMU_IOMMU | \ + XEN_X86_EMU_PIT | XEN_X86_EMU_USE_PIRQ |\ + XEN_X86_EMU_VPCI) + uint32_t emulation_flags; + +/* + * Select whether to use a relaxed behavior for accesses to MSRs not explicitly + * handled by Xen instead of injecting a #GP to the guest. Note this option + * doesn't allow the guest to read or write to the underlying MSR. + */ +#define XEN_X86_MSR_RELAXED (1u << 0) + uint32_t misc_flags; +}; + +/* Max XEN_X86_* constant. Used for ABI checking. */ +#define XEN_X86_MISC_FLAGS_MAX XEN_X86_MSR_RELAXED + +#endif + +/* + * Representations of architectural CPUID and MSR information. Used as the + * serialised version of Xen's internal representation. + */ +typedef struct xen_cpuid_leaf { +#define XEN_CPUID_NO_SUBLEAF 0xffffffffu + uint32_t leaf, subleaf; + uint32_t a, b, c, d; +} xen_cpuid_leaf_t; +DEFINE_XEN_GUEST_HANDLE(xen_cpuid_leaf_t); + +typedef struct xen_msr_entry { + uint32_t idx; + uint32_t flags; /* Reserved MBZ. */ + uint64_t val; +} xen_msr_entry_t; +DEFINE_XEN_GUEST_HANDLE(xen_msr_entry_t); + #endif /* !__ASSEMBLY__ */ /* @@ -262,6 +360,13 @@ typedef struct arch_shared_info arch_shared_info_t; #define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" #endif +/* + * Debug console IO port, also called "port E9 hack". Each character written + * to this IO port will be printed on the hypervisor console, subject to log + * level restrictions. + */ +#define XEN_HVM_DEBUGCONS_IOPORT 0xe9 + #endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */ /* diff --git a/src/include/xen/event_channel.h b/src/include/xen/event_channel.h index 356e946..0c37527 100644 --- a/src/include/xen/event_channel.h +++ b/src/include/xen/event_channel.h @@ -1,26 +1,9 @@ +/* SPDX-License-Identifier: MIT */ /****************************************************************************** * event_channel.h * * Event channels between domains. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Copyright (c) 2003-2004, K A Fraser. */ @@ -76,6 +59,9 @@ FILE_LICENCE ( MIT ); #define EVTCHNOP_init_control 11 #define EVTCHNOP_expand_array 12 #define EVTCHNOP_set_priority 13 +#ifdef __XEN__ +#define EVTCHNOP_reset_cont 14 +#endif /* ` } */ typedef uint32_t evtchn_port_t; @@ -87,7 +73,7 @@ DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); * is allocated in and returned as . * NOTES: * 1. If the caller is unprivileged then must be DOMID_SELF. - * 2. may be DOMID_SELF, allowing loopback connections. + * 2. may be DOMID_SELF, allowing loopback connections. */ struct evtchn_alloc_unbound { /* IN parameters */ @@ -266,6 +252,10 @@ typedef struct evtchn_unmask evtchn_unmask_t; * NOTES: * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. + * 3. Destroys all control blocks and event array, resets event channel + * operations to 2-level ABI if called with == DOMID_SELF and FIFO + * ABI was used. Guests should not bind events during EVTCHNOP_reset call + * as these events are likely to be lost. */ struct evtchn_reset { /* IN parameters. */ @@ -305,7 +295,7 @@ typedef struct evtchn_expand_array evtchn_expand_array_t; */ struct evtchn_set_priority { /* IN parameters. */ - uint32_t port; + evtchn_port_t port; uint32_t priority; }; typedef struct evtchn_set_priority evtchn_set_priority_t; @@ -319,16 +309,16 @@ typedef struct evtchn_set_priority evtchn_set_priority_t; struct evtchn_op { uint32_t cmd; /* enum event_channel_op */ union { - struct evtchn_alloc_unbound alloc_unbound; - struct evtchn_bind_interdomain bind_interdomain; - struct evtchn_bind_virq bind_virq; - struct evtchn_bind_pirq bind_pirq; - struct evtchn_bind_ipi bind_ipi; - struct evtchn_close close; - struct evtchn_send send; - struct evtchn_status status; - struct evtchn_bind_vcpu bind_vcpu; - struct evtchn_unmask unmask; + evtchn_alloc_unbound_t alloc_unbound; + evtchn_bind_interdomain_t bind_interdomain; + evtchn_bind_virq_t bind_virq; + evtchn_bind_pirq_t bind_pirq; + evtchn_bind_ipi_t bind_ipi; + evtchn_close_t close; + evtchn_send_t send; + evtchn_status_t status; + evtchn_bind_vcpu_t bind_vcpu; + evtchn_unmask_t unmask; } u; }; typedef struct evtchn_op evtchn_op_t; diff --git a/src/include/xen/features.h b/src/include/xen/features.h index 1302658..bef56bf 100644 --- a/src/include/xen/features.h +++ b/src/include/xen/features.h @@ -1,26 +1,9 @@ +/* SPDX-License-Identifier: MIT */ /****************************************************************************** * features.h * * Feature flags, reported by XENVER_get_features. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Copyright (c) 2006, Keir Fraser */ @@ -96,6 +79,40 @@ FILE_LICENCE ( MIT ); /* operation as Dom0 is supported */ #define XENFEAT_dom0 11 +/* Xen also maps grant references at pfn = mfn. + * This feature flag is deprecated and should not be used. +#define XENFEAT_grant_map_identity 12 + */ + +/* Guest can use XENMEMF_vnode to specify virtual node for memory op. */ +#define XENFEAT_memory_op_vnode_supported 13 + +/* arm: Hypervisor supports ARM SMC calling convention. */ +#define XENFEAT_ARM_SMCCC_supported 14 + +/* + * x86/PVH: If set, ACPI RSDP can be placed at any address. Otherwise RSDP + * must be located in lower 1MB, as required by ACPI Specification for IA-PC + * systems. + * This feature flag is only consulted if XEN_ELFNOTE_GUEST_OS contains + * the "linux" string. + */ +#define XENFEAT_linux_rsdp_unrestricted 15 + +/* + * A direct-mapped (or 1:1 mapped) domain is a domain for which its + * local pages have gfn == mfn. If a domain is direct-mapped, + * XENFEAT_direct_mapped is set; otherwise XENFEAT_not_direct_mapped + * is set. + * + * If neither flag is set (e.g. older Xen releases) the assumptions are: + * - not auto_translated domains (x86 only) are always direct-mapped + * - on x86, auto_translated domains are not direct-mapped + * - on ARM, Dom0 is direct-mapped, DomUs are not + */ +#define XENFEAT_not_direct_mapped 16 +#define XENFEAT_direct_mapped 17 + #define XENFEAT_NR_SUBMAPS 1 #endif /* __XEN_PUBLIC_FEATURES_H__ */ diff --git a/src/include/xen/grant_table.h b/src/include/xen/grant_table.h index 137939e..f0ae17c 100644 --- a/src/include/xen/grant_table.h +++ b/src/include/xen/grant_table.h @@ -1,27 +1,10 @@ +/* SPDX-License-Identifier: MIT */ /****************************************************************************** * grant_table.h * * Interface for granting foreign access to page frames, and receiving * page-ownership transfers. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Copyright (c) 2004, K A Fraser */ @@ -45,7 +28,7 @@ FILE_LICENCE ( MIT ); * table are identified by grant references. A grant reference is an * integer, which indexes into the grant table. It acts as a * capability which the grantee can use to perform operations on the - * granter’s memory. + * granter's memory. * * This capability-based system allows shared-memory communications * between unprivileged domains. A grant reference also encapsulates @@ -123,8 +106,9 @@ typedef uint32_t grant_ref_t; */ /* - * Version 1 of the grant table entry structure is maintained purely - * for backwards compatibility. New guests should use version 2. + * Version 1 of the grant table entry structure is maintained largely for + * backwards compatibility. New guests are recommended to support using + * version 2 to overcome version 1 limitations, but to default to version 1. */ #if __XEN_INTERFACE_VERSION__ < 0x0003020a #define grant_entry_v1 grant_entry @@ -136,8 +120,10 @@ struct grant_entry_v1 { /* The domain being granted foreign privileges. [GST] */ domid_t domid; /* - * GTF_permit_access: Frame that @domid is allowed to map and access. [GST] - * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] + * GTF_permit_access: GFN that @domid is allowed to map and access. [GST] + * GTF_accept_transfer: GFN that @domid is allowed to transfer into. [GST] + * GTF_transfer_completed: MFN whose ownership transferred by @domid + * (non-translated guests only). [XEN] */ uint32_t frame; }; @@ -166,11 +152,13 @@ typedef struct grant_entry_v1 grant_entry_v1_t; #define GTF_type_mask (3U<<0) /* - * Subflags for GTF_permit_access. + * Subflags for GTF_permit_access and GTF_transitive. * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] - * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST] + * Further subflags for GTF_permit_access only. + * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags to be used for + * mappings of the grant [GST] * GTF_sub_page: Grant access to only a subrange of the page. @domid * will only be allowed to copy from the grant, and not * map it. [GST] @@ -311,6 +299,7 @@ typedef uint16_t grant_status_t; #define GNTTABOP_get_status_frames 9 #define GNTTABOP_get_version 10 #define GNTTABOP_swap_grant_ref 11 +#define GNTTABOP_cache_flush 12 #endif /* __XEN_INTERFACE_VERSION__ */ /* ` } */ @@ -322,7 +311,7 @@ typedef uint32_t grant_handle_t; /* * GNTTABOP_map_grant_ref: Map the grant entry (,) for access * by devices and/or host CPUs. If successful, is a tracking number - * that must be presented later to destroy the mapping(s). On error, + * that must be presented later to destroy the mapping(s). On error, * is a negative status code. * NOTES: * 1. If GNTMAP_device_map is specified then is the address @@ -410,12 +399,13 @@ typedef struct gnttab_dump_table gnttab_dump_table_t; DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t); /* - * GNTTABOP_transfer_grant_ref: Transfer to a foreign domain. The - * foreign domain has previously registered its interest in the transfer via - * . + * GNTTABOP_transfer: Transfer to a foreign domain. The foreign domain + * has previously registered its interest in the transfer via . * * Note that, even if the transfer fails, the specified page no longer belongs * to the calling domain *unless* the error is GNTST_bad_page. + * + * Note further that only PV guests can use this operation. */ struct gnttab_transfer { /* IN parameters. */ @@ -454,7 +444,7 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t); struct gnttab_copy { /* IN parameters. */ - struct { + struct gnttab_copy_ptr { union { grant_ref_t ref; xen_pfn_t gmfn; @@ -513,10 +503,9 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t); #if __XEN_INTERFACE_VERSION__ >= 0x0003020a /* * GNTTABOP_set_version: Request a particular version of the grant - * table shared table structure. This operation can only be performed - * once in any given domain. It must be performed before any grants - * are activated; otherwise, the domain will be stuck with version 1. - * The only defined versions are 1 and 2. + * table shared table structure. This operation may be used to toggle + * between different versions, but must be performed while no grants + * are active. The only defined versions are 1 and 2. */ struct gnttab_set_version { /* IN/OUT parameters */ @@ -576,6 +565,25 @@ struct gnttab_swap_grant_ref { typedef struct gnttab_swap_grant_ref gnttab_swap_grant_ref_t; DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t); +/* + * Issue one or more cache maintenance operations on a portion of a + * page granted to the calling domain by a foreign domain. + */ +struct gnttab_cache_flush { + union { + uint64_t dev_bus_addr; + grant_ref_t ref; + } a; + uint16_t offset; /* offset from start of grant */ + uint16_t length; /* size within the grant */ +#define GNTTAB_CACHE_CLEAN (1u<<0) +#define GNTTAB_CACHE_INVAL (1u<<1) +#define GNTTAB_CACHE_SOURCE_GREF (1u<<31) + uint32_t op; +}; +typedef struct gnttab_cache_flush gnttab_cache_flush_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_cache_flush_t); + #endif /* __XEN_INTERFACE_VERSION__ */ /* @@ -606,9 +614,6 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t); #define _GNTMAP_contains_pte (4) #define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) -#define _GNTMAP_can_fail (5) -#define GNTMAP_can_fail (1<<_GNTMAP_can_fail) - /* * Bits to be placed in guest kernel available PTE bits (architecture * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). @@ -633,6 +638,7 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t); #define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ #define GNTST_address_too_big (-11) /* transfer page address too large. */ #define GNTST_eagain (-12) /* Operation not done; try again. */ +#define GNTST_no_space (-13) /* Out of space (handles etc). */ /* ` } */ #define GNTTABOP_error_msgs { \ @@ -648,7 +654,8 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t); "bad page", \ "copy arguments cross page boundary", \ "page address size too large", \ - "operation not done; try again" \ + "operation not done; try again", \ + "out of space", \ } #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ diff --git a/src/include/xen/hvm/hvm_op.h b/src/include/xen/hvm/hvm_op.h index 469ad4f..5d734c4 100644 --- a/src/include/xen/hvm/hvm_op.h +++ b/src/include/xen/hvm/hvm_op.h @@ -1,21 +1,6 @@ +/* SPDX-License-Identifier: MIT */ /* - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. + * Copyright (c) 2007, Keir Fraser */ #ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ @@ -32,12 +17,33 @@ FILE_LICENCE ( MIT ); #define HVMOP_get_param 1 struct xen_hvm_param { domid_t domid; /* IN */ + uint16_t pad; uint32_t index; /* IN */ uint64_t value; /* IN/OUT */ }; typedef struct xen_hvm_param xen_hvm_param_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t); +struct xen_hvm_altp2m_suppress_ve { + uint16_t view; + uint8_t suppress_ve; /* Boolean type. */ + uint8_t pad1; + uint32_t pad2; + uint64_t gfn; +}; + +struct xen_hvm_altp2m_suppress_ve_multi { + uint16_t view; + uint8_t suppress_ve; /* Boolean type. */ + uint8_t pad1; + int32_t first_error; /* Should be set to 0. */ + uint64_t first_gfn; /* Value may be updated. */ + uint64_t last_gfn; + uint64_t first_error_gfn; /* Gfn of the first error. */ +}; + +#if __XEN_INTERFACE_VERSION__ < 0x00040900 + /* Set the logical level of one of a domain's PCI INTx wires. */ #define HVMOP_set_pci_intx_level 2 struct xen_hvm_set_pci_intx_level { @@ -76,63 +82,38 @@ struct xen_hvm_set_pci_link_route { typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t); +#endif /* __XEN_INTERFACE_VERSION__ < 0x00040900 */ + /* Flushes all VCPU TLBs: @arg must be NULL. */ #define HVMOP_flush_tlbs 5 +/* + * hvmmem_type_t should not be defined when generating the corresponding + * compat header. This will ensure that the improperly named HVMMEM_(*) + * values are defined only once. + */ +#ifndef XEN_GENERATING_COMPAT_HEADERS + typedef enum { HVMMEM_ram_rw, /* Normal read/write guest RAM */ HVMMEM_ram_ro, /* Read-only; writes are discarded */ HVMMEM_mmio_dm, /* Reads and write go to the device model */ +#if __XEN_INTERFACE_VERSION__ < 0x00040700 + HVMMEM_mmio_write_dm, /* Read-only; writes go to the device model */ +#else + HVMMEM_unused, /* Placeholder; setting memory to this type + will fail for code after 4.7.0 */ +#endif + HVMMEM_ioreq_server /* Memory type claimed by an ioreq server; type + changes to this value are only allowed after + an ioreq server has claimed its ownership. + Only pages with HVMMEM_ram_rw are allowed to + change to this type; conversely, pages with + this type are only allowed to be changed back + to HVMMEM_ram_rw. */ } hvmmem_type_t; -/* Following tools-only interfaces may change in future. */ -#if defined(__XEN__) || defined(__XEN_TOOLS__) - -/* Track dirty VRAM. */ -#define HVMOP_track_dirty_vram 6 -struct xen_hvm_track_dirty_vram { - /* Domain to be tracked. */ - domid_t domid; - /* Number of pages to track. */ - uint32_t nr; - /* First pfn to track. */ - uint64_aligned_t first_pfn; - /* OUT variable. */ - /* Dirty bitmap buffer. */ - XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; -}; -typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t; -DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t); - -/* Notify that some pages got modified by the Device Model. */ -#define HVMOP_modified_memory 7 -struct xen_hvm_modified_memory { - /* Domain to be updated. */ - domid_t domid; - /* Number of pages. */ - uint32_t nr; - /* First pfn. */ - uint64_aligned_t first_pfn; -}; -typedef struct xen_hvm_modified_memory xen_hvm_modified_memory_t; -DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); - -#define HVMOP_set_mem_type 8 -/* Notify that a region of memory is to be treated in a specific way. */ -struct xen_hvm_set_mem_type { - /* Domain to be updated. */ - domid_t domid; - /* Memory type */ - uint16_t hvmmem_type; - /* Number of pages. */ - uint32_t nr; - /* First pfn. */ - uint64_aligned_t first_pfn; -}; -typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t; -DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t); - -#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ +#endif /* XEN_GENERATING_COMPAT_HEADERS */ /* Hint from PV drivers for pagetable destruction. */ #define HVMOP_pagetable_dying 9 @@ -171,38 +152,6 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_xentrace_t); /* Deprecated by XENMEM_access_op_get_access */ #define HVMOP_get_mem_access 13 -#define HVMOP_inject_trap 14 -/* Inject a trap into a VCPU, which will get taken up on the next - * scheduling of it. Note that the caller should know enough of the - * state of the CPU before injecting, to know what the effect of - * injecting the trap will be. - */ -struct xen_hvm_inject_trap { - /* Domain to be queried. */ - domid_t domid; - /* VCPU */ - uint32_t vcpuid; - /* Vector number */ - uint32_t vector; - /* Trap type (HVMOP_TRAP_*) */ - uint32_t type; -/* NB. This enumeration precisely matches hvm.h:X86_EVENTTYPE_* */ -# define HVMOP_TRAP_ext_int 0 /* external interrupt */ -# define HVMOP_TRAP_nmi 2 /* nmi */ -# define HVMOP_TRAP_hw_exc 3 /* hardware exception */ -# define HVMOP_TRAP_sw_int 4 /* software interrupt (CD nn) */ -# define HVMOP_TRAP_pri_sw_exc 5 /* ICEBP (F1) */ -# define HVMOP_TRAP_sw_exc 6 /* INT3 (CC), INTO (CE) */ - /* Error code, or ~0u to skip */ - uint32_t error_code; - /* Intruction length */ - uint32_t insn_len; - /* CR2 for page faults */ - uint64_aligned_t cr2; -}; -typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t; -DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t); - #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ #define HVMOP_get_mem_type 15 @@ -222,154 +171,201 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_type_t); /* Following tools-only interfaces may change in future. */ #if defined(__XEN__) || defined(__XEN_TOOLS__) -/* MSI injection for emulated devices */ -#define HVMOP_inject_msi 16 -struct xen_hvm_inject_msi { - /* Domain to be injected */ - domid_t domid; - /* Data -- lower 32 bits */ - uint32_t data; - /* Address (0xfeexxxxx) */ - uint64_t addr; -}; -typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t; -DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_msi_t); +/* + * Definitions relating to DMOP_create_ioreq_server. (Defined here for + * backwards compatibility). + */ +#define HVM_IOREQSRV_BUFIOREQ_OFF 0 +#define HVM_IOREQSRV_BUFIOREQ_LEGACY 1 /* - * IOREQ Servers - * - * The interface between an I/O emulator an Xen is called an IOREQ Server. - * A domain supports a single 'legacy' IOREQ Server which is instantiated if - * parameter... - * - * HVM_PARAM_IOREQ_PFN is read (to get the gmfn containing the synchronous - * ioreq structures), or... - * HVM_PARAM_BUFIOREQ_PFN is read (to get the gmfn containing the buffered - * ioreq ring), or... - * HVM_PARAM_BUFIOREQ_EVTCHN is read (to get the event channel that Xen uses - * to request buffered I/O emulation). - * - * The following hypercalls facilitate the creation of IOREQ Servers for - * 'secondary' emulators which are invoked to implement port I/O, memory, or - * PCI config space ranges which they explicitly register. + * Use this when read_pointer gets updated atomically and + * the pointer pair gets read atomically: */ +#define HVM_IOREQSRV_BUFIOREQ_ATOMIC 2 -typedef uint16_t ioservid_t; +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +#if defined(__i386__) || defined(__x86_64__) /* - * HVMOP_create_ioreq_server: Instantiate a new IOREQ Server for a secondary - * emulator servicing domain . - * - * The handed back is unique for . If is zero - * the buffered ioreq ring will not be allocated and hence all emulation - * requestes to this server will be synchronous. + * HVMOP_set_evtchn_upcall_vector: Set a that should be used for event + * channel upcalls on the specified . If set, + * this vector will be used in preference to the + * domain global callback via (see + * HVM_PARAM_CALLBACK_IRQ). */ -#define HVMOP_create_ioreq_server 17 -struct xen_hvm_create_ioreq_server { - domid_t domid; /* IN - domain to be serviced */ - uint8_t handle_bufioreq; /* IN - should server handle buffered ioreqs */ - ioservid_t id; /* OUT - server id */ +#define HVMOP_set_evtchn_upcall_vector 23 +struct xen_hvm_evtchn_upcall_vector { + uint32_t vcpu; + uint8_t vector; }; -typedef struct xen_hvm_create_ioreq_server xen_hvm_create_ioreq_server_t; -DEFINE_XEN_GUEST_HANDLE(xen_hvm_create_ioreq_server_t); +typedef struct xen_hvm_evtchn_upcall_vector xen_hvm_evtchn_upcall_vector_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_evtchn_upcall_vector_t); -/* - * HVMOP_get_ioreq_server_info: Get all the information necessary to access - * IOREQ Server . - * - * The emulator needs to map the synchronous ioreq structures and buffered - * ioreq ring (if it exists) that Xen uses to request emulation. These are - * hosted in domain 's gmfns and - * respectively. In addition, if the IOREQ Server is handling buffered - * emulation requests, the emulator needs to bind to event channel - * to listen for them. (The event channels used for - * synchronous emulation requests are specified in the per-CPU ioreq - * structures in ). - * If the IOREQ Server is not handling buffered emulation requests then the - * values handed back in and will both be 0. - */ -#define HVMOP_get_ioreq_server_info 18 -struct xen_hvm_get_ioreq_server_info { - domid_t domid; /* IN - domain to be serviced */ - ioservid_t id; /* IN - server id */ - evtchn_port_t bufioreq_port; /* OUT - buffered ioreq port */ - uint64_aligned_t ioreq_pfn; /* OUT - sync ioreq pfn */ - uint64_aligned_t bufioreq_pfn; /* OUT - buffered ioreq pfn */ +#endif /* defined(__i386__) || defined(__x86_64__) */ + +#define HVMOP_guest_request_vm_event 24 + +/* HVMOP_altp2m: perform altp2m state operations */ +#define HVMOP_altp2m 25 + +#define HVMOP_ALTP2M_INTERFACE_VERSION 0x00000001 + +struct xen_hvm_altp2m_domain_state { + /* IN or OUT variable on/off */ + uint8_t state; }; -typedef struct xen_hvm_get_ioreq_server_info xen_hvm_get_ioreq_server_info_t; -DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_ioreq_server_info_t); +typedef struct xen_hvm_altp2m_domain_state xen_hvm_altp2m_domain_state_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_domain_state_t); + +struct xen_hvm_altp2m_vcpu_enable_notify { + uint32_t vcpu_id; + uint32_t pad; + /* #VE info area gfn */ + uint64_t gfn; +}; +typedef struct xen_hvm_altp2m_vcpu_enable_notify xen_hvm_altp2m_vcpu_enable_notify_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_vcpu_enable_notify_t); -/* - * HVM_map_io_range_to_ioreq_server: Register an I/O range of domain - * for emulation by the client of IOREQ - * Server - * HVM_unmap_io_range_from_ioreq_server: Deregister an I/O range of - * for emulation by the client of IOREQ - * Server - * - * There are three types of I/O that can be emulated: port I/O, memory accesses - * and PCI config space accesses. The field denotes which type of range - * the and (inclusive) fields are specifying. - * PCI config space ranges are specified by segment/bus/device/function values - * which should be encoded using the HVMOP_PCI_SBDF helper macro below. - * - * NOTE: unless an emulation request falls entirely within a range mapped - * by a secondary emulator, it will not be passed to that emulator. - */ -#define HVMOP_map_io_range_to_ioreq_server 19 -#define HVMOP_unmap_io_range_from_ioreq_server 20 -struct xen_hvm_io_range { - domid_t domid; /* IN - domain to be serviced */ - ioservid_t id; /* IN - server id */ - uint32_t type; /* IN - type of range */ -# define HVMOP_IO_RANGE_PORT 0 /* I/O port range */ -# define HVMOP_IO_RANGE_MEMORY 1 /* MMIO range */ -# define HVMOP_IO_RANGE_PCI 2 /* PCI segment/bus/dev/func range */ - uint64_aligned_t start, end; /* IN - inclusive start and end of range */ +struct xen_hvm_altp2m_vcpu_disable_notify { + uint32_t vcpu_id; +}; +typedef struct xen_hvm_altp2m_vcpu_disable_notify xen_hvm_altp2m_vcpu_disable_notify_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_vcpu_disable_notify_t); + +struct xen_hvm_altp2m_view { + /* IN/OUT variable */ + uint16_t view; + uint16_t hvmmem_default_access; /* xenmem_access_t */ }; -typedef struct xen_hvm_io_range xen_hvm_io_range_t; -DEFINE_XEN_GUEST_HANDLE(xen_hvm_io_range_t); +typedef struct xen_hvm_altp2m_view xen_hvm_altp2m_view_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_view_t); -#define HVMOP_PCI_SBDF(s,b,d,f) \ - ((((s) & 0xffff) << 16) | \ - (((b) & 0xff) << 8) | \ - (((d) & 0x1f) << 3) | \ - ((f) & 0x07)) +#if __XEN_INTERFACE_VERSION__ < 0x00040a00 +struct xen_hvm_altp2m_set_mem_access { + /* view */ + uint16_t view; + /* Memory type */ + uint16_t access; /* xenmem_access_t */ + uint32_t pad; + /* gfn */ + uint64_t gfn; +}; +typedef struct xen_hvm_altp2m_set_mem_access xen_hvm_altp2m_set_mem_access_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_set_mem_access_t); +#endif /* __XEN_INTERFACE_VERSION__ < 0x00040a00 */ -/* - * HVMOP_destroy_ioreq_server: Destroy the IOREQ Server servicing domain - * . - * - * Any registered I/O ranges will be automatically deregistered. - */ -#define HVMOP_destroy_ioreq_server 21 -struct xen_hvm_destroy_ioreq_server { - domid_t domid; /* IN - domain to be serviced */ - ioservid_t id; /* IN - server id */ +struct xen_hvm_altp2m_mem_access { + /* view */ + uint16_t view; + /* Memory type */ + uint16_t access; /* xenmem_access_t */ + uint32_t pad; + /* gfn */ + uint64_t gfn; +}; +typedef struct xen_hvm_altp2m_mem_access xen_hvm_altp2m_mem_access_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_mem_access_t); + +struct xen_hvm_altp2m_set_mem_access_multi { + /* view */ + uint16_t view; + uint16_t pad; + /* Number of pages */ + uint32_t nr; + /* + * Used for continuation purposes. + * Must be set to zero upon initial invocation. + */ + uint64_t opaque; + /* List of pfns to set access for */ + XEN_GUEST_HANDLE(const_uint64) pfn_list; + /* Corresponding list of access settings for pfn_list */ + XEN_GUEST_HANDLE(const_uint8) access_list; }; -typedef struct xen_hvm_destroy_ioreq_server xen_hvm_destroy_ioreq_server_t; -DEFINE_XEN_GUEST_HANDLE(xen_hvm_destroy_ioreq_server_t); -/* - * HVMOP_set_ioreq_server_state: Enable or disable the IOREQ Server servicing - * domain . - * - * The IOREQ Server will not be passed any emulation requests until it is in the - * enabled state. - * Note that the contents of the ioreq_pfn and bufioreq_fn (see - * HVMOP_get_ioreq_server_info) are not meaningful until the IOREQ Server is in - * the enabled state. - */ -#define HVMOP_set_ioreq_server_state 22 -struct xen_hvm_set_ioreq_server_state { - domid_t domid; /* IN - domain to be serviced */ - ioservid_t id; /* IN - server id */ - uint8_t enabled; /* IN - enabled? */ +struct xen_hvm_altp2m_change_gfn { + /* view */ + uint16_t view; + uint16_t pad1; + uint32_t pad2; + /* old gfn */ + uint64_t old_gfn; + /* new gfn, INVALID_GFN (~0UL) means revert */ + uint64_t new_gfn; }; -typedef struct xen_hvm_set_ioreq_server_state xen_hvm_set_ioreq_server_state_t; -DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_ioreq_server_state_t); +typedef struct xen_hvm_altp2m_change_gfn xen_hvm_altp2m_change_gfn_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_change_gfn_t); -#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ +struct xen_hvm_altp2m_get_vcpu_p2m_idx { + uint32_t vcpu_id; + uint16_t altp2m_idx; +}; + +struct xen_hvm_altp2m_set_visibility { + uint16_t altp2m_idx; + uint8_t visible; + uint8_t pad; +}; + +struct xen_hvm_altp2m_op { + uint32_t version; /* HVMOP_ALTP2M_INTERFACE_VERSION */ + uint32_t cmd; +/* Get/set the altp2m state for a domain */ +#define HVMOP_altp2m_get_domain_state 1 +#define HVMOP_altp2m_set_domain_state 2 +/* Set a given VCPU to receive altp2m event notifications */ +#define HVMOP_altp2m_vcpu_enable_notify 3 +/* Create a new view */ +#define HVMOP_altp2m_create_p2m 4 +/* Destroy a view */ +#define HVMOP_altp2m_destroy_p2m 5 +/* Switch view for an entire domain */ +#define HVMOP_altp2m_switch_p2m 6 +/* Notify that a page of memory is to have specific access types */ +#define HVMOP_altp2m_set_mem_access 7 +/* Change a p2m entry to have a different gfn->mfn mapping */ +#define HVMOP_altp2m_change_gfn 8 +/* Set access for an array of pages */ +#define HVMOP_altp2m_set_mem_access_multi 9 +/* Set the "Suppress #VE" bit on a page */ +#define HVMOP_altp2m_set_suppress_ve 10 +/* Get the "Suppress #VE" bit of a page */ +#define HVMOP_altp2m_get_suppress_ve 11 +/* Get the access of a page of memory from a certain view */ +#define HVMOP_altp2m_get_mem_access 12 +/* Disable altp2m event notifications for a given VCPU */ +#define HVMOP_altp2m_vcpu_disable_notify 13 +/* Get the active vcpu p2m index */ +#define HVMOP_altp2m_get_p2m_idx 14 +/* Set the "Supress #VE" bit for a range of pages */ +#define HVMOP_altp2m_set_suppress_ve_multi 15 +/* Set visibility for a given altp2m view */ +#define HVMOP_altp2m_set_visibility 16 + domid_t domain; + uint16_t pad1; + uint32_t pad2; + union { + struct xen_hvm_altp2m_domain_state domain_state; + struct xen_hvm_altp2m_vcpu_enable_notify enable_notify; + struct xen_hvm_altp2m_view view; +#if __XEN_INTERFACE_VERSION__ < 0x00040a00 + struct xen_hvm_altp2m_set_mem_access set_mem_access; +#endif /* __XEN_INTERFACE_VERSION__ < 0x00040a00 */ + struct xen_hvm_altp2m_mem_access mem_access; + struct xen_hvm_altp2m_change_gfn change_gfn; + struct xen_hvm_altp2m_set_mem_access_multi set_mem_access_multi; + struct xen_hvm_altp2m_suppress_ve suppress_ve; + struct xen_hvm_altp2m_suppress_ve_multi suppress_ve_multi; + struct xen_hvm_altp2m_vcpu_disable_notify disable_notify; + struct xen_hvm_altp2m_get_vcpu_p2m_idx get_vcpu_p2m_idx; + struct xen_hvm_altp2m_set_visibility set_visibility; + uint8_t pad[64]; + } u; +}; +typedef struct xen_hvm_altp2m_op xen_hvm_altp2m_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_altp2m_op_t); #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ diff --git a/src/include/xen/hvm/params.h b/src/include/xen/hvm/params.h index 49e0658..39cb7f7 100644 --- a/src/include/xen/hvm/params.h +++ b/src/include/xen/hvm/params.h @@ -1,21 +1,6 @@ +/* SPDX-License-Identifier: MIT */ /* - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. + * Copyright (c) 2007, Keir Fraser */ #ifndef __XEN_PUBLIC_HVM_PARAMS_H__ @@ -25,22 +10,67 @@ FILE_LICENCE ( MIT ); #include "hvm_op.h" +/* These parameters are deprecated and their meaning is undefined. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#define HVM_PARAM_PAE_ENABLED 4 +#define HVM_PARAM_DM_DOMAIN 13 +#define HVM_PARAM_MEMORY_EVENT_CR0 20 +#define HVM_PARAM_MEMORY_EVENT_CR3 21 +#define HVM_PARAM_MEMORY_EVENT_CR4 22 +#define HVM_PARAM_MEMORY_EVENT_INT3 23 +#define HVM_PARAM_NESTEDHVM 24 +#define HVM_PARAM_MEMORY_EVENT_SINGLE_STEP 25 +#define HVM_PARAM_BUFIOREQ_EVTCHN 26 +#define HVM_PARAM_MEMORY_EVENT_MSR 30 + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + /* * Parameter space for HVMOP_{set,get}_param. */ +#define HVM_PARAM_CALLBACK_IRQ 0 +#define HVM_PARAM_CALLBACK_IRQ_TYPE_MASK xen_mk_ullong(0xFF00000000000000) /* * How should CPU0 event-channel notifications be delivered? - * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). - * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: - * Domain = val[47:32], Bus = val[31:16], - * DevFn = val[15: 8], IntX = val[ 1: 0] - * val[63:56] == 2: val[7:0] is a vector number, check for - * XENFEAT_hvm_callback_vector to know if this delivery - * method is available. + * * If val == 0 then CPU0 event-channel notifications are not delivered. + * If val != 0, val[63:56] encodes the type, as follows: */ -#define HVM_PARAM_CALLBACK_IRQ 0 + +#define HVM_PARAM_CALLBACK_TYPE_GSI 0 +/* + * val[55:0] is a delivery GSI. GSI 0 cannot be used, as it aliases val == 0, + * and disables all notifications. + */ + +#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1 +/* + * val[55:0] is a delivery PCI INTx line: + * Domain = val[47:32], Bus = val[31:16] DevFn = val[15:8], IntX = val[1:0] + */ + +#if defined(__i386__) || defined(__x86_64__) +#define HVM_PARAM_CALLBACK_TYPE_VECTOR 2 +/* + * val[7:0] is a vector number. Check for XENFEAT_hvm_callback_vector to know + * if this delivery method is available. + */ +#elif defined(__arm__) || defined(__aarch64__) +#define HVM_PARAM_CALLBACK_TYPE_PPI 2 +/* + * val[55:16] needs to be zero. + * val[15:8] is interrupt flag of the PPI used by event-channel: + * bit 8: the PPI is edge(1) or level(0) triggered + * bit 9: the PPI is active low(1) or high(0) + * val[7:0] is a PPI number used by event-channel. + * This is only used by ARM/ARM64 and masking/eoi the interrupt associated to + * the notification is handled by the interrupt controller. + */ +#define HVM_PARAM_CALLBACK_TYPE_PPI_FLAG_MASK 0xFF00 +#define HVM_PARAM_CALLBACK_TYPE_PPI_FLAG_LOW_LEVEL 2 +#endif /* * These are not used by Xen. They are here for convenience of HVM-guest @@ -49,18 +79,103 @@ FILE_LICENCE ( MIT ); #define HVM_PARAM_STORE_PFN 1 #define HVM_PARAM_STORE_EVTCHN 2 -#define HVM_PARAM_PAE_ENABLED 4 - #define HVM_PARAM_IOREQ_PFN 5 #define HVM_PARAM_BUFIOREQ_PFN 6 -#define HVM_PARAM_BUFIOREQ_EVTCHN 26 #if defined(__i386__) || defined(__x86_64__) -/* Expose Viridian interfaces to this HVM guest? */ +/* + * Viridian enlightenments + * + * (See http://download.microsoft.com/download/A/B/4/AB43A34E-BDD0-4FA6-BDEF-79EEF16E880B/Hypervisor%20Top%20Level%20Functional%20Specification%20v4.0.docx) + * + * To expose viridian enlightenments to the guest set this parameter + * to the desired feature mask. The base feature set must be present + * in any valid feature mask. + */ #define HVM_PARAM_VIRIDIAN 9 +/* Base+Freq viridian feature sets: + * + * - Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) + * - APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) + * - Virtual Processor index MSR (HV_X64_MSR_VP_INDEX) + * - Timer frequency MSRs (HV_X64_MSR_TSC_FREQUENCY and + * HV_X64_MSR_APIC_FREQUENCY) + */ +#define _HVMPV_base_freq 0 +#define HVMPV_base_freq (1 << _HVMPV_base_freq) + +/* Feature set modifications */ + +/* Disable timer frequency MSRs (HV_X64_MSR_TSC_FREQUENCY and + * HV_X64_MSR_APIC_FREQUENCY). + * This modification restores the viridian feature set to the + * original 'base' set exposed in releases prior to Xen 4.4. + */ +#define _HVMPV_no_freq 1 +#define HVMPV_no_freq (1 << _HVMPV_no_freq) + +/* Enable Partition Time Reference Counter (HV_X64_MSR_TIME_REF_COUNT) */ +#define _HVMPV_time_ref_count 2 +#define HVMPV_time_ref_count (1 << _HVMPV_time_ref_count) + +/* Enable Reference TSC Page (HV_X64_MSR_REFERENCE_TSC) */ +#define _HVMPV_reference_tsc 3 +#define HVMPV_reference_tsc (1 << _HVMPV_reference_tsc) + +/* Use Hypercall for remote TLB flush */ +#define _HVMPV_hcall_remote_tlb_flush 4 +#define HVMPV_hcall_remote_tlb_flush (1 << _HVMPV_hcall_remote_tlb_flush) + +/* Use APIC assist */ +#define _HVMPV_apic_assist 5 +#define HVMPV_apic_assist (1 << _HVMPV_apic_assist) + +/* Enable crash MSRs */ +#define _HVMPV_crash_ctl 6 +#define HVMPV_crash_ctl (1 << _HVMPV_crash_ctl) + +/* Enable SYNIC MSRs */ +#define _HVMPV_synic 7 +#define HVMPV_synic (1 << _HVMPV_synic) + +/* Enable STIMER MSRs */ +#define _HVMPV_stimer 8 +#define HVMPV_stimer (1 << _HVMPV_stimer) + +/* Use Synthetic Cluster IPI Hypercall */ +#define _HVMPV_hcall_ipi 9 +#define HVMPV_hcall_ipi (1 << _HVMPV_hcall_ipi) + +/* Enable ExProcessorMasks */ +#define _HVMPV_ex_processor_masks 10 +#define HVMPV_ex_processor_masks (1 << _HVMPV_ex_processor_masks) + +/* Allow more than 64 VPs */ +#define _HVMPV_no_vp_limit 11 +#define HVMPV_no_vp_limit (1 << _HVMPV_no_vp_limit) + +/* Enable vCPU hotplug */ +#define _HVMPV_cpu_hotplug 12 +#define HVMPV_cpu_hotplug (1 << _HVMPV_cpu_hotplug) + +#define HVMPV_feature_mask \ + (HVMPV_base_freq | \ + HVMPV_no_freq | \ + HVMPV_time_ref_count | \ + HVMPV_reference_tsc | \ + HVMPV_hcall_remote_tlb_flush | \ + HVMPV_apic_assist | \ + HVMPV_crash_ctl | \ + HVMPV_synic | \ + HVMPV_stimer | \ + HVMPV_hcall_ipi | \ + HVMPV_ex_processor_masks | \ + HVMPV_no_vp_limit | \ + HVMPV_cpu_hotplug) + #endif /* @@ -94,9 +209,6 @@ FILE_LICENCE ( MIT ); /* Identity-map page directory used by Intel EPT when CR0.PG=0. */ #define HVM_PARAM_IDENT_PT 12 -/* Device Model domain, defaults to 0. */ -#define HVM_PARAM_DM_DOMAIN 13 - /* ACPI S state: currently support S0 and S3 on x86. */ #define HVM_PARAM_ACPI_S_STATE 14 @@ -121,27 +233,9 @@ FILE_LICENCE ( MIT ); */ #define HVM_PARAM_ACPI_IOPORTS_LOCATION 19 -/* Enable blocking memory events, async or sync (pause vcpu until response) - * onchangeonly indicates messages only on a change of value */ -#define HVM_PARAM_MEMORY_EVENT_CR0 20 -#define HVM_PARAM_MEMORY_EVENT_CR3 21 -#define HVM_PARAM_MEMORY_EVENT_CR4 22 -#define HVM_PARAM_MEMORY_EVENT_INT3 23 -#define HVM_PARAM_MEMORY_EVENT_SINGLE_STEP 25 -#define HVM_PARAM_MEMORY_EVENT_MSR 30 - -#define HVMPME_MODE_MASK (3 << 0) -#define HVMPME_mode_disabled 0 -#define HVMPME_mode_async 1 -#define HVMPME_mode_sync 2 -#define HVMPME_onchangeonly (1 << 2) - -/* Boolean: Enable nestedhvm (hvm only) */ -#define HVM_PARAM_NESTEDHVM 24 - /* Params for the mem event rings */ #define HVM_PARAM_PAGING_RING_PFN 27 -#define HVM_PARAM_ACCESS_RING_PFN 28 +#define HVM_PARAM_MONITOR_RING_PFN 28 #define HVM_PARAM_SHARING_RING_PFN 29 /* SHUTDOWN_* action in case of a triple fault */ @@ -153,6 +247,57 @@ FILE_LICENCE ( MIT ); /* Location of the VM Generation ID in guest physical address space. */ #define HVM_PARAM_VM_GENERATION_ID_ADDR 34 -#define HVM_NR_PARAMS 35 +/* + * Set mode for altp2m: + * disabled: don't activate altp2m (default) + * mixed: allow access to all altp2m ops for both in-guest and external tools + * external: allow access to external privileged tools only + * limited: guest only has limited access (ie. control VMFUNC and #VE) + * + * Note that 'mixed' mode has not been evaluated for safety from a + * security perspective. Before using this mode in a + * security-critical environment, each subop should be evaluated for + * safety, with unsafe subops blacklisted in XSM. + */ +#define HVM_PARAM_ALTP2M 35 +#define XEN_ALTP2M_disabled 0 +#define XEN_ALTP2M_mixed 1 +#define XEN_ALTP2M_external 2 +#define XEN_ALTP2M_limited 3 + +/* + * Size of the x87 FPU FIP/FDP registers that the hypervisor needs to + * save/restore. This is a workaround for a hardware limitation that + * does not allow the full FIP/FDP and FCS/FDS to be restored. + * + * Valid values are: + * + * 8: save/restore 64-bit FIP/FDP and clear FCS/FDS (default if CPU + * has FPCSDS feature). + * + * 4: save/restore 32-bit FIP/FDP, FCS/FDS, and clear upper 32-bits of + * FIP/FDP. + * + * 0: allow hypervisor to choose based on the value of FIP/FDP + * (default if CPU does not have FPCSDS). + * + * If FPCSDS (bit 13 in CPUID leaf 0x7, subleaf 0x0) is set, the CPU + * never saves FCS/FDS and this parameter should be left at the + * default of 8. + */ +#define HVM_PARAM_X87_FIP_WIDTH 36 + +/* + * TSS (and its size) used on Intel when CR0.PE=0. The address occupies + * the low 32 bits, while the size is in the high 32 ones. + */ +#define HVM_PARAM_VM86_TSS_SIZED 37 + +/* Enable MCA capabilities. */ +#define HVM_PARAM_MCA_CAP 38 +#define XEN_HVM_MCA_CAP_LMCE (xen_mk_ullong(1) << 0) +#define XEN_HVM_MCA_CAP_MASK XEN_HVM_MCA_CAP_LMCE + +#define HVM_NR_PARAMS 39 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff --git a/src/include/xen/io/netif.h b/src/include/xen/io/netif.h index ae12eab..bec61ab 100644 --- a/src/include/xen/io/netif.h +++ b/src/include/xen/io/netif.h @@ -1,26 +1,9 @@ +/* SPDX-License-Identifier: MIT */ /****************************************************************************** * netif.h * * Unified network-device I/O interface for Xen guest OSes. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Copyright (c) 2003-2004, Keir Fraser */ @@ -138,14 +121,839 @@ FILE_LICENCE ( MIT ); */ /* - * This is the 'wire' format for packets: - * Request 1: netif_tx_request -- NETTXF_* (any flags) - * [Request 2: netif_tx_extra] (only if request 1 has NETTXF_extra_info) - * [Request 3: netif_tx_extra] (only if request 2 has XEN_NETIF_EXTRA_MORE) - * Request 4: netif_tx_request -- NETTXF_more_data - * Request 5: netif_tx_request -- NETTXF_more_data + * "feature-multicast-control" and "feature-dynamic-multicast-control" + * advertise the capability to filter ethernet multicast packets in the + * backend. If the frontend wishes to take advantage of this feature then + * it may set "request-multicast-control". If the backend only advertises + * "feature-multicast-control" then "request-multicast-control" must be set + * before the frontend moves into the connected state. The backend will + * sample the value on this state transition and any subsequent change in + * value will have no effect. However, if the backend also advertises + * "feature-dynamic-multicast-control" then "request-multicast-control" + * may be set by the frontend at any time. In this case, the backend will + * watch the value and re-sample on watch events. + * + * If the sampled value of "request-multicast-control" is set then the + * backend transmit side should no longer flood multicast packets to the + * frontend, it should instead drop any multicast packet that does not + * match in a filter list. + * The list is amended by the frontend by sending dummy transmit requests + * containing XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} extra-info fragments as + * specified below. + * Note that the filter list may be amended even if the sampled value of + * "request-multicast-control" is not set, however the filter should only + * be applied if it is set. + */ + +/* + * The setting of "trusted" node to "0" in the frontend path signals that the + * frontend should not trust the backend, and should deploy whatever measures + * available to protect from a malicious backend on the other end. + */ + +/* + * Control ring + * ============ + * + * Some features, such as hashing (detailed below), require a + * significant amount of out-of-band data to be passed from frontend to + * backend. Use of xenstore is not suitable for large quantities of data + * because of quota limitations and so a dedicated 'control ring' is used. + * The ability of the backend to use a control ring is advertised by + * setting: + * + * /local/domain/X/backend/vif///feature-ctrl-ring = "1" + * + * The frontend provides a control ring to the backend by setting: + * + * /local/domain//device/vif//ctrl-ring-ref = + * /local/domain//device/vif//event-channel-ctrl = + * + * where is the grant reference of the shared page used to + * implement the control ring and is an event channel to be used + * as a mailbox interrupt. These keys must be set before the frontend + * moves into the connected state. + * + * The control ring uses a fixed request/response message size and is + * balanced (i.e. one request to one response), so operationally it is much + * the same as a transmit or receive ring. + * Note that there is no requirement that responses are issued in the same + * order as requests. + */ + +/* + * Link state + * ========== + * + * The backend can advertise its current link (carrier) state to the + * frontend using the /local/domain/X/backend/vif///carrier + * node. If this node is not present, then the frontend should assume that + * the link is up (for compatibility with backends that do not implement + * this feature). If this node is present, then a value of "0" should be + * interpreted by the frontend as the link being down (no carrier) and a + * value of "1" should be interpreted as the link being up (carrier + * present). + */ + +/* + * MTU + * === + * + * The toolstack may set a value of MTU for the frontend by setting the + * /local/domain//device/vif//mtu node with the MTU value in + * octets. If this node is absent the frontend should assume an MTU value + * of 1500 octets. A frontend is also at liberty to ignore this value so + * it is only suitable for informing the frontend that a packet payload + * >1500 octets is permitted. + */ + +/* + * Hash types + * ========== + * + * For the purposes of the definitions below, 'Packet[]' is an array of + * octets containing an IP packet without options, 'Array[X..Y]' means a + * sub-array of 'Array' containing bytes X thru Y inclusive, and '+' is + * used to indicate concatenation of arrays. + */ + +/* + * A hash calculated over an IP version 4 header as follows: + * + * Buffer[0..8] = Packet[12..15] (source address) + + * Packet[16..19] (destination address) + * + * Result = Hash(Buffer, 8) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4 0 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV4 \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4) + +/* + * A hash calculated over an IP version 4 header and TCP header as + * follows: + * + * Buffer[0..12] = Packet[12..15] (source address) + + * Packet[16..19] (destination address) + + * Packet[20..21] (source port) + + * Packet[22..23] (destination port) + * + * Result = Hash(Buffer, 12) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP 1 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP) + +/* + * A hash calculated over an IP version 6 header as follows: + * + * Buffer[0..32] = Packet[8..23] (source address ) + + * Packet[24..39] (destination address) + * + * Result = Hash(Buffer, 32) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6 2 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV6 \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6) + +/* + * A hash calculated over an IP version 6 header and TCP header as + * follows: + * + * Buffer[0..36] = Packet[8..23] (source address) + + * Packet[24..39] (destination address) + + * Packet[40..41] (source port) + + * Packet[42..43] (destination port) + * + * Result = Hash(Buffer, 36) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP 3 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP) + +/* + * Hash algorithms + * =============== + */ + +#define XEN_NETIF_CTRL_HASH_ALGORITHM_NONE 0 + +/* + * Toeplitz hash: + */ + +#define XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ 1 + +/* + * This algorithm uses a 'key' as well as the data buffer itself. + * (Buffer[] and Key[] are treated as shift-registers where the MSB of + * Buffer/Key[0] is considered 'left-most' and the LSB of Buffer/Key[N-1] + * is the 'right-most'). + * + * Value = 0 + * For number of bits in Buffer[] + * If (left-most bit of Buffer[] is 1) + * Value ^= left-most 32 bits of Key[] + * Key[] << 1 + * Buffer[] << 1 + * + * The code below is provided for convenience where an operating system + * does not already provide an implementation. + */ +#ifdef XEN_NETIF_DEFINE_TOEPLITZ +static uint32_t xen_netif_toeplitz_hash(const uint8_t *key, + unsigned int keylen, + const uint8_t *buf, + unsigned int buflen) +{ + unsigned int keyi, bufi; + uint64_t prefix = 0; + uint64_t hash = 0; + + /* Pre-load prefix with the first 8 bytes of the key */ + for (keyi = 0; keyi < 8; keyi++) { + prefix <<= 8; + prefix |= (keyi < keylen) ? key[keyi] : 0; + } + + for (bufi = 0; bufi < buflen; bufi++) { + uint8_t byte = buf[bufi]; + unsigned int bit; + + for (bit = 0; bit < 8; bit++) { + if (byte & 0x80) + hash ^= prefix; + prefix <<= 1; + byte <<=1; + } + + /* + * 'prefix' has now been left-shifted by 8, so + * OR in the next byte. + */ + prefix |= (keyi < keylen) ? key[keyi] : 0; + keyi++; + } + + /* The valid part of the hash is in the upper 32 bits. */ + return hash >> 32; +} +#endif /* XEN_NETIF_DEFINE_TOEPLITZ */ + +/* + * Control requests (struct xen_netif_ctrl_request) + * ================================================ + * + * All requests have the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | type | data[0] | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | data[1] | data[2] | + * +-----+-----+-----+-----+-----------------------+ + * + * id: the request identifier, echoed in response. + * type: the type of request (see below) + * data[]: any data associated with the request (determined by type) + */ + +struct xen_netif_ctrl_request { + uint16_t id; + uint16_t type; + +#define XEN_NETIF_CTRL_TYPE_INVALID 0 +#define XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS 1 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS 2 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_KEY 3 +#define XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 4 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 5 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING 6 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM 7 +#define XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE 8 +#define XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING 9 +#define XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING 10 + + uint32_t data[3]; +}; + +/* + * Control responses (struct xen_netif_ctrl_response) + * ================================================== + * + * All responses have the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | type | status | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | data | + * +-----+-----+-----+-----+ + * + * id: the corresponding request identifier + * type: the type of the corresponding request + * status: the status of request processing + * data: any data associated with the response (determined by type and + * status) + */ + +struct xen_netif_ctrl_response { + uint16_t id; + uint16_t type; + uint32_t status; + +#define XEN_NETIF_CTRL_STATUS_SUCCESS 0 +#define XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED 1 +#define XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER 2 +#define XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW 3 + + uint32_t data; +}; + +/* + * Static Grants (struct xen_netif_gref) + * ===================================== + * + * A frontend may provide a fixed set of grant references to be mapped on + * the backend. The message of type XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING + * prior its usage in the command ring allows for creation of these mappings. + * The backend will maintain a fixed amount of these mappings. + * + * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE lets a frontend query how many + * of these mappings can be kept. + * + * Each entry in the XEN_NETIF_CTRL_TYPE_{ADD,DEL}_GREF_MAPPING input table has + * the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | grant ref | flags | status | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * grant ref: grant reference (IN) + * flags: flags describing the control operation (IN) + * status: XEN_NETIF_CTRL_STATUS_* (OUT) + * + * 'status' is an output parameter which does not require to be set to zero + * prior to its usage in the corresponding control messages. + */ + +struct xen_netif_gref { + grant_ref_t ref; + uint16_t flags; + +#define _XEN_NETIF_CTRLF_GREF_readonly 0 +#define XEN_NETIF_CTRLF_GREF_readonly (1U<<_XEN_NETIF_CTRLF_GREF_readonly) + + uint16_t status; +}; + +/* + * Control messages + * ================ + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM + * -------------------------------------- + * + * This is sent by the frontend to set the desired hash algorithm. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM + * data[0] = a XEN_NETIF_CTRL_HASH_ALGORITHM_* value + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The algorithm is not + * supported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * + * NOTE: Setting data[0] to XEN_NETIF_CTRL_HASH_ALGORITHM_NONE disables + * hashing and the backend is free to choose how it steers packets + * to queues (which is the default behaviour). + * + * XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS + * ---------------------------------- + * + * This is sent by the frontend to query the types of hash supported by + * the backend. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS + * data[0] = 0 + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = supported hash types (if operation was successful) + * + * NOTE: A valid hash algorithm must be selected before this operation can + * succeed. + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS + * ---------------------------------- + * + * This is sent by the frontend to set the types of hash that the backend + * should calculate. (See above for hash type definitions). + * Note that the 'maximal' type of hash should always be chosen. For + * example, if the frontend sets both IPV4 and IPV4_TCP hash types then + * the latter hash type should be calculated for any TCP packet and the + * former only calculated for non-TCP packets. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS + * data[0] = bitwise OR of XEN_NETIF_CTRL_HASH_TYPE_* values + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - One or more flag + * value is invalid or + * unsupported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: A valid hash algorithm must be selected before this operation can + * succeed. + * Also, setting data[0] to zero disables hashing and the backend + * is free to choose how it steers packets to queues. + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_KEY + * -------------------------------- + * + * This is sent by the frontend to set the key of the hash if the algorithm + * requires it. (See hash algorithms above). + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_KEY + * data[0] = grant reference of page containing the key (assumed to + * start at beginning of grant) + * data[1] = size of key in octets + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Key size is invalid + * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Key size is larger + * than the backend + * supports + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: Any key octets not specified are assumed to be zero (the key + * is assumed to be empty by default) and specifying a new key + * invalidates any previous key, hence specifying a key size of + * zero will clear the key (which ensures that the calculated hash + * will always be zero). + * The maximum size of key is algorithm and backend specific, but + * is also limited by the single grant reference. + * The grant reference may be read-only and must remain valid until + * the response has been processed. + * + * XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE + * ----------------------------------------- + * + * This is sent by the frontend to query the maximum size of mapping + * table supported by the backend. The size is specified in terms of + * table entries. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE + * data[0] = 0 + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = maximum number of entries allowed in the mapping table + * (if operation was successful) or zero if a mapping table is + * not supported (i.e. hash mapping is done only by modular + * arithmetic). + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE + * ------------------------------------- + * + * This is sent by the frontend to set the actual size of the mapping + * table to be used by the backend. The size is specified in terms of + * table entries. + * Any previous table is invalidated by this message and any new table + * is assumed to be zero filled. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE + * data[0] = number of entries in mapping table + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size is invalid + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: Setting data[0] to 0 means that hash mapping should be done + * using modular arithmetic. + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING + * ------------------------------------ + * + * This is sent by the frontend to set the content of the table mapping + * hash value to queue number. The backend should calculate the hash from + * the packet header, use it as an index into the table (modulo the size + * of the table) and then steer the packet to the queue number found at + * that index. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING + * data[0] = grant reference of page containing the mapping (sub-)table + * (assumed to start at beginning of grant) + * data[1] = size of (sub-)table in entries + * data[2] = offset, in entries, of sub-table within overall table + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size or content + * is invalid + * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Table size is larger + * than the backend + * supports + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: The overall table has the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | mapping[0] | mapping[1] | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | . | + * | . | + * | . | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | mapping[N-2] | mapping[N-1] | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * where N is specified by a XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE + * message and each mapping must specifies a queue between 0 and + * "multi-queue-num-queues" (see above). + * The backend may support a mapping table larger than can be + * mapped by a single grant reference. Thus sub-tables within a + * larger table can be individually set by sending multiple messages + * with differing offset values. Specifying a new sub-table does not + * invalidate any table data outside that range. + * The grant reference may be read-only and must remain valid until + * the response has been processed. + * + * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE + * ----------------------------------------- + * + * This is sent by the frontend to fetch the number of grefs that can be kept + * mapped in the backend. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE + * data[0] = queue index (assumed 0 for single queue) + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The queue index is + * out of range + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = maximum number of entries allowed in the gref mapping table + * (if operation was successful) or zero if it is not supported. + * + * XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING + * ------------------------------------ + * + * This is sent by the frontend for backend to map a list of grant + * references. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING + * data[0] = queue index + * data[1] = grant reference of page containing the mapping list + * (r/w and assumed to start at beginning of page) + * data[2] = size of list in entries + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * + * NOTE: Each entry in the input table has the format outlined + * in struct xen_netif_gref. + * Contrary to XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING, the struct + * xen_netif_gref 'status' field is not used and therefore the response + * 'status' determines the success of this operation. In case of + * failure none of grants mappings get added in the backend. + * + * XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING + * ------------------------------------ + * + * This is sent by the frontend for backend to unmap a list of grant + * references. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING + * data[0] = queue index + * data[1] = grant reference of page containing the mapping list + * (r/w and assumed to start at beginning of page) + * data[2] = size of list in entries + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = number of entries that were unmapped + * + * NOTE: Each entry in the input table has the format outlined in struct + * xen_netif_gref. + * The struct xen_netif_gref 'status' field determines if the entry + * was successfully removed. + * The entries used are only the ones representing grant references that + * were previously the subject of a XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING + * operation. Any other entries will have their status set to + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER upon completion. + */ + +DEFINE_RING_TYPES(xen_netif_ctrl, + struct xen_netif_ctrl_request, + struct xen_netif_ctrl_response); + +/* + * Guest transmit + * ============== + * + * This is the 'wire' format for transmit (frontend -> backend) packets: + * + * Fragment 1: netif_tx_request_t - flags = NETTXF_* + * size = total packet size + * [Extra 1: netif_extra_info_t] - (only if fragment 1 flags include + * NETTXF_extra_info) + * ... + * [Extra N: netif_extra_info_t] - (only if extra N-1 flags include + * XEN_NETIF_EXTRA_MORE) + * ... + * Fragment N: netif_tx_request_t - (only if fragment N-1 flags include + * NETTXF_more_data - flags on preceding + * extras are not relevant here) + * flags = 0 + * size = fragment size + * + * NOTE: + * + * This format slightly is different from that used for receive + * (backend -> frontend) packets. Specifically, in a multi-fragment + * packet the actual size of fragment 1 can only be determined by + * subtracting the sizes of fragments 2..N from the total packet size. + * + * Ring slot size is 12 octets, however not all request/response + * structs use the full size. + * + * tx request data (netif_tx_request_t) + * ------------------------------------ + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | grant ref | offset | flags | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | size | + * +-----+-----+-----+-----+ + * + * grant ref: Reference to buffer page. + * offset: Offset within buffer page. + * flags: NETTXF_*. + * id: request identifier, echoed in response. + * size: packet size in bytes. + * + * tx response (netif_tx_response_t) + * --------------------------------- + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | status | unused | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | unused | + * +-----+-----+-----+-----+ + * + * id: reflects id in transmit request + * status: NETIF_RSP_* + * + * Guest receive + * ============= + * + * This is the 'wire' format for receive (backend -> frontend) packets: + * + * Fragment 1: netif_rx_request_t - flags = NETRXF_* + * size = fragment size + * [Extra 1: netif_extra_info_t] - (only if fragment 1 flags include + * NETRXF_extra_info) * ... - * Request N: netif_tx_request -- 0 + * [Extra N: netif_extra_info_t] - (only if extra N-1 flags include + * XEN_NETIF_EXTRA_MORE) + * ... + * Fragment N: netif_rx_request_t - (only if fragment N-1 flags include + * NETRXF_more_data - flags on preceding + * extras are not relevant here) + * flags = 0 + * size = fragment size + * + * NOTE: + * + * This format slightly is different from that used for transmit + * (frontend -> backend) packets. Specifically, in a multi-fragment + * packet the size of the packet can only be determined by summing the + * sizes of fragments 1..N. + * + * Ring slot size is 8 octets. + * + * rx request (netif_rx_request_t) + * ------------------------------- + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | pad | gref | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * id: request identifier, echoed in response. + * gref: reference to incoming granted frame. + * + * rx response (netif_rx_response_t) + * --------------------------------- + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | offset | flags | status | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * id: reflects id in receive request + * offset: offset in page of start of received packet + * flags: NETRXF_* + * status: -ve: NETIF_RSP_*; +ve: Rx'ed pkt size. + * + * NOTE: Historically, to support GSO on the frontend receive side, Linux + * netfront does not make use of the rx response id (because, as + * described below, extra info structures overlay the id field). + * Instead it assumes that responses always appear in the same ring + * slot as their corresponding request. Thus, to maintain + * compatibility, backends must make sure this is the case. + * + * Extra Info + * ========== + * + * Can be present if initial request or response has NET{T,R}XF_extra_info, + * or previous extra request has XEN_NETIF_EXTRA_MORE. + * + * The struct therefore needs to fit into either a tx or rx slot and + * is therefore limited to 8 octets. + * + * NOTE: Because extra info data overlays the usual request/response + * structures, there is no id information in the opposite direction. + * So, if an extra info overlays an rx response the frontend can + * assume that it is in the same ring slot as the request that was + * consumed to make the slot available, and the backend must ensure + * this assumption is true. + * + * extra info (netif_extra_info_t) + * ------------------------------- + * + * General format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags| type specific data | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | padding for tx | + * +-----+-----+-----+-----+ + * + * type: XEN_NETIF_EXTRA_TYPE_* + * flags: XEN_NETIF_EXTRA_FLAG_* + * padding for tx: present only in the tx case due to 8 octet limit + * from rx case. Not shown in type specific entries + * below. + * + * XEN_NETIF_EXTRA_TYPE_GSO: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags| size |type | pad | features | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * type: Must be XEN_NETIF_EXTRA_TYPE_GSO + * flags: XEN_NETIF_EXTRA_FLAG_* + * size: Maximum payload size of each segment. For example, + * for TCP this is just the path MSS. + * type: XEN_NETIF_GSO_TYPE_*: This determines the protocol of + * the packet and any extra features required to segment the + * packet properly. + * features: EN_NETIF_GSO_FEAT_*: This specifies any extra GSO + * features required to process this packet, such as ECN + * support for TCPv4. + * + * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags| addr | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} + * flags: XEN_NETIF_EXTRA_FLAG_* + * addr: address to add/remove + * + * XEN_NETIF_EXTRA_TYPE_HASH: + * + * A backend that supports teoplitz hashing is assumed to accept + * this type of extra info in transmit packets. + * A frontend that enables hashing is assumed to accept + * this type of extra info in receive packets. + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags|htype| alg |LSB ---- value ---- MSB| + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * type: Must be XEN_NETIF_EXTRA_TYPE_HASH + * flags: XEN_NETIF_EXTRA_FLAG_* + * htype: Hash type (one of _XEN_NETIF_CTRL_HASH_TYPE_* - see above) + * alg: The algorithm used to calculate the hash (one of + * XEN_NETIF_CTRL_HASH_TYPE_ALGORITHM_* - see above) + * value: Hash value */ /* Protocol checksum field is blank in the packet (hardware offload)? */ @@ -166,11 +974,11 @@ FILE_LICENCE ( MIT ); #define XEN_NETIF_MAX_TX_SIZE 0xFFFF struct netif_tx_request { - grant_ref_t gref; /* Reference to buffer page */ - uint16_t offset; /* Offset within buffer page */ - uint16_t flags; /* NETTXF_* */ - uint16_t id; /* Echoed in response message. */ - uint16_t size; /* Packet size in bytes. */ + grant_ref_t gref; + uint16_t offset; + uint16_t flags; + uint16_t id; + uint16_t size; }; typedef struct netif_tx_request netif_tx_request_t; @@ -179,9 +987,10 @@ typedef struct netif_tx_request netif_tx_request_t; #define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ #define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ #define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ -#define XEN_NETIF_EXTRA_TYPE_MAX (4) +#define XEN_NETIF_EXTRA_TYPE_HASH (4) /* u.hash */ +#define XEN_NETIF_EXTRA_TYPE_MAX (5) -/* netif_extra_info flags. */ +/* netif_extra_info_t flags. */ #define _XEN_NETIF_EXTRA_FLAG_MORE (0) #define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) @@ -191,55 +1000,27 @@ typedef struct netif_tx_request netif_tx_request_t; #define XEN_NETIF_GSO_TYPE_TCPV6 (2) /* - * This structure needs to fit within both netif_tx_request and - * netif_rx_response for compatibility. + * This structure needs to fit within both netif_tx_request_t and + * netif_rx_response_t for compatibility. */ struct netif_extra_info { - uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */ - uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */ - + uint8_t type; + uint8_t flags; union { - /* - * XEN_NETIF_EXTRA_TYPE_GSO: - */ struct { - /* - * Maximum payload size of each segment. For example, for TCP this - * is just the path MSS. - */ uint16_t size; - - /* - * GSO type. This determines the protocol of the packet and any - * extra features required to segment the packet properly. - */ - uint8_t type; /* XEN_NETIF_GSO_TYPE_* */ - - /* Future expansion. */ + uint8_t type; uint8_t pad; - - /* - * GSO features. This specifies any extra GSO features required - * to process this packet, such as ECN support for TCPv4. - */ - uint16_t features; /* XEN_NETIF_GSO_FEAT_* */ + uint16_t features; } gso; - - /* - * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: - * Backend advertises availability via 'feature-multicast-control' - * xenbus node containing value '1'. - * Frontend requests this feature by advertising - * 'request-multicast-control' xenbus node containing value '1'. - * If multicast control is requested then multicast flooding is - * disabled and the frontend must explicitly register its interest - * in multicast groups using dummy transmit requests containing - * MCAST_{ADD,DEL} extra-info fragments. - */ struct { - uint8_t addr[6]; /* Address to add/remove. */ + uint8_t addr[6]; } mcast; - + struct { + uint8_t type; + uint8_t algorithm; + uint8_t value[4]; + } hash; uint16_t pad[3]; } u; }; @@ -247,13 +1028,14 @@ typedef struct netif_extra_info netif_extra_info_t; struct netif_tx_response { uint16_t id; - int16_t status; /* NETIF_RSP_* */ + int16_t status; }; typedef struct netif_tx_response netif_tx_response_t; struct netif_rx_request { uint16_t id; /* Echoed in response message. */ - grant_ref_t gref; /* Reference to incoming granted frame */ + uint16_t pad; + grant_ref_t gref; }; typedef struct netif_rx_request netif_rx_request_t; @@ -273,11 +1055,15 @@ typedef struct netif_rx_request netif_rx_request_t; #define _NETRXF_extra_info (3) #define NETRXF_extra_info (1U<<_NETRXF_extra_info) +/* Packet has GSO prefix. Deprecated but included for compatibility */ +#define _NETRXF_gso_prefix (4) +#define NETRXF_gso_prefix (1U<<_NETRXF_gso_prefix) + struct netif_rx_response { uint16_t id; - uint16_t offset; /* Offset in page of start of received packet */ - uint16_t flags; /* NETRXF_* */ - int16_t status; /* -ve: NETIF_RSP_* ; +ve: Rx'ed pkt size. */ + uint16_t offset; + uint16_t flags; + int16_t status; }; typedef struct netif_rx_response netif_rx_response_t; @@ -291,7 +1077,7 @@ DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); #define NETIF_RSP_DROPPED -2 #define NETIF_RSP_ERROR -1 #define NETIF_RSP_OKAY 0 -/* No response: used for auxiliary requests (e.g., netif_tx_extra). */ +/* No response: used for auxiliary requests (e.g., netif_extra_info_t). */ #define NETIF_RSP_NULL 1 #endif diff --git a/src/include/xen/io/ring.h b/src/include/xen/io/ring.h index 89d7386..41b50e2 100644 --- a/src/include/xen/io/ring.h +++ b/src/include/xen/io/ring.h @@ -1,26 +1,9 @@ +/* SPDX-License-Identifier: MIT */ /****************************************************************************** * ring.h * * Shared producer-consumer ring macros. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Tim Deegan and Andrew Warfield November 2004. */ @@ -29,6 +12,21 @@ FILE_LICENCE ( MIT ); +/* + * When #include'ing this header, you need to provide the following + * declaration upfront: + * - standard integers types (uint8_t, uint16_t, etc) + * They are provided by stdint.h of the standard headers. + * + * In addition, if you intend to use the FLEX macros, you also need to + * provide the following, before invoking the FLEX macros: + * - size_t + * - memcpy + * - grant_ref_t + * These declarations are provided by string.h of the standard headers, + * and grant_table.h from the Xen public headers. + */ + #include "../xen-compat.h" #if __XEN_INTERFACE_VERSION__ < 0x00030208 @@ -82,9 +80,8 @@ typedef unsigned int RING_IDX; * of the shared memory area (PAGE_SIZE, for instance). To initialise * the front half: * - * mytag_front_ring_t front_ring; - * SHARED_RING_INIT((mytag_sring_t *)shared_page); - * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + * mytag_front_ring_t ring; + * XEN_FRONT_RING_INIT(&ring, (mytag_sring_t *)shared_page, PAGE_SIZE); * * Initializing the back follows similarly (note that only the front * initializes the shared ring): @@ -113,7 +110,7 @@ struct __name##_sring { \ uint8_t msg; \ } tapif_user; \ uint8_t pvt_pad[4]; \ - } private; \ + } pvt; \ uint8_t __pad[44]; \ union __name##_sring_entry ring[1]; /* variable-length */ \ }; \ @@ -158,24 +155,33 @@ typedef struct __name##_back_ring __name##_back_ring_t #define SHARED_RING_INIT(_s) do { \ (_s)->req_prod = (_s)->rsp_prod = 0; \ (_s)->req_event = (_s)->rsp_event = 1; \ - (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \ + (void)memset((_s)->pvt.pvt_pad, 0, sizeof((_s)->pvt.pvt_pad)); \ (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ } while(0) -#define FRONT_RING_INIT(_r, _s, __size) do { \ - (_r)->req_prod_pvt = 0; \ - (_r)->rsp_cons = 0; \ +#define FRONT_RING_ATTACH(_r, _s, _i, __size) do { \ + (_r)->req_prod_pvt = (_i); \ + (_r)->rsp_cons = (_i); \ (_r)->nr_ents = __RING_SIZE(_s, __size); \ (_r)->sring = (_s); \ } while (0) -#define BACK_RING_INIT(_r, _s, __size) do { \ - (_r)->rsp_prod_pvt = 0; \ - (_r)->req_cons = 0; \ +#define FRONT_RING_INIT(_r, _s, __size) FRONT_RING_ATTACH(_r, _s, 0, __size) + +#define XEN_FRONT_RING_INIT(r, s, size) do { \ + SHARED_RING_INIT(s); \ + FRONT_RING_INIT(r, s, size); \ +} while (0) + +#define BACK_RING_ATTACH(_r, _s, _i, __size) do { \ + (_r)->rsp_prod_pvt = (_i); \ + (_r)->req_cons = (_i); \ (_r)->nr_ents = __RING_SIZE(_s, __size); \ (_r)->sring = (_s); \ } while (0) +#define BACK_RING_INIT(_r, _s, __size) BACK_RING_ATTACH(_r, _s, 0, __size) + /* How big is this ring? */ #define RING_SIZE(_r) \ ((_r)->nr_ents) @@ -191,11 +197,11 @@ typedef struct __name##_back_ring __name##_back_ring_t (RING_FREE_REQUESTS(_r) == 0) /* Test if there are outstanding messages to be processed on a ring. */ -#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ +#define XEN_RING_NR_UNCONSUMED_RESPONSES(_r) \ ((_r)->sring->rsp_prod - (_r)->rsp_cons) #ifdef __GNUC__ -#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ +#define XEN_RING_NR_UNCONSUMED_REQUESTS(_r) ({ \ unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ unsigned int rsp = RING_SIZE(_r) - \ ((_r)->req_cons - (_r)->rsp_prod_pvt); \ @@ -203,13 +209,27 @@ typedef struct __name##_back_ring __name##_back_ring_t }) #else /* Same as above, but without the nice GCC ({ ... }) syntax. */ -#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ +#define XEN_RING_NR_UNCONSUMED_REQUESTS(_r) \ ((((_r)->sring->req_prod - (_r)->req_cons) < \ (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \ ((_r)->sring->req_prod - (_r)->req_cons) : \ (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) #endif +#ifdef XEN_RING_HAS_UNCONSUMED_IS_BOOL +/* + * These variants should only be used in case no caller is abusing them for + * obtaining the number of unconsumed responses/requests. + */ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ + (!!XEN_RING_NR_UNCONSUMED_RESPONSES(_r)) +#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ + (!!XEN_RING_NR_UNCONSUMED_REQUESTS(_r)) +#else +#define RING_HAS_UNCONSUMED_RESPONSES(_r) XEN_RING_NR_UNCONSUMED_RESPONSES(_r) +#define RING_HAS_UNCONSUMED_REQUESTS(_r) XEN_RING_NR_UNCONSUMED_REQUESTS(_r) +#endif + /* Direct access to individual ring elements, by index. */ #define RING_GET_REQUEST(_r, _idx) \ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) @@ -217,6 +237,23 @@ typedef struct __name##_back_ring __name##_back_ring_t #define RING_GET_RESPONSE(_r, _idx) \ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) +/* + * Get a local copy of a request/response. + * + * Use this in preference to RING_GET_{REQUEST,RESPONSE}() so all processing is + * done on a local copy that cannot be modified by the other end. + * + * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this + * to be ineffective where dest is a struct which consists of only bitfields. + */ +#define RING_COPY_(type, r, idx, dest) do { \ + /* Use volatile to force the copy into dest. */ \ + *(dest) = *(volatile __typeof__(dest))RING_GET_##type(r, idx); \ +} while (0) + +#define RING_COPY_REQUEST(r, idx, req) RING_COPY_(REQUEST, r, idx, req) +#define RING_COPY_RESPONSE(r, idx, rsp) RING_COPY_(RESPONSE, r, idx, rsp) + /* Loop termination condition: Would the specified index overflow the ring? */ #define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) @@ -225,6 +262,10 @@ typedef struct __name##_back_ring __name##_back_ring_t #define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) +/* Ill-behaved backend determination: Can there be this many responses? */ +#define RING_RESPONSE_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_cons) > RING_SIZE(_r)) + #define RING_PUSH_REQUESTS(_r) do { \ xen_wmb(); /* back sees requests /before/ updated producer index */ \ (_r)->sring->req_prod = (_r)->req_prod_pvt; \ @@ -301,6 +342,149 @@ typedef struct __name##_back_ring __name##_back_ring_t (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ } while (0) + +/* + * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and + * functions to check if there is data on the ring, and to read and + * write to them. + * + * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but + * does not define the indexes page. As different protocols can have + * extensions to the basic format, this macro allow them to define their + * own struct. + * + * XEN_FLEX_RING_SIZE + * Convenience macro to calculate the size of one of the two rings + * from the overall order. + * + * $NAME_mask + * Function to apply the size mask to an index, to reduce the index + * within the range [0-size]. + * + * $NAME_read_packet + * Function to read data from the ring. The amount of data to read is + * specified by the "size" argument. + * + * $NAME_write_packet + * Function to write data to the ring. The amount of data to write is + * specified by the "size" argument. + * + * $NAME_get_ring_ptr + * Convenience function that returns a pointer to read/write to the + * ring at the right location. + * + * $NAME_data_intf + * Indexes page, shared between frontend and backend. It also + * contains the array of grant refs. + * + * $NAME_queued + * Function to calculate how many bytes are currently on the ring, + * ready to be read. It can also be used to calculate how much free + * space is currently on the ring (XEN_FLEX_RING_SIZE() - + * $NAME_queued()). + */ + +#ifndef XEN_PAGE_SHIFT +/* The PAGE_SIZE for ring protocols and hypercall interfaces is always + * 4K, regardless of the architecture, and page granularity chosen by + * operating systems. + */ +#define XEN_PAGE_SHIFT 12 +#endif +#define XEN_FLEX_RING_SIZE(order) \ + (1UL << ((order) + XEN_PAGE_SHIFT - 1)) + +#define DEFINE_XEN_FLEX_RING(name) \ +static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size) \ +{ \ + return idx & (ring_size - 1); \ +} \ + \ +static inline unsigned char *name##_get_ring_ptr(unsigned char *buf, \ + RING_IDX idx, \ + RING_IDX ring_size) \ +{ \ + return buf + name##_mask(idx, ring_size); \ +} \ + \ +static inline void name##_read_packet(void *opaque, \ + const unsigned char *buf, \ + size_t size, \ + RING_IDX masked_prod, \ + RING_IDX *masked_cons, \ + RING_IDX ring_size) \ +{ \ + if (*masked_cons < masked_prod || \ + size <= ring_size - *masked_cons) { \ + memcpy(opaque, buf + *masked_cons, size); \ + } else { \ + memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons); \ + memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf, \ + size - (ring_size - *masked_cons)); \ + } \ + *masked_cons = name##_mask(*masked_cons + size, ring_size); \ +} \ + \ +static inline void name##_write_packet(unsigned char *buf, \ + const void *opaque, \ + size_t size, \ + RING_IDX *masked_prod, \ + RING_IDX masked_cons, \ + RING_IDX ring_size) \ +{ \ + if (*masked_prod < masked_cons || \ + size <= ring_size - *masked_prod) { \ + memcpy(buf + *masked_prod, opaque, size); \ + } else { \ + memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod); \ + memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod), \ + size - (ring_size - *masked_prod)); \ + } \ + *masked_prod = name##_mask(*masked_prod + size, ring_size); \ +} \ + \ +static inline RING_IDX name##_queued(RING_IDX prod, \ + RING_IDX cons, \ + RING_IDX ring_size) \ +{ \ + RING_IDX size; \ + \ + if (prod == cons) \ + return 0; \ + \ + prod = name##_mask(prod, ring_size); \ + cons = name##_mask(cons, ring_size); \ + \ + if (prod == cons) \ + return ring_size; \ + \ + if (prod > cons) \ + size = prod - cons; \ + else \ + size = ring_size - (cons - prod); \ + return size; \ +} \ + \ +struct name##_data { \ + unsigned char *in; /* half of the allocation */ \ + unsigned char *out; /* half of the allocation */ \ +} + +#define DEFINE_XEN_FLEX_RING_AND_INTF(name) \ +struct name##_data_intf { \ + RING_IDX in_cons, in_prod; \ + \ + uint8_t pad1[56]; \ + \ + RING_IDX out_cons, out_prod; \ + \ + uint8_t pad2[56]; \ + \ + RING_IDX ring_order; \ + grant_ref_t ref[]; \ +}; \ +DEFINE_XEN_FLEX_RING(name) + #endif /* __XEN_PUBLIC_IO_RING_H__ */ /* diff --git a/src/include/xen/io/xenbus.h b/src/include/xen/io/xenbus.h index 182aeb9..473f538 100644 --- a/src/include/xen/io/xenbus.h +++ b/src/include/xen/io/xenbus.h @@ -1,26 +1,9 @@ +/* SPDX-License-Identifier: MIT */ /***************************************************************************** * xenbus.h * * Xenbus protocol details. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Copyright (C) 2005 XenSource Ltd. */ diff --git a/src/include/xen/io/xs_wire.h b/src/include/xen/io/xs_wire.h index 50415f0..cffd75c 100644 --- a/src/include/xen/io/xs_wire.h +++ b/src/include/xen/io/xs_wire.h @@ -1,25 +1,8 @@ +/* SPDX-License-Identifier: MIT */ /* * Details of the "wire" protocol between Xen Store Daemon and client * library or guest kernel. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Copyright (C) 2005 Rusty Russell IBM Corporation */ @@ -30,7 +13,8 @@ FILE_LICENCE ( MIT ); enum xsd_sockmsg_type { - XS_DEBUG, + XS_CONTROL, +#define XS_DEBUG XS_CONTROL XS_DIRECTORY, XS_READ, XS_GET_PERMS, @@ -50,8 +34,13 @@ enum xsd_sockmsg_type XS_IS_DOMAIN_INTRODUCED, XS_RESUME, XS_SET_TARGET, - XS_RESTRICT, - XS_RESET_WATCHES + /* XS_RESTRICT has been removed */ + XS_RESET_WATCHES = XS_SET_TARGET + 2, + XS_DIRECTORY_PART, + + XS_TYPE_COUNT, /* Number of valid types. */ + + XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */ }; #define XS_WRITE_NONE "NONE" @@ -67,11 +56,12 @@ struct xsd_errors #ifdef EINVAL #define XSD_ERROR(x) { x, #x } /* LINTED: static unused */ -static struct xsd_errors xsd_errors[] +static const struct xsd_errors xsd_errors[] #if defined(__GNUC__) __attribute__((unused)) #endif = { + /* /!\ New errors should be added at the end of the array. */ XSD_ERROR(EINVAL), XSD_ERROR(EACCES), XSD_ERROR(EEXIST), @@ -86,7 +76,8 @@ __attribute__((unused)) XSD_ERROR(EBUSY), XSD_ERROR(EAGAIN), XSD_ERROR(EISCONN), - XSD_ERROR(E2BIG) + XSD_ERROR(E2BIG), + XSD_ERROR(EPERM), }; #endif @@ -118,6 +109,9 @@ struct xenstore_domain_interface { char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */ XENSTORE_RING_IDX req_cons, req_prod; XENSTORE_RING_IDX rsp_cons, rsp_prod; + uint32_t server_features; /* Bitmap of features supported by the server */ + uint32_t connection; + uint32_t error; }; /* Violating this is very bad. See docs/misc/xenstore.txt. */ @@ -127,6 +121,21 @@ struct xenstore_domain_interface { #define XENSTORE_ABS_PATH_MAX 3072 #define XENSTORE_REL_PATH_MAX 2048 +/* The ability to reconnect a ring */ +#define XENSTORE_SERVER_FEATURE_RECONNECTION 1 +/* The presence of the "error" field in the ring page */ +#define XENSTORE_SERVER_FEATURE_ERROR 2 + +/* Valid values for the connection field */ +#define XENSTORE_CONNECTED 0 /* the steady-state */ +#define XENSTORE_RECONNECT 1 /* reconnect in progress */ + +/* Valid values for the error field */ +#define XENSTORE_ERROR_NONE 0 /* No error */ +#define XENSTORE_ERROR_COMM 1 /* Communication problem */ +#define XENSTORE_ERROR_RINGIDX 2 /* Invalid ring index */ +#define XENSTORE_ERROR_PROTO 3 /* Protocol violation (payload too long) */ + #endif /* _XS_WIRE_H */ /* diff --git a/src/include/xen/memory.h b/src/include/xen/memory.h index 0c76c0d..4783d99 100644 --- a/src/include/xen/memory.h +++ b/src/include/xen/memory.h @@ -1,26 +1,9 @@ +/* SPDX-License-Identifier: MIT */ /****************************************************************************** * memory.h * * Memory reservation and information. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Copyright (c) 2005, Keir Fraser */ @@ -30,6 +13,7 @@ FILE_LICENCE ( MIT ); #include "xen.h" +#include "physdev.h" /* * Increase or decrease the specified domain's memory reservation. Returns the @@ -57,6 +41,8 @@ FILE_LICENCE ( MIT ); /* Flag to request allocation only from the node specified */ #define XENMEMF_exact_node_request (1<<17) #define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request) +/* Flag to indicate the node specified is virtual node */ +#define XENMEMF_vnode (1<<18) #endif struct xen_memory_reservation { @@ -101,6 +87,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t); * Returns zero on complete success, otherwise a negative error code. * On complete success then always @nr_exchanged == @in.nr_extents. * On partial success @nr_exchanged indicates how much work was done. + * + * Note that only PV guests can use this operation. */ #define XENMEM_exchange 11 struct xen_memory_exchange { @@ -145,16 +133,23 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t); */ #define XENMEM_maximum_ram_page 2 +struct xen_memory_domain { + /* [IN] Domain information is being queried for. */ + domid_t domid; +}; + /* * Returns the current or maximum memory reservation, in pages, of the * specified domain (may be DOMID_SELF). Returns -ve errcode on failure. - * arg == addr of domid_t. + * arg == addr of struct xen_memory_domain. */ #define XENMEM_current_reservation 3 #define XENMEM_maximum_reservation 4 /* - * Returns the maximum GPFN in use by the guest, or -ve errcode on failure. + * Returns the maximum GFN in use by the specified domain (may be DOMID_SELF). + * Returns -ve errcode on failure. + * arg == addr of struct xen_memory_domain. */ #define XENMEM_maximum_gpfn 14 @@ -219,11 +214,16 @@ DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t); #define XENMAPSPACE_gmfn_range 3 /* GMFN range, XENMEM_add_to_physmap only. */ #define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom, * XENMEM_add_to_physmap_batch only. */ +#define XENMAPSPACE_dev_mmio 5 /* device mmio region + ARM only; the region is mapped in + Stage-2 using the Normal Memory + Inner/Outer Write-Back Cacheable + memory attribute. */ /* ` } */ /* * Sets the GPFN at which a particular page appears in the specified guest's - * pseudophysical address space. + * physical address space (translated guests only). * arg == addr of xen_add_to_physmap_t. */ #define XENMEM_add_to_physmap 7 @@ -257,7 +257,15 @@ struct xen_add_to_physmap_batch { /* Number of pages to go through */ uint16_t size; - domid_t foreign_domid; /* IFF gmfn_foreign */ + +#if __XEN_INTERFACE_VERSION__ < 0x00040700 + domid_t foreign_domid; /* IFF gmfn_foreign. Should be 0 for other spaces. */ +#else + union xen_add_to_physmap_batch_extra { + domid_t foreign_domid; /* gmfn_foreign */ + uint16_t res0; /* All the other spaces. Should be 0 */ + } u; +#endif /* Indexes into space being mapped. */ XEN_GUEST_HANDLE(xen_ulong_t) idxs; @@ -282,7 +290,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_range_t); /* * Unmaps the page appearing at a particular GPFN from the specified guest's - * pseudophysical address space. + * physical address space (translated guests only). * arg == addr of xen_remove_from_physmap_t. */ #define XENMEM_remove_from_physmap 15 @@ -325,6 +333,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t); /* * Returns the real physical memory map. Passes the same structure as * XENMEM_memory_map. + * Specifying buffer as NULL will return the number of entries required + * to store the complete memory map. * arg == addr of xen_memory_map_t. */ #define XENMEM_machine_memory_map 10 @@ -374,23 +384,29 @@ typedef struct xen_pod_target xen_pod_target_t; #define XENMEM_paging_op_evict 1 #define XENMEM_paging_op_prep 2 -struct xen_mem_event_op { - uint8_t op; /* XENMEM_*_op_* */ +struct xen_mem_paging_op { + uint8_t op; /* XENMEM_paging_op_* */ domid_t domain; - - /* PAGING_PREP IN: buffer to immediately fill page in */ - uint64_aligned_t buffer; - /* Other OPs */ - uint64_aligned_t gfn; /* IN: gfn of page being operated on */ + /* IN: (XENMEM_paging_op_prep) buffer to immediately fill page from */ + XEN_GUEST_HANDLE_64(const_uint8) buffer; + /* IN: gfn of page being operated on */ + uint64_aligned_t gfn; }; -typedef struct xen_mem_event_op xen_mem_event_op_t; -DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t); +typedef struct xen_mem_paging_op xen_mem_paging_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_paging_op_t); #define XENMEM_access_op 21 -#define XENMEM_access_op_resume 0 -#define XENMEM_access_op_set_access 1 -#define XENMEM_access_op_get_access 2 +#define XENMEM_access_op_set_access 0 +#define XENMEM_access_op_get_access 1 +/* + * XENMEM_access_op_enable_emulate and XENMEM_access_op_disable_emulate are + * currently unused, but since they have been in use please do not reuse them. + * + * #define XENMEM_access_op_enable_emulate 2 + * #define XENMEM_access_op_disable_emulate 3 + */ +#define XENMEM_access_op_set_access_multi 4 typedef enum { XENMEM_access_n, @@ -423,7 +439,8 @@ struct xen_mem_access_op { uint8_t access; domid_t domid; /* - * Number of pages for set op + * Number of pages for set op (or size of pfn_list for + * XENMEM_access_op_set_access_multi) * Ignored on setting default access and other ops */ uint32_t nr; @@ -433,6 +450,16 @@ struct xen_mem_access_op { * ~0ull is used to set and get the default access for pages */ uint64_aligned_t pfn; + /* + * List of pfns to set access for + * Used only with XENMEM_access_op_set_access_multi + */ + XEN_GUEST_HANDLE(const_uint64) pfn_list; + /* + * Corresponding list of access settings for pfn_list + * Used only with XENMEM_access_op_set_access_multi + */ + XEN_GUEST_HANDLE(const_uint8) access_list; }; typedef struct xen_mem_access_op xen_mem_access_op_t; DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t); @@ -441,12 +468,14 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t); #define XENMEM_sharing_op_nominate_gfn 0 #define XENMEM_sharing_op_nominate_gref 1 #define XENMEM_sharing_op_share 2 -#define XENMEM_sharing_op_resume 3 -#define XENMEM_sharing_op_debug_gfn 4 -#define XENMEM_sharing_op_debug_mfn 5 -#define XENMEM_sharing_op_debug_gref 6 -#define XENMEM_sharing_op_add_physmap 7 -#define XENMEM_sharing_op_audit 8 +#define XENMEM_sharing_op_debug_gfn 3 +#define XENMEM_sharing_op_debug_mfn 4 +#define XENMEM_sharing_op_debug_gref 5 +#define XENMEM_sharing_op_add_physmap 6 +#define XENMEM_sharing_op_audit 7 +#define XENMEM_sharing_op_range_share 8 +#define XENMEM_sharing_op_fork 9 +#define XENMEM_sharing_op_fork_reset 10 #define XENMEM_SHARING_OP_S_HANDLE_INVALID (-10) #define XENMEM_SHARING_OP_C_HANDLE_INVALID (-9) @@ -455,10 +484,10 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t); * for sharing utilities sitting as "filters" in IO backends * (e.g. memshr + blktap(2)). The IO backend is only exposed * to grant references, and this allows sharing of the grefs */ -#define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG (1ULL << 62) +#define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG (xen_mk_ullong(1) << 62) #define XENMEM_SHARING_OP_FIELD_MAKE_GREF(field, val) \ - (field) = (XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG | val) + (field) = (XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG | (val)) #define XENMEM_SHARING_OP_FIELD_IS_GREF(field) \ ((field) & XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG) #define XENMEM_SHARING_OP_FIELD_GET_GREF(field) \ @@ -483,6 +512,13 @@ struct xen_mem_sharing_op { uint64_aligned_t client_handle; /* IN: handle to the client page */ domid_t client_domain; /* IN: the client domain id */ } share; + struct mem_sharing_op_range { /* OP_RANGE_SHARE */ + uint64_aligned_t first_gfn; /* IN: the first gfn */ + uint64_aligned_t last_gfn; /* IN: the last gfn */ + uint64_aligned_t opaque; /* Must be set to 0 */ + domid_t client_domain; /* IN: the client domain id */ + uint16_t _pad[3]; /* Must be set to 0 */ + } range; struct mem_sharing_op_debug { /* OP_DEBUG_xxx */ union { uint64_aligned_t gfn; /* IN: gfn to debug */ @@ -490,6 +526,17 @@ struct xen_mem_sharing_op { uint32_t gref; /* IN: gref to debug */ } u; } debug; + struct mem_sharing_op_fork { /* OP_FORK{,_RESET} */ + domid_t parent_domain; /* IN: parent's domain id */ +/* Only makes sense for short-lived forks */ +#define XENMEM_FORK_WITH_IOMMU_ALLOWED (1u << 0) +/* Only makes sense for short-lived forks */ +#define XENMEM_FORK_BLOCK_INTERRUPTS (1u << 1) +#define XENMEM_FORK_RESET_STATE (1u << 2) +#define XENMEM_FORK_RESET_MEMORY (1u << 3) + uint16_t flags; /* IN: optional settings */ + uint32_t pad; /* Must be set to 0 */ + } fork; } u; }; typedef struct xen_mem_sharing_op xen_mem_sharing_op_t; @@ -511,8 +558,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t); * * Note that a valid claim may be staked even after memory has been * allocated for a domain. In this case, the claim is not incremental, - * i.e. if the domain's tot_pages is 3, and a claim is staked for 10, - * only 7 additional pages are claimed. + * i.e. if the domain's total page count is 3, and a claim is staked + * for 10, only 7 additional pages are claimed. * * Caller must be privileged or the hypercall fails. */ @@ -520,12 +567,173 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t); /* * XENMEM_claim_pages flags - the are no flags at this time. - * The zero value is appropiate. + * The zero value is appropriate. */ +/* + * With some legacy devices, certain guest-physical addresses cannot safely + * be used for other purposes, e.g. to map guest RAM. This hypercall + * enumerates those regions so the toolstack can avoid using them. + */ +#define XENMEM_reserved_device_memory_map 27 +struct xen_reserved_device_memory { + xen_pfn_t start_pfn; + xen_ulong_t nr_pages; +}; +typedef struct xen_reserved_device_memory xen_reserved_device_memory_t; +DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_t); + +struct xen_reserved_device_memory_map { +#define XENMEM_RDM_ALL 1 /* Request all regions (ignore dev union). */ + /* IN */ + uint32_t flags; + /* + * IN/OUT + * + * Gets set to the required number of entries when too low, + * signaled by error code -ERANGE. + */ + unsigned int nr_entries; + /* OUT */ + XEN_GUEST_HANDLE(xen_reserved_device_memory_t) buffer; + /* IN */ + union { + physdev_pci_device_t pci; + } dev; +}; +typedef struct xen_reserved_device_memory_map xen_reserved_device_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_reserved_device_memory_map_t); + #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ -/* Next available subop number is 26 */ +/* + * Get the pages for a particular guest resource, so that they can be + * mapped directly by a tools domain. + */ +#define XENMEM_acquire_resource 28 +struct xen_mem_acquire_resource { + /* IN - The domain whose resource is to be mapped */ + domid_t domid; + /* IN - the type of resource */ + uint16_t type; + +#define XENMEM_resource_ioreq_server 0 +#define XENMEM_resource_grant_table 1 +#define XENMEM_resource_vmtrace_buf 2 + + /* + * IN - a type-specific resource identifier, which must be zero + * unless stated otherwise. + * + * type == XENMEM_resource_ioreq_server -> id == ioreq server id + * type == XENMEM_resource_grant_table -> id defined below + */ + uint32_t id; + +#define XENMEM_resource_grant_table_id_shared 0 +#define XENMEM_resource_grant_table_id_status 1 + + /* + * IN/OUT + * + * As an IN parameter number of frames of the resource to be mapped. + * This value may be updated over the course of the operation. + * + * When frame_list is NULL and nr_frames is 0, this is interpreted as a + * request for the size of the resource, which shall be returned in the + * nr_frames field. + * + * The size of a resource will never be zero, but a nonzero result doesn't + * guarantee that a subsequent mapping request will be successful. There + * are further type/id specific constraints which may change between the + * two calls. + */ + uint32_t nr_frames; + /* + * Padding field, must be zero on input. + * In a previous version this was an output field with the lowest bit + * named XENMEM_rsrc_acq_caller_owned. Future versions of this interface + * will not reuse this bit as an output with the field being zero on + * input. + */ + uint32_t pad; + /* + * IN - the index of the initial frame to be mapped. This parameter + * is ignored if nr_frames is 0. This value may be updated + * over the course of the operation. + */ + uint64_t frame; + +#define XENMEM_resource_ioreq_server_frame_bufioreq 0 +#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n)) + + /* + * IN/OUT - If the tools domain is PV then, upon return, frame_list + * will be populated with the MFNs of the resource. + * If the tools domain is HVM then it is expected that, on + * entry, frame_list will be populated with a list of GFNs + * that will be mapped to the MFNs of the resource. + * If -EIO is returned then the frame_list has only been + * partially mapped and it is up to the caller to unmap all + * the GFNs. + * This parameter may be NULL if nr_frames is 0. This + * value may be updated over the course of the operation. + */ + XEN_GUEST_HANDLE(xen_pfn_t) frame_list; +}; +typedef struct xen_mem_acquire_resource xen_mem_acquire_resource_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_acquire_resource_t); + +/* + * XENMEM_get_vnumainfo used by guest to get + * vNUMA topology from hypervisor. + */ +#define XENMEM_get_vnumainfo 26 + +/* vNUMA node memory ranges */ +struct xen_vmemrange { + uint64_t start, end; + unsigned int flags; + unsigned int nid; +}; +typedef struct xen_vmemrange xen_vmemrange_t; +DEFINE_XEN_GUEST_HANDLE(xen_vmemrange_t); + +/* + * vNUMA topology specifies vNUMA node number, distance table, + * memory ranges and vcpu mapping provided for guests. + * XENMEM_get_vnumainfo hypercall expects to see from guest + * nr_vnodes, nr_vmemranges and nr_vcpus to indicate available memory. + * After filling guests structures, nr_vnodes, nr_vmemranges and nr_vcpus + * copied back to guest. Domain returns expected values of nr_vnodes, + * nr_vmemranges and nr_vcpus to guest if the values where incorrect. + */ +struct xen_vnuma_topology_info { + /* IN */ + domid_t domid; + uint16_t pad; + /* IN/OUT */ + unsigned int nr_vnodes; + unsigned int nr_vcpus; + unsigned int nr_vmemranges; + /* OUT */ + union { + XEN_GUEST_HANDLE(uint) h; + uint64_t pad; + } vdistance; + union { + XEN_GUEST_HANDLE(uint) h; + uint64_t pad; + } vcpu_to_vnode; + union { + XEN_GUEST_HANDLE(xen_vmemrange_t) h; + uint64_t pad; + } vmemrange; +}; +typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t; +DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t); + +/* Next available subop number is 29 */ #endif /* __XEN_PUBLIC_MEMORY_H__ */ diff --git a/src/include/xen/physdev.h b/src/include/xen/physdev.h new file mode 100644 index 0000000..09a4496 --- /dev/null +++ b/src/include/xen/physdev.h @@ -0,0 +1,368 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (c) 2006, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_PHYSDEV_H__ +#define __XEN_PUBLIC_PHYSDEV_H__ + +FILE_LICENCE ( MIT ); + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * int physdev_op(int cmd, void *args) + * @cmd == PHYSDEVOP_??? (physdev operation). + * @args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Notify end-of-interrupt (EOI) for the specified IRQ. + * @arg == pointer to physdev_eoi structure. + */ +#define PHYSDEVOP_eoi 12 +struct physdev_eoi { + /* IN */ + uint32_t irq; +}; +typedef struct physdev_eoi physdev_eoi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t); + +/* + * Register a shared page for the hypervisor to indicate whether the guest + * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly + * once the guest used this function in that the associated event channel + * will automatically get unmasked. The page registered is used as a bit + * array indexed by Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v1 17 +/* + * Register a shared page for the hypervisor to indicate whether the + * guest must issue PHYSDEVOP_eoi. This hypercall is very similar to + * PHYSDEVOP_pirq_eoi_gmfn_v1 but it doesn't change the semantics of + * PHYSDEVOP_eoi. The page registered is used as a bit array indexed by + * Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v2 28 +struct physdev_pirq_eoi_gmfn { + /* IN */ + xen_pfn_t gmfn; +}; +typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t); + +/* + * Query the status of an IRQ line. + * @arg == pointer to physdev_irq_status_query structure. + */ +#define PHYSDEVOP_irq_status_query 5 +struct physdev_irq_status_query { + /* IN */ + uint32_t irq; + /* OUT */ + uint32_t flags; /* XENIRQSTAT_* */ +}; +typedef struct physdev_irq_status_query physdev_irq_status_query_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t); + +/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */ +#define _XENIRQSTAT_needs_eoi (0) +#define XENIRQSTAT_needs_eoi (1U<<_XENIRQSTAT_needs_eoi) + +/* IRQ shared by multiple guests? */ +#define _XENIRQSTAT_shared (1) +#define XENIRQSTAT_shared (1U<<_XENIRQSTAT_shared) + +/* + * Set the current VCPU's I/O privilege level. + * @arg == pointer to physdev_set_iopl structure. + */ +#define PHYSDEVOP_set_iopl 6 +struct physdev_set_iopl { + /* IN */ + uint32_t iopl; +}; +typedef struct physdev_set_iopl physdev_set_iopl_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t); + +/* + * Set the current VCPU's I/O-port permissions bitmap. + * @arg == pointer to physdev_set_iobitmap structure. + */ +#define PHYSDEVOP_set_iobitmap 7 +struct physdev_set_iobitmap { + /* IN */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 + XEN_GUEST_HANDLE(uint8) bitmap; +#else + uint8_t *bitmap; +#endif + uint32_t nr_ports; +}; +typedef struct physdev_set_iobitmap physdev_set_iobitmap_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t); + +/* + * Read or write an IO-APIC register. + * @arg == pointer to physdev_apic structure. + */ +#define PHYSDEVOP_apic_read 8 +#define PHYSDEVOP_apic_write 9 +struct physdev_apic { + /* IN */ + unsigned long apic_physbase; + uint32_t reg; + /* IN or OUT */ + uint32_t value; +}; +typedef struct physdev_apic physdev_apic_t; +DEFINE_XEN_GUEST_HANDLE(physdev_apic_t); + +/* + * Allocate or free a physical upcall vector for the specified IRQ line. + * @arg == pointer to physdev_irq structure. + */ +#define PHYSDEVOP_alloc_irq_vector 10 +#define PHYSDEVOP_free_irq_vector 11 +struct physdev_irq { + /* IN */ + uint32_t irq; + /* IN or OUT */ + uint32_t vector; +}; +typedef struct physdev_irq physdev_irq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_t); + +#define MAP_PIRQ_TYPE_MSI 0x0 +#define MAP_PIRQ_TYPE_GSI 0x1 +#define MAP_PIRQ_TYPE_UNKNOWN 0x2 +#define MAP_PIRQ_TYPE_MSI_SEG 0x3 +#define MAP_PIRQ_TYPE_MULTI_MSI 0x4 + +#define PHYSDEVOP_map_pirq 13 +struct physdev_map_pirq { + domid_t domid; + /* IN */ + int type; + /* IN (ignored for ..._MULTI_MSI) */ + int index; + /* IN or OUT */ + int pirq; + /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */ + int bus; + /* IN */ + int devfn; + /* IN (also OUT for ..._MULTI_MSI) */ + int entry_nr; + /* IN */ + uint64_t table_base; +}; +typedef struct physdev_map_pirq physdev_map_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t); + +#define PHYSDEVOP_unmap_pirq 14 +struct physdev_unmap_pirq { + domid_t domid; + /* IN */ + int pirq; +}; + +typedef struct physdev_unmap_pirq physdev_unmap_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t); + +#define PHYSDEVOP_manage_pci_add 15 +#define PHYSDEVOP_manage_pci_remove 16 +struct physdev_manage_pci { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; + +typedef struct physdev_manage_pci physdev_manage_pci_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t); + +#define PHYSDEVOP_restore_msi 19 +struct physdev_restore_msi { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_restore_msi physdev_restore_msi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t); + +#define PHYSDEVOP_manage_pci_add_ext 20 +struct physdev_manage_pci_ext { + /* IN */ + uint8_t bus; + uint8_t devfn; + uint32_t is_extfn; + uint32_t is_virtfn; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +}; + +typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t); + +/* + * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() + * hypercall since 0x00030202. + */ +struct physdev_op { + uint32_t cmd; + union { + physdev_irq_status_query_t irq_status_query; + physdev_set_iopl_t set_iopl; + physdev_set_iobitmap_t set_iobitmap; + physdev_apic_t apic_op; + physdev_irq_t irq_op; + } u; +}; +typedef struct physdev_op physdev_op_t; +DEFINE_XEN_GUEST_HANDLE(physdev_op_t); + +#define PHYSDEVOP_setup_gsi 21 +struct physdev_setup_gsi { + int gsi; + /* IN */ + uint8_t triggering; + /* IN */ + uint8_t polarity; + /* IN */ +}; + +typedef struct physdev_setup_gsi physdev_setup_gsi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_setup_gsi_t); + +/* leave PHYSDEVOP 22 free */ + +/* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI + * the hypercall returns a free pirq */ +#define PHYSDEVOP_get_free_pirq 23 +struct physdev_get_free_pirq { + /* IN */ + int type; + /* OUT */ + uint32_t pirq; +}; + +typedef struct physdev_get_free_pirq physdev_get_free_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t); + +#define XEN_PCI_MMCFG_RESERVED 0x1 + +#define PHYSDEVOP_pci_mmcfg_reserved 24 +struct physdev_pci_mmcfg_reserved { + uint64_t address; + uint16_t segment; + uint8_t start_bus; + uint8_t end_bus; + uint32_t flags; +}; +typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t); + +#define XEN_PCI_DEV_EXTFN 0x1 +#define XEN_PCI_DEV_VIRTFN 0x2 +#define XEN_PCI_DEV_PXM 0x4 + +#define PHYSDEVOP_pci_device_add 25 +struct physdev_pci_device_add { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; + uint32_t flags; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; + /* + * Optional parameters array. + * First element ([0]) is PXM domain associated with the device (if + * XEN_PCI_DEV_PXM is set) + */ + uint32_t optarr[XEN_FLEX_ARRAY_DIM]; +}; +typedef struct physdev_pci_device_add physdev_pci_device_add_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t); + +#define PHYSDEVOP_pci_device_remove 26 +#define PHYSDEVOP_restore_msi_ext 27 +/* + * Dom0 should use these two to announce MMIO resources assigned to + * MSI-X capable devices won't (prepare) or may (release) change. + */ +#define PHYSDEVOP_prepare_msix 30 +#define PHYSDEVOP_release_msix 31 +struct physdev_pci_device { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_pci_device physdev_pci_device_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t); + +#define PHYSDEVOP_DBGP_RESET_PREPARE 1 +#define PHYSDEVOP_DBGP_RESET_DONE 2 + +#define PHYSDEVOP_DBGP_BUS_UNKNOWN 0 +#define PHYSDEVOP_DBGP_BUS_PCI 1 + +#define PHYSDEVOP_dbgp_op 29 +struct physdev_dbgp_op { + /* IN */ + uint8_t op; + uint8_t bus; + union { + physdev_pci_device_t pci; + } u; +}; +typedef struct physdev_dbgp_op physdev_dbgp_op_t; +DEFINE_XEN_GUEST_HANDLE(physdev_dbgp_op_t); + +/* + * Notify that some PIRQ-bound event channels have been unmasked. + * ** This command is obsolete since interface version 0x00030202 and is ** + * ** unsupported by newer versions of Xen. ** + */ +#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4 + +#if __XEN_INTERFACE_VERSION__ < 0x00040600 +/* + * These all-capitals physdev operation names are superceded by the new names + * (defined above) since interface version 0x00030202. The guard above was + * added post-4.5 only though and hence shouldn't check for 0x00030202. + */ +#define PHYSDEVOP_IRQ_STATUS_QUERY PHYSDEVOP_irq_status_query +#define PHYSDEVOP_SET_IOPL PHYSDEVOP_set_iopl +#define PHYSDEVOP_SET_IOBITMAP PHYSDEVOP_set_iobitmap +#define PHYSDEVOP_APIC_READ PHYSDEVOP_apic_read +#define PHYSDEVOP_APIC_WRITE PHYSDEVOP_apic_write +#define PHYSDEVOP_ASSIGN_VECTOR PHYSDEVOP_alloc_irq_vector +#define PHYSDEVOP_FREE_VECTOR PHYSDEVOP_free_irq_vector +#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi +#define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared +#endif + +#if __XEN_INTERFACE_VERSION__ < 0x00040200 +#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v1 +#else +#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v2 +#endif + +#endif /* __XEN_PUBLIC_PHYSDEV_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/src/include/xen/trace.h b/src/include/xen/trace.h index bf8bf65..38d283e 100644 --- a/src/include/xen/trace.h +++ b/src/include/xen/trace.h @@ -1,24 +1,7 @@ +/* SPDX-License-Identifier: MIT */ /****************************************************************************** * include/public/trace.h * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Mark Williamson, (C) 2004 Intel Research Cambridge * Copyright (C) 2005 Bin Ren */ @@ -77,8 +60,10 @@ FILE_LICENCE ( MIT ); /* Per-scheduler IDs, to identify scheduler specific events */ #define TRC_SCHED_CSCHED 0 #define TRC_SCHED_CSCHED2 1 -#define TRC_SCHED_SEDF 2 +/* #define XEN_SCHEDULER_SEDF 2 (Removed) */ #define TRC_SCHED_ARINC653 3 +#define TRC_SCHED_RTDS 4 +#define TRC_SCHED_SNULL 5 /* Per-scheduler tracing */ #define TRC_SCHED_CLASS_EVT(_c, _e) \ @@ -86,6 +71,9 @@ FILE_LICENCE ( MIT ); ((TRC_SCHED_##_c << TRC_SCHED_ID_SHIFT) & TRC_SCHED_ID_MASK) ) + \ (_e & TRC_SCHED_EVT_MASK) ) +/* Trace classes for DOM0 operations */ +#define TRC_DOM0_DOMOPS 0x00041000 /* Domains manipulations */ + /* Trace classes for Hardware */ #define TRC_HW_PM 0x00801000 /* Power management traces */ #define TRC_HW_IRQ 0x00802000 /* Traces relating to the handling of IRQs */ @@ -113,6 +101,10 @@ FILE_LICENCE ( MIT ); #define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14) #define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15) #define TRC_SCHED_SHUTDOWN_CODE (TRC_SCHED_VERBOSE + 16) +#define TRC_SCHED_SWITCH_INFCONT (TRC_SCHED_VERBOSE + 17) + +#define TRC_DOM0_DOM_ADD (TRC_DOM0_DOMOPS + 1) +#define TRC_DOM0_DOM_REM (TRC_DOM0_DOMOPS + 2) #define TRC_MEM_PAGE_GRANT_MAP (TRC_MEM + 1) #define TRC_MEM_PAGE_GRANT_UNMAP (TRC_MEM + 2) @@ -228,6 +220,8 @@ FILE_LICENCE ( MIT ); #define TRC_HVM_TRAP (TRC_HVM_HANDLER + 0x23) #define TRC_HVM_TRAP_DEBUG (TRC_HVM_HANDLER + 0x24) #define TRC_HVM_VLAPIC (TRC_HVM_HANDLER + 0x25) +#define TRC_HVM_XCR_READ64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x26) +#define TRC_HVM_XCR_WRITE64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x27) #define TRC_HVM_IOPORT_WRITE (TRC_HVM_HANDLER + 0x216) #define TRC_HVM_IOMEM_WRITE (TRC_HVM_HANDLER + 0x217) diff --git a/src/include/xen/version.h b/src/include/xen/version.h index 4e81ca0..060306f 100644 --- a/src/include/xen/version.h +++ b/src/include/xen/version.h @@ -1,26 +1,9 @@ +/* SPDX-License-Identifier: MIT */ /****************************************************************************** * version.h * * Xen version, type, and compile information. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Copyright (c) 2005, Nguyen Anh Quynh * Copyright (c) 2005, Keir Fraser */ @@ -32,7 +15,8 @@ FILE_LICENCE ( MIT ); #include "xen.h" -/* NB. All ops return zero on success, except XENVER_{version,pagesize} */ +/* NB. All ops return zero on success, except XENVER_{version,pagesize} + * XENVER_{version,pagesize,build_id} */ /* arg == NULL; returns major:minor (16:16). */ #define XENVER_version 0 @@ -68,7 +52,7 @@ typedef struct xen_platform_parameters xen_platform_parameters_t; #define XENVER_get_features 6 struct xen_feature_info { - unsigned int submap_idx; /* IN: which 32-bit submap to return */ + uint32_t submap_idx; /* IN: which 32-bit submap to return */ uint32_t submap; /* OUT: 32-bit submap */ }; typedef struct xen_feature_info xen_feature_info_t; @@ -79,12 +63,28 @@ typedef struct xen_feature_info xen_feature_info_t; /* arg == NULL; returns host memory page size. */ #define XENVER_pagesize 7 -/* arg == xen_domain_handle_t. */ +/* arg == xen_domain_handle_t. + * + * The toolstack fills it out for guest consumption. It is intended to hold + * the UUID of the guest. + */ #define XENVER_guest_handle 8 #define XENVER_commandline 9 typedef char xen_commandline_t[1024]; +/* + * Return value is the number of bytes written, or XEN_Exx on error. + * Calling with empty parameter returns the size of build_id. + */ +#define XENVER_build_id 10 +struct xen_build_id { + uint32_t len; /* IN: size of buf[]. */ + unsigned char buf[XEN_FLEX_ARRAY_DIM]; + /* OUT: Variable length buffer with build_id. */ +}; +typedef struct xen_build_id xen_build_id_t; + #endif /* __XEN_PUBLIC_VERSION_H__ */ /* diff --git a/src/include/xen/xen-compat.h b/src/include/xen/xen-compat.h index 0ba6fca..8b23618 100644 --- a/src/include/xen/xen-compat.h +++ b/src/include/xen/xen-compat.h @@ -1,26 +1,9 @@ +/* SPDX-License-Identifier: MIT */ /****************************************************************************** * xen-compat.h * * Guest OS interface to Xen. Compatibility layer. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Copyright (c) 2006, Christian Limpach */ @@ -29,7 +12,7 @@ FILE_LICENCE ( MIT ); -#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040400 +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040e00 #if defined(__XEN__) || defined(__XEN_TOOLS__) /* Xen is built with matching headers and implements the latest interface. */ @@ -43,4 +26,6 @@ FILE_LICENCE ( MIT ); #error "These header files do not support the requested interface version." #endif +#define COMPAT_FLEX_ARRAY_DIM XEN_FLEX_ARRAY_DIM + #endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */ diff --git a/src/include/xen/xen.h b/src/include/xen/xen.h index 2da521d..8a4b30b 100644 --- a/src/include/xen/xen.h +++ b/src/include/xen/xen.h @@ -1,26 +1,9 @@ +/* SPDX-License-Identifier: MIT */ /****************************************************************************** * xen.h * * Guest OS interface to Xen. * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * * Copyright (c) 2004, K A Fraser */ @@ -54,6 +37,33 @@ DEFINE_XEN_GUEST_HANDLE(void); DEFINE_XEN_GUEST_HANDLE(uint64_t); DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); DEFINE_XEN_GUEST_HANDLE(xen_ulong_t); + +/* Define a variable length array (depends on compiler). */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define XEN_FLEX_ARRAY_DIM +#elif defined(__GNUC__) +#define XEN_FLEX_ARRAY_DIM 0 +#else +#define XEN_FLEX_ARRAY_DIM 1 /* variable size */ +#endif + +/* Turn a plain number into a C unsigned (long (long)) constant. */ +#define __xen_mk_uint(x) x ## U +#define __xen_mk_ulong(x) x ## UL +#ifndef __xen_mk_ullong +# define __xen_mk_ullong(x) x ## ULL +#endif +#define xen_mk_uint(x) __xen_mk_uint(x) +#define xen_mk_ulong(x) __xen_mk_ulong(x) +#define xen_mk_ullong(x) __xen_mk_ullong(x) + +#else + +/* In assembly code we cannot use C numeric constant suffixes. */ +#define xen_mk_uint(x) x +#define xen_mk_ulong(x) x +#define xen_mk_ullong(x) x + #endif /* @@ -102,7 +112,10 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t); #define __HYPERVISOR_domctl 36 #define __HYPERVISOR_kexec_op 37 #define __HYPERVISOR_tmem_op 38 -#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ +#define __HYPERVISOR_argo_op 39 +#define __HYPERVISOR_xenpmu_op 40 +#define __HYPERVISOR_dm_op 41 +#define __HYPERVISOR_hypfs_op 42 /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 @@ -159,9 +172,10 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t); #define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ #define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ #define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */ -#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */ -#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */ +#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occurred */ +#define VIRQ_ARGO 11 /* G. Argo interdomain message notification */ #define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */ +#define VIRQ_XENPMU 13 /* V. PMC interrupt */ /* Architecture-specific VIRQ definitions. */ #define VIRQ_ARCH_0 16 @@ -249,6 +263,10 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t); * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed * with those in @val. * + * ptr[1:0] == MMU_PT_UPDATE_NO_TRANSLATE: + * As MMU_NORMAL_PT_UPDATE above, but @val is not translated though FD + * page tables. + * * @val is usually the machine frame number along with some attributes. * The attributes by default follow the architecture defined bits. Meaning that * if this is a X86_64 machine and four page table layout is used, the layout @@ -315,9 +333,11 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t); * * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7. */ -#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ -#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ -#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ +#define MMU_PT_UPDATE_NO_TRANSLATE 3 /* checked '*ptr = val'. ptr is MA. */ + /* val never translated. */ /* * MMU EXTENDED OPERATIONS @@ -451,17 +471,38 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); /* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */ /* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */ /* ` enum uvm_flags { */ -#define UVMF_NONE (0UL<<0) /* No flushing at all. */ -#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */ -#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */ -#define UVMF_FLUSHTYPE_MASK (3UL<<0) -#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */ -#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */ -#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */ +#define UVMF_NONE (xen_mk_ulong(0)<<0) /* No flushing at all. */ +#define UVMF_TLB_FLUSH (xen_mk_ulong(1)<<0) /* Flush entire TLB(s). */ +#define UVMF_INVLPG (xen_mk_ulong(2)<<0) /* Flush only one entry. */ +#define UVMF_FLUSHTYPE_MASK (xen_mk_ulong(3)<<0) +#define UVMF_MULTI (xen_mk_ulong(0)<<2) /* Flush subset of TLBs. */ +#define UVMF_LOCAL (xen_mk_ulong(0)<<2) /* Flush local TLB. */ +#define UVMF_ALL (xen_mk_ulong(1)<<2) /* Flush all TLBs. */ /* ` } */ /* - * Commands to HYPERVISOR_console_io(). + * ` int + * ` HYPERVISOR_console_io(unsigned int cmd, + * ` unsigned int count, + * ` char buffer[]); + * + * @cmd: Command (see below) + * @count: Size of the buffer to read/write + * @buffer: Pointer in the guest memory + * + * List of commands: + * + * * CONSOLEIO_write: Write the buffer to Xen console. + * For the hardware domain, all the characters in the buffer will + * be written. Characters will be printed directly to the console. + * For all the other domains, only the printable characters will be + * written. Characters may be buffered until a newline (i.e '\n') is + * found. + * @return 0 on success, otherwise return an error code. + * * CONSOLEIO_read: Attempts to read up to @count characters from Xen + * console. The maximum buffer size (i.e. @count) supported is 2GB. + * @return the number of characters read on success, otherwise return + * an error code. */ #define CONSOLEIO_write 0 #define CONSOLEIO_read 1 @@ -488,17 +529,42 @@ DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); /* x86/PAE guests: support PDPTs above 4GB. */ #define VMASST_TYPE_pae_extended_cr3 3 -#define MAX_VMASST_TYPE 3 +/* + * x86 guests: Sane behaviour for virtual iopl + * - virtual iopl updated from do_iret() hypercalls. + * - virtual iopl reported in bounce frames. + * - guest kernels assumed to be level 0 for the purpose of iopl checks. + */ +#define VMASST_TYPE_architectural_iopl 4 -#ifndef __ASSEMBLY__ +/* + * All guests: activate update indicator in vcpu_runstate_info + * Enable setting the XEN_RUNSTATE_UPDATE flag in guest memory mapped + * vcpu_runstate_info during updates of the runstate information. + */ +#define VMASST_TYPE_runstate_update_flag 5 -typedef uint16_t domid_t; +/* + * x86/64 guests: strictly hide M2P from user mode. + * This allows the guest to control respective hypervisor behavior: + * - when not set, L4 tables get created with the respective slot blank, + * and whenever the L4 table gets used as a kernel one the missing + * mapping gets inserted, + * - when set, L4 tables get created with the respective slot initialized + * as before, and whenever the L4 table gets used as a user one the + * mapping gets zapped. + */ +#define VMASST_TYPE_m2p_strict 32 + +#if __XEN_INTERFACE_VERSION__ < 0x00040600 +#define MAX_VMASST_TYPE 3 +#endif /* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ -#define DOMID_FIRST_RESERVED (0x7FF0U) +#define DOMID_FIRST_RESERVED xen_mk_uint(0x7FF0) /* DOMID_SELF is used in certain contexts to refer to oneself. */ -#define DOMID_SELF (0x7FF0U) +#define DOMID_SELF xen_mk_uint(0x7FF0) /* * DOMID_IO is used to restrict page-table updates to mapping I/O memory. @@ -506,28 +572,40 @@ typedef uint16_t domid_t; * is useful to ensure that no mappings to the OS's own heap are accidentally * installed. (e.g., in Linux this could cause havoc as reference counts * aren't adjusted on the I/O-mapping code path). - * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can - * be specified by any calling domain. + * This only makes sense as HYPERVISOR_mmu_update()'s and + * HYPERVISOR_update_va_mapping_otherdomain()'s "foreigndom" argument. For + * HYPERVISOR_mmu_update() context it can be specified by any calling domain, + * otherwise it's only permitted if the caller is privileged. */ -#define DOMID_IO (0x7FF1U) +#define DOMID_IO xen_mk_uint(0x7FF1) /* * DOMID_XEN is used to allow privileged domains to map restricted parts of * Xen's heap space (e.g., the machine_to_phys table). - * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if - * the caller is privileged. + * This only makes sense as + * - HYPERVISOR_mmu_update()'s, HYPERVISOR_mmuext_op()'s, or + * HYPERVISOR_update_va_mapping_otherdomain()'s "foreigndom" argument, + * - with XENMAPSPACE_gmfn_foreign, + * and is only permitted if the caller is privileged. */ -#define DOMID_XEN (0x7FF2U) +#define DOMID_XEN xen_mk_uint(0x7FF2) /* * DOMID_COW is used as the owner of sharable pages */ -#define DOMID_COW (0x7FF3U) +#define DOMID_COW xen_mk_uint(0x7FF3) /* DOMID_INVALID is used to identify pages with unknown owner. */ -#define DOMID_INVALID (0x7FF4U) +#define DOMID_INVALID xen_mk_uint(0x7FF4) /* Idle domain. */ -#define DOMID_IDLE (0x7FFFU) +#define DOMID_IDLE xen_mk_uint(0x7FFF) + +/* Mask for valid domain id values */ +#define DOMID_MASK xen_mk_uint(0x7FFF) + +#ifndef __ASSEMBLY__ + +typedef uint16_t domid_t; /* * Send an array of these to HYPERVISOR_mmu_update(). @@ -587,10 +665,18 @@ struct vcpu_time_info { */ uint32_t tsc_to_system_mul; int8_t tsc_shift; +#if __XEN_INTERFACE_VERSION__ > 0x040600 + uint8_t flags; + uint8_t pad1[2]; +#else int8_t pad1[3]; +#endif }; /* 32 bytes */ typedef struct vcpu_time_info vcpu_time_info_t; +#define XEN_PVCLOCK_TSC_STABLE_BIT (1 << 0) +#define XEN_PVCLOCK_GUEST_STOPPED (1 << 1) + struct vcpu_info { /* * 'evtchn_upcall_pending' is written non-zero by Xen to indicate @@ -625,7 +711,7 @@ struct vcpu_info { #endif /* XEN_HAVE_PV_UPCALL_MASK */ xen_ulong_t evtchn_pending_sel; struct arch_vcpu_info arch; - struct vcpu_time_info time; + vcpu_time_info_t time; }; /* 64 bytes (x86) */ #ifndef __XEN__ typedef struct vcpu_info vcpu_info_t; @@ -678,12 +764,23 @@ struct shared_info { xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8]; /* - * Wallclock time: updated only by control software. Guests should base - * their gettimeofday() syscall on this wallclock-base value. + * Wallclock time: updated by control software or RTC emulation. + * Guests should base their gettimeofday() syscall on this + * wallclock-base value. + * The values of wc_sec and wc_nsec are offsets from the Unix epoch + * adjusted by the domain's 'time offset' (in seconds) as set either + * by XEN_DOMCTL_settimeoffset, or adjusted via a guest write to the + * emulated RTC. */ uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ - uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ - uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */ + uint32_t wc_sec; + uint32_t wc_nsec; +#if !defined(__i386__) + uint32_t wc_sec_hi; +# define xen_wc_sec_hi wc_sec_hi +#elif !defined(__XEN__) && !defined(__XEN_TOOLS__) +# define xen_wc_sec_hi arch.wc_sec_hi +#endif struct arch_shared_info arch; @@ -700,24 +797,27 @@ typedef struct shared_info shared_info_t; * 3. This the order of bootstrap elements in the initial virtual region: * a. relocated kernel image * b. initial ram disk [mod_start, mod_len] + * (may be omitted) * c. list of allocated page frames [mfn_list, nr_pages] * (unless relocated due to XEN_ELFNOTE_INIT_P2M) - * d. start_info_t structure [register ESI (x86)] - * e. bootstrap page tables [pt_base and CR3 (x86)] - * f. bootstrap stack [register ESP (x86)] + * d. start_info_t structure [register rSI (x86)] + * in case of dom0 this page contains the console info, too + * e. unless dom0: xenstore ring page + * f. unless dom0: console ring page + * g. bootstrap page tables [pt_base and CR3 (x86)] + * h. bootstrap stack [register ESP (x86)] * 4. Bootstrap elements are packed together, but each is 4kB-aligned. - * 5. The initial ram disk may be omitted. - * 6. The list of page frames forms a contiguous 'pseudo-physical' memory + * 5. The list of page frames forms a contiguous 'pseudo-physical' memory * layout for the domain. In particular, the bootstrap virtual-memory * region is a 1:1 mapping to the first section of the pseudo-physical map. - * 7. All bootstrap elements are mapped read-writable for the guest OS. The + * 6. All bootstrap elements are mapped read-writable for the guest OS. The * only exception is the bootstrap page table, which is mapped read-only. - * 8. There is guaranteed to be at least 512kB padding after the final + * 7. There is guaranteed to be at least 512kB padding after the final * bootstrap element. If necessary, the bootstrap virtual region is * extended by an extra 4MB to ensure this. * * Note: Prior to 25833:bb85bbccb1c9. ("x86/32-on-64 adjust Dom0 initial page - * table layout") a bug caused the pt_base (3.e above) and cr3 to not point + * table layout") a bug caused the pt_base (3.g above) and cr3 to not point * to the start of the guest page tables (it was offset by two pages). * This only manifested itself on 32-on-64 dom0 kernels and not 32-on-64 domU * or 64-bit kernels of any colour. The page tables for a 32-on-64 dom0 got @@ -773,6 +873,8 @@ typedef struct start_info start_info_t; #define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ #define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ #define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */ +#define SIF_VIRT_P2M_4TOOLS (1<<4) /* Do Xen tools understand a virt. mapped */ + /* P->M making the 3 level tree obsolete? */ #define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ /* @@ -844,6 +946,11 @@ typedef struct dom0_vga_console_info { uint32_t gbl_caps; /* Mode attributes (offset 0x0, VESA command 0x4f01). */ uint16_t mode_attrs; + uint16_t pad; +#endif +#if __XEN_INTERFACE_VERSION__ >= 0x00040d00 + /* high 32 bits of lfb_base */ + uint32_t ext_lfb_base; #endif } vesa_lfb; } u; @@ -853,25 +960,50 @@ typedef struct dom0_vga_console_info { typedef uint8_t xen_domain_handle_t[16]; -/* Turn a plain number into a C unsigned long constant. */ -#define __mk_unsigned_long(x) x ## UL -#define mk_unsigned_long(x) __mk_unsigned_long(x) - __DEFINE_XEN_GUEST_HANDLE(uint8, uint8_t); __DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t); __DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t); __DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t); -#else /* __ASSEMBLY__ */ +typedef struct { + uint8_t a[16]; +} xen_uuid_t; -/* In assembly code we cannot use C numeric constant suffixes. */ -#define mk_unsigned_long(x) x +/* + * XEN_DEFINE_UUID(0x00112233, 0x4455, 0x6677, 0x8899, + * 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff) + * will construct UUID 00112233-4455-6677-8899-aabbccddeeff presented as + * {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, + * 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}; + * + * NB: This is compatible with Linux kernel and with libuuid, but it is not + * compatible with Microsoft, as they use mixed-endian encoding (some + * components are little-endian, some are big-endian). + */ +#define XEN_DEFINE_UUID_(a, b, c, d, e1, e2, e3, e4, e5, e6) \ + {{((a) >> 24) & 0xFF, ((a) >> 16) & 0xFF, \ + ((a) >> 8) & 0xFF, ((a) >> 0) & 0xFF, \ + ((b) >> 8) & 0xFF, ((b) >> 0) & 0xFF, \ + ((c) >> 8) & 0xFF, ((c) >> 0) & 0xFF, \ + ((d) >> 8) & 0xFF, ((d) >> 0) & 0xFF, \ + e1, e2, e3, e4, e5, e6}} + +#if defined(__STDC_VERSION__) ? __STDC_VERSION__ >= 199901L : defined(__GNUC__) +#define XEN_DEFINE_UUID(a, b, c, d, e1, e2, e3, e4, e5, e6) \ + ((xen_uuid_t)XEN_DEFINE_UUID_(a, b, c, d, e1, e2, e3, e4, e5, e6)) +#else +#define XEN_DEFINE_UUID(a, b, c, d, e1, e2, e3, e4, e5, e6) \ + XEN_DEFINE_UUID_(a, b, c, d, e1, e2, e3, e4, e5, e6) +#endif /* __STDC_VERSION__ / __GNUC__ */ #endif /* !__ASSEMBLY__ */ /* Default definitions for macros used by domctl/sysctl. */ #if defined(__XEN__) || defined(__XEN_TOOLS__) +#ifndef int64_aligned_t +#define int64_aligned_t int64_t +#endif #ifndef uint64_aligned_t #define uint64_aligned_t uint64_t #endif @@ -884,6 +1016,7 @@ struct xenctl_bitmap { XEN_GUEST_HANDLE_64(uint8) bitmap; uint32_t nr_bits; }; +typedef struct xenctl_bitmap xenctl_bitmap_t; #endif #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ -- cgit v1.1