diff options
Diffstat (limited to 'docs')
-rw-r--r-- | docs/devel/qom.rst | 8 | ||||
-rw-r--r-- | docs/devel/reset.rst | 2 | ||||
-rw-r--r-- | docs/devel/tcg-ops.rst | 228 | ||||
-rw-r--r-- | docs/devel/virtio-backends.rst | 2 | ||||
-rw-r--r-- | docs/igd-assign.txt | 265 |
5 files changed, 343 insertions, 162 deletions
diff --git a/docs/devel/qom.rst b/docs/devel/qom.rst index 0889ca9..5870745 100644 --- a/docs/devel/qom.rst +++ b/docs/devel/qom.rst @@ -147,7 +147,7 @@ to introduce an overridden virtual function: #include "qdev.h" - void my_device_class_init(ObjectClass *klass, void *class_data) + void my_device_class_init(ObjectClass *klass, const void *class_data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->reset = my_device_reset; @@ -249,7 +249,7 @@ class, which someone might choose to change at some point. // do something } - static void my_class_init(ObjectClass *oc, void *data) + static void my_class_init(ObjectClass *oc, const void *data) { MyClass *mc = MY_CLASS(oc); @@ -279,7 +279,7 @@ class, which someone might choose to change at some point. // do something else here } - static void derived_class_init(ObjectClass *oc, void *data) + static void derived_class_init(ObjectClass *oc, const void *data) { MyClass *mc = MY_CLASS(oc); DerivedClass *dc = DERIVED_CLASS(oc); @@ -363,7 +363,7 @@ This is equivalent to the following: :caption: Expansion from defining a simple type static void my_device_finalize(Object *obj); - static void my_device_class_init(ObjectClass *oc, void *data); + static void my_device_class_init(ObjectClass *oc, const void *data); static void my_device_init(Object *obj); static const TypeInfo my_device_info = { diff --git a/docs/devel/reset.rst b/docs/devel/reset.rst index 0b8b2fa..c02fe0a 100644 --- a/docs/devel/reset.rst +++ b/docs/devel/reset.rst @@ -216,7 +216,7 @@ in reset. ResettablePhases parent_phases; } MyDevClass; - static void mydev_class_init(ObjectClass *class, void *data) + static void mydev_class_init(ObjectClass *class, const void *data) { MyDevClass *myclass = MYDEV_CLASS(class); ResettableClass *rc = RESETTABLE_CLASS(class); diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst index 688984f..f26b837 100644 --- a/docs/devel/tcg-ops.rst +++ b/docs/devel/tcg-ops.rst @@ -239,7 +239,7 @@ Jumps/Labels - | Jump to label. 
- * - brcond_i32/i64 *t0*, *t1*, *cond*, *label* + * - brcond *t0*, *t1*, *cond*, *label* - | Conditional jump if *t0* *cond* *t1* is true. *cond* can be: | @@ -261,98 +261,117 @@ Arithmetic .. list-table:: - * - add_i32/i64 *t0*, *t1*, *t2* + * - add *t0*, *t1*, *t2* - | *t0* = *t1* + *t2* - * - sub_i32/i64 *t0*, *t1*, *t2* + * - sub *t0*, *t1*, *t2* - | *t0* = *t1* - *t2* - * - neg_i32/i64 *t0*, *t1* + * - neg *t0*, *t1* - | *t0* = -*t1* (two's complement) - * - mul_i32/i64 *t0*, *t1*, *t2* + * - mul *t0*, *t1*, *t2* - | *t0* = *t1* * *t2* - * - div_i32/i64 *t0*, *t1*, *t2* + * - divs *t0*, *t1*, *t2* - | *t0* = *t1* / *t2* (signed) | Undefined behavior if division by zero or overflow. - * - divu_i32/i64 *t0*, *t1*, *t2* + * - divu *t0*, *t1*, *t2* - | *t0* = *t1* / *t2* (unsigned) | Undefined behavior if division by zero. - * - rem_i32/i64 *t0*, *t1*, *t2* + * - rems *t0*, *t1*, *t2* - | *t0* = *t1* % *t2* (signed) | Undefined behavior if division by zero or overflow. - * - remu_i32/i64 *t0*, *t1*, *t2* + * - remu *t0*, *t1*, *t2* - | *t0* = *t1* % *t2* (unsigned) | Undefined behavior if division by zero. + * - divs2 *q*, *r*, *nl*, *nh*, *d* + + - | *q* = *nh:nl* / *d* (signed) + | *r* = *nh:nl* % *d* + | Undefined behaviour if division by zero, or the double-word + numerator divided by the single-word divisor does not fit + within the single-word quotient. The code generator will + pass *nh* as a simple sign-extension of *nl*, so the only + overflow should be *INT_MIN* / -1. + + * - divu2 *q*, *r*, *nl*, *nh*, *d* + + - | *q* = *nh:nl* / *d* (unsigned) + | *r* = *nh:nl* % *d* + | Undefined behaviour if division by zero, or the double-word + numerator divided by the single-word divisor does not fit + within the single-word quotient. The code generator will + pass 0 to *nh* to make a simple zero-extension of *nl*, + so overflow should never occur. Logical ------- .. 
list-table:: - * - and_i32/i64 *t0*, *t1*, *t2* + * - and *t0*, *t1*, *t2* - | *t0* = *t1* & *t2* - * - or_i32/i64 *t0*, *t1*, *t2* + * - or *t0*, *t1*, *t2* - | *t0* = *t1* | *t2* - * - xor_i32/i64 *t0*, *t1*, *t2* + * - xor *t0*, *t1*, *t2* - | *t0* = *t1* ^ *t2* - * - not_i32/i64 *t0*, *t1* + * - not *t0*, *t1* - | *t0* = ~\ *t1* - * - andc_i32/i64 *t0*, *t1*, *t2* + * - andc *t0*, *t1*, *t2* - | *t0* = *t1* & ~\ *t2* - * - eqv_i32/i64 *t0*, *t1*, *t2* + * - eqv *t0*, *t1*, *t2* - | *t0* = ~(*t1* ^ *t2*), or equivalently, *t0* = *t1* ^ ~\ *t2* - * - nand_i32/i64 *t0*, *t1*, *t2* + * - nand *t0*, *t1*, *t2* - | *t0* = ~(*t1* & *t2*) - * - nor_i32/i64 *t0*, *t1*, *t2* + * - nor *t0*, *t1*, *t2* - | *t0* = ~(*t1* | *t2*) - * - orc_i32/i64 *t0*, *t1*, *t2* + * - orc *t0*, *t1*, *t2* - | *t0* = *t1* | ~\ *t2* - * - clz_i32/i64 *t0*, *t1*, *t2* + * - clz *t0*, *t1*, *t2* - | *t0* = *t1* ? clz(*t1*) : *t2* - * - ctz_i32/i64 *t0*, *t1*, *t2* + * - ctz *t0*, *t1*, *t2* - | *t0* = *t1* ? ctz(*t1*) : *t2* - * - ctpop_i32/i64 *t0*, *t1* + * - ctpop *t0*, *t1* - | *t0* = number of bits set in *t1* | - | With *ctpop* short for "count population", matching - | the function name used in ``include/qemu/host-utils.h``. + | The name *ctpop* is short for "count population", and matches + the function name used in ``include/qemu/host-utils.h``. Shifts/Rotates @@ -360,30 +379,30 @@ Shifts/Rotates .. list-table:: - * - shl_i32/i64 *t0*, *t1*, *t2* + * - shl *t0*, *t1*, *t2* - | *t0* = *t1* << *t2* - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. - * - shr_i32/i64 *t0*, *t1*, *t2* + * - shr *t0*, *t1*, *t2* - | *t0* = *t1* >> *t2* (unsigned) - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. 
- * - sar_i32/i64 *t0*, *t1*, *t2* + * - sar *t0*, *t1*, *t2* - | *t0* = *t1* >> *t2* (signed) - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. - * - rotl_i32/i64 *t0*, *t1*, *t2* + * - rotl *t0*, *t1*, *t2* - | Rotation of *t2* bits to the left - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. - * - rotr_i32/i64 *t0*, *t1*, *t2* + * - rotr *t0*, *t1*, *t2* - | Rotation of *t2* bits to the right. - | Unspecified behavior if *t2* < 0 or *t2* >= 32 (resp 64) + | Unspecified behavior for negative or out-of-range shifts. Misc @@ -391,26 +410,12 @@ Misc .. list-table:: - * - mov_i32/i64 *t0*, *t1* + * - mov *t0*, *t1* - | *t0* = *t1* - | Move *t1* to *t0* (both operands must have the same type). - - * - ext8s_i32/i64 *t0*, *t1* - - ext8u_i32/i64 *t0*, *t1* - - ext16s_i32/i64 *t0*, *t1* - - ext16u_i32/i64 *t0*, *t1* + | Move *t1* to *t0*. - ext32s_i64 *t0*, *t1* - - ext32u_i64 *t0*, *t1* - - - | 8, 16 or 32 bit sign/zero extension (both operands must have the same type) - - * - bswap16_i32/i64 *t0*, *t1*, *flags* + * - bswap16 *t0*, *t1*, *flags* - | 16 bit byte swap on the low bits of a 32/64 bit input. | @@ -420,24 +425,24 @@ Misc | | If neither ``TCG_BSWAP_OZ`` nor ``TCG_BSWAP_OS`` are set, then the bits of *t0* above bit 15 may contain any value. - * - bswap32_i64 *t0*, *t1*, *flags* - - - | 32 bit byte swap on a 64-bit value. The flags are the same as for bswap16, - except they apply from bit 31 instead of bit 15. + * - bswap32 *t0*, *t1*, *flags* - * - bswap32_i32 *t0*, *t1*, *flags* + - | 32 bit byte swap. The flags are the same as for bswap16, except + they apply from bit 31 instead of bit 15. On TCG_TYPE_I32, the + flags should be zero. - bswap64_i64 *t0*, *t1*, *flags* + * - bswap64 *t0*, *t1*, *flags* - - | 32/64 bit byte swap. The flags are ignored, but still present - for consistency with the other bswap opcodes. 
+ - | 64 bit byte swap. The flags are ignored, but still present + for consistency with the other bswap opcodes. For future + compatibility, the flags should be zero. * - discard_i32/i64 *t0* - | Indicate that the value of *t0* won't be used later. It is useful to force dead code elimination. - * - deposit_i32/i64 *dest*, *t1*, *t2*, *pos*, *len* + * - deposit *dest*, *t1*, *t2*, *pos*, *len* - | Deposit *t2* as a bitfield into *t1*, placing the result in *dest*. | @@ -446,14 +451,16 @@ Misc | *len* - the length of the bitfield | *pos* - the position of the first bit, counting from the LSB | - | For example, "deposit_i32 dest, t1, t2, 8, 4" indicates a 4-bit field + | For example, "deposit dest, t1, t2, 8, 4" indicates a 4-bit field at bit 8. This operation would be equivalent to | | *dest* = (*t1* & ~0x0f00) | ((*t2* << 8) & 0x0f00) + | + | on TCG_TYPE_I32. - * - extract_i32/i64 *dest*, *t1*, *pos*, *len* + * - extract *dest*, *t1*, *pos*, *len* - sextract_i32/i64 *dest*, *t1*, *pos*, *len* + sextract *dest*, *t1*, *pos*, *len* - | Extract a bitfield from *t1*, placing the result in *dest*. | @@ -462,16 +469,16 @@ Misc to the left with zeros; for sextract_*, the result will be extended to the left with copies of the bitfield sign bit at *pos* + *len* - 1. | - | For example, "sextract_i32 dest, t1, 8, 4" indicates a 4-bit field + | For example, "sextract dest, t1, 8, 4" indicates a 4-bit field at bit 8. This operation would be equivalent to | | *dest* = (*t1* << 20) >> 28 | - | (using an arithmetic right shift). + | (using an arithmetic right shift) on TCG_TYPE_I32. - * - extract2_i32/i64 *dest*, *t1*, *t2*, *pos* + * - extract2 *dest*, *t1*, *t2*, *pos* - - | For N = {32,64}, extract an N-bit quantity from the concatenation + - | For TCG_TYPE_I{N}, extract an N-bit quantity from the concatenation of *t2*:*t1*, beginning at *pos*. The tcg_gen_extract2_{i32,i64} expander accepts 0 <= *pos* <= N as inputs. 
The backend code generator will not see either 0 or N as inputs for these opcodes. @@ -494,19 +501,19 @@ Conditional moves .. list-table:: - * - setcond_i32/i64 *dest*, *t1*, *t2*, *cond* + * - setcond *dest*, *t1*, *t2*, *cond* - | *dest* = (*t1* *cond* *t2*) | | Set *dest* to 1 if (*t1* *cond* *t2*) is true, otherwise set to 0. - * - negsetcond_i32/i64 *dest*, *t1*, *t2*, *cond* + * - negsetcond *dest*, *t1*, *t2*, *cond* - | *dest* = -(*t1* *cond* *t2*) | | Set *dest* to -1 if (*t1* *cond* *t2*) is true, otherwise set to 0. - * - movcond_i32/i64 *dest*, *c1*, *c2*, *v1*, *v2*, *cond* + * - movcond *dest*, *c1*, *c2*, *v1*, *v2*, *cond* - | *dest* = (*c1* *cond* *c2* ? *v1* : *v2*) | @@ -586,26 +593,79 @@ Multiword arithmetic support .. list-table:: - * - add2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high* + * - addco *t0*, *t1*, *t2* + + - | Compute *t0* = *t1* + *t2* and in addition output to the + carry bit provided by the host architecture. + + * - addci *t0*, *t1*, *t2* - sub2_i32/i64 *t0_low*, *t0_high*, *t1_low*, *t1_high*, *t2_low*, *t2_high* + - | Compute *t0* = *t1* + *t2* + *C*, where *C* is the + input carry bit provided by the host architecture. + The output carry bit need not be computed. - - | Similar to add/sub, except that the double-word inputs *t1* and *t2* are - formed from two single-word arguments, and the double-word output *t0* - is returned in two single-word outputs. + * - addcio *t0*, *t1*, *t2* - * - mulu2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2* + - | Compute *t0* = *t1* + *t2* + *C*, where *C* is the + input carry bit provided by the host architecture, + and also compute the output carry bit. + + * - addc1o *t0*, *t1*, *t2* + + - | Compute *t0* = *t1* + *t2* + 1, and in addition output to the + carry bit provided by the host architecture. This is akin to + *addcio* with a fixed carry-in value of 1. + | This is intended to be used by the optimization pass, + intermediate to complete folding of the addition chain. 
+ In some cases complete folding is not possible and this + opcode will remain until output. If this happens, the + code generator will use ``tcg_out_set_carry`` and then + the output routine for *addcio*. + + * - subbo *t0*, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* and in addition output to the + borrow bit provided by the host architecture. + | Depending on the host architecture, the carry bit may or may not be + identical to the borrow bit. Thus the addc\* and subb\* + opcodes must not be mixed. + + * - subbi *t0*, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* - *B*, where *B* is the + input borrow bit provided by the host architecture. + The output borrow bit need not be computed. + + * - subbio *t0*, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* - *B*, where *B* is the + input borrow bit provided by the host architecture, + and also compute the output borrow bit. + + * - subb1o *t0*, *t1*, *t2* + + - | Compute *t0* = *t1* - *t2* - 1, and in addition output to the + borrow bit provided by the host architecture. This is akin to + *subbio* with a fixed borrow-in value of 1. + | This is intended to be used by the optimization pass, + intermediate to complete folding of the subtraction chain. + In some cases complete folding is not possible and this + opcode will remain until output. If this happens, the + code generator will use ``tcg_out_set_borrow`` and then + the output routine for *subbio*. + + * - mulu2 *t0_low*, *t0_high*, *t1*, *t2* - | Similar to mul, except two unsigned inputs *t1* and *t2* yielding the full double-word product *t0*. The latter is returned in two single-word outputs. - * - muls2_i32/i64 *t0_low*, *t0_high*, *t1*, *t2* + * - muls2 *t0_low*, *t0_high*, *t1*, *t2* - | Similar to mulu2, except the two inputs *t1* and *t2* are signed. - * - mulsh_i32/i64 *t0*, *t1*, *t2* + * - mulsh *t0*, *t1*, *t2* - muluh_i32/i64 *t0*, *t1*, *t2* + muluh *t0*, *t1*, *t2* - | Provide the high part of a signed or unsigned multiply, respectively. 
| @@ -684,8 +744,6 @@ QEMU specific operations qemu_st_i32/i64/i128 *t0*, *t1*, *flags*, *memidx* - qemu_st8_i32 *t0*, *t1*, *flags*, *memidx* - - | Load data at the guest address *t1* into *t0*, or store data in *t0* at guest address *t1*. The _i32/_i64/_i128 size applies to the size of the input/output register *t0* only. The address *t1* is always sized according to the guest, @@ -703,10 +761,6 @@ QEMU specific operations 64-bit memory access specified in *flags*. | | For qemu_ld/st_i128, these are only supported for a 64-bit host. - | - | For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of - the memory operation is known to be 8-bit. This allows the backend to - provide a different set of register constraints. Host vector operations @@ -884,9 +938,9 @@ Assumptions The target word size (``TCG_TARGET_REG_BITS``) is expected to be 32 bit or 64 bit. It is expected that the pointer has the same size as the word. -On a 32 bit target, all 64 bit operations are converted to 32 bits. A -few specific operations must be implemented to allow it (see add2_i32, -sub2_i32, brcond2_i32). +On a 32 bit target, all 64 bit operations are converted to 32 bits. +A few specific operations must be implemented to allow it +(see brcond2_i32, setcond2_i32). 
On a 64 bit target, the values are transferred between 32 and 64-bit registers using the following ops: diff --git a/docs/devel/virtio-backends.rst b/docs/devel/virtio-backends.rst index 679d754..ebddc3b 100644 --- a/docs/devel/virtio-backends.rst +++ b/docs/devel/virtio-backends.rst @@ -119,7 +119,7 @@ manually instantiated: qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } - static void virtio_blk_pci_class_init(ObjectClass *klass, void *data) + static void virtio_blk_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/docs/igd-assign.txt b/docs/igd-assign.txt index e17bb50..3aed795 100644 --- a/docs/igd-assign.txt +++ b/docs/igd-assign.txt @@ -1,44 +1,69 @@ Intel Graphics Device (IGD) assignment with vfio-pci ==================================================== -IGD has two different modes for assignment using vfio-pci: - -1) Universal Pass-Through (UPT) mode: - - In this mode the IGD device is added as a *secondary* (ie. non-primary) - graphics device in combination with an emulated primary graphics device. - This mode *requires* guest driver support to remove the external - dependencies generally associated with IGD (see below). Those guest - drivers only support this mode for Broadwell and newer IGD, according to - Intel. Additionally, this mode by default, and as officially supported - by Intel, does not support direct video output. The intention is to use - this mode either to provide hardware acceleration to the emulated graphics - or to use this mode in combination with guest-based remote access software, - for example VNC (see below for optional output support). This mode - theoretically has no device specific handling dependencies on vfio-pci or - the VM firmware. 
- -2) "Legacy" mode: - - In this mode the IGD device is intended to be the primary and exclusive - graphics device in the VM[1], as such QEMU does not facilitate any sort - of remote graphics to the VM in this mode. A connected physical monitor - is the intended output device for IGD. This mode includes several - requirements and restrictions: - - * IGD must be given address 02.0 on the PCI root bus in the VM - * The host kernel must support vfio extensions for IGD (v4.6) - * vfio VGA support very likely needs to be enabled in the host kernel - * The VM firmware must support specific fw_cfg enablers for IGD - * The VM machine type must support a PCI host bridge at 00.0 (standard) - * The VM machine type must provide or allow to be created a special - ISA/LPC bridge device (vfio-pci-igd-lpc-bridge) on the root bus at - PCI address 1f.0. - * The IGD device must have a VGA ROM, either provided via the romfile - option or loaded automatically through vfio (standard). rombar=0 - will disable legacy mode support. - * Hotplug of the IGD device is not supported. - * The IGD device must be a SandyBridge or newer model device. +Using vfio-pci, we can pass through an Intel Graphics Device (IGD) to a guest, either +to serve as the primary and exclusive graphics adapter, or to be used in combination with an +emulated primary graphics device, depending on the config and guest driver +support. However, IGD devices are not "clean" PCI devices; they use extra +memory regions other than BARs. Special handling is required to make them work +properly, including: + +* OpRegion for accessing Virtual BIOS Table (VBT) that contains display output + information. 
+* Data Stolen Memory (DSM) region used as VRAM at early stage (BIOS/UEFI) + +Certain guest software also depends on the following conditions to work: +(*-Required by) + +| Condition | Linux | Windows | VBIOS | EFI GOP | +|---------------------------------------------|-------|---------|-------|---------| +| #1 IGD has a valid OpRegion containing VBT | * ^1 | * | * | * | +| #2 VID/DID of LPC bridge at 00:1f.0 matches | | | * | * | +| #3 IGD is assigned to BDF 00:02.0 | | | * | * | +| #4 IGD has VGA controller device class | | | * | * | +| #5 Host's VGA ranges are mapped to IGD | | | * | | +| #6 Guest has valid VBIOS or UEFI Option ROM | | | * | * | + +^1 Though i915 driver is able to mock an OpRegion, it is still recommended to + use the VBT copied from host OpRegion to prevent incorrect configuration. + +For #1, the "x-igd-opregion=on" option exposes a copy of host IGD OpRegion to +guest via fw_cfg, where guest firmware can set up guest OpRegion with it. + +For #2, "x-igd-lpc=on" option copies the IDs of host LPC bridge and host bridge +to guest. Currently this is only supported on i440fx machines as there is +already an ICH9 LPC bridge present on q35 machines, overwriting its IDs may +lead to unexpected behavior. + +For #3, "addr=2.0" assigns IGD to 00:02.0. + +For #4, the primary display must be set to IGD in host BIOS. + +For #5, "x-vga=on" enables guest access to standard VGA IO/MMIO ranges. + +For #6, ROM either provided via the ROM BAR or romfile= option is needed, this +Intel document [1] shows how to dump VBIOS to file. For UEFI Option ROM, see +"Guest firmware" section. 
+ +QEMU also provides a "Legacy" mode that implicitly enables full functionality +on IGD, it is automatically enabled when +* Machine type is i440fx +* IGD is assigned to guest BDF 00:02.0 +* ROM BAR or romfile is present + +In "Legacy" mode, QEMU will automatically set up OpRegion, LPC bridge IDs and +VGA range access, which is equivalent to: + x-igd-opregion=on,x-igd-lpc=on,x-vga=on + +By default, "Legacy" mode won't fail, it continues on error. User can set +"x-igd-legacy-mode=on" to force enabling legacy mode, this also checks if the +conditions above for legacy mode are met, and if any error occurs, QEMU will +fail immediately. Users can also set "x-igd-legacy-mode=off" to disable legacy +mode. + +In legacy mode, as the guest VGA ranges are assigned to IGD device, all other +graphics devices should be removed, this can be done using "-nographic" or +"-vga none" or "-nodefaults", along with adding the device using vfio-pci. For either mode, depending on the host kernel, the i915 driver in the host may generate faults and errors upon re-binding to an IGD device after it @@ -73,31 +98,39 @@ DVI, or DisplayPort) may be unsupported in some use cases. In the author's experience, even DP to VGA adapters can be troublesome while adapters between digital formats work well. -Usage -===== -The intention is for IGD assignment to be transparent for users and thus for -management tools like libvirt. 
To make use of legacy mode, simply remove all -other graphics options and use "-nographic" and either "-vga none" or -"-nodefaults", along with adding the device using vfio-pci: - -device vfio-pci,host=00:02.0,id=hostdev0,bus=pci.0,addr=0x2 +Options +======= +* x-igd-opregion=[on|*off*] + Copy host IGD OpRegion and expose it to guest with fw_cfg + +* x-igd-lpc=[on|*off*] + Creates a dummy LPC bridge at 00:1f.0 with host VID/DID (i440fx only) + +* x-igd-legacy-mode=[on|off|*auto*] + Enable/Disable legacy mode + +* x-igd-gms=[hex, default 0] + Overrides the DSM region size in the GGC register; 0 means use the host value. + Use this only when the DSM size cannot be changed through the + 'DVMT Pre-Allocated' option in host BIOS. -For UPT mode, retain the default emulated graphics and simply add the vfio-pci -device making use of any other bus address other than 02.0. libvirt will -default to assigning the device a UPT compatible address while legacy mode -users will need to manually edit the XML if using a tool like virt-manager -where the VM device address is not expressly specified. -An experimental vfio-pci option also exists to enable OpRegion, and thus -external monitor support, for UPT mode. This can be enabled by adding -"x-igd-opregion=on" to the vfio-pci device options for the IGD device. As -with legacy mode, this requires the host to support features introduced in -the v4.6 kernel. If Intel chooses to embrace this support, the option may -be made non-experimental in the future, opening it to libvirt support. 
+Examples +======== +* Adding IGD with automatic legacy mode support + -device vfio-pci,host=00:02.0,id=hostdev0,addr=2.0 -Developer ABI -============= -Legacy mode IGD support imposes two fw_cfg requirements on the VM firmware: +* Adding IGD with OpRegion and LPC ID hack, but without VGA ranges + (For UEFI guests) + -device vfio-pci,host=00:02.0,id=hostdev0,addr=2.0,x-igd-legacy-mode=off,x-igd-opregion=on,x-igd-lpc=on,romfile=efi_oprom.rom + + +Guest firmware +============== +Guest firmware is responsible for setting up OpRegion and Base of Data Stolen +Memory (BDSM) in guest address space. IGD passthrough support imposes two +fw_cfg requirements on the VM firmware: 1) "etc/igd-opregion" @@ -117,17 +150,111 @@ Legacy mode IGD support imposes two fw_cfg requirements on the VM firmware: Firmware must allocate a reserved memory below 4GB with required 1MB alignment equal to this size. Additionally the base address of this reserved region must be written to the dword BDSM register in PCI config - space of the IGD device at offset 0x5C. As this support is related to - running the IGD ROM, which has other dependencies on the device appearing - at guest address 00:02.0, it's expected that this fw_cfg file is only - relevant to a single PCI class VGA device with Intel vendor ID, appearing - at PCI bus address 00:02.0. + space of the IGD device at offset 0x5C (or 0xC0 for Gen 11+ devices using + 64-bit BDSM). As this support is related to running the IGD ROM, which + has other dependencies on the device appearing at guest address 00:02.0, + it's expected that this fw_cfg file is only relevant to a single PCI + class VGA device with Intel vendor ID, appearing at PCI bus address 00:02.0. + +Upstream SeaBIOS has OpRegion and BDSM (pre-Gen11 device only) support. +However, the support is not accepted by upstream EDK2/OVMF. 
A recommended +solution is to create a virtual OpRom with the following DXE drivers: + +* IgdAssignmentDxe: Set up OpRegion and BDSM according to fw_cfg (must) +* IntelGopDriver: Closed-source Intel GOP driver +* PlatformGopPolicy: Protocol required by IntelGopDriver + +IntelGopDriver and PlatformGopPolicy are only required when enabling GOP on IGD. + +The original IgdAssignmentDxe can be found at [3]. An Intel-maintained version +with PlatformGopPolicy for industrial computing is at [4]. There is also an +unofficially maintained version with newer Gen11+ device support at [5]. +You need to build them with EDK2. + +For the IntelGopDriver, Intel never released it to the public. You may contact +Intel support to get one as [4] says, if you are an Intel Premier Support +customer, or you can try extracting it from your host firmware using +"UEFI BIOS Updater"[6]. + +Once you have all the required DXE drivers, an Option ROM can be generated with +the EfiRom utility in EDK2, using + EfiRom -f 0x8086 -i <Device ID of your IGD> -o output.rom \ + -e IgdAssignmentDxe.efi PlatformGOPPolicy.efi IntelGopDriver.efi + + +Known issues +============ +When using OVMF as guest firmware, you may encounter the following warning: +warning: vfio_container_dma_map(0x55fab36ce610, 0x380010000000, 0x108000, 0x7fd336000000) = -22 (Invalid argument) + +Solution: +Set the host physical address bits to IOMMU address width using + -cpu host,host-phys-bits-limit=<IOMMU address width> +Or in libvirt XML with + <cpu> + <maxphysaddr mode='passthrough' limit='<IOMMU address width>'/> + </cpu> +The IOMMU address width can be determined with + echo $(( ((0x$(cat /sys/devices/virtual/iommu/dmar0/intel-iommu/cap) & 0x3F0000) >> 16) + 1 )) +Refer to https://edk2.groups.io/g/devel/topic/patch_v1/102359124 for more details + + +Memory View +=========== +IGD has its own address space. To use system RAM as VRAM, a single-level page +table named Global Graphics Translation Table (GTT) is used for the address +translation. 
Each page table entry points to a 4KB page. The illustration below shows +the translation flow on IGD with 64-bit GTT PTEs. + +(PTE_SIZE == 8) +-------------+---+ + | Address | V | V: Valid Bit + +-------------+---+ + | ... | | +IGD:0x01ae9010 0xd740| 0x70ffc000 | 1 | Mem:0x42ba3e010^ +-----------------------> 0xd748| 0x42ba3e000 | 1 +------------------> +(addr >> 12) * PTE_SIZE 0xd750| 0x42ba3f000 | 1 | + | ... | | + +-------------+---+ +^ The address may be remapped by IOMMU + +The memory region storing the GTT is called GTT Stolen Memory (GSM); it is located +right below the Data Stolen Memory (DSM). Accessing this region directly is +not allowed; any access will immediately freeze the whole system. The only way +to access it is through the second half of MMIO BAR0. + +The Data Stolen Memory is reserved by firmware, and acts as the VRAM in pre-OS +environments. In QEMU, guest firmware (SeaBIOS/OVMF) is responsible for +reserving a contiguous region and programming its base address into the BDSM register, +then letting the VBIOS/GOP driver initialize this region. The illustration below shows +how DSM is mapped. + + IGD Addr Space Host Addr Space Guest Addr Space + +-------------+ +-------------+ +-------------+ + | | | | | | + | | | | | | + | | +-------------+ +-------------+ + | | | Data Stolen | | Data Stolen | + | | | (Guest) | | (Guest) | + | | +------------>+-------------+<------->+-------------+<--Guest BDSM + | | | Passthrough | | EPT | | Emulated by QEMU +DSMSIZE+-------------+ | with IOMMU | | Mapping | | Programmed by guest FW + | | | | | | | + | | | | | | | + 0+-------------+--+ | | | | + | +-------------+ | | + | | Data Stolen | +-------------+ + | | (Host) | + +------------>+-------------+<--Host BDSM + Non- | | "real" one in HW + Passthrough | | Programmed by host FW + +-------------+ Footnotes ========= -[1] Nothing precludes adding additional emulated or assigned graphics devices - as non-primary, other than the combination typically not working. 
I only - intend to set user expectations, others are welcome to find working - combinations or fix whatever issues prevent this from working in the common - case. +[1] https://www.intel.com/content/www/us/en/docs/graphics-for-linux/developer-reference/1-0/dump-video-bios.html [2] # echo "vfio-pci" > /sys/bus/pci/devices/0000:00:02.0/driver_override +[3] https://web.archive.org/web/20240827012422/https://bugzilla.tianocore.org/show_bug.cgi?id=935 + Tianocore bugzilla has been down since Jan 2025 :( +[4] https://eci.intel.com/docs/3.3/components/kvm-hypervisor.html, Patch 0001-0004 +[5] https://github.com/tomitamoeko/VfioIgdPkg +[6] https://winraid.level1techs.com/t/tool-guide-news-uefi-bios-updater-ubu/30357 |