Diffstat (limited to 'hw/i386/intel_iommu.c')
-rw-r--r--  hw/i386/intel_iommu.c | 539
1 file changed, 482 insertions(+), 57 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 69d72ad..6a168d5 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -45,6 +45,8 @@
((ce)->val[1] & VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK)
#define VTD_CE_GET_PASID_DIR_TABLE(ce) \
((ce)->val[0] & VTD_PASID_DIR_BASE_ADDR_MASK)
+#define VTD_CE_GET_PRE(ce) \
+ ((ce)->val[0] & VTD_SM_CONTEXT_ENTRY_PRE)
/* pe operations */
#define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT)
@@ -85,13 +87,6 @@ struct vtd_iotlb_key {
static void vtd_address_space_refresh_all(IntelIOMMUState *s);
static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
-static void vtd_panic_require_caching_mode(void)
-{
- error_report("We need to set caching-mode=on for intel-iommu to enable "
- "device assignment with IOMMU protection.");
- exit(1);
-}
-
static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
uint64_t wmask, uint64_t w1cmask)
{
@@ -1838,6 +1833,7 @@ static const bool vtd_qualified_faults[] = {
[VTD_FR_FS_NON_CANONICAL] = true,
[VTD_FR_FS_PAGING_ENTRY_US] = true,
[VTD_FR_SM_WRITE] = true,
+ [VTD_FR_SM_PRE_ABS] = true,
[VTD_FR_SM_INTERRUPT_ADDR] = true,
[VTD_FR_FS_BIT_UPDATE_FAILED] = true,
[VTD_FR_MAX] = false,
@@ -1987,9 +1983,9 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
uint32_t pasid)
{
dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
- uint32_t level = vtd_get_iova_level(s, ce, pasid);
uint32_t offset;
uint64_t flpte, flag_ad = VTD_FL_A;
+ *flpte_level = vtd_get_iova_level(s, ce, pasid);
if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) {
error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 ","
@@ -1998,11 +1994,11 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
}
while (true) {
- offset = vtd_iova_level_offset(iova, level);
+ offset = vtd_iova_level_offset(iova, *flpte_level);
flpte = vtd_get_pte(addr, offset);
if (flpte == (uint64_t)-1) {
- if (level == vtd_get_iova_level(s, ce, pasid)) {
+ if (*flpte_level == vtd_get_iova_level(s, ce, pasid)) {
/* Invalid programming of pasid-entry */
return -VTD_FR_PASID_ENTRY_FSPTPTR_INV;
} else {
@@ -2028,15 +2024,15 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
if (is_write && !(flpte & VTD_FL_RW)) {
return -VTD_FR_SM_WRITE;
}
- if (vtd_flpte_nonzero_rsvd(flpte, level)) {
+ if (vtd_flpte_nonzero_rsvd(flpte, *flpte_level)) {
error_report_once("%s: detected flpte reserved non-zero "
"iova=0x%" PRIx64 ", level=0x%" PRIx32
"flpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")",
- __func__, iova, level, flpte, pasid);
+ __func__, iova, *flpte_level, flpte, pasid);
return -VTD_FR_FS_PAGING_ENTRY_RSVD;
}
- if (vtd_is_last_pte(flpte, level) && is_write) {
+ if (vtd_is_last_pte(flpte, *flpte_level) && is_write) {
flag_ad |= VTD_FL_D;
}
@@ -2044,14 +2040,13 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
return -VTD_FR_FS_BIT_UPDATE_FAILED;
}
- if (vtd_is_last_pte(flpte, level)) {
+ if (vtd_is_last_pte(flpte, *flpte_level)) {
*flptep = flpte;
- *flpte_level = level;
return 0;
}
addr = vtd_get_pte_addr(flpte, aw_bits);
- level--;
+ (*flpte_level)--;
}
}
@@ -2092,7 +2087,8 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
uint8_t bus_num = pci_bus_num(bus);
VTDContextCacheEntry *cc_entry;
uint64_t pte, page_mask;
- uint32_t level, pasid = vtd_as->pasid;
+ uint32_t level = UINT32_MAX;
+ uint32_t pasid = vtd_as->pasid;
uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn);
int ret_fr;
bool is_fpd_set = false;
@@ -2251,14 +2247,19 @@ out:
entry->iova = addr & page_mask;
entry->translated_addr = vtd_get_pte_addr(pte, s->aw_bits) & page_mask;
entry->addr_mask = ~page_mask;
- entry->perm = access_flags;
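+ /*
+ * Presumably because the dirty (D) bit is only set during the walk for
+ * write accesses, a read-only lookup does not report write permission back.
+ */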
+ entry->perm = (is_write ? access_flags : (access_flags & (~IOMMU_WO)));
return true;
error:
vtd_iommu_unlock(s);
entry->iova = 0;
entry->translated_addr = 0;
- entry->addr_mask = 0;
+ /*
+ * Set the mask for ATS (the range must be present even when the
+ * translation fails: PCIe rev 5 10.2.3.5)
+ */
+ entry->addr_mask = (level != UINT32_MAX) ?
+ (~vtd_pt_level_page_mask(level)) : (~VTD_PAGE_MASK_4K);
entry->perm = IOMMU_NONE;
return false;
}
@@ -2503,6 +2504,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
.translated_addr = 0,
.addr_mask = size - 1,
.perm = IOMMU_NONE,
+ .pasid = vtd_as->pasid,
},
};
memory_region_notify_iommu(&vtd_as->iommu, 0, event);
@@ -2695,7 +2697,7 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
uint32_t changed = status ^ val;
trace_vtd_reg_write_gcmd(status, val);
- if ((changed & VTD_GCMD_TE) && s->dma_translation) {
+ if ((changed & VTD_GCMD_TE) && x86_iommu->dma_translation) {
/* Translation enable/disable */
vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
}
@@ -2822,6 +2824,7 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
{
uint64_t mask[4] = {VTD_INV_DESC_WAIT_RSVD_LO, VTD_INV_DESC_WAIT_RSVD_HI,
VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
+ bool ret = true;
if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false,
__func__, "wait")) {
@@ -2833,8 +2836,6 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
uint32_t status_data = (uint32_t)(inv_desc->lo >>
VTD_INV_DESC_WAIT_DATA_SHIFT);
- assert(!(inv_desc->lo & VTD_INV_DESC_WAIT_IF));
-
/* FIXME: need to be masked with HAW? */
dma_addr_t status_addr = inv_desc->hi;
trace_vtd_inv_desc_wait_sw(status_addr, status_data);
@@ -2843,18 +2844,28 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
&status_data, sizeof(status_data),
MEMTXATTRS_UNSPECIFIED)) {
trace_vtd_inv_desc_wait_write_fail(inv_desc->hi, inv_desc->lo);
- return false;
+ ret = false;
}
- } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
+ }
+
+ if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
/* Interrupt flag */
vtd_generate_completion_event(s);
- } else {
+ }
+
+ /*
+ * SW=0, IF=0, FN=1 is also a valid descriptor (VT-d 7.10)
+ * Nothing to do as we process the descriptors in order
+ */
+
+ if (!(inv_desc->lo & (VTD_INV_DESC_WAIT_IF | VTD_INV_DESC_WAIT_SW |
+ VTD_INV_DESC_WAIT_FN))) {
error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64
" (unknown type)", __func__, inv_desc->hi,
inv_desc->lo);
return false;
}
- return true;
+ return ret;
}
static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
@@ -3090,6 +3101,7 @@ static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as,
event.entry.iova = addr;
event.entry.perm = IOMMU_NONE;
event.entry.translated_addr = 0;
+ event.entry.pasid = vtd_dev_as->pasid;
memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
}
@@ -3136,6 +3148,59 @@ static bool vtd_process_device_piotlb_desc(IntelIOMMUState *s,
return true;
}
+static bool vtd_process_page_group_response_desc(IntelIOMMUState *s,
+ VTDInvDesc *inv_desc)
+{
+ VTDAddressSpace *vtd_dev_as;
+ bool pasid_present;
+ uint8_t response_code;
+ uint16_t rid;
+ uint32_t pasid;
+ uint16_t prgi;
+ IOMMUPRIResponse response;
+
+ if ((inv_desc->lo & VTD_INV_DESC_PGRESP_RSVD_LO) ||
+ (inv_desc->hi & VTD_INV_DESC_PGRESP_RSVD_HI)) {
+ error_report_once("%s: invalid page group response desc: hi=%"PRIx64
+ ", lo=%"PRIx64" (reserved nonzero)", __func__,
+ inv_desc->hi, inv_desc->lo);
+ return false;
+ }
+
+ pasid_present = VTD_INV_DESC_PGRESP_PP(inv_desc->lo);
+ response_code = VTD_INV_DESC_PGRESP_RC(inv_desc->lo);
+ rid = VTD_INV_DESC_PGRESP_RID(inv_desc->lo);
+ pasid = VTD_INV_DESC_PGRESP_PASID(inv_desc->lo);
+ prgi = VTD_INV_DESC_PGRESP_PRGI(inv_desc->hi);
+
+ if (!pasid_present) {
+ error_report_once("Page group response without PASID is"
+ "not supported yet");
+ return false;
+ }
+
+ vtd_dev_as = vtd_get_as_by_sid_and_pasid(s, rid, pasid);
+ if (!vtd_dev_as) {
+ return true;
+ }
+
+ response.prgi = prgi;
+
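+ /*
+ * Map the descriptor response code onto the generic PRI result codes:
+ * in the page group response descriptor, 0h means Success and 1h means
+ * Invalid Request; any other value is treated as Response Failure here.
+ */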
+ if (response_code == 0x0u) {
+ response.response_code = IOMMU_PRI_RESP_SUCCESS;
+ } else if (response_code == 0x1u) {
+ response.response_code = IOMMU_PRI_RESP_INVALID_REQUEST;
+ } else {
+ response.response_code = IOMMU_PRI_RESP_FAILURE;
+ }
+
+ if (vtd_dev_as->pri_notifier) {
+ vtd_dev_as->pri_notifier->notify(vtd_dev_as->pri_notifier, &response);
+ }
+
+ return true;
+}
+
static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
{
@@ -3236,6 +3301,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
}
break;
+ case VTD_INV_DESC_PGRESP:
+ trace_vtd_inv_desc("page group response", inv_desc.hi, inv_desc.lo);
+ if (!vtd_process_page_group_response_desc(s, &inv_desc)) {
+ return false;
+ }
+ break;
+
/*
* TODO: the entity of below two cases will be implemented in future series.
* To make guest (which integrates scalable mode support patch set in
@@ -3370,6 +3442,27 @@ static void vtd_handle_iectl_write(IntelIOMMUState *s)
}
}
+static void vtd_handle_prs_write(IntelIOMMUState *s)
+{
+ uint32_t prs = vtd_get_long_raw(s, DMAR_PRS_REG);
+ if (!(prs & VTD_PR_STATUS_PPR) && !(prs & VTD_PR_STATUS_PRO)) {
+ vtd_set_clear_mask_long(s, DMAR_PECTL_REG, VTD_PR_PECTL_IP, 0);
+ }
+}
+
+static void vtd_handle_pectl_write(IntelIOMMUState *s)
+{
+ uint32_t pectl = vtd_get_long_raw(s, DMAR_PECTL_REG);
+ if ((pectl & VTD_PR_PECTL_IP) && !(pectl & VTD_PR_PECTL_IM)) {
+ /*
+ * If IP field was 1 when software clears the IM field,
+ * the interrupt is generated along with clearing the IP field.
+ */
+ vtd_set_clear_mask_long(s, DMAR_PECTL_REG, VTD_PR_PECTL_IP, 0);
+ vtd_generate_interrupt(s, DMAR_PEADDR_REG, DMAR_PEDATA_REG);
+ }
+}
+
static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
{
IntelIOMMUState *s = opaque;
@@ -3412,6 +3505,11 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
val = s->iq >> 32;
break;
+ case DMAR_PEUADDR_REG:
+ assert(size == 4);
+ val = vtd_get_long_raw(s, DMAR_PEUADDR_REG);
+ break;
+
default:
if (size == 4) {
val = vtd_get_long(s, addr);
@@ -3475,6 +3573,11 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
vtd_handle_iotlb_write(s);
break;
+ case DMAR_PEUADDR_REG:
+ assert(size == 4);
+ vtd_set_long(s, addr, val);
+ break;
+
/* Invalidate Address Register, 64-bit */
case DMAR_IVA_REG:
if (size == 4) {
@@ -3655,6 +3758,18 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
vtd_set_long(s, addr, val);
break;
+ case DMAR_PRS_REG:
+ assert(size == 4);
+ vtd_set_long(s, addr, val);
+ vtd_handle_prs_write(s);
+ break;
+
+ case DMAR_PECTL_REG:
+ assert(size == 4);
+ vtd_set_long(s, addr, val);
+ vtd_handle_pectl_write(s);
+ break;
+
default:
if (size == 4) {
vtd_set_long(s, addr, val);
@@ -3672,6 +3787,7 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
IOMMUTLBEntry iotlb = {
/* We'll fill in the rest later. */
.target_as = &address_space_memory,
+ .pasid = vtd_as->pasid,
};
bool success;
@@ -3824,7 +3940,6 @@ static const Property vtd_properties[] = {
DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
- DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true),
DEFINE_PROP_BOOL("stale-tm", IntelIOMMUState, stale_tm, false),
DEFINE_PROP_BOOL("fs1gp", IntelIOMMUState, fs1gp, true),
};
@@ -4367,6 +4482,12 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
assert(hiod);
+ if (!s->caching_mode) {
+ error_setg(errp, "Device assignment is not allowed without enabling "
+ "caching-mode=on for Intel IOMMU.");
+ return false;
+ }
+
vtd_iommu_lock(s);
if (g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) {
@@ -4538,11 +4659,11 @@ static void vtd_cap_init(IntelIOMMUState *s)
s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
- VTD_CAP_MGAW(s->aw_bits);
+ VTD_CAP_ESRTPS | VTD_CAP_MGAW(s->aw_bits);
if (s->dma_drain) {
s->cap |= VTD_CAP_DRAIN;
}
- if (s->dma_translation) {
+ if (x86_iommu->dma_translation) {
if (s->aw_bits >= VTD_HOST_AW_39BIT) {
s->cap |= VTD_CAP_SAGAW_39bit;
}
@@ -4587,7 +4708,7 @@ static void vtd_cap_init(IntelIOMMUState *s)
}
if (s->pasid) {
- s->ecap |= VTD_ECAP_PASID;
+ s->ecap |= VTD_ECAP_PASID | VTD_ECAP_PSS;
}
}
@@ -4705,6 +4826,18 @@ static void vtd_init(IntelIOMMUState *s)
* Interrupt remapping registers.
*/
vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff80fULL, 0);
+
+ /* Page request registers */
+ if (s->ecap & VTD_ECAP_PRS) {
+ vtd_define_quad(s, DMAR_PQH_REG, 0, 0x7ffe0ULL, 0);
+ vtd_define_quad(s, DMAR_PQT_REG, 0, 0x7ffe0ULL, 0);
+ vtd_define_quad(s, DMAR_PQA_REG, 0, 0xfffffffffffff007ULL, 0);
+ vtd_define_long(s, DMAR_PRS_REG, 0, 0, 0x3UL);
+ vtd_define_long(s, DMAR_PECTL_REG, 0, 0x80000000UL, 0);
+ vtd_define_long(s, DMAR_PEDATA_REG, 0, 0xffffUL, 0);
+ vtd_define_long(s, DMAR_PEADDR_REG, 0, 0xfffffffcUL, 0);
+ vtd_define_long(s, DMAR_PEUADDR_REG, 0, 0xffffffffUL, 0);
+ }
}
/* Should not reset address_spaces when reset because devices will still use
@@ -4730,10 +4863,329 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
return &vtd_as->as;
}
+static IOMMUTLBEntry vtd_iommu_ats_do_translate(IOMMUMemoryRegion *iommu,
+ hwaddr addr,
+ IOMMUAccessFlags flags)
+{
+ IOMMUTLBEntry entry;
+ VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
+
+ if (vtd_is_interrupt_addr(addr)) {
+ vtd_report_ir_illegal_access(vtd_as, addr, flags & IOMMU_WO);
+ entry.target_as = &address_space_memory;
+ entry.iova = 0;
+ entry.translated_addr = 0;
+ entry.addr_mask = ~VTD_PAGE_MASK_4K;
+ entry.perm = IOMMU_NONE;
+ entry.pasid = PCI_NO_PASID;
+ } else {
+ entry = vtd_iommu_translate(iommu, addr, flags, 0);
+ }
+
+ return entry;
+}
+
+static ssize_t vtd_ats_request_translation(PCIBus *bus, void *opaque,
+ int devfn, uint32_t pasid,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write, IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count)
+{
+ IntelIOMMUState *s = opaque;
+ VTDAddressSpace *vtd_as;
+ IOMMUAccessFlags flags = IOMMU_ACCESS_FLAG_FULL(true, !no_write, exec_req,
+ priv_req, false, false);
+ ssize_t res_index = 0;
+ hwaddr target_address = addr + length;
+ IOMMUTLBEntry entry;
+
+ vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+ *err_count = 0;
+
+ while ((addr < target_address) && (res_index < result_length)) {
+ entry = vtd_iommu_ats_do_translate(&vtd_as->iommu, addr, flags);
+ entry.perm &= ~IOMMU_GLOBAL; /* Spec 4.1.2: Global Mapping never set */
+
+ if ((entry.perm & flags) != flags) {
+ *err_count += 1; /* Less than expected */
+ }
+
+ result[res_index] = entry;
+ res_index += 1;
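+ /* Continue from the first address after the range just translated */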
+ addr = (addr & (~entry.addr_mask)) + (entry.addr_mask + 1);
+ }
+
+ /* Buffer too small */
+ if (addr < target_address) {
+ return -ENOMEM;
+ }
+
+ return res_index;
+}
+
+/* 11.4.11.3: The number of entries in the page request queue is 2^(PQS + 7) */
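+/* e.g. PQS = 0 gives 2^7 = 128 entries and PQS = 3 gives 2^10 = 1024 entries */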
+static inline uint64_t vtd_prq_size(IntelIOMMUState *s)
+{
+ return 1ULL << ((vtd_get_quad(s, DMAR_PQA_REG) & VTD_PQA_SIZE) + 7);
+}
+
+/**
+ * Return true if the PRE bit is accessible and set, false otherwise
+ */
+static bool vtd_check_pre_bit(VTDAddressSpace *vtd_as, hwaddr addr,
+ uint16_t sid, bool is_write)
+{
+ int ret;
+ IntelIOMMUState *s = vtd_as->iommu_state;
+ uint8_t bus_n = pci_bus_num(vtd_as->bus);
+ VTDContextEntry ce;
+ bool is_fpd_set = false;
+
+ ret = vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce);
+
+ if (ret) {
+ goto error_report;
+ }
+
+ if (!VTD_CE_GET_PRE(&ce)) {
+ ret = -VTD_FR_SM_PRE_ABS;
+ goto error_get_fpd_and_report;
+ }
+
+ return true;
+
+error_get_fpd_and_report:
+ /* Try to get fpd (may not work but we are already on an error path) */
+ is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
+ vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid);
+error_report:
+ vtd_report_fault(s, -ret, is_fpd_set, sid, addr, is_write,
+ vtd_as->pasid != PCI_NO_PASID, vtd_as->pasid);
+ return false;
+}
+
+/* Logic described in section 7.5 */
+static void vtd_generate_page_request_event(IntelIOMMUState *s,
+ uint32_t old_pr_status)
+{
+ uint32_t current_pectl = vtd_get_long(s, DMAR_PECTL_REG);
+ /*
+ * Hardware evaluates the PPR and PRO fields in the Page Request Status
+ * Register and, if either of them is set, does not generate a Page
+ * Request Event
+ */
+ if (old_pr_status & (VTD_PR_STATUS_PRO | VTD_PR_STATUS_PPR)) {
+ return;
+ }
+
+ vtd_set_clear_mask_long(s, DMAR_PECTL_REG, 0, VTD_PR_PECTL_IP);
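+ /*
+ * IP is latched first; if the interrupt is not masked it is cleared
+ * again right away and the interrupt is generated.
+ */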
+ if (!(current_pectl & VTD_PR_PECTL_IM)) {
+ vtd_set_clear_mask_long(s, DMAR_PECTL_REG, VTD_PR_PECTL_IP, 0);
+ vtd_generate_interrupt(s, DMAR_PEADDR_REG, DMAR_PEDATA_REG);
+ }
+}
+
+/* When calling this function, we know that we are in scalable mode */
+static int vtd_pri_perform_implicit_invalidation(VTDAddressSpace *vtd_as,
+ hwaddr addr)
+{
+ IntelIOMMUState *s = vtd_as->iommu_state;
+ VTDContextEntry ce;
+ VTDPASIDEntry pe;
+ uint16_t pgtt;
+ uint16_t domain_id;
+ int ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+ vtd_as->devfn, &ce);
+ if (ret) {
+ return -EINVAL;
+ }
+ ret = vtd_ce_get_rid2pasid_entry(s, &ce, &pe, vtd_as->pasid);
+ if (ret) {
+ return -EINVAL;
+ }
+ pgtt = VTD_PE_GET_TYPE(&pe);
+ domain_id = VTD_SM_PASID_ENTRY_DID(pe.val[1]);
+ ret = 0;
+ switch (pgtt) {
+ case VTD_SM_PASID_ENTRY_FLT:
+ vtd_piotlb_page_invalidate(s, domain_id, vtd_as->pasid, addr, 0);
+ break;
+ /* Room for other pgtt values */
+ default:
+ error_report_once("Translation type not supported yet : %d", pgtt);
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+/* Page Request Descriptor: 7.4.1.1 */
+static int vtd_pri_request_page(PCIBus *bus, void *opaque, int devfn,
+ uint32_t pasid, bool priv_req, bool exec_req,
+ hwaddr addr, bool lpig, uint16_t prgi,
+ bool is_read, bool is_write)
+{
+ IntelIOMMUState *s = opaque;
+ VTDAddressSpace *vtd_as;
+
+ vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+
+ uint64_t queue_addr_reg = vtd_get_quad(s, DMAR_PQA_REG);
+ uint64_t queue_tail_offset_reg = vtd_get_quad(s, DMAR_PQT_REG);
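+ /*
+ * The tail advances by one descriptor and wraps at the end of the queue,
+ * e.g. assuming 32-byte descriptors and a 128-entry queue, offset 4064
+ * wraps back to 0.
+ */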
+ uint64_t new_queue_tail_offset = (
+ (queue_tail_offset_reg + VTD_PQA_ENTRY_SIZE) %
+ (vtd_prq_size(s) * VTD_PQA_ENTRY_SIZE));
+ uint64_t queue_head_offset_reg = vtd_get_quad(s, DMAR_PQH_REG);
+ hwaddr queue_tail = (queue_addr_reg & VTD_PQA_ADDR) + queue_tail_offset_reg;
+ uint32_t old_pr_status = vtd_get_long(s, DMAR_PRS_REG);
+ uint16_t sid = PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn);
+ VTDPRDesc desc;
+
+ if (!(s->ecap & VTD_ECAP_PRS)) {
+ return -EPERM;
+ }
+
+ /*
+ * No need to check if scalable mode is enabled as we already know that
+ * VTD_ECAP_PRS is set (see vtd_decide_config)
+ */
+
+ /* We do not support PRI without PASID */
+ if (vtd_as->pasid == PCI_NO_PASID) {
+ return -EPERM;
+ }
+ if (exec_req && !is_read) {
+ return -EINVAL;
+ }
+
+ /* Check PRE bit in the scalable mode context entry */
+ if (!vtd_check_pre_bit(vtd_as, addr, sid, is_write)) {
+ return -EPERM;
+ }
+
+ if (old_pr_status & VTD_PR_STATUS_PRO) {
+ /*
+ * No action is taken by hardware to report a fault
+ * or generate an event
+ */
+ return -ENOSPC;
+ }
+
+ /* Check for overflow */
+ if (new_queue_tail_offset == queue_head_offset_reg) {
+ vtd_set_clear_mask_long(s, DMAR_PRS_REG, 0, VTD_PR_STATUS_PRO);
+ vtd_generate_page_request_event(s, old_pr_status);
+ return -ENOSPC;
+ }
+
+ if (vtd_pri_perform_implicit_invalidation(vtd_as, addr)) {
+ return -EINVAL;
+ }
+
+ desc.lo = VTD_PRD_TYPE | VTD_PRD_PP(true) | VTD_PRD_RID(sid) |
+ VTD_PRD_PASID(vtd_as->pasid) | VTD_PRD_PMR(priv_req);
+ desc.hi = VTD_PRD_RDR(is_read) | VTD_PRD_WRR(is_write) |
+ VTD_PRD_LPIG(lpig) | VTD_PRD_PRGI(prgi) | VTD_PRD_ADDR(addr);
+
+ desc.lo = cpu_to_le64(desc.lo);
+ desc.hi = cpu_to_le64(desc.hi);
+ if (dma_memory_write(&address_space_memory, queue_tail, &desc, sizeof(desc),
+ MEMTXATTRS_UNSPECIFIED)) {
+ error_report_once("IO error, the PQ tail cannot be updated");
+ return -EIO;
+ }
+
+ /* increment the tail register and set the pending request bit */
+ vtd_set_quad(s, DMAR_PQT_REG, new_queue_tail_offset);
+ /*
+ * Read the status again so that the kernel does not miss a request.
+ * In some cases we can trigger an unnecessary interrupt, but this
+ * strategy drastically improves performance as we don't need to take a lock.
+ */
+ old_pr_status = vtd_get_long(s, DMAR_PRS_REG);
+ if (!(old_pr_status & VTD_PR_STATUS_PPR)) {
+ vtd_set_clear_mask_long(s, DMAR_PRS_REG, 0, VTD_PR_STATUS_PPR);
+ vtd_generate_page_request_event(s, old_pr_status);
+ }
+
+ return 0;
+}
+
+static void vtd_init_iotlb_notifier(PCIBus *bus, void *opaque, int devfn,
+ IOMMUNotifier *n, IOMMUNotify fn,
+ void *user_opaque)
+{
+ n->opaque = user_opaque;
+ iommu_notifier_init(n, fn, IOMMU_NOTIFIER_DEVIOTLB_EVENTS, 0,
+ HWADDR_MAX, 0);
+}
+
+static void vtd_get_iotlb_info(void *opaque, uint8_t *addr_width,
+ uint32_t *min_page_size)
+{
+ IntelIOMMUState *s = opaque;
+
+ *addr_width = s->aw_bits;
+ *min_page_size = VTD_PAGE_SIZE;
+}
+
+static void vtd_register_iotlb_notifier(PCIBus *bus, void *opaque,
+ int devfn, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ IntelIOMMUState *s = opaque;
+ VTDAddressSpace *vtd_as;
+
+ vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+ memory_region_register_iommu_notifier(MEMORY_REGION(&vtd_as->iommu), n,
+ &error_fatal);
+}
+
+static void vtd_unregister_iotlb_notifier(PCIBus *bus, void *opaque,
+ int devfn, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ IntelIOMMUState *s = opaque;
+ VTDAddressSpace *vtd_as;
+
+ vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+ memory_region_unregister_iommu_notifier(MEMORY_REGION(&vtd_as->iommu), n);
+}
+
+static void vtd_pri_register_notifier(PCIBus *bus, void *opaque, int devfn,
+ uint32_t pasid, IOMMUPRINotifier *notifier)
+{
+ IntelIOMMUState *s = opaque;
+ VTDAddressSpace *vtd_as;
+
+ vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+ vtd_as->pri_notifier = notifier;
+}
+
+static void vtd_pri_unregister_notifier(PCIBus *bus, void *opaque,
+ int devfn, uint32_t pasid)
+{
+ IntelIOMMUState *s = opaque;
+ VTDAddressSpace *vtd_as;
+
+ vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+ vtd_as->pri_notifier = NULL;
+}
+
static PCIIOMMUOps vtd_iommu_ops = {
.get_address_space = vtd_host_dma_iommu,
.set_iommu_device = vtd_dev_set_iommu_device,
.unset_iommu_device = vtd_dev_unset_iommu_device,
+ .get_iotlb_info = vtd_get_iotlb_info,
+ .init_iotlb_notifier = vtd_init_iotlb_notifier,
+ .register_iotlb_notifier = vtd_register_iotlb_notifier,
+ .unregister_iotlb_notifier = vtd_unregister_iotlb_notifier,
+ .ats_request_translation = vtd_ats_request_translation,
+ .pri_register_notifier = vtd_pri_register_notifier,
+ .pri_unregister_notifier = vtd_pri_unregister_notifier,
+ .pri_request_page = vtd_pri_request_page,
};
static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
@@ -4791,32 +5243,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
return true;
}
-static int vtd_machine_done_notify_one(Object *child, void *unused)
-{
- IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
-
- /*
- * We hard-coded here because vfio-pci is the only special case
- * here. Let's be more elegant in the future when we can, but so
- * far there seems to be no better way.
- */
- if (object_dynamic_cast(child, "vfio-pci") && !iommu->caching_mode) {
- vtd_panic_require_caching_mode();
- }
-
- return 0;
-}
-
-static void vtd_machine_done_hook(Notifier *notifier, void *unused)
-{
- object_child_foreach_recursive(object_get_root(),
- vtd_machine_done_notify_one, NULL);
-}
-
-static Notifier vtd_machine_done_notify = {
- .notify = vtd_machine_done_hook,
-};
-
static void vtd_realize(DeviceState *dev, Error **errp)
{
MachineState *ms = MACHINE(qdev_get_machine());
@@ -4871,7 +5297,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)
pci_setup_iommu(bus, &vtd_iommu_ops, dev);
/* Pseudo address space under root PCI bus. */
x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
- qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);
}
static void vtd_class_init(ObjectClass *klass, const void *data)