Diffstat (limited to 'hw/i386/intel_iommu.c')
-rw-r--r-- | hw/i386/intel_iommu.c | 539
1 file changed, 482 insertions, 57 deletions
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 69d72ad..6a168d5 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -45,6 +45,8 @@
     ((ce)->val[1] & VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK)
 #define VTD_CE_GET_PASID_DIR_TABLE(ce) \
     ((ce)->val[0] & VTD_PASID_DIR_BASE_ADDR_MASK)
+#define VTD_CE_GET_PRE(ce) \
+    ((ce)->val[0] & VTD_SM_CONTEXT_ENTRY_PRE)
 
 /* pe operations */
 #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT)
@@ -85,13 +87,6 @@ struct vtd_iotlb_key {
 static void vtd_address_space_refresh_all(IntelIOMMUState *s);
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
 
-static void vtd_panic_require_caching_mode(void)
-{
-    error_report("We need to set caching-mode=on for intel-iommu to enable "
-                 "device assignment with IOMMU protection.");
-    exit(1);
-}
-
 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
                             uint64_t wmask, uint64_t w1cmask)
 {
@@ -1838,6 +1833,7 @@ static const bool vtd_qualified_faults[] = {
     [VTD_FR_FS_NON_CANONICAL] = true,
     [VTD_FR_FS_PAGING_ENTRY_US] = true,
     [VTD_FR_SM_WRITE] = true,
+    [VTD_FR_SM_PRE_ABS] = true,
     [VTD_FR_SM_INTERRUPT_ADDR] = true,
     [VTD_FR_FS_BIT_UPDATE_FAILED] = true,
     [VTD_FR_MAX] = false,
@@ -1987,9 +1983,9 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
                              uint32_t pasid)
 {
     dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
-    uint32_t level = vtd_get_iova_level(s, ce, pasid);
     uint32_t offset;
     uint64_t flpte, flag_ad = VTD_FL_A;
+    *flpte_level = vtd_get_iova_level(s, ce, pasid);
 
     if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) {
         error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 ","
@@ -1998,11 +1994,11 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
     }
 
     while (true) {
-        offset = vtd_iova_level_offset(iova, level);
+        offset = vtd_iova_level_offset(iova, *flpte_level);
         flpte = vtd_get_pte(addr, offset);
 
         if (flpte == (uint64_t)-1) {
-            if (level == vtd_get_iova_level(s, ce, pasid)) {
+            if (*flpte_level == vtd_get_iova_level(s, ce, pasid)) {
                 /* Invalid programming of pasid-entry */
                 return -VTD_FR_PASID_ENTRY_FSPTPTR_INV;
             } else {
@@ -2028,15 +2024,15 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
         if (is_write && !(flpte & VTD_FL_RW)) {
             return -VTD_FR_SM_WRITE;
         }
-        if (vtd_flpte_nonzero_rsvd(flpte, level)) {
+        if (vtd_flpte_nonzero_rsvd(flpte, *flpte_level)) {
             error_report_once("%s: detected flpte reserved non-zero "
                               "iova=0x%" PRIx64 ", level=0x%" PRIx32
                               "flpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")",
-                              __func__, iova, level, flpte, pasid);
+                              __func__, iova, *flpte_level, flpte, pasid);
             return -VTD_FR_FS_PAGING_ENTRY_RSVD;
         }
 
-        if (vtd_is_last_pte(flpte, level) && is_write) {
+        if (vtd_is_last_pte(flpte, *flpte_level) && is_write) {
             flag_ad |= VTD_FL_D;
         }
 
@@ -2044,14 +2040,13 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
             return -VTD_FR_FS_BIT_UPDATE_FAILED;
         }
 
-        if (vtd_is_last_pte(flpte, level)) {
+        if (vtd_is_last_pte(flpte, *flpte_level)) {
             *flptep = flpte;
-            *flpte_level = level;
             return 0;
         }
 
         addr = vtd_get_pte_addr(flpte, aw_bits);
-        level--;
+        (*flpte_level)--;
     }
 }
 
@@ -2092,7 +2087,8 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
     uint8_t bus_num = pci_bus_num(bus);
     VTDContextCacheEntry *cc_entry;
     uint64_t pte, page_mask;
-    uint32_t level, pasid = vtd_as->pasid;
+    uint32_t level = UINT32_MAX;
+    uint32_t pasid = vtd_as->pasid;
     uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn);
     int ret_fr;
     bool is_fpd_set = false;
@@ -2251,14 +2247,19 @@ out:
     entry->iova = addr & page_mask;
     entry->translated_addr = vtd_get_pte_addr(pte, s->aw_bits) & page_mask;
     entry->addr_mask = ~page_mask;
-    entry->perm = access_flags;
+    entry->perm = (is_write ? access_flags : (access_flags & (~IOMMU_WO)));
     return true;
 
 error:
     vtd_iommu_unlock(s);
     entry->iova = 0;
     entry->translated_addr = 0;
-    entry->addr_mask = 0;
+    /*
+     * Set the mask for ATS (the range must be present even when the
+     * translation fails: PCIe rev 5, 10.2.3.5)
+     */
+    entry->addr_mask = (level != UINT32_MAX) ?
+                       (~vtd_pt_level_page_mask(level)) : (~VTD_PAGE_MASK_4K);
     entry->perm = IOMMU_NONE;
     return false;
 }
@@ -2503,6 +2504,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
                         .translated_addr = 0,
                         .addr_mask = size - 1,
                         .perm = IOMMU_NONE,
+                        .pasid = vtd_as->pasid,
                     },
                 };
                 memory_region_notify_iommu(&vtd_as->iommu, 0, event);
@@ -2695,7 +2697,7 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
     uint32_t changed = status ^ val;
 
     trace_vtd_reg_write_gcmd(status, val);
-    if ((changed & VTD_GCMD_TE) && s->dma_translation) {
+    if ((changed & VTD_GCMD_TE) && x86_iommu->dma_translation) {
         /* Translation enable/disable */
         vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
     }
@@ -2822,6 +2824,7 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
 {
     uint64_t mask[4] = {VTD_INV_DESC_WAIT_RSVD_LO, VTD_INV_DESC_WAIT_RSVD_HI,
                         VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
+    bool ret = true;
 
     if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false,
                                      __func__, "wait")) {
@@ -2833,8 +2836,6 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
         uint32_t status_data = (uint32_t)(inv_desc->lo >>
                                           VTD_INV_DESC_WAIT_DATA_SHIFT);
 
-        assert(!(inv_desc->lo & VTD_INV_DESC_WAIT_IF));
-
         /* FIXME: need to be masked with HAW? */
         dma_addr_t status_addr = inv_desc->hi;
         trace_vtd_inv_desc_wait_sw(status_addr, status_data);
@@ -2843,18 +2844,28 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
                               &status_data, sizeof(status_data),
                               MEMTXATTRS_UNSPECIFIED)) {
             trace_vtd_inv_desc_wait_write_fail(inv_desc->hi, inv_desc->lo);
-            return false;
+            ret = false;
         }
-    } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
+    }
+
+    if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
         /* Interrupt flag */
         vtd_generate_completion_event(s);
-    } else {
+    }
+
+    /*
+     * SW=0, IF=0, FN=1 is also a valid descriptor (VT-d 7.10)
+     * Nothing to do as we process the descriptors in order
+     */
+
+    if (!(inv_desc->lo & (VTD_INV_DESC_WAIT_IF | VTD_INV_DESC_WAIT_SW |
+                          VTD_INV_DESC_WAIT_FN))) {
         error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64
                           " (unknown type)", __func__, inv_desc->hi,
                           inv_desc->lo);
         return false;
     }
-    return true;
+    return ret;
 }
 
 static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
@@ -3090,6 +3101,7 @@ static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as,
     event.entry.iova = addr;
     event.entry.perm = IOMMU_NONE;
     event.entry.translated_addr = 0;
+    event.entry.pasid = vtd_dev_as->pasid;
     memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
 }
 
@@ -3136,6 +3148,59 @@ static bool vtd_process_device_piotlb_desc(IntelIOMMUState *s,
     return true;
 }
 
+static bool vtd_process_page_group_response_desc(IntelIOMMUState *s,
+                                                 VTDInvDesc *inv_desc)
+{
+    VTDAddressSpace *vtd_dev_as;
+    bool pasid_present;
+    uint8_t response_code;
+    uint16_t rid;
+    uint32_t pasid;
+    uint16_t prgi;
+    IOMMUPRIResponse response;
+
+    if ((inv_desc->lo & VTD_INV_DESC_PGRESP_RSVD_LO) ||
+        (inv_desc->hi & VTD_INV_DESC_PGRESP_RSVD_HI)) {
+        error_report_once("%s: invalid page group response desc: hi=%"PRIx64
+                          ", lo=%"PRIx64" (reserved nonzero)", __func__,
+                          inv_desc->hi, inv_desc->lo);
+        return false;
+    }
+
+    pasid_present = VTD_INV_DESC_PGRESP_PP(inv_desc->lo);
+    response_code = VTD_INV_DESC_PGRESP_RC(inv_desc->lo);
+    rid = VTD_INV_DESC_PGRESP_RID(inv_desc->lo);
+    pasid = VTD_INV_DESC_PGRESP_PASID(inv_desc->lo);
+    prgi = VTD_INV_DESC_PGRESP_PRGI(inv_desc->hi);
+
+    if (!pasid_present) {
+        error_report_once("Page group response without PASID is "
+                          "not supported yet");
+        return false;
+    }
+
+    vtd_dev_as = vtd_get_as_by_sid_and_pasid(s, rid, pasid);
+    if (!vtd_dev_as) {
+        return true;
+    }
+
+    response.prgi = prgi;
+
+    if (response_code == 0x0u) {
+        response.response_code = IOMMU_PRI_RESP_SUCCESS;
+    } else if (response_code == 0x1u) {
+        response.response_code = IOMMU_PRI_RESP_INVALID_REQUEST;
+    } else {
+        response.response_code = IOMMU_PRI_RESP_FAILURE;
+    }
+
+    if (vtd_dev_as->pri_notifier) {
+        vtd_dev_as->pri_notifier->notify(vtd_dev_as->pri_notifier, &response);
+    }
+
+    return true;
+}
+
 static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
                                           VTDInvDesc *inv_desc)
 {
@@ -3236,6 +3301,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
         }
         break;
 
+    case VTD_INV_DESC_PGRESP:
+        trace_vtd_inv_desc("page group response", inv_desc.hi, inv_desc.lo);
+        if (!vtd_process_page_group_response_desc(s, &inv_desc)) {
+            return false;
+        }
+        break;
+
     /*
      * TODO: the entity of below two cases will be implemented in future series.
      * To make guest (which integrates scalable mode support patch set in
@@ -3370,6 +3442,27 @@ static void vtd_handle_iectl_write(IntelIOMMUState *s)
     }
 }
 
+static void vtd_handle_prs_write(IntelIOMMUState *s)
+{
+    uint32_t prs = vtd_get_long_raw(s, DMAR_PRS_REG);
+    if (!(prs & VTD_PR_STATUS_PPR) && !(prs & VTD_PR_STATUS_PRO)) {
+        vtd_set_clear_mask_long(s, DMAR_PECTL_REG, VTD_PR_PECTL_IP, 0);
+    }
+}
+
+static void vtd_handle_pectl_write(IntelIOMMUState *s)
+{
+    uint32_t pectl = vtd_get_long_raw(s, DMAR_PECTL_REG);
+    if ((pectl & VTD_PR_PECTL_IP) && !(pectl & VTD_PR_PECTL_IM)) {
+        /*
+         * If IP field was 1 when software clears the IM field,
+         * the interrupt is generated along with clearing the IP field.
+         */
+        vtd_set_clear_mask_long(s, DMAR_PECTL_REG, VTD_PR_PECTL_IP, 0);
+        vtd_generate_interrupt(s, DMAR_PEADDR_REG, DMAR_PEDATA_REG);
+    }
+}
+
 static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
 {
     IntelIOMMUState *s = opaque;
@@ -3412,6 +3505,11 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
         val = s->iq >> 32;
         break;
 
+    case DMAR_PEUADDR_REG:
+        assert(size == 4);
+        val = vtd_get_long_raw(s, DMAR_PEUADDR_REG);
+        break;
+
     default:
         if (size == 4) {
             val = vtd_get_long(s, addr);
@@ -3475,6 +3573,11 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
         vtd_handle_iotlb_write(s);
         break;
 
+    case DMAR_PEUADDR_REG:
+        assert(size == 4);
+        vtd_set_long(s, addr, val);
+        break;
+
     /* Invalidate Address Register, 64-bit */
     case DMAR_IVA_REG:
         if (size == 4) {
@@ -3655,6 +3758,18 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
         vtd_set_long(s, addr, val);
         break;
 
+    case DMAR_PRS_REG:
+        assert(size == 4);
+        vtd_set_long(s, addr, val);
+        vtd_handle_prs_write(s);
+        break;
+
+    case DMAR_PECTL_REG:
+        assert(size == 4);
+        vtd_set_long(s, addr, val);
+        vtd_handle_pectl_write(s);
+        break;
+
     default:
         if (size == 4) {
             vtd_set_long(s, addr, val);
@@ -3672,6 +3787,7 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
     IOMMUTLBEntry iotlb = {
         /* We'll fill in the rest later. */
         .target_as = &address_space_memory,
+        .pasid = vtd_as->pasid,
     };
     bool success;
 
@@ -3824,7 +3940,6 @@ static const Property vtd_properties[] = {
     DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
     DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
     DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
-    DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true),
     DEFINE_PROP_BOOL("stale-tm", IntelIOMMUState, stale_tm, false),
     DEFINE_PROP_BOOL("fs1gp", IntelIOMMUState, fs1gp, true),
 };
@@ -4367,6 +4482,12 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
 
     assert(hiod);
 
+    if (!s->caching_mode) {
+        error_setg(errp, "Device assignment is not allowed without enabling "
+                   "caching-mode=on for Intel IOMMU.");
+        return false;
+    }
+
     vtd_iommu_lock(s);
 
     if (g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) {
@@ -4538,11 +4659,11 @@ static void vtd_cap_init(IntelIOMMUState *s)
     s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
              VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
-             VTD_CAP_MGAW(s->aw_bits);
+             VTD_CAP_ESRTPS | VTD_CAP_MGAW(s->aw_bits);
     if (s->dma_drain) {
         s->cap |= VTD_CAP_DRAIN;
     }
-    if (s->dma_translation) {
+    if (x86_iommu->dma_translation) {
         if (s->aw_bits >= VTD_HOST_AW_39BIT) {
             s->cap |= VTD_CAP_SAGAW_39bit;
         }
@@ -4587,7 +4708,7 @@ static void vtd_cap_init(IntelIOMMUState *s)
     }
 
     if (s->pasid) {
-        s->ecap |= VTD_ECAP_PASID;
+        s->ecap |= VTD_ECAP_PASID | VTD_ECAP_PSS;
     }
 }
 
@@ -4705,6 +4826,18 @@ static void vtd_init(IntelIOMMUState *s)
      * Interrupt remapping registers.
      */
     vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff80fULL, 0);
+
+    /* Page request registers */
+    if (s->ecap & VTD_ECAP_PRS) {
+        vtd_define_quad(s, DMAR_PQH_REG, 0, 0x7ffe0ULL, 0);
+        vtd_define_quad(s, DMAR_PQT_REG, 0, 0x7ffe0ULL, 0);
+        vtd_define_quad(s, DMAR_PQA_REG, 0, 0xfffffffffffff007ULL, 0);
+        vtd_define_long(s, DMAR_PRS_REG, 0, 0, 0x3UL);
+        vtd_define_long(s, DMAR_PECTL_REG, 0, 0x80000000UL, 0);
+        vtd_define_long(s, DMAR_PEDATA_REG, 0, 0xffffUL, 0);
+        vtd_define_long(s, DMAR_PEADDR_REG, 0, 0xfffffffcUL, 0);
+        vtd_define_long(s, DMAR_PEUADDR_REG, 0, 0xffffffffUL, 0);
+    }
 }
 
 /* Should not reset address_spaces when reset because devices will still use
@@ -4730,10 +4863,329 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     return &vtd_as->as;
 }
 
+static IOMMUTLBEntry vtd_iommu_ats_do_translate(IOMMUMemoryRegion *iommu,
+                                                hwaddr addr,
+                                                IOMMUAccessFlags flags)
+{
+    IOMMUTLBEntry entry;
+    VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
+
+    if (vtd_is_interrupt_addr(addr)) {
+        vtd_report_ir_illegal_access(vtd_as, addr, flags & IOMMU_WO);
+        entry.target_as = &address_space_memory;
+        entry.iova = 0;
+        entry.translated_addr = 0;
+        entry.addr_mask = ~VTD_PAGE_MASK_4K;
+        entry.perm = IOMMU_NONE;
+        entry.pasid = PCI_NO_PASID;
+    } else {
+        entry = vtd_iommu_translate(iommu, addr, flags, 0);
+    }
+
+    return entry;
+}
+
+static ssize_t vtd_ats_request_translation(PCIBus *bus, void *opaque,
+                                           int devfn, uint32_t pasid,
+                                           bool priv_req, bool exec_req,
+                                           hwaddr addr, size_t length,
+                                           bool no_write, IOMMUTLBEntry *result,
+                                           size_t result_length,
+                                           uint32_t *err_count)
+{
+    IntelIOMMUState *s = opaque;
+    VTDAddressSpace *vtd_as;
+    IOMMUAccessFlags flags = IOMMU_ACCESS_FLAG_FULL(true, !no_write, exec_req,
+                                                    priv_req, false, false);
+    ssize_t res_index = 0;
+    hwaddr target_address = addr + length;
+    IOMMUTLBEntry entry;
+
+    vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+    *err_count = 0;
+
+    while ((addr < target_address) && (res_index < result_length)) {
+        entry = vtd_iommu_ats_do_translate(&vtd_as->iommu, addr, flags);
+        entry.perm &= ~IOMMU_GLOBAL; /* Spec 4.1.2: Global Mapping never set */
+
+        if ((entry.perm & flags) != flags) {
+            *err_count += 1; /* Less than expected */
+        }
+
+        result[res_index] = entry;
+        res_index += 1;
+        addr = (addr & (~entry.addr_mask)) + (entry.addr_mask + 1);
+    }
+
+    /* Buffer too small */
+    if (addr < target_address) {
+        return -ENOMEM;
+    }
+
+    return res_index;
+}
+
+/* 11.4.11.3: The number of entries in the page request queue is 2^(PQS + 7) */
+static inline uint64_t vtd_prq_size(IntelIOMMUState *s)
+{
+    return 1ULL << ((vtd_get_quad(s, DMAR_PQA_REG) & VTD_PQA_SIZE) + 7);
+}
+
+/**
+ * Return true if the bit is accessible and correctly set, false otherwise
+ */
+static bool vtd_check_pre_bit(VTDAddressSpace *vtd_as, hwaddr addr,
+                              uint16_t sid, bool is_write)
+{
+    int ret;
+    IntelIOMMUState *s = vtd_as->iommu_state;
+    uint8_t bus_n = pci_bus_num(vtd_as->bus);
+    VTDContextEntry ce;
+    bool is_fpd_set = false;
+
+    ret = vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce);
+
+    if (ret) {
+        goto error_report;
+    }
+
+    if (!VTD_CE_GET_PRE(&ce)) {
+        ret = -VTD_FR_SM_PRE_ABS;
+        goto error_get_fpd_and_report;
+    }
+
+    return true;
+
+error_get_fpd_and_report:
+    /* Try to get fpd (may not work but we are already on an error path) */
+    is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
+    vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid);
+error_report:
+    vtd_report_fault(s, -ret, is_fpd_set, sid, addr, is_write,
+                     vtd_as->pasid != PCI_NO_PASID, vtd_as->pasid);
+    return false;
+}
+
+/* Logic described in section 7.5 */
+static void vtd_generate_page_request_event(IntelIOMMUState *s,
+                                            uint32_t old_pr_status)
+{
+    uint32_t current_pectl = vtd_get_long(s, DMAR_PECTL_REG);
+    /*
+     * Hardware evaluates PPR and PRO fields in the Page Request Status Register
+     * and if any of them is set, Page Request Event is not generated
+     */
+    if (old_pr_status & (VTD_PR_STATUS_PRO | VTD_PR_STATUS_PPR)) {
+        return;
+    }
+
+    vtd_set_clear_mask_long(s, DMAR_PECTL_REG, 0, VTD_PR_PECTL_IP);
+    if (!(current_pectl & VTD_PR_PECTL_IM)) {
+        vtd_set_clear_mask_long(s, DMAR_PECTL_REG, VTD_PR_PECTL_IP, 0);
+        vtd_generate_interrupt(s, DMAR_PEADDR_REG, DMAR_PEDATA_REG);
+    }
+}
+
+/* When calling this function, we know that we are in scalable mode */
+static int vtd_pri_perform_implicit_invalidation(VTDAddressSpace *vtd_as,
+                                                 hwaddr addr)
+{
+    IntelIOMMUState *s = vtd_as->iommu_state;
+    VTDContextEntry ce;
+    VTDPASIDEntry pe;
+    uint16_t pgtt;
+    uint16_t domain_id;
+    int ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+                                       vtd_as->devfn, &ce);
+    if (ret) {
+        return -EINVAL;
+    }
+    ret = vtd_ce_get_rid2pasid_entry(s, &ce, &pe, vtd_as->pasid);
+    if (ret) {
+        return -EINVAL;
+    }
+    pgtt = VTD_PE_GET_TYPE(&pe);
+    domain_id = VTD_SM_PASID_ENTRY_DID(pe.val[1]);
+    ret = 0;
+    switch (pgtt) {
+    case VTD_SM_PASID_ENTRY_FLT:
+        vtd_piotlb_page_invalidate(s, domain_id, vtd_as->pasid, addr, 0);
+        break;
+    /* Room for other pgtt values */
+    default:
+        error_report_once("Translation type not supported yet: %d", pgtt);
+        ret = -EINVAL;
+        break;
+    }
+
+    return ret;
+}
+
+/* Page Request Descriptor: 7.4.1.1 */
+static int vtd_pri_request_page(PCIBus *bus, void *opaque, int devfn,
+                                uint32_t pasid, bool priv_req, bool exec_req,
+                                hwaddr addr, bool lpig, uint16_t prgi,
+                                bool is_read, bool is_write)
+{
+    IntelIOMMUState *s = opaque;
+    VTDAddressSpace *vtd_as;
+
+    vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+
+    uint64_t queue_addr_reg = vtd_get_quad(s, DMAR_PQA_REG);
+    uint64_t queue_tail_offset_reg = vtd_get_quad(s, DMAR_PQT_REG);
+    uint64_t new_queue_tail_offset = (
+        (queue_tail_offset_reg + VTD_PQA_ENTRY_SIZE) %
+        (vtd_prq_size(s) * VTD_PQA_ENTRY_SIZE));
+    uint64_t queue_head_offset_reg = vtd_get_quad(s, DMAR_PQH_REG);
+    hwaddr queue_tail = (queue_addr_reg & VTD_PQA_ADDR) + queue_tail_offset_reg;
+    uint32_t old_pr_status = vtd_get_long(s, DMAR_PRS_REG);
+    uint16_t sid = PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn);
+    VTDPRDesc desc;
+
+    if (!(s->ecap & VTD_ECAP_PRS)) {
+        return -EPERM;
+    }
+
+    /*
+     * No need to check if scalable mode is enabled as we already know that
+     * VTD_ECAP_PRS is set (see vtd_decide_config)
+     */
+
+    /* We do not support PRI without PASID */
+    if (vtd_as->pasid == PCI_NO_PASID) {
+        return -EPERM;
+    }
+    if (exec_req && !is_read) {
+        return -EINVAL;
+    }
+
+    /* Check PRE bit in the scalable mode context entry */
+    if (!vtd_check_pre_bit(vtd_as, addr, sid, is_write)) {
+        return -EPERM;
+    }
+
+    if (old_pr_status & VTD_PR_STATUS_PRO) {
+        /*
+         * No action is taken by hardware to report a fault
+         * or generate an event
+         */
+        return -ENOSPC;
+    }
+
+    /* Check for overflow */
+    if (new_queue_tail_offset == queue_head_offset_reg) {
+        vtd_set_clear_mask_long(s, DMAR_PRS_REG, 0, VTD_PR_STATUS_PRO);
+        vtd_generate_page_request_event(s, old_pr_status);
+        return -ENOSPC;
+    }
+
+    if (vtd_pri_perform_implicit_invalidation(vtd_as, addr)) {
+        return -EINVAL;
+    }
+
+    desc.lo = VTD_PRD_TYPE | VTD_PRD_PP(true) | VTD_PRD_RID(sid) |
+              VTD_PRD_PASID(vtd_as->pasid) | VTD_PRD_PMR(priv_req);
+    desc.hi = VTD_PRD_RDR(is_read) | VTD_PRD_WRR(is_write) |
+              VTD_PRD_LPIG(lpig) | VTD_PRD_PRGI(prgi) | VTD_PRD_ADDR(addr);
+
+    desc.lo = cpu_to_le64(desc.lo);
+    desc.hi = cpu_to_le64(desc.hi);
+    if (dma_memory_write(&address_space_memory, queue_tail, &desc, sizeof(desc),
+                         MEMTXATTRS_UNSPECIFIED)) {
+        error_report_once("IO error, the PQ tail cannot be updated");
+        return -EIO;
+    }
+
+    /* Increment the tail register and set the pending request bit */
+    vtd_set_quad(s, DMAR_PQT_REG, new_queue_tail_offset);
+    /*
+     * Read the status again so that the kernel does not miss a request.
+     * In some cases, we can trigger an unnecessary interrupt but this strategy
+     * drastically improves performance as we don't need to take a lock.
+     */
+    old_pr_status = vtd_get_long(s, DMAR_PRS_REG);
+    if (!(old_pr_status & VTD_PR_STATUS_PPR)) {
+        vtd_set_clear_mask_long(s, DMAR_PRS_REG, 0, VTD_PR_STATUS_PPR);
+        vtd_generate_page_request_event(s, old_pr_status);
+    }
+
+    return 0;
+}
+
+static void vtd_init_iotlb_notifier(PCIBus *bus, void *opaque, int devfn,
+                                    IOMMUNotifier *n, IOMMUNotify fn,
+                                    void *user_opaque)
+{
+    n->opaque = user_opaque;
+    iommu_notifier_init(n, fn, IOMMU_NOTIFIER_DEVIOTLB_EVENTS, 0,
+                        HWADDR_MAX, 0);
+}
+
+static void vtd_get_iotlb_info(void *opaque, uint8_t *addr_width,
+                               uint32_t *min_page_size)
+{
+    IntelIOMMUState *s = opaque;
+
+    *addr_width = s->aw_bits;
+    *min_page_size = VTD_PAGE_SIZE;
+}
+
+static void vtd_register_iotlb_notifier(PCIBus *bus, void *opaque,
+                                        int devfn, uint32_t pasid,
+                                        IOMMUNotifier *n)
+{
+    IntelIOMMUState *s = opaque;
+    VTDAddressSpace *vtd_as;
+
+    vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+    memory_region_register_iommu_notifier(MEMORY_REGION(&vtd_as->iommu), n,
+                                          &error_fatal);
+}
+
+static void vtd_unregister_iotlb_notifier(PCIBus *bus, void *opaque,
+                                          int devfn, uint32_t pasid,
+                                          IOMMUNotifier *n)
+{
+    IntelIOMMUState *s = opaque;
+    VTDAddressSpace *vtd_as;
+
+    vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+    memory_region_unregister_iommu_notifier(MEMORY_REGION(&vtd_as->iommu), n);
+}
+
+static void vtd_pri_register_notifier(PCIBus *bus, void *opaque, int devfn,
+                                      uint32_t pasid, IOMMUPRINotifier *notifier)
+{
+    IntelIOMMUState *s = opaque;
+    VTDAddressSpace *vtd_as;
+
+    vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+    vtd_as->pri_notifier = notifier;
+}
+
+static void vtd_pri_unregister_notifier(PCIBus *bus, void *opaque,
+                                        int devfn, uint32_t pasid)
+{
+    IntelIOMMUState *s = opaque;
+    VTDAddressSpace *vtd_as;
+
+    vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+    vtd_as->pri_notifier = NULL;
+}
+
 static PCIIOMMUOps vtd_iommu_ops = {
     .get_address_space = vtd_host_dma_iommu,
     .set_iommu_device = vtd_dev_set_iommu_device,
     .unset_iommu_device = vtd_dev_unset_iommu_device,
+    .get_iotlb_info = vtd_get_iotlb_info,
+    .init_iotlb_notifier = vtd_init_iotlb_notifier,
+    .register_iotlb_notifier = vtd_register_iotlb_notifier,
+    .unregister_iotlb_notifier = vtd_unregister_iotlb_notifier,
+    .ats_request_translation = vtd_ats_request_translation,
+    .pri_register_notifier = vtd_pri_register_notifier,
+    .pri_unregister_notifier = vtd_pri_unregister_notifier,
+    .pri_request_page = vtd_pri_request_page,
 };
 
 static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
@@ -4791,32 +5243,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
     return true;
 }
 
-static int vtd_machine_done_notify_one(Object *child, void *unused)
-{
-    IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
-
-    /*
-     * We hard-coded here because vfio-pci is the only special case
-     * here. Let's be more elegant in the future when we can, but so
-     * far there seems to be no better way.
-     */
-    if (object_dynamic_cast(child, "vfio-pci") && !iommu->caching_mode) {
-        vtd_panic_require_caching_mode();
-    }
-
-    return 0;
-}
-
-static void vtd_machine_done_hook(Notifier *notifier, void *unused)
-{
-    object_child_foreach_recursive(object_get_root(),
-                                   vtd_machine_done_notify_one, NULL);
-}
-
-static Notifier vtd_machine_done_notify = {
-    .notify = vtd_machine_done_hook,
-};
-
 static void vtd_realize(DeviceState *dev, Error **errp)
 {
     MachineState *ms = MACHINE(qdev_get_machine());
@@ -4871,7 +5297,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)
     pci_setup_iommu(bus, &vtd_iommu_ops, dev);
     /* Pseudo address space under root PCI bus. */
     x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
-    qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);
 }
 
 static void vtd_class_init(ObjectClass *klass, const void *data)