intel_iommu: enable remote IOTLB
This patch is based on Aviv Ben-David (<bd.aviv@gmail.com>)'s patch upstream: "IOMMU: enable intel_iommu map and unmap notifiers" https://lists.gnu.org/archive/html/qemu-devel/2016-11/msg01453.html However I removed/fixed some content, and added my own codes. Instead of translate() every page for iotlb invalidations (which is slower), we walk the pages when needed and notify in a hook function. This patch enables vfio devices for VT-d emulation. And, since we already have vhost DMAR support via device-iotlb, a natural benefit that this patch brings is that vt-d enabled vhost can live even without ATS capability now. Though more tests are needed. Signed-off-by: Aviv Ben-David <bdaviv@cs.technion.ac.il> Reviewed-by: Jason Wang <jasowang@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: \"Michael S. Tsirkin\" <mst@redhat.com> Signed-off-by: Peter Xu <peterx@redhat.com> Message-Id: <1491562755-23867-10-git-send-email-peterx@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
This commit is contained in:
		
							parent
							
								
									558e0024a4
								
							
						
					
					
						commit
						dd4d607e40
					
				| @ -806,7 +806,8 @@ next: | ||||
|  * @private: private data for the hook function | ||||
|  */ | ||||
| static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end, | ||||
|                          vtd_page_walk_hook hook_fn, void *private) | ||||
|                          vtd_page_walk_hook hook_fn, void *private, | ||||
|                          bool notify_unmap) | ||||
| { | ||||
|     dma_addr_t addr = vtd_get_slpt_base_from_context(ce); | ||||
|     uint32_t level = vtd_get_level_from_context_entry(ce); | ||||
| @ -821,7 +822,7 @@ static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end, | ||||
|     } | ||||
| 
 | ||||
|     return vtd_page_walk_level(addr, start, end, hook_fn, private, | ||||
|                                level, true, true, false); | ||||
|                                level, true, true, notify_unmap); | ||||
| } | ||||
| 
 | ||||
| /* Map a device to its corresponding domain (context-entry) */ | ||||
| @ -1038,6 +1039,15 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s) | ||||
|                 s->intr_root, s->intr_size); | ||||
| } | ||||
| 
 | ||||
| static void vtd_iommu_replay_all(IntelIOMMUState *s) | ||||
| { | ||||
|     IntelIOMMUNotifierNode *node; | ||||
| 
 | ||||
|     QLIST_FOREACH(node, &s->notifiers_list, next) { | ||||
|         memory_region_iommu_replay_all(&node->vtd_as->iommu); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void vtd_context_global_invalidate(IntelIOMMUState *s) | ||||
| { | ||||
|     trace_vtd_inv_desc_cc_global(); | ||||
| @ -1045,6 +1055,14 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s) | ||||
|     if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) { | ||||
|         vtd_reset_context_cache(s); | ||||
|     } | ||||
|     /*
 | ||||
|      * From VT-d spec 6.5.2.1, a global context entry invalidation | ||||
|      * should be followed by a IOTLB global invalidation, so we should | ||||
|      * be safe even without this. Hoewever, let's replay the region as | ||||
|      * well to be safer, and go back here when we need finer tunes for | ||||
|      * VT-d emulation codes. | ||||
|      */ | ||||
|     vtd_iommu_replay_all(s); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| @ -1111,6 +1129,16 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, | ||||
|                 trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it), | ||||
|                                              VTD_PCI_FUNC(devfn_it)); | ||||
|                 vtd_as->context_cache_entry.context_cache_gen = 0; | ||||
|                 /*
 | ||||
|                  * So a device is moving out of (or moving into) a | ||||
|                  * domain, a replay() suites here to notify all the | ||||
|                  * IOMMU_NOTIFIER_MAP registers about this change. | ||||
|                  * This won't bring bad even if we have no such | ||||
|                  * notifier registered - the IOMMU notification | ||||
|                  * framework will skip MAP notifications if that | ||||
|                  * happened. | ||||
|                  */ | ||||
|                 memory_region_iommu_replay_all(&vtd_as->iommu); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| @ -1152,12 +1180,53 @@ static void vtd_iotlb_global_invalidate(IntelIOMMUState *s) | ||||
| { | ||||
|     trace_vtd_iotlb_reset("global invalidation recved"); | ||||
|     vtd_reset_iotlb(s); | ||||
|     vtd_iommu_replay_all(s); | ||||
| } | ||||
| 
 | ||||
| static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) | ||||
| { | ||||
|     IntelIOMMUNotifierNode *node; | ||||
|     VTDContextEntry ce; | ||||
|     VTDAddressSpace *vtd_as; | ||||
| 
 | ||||
|     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain, | ||||
|                                 &domain_id); | ||||
| 
 | ||||
|     QLIST_FOREACH(node, &s->notifiers_list, next) { | ||||
|         vtd_as = node->vtd_as; | ||||
|         if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), | ||||
|                                       vtd_as->devfn, &ce) && | ||||
|             domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) { | ||||
|             memory_region_iommu_replay_all(&vtd_as->iommu); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static int vtd_page_invalidate_notify_hook(IOMMUTLBEntry *entry, | ||||
|                                            void *private) | ||||
| { | ||||
|     memory_region_notify_iommu((MemoryRegion *)private, *entry); | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, | ||||
|                                            uint16_t domain_id, hwaddr addr, | ||||
|                                            uint8_t am) | ||||
| { | ||||
|     IntelIOMMUNotifierNode *node; | ||||
|     VTDContextEntry ce; | ||||
|     int ret; | ||||
| 
 | ||||
|     QLIST_FOREACH(node, &(s->notifiers_list), next) { | ||||
|         VTDAddressSpace *vtd_as = node->vtd_as; | ||||
|         ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), | ||||
|                                        vtd_as->devfn, &ce); | ||||
|         if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) { | ||||
|             vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE, | ||||
|                           vtd_page_invalidate_notify_hook, | ||||
|                           (void *)&vtd_as->iommu, true); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, | ||||
| @ -1170,6 +1239,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, | ||||
|     info.addr = addr; | ||||
|     info.mask = ~((1 << am) - 1); | ||||
|     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info); | ||||
|     vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am); | ||||
| } | ||||
| 
 | ||||
| /* Flush IOTLB
 | ||||
| @ -2187,15 +2257,33 @@ static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu, | ||||
|                                           IOMMUNotifierFlag new) | ||||
| { | ||||
|     VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); | ||||
|     IntelIOMMUState *s = vtd_as->iommu_state; | ||||
|     IntelIOMMUNotifierNode *node = NULL; | ||||
|     IntelIOMMUNotifierNode *next_node = NULL; | ||||
| 
 | ||||
|     if (new & IOMMU_NOTIFIER_MAP) { | ||||
|         error_report("Device at bus %s addr %02x.%d requires iommu " | ||||
|                      "notifier which is currently not supported by " | ||||
|                      "intel-iommu emulation", | ||||
|                      vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn), | ||||
|                      PCI_FUNC(vtd_as->devfn)); | ||||
|     if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) { | ||||
|         error_report("We need to set cache_mode=1 for intel-iommu to enable " | ||||
|                      "device assignment with IOMMU protection."); | ||||
|         exit(1); | ||||
|     } | ||||
| 
 | ||||
|     if (old == IOMMU_NOTIFIER_NONE) { | ||||
|         node = g_malloc0(sizeof(*node)); | ||||
|         node->vtd_as = vtd_as; | ||||
|         QLIST_INSERT_HEAD(&s->notifiers_list, node, next); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     /* update notifier node with new flags */ | ||||
|     QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) { | ||||
|         if (node->vtd_as == vtd_as) { | ||||
|             if (new == IOMMU_NOTIFIER_NONE) { | ||||
|                 QLIST_REMOVE(node, next); | ||||
|                 g_free(node); | ||||
|             } | ||||
|             return; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static const VMStateDescription vtd_vmstate = { | ||||
| @ -2613,6 +2701,74 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) | ||||
|     return vtd_dev_as; | ||||
| } | ||||
| 
 | ||||
| /* Unmap the whole range in the notifier's scope. */ | ||||
| static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) | ||||
| { | ||||
|     IOMMUTLBEntry entry; | ||||
|     hwaddr size; | ||||
|     hwaddr start = n->start; | ||||
|     hwaddr end = n->end; | ||||
| 
 | ||||
|     /*
 | ||||
|      * Note: all the codes in this function has a assumption that IOVA | ||||
|      * bits are no more than VTD_MGAW bits (which is restricted by | ||||
|      * VT-d spec), otherwise we need to consider overflow of 64 bits. | ||||
|      */ | ||||
| 
 | ||||
|     if (end > VTD_ADDRESS_SIZE) { | ||||
|         /*
 | ||||
|          * Don't need to unmap regions that is bigger than the whole | ||||
|          * VT-d supported address space size | ||||
|          */ | ||||
|         end = VTD_ADDRESS_SIZE; | ||||
|     } | ||||
| 
 | ||||
|     assert(start <= end); | ||||
|     size = end - start; | ||||
| 
 | ||||
|     if (ctpop64(size) != 1) { | ||||
|         /*
 | ||||
|          * This size cannot format a correct mask. Let's enlarge it to | ||||
|          * suite the minimum available mask. | ||||
|          */ | ||||
|         int n = 64 - clz64(size); | ||||
|         if (n > VTD_MGAW) { | ||||
|             /* should not happen, but in case it happens, limit it */ | ||||
|             n = VTD_MGAW; | ||||
|         } | ||||
|         size = 1ULL << n; | ||||
|     } | ||||
| 
 | ||||
|     entry.target_as = &address_space_memory; | ||||
|     /* Adjust iova for the size */ | ||||
|     entry.iova = n->start & ~(size - 1); | ||||
|     /* This field is meaningless for unmap */ | ||||
|     entry.translated_addr = 0; | ||||
|     entry.perm = IOMMU_NONE; | ||||
|     entry.addr_mask = size - 1; | ||||
| 
 | ||||
|     trace_vtd_as_unmap_whole(pci_bus_num(as->bus), | ||||
|                              VTD_PCI_SLOT(as->devfn), | ||||
|                              VTD_PCI_FUNC(as->devfn), | ||||
|                              entry.iova, size); | ||||
| 
 | ||||
|     memory_region_notify_one(n, &entry); | ||||
| } | ||||
| 
 | ||||
| static void vtd_address_space_unmap_all(IntelIOMMUState *s) | ||||
| { | ||||
|     IntelIOMMUNotifierNode *node; | ||||
|     VTDAddressSpace *vtd_as; | ||||
|     IOMMUNotifier *n; | ||||
| 
 | ||||
|     QLIST_FOREACH(node, &s->notifiers_list, next) { | ||||
|         vtd_as = node->vtd_as; | ||||
|         IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) { | ||||
|             vtd_address_space_unmap(vtd_as, n); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private) | ||||
| { | ||||
|     memory_region_notify_one((IOMMUNotifier *)private, entry); | ||||
| @ -2626,16 +2782,19 @@ static void vtd_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n) | ||||
|     uint8_t bus_n = pci_bus_num(vtd_as->bus); | ||||
|     VTDContextEntry ce; | ||||
| 
 | ||||
|     /*
 | ||||
|      * The replay can be triggered by either a invalidation or a newly | ||||
|      * created entry. No matter what, we release existing mappings | ||||
|      * (it means flushing caches for UNMAP-only registers). | ||||
|      */ | ||||
|     vtd_address_space_unmap(vtd_as, n); | ||||
| 
 | ||||
|     if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) { | ||||
|         /*
 | ||||
|          * Scanned a valid context entry, walk over the pages and | ||||
|          * notify when needed. | ||||
|          */ | ||||
|         trace_vtd_replay_ce_valid(bus_n, PCI_SLOT(vtd_as->devfn), | ||||
|                                   PCI_FUNC(vtd_as->devfn), | ||||
|                                   VTD_CONTEXT_ENTRY_DID(ce.hi), | ||||
|                                   ce.hi, ce.lo); | ||||
|         vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n); | ||||
|         vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false); | ||||
|     } else { | ||||
|         trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn), | ||||
|                                     PCI_FUNC(vtd_as->devfn)); | ||||
| @ -2754,6 +2913,11 @@ static void vtd_reset(DeviceState *dev) | ||||
| 
 | ||||
|     VTD_DPRINTF(GENERAL, ""); | ||||
|     vtd_init(s); | ||||
| 
 | ||||
|     /*
 | ||||
|      * When device reset, throw away all mappings and external caches | ||||
|      */ | ||||
|     vtd_address_space_unmap_all(s); | ||||
| } | ||||
| 
 | ||||
| static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) | ||||
| @ -2817,6 +2981,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     QLIST_INIT(&s->notifiers_list); | ||||
|     memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num)); | ||||
|     memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, | ||||
|                           "intel_iommu", DMAR_REG_SIZE); | ||||
|  | ||||
| @ -197,6 +197,7 @@ | ||||
| #define VTD_DOMAIN_ID_MASK          ((1UL << VTD_DOMAIN_ID_SHIFT) - 1) | ||||
| #define VTD_CAP_ND                  (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL) | ||||
| #define VTD_MGAW                    39  /* Maximum Guest Address Width */ | ||||
| #define VTD_ADDRESS_SIZE            (1ULL << VTD_MGAW) | ||||
| #define VTD_CAP_MGAW                (((VTD_MGAW - 1) & 0x3fULL) << 16) | ||||
| #define VTD_MAMV                    18ULL | ||||
| #define VTD_CAP_MAMV                (VTD_MAMV << 48) | ||||
|  | ||||
| @ -37,6 +37,7 @@ vtd_page_walk_skip_read(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"P | ||||
| vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to perm empty" | ||||
| vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set" | ||||
| vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" | ||||
| vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64 | ||||
| 
 | ||||
| # hw/i386/amd_iommu.c | ||||
| amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32 | ||||
|  | ||||
| @ -63,6 +63,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry; | ||||
| typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; | ||||
| typedef struct VTDIrq VTDIrq; | ||||
| typedef struct VTD_MSIMessage VTD_MSIMessage; | ||||
| typedef struct IntelIOMMUNotifierNode IntelIOMMUNotifierNode; | ||||
| 
 | ||||
| /* Context-Entry */ | ||||
| struct VTDContextEntry { | ||||
| @ -249,6 +250,11 @@ struct VTD_MSIMessage { | ||||
| /* When IR is enabled, all MSI/MSI-X data bits should be zero */ | ||||
| #define VTD_IR_MSI_DATA          (0) | ||||
| 
 | ||||
| struct IntelIOMMUNotifierNode { | ||||
|     VTDAddressSpace *vtd_as; | ||||
|     QLIST_ENTRY(IntelIOMMUNotifierNode) next; | ||||
| }; | ||||
| 
 | ||||
| /* The iommu (DMAR) device state struct */ | ||||
| struct IntelIOMMUState { | ||||
|     X86IOMMUState x86_iommu; | ||||
| @ -286,6 +292,8 @@ struct IntelIOMMUState { | ||||
|     MemoryRegionIOMMUOps iommu_ops; | ||||
|     GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus* reference */ | ||||
|     VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */ | ||||
|     /* list of registered notifiers */ | ||||
|     QLIST_HEAD(, IntelIOMMUNotifierNode) notifiers_list; | ||||
| 
 | ||||
|     /* interrupt remapping */ | ||||
|     bool intr_enabled;              /* Whether guest enabled IR */ | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Peter Xu
						Peter Xu