VFIO patches: realtek NIC quirk + SPAPR IOMMU AddressSpace support
-----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABAgAGBQJTjPBKAAoJECObm247sIsi0fkP/1q5o3ZwwgO8y1lO43P39rRL BzCoa3EHGt0kGn7dImxFhjHClqhyqJL1SpaVOVTdTOmlOL1VVGJ/+2UJ0W7gB/jU zSYdmnX2UD5KNUrECqcWr+twNn1LGNcs3SCeFoe2WMmoPwqGHYs9FJmn5eqD7c6k iF3HD9H9sm1TauQ9iaFlg695Ap9y+MXOECaocYyx10j5wVyo4yByugtfrGjs3pOA BzzbRb89NAY+tneRnMoiWD3TS5CPYpvhWmAIW988u+mJLXLaO7CS1E7qacxuFUdu y2Z+pPjp6qpr1ZNYgOXqlrcW6/2PocP2qvwXN/P/waxN5OjDmkrOnT7hU0FWtTwz qnhrsPzaqthat28mLA+Fd0VXKkoMnGVilZixj59sXMnTjA1lL0sM8HMuGu/JRDCl Nqz9Mc2xhxegUZ6gyuIVBqY7qTO3x8DiVPGCAe3m6qiyWnNOBu5oBjnoz1ns5M5H aeysil1tp5PUGM3KomLXL8slkoo6Tiu0rKRI8tzwSWJ6VS0vtv5JrKKGF5oaTSL7 YslkbQExoHa03aquUbc6DpN1LG35oOPhIg+HbErfp/Mb9j042s28rgeQ+55RNEev uJW0G0F8PYWr8DtIlk7bmVX0bFU8bV3O/gxNeb3jmTNXhcObNvr2GviZQr6UezLW ax1Nt+EhOfquV2j8uuLR =rimn -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/awilliam/tags/vfio-pci-for-qemu-20140602.0' into staging VFIO patches: realtek NIC quirk + SPAPR IOMMU AddressSpace support # gpg: Signature made Mon 02 Jun 2014 22:44:42 BST using RSA key ID 3BB08B22 # gpg: Can't check signature: public key not found * remotes/awilliam/tags/vfio-pci-for-qemu-20140602.0: vfio: Add guest side IOMMU support vfio: Create VFIOAddressSpace objects as needed vfio: Introduce VFIO address spaces vfio: Rework to have error paths vfio: Fix 128 bit handling int128: Add int128_exts64() memory: Sanity check that no listeners remain on a destroyed AddressSpace vfio-pci: Quirk RTL8168 NIC Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
		
						commit
						e00fcfeab3
					
				
							
								
								
									
										428
									
								
								hw/misc/vfio.c
									
									
									
									
									
								
							
							
						
						
									
										428
									
								
								hw/misc/vfio.c
									
									
									
									
									
								
							@ -133,6 +133,15 @@ enum {
 | 
			
		||||
    VFIO_INT_MSIX = 3,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 * Per-AddressSpace bookkeeping for VFIO.  Entries are created on demand by
 * vfio_get_address_space() and freed by vfio_put_address_space() once no
 * container is attached any more.
 */
typedef struct VFIOAddressSpace {
 | 
			
		||||
    AddressSpace *as; /* key: compared by pointer when looking up an entry */
 | 
			
		||||
    QLIST_HEAD(, VFIOContainer) containers; /* containers in this space */
 | 
			
		||||
    QLIST_ENTRY(VFIOAddressSpace) list; /* link in vfio_address_spaces */
 | 
			
		||||
} VFIOAddressSpace;
 | 
			
		||||
 | 
			
		||||
/* File-wide list of every VFIOAddressSpace currently in use */
static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces =
 | 
			
		||||
    QLIST_HEAD_INITIALIZER(vfio_address_spaces);
 | 
			
		||||
 | 
			
		||||
struct VFIOGroup;
 | 
			
		||||
 | 
			
		||||
typedef struct VFIOType1 {
 | 
			
		||||
@ -142,6 +151,7 @@ typedef struct VFIOType1 {
 | 
			
		||||
} VFIOType1;
 | 
			
		||||
 | 
			
		||||
typedef struct VFIOContainer {
 | 
			
		||||
    VFIOAddressSpace *space;
 | 
			
		||||
    int fd; /* /dev/vfio/vfio, empowered by the attached groups */
 | 
			
		||||
    struct {
 | 
			
		||||
        /* enable abstraction to support various iommu backends */
 | 
			
		||||
@ -150,10 +160,18 @@ typedef struct VFIOContainer {
 | 
			
		||||
        };
 | 
			
		||||
        void (*release)(struct VFIOContainer *);
 | 
			
		||||
    } iommu_data;
 | 
			
		||||
    QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
 | 
			
		||||
    QLIST_HEAD(, VFIOGroup) group_list;
 | 
			
		||||
    QLIST_ENTRY(VFIOContainer) next;
 | 
			
		||||
} VFIOContainer;
 | 
			
		||||
 | 
			
		||||
/*
 * State for one guest IOMMU memory region seen by a container's memory
 * listener.  Allocated in vfio_listener_region_add() and released in
 * vfio_listener_region_del().
 */
typedef struct VFIOGuestIOMMU {
 | 
			
		||||
    VFIOContainer *container; /* container whose listener saw the region */
 | 
			
		||||
    MemoryRegion *iommu; /* the IOMMU region (section->mr) being tracked */
 | 
			
		||||
    Notifier n; /* notify hook is set to vfio_iommu_map_notify() */
 | 
			
		||||
    QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; /* link in giommu_list */
 | 
			
		||||
} VFIOGuestIOMMU;
 | 
			
		||||
 | 
			
		||||
/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */
 | 
			
		||||
typedef struct VFIOMSIXInfo {
 | 
			
		||||
    uint8_t table_bar;
 | 
			
		||||
@ -234,9 +252,6 @@ static const VFIORomBlacklistEntry romblacklist[] = {
 | 
			
		||||
 | 
			
		||||
#define MSIX_CAP_LENGTH 12
 | 
			
		||||
 | 
			
		||||
static QLIST_HEAD(, VFIOContainer)
 | 
			
		||||
    container_list = QLIST_HEAD_INITIALIZER(container_list);
 | 
			
		||||
 | 
			
		||||
static QLIST_HEAD(, VFIOGroup)
 | 
			
		||||
    group_list = QLIST_HEAD_INITIALIZER(group_list);
 | 
			
		||||
 | 
			
		||||
@ -1668,6 +1683,149 @@ static void vfio_probe_ati_bar4_window_quirk(VFIODevice *vdev, int nr)
 | 
			
		||||
            vdev->host.function);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define PCI_VENDOR_ID_REALTEK 0x10ec
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * RTL8168 devices have a backdoor that can access the MSI-X table.  At BAR2
 | 
			
		||||
 * offset 0x70 there is a dword data register, offset 0x74 is a dword address
 | 
			
		||||
 * register.  According to the Linux r8169 driver, the MSI-X table is addressed
 | 
			
		||||
 * when the "type" portion of the address register is set to 0x1.  This appears
 | 
			
		||||
 * to be bits 16:30.  Bit 31 is both a write indicator and some sort of
 | 
			
		||||
 * "address latched" indicator.  Bits 12:15 are a mask field, which we can
 | 
			
		||||
 * ignore because the MSI-X table should always be accessed as a dword (full
 | 
			
		||||
 * mask).  Bits 0:11 are the offset within the type.
 | 
			
		||||
 *
 | 
			
		||||
 * Example trace:
 | 
			
		||||
 *
 | 
			
		||||
 * Read from MSI-X table offset 0
 | 
			
		||||
 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
 | 
			
		||||
 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
 | 
			
		||||
 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
 | 
			
		||||
 *
 | 
			
		||||
 * Write 0xfee00000 to MSI-X table offset 0
 | 
			
		||||
 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
 | 
			
		||||
 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
 | 
			
		||||
 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
static uint64_t vfio_rtl8168_window_quirk_read(void *opaque,
 | 
			
		||||
                                               hwaddr addr, unsigned size)
 | 
			
		||||
{
 | 
			
		||||
    VFIOQuirk *quirk = opaque;
 | 
			
		||||
    VFIODevice *vdev = quirk->vdev;
 | 
			
		||||
 | 
			
		||||
    switch (addr) {
 | 
			
		||||
    case 4: /* address */
 | 
			
		||||
        if (quirk->data.flags) {
 | 
			
		||||
            DPRINTF("%s fake read(%04x:%02x:%02x.%d)\n",
 | 
			
		||||
                    memory_region_name(&quirk->mem), vdev->host.domain,
 | 
			
		||||
                    vdev->host.bus, vdev->host.slot, vdev->host.function);
 | 
			
		||||
 | 
			
		||||
            return quirk->data.address_match ^ 0x10000000U;
 | 
			
		||||
        }
 | 
			
		||||
        break;
 | 
			
		||||
    case 0: /* data */
 | 
			
		||||
        if (quirk->data.flags) {
 | 
			
		||||
            uint64_t val;
 | 
			
		||||
 | 
			
		||||
            DPRINTF("%s MSI-X table read(%04x:%02x:%02x.%d)\n",
 | 
			
		||||
                    memory_region_name(&quirk->mem), vdev->host.domain,
 | 
			
		||||
                    vdev->host.bus, vdev->host.slot, vdev->host.function);
 | 
			
		||||
 | 
			
		||||
            if (!(vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
 | 
			
		||||
                return 0;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            io_mem_read(&vdev->pdev.msix_table_mmio,
 | 
			
		||||
                        (hwaddr)(quirk->data.address_match & 0xfff),
 | 
			
		||||
                        &val, size);
 | 
			
		||||
            return val;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    DPRINTF("%s direct read(%04x:%02x:%02x.%d)\n",
 | 
			
		||||
            memory_region_name(&quirk->mem), vdev->host.domain,
 | 
			
		||||
            vdev->host.bus, vdev->host.slot, vdev->host.function);
 | 
			
		||||
 | 
			
		||||
    return vfio_bar_read(&vdev->bars[quirk->data.bar], addr + 0x70, size);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Write handler for the RTL8168 BAR2 backdoor window (BAR2 + 0x70).
 *
 * @opaque: the VFIOQuirk registered for this window
 * @addr:   offset inside the 8 byte window: 0 = data register,
 *          4 = address register
 * @data:   value written by the guest
 * @size:   access width in bytes
 *
 * Data register writes are remembered in quirk->data.address_mask (the field
 * is reused here as a stash for the last data value) and also passed through
 * to hardware.  An address register write whose type field (bits 16:30)
 * equals 0x1 targets the MSI-X table: it is latched in address_match, flags
 * is set so that subsequent reads are emulated, and the write is NOT
 * forwarded to the device.  If bit 31 (write command) is also set and the
 * device has MSI-X, the previously stashed data value is written to the
 * emulated MSI-X table at the offset in bits 0:11.  Any other address value
 * clears flags and is forwarded to the physical BAR.
 */
static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr,
                                            uint64_t data, unsigned size)
{
    VFIOQuirk *quirk = opaque;
    VFIODevice *vdev = quirk->vdev;

    switch (addr) {
    case 4: /* address */
        if ((data & 0x7fff0000) == 0x10000) {
            /*
             * Bit 31 is the write command.  Testing any bit covered by the
             * 0x7fff0000 type mask here could never succeed.
             */
            if (data & 0x80000000U &&
                vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {

                DPRINTF("%s MSI-X table write(%04x:%02x:%02x.%d)\n",
                        memory_region_name(&quirk->mem), vdev->host.domain,
                        vdev->host.bus, vdev->host.slot, vdev->host.function);

                /*
                 * The offset comes from the address being written right
                 * now, the payload from the preceding data register write.
                 */
                io_mem_write(&vdev->pdev.msix_table_mmio,
                             (hwaddr)(data & 0xfff),
                             (uint64_t)quirk->data.address_mask, size);
            }

            quirk->data.flags = 1;
            quirk->data.address_match = data;

            return;
        }
        quirk->data.flags = 0;
        break;
    case 0: /* data */
        quirk->data.address_mask = data;
        break;
    }

    DPRINTF("%s direct write(%04x:%02x:%02x.%d)\n",
            memory_region_name(&quirk->mem), vdev->host.domain,
            vdev->host.bus, vdev->host.slot, vdev->host.function);

    vfio_bar_write(&vdev->bars[quirk->data.bar], addr + 0x70, data, size);
}
 | 
			
		||||
 | 
			
		||||
static const MemoryRegionOps vfio_rtl8168_window_quirk = {
 | 
			
		||||
    .read = vfio_rtl8168_window_quirk_read,
 | 
			
		||||
    .write = vfio_rtl8168_window_quirk_write,
 | 
			
		||||
    .valid = {
 | 
			
		||||
        .min_access_size = 4,
 | 
			
		||||
        .max_access_size = 4,
 | 
			
		||||
        .unaligned = false,
 | 
			
		||||
    },
 | 
			
		||||
    .endianness = DEVICE_LITTLE_ENDIAN,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 * Install the RTL8168 backdoor window quirk when @nr is BAR2 of a Realtek
 * device with device ID 0x8168; otherwise do nothing.
 *
 * The quirk is an 8 byte I/O region overlaid at offset 0x70 of the BAR with
 * priority 1, and is recorded on the BAR's quirk list.
 */
static void vfio_probe_rtl8168_bar2_window_quirk(VFIODevice *vdev, int nr)
{
    PCIDevice *pdev = &vdev->pdev;
    VFIOQuirk *window;

    /* Bail out unless this is exactly the device and BAR we care about */
    if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_REALTEK) {
        return;
    }
    if (pci_get_word(pdev->config + PCI_DEVICE_ID) != 0x8168) {
        return;
    }
    if (nr != 2) {
        return;
    }

    window = g_malloc0(sizeof(*window));
    window->data.bar = nr;
    window->vdev = vdev;

    memory_region_init_io(&window->mem, OBJECT(vdev),
                          &vfio_rtl8168_window_quirk, window,
                          "vfio-rtl8168-window-quirk", 8);
    memory_region_add_subregion_overlap(&vdev->bars[nr].mem, 0x70,
                                        &window->mem, 1);

    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, window, next);

    DPRINTF("Enabled RTL8168 BAR2 window quirk for device %04x:%02x:%02x.%x\n",
            vdev->host.domain, vdev->host.bus, vdev->host.slot,
            vdev->host.function);
}
 | 
			
		||||
/*
 | 
			
		||||
 * Trap the BAR2 MMIO window to config space as well.
 | 
			
		||||
 */
 | 
			
		||||
@ -2071,6 +2229,7 @@ static void vfio_bar_quirk_setup(VFIODevice *vdev, int nr)
 | 
			
		||||
    vfio_probe_nvidia_bar5_window_quirk(vdev, nr);
 | 
			
		||||
    vfio_probe_nvidia_bar0_88000_quirk(vdev, nr);
 | 
			
		||||
    vfio_probe_nvidia_bar0_1800_quirk(vdev, nr);
 | 
			
		||||
    vfio_probe_rtl8168_bar2_window_quirk(vdev, nr);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void vfio_bar_quirk_teardown(VFIODevice *vdev, int nr)
 | 
			
		||||
@ -2232,7 +2391,8 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
 | 
			
		||||
 | 
			
		||||
static bool vfio_listener_skipped_section(MemoryRegionSection *section)
 | 
			
		||||
{
 | 
			
		||||
    return !memory_region_is_ram(section->mr) ||
 | 
			
		||||
    return (!memory_region_is_ram(section->mr) &&
 | 
			
		||||
            !memory_region_is_iommu(section->mr)) ||
 | 
			
		||||
           /*
 | 
			
		||||
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
 | 
			
		||||
            * addresses in the upper part of the 64-bit address space.  These
 | 
			
		||||
@ -2242,17 +2402,75 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
 | 
			
		||||
           section->offset_within_address_space & (1ULL << 63);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Guest IOMMU notifier: mirror one IOMMU TLB entry change into the VFIO
 * container that owns the VFIOGuestIOMMU embedding @n.
 *
 * @n:    the Notifier member of a VFIOGuestIOMMU
 * @data: the IOMMUTLBEntry being reported
 *
 * An entry with IOMMU_NONE permission is unmapped from the container; any
 * other entry is mapped, read-only when it lacks write permission or the
 * backing region is read-only.  Entries that do not resolve to RAM, or
 * whose translation is shorter than the IOMMU page, are ignored.
 */
static void vfio_iommu_map_notify(Notifier *n, void *data)
{
    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
    VFIOContainer *container = giommu->container;
    IOMMUTLBEntry *iotlb = data;
    const hwaddr size = iotlb->addr_mask + 1;
    hwaddr len = size;
    hwaddr xlat;
    MemoryRegion *mr;
    void *vaddr;
    int ret;

    DPRINTF("iommu map @ %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
            iotlb->iova, iotlb->iova + iotlb->addr_mask);

    /*
     * The TLB entry only describes the hop through this IOMMU; resolve the
     * translated address the rest of the way down to a memory region.
     */
    mr = address_space_translate(&address_space_memory,
                                 iotlb->translated_addr,
                                 &xlat, &len, iotlb->perm & IOMMU_WO);
    if (!memory_region_is_ram(mr)) {
        DPRINTF("iommu map to non memory area %"HWADDR_PRIx"\n",
                xlat);
        return;
    }

    /*
     * The translation may have shortened len; anything that is not a
     * multiple of the IOMMU page size is rejected.
     */
    if (len & iotlb->addr_mask) {
        DPRINTF("iommu has granularity incompatible with target AS\n");
        return;
    }

    if (iotlb->perm == IOMMU_NONE) {
        ret = vfio_dma_unmap(container, iotlb->iova, size);
        if (ret) {
            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                         "0x%"HWADDR_PRIx") = %d (%m)",
                         container, iotlb->iova,
                         size, ret);
        }
        return;
    }

    vaddr = memory_region_get_ram_ptr(mr) + xlat;

    ret = vfio_dma_map(container, iotlb->iova, size, vaddr,
                       !(iotlb->perm & IOMMU_WO) || mr->readonly);
    if (ret) {
        error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
                     "0x%"HWADDR_PRIx", %p) = %d (%m)",
                     container, iotlb->iova,
                     size, vaddr, ret);
    }
}
 | 
			
		||||
 | 
			
		||||
static void vfio_listener_region_add(MemoryListener *listener,
 | 
			
		||||
                                     MemoryRegionSection *section)
 | 
			
		||||
{
 | 
			
		||||
    VFIOContainer *container = container_of(listener, VFIOContainer,
 | 
			
		||||
                                            iommu_data.type1.listener);
 | 
			
		||||
    hwaddr iova, end;
 | 
			
		||||
    Int128 llend;
 | 
			
		||||
    void *vaddr;
 | 
			
		||||
    int ret;
 | 
			
		||||
 | 
			
		||||
    assert(!memory_region_is_iommu(section->mr));
 | 
			
		||||
 | 
			
		||||
    if (vfio_listener_skipped_section(section)) {
 | 
			
		||||
        DPRINTF("SKIPPING region_add %"HWADDR_PRIx" - %"PRIx64"\n",
 | 
			
		||||
                section->offset_within_address_space,
 | 
			
		||||
@ -2268,21 +2486,65 @@ static void vfio_listener_region_add(MemoryListener *listener,
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
 | 
			
		||||
    end = (section->offset_within_address_space + int128_get64(section->size)) &
 | 
			
		||||
          TARGET_PAGE_MASK;
 | 
			
		||||
    llend = int128_make64(section->offset_within_address_space);
 | 
			
		||||
    llend = int128_add(llend, section->size);
 | 
			
		||||
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
 | 
			
		||||
 | 
			
		||||
    if (iova >= end) {
 | 
			
		||||
    if (int128_ge(int128_make64(iova), llend)) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    memory_region_ref(section->mr);
 | 
			
		||||
 | 
			
		||||
    if (memory_region_is_iommu(section->mr)) {
 | 
			
		||||
        VFIOGuestIOMMU *giommu;
 | 
			
		||||
 | 
			
		||||
        DPRINTF("region_add [iommu] %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
 | 
			
		||||
                iova, int128_get64(int128_sub(llend, int128_one())));
 | 
			
		||||
        /*
 | 
			
		||||
         * FIXME: We should do some checking to see if the
 | 
			
		||||
         * capabilities of the host VFIO IOMMU are adequate to model
 | 
			
		||||
         * the guest IOMMU
 | 
			
		||||
         *
 | 
			
		||||
         * FIXME: For VFIO iommu types which have KVM acceleration to
 | 
			
		||||
         * avoid bouncing all map/unmaps through qemu this way, this
 | 
			
		||||
         * would be the right place to wire that up (tell the KVM
 | 
			
		||||
         * device emulation the VFIO iommu handles to use).
 | 
			
		||||
         */
 | 
			
		||||
        /*
 | 
			
		||||
         * This assumes that the guest IOMMU is empty of
 | 
			
		||||
         * mappings at this point.
 | 
			
		||||
         *
 | 
			
		||||
         * One way of doing this is:
 | 
			
		||||
         * 1. Avoid sharing IOMMUs between emulated devices or different
 | 
			
		||||
         * IOMMU groups.
 | 
			
		||||
         * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if
 | 
			
		||||
         * there are some mappings in IOMMU.
 | 
			
		||||
         *
 | 
			
		||||
         * VFIO on SPAPR does that. Other IOMMU models may do that different,
 | 
			
		||||
         * they must make sure there are no existing mappings or
 | 
			
		||||
         * loop through existing mappings to map them into VFIO.
 | 
			
		||||
         */
 | 
			
		||||
        giommu = g_malloc0(sizeof(*giommu));
 | 
			
		||||
        giommu->iommu = section->mr;
 | 
			
		||||
        giommu->container = container;
 | 
			
		||||
        giommu->n.notify = vfio_iommu_map_notify;
 | 
			
		||||
        QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
 | 
			
		||||
        memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
 | 
			
		||||
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /* Here we assume that memory_region_is_ram(section->mr)==true */
 | 
			
		||||
 | 
			
		||||
    end = int128_get64(llend);
 | 
			
		||||
    vaddr = memory_region_get_ram_ptr(section->mr) +
 | 
			
		||||
            section->offset_within_region +
 | 
			
		||||
            (iova - section->offset_within_address_space);
 | 
			
		||||
 | 
			
		||||
    DPRINTF("region_add %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n",
 | 
			
		||||
    DPRINTF("region_add [ram] %"HWADDR_PRIx" - %"HWADDR_PRIx" [%p]\n",
 | 
			
		||||
            iova, end - 1, vaddr);
 | 
			
		||||
 | 
			
		||||
    memory_region_ref(section->mr);
 | 
			
		||||
    ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly);
 | 
			
		||||
    if (ret) {
 | 
			
		||||
        error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
 | 
			
		||||
@ -2326,6 +2588,27 @@ static void vfio_listener_region_del(MemoryListener *listener,
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (memory_region_is_iommu(section->mr)) {
 | 
			
		||||
        VFIOGuestIOMMU *giommu;
 | 
			
		||||
 | 
			
		||||
        QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
 | 
			
		||||
            if (giommu->iommu == section->mr) {
 | 
			
		||||
                memory_region_unregister_iommu_notifier(&giommu->n);
 | 
			
		||||
                QLIST_REMOVE(giommu, giommu_next);
 | 
			
		||||
                g_free(giommu);
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        /*
 | 
			
		||||
         * FIXME: We assume the one big unmap below is adequate to
 | 
			
		||||
         * remove any individual page mappings in the IOMMU which
 | 
			
		||||
         * might have been copied into VFIO. This works for a page table
 | 
			
		||||
         * based IOMMU where a big unmap flattens a large range of IO-PTEs.
 | 
			
		||||
         * That may not be true for all IOMMU types.
 | 
			
		||||
         */
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
 | 
			
		||||
    end = (section->offset_within_address_space + int128_get64(section->size)) &
 | 
			
		||||
          TARGET_PAGE_MASK;
 | 
			
		||||
@ -3274,16 +3557,43 @@ static void vfio_kvm_device_del_group(VFIOGroup *group)
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int vfio_connect_container(VFIOGroup *group)
 | 
			
		||||
/*
 * Return the VFIOAddressSpace that wraps @as, creating it and adding it to
 * vfio_address_spaces if none exists yet.  A newly created entry starts
 * with an empty container list.
 */
static VFIOAddressSpace *vfio_get_address_space(AddressSpace *as)
{
    VFIOAddressSpace *cur;
    VFIOAddressSpace *fresh;

    /* Reuse an existing entry for this AddressSpace when there is one */
    QLIST_FOREACH(cur, &vfio_address_spaces, list) {
        if (cur->as == as) {
            return cur;
        }
    }

    fresh = g_malloc0(sizeof(*fresh));
    fresh->as = as;
    QLIST_INIT(&fresh->containers);
    QLIST_INSERT_HEAD(&vfio_address_spaces, fresh, list);

    return fresh;
}
 | 
			
		||||
 | 
			
		||||
/*
 * Drop @space once nothing uses it: if its container list is empty, unlink
 * it from its list and free it.  Otherwise leave it untouched.
 */
static void vfio_put_address_space(VFIOAddressSpace *space)
{
    if (!QLIST_EMPTY(&space->containers)) {
        /* Still referenced by at least one container */
        return;
    }

    QLIST_REMOVE(space, list);
    g_free(space);
}
 | 
			
		||||
 | 
			
		||||
static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
 | 
			
		||||
{
 | 
			
		||||
    VFIOContainer *container;
 | 
			
		||||
    int ret, fd;
 | 
			
		||||
    VFIOAddressSpace *space;
 | 
			
		||||
 | 
			
		||||
    if (group->container) {
 | 
			
		||||
        return 0;
 | 
			
		||||
    }
 | 
			
		||||
    space = vfio_get_address_space(as);
 | 
			
		||||
 | 
			
		||||
    QLIST_FOREACH(container, &container_list, next) {
 | 
			
		||||
    QLIST_FOREACH(container, &space->containers, next) {
 | 
			
		||||
        if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
 | 
			
		||||
            group->container = container;
 | 
			
		||||
            QLIST_INSERT_HEAD(&container->group_list, group, container_next);
 | 
			
		||||
@ -3294,35 +3604,35 @@ static int vfio_connect_container(VFIOGroup *group)
 | 
			
		||||
    fd = qemu_open("/dev/vfio/vfio", O_RDWR);
 | 
			
		||||
    if (fd < 0) {
 | 
			
		||||
        error_report("vfio: failed to open /dev/vfio/vfio: %m");
 | 
			
		||||
        return -errno;
 | 
			
		||||
        ret = -errno;
 | 
			
		||||
        goto put_space_exit;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ret = ioctl(fd, VFIO_GET_API_VERSION);
 | 
			
		||||
    if (ret != VFIO_API_VERSION) {
 | 
			
		||||
        error_report("vfio: supported vfio version: %d, "
 | 
			
		||||
                     "reported version: %d", VFIO_API_VERSION, ret);
 | 
			
		||||
        close(fd);
 | 
			
		||||
        return -EINVAL;
 | 
			
		||||
        ret = -EINVAL;
 | 
			
		||||
        goto close_fd_exit;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    container = g_malloc0(sizeof(*container));
 | 
			
		||||
    container->space = space;
 | 
			
		||||
    container->fd = fd;
 | 
			
		||||
 | 
			
		||||
    if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
 | 
			
		||||
        ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
 | 
			
		||||
        if (ret) {
 | 
			
		||||
            error_report("vfio: failed to set group container: %m");
 | 
			
		||||
            g_free(container);
 | 
			
		||||
            close(fd);
 | 
			
		||||
            return -errno;
 | 
			
		||||
            ret = -errno;
 | 
			
		||||
            goto free_container_exit;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 | 
			
		||||
        if (ret) {
 | 
			
		||||
            error_report("vfio: failed to set iommu for container: %m");
 | 
			
		||||
            g_free(container);
 | 
			
		||||
            close(fd);
 | 
			
		||||
            return -errno;
 | 
			
		||||
            ret = -errno;
 | 
			
		||||
            goto free_container_exit;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        container->iommu_data.type1.listener = vfio_memory_listener;
 | 
			
		||||
@ -3333,29 +3643,39 @@ static int vfio_connect_container(VFIOGroup *group)
 | 
			
		||||
 | 
			
		||||
        if (container->iommu_data.type1.error) {
 | 
			
		||||
            ret = container->iommu_data.type1.error;
 | 
			
		||||
            vfio_listener_release(container);
 | 
			
		||||
            g_free(container);
 | 
			
		||||
            close(fd);
 | 
			
		||||
            error_report("vfio: memory listener initialization failed for container");
 | 
			
		||||
            return ret;
 | 
			
		||||
            goto listener_release_exit;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        container->iommu_data.type1.initialized = true;
 | 
			
		||||
 | 
			
		||||
    } else {
 | 
			
		||||
        error_report("vfio: No available IOMMU models");
 | 
			
		||||
        g_free(container);
 | 
			
		||||
        close(fd);
 | 
			
		||||
        return -EINVAL;
 | 
			
		||||
        ret = -EINVAL;
 | 
			
		||||
        goto free_container_exit;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    QLIST_INIT(&container->group_list);
 | 
			
		||||
    QLIST_INSERT_HEAD(&container_list, container, next);
 | 
			
		||||
    QLIST_INSERT_HEAD(&space->containers, container, next);
 | 
			
		||||
 | 
			
		||||
    group->container = container;
 | 
			
		||||
    QLIST_INSERT_HEAD(&container->group_list, group, container_next);
 | 
			
		||||
 | 
			
		||||
    return 0;
 | 
			
		||||
 | 
			
		||||
listener_release_exit:
 | 
			
		||||
    vfio_listener_release(container);
 | 
			
		||||
 | 
			
		||||
free_container_exit:
 | 
			
		||||
    g_free(container);
 | 
			
		||||
 | 
			
		||||
close_fd_exit:
 | 
			
		||||
    close(fd);
 | 
			
		||||
 | 
			
		||||
put_space_exit:
 | 
			
		||||
    vfio_put_address_space(space);
 | 
			
		||||
 | 
			
		||||
    return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void vfio_disconnect_container(VFIOGroup *group)
 | 
			
		||||
@ -3371,6 +3691,8 @@ static void vfio_disconnect_container(VFIOGroup *group)
 | 
			
		||||
    group->container = NULL;
 | 
			
		||||
 | 
			
		||||
    if (QLIST_EMPTY(&container->group_list)) {
 | 
			
		||||
        VFIOAddressSpace *space = container->space;
 | 
			
		||||
 | 
			
		||||
        if (container->iommu_data.release) {
 | 
			
		||||
            container->iommu_data.release(container);
 | 
			
		||||
        }
 | 
			
		||||
@ -3378,10 +3700,12 @@ static void vfio_disconnect_container(VFIOGroup *group)
 | 
			
		||||
        DPRINTF("vfio_disconnect_container: close container->fd\n");
 | 
			
		||||
        close(container->fd);
 | 
			
		||||
        g_free(container);
 | 
			
		||||
 | 
			
		||||
        vfio_put_address_space(space);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static VFIOGroup *vfio_get_group(int groupid)
 | 
			
		||||
static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as)
 | 
			
		||||
{
 | 
			
		||||
    VFIOGroup *group;
 | 
			
		||||
    char path[32];
 | 
			
		||||
@ -3389,7 +3713,14 @@ static VFIOGroup *vfio_get_group(int groupid)
 | 
			
		||||
 | 
			
		||||
    QLIST_FOREACH(group, &group_list, next) {
 | 
			
		||||
        if (group->groupid == groupid) {
 | 
			
		||||
            return group;
 | 
			
		||||
            /* Found it.  Now is it already in the right context? */
 | 
			
		||||
            if (group->container->space->as == as) {
 | 
			
		||||
                return group;
 | 
			
		||||
            } else {
 | 
			
		||||
                error_report("vfio: group %d used in multiple address spaces",
 | 
			
		||||
                             group->groupid);
 | 
			
		||||
                return NULL;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -3399,34 +3730,27 @@ static VFIOGroup *vfio_get_group(int groupid)
 | 
			
		||||
    group->fd = qemu_open(path, O_RDWR);
 | 
			
		||||
    if (group->fd < 0) {
 | 
			
		||||
        error_report("vfio: error opening %s: %m", path);
 | 
			
		||||
        g_free(group);
 | 
			
		||||
        return NULL;
 | 
			
		||||
        goto free_group_exit;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) {
 | 
			
		||||
        error_report("vfio: error getting group status: %m");
 | 
			
		||||
        close(group->fd);
 | 
			
		||||
        g_free(group);
 | 
			
		||||
        return NULL;
 | 
			
		||||
        goto close_fd_exit;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
 | 
			
		||||
        error_report("vfio: error, group %d is not viable, please ensure "
 | 
			
		||||
                     "all devices within the iommu_group are bound to their "
 | 
			
		||||
                     "vfio bus driver.", groupid);
 | 
			
		||||
        close(group->fd);
 | 
			
		||||
        g_free(group);
 | 
			
		||||
        return NULL;
 | 
			
		||||
        goto close_fd_exit;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    group->groupid = groupid;
 | 
			
		||||
    QLIST_INIT(&group->device_list);
 | 
			
		||||
 | 
			
		||||
    if (vfio_connect_container(group)) {
 | 
			
		||||
    if (vfio_connect_container(group, as)) {
 | 
			
		||||
        error_report("vfio: failed to setup container for group %d", groupid);
 | 
			
		||||
        close(group->fd);
 | 
			
		||||
        g_free(group);
 | 
			
		||||
        return NULL;
 | 
			
		||||
        goto close_fd_exit;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (QLIST_EMPTY(&group_list)) {
 | 
			
		||||
@ -3438,6 +3762,14 @@ static VFIOGroup *vfio_get_group(int groupid)
 | 
			
		||||
    vfio_kvm_device_add_group(group);
 | 
			
		||||
 | 
			
		||||
    return group;
 | 
			
		||||
 | 
			
		||||
close_fd_exit:
 | 
			
		||||
    close(group->fd);
 | 
			
		||||
 | 
			
		||||
free_group_exit:
 | 
			
		||||
    g_free(group);
 | 
			
		||||
 | 
			
		||||
    return NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void vfio_put_group(VFIOGroup *group)
 | 
			
		||||
@ -3768,7 +4100,7 @@ static int vfio_initfn(PCIDevice *pdev)
 | 
			
		||||
    DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, vdev->host.domain,
 | 
			
		||||
            vdev->host.bus, vdev->host.slot, vdev->host.function, groupid);
 | 
			
		||||
 | 
			
		||||
    group = vfio_get_group(groupid);
 | 
			
		||||
    group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev));
 | 
			
		||||
    if (!group) {
 | 
			
		||||
        error_report("vfio: failed to get group %d", groupid);
 | 
			
		||||
        return -ENOENT;
 | 
			
		||||
 | 
			
		||||
@ -38,6 +38,11 @@ static inline Int128 int128_2_64(void)
 | 
			
		||||
    return (Int128) { 0, 1 };
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline Int128 int128_exts64(int64_t a)
 | 
			
		||||
{
 | 
			
		||||
    return (Int128) { .lo = a, .hi = (a < 0) ? -1 : 0 };
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline Int128 int128_and(Int128 a, Int128 b)
 | 
			
		||||
{
 | 
			
		||||
    return (Int128) { a.lo & b.lo, a.hi & b.hi };
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										7
									
								
								memory.c
									
									
									
									
									
								
							
							
						
						
									
										7
									
								
								memory.c
									
									
									
									
									
								
							@ -1722,12 +1722,19 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
 | 
			
		||||
 | 
			
		||||
void address_space_destroy(AddressSpace *as)
 | 
			
		||||
{
 | 
			
		||||
    MemoryListener *listener;
 | 
			
		||||
 | 
			
		||||
    /* Flush out anything from MemoryListeners listening in on this */
 | 
			
		||||
    memory_region_transaction_begin();
 | 
			
		||||
    as->root = NULL;
 | 
			
		||||
    memory_region_transaction_commit();
 | 
			
		||||
    QTAILQ_REMOVE(&address_spaces, as, address_spaces_link);
 | 
			
		||||
    address_space_destroy_dispatch(as);
 | 
			
		||||
 | 
			
		||||
    QTAILQ_FOREACH(listener, &memory_listeners, link) {
 | 
			
		||||
        assert(listener->address_space_filter != as);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    flatview_unref(as->current_map);
 | 
			
		||||
    g_free(as->name);
 | 
			
		||||
    g_free(as->ioeventfds);
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user