 2b1dbd0d72
			
		
	
	
		2b1dbd0d72
		
	
	
	
	
		
			
			With vfio ioeventfd support, we can program vfio-pci to perform a specified BAR write when an eventfd is triggered. This allows the KVM ioeventfd to be wired directly to vfio-pci, entirely avoiding userspace handling for these events. On the same micro-benchmark where the ioeventfd got us to almost 90% of performance versus disabling the GeForce quirks, this gets us to within 95%. Reviewed-by: Peter Xu <peterx@redhat.com> Reviewed-by: Eric Auger <eric.auger@redhat.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
		
			
				
	
	
		
			202 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			202 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * vfio based device assignment support - PCI devices
 | |
|  *
 | |
|  * Copyright Red Hat, Inc. 2012-2015
 | |
|  *
 | |
|  * Authors:
 | |
|  *  Alex Williamson <alex.williamson@redhat.com>
 | |
|  *
 | |
|  * This work is licensed under the terms of the GNU GPL, version 2.  See
 | |
|  * the COPYING file in the top-level directory.
 | |
|  */
 | |
| #ifndef HW_VFIO_VFIO_PCI_H
 | |
| #define HW_VFIO_VFIO_PCI_H
 | |
| 
 | |
| #include "qemu-common.h"
 | |
| #include "exec/memory.h"
 | |
| #include "hw/pci/pci.h"
 | |
| #include "hw/vfio/vfio-common.h"
 | |
| #include "qemu/event_notifier.h"
 | |
| #include "qemu/queue.h"
 | |
| #include "qemu/timer.h"
 | |
| 
 | |
| #define PCI_ANY_ID (~0) /* all bits set (int -1); presumably a "match any" wildcard for PCI IDs - confirm at use sites */
 | |
| 
 | |
| struct VFIOPCIDevice; /* forward declaration: VFIOQuirk.reset and VFIOMSIVector.vdev refer to it before its definition below */
 | |
| 
 | |
| typedef struct VFIOIOEventFD { /* One ioeventfd record; VFIOQuirk.ioeventfds heads a list of these */
 | |
|     QLIST_ENTRY(VFIOIOEventFD) next; /* list linkage */
 | |
|     MemoryRegion *mr; /* presumably the memory region the ioeventfd is attached to - confirm */
 | |
|     hwaddr addr; /* presumably the offset within mr - confirm */
 | |
|     unsigned size; /* access size; unit not shown here, presumably bytes - confirm */
 | |
|     uint64_t data; /* presumably the write value being matched - confirm */
 | |
|     EventNotifier e; /* event notifier belonging to this entry */
 | |
|     VFIORegion *region; /* presumably the vfio region the write is forwarded to - confirm */
 | |
|     hwaddr region_addr; /* presumably the offset within region - confirm */
 | |
|     bool dynamic; /* Added at runtime, removed on device reset */
 | |
|     bool vfio; /* presumably set when vfio-pci handles the eventfd itself - confirm */
 | |
| } VFIOIOEventFD;
 | |
| 
 | |
| typedef struct VFIOQuirk { /* One device quirk; VFIOBAR.quirks and VFIOVGARegion.quirks head lists of these */
 | |
|     QLIST_ENTRY(VFIOQuirk) next; /* list linkage */
 | |
|     void *data; /* opaque pointer; meaning is not defined in this header */
 | |
|     QLIST_HEAD(, VFIOIOEventFD) ioeventfds; /* head of this quirk's VFIOIOEventFD list */
 | |
|     int nr_mem; /* presumably the number of entries in mem - confirm */
 | |
|     MemoryRegion *mem; /* presumably an array of nr_mem regions - confirm */
 | |
|     void (*reset)(struct VFIOPCIDevice *vdev, struct VFIOQuirk *quirk); /* optional-looking reset callback; NULL handling not shown here */
 | |
| } VFIOQuirk;
 | |
| 
 | |
| typedef struct VFIOBAR { /* Per-BAR state; VFIOPCIDevice.bars is an array of these */
 | |
|     VFIORegion region; /* embedded vfio region (type declared outside this header) */
 | |
|     MemoryRegion *mr; /* presumably the guest-visible memory region for the BAR - confirm */
 | |
|     size_t size; /* presumably the BAR size in bytes - confirm */
 | |
|     uint8_t type; /* encoding not shown here; presumably PCI BAR type bits - confirm */
 | |
|     bool ioport; /* presumably true for an I/O port BAR - confirm */
 | |
|     bool mem64; /* presumably true for a 64-bit memory BAR - confirm */
 | |
|     QLIST_HEAD(, VFIOQuirk) quirks; /* head of the VFIOQuirk list for this BAR */
 | |
| } VFIOBAR;
 | |
| 
 | |
| typedef struct VFIOVGARegion { /* One VGA region; VFIOVGA.region is an array of these */
 | |
|     MemoryRegion mem; /* embedded memory region */
 | |
|     off_t offset; /* base of the offset is not shown here - confirm against users */
 | |
|     int nr; /* presumably this region's index - confirm */
 | |
|     QLIST_HEAD(, VFIOQuirk) quirks; /* head of the VFIOQuirk list for this region */
 | |
| } VFIOVGARegion;
 | |
| 
 | |
| typedef struct VFIOVGA { /* VGA state; VFIOPCIDevice.vga points to one of these */
 | |
|     off_t fd_offset; /* presumably the offset of the VGA area within fd - confirm */
 | |
|     int fd; /* file descriptor; which descriptor it is, is not shown here */
 | |
|     VFIOVGARegion region[QEMU_PCI_VGA_NUM_REGIONS]; /* one entry per VGA region */
 | |
| } VFIOVGA;
 | |
| 
 | |
| typedef struct VFIOINTx { /* INTx interrupt state; embedded in VFIOPCIDevice as intx */
 | |
|     bool pending; /* interrupt pending */
 | |
|     bool kvm_accel; /* set when QEMU bypass through KVM enabled */
 | |
|     uint8_t pin; /* which pin to pull for qemu_set_irq */
 | |
|     EventNotifier interrupt; /* eventfd triggered on interrupt */
 | |
|     EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
 | |
|     PCIINTxRoute route; /* routing info for QEMU bypass */
 | |
|     uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
 | |
|     QEMUTimer *mmap_timer; /* enable mmaps after periods w/o interrupts */
 | |
| } VFIOINTx;
 | |
| 
 | |
| typedef struct VFIOMSIVector { /* Per-vector state; VFIOPCIDevice.msi_vectors points to these */
 | |
|     /*
 | |
|      * Two interrupt paths are configured per vector.  The first is only used
 | |
|      * for interrupts injected via QEMU.  This is typically the non-accel path,
 | |
|      * but may also be used when we want QEMU to handle masking and pending
 | |
|      * bits.  The KVM path bypasses QEMU and is therefore higher performance,
 | |
|      * but requires masking at the device.  virq is used to track the MSI route
 | |
|      * through KVM, thus kvm_interrupt is only available when virq is set to a
 | |
|      * valid (>= 0) value.
 | |
|      */
 | |
|     EventNotifier interrupt; /* the QEMU-injected path described above */
 | |
|     EventNotifier kvm_interrupt; /* the KVM bypass path; only valid while virq >= 0 */
 | |
|     struct VFIOPCIDevice *vdev; /* back pointer to device */
 | |
|     int virq; /* MSI route through KVM; negative means no valid route */
 | |
|     bool use; /* presumably marks the vector as in use - confirm */
 | |
| } VFIOMSIVector;
 | |
| 
 | |
| enum { /* interrupt types; presumably the values held in VFIOPCIDevice.interrupt - confirm */
 | |
|     VFIO_INT_NONE = 0,
 | |
|     VFIO_INT_INTx = 1,
 | |
|     VFIO_INT_MSI  = 2,
 | |
|     VFIO_INT_MSIX = 3,
 | |
| };
 | |
| 
 | |
| /* Cache of MSI-X setup */
 | |
| typedef struct VFIOMSIXInfo { /* VFIOPCIDevice.msix points to one of these */
 | |
|     uint8_t table_bar; /* presumably the BAR index holding the MSI-X table - confirm */
 | |
|     uint8_t pba_bar; /* presumably the BAR index holding the pending bit array - confirm */
 | |
|     uint16_t entries; /* presumably the number of MSI-X table entries - confirm */
 | |
|     uint32_t table_offset; /* presumably the table offset within table_bar - confirm */
 | |
|     uint32_t pba_offset; /* presumably the PBA offset within pba_bar - confirm */
 | |
|     unsigned long *pending; /* presumably a bitmap of pending vectors - confirm */
 | |
| } VFIOMSIXInfo;
 | |
| 
 | |
| typedef struct VFIOPCIDevice { /* Top-level state for one vfio-pci device */
 | |
|     PCIDevice pdev; /* embedded PCI device; first member of the struct */
 | |
|     VFIODevice vbasedev; /* embedded vfio device state (type declared outside this header) */
 | |
|     VFIOINTx intx; /* INTx interrupt state */
 | |
|     unsigned int config_size; /* presumably the size of the config space region - confirm */
 | |
|     uint8_t *emulated_config_bits; /* QEMU emulated bits, little-endian */
 | |
|     off_t config_offset; /* Offset of config space region within device fd */
 | |
|     unsigned int rom_size; /* presumably the size of the ROM region - confirm */
 | |
|     off_t rom_offset; /* Offset of ROM region within device fd */
 | |
|     void *rom; /* presumably a buffer holding ROM contents - confirm */
 | |
|     int msi_cap_size; /* presumably the size of the MSI capability - confirm */
 | |
|     VFIOMSIVector *msi_vectors; /* per-vector state; element count not shown here, see nr_vectors */
 | |
|     VFIOMSIXInfo *msix; /* MSI-X setup cache */
 | |
|     int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
 | |
|     int interrupt; /* Current interrupt type */
 | |
|     VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
 | |
|     VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
 | |
|     void *igd_opregion; /* opaque here; see vfio_pci_igd_opregion_init() declared below */
 | |
|     PCIHostDeviceAddress host; /* presumably the host PCI address of the device - confirm */
 | |
|     EventNotifier err_notifier; /* purpose not shown here; presumably error notification - confirm */
 | |
|     EventNotifier req_notifier; /* purpose not shown here; presumably device request notification - confirm */
 | |
|     int (*resetfn)(struct VFIOPCIDevice *); /* reset callback; see vfio_setup_resetfn_quirk() declared below */
 | |
|     uint32_t vendor_id; /* the four ID fields are 32-bit, wider than a 16-bit PCI ID; presumably so PCI_ANY_ID can be stored - confirm */
 | |
|     uint32_t device_id;
 | |
|     uint32_t sub_vendor_id;
 | |
|     uint32_t sub_device_id;
 | |
|     uint32_t features; /* presumably a bitmask of the VFIO_FEATURE_ENABLE_* flags below - confirm */
 | |
| #define VFIO_FEATURE_ENABLE_VGA_BIT 0
 | |
| #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
 | |
| #define VFIO_FEATURE_ENABLE_REQ_BIT 1
 | |
| #define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
 | |
| #define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2
 | |
| #define VFIO_FEATURE_ENABLE_IGD_OPREGION \
 | |
|                                 (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT)
 | |
|     OnOffAuto display; /* tri-state setting; see the vfio_display_*() declarations below */
 | |
|     int32_t bootindex;
 | |
|     uint32_t igd_gms;
 | |
|     OffAutoPCIBAR msix_relo; /* presumably controls MSI-X relocation to a BAR - confirm */
 | |
|     uint8_t pm_cap; /* presumably the config offset of the PM capability - confirm */
 | |
|     uint8_t nv_gpudirect_clique; /* see qdev_prop_nv_gpudirect_clique declared below */
 | |
|     bool pci_aer;
 | |
|     bool req_enabled;
 | |
|     bool has_flr;
 | |
|     bool has_pm_reset;
 | |
|     bool rom_read_failed;
 | |
|     bool no_kvm_intx; /* the no_* flags presumably disable the named acceleration or quirk - confirm */
 | |
|     bool no_kvm_msi;
 | |
|     bool no_kvm_msix;
 | |
|     bool no_geforce_quirks;
 | |
|     bool no_kvm_ioeventfd;
 | |
|     bool no_vfio_ioeventfd;
 | |
|     VFIODisplay *dpy; /* display state (type declared outside this header) */
 | |
| } VFIOPCIDevice;
 | |
| 
 | |
| uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); /* config read; len presumably in bytes - confirm */
 | |
| void vfio_pci_write_config(PCIDevice *pdev,
 | |
|                            uint32_t addr, uint32_t val, int len); /* config write; len presumably in bytes - confirm */
 | |
| 
 | |
| uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size); /* opaque's concrete type is not shown here */
 | |
| void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size); /* opaque's concrete type is not shown here */
 | |
| 
 | |
| bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev); /* presumably true when the option ROM is blacklisted - confirm */
 | |
| void vfio_vga_quirk_setup(VFIOPCIDevice *vdev); /* VGA quirk lifecycle: setup / exit / finalize */
 | |
| void vfio_vga_quirk_exit(VFIOPCIDevice *vdev);
 | |
| void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev);
 | |
| void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr); /* BAR quirk lifecycle; nr presumably indexes vdev->bars - confirm */
 | |
| void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr);
 | |
| void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr);
 | |
| void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev); /* presumably installs vdev->resetfn - confirm */
 | |
| int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp); /* return convention not shown here; errors reported via errp */
 | |
| void vfio_quirk_reset(VFIOPCIDevice *vdev); /* presumably invokes the VFIOQuirk.reset callbacks - confirm */
 | |
| 
 | |
| extern const PropertyInfo qdev_prop_nv_gpudirect_clique; /* defined elsewhere; presumably backs VFIOPCIDevice.nv_gpudirect_clique - confirm */
 | |
| 
 | |
| int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); /* return convention not shown here; errors reported via errp */
 | |
| 
 | |
| int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
 | |
|                                struct vfio_region_info *info,
 | |
|                                Error **errp); /* return convention not shown here; errors reported via errp */
 | |
| 
 | |
| void vfio_display_reset(VFIOPCIDevice *vdev); /* display lifecycle: reset / probe / finalize */
 | |
| int vfio_display_probe(VFIOPCIDevice *vdev, Error **errp); /* return convention not shown here; errors reported via errp */
 | |
| void vfio_display_finalize(VFIOPCIDevice *vdev);
 | |
| 
 | |
| #endif /* HW_VFIO_VFIO_PCI_H */
 |