 2ebfd1c4ac
			
		
	
	
		2ebfd1c4ac
		
	
	
	
	
		
			
			Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-Id: <20231221031652.119827-30-richard.henderson@linaro.org>
		
			
				
	
	
		
			2687 lines
		
	
	
		
			74 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			2687 lines
		
	
	
		
			74 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * QEMU Hyper-V VMBus
 | |
|  *
 | |
|  * Copyright (c) 2017-2018 Virtuozzo International GmbH.
 | |
|  *
 | |
|  * This work is licensed under the terms of the GNU GPL, version 2 or later.
 | |
|  * See the COPYING file in the top-level directory.
 | |
|  */
 | |
| 
 | |
| #include "qemu/osdep.h"
 | |
| #include "qemu/error-report.h"
 | |
| #include "qemu/main-loop.h"
 | |
| #include "qapi/error.h"
 | |
| #include "migration/vmstate.h"
 | |
| #include "hw/qdev-properties.h"
 | |
| #include "hw/qdev-properties-system.h"
 | |
| #include "hw/hyperv/hyperv.h"
 | |
| #include "hw/hyperv/vmbus.h"
 | |
| #include "hw/hyperv/vmbus-bridge.h"
 | |
| #include "hw/sysbus.h"
 | |
| #include "cpu.h"
 | |
| #include "trace.h"
 | |
| 
 | |
/*
 * GPADL state constants.
 * NOTE(review): presumably the values kept in VMBusGpadl.state, which
 * vmstate_gpadl puts on the migration stream, so the numbers should stay
 * stable -- confirm.
 */
enum {
    VMGPADL_INIT        = 0,
    VMGPADL_ALIVE       = 1,
    VMGPADL_TEARINGDOWN = 2,
    VMGPADL_TORNDOWN    = 3,
};
 | |
| 
 | |
| struct VMBusGpadl {
 | |
|     /* GPADL id */
 | |
|     uint32_t id;
 | |
|     /* associated channel id (rudimentary?) */
 | |
|     uint32_t child_relid;
 | |
| 
 | |
|     /* number of pages in the GPADL as declared in GPADL_HEADER message */
 | |
|     uint32_t num_gfns;
 | |
|     /*
 | |
|      * Due to limited message size, GPADL may not fit fully in a single
 | |
|      * GPADL_HEADER message, and is further popluated using GPADL_BODY
 | |
|      * messages.  @seen_gfns is the number of pages seen so far; once it
 | |
|      * reaches @num_gfns, the GPADL is ready to use.
 | |
|      */
 | |
|     uint32_t seen_gfns;
 | |
|     /* array of GFNs (of size @num_gfns once allocated) */
 | |
|     uint64_t *gfns;
 | |
| 
 | |
|     uint8_t state;
 | |
| 
 | |
|     QTAILQ_ENTRY(VMBusGpadl) link;
 | |
|     VMBus *vmbus;
 | |
|     unsigned refcount;
 | |
| };
 | |
| 
 | |
| /*
 | |
|  * Wrap sequential read from / write to GPADL.
 | |
|  */
 | |
| typedef struct GpadlIter {
 | |
|     VMBusGpadl *gpadl;
 | |
|     AddressSpace *as;
 | |
|     DMADirection dir;
 | |
|     /* offset into GPADL where the next i/o will be performed */
 | |
|     uint32_t off;
 | |
|     /*
 | |
|      * Cached mapping of the currently accessed page, up to page boundary.
 | |
|      * Updated lazily on i/o.
 | |
|      * Note: MemoryRegionCache can not be used here because pages in the GPADL
 | |
|      * are non-contiguous and may belong to different memory regions.
 | |
|      */
 | |
|     void *map;
 | |
|     /* offset after last i/o (i.e. not affected by seek) */
 | |
|     uint32_t last_off;
 | |
|     /*
 | |
|      * Indicator that the iterator is active and may have a cached mapping.
 | |
|      * Allows to enforce bracketing of all i/o (which may create cached
 | |
|      * mappings) and thus exclude mapping leaks.
 | |
|      */
 | |
|     bool active;
 | |
| } GpadlIter;
 | |
| 
 | |
| /*
 | |
|  * Ring buffer.  There are two of them, sitting in the same GPADL, for each
 | |
|  * channel.
 | |
|  * Each ring buffer consists of a set of pages, with the first page containing
 | |
|  * the ring buffer header, and the remaining pages being for data packets.
 | |
|  */
 | |
| typedef struct VMBusRingBufCommon {
 | |
|     AddressSpace *as;
 | |
|     /* GPA of the ring buffer header */
 | |
|     dma_addr_t rb_addr;
 | |
|     /* start and length of the ring buffer data area within GPADL */
 | |
|     uint32_t base;
 | |
|     uint32_t len;
 | |
| 
 | |
|     GpadlIter iter;
 | |
| } VMBusRingBufCommon;
 | |
| 
 | |
| typedef struct VMBusSendRingBuf {
 | |
|     VMBusRingBufCommon common;
 | |
|     /* current write index, to be committed at the end of send */
 | |
|     uint32_t wr_idx;
 | |
|     /* write index at the start of send */
 | |
|     uint32_t last_wr_idx;
 | |
|     /* space to be requested from the guest */
 | |
|     uint32_t wanted;
 | |
|     /* space reserved for planned sends */
 | |
|     uint32_t reserved;
 | |
|     /* last seen read index */
 | |
|     uint32_t last_seen_rd_idx;
 | |
| } VMBusSendRingBuf;
 | |
| 
 | |
| typedef struct VMBusRecvRingBuf {
 | |
|     VMBusRingBufCommon common;
 | |
|     /* current read index, to be committed at the end of receive */
 | |
|     uint32_t rd_idx;
 | |
|     /* read index at the start of receive */
 | |
|     uint32_t last_rd_idx;
 | |
|     /* last seen write index */
 | |
|     uint32_t last_seen_wr_idx;
 | |
| } VMBusRecvRingBuf;
 | |
| 
 | |
| 
 | |
/*
 * Channel offer state constants.
 * NOTE(review): presumably the values of VMBusChannel.offer_state -- confirm.
 */
enum {
    VMOFFER_INIT    = 0,
    VMOFFER_SENDING = 1,
    VMOFFER_SENT    = 2,
};
 | |
| 
 | |
/*
 * Channel state constants.
 * NOTE(review): presumably the values of VMBusChannel.state -- confirm.
 */
enum {
    VMCHAN_INIT    = 0,
    VMCHAN_OPENING = 1,
    VMCHAN_OPEN    = 2,
};
 | |
| 
 | |
| struct VMBusChannel {
 | |
|     VMBusDevice *dev;
 | |
| 
 | |
|     /* channel id */
 | |
|     uint32_t id;
 | |
|     /*
 | |
|      * subchannel index within the device; subchannel #0 is "primary" and
 | |
|      * always exists
 | |
|      */
 | |
|     uint16_t subchan_idx;
 | |
|     uint32_t open_id;
 | |
|     /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */
 | |
|     uint32_t target_vp;
 | |
|     /* GPADL id to use for the ring buffers */
 | |
|     uint32_t ringbuf_gpadl;
 | |
|     /* start (in pages) of the send ring buffer within @ringbuf_gpadl */
 | |
|     uint32_t ringbuf_send_offset;
 | |
| 
 | |
|     uint8_t offer_state;
 | |
|     uint8_t state;
 | |
|     bool is_open;
 | |
| 
 | |
|     /* main device worker; copied from the device class */
 | |
|     VMBusChannelNotifyCb notify_cb;
 | |
|     /*
 | |
|      * guest->host notifications, either sent directly or dispatched via
 | |
|      * interrupt page (older VMBus)
 | |
|      */
 | |
|     EventNotifier notifier;
 | |
| 
 | |
|     VMBus *vmbus;
 | |
|     /*
 | |
|      * SINT route to signal with host->guest notifications; may be shared with
 | |
|      * the main VMBus SINT route
 | |
|      */
 | |
|     HvSintRoute *notify_route;
 | |
|     VMBusGpadl *gpadl;
 | |
| 
 | |
|     VMBusSendRingBuf send_ringbuf;
 | |
|     VMBusRecvRingBuf recv_ringbuf;
 | |
| 
 | |
|     QTAILQ_ENTRY(VMBusChannel) link;
 | |
| };
 | |
| 
 | |
/*
 * The Hyper-V spec requires every message port to have 16 buffers, i.e. the
 * guest may post that many messages without blocking, so incoming messages
 * need a queue of that depth.
 * Outgoing (host->guest) messages are not queued: the VMBus simply does not
 * move on to a new state until the message is known to have been delivered
 * to the respective SynIC message slot.
 */
#define HV_MSG_QUEUE_LEN     16

/* Channel #0 is never used by Hyper-V devices.  Must be something special. */
#define VMBUS_FIRST_CHANID      1
/* One bit per channel within a single event page sint slot. */
#define VMBUS_CHANID_COUNT      (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID)
/* Keep a few connection numbers free for other purposes. */
#define VMBUS_CHAN_CONNECTION_OFFSET     16
 | |
| 
 | |
/*
 * States of the VMBus state machine.
 * Message delivery success or failure is reported asynchronously, so the
 * state machine effectively has two entry points: vmbus_run and vmbus_msg_cb
 * (called once the host->guest message delivery status is known).  Both run
 * as oneshot BHs on the main aio context, which serializes them.
 */
enum {
    VMBUS_LISTEN         = 0,
    VMBUS_HANDSHAKE      = 1,
    VMBUS_OFFER          = 2,
    VMBUS_CREATE_GPADL   = 3,
    VMBUS_TEARDOWN_GPADL = 4,
    VMBUS_OPEN_CHANNEL   = 5,
    VMBUS_UNLOAD         = 6,
    VMBUS_STATE_MAX      = 7
};
 | |
| 
 | |
| struct VMBus {
 | |
|     BusState parent;
 | |
| 
 | |
|     uint8_t state;
 | |
|     /* protection against recursive aio_poll (see vmbus_run) */
 | |
|     bool in_progress;
 | |
|     /* whether there's a message being delivered to the guest */
 | |
|     bool msg_in_progress;
 | |
|     uint32_t version;
 | |
|     /* VP_INDEX of the vCPU to send messages and interrupts to */
 | |
|     uint32_t target_vp;
 | |
|     HvSintRoute *sint_route;
 | |
|     /*
 | |
|      * interrupt page for older protocol versions; newer ones use SynIC event
 | |
|      * flags directly
 | |
|      */
 | |
|     hwaddr int_page_gpa;
 | |
| 
 | |
|     DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT);
 | |
| 
 | |
|     /* incoming message queue */
 | |
|     struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN];
 | |
|     uint8_t rx_queue_head;
 | |
|     uint8_t rx_queue_size;
 | |
|     QemuMutex rx_queue_lock;
 | |
| 
 | |
|     QTAILQ_HEAD(, VMBusGpadl) gpadl_list;
 | |
|     QTAILQ_HEAD(, VMBusChannel) channel_list;
 | |
| 
 | |
|     /*
 | |
|      * guest->host notifications for older VMBus, to be dispatched via
 | |
|      * interrupt page
 | |
|      */
 | |
|     EventNotifier notifier;
 | |
| };
 | |
| 
 | |
| static bool gpadl_full(VMBusGpadl *gpadl)
 | |
| {
 | |
|     return gpadl->seen_gfns == gpadl->num_gfns;
 | |
| }
 | |
| 
 | |
| static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id,
 | |
|                                 uint32_t child_relid, uint32_t num_gfns)
 | |
| {
 | |
|     VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1);
 | |
| 
 | |
|     gpadl->id = id;
 | |
|     gpadl->child_relid = child_relid;
 | |
|     gpadl->num_gfns = num_gfns;
 | |
|     gpadl->gfns = g_new(uint64_t, num_gfns);
 | |
|     QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link);
 | |
|     gpadl->vmbus = vmbus;
 | |
|     gpadl->refcount = 1;
 | |
|     return gpadl;
 | |
| }
 | |
| 
 | |
| static void free_gpadl(VMBusGpadl *gpadl)
 | |
| {
 | |
|     QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link);
 | |
|     g_free(gpadl->gfns);
 | |
|     g_free(gpadl);
 | |
| }
 | |
| 
 | |
| static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id)
 | |
| {
 | |
|     VMBusGpadl *gpadl;
 | |
|     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
 | |
|         if (gpadl->id == gpadl_id) {
 | |
|             return gpadl;
 | |
|         }
 | |
|     }
 | |
|     return NULL;
 | |
| }
 | |
| 
 | |
| VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id)
 | |
| {
 | |
|     VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id);
 | |
|     if (!gpadl || !gpadl_full(gpadl)) {
 | |
|         return NULL;
 | |
|     }
 | |
|     gpadl->refcount++;
 | |
|     return gpadl;
 | |
| }
 | |
| 
 | |
| void vmbus_put_gpadl(VMBusGpadl *gpadl)
 | |
| {
 | |
|     if (!gpadl) {
 | |
|         return;
 | |
|     }
 | |
|     if (--gpadl->refcount) {
 | |
|         return;
 | |
|     }
 | |
|     free_gpadl(gpadl);
 | |
| }
 | |
| 
 | |
| uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl)
 | |
| {
 | |
|     return gpadl->num_gfns * TARGET_PAGE_SIZE;
 | |
| }
 | |
| 
 | |
| static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl,
 | |
|                             AddressSpace *as, DMADirection dir)
 | |
| {
 | |
|     iter->gpadl = gpadl;
 | |
|     iter->as = as;
 | |
|     iter->dir = dir;
 | |
|     iter->active = false;
 | |
| }
 | |
| 
 | |
| static inline void gpadl_iter_cache_unmap(GpadlIter *iter)
 | |
| {
 | |
|     uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
 | |
|     uint32_t io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1;
 | |
| 
 | |
|     /* mapping is only done to do non-zero amount of i/o */
 | |
|     assert(iter->last_off > 0);
 | |
|     assert(map_start_in_page < io_end_in_page);
 | |
| 
 | |
|     dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page,
 | |
|                      iter->dir, io_end_in_page - map_start_in_page);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf.
 | |
|  * The direction of the copy is determined by @iter->dir.
 | |
|  * The caller must ensure the operation overflows neither @buf nor the GPADL
 | |
|  * (there's an assert for the latter).
 | |
|  * Reuse the currently mapped page in the GPADL if possible.
 | |
|  */
 | |
| static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len)
 | |
| {
 | |
|     ssize_t ret = len;
 | |
| 
 | |
|     assert(iter->active);
 | |
| 
 | |
|     while (len) {
 | |
|         uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK;
 | |
|         uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page;
 | |
|         uint32_t cplen = MIN(pgleft, len);
 | |
|         void *p;
 | |
| 
 | |
|         /* try to reuse the cached mapping */
 | |
|         if (iter->map) {
 | |
|             uint32_t map_start_in_page =
 | |
|                 (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
 | |
|             uint32_t off_base = iter->off & ~TARGET_PAGE_MASK;
 | |
|             uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK;
 | |
|             if (off_base != mapped_base || off_in_page < map_start_in_page) {
 | |
|                 gpadl_iter_cache_unmap(iter);
 | |
|                 iter->map = NULL;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if (!iter->map) {
 | |
|             dma_addr_t maddr;
 | |
|             dma_addr_t mlen = pgleft;
 | |
|             uint32_t idx = iter->off >> TARGET_PAGE_BITS;
 | |
|             assert(idx < iter->gpadl->num_gfns);
 | |
| 
 | |
|             maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
 | |
| 
 | |
|             iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir,
 | |
|                                        MEMTXATTRS_UNSPECIFIED);
 | |
|             if (mlen != pgleft) {
 | |
|                 dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
 | |
|                 iter->map = NULL;
 | |
|                 return -EFAULT;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         p = (void *)(uintptr_t)(((uintptr_t)iter->map & TARGET_PAGE_MASK) |
 | |
|                 off_in_page);
 | |
|         if (iter->dir == DMA_DIRECTION_FROM_DEVICE) {
 | |
|             memcpy(p, buf, cplen);
 | |
|         } else {
 | |
|             memcpy(buf, p, cplen);
 | |
|         }
 | |
| 
 | |
|         buf += cplen;
 | |
|         len -= cplen;
 | |
|         iter->off += cplen;
 | |
|         iter->last_off = iter->off;
 | |
|     }
 | |
| 
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Position the iterator @iter at new offset @new_off.
 | |
|  * If this results in the cached mapping being unusable with the new offset,
 | |
|  * unmap it.
 | |
|  */
 | |
| static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off)
 | |
| {
 | |
|     assert(iter->active);
 | |
|     iter->off = new_off;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Start a series of i/o on the GPADL.
 | |
|  * After this i/o and seek operations on @iter become legal.
 | |
|  */
 | |
| static inline void gpadl_iter_start_io(GpadlIter *iter)
 | |
| {
 | |
|     assert(!iter->active);
 | |
|     /* mapping is cached lazily on i/o */
 | |
|     iter->map = NULL;
 | |
|     iter->active = true;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * End the eariler started series of i/o on the GPADL and release the cached
 | |
|  * mapping if any.
 | |
|  */
 | |
| static inline void gpadl_iter_end_io(GpadlIter *iter)
 | |
| {
 | |
|     assert(iter->active);
 | |
| 
 | |
|     if (iter->map) {
 | |
|         gpadl_iter_cache_unmap(iter);
 | |
|     }
 | |
| 
 | |
|     iter->active = false;
 | |
| }
 | |
| 
 | |
| static void vmbus_resched(VMBus *vmbus);
 | |
| static void vmbus_msg_cb(void *data, int status);
 | |
| 
 | |
| ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off,
 | |
|                            const struct iovec *iov, size_t iov_cnt)
 | |
| {
 | |
|     GpadlIter iter;
 | |
|     size_t i;
 | |
|     ssize_t ret = 0;
 | |
| 
 | |
|     gpadl_iter_init(&iter, gpadl, chan->dev->dma_as,
 | |
|                     DMA_DIRECTION_FROM_DEVICE);
 | |
|     gpadl_iter_start_io(&iter);
 | |
|     gpadl_iter_seek(&iter, off);
 | |
|     for (i = 0; i < iov_cnt; i++) {
 | |
|         ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len);
 | |
|         if (ret < 0) {
 | |
|             goto out;
 | |
|         }
 | |
|     }
 | |
| out:
 | |
|     gpadl_iter_end_io(&iter);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
 | |
|                   unsigned iov_cnt, size_t len, size_t off)
 | |
| {
 | |
|     int ret_cnt = 0, ret;
 | |
|     unsigned i;
 | |
|     QEMUSGList *sgl = &req->sgl;
 | |
|     ScatterGatherEntry *sg = sgl->sg;
 | |
| 
 | |
|     for (i = 0; i < sgl->nsg; i++) {
 | |
|         if (sg[i].len > off) {
 | |
|             break;
 | |
|         }
 | |
|         off -= sg[i].len;
 | |
|     }
 | |
|     for (; len && i < sgl->nsg; i++) {
 | |
|         dma_addr_t mlen = MIN(sg[i].len - off, len);
 | |
|         dma_addr_t addr = sg[i].base + off;
 | |
|         len -= mlen;
 | |
|         off = 0;
 | |
| 
 | |
|         for (; mlen; ret_cnt++) {
 | |
|             dma_addr_t l = mlen;
 | |
|             dma_addr_t a = addr;
 | |
| 
 | |
|             if (ret_cnt == iov_cnt) {
 | |
|                 ret = -ENOBUFS;
 | |
|                 goto err;
 | |
|             }
 | |
| 
 | |
|             iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir,
 | |
|                                                    MEMTXATTRS_UNSPECIFIED);
 | |
|             if (!l) {
 | |
|                 ret = -EFAULT;
 | |
|                 goto err;
 | |
|             }
 | |
|             iov[ret_cnt].iov_len = l;
 | |
|             addr += l;
 | |
|             mlen -= l;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return ret_cnt;
 | |
| err:
 | |
|     vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
 | |
|                      unsigned iov_cnt, size_t accessed)
 | |
| {
 | |
|     QEMUSGList *sgl = &req->sgl;
 | |
|     unsigned i;
 | |
| 
 | |
|     for (i = 0; i < iov_cnt; i++) {
 | |
|         size_t acsd = MIN(accessed, iov[i].iov_len);
 | |
|         dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd);
 | |
|         accessed -= acsd;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static const VMStateDescription vmstate_gpadl = {
 | |
|     .name = "vmbus/gpadl",
 | |
|     .version_id = 0,
 | |
|     .minimum_version_id = 0,
 | |
|     .fields = (const VMStateField[]) {
 | |
|         VMSTATE_UINT32(id, VMBusGpadl),
 | |
|         VMSTATE_UINT32(child_relid, VMBusGpadl),
 | |
|         VMSTATE_UINT32(num_gfns, VMBusGpadl),
 | |
|         VMSTATE_UINT32(seen_gfns, VMBusGpadl),
 | |
|         VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0,
 | |
|                                     vmstate_info_uint64, uint64_t),
 | |
|         VMSTATE_UINT8(state, VMBusGpadl),
 | |
|         VMSTATE_END_OF_LIST()
 | |
|     }
 | |
| };
 | |
| 
 | |
/*
 * Wrap index @idx into a ring buffer of @len bytes.
 * Only a single wraparound is handled, i.e. @idx is assumed not to exceed
 * twice the size of the ring buffer.
 */
static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len)
{
    return idx >= len ? idx - len : idx;
}
 | |
| 
 | |
/*
 * Circular distance from @idx1 to @idx2 in a ring buffer of @len bytes.
 * @allow_catchup tells whether @idx1 is allowed to catch up with @idx2 (a
 * read index may catch up with the write index, but not the other way
 * round); when it is not, the distance is one less.
 */
static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len,
                                    bool allow_catchup)
{
    uint32_t dist = idx2 + len - idx1;

    if (!allow_catchup) {
        dist--;
    }
    return rb_idx_wrap(dist, len);
}
 | |
| 
 | |
| static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
 | |
| {
 | |
|     vmbus_ring_buffer *rb;
 | |
|     dma_addr_t mlen = sizeof(*rb);
 | |
| 
 | |
|     rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
 | |
|                         DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED);
 | |
|     if (mlen != sizeof(*rb)) {
 | |
|         dma_memory_unmap(ringbuf->as, rb, mlen,
 | |
|                          DMA_DIRECTION_FROM_DEVICE, 0);
 | |
|         return NULL;
 | |
|     }
 | |
|     return rb;
 | |
| }
 | |
| 
 | |
| static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf,
 | |
|                               vmbus_ring_buffer *rb, bool dirty)
 | |
| {
 | |
|     assert(rb);
 | |
| 
 | |
|     dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE,
 | |
|                      dirty ? sizeof(*rb) : 0);
 | |
| }
 | |
| 
 | |
| static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl,
 | |
|                                 AddressSpace *as, DMADirection dir,
 | |
|                                 uint32_t begin, uint32_t end)
 | |
| {
 | |
|     ringbuf->as = as;
 | |
|     ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS;
 | |
|     ringbuf->base = (begin + 1) << TARGET_PAGE_BITS;
 | |
|     ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS;
 | |
|     gpadl_iter_init(&ringbuf->iter, gpadl, as, dir);
 | |
| }
 | |
| 
 | |
| static int ringbufs_init(VMBusChannel *chan)
 | |
| {
 | |
|     vmbus_ring_buffer *rb;
 | |
|     VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf;
 | |
|     VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf;
 | |
| 
 | |
|     if (chan->ringbuf_send_offset <= 1 ||
 | |
|         chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) {
 | |
|         return -EINVAL;
 | |
|     }
 | |
| 
 | |
|     ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as,
 | |
|                         DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset);
 | |
|     ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as,
 | |
|                         DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset,
 | |
|                         chan->gpadl->num_gfns);
 | |
|     send_ringbuf->wanted = 0;
 | |
|     send_ringbuf->reserved = 0;
 | |
| 
 | |
|     rb = ringbuf_map_hdr(&recv_ringbuf->common);
 | |
|     if (!rb) {
 | |
|         return -EFAULT;
 | |
|     }
 | |
|     recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index;
 | |
|     ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false);
 | |
| 
 | |
|     rb = ringbuf_map_hdr(&send_ringbuf->common);
 | |
|     if (!rb) {
 | |
|         return -EFAULT;
 | |
|     }
 | |
|     send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index;
 | |
|     send_ringbuf->last_seen_rd_idx = rb->read_index;
 | |
|     rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ;
 | |
|     ringbuf_unmap_hdr(&send_ringbuf->common, rb, true);
 | |
| 
 | |
|     if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len ||
 | |
|         send_ringbuf->wr_idx >= send_ringbuf->common.len) {
 | |
|         return -EOVERFLOW;
 | |
|     }
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping
 | |
|  * around if needed.
 | |
|  * @len is assumed not to exceed the size of the ringbuffer, so only single
 | |
|  * wraparound is considered.
 | |
|  */
 | |
| static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len)
 | |
| {
 | |
|     ssize_t ret1 = 0, ret2 = 0;
 | |
|     uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off;
 | |
| 
 | |
|     if (len >= remain) {
 | |
|         ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain);
 | |
|         if (ret1 < 0) {
 | |
|             return ret1;
 | |
|         }
 | |
|         gpadl_iter_seek(&ringbuf->iter, ringbuf->base);
 | |
|         buf += remain;
 | |
|         len -= remain;
 | |
|     }
 | |
|     ret2 = gpadl_iter_io(&ringbuf->iter, buf, len);
 | |
|     if (ret2 < 0) {
 | |
|         return ret2;
 | |
|     }
 | |
|     return ret1 + ret2;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Position the circular iterator within @ringbuf to offset @new_off, wrapping
 | |
|  * around if needed.
 | |
|  * @new_off is assumed not to exceed twice the size of the ringbuffer, so only
 | |
|  * single wraparound is considered.
 | |
|  */
 | |
| static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off)
 | |
| {
 | |
|     gpadl_iter_seek(&ringbuf->iter,
 | |
|                     ringbuf->base + rb_idx_wrap(new_off, ringbuf->len));
 | |
| }
 | |
| 
 | |
| static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf)
 | |
| {
 | |
|     return ringbuf->iter.off - ringbuf->base;
 | |
| }
 | |
| 
 | |
| static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf)
 | |
| {
 | |
|     gpadl_iter_start_io(&ringbuf->iter);
 | |
| }
 | |
| 
 | |
| static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf)
 | |
| {
 | |
|     gpadl_iter_end_io(&ringbuf->iter);
 | |
| }
 | |
| 
 | |
| VMBusDevice *vmbus_channel_device(VMBusChannel *chan)
 | |
| {
 | |
|     return chan->dev;
 | |
| }
 | |
| 
 | |
| VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx)
 | |
| {
 | |
|     if (chan_idx >= dev->num_channels) {
 | |
|         return NULL;
 | |
|     }
 | |
|     return &dev->channels[chan_idx];
 | |
| }
 | |
| 
 | |
| uint32_t vmbus_channel_idx(VMBusChannel *chan)
 | |
| {
 | |
|     return chan - chan->dev->channels;
 | |
| }
 | |
| 
 | |
| void vmbus_channel_notify_host(VMBusChannel *chan)
 | |
| {
 | |
|     event_notifier_set(&chan->notifier);
 | |
| }
 | |
| 
 | |
| bool vmbus_channel_is_open(VMBusChannel *chan)
 | |
| {
 | |
|     return chan->is_open;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Notify the guest side about the data to work on in the channel ring buffer.
 | |
|  * The notification is done by signaling a dedicated per-channel SynIC event
 | |
|  * flag (more recent guests) or setting a bit in the interrupt page and firing
 | |
|  * the VMBus SINT (older guests).
 | |
|  */
 | |
| static int vmbus_channel_notify_guest(VMBusChannel *chan)
 | |
| {
 | |
|     int res = 0;
 | |
|     unsigned long *int_map, mask;
 | |
|     unsigned idx;
 | |
|     hwaddr addr = chan->vmbus->int_page_gpa;
 | |
|     hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0;
 | |
| 
 | |
|     trace_vmbus_channel_notify_guest(chan->id);
 | |
| 
 | |
|     if (!addr) {
 | |
|         return hyperv_set_event_flag(chan->notify_route, chan->id);
 | |
|     }
 | |
| 
 | |
|     int_map = cpu_physical_memory_map(addr, &len, 1);
 | |
|     if (len != TARGET_PAGE_SIZE / 2) {
 | |
|         res = -ENXIO;
 | |
|         goto unmap;
 | |
|     }
 | |
| 
 | |
|     idx = BIT_WORD(chan->id);
 | |
|     mask = BIT_MASK(chan->id);
 | |
|     if ((qatomic_fetch_or(&int_map[idx], mask) & mask) != mask) {
 | |
|         res = hyperv_sint_route_set_sint(chan->notify_route);
 | |
|         dirty = len;
 | |
|     }
 | |
| 
 | |
| unmap:
 | |
|     cpu_physical_memory_unmap(int_map, len, 1, dirty);
 | |
|     return res;
 | |
| }
 | |
| 
 | |
| #define VMBUS_PKT_TRAILER      sizeof(uint64_t)
 | |
| 
 | |
| static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr,
 | |
|                                           uint32_t desclen, uint32_t msglen)
 | |
| {
 | |
|     hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) +
 | |
|         DIV_ROUND_UP(desclen, sizeof(uint64_t));
 | |
|     hdr->len_qwords = hdr->offset_qwords +
 | |
|         DIV_ROUND_UP(msglen, sizeof(uint64_t));
 | |
|     return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Simplified ring buffer operation with paired barriers annotations in the
 | |
|  * producer and consumer loops:
 | |
|  *
 | |
|  * producer                           * consumer
 | |
|  * ~~~~~~~~                           * ~~~~~~~~
 | |
|  * write pending_send_sz              * read write_index
 | |
|  * smp_mb                       [A]   * smp_mb                       [C]
 | |
|  * read read_index                    * read packet
 | |
|  * smp_mb                       [B]   * read/write out-of-band data
 | |
|  * read/write out-of-band data        * smp_mb                       [B]
 | |
|  * write packet                       * write read_index
 | |
|  * smp_mb                       [C]   * smp_mb                       [A]
 | |
|  * write write_index                  * read pending_send_sz
 | |
|  * smp_wmb                      [D]   * smp_rmb                      [D]
 | |
|  * write pending_send_sz              * read write_index
 | |
|  * ...                                * ...
 | |
|  */
 | |
| 
 | |
| static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf)
 | |
| {
 | |
|     /* don't trust guest data */
 | |
|     if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) {
 | |
|         return 0;
 | |
|     }
 | |
|     return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx,
 | |
|                         ringbuf->common.len, false);
 | |
| }
 | |
| 
 | |
| static ssize_t ringbuf_send_update_idx(VMBusChannel *chan)
 | |
| {
 | |
|     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
 | |
|     vmbus_ring_buffer *rb;
 | |
|     uint32_t written;
 | |
| 
 | |
|     written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx,
 | |
|                            ringbuf->common.len, true);
 | |
|     if (!written) {
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     rb = ringbuf_map_hdr(&ringbuf->common);
 | |
|     if (!rb) {
 | |
|         return -EFAULT;
 | |
|     }
 | |
| 
 | |
|     ringbuf->reserved -= written;
 | |
| 
 | |
|     /* prevent reorder with the data operation and packet write */
 | |
|     smp_mb();                   /* barrier pair [C] */
 | |
|     rb->write_index = ringbuf->wr_idx;
 | |
| 
 | |
|     /*
 | |
|      * If the producer earlier indicated that it wants to be notified when the
 | |
|      * consumer frees certain amount of space in the ring buffer, that amount
 | |
|      * is reduced by the size of the completed write.
 | |
|      */
 | |
|     if (ringbuf->wanted) {
 | |
|         /* otherwise reservation would fail */
 | |
|         assert(ringbuf->wanted < written);
 | |
|         ringbuf->wanted -= written;
 | |
|         /* prevent reorder with write_index write */
 | |
|         smp_wmb();              /* barrier pair [D] */
 | |
|         rb->pending_send_sz = ringbuf->wanted;
 | |
|     }
 | |
| 
 | |
|     /* prevent reorder with write_index or pending_send_sz write */
 | |
|     smp_mb();                   /* barrier pair [A] */
 | |
|     ringbuf->last_seen_rd_idx = rb->read_index;
 | |
| 
 | |
|     /*
 | |
|      * The consumer may have missed the reduction of pending_send_sz and skip
 | |
|      * notification, so re-check the blocking condition, and, if it's no longer
 | |
|      * true, ensure processing another iteration by simulating consumer's
 | |
|      * notification.
 | |
|      */
 | |
|     if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) {
 | |
|         vmbus_channel_notify_host(chan);
 | |
|     }
 | |
| 
 | |
|     /* skip notification by consumer's request */
 | |
|     if (rb->interrupt_mask) {
 | |
|         goto out;
 | |
|     }
 | |
| 
 | |
|     /*
 | |
|      * The consumer hasn't caught up with the producer's previous state so it's
 | |
|      * not blocked.
 | |
|      * (last_seen_rd_idx comes from the guest but it's safe to use w/o
 | |
|      * validation here as it only affects notification.)
 | |
|      */
 | |
|     if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx,
 | |
|                      ringbuf->common.len, true) > written) {
 | |
|         goto out;
 | |
|     }
 | |
| 
 | |
|     vmbus_channel_notify_guest(chan);
 | |
| out:
 | |
|     ringbuf_unmap_hdr(&ringbuf->common, rb, true);
 | |
|     ringbuf->last_wr_idx = ringbuf->wr_idx;
 | |
|     return written;
 | |
| }
 | |
| 
 | |
| int vmbus_channel_reserve(VMBusChannel *chan,
 | |
|                           uint32_t desclen, uint32_t msglen)
 | |
| {
 | |
|     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
 | |
|     vmbus_ring_buffer *rb = NULL;
 | |
|     vmbus_packet_hdr hdr;
 | |
|     uint32_t needed = ringbuf->reserved +
 | |
|         vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
 | |
| 
 | |
|     /* avoid touching the guest memory if possible */
 | |
|     if (likely(needed <= ringbuf_send_avail(ringbuf))) {
 | |
|         goto success;
 | |
|     }
 | |
| 
 | |
|     rb = ringbuf_map_hdr(&ringbuf->common);
 | |
|     if (!rb) {
 | |
|         return -EFAULT;
 | |
|     }
 | |
| 
 | |
|     /* fetch read index from guest memory and try again */
 | |
|     ringbuf->last_seen_rd_idx = rb->read_index;
 | |
| 
 | |
|     if (likely(needed <= ringbuf_send_avail(ringbuf))) {
 | |
|         goto success;
 | |
|     }
 | |
| 
 | |
|     rb->pending_send_sz = needed;
 | |
| 
 | |
|     /*
 | |
|      * The consumer may have made progress and freed up some space before
 | |
|      * seeing updated pending_send_sz, so re-read read_index (preventing
 | |
|      * reorder with the pending_send_sz write) and try again.
 | |
|      */
 | |
|     smp_mb();                   /* barrier pair [A] */
 | |
|     ringbuf->last_seen_rd_idx = rb->read_index;
 | |
| 
 | |
|     if (needed > ringbuf_send_avail(ringbuf)) {
 | |
|         goto out;
 | |
|     }
 | |
| 
 | |
| success:
 | |
|     ringbuf->reserved = needed;
 | |
|     needed = 0;
 | |
| 
 | |
|     /* clear pending_send_sz if it was set */
 | |
|     if (ringbuf->wanted) {
 | |
|         if (!rb) {
 | |
|             rb = ringbuf_map_hdr(&ringbuf->common);
 | |
|             if (!rb) {
 | |
|                 /* failure to clear pending_send_sz is non-fatal */
 | |
|                 goto out;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         rb->pending_send_sz = 0;
 | |
|     }
 | |
| 
 | |
|     /* prevent reorder of the following data operation with read_index read */
 | |
|     smp_mb();                   /* barrier pair [B] */
 | |
| 
 | |
| out:
 | |
|     if (rb) {
 | |
|         ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed);
 | |
|     }
 | |
|     ringbuf->wanted = needed;
 | |
|     return needed ? -ENOSPC : 0;
 | |
| }
 | |
| 
 | |
| ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type,
 | |
|                            void *desc, uint32_t desclen,
 | |
|                            void *msg, uint32_t msglen,
 | |
|                            bool need_comp, uint64_t transaction_id)
 | |
| {
 | |
|     ssize_t ret = 0;
 | |
|     vmbus_packet_hdr hdr;
 | |
|     uint32_t totlen;
 | |
|     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
 | |
| 
 | |
|     if (!vmbus_channel_is_open(chan)) {
 | |
|         return -EINVAL;
 | |
|     }
 | |
| 
 | |
|     totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
 | |
|     hdr.type = pkt_type;
 | |
|     hdr.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0;
 | |
|     hdr.transaction_id = transaction_id;
 | |
| 
 | |
|     assert(totlen <= ringbuf->reserved);
 | |
| 
 | |
|     ringbuf_start_io(&ringbuf->common);
 | |
|     ringbuf_seek(&ringbuf->common, ringbuf->wr_idx);
 | |
|     ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr));
 | |
|     if (ret < 0) {
 | |
|         goto out;
 | |
|     }
 | |
|     if (desclen) {
 | |
|         assert(desc);
 | |
|         ret = ringbuf_io(&ringbuf->common, desc, desclen);
 | |
|         if (ret < 0) {
 | |
|             goto out;
 | |
|         }
 | |
|         ringbuf_seek(&ringbuf->common,
 | |
|                      ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t));
 | |
|     }
 | |
|     ret = ringbuf_io(&ringbuf->common, msg, msglen);
 | |
|     if (ret < 0) {
 | |
|         goto out;
 | |
|     }
 | |
|     ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen);
 | |
|     ringbuf->wr_idx = ringbuf_tell(&ringbuf->common);
 | |
|     ret = 0;
 | |
| out:
 | |
|     ringbuf_end_io(&ringbuf->common);
 | |
|     if (ret) {
 | |
|         return ret;
 | |
|     }
 | |
|     return ringbuf_send_update_idx(chan);
 | |
| }
 | |
| 
 | |
| ssize_t vmbus_channel_send_completion(VMBusChanReq *req,
 | |
|                                       void *msg, uint32_t msglen)
 | |
| {
 | |
|     assert(req->need_comp);
 | |
|     return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0,
 | |
|                               msg, msglen, false, req->transaction_id);
 | |
| }
 | |
| 
 | |
| static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev,
 | |
|                                VMBusRingBufCommon *ringbuf, uint32_t len)
 | |
| {
 | |
|     int ret;
 | |
|     vmbus_pkt_gpa_direct hdr;
 | |
|     hwaddr curaddr = 0;
 | |
|     hwaddr curlen = 0;
 | |
|     int num;
 | |
| 
 | |
|     if (len < sizeof(hdr)) {
 | |
|         return -EIO;
 | |
|     }
 | |
|     ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr));
 | |
|     if (ret < 0) {
 | |
|         return ret;
 | |
|     }
 | |
|     len -= sizeof(hdr);
 | |
| 
 | |
|     num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t);
 | |
|     if (num < 0) {
 | |
|         return -EIO;
 | |
|     }
 | |
|     qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as);
 | |
| 
 | |
|     for (; hdr.rangecount; hdr.rangecount--) {
 | |
|         vmbus_gpa_range range;
 | |
| 
 | |
|         if (len < sizeof(range)) {
 | |
|             goto eio;
 | |
|         }
 | |
|         ret = ringbuf_io(ringbuf, &range, sizeof(range));
 | |
|         if (ret < 0) {
 | |
|             goto err;
 | |
|         }
 | |
|         len -= sizeof(range);
 | |
| 
 | |
|         if (range.byte_offset & TARGET_PAGE_MASK) {
 | |
|             goto eio;
 | |
|         }
 | |
| 
 | |
|         for (; range.byte_count; range.byte_offset = 0) {
 | |
|             uint64_t paddr;
 | |
|             uint32_t plen = MIN(range.byte_count,
 | |
|                                 TARGET_PAGE_SIZE - range.byte_offset);
 | |
| 
 | |
|             if (len < sizeof(uint64_t)) {
 | |
|                 goto eio;
 | |
|             }
 | |
|             ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr));
 | |
|             if (ret < 0) {
 | |
|                 goto err;
 | |
|             }
 | |
|             len -= sizeof(uint64_t);
 | |
|             paddr <<= TARGET_PAGE_BITS;
 | |
|             paddr |= range.byte_offset;
 | |
|             range.byte_count -= plen;
 | |
| 
 | |
|             if (curaddr + curlen == paddr) {
 | |
|                 /* consecutive fragments - join */
 | |
|                 curlen += plen;
 | |
|             } else {
 | |
|                 if (curlen) {
 | |
|                     qemu_sglist_add(sgl, curaddr, curlen);
 | |
|                 }
 | |
| 
 | |
|                 curaddr = paddr;
 | |
|                 curlen = plen;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (curlen) {
 | |
|         qemu_sglist_add(sgl, curaddr, curlen);
 | |
|     }
 | |
| 
 | |
|     return 0;
 | |
| eio:
 | |
|     ret = -EIO;
 | |
| err:
 | |
|     qemu_sglist_destroy(sgl);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan,
 | |
|                                      uint32_t size, uint16_t pkt_type,
 | |
|                                      uint32_t msglen, uint64_t transaction_id,
 | |
|                                      bool need_comp)
 | |
| {
 | |
|     VMBusChanReq *req;
 | |
|     uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg));
 | |
|     uint32_t totlen = msgoff + msglen;
 | |
| 
 | |
|     req = g_malloc0(totlen);
 | |
|     req->chan = chan;
 | |
|     req->pkt_type = pkt_type;
 | |
|     req->msg = (void *)req + msgoff;
 | |
|     req->msglen = msglen;
 | |
|     req->transaction_id = transaction_id;
 | |
|     req->need_comp = need_comp;
 | |
|     return req;
 | |
| }
 | |
| 
 | |
| int vmbus_channel_recv_start(VMBusChannel *chan)
 | |
| {
 | |
|     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
 | |
|     vmbus_ring_buffer *rb;
 | |
| 
 | |
|     rb = ringbuf_map_hdr(&ringbuf->common);
 | |
|     if (!rb) {
 | |
|         return -EFAULT;
 | |
|     }
 | |
|     ringbuf->last_seen_wr_idx = rb->write_index;
 | |
|     ringbuf_unmap_hdr(&ringbuf->common, rb, false);
 | |
| 
 | |
|     if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) {
 | |
|         return -EOVERFLOW;
 | |
|     }
 | |
| 
 | |
|     /* prevent reorder of the following data operation with write_index read */
 | |
|     smp_mb();                   /* barrier pair [C] */
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size)
 | |
| {
 | |
|     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
 | |
|     vmbus_packet_hdr hdr = {};
 | |
|     VMBusChanReq *req;
 | |
|     uint32_t avail;
 | |
|     uint32_t totlen, pktlen, msglen, msgoff, desclen;
 | |
| 
 | |
|     assert(size >= sizeof(*req));
 | |
| 
 | |
|     /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */
 | |
|     avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx,
 | |
|                          ringbuf->common.len, true);
 | |
|     if (avail < sizeof(hdr)) {
 | |
|         return NULL;
 | |
|     }
 | |
| 
 | |
|     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx);
 | |
|     if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) {
 | |
|         return NULL;
 | |
|     }
 | |
| 
 | |
|     pktlen = hdr.len_qwords * sizeof(uint64_t);
 | |
|     totlen = pktlen + VMBUS_PKT_TRAILER;
 | |
|     if (totlen > avail) {
 | |
|         return NULL;
 | |
|     }
 | |
| 
 | |
|     msgoff = hdr.offset_qwords * sizeof(uint64_t);
 | |
|     if (msgoff > pktlen || msgoff < sizeof(hdr)) {
 | |
|         error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen);
 | |
|         return NULL;
 | |
|     }
 | |
| 
 | |
|     msglen = pktlen - msgoff;
 | |
| 
 | |
|     req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id,
 | |
|                           hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION);
 | |
| 
 | |
|     switch (hdr.type) {
 | |
|     case VMBUS_PACKET_DATA_USING_GPA_DIRECT:
 | |
|         desclen = msgoff - sizeof(hdr);
 | |
|         if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common,
 | |
|                                 desclen) < 0) {
 | |
|             error_report("%s: failed to convert GPA ranges to SGL", __func__);
 | |
|             goto free_req;
 | |
|         }
 | |
|         break;
 | |
|     case VMBUS_PACKET_DATA_INBAND:
 | |
|     case VMBUS_PACKET_COMP:
 | |
|         break;
 | |
|     default:
 | |
|         error_report("%s: unexpected msg type: %x", __func__, hdr.type);
 | |
|         goto free_req;
 | |
|     }
 | |
| 
 | |
|     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff);
 | |
|     if (ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) {
 | |
|         goto free_req;
 | |
|     }
 | |
|     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen);
 | |
| 
 | |
|     return req;
 | |
| free_req:
 | |
|     vmbus_free_req(req);
 | |
|     return NULL;
 | |
| }
 | |
| 
 | |
| void vmbus_channel_recv_pop(VMBusChannel *chan)
 | |
| {
 | |
|     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
 | |
|     ringbuf->rd_idx = ringbuf_tell(&ringbuf->common);
 | |
| }
 | |
| 
 | |
| ssize_t vmbus_channel_recv_done(VMBusChannel *chan)
 | |
| {
 | |
|     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
 | |
|     vmbus_ring_buffer *rb;
 | |
|     uint32_t read;
 | |
| 
 | |
|     read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx,
 | |
|                         ringbuf->common.len, true);
 | |
|     if (!read) {
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     rb = ringbuf_map_hdr(&ringbuf->common);
 | |
|     if (!rb) {
 | |
|         return -EFAULT;
 | |
|     }
 | |
| 
 | |
|     /* prevent reorder with the data operation and packet read */
 | |
|     smp_mb();                   /* barrier pair [B] */
 | |
|     rb->read_index = ringbuf->rd_idx;
 | |
| 
 | |
|     /* prevent reorder of the following pending_send_sz read */
 | |
|     smp_mb();                   /* barrier pair [A] */
 | |
| 
 | |
|     if (rb->interrupt_mask) {
 | |
|         goto out;
 | |
|     }
 | |
| 
 | |
|     if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) {
 | |
|         uint32_t wr_idx, wr_avail;
 | |
|         uint32_t wanted = rb->pending_send_sz;
 | |
| 
 | |
|         if (!wanted) {
 | |
|             goto out;
 | |
|         }
 | |
| 
 | |
|         /* prevent reorder with pending_send_sz read */
 | |
|         smp_rmb();              /* barrier pair [D] */
 | |
|         wr_idx = rb->write_index;
 | |
| 
 | |
|         wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len,
 | |
|                                 true);
 | |
| 
 | |
|         /* the producer wasn't blocked on the consumer state */
 | |
|         if (wr_avail >= read + wanted) {
 | |
|             goto out;
 | |
|         }
 | |
|         /* there's not enough space for the producer to make progress */
 | |
|         if (wr_avail < wanted) {
 | |
|             goto out;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     vmbus_channel_notify_guest(chan);
 | |
| out:
 | |
|     ringbuf_unmap_hdr(&ringbuf->common, rb, true);
 | |
|     ringbuf->last_rd_idx = ringbuf->rd_idx;
 | |
|     return read;
 | |
| }
 | |
| 
 | |
| void vmbus_free_req(void *req)
 | |
| {
 | |
|     VMBusChanReq *r = req;
 | |
| 
 | |
|     if (!req) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (r->sgl.dev) {
 | |
|         qemu_sglist_destroy(&r->sgl);
 | |
|     }
 | |
|     g_free(req);
 | |
| }
 | |
| 
 | |
| static void channel_event_cb(EventNotifier *e)
 | |
| {
 | |
|     VMBusChannel *chan = container_of(e, VMBusChannel, notifier);
 | |
|     if (event_notifier_test_and_clear(e)) {
 | |
|         /*
 | |
|          * All receives are supposed to happen within the device worker, so
 | |
|          * bracket it with ringbuf_start/end_io on the receive ringbuffer, and
 | |
|          * potentially reuse the cached mapping throughout the worker.
 | |
|          * Can't do this for sends as they may happen outside the device
 | |
|          * worker.
 | |
|          */
 | |
|         VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
 | |
|         ringbuf_start_io(&ringbuf->common);
 | |
|         chan->notify_cb(chan);
 | |
|         ringbuf_end_io(&ringbuf->common);
 | |
| 
 | |
|     }
 | |
| }
 | |
| 
 | |
| static int alloc_chan_id(VMBus *vmbus)
 | |
| {
 | |
|     int ret;
 | |
| 
 | |
|     ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0);
 | |
|     if (ret == VMBUS_CHANID_COUNT) {
 | |
|         return -ENOMEM;
 | |
|     }
 | |
|     return ret + VMBUS_FIRST_CHANID;
 | |
| }
 | |
| 
 | |
| static int register_chan_id(VMBusChannel *chan)
 | |
| {
 | |
|     return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID,
 | |
|                             chan->vmbus->chanid_bitmap) ? -EEXIST : 0;
 | |
| }
 | |
| 
 | |
| static void unregister_chan_id(VMBusChannel *chan)
 | |
| {
 | |
|     clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap);
 | |
| }
 | |
| 
 | |
| static uint32_t chan_connection_id(VMBusChannel *chan)
 | |
| {
 | |
|     return VMBUS_CHAN_CONNECTION_OFFSET + chan->id;
 | |
| }
 | |
| 
 | |
| static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc,
 | |
|                          VMBusChannel *chan, uint16_t idx, Error **errp)
 | |
| {
 | |
|     int res;
 | |
| 
 | |
|     chan->dev = dev;
 | |
|     chan->notify_cb = vdc->chan_notify_cb;
 | |
|     chan->subchan_idx = idx;
 | |
|     chan->vmbus = vmbus;
 | |
| 
 | |
|     res = alloc_chan_id(vmbus);
 | |
|     if (res < 0) {
 | |
|         error_setg(errp, "no spare channel id");
 | |
|         return;
 | |
|     }
 | |
|     chan->id = res;
 | |
|     register_chan_id(chan);
 | |
| 
 | |
|     /*
 | |
|      * The guest drivers depend on the device subchannels (idx #1+) to be
 | |
|      * offered after the primary channel (idx #0) of that device.  To ensure
 | |
|      * that, record the channels on the channel list in the order they appear
 | |
|      * within the device.
 | |
|      */
 | |
|     QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link);
 | |
| }
 | |
| 
 | |
| static void deinit_channel(VMBusChannel *chan)
 | |
| {
 | |
|     assert(chan->state == VMCHAN_INIT);
 | |
|     QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link);
 | |
|     unregister_chan_id(chan);
 | |
| }
 | |
| 
 | |
| static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp)
 | |
| {
 | |
|     uint16_t i;
 | |
|     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev);
 | |
|     Error *err = NULL;
 | |
| 
 | |
|     dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1;
 | |
|     if (dev->num_channels < 1) {
 | |
|         error_setg(errp, "invalid #channels: %u", dev->num_channels);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     dev->channels = g_new0(VMBusChannel, dev->num_channels);
 | |
|     for (i = 0; i < dev->num_channels; i++) {
 | |
|         init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err);
 | |
|         if (err) {
 | |
|             goto err_init;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return;
 | |
| 
 | |
| err_init:
 | |
|     while (i--) {
 | |
|         deinit_channel(&dev->channels[i]);
 | |
|     }
 | |
|     error_propagate(errp, err);
 | |
| }
 | |
| 
 | |
| static void free_channels(VMBusDevice *dev)
 | |
| {
 | |
|     uint16_t i;
 | |
|     for (i = 0; i < dev->num_channels; i++) {
 | |
|         deinit_channel(&dev->channels[i]);
 | |
|     }
 | |
|     g_free(dev->channels);
 | |
| }
 | |
| 
 | |
| static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index)
 | |
| {
 | |
|     VMBusChannel *chan;
 | |
| 
 | |
|     if (vp_index == vmbus->target_vp) {
 | |
|         hyperv_sint_route_ref(vmbus->sint_route);
 | |
|         return vmbus->sint_route;
 | |
|     }
 | |
| 
 | |
|     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
 | |
|         if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) {
 | |
|             hyperv_sint_route_ref(chan->notify_route);
 | |
|             return chan->notify_route;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL);
 | |
| }
 | |
| 
 | |
| static void open_channel(VMBusChannel *chan)
 | |
| {
 | |
|     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
 | |
| 
 | |
|     chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl);
 | |
|     if (!chan->gpadl) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (ringbufs_init(chan)) {
 | |
|         goto put_gpadl;
 | |
|     }
 | |
| 
 | |
|     if (event_notifier_init(&chan->notifier, 0)) {
 | |
|         goto put_gpadl;
 | |
|     }
 | |
| 
 | |
|     event_notifier_set_handler(&chan->notifier, channel_event_cb);
 | |
| 
 | |
|     if (hyperv_set_event_flag_handler(chan_connection_id(chan),
 | |
|                                       &chan->notifier)) {
 | |
|         goto cleanup_notifier;
 | |
|     }
 | |
| 
 | |
|     chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp);
 | |
|     if (!chan->notify_route) {
 | |
|         goto clear_event_flag_handler;
 | |
|     }
 | |
| 
 | |
|     if (vdc->open_channel && vdc->open_channel(chan)) {
 | |
|         goto unref_sint_route;
 | |
|     }
 | |
| 
 | |
|     chan->is_open = true;
 | |
|     return;
 | |
| 
 | |
| unref_sint_route:
 | |
|     hyperv_sint_route_unref(chan->notify_route);
 | |
| clear_event_flag_handler:
 | |
|     hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
 | |
| cleanup_notifier:
 | |
|     event_notifier_set_handler(&chan->notifier, NULL);
 | |
|     event_notifier_cleanup(&chan->notifier);
 | |
| put_gpadl:
 | |
|     vmbus_put_gpadl(chan->gpadl);
 | |
| }
 | |
| 
 | |
| static void close_channel(VMBusChannel *chan)
 | |
| {
 | |
|     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
 | |
| 
 | |
|     if (!chan->is_open) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (vdc->close_channel) {
 | |
|         vdc->close_channel(chan);
 | |
|     }
 | |
| 
 | |
|     hyperv_sint_route_unref(chan->notify_route);
 | |
|     hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
 | |
|     event_notifier_set_handler(&chan->notifier, NULL);
 | |
|     event_notifier_cleanup(&chan->notifier);
 | |
|     vmbus_put_gpadl(chan->gpadl);
 | |
|     chan->is_open = false;
 | |
| }
 | |
| 
 | |
| static int channel_post_load(void *opaque, int version_id)
 | |
| {
 | |
|     VMBusChannel *chan = opaque;
 | |
| 
 | |
|     return register_chan_id(chan);
 | |
| }
 | |
| 
 | |
| static const VMStateDescription vmstate_channel = {
 | |
|     .name = "vmbus/channel",
 | |
|     .version_id = 0,
 | |
|     .minimum_version_id = 0,
 | |
|     .post_load = channel_post_load,
 | |
|     .fields = (const VMStateField[]) {
 | |
|         VMSTATE_UINT32(id, VMBusChannel),
 | |
|         VMSTATE_UINT16(subchan_idx, VMBusChannel),
 | |
|         VMSTATE_UINT32(open_id, VMBusChannel),
 | |
|         VMSTATE_UINT32(target_vp, VMBusChannel),
 | |
|         VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel),
 | |
|         VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel),
 | |
|         VMSTATE_UINT8(offer_state, VMBusChannel),
 | |
|         VMSTATE_UINT8(state, VMBusChannel),
 | |
|         VMSTATE_END_OF_LIST()
 | |
|     }
 | |
| };
 | |
| 
 | |
| static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id)
 | |
| {
 | |
|     VMBusChannel *chan;
 | |
|     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
 | |
|         if (chan->id == id) {
 | |
|             return chan;
 | |
|         }
 | |
|     }
 | |
|     return NULL;
 | |
| }
 | |
| 
 | |
| static int enqueue_incoming_message(VMBus *vmbus,
 | |
|                                     const struct hyperv_post_message_input *msg)
 | |
| {
 | |
|     int ret = 0;
 | |
|     uint8_t idx, prev_size;
 | |
| 
 | |
|     qemu_mutex_lock(&vmbus->rx_queue_lock);
 | |
| 
 | |
|     if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) {
 | |
|         ret = -ENOBUFS;
 | |
|         goto out;
 | |
|     }
 | |
| 
 | |
|     prev_size = vmbus->rx_queue_size;
 | |
|     idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN;
 | |
|     memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg));
 | |
|     vmbus->rx_queue_size++;
 | |
| 
 | |
|     /* only need to resched if the queue was empty before */
 | |
|     if (!prev_size) {
 | |
|         vmbus_resched(vmbus);
 | |
|     }
 | |
| out:
 | |
|     qemu_mutex_unlock(&vmbus->rx_queue_lock);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg,
 | |
|                                    void *data)
 | |
| {
 | |
|     VMBus *vmbus = data;
 | |
|     struct vmbus_message_header *vmbus_msg;
 | |
| 
 | |
|     if (msg->message_type != HV_MESSAGE_VMBUS) {
 | |
|         return HV_STATUS_INVALID_HYPERCALL_INPUT;
 | |
|     }
 | |
| 
 | |
|     if (msg->payload_size < sizeof(struct vmbus_message_header)) {
 | |
|         return HV_STATUS_INVALID_HYPERCALL_INPUT;
 | |
|     }
 | |
| 
 | |
|     vmbus_msg = (struct vmbus_message_header *)msg->payload;
 | |
| 
 | |
|     trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size);
 | |
| 
 | |
|     if (vmbus_msg->message_type == VMBUS_MSG_INVALID ||
 | |
|         vmbus_msg->message_type >= VMBUS_MSG_COUNT) {
 | |
|         error_report("vmbus: unknown message type %#x",
 | |
|                      vmbus_msg->message_type);
 | |
|         return HV_STATUS_INVALID_HYPERCALL_INPUT;
 | |
|     }
 | |
| 
 | |
|     if (enqueue_incoming_message(vmbus, msg)) {
 | |
|         return HV_STATUS_INSUFFICIENT_BUFFERS;
 | |
|     }
 | |
|     return HV_STATUS_SUCCESS;
 | |
| }
 | |
| 
 | |
| static bool vmbus_initialized(VMBus *vmbus)
 | |
| {
 | |
|     return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT;
 | |
| }
 | |
| 
 | |
| static void vmbus_reset_all(VMBus *vmbus)
 | |
| {
 | |
|     bus_cold_reset(BUS(vmbus));
 | |
| }
 | |
| 
 | |
| static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen)
 | |
| {
 | |
|     int ret;
 | |
|     struct hyperv_message msg = {
 | |
|         .header.message_type = HV_MESSAGE_VMBUS,
 | |
|     };
 | |
| 
 | |
|     assert(!vmbus->msg_in_progress);
 | |
|     assert(msglen <= sizeof(msg.payload));
 | |
|     assert(msglen >= sizeof(struct vmbus_message_header));
 | |
| 
 | |
|     vmbus->msg_in_progress = true;
 | |
| 
 | |
|     trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type,
 | |
|                          msglen);
 | |
| 
 | |
|     memcpy(msg.payload, msgdata, msglen);
 | |
|     msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN);
 | |
| 
 | |
|     ret = hyperv_post_msg(vmbus->sint_route, &msg);
 | |
|     if (ret == 0 || ret == -EAGAIN) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     error_report("message delivery fatal failure: %d; aborting vmbus", ret);
 | |
|     vmbus_reset_all(vmbus);
 | |
| }
 | |
| 
 | |
| static int vmbus_init(VMBus *vmbus)
 | |
| {
 | |
|     if (vmbus->target_vp != (uint32_t)-1) {
 | |
|         vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT,
 | |
|                                                   vmbus_msg_cb, vmbus);
 | |
|         if (!vmbus->sint_route) {
 | |
|             error_report("failed to set up SINT route");
 | |
|             return -ENOMEM;
 | |
|         }
 | |
|     }
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| static void vmbus_deinit(VMBus *vmbus)
 | |
| {
 | |
|     VMBusGpadl *gpadl, *tmp_gpadl;
 | |
|     VMBusChannel *chan;
 | |
| 
 | |
|     QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) {
 | |
|         if (gpadl->state == VMGPADL_TORNDOWN) {
 | |
|             continue;
 | |
|         }
 | |
|         vmbus_put_gpadl(gpadl);
 | |
|     }
 | |
| 
 | |
|     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
 | |
|         chan->offer_state = VMOFFER_INIT;
 | |
|     }
 | |
| 
 | |
|     hyperv_sint_route_unref(vmbus->sint_route);
 | |
|     vmbus->sint_route = NULL;
 | |
|     vmbus->int_page_gpa = 0;
 | |
|     vmbus->target_vp = (uint32_t)-1;
 | |
|     vmbus->version = 0;
 | |
|     vmbus->state = VMBUS_LISTEN;
 | |
|     vmbus->msg_in_progress = false;
 | |
| }
 | |
| 
 | |
| static void handle_initiate_contact(VMBus *vmbus,
 | |
|                                     vmbus_message_initiate_contact *msg,
 | |
|                                     uint32_t msglen)
 | |
| {
 | |
|     if (msglen < sizeof(*msg)) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     trace_vmbus_initiate_contact(msg->version_requested >> 16,
 | |
|                                  msg->version_requested & 0xffff,
 | |
|                                  msg->target_vcpu, msg->monitor_page1,
 | |
|                                  msg->monitor_page2, msg->interrupt_page);
 | |
| 
 | |
|     /*
 | |
|      * Reset vmbus on INITIATE_CONTACT regardless of its previous state.
 | |
|      * Useful, in particular, with vmbus-aware BIOS which can't shut vmbus down
 | |
|      * before handing over to OS loader.
 | |
|      */
 | |
|     vmbus_reset_all(vmbus);
 | |
| 
 | |
|     vmbus->target_vp = msg->target_vcpu;
 | |
|     vmbus->version = msg->version_requested;
 | |
|     if (vmbus->version < VMBUS_VERSION_WIN8) {
 | |
|         /* linux passes interrupt page even when it doesn't need it */
 | |
|         vmbus->int_page_gpa = msg->interrupt_page;
 | |
|     }
 | |
|     vmbus->state = VMBUS_HANDSHAKE;
 | |
| 
 | |
|     if (vmbus_init(vmbus)) {
 | |
|         error_report("failed to init vmbus; aborting");
 | |
|         vmbus_deinit(vmbus);
 | |
|         return;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void send_handshake(VMBus *vmbus)
 | |
| {
 | |
|     struct vmbus_message_version_response msg = {
 | |
|         .header.message_type = VMBUS_MSG_VERSION_RESPONSE,
 | |
|         .version_supported = vmbus_initialized(vmbus),
 | |
|     };
 | |
| 
 | |
|     post_msg(vmbus, &msg, sizeof(msg));
 | |
| }
 | |
| 
 | |
| static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen)
 | |
| {
 | |
|     VMBusChannel *chan;
 | |
| 
 | |
|     if (!vmbus_initialized(vmbus)) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
 | |
|         if (chan->offer_state == VMOFFER_INIT) {
 | |
|             chan->offer_state = VMOFFER_SENDING;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     vmbus->state = VMBUS_OFFER;
 | |
| }
 | |
| 
 | |
| static void send_offer(VMBus *vmbus)
 | |
| {
 | |
|     VMBusChannel *chan;
 | |
|     struct vmbus_message_header alloffers_msg = {
 | |
|         .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED,
 | |
|     };
 | |
| 
 | |
|     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
 | |
|         if (chan->offer_state == VMOFFER_SENDING) {
 | |
|             VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
 | |
|             /* Hyper-V wants LE GUIDs */
 | |
|             QemuUUID classid = qemu_uuid_bswap(vdc->classid);
 | |
|             QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid);
 | |
|             struct vmbus_message_offer_channel msg = {
 | |
|                 .header.message_type = VMBUS_MSG_OFFERCHANNEL,
 | |
|                 .child_relid = chan->id,
 | |
|                 .connection_id = chan_connection_id(chan),
 | |
|                 .channel_flags = vdc->channel_flags,
 | |
|                 .mmio_size_mb = vdc->mmio_size_mb,
 | |
|                 .sub_channel_index = vmbus_channel_idx(chan),
 | |
|                 .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED,
 | |
|             };
 | |
| 
 | |
|             memcpy(msg.type_uuid, &classid, sizeof(classid));
 | |
|             memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid));
 | |
| 
 | |
|             trace_vmbus_send_offer(chan->id, chan->dev);
 | |
| 
 | |
|             post_msg(vmbus, &msg, sizeof(msg));
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /* no more offers, send terminator message */
 | |
|     trace_vmbus_terminate_offers();
 | |
|     post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg));
 | |
| }
 | |
| 
 | |
| static bool complete_offer(VMBus *vmbus)
 | |
| {
 | |
|     VMBusChannel *chan;
 | |
| 
 | |
|     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
 | |
|         if (chan->offer_state == VMOFFER_SENDING) {
 | |
|             chan->offer_state = VMOFFER_SENT;
 | |
|             goto next_offer;
 | |
|         }
 | |
|     }
 | |
|     /*
 | |
|      * no transitioning channels found so this is completing the terminator
 | |
|      * message, and vmbus can move to the next state
 | |
|      */
 | |
|     return true;
 | |
| 
 | |
| next_offer:
 | |
|     /* try to mark another channel for offering */
 | |
|     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
 | |
|         if (chan->offer_state == VMOFFER_INIT) {
 | |
|             chan->offer_state = VMOFFER_SENDING;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
|     /*
 | |
|      * if an offer has been sent there are more offers or the terminator yet to
 | |
|      * send, so no state transition for vmbus
 | |
|      */
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg,
 | |
|                                 uint32_t msglen)
 | |
| {
 | |
|     VMBusGpadl *gpadl;
 | |
|     uint32_t num_gfns, i;
 | |
| 
 | |
|     /* must include at least one gpa range */
 | |
|     if (msglen < sizeof(*msg) + sizeof(msg->range[0]) ||
 | |
|         !vmbus_initialized(vmbus)) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) /
 | |
|                sizeof(msg->range[0].pfn_array[0]);
 | |
| 
 | |
|     trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns);
 | |
| 
 | |
|     /*
 | |
|      * In theory the GPADL_HEADER message can define a GPADL with multiple GPA
 | |
|      * ranges each with arbitrary size and alignment.  However in practice only
 | |
|      * single-range page-aligned GPADLs have been observed so just ignore
 | |
|      * anything else and simplify things greatly.
 | |
|      */
 | |
|     if (msg->rangecount != 1 || msg->range[0].byte_offset ||
 | |
|         (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     /* ignore requests to create already existing GPADLs */
 | |
|     if (find_gpadl(vmbus, msg->gpadl_id)) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns);
 | |
| 
 | |
|     for (i = 0; i < num_gfns &&
 | |
|          (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen;
 | |
|          i++) {
 | |
|         gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i];
 | |
|     }
 | |
| 
 | |
|     if (gpadl_full(gpadl)) {
 | |
|         vmbus->state = VMBUS_CREATE_GPADL;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg,
 | |
|                               uint32_t msglen)
 | |
| {
 | |
|     VMBusGpadl *gpadl;
 | |
|     uint32_t num_gfns_left, i;
 | |
| 
 | |
|     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     trace_vmbus_gpadl_body(msg->gpadl_id);
 | |
| 
 | |
|     gpadl = find_gpadl(vmbus, msg->gpadl_id);
 | |
|     if (!gpadl) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns;
 | |
|     assert(num_gfns_left);
 | |
| 
 | |
|     for (i = 0; i < num_gfns_left &&
 | |
|          (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) {
 | |
|         gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i];
 | |
|     }
 | |
| 
 | |
|     if (gpadl_full(gpadl)) {
 | |
|         vmbus->state = VMBUS_CREATE_GPADL;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void send_create_gpadl(VMBus *vmbus)
 | |
| {
 | |
|     VMBusGpadl *gpadl;
 | |
| 
 | |
|     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
 | |
|         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
 | |
|             struct vmbus_message_gpadl_created msg = {
 | |
|                 .header.message_type = VMBUS_MSG_GPADL_CREATED,
 | |
|                 .gpadl_id = gpadl->id,
 | |
|                 .child_relid = gpadl->child_relid,
 | |
|             };
 | |
| 
 | |
|             trace_vmbus_gpadl_created(gpadl->id);
 | |
|             post_msg(vmbus, &msg, sizeof(msg));
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     assert(false);
 | |
| }
 | |
| 
 | |
| static bool complete_create_gpadl(VMBus *vmbus)
 | |
| {
 | |
|     VMBusGpadl *gpadl;
 | |
| 
 | |
|     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
 | |
|         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
 | |
|             gpadl->state = VMGPADL_ALIVE;
 | |
| 
 | |
|             return true;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     assert(false);
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static void handle_gpadl_teardown(VMBus *vmbus,
 | |
|                                   vmbus_message_gpadl_teardown *msg,
 | |
|                                   uint32_t msglen)
 | |
| {
 | |
|     VMBusGpadl *gpadl;
 | |
| 
 | |
|     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     trace_vmbus_gpadl_teardown(msg->gpadl_id);
 | |
| 
 | |
|     gpadl = find_gpadl(vmbus, msg->gpadl_id);
 | |
|     if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     gpadl->state = VMGPADL_TEARINGDOWN;
 | |
|     vmbus->state = VMBUS_TEARDOWN_GPADL;
 | |
| }
 | |
| 
 | |
| static void send_teardown_gpadl(VMBus *vmbus)
 | |
| {
 | |
|     VMBusGpadl *gpadl;
 | |
| 
 | |
|     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
 | |
|         if (gpadl->state == VMGPADL_TEARINGDOWN) {
 | |
|             struct vmbus_message_gpadl_torndown msg = {
 | |
|                 .header.message_type = VMBUS_MSG_GPADL_TORNDOWN,
 | |
|                 .gpadl_id = gpadl->id,
 | |
|             };
 | |
| 
 | |
|             trace_vmbus_gpadl_torndown(gpadl->id);
 | |
|             post_msg(vmbus, &msg, sizeof(msg));
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     assert(false);
 | |
| }
 | |
| 
 | |
| static bool complete_teardown_gpadl(VMBus *vmbus)
 | |
| {
 | |
|     VMBusGpadl *gpadl;
 | |
| 
 | |
|     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
 | |
|         if (gpadl->state == VMGPADL_TEARINGDOWN) {
 | |
|             gpadl->state = VMGPADL_TORNDOWN;
 | |
|             vmbus_put_gpadl(gpadl);
 | |
|             return true;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     assert(false);
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg,
 | |
|                                 uint32_t msglen)
 | |
| {
 | |
|     VMBusChannel *chan;
 | |
| 
 | |
|     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id,
 | |
|                              msg->target_vp);
 | |
|     chan = find_channel(vmbus, msg->child_relid);
 | |
|     if (!chan || chan->state != VMCHAN_INIT) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id;
 | |
|     chan->ringbuf_send_offset = msg->ring_buffer_offset;
 | |
|     chan->target_vp = msg->target_vp;
 | |
|     chan->open_id = msg->open_id;
 | |
| 
 | |
|     open_channel(chan);
 | |
| 
 | |
|     chan->state = VMCHAN_OPENING;
 | |
|     vmbus->state = VMBUS_OPEN_CHANNEL;
 | |
| }
 | |
| 
 | |
| static void send_open_channel(VMBus *vmbus)
 | |
| {
 | |
|     VMBusChannel *chan;
 | |
| 
 | |
|     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
 | |
|         if (chan->state == VMCHAN_OPENING) {
 | |
|             struct vmbus_message_open_result msg = {
 | |
|                 .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT,
 | |
|                 .child_relid = chan->id,
 | |
|                 .open_id = chan->open_id,
 | |
|                 .status = !vmbus_channel_is_open(chan),
 | |
|             };
 | |
| 
 | |
|             trace_vmbus_channel_open(chan->id, msg.status);
 | |
|             post_msg(vmbus, &msg, sizeof(msg));
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     assert(false);
 | |
| }
 | |
| 
 | |
| static bool complete_open_channel(VMBus *vmbus)
 | |
| {
 | |
|     VMBusChannel *chan;
 | |
| 
 | |
|     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
 | |
|         if (chan->state == VMCHAN_OPENING) {
 | |
|             if (vmbus_channel_is_open(chan)) {
 | |
|                 chan->state = VMCHAN_OPEN;
 | |
|                 /*
 | |
|                  * simulate guest notification of ringbuffer space made
 | |
|                  * available, for the channel protocols where the host
 | |
|                  * initiates the communication
 | |
|                  */
 | |
|                 vmbus_channel_notify_host(chan);
 | |
|             } else {
 | |
|                 chan->state = VMCHAN_INIT;
 | |
|             }
 | |
|             return true;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     assert(false);
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static void vdev_reset_on_close(VMBusDevice *vdev)
 | |
| {
 | |
|     uint16_t i;
 | |
| 
 | |
|     for (i = 0; i < vdev->num_channels; i++) {
 | |
|         if (vmbus_channel_is_open(&vdev->channels[i])) {
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /* all channels closed -- reset device */
 | |
|     device_cold_reset(DEVICE(vdev));
 | |
| }
 | |
| 
 | |
| static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg,
 | |
|                                  uint32_t msglen)
 | |
| {
 | |
|     VMBusChannel *chan;
 | |
| 
 | |
|     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     trace_vmbus_close_channel(msg->child_relid);
 | |
| 
 | |
|     chan = find_channel(vmbus, msg->child_relid);
 | |
|     if (!chan) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     close_channel(chan);
 | |
|     chan->state = VMCHAN_INIT;
 | |
| 
 | |
|     vdev_reset_on_close(chan->dev);
 | |
| }
 | |
| 
 | |
| static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen)
 | |
| {
 | |
|     vmbus->state = VMBUS_UNLOAD;
 | |
| }
 | |
| 
 | |
| static void send_unload(VMBus *vmbus)
 | |
| {
 | |
|     vmbus_message_header msg = {
 | |
|         .message_type = VMBUS_MSG_UNLOAD_RESPONSE,
 | |
|     };
 | |
| 
 | |
|     qemu_mutex_lock(&vmbus->rx_queue_lock);
 | |
|     vmbus->rx_queue_size = 0;
 | |
|     qemu_mutex_unlock(&vmbus->rx_queue_lock);
 | |
| 
 | |
|     post_msg(vmbus, &msg, sizeof(msg));
 | |
|     return;
 | |
| }
 | |
| 
 | |
| static bool complete_unload(VMBus *vmbus)
 | |
| {
 | |
|     vmbus_reset_all(vmbus);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static void process_message(VMBus *vmbus)
 | |
| {
 | |
|     struct hyperv_post_message_input *hv_msg;
 | |
|     struct vmbus_message_header *msg;
 | |
|     void *msgdata;
 | |
|     uint32_t msglen;
 | |
| 
 | |
|     qemu_mutex_lock(&vmbus->rx_queue_lock);
 | |
| 
 | |
|     if (!vmbus->rx_queue_size) {
 | |
|         goto unlock;
 | |
|     }
 | |
| 
 | |
|     hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head];
 | |
|     msglen =  hv_msg->payload_size;
 | |
|     if (msglen < sizeof(*msg)) {
 | |
|         goto out;
 | |
|     }
 | |
|     msgdata = hv_msg->payload;
 | |
|     msg = msgdata;
 | |
| 
 | |
|     trace_vmbus_process_incoming_message(msg->message_type);
 | |
| 
 | |
|     switch (msg->message_type) {
 | |
|     case VMBUS_MSG_INITIATE_CONTACT:
 | |
|         handle_initiate_contact(vmbus, msgdata, msglen);
 | |
|         break;
 | |
|     case VMBUS_MSG_REQUESTOFFERS:
 | |
|         handle_request_offers(vmbus, msgdata, msglen);
 | |
|         break;
 | |
|     case VMBUS_MSG_GPADL_HEADER:
 | |
|         handle_gpadl_header(vmbus, msgdata, msglen);
 | |
|         break;
 | |
|     case VMBUS_MSG_GPADL_BODY:
 | |
|         handle_gpadl_body(vmbus, msgdata, msglen);
 | |
|         break;
 | |
|     case VMBUS_MSG_GPADL_TEARDOWN:
 | |
|         handle_gpadl_teardown(vmbus, msgdata, msglen);
 | |
|         break;
 | |
|     case VMBUS_MSG_OPENCHANNEL:
 | |
|         handle_open_channel(vmbus, msgdata, msglen);
 | |
|         break;
 | |
|     case VMBUS_MSG_CLOSECHANNEL:
 | |
|         handle_close_channel(vmbus, msgdata, msglen);
 | |
|         break;
 | |
|     case VMBUS_MSG_UNLOAD:
 | |
|         handle_unload(vmbus, msgdata, msglen);
 | |
|         break;
 | |
|     default:
 | |
|         error_report("unknown message type %#x", msg->message_type);
 | |
|         break;
 | |
|     }
 | |
| 
 | |
| out:
 | |
|     vmbus->rx_queue_size--;
 | |
|     vmbus->rx_queue_head++;
 | |
|     vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN;
 | |
| 
 | |
|     vmbus_resched(vmbus);
 | |
| unlock:
 | |
|     qemu_mutex_unlock(&vmbus->rx_queue_lock);
 | |
| }
 | |
| 
 | |
| static const struct {
 | |
|     void (*run)(VMBus *vmbus);
 | |
|     bool (*complete)(VMBus *vmbus);
 | |
| } state_runner[] = {
 | |
|     [VMBUS_LISTEN]         = {process_message,     NULL},
 | |
|     [VMBUS_HANDSHAKE]      = {send_handshake,      NULL},
 | |
|     [VMBUS_OFFER]          = {send_offer,          complete_offer},
 | |
|     [VMBUS_CREATE_GPADL]   = {send_create_gpadl,   complete_create_gpadl},
 | |
|     [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl},
 | |
|     [VMBUS_OPEN_CHANNEL]   = {send_open_channel,   complete_open_channel},
 | |
|     [VMBUS_UNLOAD]         = {send_unload,         complete_unload},
 | |
| };
 | |
| 
 | |
| static void vmbus_do_run(VMBus *vmbus)
 | |
| {
 | |
|     if (vmbus->msg_in_progress) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     assert(vmbus->state < VMBUS_STATE_MAX);
 | |
|     assert(state_runner[vmbus->state].run);
 | |
|     state_runner[vmbus->state].run(vmbus);
 | |
| }
 | |
| 
 | |
| static void vmbus_run(void *opaque)
 | |
| {
 | |
|     VMBus *vmbus = opaque;
 | |
| 
 | |
|     /* make sure no recursion happens (e.g. due to recursive aio_poll()) */
 | |
|     if (vmbus->in_progress) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     vmbus->in_progress = true;
 | |
|     /*
 | |
|      * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it
 | |
|      * should go *after* the code that can result in aio_poll; otherwise
 | |
|      * reschedules can be missed.  No idea how to enforce that.
 | |
|      */
 | |
|     vmbus_do_run(vmbus);
 | |
|     vmbus->in_progress = false;
 | |
| }
 | |
| 
 | |
| static void vmbus_msg_cb(void *data, int status)
 | |
| {
 | |
|     VMBus *vmbus = data;
 | |
|     bool (*complete)(VMBus *vmbus);
 | |
| 
 | |
|     assert(vmbus->msg_in_progress);
 | |
| 
 | |
|     trace_vmbus_msg_cb(status);
 | |
| 
 | |
|     if (status == -EAGAIN) {
 | |
|         goto out;
 | |
|     }
 | |
|     if (status) {
 | |
|         error_report("message delivery fatal failure: %d; aborting vmbus",
 | |
|                      status);
 | |
|         vmbus_reset_all(vmbus);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     assert(vmbus->state < VMBUS_STATE_MAX);
 | |
|     complete = state_runner[vmbus->state].complete;
 | |
|     if (!complete || complete(vmbus)) {
 | |
|         vmbus->state = VMBUS_LISTEN;
 | |
|     }
 | |
| out:
 | |
|     vmbus->msg_in_progress = false;
 | |
|     vmbus_resched(vmbus);
 | |
| }
 | |
| 
 | |
| static void vmbus_resched(VMBus *vmbus)
 | |
| {
 | |
|     aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus);
 | |
| }
 | |
| 
 | |
| static void vmbus_signal_event(EventNotifier *e)
 | |
| {
 | |
|     VMBusChannel *chan;
 | |
|     VMBus *vmbus = container_of(e, VMBus, notifier);
 | |
|     unsigned long *int_map;
 | |
|     hwaddr addr, len;
 | |
|     bool is_dirty = false;
 | |
| 
 | |
|     if (!event_notifier_test_and_clear(e)) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     trace_vmbus_signal_event();
 | |
| 
 | |
|     if (!vmbus->int_page_gpa) {
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2;
 | |
|     len = TARGET_PAGE_SIZE / 2;
 | |
|     int_map = cpu_physical_memory_map(addr, &len, 1);
 | |
|     if (len != TARGET_PAGE_SIZE / 2) {
 | |
|         goto unmap;
 | |
|     }
 | |
| 
 | |
|     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
 | |
|         if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) {
 | |
|             if (!vmbus_channel_is_open(chan)) {
 | |
|                 continue;
 | |
|             }
 | |
|             vmbus_channel_notify_host(chan);
 | |
|             is_dirty = true;
 | |
|         }
 | |
|     }
 | |
| 
 | |
| unmap:
 | |
|     cpu_physical_memory_unmap(int_map, len, 1, is_dirty);
 | |
| }
 | |
| 
 | |
| static void vmbus_dev_realize(DeviceState *dev, Error **errp)
 | |
| {
 | |
|     VMBusDevice *vdev = VMBUS_DEVICE(dev);
 | |
|     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
 | |
|     VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
 | |
|     BusChild *child;
 | |
|     Error *err = NULL;
 | |
|     char idstr[UUID_STR_LEN];
 | |
| 
 | |
|     assert(!qemu_uuid_is_null(&vdev->instanceid));
 | |
| 
 | |
|     if (!qemu_uuid_is_null(&vdc->instanceid)) {
 | |
|         /* Class wants to only have a single instance with a fixed UUID */
 | |
|         if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) {
 | |
|             error_setg(&err, "instance id can't be changed");
 | |
|             goto error_out;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /* Check for instance id collision for this class id */
 | |
|     QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
 | |
|         VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
 | |
| 
 | |
|         if (child_dev == vdev) {
 | |
|             continue;
 | |
|         }
 | |
| 
 | |
|         if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) {
 | |
|             qemu_uuid_unparse(&vdev->instanceid, idstr);
 | |
|             error_setg(&err, "duplicate vmbus device instance id %s", idstr);
 | |
|             goto error_out;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     vdev->dma_as = &address_space_memory;
 | |
| 
 | |
|     create_channels(vmbus, vdev, &err);
 | |
|     if (err) {
 | |
|         goto error_out;
 | |
|     }
 | |
| 
 | |
|     if (vdc->vmdev_realize) {
 | |
|         vdc->vmdev_realize(vdev, &err);
 | |
|         if (err) {
 | |
|             goto err_vdc_realize;
 | |
|         }
 | |
|     }
 | |
|     return;
 | |
| 
 | |
| err_vdc_realize:
 | |
|     free_channels(vdev);
 | |
| error_out:
 | |
|     error_propagate(errp, err);
 | |
| }
 | |
| 
 | |
| static void vmbus_dev_reset(DeviceState *dev)
 | |
| {
 | |
|     uint16_t i;
 | |
|     VMBusDevice *vdev = VMBUS_DEVICE(dev);
 | |
|     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
 | |
| 
 | |
|     if (vdev->channels) {
 | |
|         for (i = 0; i < vdev->num_channels; i++) {
 | |
|             VMBusChannel *chan = &vdev->channels[i];
 | |
|             close_channel(chan);
 | |
|             chan->state = VMCHAN_INIT;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (vdc->vmdev_reset) {
 | |
|         vdc->vmdev_reset(vdev);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void vmbus_dev_unrealize(DeviceState *dev)
 | |
| {
 | |
|     VMBusDevice *vdev = VMBUS_DEVICE(dev);
 | |
|     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
 | |
| 
 | |
|     if (vdc->vmdev_unrealize) {
 | |
|         vdc->vmdev_unrealize(vdev);
 | |
|     }
 | |
|     free_channels(vdev);
 | |
| }
 | |
| 
 | |
| static Property vmbus_dev_props[] = {
 | |
|     DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid),
 | |
|     DEFINE_PROP_END_OF_LIST()
 | |
| };
 | |
| 
 | |
| 
 | |
| static void vmbus_dev_class_init(ObjectClass *klass, void *data)
 | |
| {
 | |
|     DeviceClass *kdev = DEVICE_CLASS(klass);
 | |
|     device_class_set_props(kdev, vmbus_dev_props);
 | |
|     kdev->bus_type = TYPE_VMBUS;
 | |
|     kdev->realize = vmbus_dev_realize;
 | |
|     kdev->unrealize = vmbus_dev_unrealize;
 | |
|     kdev->reset = vmbus_dev_reset;
 | |
| }
 | |
| 
 | |
| static void vmbus_dev_instance_init(Object *obj)
 | |
| {
 | |
|     VMBusDevice *vdev = VMBUS_DEVICE(obj);
 | |
|     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
 | |
| 
 | |
|     if (!qemu_uuid_is_null(&vdc->instanceid)) {
 | |
|         /* Class wants to only have a single instance with a fixed UUID */
 | |
|         vdev->instanceid = vdc->instanceid;
 | |
|     }
 | |
| }
 | |
| 
 | |
| const VMStateDescription vmstate_vmbus_dev = {
 | |
|     .name = TYPE_VMBUS_DEVICE,
 | |
|     .version_id = 0,
 | |
|     .minimum_version_id = 0,
 | |
|     .fields = (const VMStateField[]) {
 | |
|         VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16),
 | |
|         VMSTATE_UINT16(num_channels, VMBusDevice),
 | |
|         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice,
 | |
|                                              num_channels, vmstate_channel,
 | |
|                                              VMBusChannel),
 | |
|         VMSTATE_END_OF_LIST()
 | |
|     }
 | |
| };
 | |
| 
 | |
| /* vmbus generic device base */
 | |
| static const TypeInfo vmbus_dev_type_info = {
 | |
|     .name = TYPE_VMBUS_DEVICE,
 | |
|     .parent = TYPE_DEVICE,
 | |
|     .abstract = true,
 | |
|     .instance_size = sizeof(VMBusDevice),
 | |
|     .class_size = sizeof(VMBusDeviceClass),
 | |
|     .class_init = vmbus_dev_class_init,
 | |
|     .instance_init = vmbus_dev_instance_init,
 | |
| };
 | |
| 
 | |
| static void vmbus_realize(BusState *bus, Error **errp)
 | |
| {
 | |
|     int ret = 0;
 | |
|     VMBus *vmbus = VMBUS(bus);
 | |
| 
 | |
|     qemu_mutex_init(&vmbus->rx_queue_lock);
 | |
| 
 | |
|     QTAILQ_INIT(&vmbus->gpadl_list);
 | |
|     QTAILQ_INIT(&vmbus->channel_list);
 | |
| 
 | |
|     ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID,
 | |
|                                  vmbus_recv_message, vmbus);
 | |
|     if (ret != 0) {
 | |
|         error_setg(errp, "hyperv set message handler failed: %d", ret);
 | |
|         goto error_out;
 | |
|     }
 | |
| 
 | |
|     ret = event_notifier_init(&vmbus->notifier, 0);
 | |
|     if (ret != 0) {
 | |
|         error_setg(errp, "event notifier failed to init with %d", ret);
 | |
|         goto remove_msg_handler;
 | |
|     }
 | |
| 
 | |
|     event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event);
 | |
|     ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID,
 | |
|                                         &vmbus->notifier);
 | |
|     if (ret != 0) {
 | |
|         error_setg(errp, "hyperv set event handler failed with %d", ret);
 | |
|         goto clear_event_notifier;
 | |
|     }
 | |
| 
 | |
|     return;
 | |
| 
 | |
| clear_event_notifier:
 | |
|     event_notifier_cleanup(&vmbus->notifier);
 | |
| remove_msg_handler:
 | |
|     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
 | |
| error_out:
 | |
|     qemu_mutex_destroy(&vmbus->rx_queue_lock);
 | |
| }
 | |
| 
 | |
| static void vmbus_unrealize(BusState *bus)
 | |
| {
 | |
|     VMBus *vmbus = VMBUS(bus);
 | |
| 
 | |
|     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
 | |
|     hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL);
 | |
|     event_notifier_cleanup(&vmbus->notifier);
 | |
| 
 | |
|     qemu_mutex_destroy(&vmbus->rx_queue_lock);
 | |
| }
 | |
| 
 | |
| static void vmbus_reset(BusState *bus)
 | |
| {
 | |
|     vmbus_deinit(VMBUS(bus));
 | |
| }
 | |
| 
 | |
| static char *vmbus_get_dev_path(DeviceState *dev)
 | |
| {
 | |
|     BusState *bus = qdev_get_parent_bus(dev);
 | |
|     return qdev_get_dev_path(bus->parent);
 | |
| }
 | |
| 
 | |
| static char *vmbus_get_fw_dev_path(DeviceState *dev)
 | |
| {
 | |
|     VMBusDevice *vdev = VMBUS_DEVICE(dev);
 | |
|     char uuid[UUID_STR_LEN];
 | |
| 
 | |
|     qemu_uuid_unparse(&vdev->instanceid, uuid);
 | |
|     return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid);
 | |
| }
 | |
| 
 | |
| static void vmbus_class_init(ObjectClass *klass, void *data)
 | |
| {
 | |
|     BusClass *k = BUS_CLASS(klass);
 | |
| 
 | |
|     k->get_dev_path = vmbus_get_dev_path;
 | |
|     k->get_fw_dev_path = vmbus_get_fw_dev_path;
 | |
|     k->realize = vmbus_realize;
 | |
|     k->unrealize = vmbus_unrealize;
 | |
|     k->reset = vmbus_reset;
 | |
| }
 | |
| 
 | |
| static int vmbus_pre_load(void *opaque)
 | |
| {
 | |
|     VMBusChannel *chan;
 | |
|     VMBus *vmbus = VMBUS(opaque);
 | |
| 
 | |
|     /*
 | |
|      * channel IDs allocated by the source will come in the migration stream
 | |
|      * for each channel, so clean up the ones allocated at realize
 | |
|      */
 | |
|     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
 | |
|         unregister_chan_id(chan);
 | |
|     }
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| static int vmbus_post_load(void *opaque, int version_id)
 | |
| {
 | |
|     int ret;
 | |
|     VMBus *vmbus = VMBUS(opaque);
 | |
|     VMBusGpadl *gpadl;
 | |
|     VMBusChannel *chan;
 | |
| 
 | |
|     ret = vmbus_init(vmbus);
 | |
|     if (ret) {
 | |
|         return ret;
 | |
|     }
 | |
| 
 | |
|     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
 | |
|         gpadl->vmbus = vmbus;
 | |
|         gpadl->refcount = 1;
 | |
|     }
 | |
| 
 | |
|     /*
 | |
|      * reopening channels depends on initialized vmbus so it's done here
 | |
|      * instead of channel_post_load()
 | |
|      */
 | |
|     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
 | |
| 
 | |
|         if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) {
 | |
|             open_channel(chan);
 | |
|         }
 | |
| 
 | |
|         if (chan->state != VMCHAN_OPEN) {
 | |
|             continue;
 | |
|         }
 | |
| 
 | |
|         if (!vmbus_channel_is_open(chan)) {
 | |
|             /* reopen failed, abort loading */
 | |
|             return -1;
 | |
|         }
 | |
| 
 | |
|         /* resume processing on the guest side if it missed the notification */
 | |
|         hyperv_sint_route_set_sint(chan->notify_route);
 | |
|         /* ditto on the host side */
 | |
|         vmbus_channel_notify_host(chan);
 | |
|     }
 | |
| 
 | |
|     vmbus_resched(vmbus);
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| static const VMStateDescription vmstate_post_message_input = {
 | |
|     .name = "vmbus/hyperv_post_message_input",
 | |
|     .version_id = 0,
 | |
|     .minimum_version_id = 0,
 | |
|     .fields = (const VMStateField[]) {
 | |
|         /*
 | |
|          * skip connection_id and message_type as they are validated before
 | |
|          * queueing and ignored on dequeueing
 | |
|          */
 | |
|         VMSTATE_UINT32(payload_size, struct hyperv_post_message_input),
 | |
|         VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input,
 | |
|                             HV_MESSAGE_PAYLOAD_SIZE),
 | |
|         VMSTATE_END_OF_LIST()
 | |
|     }
 | |
| };
 | |
| 
 | |
| static bool vmbus_rx_queue_needed(void *opaque)
 | |
| {
 | |
|     VMBus *vmbus = VMBUS(opaque);
 | |
|     return vmbus->rx_queue_size;
 | |
| }
 | |
| 
 | |
| static const VMStateDescription vmstate_rx_queue = {
 | |
|     .name = "vmbus/rx_queue",
 | |
|     .version_id = 0,
 | |
|     .minimum_version_id = 0,
 | |
|     .needed = vmbus_rx_queue_needed,
 | |
|     .fields = (const VMStateField[]) {
 | |
|         VMSTATE_UINT8(rx_queue_head, VMBus),
 | |
|         VMSTATE_UINT8(rx_queue_size, VMBus),
 | |
|         VMSTATE_STRUCT_ARRAY(rx_queue, VMBus,
 | |
|                              HV_MSG_QUEUE_LEN, 0,
 | |
|                              vmstate_post_message_input,
 | |
|                              struct hyperv_post_message_input),
 | |
|         VMSTATE_END_OF_LIST()
 | |
|     }
 | |
| };
 | |
| 
 | |
| static const VMStateDescription vmstate_vmbus = {
 | |
|     .name = TYPE_VMBUS,
 | |
|     .version_id = 0,
 | |
|     .minimum_version_id = 0,
 | |
|     .pre_load = vmbus_pre_load,
 | |
|     .post_load = vmbus_post_load,
 | |
|     .fields = (const VMStateField[]) {
 | |
|         VMSTATE_UINT8(state, VMBus),
 | |
|         VMSTATE_UINT32(version, VMBus),
 | |
|         VMSTATE_UINT32(target_vp, VMBus),
 | |
|         VMSTATE_UINT64(int_page_gpa, VMBus),
 | |
|         VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0,
 | |
|                          vmstate_gpadl, VMBusGpadl, link),
 | |
|         VMSTATE_END_OF_LIST()
 | |
|     },
 | |
|     .subsections = (const VMStateDescription * const []) {
 | |
|         &vmstate_rx_queue,
 | |
|         NULL
 | |
|     }
 | |
| };
 | |
| 
 | |
| static const TypeInfo vmbus_type_info = {
 | |
|     .name = TYPE_VMBUS,
 | |
|     .parent = TYPE_BUS,
 | |
|     .instance_size = sizeof(VMBus),
 | |
|     .class_init = vmbus_class_init,
 | |
| };
 | |
| 
 | |
| static void vmbus_bridge_realize(DeviceState *dev, Error **errp)
 | |
| {
 | |
|     VMBusBridge *bridge = VMBUS_BRIDGE(dev);
 | |
| 
 | |
|     /*
 | |
|      * here there's at least one vmbus bridge that is being realized, so
 | |
|      * vmbus_bridge_find can only return NULL if it's not unique
 | |
|      */
 | |
|     if (!vmbus_bridge_find()) {
 | |
|         error_setg(errp, "there can be at most one %s in the system",
 | |
|                    TYPE_VMBUS_BRIDGE);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (!hyperv_is_synic_enabled()) {
 | |
|         error_report("VMBus requires usable Hyper-V SynIC and VP_INDEX");
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     bridge->bus = VMBUS(qbus_new(TYPE_VMBUS, dev, "vmbus"));
 | |
| }
 | |
| 
 | |
| static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev)
 | |
| {
 | |
|     /* there can be only one VMBus */
 | |
|     return g_strdup("0");
 | |
| }
 | |
| 
 | |
| static const VMStateDescription vmstate_vmbus_bridge = {
 | |
|     .name = TYPE_VMBUS_BRIDGE,
 | |
|     .version_id = 0,
 | |
|     .minimum_version_id = 0,
 | |
|     .fields = (const VMStateField[]) {
 | |
|         VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus),
 | |
|         VMSTATE_END_OF_LIST()
 | |
|     },
 | |
| };
 | |
| 
 | |
| static Property vmbus_bridge_props[] = {
 | |
|     DEFINE_PROP_UINT8("irq", VMBusBridge, irq, 7),
 | |
|     DEFINE_PROP_END_OF_LIST()
 | |
| };
 | |
| 
 | |
| static void vmbus_bridge_class_init(ObjectClass *klass, void *data)
 | |
| {
 | |
|     DeviceClass *k = DEVICE_CLASS(klass);
 | |
|     SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass);
 | |
| 
 | |
|     k->realize = vmbus_bridge_realize;
 | |
|     k->fw_name = "vmbus";
 | |
|     sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address;
 | |
|     set_bit(DEVICE_CATEGORY_BRIDGE, k->categories);
 | |
|     k->vmsd = &vmstate_vmbus_bridge;
 | |
|     device_class_set_props(k, vmbus_bridge_props);
 | |
|     /* override SysBusDevice's default */
 | |
|     k->user_creatable = true;
 | |
| }
 | |
| 
 | |
| static const TypeInfo vmbus_bridge_type_info = {
 | |
|     .name = TYPE_VMBUS_BRIDGE,
 | |
|     .parent = TYPE_SYS_BUS_DEVICE,
 | |
|     .instance_size = sizeof(VMBusBridge),
 | |
|     .class_init = vmbus_bridge_class_init,
 | |
| };
 | |
| 
 | |
| static void vmbus_register_types(void)
 | |
| {
 | |
|     type_register_static(&vmbus_bridge_type_info);
 | |
|     type_register_static(&vmbus_dev_type_info);
 | |
|     type_register_static(&vmbus_type_info);
 | |
| }
 | |
| 
 | |
| type_init(vmbus_register_types)
 |