postcopy: Allow registering of fd handler
Allow other userfaultfd's to be registered into the fault thread so that handlers for shared memory can get responses.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
		
							parent
							
								
									2a84ffc0be
								
							
						
					
					
						commit
						00fa4fc85b
					
				| @ -155,6 +155,8 @@ MigrationIncomingState *migration_incoming_get_current(void) | ||||
|     if (!once) { | ||||
|         mis_current.state = MIGRATION_STATUS_NONE; | ||||
|         memset(&mis_current, 0, sizeof(MigrationIncomingState)); | ||||
|         mis_current.postcopy_remote_fds = g_array_new(FALSE, TRUE, | ||||
|                                                    sizeof(struct PostCopyFD)); | ||||
|         qemu_mutex_init(&mis_current.rp_mutex); | ||||
|         qemu_event_init(&mis_current.main_thread_load_event, false); | ||||
|         once = true; | ||||
| @ -177,6 +179,10 @@ void migration_incoming_state_destroy(void) | ||||
|         qemu_fclose(mis->from_src_file); | ||||
|         mis->from_src_file = NULL; | ||||
|     } | ||||
|     if (mis->postcopy_remote_fds) { | ||||
|         g_array_free(mis->postcopy_remote_fds, TRUE); | ||||
|         mis->postcopy_remote_fds = NULL; | ||||
|     } | ||||
| 
 | ||||
|     qemu_event_reset(&mis->main_thread_load_event); | ||||
| } | ||||
|  | ||||
| @ -51,6 +51,8 @@ struct MigrationIncomingState { | ||||
|     QemuMutex rp_mutex;    /* We send replies from multiple threads */ | ||||
|     void     *postcopy_tmp_page; | ||||
|     void     *postcopy_tmp_zero_page; | ||||
|     /* PostCopyFD's for external userfaultfds & handlers of shared memory */ | ||||
|     GArray   *postcopy_remote_fds; | ||||
| 
 | ||||
|     QEMUBH *bh; | ||||
| 
 | ||||
|  | ||||
| @ -533,29 +533,44 @@ static void *postcopy_ram_fault_thread(void *opaque) | ||||
|     MigrationIncomingState *mis = opaque; | ||||
|     struct uffd_msg msg; | ||||
|     int ret; | ||||
|     size_t index; | ||||
|     RAMBlock *rb = NULL; | ||||
|     RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */ | ||||
| 
 | ||||
|     trace_postcopy_ram_fault_thread_entry(); | ||||
|     qemu_sem_post(&mis->fault_thread_sem); | ||||
| 
 | ||||
|     struct pollfd *pfd; | ||||
|     size_t pfd_len = 2 + mis->postcopy_remote_fds->len; | ||||
| 
 | ||||
|     pfd = g_new0(struct pollfd, pfd_len); | ||||
| 
 | ||||
|     pfd[0].fd = mis->userfault_fd; | ||||
|     pfd[0].events = POLLIN; | ||||
|     pfd[1].fd = mis->userfault_event_fd; | ||||
|     pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */ | ||||
|     trace_postcopy_ram_fault_thread_fds_core(pfd[0].fd, pfd[1].fd); | ||||
|     for (index = 0; index < mis->postcopy_remote_fds->len; index++) { | ||||
|         struct PostCopyFD *pcfd = &g_array_index(mis->postcopy_remote_fds, | ||||
|                                                  struct PostCopyFD, index); | ||||
|         pfd[2 + index].fd = pcfd->fd; | ||||
|         pfd[2 + index].events = POLLIN; | ||||
|         trace_postcopy_ram_fault_thread_fds_extra(2 + index, pcfd->idstr, | ||||
|                                                   pcfd->fd); | ||||
|     } | ||||
| 
 | ||||
|     while (true) { | ||||
|         ram_addr_t rb_offset; | ||||
|         struct pollfd pfd[2]; | ||||
|         int poll_result; | ||||
| 
 | ||||
|         /*
 | ||||
|          * We're mainly waiting for the kernel to give us a faulting HVA, | ||||
|          * however we can be told to quit via userfault_event_fd which is | ||||
|          * an eventfd | ||||
|          */ | ||||
|         pfd[0].fd = mis->userfault_fd; | ||||
|         pfd[0].events = POLLIN; | ||||
|         pfd[0].revents = 0; | ||||
|         pfd[1].fd = mis->userfault_event_fd; | ||||
|         pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */ | ||||
|         pfd[1].revents = 0; | ||||
| 
 | ||||
|         if (poll(pfd, 2, -1 /* Wait forever */) == -1) { | ||||
|         poll_result = poll(pfd, pfd_len, -1 /* Wait forever */); | ||||
|         if (poll_result == -1) { | ||||
|             error_report("%s: userfault poll: %s", __func__, strerror(errno)); | ||||
|             break; | ||||
|         } | ||||
| @ -575,57 +590,117 @@ static void *postcopy_ram_fault_thread(void *opaque) | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         ret = read(mis->userfault_fd, &msg, sizeof(msg)); | ||||
|         if (ret != sizeof(msg)) { | ||||
|             if (errno == EAGAIN) { | ||||
|                 /*
 | ||||
|                  * if a wake up happens on the other thread just after | ||||
|                  * the poll, there is nothing to read. | ||||
|                  */ | ||||
|                 continue; | ||||
|         if (pfd[0].revents) { | ||||
|             poll_result--; | ||||
|             ret = read(mis->userfault_fd, &msg, sizeof(msg)); | ||||
|             if (ret != sizeof(msg)) { | ||||
|                 if (errno == EAGAIN) { | ||||
|                     /*
 | ||||
|                      * if a wake up happens on the other thread just after | ||||
|                      * the poll, there is nothing to read. | ||||
|                      */ | ||||
|                     continue; | ||||
|                 } | ||||
|                 if (ret < 0) { | ||||
|                     error_report("%s: Failed to read full userfault " | ||||
|                                  "message: %s", | ||||
|                                  __func__, strerror(errno)); | ||||
|                     break; | ||||
|                 } else { | ||||
|                     error_report("%s: Read %d bytes from userfaultfd " | ||||
|                                  "expected %zd", | ||||
|                                  __func__, ret, sizeof(msg)); | ||||
|                     break; /* Lost alignment, don't know what we'd read next */ | ||||
|                 } | ||||
|             } | ||||
|             if (ret < 0) { | ||||
|                 error_report("%s: Failed to read full userfault message: %s", | ||||
|                              __func__, strerror(errno)); | ||||
|             if (msg.event != UFFD_EVENT_PAGEFAULT) { | ||||
|                 error_report("%s: Read unexpected event %ud from userfaultfd", | ||||
|                              __func__, msg.event); | ||||
|                 continue; /* It's not a page fault, shouldn't happen */ | ||||
|             } | ||||
| 
 | ||||
|             rb = qemu_ram_block_from_host( | ||||
|                      (void *)(uintptr_t)msg.arg.pagefault.address, | ||||
|                      true, &rb_offset); | ||||
|             if (!rb) { | ||||
|                 error_report("postcopy_ram_fault_thread: Fault outside guest: %" | ||||
|                              PRIx64, (uint64_t)msg.arg.pagefault.address); | ||||
|                 break; | ||||
|             } else { | ||||
|                 error_report("%s: Read %d bytes from userfaultfd expected %zd", | ||||
|                              __func__, ret, sizeof(msg)); | ||||
|                 break; /* Lost alignment, don't know what we'd read next */ | ||||
|             } | ||||
|         } | ||||
|         if (msg.event != UFFD_EVENT_PAGEFAULT) { | ||||
|             error_report("%s: Read unexpected event %ud from userfaultfd", | ||||
|                          __func__, msg.event); | ||||
|             continue; /* It's not a page fault, shouldn't happen */ | ||||
|         } | ||||
| 
 | ||||
|         rb = qemu_ram_block_from_host( | ||||
|                  (void *)(uintptr_t)msg.arg.pagefault.address, | ||||
|                  true, &rb_offset); | ||||
|         if (!rb) { | ||||
|             error_report("postcopy_ram_fault_thread: Fault outside guest: %" | ||||
|                          PRIx64, (uint64_t)msg.arg.pagefault.address); | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         rb_offset &= ~(qemu_ram_pagesize(rb) - 1); | ||||
|         trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, | ||||
|             rb_offset &= ~(qemu_ram_pagesize(rb) - 1); | ||||
|             trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, | ||||
|                                                 qemu_ram_get_idstr(rb), | ||||
|                                                 rb_offset); | ||||
|             /*
 | ||||
|              * Send the request to the source - we want to request one | ||||
|              * of our host page sizes (which is >= TPS) | ||||
|              */ | ||||
|             if (rb != last_rb) { | ||||
|                 last_rb = rb; | ||||
|                 migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), | ||||
|                                          rb_offset, qemu_ram_pagesize(rb)); | ||||
|             } else { | ||||
|                 /* Save some space */ | ||||
|                 migrate_send_rp_req_pages(mis, NULL, | ||||
|                                          rb_offset, qemu_ram_pagesize(rb)); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         /*
 | ||||
|          * Send the request to the source - we want to request one | ||||
|          * of our host page sizes (which is >= TPS) | ||||
|          */ | ||||
|         if (rb != last_rb) { | ||||
|             last_rb = rb; | ||||
|             migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), | ||||
|                                      rb_offset, qemu_ram_pagesize(rb)); | ||||
|         } else { | ||||
|             /* Save some space */ | ||||
|             migrate_send_rp_req_pages(mis, NULL, | ||||
|                                      rb_offset, qemu_ram_pagesize(rb)); | ||||
|         /* Now handle any requests from external processes on shared memory */ | ||||
|         /* TODO: May need to handle devices deregistering during postcopy */ | ||||
|         for (index = 2; index < pfd_len && poll_result; index++) { | ||||
|             if (pfd[index].revents) { | ||||
|                 struct PostCopyFD *pcfd = | ||||
|                     &g_array_index(mis->postcopy_remote_fds, | ||||
|                                    struct PostCopyFD, index - 2); | ||||
| 
 | ||||
|                 poll_result--; | ||||
|                 if (pfd[index].revents & POLLERR) { | ||||
|                     error_report("%s: POLLERR on poll %zd fd=%d", | ||||
|                                  __func__, index, pcfd->fd); | ||||
|                     pfd[index].events = 0; | ||||
|                     continue; | ||||
|                 } | ||||
| 
 | ||||
|                 ret = read(pcfd->fd, &msg, sizeof(msg)); | ||||
|                 if (ret != sizeof(msg)) { | ||||
|                     if (errno == EAGAIN) { | ||||
|                         /*
 | ||||
|                          * if a wake up happens on the other thread just after | ||||
|                          * the poll, there is nothing to read. | ||||
|                          */ | ||||
|                         continue; | ||||
|                     } | ||||
|                     if (ret < 0) { | ||||
|                         error_report("%s: Failed to read full userfault " | ||||
|                                      "message: %s (shared) revents=%d", | ||||
|                                      __func__, strerror(errno), | ||||
|                                      pfd[index].revents); | ||||
|                         /*TODO: Could just disable this sharer */ | ||||
|                         break; | ||||
|                     } else { | ||||
|                         error_report("%s: Read %d bytes from userfaultfd " | ||||
|                                      "expected %zd (shared)", | ||||
|                                      __func__, ret, sizeof(msg)); | ||||
|                         /*TODO: Could just disable this sharer */ | ||||
|                         break; /*Lost alignment,don't know what we'd read next*/ | ||||
|                     } | ||||
|                 } | ||||
|                 if (msg.event != UFFD_EVENT_PAGEFAULT) { | ||||
|                     error_report("%s: Read unexpected event %ud " | ||||
|                                  "from userfaultfd (shared)", | ||||
|                                  __func__, msg.event); | ||||
|                     continue; /* It's not a page fault, shouldn't happen */ | ||||
|                 } | ||||
|                 /* Call the device handler registered with us */ | ||||
|                 ret = pcfd->handler(pcfd, &msg); | ||||
|                 if (ret) { | ||||
|                     error_report("%s: Failed to resolve shared fault on %zd/%s", | ||||
|                                  __func__, index, pcfd->idstr); | ||||
|                     /* TODO: Fail? Disable this sharer? */ | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     trace_postcopy_ram_fault_thread_exit(); | ||||
| @ -970,3 +1045,31 @@ PostcopyState postcopy_state_set(PostcopyState new_state) | ||||
| { | ||||
|     return atomic_xchg(&incoming_postcopy_state, new_state); | ||||
| } | ||||
| 
 | ||||
| /* Register a handler for external shared memory postcopy
 | ||||
|  * called on the destination. | ||||
|  * | ||||
|  * @pcfd: descriptor to register.  The structure is appended by value to | ||||
|  *        the incoming state's postcopy_remote_fds array, so the array | ||||
|  *        holds its own copy of the fields; the pointer members (data, | ||||
|  *        idstr) are copied as pointers only. | ||||
|  * | ||||
|  * NOTE(review): the fault thread sizes its pollfd array from this list | ||||
|  * once, before entering its poll loop, so registration presumably has to | ||||
|  * happen before that thread starts - confirm against callers. | ||||
|  */ | ||||
| void postcopy_register_shared_ufd(struct PostCopyFD *pcfd) | ||||
| { | ||||
|     MigrationIncomingState *mis = migration_incoming_get_current(); | ||||
| 
 | ||||
|     mis->postcopy_remote_fds = g_array_append_val(mis->postcopy_remote_fds, | ||||
|                                                   *pcfd); | ||||
| } | ||||
| 
 | ||||
| /* Unregister a handler for external shared memory postcopy
 | ||||
|  * | ||||
|  * @pcfd: only pcfd->fd is examined.  The first registered entry whose fd | ||||
|  *        matches is removed and the search stops; if no entry matches, | ||||
|  *        the list is left unchanged and nothing is reported. | ||||
|  * | ||||
|  * NOTE(review): the fault thread builds its pollfd array from this list | ||||
|  * once and looks entries up by position, and it carries a TODO about | ||||
|  * deregistration during postcopy - so calling this while that thread is | ||||
|  * running looks unsafe; confirm against callers. | ||||
|  */ | ||||
| void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd) | ||||
| { | ||||
|     guint i; | ||||
|     MigrationIncomingState *mis = migration_incoming_get_current(); | ||||
|     GArray *pcrfds = mis->postcopy_remote_fds; | ||||
| 
 | ||||
|     for (i = 0; i < pcrfds->len; i++) { | ||||
|         struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i); | ||||
|         if (cur->fd == pcfd->fd) { | ||||
|             mis->postcopy_remote_fds = g_array_remove_index(pcrfds, i); | ||||
|             return; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -143,4 +143,25 @@ void postcopy_remove_notifier(NotifierWithReturn *n); | ||||
| /* Call the notifier list set by postcopy_add_start_notifier */ | ||||
| int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp); | ||||
| 
 | ||||
| struct PostCopyFD; | ||||
| 
 | ||||
| /* Fault handler callback type. | ||||
|  * pcfd is the registered entry the message was read from; ufd is a | ||||
|  * pointer to the struct uffd_msg (TODO: more portable!). | ||||
|  * The fault thread treats a non-zero return as a failure and logs it. | ||||
|  */ | ||||
| typedef int (*pcfdhandler)(struct PostCopyFD *pcfd, void *ufd); | ||||
| 
 | ||||
| struct PostCopyFD { | ||||
|     int fd; /* polled for POLLIN and read by the fault thread */ | ||||
|     /* Data to pass to handler; the handler receives the whole PostCopyFD, | ||||
|      * so it reaches this through pcfd->data */ | ||||
|     void *data; | ||||
|     /* Handler called by the fault thread for each page-fault message | ||||
|      * (UFFD_EVENT_PAGEFAULT) it reads from fd */ | ||||
|     pcfdhandler handler; | ||||
|     /* A string to use in error messages and trace output */ | ||||
|     const char *idstr; | ||||
| }; | ||||
| 
 | ||||
| /* Register a userfaultfd owned by an external process for
 | ||||
|  * shared memory. | ||||
|  */ | ||||
| void postcopy_register_shared_ufd(struct PostCopyFD *pcfd); | ||||
| void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd); | ||||
| 
 | ||||
| #endif | ||||
|  | ||||
| @ -190,6 +190,8 @@ postcopy_place_page_zero(void *host_addr) "host=%p" | ||||
| postcopy_ram_enable_notify(void) "" | ||||
| postcopy_ram_fault_thread_entry(void) "" | ||||
| postcopy_ram_fault_thread_exit(void) "" | ||||
| postcopy_ram_fault_thread_fds_core(int baseufd, int quitfd) "ufd: %d quitfd: %d" | ||||
| postcopy_ram_fault_thread_fds_extra(size_t index, const char *name, int fd) "%zd/%s: %d" | ||||
| postcopy_ram_fault_thread_quit(void) "" | ||||
| postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=0x%" PRIx64 " rb=%s offset=0x%zx" | ||||
| postcopy_ram_incoming_cleanup_closeuf(void) "" | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Dr. David Alan Gilbert
						Dr. David Alan Gilbert