Migration pull for 2.13
Alexey Perevalov postcopy blocktime statistics
Xiao Guangrong's compression performance improvements

-----BEGIN PGP SIGNATURE-----

iQIcBAABAgAGBQJa4NUpAAoJEAUWMx68W/3njR4P/2eGZOBNU8W5e8MLr1hi95yS
3yQiwNAWQ4rfvroyUAIVwvjbRUgXCMUaqOTu+kbwt/bXFbjrjlBAXBY61yEeWq3M
gSCCnD/JUpEZfkieqhdulRzJDh/6VeQg4AbdIL8MyfxV0jODeVm2JfyMeHCHOEaP
LaEiiMhteONu+hsTFms2oeTbyBVgVu/ceq5pFClr8dadYQtOEltSRWPwxHOpnm3P
athGslx480AXDB3FqVucBpIq16qCqb8/jjXY1W2iJtCfrdtDExmmQA3KKSOe7WJJ
AewsBPv1aFo9cqL7eefNXtMirulXhr38jMJ658bZyf2756TH4hE9eB6R3oz4FpTT
j9iiTeNIfaYqgqOisW5fPng4gzxOYECmQZroYP2VSXSLfGeMQ5a5s0cJdNas6dpR
Us0iedgJyp17p5g/68fZExCVlvEcPNPlfE6pd5T+DSEqdp5Dp0Qv/TBh5p/nG/gm
/IBoEXcK9zqelHPOywVrTkEvkV+xzP+ORs9Ly1DQKWajzxCaLaClAP1INAj7ayFf
yqJi8OvW66S487lQwdz3wvUGGTI1MCpYyoOplWYk22ohYft2BzYPtt/lX3pZanud
YutxMSKfZMyFV81MkyRgoui5tG3jSXnv/IbGHaK42CIMuEcJsSseq2Ea3FmhznQV
E90XGkay5Wi2alBKNEmX
=3MTi
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/dgilbert/tags/pull-migration-20180425a' into staging

Migration pull for 2.13

Alexey Perevalov postcopy blocktime statistics
Xiao Guangrong's compression performance improvements

# gpg: Signature made Wed 25 Apr 2018 20:21:13 BST
# gpg:                using RSA key 0516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>"
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A 9FA9 0516 331E BC5B FDE7

* remotes/dgilbert/tags/pull-migration-20180425a:
  migration: remove ram_save_compressed_page()
  migration: introduce save_normal_page()
  migration: move calling save_zero_page to the common place
  migration: move calling control_save_page to the common place
  migration: move some code to ram_save_host_page
  migration: introduce control_save_page()
  migration: detect compression and decompression errors
  migration: stop decompression to allocate and free memory frequently
  migration: stop compression to allocate and free memory frequently
  migration: stop compressing page in migration thread
  migration: add postcopy total blocktime into query-migrate
  migration: add blocktime calculation into migration-test
  migration: postcopy_blocktime documentation
  migration: calculate vCPU blocktime on dst side
  migration: add postcopy blocktime ctx into MigrationIncomingState
  migration: introduce postcopy-blocktime capability

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in commit 8e383d19b4.
				| @ -401,6 +401,20 @@ will now cause the transition from precopy to postcopy. | ||||
| It can be issued immediately after migration is started or any | ||||
| time later on.  Issuing it after the end of a migration is harmless. | ||||
| 
 | ||||
| Blocktime is a postcopy live migration metric, intended to show how | ||||
| long a vCPU was in interruptible sleep due to a page fault. The | ||||
| metric is calculated both as an overlapped value across all vCPUs | ||||
| and separately for each vCPU, on the destination side.  To enable | ||||
| postcopy blocktime calculation, enter the following command on the | ||||
| destination monitor: | ||||
| 
 | ||||
| ``migrate_set_capability postcopy-blocktime on`` | ||||
| 
 | ||||
| Postcopy blocktime can be retrieved with the query-migrate QMP | ||||
| command: the postcopy-blocktime value shows the overlapped blocking | ||||
| time across all vCPUs, and postcopy-vcpu-blocktime shows the list of | ||||
| blocking times per vCPU. | ||||
| 
 | ||||
| .. note:: | ||||
|   During the postcopy phase, the bandwidth limits set using | ||||
|   ``migrate_set_speed`` are ignored (to avoid delaying requested pages that | ||||
|  | ||||
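For illustration, a query-migrate exchange on the destination might look
like this once the capability is enabled (the numeric values below are
invented for the example; blocktimes are reported in milliseconds):

    -> { "execute": "query-migrate" }
    <- { "return": { "status": "completed",
                     "postcopy-blocktime": 3459,
                     "postcopy-vcpu-blocktime": [3118, 3007, 3204, 3278] } }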
							
								
								
									
hmp.c | 15
							| @ -274,6 +274,21 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) | ||||
|                        info->cpu_throttle_percentage); | ||||
|     } | ||||
| 
 | ||||
|     if (info->has_postcopy_blocktime) { | ||||
|         monitor_printf(mon, "postcopy blocktime: %u\n", | ||||
|                        info->postcopy_blocktime); | ||||
|     } | ||||
| 
 | ||||
|     if (info->has_postcopy_vcpu_blocktime) { | ||||
|         Visitor *v; | ||||
|         char *str; | ||||
|         v = string_output_visitor_new(false, &str); | ||||
|         visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL); | ||||
|         visit_complete(v, &str); | ||||
|         monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); | ||||
|         g_free(str); | ||||
|         visit_free(v); | ||||
|     } | ||||
|     qapi_free_MigrationInfo(info); | ||||
|     qapi_free_MigrationCapabilityStatusList(caps); | ||||
| } | ||||
|  | ||||
| @ -630,14 +630,15 @@ static void populate_disk_info(MigrationInfo *info) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| MigrationInfo *qmp_query_migrate(Error **errp) | ||||
| static void fill_source_migration_info(MigrationInfo *info) | ||||
| { | ||||
|     MigrationInfo *info = g_malloc0(sizeof(*info)); | ||||
|     MigrationState *s = migrate_get_current(); | ||||
| 
 | ||||
|     switch (s->state) { | ||||
|     case MIGRATION_STATUS_NONE: | ||||
|         /* no migration has happened ever */ | ||||
|         /* do not overwrite destination migration status */ | ||||
|         return; | ||||
|         break; | ||||
|     case MIGRATION_STATUS_SETUP: | ||||
|         info->has_status = true; | ||||
| @ -688,8 +689,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) | ||||
|         break; | ||||
|     } | ||||
|     info->status = s->state; | ||||
| 
 | ||||
|     return info; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
| @ -753,6 +752,41 @@ static bool migrate_caps_check(bool *cap_list, | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| static void fill_destination_migration_info(MigrationInfo *info) | ||||
| { | ||||
|     MigrationIncomingState *mis = migration_incoming_get_current(); | ||||
| 
 | ||||
|     switch (mis->state) { | ||||
|     case MIGRATION_STATUS_NONE: | ||||
|         return; | ||||
|         break; | ||||
|     case MIGRATION_STATUS_SETUP: | ||||
|     case MIGRATION_STATUS_CANCELLING: | ||||
|     case MIGRATION_STATUS_CANCELLED: | ||||
|     case MIGRATION_STATUS_ACTIVE: | ||||
|     case MIGRATION_STATUS_POSTCOPY_ACTIVE: | ||||
|     case MIGRATION_STATUS_FAILED: | ||||
|     case MIGRATION_STATUS_COLO: | ||||
|         info->has_status = true; | ||||
|         break; | ||||
|     case MIGRATION_STATUS_COMPLETED: | ||||
|         info->has_status = true; | ||||
|         fill_destination_postcopy_migration_info(info); | ||||
|         break; | ||||
|     } | ||||
|     info->status = mis->state; | ||||
| } | ||||
| 
 | ||||
| MigrationInfo *qmp_query_migrate(Error **errp) | ||||
| { | ||||
|     MigrationInfo *info = g_malloc0(sizeof(*info)); | ||||
| 
 | ||||
|     fill_destination_migration_info(info); | ||||
|     fill_source_migration_info(info); | ||||
| 
 | ||||
|     return info; | ||||
| } | ||||
| 
 | ||||
| void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, | ||||
|                                   Error **errp) | ||||
| { | ||||
| @ -1541,6 +1575,15 @@ bool migrate_zero_blocks(void) | ||||
|     return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; | ||||
| } | ||||
| 
 | ||||
| bool migrate_postcopy_blocktime(void) | ||||
| { | ||||
|     MigrationState *s; | ||||
| 
 | ||||
|     s = migrate_get_current(); | ||||
| 
 | ||||
|     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; | ||||
| } | ||||
| 
 | ||||
| bool migrate_use_compression(void) | ||||
| { | ||||
|     MigrationState *s; | ||||
|  | ||||
| @ -22,6 +22,8 @@ | ||||
| #include "hw/qdev.h" | ||||
| #include "io/channel.h" | ||||
| 
 | ||||
| struct PostcopyBlocktimeContext; | ||||
| 
 | ||||
| /* State for the incoming migration */ | ||||
| struct MigrationIncomingState { | ||||
|     QEMUFile *from_src_file; | ||||
| @ -65,10 +67,20 @@ struct MigrationIncomingState { | ||||
|     /* The coroutine we should enter (back) after failover */ | ||||
|     Coroutine *migration_incoming_co; | ||||
|     QemuSemaphore colo_incoming_sem; | ||||
| 
 | ||||
|     /* | ||||
|      * PostcopyBlocktimeContext keeps information for postcopy | ||||
|      * live migration, used to calculate vCPU block time | ||||
|      */ | ||||
|     struct PostcopyBlocktimeContext *blocktime_ctx; | ||||
| }; | ||||
| 
 | ||||
| MigrationIncomingState *migration_incoming_get_current(void); | ||||
| void migration_incoming_state_destroy(void); | ||||
| /*
 | ||||
|  * Functions to work with blocktime context | ||||
|  */ | ||||
| void fill_destination_postcopy_migration_info(MigrationInfo *info); | ||||
| 
 | ||||
| #define TYPE_MIGRATION "migration" | ||||
| 
 | ||||
| @ -230,6 +242,7 @@ int migrate_compress_level(void); | ||||
| int migrate_compress_threads(void); | ||||
| int migrate_decompress_threads(void); | ||||
| bool migrate_use_events(void); | ||||
| bool migrate_postcopy_blocktime(void); | ||||
| 
 | ||||
| /* Sending on the return path - generic and then for each message type */ | ||||
| void migrate_send_rp_shut(MigrationIncomingState *mis, | ||||
|  | ||||
| @ -90,6 +90,103 @@ int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp) | ||||
| #include <sys/eventfd.h> | ||||
| #include <linux/userfaultfd.h> | ||||
| 
 | ||||
| typedef struct PostcopyBlocktimeContext { | ||||
|     /* time when page fault initiated per vCPU */ | ||||
|     uint32_t *page_fault_vcpu_time; | ||||
|     /* page address per vCPU */ | ||||
|     uintptr_t *vcpu_addr; | ||||
|     uint32_t total_blocktime; | ||||
|     /* blocktime per vCPU */ | ||||
|     uint32_t *vcpu_blocktime; | ||||
|     /* point in time when last page fault was initiated */ | ||||
|     uint32_t last_begin; | ||||
|     /* number of vCPUs currently suspended */ | ||||
|     int smp_cpus_down; | ||||
|     uint64_t start_time; | ||||
| 
 | ||||
|     /*
 | ||||
|      * Handler for exit event, necessary for | ||||
|      * releasing whole blocktime_ctx | ||||
|      */ | ||||
|     Notifier exit_notifier; | ||||
| } PostcopyBlocktimeContext; | ||||
| 
 | ||||
| static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx) | ||||
| { | ||||
|     g_free(ctx->page_fault_vcpu_time); | ||||
|     g_free(ctx->vcpu_addr); | ||||
|     g_free(ctx->vcpu_blocktime); | ||||
|     g_free(ctx); | ||||
| } | ||||
| 
 | ||||
| static void migration_exit_cb(Notifier *n, void *data) | ||||
| { | ||||
|     PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext, | ||||
|                                                  exit_notifier); | ||||
|     destroy_blocktime_context(ctx); | ||||
| } | ||||
| 
 | ||||
| static struct PostcopyBlocktimeContext *blocktime_context_new(void) | ||||
| { | ||||
|     PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1); | ||||
|     ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus); | ||||
|     ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus); | ||||
|     ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus); | ||||
| 
 | ||||
|     ctx->exit_notifier.notify = migration_exit_cb; | ||||
|     ctx->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); | ||||
|     qemu_add_exit_notifier(&ctx->exit_notifier); | ||||
|     return ctx; | ||||
| } | ||||
| 
 | ||||
| static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx) | ||||
| { | ||||
|     uint32List *list = NULL, *entry = NULL; | ||||
|     int i; | ||||
| 
 | ||||
|     for (i = smp_cpus - 1; i >= 0; i--) { | ||||
|         entry = g_new0(uint32List, 1); | ||||
|         entry->value = ctx->vcpu_blocktime[i]; | ||||
|         entry->next = list; | ||||
|         list = entry; | ||||
|     } | ||||
| 
 | ||||
|     return list; | ||||
| } | ||||
| 
 | ||||
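get_vcpu_blocktime_list() above builds the QAPI uint32List by walking the
vCPUs backwards and prepending each entry, so the list comes out in
ascending vCPU order without a separate reversal pass. A standalone sketch
of the same idiom, using a hypothetical Node type in place of the QAPI
list:

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct Node {
        unsigned value;
        struct Node *next;
    } Node;

    int main(void)
    {
        unsigned blocktime[4] = {12, 7, 30, 5};
        Node *list = NULL;

        /* prepend starting from the last element: the head ends up
         * being index 0, i.e. the list is in vCPU order */
        for (int i = 3; i >= 0; i--) {
            Node *n = malloc(sizeof(*n));
            if (!n) {
                exit(1);
            }
            n->value = blocktime[i];
            n->next = list;
            list = n;
        }

        for (Node *p = list; p; p = p->next) {
            printf("%u ", p->value);    /* prints: 12 7 30 5 */
        }
        printf("\n");

        while (list) {                  /* free the list */
            Node *next = list->next;
            free(list);
            list = next;
        }
        return 0;
    }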
| /* | ||||
|  * Populate MigrationInfo from postcopy's blocktime context. It does | ||||
|  * nothing unless the postcopy-blocktime capability was set (i.e. the | ||||
|  * blocktime context exists). | ||||
|  * | ||||
|  * @info: pointer to MigrationInfo to populate | ||||
|  */ | ||||
| void fill_destination_postcopy_migration_info(MigrationInfo *info) | ||||
| { | ||||
|     MigrationIncomingState *mis = migration_incoming_get_current(); | ||||
|     PostcopyBlocktimeContext *bc = mis->blocktime_ctx; | ||||
| 
 | ||||
|     if (!bc) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     info->has_postcopy_blocktime = true; | ||||
|     info->postcopy_blocktime = bc->total_blocktime; | ||||
|     info->has_postcopy_vcpu_blocktime = true; | ||||
|     info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc); | ||||
| } | ||||
| 
 | ||||
| static uint32_t get_postcopy_total_blocktime(void) | ||||
| { | ||||
|     MigrationIncomingState *mis = migration_incoming_get_current(); | ||||
|     PostcopyBlocktimeContext *bc = mis->blocktime_ctx; | ||||
| 
 | ||||
|     if (!bc) { | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|     return bc->total_blocktime; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * receive_ufd_features: check userfault fd features, to request only supported | ||||
| @ -182,6 +279,19 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
| #ifdef UFFD_FEATURE_THREAD_ID | ||||
|     if (migrate_postcopy_blocktime() && mis && | ||||
|         UFFD_FEATURE_THREAD_ID & supported_features) { | ||||
|         /* the kernel supports this feature */ | ||||
|         /* don't re-create blocktime_ctx if it already exists */ | ||||
|         if (!mis->blocktime_ctx) { | ||||
|             mis->blocktime_ctx = blocktime_context_new(); | ||||
|         } | ||||
| 
 | ||||
|         asked_features |= UFFD_FEATURE_THREAD_ID; | ||||
|     } | ||||
| #endif | ||||
| 
 | ||||
|     /*
 | ||||
|      * request features, even if asked_features is 0, due to | ||||
|      * kernel expects UFFD_API before UFFDIO_REGISTER, per | ||||
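The negotiation above follows the userfaultfd protocol: UFFDIO_API must be
issued before any UFFDIO_REGISTER, and feature bits such as
UFFD_FEATURE_THREAD_ID are granted or refused by the kernel at that point.
A minimal standalone sketch of that handshake, independent of QEMU
(requires Linux with userfaultfd; the ioctl fails with EINVAL if a
requested feature is unsupported):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/userfaultfd.h>

    int main(void)
    {
        int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
        if (ufd < 0) {
            perror("userfaultfd");
            return 1;
        }

        struct uffdio_api api;
        memset(&api, 0, sizeof(api));
        api.api = UFFD_API;
    #ifdef UFFD_FEATURE_THREAD_ID
        api.features = UFFD_FEATURE_THREAD_ID;  /* ask for faulting TIDs */
    #endif
        if (ioctl(ufd, UFFDIO_API, &api) < 0) {
            perror("UFFDIO_API");   /* e.g. feature not supported */
            close(ufd);
            return 1;
        }
        printf("thread-id feature %s\n",
               (api.features & UFFD_FEATURE_THREAD_ID) ? "granted"
                                                       : "absent");
        close(ufd);
        return 0;
    }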
| @ -451,6 +561,9 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) | ||||
|         munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size); | ||||
|         mis->postcopy_tmp_zero_page = NULL; | ||||
|     } | ||||
|     trace_postcopy_ram_incoming_cleanup_blocktime( | ||||
|             get_postcopy_total_blocktime()); | ||||
| 
 | ||||
|     trace_postcopy_ram_incoming_cleanup_exit(); | ||||
|     return 0; | ||||
| } | ||||
| @ -575,6 +688,148 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb, | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| static int get_mem_fault_cpu_index(uint32_t pid) | ||||
| { | ||||
|     CPUState *cpu_iter; | ||||
| 
 | ||||
|     CPU_FOREACH(cpu_iter) { | ||||
|         if (cpu_iter->thread_id == pid) { | ||||
|             trace_get_mem_fault_cpu_index(cpu_iter->cpu_index, pid); | ||||
|             return cpu_iter->cpu_index; | ||||
|         } | ||||
|     } | ||||
|     trace_get_mem_fault_cpu_index(-1, pid); | ||||
|     return -1; | ||||
| } | ||||
| 
 | ||||
| static uint32_t get_low_time_offset(PostcopyBlocktimeContext *dc) | ||||
| { | ||||
|     int64_t start_time_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - | ||||
|                                     dc->start_time; | ||||
|     return start_time_offset < 1 ? 1 : start_time_offset & UINT32_MAX; | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Called when a page fault occurs; it records when the vCPU | ||||
|  * started blocking, for the blocktime calculation. | ||||
|  * | ||||
|  * @addr: faulted host virtual address | ||||
|  * @ptid: thread id of the faulting process | ||||
|  * @rb: ramblock appropriate to addr | ||||
|  */ | ||||
| static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid, | ||||
|                                           RAMBlock *rb) | ||||
| { | ||||
|     int cpu, already_received; | ||||
|     MigrationIncomingState *mis = migration_incoming_get_current(); | ||||
|     PostcopyBlocktimeContext *dc = mis->blocktime_ctx; | ||||
|     uint32_t low_time_offset; | ||||
| 
 | ||||
|     if (!dc || ptid == 0) { | ||||
|         return; | ||||
|     } | ||||
|     cpu = get_mem_fault_cpu_index(ptid); | ||||
|     if (cpu < 0) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     low_time_offset = get_low_time_offset(dc); | ||||
|     if (dc->vcpu_addr[cpu] == 0) { | ||||
|         atomic_inc(&dc->smp_cpus_down); | ||||
|     } | ||||
| 
 | ||||
|     atomic_xchg(&dc->last_begin, low_time_offset); | ||||
|     atomic_xchg(&dc->page_fault_vcpu_time[cpu], low_time_offset); | ||||
|     atomic_xchg(&dc->vcpu_addr[cpu], addr); | ||||
| 
 | ||||
|     /* Check it here rather than at the beginning of the function: | ||||
|      * the check could otherwise occur earlier than the bitmap_set in | ||||
|      * qemu_ufd_copy_ioctl */ | ||||
|     already_received = ramblock_recv_bitmap_test(rb, (void *)addr); | ||||
|     if (already_received) { | ||||
|         atomic_xchg(&dc->vcpu_addr[cpu], 0); | ||||
|         atomic_xchg(&dc->page_fault_vcpu_time[cpu], 0); | ||||
|         atomic_dec(&dc->smp_cpus_down); | ||||
|     } | ||||
|     trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu], | ||||
|                                         cpu, already_received); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  *  This function provides the calculated blocktime per vCPU and | ||||
|  *  traces it.  The total blocktime is accumulated here as well. | ||||
|  * | ||||
|  * Assume we have 3 CPUs: | ||||
|  * | ||||
|  *      S1        E1           S1               E1 | ||||
|  * -----***********------------xxx***************------------------------> CPU1 | ||||
|  * | ||||
|  *             S2                E2 | ||||
|  * ------------****************xxx---------------------------------------> CPU2 | ||||
|  * | ||||
|  *                         S3            E3 | ||||
|  * ------------------------****xxx********-------------------------------> CPU3 | ||||
|  * | ||||
|  * We have the sequence S1,S2,E1,S3,S1,E2,E3,E1 | ||||
|  * S2,E1 - doesn't match the condition, because the sequence S1,S2,E1 | ||||
|  *        doesn't include CPU3 | ||||
|  * S3,S1,E2 - the sequence includes all CPUs, so the overlap S1,E2 is | ||||
|  *            part of the total blocktime. | ||||
|  * S1 - here is last_begin | ||||
|  * Legend: | ||||
|  *              * - means blocktime per vCPU | ||||
|  *              x - means overlapped blocktime (total blocktime) | ||||
|  * | ||||
|  * @addr: host virtual address | ||||
|  */ | ||||
| static void mark_postcopy_blocktime_end(uintptr_t addr) | ||||
| { | ||||
|     MigrationIncomingState *mis = migration_incoming_get_current(); | ||||
|     PostcopyBlocktimeContext *dc = mis->blocktime_ctx; | ||||
|     int i, affected_cpu = 0; | ||||
|     bool vcpu_total_blocktime = false; | ||||
|     uint32_t read_vcpu_time, low_time_offset; | ||||
| 
 | ||||
|     if (!dc) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     low_time_offset = get_low_time_offset(dc); | ||||
|     /* Look up the cpu in order to clear it. This algorithm is | ||||
|      * straightforward but not optimal; a better one would keep a | ||||
|      * tree or hash keyed by address, with a list of vCPUs as the | ||||
|      * value */ | ||||
|     for (i = 0; i < smp_cpus; i++) { | ||||
|         uint32_t vcpu_blocktime = 0; | ||||
| 
 | ||||
|         read_vcpu_time = atomic_fetch_add(&dc->page_fault_vcpu_time[i], 0); | ||||
|         if (atomic_fetch_add(&dc->vcpu_addr[i], 0) != addr || | ||||
|             read_vcpu_time == 0) { | ||||
|             continue; | ||||
|         } | ||||
|         atomic_xchg(&dc->vcpu_addr[i], 0); | ||||
|         vcpu_blocktime = low_time_offset - read_vcpu_time; | ||||
|         affected_cpu += 1; | ||||
|         /* We need to know whether mark_postcopy_blocktime_end was | ||||
|          * called for a faulted page; the other possibility is a | ||||
|          * prefetched page, in which case we shouldn't be here */ | ||||
|         if (!vcpu_total_blocktime && | ||||
|             atomic_fetch_add(&dc->smp_cpus_down, 0) == smp_cpus) { | ||||
|             vcpu_total_blocktime = true; | ||||
|         } | ||||
|         /* continue the loop, since one page could affect several vCPUs */ | ||||
|         dc->vcpu_blocktime[i] += vcpu_blocktime; | ||||
|     } | ||||
| 
 | ||||
|     atomic_sub(&dc->smp_cpus_down, affected_cpu); | ||||
|     if (vcpu_total_blocktime) { | ||||
|         dc->total_blocktime += low_time_offset - atomic_fetch_add( | ||||
|                 &dc->last_begin, 0); | ||||
|     } | ||||
|     trace_mark_postcopy_blocktime_end(addr, dc, dc->total_blocktime, | ||||
|                                       affected_cpu); | ||||
| } | ||||
| 
 | ||||
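To make the overlap rule above concrete, here is a minimal standalone
sketch (not QEMU code) that computes per-vCPU and total blocktime for a
fixed two-vCPU timeline. It uses the closed-form overlap of two intervals
rather than the incremental last_begin/smp_cpus_down bookkeeping of the
real code, which gives the same answer for this simple case:

    #include <stdio.h>
    #include <stdint.h>

    #define NCPUS 2

    int main(void)
    {
        /* fault begin/end timestamps (ms) per vCPU:
         * vCPU0 blocked in [10, 30), vCPU1 blocked in [15, 25) */
        uint32_t begin[NCPUS] = {10, 15};
        uint32_t end[NCPUS]   = {30, 25};
        uint32_t vcpu_blocktime[NCPUS];
        uint32_t total_blocktime = 0;

        /* per-vCPU blocktime is simply end - begin */
        for (int i = 0; i < NCPUS; i++) {
            vcpu_blocktime[i] = end[i] - begin[i];
        }

        /* total (overlapped) blocktime only counts the span where
         * *all* vCPUs were blocked: [last begin, first end) */
        uint32_t last_begin = begin[0] > begin[1] ? begin[0] : begin[1];
        uint32_t first_end  = end[0] < end[1] ? end[0] : end[1];
        if (first_end > last_begin) {
            total_blocktime = first_end - last_begin;
        }

        /* prints: vcpu0: 20 ms, vcpu1: 10 ms, total: 10 ms */
        printf("vcpu0: %u ms, vcpu1: %u ms, total: %u ms\n",
               vcpu_blocktime[0], vcpu_blocktime[1], total_blocktime);
        return 0;
    }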
| /*
 | ||||
|  * Handle faults detected by the USERFAULT markings | ||||
|  */ | ||||
| @ -681,7 +936,12 @@ static void *postcopy_ram_fault_thread(void *opaque) | ||||
|             rb_offset &= ~(qemu_ram_pagesize(rb) - 1); | ||||
|             trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, | ||||
|                                                 qemu_ram_get_idstr(rb), | ||||
|                                                 rb_offset); | ||||
|                                                 rb_offset, | ||||
|                                                 msg.arg.pagefault.feat.ptid); | ||||
|             mark_postcopy_blocktime_begin( | ||||
|                     (uintptr_t)(msg.arg.pagefault.address), | ||||
|                                 msg.arg.pagefault.feat.ptid, rb); | ||||
| 
 | ||||
|             /*
 | ||||
|              * Send the request to the source - we want to request one | ||||
|              * of our host page sizes (which is >= TPS) | ||||
| @ -829,6 +1089,8 @@ static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, | ||||
|     if (!ret) { | ||||
|         ramblock_recv_bitmap_set_range(rb, host_addr, | ||||
|                                        pagesize / qemu_target_page_size()); | ||||
|         mark_postcopy_blocktime_end((uintptr_t)host_addr); | ||||
| 
 | ||||
|     } | ||||
|     return ret; | ||||
| } | ||||
| @ -947,6 +1209,10 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis) | ||||
| 
 | ||||
| #else | ||||
| /* No target OS support, stubs just fail */ | ||||
| void fill_destination_postcopy_migration_info(MigrationInfo *info) | ||||
| { | ||||
| } | ||||
| 
 | ||||
| bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) | ||||
| { | ||||
|     error_report("%s: No OS support", __func__); | ||||
|  | ||||
| @ -658,8 +658,32 @@ uint64_t qemu_get_be64(QEMUFile *f) | ||||
|     return v; | ||||
| } | ||||
| 
 | ||||
| /* Compress size bytes of data start at p with specific compression
 | ||||
|  * level and store the compressed data to the buffer of f. | ||||
| /* return the size after compression, or negative value on error */ | ||||
| static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len, | ||||
|                               const uint8_t *source, size_t source_len) | ||||
| { | ||||
|     int err; | ||||
| 
 | ||||
|     err = deflateReset(stream); | ||||
|     if (err != Z_OK) { | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     stream->avail_in = source_len; | ||||
|     stream->next_in = (uint8_t *)source; | ||||
|     stream->avail_out = dest_len; | ||||
|     stream->next_out = dest; | ||||
| 
 | ||||
|     err = deflate(stream, Z_FINISH); | ||||
|     if (err != Z_STREAM_END) { | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     return stream->next_out - dest; | ||||
| } | ||||
| 
 | ||||
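qemu_compress_data() above exists so that each compression thread can
reuse a single z_stream via deflateReset(), instead of paying the
allocation and teardown of zlib's internal state that compress2() incurs
on every page. A minimal standalone sketch of that reuse pattern against
plain zlib (buffer sizes and the loop count are arbitrary):

    #include <stdio.h>
    #include <string.h>
    #include <zlib.h>

    int main(void)
    {
        unsigned char in[4096], out[8192];
        z_stream s;

        memset(in, 'A', sizeof(in));
        memset(&s, 0, sizeof(s));           /* zalloc/zfree = Z_NULL */
        if (deflateInit(&s, Z_BEST_SPEED) != Z_OK) {
            return 1;
        }

        for (int page = 0; page < 3; page++) {
            /* deflateReset() keeps the allocated state, only resets it */
            if (deflateReset(&s) != Z_OK) {
                break;
            }
            s.next_in = in;
            s.avail_in = sizeof(in);
            s.next_out = out;
            s.avail_out = sizeof(out);
            /* a whole page in one call: anything but Z_STREAM_END
             * means the output buffer was too small or an error */
            if (deflate(&s, Z_FINISH) != Z_STREAM_END) {
                break;
            }
            printf("page %d compressed to %ld bytes\n",
                   page, (long)(s.next_out - out));
        }

        deflateEnd(&s);
        return 0;
    }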
| /* Compress size bytes of data starting at p and store the | ||||
|  * compressed data in the buffer of f. | ||||
|  * | ||||
|  * When f is not writable, return -1 if f has no space to save the | ||||
|  * compressed data. | ||||
| @ -667,9 +691,8 @@ uint64_t qemu_get_be64(QEMUFile *f) | ||||
|  * do fflush first, if f still has no space to save the compressed | ||||
|  * data, return -1. | ||||
|  */ | ||||
| 
 | ||||
| ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size, | ||||
|                                   int level) | ||||
| ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, | ||||
|                                   const uint8_t *p, size_t size) | ||||
| { | ||||
|     ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t); | ||||
| 
 | ||||
| @ -683,11 +706,13 @@ ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size, | ||||
|             return -1; | ||||
|         } | ||||
|     } | ||||
|     if (compress2(f->buf + f->buf_index + sizeof(int32_t), (uLongf *)&blen, | ||||
|                   (Bytef *)p, size, level) != Z_OK) { | ||||
|         error_report("Compress Failed!"); | ||||
|         return 0; | ||||
| 
 | ||||
|     blen = qemu_compress_data(stream, f->buf + f->buf_index + sizeof(int32_t), | ||||
|                               blen, p, size); | ||||
|     if (blen < 0) { | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     qemu_put_be32(f, blen); | ||||
|     if (f->ops->writev_buffer) { | ||||
|         add_to_iovec(f, f->buf + f->buf_index, blen, false); | ||||
|  | ||||
| @ -25,6 +25,8 @@ | ||||
| #ifndef MIGRATION_QEMU_FILE_H | ||||
| #define MIGRATION_QEMU_FILE_H | ||||
| 
 | ||||
| #include <zlib.h> | ||||
| 
 | ||||
| /* Read a chunk of data from a file at the given position.  The pos argument
 | ||||
|  * can be ignored if the file is only be used for streaming.  The number of | ||||
|  * bytes actually read should be returned. | ||||
| @ -132,8 +134,8 @@ bool qemu_file_is_writable(QEMUFile *f); | ||||
| 
 | ||||
| size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); | ||||
| size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); | ||||
| ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size, | ||||
|                                   int level); | ||||
| ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, | ||||
|                                   const uint8_t *p, size_t size); | ||||
| int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); | ||||
| 
 | ||||
| /*
 | ||||
|  | ||||
							
								
								
									
migration/ram.c | 488
							| @ -269,6 +269,10 @@ struct CompressParam { | ||||
|     QemuCond cond; | ||||
|     RAMBlock *block; | ||||
|     ram_addr_t offset; | ||||
| 
 | ||||
|     /* internally used fields */ | ||||
|     z_stream stream; | ||||
|     uint8_t *originbuf; | ||||
| }; | ||||
| typedef struct CompressParam CompressParam; | ||||
| 
 | ||||
| @ -280,6 +284,7 @@ struct DecompressParam { | ||||
|     void *des; | ||||
|     uint8_t *compbuf; | ||||
|     int len; | ||||
|     z_stream stream; | ||||
| }; | ||||
| typedef struct DecompressParam DecompressParam; | ||||
| 
 | ||||
| @ -294,13 +299,14 @@ static QemuCond comp_done_cond; | ||||
| /* The empty QEMUFileOps will be used by file in CompressParam */ | ||||
| static const QEMUFileOps empty_ops = { }; | ||||
| 
 | ||||
| static QEMUFile *decomp_file; | ||||
| static DecompressParam *decomp_param; | ||||
| static QemuThread *decompress_threads; | ||||
| static QemuMutex decomp_done_lock; | ||||
| static QemuCond decomp_done_cond; | ||||
| 
 | ||||
| static int do_compress_ram_page(QEMUFile *f, RAMBlock *block, | ||||
|                                 ram_addr_t offset); | ||||
| static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, | ||||
|                                 ram_addr_t offset, uint8_t *source_buf); | ||||
| 
 | ||||
| static void *do_data_compress(void *opaque) | ||||
| { | ||||
| @ -316,7 +322,8 @@ static void *do_data_compress(void *opaque) | ||||
|             param->block = NULL; | ||||
|             qemu_mutex_unlock(¶m->mutex); | ||||
| 
 | ||||
|             do_compress_ram_page(param->file, block, offset); | ||||
|             do_compress_ram_page(param->file, ¶m->stream, block, offset, | ||||
|                                  param->originbuf); | ||||
| 
 | ||||
|             qemu_mutex_lock(&comp_done_lock); | ||||
|             param->done = true; | ||||
| @ -357,10 +364,20 @@ static void compress_threads_save_cleanup(void) | ||||
|     terminate_compression_threads(); | ||||
|     thread_count = migrate_compress_threads(); | ||||
|     for (i = 0; i < thread_count; i++) { | ||||
|         /* | ||||
|          * we use it as an indicator of whether the thread was | ||||
|          * properly initialized or not | ||||
|          */ | ||||
|         if (!comp_param[i].file) { | ||||
|             break; | ||||
|         } | ||||
|         qemu_thread_join(compress_threads + i); | ||||
|         qemu_fclose(comp_param[i].file); | ||||
|         qemu_mutex_destroy(&comp_param[i].mutex); | ||||
|         qemu_cond_destroy(&comp_param[i].cond); | ||||
|         deflateEnd(&comp_param[i].stream); | ||||
|         g_free(comp_param[i].originbuf); | ||||
|         qemu_fclose(comp_param[i].file); | ||||
|         comp_param[i].file = NULL; | ||||
|     } | ||||
|     qemu_mutex_destroy(&comp_done_lock); | ||||
|     qemu_cond_destroy(&comp_done_cond); | ||||
| @ -370,12 +387,12 @@ static void compress_threads_save_cleanup(void) | ||||
|     comp_param = NULL; | ||||
| } | ||||
| 
 | ||||
| static void compress_threads_save_setup(void) | ||||
| static int compress_threads_save_setup(void) | ||||
| { | ||||
|     int i, thread_count; | ||||
| 
 | ||||
|     if (!migrate_use_compression()) { | ||||
|         return; | ||||
|         return 0; | ||||
|     } | ||||
|     thread_count = migrate_compress_threads(); | ||||
|     compress_threads = g_new0(QemuThread, thread_count); | ||||
| @ -383,6 +400,17 @@ static void compress_threads_save_setup(void) | ||||
|     qemu_cond_init(&comp_done_cond); | ||||
|     qemu_mutex_init(&comp_done_lock); | ||||
|     for (i = 0; i < thread_count; i++) { | ||||
|         comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE); | ||||
|         if (!comp_param[i].originbuf) { | ||||
|             goto exit; | ||||
|         } | ||||
| 
 | ||||
|         if (deflateInit(&comp_param[i].stream, | ||||
|                         migrate_compress_level()) != Z_OK) { | ||||
|             g_free(comp_param[i].originbuf); | ||||
|             goto exit; | ||||
|         } | ||||
| 
 | ||||
|         /* comp_param[i].file is just used as a dummy buffer to save data,
 | ||||
|          * set its ops to empty. | ||||
|          */ | ||||
| @ -395,6 +423,11 @@ static void compress_threads_save_setup(void) | ||||
|                            do_data_compress, comp_param + i, | ||||
|                            QEMU_THREAD_JOINABLE); | ||||
|     } | ||||
|     return 0; | ||||
| 
 | ||||
| exit: | ||||
|     compress_threads_save_cleanup(); | ||||
|     return -1; | ||||
| } | ||||
| 
 | ||||
| /* Multiple fd's */ | ||||
| @ -941,6 +974,72 @@ static void ram_release_pages(const char *rbname, uint64_t offset, int pages) | ||||
|     ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * @pages: the number of pages written by the control path, | ||||
|  *        < 0 - error | ||||
|  *        > 0 - number of pages written | ||||
|  * | ||||
|  * Returns true if the page has been saved, otherwise false. | ||||
|  */ | ||||
| static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, | ||||
|                               int *pages) | ||||
| { | ||||
|     uint64_t bytes_xmit = 0; | ||||
|     int ret; | ||||
| 
 | ||||
|     *pages = -1; | ||||
|     ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE, | ||||
|                                 &bytes_xmit); | ||||
|     if (ret == RAM_SAVE_CONTROL_NOT_SUPP) { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     if (bytes_xmit) { | ||||
|         ram_counters.transferred += bytes_xmit; | ||||
|         *pages = 1; | ||||
|     } | ||||
| 
 | ||||
|     if (ret == RAM_SAVE_CONTROL_DELAYED) { | ||||
|         return true; | ||||
|     } | ||||
| 
 | ||||
|     if (bytes_xmit > 0) { | ||||
|         ram_counters.normal++; | ||||
|     } else if (bytes_xmit == 0) { | ||||
|         ram_counters.duplicate++; | ||||
|     } | ||||
| 
 | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * directly send the page to the stream | ||||
|  * | ||||
|  * Returns the number of pages written. | ||||
|  * | ||||
|  * @rs: current RAM state | ||||
|  * @block: block that contains the page we want to send | ||||
|  * @offset: offset inside the block for the page | ||||
|  * @buf: the page to be sent | ||||
|  * @async: send the page asynchronously | ||||
|  */ | ||||
| static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, | ||||
|                             uint8_t *buf, bool async) | ||||
| { | ||||
|     ram_counters.transferred += save_page_header(rs, rs->f, block, | ||||
|                                                  offset | RAM_SAVE_FLAG_PAGE); | ||||
|     if (async) { | ||||
|         qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, | ||||
|                               migrate_release_ram() & | ||||
|                               migration_in_postcopy()); | ||||
|     } else { | ||||
|         qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); | ||||
|     } | ||||
|     ram_counters.transferred += TARGET_PAGE_SIZE; | ||||
|     ram_counters.normal++; | ||||
|     return 1; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * ram_save_page: send the given page to the stream | ||||
|  * | ||||
| @ -957,73 +1056,31 @@ static void ram_release_pages(const char *rbname, uint64_t offset, int pages) | ||||
| static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) | ||||
| { | ||||
|     int pages = -1; | ||||
|     uint64_t bytes_xmit; | ||||
|     ram_addr_t current_addr; | ||||
|     uint8_t *p; | ||||
|     int ret; | ||||
|     bool send_async = true; | ||||
|     RAMBlock *block = pss->block; | ||||
|     ram_addr_t offset = pss->page << TARGET_PAGE_BITS; | ||||
|     ram_addr_t current_addr = block->offset + offset; | ||||
| 
 | ||||
|     p = block->host + offset; | ||||
|     trace_ram_save_page(block->idstr, (uint64_t)offset, p); | ||||
| 
 | ||||
|     /* In doubt sent page as normal */ | ||||
|     bytes_xmit = 0; | ||||
|     ret = ram_control_save_page(rs->f, block->offset, | ||||
|                            offset, TARGET_PAGE_SIZE, &bytes_xmit); | ||||
|     if (bytes_xmit) { | ||||
|         ram_counters.transferred += bytes_xmit; | ||||
|         pages = 1; | ||||
|     } | ||||
| 
 | ||||
|     XBZRLE_cache_lock(); | ||||
| 
 | ||||
|     current_addr = block->offset + offset; | ||||
| 
 | ||||
|     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { | ||||
|         if (ret != RAM_SAVE_CONTROL_DELAYED) { | ||||
|             if (bytes_xmit > 0) { | ||||
|                 ram_counters.normal++; | ||||
|             } else if (bytes_xmit == 0) { | ||||
|                 ram_counters.duplicate++; | ||||
|             } | ||||
|         } | ||||
|     } else { | ||||
|         pages = save_zero_page(rs, block, offset); | ||||
|         if (pages > 0) { | ||||
|             /* Must let xbzrle know, otherwise a previous (now 0'd) cached
 | ||||
|              * page would be stale | ||||
|     if (!rs->ram_bulk_stage && !migration_in_postcopy() && | ||||
|         migrate_use_xbzrle()) { | ||||
|         pages = save_xbzrle_page(rs, &p, current_addr, block, | ||||
|                                  offset, last_stage); | ||||
|         if (!last_stage) { | ||||
|             /* Can't send this cached data async, since the cache page
 | ||||
|              * might get updated before it gets to the wire | ||||
|              */ | ||||
|             xbzrle_cache_zero_page(rs, current_addr); | ||||
|             ram_release_pages(block->idstr, offset, pages); | ||||
|         } else if (!rs->ram_bulk_stage && | ||||
|                    !migration_in_postcopy() && migrate_use_xbzrle()) { | ||||
|             pages = save_xbzrle_page(rs, &p, current_addr, block, | ||||
|                                      offset, last_stage); | ||||
|             if (!last_stage) { | ||||
|                 /* Can't send this cached data async, since the cache page
 | ||||
|                  * might get updated before it gets to the wire | ||||
|                  */ | ||||
|                 send_async = false; | ||||
|             } | ||||
|             send_async = false; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /* XBZRLE overflow or normal page */ | ||||
|     if (pages == -1) { | ||||
|         ram_counters.transferred += | ||||
|             save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_PAGE); | ||||
|         if (send_async) { | ||||
|             qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE, | ||||
|                                   migrate_release_ram() & | ||||
|                                   migration_in_postcopy()); | ||||
|         } else { | ||||
|             qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE); | ||||
|         } | ||||
|         ram_counters.transferred += TARGET_PAGE_SIZE; | ||||
|         pages = 1; | ||||
|         ram_counters.normal++; | ||||
|         pages = save_normal_page(rs, block, offset, p, send_async); | ||||
|     } | ||||
| 
 | ||||
|     XBZRLE_cache_unlock(); | ||||
| @ -1031,8 +1088,8 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) | ||||
|     return pages; | ||||
| } | ||||
| 
 | ||||
| static int do_compress_ram_page(QEMUFile *f, RAMBlock *block, | ||||
|                                 ram_addr_t offset) | ||||
| static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, | ||||
|                                 ram_addr_t offset, uint8_t *source_buf) | ||||
| { | ||||
|     RAMState *rs = ram_state; | ||||
|     int bytes_sent, blen; | ||||
| @ -1040,8 +1097,14 @@ static int do_compress_ram_page(QEMUFile *f, RAMBlock *block, | ||||
| 
 | ||||
|     bytes_sent = save_page_header(rs, f, block, offset | | ||||
|                                   RAM_SAVE_FLAG_COMPRESS_PAGE); | ||||
|     blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE, | ||||
|                                      migrate_compress_level()); | ||||
| 
 | ||||
|     /* | ||||
|      * copy it to an internal buffer to keep the VM from modifying it | ||||
|      * while it is compressed, so that errors can be caught during | ||||
|      * compression and decompression | ||||
|      */ | ||||
|     memcpy(source_buf, p, TARGET_PAGE_SIZE); | ||||
|     blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE); | ||||
|     if (blen < 0) { | ||||
|         bytes_sent = 0; | ||||
|         qemu_file_set_error(migrate_get_current()->to_dst_file, blen); | ||||
| @ -1121,83 +1184,6 @@ static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block, | ||||
|     return pages; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * ram_save_compressed_page: compress the given page and send it to the stream | ||||
|  * | ||||
|  * Returns the number of pages written. | ||||
|  * | ||||
|  * @rs: current RAM state | ||||
|  * @block: block that contains the page we want to send | ||||
|  * @offset: offset inside the block for the page | ||||
|  * @last_stage: if we are at the completion stage | ||||
|  */ | ||||
| static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss, | ||||
|                                     bool last_stage) | ||||
| { | ||||
|     int pages = -1; | ||||
|     uint64_t bytes_xmit = 0; | ||||
|     uint8_t *p; | ||||
|     int ret, blen; | ||||
|     RAMBlock *block = pss->block; | ||||
|     ram_addr_t offset = pss->page << TARGET_PAGE_BITS; | ||||
| 
 | ||||
|     p = block->host + offset; | ||||
| 
 | ||||
|     ret = ram_control_save_page(rs->f, block->offset, | ||||
|                                 offset, TARGET_PAGE_SIZE, &bytes_xmit); | ||||
|     if (bytes_xmit) { | ||||
|         ram_counters.transferred += bytes_xmit; | ||||
|         pages = 1; | ||||
|     } | ||||
|     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { | ||||
|         if (ret != RAM_SAVE_CONTROL_DELAYED) { | ||||
|             if (bytes_xmit > 0) { | ||||
|                 ram_counters.normal++; | ||||
|             } else if (bytes_xmit == 0) { | ||||
|                 ram_counters.duplicate++; | ||||
|             } | ||||
|         } | ||||
|     } else { | ||||
|         /* When starting the process of a new block, the first page of
 | ||||
|          * the block should be sent out before other pages in the same | ||||
|          * block, and all the pages in last block should have been sent | ||||
|          * out, keeping this order is important, because the 'cont' flag | ||||
|          * is used to avoid resending the block name. | ||||
|          */ | ||||
|         if (block != rs->last_sent_block) { | ||||
|             flush_compressed_data(rs); | ||||
|             pages = save_zero_page(rs, block, offset); | ||||
|             if (pages == -1) { | ||||
|                 /* Make sure the first page is sent out before other pages */ | ||||
|                 bytes_xmit = save_page_header(rs, rs->f, block, offset | | ||||
|                                               RAM_SAVE_FLAG_COMPRESS_PAGE); | ||||
|                 blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE, | ||||
|                                                  migrate_compress_level()); | ||||
|                 if (blen > 0) { | ||||
|                     ram_counters.transferred += bytes_xmit + blen; | ||||
|                     ram_counters.normal++; | ||||
|                     pages = 1; | ||||
|                 } else { | ||||
|                     qemu_file_set_error(rs->f, blen); | ||||
|                     error_report("compressed data failed!"); | ||||
|                 } | ||||
|             } | ||||
|             if (pages > 0) { | ||||
|                 ram_release_pages(block->idstr, offset, pages); | ||||
|             } | ||||
|         } else { | ||||
|             pages = save_zero_page(rs, block, offset); | ||||
|             if (pages == -1) { | ||||
|                 pages = compress_page_with_multi_thread(rs, block, offset); | ||||
|             } else { | ||||
|                 ram_release_pages(block->idstr, offset, pages); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return pages; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * find_dirty_block: find the next dirty page and update any state | ||||
|  * associated with the search process. | ||||
| @ -1434,44 +1420,80 @@ err: | ||||
|     return -1; | ||||
| } | ||||
| 
 | ||||
| static bool save_page_use_compression(RAMState *rs) | ||||
| { | ||||
|     if (!migrate_use_compression()) { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     /*
 | ||||
|      * If xbzrle is on, stop using data compression after the first | ||||
|      * round of migration even if compression is enabled. In theory, | ||||
|      * xbzrle can do better than compression. | ||||
|      */ | ||||
|     if (rs->ram_bulk_stage || !migrate_use_xbzrle()) { | ||||
|         return true; | ||||
|     } | ||||
| 
 | ||||
|     return false; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * ram_save_target_page: save one target page | ||||
|  * | ||||
|  * Returns the number of pages written | ||||
|  * | ||||
|  * @rs: current RAM state | ||||
|  * @ms: current migration state | ||||
|  * @pss: data about the page we want to send | ||||
|  * @last_stage: if we are at the completion stage | ||||
|  */ | ||||
| static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, | ||||
|                                 bool last_stage) | ||||
| { | ||||
|     int res = 0; | ||||
|     RAMBlock *block = pss->block; | ||||
|     ram_addr_t offset = pss->page << TARGET_PAGE_BITS; | ||||
|     int res; | ||||
| 
 | ||||
|     /* Check whether the page is dirty, and send it if so */ | ||||
|     if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { | ||||
|         /*
 | ||||
|          * If xbzrle is on, stop using the data compression after first | ||||
|          * round of migration even if compression is enabled. In theory, | ||||
|          * xbzrle can do better than compression. | ||||
|          */ | ||||
|         if (migrate_use_compression() && | ||||
|             (rs->ram_bulk_stage || !migrate_use_xbzrle())) { | ||||
|             res = ram_save_compressed_page(rs, pss, last_stage); | ||||
|         } else { | ||||
|             res = ram_save_page(rs, pss, last_stage); | ||||
|         } | ||||
| 
 | ||||
|         if (res < 0) { | ||||
|             return res; | ||||
|         } | ||||
|         if (pss->block->unsentmap) { | ||||
|             clear_bit(pss->page, pss->block->unsentmap); | ||||
|         } | ||||
|     if (control_save_page(rs, block, offset, &res)) { | ||||
|         return res; | ||||
|     } | ||||
| 
 | ||||
|     return res; | ||||
|     /*
 | ||||
|      * When starting the process of a new block, the first page of | ||||
|      * the block should be sent out before other pages in the same | ||||
|      * block, and all the pages in the last block should have been | ||||
|      * sent out. Keeping this order is important, because the 'cont' | ||||
|      * flag is used to avoid resending the block name. | ||||
|      */ | ||||
|     if (block != rs->last_sent_block && save_page_use_compression(rs)) { | ||||
|         flush_compressed_data(rs); | ||||
|     } | ||||
| 
 | ||||
|     res = save_zero_page(rs, block, offset); | ||||
|     if (res > 0) { | ||||
|         /* Must let xbzrle know, otherwise a previous (now 0'd) cached
 | ||||
|          * page would be stale | ||||
|          */ | ||||
|         if (!save_page_use_compression(rs)) { | ||||
|             XBZRLE_cache_lock(); | ||||
|             xbzrle_cache_zero_page(rs, block->offset + offset); | ||||
|             XBZRLE_cache_unlock(); | ||||
|         } | ||||
|         ram_release_pages(block->idstr, offset, res); | ||||
|         return res; | ||||
|     } | ||||
| 
 | ||||
|     /*
 | ||||
|      * Make sure the first page is sent out before other pages. | ||||
|      * | ||||
|      * We post the first page as a normal page, because compressing | ||||
|      * it would take much CPU resource. | ||||
|      */ | ||||
|     if (block == rs->last_sent_block && save_page_use_compression(rs)) { | ||||
|         return compress_page_with_multi_thread(rs, block, offset); | ||||
|     } | ||||
| 
 | ||||
|     return ram_save_page(rs, pss, last_stage); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
| @ -1500,12 +1522,22 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, | ||||
|         qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; | ||||
| 
 | ||||
|     do { | ||||
|         /* Check whether the page is dirty, and send it if so */ | ||||
|         if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { | ||||
|             pss->page++; | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         tmppages = ram_save_target_page(rs, pss, last_stage); | ||||
|         if (tmppages < 0) { | ||||
|             return tmppages; | ||||
|         } | ||||
| 
 | ||||
|         pages += tmppages; | ||||
|         if (pss->block->unsentmap) { | ||||
|             clear_bit(pss->page, pss->block->unsentmap); | ||||
|         } | ||||
| 
 | ||||
|         pss->page++; | ||||
|     } while ((pss->page & (pagesize_bits - 1)) && | ||||
|              offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); | ||||
| @ -2214,9 +2246,14 @@ static int ram_save_setup(QEMUFile *f, void *opaque) | ||||
|     RAMState **rsp = opaque; | ||||
|     RAMBlock *block; | ||||
| 
 | ||||
|     if (compress_threads_save_setup()) { | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     /* migration has already setup the bitmap, reuse it. */ | ||||
|     if (!migration_in_colo_state()) { | ||||
|         if (ram_init_all(rsp) != 0) { | ||||
|             compress_threads_save_cleanup(); | ||||
|             return -1; | ||||
|         } | ||||
|     } | ||||
| @ -2236,7 +2273,6 @@ static int ram_save_setup(QEMUFile *f, void *opaque) | ||||
|     } | ||||
| 
 | ||||
|     rcu_read_unlock(); | ||||
|     compress_threads_save_setup(); | ||||
| 
 | ||||
|     ram_control_before_iterate(f, RAM_CONTROL_SETUP); | ||||
|     ram_control_after_iterate(f, RAM_CONTROL_SETUP); | ||||
| @ -2501,12 +2537,37 @@ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /* return the size after decompression, or negative value on error */ | ||||
| static int | ||||
| qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len, | ||||
|                      const uint8_t *source, size_t source_len) | ||||
| { | ||||
|     int err; | ||||
| 
 | ||||
|     err = inflateReset(stream); | ||||
|     if (err != Z_OK) { | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     stream->avail_in = source_len; | ||||
|     stream->next_in = (uint8_t *)source; | ||||
|     stream->avail_out = dest_len; | ||||
|     stream->next_out = dest; | ||||
| 
 | ||||
|     err = inflate(stream, Z_NO_FLUSH); | ||||
|     if (err != Z_STREAM_END) { | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     return stream->total_out; | ||||
| } | ||||
| 
 | ||||
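The decompression counterpart works the same way: one inflate stream per
thread, reset with inflateReset() for every page, mirroring
qemu_uncompress_data() above. A standalone sketch (the input is produced
with zlib's one-shot compress() just to have something to decode):

    #include <stdio.h>
    #include <string.h>
    #include <zlib.h>

    int main(void)
    {
        unsigned char page[4096], comp[8192], out[4096];
        uLongf comp_len = sizeof(comp);
        z_stream s;

        memset(page, 'A', sizeof(page));
        if (compress(comp, &comp_len, page, sizeof(page)) != Z_OK) {
            return 1;
        }

        memset(&s, 0, sizeof(s));
        if (inflateInit(&s) != Z_OK) {
            return 1;
        }
        for (int i = 0; i < 3; i++) {
            if (inflateReset(&s) != Z_OK) {
                break;
            }
            s.next_in = comp;
            s.avail_in = comp_len;
            s.next_out = out;
            s.avail_out = sizeof(out);
            /* a whole page must decompress in one call, so expect
             * Z_STREAM_END; anything else indicates corrupt input */
            if (inflate(&s, Z_NO_FLUSH) != Z_STREAM_END) {
                fprintf(stderr, "corrupt page %d\n", i);
                break;
            }
            printf("page %d: %lu bytes out\n",
                   i, (unsigned long)s.total_out);
        }
        inflateEnd(&s);
        return 0;
    }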
| static void *do_data_decompress(void *opaque) | ||||
| { | ||||
|     DecompressParam *param = opaque; | ||||
|     unsigned long pagesize; | ||||
|     uint8_t *des; | ||||
|     int len; | ||||
|     int len, ret; | ||||
| 
 | ||||
|     qemu_mutex_lock(¶m->mutex); | ||||
|     while (!param->quit) { | ||||
| @ -2517,13 +2578,13 @@ static void *do_data_decompress(void *opaque) | ||||
|             qemu_mutex_unlock(¶m->mutex); | ||||
| 
 | ||||
|             pagesize = TARGET_PAGE_SIZE; | ||||
|             /* uncompress() will return failed in some case, especially
 | ||||
|              * when the page is dirted when doing the compression, it's | ||||
|              * not a problem because the dirty page will be retransferred | ||||
|              * and uncompress() won't break the data in other pages. | ||||
|              */ | ||||
|             uncompress((Bytef *)des, &pagesize, | ||||
|                        (const Bytef *)param->compbuf, len); | ||||
| 
 | ||||
|             ret = qemu_uncompress_data(¶m->stream, des, pagesize, | ||||
|                                        param->compbuf, len); | ||||
|             if (ret < 0) { | ||||
|                 error_report("decompress data failed"); | ||||
|                 qemu_file_set_error(decomp_file, ret); | ||||
|             } | ||||
| 
 | ||||
|             qemu_mutex_lock(&decomp_done_lock); | ||||
|             param->done = true; | ||||
| @ -2540,12 +2601,12 @@ static void *do_data_decompress(void *opaque) | ||||
|     return NULL; | ||||
| } | ||||
| 
 | ||||
| static void wait_for_decompress_done(void) | ||||
| static int wait_for_decompress_done(void) | ||||
| { | ||||
|     int idx, thread_count; | ||||
| 
 | ||||
|     if (!migrate_use_compression()) { | ||||
|         return; | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|     thread_count = migrate_decompress_threads(); | ||||
| @ -2556,30 +2617,7 @@ static void wait_for_decompress_done(void) | ||||
|         } | ||||
|     } | ||||
|     qemu_mutex_unlock(&decomp_done_lock); | ||||
| } | ||||
| 
 | ||||
| static void compress_threads_load_setup(void) | ||||
| { | ||||
|     int i, thread_count; | ||||
| 
 | ||||
|     if (!migrate_use_compression()) { | ||||
|         return; | ||||
|     } | ||||
|     thread_count = migrate_decompress_threads(); | ||||
|     decompress_threads = g_new0(QemuThread, thread_count); | ||||
|     decomp_param = g_new0(DecompressParam, thread_count); | ||||
|     qemu_mutex_init(&decomp_done_lock); | ||||
|     qemu_cond_init(&decomp_done_cond); | ||||
|     for (i = 0; i < thread_count; i++) { | ||||
|         qemu_mutex_init(&decomp_param[i].mutex); | ||||
|         qemu_cond_init(&decomp_param[i].cond); | ||||
|         decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE)); | ||||
|         decomp_param[i].done = true; | ||||
|         decomp_param[i].quit = false; | ||||
|         qemu_thread_create(decompress_threads + i, "decompress", | ||||
|                            do_data_decompress, decomp_param + i, | ||||
|                            QEMU_THREAD_JOINABLE); | ||||
|     } | ||||
|     return qemu_file_get_error(decomp_file); | ||||
| } | ||||
| 
 | ||||
| static void compress_threads_load_cleanup(void) | ||||
| @ -2591,21 +2629,70 @@ static void compress_threads_load_cleanup(void) | ||||
|     } | ||||
|     thread_count = migrate_decompress_threads(); | ||||
|     for (i = 0; i < thread_count; i++) { | ||||
|         /* | ||||
|          * we use it as an indicator of whether the thread was | ||||
|          * properly initialized or not | ||||
|          */ | ||||
|         if (!decomp_param[i].compbuf) { | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         qemu_mutex_lock(&decomp_param[i].mutex); | ||||
|         decomp_param[i].quit = true; | ||||
|         qemu_cond_signal(&decomp_param[i].cond); | ||||
|         qemu_mutex_unlock(&decomp_param[i].mutex); | ||||
|     } | ||||
|     for (i = 0; i < thread_count; i++) { | ||||
|         if (!decomp_param[i].compbuf) { | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         qemu_thread_join(decompress_threads + i); | ||||
|         qemu_mutex_destroy(&decomp_param[i].mutex); | ||||
|         qemu_cond_destroy(&decomp_param[i].cond); | ||||
|         inflateEnd(&decomp_param[i].stream); | ||||
|         g_free(decomp_param[i].compbuf); | ||||
|         decomp_param[i].compbuf = NULL; | ||||
|     } | ||||
|     g_free(decompress_threads); | ||||
|     g_free(decomp_param); | ||||
|     decompress_threads = NULL; | ||||
|     decomp_param = NULL; | ||||
|     decomp_file = NULL; | ||||
| } | ||||
| 
 | ||||
| static int compress_threads_load_setup(QEMUFile *f) | ||||
| { | ||||
|     int i, thread_count; | ||||
| 
 | ||||
|     if (!migrate_use_compression()) { | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|     thread_count = migrate_decompress_threads(); | ||||
|     decompress_threads = g_new0(QemuThread, thread_count); | ||||
|     decomp_param = g_new0(DecompressParam, thread_count); | ||||
|     qemu_mutex_init(&decomp_done_lock); | ||||
|     qemu_cond_init(&decomp_done_cond); | ||||
|     decomp_file = f; | ||||
|     for (i = 0; i < thread_count; i++) { | ||||
|         if (inflateInit(&decomp_param[i].stream) != Z_OK) { | ||||
|             goto exit; | ||||
|         } | ||||
| 
 | ||||
|         decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE)); | ||||
|         qemu_mutex_init(&decomp_param[i].mutex); | ||||
|         qemu_cond_init(&decomp_param[i].cond); | ||||
|         decomp_param[i].done = true; | ||||
|         decomp_param[i].quit = false; | ||||
|         qemu_thread_create(decompress_threads + i, "decompress", | ||||
|                            do_data_decompress, decomp_param + i, | ||||
|                            QEMU_THREAD_JOINABLE); | ||||
|     } | ||||
|     return 0; | ||||
| exit: | ||||
|     compress_threads_load_cleanup(); | ||||
|     return -1; | ||||
| } | ||||
| 
 | ||||
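compress_threads_load_setup()/compress_threads_load_cleanup() above rely
on a sentinel field (compbuf here, file on the save side) to distinguish
fully initialized thread slots from ones a failed setup never reached. A
generic standalone sketch of that partial-init/unwind idiom (hypothetical
Slot type, not QEMU code):

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct {
        char *buf;  /* sentinel: non-NULL only once the slot is set up */
    } Slot;

    static Slot *slots;
    static int nslots;

    static void slots_cleanup(void)
    {
        for (int i = 0; i < nslots; i++) {
            if (!slots[i].buf) {
                break;          /* first slot that never got set up */
            }
            free(slots[i].buf);
            slots[i].buf = NULL;
        }
        free(slots);
        slots = NULL;
    }

    static int slots_setup(int n)
    {
        slots = calloc(n, sizeof(Slot));
        if (!slots) {
            return -1;
        }
        nslots = n;
        for (int i = 0; i < n; i++) {
            slots[i].buf = malloc(4096);
            if (!slots[i].buf) {
                goto exit;      /* unwind only what already exists */
            }
        }
        return 0;
    exit:
        slots_cleanup();
        return -1;
    }

    int main(void)
    {
        if (slots_setup(4) == 0) {
            puts("setup ok");
            slots_cleanup();
        }
        return 0;
    }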
| static void decompress_data_with_multi_threads(QEMUFile *f, | ||||
| @ -2647,8 +2734,11 @@ static void decompress_data_with_multi_threads(QEMUFile *f, | ||||
|  */ | ||||
| static int ram_load_setup(QEMUFile *f, void *opaque) | ||||
| { | ||||
|     if (compress_threads_load_setup(f)) { | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     xbzrle_load_setup(); | ||||
|     compress_threads_load_setup(); | ||||
|     ramblock_recv_map_init(); | ||||
|     return 0; | ||||
| } | ||||
| @ -2999,7 +3089,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     wait_for_decompress_done(); | ||||
|     ret |= wait_for_decompress_done(); | ||||
|     rcu_read_unlock(); | ||||
|     trace_ram_load_complete(ret, seq_iter); | ||||
|     return ret; | ||||
|  | ||||
| @ -115,6 +115,8 @@ process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d" | ||||
| process_incoming_migration_co_postcopy_end_main(void) "" | ||||
| migration_set_incoming_channel(void *ioc, const char *ioctype) "ioc=%p ioctype=%s" | ||||
| migration_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostname, void *err)  "ioc=%p ioctype=%s hostname=%s err=%p" | ||||
| mark_postcopy_blocktime_begin(uint64_t addr, void *dd, uint32_t time, int cpu, int received) "addr: 0x%" PRIx64 ", dd: %p, time: %u, cpu: %d, already_received: %d" | ||||
| mark_postcopy_blocktime_end(uint64_t addr, void *dd, uint32_t time, int affected_cpu) "addr: 0x%" PRIx64 ", dd: %p, time: %u, affected_cpu: %d" | ||||
| 
 | ||||
| # migration/rdma.c | ||||
| qemu_rdma_accept_incoming_migration(void) "" | ||||
| @ -193,11 +195,12 @@ postcopy_ram_fault_thread_exit(void) "" | ||||
| postcopy_ram_fault_thread_fds_core(int baseufd, int quitfd) "ufd: %d quitfd: %d" | ||||
| postcopy_ram_fault_thread_fds_extra(size_t index, const char *name, int fd) "%zd/%s: %d" | ||||
| postcopy_ram_fault_thread_quit(void) "" | ||||
| postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=0x%" PRIx64 " rb=%s offset=0x%zx" | ||||
| postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset, uint32_t pid) "Request for HVA=0x%" PRIx64 " rb=%s offset=0x%zx pid=%u" | ||||
| postcopy_ram_incoming_cleanup_closeuf(void) "" | ||||
| postcopy_ram_incoming_cleanup_entry(void) "" | ||||
| postcopy_ram_incoming_cleanup_exit(void) "" | ||||
| postcopy_ram_incoming_cleanup_join(void) "" | ||||
| postcopy_ram_incoming_cleanup_blocktime(uint64_t total) "total blocktime %" PRIu64 | ||||
| postcopy_request_shared_page(const char *sharer, const char *rb, uint64_t rb_offset) "for %s in %s offset 0x%"PRIx64 | ||||
| postcopy_request_shared_page_present(const char *sharer, const char *rb, uint64_t rb_offset) "%s already %s offset 0x%"PRIx64 | ||||
| postcopy_wake_shared(uint64_t client_addr, const char *rb) "at 0x%"PRIx64" in %s" | ||||
| @ -206,6 +209,7 @@ save_xbzrle_page_skipping(void) "" | ||||
| save_xbzrle_page_overflow(void) "" | ||||
| ram_save_iterate_big_wait(uint64_t milliseconds, int iterations) "big wait: %" PRIu64 " milliseconds, %d iterations" | ||||
| ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64 | ||||
| get_mem_fault_cpu_index(int cpu, uint32_t pid) "cpu: %d, pid: %u" | ||||
| 
 | ||||
| # migration/exec.c | ||||
| migration_exec_outgoing(const char *cmd) "cmd=%s" | ||||
|  | ||||
| @ -155,6 +155,13 @@ | ||||
| # @error-desc: the human readable error description string, when | ||||
| #              @status is 'failed'. Clients should not attempt to parse the | ||||
| #              error strings. (Since 2.7) | ||||
| # | ||||
| # @postcopy-blocktime: total time when all vCPUs were blocked during | ||||
| #           postcopy live migration (Since 2.13) | ||||
| # | ||||
| # @postcopy-vcpu-blocktime: list of the postcopy blocktime per vCPU (Since 2.13) | ||||
| # | ||||
| # Since: 0.14.0 | ||||
| ## | ||||
| @ -167,7 +174,9 @@ | ||||
|            '*downtime': 'int', | ||||
|            '*setup-time': 'int', | ||||
|            '*cpu-throttle-percentage': 'int', | ||||
|            '*error-desc': 'str'} } | ||||
|            '*error-desc': 'str', | ||||
|            '*postcopy-blocktime' : 'uint32', | ||||
|            '*postcopy-vcpu-blocktime': ['uint32']} } | ||||
| 
 | ||||
| ## | ||||
| # @query-migrate: | ||||
| @ -354,16 +363,20 @@ | ||||
| # | ||||
| # @x-multifd: Use more than one fd for migration (since 2.11) | ||||
| # | ||||
| # @dirty-bitmaps: If enabled, QEMU will migrate named dirty bitmaps. | ||||
| #                 (since 2.12) | ||||
| # | ||||
| # @postcopy-blocktime: Calculate downtime for postcopy live migration | ||||
| #                     (since 2.13) | ||||
| # | ||||
| # Since: 1.2 | ||||
| ## | ||||
| { 'enum': 'MigrationCapability', | ||||
|   'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', | ||||
|            'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', | ||||
|            'block', 'return-path', 'pause-before-switchover', 'x-multifd', | ||||
|            'dirty-bitmaps' ] } | ||||
|            'dirty-bitmaps', 'postcopy-blocktime' ] } | ||||
| 
 | ||||
| ## | ||||
| # @MigrationCapabilityStatus: | ||||
|  | ||||
| @ -26,6 +26,7 @@ | ||||
| const unsigned start_address = 1024 * 1024; | ||||
| const unsigned end_address = 100 * 1024 * 1024; | ||||
| bool got_stop; | ||||
| static bool uffd_feature_thread_id; | ||||
| 
 | ||||
| #if defined(__linux__) | ||||
| #include <sys/syscall.h> | ||||
| @ -55,6 +56,7 @@ static bool ufd_version_check(void) | ||||
|         g_test_message("Skipping test: UFFDIO_API failed"); | ||||
|         return false; | ||||
|     } | ||||
|     uffd_feature_thread_id = api_struct.features & UFFD_FEATURE_THREAD_ID; | ||||
| 
 | ||||
|     ioctl_mask = (__u64)1 << _UFFDIO_REGISTER | | ||||
|                  (__u64)1 << _UFFDIO_UNREGISTER; | ||||
| @ -223,6 +225,16 @@ static uint64_t get_migration_pass(QTestState *who) | ||||
|     return result; | ||||
| } | ||||
| 
 | ||||
| static void read_blocktime(QTestState *who) | ||||
| { | ||||
|     QDict *rsp, *rsp_return; | ||||
| 
 | ||||
|     rsp = wait_command(who, "{ 'execute': 'query-migrate' }"); | ||||
|     rsp_return = qdict_get_qdict(rsp, "return"); | ||||
|     g_assert(qdict_haskey(rsp_return, "postcopy-blocktime")); | ||||
|     QDECREF(rsp); | ||||
| } | ||||
| 
 | ||||
| static void wait_for_migration_complete(QTestState *who) | ||||
| { | ||||
|     while (true) { | ||||
| @ -533,6 +545,7 @@ static void test_migrate(void) | ||||
| 
 | ||||
|     migrate_set_capability(from, "postcopy-ram", "true"); | ||||
|     migrate_set_capability(to, "postcopy-ram", "true"); | ||||
|     migrate_set_capability(to, "postcopy-blocktime", "true"); | ||||
| 
 | ||||
|     /* We want to pick a speed slow enough that the test completes
 | ||||
|      * quickly, but that it doesn't complete precopy even on a slow | ||||
| @ -559,6 +572,9 @@ static void test_migrate(void) | ||||
|     wait_for_serial("dest_serial"); | ||||
|     wait_for_migration_complete(from); | ||||
| 
 | ||||
|     if (uffd_feature_thread_id) { | ||||
|         read_blocktime(to); | ||||
|     } | ||||
|     g_free(uri); | ||||
| 
 | ||||
|     test_migrate_end(from, to, true); | ||||
|  | ||||