Merge remote-tracking branch 'quintela/migration.next' into staging
# By Michael R. Hines (8) and others
# Via Juan Quintela
* quintela/migration.next:
  migration: add autoconvergence documentation
  Fix real mode guest segments dpl value in savevm
  Fix real mode guest migration
  rdma: account for the time spent in MIG_STATE_SETUP through QMP
  rdma: introduce MIG_STATE_NONE and change MIG_STATE_SETUP state transition
  rdma: allow state transitions between other states besides ACTIVE
  rdma: send pc.ram
  rdma: core logic
  rdma: introduce ram_handle_compressed()
  rdma: bugfix: ram_control_save_page()
  rdma: update documentation to reflect new unpin support

Message-id: 1374590725-14144-1-git-send-email-quintela@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
		
						commit
						f03d07d468
					
				@ -51,6 +51,7 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
 | 
				
			|||||||
common-obj-$(CONFIG_LINUX) += fsdev/
 | 
					common-obj-$(CONFIG_LINUX) += fsdev/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
common-obj-y += migration.o migration-tcp.o
 | 
					common-obj-y += migration.o migration-tcp.o
 | 
				
			||||||
 | 
					common-obj-$(CONFIG_RDMA) += migration-rdma.o
 | 
				
			||||||
common-obj-y += qemu-char.o #aio.o
 | 
					common-obj-y += qemu-char.o #aio.o
 | 
				
			||||||
common-obj-y += block-migration.o
 | 
					common-obj-y += block-migration.o
 | 
				
			||||||
common-obj-y += page_cache.o xbzrle.o
 | 
					common-obj-y += page_cache.o xbzrle.o
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										62
									
								
								arch_init.c
									
									
									
									
									
								
							
							
						
						
									
										62
									
								
								arch_init.c
									
									
									
									
									
								
							@ -118,6 +118,7 @@ static void check_guest_throttling(void);
 | 
				
			|||||||
#define RAM_SAVE_FLAG_EOS      0x10
 | 
					#define RAM_SAVE_FLAG_EOS      0x10
 | 
				
			||||||
#define RAM_SAVE_FLAG_CONTINUE 0x20
 | 
					#define RAM_SAVE_FLAG_CONTINUE 0x20
 | 
				
			||||||
#define RAM_SAVE_FLAG_XBZRLE   0x40
 | 
					#define RAM_SAVE_FLAG_XBZRLE   0x40
 | 
				
			||||||
 | 
					/* 0x80 is reserved in migration.h start with 0x100 next */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static struct defconfig_file {
 | 
					static struct defconfig_file {
 | 
				
			||||||
@ -475,6 +476,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
 | 
				
			|||||||
                ram_bulk_stage = false;
 | 
					                ram_bulk_stage = false;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
 | 
					            int ret;
 | 
				
			||||||
            uint8_t *p;
 | 
					            uint8_t *p;
 | 
				
			||||||
            int cont = (block == last_sent_block) ?
 | 
					            int cont = (block == last_sent_block) ?
 | 
				
			||||||
                RAM_SAVE_FLAG_CONTINUE : 0;
 | 
					                RAM_SAVE_FLAG_CONTINUE : 0;
 | 
				
			||||||
@ -483,7 +485,18 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
            /* In doubt sent page as normal */
 | 
					            /* In doubt sent page as normal */
 | 
				
			||||||
            bytes_sent = -1;
 | 
					            bytes_sent = -1;
 | 
				
			||||||
            if (is_zero_page(p)) {
 | 
					            ret = ram_control_save_page(f, block->offset,
 | 
				
			||||||
 | 
					                               offset, TARGET_PAGE_SIZE, &bytes_sent);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
 | 
				
			||||||
 | 
					                if (ret != RAM_SAVE_CONTROL_DELAYED) {
 | 
				
			||||||
 | 
					                    if (bytes_sent > 0) {
 | 
				
			||||||
 | 
					                        acct_info.norm_pages++;
 | 
				
			||||||
 | 
					                    } else if (bytes_sent == 0) {
 | 
				
			||||||
 | 
					                        acct_info.dup_pages++;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            } else if (is_zero_page(p)) {
 | 
				
			||||||
                acct_info.dup_pages++;
 | 
					                acct_info.dup_pages++;
 | 
				
			||||||
                bytes_sent = save_block_hdr(f, block, offset, cont,
 | 
					                bytes_sent = save_block_hdr(f, block, offset, cont,
 | 
				
			||||||
                                            RAM_SAVE_FLAG_COMPRESS);
 | 
					                                            RAM_SAVE_FLAG_COMPRESS);
 | 
				
			||||||
@ -635,6 +648,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    qemu_mutex_unlock_ramlist();
 | 
					    qemu_mutex_unlock_ramlist();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
 | 
				
			||||||
 | 
					    ram_control_after_iterate(f, RAM_CONTROL_SETUP);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 | 
					    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return 0;
 | 
					    return 0;
 | 
				
			||||||
@ -653,6 +670,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
 | 
				
			|||||||
        reset_ram_globals();
 | 
					        reset_ram_globals();
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ram_control_before_iterate(f, RAM_CONTROL_ROUND);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    t0 = qemu_get_clock_ns(rt_clock);
 | 
					    t0 = qemu_get_clock_ns(rt_clock);
 | 
				
			||||||
    i = 0;
 | 
					    i = 0;
 | 
				
			||||||
    while ((ret = qemu_file_rate_limit(f)) == 0) {
 | 
					    while ((ret = qemu_file_rate_limit(f)) == 0) {
 | 
				
			||||||
@ -684,6 +703,12 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    qemu_mutex_unlock_ramlist();
 | 
					    qemu_mutex_unlock_ramlist();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /*
 | 
				
			||||||
 | 
					     * Must occur before EOS (or any QEMUFile operation)
 | 
				
			||||||
 | 
					     * because of RDMA protocol.
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    ram_control_after_iterate(f, RAM_CONTROL_ROUND);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (ret < 0) {
 | 
					    if (ret < 0) {
 | 
				
			||||||
        bytes_transferred += total_sent;
 | 
					        bytes_transferred += total_sent;
 | 
				
			||||||
        return ret;
 | 
					        return ret;
 | 
				
			||||||
@ -701,6 +726,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
 | 
				
			|||||||
    qemu_mutex_lock_ramlist();
 | 
					    qemu_mutex_lock_ramlist();
 | 
				
			||||||
    migration_bitmap_sync();
 | 
					    migration_bitmap_sync();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ram_control_before_iterate(f, RAM_CONTROL_FINISH);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /* try transferring iterative blocks of memory */
 | 
					    /* try transferring iterative blocks of memory */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /* flush all remaining blocks regardless of rate limiting */
 | 
					    /* flush all remaining blocks regardless of rate limiting */
 | 
				
			||||||
@ -714,6 +741,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
        bytes_transferred += bytes_sent;
 | 
					        bytes_transferred += bytes_sent;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
 | 
				
			||||||
    migration_end();
 | 
					    migration_end();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    qemu_mutex_unlock_ramlist();
 | 
					    qemu_mutex_unlock_ramlist();
 | 
				
			||||||
@ -808,6 +837,24 @@ static inline void *host_from_stream_offset(QEMUFile *f,
 | 
				
			|||||||
    return NULL;
 | 
					    return NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * If a page (or a whole RDMA chunk) has been
 | 
				
			||||||
 | 
					 * determined to be zero, then zap it.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    if (ch != 0 || !is_zero_page(host)) {
 | 
				
			||||||
 | 
					        memset(host, ch, size);
 | 
				
			||||||
 | 
					#ifndef _WIN32
 | 
				
			||||||
 | 
					        if (ch == 0 &&
 | 
				
			||||||
 | 
					            (!kvm_enabled() || kvm_has_sync_mmu()) &&
 | 
				
			||||||
 | 
					            getpagesize() <= TARGET_PAGE_SIZE) {
 | 
				
			||||||
 | 
					            qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int ram_load(QEMUFile *f, void *opaque, int version_id)
 | 
					static int ram_load(QEMUFile *f, void *opaque, int version_id)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    ram_addr_t addr;
 | 
					    ram_addr_t addr;
 | 
				
			||||||
@ -879,16 +926,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
 | 
				
			|||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            ch = qemu_get_byte(f);
 | 
					            ch = qemu_get_byte(f);
 | 
				
			||||||
            if (ch != 0 || !is_zero_page(host)) {
 | 
					            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
 | 
				
			||||||
                memset(host, ch, TARGET_PAGE_SIZE);
 | 
					 | 
				
			||||||
#ifndef _WIN32
 | 
					 | 
				
			||||||
                if (ch == 0 &&
 | 
					 | 
				
			||||||
                    (!kvm_enabled() || kvm_has_sync_mmu()) &&
 | 
					 | 
				
			||||||
                    getpagesize() <= TARGET_PAGE_SIZE) {
 | 
					 | 
				
			||||||
                    qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        } else if (flags & RAM_SAVE_FLAG_PAGE) {
 | 
					        } else if (flags & RAM_SAVE_FLAG_PAGE) {
 | 
				
			||||||
            void *host;
 | 
					            void *host;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -908,6 +946,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
 | 
				
			|||||||
                ret = -EINVAL;
 | 
					                ret = -EINVAL;
 | 
				
			||||||
                goto done;
 | 
					                goto done;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					        } else if (flags & RAM_SAVE_FLAG_HOOK) {
 | 
				
			||||||
 | 
					            ram_control_load_hook(f, flags);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        error = qemu_file_get_error(f);
 | 
					        error = qemu_file_get_error(f);
 | 
				
			||||||
        if (error) {
 | 
					        if (error) {
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										40
									
								
								configure
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										40
									
								
								configure
									
									
									
									
										vendored
									
									
								
							@ -180,6 +180,7 @@ xfs=""
 | 
				
			|||||||
vhost_net="no"
 | 
					vhost_net="no"
 | 
				
			||||||
vhost_scsi="no"
 | 
					vhost_scsi="no"
 | 
				
			||||||
kvm="no"
 | 
					kvm="no"
 | 
				
			||||||
 | 
					rdma=""
 | 
				
			||||||
gprof="no"
 | 
					gprof="no"
 | 
				
			||||||
debug_tcg="no"
 | 
					debug_tcg="no"
 | 
				
			||||||
debug="no"
 | 
					debug="no"
 | 
				
			||||||
@ -937,6 +938,10 @@ for opt do
 | 
				
			|||||||
  ;;
 | 
					  ;;
 | 
				
			||||||
  --enable-gtk) gtk="yes"
 | 
					  --enable-gtk) gtk="yes"
 | 
				
			||||||
  ;;
 | 
					  ;;
 | 
				
			||||||
 | 
					  --enable-rdma) rdma="yes"
 | 
				
			||||||
 | 
					  ;;
 | 
				
			||||||
 | 
					  --disable-rdma) rdma="no"
 | 
				
			||||||
 | 
					  ;;
 | 
				
			||||||
  --with-gtkabi=*) gtkabi="$optarg"
 | 
					  --with-gtkabi=*) gtkabi="$optarg"
 | 
				
			||||||
  ;;
 | 
					  ;;
 | 
				
			||||||
  --enable-tpm) tpm="yes"
 | 
					  --enable-tpm) tpm="yes"
 | 
				
			||||||
@ -1095,6 +1100,8 @@ echo "  --enable-bluez           enable bluez stack connectivity"
 | 
				
			|||||||
echo "  --disable-slirp          disable SLIRP userspace network connectivity"
 | 
					echo "  --disable-slirp          disable SLIRP userspace network connectivity"
 | 
				
			||||||
echo "  --disable-kvm            disable KVM acceleration support"
 | 
					echo "  --disable-kvm            disable KVM acceleration support"
 | 
				
			||||||
echo "  --enable-kvm             enable KVM acceleration support"
 | 
					echo "  --enable-kvm             enable KVM acceleration support"
 | 
				
			||||||
 | 
					echo "  --disable-rdma           disable RDMA-based migration support"
 | 
				
			||||||
 | 
					echo "  --enable-rdma            enable RDMA-based migration support"
 | 
				
			||||||
echo "  --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI)"
 | 
					echo "  --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI)"
 | 
				
			||||||
echo "  --disable-nptl           disable usermode NPTL support"
 | 
					echo "  --disable-nptl           disable usermode NPTL support"
 | 
				
			||||||
echo "  --enable-nptl            enable usermode NPTL support"
 | 
					echo "  --enable-nptl            enable usermode NPTL support"
 | 
				
			||||||
@ -1801,6 +1808,30 @@ EOF
 | 
				
			|||||||
  libs_softmmu="$sdl_libs $libs_softmmu"
 | 
					  libs_softmmu="$sdl_libs $libs_softmmu"
 | 
				
			||||||
fi
 | 
					fi
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					##########################################
 | 
				
			||||||
 | 
					# RDMA needs OpenFabrics libraries
 | 
				
			||||||
 | 
					if test "$rdma" != "no" ; then
 | 
				
			||||||
 | 
					  cat > $TMPC <<EOF
 | 
				
			||||||
 | 
					#include <rdma/rdma_cma.h>
 | 
				
			||||||
 | 
					int main(void) { return 0; }
 | 
				
			||||||
 | 
					EOF
 | 
				
			||||||
 | 
					  rdma_libs="-lrdmacm -libverbs"
 | 
				
			||||||
 | 
					  if compile_prog "" "$rdma_libs" ; then
 | 
				
			||||||
 | 
					    rdma="yes"
 | 
				
			||||||
 | 
					    libs_softmmu="$libs_softmmu $rdma_libs"
 | 
				
			||||||
 | 
					  else
 | 
				
			||||||
 | 
					    if test "$rdma" = "yes" ; then
 | 
				
			||||||
 | 
					        error_exit \
 | 
				
			||||||
 | 
					            " OpenFabrics librdmacm/libibverbs not present." \
 | 
				
			||||||
 | 
					            " Your options:" \
 | 
				
			||||||
 | 
					            "  (1) Fast: Install infiniband packages from your distro." \
 | 
				
			||||||
 | 
					            "  (2) Cleanest: Install libraries from www.openfabrics.org" \
 | 
				
			||||||
 | 
					            "  (3) Also: Install softiwarp if you don't have RDMA hardware"
 | 
				
			||||||
 | 
					    fi
 | 
				
			||||||
 | 
					    rdma="no"
 | 
				
			||||||
 | 
					  fi
 | 
				
			||||||
 | 
					fi
 | 
				
			||||||
 | 
					
 | 
				
			||||||
##########################################
 | 
					##########################################
 | 
				
			||||||
# VNC TLS/WS detection
 | 
					# VNC TLS/WS detection
 | 
				
			||||||
if test "$vnc" = "yes" -a \( "$vnc_tls" != "no" -o "$vnc_ws" != "no" \) ; then
 | 
					if test "$vnc" = "yes" -a \( "$vnc_tls" != "no" -o "$vnc_ws" != "no" \) ; then
 | 
				
			||||||
@ -3558,6 +3589,7 @@ echo "Linux AIO support $linux_aio"
 | 
				
			|||||||
echo "ATTR/XATTR support $attr"
 | 
					echo "ATTR/XATTR support $attr"
 | 
				
			||||||
echo "Install blobs     $blobs"
 | 
					echo "Install blobs     $blobs"
 | 
				
			||||||
echo "KVM support       $kvm"
 | 
					echo "KVM support       $kvm"
 | 
				
			||||||
 | 
					echo "RDMA support      $rdma"
 | 
				
			||||||
echo "TCG interpreter   $tcg_interpreter"
 | 
					echo "TCG interpreter   $tcg_interpreter"
 | 
				
			||||||
echo "fdt support       $fdt"
 | 
					echo "fdt support       $fdt"
 | 
				
			||||||
echo "preadv support    $preadv"
 | 
					echo "preadv support    $preadv"
 | 
				
			||||||
@ -4046,6 +4078,10 @@ if test "$trace_default" = "yes"; then
 | 
				
			|||||||
  echo "CONFIG_TRACE_DEFAULT=y" >> $config_host_mak
 | 
					  echo "CONFIG_TRACE_DEFAULT=y" >> $config_host_mak
 | 
				
			||||||
fi
 | 
					fi
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if test "$rdma" = "yes" ; then
 | 
				
			||||||
 | 
					  echo "CONFIG_RDMA=y" >> $config_host_mak
 | 
				
			||||||
 | 
					fi
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if test "$tcg_interpreter" = "yes"; then
 | 
					if test "$tcg_interpreter" = "yes"; then
 | 
				
			||||||
  QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
 | 
					  QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
 | 
				
			||||||
elif test "$ARCH" = "sparc64" ; then
 | 
					elif test "$ARCH" = "sparc64" ; then
 | 
				
			||||||
@ -4485,6 +4521,10 @@ if [ "$pixman" = "internal" ]; then
 | 
				
			|||||||
  echo "config-host.h: subdir-pixman" >> $config_host_mak
 | 
					  echo "config-host.h: subdir-pixman" >> $config_host_mak
 | 
				
			||||||
fi
 | 
					fi
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if test "$rdma" = "yes" ; then
 | 
				
			||||||
 | 
					echo "CONFIG_RDMA=y" >> $config_host_mak
 | 
				
			||||||
 | 
					fi
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if [ "$dtc_internal" = "yes" ]; then
 | 
					if [ "$dtc_internal" = "yes" ]; then
 | 
				
			||||||
  echo "config-host.h: subdir-dtc" >> $config_host_mak
 | 
					  echo "config-host.h: subdir-dtc" >> $config_host_mak
 | 
				
			||||||
fi
 | 
					fi
 | 
				
			||||||
 | 
				
			|||||||
@ -35,7 +35,7 @@ memory tracked during each live migration iteration round cannot keep pace
 | 
				
			|||||||
with the rate of dirty memory produced by the workload.
 | 
					with the rate of dirty memory produced by the workload.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
RDMA currently comes in two flavors: both Ethernet based (RoCE, or RDMA
 | 
					RDMA currently comes in two flavors: both Ethernet based (RoCE, or RDMA
 | 
				
			||||||
over Convered Ethernet) as well as Infiniband-based. This implementation of
 | 
					over Converged Ethernet) as well as Infiniband-based. This implementation of
 | 
				
			||||||
migration using RDMA is capable of using both technologies because of
 | 
					migration using RDMA is capable of using both technologies because of
 | 
				
			||||||
the use of the OpenFabrics OFED software stack that abstracts out the
 | 
					the use of the OpenFabrics OFED software stack that abstracts out the
 | 
				
			||||||
programming model irrespective of the underlying hardware.
 | 
					programming model irrespective of the underlying hardware.
 | 
				
			||||||
@ -188,9 +188,9 @@ header portion and a data portion (but together are transmitted
 | 
				
			|||||||
as a single SEND message).
 | 
					as a single SEND message).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Header:
 | 
					Header:
 | 
				
			||||||
    * Length  (of the data portion, uint32, network byte order)
 | 
					    * Length               (of the data portion, uint32, network byte order)
 | 
				
			||||||
    * Type    (what command to perform, uint32, network byte order)
 | 
					    * Type                 (what command to perform, uint32, network byte order)
 | 
				
			||||||
    * Repeat  (Number of commands in data portion, same type only)
 | 
					    * Repeat               (Number of commands in data portion, same type only)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The 'Repeat' field is here to support future multiple page registrations
 | 
					The 'Repeat' field is here to support future multiple page registrations
 | 
				
			||||||
in a single message without any need to change the protocol itself
 | 
					in a single message without any need to change the protocol itself
 | 
				
			||||||
@ -202,17 +202,19 @@ The maximum number of repeats is hard-coded to 4096. This is a conservative
 | 
				
			|||||||
limit based on the maximum size of a SEND message along with empirical
 | 
					limit based on the maximum size of a SEND message along with empirical
 | 
				
			||||||
observations on the maximum future benefit of simultaneous page registrations.
 | 
					observations on the maximum future benefit of simultaneous page registrations.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The 'type' field has 10 different command values:
 | 
					The 'type' field has 12 different command values:
 | 
				
			||||||
    1. Unused
 | 
					     1. Unused
 | 
				
			||||||
    2. Error              (sent to the source during bad things)
 | 
					     2. Error                      (sent to the source during bad things)
 | 
				
			||||||
    3. Ready              (control-channel is available)
 | 
					     3. Ready                      (control-channel is available)
 | 
				
			||||||
    4. QEMU File          (for sending non-live device state)
 | 
					     4. QEMU File                  (for sending non-live device state)
 | 
				
			||||||
    5. RAM Blocks request (used right after connection setup)
 | 
					     5. RAM Blocks request         (used right after connection setup)
 | 
				
			||||||
    6. RAM Blocks result  (used right after connection setup)
 | 
					     6. RAM Blocks result          (used right after connection setup)
 | 
				
			||||||
    7. Compress page      (zap zero page and skip registration)
 | 
					     7. Compress page              (zap zero page and skip registration)
 | 
				
			||||||
    8. Register request   (dynamic chunk registration)
 | 
					     8. Register request           (dynamic chunk registration)
 | 
				
			||||||
    9. Register result    ('rkey' to be used by sender)
 | 
					     9. Register result            ('rkey' to be used by sender)
 | 
				
			||||||
    10. Register finished  (registration for current iteration finished)
 | 
					    10. Register finished          (registration for current iteration finished)
 | 
				
			||||||
 | 
					    11. Unregister request         (unpin previously registered memory)
 | 
				
			||||||
 | 
					    12. Unregister finished        (confirmation that unpin completed)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
A single control message, as hinted above, can contain within the data
 | 
					A single control message, as hinted above, can contain within the data
 | 
				
			||||||
portion an array of many commands of the same type. If there is more than
 | 
					portion an array of many commands of the same type. If there is more than
 | 
				
			||||||
@ -243,7 +245,7 @@ qemu_rdma_exchange_send(header, data, optional response header & data):
 | 
				
			|||||||
   from the receiver to tell us that the receiver
 | 
					   from the receiver to tell us that the receiver
 | 
				
			||||||
   is *ready* for us to transmit some new bytes.
 | 
					   is *ready* for us to transmit some new bytes.
 | 
				
			||||||
2. Optionally: if we are expecting a response from the command
 | 
					2. Optionally: if we are expecting a response from the command
 | 
				
			||||||
   (that we have no yet transmitted), let's post an RQ
 | 
					   (that we have not yet transmitted), let's post an RQ
 | 
				
			||||||
   work request to receive that data a few moments later.
 | 
					   work request to receive that data a few moments later.
 | 
				
			||||||
3. When the READY arrives, librdmacm will
 | 
					3. When the READY arrives, librdmacm will
 | 
				
			||||||
   unblock us and we immediately post a RQ work request
 | 
					   unblock us and we immediately post a RQ work request
 | 
				
			||||||
@ -293,8 +295,10 @@ librdmacm provides the user with a 'private data' area to be exchanged
 | 
				
			|||||||
at connection-setup time before any infiniband traffic is generated.
 | 
					at connection-setup time before any infiniband traffic is generated.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Header:
 | 
					Header:
 | 
				
			||||||
    * Version (protocol version validated before send/recv occurs), uint32, network byte order
 | 
					    * Version (protocol version validated before send/recv occurs),
 | 
				
			||||||
    * Flags   (bitwise OR of each capability), uint32, network byte order
 | 
					                                               uint32, network byte order
 | 
				
			||||||
 | 
					    * Flags   (bitwise OR of each capability),
 | 
				
			||||||
 | 
					                                               uint32, network byte order
 | 
				
			||||||
 | 
					
 | 
				
			||||||
There is no data portion of this header right now, so there is
 | 
					There is no data portion of this header right now, so there is
 | 
				
			||||||
no length field. The maximum size of the 'private data' section
 | 
					no length field. The maximum size of the 'private data' section
 | 
				
			||||||
@ -313,7 +317,7 @@ If the version is invalid, we throw an error.
 | 
				
			|||||||
If the version is new, we only negotiate the capabilities that the
 | 
					If the version is new, we only negotiate the capabilities that the
 | 
				
			||||||
requested version is able to perform and ignore the rest.
 | 
					requested version is able to perform and ignore the rest.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Currently there is only *one* capability in Version #1: dynamic page registration
 | 
					Currently there is only one capability in Version #1: dynamic page registration
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Finally: Negotiation happens with the Flags field: If the primary-VM
 | 
					Finally: Negotiation happens with the Flags field: If the primary-VM
 | 
				
			||||||
sets a flag, but the destination does not support this capability, it
 | 
					sets a flag, but the destination does not support this capability, it
 | 
				
			||||||
@ -326,8 +330,8 @@ QEMUFileRDMA Interface:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
QEMUFileRDMA introduces a couple of new functions:
 | 
					QEMUFileRDMA introduces a couple of new functions:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
1. qemu_rdma_get_buffer()  (QEMUFileOps rdma_read_ops)
 | 
					1. qemu_rdma_get_buffer()               (QEMUFileOps rdma_read_ops)
 | 
				
			||||||
2. qemu_rdma_put_buffer()  (QEMUFileOps rdma_write_ops)
 | 
					2. qemu_rdma_put_buffer()               (QEMUFileOps rdma_write_ops)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
These two functions are very short and simply use the protocol
 | 
					These two functions are very short and simply use the protocol
 | 
				
			||||||
described above to deliver bytes without changing the upper-level
 | 
					described above to deliver bytes without changing the upper-level
 | 
				
			||||||
@ -413,3 +417,8 @@ TODO:
 | 
				
			|||||||
   the use of KSM and ballooning while using RDMA.
 | 
					   the use of KSM and ballooning while using RDMA.
 | 
				
			||||||
4. Also, some form of balloon-device usage tracking would also
 | 
					4. Also, some form of balloon-device usage tracking would also
 | 
				
			||||||
   help alleviate some issues.
 | 
					   help alleviate some issues.
 | 
				
			||||||
 | 
					5. Move UNREGISTER requests to a separate thread.
 | 
				
			||||||
 | 
					6. Use LRU to provide more fine-grained direction of UNREGISTER
 | 
				
			||||||
 | 
					   requests for unpinning memory in an overcommitted environment.
 | 
				
			||||||
 | 
					7. Expose UNREGISTER support to the user by way of workload-specific
 | 
				
			||||||
 | 
					   hints about application behavior.
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										4
									
								
								hmp.c
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								hmp.c
									
									
									
									
									
								
							@ -164,6 +164,10 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
 | 
				
			|||||||
            monitor_printf(mon, "downtime: %" PRIu64 " milliseconds\n",
 | 
					            monitor_printf(mon, "downtime: %" PRIu64 " milliseconds\n",
 | 
				
			||||||
                           info->downtime);
 | 
					                           info->downtime);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					        if (info->has_setup_time) {
 | 
				
			||||||
 | 
					            monitor_printf(mon, "setup: %" PRIu64 " milliseconds\n",
 | 
				
			||||||
 | 
					                           info->setup_time);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (info->has_ram) {
 | 
					    if (info->has_ram) {
 | 
				
			||||||
 | 
				
			|||||||
@ -49,6 +49,7 @@ struct MigrationState
 | 
				
			|||||||
    int64_t dirty_bytes_rate;
 | 
					    int64_t dirty_bytes_rate;
 | 
				
			||||||
    bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
 | 
					    bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
 | 
				
			||||||
    int64_t xbzrle_cache_size;
 | 
					    int64_t xbzrle_cache_size;
 | 
				
			||||||
 | 
					    int64_t setup_time;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void process_incoming_migration(QEMUFile *f);
 | 
					void process_incoming_migration(QEMUFile *f);
 | 
				
			||||||
@ -77,6 +78,10 @@ void fd_start_incoming_migration(const char *path, Error **errp);
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp);
 | 
					void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void rdma_start_outgoing_migration(void *opaque, const char *host_port, Error **errp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void rdma_start_incoming_migration(const char *host_port, Error **errp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void migrate_fd_error(MigrationState *s);
 | 
					void migrate_fd_error(MigrationState *s);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void migrate_fd_connect(MigrationState *s);
 | 
					void migrate_fd_connect(MigrationState *s);
 | 
				
			||||||
@ -109,6 +114,8 @@ uint64_t xbzrle_mig_pages_transferred(void);
 | 
				
			|||||||
uint64_t xbzrle_mig_pages_overflow(void);
 | 
					uint64_t xbzrle_mig_pages_overflow(void);
 | 
				
			||||||
uint64_t xbzrle_mig_pages_cache_miss(void);
 | 
					uint64_t xbzrle_mig_pages_cache_miss(void);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * @migrate_add_blocker - prevent migration from proceeding
 | 
					 * @migrate_add_blocker - prevent migration from proceeding
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										3249
									
								
								migration-rdma.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3249
									
								
								migration-rdma.c
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										50
									
								
								migration.c
									
									
									
									
									
								
							
							
						
						
									
										50
									
								
								migration.c
									
									
									
									
									
								
							@ -36,7 +36,8 @@
 | 
				
			|||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
enum {
 | 
					enum {
 | 
				
			||||||
    MIG_STATE_ERROR,
 | 
					    MIG_STATE_ERROR = -1,
 | 
				
			||||||
 | 
					    MIG_STATE_NONE,
 | 
				
			||||||
    MIG_STATE_SETUP,
 | 
					    MIG_STATE_SETUP,
 | 
				
			||||||
    MIG_STATE_CANCELLED,
 | 
					    MIG_STATE_CANCELLED,
 | 
				
			||||||
    MIG_STATE_ACTIVE,
 | 
					    MIG_STATE_ACTIVE,
 | 
				
			||||||
@ -63,7 +64,7 @@ static NotifierList migration_state_notifiers =
 | 
				
			|||||||
MigrationState *migrate_get_current(void)
 | 
					MigrationState *migrate_get_current(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    static MigrationState current_migration = {
 | 
					    static MigrationState current_migration = {
 | 
				
			||||||
        .state = MIG_STATE_SETUP,
 | 
					        .state = MIG_STATE_NONE,
 | 
				
			||||||
        .bandwidth_limit = MAX_THROTTLE,
 | 
					        .bandwidth_limit = MAX_THROTTLE,
 | 
				
			||||||
        .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
 | 
					        .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
 | 
				
			||||||
        .mbps = -1,
 | 
					        .mbps = -1,
 | 
				
			||||||
@ -78,6 +79,10 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    if (strstart(uri, "tcp:", &p))
 | 
					    if (strstart(uri, "tcp:", &p))
 | 
				
			||||||
        tcp_start_incoming_migration(p, errp);
 | 
					        tcp_start_incoming_migration(p, errp);
 | 
				
			||||||
 | 
					#ifdef CONFIG_RDMA
 | 
				
			||||||
 | 
					    else if (strstart(uri, "x-rdma:", &p))
 | 
				
			||||||
 | 
					        rdma_start_incoming_migration(p, errp);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
#if !defined(WIN32)
 | 
					#if !defined(WIN32)
 | 
				
			||||||
    else if (strstart(uri, "exec:", &p))
 | 
					    else if (strstart(uri, "exec:", &p))
 | 
				
			||||||
        exec_start_incoming_migration(p, errp);
 | 
					        exec_start_incoming_migration(p, errp);
 | 
				
			||||||
@ -180,9 +185,14 @@ MigrationInfo *qmp_query_migrate(Error **errp)
 | 
				
			|||||||
    MigrationState *s = migrate_get_current();
 | 
					    MigrationState *s = migrate_get_current();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    switch (s->state) {
 | 
					    switch (s->state) {
 | 
				
			||||||
    case MIG_STATE_SETUP:
 | 
					    case MIG_STATE_NONE:
 | 
				
			||||||
        /* no migration has happened ever */
 | 
					        /* no migration has happened ever */
 | 
				
			||||||
        break;
 | 
					        break;
 | 
				
			||||||
 | 
					    case MIG_STATE_SETUP:
 | 
				
			||||||
 | 
					        info->has_status = true;
 | 
				
			||||||
 | 
					        info->status = g_strdup("setup");
 | 
				
			||||||
 | 
					        info->has_total_time = false;
 | 
				
			||||||
 | 
					        break;
 | 
				
			||||||
    case MIG_STATE_ACTIVE:
 | 
					    case MIG_STATE_ACTIVE:
 | 
				
			||||||
        info->has_status = true;
 | 
					        info->has_status = true;
 | 
				
			||||||
        info->status = g_strdup("active");
 | 
					        info->status = g_strdup("active");
 | 
				
			||||||
@ -191,6 +201,8 @@ MigrationInfo *qmp_query_migrate(Error **errp)
 | 
				
			|||||||
            - s->total_time;
 | 
					            - s->total_time;
 | 
				
			||||||
        info->has_expected_downtime = true;
 | 
					        info->has_expected_downtime = true;
 | 
				
			||||||
        info->expected_downtime = s->expected_downtime;
 | 
					        info->expected_downtime = s->expected_downtime;
 | 
				
			||||||
 | 
					        info->has_setup_time = true;
 | 
				
			||||||
 | 
					        info->setup_time = s->setup_time;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        info->has_ram = true;
 | 
					        info->has_ram = true;
 | 
				
			||||||
        info->ram = g_malloc0(sizeof(*info->ram));
 | 
					        info->ram = g_malloc0(sizeof(*info->ram));
 | 
				
			||||||
@ -222,6 +234,8 @@ MigrationInfo *qmp_query_migrate(Error **errp)
 | 
				
			|||||||
        info->total_time = s->total_time;
 | 
					        info->total_time = s->total_time;
 | 
				
			||||||
        info->has_downtime = true;
 | 
					        info->has_downtime = true;
 | 
				
			||||||
        info->downtime = s->downtime;
 | 
					        info->downtime = s->downtime;
 | 
				
			||||||
 | 
					        info->has_setup_time = true;
 | 
				
			||||||
 | 
					        info->setup_time = s->setup_time;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        info->has_ram = true;
 | 
					        info->has_ram = true;
 | 
				
			||||||
        info->ram = g_malloc0(sizeof(*info->ram));
 | 
					        info->ram = g_malloc0(sizeof(*info->ram));
 | 
				
			||||||
@ -253,7 +267,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 | 
				
			|||||||
    MigrationState *s = migrate_get_current();
 | 
					    MigrationState *s = migrate_get_current();
 | 
				
			||||||
    MigrationCapabilityStatusList *cap;
 | 
					    MigrationCapabilityStatusList *cap;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (s->state == MIG_STATE_ACTIVE) {
 | 
					    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
 | 
				
			||||||
        error_set(errp, QERR_MIGRATION_ACTIVE);
 | 
					        error_set(errp, QERR_MIGRATION_ACTIVE);
 | 
				
			||||||
        return;
 | 
					        return;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -291,9 +305,9 @@ static void migrate_fd_cleanup(void *opaque)
 | 
				
			|||||||
    notifier_list_notify(&migration_state_notifiers, s);
 | 
					    notifier_list_notify(&migration_state_notifiers, s);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static void migrate_finish_set_state(MigrationState *s, int new_state)
 | 
					static void migrate_set_state(MigrationState *s, int old_state, int new_state)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    if (atomic_cmpxchg(&s->state, MIG_STATE_ACTIVE, new_state) == new_state) {
 | 
					    if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
 | 
				
			||||||
        trace_migrate_set_state(new_state);
 | 
					        trace_migrate_set_state(new_state);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -311,7 +325,7 @@ static void migrate_fd_cancel(MigrationState *s)
 | 
				
			|||||||
{
 | 
					{
 | 
				
			||||||
    DPRINTF("cancelling migration\n");
 | 
					    DPRINTF("cancelling migration\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    migrate_finish_set_state(s, MIG_STATE_CANCELLED);
 | 
					    migrate_set_state(s, s->state, MIG_STATE_CANCELLED);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void add_migration_state_change_notifier(Notifier *notify)
 | 
					void add_migration_state_change_notifier(Notifier *notify)
 | 
				
			||||||
@ -388,7 +402,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
 | 
				
			|||||||
    params.blk = blk;
 | 
					    params.blk = blk;
 | 
				
			||||||
    params.shared = inc;
 | 
					    params.shared = inc;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (s->state == MIG_STATE_ACTIVE) {
 | 
					    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
 | 
				
			||||||
        error_set(errp, QERR_MIGRATION_ACTIVE);
 | 
					        error_set(errp, QERR_MIGRATION_ACTIVE);
 | 
				
			||||||
        return;
 | 
					        return;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -406,6 +420,10 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    if (strstart(uri, "tcp:", &p)) {
 | 
					    if (strstart(uri, "tcp:", &p)) {
 | 
				
			||||||
        tcp_start_outgoing_migration(s, p, &local_err);
 | 
					        tcp_start_outgoing_migration(s, p, &local_err);
 | 
				
			||||||
 | 
					#ifdef CONFIG_RDMA
 | 
				
			||||||
 | 
					    } else if (strstart(uri, "x-rdma:", &p)) {
 | 
				
			||||||
 | 
					        rdma_start_outgoing_migration(s, p, &local_err);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
#if !defined(WIN32)
 | 
					#if !defined(WIN32)
 | 
				
			||||||
    } else if (strstart(uri, "exec:", &p)) {
 | 
					    } else if (strstart(uri, "exec:", &p)) {
 | 
				
			||||||
        exec_start_outgoing_migration(s, p, &local_err);
 | 
					        exec_start_outgoing_migration(s, p, &local_err);
 | 
				
			||||||
@ -526,6 +544,7 @@ static void *migration_thread(void *opaque)
 | 
				
			|||||||
{
 | 
					{
 | 
				
			||||||
    MigrationState *s = opaque;
 | 
					    MigrationState *s = opaque;
 | 
				
			||||||
    int64_t initial_time = qemu_get_clock_ms(rt_clock);
 | 
					    int64_t initial_time = qemu_get_clock_ms(rt_clock);
 | 
				
			||||||
 | 
					    int64_t setup_start = qemu_get_clock_ms(host_clock);
 | 
				
			||||||
    int64_t initial_bytes = 0;
 | 
					    int64_t initial_bytes = 0;
 | 
				
			||||||
    int64_t max_size = 0;
 | 
					    int64_t max_size = 0;
 | 
				
			||||||
    int64_t start_time = initial_time;
 | 
					    int64_t start_time = initial_time;
 | 
				
			||||||
@ -534,6 +553,11 @@ static void *migration_thread(void *opaque)
 | 
				
			|||||||
    DPRINTF("beginning savevm\n");
 | 
					    DPRINTF("beginning savevm\n");
 | 
				
			||||||
    qemu_savevm_state_begin(s->file, &s->params);
 | 
					    qemu_savevm_state_begin(s->file, &s->params);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    s->setup_time = qemu_get_clock_ms(host_clock) - setup_start;
 | 
				
			||||||
 | 
					    migrate_set_state(s, MIG_STATE_SETUP, MIG_STATE_ACTIVE);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    DPRINTF("setup complete\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    while (s->state == MIG_STATE_ACTIVE) {
 | 
					    while (s->state == MIG_STATE_ACTIVE) {
 | 
				
			||||||
        int64_t current_time;
 | 
					        int64_t current_time;
 | 
				
			||||||
        uint64_t pending_size;
 | 
					        uint64_t pending_size;
 | 
				
			||||||
@ -561,19 +585,19 @@ static void *migration_thread(void *opaque)
 | 
				
			|||||||
                qemu_mutex_unlock_iothread();
 | 
					                qemu_mutex_unlock_iothread();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                if (ret < 0) {
 | 
					                if (ret < 0) {
 | 
				
			||||||
                    migrate_finish_set_state(s, MIG_STATE_ERROR);
 | 
					                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_ERROR);
 | 
				
			||||||
                    break;
 | 
					                    break;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                if (!qemu_file_get_error(s->file)) {
 | 
					                if (!qemu_file_get_error(s->file)) {
 | 
				
			||||||
                    migrate_finish_set_state(s, MIG_STATE_COMPLETED);
 | 
					                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
 | 
				
			||||||
                    break;
 | 
					                    break;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (qemu_file_get_error(s->file)) {
 | 
					        if (qemu_file_get_error(s->file)) {
 | 
				
			||||||
            migrate_finish_set_state(s, MIG_STATE_ERROR);
 | 
					            migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_ERROR);
 | 
				
			||||||
            break;
 | 
					            break;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        current_time = qemu_get_clock_ms(rt_clock);
 | 
					        current_time = qemu_get_clock_ms(rt_clock);
 | 
				
			||||||
@ -624,8 +648,8 @@ static void *migration_thread(void *opaque)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
void migrate_fd_connect(MigrationState *s)
 | 
					void migrate_fd_connect(MigrationState *s)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    s->state = MIG_STATE_ACTIVE;
 | 
					    s->state = MIG_STATE_SETUP;
 | 
				
			||||||
    trace_migrate_set_state(MIG_STATE_ACTIVE);
 | 
					    trace_migrate_set_state(MIG_STATE_SETUP);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /* This is a best 1st approximation. ns to ms */
 | 
					    /* This is a best 1st approximation. ns to ms */
 | 
				
			||||||
    s->expected_downtime = max_downtime/1000000;
 | 
					    s->expected_downtime = max_downtime/1000000;
 | 
				
			||||||
 | 
				
			|||||||
@ -578,6 +578,12 @@
 | 
				
			|||||||
#        expected downtime in milliseconds for the guest in last walk
 | 
					#        expected downtime in milliseconds for the guest in last walk
 | 
				
			||||||
#        of the dirty bitmap. (since 1.3)
 | 
					#        of the dirty bitmap. (since 1.3)
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
 | 
					# @setup-time: #optional amount of setup time in milliseconds _before_ the
 | 
				
			||||||
 | 
					#        iterations begin but _after_ the QMP command is issued. This is designed
 | 
				
			||||||
 | 
					#        to provide an accounting of any activities (such as RDMA pinning) which
 | 
				
			||||||
 | 
					#        may be expensive, but do not actually occur during the iterative
 | 
				
			||||||
 | 
					#        migration rounds themselves. (since 1.6)
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
# Since: 0.14.0
 | 
					# Since: 0.14.0
 | 
				
			||||||
##
 | 
					##
 | 
				
			||||||
{ 'type': 'MigrationInfo',
 | 
					{ 'type': 'MigrationInfo',
 | 
				
			||||||
@ -586,7 +592,8 @@
 | 
				
			|||||||
           '*xbzrle-cache': 'XBZRLECacheStats',
 | 
					           '*xbzrle-cache': 'XBZRLECacheStats',
 | 
				
			||||||
           '*total-time': 'int',
 | 
					           '*total-time': 'int',
 | 
				
			||||||
           '*expected-downtime': 'int',
 | 
					           '*expected-downtime': 'int',
 | 
				
			||||||
           '*downtime': 'int'} }
 | 
					           '*downtime': 'int',
 | 
				
			||||||
 | 
					           '*setup-time': 'int'} }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
##
 | 
					##
 | 
				
			||||||
# @query-migrate
 | 
					# @query-migrate
 | 
				
			||||||
@ -619,6 +626,9 @@
 | 
				
			|||||||
#          to enable the capability on the source VM. The feature is disabled by
 | 
					#          to enable the capability on the source VM. The feature is disabled by
 | 
				
			||||||
#          default. (since 1.6)
 | 
					#          default. (since 1.6)
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
 | 
					# @auto-converge: If enabled, QEMU will automatically throttle down the guest
 | 
				
			||||||
 | 
					#          to speed up convergence of RAM migration. (since 1.6)
 | 
				
			||||||
 | 
					#
 | 
				
			||||||
# Since: 1.2
 | 
					# Since: 1.2
 | 
				
			||||||
##
 | 
					##
 | 
				
			||||||
{ 'enum': 'MigrationCapability',
 | 
					{ 'enum': 'MigrationCapability',
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										2
									
								
								savevm.c
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								savevm.c
									
									
									
									
									
								
							@ -662,7 +662,7 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
 | 
				
			|||||||
                                    offset, size, bytes_sent);
 | 
					                                    offset, size, bytes_sent);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
 | 
					        if (ret != RAM_SAVE_CONTROL_DELAYED) {
 | 
				
			||||||
            if (*bytes_sent > 0) {
 | 
					            if (bytes_sent && *bytes_sent > 0) {
 | 
				
			||||||
                qemu_update_position(f, *bytes_sent);
 | 
					                qemu_update_position(f, *bytes_sent);
 | 
				
			||||||
            } else if (ret < 0) {
 | 
					            } else if (ret < 0) {
 | 
				
			||||||
                qemu_file_set_error(f, ret);
 | 
					                qemu_file_set_error(f, ret);
 | 
				
			||||||
 | 
				
			|||||||
@ -252,6 +252,24 @@ static void cpu_pre_save(void *opaque)
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    env->fpregs_format_vmstate = 0;
 | 
					    env->fpregs_format_vmstate = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /*
 | 
				
			||||||
 | 
					     * Real mode guest segments register DPL should be zero.
 | 
				
			||||||
 | 
					     * Older KVM versions were setting it wrongly.
 | 
				
			||||||
 | 
					     * Fixing it will allow live migration to a host with unrestricted guest
 | 
				
			||||||
 | 
					     * support (otherwise the migration will fail with invalid guest state
 | 
				
			||||||
 | 
					     * error).
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    if (!(env->cr[0] & CR0_PE_MASK) &&
 | 
				
			||||||
 | 
					        (env->segs[R_CS].flags >> DESC_DPL_SHIFT & 3) != 0) {
 | 
				
			||||||
 | 
					        env->segs[R_CS].flags &= ~(env->segs[R_CS].flags & DESC_DPL_MASK);
 | 
				
			||||||
 | 
					        env->segs[R_DS].flags &= ~(env->segs[R_DS].flags & DESC_DPL_MASK);
 | 
				
			||||||
 | 
					        env->segs[R_ES].flags &= ~(env->segs[R_ES].flags & DESC_DPL_MASK);
 | 
				
			||||||
 | 
					        env->segs[R_FS].flags &= ~(env->segs[R_FS].flags & DESC_DPL_MASK);
 | 
				
			||||||
 | 
					        env->segs[R_GS].flags &= ~(env->segs[R_GS].flags & DESC_DPL_MASK);
 | 
				
			||||||
 | 
					        env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int cpu_post_load(void *opaque, int version_id)
 | 
					static int cpu_post_load(void *opaque, int version_id)
 | 
				
			||||||
@ -260,6 +278,24 @@ static int cpu_post_load(void *opaque, int version_id)
 | 
				
			|||||||
    CPUX86State *env = &cpu->env;
 | 
					    CPUX86State *env = &cpu->env;
 | 
				
			||||||
    int i;
 | 
					    int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /*
 | 
				
			||||||
 | 
					     * Real mode guest segments register DPL should be zero.
 | 
				
			||||||
 | 
					     * Older KVM versions were setting it wrongly.
 | 
				
			||||||
 | 
					     * Fixing it will allow live migration from such hosts that don't have
 | 
				
			||||||
 | 
					     * restricted guest support to a host with unrestricted guest support
 | 
				
			||||||
 | 
					     * (otherwise the migration will fail with invalid guest state
 | 
				
			||||||
 | 
					     * error).
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    if (!(env->cr[0] & CR0_PE_MASK) &&
 | 
				
			||||||
 | 
					        (env->segs[R_CS].flags >> DESC_DPL_SHIFT & 3) != 0) {
 | 
				
			||||||
 | 
					        env->segs[R_CS].flags &= ~(env->segs[R_CS].flags & DESC_DPL_MASK);
 | 
				
			||||||
 | 
					        env->segs[R_DS].flags &= ~(env->segs[R_DS].flags & DESC_DPL_MASK);
 | 
				
			||||||
 | 
					        env->segs[R_ES].flags &= ~(env->segs[R_ES].flags & DESC_DPL_MASK);
 | 
				
			||||||
 | 
					        env->segs[R_FS].flags &= ~(env->segs[R_FS].flags & DESC_DPL_MASK);
 | 
				
			||||||
 | 
					        env->segs[R_GS].flags &= ~(env->segs[R_GS].flags & DESC_DPL_MASK);
 | 
				
			||||||
 | 
					        env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /* XXX: restore FPU round state */
 | 
					    /* XXX: restore FPU round state */
 | 
				
			||||||
    env->fpstt = (env->fpus_vmstate >> 11) & 7;
 | 
					    env->fpstt = (env->fpus_vmstate >> 11) & 7;
 | 
				
			||||||
    env->fpus = env->fpus_vmstate & ~0x3800;
 | 
					    env->fpus = env->fpus_vmstate & ~0x3800;
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user