Merge remote-tracking branch 'quintela/migration.next' into staging
# By Michael R. Hines (8) and others # Via Juan Quintela * quintela/migration.next: migration: add autoconvergence documentation Fix real mode guest segments dpl value in savevm Fix real mode guest migration rdma: account for the time spent in MIG_STATE_SETUP through QMP rdma: introduce MIG_STATE_NONE and change MIG_STATE_SETUP state transition rdma: allow state transitions between other states besides ACTIVE rdma: send pc.ram rdma: core logic rdma: introduce ram_handle_compressed() rdma: bugfix: ram_control_save_page() rdma: update documentation to reflect new unpin support Message-id: 1374590725-14144-1-git-send-email-quintela@redhat.com Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
		
						commit
						f03d07d468
					
				@ -51,6 +51,7 @@ common-obj-$(CONFIG_POSIX) += os-posix.o
 | 
			
		||||
common-obj-$(CONFIG_LINUX) += fsdev/
 | 
			
		||||
 | 
			
		||||
common-obj-y += migration.o migration-tcp.o
 | 
			
		||||
common-obj-$(CONFIG_RDMA) += migration-rdma.o
 | 
			
		||||
common-obj-y += qemu-char.o #aio.o
 | 
			
		||||
common-obj-y += block-migration.o
 | 
			
		||||
common-obj-y += page_cache.o xbzrle.o
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										62
									
								
								arch_init.c
									
									
									
									
									
								
							
							
						
						
									
										62
									
								
								arch_init.c
									
									
									
									
									
								
							@ -118,6 +118,7 @@ static void check_guest_throttling(void);
 | 
			
		||||
#define RAM_SAVE_FLAG_EOS      0x10
 | 
			
		||||
#define RAM_SAVE_FLAG_CONTINUE 0x20
 | 
			
		||||
#define RAM_SAVE_FLAG_XBZRLE   0x40
 | 
			
		||||
/* 0x80 is reserved in migration.h; start with 0x100 next */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
static struct defconfig_file {
 | 
			
		||||
@ -475,6 +476,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
 | 
			
		||||
                ram_bulk_stage = false;
 | 
			
		||||
            }
 | 
			
		||||
        } else {
 | 
			
		||||
            int ret;
 | 
			
		||||
            uint8_t *p;
 | 
			
		||||
            int cont = (block == last_sent_block) ?
 | 
			
		||||
                RAM_SAVE_FLAG_CONTINUE : 0;
 | 
			
		||||
@ -483,7 +485,18 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
 | 
			
		||||
 | 
			
		||||
            /* When in doubt, send the page as normal */
 | 
			
		||||
            bytes_sent = -1;
 | 
			
		||||
            if (is_zero_page(p)) {
 | 
			
		||||
            ret = ram_control_save_page(f, block->offset,
 | 
			
		||||
                               offset, TARGET_PAGE_SIZE, &bytes_sent);
 | 
			
		||||
 | 
			
		||||
            if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
 | 
			
		||||
                if (ret != RAM_SAVE_CONTROL_DELAYED) {
 | 
			
		||||
                    if (bytes_sent > 0) {
 | 
			
		||||
                        acct_info.norm_pages++;
 | 
			
		||||
                    } else if (bytes_sent == 0) {
 | 
			
		||||
                        acct_info.dup_pages++;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            } else if (is_zero_page(p)) {
 | 
			
		||||
                acct_info.dup_pages++;
 | 
			
		||||
                bytes_sent = save_block_hdr(f, block, offset, cont,
 | 
			
		||||
                                            RAM_SAVE_FLAG_COMPRESS);
 | 
			
		||||
@ -635,6 +648,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    qemu_mutex_unlock_ramlist();
 | 
			
		||||
 | 
			
		||||
    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
 | 
			
		||||
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);
 | 
			
		||||
 | 
			
		||||
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 | 
			
		||||
 | 
			
		||||
    return 0;
 | 
			
		||||
@ -653,6 +670,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
 | 
			
		||||
        reset_ram_globals();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ram_control_before_iterate(f, RAM_CONTROL_ROUND);
 | 
			
		||||
 | 
			
		||||
    t0 = qemu_get_clock_ns(rt_clock);
 | 
			
		||||
    i = 0;
 | 
			
		||||
    while ((ret = qemu_file_rate_limit(f)) == 0) {
 | 
			
		||||
@ -684,6 +703,12 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
 | 
			
		||||
 | 
			
		||||
    qemu_mutex_unlock_ramlist();
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
     * Must occur before EOS (or any QEMUFile operation)
 | 
			
		||||
     * because of RDMA protocol.
 | 
			
		||||
     */
 | 
			
		||||
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);
 | 
			
		||||
 | 
			
		||||
    if (ret < 0) {
 | 
			
		||||
        bytes_transferred += total_sent;
 | 
			
		||||
        return ret;
 | 
			
		||||
@ -701,6 +726,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
 | 
			
		||||
    qemu_mutex_lock_ramlist();
 | 
			
		||||
    migration_bitmap_sync();
 | 
			
		||||
 | 
			
		||||
    ram_control_before_iterate(f, RAM_CONTROL_FINISH);
 | 
			
		||||
 | 
			
		||||
    /* try transferring iterative blocks of memory */
 | 
			
		||||
 | 
			
		||||
    /* flush all remaining blocks regardless of rate limiting */
 | 
			
		||||
@ -714,6 +741,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
 | 
			
		||||
        }
 | 
			
		||||
        bytes_transferred += bytes_sent;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
 | 
			
		||||
    migration_end();
 | 
			
		||||
 | 
			
		||||
    qemu_mutex_unlock_ramlist();
 | 
			
		||||
@ -808,6 +837,24 @@ static inline void *host_from_stream_offset(QEMUFile *f,
 | 
			
		||||
    return NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * If a page (or a whole RDMA chunk) has been
 | 
			
		||||
 * determined to be zero, then zap it.
 | 
			
		||||
 */
 | 
			
		||||
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
 | 
			
		||||
{
 | 
			
		||||
    if (ch != 0 || !is_zero_page(host)) {
 | 
			
		||||
        memset(host, ch, size);
 | 
			
		||||
#ifndef _WIN32
 | 
			
		||||
        if (ch == 0 &&
 | 
			
		||||
            (!kvm_enabled() || kvm_has_sync_mmu()) &&
 | 
			
		||||
            getpagesize() <= TARGET_PAGE_SIZE) {
 | 
			
		||||
            qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
 | 
			
		||||
        }
 | 
			
		||||
#endif
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int ram_load(QEMUFile *f, void *opaque, int version_id)
 | 
			
		||||
{
 | 
			
		||||
    ram_addr_t addr;
 | 
			
		||||
@ -879,16 +926,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            ch = qemu_get_byte(f);
 | 
			
		||||
            if (ch != 0 || !is_zero_page(host)) {
 | 
			
		||||
                memset(host, ch, TARGET_PAGE_SIZE);
 | 
			
		||||
#ifndef _WIN32
 | 
			
		||||
                if (ch == 0 &&
 | 
			
		||||
                    (!kvm_enabled() || kvm_has_sync_mmu()) &&
 | 
			
		||||
                    getpagesize() <= TARGET_PAGE_SIZE) {
 | 
			
		||||
                    qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
 | 
			
		||||
                }
 | 
			
		||||
#endif
 | 
			
		||||
            }
 | 
			
		||||
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
 | 
			
		||||
        } else if (flags & RAM_SAVE_FLAG_PAGE) {
 | 
			
		||||
            void *host;
 | 
			
		||||
 | 
			
		||||
@ -908,6 +946,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
 | 
			
		||||
                ret = -EINVAL;
 | 
			
		||||
                goto done;
 | 
			
		||||
            }
 | 
			
		||||
        } else if (flags & RAM_SAVE_FLAG_HOOK) {
 | 
			
		||||
            ram_control_load_hook(f, flags);
 | 
			
		||||
        }
 | 
			
		||||
        error = qemu_file_get_error(f);
 | 
			
		||||
        if (error) {
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										40
									
								
								configure
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										40
									
								
								configure
									
									
									
									
										vendored
									
									
								
							@ -180,6 +180,7 @@ xfs=""
 | 
			
		||||
vhost_net="no"
 | 
			
		||||
vhost_scsi="no"
 | 
			
		||||
kvm="no"
 | 
			
		||||
rdma=""
 | 
			
		||||
gprof="no"
 | 
			
		||||
debug_tcg="no"
 | 
			
		||||
debug="no"
 | 
			
		||||
@ -937,6 +938,10 @@ for opt do
 | 
			
		||||
  ;;
 | 
			
		||||
  --enable-gtk) gtk="yes"
 | 
			
		||||
  ;;
 | 
			
		||||
  --enable-rdma) rdma="yes"
 | 
			
		||||
  ;;
 | 
			
		||||
  --disable-rdma) rdma="no"
 | 
			
		||||
  ;;
 | 
			
		||||
  --with-gtkabi=*) gtkabi="$optarg"
 | 
			
		||||
  ;;
 | 
			
		||||
  --enable-tpm) tpm="yes"
 | 
			
		||||
@ -1095,6 +1100,8 @@ echo "  --enable-bluez           enable bluez stack connectivity"
 | 
			
		||||
echo "  --disable-slirp          disable SLIRP userspace network connectivity"
 | 
			
		||||
echo "  --disable-kvm            disable KVM acceleration support"
 | 
			
		||||
echo "  --enable-kvm             enable KVM acceleration support"
 | 
			
		||||
echo "  --disable-rdma           disable RDMA-based migration support"
 | 
			
		||||
echo "  --enable-rdma            enable RDMA-based migration support"
 | 
			
		||||
echo "  --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI)"
 | 
			
		||||
echo "  --disable-nptl           disable usermode NPTL support"
 | 
			
		||||
echo "  --enable-nptl            enable usermode NPTL support"
 | 
			
		||||
@ -1801,6 +1808,30 @@ EOF
 | 
			
		||||
  libs_softmmu="$sdl_libs $libs_softmmu"
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
##########################################
 | 
			
		||||
# RDMA needs OpenFabrics libraries
 | 
			
		||||
if test "$rdma" != "no" ; then
 | 
			
		||||
  cat > $TMPC <<EOF
 | 
			
		||||
#include <rdma/rdma_cma.h>
 | 
			
		||||
int main(void) { return 0; }
 | 
			
		||||
EOF
 | 
			
		||||
  rdma_libs="-lrdmacm -libverbs"
 | 
			
		||||
  if compile_prog "" "$rdma_libs" ; then
 | 
			
		||||
    rdma="yes"
 | 
			
		||||
    libs_softmmu="$libs_softmmu $rdma_libs"
 | 
			
		||||
  else
 | 
			
		||||
    if test "$rdma" = "yes" ; then
 | 
			
		||||
        error_exit \
 | 
			
		||||
            " OpenFabrics librdmacm/libibverbs not present." \
 | 
			
		||||
            " Your options:" \
 | 
			
		||||
            "  (1) Fast: Install infiniband packages from your distro." \
 | 
			
		||||
            "  (2) Cleanest: Install libraries from www.openfabrics.org" \
 | 
			
		||||
            "  (3) Also: Install softiwarp if you don't have RDMA hardware"
 | 
			
		||||
    fi
 | 
			
		||||
    rdma="no"
 | 
			
		||||
  fi
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
##########################################
 | 
			
		||||
# VNC TLS/WS detection
 | 
			
		||||
if test "$vnc" = "yes" -a \( "$vnc_tls" != "no" -o "$vnc_ws" != "no" \) ; then
 | 
			
		||||
@ -3558,6 +3589,7 @@ echo "Linux AIO support $linux_aio"
 | 
			
		||||
echo "ATTR/XATTR support $attr"
 | 
			
		||||
echo "Install blobs     $blobs"
 | 
			
		||||
echo "KVM support       $kvm"
 | 
			
		||||
echo "RDMA support      $rdma"
 | 
			
		||||
echo "TCG interpreter   $tcg_interpreter"
 | 
			
		||||
echo "fdt support       $fdt"
 | 
			
		||||
echo "preadv support    $preadv"
 | 
			
		||||
@ -4046,6 +4078,10 @@ if test "$trace_default" = "yes"; then
 | 
			
		||||
  echo "CONFIG_TRACE_DEFAULT=y" >> $config_host_mak
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
if test "$rdma" = "yes" ; then
 | 
			
		||||
  echo "CONFIG_RDMA=y" >> $config_host_mak
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
if test "$tcg_interpreter" = "yes"; then
 | 
			
		||||
  QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
 | 
			
		||||
elif test "$ARCH" = "sparc64" ; then
 | 
			
		||||
@ -4485,6 +4521,10 @@ if [ "$pixman" = "internal" ]; then
 | 
			
		||||
  echo "config-host.h: subdir-pixman" >> $config_host_mak
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
if test "$rdma" = "yes" ; then
 | 
			
		||||
echo "CONFIG_RDMA=y" >> $config_host_mak
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
if [ "$dtc_internal" = "yes" ]; then
 | 
			
		||||
  echo "config-host.h: subdir-dtc" >> $config_host_mak
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
@ -35,7 +35,7 @@ memory tracked during each live migration iteration round cannot keep pace
 | 
			
		||||
with the rate of dirty memory produced by the workload.
 | 
			
		||||
 | 
			
		||||
RDMA currently comes in two flavors: both Ethernet based (RoCE, or RDMA
 | 
			
		||||
over Convered Ethernet) as well as Infiniband-based. This implementation of
 | 
			
		||||
over Converged Ethernet) as well as Infiniband-based. This implementation of
 | 
			
		||||
migration using RDMA is capable of using both technologies because of
 | 
			
		||||
the use of the OpenFabrics OFED software stack that abstracts out the
 | 
			
		||||
programming model irrespective of the underlying hardware.
 | 
			
		||||
@ -202,7 +202,7 @@ The maximum number of repeats is hard-coded to 4096. This is a conservative
 | 
			
		||||
limit based on the maximum size of a SEND message along with empirical
 | 
			
		||||
observations on the maximum future benefit of simultaneous page registrations.
 | 
			
		||||
 | 
			
		||||
The 'type' field has 10 different command values:
 | 
			
		||||
The 'type' field has 12 different command values:
 | 
			
		||||
     1. Unused
 | 
			
		||||
     2. Error                      (sent to the source during bad things)
 | 
			
		||||
     3. Ready                      (control-channel is available)
 | 
			
		||||
@ -213,6 +213,8 @@ The 'type' field has 10 different command values:
 | 
			
		||||
     8. Register request           (dynamic chunk registration)
 | 
			
		||||
     9. Register result            ('rkey' to be used by sender)
 | 
			
		||||
    10. Register finished          (registration for current iteration finished)
 | 
			
		||||
    11. Unregister request         (unpin previously registered memory)
 | 
			
		||||
    12. Unregister finished        (confirmation that unpin completed)
 | 
			
		||||
 | 
			
		||||
A single control message, as hinted above, can contain within the data
 | 
			
		||||
portion an array of many commands of the same type. If there is more than
 | 
			
		||||
@ -243,7 +245,7 @@ qemu_rdma_exchange_send(header, data, optional response header & data):
 | 
			
		||||
   from the receiver to tell us that the receiver
 | 
			
		||||
   is *ready* for us to transmit some new bytes.
 | 
			
		||||
2. Optionally: if we are expecting a response from the command
 | 
			
		||||
   (that we have no yet transmitted), let's post an RQ
 | 
			
		||||
   (that we have not yet transmitted), let's post an RQ
 | 
			
		||||
   work request to receive that data a few moments later.
 | 
			
		||||
3. When the READY arrives, librdmacm will
 | 
			
		||||
   unblock us and we immediately post a RQ work request
 | 
			
		||||
@ -293,8 +295,10 @@ librdmacm provides the user with a 'private data' area to be exchanged
 | 
			
		||||
at connection-setup time before any infiniband traffic is generated.
 | 
			
		||||
 | 
			
		||||
Header:
 | 
			
		||||
    * Version (protocol version validated before send/recv occurs), uint32, network byte order
 | 
			
		||||
    * Flags   (bitwise OR of each capability), uint32, network byte order
 | 
			
		||||
    * Version (protocol version validated before send/recv occurs),
 | 
			
		||||
                                               uint32, network byte order
 | 
			
		||||
    * Flags   (bitwise OR of each capability),
 | 
			
		||||
                                               uint32, network byte order
 | 
			
		||||
 | 
			
		||||
There is no data portion of this header right now, so there is
 | 
			
		||||
no length field. The maximum size of the 'private data' section
 | 
			
		||||
@ -313,7 +317,7 @@ If the version is invalid, we throw an error.
 | 
			
		||||
If the version is new, we only negotiate the capabilities that the
 | 
			
		||||
requested version is able to perform and ignore the rest.
 | 
			
		||||
 | 
			
		||||
Currently there is only *one* capability in Version #1: dynamic page registration
 | 
			
		||||
Currently there is only one capability in Version #1: dynamic page registration
 | 
			
		||||
 | 
			
		||||
Finally: Negotiation happens with the Flags field: If the primary-VM
 | 
			
		||||
sets a flag, but the destination does not support this capability, it
 | 
			
		||||
@ -413,3 +417,8 @@ TODO:
 | 
			
		||||
   the use of KSM and ballooning while using RDMA.
 | 
			
		||||
4. Some form of balloon-device usage tracking would also
 | 
			
		||||
   help alleviate some issues.
 | 
			
		||||
5. Move UNREGISTER requests to a separate thread.
 | 
			
		||||
6. Use LRU to provide more fine-grained direction of UNREGISTER
 | 
			
		||||
   requests for unpinning memory in an overcommitted environment.
 | 
			
		||||
7. Expose UNREGISTER support to the user by way of workload-specific
 | 
			
		||||
   hints about application behavior.
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										4
									
								
								hmp.c
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								hmp.c
									
									
									
									
									
								
							@ -164,6 +164,10 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
 | 
			
		||||
            monitor_printf(mon, "downtime: %" PRIu64 " milliseconds\n",
 | 
			
		||||
                           info->downtime);
 | 
			
		||||
        }
 | 
			
		||||
        if (info->has_setup_time) {
 | 
			
		||||
            monitor_printf(mon, "setup: %" PRIu64 " milliseconds\n",
 | 
			
		||||
                           info->setup_time);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (info->has_ram) {
 | 
			
		||||
 | 
			
		||||
@ -49,6 +49,7 @@ struct MigrationState
 | 
			
		||||
    int64_t dirty_bytes_rate;
 | 
			
		||||
    bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
 | 
			
		||||
    int64_t xbzrle_cache_size;
 | 
			
		||||
    int64_t setup_time;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
void process_incoming_migration(QEMUFile *f);
 | 
			
		||||
@ -77,6 +78,10 @@ void fd_start_incoming_migration(const char *path, Error **errp);
 | 
			
		||||
 | 
			
		||||
void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp);
 | 
			
		||||
 | 
			
		||||
void rdma_start_outgoing_migration(void *opaque, const char *host_port, Error **errp);
 | 
			
		||||
 | 
			
		||||
void rdma_start_incoming_migration(const char *host_port, Error **errp);
 | 
			
		||||
 | 
			
		||||
void migrate_fd_error(MigrationState *s);
 | 
			
		||||
 | 
			
		||||
void migrate_fd_connect(MigrationState *s);
 | 
			
		||||
@ -109,6 +114,8 @@ uint64_t xbzrle_mig_pages_transferred(void);
 | 
			
		||||
uint64_t xbzrle_mig_pages_overflow(void);
 | 
			
		||||
uint64_t xbzrle_mig_pages_cache_miss(void);
 | 
			
		||||
 | 
			
		||||
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * @migrate_add_blocker - prevent migration from proceeding
 | 
			
		||||
 *
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										3249
									
								
								migration-rdma.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3249
									
								
								migration-rdma.c
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										50
									
								
								migration.c
									
									
									
									
									
								
							
							
						
						
									
										50
									
								
								migration.c
									
									
									
									
									
								
							@ -36,7 +36,8 @@
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
enum {
 | 
			
		||||
    MIG_STATE_ERROR,
 | 
			
		||||
    MIG_STATE_ERROR = -1,
 | 
			
		||||
    MIG_STATE_NONE,
 | 
			
		||||
    MIG_STATE_SETUP,
 | 
			
		||||
    MIG_STATE_CANCELLED,
 | 
			
		||||
    MIG_STATE_ACTIVE,
 | 
			
		||||
@ -63,7 +64,7 @@ static NotifierList migration_state_notifiers =
 | 
			
		||||
MigrationState *migrate_get_current(void)
 | 
			
		||||
{
 | 
			
		||||
    static MigrationState current_migration = {
 | 
			
		||||
        .state = MIG_STATE_SETUP,
 | 
			
		||||
        .state = MIG_STATE_NONE,
 | 
			
		||||
        .bandwidth_limit = MAX_THROTTLE,
 | 
			
		||||
        .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
 | 
			
		||||
        .mbps = -1,
 | 
			
		||||
@ -78,6 +79,10 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
 | 
			
		||||
 | 
			
		||||
    if (strstart(uri, "tcp:", &p))
 | 
			
		||||
        tcp_start_incoming_migration(p, errp);
 | 
			
		||||
#ifdef CONFIG_RDMA
 | 
			
		||||
    else if (strstart(uri, "x-rdma:", &p))
 | 
			
		||||
        rdma_start_incoming_migration(p, errp);
 | 
			
		||||
#endif
 | 
			
		||||
#if !defined(WIN32)
 | 
			
		||||
    else if (strstart(uri, "exec:", &p))
 | 
			
		||||
        exec_start_incoming_migration(p, errp);
 | 
			
		||||
@ -180,9 +185,14 @@ MigrationInfo *qmp_query_migrate(Error **errp)
 | 
			
		||||
    MigrationState *s = migrate_get_current();
 | 
			
		||||
 | 
			
		||||
    switch (s->state) {
 | 
			
		||||
    case MIG_STATE_SETUP:
 | 
			
		||||
    case MIG_STATE_NONE:
 | 
			
		||||
        /* no migration has happened ever */
 | 
			
		||||
        break;
 | 
			
		||||
    case MIG_STATE_SETUP:
 | 
			
		||||
        info->has_status = true;
 | 
			
		||||
        info->status = g_strdup("setup");
 | 
			
		||||
        info->has_total_time = false;
 | 
			
		||||
        break;
 | 
			
		||||
    case MIG_STATE_ACTIVE:
 | 
			
		||||
        info->has_status = true;
 | 
			
		||||
        info->status = g_strdup("active");
 | 
			
		||||
@ -191,6 +201,8 @@ MigrationInfo *qmp_query_migrate(Error **errp)
 | 
			
		||||
            - s->total_time;
 | 
			
		||||
        info->has_expected_downtime = true;
 | 
			
		||||
        info->expected_downtime = s->expected_downtime;
 | 
			
		||||
        info->has_setup_time = true;
 | 
			
		||||
        info->setup_time = s->setup_time;
 | 
			
		||||
 | 
			
		||||
        info->has_ram = true;
 | 
			
		||||
        info->ram = g_malloc0(sizeof(*info->ram));
 | 
			
		||||
@ -222,6 +234,8 @@ MigrationInfo *qmp_query_migrate(Error **errp)
 | 
			
		||||
        info->total_time = s->total_time;
 | 
			
		||||
        info->has_downtime = true;
 | 
			
		||||
        info->downtime = s->downtime;
 | 
			
		||||
        info->has_setup_time = true;
 | 
			
		||||
        info->setup_time = s->setup_time;
 | 
			
		||||
 | 
			
		||||
        info->has_ram = true;
 | 
			
		||||
        info->ram = g_malloc0(sizeof(*info->ram));
 | 
			
		||||
@ -253,7 +267,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 | 
			
		||||
    MigrationState *s = migrate_get_current();
 | 
			
		||||
    MigrationCapabilityStatusList *cap;
 | 
			
		||||
 | 
			
		||||
    if (s->state == MIG_STATE_ACTIVE) {
 | 
			
		||||
    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
 | 
			
		||||
        error_set(errp, QERR_MIGRATION_ACTIVE);
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
@ -291,9 +305,9 @@ static void migrate_fd_cleanup(void *opaque)
 | 
			
		||||
    notifier_list_notify(&migration_state_notifiers, s);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void migrate_finish_set_state(MigrationState *s, int new_state)
 | 
			
		||||
static void migrate_set_state(MigrationState *s, int old_state, int new_state)
 | 
			
		||||
{
 | 
			
		||||
    if (atomic_cmpxchg(&s->state, MIG_STATE_ACTIVE, new_state) == new_state) {
 | 
			
		||||
    if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
 | 
			
		||||
        trace_migrate_set_state(new_state);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@ -311,7 +325,7 @@ static void migrate_fd_cancel(MigrationState *s)
 | 
			
		||||
{
 | 
			
		||||
    DPRINTF("cancelling migration\n");
 | 
			
		||||
 | 
			
		||||
    migrate_finish_set_state(s, MIG_STATE_CANCELLED);
 | 
			
		||||
    migrate_set_state(s, s->state, MIG_STATE_CANCELLED);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void add_migration_state_change_notifier(Notifier *notify)
 | 
			
		||||
@ -388,7 +402,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
 | 
			
		||||
    params.blk = blk;
 | 
			
		||||
    params.shared = inc;
 | 
			
		||||
 | 
			
		||||
    if (s->state == MIG_STATE_ACTIVE) {
 | 
			
		||||
    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
 | 
			
		||||
        error_set(errp, QERR_MIGRATION_ACTIVE);
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
@ -406,6 +420,10 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
 | 
			
		||||
 | 
			
		||||
    if (strstart(uri, "tcp:", &p)) {
 | 
			
		||||
        tcp_start_outgoing_migration(s, p, &local_err);
 | 
			
		||||
#ifdef CONFIG_RDMA
 | 
			
		||||
    } else if (strstart(uri, "x-rdma:", &p)) {
 | 
			
		||||
        rdma_start_outgoing_migration(s, p, &local_err);
 | 
			
		||||
#endif
 | 
			
		||||
#if !defined(WIN32)
 | 
			
		||||
    } else if (strstart(uri, "exec:", &p)) {
 | 
			
		||||
        exec_start_outgoing_migration(s, p, &local_err);
 | 
			
		||||
@ -526,6 +544,7 @@ static void *migration_thread(void *opaque)
 | 
			
		||||
{
 | 
			
		||||
    MigrationState *s = opaque;
 | 
			
		||||
    int64_t initial_time = qemu_get_clock_ms(rt_clock);
 | 
			
		||||
    int64_t setup_start = qemu_get_clock_ms(host_clock);
 | 
			
		||||
    int64_t initial_bytes = 0;
 | 
			
		||||
    int64_t max_size = 0;
 | 
			
		||||
    int64_t start_time = initial_time;
 | 
			
		||||
@ -534,6 +553,11 @@ static void *migration_thread(void *opaque)
 | 
			
		||||
    DPRINTF("beginning savevm\n");
 | 
			
		||||
    qemu_savevm_state_begin(s->file, &s->params);
 | 
			
		||||
 | 
			
		||||
    s->setup_time = qemu_get_clock_ms(host_clock) - setup_start;
 | 
			
		||||
    migrate_set_state(s, MIG_STATE_SETUP, MIG_STATE_ACTIVE);
 | 
			
		||||
 | 
			
		||||
    DPRINTF("setup complete\n");
 | 
			
		||||
 | 
			
		||||
    while (s->state == MIG_STATE_ACTIVE) {
 | 
			
		||||
        int64_t current_time;
 | 
			
		||||
        uint64_t pending_size;
 | 
			
		||||
@ -561,19 +585,19 @@ static void *migration_thread(void *opaque)
 | 
			
		||||
                qemu_mutex_unlock_iothread();
 | 
			
		||||
 | 
			
		||||
                if (ret < 0) {
 | 
			
		||||
                    migrate_finish_set_state(s, MIG_STATE_ERROR);
 | 
			
		||||
                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_ERROR);
 | 
			
		||||
                    break;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                if (!qemu_file_get_error(s->file)) {
 | 
			
		||||
                    migrate_finish_set_state(s, MIG_STATE_COMPLETED);
 | 
			
		||||
                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
 | 
			
		||||
                    break;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (qemu_file_get_error(s->file)) {
 | 
			
		||||
            migrate_finish_set_state(s, MIG_STATE_ERROR);
 | 
			
		||||
            migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_ERROR);
 | 
			
		||||
            break;
 | 
			
		||||
        }
 | 
			
		||||
        current_time = qemu_get_clock_ms(rt_clock);
 | 
			
		||||
@ -624,8 +648,8 @@ static void *migration_thread(void *opaque)
 | 
			
		||||
 | 
			
		||||
void migrate_fd_connect(MigrationState *s)
 | 
			
		||||
{
 | 
			
		||||
    s->state = MIG_STATE_ACTIVE;
 | 
			
		||||
    trace_migrate_set_state(MIG_STATE_ACTIVE);
 | 
			
		||||
    s->state = MIG_STATE_SETUP;
 | 
			
		||||
    trace_migrate_set_state(MIG_STATE_SETUP);
 | 
			
		||||
 | 
			
		||||
    /* This is a best 1st approximation. ns to ms */
 | 
			
		||||
    s->expected_downtime = max_downtime/1000000;
 | 
			
		||||
 | 
			
		||||
@ -578,6 +578,12 @@
 | 
			
		||||
#        expected downtime in milliseconds for the guest in last walk
 | 
			
		||||
#        of the dirty bitmap. (since 1.3)
 | 
			
		||||
#
 | 
			
		||||
# @setup-time: #optional amount of setup time in milliseconds _before_ the
 | 
			
		||||
#        iterations begin but _after_ the QMP command is issued. This is designed
 | 
			
		||||
#        to provide an accounting of any activities (such as RDMA pinning) which
 | 
			
		||||
#        may be expensive, but do not actually occur during the iterative
 | 
			
		||||
#        migration rounds themselves. (since 1.6)
 | 
			
		||||
#
 | 
			
		||||
# Since: 0.14.0
 | 
			
		||||
##
 | 
			
		||||
{ 'type': 'MigrationInfo',
 | 
			
		||||
@ -586,7 +592,8 @@
 | 
			
		||||
           '*xbzrle-cache': 'XBZRLECacheStats',
 | 
			
		||||
           '*total-time': 'int',
 | 
			
		||||
           '*expected-downtime': 'int',
 | 
			
		||||
           '*downtime': 'int'} }
 | 
			
		||||
           '*downtime': 'int',
 | 
			
		||||
           '*setup-time': 'int'} }
 | 
			
		||||
 | 
			
		||||
##
 | 
			
		||||
# @query-migrate
 | 
			
		||||
@ -619,6 +626,9 @@
 | 
			
		||||
#          to enable the capability on the source VM. The feature is disabled by
 | 
			
		||||
#          default. (since 1.6)
 | 
			
		||||
#
 | 
			
		||||
# @auto-converge: If enabled, QEMU will automatically throttle down the guest
 | 
			
		||||
#          to speed up convergence of RAM migration. (since 1.6)
 | 
			
		||||
#
 | 
			
		||||
# Since: 1.2
 | 
			
		||||
##
 | 
			
		||||
{ 'enum': 'MigrationCapability',
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										2
									
								
								savevm.c
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								savevm.c
									
									
									
									
									
								
							@ -662,7 +662,7 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
 | 
			
		||||
                                    offset, size, bytes_sent);
 | 
			
		||||
 | 
			
		||||
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
 | 
			
		||||
            if (*bytes_sent > 0) {
 | 
			
		||||
            if (bytes_sent && *bytes_sent > 0) {
 | 
			
		||||
                qemu_update_position(f, *bytes_sent);
 | 
			
		||||
            } else if (ret < 0) {
 | 
			
		||||
                qemu_file_set_error(f, ret);
 | 
			
		||||
 | 
			
		||||
@ -252,6 +252,24 @@ static void cpu_pre_save(void *opaque)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    env->fpregs_format_vmstate = 0;
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
     * Real mode guest segments register DPL should be zero.
 | 
			
		||||
     * Older KVM versions were setting it wrongly.
 | 
			
		||||
     * Fixing it will allow live migration to a host with unrestricted guest
 | 
			
		||||
     * support (otherwise the migration will fail with invalid guest state
 | 
			
		||||
     * error).
 | 
			
		||||
     */
 | 
			
		||||
    if (!(env->cr[0] & CR0_PE_MASK) &&
 | 
			
		||||
        (env->segs[R_CS].flags >> DESC_DPL_SHIFT & 3) != 0) {
 | 
			
		||||
        env->segs[R_CS].flags &= ~(env->segs[R_CS].flags & DESC_DPL_MASK);
 | 
			
		||||
        env->segs[R_DS].flags &= ~(env->segs[R_DS].flags & DESC_DPL_MASK);
 | 
			
		||||
        env->segs[R_ES].flags &= ~(env->segs[R_ES].flags & DESC_DPL_MASK);
 | 
			
		||||
        env->segs[R_FS].flags &= ~(env->segs[R_FS].flags & DESC_DPL_MASK);
 | 
			
		||||
        env->segs[R_GS].flags &= ~(env->segs[R_GS].flags & DESC_DPL_MASK);
 | 
			
		||||
        env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int cpu_post_load(void *opaque, int version_id)
 | 
			
		||||
@ -260,6 +278,24 @@ static int cpu_post_load(void *opaque, int version_id)
 | 
			
		||||
    CPUX86State *env = &cpu->env;
 | 
			
		||||
    int i;
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
     * Real mode guest segments register DPL should be zero.
 | 
			
		||||
     * Older KVM versions were setting it wrongly.
 | 
			
		||||
     * Fixing it will allow live migration from such hosts that don't have
 | 
			
		||||
     * restricted guest support to a host with unrestricted guest support
 | 
			
		||||
     * (otherwise the migration will fail with invalid guest state
 | 
			
		||||
     * error).
 | 
			
		||||
     */
 | 
			
		||||
    if (!(env->cr[0] & CR0_PE_MASK) &&
 | 
			
		||||
        (env->segs[R_CS].flags >> DESC_DPL_SHIFT & 3) != 0) {
 | 
			
		||||
        env->segs[R_CS].flags &= ~(env->segs[R_CS].flags & DESC_DPL_MASK);
 | 
			
		||||
        env->segs[R_DS].flags &= ~(env->segs[R_DS].flags & DESC_DPL_MASK);
 | 
			
		||||
        env->segs[R_ES].flags &= ~(env->segs[R_ES].flags & DESC_DPL_MASK);
 | 
			
		||||
        env->segs[R_FS].flags &= ~(env->segs[R_FS].flags & DESC_DPL_MASK);
 | 
			
		||||
        env->segs[R_GS].flags &= ~(env->segs[R_GS].flags & DESC_DPL_MASK);
 | 
			
		||||
        env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /* XXX: restore FPU round state */
 | 
			
		||||
    env->fpstt = (env->fpus_vmstate >> 11) & 7;
 | 
			
		||||
    env->fpus = env->fpus_vmstate & ~0x3800;
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user