QEMU-Nyx-fork/nyx/snapshot/devices/nyx_device_state.c
Sergej Schumilo 8a88edc2a1 auto-apply clang-format
- including vl.c & kvm-all.c
2022-10-16 23:51:13 +02:00

480 lines
14 KiB
C

#include "qemu/osdep.h"
#include <immintrin.h>
#include <stdint.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "qemu/main-loop.h"
#include "qemu/rcu_queue.h"
#include "block/qapi.h"
#include "exec/ram_addr.h"
#include "migration/global_state.h"
#include "migration/migration.h"
#include "migration/qemu-file.h"
#include "migration/qjson.h"
#include "migration/register.h"
#include "migration/savevm.h"
#include "migration/vmstate.h"
#include "sysemu/block-backend.h"
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
#include "sysemu/reset.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "nyx/debug.h"
#include "nyx/snapshot/devices/nyx_device_state.h"
#include "nyx/snapshot/devices/state_reallocation.h"
#include "nyx/snapshot/devices/vm_change_state_handlers.h"
#define STATE_BUFFER 0x8000000 /* up to 128MB */
extern void enable_fast_snapshot_rtc(void);
extern void enable_fast_snapshot_kvm_clock(void);

/*
 * Invoke the fast-snapshot enable hooks of the RTC and of the KVM clock,
 * in that order. Both hooks are defined outside this file.
 */
static void enable_fast_snapshot_mode(void)
{
    enable_fast_snapshot_rtc();
    enable_fast_snapshot_kvm_clock();
}
extern int kvm_nyx_put_tsc_value(CPUState *cs, uint64_t data);

/*
 * Write a previously recorded TSC value back into vCPU 0.
 *
 * @self:         device state holding the recorded TSC values
 * @tmp_snapshot: unused; which value is restored depends solely on
 *                self->incremental_mode
 *
 * The incremental value is used when self->incremental_mode is set,
 * otherwise the root value. Aborts if the selected value is zero or if
 * kvm_nyx_put_tsc_value() does not return 0.
 */
static void set_tsc_value(nyx_device_state_t *self, bool tmp_snapshot)
{
    uint64_t tsc = self->incremental_mode ? self->tsc_value_incremental :
                                            self->tsc_value;
    int ret;

    (void)tmp_snapshot;
    assert(tsc);

    /*
     * Keep the call outside of assert(): an assert() argument is not
     * evaluated at all when NDEBUG is defined.
     */
    ret = kvm_nyx_put_tsc_value(qemu_get_cpu(0), tsc);
    assert(ret == 0);
    (void)ret;
}
/*
 * Record the TSC of vCPU 0, read from its CPUX86State.
 *
 * @self:             device state receiving the value
 * @incremental_mode: true stores into self->tsc_value_incremental,
 *                    false stores into self->tsc_value
 */
static void save_tsc_value(nyx_device_state_t *self, bool incremental_mode)
{
    CPUX86State *cpu_env = &X86_CPU(qemu_get_cpu(0))->env;

    if (!incremental_mode) {
        self->tsc_value = cpu_env->tsc;
        return;
    }
    self->tsc_value_incremental = cpu_env->tsc;
}
extern int qemu_savevm_state(QEMUFile *f, Error **errp);
/* new savevm routine */
/*
 * One entry of the savevm handler list (savevm_state.handlers).
 *
 * NOTE(review): this looks like a local copy of a type that is private to
 * QEMU's migration/savevm.c. If so, the field order and types must stay in
 * sync with that definition -- confirm against the QEMU tree of this fork.
 */
typedef struct SaveStateEntry {
QTAILQ_ENTRY(SaveStateEntry) entry;
/* handler name; this file compares it against "ram" and "block" */
char idstr[256];
int instance_id;
int alias_id;
int version_id;
int load_version_id;
int section_id;
int load_section_id;
/* optional callback table (save_setup, save_live_iterate, ...); may be NULL */
SaveVMHandlers *ops;
/* optional VMState description; may be NULL */
const VMStateDescription *vmsd;
/* passed to the ops callbacks and to vmstate_save_needed() */
void *opaque;
void *compat;
int is_ram;
} SaveStateEntry;
/*
 * Type of the global savevm_state object declared extern in this file.
 * Only the handlers list is accessed here.
 *
 * NOTE(review): presumably mirrors the private definition in QEMU's
 * migration/savevm.c; the layout must match it -- confirm.
 */
typedef struct SaveState {
/* list of SaveStateEntry walked by the fast_qemu_savevm_* functions */
QTAILQ_HEAD(, SaveStateEntry) handlers;
int global_section_id;
bool skip_configuration;
uint32_t len;
const char *name;
uint32_t target_page_bits;
} SaveState;
/*
 * Symbols that are defined outside this file and used by the
 * fast_qemu_savevm_* functions in this file.
 */
extern SaveState savevm_state;
extern void vmstate_save(QEMUFile *f, SaveStateEntry *se, QJSON *vmdesc);
extern bool should_send_vmdesc(void);
/* NOTE(review): declared but not referenced in the visible part of this file */
extern bool skip_section_footers;
extern void save_section_footer(QEMUFile *f, SaveStateEntry *se);
extern void save_section_header(QEMUFile *f, SaveStateEntry *se, uint8_t section_type);
/*
 * Precopy-completion step of the fast savevm path.
 *
 * Walks savevm_state.handlers twice while ignoring every handler named
 * "ram" or "block":
 *   1. calls save_live_complete_precopy() for handlers that provide it
 *      (QEMU_VM_SECTION_END sections);
 *   2. unless @iterable_only is set, writes a QEMU_VM_SECTION_FULL section
 *      for each remaining handler via vmstate_save(), followed by the EOF
 *      marker (skipped while in postcopy) and, if should_send_vmdesc() says
 *      so, the JSON description of the saved devices.
 *
 * A negative result from a completion callback is stored as the file error
 * and ends the function early.
 */
static void fast_qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
{
    const bool postcopy_active = migration_in_postcopy();
    SaveStateEntry *handler;

    cpu_synchronize_all_states();

    /* pass 1: completion callbacks of iterable handlers */
    QTAILQ_FOREACH (handler, &savevm_state.handlers, entry) {
        int rc;

        if (!strcmp(handler->idstr, "ram") || !strcmp(handler->idstr, "block")) {
            continue;
        }
        if (!handler->ops) {
            continue;
        }
        if (postcopy_active && handler->ops->save_live_complete_postcopy) {
            continue;
        }
        if (postcopy_active && !iterable_only) {
            continue;
        }
        if (!handler->ops->save_live_complete_precopy) {
            continue;
        }
        if (handler->ops->is_active && !handler->ops->is_active(handler->opaque)) {
            continue;
        }

        save_section_header(f, handler, QEMU_VM_SECTION_END);
        rc = handler->ops->save_live_complete_precopy(f, handler->opaque);
        save_section_footer(f, handler);
        if (rc < 0) {
            qemu_file_set_error(f, rc);
            return;
        }
    }

    if (iterable_only) {
        return;
    }

    /* pass 2: full device sections plus the JSON description */
    QJSON *desc = qjson_new();
    json_prop_int(desc, "page_size", TARGET_PAGE_SIZE);
    json_start_array(desc, "devices");

    QTAILQ_FOREACH (handler, &savevm_state.handlers, entry) {
        if (!strcmp(handler->idstr, "ram") || !strcmp(handler->idstr, "block")) {
            continue;
        }
        /* nothing to save: neither a save_state callback nor a vmsd */
        if (!handler->vmsd && (!handler->ops || !handler->ops->save_state)) {
            continue;
        }
        if (handler->vmsd && !vmstate_save_needed(handler->vmsd, handler->opaque)) {
            continue;
        }

        json_start_object(desc, NULL);
        json_prop_str(desc, "name", handler->idstr);
        json_prop_int(desc, "instance_id", handler->instance_id);

        save_section_header(f, handler, QEMU_VM_SECTION_FULL);
        vmstate_save(f, handler, desc);
        save_section_footer(f, handler);

        json_end_object(desc);
    }

    if (!postcopy_active) {
        /* Postcopy stream will still be going */
        qemu_put_byte(f, QEMU_VM_EOF);
    }

    json_end_array(desc);
    qjson_finish(desc);

    int desc_len = strlen(qjson_get_str(desc));
    if (should_send_vmdesc()) {
        qemu_put_byte(f, QEMU_VM_VMDESCRIPTION);
        qemu_put_be32(f, desc_len);
        qemu_put_buffer(f, (uint8_t *)qjson_get_str(desc), desc_len);
    }
    qjson_destroy(desc);

    qemu_fflush(f);
}
/*
 * One iteration round of the fast savevm path.
 *
 * Calls save_live_iterate() (wrapped in a QEMU_VM_SECTION_PART section) for
 * every active handler that provides it, ignoring the handlers named "ram"
 * and "block".
 *
 * Returns 0 as soon as qemu_file_rate_limit() reports a limit, otherwise the
 * result of the last save_live_iterate() call, or 1 if none was called.
 * A negative callback result is additionally stored as the file error.
 */
static int fast_qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
{
    SaveStateEntry *handler;
    int status = 1;

    QTAILQ_FOREACH (handler, &savevm_state.handlers, entry) {
        if (!strcmp(handler->idstr, "ram") || !strcmp(handler->idstr, "block")) {
            continue;
        }
        if (!handler->ops || !handler->ops->save_live_iterate) {
            continue;
        }
        if (handler->ops->is_active && !handler->ops->is_active(handler->opaque)) {
            continue;
        }
        /*
         * In the postcopy phase, a device that does not know how to do
         * postcopy should already have saved its state in the _complete
         * call; calling iterate afterwards might confuse it.
         */
        if (postcopy && !handler->ops->save_live_complete_postcopy) {
            continue;
        }
        if (qemu_file_rate_limit(f)) {
            return 0;
        }

        save_section_header(f, handler, QEMU_VM_SECTION_PART);
        status = handler->ops->save_live_iterate(f, handler->opaque);
        save_section_footer(f, handler);

        if (status < 0) {
            qemu_file_set_error(f, status);
        }
        if (status <= 0) {
            /*
             * Do not move on to the next vmstate before this one has
             * reported completion of the current stage. This serializes
             * the migration and lowers the chance that a faster changing
             * state is synchronized over and over again.
             */
            break;
        }
    }

    return status;
}
/*
 * Setup step of the fast savevm path.
 *
 * Calls save_setup() (wrapped in a QEMU_VM_SECTION_START section) for every
 * active handler that provides it, ignoring the handlers named "ram" and
 * "block". The first negative result is stored as the file error and stops
 * the walk.
 */
static void fast_qemu_savevm_state_setup(QEMUFile *f)
{
    SaveStateEntry *handler;

    QTAILQ_FOREACH (handler, &savevm_state.handlers, entry) {
        int rc;

        if (!strcmp(handler->idstr, "ram") || !strcmp(handler->idstr, "block")) {
            continue;
        }
        if (!handler->ops || !handler->ops->save_setup) {
            continue;
        }
        if (handler->ops->is_active && !handler->ops->is_active(handler->opaque)) {
            continue;
        }

        save_section_header(f, handler, QEMU_VM_SECTION_START);
        rc = handler->ops->save_setup(f, handler->opaque);
        save_section_footer(f, handler);

        if (rc < 0) {
            qemu_file_set_error(f, rc);
            break;
        }
    }
}
/*
 * Write a device-state snapshot to @f using the fast savevm helpers of this
 * file: header, setup step, then iterate rounds until one returns a value
 * greater than 0, followed by the precopy-completion step.
 *
 * @f:    destination QEMUFile
 * @errp: unused
 *
 * Returns the error recorded on @f (qemu_file_get_error()), i.e. 0 on
 * success. The loop ends early once an error has been recorded on @f.
 */
static int fast_qemu_savevm_state(QEMUFile *f, Error **errp)
{
    (void)errp;

    qemu_savevm_state_header(f);
    fast_qemu_savevm_state_setup(f);

    while (qemu_file_get_error(f) == 0) {
        if (fast_qemu_savevm_state_iterate(f, false) > 0) {
            fast_qemu_savevm_state_complete_precopy(f, false);
            break;
        }
    }

    /* report a failed save instead of unconditionally claiming success */
    return qemu_file_get_error(f);
}
/* QEMUFile RAM Emulation */

/*
 * writev callback: append every iovec to the opaque's buffer at its current
 * write position and advance that position.
 *
 * @opaque: struct fast_savevm_opaque_t describing the target buffer
 * @iov:    array of @iovcnt vectors to append
 * @pos:    unused; the position stored in @opaque is used instead
 *
 * Returns the total number of bytes copied.
 *
 * NOTE(review): no capacity check is performed before copying -- confirm
 * that callers size the buffer for the largest possible state.
 */
static ssize_t fast_savevm_writev_buffer(void *opaque,
                                         struct iovec *iov,
                                         int iovcnt,
                                         int64_t pos)
{
    struct fast_savevm_opaque_t *state = opaque;
    ssize_t total = 0;

    for (uint32_t idx = 0; idx < iovcnt; idx++) {
        const struct iovec *chunk = &iov[idx];

        memcpy((void *)(state->buf + state->pos), chunk->iov_base, chunk->iov_len);
        state->pos += chunk->iov_len;
        total += chunk->iov_len;
    }

    return total;
}
/*
 * close callback of the save path: copy everything written so far (pos bytes
 * starting at buf) into output_buffer and store the byte count through
 * output_buffer_size.
 *
 * @opaque: struct fast_savevm_opaque_t of the file being closed
 *
 * Always returns 0.
 */
static int fast_savevm_fclose_save_to_buffer(void *opaque)
{
    struct fast_savevm_opaque_t *state = opaque;

    memcpy(state->output_buffer, state->buf, state->pos);
    *state->output_buffer_size = state->pos;

    return 0;
}
/*
 * close callback of the load path: there is nothing to release here.
 *
 * @opaque: ignored
 *
 * Always returns 0.
 */
static int fast_loadvm_fclose(void *opaque)
{
    (void)opaque;
    return 0;
}
/*
 * get_buffer callback of the load path: copy @size bytes starting at offset
 * @pos of the opaque's buffer into @buf.
 *
 * @opaque: struct fast_savevm_opaque_t whose buf holds the saved state
 *
 * Returns @size.
 *
 * NOTE(review): the request is not checked against the amount of valid data
 * in the buffer -- confirm that callers never read past it.
 */
static ssize_t fast_loadvm_get_buffer(void *opaque, uint8_t *buf, int64_t pos, size_t size)
{
    struct fast_savevm_opaque_t *state = opaque;
    const void *src = (void *)(state->buf + pos);

    memcpy(buf, src, size);
    return size;
}
/*
 * QEMUFile callbacks for reading saved device state out of a memory buffer;
 * the opaque pointer handed to them is a struct fast_savevm_opaque_t.
 *
 * NOTE(review): the casts hide any difference between these functions and
 * the QEMUFile callback typedefs -- confirm that the prototypes agree.
 */
static const QEMUFileOps fast_loadvm_ops = {
.get_buffer = (QEMUFileGetBufferFunc *)fast_loadvm_get_buffer,
.close = (QEMUFileCloseFunc *)fast_loadvm_fclose
};
/*
 * QEMUFile callbacks for writing device state into a memory buffer; closing
 * the file copies the collected bytes into the opaque's output buffer.
 *
 * NOTE(review): the casts hide any difference between these functions and
 * the QEMUFile callback typedefs -- confirm that the prototypes agree.
 */
static const QEMUFileOps fast_savevm_ops_to_buffer = {
.writev_buffer = (QEMUFileWritevBufferFunc *)fast_savevm_writev_buffer,
.close = (QEMUFileCloseFunc *)fast_savevm_fclose_save_to_buffer
};
nyx_device_state_t *nyx_device_state_init_from_snapshot(const char *snapshot_folder,
bool pre_snapshot)
{
nyx_device_state_t *self = malloc(sizeof(nyx_device_state_t));
memset(self, 0, sizeof(nyx_device_state_t));
self->state_buf = malloc(STATE_BUFFER);
self->state_buf_size = 0;
char *qemu_state_file;
assert(asprintf(&qemu_state_file, "%s/fast_snapshot.qemu_state",
snapshot_folder) != -1);
struct fast_savevm_opaque_t fast_savevm_opaque;
FILE *f;
uint8_t ret = global_state_store();
assert(!ret);
struct stat buffer;
assert(stat(qemu_state_file, &buffer) == 0);
void *state_buf2 = malloc(STATE_BUFFER);
f = fopen(qemu_state_file, "r");
assert(fread(state_buf2, buffer.st_size, 1, f) == 1);
fclose(f);
fast_savevm_opaque.buf = state_buf2;
fast_savevm_opaque.f = NULL;
fast_savevm_opaque.pos = 0;
QEMUFile *file_dump = qemu_fopen_ops(&fast_savevm_opaque, &fast_loadvm_ops);
qemu_devices_reset();
qemu_loadvm_state(file_dump);
if (!pre_snapshot) {
self->qemu_state = state_reallocation_new(file_dump);
}
free(state_buf2);
if (!pre_snapshot) {
enable_fast_snapshot_mode();
save_tsc_value(self, false);
}
return self;
}
/*
* This is where QemuFile is created for later fast_snapshot creation
* we use fast_qemu_savevm_state() to create a regular snapshot to QEMUFile
* backed by RAM. state_reallocation_new() then uses this file to build an
* optimized sequence of snapshot restore operations.
*/
nyx_device_state_t *nyx_device_state_init(void)
{
nyx_device_state_t *self = malloc(sizeof(nyx_device_state_t));
memset(self, 0, sizeof(nyx_device_state_t));
self->state_buf = malloc(STATE_BUFFER);
self->state_buf_size = 0;
Error *local_err = NULL;
struct fast_savevm_opaque_t fast_savevm_opaque, fast_loadvm_opaque;
void *tmp_buf = malloc(1024 * 1024 * 16);
fast_savevm_opaque.output_buffer = self->state_buf;
fast_savevm_opaque.output_buffer_size = &self->state_buf_size;
fast_savevm_opaque.buf = tmp_buf;
fast_savevm_opaque.f = NULL;
fast_savevm_opaque.pos = 0;
uint8_t ret = global_state_store();
assert(!ret);
QEMUFile *f = qemu_fopen_ops(&fast_savevm_opaque, &fast_savevm_ops_to_buffer);
ret = fast_qemu_savevm_state(f, &local_err);
fast_loadvm_opaque.buf = tmp_buf;
fast_loadvm_opaque.f = NULL;
fast_loadvm_opaque.pos = 0;
QEMUFile *file_dump = qemu_fopen_ops(&fast_loadvm_opaque, &fast_loadvm_ops);
self->qemu_state = state_reallocation_new(file_dump);
qemu_fclose(file_dump);
qemu_fclose(f);
free(tmp_buf);
enable_fast_snapshot_mode();
save_tsc_value(self, false);
return self;
}
/*
 * Turn on incremental mode: set self->incremental_mode, then call
 * fdl_fast_create_tmp() and fdl_fast_enable_tmp() on self->qemu_state.
 */
void nyx_device_state_switch_incremental(nyx_device_state_t *self)
{
    self->incremental_mode = true;

    fdl_fast_create_tmp(self->qemu_state);
    fdl_fast_enable_tmp(self->qemu_state);
}
/*
 * Turn off incremental mode: call fdl_fast_disable_tmp() on
 * self->qemu_state, then clear self->incremental_mode.
 */
void nyx_device_state_disable_incremental(nyx_device_state_t *self)
{
    fdl_fast_disable_tmp(self->qemu_state);

    self->incremental_mode = false;
}
/*
 * Restore the device state: reload self->qemu_state via fdl_fast_reload()
 * and afterwards run call_fast_change_handlers().
 */
void nyx_device_state_restore(nyx_device_state_t *self)
{
    fdl_fast_reload(self->qemu_state);

    call_fast_change_handlers();
}
/*
 * Post-restore step: write the recorded TSC value back via set_tsc_value(),
 * passing the current incremental-mode flag along.
 */
void nyx_device_state_post_restore(nyx_device_state_t *self)
{
    const bool incremental = self->incremental_mode;

    set_tsc_value(self, incremental);
}
/* Record the current TSC as the non-incremental value (self->tsc_value). */
void nyx_device_state_save_tsc(nyx_device_state_t *self)
{
    const bool incremental = false;

    save_tsc_value(self, incremental);
}
/* Record the current TSC as the incremental value (self->tsc_value_incremental). */
void nyx_device_state_save_tsc_incremental(nyx_device_state_t *self)
{
    const bool incremental = true;

    save_tsc_value(self, incremental);
}
/*
 * Write the raw device snapshot (self->state_buf, self->state_buf_size
 * bytes) to "<snapshot_folder>/fast_snapshot.qemu_state".
 *
 * @self:            device state whose buffer is written
 * @snapshot_folder: existing directory that receives the file
 *
 * Aborts if the path cannot be built, the file cannot be opened, the write
 * is short, or closing the file fails.
 */
void nyx_device_state_serialize(nyx_device_state_t *self, const char *snapshot_folder)
{
    /* calls with side effects are kept outside of assert() */
    char *path = NULL;
    int path_len = asprintf(&path, "%s/fast_snapshot.qemu_state", snapshot_folder);
    assert(path_len != -1);
    (void)path_len;

    FILE *f_qemu_state = fopen(path, "w+b");
    assert(f_qemu_state);

    size_t written = fwrite(self->state_buf, 1, self->state_buf_size, f_qemu_state);
    assert(written == self->state_buf_size);
    (void)written;

    /* fclose() flushes buffered data, so its result matters for a writer */
    int rc = fclose(f_qemu_state);
    assert(rc == 0);
    (void)rc;

    free(path);
}