Use mmap_lock in user-mode to protect TCG state and the page descriptors.
In !user-mode, each vCPU has its own TCG state, so no locks are needed.
Per-page locks are used to protect the page descriptors.
Per-TB locks are used in both modes to protect TB jumps.
Some notes:
- tb_lock is removed from notdirty_mem_write by passing a
locked page_collection to tb_invalidate_phys_page_fast.
- tcg_tb_lookup/remove/insert/etc have their own internal lock(s),
so there is no need to further serialize access to them.
- do_tb_flush is run in a safe async context, meaning no other
vCPU threads are running. Therefore acquiring mmap_lock there
is just to please tools such as thread sanitizer.
- Not visible in the diff, but tb_invalidate_phys_page already
has an assert_memory_lock.
- cpu_io_recompile is !user-only, so no mmap_lock there.
- Added mmap_unlock()'s before all siglongjmp's that could
be called in user-mode while mmap_lock is held.
+ Added an assert for !have_mmap_lock() after returning from
the longjmp in cpu_exec, just like we do in cpu_exec_step_atomic.
Performance numbers before/after:
Host: AMD Opteron(tm) Processor 6376
ubuntu 17.04 ppc64 bootup+shutdown time
700 +-+--+----+------+------------+-----------+------------*--+-+
| + + + + + *B |
| before ***B*** ** * |
|tb lock removal ###D### *** |
600 +-+ *** +-+
| ** # |
| *B* #D |
| *** * ## |
500 +-+ *** ### +-+
| * *** ### |
| *B* # ## |
| ** * #D# |
400 +-+ ** ## +-+
| ** ### |
| ** ## |
| ** # ## |
300 +-+ * B* #D# +-+
| B *** ### |
| * ** #### |
| * *** ### |
200 +-+ B *B #D# +-+
| #B* * ## # |
| #* ## |
| + D##D# + + + + |
100 +-+--+----+------+------------+-----------+------------+--+-+
1 8 16 Guest CPUs 48 64
png: https://imgur.com/HwmBHXe
debian jessie aarch64 bootup+shutdown time
90 +-+--+-----+-----+------------+------------+------------+--+-+
| + + + + + + |
| before ***B*** B |
80 +tb lock removal ###D### **D +-+
| **### |
| **## |
70 +-+ ** # +-+
| ** ## |
| ** # |
60 +-+ *B ## +-+
| ** ## |
| *** #D |
50 +-+ *** ## +-+
| * ** ### |
| **B* ### |
40 +-+ **** # ## +-+
| **** #D# |
| ***B** ### |
30 +-+ B***B** #### +-+
| B * * # ### |
| B ###D# |
20 +-+ D ##D## +-+
| D# |
| + + + + + + |
10 +-+--+-----+-----+------------+------------+------------+--+-+
1 8 16 Guest CPUs 48 64
png: https://imgur.com/iGpGFtv
The gains are high for 4-8 CPUs. Beyond that point, however, unrelated
lock contention significantly hurts scalability.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
131 lines
4.1 KiB
C
131 lines
4.1 KiB
C
#ifndef CPU_COMMON_H
|
|
#define CPU_COMMON_H
|
|
|
|
/* CPU interfaces that are target independent. */
|
|
|
|
#ifndef CONFIG_USER_ONLY
|
|
#include "exec/hwaddr.h"
|
|
#endif
|
|
|
|
#include "qemu/bswap.h"
|
|
#include "qemu/queue.h"
|
|
#include "qemu/fprintf-fn.h"
|
|
|
|
/**
 * CPUListState:
 * @cpu_fprintf: Print function.
 * @file: File to print to using @cpu_fprintf.
 *
 * State commonly used for iterating over CPU models.
 */
|
|
typedef struct CPUListState {
|
|
fprintf_function cpu_fprintf;
|
|
FILE *file;
|
|
} CPUListState;
|
|
|
|
/*
 * The CPU list lock nests outside page_(un)lock or mmap_(un)lock.
 * NOTE(review): "nests outside" is read here as "cpu_list_lock() is taken
 * before, and released after, the page/mmap locks when both are needed" --
 * confirm against the callers.
 */
|
|
void qemu_init_cpu_list(void);
|
|
void cpu_list_lock(void);
|
|
void cpu_list_unlock(void);
|
|
|
|
void tcg_flush_softmmu_tlb(CPUState *cs);
|
|
|
|
#if !defined(CONFIG_USER_ONLY)
|
|
|
|
/*
 * Byte-order selector.
 * NOTE(review): presumably describes the endianness of a device's
 * registers/memory, with DEVICE_NATIVE_ENDIAN meaning "same as the target";
 * the consumers are not visible in this header -- confirm before relying
 * on that reading.
 */
enum device_endian {
|
|
DEVICE_NATIVE_ENDIAN,
|
|
DEVICE_BIG_ENDIAN,
|
|
DEVICE_LITTLE_ENDIAN,
|
|
};
|
|
|
|
/*
 * DEVICE_HOST_ENDIAN: compile-time alias for one of the device_endian
 * values -- DEVICE_BIG_ENDIAN when HOST_WORDS_BIGENDIAN is defined,
 * DEVICE_LITTLE_ENDIAN otherwise.
 */
#if defined(HOST_WORDS_BIGENDIAN)
|
|
#define DEVICE_HOST_ENDIAN DEVICE_BIG_ENDIAN
|
|
#else
|
|
#define DEVICE_HOST_ENDIAN DEVICE_LITTLE_ENDIAN
|
|
#endif
|
|
|
|
/*
 * address in the RAM (different from a physical address)
 *
 * With CONFIG_XEN_BACKEND the type is always 64 bits wide (uint64_t);
 * otherwise it is as wide as a host pointer (uintptr_t).
 * RAM_ADDR_MAX is the largest value of the selected type and RAM_ADDR_FMT
 * is the matching printf conversion (hexadecimal, via PRIx64 / PRIxPTR).
 */
|
|
#if defined(CONFIG_XEN_BACKEND)
|
|
typedef uint64_t ram_addr_t;
|
|
# define RAM_ADDR_MAX UINT64_MAX
|
|
# define RAM_ADDR_FMT "%" PRIx64
|
|
#else
|
|
typedef uintptr_t ram_addr_t;
|
|
# define RAM_ADDR_MAX UINTPTR_MAX
|
|
# define RAM_ADDR_FMT "%" PRIxPTR
|
|
#endif
|
|
|
|
extern ram_addr_t ram_size;
|
|
|
|
/* memory API */
|
|
|
|
/*
 * Function types for memory access callbacks: both receive a caller-supplied
 * @opaque pointer and an hwaddr; the write variant additionally takes the
 * 32-bit value, the read variant returns one.
 * NOTE(review): where these callbacks are registered is not visible in this
 * header.
 */
typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value);
|
|
typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
|
|
|
|
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
|
|
/* This should not be used by devices. */
|
|
ram_addr_t qemu_ram_addr_from_host(void *ptr);
|
|
RAMBlock *qemu_ram_block_by_name(const char *name);
|
|
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
|
|
ram_addr_t *offset);
|
|
ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host);
|
|
void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev);
|
|
void qemu_ram_unset_idstr(RAMBlock *block);
|
|
const char *qemu_ram_get_idstr(RAMBlock *rb);
|
|
bool qemu_ram_is_shared(RAMBlock *rb);
|
|
bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
|
|
void qemu_ram_set_uf_zeroable(RAMBlock *rb);
|
|
bool qemu_ram_is_migratable(RAMBlock *rb);
|
|
void qemu_ram_set_migratable(RAMBlock *rb);
|
|
void qemu_ram_unset_migratable(RAMBlock *rb);
|
|
|
|
size_t qemu_ram_pagesize(RAMBlock *block);
|
|
size_t qemu_ram_pagesize_largest(void);
|
|
|
|
/*
 * Common entry point used by the cpu_physical_memory_read() and
 * cpu_physical_memory_write() inline wrappers that follow.
 * @is_write: the read wrapper passes 0, the write wrapper passes 1.
 * @buf is non-const because the same pointer serves both directions.
 */
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
|
|
int len, int is_write);
|
|
/*
 * Convenience wrapper: forwards @addr, @buf and @len to
 * cpu_physical_memory_rw() with the is_write argument fixed to 0.
 */
static inline void cpu_physical_memory_read(hwaddr addr,
                                            void *buf, int len)
{
    const int is_write = 0;

    cpu_physical_memory_rw(addr, buf, len, is_write);
}
|
|
/*
 * Convenience wrapper: forwards @addr, @buf and @len to
 * cpu_physical_memory_rw() with the is_write argument fixed to 1.
 */
static inline void cpu_physical_memory_write(hwaddr addr,
                                             const void *buf, int len)
{
    /*
     * cpu_physical_memory_rw() takes a non-const buffer because it is shared
     * with the read path, so the const qualifier is cast away here.
     */
    void *src = (void *)buf;

    cpu_physical_memory_rw(addr, src, len, 1);
}
|
|
void *cpu_physical_memory_map(hwaddr addr,
|
|
hwaddr *plen,
|
|
int is_write);
|
|
void cpu_physical_memory_unmap(void *buffer, hwaddr len,
|
|
int is_write, hwaddr access_len);
|
|
void cpu_register_map_client(QEMUBH *bh);
|
|
void cpu_unregister_map_client(QEMUBH *bh);
|
|
|
|
bool cpu_physical_memory_is_io(hwaddr phys_addr);
|
|
|
|
/* Coalesced MMIO regions are areas where write operations can be reordered.
 * This usually implies that write operations are side-effect free.  This allows
 * batching which can make a major impact on performance when using
 * virtualization.
 */
|
|
void qemu_flush_coalesced_mmio_buffer(void);
|
|
|
|
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
|
|
const uint8_t *buf, int len);
|
|
void cpu_flush_icache_range(hwaddr start, int len);
|
|
|
|
extern struct MemoryRegion io_mem_rom;
|
|
extern struct MemoryRegion io_mem_notdirty;
|
|
|
|
/*
 * Callback type accepted by qemu_ram_foreach_block() and
 * qemu_ram_foreach_migratable_block().
 * NOTE(review): presumably invoked once per RAM block with that block's
 * name, host address, offset and length, and with @opaque forwarded from
 * the foreach call; the meaning of the int return value is not visible
 * here -- confirm against the implementation.
 */
typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr,
|
|
ram_addr_t offset, ram_addr_t length, void *opaque);
|
|
|
|
int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
|
|
int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque);
|
|
int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
|
|
|
|
#endif
|
|
|
|
#endif /* CPU_COMMON_H */
|