Use mmap_lock in user-mode to protect TCG state and the page descriptors.
In !user-mode, each vCPU has its own TCG state, so no locks are needed.
Per-page locks are used to protect the page descriptors.

Per-TB locks are used in both modes to protect TB jumps, as sketched below.
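The per-TB serialization amounts to a spinlock (jmp_lock) in each
TranslationBlock that guards its jump lists. A minimal sketch of chaining
a TB to tb_next under that lock (illustrative only; the incoming-jump-list
manipulation is elided, and the exact field names should be treated as
assumptions):

    qemu_spin_lock(&tb_next->jmp_lock);
    if (tb_next->cflags & CF_INVALID) {
        /* tb_next was invalidated concurrently; do not chain to it */
        qemu_spin_unlock(&tb_next->jmp_lock);
        return;
    }
    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr); /* patch jump n */
    /* ... link tb into tb_next's incoming-jump list, so that invalidating
     * tb_next can later unpatch this jump ... */
    qemu_spin_unlock(&tb_next->jmp_lock);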
Some notes:
- tb_lock is removed from notdirty_mem_write by passing a
  locked page_collection to tb_invalidate_phys_page_fast.

- tcg_tb_lookup/remove/insert/etc have their own internal lock(s),
  so there is no need to further serialize access to them.

- do_tb_flush is run in a safe async context, meaning no other
  vCPU threads are running. Therefore acquiring mmap_lock there
  is just to please tools such as thread sanitizer.

- Not visible in the diff, but tb_invalidate_phys_page already
  has an assert_memory_lock.

- cpu_io_recompile is !user-only, so no mmap_lock there.

- Added mmap_unlock()'s before all siglongjmp's that could
  be called in user-mode while mmap_lock is held.
  + Added an assert for !have_mmap_lock() after returning from
    the longjmp in cpu_exec, just like we do in cpu_exec_step_atomic.
  (See the sketch after this list.)
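A minimal sketch of that last point, assuming the usual sigsetjmp/siglongjmp
pairing around the execution loop (illustrative, not the exact diff):

    /* user-mode slow path, before longjmp'ing back into cpu_exec: */
    mmap_unlock();                 /* never longjmp with mmap_lock held */
    cpu_loop_exit(cpu);            /* siglongjmp's to cpu->jmp_env */

    /* in cpu_exec(), at the sigsetjmp landing site: */
    if (sigsetjmp(cpu->jmp_env, 0) != 0) {
        assert(!have_mmap_lock()); /* user-mode only (!CONFIG_SOFTMMU) */
        /* ... back to the main execution loop ... */
    }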
Performance numbers before/after:
Host: AMD Opteron(tm) Processor 6376
ubuntu 17.04 ppc64 bootup+shutdown time
  [plot: time (y: 100-700) vs. guest CPUs (x: 1-64); "before" (B) vs. "tb lock removal" (D)]
png: https://imgur.com/HwmBHXe
debian jessie aarch64 bootup+shutdown time
  [plot: time (y: 10-90) vs. guest CPUs (x: 1-64); "before" (B) vs. "tb lock removal" (D)]
png: https://imgur.com/iGpGFtv
The gains are high for 4-8 CPUs. Beyond that point, however, unrelated
lock contention significantly hurts scalability.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
/*
 * Declarations for functions which are internal to the memory subsystem.
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later. See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c, memory.c and accel/tcg/cputlb.c ONLY,
 * for declarations which are shared between the memory subsystem's
 * internals and the TCG TLB code. Do not include it from elsewhere.
 */

#ifndef MEMORY_INTERNAL_H
#define MEMORY_INTERNAL_H
#ifndef CONFIG_USER_ONLY
static inline AddressSpaceDispatch *flatview_to_dispatch(FlatView *fv)
{
    return fv->dispatch;
}

static inline AddressSpaceDispatch *address_space_to_dispatch(AddressSpace *as)
{
    return flatview_to_dispatch(address_space_to_flatview(as));
}

FlatView *address_space_get_flatview(AddressSpace *as);
void flatview_unref(FlatView *view);

extern const MemoryRegionOps unassigned_mem_ops;

bool memory_region_access_valid(MemoryRegion *mr, hwaddr addr,
                                unsigned size, bool is_write,
                                MemTxAttrs attrs);

void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section);
AddressSpaceDispatch *address_space_dispatch_new(FlatView *fv);
void address_space_dispatch_compact(AddressSpaceDispatch *d);
void address_space_dispatch_free(AddressSpaceDispatch *d);

void mtree_print_dispatch(fprintf_function mon, void *f,
                          struct AddressSpaceDispatch *d,
                          MemoryRegion *root);
struct page_collection;

/* Opaque struct for passing info from memory_notdirty_write_prepare()
 * to memory_notdirty_write_complete(). Callers should treat all fields
 * as private, with the exception of @active.
 *
 * @active is a field which is not touched by either the prepare or
 * complete functions, but which the caller can use if it wishes to
 * track whether it has called prepare for this struct and so needs
 * to later call the complete function.
 */
typedef struct {
    CPUState *cpu;
    struct page_collection *pages;
    ram_addr_t ram_addr;
    vaddr mem_vaddr;
    unsigned size;
    bool active;
} NotDirtyInfo;
/**
 * memory_notdirty_write_prepare: call before writing to non-dirty memory
 * @ndi: pointer to opaque NotDirtyInfo struct
 * @cpu: CPU doing the write
 * @mem_vaddr: virtual address of write
 * @ram_addr: the ram address of the write
 * @size: size of write in bytes
 *
 * Any code which writes to the host memory corresponding to
 * guest RAM which has been marked as NOTDIRTY must wrap those
 * writes in calls to memory_notdirty_write_prepare() and
 * memory_notdirty_write_complete():
 *
 *  NotDirtyInfo ndi;
 *  memory_notdirty_write_prepare(&ndi, ....);
 *  ... perform write here ...
 *  memory_notdirty_write_complete(&ndi);
 *
 * These calls will ensure that we flush any TCG translated code for
 * the memory being written, update the dirty bits and (if possible)
 * remove the slowpath callback for writing to the memory.
 *
 * This must only be called if we are using TCG; it will assert otherwise.
 *
 * We may take locks in the prepare call, so callers must ensure that
 * they don't exit (via longjmp or otherwise) without calling complete.
 *
 * This call must only be made inside an RCU critical section.
 * (Note that while we're executing a TCG TB we're always in an
 * RCU critical section, which is likely to be the case for callers
 * of these functions.)
 */
void memory_notdirty_write_prepare(NotDirtyInfo *ndi,
                                   CPUState *cpu,
                                   vaddr mem_vaddr,
                                   ram_addr_t ram_addr,
                                   unsigned size);
/**
 * memory_notdirty_write_complete: finish write to non-dirty memory
 * @ndi: pointer to the opaque NotDirtyInfo struct which was initialized
 * by memory_notdirty_write_prepare().
 */
void memory_notdirty_write_complete(NotDirtyInfo *ndi);
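/*
 * Illustration only (not part of this header): a caller following the
 * prepare/complete protocol above, using @active to remember whether
 * prepare was called. The variable names here are hypothetical.
 *
 *    NotDirtyInfo ndi = { .active = false };
 *
 *    if (page_is_notdirty) {
 *        memory_notdirty_write_prepare(&ndi, cpu, mem_vaddr,
 *                                      ram_addr, size);
 *        ndi.active = true;
 *    }
 *    stn_p(host_ptr, size, val);    // the guarded RAM write
 *    if (ndi.active) {
 *        memory_notdirty_write_complete(&ndi);
 *    }
 */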

#endif
#endif