numa: introduce machine callback for VCPU to node mapping
The current default of distributing VCPUs round-robin among NUMA nodes can be wrong for multi-core/multi-thread CPUs: cores from the same socket may end up on different nodes, which confuses guests about the topology. Allow a machine to override the default mapping by providing a MachineClass::cpu_index_to_socket_id() callback, which allows it to group the VCPUs of a socket on the same NUMA node. Signed-off-by: Igor Mammedov <imammedo@redhat.com> Reviewed-by: Andreas Färber <afaerber@suse.de> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
This commit is contained in:
		
							parent
							
								
									3ef7197505
								
							
						
					
					
						commit
						57924bcd87
					
				@ -82,6 +82,10 @@ bool machine_mem_merge(MachineState *machine);
 | 
				
			|||||||
 *    of HotplugHandler object, which handles hotplug operation
 | 
					 *    of HotplugHandler object, which handles hotplug operation
 | 
				
			||||||
 *    for a given @dev. It may return NULL if @dev doesn't require
 | 
					 *    for a given @dev. It may return NULL if @dev doesn't require
 | 
				
			||||||
 *    any actions to be performed by hotplug handler.
 | 
					 *    any actions to be performed by hotplug handler.
 | 
				
			||||||
 | 
					 * @cpu_index_to_socket_id:
 | 
				
			||||||
 | 
					 *    used to provide @cpu_index to socket number mapping, allowing
 | 
				
			||||||
 | 
					 *    a machine to group CPU threads belonging to the same socket/package
 | 
				
			||||||
 | 
					 *    Returns: socket number given cpu_index belongs to.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
struct MachineClass {
 | 
					struct MachineClass {
 | 
				
			||||||
    /*< private >*/
 | 
					    /*< private >*/
 | 
				
			||||||
@ -118,6 +122,7 @@ struct MachineClass {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
 | 
					    HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
 | 
				
			||||||
                                           DeviceState *dev);
 | 
					                                           DeviceState *dev);
 | 
				
			||||||
 | 
					    unsigned (*cpu_index_to_socket_id)(unsigned cpu_index);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 | 
				
			|||||||
@ -6,6 +6,7 @@
 | 
				
			|||||||
#include "qemu/option.h"
 | 
					#include "qemu/option.h"
 | 
				
			||||||
#include "sysemu/sysemu.h"
 | 
					#include "sysemu/sysemu.h"
 | 
				
			||||||
#include "sysemu/hostmem.h"
 | 
					#include "sysemu/hostmem.h"
 | 
				
			||||||
 | 
					#include "hw/boards.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern int nb_numa_nodes;   /* Number of NUMA nodes */
 | 
					extern int nb_numa_nodes;   /* Number of NUMA nodes */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -16,7 +17,7 @@ typedef struct node_info {
 | 
				
			|||||||
    bool present;
 | 
					    bool present;
 | 
				
			||||||
} NodeInfo;
 | 
					} NodeInfo;
 | 
				
			||||||
extern NodeInfo numa_info[MAX_NODES];
 | 
					extern NodeInfo numa_info[MAX_NODES];
 | 
				
			||||||
void parse_numa_opts(void);
 | 
					void parse_numa_opts(MachineClass *mc);
 | 
				
			||||||
void numa_post_machine_init(void);
 | 
					void numa_post_machine_init(void);
 | 
				
			||||||
void query_numa_node_mem(uint64_t node_mem[]);
 | 
					void query_numa_node_mem(uint64_t node_mem[]);
 | 
				
			||||||
extern QemuOptsList qemu_numa_opts;
 | 
					extern QemuOptsList qemu_numa_opts;
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										18
									
								
								numa.c
									
									
									
									
									
								
							
							
						
						
									
										18
									
								
								numa.c
									
									
									
									
									
								
							@ -202,7 +202,7 @@ static void validate_numa_cpus(void)
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void parse_numa_opts(void)
 | 
					void parse_numa_opts(MachineClass *mc)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    int i;
 | 
					    int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -270,13 +270,21 @@ void parse_numa_opts(void)
 | 
				
			|||||||
                break;
 | 
					                break;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        /* assigning the VCPUs round-robin is easier to implement, guest OSes
 | 
					        /* Historically VCPUs were assigned in round-robin order to NUMA
 | 
				
			||||||
         * must cope with this anyway, because there are BIOSes out there in
 | 
					         * nodes. However it causes issues with guest not handling it nice
 | 
				
			||||||
         * real machines which also use this scheme.
 | 
					         * in case where cores/threads from a multicore CPU appear on
 | 
				
			||||||
 | 
					         * different nodes. So allow boards to override default distribution
 | 
				
			||||||
 | 
					         * rule grouping VCPUs by socket so that VCPUs from the same socket
 | 
				
			||||||
 | 
					         * would be on the same node.
 | 
				
			||||||
         */
 | 
					         */
 | 
				
			||||||
        if (i == nb_numa_nodes) {
 | 
					        if (i == nb_numa_nodes) {
 | 
				
			||||||
            for (i = 0; i < max_cpus; i++) {
 | 
					            for (i = 0; i < max_cpus; i++) {
 | 
				
			||||||
                set_bit(i, numa_info[i % nb_numa_nodes].node_cpu);
 | 
					                unsigned node_id = i % nb_numa_nodes;
 | 
				
			||||||
 | 
					                if (mc->cpu_index_to_socket_id) {
 | 
				
			||||||
 | 
					                    node_id = mc->cpu_index_to_socket_id(i) % nb_numa_nodes;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                set_bit(i, numa_info[node_id].node_cpu);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										2
									
								
								vl.c
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								vl.c
									
									
									
									
									
								
							@ -4170,7 +4170,7 @@ int main(int argc, char **argv, char **envp)
 | 
				
			|||||||
    default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
 | 
					    default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
 | 
				
			||||||
    default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
 | 
					    default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    parse_numa_opts();
 | 
					    parse_numa_opts(machine_class);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) {
 | 
					    if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) {
 | 
				
			||||||
        exit(1);
 | 
					        exit(1);
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user