diff options
author | Chegu Vinod <chegu_vinod@hp.com> | 2012-07-16 21:31:30 -0700 |
---|---|---|
committer | Blue Swirl <blauwirbel@gmail.com> | 2012-08-04 13:23:58 +0000 |
commit | ee785fed5dd035d4b12142cacec6d3c344426dec (patch) | |
tree | a155758ce77854dc932d5d0ad6616de1f35fa9e3 | |
parent | 161abfb5f929184a68baa16701606015f5683722 (diff) | |
download | qemu-ee785fed5dd035d4b12142cacec6d3c344426dec.zip qemu-ee785fed5dd035d4b12142cacec6d3c344426dec.tar.gz qemu-ee785fed5dd035d4b12142cacec6d3c344426dec.tar.bz2 |
Fixes related to processing of qemu's -numa option
The -numa option to qemu is used to create [fake] numa nodes
and expose them to the guest OS instance.
There are a couple of issues with the -numa option:
a) Max VCPU's that can be specified for a guest while using
the qemu's -numa option is 64. Due to a typecasting issue
when the number of VCPUs is > 32 the VCPUs don't show up
under the specified [fake] numa nodes.
b) KVM currently has support for 160VCPUs per guest. The
qemu's -numa option has only support for upto 64VCPUs
per guest.
This patch addresses these two issues.
Below are examples of (a) and (b)
a) >32 VCPUs are specified with the -numa option:
/usr/local/bin/qemu-system-x86_64 \
-enable-kvm \
71:01:01 \
-net tap,ifname=tap0,script=no,downscript=no \
-vnc :4
...
Upstream qemu :
--------------
QEMU 1.1.50 monitor - type 'help' for more information
(qemu) info numa
6 nodes
node 0 cpus: 0 1 2 3 4 5 6 7 8 9 32 33 34 35 36 37 38 39 40 41
node 0 size: 131072 MB
node 1 cpus: 10 11 12 13 14 15 16 17 18 19 42 43 44 45 46 47 48 49 50 51
node 1 size: 131072 MB
node 2 cpus: 20 21 22 23 24 25 26 27 28 29 52 53 54 55 56 57 58 59
node 2 size: 131072 MB
node 3 cpus: 30
node 3 size: 131072 MB
node 4 cpus:
node 4 size: 131072 MB
node 5 cpus: 31
node 5 size: 131072 MB
With the patch applied :
-----------------------
QEMU 1.1.50 monitor - type 'help' for more information
(qemu) info numa
6 nodes
node 0 cpus: 0 1 2 3 4 5 6 7 8 9
node 0 size: 131072 MB
node 1 cpus: 10 11 12 13 14 15 16 17 18 19
node 1 size: 131072 MB
node 2 cpus: 20 21 22 23 24 25 26 27 28 29
node 2 size: 131072 MB
node 3 cpus: 30 31 32 33 34 35 36 37 38 39
node 3 size: 131072 MB
node 4 cpus: 40 41 42 43 44 45 46 47 48 49
node 4 size: 131072 MB
node 5 cpus: 50 51 52 53 54 55 56 57 58 59
node 5 size: 131072 MB
b) >64 VCPUs specified with -numa option:
/usr/local/bin/qemu-system-x86_64 \
-enable-kvm \
-cpu Westmere,+rdtscp,+pdpe1gb,+dca,+pdcm,+xtpr,+tm2,+est,+smx,+vmx,+ds_cpl,+monitor,+dtes64,+pclmuldq,+pbe,+tm,+ht,+ss,+acpi,+d-vnc :4
...
Upstream qemu :
--------------
only 63 CPUs in NUMA mode supported.
only 64 CPUs in NUMA mode supported.
QEMU 1.1.50 monitor - type 'help' for more information
(qemu) info numa
8 nodes
node 0 cpus: 6 7 8 9 38 39 40 41 70 71 72 73
node 0 size: 65536 MB
node 1 cpus: 10 11 12 13 14 15 16 17 18 19 42 43 44 45 46 47 48 49 50 51 74 75 76 77 78 79
node 1 size: 65536 MB
node 2 cpus: 20 21 22 23 24 25 26 27 28 29 52 53 54 55 56 57 58 59 60 61
node 2 size: 65536 MB
node 3 cpus: 30 62
node 3 size: 65536 MB
node 4 cpus:
node 4 size: 65536 MB
node 5 cpus:
node 5 size: 65536 MB
node 6 cpus: 31 63
node 6 size: 65536 MB
node 7 cpus: 0 1 2 3 4 5 32 33 34 35 36 37 64 65 66 67 68 69
node 7 size: 65536 MB
With the patch applied :
-----------------------
QEMU 1.1.50 monitor - type 'help' for more information
(qemu) info numa
8 nodes
node 0 cpus: 0 1 2 3 4 5 6 7 8 9
node 0 size: 65536 MB
node 1 cpus: 10 11 12 13 14 15 16 17 18 19
node 1 size: 65536 MB
node 2 cpus: 20 21 22 23 24 25 26 27 28 29
node 2 size: 65536 MB
node 3 cpus: 30 31 32 33 34 35 36 37 38 39
node 3 size: 65536 MB
node 4 cpus: 40 41 42 43 44 45 46 47 48 49
node 4 size: 65536 MB
node 5 cpus: 50 51 52 53 54 55 56 57 58 59
node 5 size: 65536 MB
node 6 cpus: 60 61 62 63 64 65 66 67 68 69
node 6 size: 65536 MB
node 7 cpus: 70 71 72 73 74 75 76 77 78 79
Signed-off-by: Chegu Vinod <chegu_vinod@hp.com>, Jim Hull <jim.hull@hp.com>, Craig Hada <craig.hada@hp.com>
Tested-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
-rw-r--r-- | cpus.c | 3 | ||||
-rw-r--r-- | hw/pc.c | 3 | ||||
-rw-r--r-- | sysemu.h | 3 | ||||
-rw-r--r-- | vl.c | 43 |
4 files changed, 27 insertions, 25 deletions
@@ -36,6 +36,7 @@ #include "cpus.h" #include "qtest.h" #include "main-loop.h" +#include "bitmap.h" #ifndef _WIN32 #include "compatfd.h" @@ -1159,7 +1160,7 @@ void set_numa_modes(void) for (env = first_cpu; env != NULL; env = env->next_cpu) { for (i = 0; i < nb_numa_nodes; i++) { - if (node_cpumask[i] & (1 << env->cpu_index)) { + if (test_bit(env->cpu_index, node_cpumask[i])) { env->numa_node = i; } } @@ -49,6 +49,7 @@ #include "memory.h" #include "exec-memory.h" #include "arch_init.h" +#include "bitmap.h" /* output Bochs bios info messages */ //#define DEBUG_BIOS @@ -625,7 +626,7 @@ static void *bochs_bios_init(void) numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); for (i = 0; i < max_cpus; i++) { for (j = 0; j < nb_numa_nodes; j++) { - if (node_cpumask[j] & (1 << i)) { + if (test_bit(i, node_cpumask[j])) { numa_fw_cfg[i + 1] = cpu_to_le64(j); break; } @@ -134,9 +134,10 @@ extern uint8_t qemu_extra_params_fw[2]; extern QEMUClock *rtc_clock; #define MAX_NODES 64 +#define MAX_CPUMASK_BITS 255 extern int nb_numa_nodes; extern uint64_t node_mem[MAX_NODES]; -extern uint64_t node_cpumask[MAX_NODES]; +extern unsigned long *node_cpumask[MAX_NODES]; #define MAX_OPTION_ROMS 16 typedef struct QEMUOptionRom { @@ -28,6 +28,7 @@ #include <errno.h> #include <sys/time.h> #include <zlib.h> +#include "bitmap.h" /* Needed early for CONFIG_BSD etc. */ #include "config-host.h" @@ -241,7 +242,7 @@ QTAILQ_HEAD(, FWBootEntry) fw_boot_order = QTAILQ_HEAD_INITIALIZER(fw_boot_order int nb_numa_nodes; uint64_t node_mem[MAX_NODES]; -uint64_t node_cpumask[MAX_NODES]; +unsigned long *node_cpumask[MAX_NODES]; uint8_t qemu_uuid[16]; @@ -952,6 +953,8 @@ static void numa_add(const char *optarg) unsigned long long value, endvalue; int nodenr; + value = endvalue = 0ULL; + optarg = get_opt_name(option, 128, optarg, ',') + 1; if (!strcmp(option, "node")) { if (get_param_value(option, 128, "nodeid", optarg) == 0) { @@ -971,27 +974,22 @@ static void numa_add(const char *optarg) } node_mem[nodenr] = sval; } - if (get_param_value(option, 128, "cpus", optarg) == 0) { - node_cpumask[nodenr] = 0; - } else { + if (get_param_value(option, 128, "cpus", optarg) != 0) { value = strtoull(option, &endptr, 10); - if (value >= 64) { - value = 63; - fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n"); + if (*endptr == '-') { + endvalue = strtoull(endptr+1, &endptr, 10); } else { - if (*endptr == '-') { - endvalue = strtoull(endptr+1, &endptr, 10); - if (endvalue >= 63) { - endvalue = 62; - fprintf(stderr, - "only 63 CPUs in NUMA mode supported.\n"); - } - value = (2ULL << endvalue) - (1ULL << value); - } else { - value = 1ULL << value; - } + endvalue = value; + } + + if (!(endvalue < MAX_CPUMASK_BITS)) { + endvalue = MAX_CPUMASK_BITS - 1; + fprintf(stderr, + "A max of %d CPUs are supported in a guest\n", + MAX_CPUMASK_BITS); } - node_cpumask[nodenr] = value; + + bitmap_set(node_cpumask[nodenr], value, endvalue-value+1); } nb_numa_nodes++; } @@ -2331,7 +2329,7 @@ int main(int argc, char **argv, char **envp) for (i = 0; i < MAX_NODES; i++) { node_mem[i] = 0; - node_cpumask[i] = 0; + node_cpumask[i] = bitmap_new(MAX_CPUMASK_BITS); } nb_numa_nodes = 0; @@ -3468,8 +3466,9 @@ int main(int argc, char **argv, char **envp) } for (i = 0; i < nb_numa_nodes; i++) { - if (node_cpumask[i] != 0) + if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) { break; + } } /* assigning the VCPUs round-robin is easier to implement, guest OSes * must cope with this anyway, because there are BIOSes out there in @@ -3477,7 +3476,7 @@ int main(int argc, char **argv, char **envp) */ if (i == nb_numa_nodes) { for (i = 0; i < max_cpus; i++) { - node_cpumask[i % nb_numa_nodes] |= 1 << i; + set_bit(i, node_cpumask[i % nb_numa_nodes]); } } } |