author    Chegu Vinod <chegu_vinod@hp.com>  2012-07-16 21:31:30 -0700
committer Blue Swirl <blauwirbel@gmail.com>  2012-08-04 13:23:58 +0000
commit    ee785fed5dd035d4b12142cacec6d3c344426dec (patch)
tree      a155758ce77854dc932d5d0ad6616de1f35fa9e3
parent    161abfb5f929184a68baa16701606015f5683722 (diff)
Fixes related to processing of qemu's -numa option
The -numa option to qemu is used to create [fake] numa nodes and expose
them to the guest OS instance.

There are a couple of issues with the -numa option:

a) The maximum number of VCPUs that can be specified for a guest while
   using qemu's -numa option is 64. Due to a typecasting issue, when the
   number of VCPUs is > 32 the VCPUs don't show up under the specified
   [fake] numa nodes.

b) KVM currently supports 160 VCPUs per guest, but qemu's -numa option
   only supports up to 64 VCPUs per guest.

This patch addresses these two issues. Below are examples of (a) and (b).

a) >32 VCPUs are specified with the -numa option:

/usr/local/bin/qemu-system-x86_64 \
 -enable-kvm \
 71:01:01 \
 -net tap,ifname=tap0,script=no,downscript=no \
 -vnc :4
...

Upstream qemu:
--------------

QEMU 1.1.50 monitor - type 'help' for more information
(qemu) info numa
6 nodes
node 0 cpus: 0 1 2 3 4 5 6 7 8 9 32 33 34 35 36 37 38 39 40 41
node 0 size: 131072 MB
node 1 cpus: 10 11 12 13 14 15 16 17 18 19 42 43 44 45 46 47 48 49 50 51
node 1 size: 131072 MB
node 2 cpus: 20 21 22 23 24 25 26 27 28 29 52 53 54 55 56 57 58 59
node 2 size: 131072 MB
node 3 cpus: 30
node 3 size: 131072 MB
node 4 cpus:
node 4 size: 131072 MB
node 5 cpus: 31
node 5 size: 131072 MB

With the patch applied:
-----------------------

QEMU 1.1.50 monitor - type 'help' for more information
(qemu) info numa
6 nodes
node 0 cpus: 0 1 2 3 4 5 6 7 8 9
node 0 size: 131072 MB
node 1 cpus: 10 11 12 13 14 15 16 17 18 19
node 1 size: 131072 MB
node 2 cpus: 20 21 22 23 24 25 26 27 28 29
node 2 size: 131072 MB
node 3 cpus: 30 31 32 33 34 35 36 37 38 39
node 3 size: 131072 MB
node 4 cpus: 40 41 42 43 44 45 46 47 48 49
node 4 size: 131072 MB
node 5 cpus: 50 51 52 53 54 55 56 57 58 59
node 5 size: 131072 MB

b) >64 VCPUs are specified with the -numa option:

/usr/local/bin/qemu-system-x86_64 \
 -enable-kvm \
 -cpu Westmere,+rdtscp,+pdpe1gb,+dca,+pdcm,+xtpr,+tm2,+est,+smx,+vmx,+ds_cpl,+monitor,+dtes64,+pclmuldq,+pbe,+tm,+ht,+ss,+acpi,+d
 -vnc :4
...

Upstream qemu:
--------------

only 63 CPUs in NUMA mode supported.
only 64 CPUs in NUMA mode supported.
QEMU 1.1.50 monitor - type 'help' for more information
(qemu) info numa
8 nodes
node 0 cpus: 6 7 8 9 38 39 40 41 70 71 72 73
node 0 size: 65536 MB
node 1 cpus: 10 11 12 13 14 15 16 17 18 19 42 43 44 45 46 47 48 49 50 51 74 75 76 77 78 79
node 1 size: 65536 MB
node 2 cpus: 20 21 22 23 24 25 26 27 28 29 52 53 54 55 56 57 58 59 60 61
node 2 size: 65536 MB
node 3 cpus: 30 62
node 3 size: 65536 MB
node 4 cpus:
node 4 size: 65536 MB
node 5 cpus:
node 5 size: 65536 MB
node 6 cpus: 31 63
node 6 size: 65536 MB
node 7 cpus: 0 1 2 3 4 5 32 33 34 35 36 37 64 65 66 67 68 69
node 7 size: 65536 MB

With the patch applied:
-----------------------

QEMU 1.1.50 monitor - type 'help' for more information
(qemu) info numa
8 nodes
node 0 cpus: 0 1 2 3 4 5 6 7 8 9
node 0 size: 65536 MB
node 1 cpus: 10 11 12 13 14 15 16 17 18 19
node 1 size: 65536 MB
node 2 cpus: 20 21 22 23 24 25 26 27 28 29
node 2 size: 65536 MB
node 3 cpus: 30 31 32 33 34 35 36 37 38 39
node 3 size: 65536 MB
node 4 cpus: 40 41 42 43 44 45 46 47 48 49
node 4 size: 65536 MB
node 5 cpus: 50 51 52 53 54 55 56 57 58 59
node 5 size: 65536 MB
node 6 cpus: 60 61 62 63 64 65 66 67 68 69
node 6 size: 65536 MB
node 7 cpus: 70 71 72 73 74 75 76 77 78 79
node 7 size: 65536 MB

Signed-off-by: Chegu Vinod <chegu_vinod@hp.com>, Jim Hull <jim.hull@hp.com>, Craig Hada <craig.hada@hp.com>
Tested-by: Eduardo Habkost <ehabkost@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
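Editor's note on issue (a): the old code tested node membership with
node_cpumask[i] & (1 << env->cpu_index), where the constant 1 is a plain
32-bit int, so the shift misbehaves once cpu_index reaches 32. A minimal
standalone sketch (not part of the patch) of that failure mode:

/* Illustration of the typecasting bug in (a): shifting the int constant
 * 1 by >= 32 bits is undefined; on x86 the shift count wraps modulo 32,
 * so CPU 40 lands on bit 8 and shows up under the wrong node. A 64-bit
 * constant (or a real bitmap, as the patch uses) avoids this. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    int cpu_index = 40;
    uint64_t buggy = 1 << cpu_index;    /* int shift: wraps to bit 8 on x86 */
    uint64_t fixed = 1ULL << cpu_index; /* 64-bit shift: bit 40 as intended */

    printf("buggy mask: 0x%016llx\n", (unsigned long long)buggy);
    printf("fixed mask: 0x%016llx\n", (unsigned long long)fixed);
    return 0;
}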
-rw-r--r--  cpus.c     3
-rw-r--r--  hw/pc.c    3
-rw-r--r--  sysemu.h   3
-rw-r--r--  vl.c      43
4 files changed, 27 insertions, 25 deletions
diff --git a/cpus.c b/cpus.c
index b61f60e..3de2e27 100644
--- a/cpus.c
+++ b/cpus.c
@@ -36,6 +36,7 @@
#include "cpus.h"
#include "qtest.h"
#include "main-loop.h"
+#include "bitmap.h"

#ifndef _WIN32
#include "compatfd.h"
@@ -1159,7 +1160,7 @@ void set_numa_modes(void)

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        for (i = 0; i < nb_numa_nodes; i++) {
-            if (node_cpumask[i] & (1 << env->cpu_index)) {
+            if (test_bit(env->cpu_index, node_cpumask[i])) {
                env->numa_node = i;
            }
        }
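The test_bit() used above is provided via QEMU's bitmap.h/bitops.h headers
and works on an array of unsigned long rather than a single 64-bit word,
which is what lifts the 64-CPU ceiling. A minimal sketch of the semantics
assumed here (test_bit_sketch is illustrative, not QEMU's implementation):

/* Sketch of test_bit() semantics over an unsigned long array: the bit
 * index splits into a word index and an offset within that word. */
#define BITS_PER_LONG (sizeof(unsigned long) * 8)

static inline int test_bit_sketch(long nr, const unsigned long *addr)
{
    return (addr[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
}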
diff --git a/hw/pc.c b/hw/pc.c
index bd193f3..81c391c 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -49,6 +49,7 @@
#include "memory.h"
#include "exec-memory.h"
#include "arch_init.h"
+#include "bitmap.h"

/* output Bochs bios info messages */
//#define DEBUG_BIOS
@@ -625,7 +626,7 @@ static void *bochs_bios_init(void)
        numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
        for (i = 0; i < max_cpus; i++) {
            for (j = 0; j < nb_numa_nodes; j++) {
-                if (node_cpumask[j] & (1 << i)) {
+                if (test_bit(i, node_cpumask[j])) {
                    numa_fw_cfg[i + 1] = cpu_to_le64(j);
                    break;
                }
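For context: numa_fw_cfg is the table handed to the BIOS via fw_cfg, and
the loop above fills one slot per possible VCPU. A hypothetical standalone
sketch of the layout this code appears to assume (slot 0 = node count,
slots 1..max_cpus = node of each VCPU, node sizes after that; the real
code additionally stores each value little-endian via cpu_to_le64):

/* Hypothetical helper mirroring the table bochs_bios_init() fills in;
 * names and the calloc allocation are illustrative, not QEMU code. */
#include <stdint.h>
#include <stdlib.h>

static uint64_t *build_numa_table(int max_cpus, int nb_nodes,
                                  const int *cpu_to_node,
                                  const uint64_t *node_mem)
{
    uint64_t *tbl = calloc(1 + max_cpus + nb_nodes, sizeof(*tbl));
    int i;

    tbl[0] = nb_nodes;                        /* mirrors numa_fw_cfg[0] */
    for (i = 0; i < max_cpus; i++) {
        tbl[i + 1] = cpu_to_node[i];          /* mirrors numa_fw_cfg[i + 1] */
    }
    for (i = 0; i < nb_nodes; i++) {
        tbl[1 + max_cpus + i] = node_mem[i];  /* node sizes follow the map */
    }
    return tbl;
}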
diff --git a/sysemu.h b/sysemu.h
index 6540c79..4669348 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -134,9 +134,10 @@ extern uint8_t qemu_extra_params_fw[2];
extern QEMUClock *rtc_clock;

#define MAX_NODES 64
+#define MAX_CPUMASK_BITS 255
extern int nb_numa_nodes;
extern uint64_t node_mem[MAX_NODES];
-extern uint64_t node_cpumask[MAX_NODES];
+extern unsigned long *node_cpumask[MAX_NODES];

#define MAX_OPTION_ROMS 16
typedef struct QEMUOptionRom {
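This header change swaps the per-node uint64_t mask for a pointer to a
multi-word bitmap, so the cap becomes MAX_CPUMASK_BITS (255) rather than
64. A sketch of the sizing arithmetic an allocator like bitmap_new() has
to perform (QEMU's own version lives in bitmap.h):

/* Sketch of bitmap allocation sizing: 255 bits round up to 4 longs on a
 * 64-bit host (8 longs on a 32-bit host). */
#include <stdlib.h>

#define BITS_PER_LONG     (sizeof(unsigned long) * 8)
#define BITS_TO_LONGS(nr) (((nr) + BITS_PER_LONG - 1) / BITS_PER_LONG)

static unsigned long *bitmap_new_sketch(long nbits)
{
    return calloc(BITS_TO_LONGS(nbits), sizeof(unsigned long));
}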
diff --git a/vl.c b/vl.c
index 6d2ce45..e71cb30 100644
--- a/vl.c
+++ b/vl.c
@@ -28,6 +28,7 @@
#include <errno.h>
#include <sys/time.h>
#include <zlib.h>
+#include "bitmap.h"

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"
@@ -241,7 +242,7 @@ QTAILQ_HEAD(, FWBootEntry) fw_boot_order = QTAILQ_HEAD_INITIALIZER(fw_boot_order

int nb_numa_nodes;
uint64_t node_mem[MAX_NODES];
-uint64_t node_cpumask[MAX_NODES];
+unsigned long *node_cpumask[MAX_NODES];

uint8_t qemu_uuid[16];
@@ -952,6 +953,8 @@ static void numa_add(const char *optarg)
    unsigned long long value, endvalue;
    int nodenr;

+    value = endvalue = 0ULL;
+
    optarg = get_opt_name(option, 128, optarg, ',') + 1;
    if (!strcmp(option, "node")) {
        if (get_param_value(option, 128, "nodeid", optarg) == 0) {
@@ -971,27 +974,22 @@ static void numa_add(const char *optarg)
            }
            node_mem[nodenr] = sval;
        }
-        if (get_param_value(option, 128, "cpus", optarg) == 0) {
-            node_cpumask[nodenr] = 0;
-        } else {
+        if (get_param_value(option, 128, "cpus", optarg) != 0) {
            value = strtoull(option, &endptr, 10);
-            if (value >= 64) {
-                value = 63;
-                fprintf(stderr, "only 64 CPUs in NUMA mode supported.\n");
+            if (*endptr == '-') {
+                endvalue = strtoull(endptr+1, &endptr, 10);
            } else {
-                if (*endptr == '-') {
-                    endvalue = strtoull(endptr+1, &endptr, 10);
-                    if (endvalue >= 63) {
-                        endvalue = 62;
-                        fprintf(stderr,
-                                "only 63 CPUs in NUMA mode supported.\n");
-                    }
-                    value = (2ULL << endvalue) - (1ULL << value);
-                } else {
-                    value = 1ULL << value;
-                }
+                endvalue = value;
+            }
+
+            if (!(endvalue < MAX_CPUMASK_BITS)) {
+                endvalue = MAX_CPUMASK_BITS - 1;
+                fprintf(stderr,
+                    "A max of %d CPUs are supported in a guest\n",
+                    MAX_CPUMASK_BITS);
            }
-            node_cpumask[nodenr] = value;
+
+            bitmap_set(node_cpumask[nodenr], value, endvalue-value+1);
        }
        nb_numa_nodes++;
    }
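After this rewrite, a cpus= argument such as -numa node,cpus=0-9 boils
down to at most two strtoull() calls plus a single bitmap_set(). A
standalone sketch of that parse-and-set path (bitmap_set_sketch stands in
for QEMU's bitmap_set(map, start, nr)):

/* Standalone sketch of the "cpus=0-9" handling above. */
#include <stdio.h>
#include <stdlib.h>

#define BITS_PER_LONG (sizeof(unsigned long) * 8)

static void bitmap_set_sketch(unsigned long *map, long start, long nr)
{
    long i;

    for (i = start; i < start + nr; i++) {
        map[i / BITS_PER_LONG] |= 1UL << (i % BITS_PER_LONG);
    }
}

int main(void)
{
    unsigned long cpumask[4] = { 0 };  /* 256 bits: enough for 255 CPUs */
    const char *option = "0-9";        /* value of the "cpus" parameter */
    char *endptr;
    unsigned long long value, endvalue;

    value = strtoull(option, &endptr, 10);
    endvalue = (*endptr == '-') ? strtoull(endptr + 1, &endptr, 10) : value;

    bitmap_set_sketch(cpumask, value, endvalue - value + 1);
    printf("word 0: 0x%lx\n", cpumask[0]);  /* prints 0x3ff for cpus 0-9 */
    return 0;
}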
@@ -2331,7 +2329,7 @@ int main(int argc, char **argv, char **envp)

    for (i = 0; i < MAX_NODES; i++) {
        node_mem[i] = 0;
-        node_cpumask[i] = 0;
+        node_cpumask[i] = bitmap_new(MAX_CPUMASK_BITS);
    }

    nb_numa_nodes = 0;
@@ -3468,8 +3466,9 @@ int main(int argc, char **argv, char **envp)
        }

        for (i = 0; i < nb_numa_nodes; i++) {
-            if (node_cpumask[i] != 0)
+            if (!bitmap_empty(node_cpumask[i], MAX_CPUMASK_BITS)) {
                break;
+            }
        }
        /* assigning the VCPUs round-robin is easier to implement, guest OSes
         * must cope with this anyway, because there are BIOSes out there in
@@ -3477,7 +3476,7 @@ int main(int argc, char **argv, char **envp)
         */
        if (i == nb_numa_nodes) {
            for (i = 0; i < max_cpus; i++) {
-                node_cpumask[i % nb_numa_nodes] |= 1 << i;
+                set_bit(i, node_cpumask[i % nb_numa_nodes]);
            }
        }
    }
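The final hunk keeps the existing fallback: if no node listed any CPUs,
VCPUs are striped round-robin across nodes, now via set_bit() instead of
a 32-bit shift. A tiny sketch (not QEMU code) of what that assignment
produces:

/* Round-robin fallback: VCPU i lands in node (i % nb_numa_nodes), e.g.
 * 8 VCPUs over 3 nodes -> node 0 {0,3,6}, node 1 {1,4,7}, node 2 {2,5}. */
#include <stdio.h>

int main(void)
{
    int max_cpus = 8, nb_numa_nodes = 3, i;

    for (i = 0; i < max_cpus; i++) {
        /* mirrors set_bit(i, node_cpumask[i % nb_numa_nodes]) */
        printf("VCPU %d -> node %d\n", i, i % nb_numa_nodes);
    }
    return 0;
}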