diff options
author | Stewart Smith <stewart@linux.ibm.com> | 2018-07-15 22:20:11 -0500 |
---|---|---|
committer | Stewart Smith <stewart@linux.ibm.com> | 2018-07-16 23:48:30 -0500 |
commit | 06808a037d44231ba36e814ff1dbf66bc8b707da (patch) | |
tree | 60ebb92c635887ff587144b442a3c3f3f714b299 /core | |
parent | 5bf03755a972f2a120731051a6fe52a597672e39 (diff) | |
download | skiboot-06808a037d44231ba36e814ff1dbf66bc8b707da.zip skiboot-06808a037d44231ba36e814ff1dbf66bc8b707da.tar.gz skiboot-06808a037d44231ba36e814ff1dbf66bc8b707da.tar.bz2 |
fast-reboot: parallel memory clearing
Arbitrarily pick 16GB as the unit of parallelism, split
the memory clearing into jobs, and schedule each job
node-local to the memory it clears (or on node 0 if we can't
work that out, because it's the memory up to SKIBOOT_BASE).
This seems to cut at least ~40% of the time spent zeroing memory
during fast-reboot on a 256GB Boston system.
Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
Diffstat (limited to 'core')
-rw-r--r-- | core/device.c | 2 | ||||
-rw-r--r-- | core/mem_region.c | 100 |
2 files changed, 99 insertions, 3 deletions
diff --git a/core/device.c b/core/device.c index 38dbdfc..19a8d8d 100644 --- a/core/device.c +++ b/core/device.c @@ -955,7 +955,7 @@ u64 dt_get_address(const struct dt_node *node, unsigned int index, return dt_get_number(p->prop + pos, na); } -static u32 __dt_get_chip_id(const struct dt_node *node) +u32 __dt_get_chip_id(const struct dt_node *node) { const struct dt_property *prop; diff --git a/core/mem_region.c b/core/mem_region.c index 8ae49bb..728c421 100644 --- a/core/mem_region.c +++ b/core/mem_region.c @@ -1206,19 +1206,52 @@ static void mem_clear_range(uint64_t s, uint64_t e) return; } - prlog(PR_NOTICE, "Clearing region %llx-%llx\n", + prlog(PR_DEBUG, "Clearing region %llx-%llx\n", (long long)s, (long long)e); memset((void *)s, 0, e - s); } +struct mem_region_clear_job_args { + char *job_name; + uint64_t s,e; +}; + +static void mem_region_clear_job(void *data) +{ + struct mem_region_clear_job_args *arg = (struct mem_region_clear_job_args*)data; + mem_clear_range(arg->s, arg->e); +} + +#define MEM_REGION_CLEAR_JOB_SIZE (16ULL*(1<<30)) + void mem_region_clear_unused(void) { + int njobs = 0; + struct cpu_job **jobs; struct mem_region *r; + struct mem_region_clear_job_args *job_args; + uint64_t s,l; + uint64_t total = 0; + uint32_t chip_id; + char *path; + int i; lock(&mem_region_lock); assert(mem_regions_finalised); + list_for_each(&regions, r, list) { + if (!(r->type == REGION_OS)) + continue; + njobs++; + /* One job per 16GB */ + njobs += r->len / MEM_REGION_CLEAR_JOB_SIZE; + } + + jobs = malloc(njobs * sizeof(struct cpu_job*)); + job_args = malloc(njobs * sizeof(struct mem_region_clear_job_args)); + prlog(PR_NOTICE, "Clearing unused memory:\n"); + i = 0; list_for_each(&regions, r, list) { /* If it's not unused, ignore it. 
*/ if (!(r->type == REGION_OS)) @@ -1226,9 +1259,72 @@ void mem_region_clear_unused(void) assert(r != &skiboot_heap); - mem_clear_range(r->start, r->start + r->len); + s = r->start; + l = r->len; + while(l > MEM_REGION_CLEAR_JOB_SIZE) { + job_args[i].s = s+l - MEM_REGION_CLEAR_JOB_SIZE; + job_args[i].e = s+l; + l-=MEM_REGION_CLEAR_JOB_SIZE; + job_args[i].job_name = malloc(sizeof(char)*100); + total+=MEM_REGION_CLEAR_JOB_SIZE; + chip_id = __dt_get_chip_id(r->node); + if (chip_id == -1) + chip_id = 0; + path = dt_get_path(r->node); + snprintf(job_args[i].job_name, 100, + "clear %s, %s 0x%"PRIx64" len: %"PRIx64" on %d", + r->name, path, + job_args[i].s, + (job_args[i].e - job_args[i].s), + chip_id); + free(path); + printf("job: %s\n", job_args[i].job_name); + jobs[i] = cpu_queue_job_on_node(chip_id, + job_args[i].job_name, + mem_region_clear_job, + &job_args[i]); + if (!jobs[i]) + jobs[i] = cpu_queue_job(NULL, + job_args[i].job_name, + mem_region_clear_job, + &job_args[i]); + assert(jobs[i]); + i++; + } + job_args[i].s = s; + job_args[i].e = s+l; + job_args[i].job_name = malloc(sizeof(char)*100); + total+=l; + chip_id = __dt_get_chip_id(r->node); + if (chip_id == -1) + chip_id = 0; + path = dt_get_path(r->node); + snprintf(job_args[i].job_name,100, + "clear %s, %s 0x%"PRIx64" len: 0x%"PRIx64" on %d", + r->name, path, + job_args[i].s, + (job_args[i].e - job_args[i].s), + chip_id); + free(path); + printf("job: %s\n", job_args[i].job_name); + jobs[i] = cpu_queue_job_on_node(chip_id, + job_args[i].job_name, + mem_region_clear_job, + &job_args[i]); + i++; + } + cpu_process_local_jobs(); + l = 0; + for(i=0; i < njobs; i++) { + cpu_wait_job(jobs[i], true); + l += (job_args[i].e - job_args[i].s); + printf("Clearing memory... %"PRIu64"/%"PRIu64"GB done\n", + l>>30, total>>30); + free(job_args[i].job_name); } unlock(&mem_region_lock); + free(jobs); + free(job_args); } static void mem_region_add_dt_reserved_node(struct dt_node *parent, |