From e40901806fec4ea87e00cd408f8ca31c5868de0c Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 2 Nov 2018 14:54:07 +0000 Subject: [OPENMP][NVPTX]Improve emission of the globalized variables for target/teams/distribute regions. Target/teams/distribute regions exist for the entire time the kernel is executing. Thus, if a variable is declared in their context and then escapes it, we can allocate global memory statically instead of allocating it dynamically. The patch captures all the globalized variables in target/teams/distribute contexts and merges them into records, one per target region. Those records are then joined into a union, one per compilation unit (to save global memory). Those units are organized into two-dimensional arrays, where the first dimension is the number of blocks per SM and the second one is the number of SMs. Runtime functions manage this global memory space between the executing teams. llvm-svn: 345978 --- clang/lib/Frontend/CompilerInvocation.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'clang/lib/Frontend/CompilerInvocation.cpp') diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 912afba..d78c133 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2686,6 +2686,14 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Opts.Exceptions = 0; Opts.CXXExceptions = 0; } + if (Opts.OpenMPIsDevice && T.isNVPTX()) { + Opts.OpenMPCUDANumSMs = + getLastArgIntValue(Args, options::OPT_fopenmp_cuda_number_of_sm_EQ, + Opts.OpenMPCUDANumSMs, Diags); + Opts.OpenMPCUDABlocksPerSM = + getLastArgIntValue(Args, options::OPT_fopenmp_cuda_blocks_per_sm_EQ, + Opts.OpenMPCUDABlocksPerSM, Diags); + } // Get the OpenMP target triples if any. if (Arg *A = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) { -- cgit v1.1