| Field | Value |
|---|---|
| author | Samuel Antao <sfantao@us.ibm.com>, 2016-06-13 18:10:57 +0000 |
| committer | Samuel Antao <sfantao@us.ibm.com>, 2016-06-13 18:10:57 +0000 |
| commit | c1ffba5062c7fc8997a49f5656ff6ebc1d9de633 |
| tree | 569d8c1410ded757a651b83e5d27635b1f5c6a21 /clang/lib/Driver |
| parent | 8cb45c838f99fd1cbc38bdc5bd272f6d090ce147 |
[CUDA][OpenMP] Create generic offload toolchains
Summary:
This patch introduces the concepts of an offloading tool chain and an offloading kind. Each tool chain may have an associated offloading kind that marks it as being used by a given programming model that requires offloading.
It also adds the logic to iterate over the tool chains based on their kind. Currently, only CUDA is supported, but in general a programming model (an offloading kind) may have multiple associated tool chains that require offloading support.
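As a rough illustration of the association described above, the following minimal sketch shows how a compilation object might key its tool chains by offloading kind and look up the single tool chain registered for a given kind. This is a self-contained sketch under simplifying assumptions, not the actual clang implementation: `ToolChain`, `OffloadKind`, and the accessor names below are stand-ins loosely modeled on what the patch adds to `Compilation`.

```cpp
#include <cassert>
#include <iterator>
#include <map>
#include <string>

// Simplified stand-ins for clang's Action::OffloadKind and ToolChain.
enum OffloadKind : unsigned { OFK_None = 0x0, OFK_Host = 0x1, OFK_Cuda = 0x2 };

struct ToolChain {
  std::string Triple;
};

// Sketch of a compilation that keys tool chains by offloading kind, so that a
// programming model can later own more than one device tool chain.
class Compilation {
  const ToolChain &DefaultToolChain;
  unsigned ActiveOffloadMask = 0u;
  // Ordered multimap: one offload kind may map to several device tool chains.
  std::multimap<OffloadKind, const ToolChain *> OrderedOffloadingToolchains;

public:
  explicit Compilation(const ToolChain &Default) : DefaultToolChain(Default) {
    // The offloading host tool chain is the default tool chain.
    OrderedOffloadingToolchains.insert({OFK_Host, &DefaultToolChain});
  }

  void addOffloadDeviceToolChain(const ToolChain *TC, OffloadKind Kind) {
    OrderedOffloadingToolchains.insert({Kind, TC});
    ActiveOffloadMask |= Kind;
  }

  bool isOffloadingActive(OffloadKind Kind) const {
    return (ActiveOffloadMask & Kind) != 0u;
  }

  // Return the single tool chain registered for a kind (e.g. CUDA today).
  const ToolChain *getSingleOffloadToolChain(OffloadKind Kind) const {
    auto Range = OrderedOffloadingToolchains.equal_range(Kind);
    assert(std::distance(Range.first, Range.second) == 1 &&
           "expected exactly one tool chain for this kind");
    return Range.first->second;
  }
};

int main() {
  ToolChain Host{"powerpc64le-ibm-linux-gnu"};
  ToolChain CudaDevice{"nvptx64-nvidia-cuda"};

  Compilation C(Host);
  C.addOffloadDeviceToolChain(&CudaDevice, OFK_Cuda);

  // Look up the CUDA device tool chain by its offloading kind.
  assert(C.isOffloadingActive(OFK_Cuda));
  assert(C.getSingleOffloadToolChain(OFK_Cuda)->Triple == "nvptx64-nvidia-cuda");
  return 0;
}
```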
This patch does not add tests; its goal is to preserve the existing functionality.
This patch is the first of a series of three that attempts to make the current support for CUDA more generic and easier to extend to other programming models, namely OpenMP. It tries to capture the suggestions, improvements, and concerns raised on the initial proposal in http://lists.llvm.org/pipermail/cfe-dev/2016-February/047547.html. It only tackles the more consensual part of the proposal, i.e. it does not yet address the problem of bundling intermediate files.
Reviewers: ABataev, jlebar, echristo, hfinkel, tra
Subscribers: guansong, Hahnfeld, andreybokhanko, tcramer, mkuron, cfe-commits, arpith-jacob, carlo.bertolli, caomhin
Differential Revision: http://reviews.llvm.org/D18170
llvm-svn: 272571
Diffstat (limited to 'clang/lib/Driver')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | clang/lib/Driver/Compilation.cpp | 9 |
| -rw-r--r-- | clang/lib/Driver/Driver.cpp | 43 |
| -rw-r--r-- | clang/lib/Driver/Tools.cpp | 8 |
3 files changed, 39 insertions, 21 deletions
```diff
diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
index b24d381..46548e6 100644
--- a/clang/lib/Driver/Compilation.cpp
+++ b/clang/lib/Driver/Compilation.cpp
@@ -24,10 +24,13 @@ using namespace llvm::opt;
 
 Compilation::Compilation(const Driver &D, const ToolChain &_DefaultToolChain,
                          InputArgList *_Args, DerivedArgList *_TranslatedArgs)
-    : TheDriver(D), DefaultToolChain(_DefaultToolChain),
-      CudaHostToolChain(&DefaultToolChain), CudaDeviceToolChain(nullptr),
+    : TheDriver(D), DefaultToolChain(_DefaultToolChain), ActiveOffloadMask(0u),
       Args(_Args), TranslatedArgs(_TranslatedArgs), Redirects(nullptr),
-      ForDiagnostics(false) {}
+      ForDiagnostics(false) {
+  // The offloading host toolchain is the default tool chain.
+  OrderedOffloadingToolchains.insert(
+      std::make_pair(Action::OFK_Host, &DefaultToolChain));
+}
 
 Compilation::~Compilation() {
   delete TranslatedArgs;
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index f7e1530..796fd64 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -422,6 +422,31 @@ void Driver::setLTOMode(const llvm::opt::ArgList &Args) {
   }
 }
 
+void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
+                                              InputList &Inputs) {
+
+  //
+  // CUDA
+  //
+  // We need to generate a CUDA toolchain if any of the inputs has a CUDA type.
+  if (llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
+        return types::isCuda(I.first);
+      })) {
+    const ToolChain &TC = getToolChain(
+        C.getInputArgs(),
+        llvm::Triple(C.getOffloadingHostToolChain()->getTriple().isArch64Bit()
+                         ? "nvptx64-nvidia-cuda"
+                         : "nvptx-nvidia-cuda"));
+    C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda);
+  }
+
+  //
+  // TODO: Add support for other offloading programming models here.
+  //
+
+  return;
+}
+
 Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   llvm::PrettyStackTraceString CrashInfo("Compilation construction");
 
@@ -549,18 +574,8 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   InputList Inputs;
   BuildInputs(C->getDefaultToolChain(), *TranslatedArgs, Inputs);
 
-  // Initialize the CUDA device TC only if we have any CUDA Inputs. This is
-  // necessary so that we don't break compilations that pass flags that are
-  // incompatible with the NVPTX TC (e.g. -mthread-model single).
-  if (llvm::any_of(Inputs, [](const std::pair<types::ID, const Arg *> &I) {
-        return I.first == types::TY_CUDA || I.first == types::TY_PP_CUDA ||
-               I.first == types::TY_CUDA_DEVICE;
-      })) {
-    C->setCudaDeviceToolChain(
-        &getToolChain(C->getArgs(), llvm::Triple(TC.getTriple().isArch64Bit()
-                                                     ? "nvptx64-nvidia-cuda"
-                                                     : "nvptx-nvidia-cuda")));
-  }
+  // Populate the tool chains for the offloading devices, if any.
+  CreateOffloadingDeviceToolChains(*C, Inputs);
 
   // Construct the list of abstract actions to perform for this compilation. On
   // MachO targets this uses the driver-driver and universal actions.
@@ -1390,7 +1405,7 @@ static Action *buildCudaActions(Compilation &C, DerivedArgList &Args,
     CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg));
 
   // Build actions for all device inputs.
-  assert(C.getCudaDeviceToolChain() &&
+  assert(C.getSingleOffloadToolChain<Action::OFK_Cuda>() &&
         "Missing toolchain for device-side compilation.");
   ActionList CudaDeviceActions;
   C.getDriver().BuildActions(C, Args, CudaDeviceInputs, CudaDeviceActions);
@@ -2031,7 +2046,7 @@ InputInfo Driver::BuildJobsForActionNoCache(
     // Initial processing of CudaDeviceAction carries host params.
     // Call BuildJobsForAction() again, now with correct device parameters.
     InputInfo II = BuildJobsForAction(
-        C, *CDA->input_begin(), C.getCudaDeviceToolChain(),
+        C, *CDA->input_begin(), C.getSingleOffloadToolChain<Action::OFK_Cuda>(),
         CDA->getGpuArchName(), CDA->isAtTopLevel(), /*MultipleArchs=*/true,
         LinkingOutput, CachedResults);
     // Currently II's Action is *CDA->input_begin(). Set it to CDA instead, so
diff --git a/clang/lib/Driver/Tools.cpp b/clang/lib/Driver/Tools.cpp
index 9b715e3..e73c22d 100644
--- a/clang/lib/Driver/Tools.cpp
+++ b/clang/lib/Driver/Tools.cpp
@@ -3767,10 +3767,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     // particular compilation pass we're constructing here. For now we
     // can check which toolchain we're using and pick the other one to
     // extract the triple.
-    if (&getToolChain() == C.getCudaDeviceToolChain())
-      AuxToolChain = C.getCudaHostToolChain();
-    else if (&getToolChain() == C.getCudaHostToolChain())
-      AuxToolChain = C.getCudaDeviceToolChain();
+    if (&getToolChain() == C.getSingleOffloadToolChain<Action::OFK_Cuda>())
+      AuxToolChain = C.getOffloadingHostToolChain();
+    else if (&getToolChain() == C.getOffloadingHostToolChain())
+      AuxToolChain = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
     else
       llvm_unreachable("Can't figure out CUDA compilation mode.");
     assert(AuxToolChain != nullptr && "No aux toolchain.");
```
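For context on the predicate used by the new `CreateOffloadingDeviceToolChains`: the removed lambda in `BuildCompilation` enumerated the CUDA input types explicitly, while the new code defers to `types::isCuda`. The snippet below is a hedged, self-contained sketch of what such a helper presumably checks; the enum values and the helper name are hypothetical stand-ins rather than clang's actual `types::ID` API.

```cpp
#include <cassert>

// Hypothetical stand-ins for a few clang types::ID values; the real enum is
// much larger.
enum ID { TY_CUDA, TY_PP_CUDA, TY_CUDA_DEVICE, TY_CXX };

// Assumed behavior of a types::isCuda-style predicate: it folds together the
// three CUDA input kinds that the removed lambda listed explicitly.
static bool isCudaInput(ID Kind) {
  return Kind == TY_CUDA || Kind == TY_PP_CUDA || Kind == TY_CUDA_DEVICE;
}

int main() {
  assert(isCudaInput(TY_PP_CUDA));
  assert(!isCudaInput(TY_CXX));
  return 0;
}
```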