aboutsummaryrefslogtreecommitdiff
path: root/clang
diff options
context:
space:
mode:
authorGheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>2017-08-07 15:39:11 +0000
committerGheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>2017-08-07 15:39:11 +0000
commit47e0cf378c793b00207998a0537c2fd75bd1ec74 (patch)
tree3f667d95b0f1bf3a9a5009c9c8a568374d48cfb2 /clang
parent02d9945e6f11959887e2eefbbb5a635ff5087dbe (diff)
downloadllvm-47e0cf378c793b00207998a0537c2fd75bd1ec74.zip
llvm-47e0cf378c793b00207998a0537c2fd75bd1ec74.tar.gz
llvm-47e0cf378c793b00207998a0537c2fd75bd1ec74.tar.bz2
[OpenMP] Add flag for specifying the target device architecture for OpenMP device offloading
Summary: OpenMP has the ability to offload target regions to devices which may have different architectures. A new -fopenmp-target-arch flag is introduced to specify the device architecture. In this patch I use the new flag to specify the compute capability of the underlying NVIDIA architecture for the OpenMP offloading CUDA tool chain. Only a host-offloading test is provided since full device offloading capability will only be available when [[ https://reviews.llvm.org/D29654 | D29654 ]] lands. Reviewers: hfinkel, Hahnfeld, carlo.bertolli, caomhin, ABataev Reviewed By: hfinkel Subscribers: guansong, cfe-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D34784 llvm-svn: 310263
Diffstat (limited to 'clang')
-rw-r--r--clang/include/clang/Basic/DiagnosticDriverKinds.td4
-rw-r--r--clang/include/clang/Driver/Options.td4
-rw-r--r--clang/include/clang/Driver/ToolChain.h11
-rw-r--r--clang/lib/Driver/Compilation.cpp7
-rw-r--r--clang/lib/Driver/ToolChain.cpp62
-rw-r--r--clang/lib/Driver/ToolChains/Cuda.cpp21
-rw-r--r--clang/test/Driver/openmp-offload.c32
7 files changed, 138 insertions, 3 deletions
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 1ac5cda..e57cc1a 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -69,6 +69,10 @@ def err_drv_invalid_Xarch_argument_with_args : Error<
"invalid Xarch argument: '%0', options requiring arguments are unsupported">;
def err_drv_invalid_Xarch_argument_isdriver : Error<
"invalid Xarch argument: '%0', cannot change driver behavior inside Xarch argument">;
+def err_drv_Xopenmp_target_missing_triple : Error<
+ "cannot deduce implicit triple value for -Xopenmp-target, specify triple using -Xopenmp-target=<triple>">;
+def err_drv_invalid_Xopenmp_target_with_args : Error<
+ "invalid -Xopenmp-target argument: '%0', options requiring arguments are unsupported">;
def err_drv_argument_only_allowed_with : Error<
"invalid argument '%0' only allowed with '%1'">;
def err_drv_argument_not_allowed_with : Error<
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 4350f63..d866c16 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -459,6 +459,10 @@ def Xcuda_fatbinary : Separate<["-"], "Xcuda-fatbinary">,
HelpText<"Pass <arg> to fatbinary invocation">, MetaVarName<"<arg>">;
def Xcuda_ptxas : Separate<["-"], "Xcuda-ptxas">,
HelpText<"Pass <arg> to the ptxas assembler">, MetaVarName<"<arg>">;
+def Xopenmp_target : Separate<["-"], "Xopenmp-target">,
+ HelpText<"Pass <arg> to the target offloading toolchain.">, MetaVarName<"<arg>">;
+def Xopenmp_target_EQ : JoinedAndSeparate<["-"], "Xopenmp-target=">,
+ HelpText<"Pass <arg> to the specified target offloading toolchain. The triple that identifies the toolchain must be provided after the equals sign.">, MetaVarName<"<arg>">;
def z : Separate<["-"], "z">, Flags<[LinkerInput, RenderAsInput]>,
HelpText<"Pass -z <arg> to the linker">, MetaVarName<"<arg>">,
Group<Link_Group>;
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index a62a759..d74d1e0 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -217,6 +217,17 @@ public:
return nullptr;
}
+ /// TranslateOpenMPTargetArgs - Create a new derived argument list for
+ /// that contains the OpenMP target specific flags passed via
+ /// -Xopenmp-target -opt=val OR -Xopenmp-target=<triple> -opt=val
+ /// Translation occurs only when the \p DeviceOffloadKind is specified.
+ ///
+ /// \param DeviceOffloadKind - The device offload kind used for the
+ /// translation.
+ virtual llvm::opt::DerivedArgList *
+ TranslateOpenMPTargetArgs(const llvm::opt::DerivedArgList &Args,
+ Action::OffloadKind DeviceOffloadKind) const;
+
/// Choose a tool to use to handle the action \p JA.
///
/// This can be overridden when a particular ToolChain needs to use
diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
index cf86644..6626662 100644
--- a/clang/lib/Driver/Compilation.cpp
+++ b/clang/lib/Driver/Compilation.cpp
@@ -59,7 +59,12 @@ Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch,
DerivedArgList *&Entry = TCArgs[{TC, BoundArch, DeviceOffloadKind}];
if (!Entry) {
- Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch, DeviceOffloadKind);
+ // Translate OpenMP toolchain arguments provided via the -Xopenmp-target flags.
+ Entry = TC->TranslateOpenMPTargetArgs(*TranslatedArgs, DeviceOffloadKind);
+ if (!Entry)
+ Entry = TranslatedArgs;
+
+ Entry = TC->TranslateArgs(*Entry, BoundArch, DeviceOffloadKind);
if (!Entry)
Entry = TranslatedArgs;
}
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 1e446dc..94d5e4f 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -795,3 +795,65 @@ ToolChain::computeMSVCVersion(const Driver *D,
return VersionTuple();
}
+
+llvm::opt::DerivedArgList *
+ToolChain::TranslateOpenMPTargetArgs(const llvm::opt::DerivedArgList &Args,
+ Action::OffloadKind DeviceOffloadKind) const {
+ if (DeviceOffloadKind == Action::OFK_OpenMP) {
+ DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
+ const OptTable &Opts = getDriver().getOpts();
+
+ // Handle -Xopenmp-target flags
+ for (Arg *A : Args) {
+ // Exclude flags which may only apply to the host toolchain.
+ // Do not exclude flags when the host triple (AuxTriple),
+ // matches the current toolchain triple.
+ if (A->getOption().matches(options::OPT_m_Group)) {
+ if (getAuxTriple() && getAuxTriple()->str() == getTriple().str())
+ DAL->append(A);
+ continue;
+ }
+
+ unsigned Index;
+ unsigned Prev;
+ bool XOpenMPTargetNoTriple = A->getOption().matches(
+ options::OPT_Xopenmp_target);
+
+ if (A->getOption().matches(options::OPT_Xopenmp_target_EQ)) {
+ // Passing device args: -Xopenmp-target=<triple> -opt=val.
+ if (A->getValue(0) == getTripleString())
+ Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
+ else
+ continue;
+ } else if (XOpenMPTargetNoTriple) {
+ // Passing device args: -Xopenmp-target -opt=val.
+ Index = Args.getBaseArgs().MakeIndex(A->getValue(0));
+ } else {
+ DAL->append(A);
+ continue;
+ }
+
+ // Parse the argument to -Xopenmp-target.
+ Prev = Index;
+ std::unique_ptr<Arg> XOpenMPTargetArg(Opts.ParseOneArg(Args, Index));
+ if (!XOpenMPTargetArg || Index > Prev + 1) {
+ getDriver().Diag(diag::err_drv_invalid_Xopenmp_target_with_args)
+ << A->getAsString(Args);
+ continue;
+ }
+ if (XOpenMPTargetNoTriple && XOpenMPTargetArg &&
+ Args.getAllArgValues(
+ options::OPT_fopenmp_targets_EQ).size() != 1) {
+ getDriver().Diag(diag::err_drv_Xopenmp_target_missing_triple);
+ continue;
+ }
+ XOpenMPTargetArg->setBaseArg(A);
+ A = XOpenMPTargetArg.release();
+ DAL->append(A);
+ }
+
+ return DAL;
+ }
+
+ return nullptr;
+}
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 935a5a3..86be187 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -212,8 +212,18 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
static_cast<const toolchains::CudaToolChain &>(getToolChain());
assert(TC.getTriple().isNVPTX() && "Wrong platform");
+ StringRef GPUArchName;
+ // If this is an OpenMP action we need to extract the device architecture
+ // from the -march=arch option. This option may come from -Xopenmp-target
+ // flag or the default value.
+ if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
+ GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
+ assert(!GPUArchName.empty() && "Must have an architecture passed in.");
+ } else
+ GPUArchName = JA.getOffloadingArch();
+
// Obtain architecture from the action.
- CudaArch gpu_arch = StringToCudaArch(JA.getOffloadingArch());
+ CudaArch gpu_arch = StringToCudaArch(GPUArchName);
assert(gpu_arch != CudaArch::UNKNOWN &&
"Device action expected to have an architecture.");
@@ -405,7 +415,7 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
// For OpenMP device offloading, append derived arguments. Make sure
// flags are not duplicated.
- // TODO: Append the compute capability.
+ // Also append the compute capability.
if (DeviceOffloadKind == Action::OFK_OpenMP) {
for (Arg *A : Args){
bool IsDuplicate = false;
@@ -418,6 +428,13 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
if (!IsDuplicate)
DAL->append(A);
}
+
+ StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
+ if (Arch.empty())
+ // Default compute capability for CUDA toolchain is sm_20.
+ DAL->AddJoinedArg(nullptr,
+ Opts.getOption(options::OPT_march_EQ), "sm_20");
+
return DAL;
}
diff --git a/clang/test/Driver/openmp-offload.c b/clang/test/Driver/openmp-offload.c
index d43aa33..055bcdd 100644
--- a/clang/test/Driver/openmp-offload.c
+++ b/clang/test/Driver/openmp-offload.c
@@ -597,3 +597,35 @@
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-IS-DEVICE %s
// CHK-FOPENMP-IS-DEVICE: clang{{.*}} "-aux-triple" "powerpc64le--linux" {{.*}}.c" "-fopenmp-is-device" "-fopenmp-host-ir-file-path"
+
+/// ###########################################################################
+
+/// Check -Xopenmp-target=powerpc64le-ibm-linux-gnu -march=pwr7 is passed when compiling for the device.
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -Xopenmp-target=powerpc64le-ibm-linux-gnu -mcpu=pwr7 %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-FOPENMP-EQ-TARGET %s
+
+// CHK-FOPENMP-EQ-TARGET: clang{{.*}} "-target-cpu" "pwr7"
+
+/// ###########################################################################
+
+/// Check -Xopenmp-target -march=pwr7 is passed when compiling for the device.
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -Xopenmp-target -mcpu=pwr7 %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET %s
+
+// CHK-FOPENMP-TARGET: clang{{.*}} "-target-cpu" "pwr7"
+
+/// ###########################################################################
+
+/// Check -Xopenmp-target triggers error when multiple triples are used.
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-unknown-linux-gnu -Xopenmp-target -mcpu=pwr8 %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-AMBIGUOUS-ERROR %s
+
+// CHK-FOPENMP-TARGET-AMBIGUOUS-ERROR: clang{{.*}} error: cannot deduce implicit triple value for -Xopenmp-target, specify triple using -Xopenmp-target=<triple>
+
+/// ###########################################################################
+
+/// Check -Xopenmp-target triggers error when an option requiring arguments is passed to it.
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -Xopenmp-target -Xopenmp-target -mcpu=pwr8 %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-NESTED-ERROR %s
+
+// CHK-FOPENMP-TARGET-NESTED-ERROR: clang{{.*}} error: invalid -Xopenmp-target argument: '-Xopenmp-target -Xopenmp-target', options requiring arguments are unsupported