diff options
Diffstat (limited to 'clang/lib/Driver')
-rw-r--r-- | clang/lib/Driver/Driver.cpp | 768 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChain.cpp | 66 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/AMDGPU.cpp | 149 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/AMDGPU.h | 2 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 10 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/AMDGPUOpenMP.h | 3 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/Arch/Sparc.cpp | 31 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/Arch/Sparc.h | 3 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/BareMetal.cpp | 3 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/Clang.cpp | 82 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/CommonArgs.cpp | 2 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/Cuda.cpp | 2 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/Flang.cpp | 1 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/HIPAMD.cpp | 8 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/HIPAMD.h | 3 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/HIPSPV.cpp | 7 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/HIPSPV.h | 3 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/OpenBSD.cpp | 2 | ||||
-rw-r--r-- | clang/lib/Driver/ToolChains/ROCm.h | 30 |
19 files changed, 557 insertions, 618 deletions
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ec1135e..892049e 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -68,6 +68,7 @@ #include "clang/Driver/Types.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -83,6 +84,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ExitCodes.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" @@ -109,65 +111,6 @@ using namespace clang::driver; using namespace clang; using namespace llvm::opt; -static std::optional<llvm::Triple> getOffloadTargetTriple(const Driver &D, - const ArgList &Args) { - auto OffloadTargets = Args.getAllArgValues(options::OPT_offload_EQ); - // Offload compilation flow does not support multiple targets for now. We - // need the HIPActionBuilder (and possibly the CudaActionBuilder{,Base}too) - // to support multiple tool chains first. - switch (OffloadTargets.size()) { - default: - D.Diag(diag::err_drv_only_one_offload_target_supported); - return std::nullopt; - case 0: - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << ""; - return std::nullopt; - case 1: - break; - } - return llvm::Triple(OffloadTargets[0]); -} - -static std::optional<llvm::Triple> -getNVIDIAOffloadTargetTriple(const Driver &D, const ArgList &Args, - const llvm::Triple &HostTriple) { - if (!Args.hasArg(options::OPT_offload_EQ)) { - return llvm::Triple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" - : "nvptx-nvidia-cuda"); - } - auto TT = getOffloadTargetTriple(D, Args); - if (TT && (TT->getArch() == llvm::Triple::spirv32 || - TT->getArch() == llvm::Triple::spirv64)) { - if (Args.hasArg(options::OPT_emit_llvm)) - return TT; - D.Diag(diag::err_drv_cuda_offload_only_emit_bc); - return std::nullopt; - } - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); - return std::nullopt; -} - -static std::optional<llvm::Triple> -getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { - if (!Args.hasArg(options::OPT_offload_EQ)) { - auto OffloadArchs = Args.getAllArgValues(options::OPT_offload_arch_EQ); - if (llvm::is_contained(OffloadArchs, "amdgcnspirv") && - OffloadArchs.size() == 1) - return llvm::Triple("spirv64-amd-amdhsa"); - return llvm::Triple("amdgcn-amd-amdhsa"); // Default HIP triple. - } - auto TT = getOffloadTargetTriple(D, Args); - if (!TT) - return std::nullopt; - if (TT->isAMDGCN() && TT->getVendor() == llvm::Triple::AMD && - TT->getOS() == llvm::Triple::AMDHSA) - return TT; - if (TT->getArch() == llvm::Triple::spirv64) - return TT; - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); - return std::nullopt; -} - template <typename F> static bool usesInput(const ArgList &Args, F &&Fn) { return llvm::any_of(Args, [&](Arg *A) { return (A->getOption().matches(options::OPT_x) && @@ -458,6 +401,44 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, return FinalPhase; } +llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> +Driver::executeProgram(llvm::ArrayRef<llvm::StringRef> Args) const { + llvm::SmallString<64> OutputFile; + llvm::sys::fs::createTemporaryFile("driver-program", "txt", OutputFile, + llvm::sys::fs::OF_Text); + llvm::FileRemover OutputRemover(OutputFile.c_str()); + std::optional<llvm::StringRef> Redirects[] = { + {""}, + OutputFile.str(), + {""}, + }; + + std::string ErrorMessage; + int SecondsToWait = 60; + if (std::optional<std::string> Str = + llvm::sys::Process::GetEnv("CLANG_TOOLCHAIN_PROGRAM_TIMEOUT")) { + if (!llvm::to_integer(*Str, SecondsToWait)) + return llvm::createStringError(std::error_code(), + "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected " + "an integer, got '" + + *Str + "'"); + SecondsToWait = std::max(SecondsToWait, 0); // infinite + } + StringRef Executable = Args[0]; + if (llvm::sys::ExecuteAndWait(Executable, Args, {}, Redirects, SecondsToWait, + /*MemoryLimit=*/0, &ErrorMessage)) + return llvm::createStringError(std::error_code(), + Executable + ": " + ErrorMessage); + + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> OutputBuf = + llvm::MemoryBuffer::getFile(OutputFile.c_str()); + if (!OutputBuf) + return llvm::createStringError(OutputBuf.getError(), + "Failed to read stdout of " + Executable + + ": " + OutputBuf.getError().message()); + return std::move(*OutputBuf); +} + static Arg *MakeInputArg(DerivedArgList &Args, const OptTable &Opts, StringRef Value, bool Claim = true) { Arg *A = new Arg(Opts.getOption(options::OPT_INPUT), Value, @@ -921,250 +902,266 @@ Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const { return RT; } -static llvm::Triple getSYCLDeviceTriple(StringRef TargetArch) { - SmallVector<StringRef, 5> SYCLAlias = {"spir", "spir64", "spirv", "spirv32", - "spirv64"}; - if (llvm::is_contained(SYCLAlias, TargetArch)) { - llvm::Triple TargetTriple; - TargetTriple.setArchName(TargetArch); - TargetTriple.setVendor(llvm::Triple::UnknownVendor); - TargetTriple.setOS(llvm::Triple::UnknownOS); - return TargetTriple; - } - return llvm::Triple(TargetArch); +// Handles `native` offload architectures by using the 'offload-arch' utility. +static llvm::SmallVector<std::string> +getSystemOffloadArchs(Compilation &C, Action::OffloadKind Kind) { + StringRef Program = C.getArgs().getLastArgValue( + options::OPT_offload_arch_tool_EQ, "offload-arch"); + + SmallVector<std::string> GPUArchs; + if (llvm::ErrorOr<std::string> Executable = + llvm::sys::findProgramByName(Program)) { + llvm::SmallVector<StringRef> Args{*Executable}; + if (Kind == Action::OFK_HIP) + Args.push_back("--only=amdgpu"); + else if (Kind == Action::OFK_Cuda) + Args.push_back("--only=nvptx"); + auto StdoutOrErr = C.getDriver().executeProgram(Args); + + if (!StdoutOrErr) { + C.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << Action::GetOffloadKindName(Kind) << StdoutOrErr.takeError() + << "--offload-arch"; + return GPUArchs; + } + if ((*StdoutOrErr)->getBuffer().empty()) { + C.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << Action::GetOffloadKindName(Kind) << "No GPU detected in the system" + << "--offload-arch"; + return GPUArchs; + } + + for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n")) + if (!Arch.empty()) + GPUArchs.push_back(Arch.str()); + } else { + C.getDriver().Diag(diag::err_drv_command_failure) << "offload-arch"; + } + return GPUArchs; } -static bool addSYCLDefaultTriple(Compilation &C, - SmallVectorImpl<llvm::Triple> &SYCLTriples) { - // Check current set of triples to see if the default has already been set. - for (const auto &SYCLTriple : SYCLTriples) { - if (SYCLTriple.getSubArch() == llvm::Triple::NoSubArch && - SYCLTriple.isSPIROrSPIRV()) - return false; +// Attempts to infer the correct offloading toolchain triple by looking at the +// requested offloading kind and architectures. +static llvm::DenseSet<llvm::StringRef> +inferOffloadToolchains(Compilation &C, Action::OffloadKind Kind) { + std::set<std::string> Archs; + for (Arg *A : C.getInputArgs()) { + for (StringRef Arch : A->getValues()) { + if (A->getOption().matches(options::OPT_offload_arch_EQ)) { + if (Arch == "native") { + for (StringRef Str : getSystemOffloadArchs(C, Kind)) + Archs.insert(Str.str()); + } else { + Archs.insert(Arch.str()); + } + } else if (A->getOption().matches(options::OPT_no_offload_arch_EQ)) { + if (Arch == "all") + Archs.clear(); + else + Archs.erase(Arch.str()); + } + } } - // Add the default triple as it was not found. - llvm::Triple DefaultTriple = getSYCLDeviceTriple( - C.getDefaultToolChain().getTriple().isArch32Bit() ? "spirv32" - : "spirv64"); - SYCLTriples.insert(SYCLTriples.begin(), DefaultTriple); - return true; + + llvm::DenseSet<llvm::StringRef> Triples; + for (llvm::StringRef Arch : Archs) { + OffloadArch ID = StringToOffloadArch(Arch); + if (ID == OffloadArch::UNKNOWN) + ID = StringToOffloadArch( + getProcessorFromTargetID(llvm::Triple("amdgcn-amd-amdhsa"), Arch)); + + if (Kind == Action::OFK_HIP && !IsAMDOffloadArch(ID)) { + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "HIP" << Arch; + return llvm::DenseSet<llvm::StringRef>(); + } + if (Kind == Action::OFK_Cuda && !IsNVIDIAOffloadArch(ID)) { + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "CUDA" << Arch; + return llvm::DenseSet<llvm::StringRef>(); + } + if (Kind == Action::OFK_OpenMP && + (ID == OffloadArch::UNKNOWN || ID == OffloadArch::UNUSED)) { + C.getDriver().Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) + << Arch; + return llvm::DenseSet<llvm::StringRef>(); + } + if (ID == OffloadArch::UNKNOWN || ID == OffloadArch::UNUSED) { + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "offload" << Arch; + return llvm::DenseSet<llvm::StringRef>(); + } + + StringRef Triple; + if (ID == OffloadArch::AMDGCNSPIRV) + Triple = "spirv64-amd-amdhsa"; + else if (IsNVIDIAOffloadArch(ID)) + Triple = C.getDefaultToolChain().getTriple().isArch64Bit() + ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"; + else if (IsAMDOffloadArch(ID)) + Triple = "amdgcn-amd-amdhsa"; + else + continue; + + // Make a new argument that dispatches this argument to the appropriate + // toolchain. This is required when we infer it and create potentially + // incompatible toolchains from the global option. + Option Opt = C.getDriver().getOpts().getOption(options::OPT_Xarch__); + unsigned Index = C.getArgs().getBaseArgs().MakeIndex("-Xarch_"); + Arg *A = new Arg(Opt, C.getArgs().getArgString(Index), Index, + C.getArgs().MakeArgString(Triple.split("-").first), + C.getArgs().MakeArgString("--offload-arch=" + Arch)); + C.getArgs().append(A); + C.getArgs().AddSynthesizedArg(A); + Triples.insert(Triple); + } + + // Infer the default target triple if no specific architectures are given. + if (Archs.empty() && Kind == Action::OFK_HIP) + Triples.insert("amdgcn-amd-amdhsa"); + else if (Archs.empty() && Kind == Action::OFK_Cuda) + Triples.insert(C.getDefaultToolChain().getTriple().isArch64Bit() + ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"); + else if (Archs.empty() && Kind == Action::OFK_SYCL) + Triples.insert(C.getDefaultToolChain().getTriple().isArch64Bit() + ? "spirv64-unknown-unknown" + : "spirv32-unknown-unknown"); + + // We need to dispatch these to the appropriate toolchain now. + C.getArgs().eraseArg(options::OPT_offload_arch_EQ); + C.getArgs().eraseArg(options::OPT_no_offload_arch_EQ); + + return Triples; } void Driver::CreateOffloadingDeviceToolChains(Compilation &C, InputList &Inputs) { - - // - // CUDA/HIP - // - // We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA - // or HIP type. However, mixed CUDA/HIP compilation is not supported. + bool UseLLVMOffload = C.getInputArgs().hasArg( + options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false); bool IsCuda = - llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) { - return types::isCuda(I.first); - }); - bool IsHIP = llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) { - return types::isHIP(I.first); - }) || - C.getInputArgs().hasArg(options::OPT_hip_link) || - C.getInputArgs().hasArg(options::OPT_hipstdpar); - bool UseLLVMOffload = C.getInputArgs().hasArg( - options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false); - if (IsCuda && IsHIP) { - Diag(clang::diag::err_drv_mix_cuda_hip); + return types::isCuda(I.first); + }) && + !UseLLVMOffload; + bool IsHIP = + (llvm::any_of(Inputs, + [](std::pair<types::ID, const llvm::opt::Arg *> &I) { + return types::isHIP(I.first); + }) || + C.getInputArgs().hasArg(options::OPT_hip_link) || + C.getInputArgs().hasArg(options::OPT_hipstdpar)) && + !UseLLVMOffload; + bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl, + options::OPT_fno_sycl, false); + bool IsOpenMPOffloading = + UseLLVMOffload || + (C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, + options::OPT_fno_openmp, false) && + (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ) || + (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && + !(IsCuda || IsHIP)))); + + llvm::SmallSet<Action::OffloadKind, 4> Kinds; + const std::pair<bool, Action::OffloadKind> ActiveKinds[] = { + {IsCuda, Action::OFK_Cuda}, + {IsHIP, Action::OFK_HIP}, + {IsOpenMPOffloading, Action::OFK_OpenMP}, + {IsSYCL, Action::OFK_SYCL}}; + for (const auto &[Active, Kind] : ActiveKinds) + if (Active) + Kinds.insert(Kind); + + // We currently don't support any kind of mixed offloading. + if (Kinds.size() > 1) { + Diag(clang::diag::err_drv_mix_offload) + << Action::GetOffloadKindName(*Kinds.begin()).upper() + << Action::GetOffloadKindName(*(++Kinds.begin())).upper(); return; } - if (IsCuda && !UseLLVMOffload) { - auto CudaTriple = getNVIDIAOffloadTargetTriple( - *this, C.getInputArgs(), C.getDefaultToolChain().getTriple()); - if (!CudaTriple) - return; - - auto &TC = - getOffloadToolChain(C.getInputArgs(), Action::OFK_Cuda, *CudaTriple, - C.getDefaultToolChain().getTriple()); - - // Emit a warning if the detected CUDA version is too new. - const CudaInstallationDetector &CudaInstallation = - static_cast<const toolchains::CudaToolChain &>(TC).CudaInstallation; - if (CudaInstallation.isValid()) - CudaInstallation.WarnIfUnsupportedVersion(); - C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda); - OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_Cuda, &TC, - /*SpecificToolchain=*/true); - } else if (IsHIP && !UseLLVMOffload) { - if (auto *OMPTargetArg = - C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ)) { - Diag(clang::diag::err_drv_unsupported_opt_for_language_mode) - << OMPTargetArg->getSpelling() << "HIP"; - return; - } - - auto HIPTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs()); - if (!HIPTriple) - return; - - auto &TC = - getOffloadToolChain(C.getInputArgs(), Action::OFK_HIP, *HIPTriple, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_HIP); - - // TODO: Fix 'amdgcnspirv' handling with the new driver. - if (C.getInputArgs().hasFlag(options::OPT_offload_new_driver, - options::OPT_no_offload_new_driver, false)) - OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_HIP, &TC, - /*SpecificToolchain=*/true); - } + // Initialize the compilation identifier used for unique CUDA / HIP names. if (IsCuda || IsHIP) CUIDOpts = CUIDOptions(C.getArgs(), *this); - // - // OpenMP - // - // We need to generate an OpenMP toolchain if the user specified targets with - // the -fopenmp-targets option or used --offload-arch with OpenMP enabled. - bool IsOpenMPOffloading = - ((IsCuda || IsHIP) && UseLLVMOffload) || - (C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, - options::OPT_fno_openmp, false) && - (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ) || - C.getInputArgs().hasArg(options::OPT_offload_arch_EQ))); - if (IsOpenMPOffloading) { - // We expect that -fopenmp-targets is always used in conjunction with the - // option -fopenmp specifying a valid runtime with offloading support, i.e. - // libomp or libiomp. - OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs()); - if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) { - Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); - return; - } - - // If the user specified -fopenmp-targets= we create a toolchain for each - // valid triple. Otherwise, if only --offload-arch= was specified we instead - // attempt to derive the appropriate toolchains from the arguments. - if (Arg *OpenMPTargets = - C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ)) { - if (OpenMPTargets && !OpenMPTargets->getNumValues()) { - Diag(clang::diag::warn_drv_empty_joined_argument) - << OpenMPTargets->getAsString(C.getInputArgs()); + // Get the list of requested offloading toolchains. If they were not + // explicitly specified we will infer them based on the offloading language + // and requested architectures. + std::multiset<llvm::StringRef> Triples; + if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) { + std::vector<std::string> ArgValues = + C.getInputArgs().getAllArgValues(options::OPT_offload_targets_EQ); + for (llvm::StringRef Target : ArgValues) + Triples.insert(C.getInputArgs().MakeArgString(Target)); + + if (ArgValues.empty()) + Diag(clang::diag::warn_drv_empty_joined_argument) + << C.getInputArgs() + .getLastArg(options::OPT_offload_targets_EQ) + ->getAsString(C.getInputArgs()); + } else if (Kinds.size() > 0) { + for (Action::OffloadKind Kind : Kinds) { + llvm::DenseSet<llvm::StringRef> Derived = inferOffloadToolchains(C, Kind); + Triples.insert(Derived.begin(), Derived.end()); + } + } + + // Build an offloading toolchain for every requested target and kind. + llvm::StringMap<StringRef> FoundNormalizedTriples; + for (StringRef Target : Triples) { + // OpenMP offloading requires a compatible libomp. + if (Kinds.contains(Action::OFK_OpenMP)) { + OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs()); + if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) { + Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); return; } + } - // Make sure these show up in a deterministic order. - std::multiset<StringRef> OpenMPTriples; - for (StringRef T : OpenMPTargets->getValues()) - OpenMPTriples.insert(T); - - llvm::StringMap<StringRef> FoundNormalizedTriples; - for (StringRef T : OpenMPTriples) { - llvm::Triple TT(ToolChain::getOpenMPTriple(T)); - std::string NormalizedName = TT.normalize(); - - // Make sure we don't have a duplicate triple. - auto [TripleIt, Inserted] = - FoundNormalizedTriples.try_emplace(NormalizedName, T); - if (!Inserted) { - Diag(clang::diag::warn_drv_omp_offload_target_duplicate) - << T << TripleIt->second; - continue; - } - - // If the specified target is invalid, emit a diagnostic. - if (TT.getArch() == llvm::Triple::UnknownArch) { - Diag(clang::diag::err_drv_invalid_omp_target) << T; - continue; - } + // Certain options are not allowed when combined with SYCL compilation. + if (Kinds.contains(Action::OFK_SYCL)) { + for (auto ID : + {options::OPT_static_libstdcxx, options::OPT_ffreestanding}) + if (Arg *IncompatArg = C.getInputArgs().getLastArg(ID)) + Diag(clang::diag::err_drv_argument_not_allowed_with) + << IncompatArg->getSpelling() << "-fsycl"; + } - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); - OffloadArchs[&TC] = - getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, - /*SpecificToolchain=*/true); - } - } else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && - ((!IsHIP && !IsCuda) || UseLLVMOffload)) { - llvm::Triple AMDTriple("amdgcn-amd-amdhsa"); - llvm::Triple NVPTXTriple("nvptx64-nvidia-cuda"); - - for (StringRef Arch : - C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) { - bool IsNVPTX = IsNVIDIAOffloadArch( - StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch))); - bool IsAMDGPU = IsAMDOffloadArch( - StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch))); - if (!IsNVPTX && !IsAMDGPU && !Arch.empty() && - !Arch.equals_insensitive("native")) { - Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch; - return; - } + // Create a device toolchain for every specified kind and triple. + for (Action::OffloadKind Kind : Kinds) { + llvm::Triple TT = Kind == Action::OFK_OpenMP + ? ToolChain::getOpenMPTriple(Target) + : llvm::Triple(Target); + if (TT.getArch() == llvm::Triple::ArchType::UnknownArch) { + Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT.str(); + continue; } - // Attempt to deduce the offloading triple from the set of architectures. - // We can only correctly deduce NVPTX / AMDGPU triples currently. - for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) { - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, - C.getDefaultToolChain().getTriple()); - - llvm::SmallVector<StringRef> Archs = - getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, - /*SpecificToolchain=*/false); - if (!Archs.empty()) { - C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); - OffloadArchs[&TC] = Archs; - } + std::string NormalizedName = TT.normalize(); + auto [TripleIt, Inserted] = + FoundNormalizedTriples.try_emplace(NormalizedName, Target); + if (!Inserted) { + Diag(clang::diag::warn_drv_omp_offload_target_duplicate) + << Target << TripleIt->second; + continue; } - // If the set is empty then we failed to find a native architecture. - auto TCRange = C.getOffloadToolChains(Action::OFK_OpenMP); - if (TCRange.first == TCRange.second) - Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) - << "native"; - } - } else if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) { - Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); - return; - } + auto &TC = getOffloadToolChain(C.getInputArgs(), Kind, TT, + C.getDefaultToolChain().getTriple()); - // We need to generate a SYCL toolchain if the user specified -fsycl. - bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl, - options::OPT_fno_sycl, false); - - auto argSYCLIncompatible = [&](OptSpecifier OptId) { - if (!IsSYCL) - return; - if (Arg *IncompatArg = C.getInputArgs().getLastArg(OptId)) - Diag(clang::diag::err_drv_argument_not_allowed_with) - << IncompatArg->getSpelling() << "-fsycl"; - }; - // -static-libstdc++ is not compatible with -fsycl. - argSYCLIncompatible(options::OPT_static_libstdcxx); - // -ffreestanding cannot be used with -fsycl - argSYCLIncompatible(options::OPT_ffreestanding); - - llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec; - - if (IsSYCL) { - addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); + // Emit a warning if the detected CUDA version is too new. + if (Kind == Action::OFK_Cuda) { + auto &CudaInstallation = + static_cast<const toolchains::CudaToolChain &>(TC).CudaInstallation; + if (CudaInstallation.isValid()) + CudaInstallation.WarnIfUnsupportedVersion(); + } - // We'll need to use the SYCL and host triples as the key into - // getOffloadingDeviceToolChain, because the device toolchains we're - // going to create will depend on both. - const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); - for (const auto &TT : UniqueSYCLTriplesVec) { - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, - HostTC->getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/true); + C.addOffloadDeviceToolChain(&TC, Kind); } } - - // - // TODO: Add support for other offloading programming models here. - // } bool Driver::loadZOSCustomizationFile(llvm::cl::ExpansionContext &ExpCtx) { @@ -3306,9 +3303,6 @@ class OffloadingActionBuilder final { // architecture. If we are in host-only mode we return 'success' so that // the host uses the CUDA offload kind. if (auto *IA = dyn_cast<InputAction>(HostAction)) { - assert(!GpuArchList.empty() && - "We should have at least one GPU architecture."); - // If the host input is not CUDA or HIP, we don't need to bother about // this input. if (!(IA->getType() == types::TY_CUDA || @@ -3408,10 +3402,6 @@ class OffloadingActionBuilder final { CudaDeviceActions.clear(); } - /// Get canonicalized offload arch option. \returns empty StringRef if the - /// option is invalid. - virtual StringRef getCanonicalOffloadArch(StringRef Arch) = 0; - virtual std::optional<std::pair<llvm::StringRef, llvm::StringRef>> getConflictOffloadArchCombination(const std::set<StringRef> &GpuArchs) = 0; @@ -3440,91 +3430,25 @@ class OffloadingActionBuilder final { return true; } - ToolChains.push_back( - AssociatedOffloadKind == Action::OFK_Cuda - ? C.getSingleOffloadToolChain<Action::OFK_Cuda>() - : C.getSingleOffloadToolChain<Action::OFK_HIP>()); - - CompileHostOnly = C.getDriver().offloadHostOnly(); - EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); - EmitAsm = Args.getLastArg(options::OPT_S); - - // --offload and --offload-arch options are mutually exclusive. - if (Args.hasArgNoClaim(options::OPT_offload_EQ) && - Args.hasArgNoClaim(options::OPT_offload_arch_EQ, - options::OPT_no_offload_arch_EQ)) { - C.getDriver().Diag(diag::err_opt_not_valid_with_opt) << "--offload-arch" - << "--offload"; - } - - // Collect all offload arch parameters, removing duplicates. std::set<StringRef> GpuArchs; - bool Error = false; - const ToolChain &TC = *ToolChains.front(); - for (Arg *A : C.getArgsForToolChain(&TC, /*BoundArch=*/"", - AssociatedOffloadKind)) { - if (!(A->getOption().matches(options::OPT_offload_arch_EQ) || - A->getOption().matches(options::OPT_no_offload_arch_EQ))) - continue; - A->claim(); - - for (StringRef ArchStr : llvm::split(A->getValue(), ",")) { - if (A->getOption().matches(options::OPT_no_offload_arch_EQ) && - ArchStr == "all") { - GpuArchs.clear(); - } else if (ArchStr == "native") { - auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args); - if (!GPUsOrErr) { - TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC.getArch()) - << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; - continue; - } + for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_HIP}) { + for (auto &I : llvm::make_range(C.getOffloadToolChains(Kind))) { + ToolChains.push_back(I.second); - for (auto GPU : *GPUsOrErr) { - GpuArchs.insert(Args.MakeArgString(GPU)); - } - } else { - ArchStr = getCanonicalOffloadArch(ArchStr); - if (ArchStr.empty()) { - Error = true; - } else if (A->getOption().matches(options::OPT_offload_arch_EQ)) - GpuArchs.insert(ArchStr); - else if (A->getOption().matches(options::OPT_no_offload_arch_EQ)) - GpuArchs.erase(ArchStr); - else - llvm_unreachable("Unexpected option."); - } + for (auto Arch : + C.getDriver().getOffloadArchs(C, C.getArgs(), Kind, *I.second)) + GpuArchs.insert(Arch); } } - auto &&ConflictingArchs = getConflictOffloadArchCombination(GpuArchs); - if (ConflictingArchs) { - C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo) - << ConflictingArchs->first << ConflictingArchs->second; - C.setContainsError(); - return true; - } - - // Collect list of GPUs remaining in the set. for (auto Arch : GpuArchs) GpuArchList.push_back(Arch.data()); - // Default to sm_20 which is the lowest common denominator for - // supported GPUs. sm_20 code should work correctly, if - // suboptimally, on all newer GPUs. - if (GpuArchList.empty()) { - if (ToolChains.front()->getTriple().isSPIRV()) { - if (ToolChains.front()->getTriple().getVendor() == llvm::Triple::AMD) - GpuArchList.push_back(OffloadArch::AMDGCNSPIRV); - else - GpuArchList.push_back(OffloadArch::Generic); - } else { - GpuArchList.push_back(DefaultOffloadArch); - } - } + CompileHostOnly = C.getDriver().offloadHostOnly(); + EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); + EmitAsm = Args.getLastArg(options::OPT_S); - return Error; + return false; } }; @@ -3538,15 +3462,6 @@ class OffloadingActionBuilder final { DefaultOffloadArch = OffloadArch::CudaDefault; } - StringRef getCanonicalOffloadArch(StringRef ArchStr) override { - OffloadArch Arch = StringToOffloadArch(ArchStr); - if (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch)) { - C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; - return StringRef(); - } - return OffloadArchToString(Arch); - } - std::optional<std::pair<llvm::StringRef, llvm::StringRef>> getConflictOffloadArchCombination( const std::set<StringRef> &GpuArchs) override { @@ -3705,24 +3620,6 @@ class OffloadingActionBuilder final { bool canUseBundlerUnbundler() const override { return true; } - StringRef getCanonicalOffloadArch(StringRef IdStr) override { - llvm::StringMap<bool> Features; - // getHIPOffloadTargetTriple() is known to return valid value as it has - // been called successfully in the CreateOffloadingDeviceToolChains(). - auto T = - (IdStr == "amdgcnspirv") - ? llvm::Triple("spirv64-amd-amdhsa") - : *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()); - auto ArchStr = parseTargetID(T, IdStr, &Features); - if (!ArchStr) { - C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr; - C.setContainsError(); - return StringRef(); - } - auto CanId = getCanonicalTargetID(*ArchStr, Features); - return Args.MakeArgStringRef(CanId); - }; - std::optional<std::pair<llvm::StringRef, llvm::StringRef>> getConflictOffloadArchCombination( const std::set<StringRef> &GpuArchs) override { @@ -4715,23 +4612,20 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, static StringRef getCanonicalArchString(Compilation &C, const llvm::opt::DerivedArgList &Args, StringRef ArchStr, - const llvm::Triple &Triple, - bool SpecificToolchain) { + const llvm::Triple &Triple) { // Lookup the CUDA / HIP architecture string. Only report an error if we were // expecting the triple to be only NVPTX / AMDGPU. OffloadArch Arch = StringToOffloadArch(getProcessorFromTargetID(Triple, ArchStr)); if (Triple.isNVPTX() && (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch))) { - if (SpecificToolchain) - C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) - << "CUDA" << ArchStr; + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "CUDA" << ArchStr; return StringRef(); } else if (Triple.isAMDGPU() && (Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch(Arch))) { - if (SpecificToolchain) - C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) - << "HIP" << ArchStr; + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "HIP" << ArchStr; return StringRef(); } @@ -4767,11 +4661,7 @@ getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs, llvm::SmallVector<StringRef> Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, - Action::OffloadKind Kind, const ToolChain *TC, - bool SpecificToolchain) const { - if (!TC) - TC = &C.getDefaultToolChain(); - + Action::OffloadKind Kind, const ToolChain &TC) const { // --offload and --offload-arch options are mutually exclusive. if (Args.hasArgNoClaim(options::OPT_offload_EQ) && Args.hasArgNoClaim(options::OPT_offload_arch_EQ, @@ -4784,48 +4674,44 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } llvm::DenseSet<StringRef> Archs; - for (auto *Arg : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) { + for (auto *Arg : C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind)) { // Add or remove the seen architectures in order of appearance. If an // invalid architecture is given we simply exit. if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { for (StringRef Arch : Arg->getValues()) { if (Arch == "native" || Arch.empty()) { - auto GPUsOrErr = TC->getSystemGPUArchs(Args); + auto GPUsOrErr = TC.getSystemGPUArchs(Args); if (!GPUsOrErr) { - if (!SpecificToolchain) - llvm::consumeError(GPUsOrErr.takeError()); - else - TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC->getArch()) - << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; + TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << llvm::Triple::getArchTypeName(TC.getArch()) + << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; continue; } for (auto ArchStr : *GPUsOrErr) { - StringRef CanonicalStr = - getCanonicalArchString(C, Args, Args.MakeArgString(ArchStr), - TC->getTriple(), SpecificToolchain); + StringRef CanonicalStr = getCanonicalArchString( + C, Args, Args.MakeArgString(ArchStr), TC.getTriple()); if (!CanonicalStr.empty()) Archs.insert(CanonicalStr); - else if (SpecificToolchain) + else return llvm::SmallVector<StringRef>(); } } else { - StringRef CanonicalStr = getCanonicalArchString( - C, Args, Arch, TC->getTriple(), SpecificToolchain); + StringRef CanonicalStr = + getCanonicalArchString(C, Args, Arch, TC.getTriple()); if (!CanonicalStr.empty()) Archs.insert(CanonicalStr); - else if (SpecificToolchain) + else return llvm::SmallVector<StringRef>(); } } } else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) { - for (StringRef Arch : llvm::split(Arg->getValue(), ",")) { + for (StringRef Arch : Arg->getValues()) { if (Arch == "all") { Archs.clear(); } else { - StringRef ArchStr = getCanonicalArchString( - C, Args, Arch, TC->getTriple(), SpecificToolchain); + StringRef ArchStr = + getCanonicalArchString(C, Args, Arch, TC.getTriple()); Archs.erase(ArchStr); } } @@ -4833,28 +4719,30 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } if (auto ConflictingArchs = - getConflictOffloadArchCombination(Archs, TC->getTriple())) + getConflictOffloadArchCombination(Archs, TC.getTriple())) C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo) << ConflictingArchs->first << ConflictingArchs->second; - // Skip filling defaults if we're just querying what is availible. - if (SpecificToolchain && Archs.empty()) { + // Fill in the default architectures if not provided explicitly. + if (Archs.empty()) { if (Kind == Action::OFK_Cuda) { Archs.insert(OffloadArchToString(OffloadArch::CudaDefault)); } else if (Kind == Action::OFK_HIP) { - Archs.insert(OffloadArchToString(OffloadArch::HIPDefault)); + Archs.insert(OffloadArchToString(TC.getTriple().isSPIRV() + ? OffloadArch::Generic + : OffloadArch::HIPDefault)); } else if (Kind == Action::OFK_SYCL) { Archs.insert(StringRef()); } else if (Kind == Action::OFK_OpenMP) { // Accept legacy `-march` device arguments for OpenMP. - if (auto *Arg = C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind) + if (auto *Arg = C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind) .getLastArg(options::OPT_march_EQ)) { Archs.insert(Arg->getValue()); } else { - auto ArchsOrErr = TC->getSystemGPUArchs(Args); + auto ArchsOrErr = TC.getSystemGPUArchs(Args); if (!ArchsOrErr) { - TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) - << llvm::Triple::getArchTypeName(TC->getArch()) + TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << llvm::Triple::getArchTypeName(TC.getArch()) << llvm::toString(ArchsOrErr.takeError()) << "--offload-arch"; } else if (!ArchsOrErr->empty()) { for (auto Arch : *ArchsOrErr) @@ -4934,7 +4822,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C, // Get the product of all bound architectures and toolchains. SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs; for (const ToolChain *TC : ToolChains) { - for (StringRef Arch : OffloadArchs.lookup(TC)) { + for (StringRef Arch : getOffloadArchs(C, C.getArgs(), Kind, *TC)) { TCAndArchs.push_back(std::make_pair(TC, Arch)); DeviceActions.push_back( C.MakeAction<InputAction>(*InputArg, InputType, CUID)); @@ -4966,7 +4854,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C, if (Kind == Action::OFK_SYCL && Phase == phases::Assemble) continue; - auto TCAndArch = TCAndArchs.begin(); + auto *TCAndArch = TCAndArchs.begin(); for (Action *&A : DeviceActions) { if (A->getType() == types::TY_Nothing) continue; @@ -4998,7 +4886,13 @@ Action *Driver::BuildOffloadingActions(Compilation &C, // Compiling HIP in device-only non-RDC mode requires linking each action // individually. for (Action *&A : DeviceActions) { - if ((A->getType() != types::TY_Object && + // Special handling for the HIP SPIR-V toolchain because it doesn't use + // the SPIR-V backend yet doesn't report the output as an object. + bool IsAMDGCNSPIRV = A->getOffloadingToolChain() && + A->getOffloadingToolChain()->getTriple().getOS() == + llvm::Triple::OSType::AMDHSA && + A->getOffloadingToolChain()->getTriple().isSPIRV(); + if ((A->getType() != types::TY_Object && !IsAMDGCNSPIRV && A->getType() != types::TY_LTO_BC) || !HIPNoRDC || !offloadDeviceOnly()) continue; @@ -5006,7 +4900,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C, A = C.MakeAction<LinkJobAction>(LinkerInput, types::TY_Image); } - auto TCAndArch = TCAndArchs.begin(); + auto *TCAndArch = TCAndArchs.begin(); for (Action *A : DeviceActions) { DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); OffloadAction::DeviceDependences DDep; @@ -5054,8 +4948,9 @@ Action *Driver::BuildOffloadingActions(Compilation &C, // fatbinary for each translation unit, linking each input individually. Action *FatbinAction = C.MakeAction<LinkJobAction>(OffloadActions, types::TY_HIP_FATBIN); - DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_HIP>(), - nullptr, Action::OFK_HIP); + DDep.add(*FatbinAction, + *C.getOffloadToolChains<Action::OFK_HIP>().first->second, nullptr, + Action::OFK_HIP); } else { // Package all the offloading actions into a single output that can be // embedded in the host and linked. @@ -5131,11 +5026,13 @@ Action *Driver::ConstructPhaseAction( if (Args.hasArg(options::OPT_extract_api)) return C.MakeAction<ExtractAPIJobAction>(Input, types::TY_API_INFO); - // With 'fexperimental-modules-reduced-bmi', we don't want to run the + // With 'fmodules-reduced-bmi', we don't want to run the // precompile phase unless the user specified '--precompile'. In the case // the '--precompile' flag is enabled, we will try to emit the reduced BMI // as a by product in GenerateModuleInterfaceAction. - if (Args.hasArg(options::OPT_modules_reduced_bmi) && + if (!Args.hasArg(options::OPT_fno_modules_reduced_bmi) && + (Input->getType() == driver::types::TY_CXXModule || + Input->getType() == driver::types::TY_PP_CXXModule) && !Args.getLastArg(options::OPT__precompile)) return Input; @@ -5208,7 +5105,10 @@ Action *Driver::ConstructPhaseAction( false) || (Args.hasFlag(options::OPT_offload_new_driver, options::OPT_no_offload_new_driver, false) && - !offloadDeviceOnly())) || + (!offloadDeviceOnly() || + (Input->getOffloadingToolChain() && + TargetDeviceOffloadKind == Action::OFK_HIP && + Input->getOffloadingToolChain()->getTriple().isSPIRV())))) || TargetDeviceOffloadKind == Action::OFK_OpenMP))) { types::ID Output = Args.hasArg(options::OPT_S) && @@ -6323,7 +6223,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, // `-fmodule-output`. if (!AtTopLevel && isa<PrecompileJobAction>(JA) && JA.getType() == types::TY_ModuleFile && SpecifiedModuleOutput) { - assert(!C.getArgs().hasArg(options::OPT_modules_reduced_bmi)); + assert(C.getArgs().hasArg(options::OPT_fno_modules_reduced_bmi)); return GetModuleOutputPath(C, JA, BaseInput); } diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 481f575..47f93fa1 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -104,44 +104,6 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T, addIfExists(getFilePaths(), Path); } -llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> -ToolChain::executeToolChainProgram(StringRef Executable) const { - llvm::SmallString<64> OutputFile; - llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile, - llvm::sys::fs::OF_Text); - llvm::FileRemover OutputRemover(OutputFile.c_str()); - std::optional<llvm::StringRef> Redirects[] = { - {""}, - OutputFile.str(), - {""}, - }; - - std::string ErrorMessage; - int SecondsToWait = 60; - if (std::optional<std::string> Str = - llvm::sys::Process::GetEnv("CLANG_TOOLCHAIN_PROGRAM_TIMEOUT")) { - if (!llvm::to_integer(*Str, SecondsToWait)) - return llvm::createStringError(std::error_code(), - "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected " - "an integer, got '" + - *Str + "'"); - SecondsToWait = std::max(SecondsToWait, 0); // infinite - } - if (llvm::sys::ExecuteAndWait(Executable, {Executable}, {}, Redirects, - SecondsToWait, - /*MemoryLimit=*/0, &ErrorMessage)) - return llvm::createStringError(std::error_code(), - Executable + ": " + ErrorMessage); - - llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> OutputBuf = - llvm::MemoryBuffer::getFile(OutputFile.c_str()); - if (!OutputBuf) - return llvm::createStringError(OutputBuf.getError(), - "Failed to read stdout of " + Executable + - ": " + OutputBuf.getError().message()); - return std::move(*OutputBuf); -} - void ToolChain::setTripleEnvironment(llvm::Triple::EnvironmentType Env) { Triple.setEnvironment(Env); if (EffectiveTriple != llvm::Triple()) @@ -255,6 +217,18 @@ static void getAArch64MultilibFlags(const Driver &D, Result.push_back(ABIArg->getAsString(Args)); } + if (const Arg *A = Args.getLastArg(options::OPT_O_Group); + A && A->getOption().matches(options::OPT_O)) { + switch (A->getValue()[0]) { + case 's': + Result.push_back("-Os"); + break; + case 'z': + Result.push_back("-Oz"); + break; + } + } + processMultilibCustomFlags(Result, Args); } @@ -332,6 +306,19 @@ static void getARMMultilibFlags(const Driver &D, const llvm::Triple &Triple, if (Endian->getOption().matches(options::OPT_mbig_endian)) Result.push_back(Endian->getAsString(Args)); } + + if (const Arg *A = Args.getLastArg(options::OPT_O_Group); + A && A->getOption().matches(options::OPT_O)) { + switch (A->getValue()[0]) { + case 's': + Result.push_back("-Os"); + break; + case 'z': + Result.push_back("-Oz"); + break; + } + } + processMultilibCustomFlags(Result, Args); } @@ -1644,7 +1631,8 @@ void ToolChain::addSYCLIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const {} llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> -ToolChain::getDeviceLibs(const ArgList &DriverArgs) const { +ToolChain::getDeviceLibs(const ArgList &DriverArgs, + const Action::OffloadKind DeviceOffloadingKind) const { return {}; } diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 7fc34f4..0781683 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -31,6 +31,68 @@ using namespace clang::driver::toolchains; using namespace clang; using namespace llvm::opt; +RocmInstallationDetector::CommonBitcodeLibsPreferences:: + CommonBitcodeLibsPreferences(const Driver &D, + const llvm::opt::ArgList &DriverArgs, + StringRef GPUArch, + const Action::OffloadKind DeviceOffloadingKind, + const bool NeedsASanRT) + : ABIVer(DeviceLibABIVersion::fromCodeObjectVersion( + tools::getAMDGPUCodeObjectVersion(D, DriverArgs))) { + const auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch); + const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); + + IsOpenMP = DeviceOffloadingKind == Action::OFK_OpenMP; + + const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); + Wave64 = + !HasWave32 || DriverArgs.hasFlag(options::OPT_mwavefrontsize64, + options::OPT_mno_wavefrontsize64, false); + + const bool IsKnownOffloading = DeviceOffloadingKind == Action::OFK_OpenMP || + DeviceOffloadingKind == Action::OFK_HIP; + + // Default to enabling f32 denormals on subtargets where fma is fast with + // denormals + const bool DefaultDAZ = + (Kind == llvm::AMDGPU::GK_NONE) + ? false + : !((ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && + (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32)); + // TODO: There are way too many flags that change this. Do we need to + // check them all? + DAZ = IsKnownOffloading + ? DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, + options::OPT_fno_gpu_flush_denormals_to_zero, + DefaultDAZ) + : DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || DefaultDAZ; + + FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only) || + DriverArgs.hasFlag(options::OPT_ffinite_math_only, + options::OPT_fno_finite_math_only, false); + + UnsafeMathOpt = + DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations) || + DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations, + options::OPT_fno_unsafe_math_optimizations, false); + + FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math) || + DriverArgs.hasFlag(options::OPT_ffast_math, + options::OPT_fno_fast_math, false); + + const bool DefaultSqrt = IsKnownOffloading ? true : false; + CorrectSqrt = + DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt) || + DriverArgs.hasFlag( + options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, + options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, DefaultSqrt); + // GPU Sanitizer currently only supports ASan and is enabled through host + // ASan. + GPUSan = (DriverArgs.hasFlag(options::OPT_fgpu_sanitize, + options::OPT_fno_gpu_sanitize, true) && + NeedsASanRT); +} + void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) { assert(!Path.empty()); @@ -841,7 +903,7 @@ AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { else Program = GetProgramPath("amdgpu-arch"); - auto StdoutOrErr = executeToolChainProgram(Program); + auto StdoutOrErr = getDriver().executeProgram({Program}); if (!StdoutOrErr) return StdoutOrErr.takeError(); @@ -884,33 +946,14 @@ void ROCMToolChain::addClangTargetOptions( ABIVer)) return; - bool Wave64 = isWave64(DriverArgs, Kind); - // TODO: There are way too many flags that change this. Do we need to check - // them all? - bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || - getDefaultDenormsAreZeroForTarget(Kind); - bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only); - - bool UnsafeMathOpt = - DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations); - bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math); - bool CorrectSqrt = - DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt); - - // GPU Sanitizer currently only supports ASan and is enabled through host - // ASan. - bool GPUSan = DriverArgs.hasFlag(options::OPT_fgpu_sanitize, - options::OPT_fno_gpu_sanitize, true) && - getSanitizerArgs(DriverArgs).needsAsanRt(); - // Add the OpenCL specific bitcode library. llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs; BCLibs.emplace_back(RocmInstallation->getOpenCLPath().str()); // Add the generic set of libraries. BCLibs.append(RocmInstallation->getCommonBitcodeLibs( - DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, - FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false)); + DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind, + getSanitizerArgs(DriverArgs).needsAsanRt())); for (auto [BCFile, Internalize] : BCLibs) { if (Internalize) @@ -947,35 +990,37 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs( llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> RocmInstallationDetector::getCommonBitcodeLibs( - const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64, - bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath, - bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan, - bool isOpenMP) const { + const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, + StringRef GPUArch, const Action::OffloadKind DeviceOffloadingKind, + const bool NeedsASanRT) const { llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs; + CommonBitcodeLibsPreferences Pref{D, DriverArgs, GPUArch, + DeviceOffloadingKind, NeedsASanRT}; + auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib, bool Internalize = true) { BCLib.ShouldInternalize = Internalize; BCLibs.emplace_back(BCLib); }; auto AddSanBCLibs = [&]() { - if (GPUSan) + if (Pref.GPUSan) AddBCLib(getAsanRTLPath(), false); }; AddSanBCLibs(); AddBCLib(getOCMLPath()); - if (!isOpenMP) + if (!Pref.IsOpenMP) AddBCLib(getOCKLPath()); - else if (GPUSan && isOpenMP) + else if (Pref.GPUSan && Pref.IsOpenMP) AddBCLib(getOCKLPath(), false); - AddBCLib(getDenormalsAreZeroPath(DAZ)); - AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath)); - AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)); - AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt)); - AddBCLib(getWavefrontSize64Path(Wave64)); + AddBCLib(getDenormalsAreZeroPath(Pref.DAZ)); + AddBCLib(getUnsafeMathPath(Pref.UnsafeMathOpt || Pref.FastRelaxedMath)); + AddBCLib(getFiniteOnlyPath(Pref.FiniteOnly || Pref.FastRelaxedMath)); + AddBCLib(getCorrectlyRoundedSqrtPath(Pref.CorrectSqrt)); + AddBCLib(getWavefrontSize64Path(Pref.Wave64)); AddBCLib(LibDeviceFile); - auto ABIVerPath = getABIVersionPath(ABIVer); + auto ABIVerPath = getABIVersionPath(Pref.ABIVer); if (!ABIVerPath.empty()) AddBCLib(ABIVerPath); @@ -983,9 +1028,9 @@ RocmInstallationDetector::getCommonBitcodeLibs( } llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> -ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, - const std::string &GPUArch, - bool isOpenMP) const { +ROCMToolChain::getCommonDeviceLibNames( + const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch, + Action::OffloadKind DeviceOffloadingKind) const { auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch); const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); @@ -996,33 +1041,9 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, ABIVer)) return {}; - // If --hip-device-lib is not set, add the default bitcode libraries. - // TODO: There are way too many flags that change this. Do we need to check - // them all? - bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, - options::OPT_fno_gpu_flush_denormals_to_zero, - getDefaultDenormsAreZeroForTarget(Kind)); - bool FiniteOnly = DriverArgs.hasFlag( - options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false); - bool UnsafeMathOpt = - DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations, - options::OPT_fno_unsafe_math_optimizations, false); - bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math, - options::OPT_fno_fast_math, false); - bool CorrectSqrt = DriverArgs.hasFlag( - options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, - options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true); - bool Wave64 = isWave64(DriverArgs, Kind); - - // GPU Sanitizer currently only supports ASan and is enabled through host - // ASan. - bool GPUSan = DriverArgs.hasFlag(options::OPT_fgpu_sanitize, - options::OPT_fno_gpu_sanitize, true) && - getSanitizerArgs(DriverArgs).needsAsanRt(); - return RocmInstallation->getCommonBitcodeLibs( - DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, - FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP); + DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind, + getSanitizerArgs(DriverArgs).needsAsanRt()); } bool AMDGPUToolChain::shouldSkipSanitizeOption( diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index 08bd4fa..513c77d 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -147,7 +147,7 @@ public: llvm::SmallVector<BitCodeLibraryInfo, 12> getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch, - bool isOpenMP = false) const; + Action::OffloadKind DeviceOffloadingKind) const; SanitizerMask getSupportedSanitizers() const override { return SanitizerKind::Address; diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index 7ffa3f0..2b41d54 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -44,7 +44,7 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( true)) return; - for (auto BCFile : getDeviceLibs(DriverArgs)) { + for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) { CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" : "-mlink-bitcode-file"); CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path)); @@ -132,7 +132,9 @@ AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D, } llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> -AMDGPUOpenMPToolChain::getDeviceLibs(const llvm::opt::ArgList &Args) const { +AMDGPUOpenMPToolChain::getDeviceLibs( + const llvm::opt::ArgList &Args, + const Action::OffloadKind DeviceOffloadingKind) const { if (!Args.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true)) return {}; @@ -140,8 +142,8 @@ AMDGPUOpenMPToolChain::getDeviceLibs(const llvm::opt::ArgList &Args) const { getTriple(), Args.getLastArgValue(options::OPT_march_EQ)); SmallVector<BitCodeLibraryInfo, 12> BCLibs; - for (auto BCLib : getCommonDeviceLibNames(Args, GpuArch.str(), - /*IsOpenMP=*/true)) + for (auto BCLib : + getCommonDeviceLibNames(Args, GpuArch.str(), DeviceOffloadingKind)) BCLibs.emplace_back(BCLib); return BCLibs; diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h index 0536c9f..cbafdf5 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h @@ -58,7 +58,8 @@ public: const llvm::opt::ArgList &Args) const override; llvm::SmallVector<BitCodeLibraryInfo, 12> - getDeviceLibs(const llvm::opt::ArgList &Args) const override; + getDeviceLibs(const llvm::opt::ArgList &Args, + const Action::OffloadKind DeviceOffloadKind) const override; const ToolChain &HostTC; }; diff --git a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp index 9595ee8..94a94f1 100644 --- a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp +++ b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp @@ -23,7 +23,9 @@ const char *sparc::getSparcAsmModeForCPU(StringRef Name, if (Triple.getArch() == llvm::Triple::sparcv9) { const char *DefV9CPU; - if (Triple.isOSLinux() || Triple.isOSFreeBSD() || Triple.isOSOpenBSD()) + if (Triple.isOSSolaris()) + DefV9CPU = "-Av9b"; + else if (Triple.isOSLinux() || Triple.isOSFreeBSD() || Triple.isOSOpenBSD()) DefV9CPU = "-Av9a"; else DefV9CPU = "-Av9"; @@ -35,6 +37,13 @@ const char *sparc::getSparcAsmModeForCPU(StringRef Name, .Case("niagara4", "-Av9d") .Default(DefV9CPU); } else { + const char *DefV8CPU; + + if (Triple.isOSSolaris()) + DefV8CPU = "-Av8plus"; + else + DefV8CPU = "-Av8"; + return llvm::StringSwitch<const char *>(Name) .Case("v8", "-Av8") .Case("supersparc", "-Av8") @@ -70,7 +79,7 @@ const char *sparc::getSparcAsmModeForCPU(StringRef Name, .Case("gr712rc", "-Aleon") .Case("leon4", "-Aleon") .Case("gr740", "-Aleon") - .Default("-Av8"); + .Default(DefV8CPU); } } @@ -130,7 +139,8 @@ std::string sparc::getSparcTargetCPU(const Driver &D, const ArgList &Args, return ""; } -void sparc::getSparcTargetFeatures(const Driver &D, const ArgList &Args, +void sparc::getSparcTargetFeatures(const Driver &D, const llvm::Triple &Triple, + const ArgList &Args, std::vector<StringRef> &Features) { sparc::FloatABI FloatABI = sparc::getSparcFloatABI(D, Args); if (FloatABI == sparc::FloatABI::Soft) @@ -150,11 +160,22 @@ void sparc::getSparcTargetFeatures(const Driver &D, const ArgList &Args, Features.push_back("-popc"); } + // Those OSes default to enabling VIS on 64-bit SPARC. + // See also the corresponding code for external assemblers in + // sparc::getSparcAsmModeForCPU(). + bool IsSparcV9ATarget = + (Triple.getArch() == llvm::Triple::sparcv9) && + (Triple.isOSLinux() || Triple.isOSFreeBSD() || Triple.isOSOpenBSD()); + bool IsSparcV9BTarget = Triple.isOSSolaris(); + bool IsSparcV8PlusTarget = + Triple.getArch() == llvm::Triple::sparc && Triple.isOSSolaris(); if (Arg *A = Args.getLastArg(options::OPT_mvis, options::OPT_mno_vis)) { if (A->getOption().matches(options::OPT_mvis)) Features.push_back("+vis"); else Features.push_back("-vis"); + } else if (IsSparcV9ATarget) { + Features.push_back("+vis"); } if (Arg *A = Args.getLastArg(options::OPT_mvis2, options::OPT_mno_vis2)) { @@ -162,6 +183,8 @@ void sparc::getSparcTargetFeatures(const Driver &D, const ArgList &Args, Features.push_back("+vis2"); else Features.push_back("-vis2"); + } else if (IsSparcV9BTarget) { + Features.push_back("+vis2"); } if (Arg *A = Args.getLastArg(options::OPT_mvis3, options::OPT_mno_vis3)) { @@ -182,6 +205,8 @@ void sparc::getSparcTargetFeatures(const Driver &D, const ArgList &Args, if (Arg *A = Args.getLastArg(options::OPT_mv8plus, options::OPT_mno_v8plus)) { if (A->getOption().matches(options::OPT_mv8plus)) Features.push_back("+v8plus"); + } else if (IsSparcV8PlusTarget) { + Features.push_back("+v8plus"); } if (Args.hasArg(options::OPT_ffixed_g1)) diff --git a/clang/lib/Driver/ToolChains/Arch/Sparc.h b/clang/lib/Driver/ToolChains/Arch/Sparc.h index 2b178d9..fa25b49 100644 --- a/clang/lib/Driver/ToolChains/Arch/Sparc.h +++ b/clang/lib/Driver/ToolChains/Arch/Sparc.h @@ -31,7 +31,8 @@ FloatABI getSparcFloatABI(const Driver &D, const llvm::opt::ArgList &Args); std::string getSparcTargetCPU(const Driver &D, const llvm::opt::ArgList &Args, const llvm::Triple &Triple); -void getSparcTargetFeatures(const Driver &D, const llvm::opt::ArgList &Args, +void getSparcTargetFeatures(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args, std::vector<llvm::StringRef> &Features); const char *getSparcAsmModeForCPU(llvm::StringRef Name, const llvm::Triple &Triple); diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index e670696..497f333 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -694,9 +694,6 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, NeedCRTs) CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTEnd))); - if (TC.getTriple().isRISCV()) - CmdArgs.push_back("-X"); - // The R_ARM_TARGET2 relocation must be treated as R_ARM_REL32 on arm*-*-elf // and arm*-*-eabi (the default is R_ARM_GOT_PREL, used on arm*-*-linux and // arm*-*-*bsd). diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 8880c93..7d0c142 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -97,32 +97,15 @@ forAllAssociatedToolChains(Compilation &C, const JobAction &JA, // Apply Work on all the offloading tool chains associated with the current // action. - if (JA.isHostOffloading(Action::OFK_Cuda)) - Work(*C.getSingleOffloadToolChain<Action::OFK_Cuda>()); - else if (JA.isDeviceOffloading(Action::OFK_Cuda)) - Work(*C.getSingleOffloadToolChain<Action::OFK_Host>()); - else if (JA.isHostOffloading(Action::OFK_HIP)) - Work(*C.getSingleOffloadToolChain<Action::OFK_HIP>()); - else if (JA.isDeviceOffloading(Action::OFK_HIP)) - Work(*C.getSingleOffloadToolChain<Action::OFK_Host>()); - - if (JA.isHostOffloading(Action::OFK_OpenMP)) { - auto TCs = C.getOffloadToolChains<Action::OFK_OpenMP>(); - for (auto II = TCs.first, IE = TCs.second; II != IE; ++II) - Work(*II->second); - } else if (JA.isDeviceOffloading(Action::OFK_OpenMP)) - Work(*C.getSingleOffloadToolChain<Action::OFK_Host>()); - - if (JA.isHostOffloading(Action::OFK_SYCL)) { - auto TCs = C.getOffloadToolChains<Action::OFK_SYCL>(); - for (auto II = TCs.first, IE = TCs.second; II != IE; ++II) - Work(*II->second); - } else if (JA.isDeviceOffloading(Action::OFK_SYCL)) - Work(*C.getSingleOffloadToolChain<Action::OFK_Host>()); - - // - // TODO: Add support for other offloading programming models here. - // + for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP, + Action::OFK_HIP, Action::OFK_SYCL}) { + if (JA.isHostOffloading(Kind)) { + auto TCs = C.getOffloadToolChains(Kind); + for (auto II = TCs.first, IE = TCs.second; II != IE; ++II) + Work(*II->second); + } else if (JA.isDeviceOffloading(Kind)) + Work(*C.getSingleOffloadToolChain<Action::OFK_Host>()); + } } static bool @@ -2731,16 +2714,6 @@ static void CollectArgsForIntegratedAssembler(Compilation &C, CmdArgs.push_back(MipsTargetFeature); } - // Those OSes default to enabling VIS on 64-bit SPARC. - // See also the corresponding code for external assemblers in - // sparc::getSparcAsmModeForCPU(). - bool IsSparcV9ATarget = - (C.getDefaultToolChain().getArch() == llvm::Triple::sparcv9) && - (Triple.isOSLinux() || Triple.isOSFreeBSD() || Triple.isOSOpenBSD()); - if (IsSparcV9ATarget && SparcTargetFeatures.empty()) { - CmdArgs.push_back("-target-feature"); - CmdArgs.push_back("+vis"); - } for (const char *Feature : SparcTargetFeatures) { CmdArgs.push_back("-target-feature"); CmdArgs.push_back(Feature); @@ -4095,31 +4068,34 @@ static bool RenderModulesOptions(Compilation &C, const Driver &D, // module fragment. CmdArgs.push_back("-fskip-odr-check-in-gmf"); - if (Args.hasArg(options::OPT_modules_reduced_bmi) && + if (!Args.hasArg(options::OPT_fno_modules_reduced_bmi) && (Input.getType() == driver::types::TY_CXXModule || - Input.getType() == driver::types::TY_PP_CXXModule)) { + Input.getType() == driver::types::TY_PP_CXXModule) && + !Args.hasArg(options::OPT__precompile)) { CmdArgs.push_back("-fmodules-reduced-bmi"); if (Args.hasArg(options::OPT_fmodule_output_EQ)) Args.AddLastArg(CmdArgs, options::OPT_fmodule_output_EQ); - else { - if (Args.hasArg(options::OPT__precompile) && - (!Args.hasArg(options::OPT_o) || - Args.getLastArg(options::OPT_o)->getValue() == - getCXX20NamedModuleOutputPath(Args, Input.getBaseInput()))) { - D.Diag(diag::err_drv_reduced_module_output_overrided); - } - + else CmdArgs.push_back(Args.MakeArgString( "-fmodule-output=" + getCXX20NamedModuleOutputPath(Args, Input.getBaseInput()))); - } } - // Noop if we see '-fmodules-reduced-bmi' with other translation - // units than module units. This is more user friendly to allow end uers to - // enable this feature without asking for help from build systems. - Args.ClaimAllArgs(options::OPT_modules_reduced_bmi); + if (Args.hasArg(options::OPT_fmodules_reduced_bmi) && + Args.hasArg(options::OPT__precompile) && + (!Args.hasArg(options::OPT_o) || + Args.getLastArg(options::OPT_o)->getValue() == + getCXX20NamedModuleOutputPath(Args, Input.getBaseInput()))) { + D.Diag(diag::err_drv_reduced_module_output_overrided); + } + + // Noop if we see '-fmodules-reduced-bmi' or `-fno-modules-reduced-bmi` with + // other translation units than module units. This is more user friendly to + // allow end uers to enable this feature without asking for help from build + // systems. + Args.ClaimAllArgs(options::OPT_fmodules_reduced_bmi); + Args.ClaimAllArgs(options::OPT_fno_modules_reduced_bmi); // We need to include the case the input file is a module file here. // Since the default compilation model for C++ module interface unit will @@ -4992,8 +4968,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, else { // Host-side compilation. NormalizedTriple = - (IsCuda ? C.getSingleOffloadToolChain<Action::OFK_Cuda>() - : C.getSingleOffloadToolChain<Action::OFK_HIP>()) + (IsCuda ? C.getOffloadToolChains(Action::OFK_Cuda).first->second + : C.getOffloadToolChains(Action::OFK_HIP).first->second) ->getTriple() .normalize(); if (IsCuda) { diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 651a39c..826e2ea 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -856,7 +856,7 @@ void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple, case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::sparcv9: - sparc::getSparcTargetFeatures(D, Args, Features); + sparc::getSparcTargetFeatures(D, Triple, Args, Features); break; case llvm::Triple::r600: case llvm::Triple::amdgcn: diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 2373d94..7d803be 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -815,7 +815,7 @@ NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const { else Program = GetProgramPath("nvptx-arch"); - auto StdoutOrErr = executeToolChainProgram(Program); + auto StdoutOrErr = getDriver().executeProgram({Program}); if (!StdoutOrErr) return StdoutOrErr.takeError(); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1edb83f..7ab41e9 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -447,6 +447,7 @@ void Flang::addTargetOptions(const ArgList &Args, // Add the target features. switch (TC.getArch()) { default: + getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false); break; case llvm::Triple::aarch64: getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false); diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index 5fe0f85..b4c6da0 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -264,7 +264,7 @@ void HIPAMDToolChain::addClangTargetOptions( return; // No DeviceLibs for SPIR-V. } - for (auto BCFile : getDeviceLibs(DriverArgs)) { + for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) { CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" : "-mlink-bitcode-file"); CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path)); @@ -355,7 +355,8 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D, } llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> -HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { +HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs, + Action::OffloadKind DeviceOffloadingKind) const { llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs; if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true) || @@ -397,7 +398,8 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { assert(!GpuArch.empty() && "Must have an explicit GPU arch."); // Add common device libraries like ocml etc. - for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str())) + for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str(), + DeviceOffloadingKind)) BCLibs.emplace_back(N); // Add instrument lib. diff --git a/clang/lib/Driver/ToolChains/HIPAMD.h b/clang/lib/Driver/ToolChains/HIPAMD.h index 3630b11..bcc3ebb 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.h +++ b/clang/lib/Driver/ToolChains/HIPAMD.h @@ -80,7 +80,8 @@ public: void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; llvm::SmallVector<BitCodeLibraryInfo, 12> - getDeviceLibs(const llvm::opt::ArgList &Args) const override; + getDeviceLibs(const llvm::opt::ArgList &Args, + Action::OffloadKind DeviceOffloadKind) const override; SanitizerMask getSupportedSanitizers() const override; diff --git a/clang/lib/Driver/ToolChains/HIPSPV.cpp b/clang/lib/Driver/ToolChains/HIPSPV.cpp index 53649ca..643a67f 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.cpp +++ b/clang/lib/Driver/ToolChains/HIPSPV.cpp @@ -149,7 +149,8 @@ void HIPSPVToolChain::addClangTargetOptions( CC1Args.append( {"-fvisibility=hidden", "-fapply-global-visibility-to-externs"}); - for (const BitCodeLibraryInfo &BCFile : getDeviceLibs(DriverArgs)) + for (const BitCodeLibraryInfo &BCFile : + getDeviceLibs(DriverArgs, DeviceOffloadingKind)) CC1Args.append( {"-mlink-builtin-bitcode", DriverArgs.MakeArgString(BCFile.Path)}); } @@ -200,7 +201,9 @@ void HIPSPVToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, } llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> -HIPSPVToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { +HIPSPVToolChain::getDeviceLibs( + const llvm::opt::ArgList &DriverArgs, + const Action::OffloadKind DeviceOffloadingKind) const { llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs; if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true)) diff --git a/clang/lib/Driver/ToolChains/HIPSPV.h b/clang/lib/Driver/ToolChains/HIPSPV.h index ecd82e7..caf6924 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.h +++ b/clang/lib/Driver/ToolChains/HIPSPV.h @@ -69,7 +69,8 @@ public: void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; llvm::SmallVector<BitCodeLibraryInfo, 12> - getDeviceLibs(const llvm::opt::ArgList &Args) const override; + getDeviceLibs(const llvm::opt::ArgList &Args, + const Action::OffloadKind DeviceOffloadKind) const override; SanitizerMask getSupportedSanitizers() const override; diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index 79b1b69..8f58918 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -161,7 +161,7 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (Nopie || Profiling) CmdArgs.push_back("-nopie"); - if (Triple.isRISCV64()) { + if (Triple.isLoongArch64() || Triple.isRISCV64()) { CmdArgs.push_back("-X"); if (Args.hasArg(options::OPT_mno_relax)) CmdArgs.push_back("--no-relax"); diff --git a/clang/lib/Driver/ToolChains/ROCm.h b/clang/lib/Driver/ToolChains/ROCm.h index 2a09da01..ebd5443 100644 --- a/clang/lib/Driver/ToolChains/ROCm.h +++ b/clang/lib/Driver/ToolChains/ROCm.h @@ -11,6 +11,7 @@ #include "clang/Basic/Cuda.h" #include "clang/Basic/LLVM.h" +#include "clang/Driver/CommonArgs.h" #include "clang/Driver/Driver.h" #include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" @@ -18,6 +19,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/VersionTuple.h" +#include "llvm/TargetParser/TargetParser.h" #include "llvm/TargetParser/Triple.h" namespace clang { @@ -77,6 +79,24 @@ private: SPACKReleaseStr(SPACKReleaseStr.str()) {} }; + struct CommonBitcodeLibsPreferences { + CommonBitcodeLibsPreferences(const Driver &D, + const llvm::opt::ArgList &DriverArgs, + StringRef GPUArch, + const Action::OffloadKind DeviceOffloadingKind, + const bool NeedsASanRT); + + DeviceLibABIVersion ABIVer; + bool IsOpenMP; + bool Wave64; + bool DAZ; + bool FiniteOnly; + bool UnsafeMathOpt; + bool FastRelaxedMath; + bool CorrectSqrt; + bool GPUSan; + }; + const Driver &D; bool HasHIPRuntime = false; bool HasDeviceLibrary = false; @@ -175,11 +195,11 @@ public: /// Get file paths of default bitcode libraries common to AMDGPU based /// toolchains. - llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> getCommonBitcodeLibs( - const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, - bool Wave64, bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, - bool FastRelaxedMath, bool CorrectSqrt, DeviceLibABIVersion ABIVer, - bool GPUSan, bool isOpenMP) const; + llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> + getCommonBitcodeLibs(const llvm::opt::ArgList &DriverArgs, + StringRef LibDeviceFile, StringRef GPUArch, + const Action::OffloadKind DeviceOffloadingKind, + const bool NeedsASanRT) const; /// Check file paths of default bitcode libraries common to AMDGPU based /// toolchains. \returns false if there are invalid or missing files. bool checkCommonBitcodeLibs(StringRef GPUArch, StringRef LibDeviceFile, |