diff options
73 files changed, 2599 insertions, 581 deletions
diff --git a/clang-tools-extra/clang-tidy/google/CMakeLists.txt b/clang-tools-extra/clang-tidy/google/CMakeLists.txt index 2470c08..1d4229e 100644 --- a/clang-tools-extra/clang-tidy/google/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/google/CMakeLists.txt @@ -11,6 +11,7 @@ add_clang_library(clangTidyGoogleModule STATIC DefaultArgumentsCheck.cpp ExplicitConstructorCheck.cpp ExplicitMakePairCheck.cpp + FloatTypesCheck.cpp FunctionNamingCheck.cpp GlobalNamesInHeadersCheck.cpp GlobalVariableDeclarationCheck.cpp diff --git a/clang-tools-extra/clang-tidy/google/FloatTypesCheck.cpp b/clang-tools-extra/clang-tidy/google/FloatTypesCheck.cpp new file mode 100644 index 0000000..3d5fb02 --- /dev/null +++ b/clang-tools-extra/clang-tidy/google/FloatTypesCheck.cpp @@ -0,0 +1,77 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FloatTypesCheck.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Lex/Lexer.h" + +namespace clang { + +using namespace ast_matchers; + +namespace { + +AST_POLYMORPHIC_MATCHER(isValidAndNotInMacro, + AST_POLYMORPHIC_SUPPORTED_TYPES(TypeLoc, + FloatingLiteral)) { + const SourceLocation Loc = Node.getBeginLoc(); + return Loc.isValid() && !Loc.isMacroID(); +} + +AST_MATCHER(TypeLoc, isLongDoubleType) { + TypeLoc TL = Node; + if (const auto QualLoc = Node.getAs<QualifiedTypeLoc>()) + TL = QualLoc.getUnqualifiedLoc(); + + const auto BuiltinLoc = TL.getAs<BuiltinTypeLoc>(); + if (!BuiltinLoc) + return false; + + if (const auto *BT = BuiltinLoc.getTypePtr()) + return BT->getKind() == BuiltinType::LongDouble; + return false; +} + +AST_MATCHER(FloatingLiteral, isLongDoubleLiteral) { + if (const auto *BT = + dyn_cast_if_present<BuiltinType>(Node.getType().getTypePtr())) + return BT->getKind() == BuiltinType::LongDouble; + return false; +} + +} // namespace + +namespace tidy::google::runtime { + +void RuntimeFloatCheck::registerMatchers(MatchFinder *Finder) { + Finder->addMatcher(typeLoc(loc(realFloatingPointType()), + isValidAndNotInMacro(), isLongDoubleType()) + .bind("longDoubleTypeLoc"), + this); + Finder->addMatcher(floatLiteral(isValidAndNotInMacro(), isLongDoubleLiteral()) + .bind("longDoubleFloatLiteral"), + this); +} + +void RuntimeFloatCheck::check(const MatchFinder::MatchResult &Result) { + if (const auto *TL = Result.Nodes.getNodeAs<TypeLoc>("longDoubleTypeLoc")) { + diag(TL->getBeginLoc(), "%0 type is not portable and should not be used") + << TL->getType(); + } + + if (const auto *FL = + Result.Nodes.getNodeAs<FloatingLiteral>("longDoubleFloatLiteral")) { + diag(FL->getBeginLoc(), "%0 type from literal suffix 'L' is not portable " + "and should not be used") + << FL->getType(); + } +} + +} 
// namespace tidy::google::runtime + +} // namespace clang diff --git a/clang-tools-extra/clang-tidy/google/FloatTypesCheck.h b/clang-tools-extra/clang-tidy/google/FloatTypesCheck.h new file mode 100644 index 0000000..b5534c0 --- /dev/null +++ b/clang-tools-extra/clang-tidy/google/FloatTypesCheck.h @@ -0,0 +1,34 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_FLOATTYPESCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_FLOATTYPESCHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang::tidy::google::runtime { + +/// Finds usages of `long double` and suggests against their use due to lack +/// of portability. 
+/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/google/runtime-float.html +class RuntimeFloatCheck : public ClangTidyCheck { +public: + RuntimeFloatCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { + return LangOpts.CPlusPlus && !LangOpts.ObjC; + } +}; + +} // namespace clang::tidy::google::runtime + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_FLOATTYPESCHECK_H diff --git a/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp b/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp index 5343e2b..eb5666b 100644 --- a/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp @@ -19,6 +19,7 @@ #include "DefaultArgumentsCheck.h" #include "ExplicitConstructorCheck.h" #include "ExplicitMakePairCheck.h" +#include "FloatTypesCheck.h" #include "FunctionNamingCheck.h" #include "GlobalNamesInHeadersCheck.h" #include "GlobalVariableDeclarationCheck.h" @@ -57,6 +58,8 @@ public: "google-objc-function-naming"); CheckFactories.registerCheck<objc::GlobalVariableDeclarationCheck>( "google-objc-global-variable-declaration"); + CheckFactories.registerCheck<runtime::RuntimeFloatCheck>( + "google-runtime-float"); CheckFactories.registerCheck<runtime::IntegerTypesCheck>( "google-runtime-int"); CheckFactories.registerCheck<runtime::OverloadedUnaryAndCheck>( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 28620a92..23d757b 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -155,6 +155,12 @@ New checks Finds calls to ``operator[]`` in STL containers and suggests replacing them with safe alternatives. 
+- New :doc:`google-runtime-float + <clang-tidy/checks/google/runtime-float>` check. + + Finds uses of ``long double`` and suggests against their use due to lack of + portability. + - New :doc:`llvm-mlir-op-builder <clang-tidy/checks/llvm/use-new-mlir-op-builder>` check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/google/runtime-float.rst b/clang-tools-extra/docs/clang-tidy/checks/google/runtime-float.rst new file mode 100644 index 0000000..4b853ad --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/google/runtime-float.rst @@ -0,0 +1,10 @@ +.. title:: clang-tidy - google-runtime-float + +google-runtime-float +==================== + +Finds uses of ``long double`` and suggests against their use due to lack of +portability. + +The corresponding style guide rule: +https://google.github.io/styleguide/cppguide.html#Floating-Point_Types diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 89ad491..c490d2e 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -238,6 +238,7 @@ Clang-Tidy Checks :doc:`google-readability-avoid-underscore-in-googletest-name <google/readability-avoid-underscore-in-googletest-name>`, :doc:`google-readability-casting <google/readability-casting>`, :doc:`google-readability-todo <google/readability-todo>`, + :doc:`google-runtime-float <google/runtime-float>`, :doc:`google-runtime-int <google/runtime-int>`, :doc:`google-runtime-operator <google/runtime-operator>`, :doc:`google-upgrade-googletest-case <google/upgrade-googletest-case>`, "Yes" diff --git a/clang-tools-extra/test/clang-tidy/checkers/google/runtime-float.cpp b/clang-tools-extra/test/clang-tidy/checkers/google/runtime-float.cpp new file mode 100644 index 0000000..5c9cc11 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/google/runtime-float.cpp @@ -0,0 +1,40 @@ +// RUN: %check_clang_tidy %s google-runtime-float %t + +long 
double foo; +// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: 'long double' type is not portable and should not be used [google-runtime-float] + +typedef long double MyLongDouble; +// CHECK-MESSAGES: :[[@LINE-1]]:9: warning: 'long double' type is not portable and should not be used [google-runtime-float] + +typedef long double MyOtherLongDouble; // NOLINT + +template <typename T> +void tmpl() { T i; } + +long volatile double v = 10; +// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: 'volatile long double' type is not portable and should not be used [google-runtime-float] + +long double h(long const double aaa, long double bbb = 0.5L) { + // CHECK-MESSAGES: :[[@LINE-1]]:1: warning: 'long double' type is not portable and should not be used [google-runtime-float] + // CHECK-MESSAGES: :[[@LINE-2]]:15: warning: 'const long double' type is not portable and should not be used [google-runtime-float] + // CHECK-MESSAGES: :[[@LINE-3]]:38: warning: 'long double' type is not portable and should not be used [google-runtime-float] + // CHECK-MESSAGES: :[[@LINE-4]]:56: warning: 'long double' type from literal suffix 'L' is not portable and should not be used [google-runtime-float] + double x = 0.1; + double y = 0.2L; + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: 'long double' type from literal suffix 'L' is not portable and should not be used [google-runtime-float] +#define ldtype long double + ldtype z; + tmpl<long double>(); + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: 'long double' type is not portable and should not be used [google-runtime-float] + return 0; +} + +struct S{}; +constexpr S operator"" _baz(unsigned long long) { + long double j; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'long double' type is not portable and should not be used [google-runtime-float] + MyOtherLongDouble x; + long int a = 1L; + return S{}; +} + diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 10bf96a..1e48fa5 100644 --- a/clang/include/clang/Basic/Attr.td +++ 
b/clang/include/clang/Basic/Attr.td @@ -954,7 +954,7 @@ def PatchableFunctionEntry : InheritableAttr, TargetSpecificAttr<TargetArch< ["aarch64", "aarch64_be", "loongarch32", "loongarch64", "riscv32", - "riscv64", "x86", "x86_64", "ppc", "ppc64"]>> { + "riscv64", "x86", "x86_64", "ppc", "ppc64", "ppc64le"]>> { let Spellings = [GCC<"patchable_function_entry">]; let Subjects = SubjectList<[Function, ObjCMethod]>; let Args = [UnsignedArgument<"Count">, DefaultIntArgument<"Offset", 0>, diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index dd22e55..ab3f9e4 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -6719,7 +6719,7 @@ if omitted.``Section`` defaults to the ``-fpatchable-function-entry`` section n set, or to ``__patchable_function_entries`` otherwise. This attribute is only supported on -aarch64/aarch64-be/loongarch32/loongarch64/riscv32/riscv64/i386/x86-64/ppc/ppc64 targets. +aarch64/aarch64-be/loongarch32/loongarch64/riscv32/riscv64/i386/x86-64/ppc/ppc64/ppc64le targets. For ppc/ppc64 targets, AIX is still not supported. 
}]; } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a357a88..2371128 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6760,7 +6760,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (!Triple.isAArch64() && !Triple.isLoongArch() && !Triple.isRISCV() && !Triple.isX86() && !(!Triple.isOSAIX() && (Triple.getArch() == llvm::Triple::ppc || - Triple.getArch() == llvm::Triple::ppc64))) + Triple.getArch() == llvm::Triple::ppc64 || + Triple.getArch() == llvm::Triple::ppc64le))) D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args) << TripleStr; else if (S.consumeInteger(10, Size) || diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp index 27a9113..03eeb99 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp @@ -232,14 +232,11 @@ public: if (!Init) return nullptr; if (auto *Lambda = dyn_cast<LambdaExpr>(Init)) { + DeclRefExprsToIgnore.insert(DRE); updateIgnoreList(); return Lambda; } - TempExpr = dyn_cast<CXXBindTemporaryExpr>(Init->IgnoreParenCasts()); - if (!TempExpr) - return nullptr; - updateIgnoreList(); - return dyn_cast_or_null<LambdaExpr>(TempExpr->getSubExpr()); + return nullptr; } void checkCalleeLambda(CallExpr *CE) { diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp index 3079f8e..0b8af0d 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp @@ -448,4 +448,27 @@ void ranges_for_each(RefCountable* obj) { obj->method(); ++(*static_cast<unsigned*>(item)); }); -}
\ No newline at end of file +} + +class RefCountedObj { +public: + void ref(); + void deref(); + + void call() const; + void callLambda([[clang::noescape]] const WTF::Function<void ()>& callback) const; + void doSomeWork() const; +}; + +void RefCountedObj::callLambda([[clang::noescape]] const WTF::Function<void ()>& callback) const +{ + callback(); +} + +void RefCountedObj::call() const +{ + auto lambda = [&] { + doSomeWork(); + }; + callLambda(lambda); +} diff --git a/clang/test/Driver/fpatchable-function-entry.c b/clang/test/Driver/fpatchable-function-entry.c index 43be6c5..5248a7c0 100644 --- a/clang/test/Driver/fpatchable-function-entry.c +++ b/clang/test/Driver/fpatchable-function-entry.c @@ -8,6 +8,7 @@ // RUN: %clang --target=riscv64 %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s // RUN: %clang --target=powerpc-unknown-linux-gnu %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s // RUN: %clang --target=powerpc64-unknown-linux-gnu %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s +// RUN: %clang --target=powerpc64le-unknown-linux-gnu %s -fpatchable-function-entry=1,0 -c -### 2>&1 | FileCheck %s // CHECK: "-fpatchable-function-entry=1" // RUN: %clang --target=aarch64 -fsyntax-only %s -fpatchable-function-entry=1,1 -c -### 2>&1 | FileCheck --check-prefix=11 %s diff --git a/clang/test/Sema/patchable-function-entry-attr.cpp b/clang/test/Sema/patchable-function-entry-attr.cpp index 7498e67..97b9c26 100644 --- a/clang/test/Sema/patchable-function-entry-attr.cpp +++ b/clang/test/Sema/patchable-function-entry-attr.cpp @@ -8,7 +8,7 @@ // RUN: %clang_cc1 -triple riscv64 -fsyntax-only -verify=silence %s // RUN: %clang_cc1 -triple powerpc-unknown-linux-gnu -fsyntax-only -verify=silence %s // RUN: %clang_cc1 -triple powerpc64-unknown-linux-gnu -fsyntax-only -verify=silence %s -// RUN: %clang_cc1 -triple ppc64le -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple ppc64le -fsyntax-only -verify=silence %s // RUN: %clang_cc1 -triple 
powerpc64-ibm-aix-xcoff -fsyntax-only -verify=AIX %s // RUN: %clang_cc1 -triple powerpc-ibm-aix-xcoff -fsyntax-only -verify=AIX %s diff --git a/flang-rt/lib/cuda/descriptor.cpp b/flang-rt/lib/cuda/descriptor.cpp index d3cc6c2..aa75d4e 100644 --- a/flang-rt/lib/cuda/descriptor.cpp +++ b/flang-rt/lib/cuda/descriptor.cpp @@ -62,15 +62,6 @@ void RTDEF(CUFDescriptorCheckSection)( } } -void RTDEF(CUFSetAllocatorIndex)( - Descriptor *desc, int index, const char *sourceFile, int sourceLine) { - if (!desc) { - Terminator terminator{sourceFile, sourceLine}; - terminator.Crash("descriptor is null"); - } - desc->SetAllocIdx(index); -} - RT_EXT_API_GROUP_END } } // namespace Fortran::runtime::cuda diff --git a/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp index 4b22e29..f1f931e 100644 --- a/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp +++ b/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp @@ -72,14 +72,3 @@ TEST(AllocatableCUFTest, DescriptorAllocationTest) { EXPECT_TRUE(desc != nullptr); RTNAME(CUFFreeDescriptor)(desc); } - -TEST(AllocatableCUFTest, CUFSetAllocatorIndex) { - using Fortran::common::TypeCategory; - RTNAME(CUFRegisterAllocator)(); - // REAL(4), DEVICE, ALLOCATABLE :: a(:) - auto a{createAllocatable(TypeCategory::Real, 4)}; - EXPECT_EQ((int)kDefaultAllocator, a->GetAllocIdx()); - RTNAME(CUFSetAllocatorIndex)( - a.get(), kDeviceAllocatorPos, __FILE__, __LINE__); - EXPECT_EQ((int)kDeviceAllocatorPos, a->GetAllocIdx()); -} diff --git a/flang/include/flang/Lower/CUDA.h b/flang/include/flang/Lower/CUDA.h index 4a831fd..ab9dde8 100644 --- a/flang/include/flang/Lower/CUDA.h +++ b/flang/include/flang/Lower/CUDA.h @@ -47,10 +47,6 @@ static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) { return kDefaultAllocator; } -void initializeDeviceComponentAllocator( - Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box); - mlir::Type 
gatherDeviceComponentCoordinatesAndType( fir::FirOpBuilder &builder, mlir::Location loc, const Fortran::semantics::Symbol &sym, fir::RecordType recTy, diff --git a/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h b/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h index 43dca65..bdeb757 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/CUDA/Descriptor.h @@ -31,10 +31,6 @@ void genSyncGlobalDescriptor(fir::FirOpBuilder &builder, mlir::Location loc, void genDescriptorCheckSection(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value desc); -/// Generate runtime call to set the allocator index in the descriptor. -void genSetAllocatorIndex(fir::FirOpBuilder &builder, mlir::Location loc, - mlir::Value desc, mlir::Value index); - } // namespace fir::runtime::cuda #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_CUDA_DESCRIPTOR_H_ diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td index 23ab8826..e3873823 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -388,25 +388,4 @@ def cuf_StreamCastOp : cuf_Op<"stream_cast", [NoMemoryEffect]> { let hasVerifier = 1; } -def cuf_SetAllocatorIndexOp : cuf_Op<"set_allocator_idx", []> { - let summary = "Set the allocator index in a descriptor"; - - let description = [{ - Allocator index in the Fortran descriptor is used to retrived the correct - CUDA allocator to allocate the memory on the device. - In many cases the allocator index is set when the descriptor is created. For - device components, the descriptor is part of the derived-type itself and - needs to be set after the derived-type is allocated in managed memory. 
- }]; - - let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box, - cuf_DataAttributeAttr:$data_attr); - - let assemblyFormat = [{ - $box `:` qualified(type($box)) attr-dict - }]; - - let hasVerifier = 1; -} - #endif // FORTRAN_DIALECT_CUF_CUF_OPS diff --git a/flang/include/flang/Runtime/CUDA/descriptor.h b/flang/include/flang/Runtime/CUDA/descriptor.h index 7555f27..06e4a464 100644 --- a/flang/include/flang/Runtime/CUDA/descriptor.h +++ b/flang/include/flang/Runtime/CUDA/descriptor.h @@ -41,10 +41,6 @@ void RTDECL(CUFSyncGlobalDescriptor)( void RTDECL(CUFDescriptorCheckSection)( const Descriptor *, const char *sourceFile = nullptr, int sourceLine = 0); -/// Set the allocator index with the provided value. -void RTDECL(CUFSetAllocatorIndex)(Descriptor *, int index, - const char *sourceFile = nullptr, int sourceLine = 0); - } // extern "C" } // namespace Fortran::runtime::cuda diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index 444b5b6..53239cb 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -450,9 +450,6 @@ private: if (alloc.getSymbol().test(Fortran::semantics::Symbol::Flag::AccDeclare)) Fortran::lower::attachDeclarePostAllocAction(converter, builder, alloc.getSymbol()); - if (Fortran::semantics::HasCUDAComponent(alloc.getSymbol())) - Fortran::lower::initializeDeviceComponentAllocator( - converter, alloc.getSymbol(), box); } void setPinnedToFalse() { diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp index 1293d2c..bb4bdee 100644 --- a/flang/lib/Lower/CUDA.cpp +++ b/flang/lib/Lower/CUDA.cpp @@ -17,95 +17,6 @@ #define DEBUG_TYPE "flang-lower-cuda" -void Fortran::lower::initializeDeviceComponentAllocator( - Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box) { - if (const auto *details{ - sym.GetUltimate() - .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) { - const 
Fortran::semantics::DeclTypeSpec *type{details->type()}; - const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived() - : nullptr}; - if (derived) { - if (!FindCUDADeviceAllocatableUltimateComponent(*derived)) - return; // No device components. - - fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - mlir::Location loc = converter.getCurrentLocation(); - - mlir::Type baseTy = fir::unwrapRefType(box.getAddr().getType()); - - // Only pointer and allocatable needs post allocation initialization - // of components descriptors. - if (!fir::isAllocatableType(baseTy) && !fir::isPointerType(baseTy)) - return; - - // Extract the derived type. - mlir::Type ty = fir::getDerivedType(baseTy); - auto recTy = mlir::dyn_cast<fir::RecordType>(ty); - assert(recTy && "expected fir::RecordType"); - - if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy)) - baseTy = boxTy.getEleTy(); - baseTy = fir::unwrapRefType(baseTy); - - Fortran::semantics::UltimateComponentIterator components{*derived}; - mlir::Value loadedBox = fir::LoadOp::create(builder, loc, box.getAddr()); - mlir::Value addr; - if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(baseTy)) { - mlir::Type idxTy = builder.getIndexType(); - mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); - mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0); - llvm::SmallVector<fir::DoLoopOp> loops; - llvm::SmallVector<mlir::Value> indices; - llvm::SmallVector<mlir::Value> extents; - for (unsigned i = 0; i < seqTy.getDimension(); ++i) { - mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); - auto dimInfo = fir::BoxDimsOp::create(builder, loc, idxTy, idxTy, - idxTy, loadedBox, dim); - mlir::Value lbub = mlir::arith::AddIOp::create( - builder, loc, dimInfo.getResult(0), dimInfo.getResult(1)); - mlir::Value ext = - mlir::arith::SubIOp::create(builder, loc, lbub, one); - mlir::Value cmp = mlir::arith::CmpIOp::create( - builder, loc, mlir::arith::CmpIPredicate::sgt, ext, zero); - ext = 
mlir::arith::SelectOp::create(builder, loc, cmp, ext, zero); - extents.push_back(ext); - - auto loop = fir::DoLoopOp::create( - builder, loc, dimInfo.getResult(0), dimInfo.getResult(1), - dimInfo.getResult(2), /*isUnordered=*/true, - /*finalCount=*/false, mlir::ValueRange{}); - loops.push_back(loop); - indices.push_back(loop.getInductionVar()); - builder.setInsertionPointToStart(loop.getBody()); - } - mlir::Value boxAddr = fir::BoxAddrOp::create(builder, loc, loadedBox); - auto shape = fir::ShapeOp::create(builder, loc, extents); - addr = fir::ArrayCoorOp::create( - builder, loc, fir::ReferenceType::get(recTy), boxAddr, shape, - /*slice=*/mlir::Value{}, indices, /*typeparms=*/mlir::ValueRange{}); - } else { - addr = fir::BoxAddrOp::create(builder, loc, loadedBox); - } - for (const auto &compSym : components) { - if (Fortran::semantics::IsDeviceAllocatable(compSym)) { - llvm::SmallVector<mlir::Value> coord; - mlir::Type fieldTy = gatherDeviceComponentCoordinatesAndType( - builder, loc, compSym, recTy, coord); - assert(coord.size() == 1 && "expect one coordinate"); - mlir::Value comp = fir::CoordinateOp::create( - builder, loc, builder.getRefType(fieldTy), addr, coord[0]); - cuf::DataAttributeAttr dataAttr = - Fortran::lower::translateSymbolCUFDataAttribute( - builder.getContext(), compSym); - cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr); - } - } - } - } -} - mlir::Type Fortran::lower::gatherDeviceComponentCoordinatesAndType( fir::FirOpBuilder &builder, mlir::Location loc, const Fortran::semantics::Symbol &sym, fir::RecordType recTy, diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index c79c9b1..b1cd14c 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -786,62 +786,6 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter, return res; } -/// Device allocatable components in a derived-type don't have the correct -/// allocator index in their 
descriptor when they are created. After -/// initialization, cuf.set_allocator_idx operations are inserted to set the -/// correct allocator index for each device component. -static void -initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol &symbol, - Fortran::lower::SymMap &symMap) { - if (const auto *details{ - symbol.GetUltimate() - .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) { - const Fortran::semantics::DeclTypeSpec *type{details->type()}; - const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived() - : nullptr}; - if (derived) { - if (!FindCUDADeviceAllocatableUltimateComponent(*derived)) - return; // No device components. - - fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - mlir::Location loc = converter.getCurrentLocation(); - - fir::ExtendedValue exv = - converter.getSymbolExtendedValue(symbol.GetUltimate(), &symMap); - mlir::Type baseTy = fir::unwrapRefType(fir::getBase(exv).getType()); - if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(baseTy)) - baseTy = boxTy.getEleTy(); - baseTy = fir::unwrapRefType(baseTy); - - if (fir::isAllocatableType(fir::getBase(exv).getType()) || - fir::isPointerType(fir::getBase(exv).getType())) - return; // Allocator index need to be set after allocation. 
- - auto recTy = - mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy)); - assert(recTy && "expected fir::RecordType"); - - Fortran::semantics::UltimateComponentIterator components{*derived}; - for (const auto &sym : components) { - if (Fortran::semantics::IsDeviceAllocatable(sym)) { - llvm::SmallVector<mlir::Value> coord; - mlir::Type fieldTy = - Fortran::lower::gatherDeviceComponentCoordinatesAndType( - builder, loc, sym, recTy, coord); - mlir::Value base = fir::getBase(exv); - mlir::Value comp = fir::CoordinateOp::create( - builder, loc, builder.getRefType(fieldTy), base, coord); - cuf::DataAttributeAttr dataAttr = - Fortran::lower::translateSymbolCUFDataAttribute( - builder.getContext(), sym); - cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr); - } - } - } - } -} - /// Must \p var be default initialized at runtime when entering its scope. static bool mustBeDefaultInitializedAtRuntime(const Fortran::lower::pft::Variable &var) { @@ -1164,9 +1108,6 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter, if (mustBeDefaultInitializedAtRuntime(var)) Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(), symMap); - if (converter.getFoldingContext().languageFeatures().IsEnabled( - Fortran::common::LanguageFeature::CUDA)) - initializeDeviceComponentAllocator(converter, var.getSymbol(), symMap); auto *builder = &converter.getFirOpBuilder(); if (needCUDAAlloc(var.getSymbol()) && !cuf::isCUDADeviceContext(builder->getRegion())) { @@ -1426,9 +1367,6 @@ static void instantiateAlias(Fortran::lower::AbstractConverter &converter, if (mustBeDefaultInitializedAtRuntime(var)) Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(), symMap); - if (converter.getFoldingContext().languageFeatures().IsEnabled( - Fortran::common::LanguageFeature::CUDA)) - initializeDeviceComponentAllocator(converter, var.getSymbol(), symMap); } //===--------------------------------------------------------------===// diff --git 
a/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp b/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp index a6ee986..37e4c5a 100644 --- a/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/CUDA/Descriptor.cpp @@ -47,18 +47,3 @@ void fir::runtime::cuda::genDescriptorCheckSection(fir::FirOpBuilder &builder, builder, loc, fTy, desc, sourceFile, sourceLine)}; fir::CallOp::create(builder, loc, func, args); } - -void fir::runtime::cuda::genSetAllocatorIndex(fir::FirOpBuilder &builder, - mlir::Location loc, - mlir::Value desc, - mlir::Value index) { - mlir::func::FuncOp func = - fir::runtime::getRuntimeFunc<mkRTKey(CUFSetAllocatorIndex)>(loc, builder); - auto fTy = func.getFunctionType(); - mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); - mlir::Value sourceLine = - fir::factory::locationToLineNo(builder, loc, fTy.getInput(3)); - llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments( - builder, loc, fTy, desc, index, sourceFile, sourceLine)}; - fir::CallOp::create(builder, loc, func, args); -} diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp index ade8071..687007d 100644 --- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp +++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp @@ -345,17 +345,6 @@ llvm::LogicalResult cuf::StreamCastOp::verify() { return checkStreamType(*this); } -//===----------------------------------------------------------------------===// -// SetAllocatorOp -//===----------------------------------------------------------------------===// - -llvm::LogicalResult cuf::SetAllocatorIndexOp::verify() { - if (!mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(getBox().getType()))) - return emitOpError( - "expect box to be a reference to class or box type value"); - return mlir::success(); -} - // Tablegen operators #define GET_OP_CLASSES diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp 
b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 9834b04..4fe83d4 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -928,34 +928,6 @@ struct CUFSyncDescriptorOpConversion } }; -struct CUFSetAllocatorIndexOpConversion - : public mlir::OpRewritePattern<cuf::SetAllocatorIndexOp> { - using OpRewritePattern::OpRewritePattern; - - mlir::LogicalResult - matchAndRewrite(cuf::SetAllocatorIndexOp op, - mlir::PatternRewriter &rewriter) const override { - auto mod = op->getParentOfType<mlir::ModuleOp>(); - fir::FirOpBuilder builder(rewriter, mod); - mlir::Location loc = op.getLoc(); - int idx = kDefaultAllocator; - if (op.getDataAttr() == cuf::DataAttribute::Device) { - idx = kDeviceAllocatorPos; - } else if (op.getDataAttr() == cuf::DataAttribute::Managed) { - idx = kManagedAllocatorPos; - } else if (op.getDataAttr() == cuf::DataAttribute::Unified) { - idx = kUnifiedAllocatorPos; - } else if (op.getDataAttr() == cuf::DataAttribute::Pinned) { - idx = kPinnedAllocatorPos; - } - mlir::Value index = - builder.createIntegerConstant(loc, builder.getI32Type(), idx); - fir::runtime::cuda::genSetAllocatorIndex(builder, loc, op.getBox(), index); - op.erase(); - return mlir::success(); - } -}; - class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> { public: void runOnOperation() override { @@ -1017,8 +989,8 @@ void cuf::populateCUFToFIRConversionPatterns( const mlir::SymbolTable &symtab, mlir::RewritePatternSet &patterns) { patterns.insert<CUFAllocOpConversion>(patterns.getContext(), &dl, &converter); patterns.insert<CUFAllocateOpConversion, CUFDeallocateOpConversion, - CUFFreeOpConversion, CUFSyncDescriptorOpConversion, - CUFSetAllocatorIndexOpConversion>(patterns.getContext()); + CUFFreeOpConversion, CUFSyncDescriptorOpConversion>( + patterns.getContext()); patterns.insert<CUFDataTransferOpConversion>(patterns.getContext(), symtab, &dl, &converter); 
patterns.insert<CUFLaunchOpConversion, CUFDeviceAddressOpConversion>( diff --git a/flang/test/Fir/CUDA/cuda-alloc-free.fir b/flang/test/Fir/CUDA/cuda-alloc-free.fir index 8b6e7d6..31f2ed0 100644 --- a/flang/test/Fir/CUDA/cuda-alloc-free.fir +++ b/flang/test/Fir/CUDA/cuda-alloc-free.fir @@ -94,19 +94,4 @@ func.func @_QQalloc_char() attributes {fir.bindc_name = "alloc_char"} { // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 // CHECK: fir.call @_FortranACUFMemAlloc(%[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda<device>} : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> - -func.func @_QQsetalloc() { - %0 = cuf.alloc !fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QFEd1"} -> !fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>> - %1 = fir.coordinate_of %0, a2 : (!fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> - cuf.set_allocator_idx %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> {data_attr = #cuf.cuda<device>} - return -} - -// CHECK-LABEL: func.func @_QQsetalloc() { -// CHECK: %[[DT:.*]] = fir.call @_FortranACUFMemAlloc -// CHECK: %[[CONV:.*]] = fir.convert %[[DT]] : (!fir.llvm_ptr<i8>) -> !fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>> -// CHECK: %[[COMP:.*]] = fir.coordinate_of %[[CONV]], a2 : (!fir.ref<!fir.type<_QMm1Tdt1{a2:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> -// CHECK: %[[DESC:.*]] = fir.convert %[[COMP]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>> -// CHECK: fir.call @_FortranACUFSetAllocatorIndex(%[[DESC]], %c2{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> () - } // end module diff --git a/flang/test/Lower/CUDA/cuda-set-allocator.cuf 
b/flang/test/Lower/CUDA/cuda-set-allocator.cuf deleted file mode 100644 index d783f34..0000000 --- a/flang/test/Lower/CUDA/cuda-set-allocator.cuf +++ /dev/null @@ -1,66 +0,0 @@ -! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s - -module m1 - type ty_device - integer, device, allocatable, dimension(:) :: x - integer :: y - integer, device, allocatable, dimension(:) :: z - end type -contains - subroutine sub1() - type(ty_device) :: a - end subroutine - -! CHECK-LABEL: func.func @_QMm1Psub1() -! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}> {bindc_name = "a", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub1Ea"} -> !fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>> -! CHECK: %[[DT:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub1Ea"} : (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>, !fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -! CHECK: fir.address_of(@_QQ_QMm1Tty_device.DerivedInit) -! CHECK: fir.copy -! CHECK: %[[X:.*]] = fir.coordinate_of %[[DT]]#0, x : (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -! CHECK: cuf.set_allocator_idx %[[X]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>} -! 
CHECK: %[[Z:.*]] = fir.coordinate_of %[[DT]]#0, z : (!fir.ref<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -! CHECK: cuf.set_allocator_idx %[[Z]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>} - - subroutine sub2() - type(ty_device), pointer :: d1 - allocate(d1) - end subroutine - -! CHECK-LABEL: func.func @_QMm1Psub2() -! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub2Ed1"} -> !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>> -! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMm1Fsub2Ed1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -! CHECK: cuf.allocate -! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>> -! 
CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>> -! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>} -! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.ptr<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>} - - subroutine sub3() - type(ty_device), allocatable :: d1 - allocate(d1) - end subroutine - -! CHECK-LABEL: func.func @_QMm1Psub3() -! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda<managed>, uniq_name = "_QMm1Fsub3Ed1"} -> !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>> -! 
CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda<managed>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMm1Fsub3Ed1"} : (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>, !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>>) -! CHECK: cuf.allocate -! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>> -! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>>) -> !fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>> -! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>} -! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.heap<!fir.type<_QMm1Tty_device{x:!fir.box<!fir.heap<!fir.array<?xi32>>>,y:i32,z:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> -! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>} - - subroutine sub4() - type(ty_device), allocatable :: d1(:,:) - allocate(d1(10, 10)) - end subroutine - -! 
CHECK-LABEL: func.func @_QMm1Psub4() -! CHECK: cuf.allocate -! CHECK-COUNT-2: fir.do_loop -! CHECK-COUNT-2: cuf.set_allocator_idx - -end module diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h index 449455c..0cc74fb 100644 --- a/libc/src/__support/CPP/simd.h +++ b/libc/src/__support/CPP/simd.h @@ -37,15 +37,15 @@ using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT); #if defined(LIBC_TARGET_CPU_HAS_AVX512F) template <typename T> -inline constexpr size_t native_vector_size = 64 / sizeof(T); +LIBC_INLINE_VAR constexpr size_t native_vector_size = 64 / sizeof(T); #elif defined(LIBC_TARGET_CPU_HAS_AVX2) template <typename T> -inline constexpr size_t native_vector_size = 32 / sizeof(T); +LIBC_INLINE_VAR constexpr size_t native_vector_size = 32 / sizeof(T); #elif defined(LIBC_TARGET_CPU_HAS_SSE2) || defined(LIBC_TARGET_CPU_HAS_ARM_NEON) template <typename T> -inline constexpr size_t native_vector_size = 16 / sizeof(T); +LIBC_INLINE_VAR constexpr size_t native_vector_size = 16 / sizeof(T); #else -template <typename T> inline constexpr size_t native_vector_size = 1; +template <typename T> LIBC_INLINE constexpr size_t native_vector_size = 1; #endif template <typename T> LIBC_INLINE constexpr T poison() { @@ -90,122 +90,127 @@ using enable_if_simd_t = cpp::enable_if_t<is_simd_v<T>, T>; // Casting. template <typename To, typename From, size_t N> -LIBC_INLINE constexpr simd<To, N> simd_cast(simd<From, N> v) { +LIBC_INLINE constexpr static simd<To, N> simd_cast(simd<From, N> v) { return __builtin_convertvector(v, simd<To, N>); } // SIMD mask operations. 
-template <size_t N> LIBC_INLINE constexpr bool all_of(simd<bool, N> m) { +template <size_t N> LIBC_INLINE constexpr static bool all_of(simd<bool, N> m) { return __builtin_reduce_and(m); } -template <size_t N> LIBC_INLINE constexpr bool any_of(simd<bool, N> m) { +template <size_t N> LIBC_INLINE constexpr static bool any_of(simd<bool, N> m) { return __builtin_reduce_or(m); } -template <size_t N> LIBC_INLINE constexpr bool none_of(simd<bool, N> m) { +template <size_t N> LIBC_INLINE constexpr static bool none_of(simd<bool, N> m) { return !any_of(m); } -template <size_t N> LIBC_INLINE constexpr bool some_of(simd<bool, N> m) { +template <size_t N> LIBC_INLINE constexpr static bool some_of(simd<bool, N> m) { return any_of(m) && !all_of(m); } -template <size_t N> LIBC_INLINE constexpr int popcount(simd<bool, N> m) { +template <size_t N> LIBC_INLINE constexpr static int popcount(simd<bool, N> m) { return __builtin_popcountg(m); } -template <size_t N> LIBC_INLINE constexpr int find_first_set(simd<bool, N> m) { +template <size_t N> +LIBC_INLINE constexpr static int find_first_set(simd<bool, N> m) { return __builtin_ctzg(m); } -template <size_t N> LIBC_INLINE constexpr int find_last_set(simd<bool, N> m) { +template <size_t N> +LIBC_INLINE constexpr static int find_last_set(simd<bool, N> m) { constexpr size_t size = simd_size_v<simd<bool, N>>; return size - __builtin_clzg(m); } // Elementwise operations. 
template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> min(simd<T, N> x, simd<T, N> y) { +LIBC_INLINE constexpr static simd<T, N> min(simd<T, N> x, simd<T, N> y) { return __builtin_elementwise_min(x, y); } template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> max(simd<T, N> x, simd<T, N> y) { +LIBC_INLINE constexpr static simd<T, N> max(simd<T, N> x, simd<T, N> y) { return __builtin_elementwise_max(x, y); } template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> abs(simd<T, N> x) { +LIBC_INLINE constexpr static simd<T, N> abs(simd<T, N> x) { return __builtin_elementwise_abs(x); } template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> fma(simd<T, N> x, simd<T, N> y, simd<T, N> z) { +LIBC_INLINE constexpr static simd<T, N> fma(simd<T, N> x, simd<T, N> y, + simd<T, N> z) { return __builtin_elementwise_fma(x, y, z); } template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> ceil(simd<T, N> x) { +LIBC_INLINE constexpr static simd<T, N> ceil(simd<T, N> x) { return __builtin_elementwise_ceil(x); } template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> floor(simd<T, N> x) { +LIBC_INLINE constexpr static simd<T, N> floor(simd<T, N> x) { return __builtin_elementwise_floor(x); } template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> roundeven(simd<T, N> x) { +LIBC_INLINE constexpr static simd<T, N> roundeven(simd<T, N> x) { return __builtin_elementwise_roundeven(x); } template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> round(simd<T, N> x) { +LIBC_INLINE constexpr static simd<T, N> round(simd<T, N> x) { return __builtin_elementwise_round(x); } template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> trunc(simd<T, N> x) { +LIBC_INLINE constexpr static simd<T, N> trunc(simd<T, N> x) { return __builtin_elementwise_trunc(x); } template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> nearbyint(simd<T, N> x) { +LIBC_INLINE constexpr static simd<T, N> nearbyint(simd<T, N> x) { 
return __builtin_elementwise_nearbyint(x); } template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> rint(simd<T, N> x) { +LIBC_INLINE constexpr static simd<T, N> rint(simd<T, N> x) { return __builtin_elementwise_rint(x); } template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> canonicalize(simd<T, N> x) { +LIBC_INLINE constexpr static simd<T, N> canonicalize(simd<T, N> x) { return __builtin_elementwise_canonicalize(x); } template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> copysign(simd<T, N> x, simd<T, N> y) { +LIBC_INLINE constexpr static simd<T, N> copysign(simd<T, N> x, simd<T, N> y) { return __builtin_elementwise_copysign(x, y); } template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> fmod(simd<T, N> x, simd<T, N> y) { +LIBC_INLINE constexpr static simd<T, N> fmod(simd<T, N> x, simd<T, N> y) { return __builtin_elementwise_fmod(x, y); } // Reduction operations. template <typename T, size_t N, typename Op = cpp::plus<>> -LIBC_INLINE constexpr T reduce(simd<T, N> v, Op op = {}) { +LIBC_INLINE constexpr static T reduce(simd<T, N> v, Op op = {}) { return reduce(v, op); } template <typename T, size_t N> -LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::plus<>) { +LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::plus<>) { return __builtin_reduce_add(v); } template <typename T, size_t N> -LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::multiplies<>) { +LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::multiplies<>) { return __builtin_reduce_mul(v); } template <typename T, size_t N> -LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_and<>) { +LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::bit_and<>) { return __builtin_reduce_and(v); } template <typename T, size_t N> -LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_or<>) { +LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::bit_or<>) { return __builtin_reduce_or(v); } template <typename T, size_t N> -LIBC_INLINE constexpr T 
reduce(simd<T, N> v, cpp::bit_xor<>) { +LIBC_INLINE constexpr static T reduce(simd<T, N> v, cpp::bit_xor<>) { return __builtin_reduce_xor(v); } -template <typename T, size_t N> LIBC_INLINE constexpr T hmin(simd<T, N> v) { +template <typename T, size_t N> +LIBC_INLINE constexpr static T hmin(simd<T, N> v) { return __builtin_reduce_min(v); } -template <typename T, size_t N> LIBC_INLINE constexpr T hmax(simd<T, N> v) { +template <typename T, size_t N> +LIBC_INLINE constexpr static T hmax(simd<T, N> v) { return __builtin_reduce_max(v); } @@ -242,28 +247,29 @@ LIBC_INLINE enable_if_simd_t<T> masked_store(simd<bool, simd_size_v<T>> m, T v, } // Construction helpers. -template <typename T, size_t N> LIBC_INLINE constexpr simd<T, N> splat(T v) { +template <typename T, size_t N> +LIBC_INLINE constexpr static simd<T, N> splat(T v) { return simd<T, N>(v); } -template <typename T> LIBC_INLINE constexpr simd<T> splat(T v) { +template <typename T> LIBC_INLINE constexpr static simd<T> splat(T v) { return splat<T, simd_size_v<simd<T>>>(v); } template <typename T, unsigned N> -LIBC_INLINE constexpr simd<T, N> iota(T base = T(0), T step = T(1)) { +LIBC_INLINE constexpr static simd<T, N> iota(T base = T(0), T step = T(1)) { simd<T, N> v{}; for (unsigned i = 0; i < N; ++i) v[i] = base + T(i) * step; return v; } template <typename T> -LIBC_INLINE constexpr simd<T> iota(T base = T(0), T step = T(1)) { +LIBC_INLINE constexpr static simd<T> iota(T base = T(0), T step = T(1)) { return iota<T, simd_size_v<simd<T>>>(base, step); } // Conditional helpers. template <typename T, size_t N> -LIBC_INLINE constexpr simd<T, N> select(simd<bool, N> m, simd<T, N> x, - simd<T, N> y) { +LIBC_INLINE constexpr static simd<T, N> select(simd<bool, N> m, simd<T, N> x, + simd<T, N> y) { return m ? 
x : y; } diff --git a/lldb/include/lldb/Core/Architecture.h b/lldb/include/lldb/Core/Architecture.h index b6fc1a2..ed64a89 100644 --- a/lldb/include/lldb/Core/Architecture.h +++ b/lldb/include/lldb/Core/Architecture.h @@ -12,6 +12,7 @@ #include "lldb/Core/PluginInterface.h" #include "lldb/Target/DynamicRegisterInfo.h" #include "lldb/Target/MemoryTagManager.h" +#include "lldb/Target/RegisterContextUnwind.h" namespace lldb_private { @@ -129,6 +130,14 @@ public: RegisterContext &reg_context) const { return false; } + + /// Return an UnwindPlan that allows architecture-defined rules for finding + /// saved registers, given a particular set of register values. + virtual lldb::UnwindPlanSP GetArchitectureUnwindPlan( + lldb_private::Thread &thread, lldb_private::RegisterContextUnwind *regctx, + std::shared_ptr<const UnwindPlan> current_unwindplan) { + return lldb::UnwindPlanSP(); + } }; } // namespace lldb_private diff --git a/lldb/include/lldb/Target/RegisterContextUnwind.h b/lldb/include/lldb/Target/RegisterContextUnwind.h index b10a3648..52c28fd 100644 --- a/lldb/include/lldb/Target/RegisterContextUnwind.h +++ b/lldb/include/lldb/Target/RegisterContextUnwind.h @@ -21,6 +21,7 @@ namespace lldb_private { class UnwindLLDB; +class ArchitectureArm; class RegisterContextUnwind : public lldb_private::RegisterContext { public: @@ -72,6 +73,25 @@ public: // above asynchronous trap handlers (sigtramp) for instance. bool BehavesLikeZerothFrame() const override; +protected: + // Provide a location for where THIS function saved the CALLER's register + // value, or a frame "below" this one saved it. That is, this function doesn't + // modify the register, it may call a function that does & saved it to stack. + // + // The ConcreteRegisterLocation type may be set to eRegisterNotAvailable -- + // this will happen for a volatile register being queried mid-stack.
Instead + // of floating frame 0's contents of that register up the stack (which may or + // may not be the value of that reg when the function was executing), we won't + // return any value. + // + // If a non-volatile register (a "preserved" register, a callee-preserved + // register) is requested mid-stack, and no frames "below" the requested stack + // have saved the register anywhere, it is safe to assume that frame 0's + // register value is the same. + lldb_private::UnwindLLDB::RegisterSearchResult SavedLocationForRegister( + uint32_t lldb_regnum, + lldb_private::UnwindLLDB::ConcreteRegisterLocation &regloc); + private: enum FrameType { eNormalFrame, @@ -86,6 +106,8 @@ private: // UnwindLLDB needs to pass around references to ConcreteRegisterLocations friend class UnwindLLDB; + // Architecture may need to retrieve caller register values from this frame + friend class ArchitectureArm; // Returns true if we have an unwind loop -- the same stack frame unwinding // multiple times. @@ -130,27 +152,6 @@ private: void PropagateTrapHandlerFlagFromUnwindPlan( std::shared_ptr<const UnwindPlan> unwind_plan); - // Provide a location for where THIS function saved the CALLER's register - // value - // Or a frame "below" this one saved it, i.e. a function called by this one, - // preserved a register that this - // function didn't modify/use. - // - // The ConcreteRegisterLocation type may be set to eRegisterNotAvailable -- - // this will happen for a volatile register being queried mid-stack. Instead - // of floating frame 0's contents of that register up the stack (which may or - // may not be the value of that reg when the function was executing), we won't - // return any value. - // - // If a non-volatile register (a "preserved" register) is requested mid-stack - // and no frames "below" the requested - // stack have saved the register anywhere, it is safe to assume that frame 0's - // register values are still the same - // as the requesting frame's.
- lldb_private::UnwindLLDB::RegisterSearchResult SavedLocationForRegister( - uint32_t lldb_regnum, - lldb_private::UnwindLLDB::ConcreteRegisterLocation &regloc); - std::optional<UnwindPlan::Row::AbstractRegisterLocation> GetAbstractRegisterLocation(uint32_t lldb_regnum, lldb::RegisterKind &kind); @@ -202,6 +203,8 @@ private: std::shared_ptr<const UnwindPlan> GetFullUnwindPlanForFrame(); + lldb::UnwindPlanSP TryAdoptArchitectureUnwindPlan(); + void UnwindLogMsg(const char *fmt, ...) __attribute__((format(printf, 2, 3))); void UnwindLogMsgVerbose(const char *fmt, ...) diff --git a/lldb/include/lldb/Target/StopInfo.h b/lldb/include/lldb/Target/StopInfo.h index 368ec51d..cdd6a6f 100644 --- a/lldb/include/lldb/Target/StopInfo.h +++ b/lldb/include/lldb/Target/StopInfo.h @@ -97,6 +97,12 @@ public: /// and silently continue again one more time. virtual bool WasContinueInterrupted(Thread &thread) { return false; } + virtual uint32_t GetStopReasonDataCount() const { return 0; } + virtual uint64_t GetStopReasonDataAtIndex(uint32_t idx) { + // Handle all the common cases that have no data. + return 0; + } + // Sometimes the thread plan logic will know that it wants a given stop to // stop or not, regardless of what the ordinary logic for that StopInfo would // dictate.
The main example of this is the ThreadPlanCallFunction, which diff --git a/lldb/include/lldb/Target/UnwindLLDB.h b/lldb/include/lldb/Target/UnwindLLDB.h index 88180b3..29b3ab9c 100644 --- a/lldb/include/lldb/Target/UnwindLLDB.h +++ b/lldb/include/lldb/Target/UnwindLLDB.h @@ -22,6 +22,7 @@ namespace lldb_private { class RegisterContextUnwind; +class ArchitectureArm; class UnwindLLDB : public lldb_private::Unwind { public: @@ -37,6 +38,7 @@ public: protected: friend class lldb_private::RegisterContextUnwind; + friend class lldb_private::ArchitectureArm; /// An UnwindPlan::Row::AbstractRegisterLocation, combined with the register /// context and memory for a specific stop point, is used to create a diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp index ec68b2a..4e4aa48 100644 --- a/lldb/source/API/SBThread.cpp +++ b/lldb/source/API/SBThread.cpp @@ -157,52 +157,8 @@ size_t SBThread::GetStopReasonDataCount() { if (exe_ctx) { if (exe_ctx->HasThreadScope()) { StopInfoSP stop_info_sp = exe_ctx->GetThreadPtr()->GetStopInfo(); - if (stop_info_sp) { - StopReason reason = stop_info_sp->GetStopReason(); - switch (reason) { - case eStopReasonInvalid: - case eStopReasonNone: - case eStopReasonTrace: - case eStopReasonExec: - case eStopReasonPlanComplete: - case eStopReasonThreadExiting: - case eStopReasonInstrumentation: - case eStopReasonProcessorTrace: - case eStopReasonVForkDone: - case eStopReasonHistoryBoundary: - // There is no data for these stop reasons. - return 0; - - case eStopReasonBreakpoint: { - break_id_t site_id = stop_info_sp->GetValue(); - lldb::BreakpointSiteSP bp_site_sp( - exe_ctx->GetProcessPtr()->GetBreakpointSiteList().FindByID( - site_id)); - if (bp_site_sp) - return bp_site_sp->GetNumberOfConstituents() * 2; - else - return 0; // Breakpoint must have cleared itself... 
- } break; - - case eStopReasonWatchpoint: - return 1; - - case eStopReasonSignal: - return 1; - - case eStopReasonInterrupt: - return 1; - - case eStopReasonException: - return 1; - - case eStopReasonFork: - return 1; - - case eStopReasonVFork: - return 1; - } - } + if (stop_info_sp) + return stop_info_sp->GetStopReasonDataCount(); } } else { LLDB_LOG_ERROR(GetLog(LLDBLog::API), exe_ctx.takeError(), "{0}"); @@ -220,63 +176,8 @@ uint64_t SBThread::GetStopReasonDataAtIndex(uint32_t idx) { if (exe_ctx->HasThreadScope()) { Thread *thread = exe_ctx->GetThreadPtr(); StopInfoSP stop_info_sp = thread->GetStopInfo(); - if (stop_info_sp) { - StopReason reason = stop_info_sp->GetStopReason(); - switch (reason) { - case eStopReasonInvalid: - case eStopReasonNone: - case eStopReasonTrace: - case eStopReasonExec: - case eStopReasonPlanComplete: - case eStopReasonThreadExiting: - case eStopReasonInstrumentation: - case eStopReasonProcessorTrace: - case eStopReasonVForkDone: - case eStopReasonHistoryBoundary: - // There is no data for these stop reasons. 
- return 0; - - case eStopReasonBreakpoint: { - break_id_t site_id = stop_info_sp->GetValue(); - lldb::BreakpointSiteSP bp_site_sp( - exe_ctx->GetProcessPtr()->GetBreakpointSiteList().FindByID( - site_id)); - if (bp_site_sp) { - uint32_t bp_index = idx / 2; - BreakpointLocationSP bp_loc_sp( - bp_site_sp->GetConstituentAtIndex(bp_index)); - if (bp_loc_sp) { - if (idx & 1) { - // Odd idx, return the breakpoint location ID - return bp_loc_sp->GetID(); - } else { - // Even idx, return the breakpoint ID - return bp_loc_sp->GetBreakpoint().GetID(); - } - } - } - return LLDB_INVALID_BREAK_ID; - } break; - - case eStopReasonWatchpoint: - return stop_info_sp->GetValue(); - - case eStopReasonSignal: - return stop_info_sp->GetValue(); - - case eStopReasonInterrupt: - return stop_info_sp->GetValue(); - - case eStopReasonException: - return stop_info_sp->GetValue(); - - case eStopReasonFork: - return stop_info_sp->GetValue(); - - case eStopReasonVFork: - return stop_info_sp->GetValue(); - } - } + if (stop_info_sp) + return stop_info_sp->GetStopReasonDataAtIndex(idx); } } else { LLDB_LOG_ERROR(GetLog(LLDBLog::API), exe_ctx.takeError(), "{0}"); diff --git a/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp b/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp index 2bcb2c0..bb0c4ba 100644 --- a/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp +++ b/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp @@ -1921,6 +1921,13 @@ UnwindPlanSP ABISysV_arm::CreateFunctionEntryUnwindPlan() { UnwindPlanSP ABISysV_arm::CreateDefaultUnwindPlan() { // TODO: Handle thumb + // If we had a Target argument, could at least check + // target.GetArchitecture().GetTriple().isArmMClass() + // which is always thumb. + // To handle thumb properly, we'd need to fetch the current + // CPSR state at unwind time to tell if the processor is + // in thumb mode in this stack frame. There's no way to + // express something like that in an UnwindPlan today. 
uint32_t fp_reg_num = dwarf_r11; uint32_t pc_reg_num = dwarf_pc; diff --git a/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.cpp b/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.cpp index 81c7212..721c4bc 100644 --- a/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.cpp +++ b/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.cpp @@ -9,10 +9,18 @@ #include "Plugins/Architecture/Arm/ArchitectureArm.h" #include "Plugins/Process/Utility/ARMDefines.h" #include "Plugins/Process/Utility/InstructionUtils.h" +#include "Utility/ARM_DWARF_Registers.h" #include "lldb/Core/PluginManager.h" +#include "lldb/Symbol/UnwindPlan.h" +#include "lldb/Target/Process.h" #include "lldb/Target/RegisterContext.h" +#include "lldb/Target/RegisterNumber.h" #include "lldb/Target/Thread.h" +#include "lldb/Target/UnwindLLDB.h" #include "lldb/Utility/ArchSpec.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" +#include "lldb/Utility/RegisterValue.h" using namespace lldb_private; using namespace lldb; @@ -150,3 +158,181 @@ addr_t ArchitectureArm::GetOpcodeLoadAddress(addr_t opcode_addr, } return opcode_addr & ~(1ull); } + +// The ARM M-Profile Armv7-M Architecture Reference Manual, +// subsection "B1.5 Armv7-M exception model", see the parts +// describing "Exception entry behavior" and "Exception +// return behavior". +// When an exception happens on this processor, certain registers are +// saved below the stack pointer, the stack pointer is decremented, +// a special value is put in the link register to indicate the +// exception has been taken, and an exception handler function +// is invoked. +// +// Detect that special value in $lr, and if present, add +// unwind rules for the registers that were saved above this +// stack frame's CFA. Overwrite any register locations that +// the current_unwindplan has for these registers; they are +// not correct when we're invoked this way. 
+UnwindPlanSP ArchitectureArm::GetArchitectureUnwindPlan( + Thread &thread, RegisterContextUnwind *regctx, + std::shared_ptr<const UnwindPlan> current_unwindplan) { + + ProcessSP process_sp = thread.GetProcess(); + if (!process_sp) + return {}; + + const ArchSpec arch = process_sp->GetTarget().GetArchitecture(); + if (!arch.GetTriple().isArmMClass() || arch.GetAddressByteSize() != 4) + return {}; + + // Get the caller's LR value from regctx (the LR value + // at function entry to this function). + RegisterNumber ra_regnum(thread, eRegisterKindGeneric, + LLDB_REGNUM_GENERIC_RA); + uint32_t ra_regnum_lldb = ra_regnum.GetAsKind(eRegisterKindLLDB); + + if (ra_regnum_lldb == LLDB_INVALID_REGNUM) + return {}; + + UnwindLLDB::ConcreteRegisterLocation regloc = {}; + bool got_concrete_location = false; + if (regctx->SavedLocationForRegister(ra_regnum_lldb, regloc) == + UnwindLLDB::RegisterSearchResult::eRegisterFound) { + got_concrete_location = true; + } else { + RegisterNumber pc_regnum(thread, eRegisterKindGeneric, + LLDB_REGNUM_GENERIC_PC); + uint32_t pc_regnum_lldb = pc_regnum.GetAsKind(eRegisterKindLLDB); + if (regctx->SavedLocationForRegister(pc_regnum_lldb, regloc) == + UnwindLLDB::RegisterSearchResult::eRegisterFound) + got_concrete_location = true; + } + + if (!got_concrete_location) + return {}; + + addr_t callers_return_address = LLDB_INVALID_ADDRESS; + const RegisterInfo *reg_info = regctx->GetRegisterInfoAtIndex(ra_regnum_lldb); + if (reg_info) { + RegisterValue reg_value; + if (regctx->ReadRegisterValueFromRegisterLocation(regloc, reg_info, + reg_value)) { + callers_return_address = reg_value.GetAsUInt32(); + } + } + + if (callers_return_address == LLDB_INVALID_ADDRESS) + return {}; + + // ARMv7-M ARM says that the LR will be set to + // one of these values when an exception has taken + // place: + // if HaveFPExt() then + // if CurrentMode==Mode_Handler then + // LR = Ones(27):NOT(CONTROL.FPCA):'0001'; + // else + // LR = 
Ones(27):NOT(CONTROL.FPCA):'1':CONTROL.SPSEL:'01'; + // else + // if CurrentMode==Mode_Handler then + // LR = Ones(28):'0001'; + // else + // LR = Ones(29):CONTROL.SPSEL:'01'; + + // Top 27 bits are set for an exception return. + const uint32_t exception_return = -1U & ~0b11111U; + // Bit4 is 1 if only GPRs were saved. + const uint32_t gprs_only = 0b10000; + // Bit<1:0> are '01'. + const uint32_t lowbits = 0b01; + + if ((callers_return_address & exception_return) != exception_return) + return {}; + if ((callers_return_address & lowbits) != lowbits) + return {}; + + const bool fp_regs_saved = !(callers_return_address & gprs_only); + + const RegisterKind plan_regkind = current_unwindplan->GetRegisterKind(); + UnwindPlanSP new_plan = std::make_shared<UnwindPlan>(plan_regkind); + new_plan->SetSourceName("Arm Cortex-M exception return UnwindPlan"); + new_plan->SetSourcedFromCompiler(eLazyBoolNo); + new_plan->SetUnwindPlanValidAtAllInstructions(eLazyBoolYes); + new_plan->SetUnwindPlanForSignalTrap(eLazyBoolYes); + + int stored_regs_size = fp_regs_saved ? 
0x68 : 0x20; + + uint32_t gpr_regs[] = {dwarf_r0, dwarf_r1, dwarf_r2, dwarf_r3, + dwarf_r12, dwarf_lr, dwarf_pc, dwarf_cpsr}; + const int gpr_reg_count = std::size(gpr_regs); + uint32_t fpr_regs[] = {dwarf_s0, dwarf_s1, dwarf_s2, dwarf_s3, + dwarf_s4, dwarf_s5, dwarf_s6, dwarf_s7, + dwarf_s8, dwarf_s9, dwarf_s10, dwarf_s11, + dwarf_s12, dwarf_s13, dwarf_s14, dwarf_s15}; + const int fpr_reg_count = std::size(fpr_regs); + + RegisterContextSP reg_ctx_sp = thread.GetRegisterContext(); + std::vector<uint32_t> saved_regs; + for (int i = 0; i < gpr_reg_count; i++) { + uint32_t regno = gpr_regs[i]; + reg_ctx_sp->ConvertBetweenRegisterKinds(eRegisterKindDWARF, gpr_regs[i], + plan_regkind, regno); + saved_regs.push_back(regno); + } + if (fp_regs_saved) { + for (int i = 0; i < fpr_reg_count; i++) { + uint32_t regno = fpr_regs[i]; + reg_ctx_sp->ConvertBetweenRegisterKinds(eRegisterKindDWARF, fpr_regs[i], + plan_regkind, regno); + saved_regs.push_back(regno); + } + } + + addr_t cfa; + if (!regctx->GetCFA(cfa)) + return {}; + + // The CPSR value saved to stack is actually (from Armv7-M ARM) + // "XPSR<31:10>:frameptralign:XPSR<8:0>" + // Bit 9 indicates that the stack pointer was aligned (to + // an 8-byte alignment) when the exception happened, and we must + // account for that when restoring the original stack pointer value. 
+ Status error; + uint32_t callers_xPSR = + process_sp->ReadUnsignedIntegerFromMemory(cfa + 0x1c, 4, 0, error); + const bool align_stack = callers_xPSR & (1U << 9); + uint32_t callers_sp = cfa + stored_regs_size; + if (align_stack) + callers_sp |= 4; + + Log *log = GetLog(LLDBLog::Unwind); + LLDB_LOGF(log, + "ArchitectureArm::GetArchitectureUnwindPlan found caller return " + "addr of 0x%" PRIx64 ", for frame with CFA 0x%" PRIx64 + ", fp_regs_saved %d, stored_regs_size 0x%x, align stack %d", + callers_return_address, cfa, fp_regs_saved, stored_regs_size, + align_stack); + + uint32_t sp_regnum = dwarf_sp; + reg_ctx_sp->ConvertBetweenRegisterKinds(eRegisterKindDWARF, dwarf_sp, + plan_regkind, sp_regnum); + + const int row_count = current_unwindplan->GetRowCount(); + for (int i = 0; i < row_count; i++) { + UnwindPlan::Row row = *current_unwindplan->GetRowAtIndex(i); + uint32_t offset = 0; + const size_t saved_reg_count = saved_regs.size(); + for (size_t j = 0; j < saved_reg_count; j++) { + // The locations could be set with + // SetRegisterLocationToIsConstant(regno, cfa+offset) + // expressing it in terms of CFA addr+offset - this UnwindPlan + // is only used once, with this specific CFA. I'm not sure + // which will be clearer for someone reading the unwind log. 
+ row.SetRegisterLocationToAtCFAPlusOffset(saved_regs[j], offset, true); + offset += 4; + } + row.SetRegisterLocationToIsCFAPlusOffset(sp_regnum, callers_sp - cfa, true); + new_plan->AppendRow(row); + } + return new_plan; +} diff --git a/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.h b/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.h index f579d6b..52277dc 100644 --- a/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.h +++ b/lldb/source/Plugins/Architecture/Arm/ArchitectureArm.h @@ -10,6 +10,7 @@ #define LLDB_SOURCE_PLUGINS_ARCHITECTURE_ARM_ARCHITECTUREARM_H #include "lldb/Core/Architecture.h" +#include "lldb/Target/Thread.h" namespace lldb_private { @@ -29,6 +30,10 @@ public: lldb::addr_t GetOpcodeLoadAddress(lldb::addr_t load_addr, AddressClass addr_class) const override; + lldb::UnwindPlanSP GetArchitectureUnwindPlan( + lldb_private::Thread &thread, lldb_private::RegisterContextUnwind *regctx, + std::shared_ptr<const UnwindPlan> current_unwindplan) override; + private: static std::unique_ptr<Architecture> Create(const ArchSpec &arch); ArchitectureArm() = default; diff --git a/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp b/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp index cb8ba05..69885aa 100644 --- a/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp +++ b/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp @@ -12,6 +12,7 @@ #include "lldb/Core/PluginManager.h" #include "lldb/Core/Section.h" #include "lldb/Symbol/Symbol.h" +#include "lldb/Target/Target.h" #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" #include "llvm/ADT/DenseSet.h" @@ -233,6 +234,40 @@ void ObjectFileJSON::CreateSections(SectionList &unified_section_list) { } } +bool ObjectFileJSON::SetLoadAddress(Target &target, lldb::addr_t value, + bool value_is_offset) { + Log *log(GetLog(LLDBLog::DynamicLoader)); + if (!m_sections_up) + return true; + + addr_t slide = value; + if (!value_is_offset) { + addr_t lowest_addr = LLDB_INVALID_ADDRESS; 
+ for (const SectionSP &section_sp : *m_sections_up) { + addr_t section_load_addr = section_sp->GetFileAddress(); + lowest_addr = std::min(lowest_addr, section_load_addr); + } + if (lowest_addr == LLDB_INVALID_ADDRESS) + return false; + slide = value - lowest_addr; + } + + // Apply slide to each section's file address. + for (const SectionSP &section_sp : *m_sections_up) { + addr_t section_load_addr = section_sp->GetFileAddress(); + if (section_load_addr != LLDB_INVALID_ADDRESS) { + LLDB_LOGF( + log, + "ObjectFileJSON::SetLoadAddress section %s to load addr 0x%" PRIx64, + section_sp->GetName().AsCString(), section_load_addr + slide); + target.SetSectionLoadAddress(section_sp, section_load_addr + slide, + /*warn_multiple=*/true); + } + } + + return true; +} + bool ObjectFileJSON::MagicBytesMatch(DataBufferSP data_sp, lldb::addr_t data_offset, lldb::addr_t data_length) { diff --git a/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h b/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h index b72565f..029c8ff 100644 --- a/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h +++ b/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h @@ -86,6 +86,9 @@ public: Strata CalculateStrata() override { return eStrataUser; } + bool SetLoadAddress(Target &target, lldb::addr_t value, + bool value_is_offset) override; + static bool MagicBytesMatch(lldb::DataBufferSP data_sp, lldb::addr_t offset, lldb::addr_t length); diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp index 6037c8d..a780b3f 100644 --- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp +++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp @@ -799,6 +799,23 @@ Status ProcessMachCore::DoGetMemoryRegionInfo(addr_t load_addr, region_info.SetMapped(MemoryRegionInfo::eNo); } return Status(); + } else { + // The corefile has no LC_SEGMENT at this virtual address, + // but see if there is a binary whose Section has been + // loaded 
at that address in the current Target. + Address addr; + if (GetTarget().ResolveLoadAddress(load_addr, addr)) { + SectionSP section_sp(addr.GetSection()); + if (section_sp) { + region_info.GetRange().SetRangeBase( + section_sp->GetLoadBaseAddress(&GetTarget())); + region_info.GetRange().SetByteSize(section_sp->GetByteSize()); + if (region_info.GetRange().Contains(load_addr)) { + region_info.SetLLDBPermissions(section_sp->GetPermissions()); + return Status(); + } + } + } } region_info.GetRange().SetRangeBase(load_addr); diff --git a/lldb/source/Target/RegisterContextUnwind.cpp b/lldb/source/Target/RegisterContextUnwind.cpp index bcf1297..787eb94 100644 --- a/lldb/source/Target/RegisterContextUnwind.cpp +++ b/lldb/source/Target/RegisterContextUnwind.cpp @@ -293,6 +293,9 @@ void RegisterContextUnwind::InitializeZerothFrame() { return; } + // Give the Architecture a chance to replace the UnwindPlan. + TryAdoptArchitectureUnwindPlan(); + UnwindLogMsg("initialized frame current pc is 0x%" PRIx64 " cfa is 0x%" PRIx64 " afa is 0x%" PRIx64 " using %s UnwindPlan", (uint64_t)m_current_pc.GetLoadAddress(exe_ctx.GetTargetPtr()), @@ -482,6 +485,9 @@ void RegisterContextUnwind::InitializeNonZerothFrame() { } } + // Give the Architecture a chance to replace the UnwindPlan. + TryAdoptArchitectureUnwindPlan(); + UnwindLogMsg("initialized frame cfa is 0x%" PRIx64 " afa is 0x%" PRIx64, (uint64_t)m_cfa, (uint64_t)m_afa); return; @@ -686,6 +692,9 @@ void RegisterContextUnwind::InitializeNonZerothFrame() { } } + // Give the Architecture a chance to replace the UnwindPlan. 
+ TryAdoptArchitectureUnwindPlan(); + UnwindLogMsg("initialized frame current pc is 0x%" PRIx64 " cfa is 0x%" PRIx64 " afa is 0x%" PRIx64, (uint64_t)m_current_pc.GetLoadAddress(exe_ctx.GetTargetPtr()), @@ -1717,6 +1726,41 @@ RegisterContextUnwind::SavedLocationForRegister( return UnwindLLDB::RegisterSearchResult::eRegisterNotFound; } +UnwindPlanSP RegisterContextUnwind::TryAdoptArchitectureUnwindPlan() { + if (!m_full_unwind_plan_sp) + return {}; + ProcessSP process_sp = m_thread.GetProcess(); + if (!process_sp) + return {}; + + UnwindPlanSP arch_override_plan_sp; + if (Architecture *arch = process_sp->GetTarget().GetArchitecturePlugin()) + arch_override_plan_sp = + arch->GetArchitectureUnwindPlan(m_thread, this, m_full_unwind_plan_sp); + + if (arch_override_plan_sp) { + m_full_unwind_plan_sp = arch_override_plan_sp; + PropagateTrapHandlerFlagFromUnwindPlan(m_full_unwind_plan_sp); + m_registers.clear(); + if (GetLog(LLDBLog::Unwind)) { + UnwindLogMsg( + "Replacing Full Unwindplan with Architecture UnwindPlan, '%s'", + m_full_unwind_plan_sp->GetSourceName().AsCString()); + const UnwindPlan::Row *active_row = + m_full_unwind_plan_sp->GetRowForFunctionOffset(m_current_offset); + if (active_row) { + StreamString active_row_strm; + active_row->Dump(active_row_strm, m_full_unwind_plan_sp.get(), + &m_thread, + m_start_pc.GetLoadAddress(&process_sp->GetTarget())); + UnwindLogMsg("%s", active_row_strm.GetData()); + } + } + } + + return {}; +} + // TryFallbackUnwindPlan() -- this method is a little tricky. 
// // When this is called, the frame above -- the caller frame, the "previous" diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp index ddf8c62..f47dae2 100644 --- a/lldb/source/Target/StopInfo.cpp +++ b/lldb/source/Target/StopInfo.cpp @@ -108,8 +108,7 @@ public: void StoreBPInfo() { ThreadSP thread_sp(m_thread_wp.lock()); if (thread_sp) { - BreakpointSiteSP bp_site_sp( - thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value)); + BreakpointSiteSP bp_site_sp = GetBreakpointSiteSP(); if (bp_site_sp) { uint32_t num_constituents = bp_site_sp->GetNumberOfConstituents(); if (num_constituents == 1) { @@ -139,8 +138,7 @@ public: bool IsValidForOperatingSystemThread(Thread &thread) override { ProcessSP process_sp(thread.GetProcess()); if (process_sp) { - BreakpointSiteSP bp_site_sp( - process_sp->GetBreakpointSiteList().FindByID(m_value)); + BreakpointSiteSP bp_site_sp = GetBreakpointSiteSP(); if (bp_site_sp) return bp_site_sp->ValidForThisThread(thread); } @@ -154,8 +152,7 @@ public: if (thread_sp) { if (!m_should_stop_is_valid) { // Only check once if we should stop at a breakpoint - BreakpointSiteSP bp_site_sp( - thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value)); + BreakpointSiteSP bp_site_sp = GetBreakpointSiteSP(); if (bp_site_sp) { ExecutionContext exe_ctx(thread_sp->GetStackFrameAtIndex(0)); StoppointCallbackContext context(event_ptr, exe_ctx, true); @@ -186,8 +183,7 @@ public: if (m_description.empty()) { ThreadSP thread_sp(m_thread_wp.lock()); if (thread_sp) { - BreakpointSiteSP bp_site_sp( - thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value)); + BreakpointSiteSP bp_site_sp = GetBreakpointSiteSP(); if (bp_site_sp) { StreamString strm; // If we have just hit an internal breakpoint, and it has a kind @@ -247,6 +243,35 @@ public: return m_description.c_str(); } + uint32_t GetStopReasonDataCount() const override { + lldb::BreakpointSiteSP bp_site_sp = GetBreakpointSiteSP(); + if (bp_site_sp) + 
return bp_site_sp->GetNumberOfConstituents() * 2; + return 0; // Breakpoint must have cleared itself... + } + + uint64_t GetStopReasonDataAtIndex(uint32_t idx) override { + lldb::BreakpointSiteSP bp_site_sp = GetBreakpointSiteSP(); + if (bp_site_sp) { + uint32_t bp_index = idx / 2; + BreakpointLocationSP bp_loc_sp( + bp_site_sp->GetConstituentAtIndex(bp_index)); + if (bp_loc_sp) { + if (idx & 1) { + // FIXME: This might be a Facade breakpoint, so we need to fetch + // the one that the thread actually hit, not the native loc ID. + + // Odd idx, return the breakpoint location ID + return bp_loc_sp->GetID(); + } else { + // Even idx, return the breakpoint ID + return bp_loc_sp->GetBreakpoint().GetID(); + } + } + } + return LLDB_INVALID_BREAK_ID; + } + std::optional<uint32_t> GetSuggestedStackFrameIndex(bool inlined_stack) override { if (!inlined_stack) @@ -255,8 +280,7 @@ public: ThreadSP thread_sp(m_thread_wp.lock()); if (!thread_sp) return {}; - BreakpointSiteSP bp_site_sp( - thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value)); + BreakpointSiteSP bp_site_sp = GetBreakpointSiteSP(); if (!bp_site_sp) return {}; @@ -297,8 +321,7 @@ protected: return; } - BreakpointSiteSP bp_site_sp( - thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value)); + BreakpointSiteSP bp_site_sp = GetBreakpointSiteSP(); std::unordered_set<break_id_t> precondition_breakpoints; // Breakpoints that fail their condition check are not considered to // have been hit. 
If the only locations at this site have failed their @@ -629,6 +652,20 @@ protected: } private: + BreakpointSiteSP GetBreakpointSiteSP() const { + if (m_value == LLDB_INVALID_BREAK_ID) + return {}; + + ThreadSP thread_sp = GetThread(); + if (!thread_sp) + return {}; + ProcessSP process_sp = thread_sp->GetProcess(); + if (!process_sp) + return {}; + + return process_sp->GetBreakpointSiteList().FindByID(m_value); + } + bool m_should_stop; bool m_should_stop_is_valid; bool m_should_perform_action; // Since we are trying to preserve the "state" @@ -699,6 +736,13 @@ public: StopReason GetStopReason() const override { return eStopReasonWatchpoint; } + uint32_t GetStopReasonDataCount() const override { return 1; } + uint64_t GetStopReasonDataAtIndex(uint32_t idx) override { + if (idx == 0) + return GetValue(); + return 0; + } + const char *GetDescription() override { if (m_description.empty()) { StreamString strm; @@ -1139,6 +1183,13 @@ public: bool ShouldSelect() const override { return IsShouldStopSignal(); } + uint32_t GetStopReasonDataCount() const override { return 1; } + uint64_t GetStopReasonDataAtIndex(uint32_t idx) override { + if (idx == 0) + return GetValue(); + return 0; + } + private: // In siginfo_t terms, if m_value is si_signo, m_code is si_code. 
std::optional<int> m_code; @@ -1171,6 +1222,14 @@ public: } return m_description.c_str(); } + + uint32_t GetStopReasonDataCount() const override { return 1; } + uint64_t GetStopReasonDataAtIndex(uint32_t idx) override { + if (idx == 0) + return GetValue(); + else + return 0; + } }; // StopInfoTrace @@ -1249,6 +1308,13 @@ public: else return m_description.c_str(); } + uint32_t GetStopReasonDataCount() const override { return 1; } + uint64_t GetStopReasonDataAtIndex(uint32_t idx) override { + if (idx == 0) + return GetValue(); + else + return 0; + } }; // StopInfoProcessorTrace @@ -1390,6 +1456,14 @@ public: const char *GetDescription() override { return "fork"; } + uint32_t GetStopReasonDataCount() const override { return 1; } + uint64_t GetStopReasonDataAtIndex(uint32_t idx) override { + if (idx == 0) + return GetValue(); + else + return 0; + } + protected: void PerformAction(Event *event_ptr) override { // Only perform the action once @@ -1424,6 +1498,13 @@ public: const char *GetDescription() override { return "vfork"; } + uint32_t GetStopReasonDataCount() const override { return 1; } + uint64_t GetStopReasonDataAtIndex(uint32_t idx) override { + if (idx == 0) + return GetValue(); + return 0; + } + protected: void PerformAction(Event *event_ptr) override { // Only perform the action once diff --git a/lldb/test/API/commands/expression/import-std-module/array/TestArrayFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/array/TestArrayFromStdModule.py index a488276..ed028a1 100644 --- a/lldb/test/API/commands/expression/import-std-module/array/TestArrayFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/array/TestArrayFromStdModule.py @@ -11,7 +11,6 @@ class TestCase(TestBase): @add_test_categories(["libc++"]) @skipIf(compiler=no_match("clang")) @skipIf(macos_version=["<", "15.0"]) - @skipUnlessDarwin def test(self): self.build() diff --git 
a/lldb/test/API/commands/expression/import-std-module/deque-basic/TestDequeFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/deque-basic/TestDequeFromStdModule.py index 38b8508..0fb6e88 100644 --- a/lldb/test/API/commands/expression/import-std-module/deque-basic/TestDequeFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/deque-basic/TestDequeFromStdModule.py @@ -11,7 +11,6 @@ class TestBasicDeque(TestBase): @add_test_categories(["libc++"]) @skipIf(compiler=no_match("clang")) @skipIf(macos_version=["<", "15.0"]) - @skipUnlessDarwin def test(self): self.build() diff --git a/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/TestDbgInfoContentDequeFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/TestDbgInfoContentDequeFromStdModule.py index 85eaa8f..e631a87 100644 --- a/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/TestDbgInfoContentDequeFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/TestDbgInfoContentDequeFromStdModule.py @@ -12,7 +12,6 @@ class TestDbgInfoContentDeque(TestBase): @skipIf(compiler=no_match("clang")) @skipIf(compiler="clang", compiler_version=["<", "18.0"]) @skipIf(macos_version=["<", "15.0"]) - @skipUnlessDarwin def test(self): self.build() diff --git a/lldb/test/API/commands/expression/import-std-module/forward_list-dbg-info-content/TestDbgInfoContentForwardListFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/forward_list-dbg-info-content/TestDbgInfoContentForwardListFromStdModule.py index a3a409d..1d0f9ccf 100644 --- a/lldb/test/API/commands/expression/import-std-module/forward_list-dbg-info-content/TestDbgInfoContentForwardListFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/forward_list-dbg-info-content/TestDbgInfoContentForwardListFromStdModule.py @@ -11,7 +11,6 @@ class 
TestDbgInfoContentForwardList(TestBase): @add_test_categories(["libc++"]) @skipIf(compiler=no_match("clang")) @skipIf(macos_version=["<", "15.0"]) - @skipUnlessDarwin def test(self): self.build() diff --git a/lldb/test/API/commands/expression/import-std-module/forward_list/TestForwardListFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/forward_list/TestForwardListFromStdModule.py index c9f4a15..a6ba081 100644 --- a/lldb/test/API/commands/expression/import-std-module/forward_list/TestForwardListFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/forward_list/TestForwardListFromStdModule.py @@ -11,7 +11,6 @@ class TestBasicForwardList(TestBase): @add_test_categories(["libc++"]) @skipIf(compiler=no_match("clang")) @skipIf(macos_version=["<", "15.0"]) - @skipUnlessDarwin def test(self): self.build() diff --git a/lldb/test/API/commands/expression/import-std-module/iterator/TestIteratorFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/iterator/TestIteratorFromStdModule.py index 5c82ac3..370c367 100644 --- a/lldb/test/API/commands/expression/import-std-module/iterator/TestIteratorFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/iterator/TestIteratorFromStdModule.py @@ -11,7 +11,6 @@ class TestCase(TestBase): @add_test_categories(["libc++"]) @skipIf(compiler=no_match("clang")) @skipIf(macos_version=["<", "15.0"]) - @skipUnlessDarwin def test(self): self.build() diff --git a/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/TestDbgInfoContentListFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/TestDbgInfoContentListFromStdModule.py index 0ecc244..b26bd7d 100644 --- a/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/TestDbgInfoContentListFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/TestDbgInfoContentListFromStdModule.py @@ -13,7 +13,6 @@ 
class TestDbgInfoContentList(TestBase): @skipIf(compiler=no_match("clang")) @skipIf(compiler="clang", compiler_version=["<", "12.0"]) @skipIf(macos_version=["<", "15.0"]) - @skipUnlessDarwin def test(self): self.build() diff --git a/lldb/test/API/commands/expression/import-std-module/list/TestListFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/list/TestListFromStdModule.py index f29f353..6253a35 100644 --- a/lldb/test/API/commands/expression/import-std-module/list/TestListFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/list/TestListFromStdModule.py @@ -11,7 +11,6 @@ class TestBasicList(TestBase): @add_test_categories(["libc++"]) @skipIf(compiler=no_match("clang")) @skipIf(macos_version=["<", "15.0"]) - @skipUnlessDarwin def test(self): self.build() diff --git a/lldb/test/API/commands/expression/import-std-module/non-module-type-separation/TestNonModuleTypeSeparation.py b/lldb/test/API/commands/expression/import-std-module/non-module-type-separation/TestNonModuleTypeSeparation.py index 5e0ab48..cc91ddc 100644 --- a/lldb/test/API/commands/expression/import-std-module/non-module-type-separation/TestNonModuleTypeSeparation.py +++ b/lldb/test/API/commands/expression/import-std-module/non-module-type-separation/TestNonModuleTypeSeparation.py @@ -12,7 +12,6 @@ class TestCase(TestBase): @add_test_categories(["libc++"]) @skipIf(compiler=no_match("clang")) @skipIf(macos_version=["<", "15.0"]) - @skipUnlessDarwin def test(self): """ This test is creating ValueObjects with both C++ module and debug diff --git a/lldb/test/API/commands/expression/import-std-module/shared_ptr-dbg-info-content/TestSharedPtrDbgInfoContentFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/shared_ptr-dbg-info-content/TestSharedPtrDbgInfoContentFromStdModule.py index 50419b6..5bfdb9b 100644 --- a/lldb/test/API/commands/expression/import-std-module/shared_ptr-dbg-info-content/TestSharedPtrDbgInfoContentFromStdModule.py +++ 
b/lldb/test/API/commands/expression/import-std-module/shared_ptr-dbg-info-content/TestSharedPtrDbgInfoContentFromStdModule.py @@ -11,7 +11,6 @@ class TestSharedPtrDbgInfoContent(TestBase): @add_test_categories(["libc++"]) @skipIf(compiler=no_match("clang")) @skipIf(macos_version=["<", "15.0"]) - @skipUnlessDarwin def test(self): self.build() diff --git a/lldb/test/API/commands/expression/import-std-module/shared_ptr/TestSharedPtrFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/shared_ptr/TestSharedPtrFromStdModule.py index 9f04361..da86466 100644 --- a/lldb/test/API/commands/expression/import-std-module/shared_ptr/TestSharedPtrFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/shared_ptr/TestSharedPtrFromStdModule.py @@ -10,9 +10,8 @@ from lldbsuite.test import lldbutil class TestSharedPtr(TestBase): @add_test_categories(["libc++"]) @skipIf(compiler=no_match("clang")) - @skipIf(compiler="clang", compiler_version=["<", "17.0"]) @skipIf(macos_version=["<", "15.0"]) - @skipUnlessDarwin + @skipIf(compiler="clang", compiler_version=["<", "17.0"]) def test(self): self.build() diff --git a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py index ba4df40..1c32222 100644 --- a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py @@ -14,7 +14,6 @@ class TestDbgInfoContentVector(TestBase): @skipIf(compiler="clang", compiler_version=["<", "12.0"]) @skipIf(macos_version=["<", "14.0"]) @skipIfDarwin # https://github.com/llvm/llvm-project/issues/106475 - @skipIfLinux def test(self): self.build() diff --git 
a/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py index 6fa9dd5..2cddce0 100644 --- a/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/vector-of-vectors/TestVectorOfVectorsFromStdModule.py @@ -11,7 +11,6 @@ class TestVectorOfVectors(TestBase): @add_test_categories(["libc++"]) @skipIf(compiler=no_match("clang")) @skipIf(macos_version=["<", "15.0"]) - @skipUnlessDarwin def test(self): self.build() diff --git a/lldb/test/API/commands/expression/import-std-module/weak_ptr-dbg-info-content/TestDbgInfoContentWeakPtrFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/weak_ptr-dbg-info-content/TestDbgInfoContentWeakPtrFromStdModule.py index 19d24c1..28edf19 100644 --- a/lldb/test/API/commands/expression/import-std-module/weak_ptr-dbg-info-content/TestDbgInfoContentWeakPtrFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/weak_ptr-dbg-info-content/TestDbgInfoContentWeakPtrFromStdModule.py @@ -12,7 +12,6 @@ class TestDbgInfoContentWeakPtr(TestBase): @skipIf(compiler=no_match("clang")) @skipIf(compiler="clang", compiler_version=["<", "17.0"]) @skipIf(macos_version=["<", "15.0"]) - @skipUnlessDarwin def test(self): self.build() diff --git a/lldb/test/API/commands/expression/import-std-module/weak_ptr/TestWeakPtrFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/weak_ptr/TestWeakPtrFromStdModule.py index e3cc9b9..f0a0a46 100644 --- a/lldb/test/API/commands/expression/import-std-module/weak_ptr/TestWeakPtrFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/weak_ptr/TestWeakPtrFromStdModule.py @@ -12,7 +12,6 @@ class TestSharedPtr(TestBase): @skipIf(compiler=no_match("clang")) @skipIf(compiler="clang", 
compiler_version=["<", "17.0"]) @skipIf(macos_version=["<", "15.0"]) - @skipUnlessDarwin def test(self): self.build() diff --git a/lldb/test/API/functionalities/postmortem/elf-core/gcore/TestGCore.py b/lldb/test/API/functionalities/postmortem/elf-core/gcore/TestGCore.py index 020a226..497b8e8 100644 --- a/lldb/test/API/functionalities/postmortem/elf-core/gcore/TestGCore.py +++ b/lldb/test/API/functionalities/postmortem/elf-core/gcore/TestGCore.py @@ -37,7 +37,7 @@ class GCoreTestCase(TestBase): for thread in process: reason = thread.GetStopReason() self.assertStopReason(reason, lldb.eStopReasonSignal) - signal = thread.GetStopReasonDataAtIndex(1) + signal = thread.GetStopReasonDataAtIndex(0) # Check we got signal 19 (SIGSTOP) self.assertEqual(signal, 19) diff --git a/lldb/test/API/functionalities/postmortem/elf-core/thread_crash/TestLinuxCoreThreads.py b/lldb/test/API/functionalities/postmortem/elf-core/thread_crash/TestLinuxCoreThreads.py index 4a848d1..6d9aef2 100644 --- a/lldb/test/API/functionalities/postmortem/elf-core/thread_crash/TestLinuxCoreThreads.py +++ b/lldb/test/API/functionalities/postmortem/elf-core/thread_crash/TestLinuxCoreThreads.py @@ -91,7 +91,7 @@ class LinuxCoreThreadsTestCase(TestBase): reason = thread.GetStopReason() if thread.GetThreadID() == tid: self.assertStopReason(reason, lldb.eStopReasonSignal) - signal = thread.GetStopReasonDataAtIndex(1) + signal = thread.GetStopReasonDataAtIndex(0) # Check we got signal 4 (SIGILL) self.assertEqual(signal, 4) else: diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/Makefile b/lldb/test/API/functionalities/unwind/cortex-m-exception/Makefile new file mode 100644 index 0000000..22f1051 --- /dev/null +++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/Makefile @@ -0,0 +1 @@ +include Makefile.rules diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py 
b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py new file mode 100644 index 0000000..267f8c8 --- /dev/null +++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/TestCortexMExceptionUnwind.py @@ -0,0 +1,56 @@ +""" +Test that we can backtrace up an ARM Cortex-M Exception return stack +""" + +import lldb +import json +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestCortexMExceptionUnwind(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + @skipUnlessDarwin # on the lldb-remote-linux-ubuntu CI, only get 1 stack frame not 6 + def test_no_fpu(self): + """Test that we can backtrace correctly through an ARM Cortex-M Exception return stack""" + + target = self.dbg.CreateTarget("") + exe = "binary.json" + with open(exe) as f: + exe_json = json.load(f) + exe_uuid = exe_json["uuid"] + + target.AddModule(exe, "", exe_uuid) + self.assertTrue(target.IsValid()) + + core = self.getBuildArtifact("core") + self.yaml2macho_core("armv7m-nofpu-exception.yaml", core, exe_uuid) + + process = target.LoadCore(core) + self.assertTrue(process.IsValid()) + + if self.TraceOn(): + self.runCmd("target list") + self.runCmd("image list") + self.runCmd("target modules dump sections") + self.runCmd("target modules dump symtab") + self.runCmd("bt") + + thread = process.GetThreadAtIndex(0) + self.assertTrue(thread.IsValid()) + + # We have 4 named stack frames and two unnamed + # frames above that. The topmost two stack frames + # were not interesting for this test, so I didn't + # create symbols for them. 
+ self.assertEqual(thread.GetNumFrames(), 6) + stackframe_names = [ + "exception_catcher", + "exception_catcher", + "exception_thrower", + "main", + ] + for i, name in enumerate(stackframe_names): + self.assertEqual(name, thread.GetFrameAtIndex(i).GetSymbol().GetName()) diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/armv7m-nofpu-exception.yaml b/lldb/test/API/functionalities/unwind/cortex-m-exception/armv7m-nofpu-exception.yaml new file mode 100644 index 0000000..9ce5ff4 --- /dev/null +++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/armv7m-nofpu-exception.yaml @@ -0,0 +1,64 @@ +cpu: armv7m +threads: + - regsets: + - flavor: gpr + registers: [{name: sp, value: 0x2000fe70}, {name: r7, value: 0x2000fe80}, + {name: pc, value: 0x0020392c}, {name: lr, value: 0x0020392d}] +memory-regions: + # stack memory fetched via + # (lldb) p/x $sp + # (lldb) x/128wx $sp + # % pbpaste | sed -e 's,.*: ,,' -e 's/ /, /g' -e 's/$/,/' + - addr: 0x2000fe70 + UInt32: [ + 0x0000002a, 0x20010e58, 0x00203923, 0x00000001, + 0x2000fe88, 0x00203911, 0x2000ffdc, 0xfffffff9, + 0x00000102, 0x00000002, 0x000003f0, 0x0000002a, + 0x20012620, 0x00203215, 0x00203366, 0x81000200, + 0x00203215, 0x200128b0, 0x0024928d, 0x2000fecc, + 0x002491ed, 0x20010e58, 0x20010e4c, 0x2000ffa0, + 0x200107a0, 0x0000003c, 0x200116e8, 0x200108b0, + 0x0020b895, 0x00000000, 0x0000e200, 0x2001227d, + 0x200121fd, 0x0000e000, 0x00000000, 0x200129a0, + 0x002035bf, 0x00000029, 0x000003d8, 0x20011120, + 0x200116e0, 0x40003800, 0x20011120, 0x00000000, + 0x00205169, 0x00203713, 0x00000000, 0x0022dcb9, + 0x40003800, 0x20011240, 0x00000000, 0xf7d71ecf, + 0xfc7676d6, 0x00000000, 0x968782d3, 0xe75afbbb, + 0x600d77c8, 0xc1c05886, 0x17f3e76d, 0xefc3054d, + 0x11940aaa, 0x00000000, 0x93bffabb, 0x6db85af0, + 0x00000000, 0x2001d76f, 0xcb35f653, 0x00000000, + 0x00000000, 0x079d5058, 0x00000000, 0x00000000, + 0xc5622949, 0x68682572, 0x00000075, 0x0000e500, + 0x20012c30, 0x00000000, 0xcdfcd8c2, 0x76efc90f, + 
0x0024495f, 0x20012bf0, 0x0000e400, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0029089c, 0x0029089c, 0x00000000, 0x2000ffe4, + 0x00202a87, 0x2000ffec, 0x00200257, 0x2000fff4, + 0x00200211, 0x00000000, 0x00000000, 0x7badb3f6, + 0x20010794, 0x20010fac, 0x200109b0, 0x002887a4, + 0x00285688, 0x002854c8, 0x00288f74, 0x0028a618, + 0x0028a6f8, 0x00000000, 0x00000001, 0x00000000, + 0x00000000, 0x00000000, 0x002037dd, 0x00000000, + 0x00000002, 0x00000100, 0x00000000, 0x20010064, + 0x00000000, 0x00000000, 0x00000000, 0x200109c0, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + ] + # exception_catcher() function bytes + # (lldb) dis + # binary`exception_catcher: + # 0x203910 <+0>: push {r3, r4, r5, r6, r7, lr} + # 0x203912 <+2>: add r7, sp, #0x10 + # ... + # (lldb) x/44bx 0x203910 + # % pbpaste | sed -e 's,.*: ,,' -e 's/ /, /g' -e 's/$/,/' + - addr: 0x203910 + UInt8: [ + 0xf8, 0xb5, 0x04, 0xaf, 0x06, 0x4c, 0x07, 0x49, + 0x74, 0xf0, 0x2e, 0xf8, 0x01, 0xac, 0x74, 0xf0, + 0x61, 0xf8, 0x05, 0x48, 0x76, 0xf0, 0xdf, 0xfe, + 0x74, 0xf0, 0x0b, 0xf9, 0xfe, 0xe7, 0x00, 0xbf, + 0x4c, 0x0e, 0x01, 0x20, 0x0d, 0x35, 0x20, 0x00, + 0x98, 0xae, 0x28, 0x00 + ] + diff --git a/lldb/test/API/functionalities/unwind/cortex-m-exception/binary.json b/lldb/test/API/functionalities/unwind/cortex-m-exception/binary.json new file mode 100644 index 0000000..8fcd530 --- /dev/null +++ b/lldb/test/API/functionalities/unwind/cortex-m-exception/binary.json @@ -0,0 +1,41 @@ +{ + "triple": "armv7m-apple", + "uuid": "2D157DBA-53C9-3AC7-B5A1-9D336EC831CB", + "type": "executable", + "sections": [ + { + "user_id": 100, + "name": "TEXT", + "type": "code", + "address": 2097664, + "size": 598872, + "file_offset": 0, + "file_size": 598872, + "alignment": 2, + "flags": 514, + "read": true, + "write": false, + "execute": true + } + ], + "symbols": [ + { + "name": "main", + "type": "code", + "size": 10, + "address": 2108030 + }, + { + "name": "exception_catcher", + "type": "code", + "size": 44, + 
"address": 2111760 + }, + { + "name": "exception_thrower", + "type": "code", + "size": 2652, + "address": 2108040 + } + ] +} diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp index 673c4f7..75e8fe4 100644 --- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -521,7 +521,8 @@ bool CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) { // Only VP intrinsics can have an %evl parameter. Value *OldMaskParam = VPI.getMaskParam(); if (!OldMaskParam) { - assert(VPI.getIntrinsicID() == Intrinsic::vp_merge && + assert((VPI.getIntrinsicID() == Intrinsic::vp_merge || + VPI.getIntrinsicID() == Intrinsic::vp_select) && "Unexpected VP intrinsic without mask operand"); OldMaskParam = VPI.getArgOperand(0); } @@ -537,7 +538,8 @@ bool CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) { ElementCount ElemCount = VPI.getStaticVectorLength(); Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount); Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam); - if (VPI.getIntrinsicID() == Intrinsic::vp_merge) + if (VPI.getIntrinsicID() == Intrinsic::vp_merge || + VPI.getIntrinsicID() == Intrinsic::vp_select) VPI.setArgOperand(0, NewMaskParam); else VPI.setMaskParam(NewMaskParam); diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp index ce35a5b..9245db4 100644 --- a/llvm/lib/FileCheck/FileCheck.cpp +++ b/llvm/lib/FileCheck/FileCheck.cpp @@ -1218,6 +1218,14 @@ Pattern::MatchResult Pattern::match(StringRef Buffer, StringRef MatchedValue = MatchInfo[CaptureParenGroup]; ExpressionFormat Format = DefinedNumericVariable->getImplicitFormat(); APInt Value = Format.valueFromStringRepr(MatchedValue, SM); + // Numeric variables are already inserted into GlobalNumericVariableTable + // during parsing, but clearLocalVars might remove them, so we must + // reinsert them. 
Numeric-variable resolution does not access + // GlobalNumericVariableTable; it directly uses a pointer to the variable. + // However, other functions (such as clearLocalVars) may require active + // variables to be in the table. + Context->GlobalNumericVariableTable.try_emplace(NumericVariableDef.getKey(), + DefinedNumericVariable); DefinedNumericVariable->setValue(Value, MatchedValue); } diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 2ab2c14..023fd14 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -920,10 +920,6 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { case TargetOpcode::PATCHABLE_FUNCTION_ENTER: { assert(!Subtarget->isAIXABI() && "AIX does not support patchable function entry!"); - // PATCHABLE_FUNCTION_ENTER on little endian is for XRAY support which is - // handled in PPCLinuxAsmPrinter. - if (MAI->isLittleEndian()) - return; const Function &F = MF->getFunction(); unsigned Num = 0; (void)F.getFnAttribute("patchable-function-entry") @@ -1789,7 +1785,13 @@ void PPCLinuxAsmPrinter::emitInstruction(const MachineInstr *MI) { // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when number // of instructions change. // XRAY is only supported on PPC Linux little endian. 
- if (!MAI->isLittleEndian()) + const Function &F = MF->getFunction(); + unsigned Num = 0; + (void)F.getFnAttribute("patchable-function-entry") + .getValueAsString() + .getAsInteger(10, Num); + + if (!MAI->isLittleEndian() || Num) break; MCSymbol *BeginOfSled = OutContext.createTempSymbol(); MCSymbol *EndOfSled = OutContext.createTempSymbol(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 10b2f5d..ac5e8d8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1270,11 +1270,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return; } - VPInstruction *OpVPI; - if (match(Def, m_ExtractLastElement(m_VPInstruction(OpVPI))) && - OpVPI->isVectorToScalar()) { - Def->replaceAllUsesWith(OpVPI); - return; + if (match(Def, + m_VPInstruction<VPInstruction::ExtractLastElement>(m_VPValue(A))) && + vputils::isSingleScalar(A) && all_of(A->users(), [Def, A](VPUser *U) { + return U->usesScalars(A) || Def == U; + })) { + return Def->replaceAllUsesWith(A); } } diff --git a/llvm/test/CodeGen/PowerPC/patchable-function-entry.ll b/llvm/test/CodeGen/PowerPC/patchable-function-entry.ll index 0c2d282..f597754 100644 --- a/llvm/test/CodeGen/PowerPC/patchable-function-entry.ll +++ b/llvm/test/CodeGen/PowerPC/patchable-function-entry.ll @@ -1,5 +1,6 @@ ; RUN: llc -mtriple=powerpc %s -o - | FileCheck %s --check-prefixes=CHECK,PPC32 ; RUN: llc -mtriple=powerpc64 %s -o - | FileCheck %s --check-prefixes=CHECK,PPC64 +; RUN: llc -mtriple=powerpc64le %s -o - | FileCheck %s --check-prefix=PPC64LE @a = global i32 0, align 4 @@ -9,6 +10,12 @@ define void @f0() { ; CHECK: # %bb.0: ; CHECK-NEXT: blr ; CHECK-NOT: .section __patchable_function_entries +; +; PPC64LE-LABEL: f0: +; PPC64LE-NOT: nop +; PPC64LE: # %bb.0: +; PPC64LE-NEXT: blr +; PPC64LE-NOT: .section __patchable_function_entries ret void } @@ -18,6 +25,22 @@ define void @f1() 
"patchable-function-entry"="0" { ; CHECK: # %bb.0: ; CHECK-NEXT: blr ; CHECK-NOT: .section __patchable_function_entries +; +; PPC64LE-LABEL: f1: +; PPC64LE: # %bb.0: +; PPC64LE-NEXT: .Ltmp0: +; PPC64LE-NEXT: b .Ltmp1 +; PPC64LE-NEXT: nop +; PPC64LE-NEXT: std 0, -8(1) +; PPC64LE-NEXT: mflr 0 +; PPC64LE-NEXT: bl __xray_FunctionEntry +; PPC64LE-NEXT: nop +; PPC64LE-NEXT: mtlr 0 +; PPC64LE-NEXT: .Ltmp1: +; PPC64LE-NEXT: blr +; PPC64LE-NOT: .section __patchable_function_entries +; PPC64LE: .section xray_instr_map +; PPC64LE: .section xray_fn_idx ret void } @@ -32,6 +55,17 @@ define void @f2() "patchable-function-entry"="1" { ; PPC64: .p2align 3, 0x0 ; PPC32-NEXT: .long .Lfunc_begin2 ; PPC64-NEXT: .quad .Lfunc_begin2 +; +; PPC64LE-LABEL: f2: +; PPC64LE-LABEL-NEXT: .Lfunc_begin2: +; PPC64LE: # %bb.0: +; PPC64LE-NEXT: nop +; PPC64LE-NEXT: blr +; PPC64LE: .section __patchable_function_entries +; PPC64LE: .p2align 3, 0x0 +; PPC64LE-NEXT: .quad .Lfunc_begin2 +; PPC64LE-NOT: .section xray_instr_map +; PPC64LE-NOT: .section xray_fn_idx ret void } @@ -52,6 +86,21 @@ define i32 @f3() "patchable-function-entry"="1" "patchable-function-prefix"="2" ; PPC64: .p2align 3, 0x0 ; PPC32-NEXT: .long .Ltmp0 ; PPC64-NEXT: .quad .Ltmp0 +; +; PC64LE-LABEL: .Ltmp3: +; PC64LE-COUNT-2: nop +; PC64LE-LABEL: f3: +; PC64LE: # %bb.0: +; PC64LE-NEXT: nop +; PC64LE: addis 3, 2, .LC0@toc@ha +; PC64LE-NEXT: ld 3, .LC0@toc@l(3) +; PC64LE-NEXT: lwz 3, 0(3) +; PC64LE: blr +; PC64LE: .section __patchable_function_entries +; PPC64LE: .p2align 3, 0x0 +; PPC64LE-NEXT: .quad .Ltmp3 +; PC64LE-NOT: .section xray_instr_map +; PC64LE-NOT: .section xray_fn_idx entry: %0 = load i32, ptr @a, align 4 ret i32 %0 diff --git a/llvm/test/CodeGen/RISCV/select-zbb.ll b/llvm/test/CodeGen/RISCV/select-zbb.ll new file mode 100644 index 0000000..0af699a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/select-zbb.ll @@ -0,0 +1,1614 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc 
-mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=RV32IM %s +; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s | FileCheck --check-prefixes=RV64IM %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+zbb -verify-machineinstrs < %s | FileCheck --check-prefixes=RV32IMZBB %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+zbb -verify-machineinstrs < %s | FileCheck --check-prefixes=RV64IMZBB %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=RV32IMZICOND %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+zicond -verify-machineinstrs < %s | FileCheck --check-prefixes=RV64IMZICOND %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+zicond,+zbb -verify-machineinstrs < %s | FileCheck --check-prefixes=RV32IMBOTH %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+zicond,+zbb -verify-machineinstrs < %s | FileCheck --check-prefixes=RV64IMBOTH %s + + +define i32 @select_umin_1(i1 zeroext %cond, i32 %a, i32 %b) { +; RV32IM-LABEL: select_umin_1: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: bgeu a1, a2, .LBB0_3 +; RV32IM-NEXT: # %bb.1: # %entry +; RV32IM-NEXT: beqz a0, .LBB0_4 +; RV32IM-NEXT: .LBB0_2: # %entry +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB0_3: # %entry +; RV32IM-NEXT: mv a1, a2 +; RV32IM-NEXT: bnez a0, .LBB0_2 +; RV32IM-NEXT: .LBB0_4: # %entry +; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_umin_1: +; RV64IM: # %bb.0: # %entry +; RV64IM-NEXT: sext.w a3, a2 +; RV64IM-NEXT: sext.w a1, a1 +; RV64IM-NEXT: bgeu a1, a3, .LBB0_3 +; RV64IM-NEXT: # %bb.1: # %entry +; RV64IM-NEXT: beqz a0, .LBB0_4 +; RV64IM-NEXT: .LBB0_2: # %entry +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB0_3: # %entry +; RV64IM-NEXT: mv a1, a3 +; RV64IM-NEXT: bnez a0, .LBB0_2 +; RV64IM-NEXT: .LBB0_4: # %entry +; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_umin_1: +; RV32IMZBB: # %bb.0: # %entry +; RV32IMZBB-NEXT: beqz a0, .LBB0_2 +; RV32IMZBB-NEXT: # 
%bb.1: +; RV32IMZBB-NEXT: minu a2, a1, a2 +; RV32IMZBB-NEXT: .LBB0_2: # %entry +; RV32IMZBB-NEXT: mv a0, a2 +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_umin_1: +; RV64IMZBB: # %bb.0: # %entry +; RV64IMZBB-NEXT: beqz a0, .LBB0_2 +; RV64IMZBB-NEXT: # %bb.1: +; RV64IMZBB-NEXT: sext.w a2, a2 +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: minu a2, a1, a2 +; RV64IMZBB-NEXT: .LBB0_2: # %entry +; RV64IMZBB-NEXT: mv a0, a2 +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_umin_1: +; RV32IMZICOND: # %bb.0: # %entry +; RV32IMZICOND-NEXT: sltu a3, a1, a2 +; RV32IMZICOND-NEXT: czero.nez a4, a2, a3 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a3 +; RV32IMZICOND-NEXT: or a1, a1, a4 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV32IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV32IMZICOND-NEXT: or a0, a1, a0 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_umin_1: +; RV64IMZICOND: # %bb.0: # %entry +; RV64IMZICOND-NEXT: sext.w a3, a2 +; RV64IMZICOND-NEXT: sext.w a1, a1 +; RV64IMZICOND-NEXT: sltu a4, a1, a3 +; RV64IMZICOND-NEXT: czero.nez a3, a3, a4 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a4 +; RV64IMZICOND-NEXT: or a1, a1, a3 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV64IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV64IMZICOND-NEXT: or a0, a1, a0 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_umin_1: +; RV32IMBOTH: # %bb.0: # %entry +; RV32IMBOTH-NEXT: minu a1, a1, a2 +; RV32IMBOTH-NEXT: czero.nez a2, a2, a0 +; RV32IMBOTH-NEXT: czero.eqz a0, a1, a0 +; RV32IMBOTH-NEXT: or a0, a0, a2 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_umin_1: +; RV64IMBOTH: # %bb.0: # %entry +; RV64IMBOTH-NEXT: sext.w a3, a2 +; RV64IMBOTH-NEXT: sext.w a1, a1 +; RV64IMBOTH-NEXT: minu a1, a1, a3 +; RV64IMBOTH-NEXT: czero.nez a2, a2, a0 +; RV64IMBOTH-NEXT: czero.eqz a0, a1, a0 +; RV64IMBOTH-NEXT: or a0, a0, a2 +; RV64IMBOTH-NEXT: ret +entry: + %c = call i32 @llvm.umin(i32 %a, i32 %b) + %res = select i1 %cond, i32 %c, i32 %b + ret i32 %res +} + +define i32 
@select_umin_2(i1 zeroext %cond, i32 %a, i32 %b) { +; RV32IM-LABEL: select_umin_2: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: mv a3, a1 +; RV32IM-NEXT: bgeu a1, a2, .LBB1_3 +; RV32IM-NEXT: # %bb.1: # %entry +; RV32IM-NEXT: beqz a0, .LBB1_4 +; RV32IM-NEXT: .LBB1_2: # %entry +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB1_3: # %entry +; RV32IM-NEXT: mv a3, a2 +; RV32IM-NEXT: bnez a0, .LBB1_2 +; RV32IM-NEXT: .LBB1_4: # %entry +; RV32IM-NEXT: mv a0, a3 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_umin_2: +; RV64IM: # %bb.0: # %entry +; RV64IM-NEXT: sext.w a3, a2 +; RV64IM-NEXT: sext.w a2, a1 +; RV64IM-NEXT: bgeu a2, a3, .LBB1_3 +; RV64IM-NEXT: # %bb.1: # %entry +; RV64IM-NEXT: beqz a0, .LBB1_4 +; RV64IM-NEXT: .LBB1_2: # %entry +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB1_3: # %entry +; RV64IM-NEXT: mv a2, a3 +; RV64IM-NEXT: bnez a0, .LBB1_2 +; RV64IM-NEXT: .LBB1_4: # %entry +; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_umin_2: +; RV32IMZBB: # %bb.0: # %entry +; RV32IMZBB-NEXT: bnez a0, .LBB1_2 +; RV32IMZBB-NEXT: # %bb.1: # %entry +; RV32IMZBB-NEXT: minu a1, a1, a2 +; RV32IMZBB-NEXT: .LBB1_2: # %entry +; RV32IMZBB-NEXT: mv a0, a1 +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_umin_2: +; RV64IMZBB: # %bb.0: # %entry +; RV64IMZBB-NEXT: bnez a0, .LBB1_2 +; RV64IMZBB-NEXT: # %bb.1: # %entry +; RV64IMZBB-NEXT: sext.w a2, a2 +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: minu a1, a1, a2 +; RV64IMZBB-NEXT: .LBB1_2: # %entry +; RV64IMZBB-NEXT: mv a0, a1 +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_umin_2: +; RV32IMZICOND: # %bb.0: # %entry +; RV32IMZICOND-NEXT: sltu a3, a1, a2 +; RV32IMZICOND-NEXT: czero.nez a2, a2, a3 +; RV32IMZICOND-NEXT: czero.eqz a3, a1, a3 +; RV32IMZICOND-NEXT: or a2, a3, a2 +; RV32IMZICOND-NEXT: czero.nez a2, a2, a0 +; RV32IMZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32IMZICOND-NEXT: or a0, a0, a2 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: 
select_umin_2: +; RV64IMZICOND: # %bb.0: # %entry +; RV64IMZICOND-NEXT: sext.w a2, a2 +; RV64IMZICOND-NEXT: sext.w a3, a1 +; RV64IMZICOND-NEXT: sltu a4, a3, a2 +; RV64IMZICOND-NEXT: czero.nez a2, a2, a4 +; RV64IMZICOND-NEXT: czero.eqz a3, a3, a4 +; RV64IMZICOND-NEXT: or a2, a3, a2 +; RV64IMZICOND-NEXT: czero.nez a2, a2, a0 +; RV64IMZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64IMZICOND-NEXT: or a0, a0, a2 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_umin_2: +; RV32IMBOTH: # %bb.0: # %entry +; RV32IMBOTH-NEXT: minu a2, a1, a2 +; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV32IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV32IMBOTH-NEXT: or a0, a1, a0 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_umin_2: +; RV64IMBOTH: # %bb.0: # %entry +; RV64IMBOTH-NEXT: sext.w a2, a2 +; RV64IMBOTH-NEXT: sext.w a3, a1 +; RV64IMBOTH-NEXT: minu a2, a3, a2 +; RV64IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV64IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV64IMBOTH-NEXT: or a0, a1, a0 +; RV64IMBOTH-NEXT: ret +entry: + %c = call i32 @llvm.umin(i32 %a, i32 %b) + %res = select i1 %cond, i32 %a, i32 %c + ret i32 %res +} + +define i32 @select_umin_3(i1 zeroext %cond, i32 %a) { +; RV32IM-LABEL: select_umin_3: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: li a3, 32 +; RV32IM-NEXT: mv a2, a1 +; RV32IM-NEXT: bgeu a1, a3, .LBB2_3 +; RV32IM-NEXT: # %bb.1: # %entry +; RV32IM-NEXT: beqz a0, .LBB2_4 +; RV32IM-NEXT: .LBB2_2: # %entry +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB2_3: # %entry +; RV32IM-NEXT: li a2, 32 +; RV32IM-NEXT: bnez a0, .LBB2_2 +; RV32IM-NEXT: .LBB2_4: # %entry +; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_umin_3: +; RV64IM: # %bb.0: # %entry +; RV64IM-NEXT: sext.w a2, a1 +; RV64IM-NEXT: li a3, 32 +; RV64IM-NEXT: bgeu a2, a3, .LBB2_3 +; RV64IM-NEXT: # %bb.1: # %entry +; RV64IM-NEXT: beqz a0, .LBB2_4 +; RV64IM-NEXT: .LBB2_2: # %entry +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB2_3: # %entry +; RV64IM-NEXT: li a2, 32 +; 
RV64IM-NEXT: bnez a0, .LBB2_2 +; RV64IM-NEXT: .LBB2_4: # %entry +; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_umin_3: +; RV32IMZBB: # %bb.0: # %entry +; RV32IMZBB-NEXT: bnez a0, .LBB2_2 +; RV32IMZBB-NEXT: # %bb.1: # %entry +; RV32IMZBB-NEXT: li a0, 32 +; RV32IMZBB-NEXT: minu a1, a1, a0 +; RV32IMZBB-NEXT: .LBB2_2: # %entry +; RV32IMZBB-NEXT: mv a0, a1 +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_umin_3: +; RV64IMZBB: # %bb.0: # %entry +; RV64IMZBB-NEXT: bnez a0, .LBB2_2 +; RV64IMZBB-NEXT: # %bb.1: # %entry +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: li a0, 32 +; RV64IMZBB-NEXT: minu a1, a1, a0 +; RV64IMZBB-NEXT: .LBB2_2: # %entry +; RV64IMZBB-NEXT: mv a0, a1 +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_umin_3: +; RV32IMZICOND: # %bb.0: # %entry +; RV32IMZICOND-NEXT: sltiu a2, a1, 32 +; RV32IMZICOND-NEXT: addi a3, a1, -32 +; RV32IMZICOND-NEXT: czero.eqz a2, a3, a2 +; RV32IMZICOND-NEXT: addi a2, a2, 32 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV32IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV32IMZICOND-NEXT: or a0, a1, a0 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_umin_3: +; RV64IMZICOND: # %bb.0: # %entry +; RV64IMZICOND-NEXT: sext.w a2, a1 +; RV64IMZICOND-NEXT: sltiu a3, a2, 32 +; RV64IMZICOND-NEXT: addi a2, a2, -32 +; RV64IMZICOND-NEXT: czero.eqz a2, a2, a3 +; RV64IMZICOND-NEXT: addi a2, a2, 32 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV64IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV64IMZICOND-NEXT: or a0, a1, a0 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_umin_3: +; RV32IMBOTH: # %bb.0: # %entry +; RV32IMBOTH-NEXT: li a2, 32 +; RV32IMBOTH-NEXT: minu a2, a1, a2 +; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV32IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV32IMBOTH-NEXT: or a0, a1, a0 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_umin_3: +; RV64IMBOTH: # %bb.0: # %entry +; RV64IMBOTH-NEXT: sext.w a2, a1 +; RV64IMBOTH-NEXT: li a3, 32 +; RV64IMBOTH-NEXT: minu a2, a2, a3 +; 
RV64IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV64IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV64IMBOTH-NEXT: or a0, a1, a0 +; RV64IMBOTH-NEXT: ret +entry: + %c = call i32 @llvm.umin(i32 %a, i32 32) + %res = select i1 %cond, i32 %a, i32 %c + ret i32 %res +} + +define i32 @select_umin_4(i1 zeroext %cond, i32 %x) { +; RV32IM-LABEL: select_umin_4: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a2, 128 +; RV32IM-NEXT: bgeu a1, a2, .LBB3_3 +; RV32IM-NEXT: # %bb.1: +; RV32IM-NEXT: beqz a0, .LBB3_4 +; RV32IM-NEXT: .LBB3_2: +; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB3_3: +; RV32IM-NEXT: li a1, 128 +; RV32IM-NEXT: bnez a0, .LBB3_2 +; RV32IM-NEXT: .LBB3_4: +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_umin_4: +; RV64IM: # %bb.0: +; RV64IM-NEXT: sext.w a2, a1 +; RV64IM-NEXT: li a1, 128 +; RV64IM-NEXT: bgeu a2, a1, .LBB3_3 +; RV64IM-NEXT: # %bb.1: +; RV64IM-NEXT: beqz a0, .LBB3_4 +; RV64IM-NEXT: .LBB3_2: +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB3_3: +; RV64IM-NEXT: li a2, 128 +; RV64IM-NEXT: bnez a0, .LBB3_2 +; RV64IM-NEXT: .LBB3_4: +; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_umin_4: +; RV32IMZBB: # %bb.0: +; RV32IMZBB-NEXT: mv a2, a0 +; RV32IMZBB-NEXT: li a0, 128 +; RV32IMZBB-NEXT: bnez a2, .LBB3_2 +; RV32IMZBB-NEXT: # %bb.1: +; RV32IMZBB-NEXT: minu a0, a1, a0 +; RV32IMZBB-NEXT: .LBB3_2: +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_umin_4: +; RV64IMZBB: # %bb.0: +; RV64IMZBB-NEXT: mv a2, a0 +; RV64IMZBB-NEXT: li a0, 128 +; RV64IMZBB-NEXT: bnez a2, .LBB3_2 +; RV64IMZBB-NEXT: # %bb.1: +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: minu a0, a1, a0 +; RV64IMZBB-NEXT: .LBB3_2: +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_umin_4: +; RV32IMZICOND: # %bb.0: +; RV32IMZICOND-NEXT: sltiu a2, a1, 128 +; RV32IMZICOND-NEXT: addi a1, a1, -128 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a2 +; RV32IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV32IMZICOND-NEXT: addi a0, a0, 128 +; 
RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_umin_4: +; RV64IMZICOND: # %bb.0: +; RV64IMZICOND-NEXT: sext.w a1, a1 +; RV64IMZICOND-NEXT: sltiu a2, a1, 128 +; RV64IMZICOND-NEXT: addi a1, a1, -128 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a2 +; RV64IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV64IMZICOND-NEXT: addi a0, a0, 128 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_umin_4: +; RV32IMBOTH: # %bb.0: +; RV32IMBOTH-NEXT: li a2, 128 +; RV32IMBOTH-NEXT: minu a1, a1, a2 +; RV32IMBOTH-NEXT: addi a1, a1, -128 +; RV32IMBOTH-NEXT: czero.nez a0, a1, a0 +; RV32IMBOTH-NEXT: addi a0, a0, 128 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_umin_4: +; RV64IMBOTH: # %bb.0: +; RV64IMBOTH-NEXT: sext.w a1, a1 +; RV64IMBOTH-NEXT: li a2, 128 +; RV64IMBOTH-NEXT: minu a1, a1, a2 +; RV64IMBOTH-NEXT: addi a1, a1, -128 +; RV64IMBOTH-NEXT: czero.nez a0, a1, a0 +; RV64IMBOTH-NEXT: addi a0, a0, 128 +; RV64IMBOTH-NEXT: ret + %minmax = call i32 @llvm.umin(i32 %x, i32 128) + %sel = select i1 %cond, i32 128, i32 %minmax + ret i32 %sel +} + +define i32 @select_umax_1(i1 zeroext %cond, i32 %a, i32 %b) { +; RV32IM-LABEL: select_umax_1: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: bgeu a2, a1, .LBB4_3 +; RV32IM-NEXT: # %bb.1: # %entry +; RV32IM-NEXT: beqz a0, .LBB4_4 +; RV32IM-NEXT: .LBB4_2: # %entry +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB4_3: # %entry +; RV32IM-NEXT: mv a1, a2 +; RV32IM-NEXT: bnez a0, .LBB4_2 +; RV32IM-NEXT: .LBB4_4: # %entry +; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_umax_1: +; RV64IM: # %bb.0: # %entry +; RV64IM-NEXT: sext.w a1, a1 +; RV64IM-NEXT: sext.w a3, a2 +; RV64IM-NEXT: bgeu a3, a1, .LBB4_3 +; RV64IM-NEXT: # %bb.1: # %entry +; RV64IM-NEXT: beqz a0, .LBB4_4 +; RV64IM-NEXT: .LBB4_2: # %entry +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB4_3: # %entry +; RV64IM-NEXT: mv a1, a3 +; RV64IM-NEXT: bnez a0, .LBB4_2 +; RV64IM-NEXT: .LBB4_4: # %entry +; RV64IM-NEXT: mv a0, a2 +; 
RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_umax_1: +; RV32IMZBB: # %bb.0: # %entry +; RV32IMZBB-NEXT: beqz a0, .LBB4_2 +; RV32IMZBB-NEXT: # %bb.1: +; RV32IMZBB-NEXT: maxu a2, a1, a2 +; RV32IMZBB-NEXT: .LBB4_2: # %entry +; RV32IMZBB-NEXT: mv a0, a2 +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_umax_1: +; RV64IMZBB: # %bb.0: # %entry +; RV64IMZBB-NEXT: beqz a0, .LBB4_2 +; RV64IMZBB-NEXT: # %bb.1: +; RV64IMZBB-NEXT: sext.w a2, a2 +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: maxu a2, a1, a2 +; RV64IMZBB-NEXT: .LBB4_2: # %entry +; RV64IMZBB-NEXT: mv a0, a2 +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_umax_1: +; RV32IMZICOND: # %bb.0: # %entry +; RV32IMZICOND-NEXT: sltu a3, a2, a1 +; RV32IMZICOND-NEXT: czero.nez a4, a2, a3 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a3 +; RV32IMZICOND-NEXT: or a1, a1, a4 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV32IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV32IMZICOND-NEXT: or a0, a1, a0 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_umax_1: +; RV64IMZICOND: # %bb.0: # %entry +; RV64IMZICOND-NEXT: sext.w a1, a1 +; RV64IMZICOND-NEXT: sext.w a3, a2 +; RV64IMZICOND-NEXT: sltu a4, a3, a1 +; RV64IMZICOND-NEXT: czero.nez a3, a3, a4 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a4 +; RV64IMZICOND-NEXT: or a1, a1, a3 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV64IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV64IMZICOND-NEXT: or a0, a1, a0 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_umax_1: +; RV32IMBOTH: # %bb.0: # %entry +; RV32IMBOTH-NEXT: maxu a1, a1, a2 +; RV32IMBOTH-NEXT: czero.nez a2, a2, a0 +; RV32IMBOTH-NEXT: czero.eqz a0, a1, a0 +; RV32IMBOTH-NEXT: or a0, a0, a2 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_umax_1: +; RV64IMBOTH: # %bb.0: # %entry +; RV64IMBOTH-NEXT: sext.w a3, a2 +; RV64IMBOTH-NEXT: sext.w a1, a1 +; RV64IMBOTH-NEXT: maxu a1, a1, a3 +; RV64IMBOTH-NEXT: czero.nez a2, a2, a0 +; RV64IMBOTH-NEXT: czero.eqz a0, a1, a0 +; RV64IMBOTH-NEXT: or a0, a0, a2 +; RV64IMBOTH-NEXT: 
ret +entry: + %c = call i32 @llvm.umax(i32 %a, i32 %b) + %res = select i1 %cond, i32 %c, i32 %b + ret i32 %res +} + +define i32 @select_umax_2(i1 zeroext %cond, i32 %a, i32 %b) { +; RV32IM-LABEL: select_umax_2: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: mv a3, a1 +; RV32IM-NEXT: bgeu a2, a1, .LBB5_3 +; RV32IM-NEXT: # %bb.1: # %entry +; RV32IM-NEXT: beqz a0, .LBB5_4 +; RV32IM-NEXT: .LBB5_2: # %entry +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB5_3: # %entry +; RV32IM-NEXT: mv a3, a2 +; RV32IM-NEXT: bnez a0, .LBB5_2 +; RV32IM-NEXT: .LBB5_4: # %entry +; RV32IM-NEXT: mv a0, a3 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_umax_2: +; RV64IM: # %bb.0: # %entry +; RV64IM-NEXT: sext.w a3, a1 +; RV64IM-NEXT: sext.w a2, a2 +; RV64IM-NEXT: bgeu a2, a3, .LBB5_3 +; RV64IM-NEXT: # %bb.1: # %entry +; RV64IM-NEXT: beqz a0, .LBB5_4 +; RV64IM-NEXT: .LBB5_2: # %entry +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB5_3: # %entry +; RV64IM-NEXT: mv a3, a2 +; RV64IM-NEXT: bnez a0, .LBB5_2 +; RV64IM-NEXT: .LBB5_4: # %entry +; RV64IM-NEXT: mv a0, a3 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_umax_2: +; RV32IMZBB: # %bb.0: # %entry +; RV32IMZBB-NEXT: bnez a0, .LBB5_2 +; RV32IMZBB-NEXT: # %bb.1: # %entry +; RV32IMZBB-NEXT: maxu a1, a1, a2 +; RV32IMZBB-NEXT: .LBB5_2: # %entry +; RV32IMZBB-NEXT: mv a0, a1 +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_umax_2: +; RV64IMZBB: # %bb.0: # %entry +; RV64IMZBB-NEXT: bnez a0, .LBB5_2 +; RV64IMZBB-NEXT: # %bb.1: # %entry +; RV64IMZBB-NEXT: sext.w a2, a2 +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: maxu a1, a1, a2 +; RV64IMZBB-NEXT: .LBB5_2: # %entry +; RV64IMZBB-NEXT: mv a0, a1 +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_umax_2: +; RV32IMZICOND: # %bb.0: # %entry +; RV32IMZICOND-NEXT: sltu a3, a2, a1 +; RV32IMZICOND-NEXT: czero.nez a2, a2, a3 +; RV32IMZICOND-NEXT: czero.eqz a3, a1, a3 +; RV32IMZICOND-NEXT: or a2, a3, a2 +; RV32IMZICOND-NEXT: czero.nez a2, a2, a0 +; 
RV32IMZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32IMZICOND-NEXT: or a0, a0, a2 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_umax_2: +; RV64IMZICOND: # %bb.0: # %entry +; RV64IMZICOND-NEXT: sext.w a3, a1 +; RV64IMZICOND-NEXT: sext.w a2, a2 +; RV64IMZICOND-NEXT: sltu a4, a2, a3 +; RV64IMZICOND-NEXT: czero.nez a2, a2, a4 +; RV64IMZICOND-NEXT: czero.eqz a3, a3, a4 +; RV64IMZICOND-NEXT: or a2, a3, a2 +; RV64IMZICOND-NEXT: czero.nez a2, a2, a0 +; RV64IMZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64IMZICOND-NEXT: or a0, a0, a2 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_umax_2: +; RV32IMBOTH: # %bb.0: # %entry +; RV32IMBOTH-NEXT: maxu a2, a1, a2 +; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV32IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV32IMBOTH-NEXT: or a0, a1, a0 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_umax_2: +; RV64IMBOTH: # %bb.0: # %entry +; RV64IMBOTH-NEXT: sext.w a2, a2 +; RV64IMBOTH-NEXT: sext.w a3, a1 +; RV64IMBOTH-NEXT: maxu a2, a3, a2 +; RV64IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV64IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV64IMBOTH-NEXT: or a0, a1, a0 +; RV64IMBOTH-NEXT: ret +entry: + %c = call i32 @llvm.umax(i32 %a, i32 %b) + %res = select i1 %cond, i32 %a, i32 %c + ret i32 %res +} + +define i32 @select_umax_3(i1 zeroext %cond, i32 %a) { +; RV32IM-LABEL: select_umax_3: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: li a3, 32 +; RV32IM-NEXT: mv a2, a1 +; RV32IM-NEXT: bgeu a3, a1, .LBB6_3 +; RV32IM-NEXT: # %bb.1: # %entry +; RV32IM-NEXT: beqz a0, .LBB6_4 +; RV32IM-NEXT: .LBB6_2: # %entry +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB6_3: # %entry +; RV32IM-NEXT: li a2, 32 +; RV32IM-NEXT: bnez a0, .LBB6_2 +; RV32IM-NEXT: .LBB6_4: # %entry +; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_umax_3: +; RV64IM: # %bb.0: # %entry +; RV64IM-NEXT: sext.w a2, a1 +; RV64IM-NEXT: li a3, 32 +; RV64IM-NEXT: bgeu a3, a2, .LBB6_3 +; RV64IM-NEXT: # %bb.1: # %entry +; RV64IM-NEXT: beqz a0, .LBB6_4 +; RV64IM-NEXT: 
.LBB6_2: # %entry +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB6_3: # %entry +; RV64IM-NEXT: li a2, 32 +; RV64IM-NEXT: bnez a0, .LBB6_2 +; RV64IM-NEXT: .LBB6_4: # %entry +; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_umax_3: +; RV32IMZBB: # %bb.0: # %entry +; RV32IMZBB-NEXT: bnez a0, .LBB6_2 +; RV32IMZBB-NEXT: # %bb.1: # %entry +; RV32IMZBB-NEXT: li a0, 32 +; RV32IMZBB-NEXT: maxu a1, a1, a0 +; RV32IMZBB-NEXT: .LBB6_2: # %entry +; RV32IMZBB-NEXT: mv a0, a1 +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_umax_3: +; RV64IMZBB: # %bb.0: # %entry +; RV64IMZBB-NEXT: bnez a0, .LBB6_2 +; RV64IMZBB-NEXT: # %bb.1: # %entry +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: li a0, 32 +; RV64IMZBB-NEXT: maxu a1, a1, a0 +; RV64IMZBB-NEXT: .LBB6_2: # %entry +; RV64IMZBB-NEXT: mv a0, a1 +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_umax_3: +; RV32IMZICOND: # %bb.0: # %entry +; RV32IMZICOND-NEXT: sltiu a2, a1, 33 +; RV32IMZICOND-NEXT: addi a3, a1, -32 +; RV32IMZICOND-NEXT: czero.nez a2, a3, a2 +; RV32IMZICOND-NEXT: addi a2, a2, 32 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV32IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV32IMZICOND-NEXT: or a0, a1, a0 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_umax_3: +; RV64IMZICOND: # %bb.0: # %entry +; RV64IMZICOND-NEXT: sext.w a2, a1 +; RV64IMZICOND-NEXT: sltiu a3, a2, 33 +; RV64IMZICOND-NEXT: addi a2, a2, -32 +; RV64IMZICOND-NEXT: czero.nez a2, a2, a3 +; RV64IMZICOND-NEXT: addi a2, a2, 32 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV64IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV64IMZICOND-NEXT: or a0, a1, a0 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_umax_3: +; RV32IMBOTH: # %bb.0: # %entry +; RV32IMBOTH-NEXT: li a2, 32 +; RV32IMBOTH-NEXT: maxu a2, a1, a2 +; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV32IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV32IMBOTH-NEXT: or a0, a1, a0 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_umax_3: +; RV64IMBOTH: # 
%bb.0: # %entry +; RV64IMBOTH-NEXT: sext.w a2, a1 +; RV64IMBOTH-NEXT: li a3, 32 +; RV64IMBOTH-NEXT: maxu a2, a2, a3 +; RV64IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV64IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV64IMBOTH-NEXT: or a0, a1, a0 +; RV64IMBOTH-NEXT: ret +entry: + %c = call i32 @llvm.umax(i32 %a, i32 32) + %res = select i1 %cond, i32 %a, i32 %c + ret i32 %res +} + +define i32 @select_umax_4(i1 zeroext %cond, i32 %x) { +; RV32IM-LABEL: select_umax_4: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a2, 128 +; RV32IM-NEXT: bgeu a2, a1, .LBB7_3 +; RV32IM-NEXT: # %bb.1: +; RV32IM-NEXT: beqz a0, .LBB7_4 +; RV32IM-NEXT: .LBB7_2: +; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB7_3: +; RV32IM-NEXT: li a1, 128 +; RV32IM-NEXT: bnez a0, .LBB7_2 +; RV32IM-NEXT: .LBB7_4: +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_umax_4: +; RV64IM: # %bb.0: +; RV64IM-NEXT: sext.w a2, a1 +; RV64IM-NEXT: li a1, 128 +; RV64IM-NEXT: bgeu a1, a2, .LBB7_3 +; RV64IM-NEXT: # %bb.1: +; RV64IM-NEXT: beqz a0, .LBB7_4 +; RV64IM-NEXT: .LBB7_2: +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB7_3: +; RV64IM-NEXT: li a2, 128 +; RV64IM-NEXT: bnez a0, .LBB7_2 +; RV64IM-NEXT: .LBB7_4: +; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_umax_4: +; RV32IMZBB: # %bb.0: +; RV32IMZBB-NEXT: mv a2, a0 +; RV32IMZBB-NEXT: li a0, 128 +; RV32IMZBB-NEXT: bnez a2, .LBB7_2 +; RV32IMZBB-NEXT: # %bb.1: +; RV32IMZBB-NEXT: maxu a0, a1, a0 +; RV32IMZBB-NEXT: .LBB7_2: +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_umax_4: +; RV64IMZBB: # %bb.0: +; RV64IMZBB-NEXT: mv a2, a0 +; RV64IMZBB-NEXT: li a0, 128 +; RV64IMZBB-NEXT: bnez a2, .LBB7_2 +; RV64IMZBB-NEXT: # %bb.1: +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: maxu a0, a1, a0 +; RV64IMZBB-NEXT: .LBB7_2: +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_umax_4: +; RV32IMZICOND: # %bb.0: +; RV32IMZICOND-NEXT: sltiu a2, a1, 129 +; RV32IMZICOND-NEXT: addi a1, a1, -128 +; RV32IMZICOND-NEXT: 
czero.nez a1, a1, a2 +; RV32IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV32IMZICOND-NEXT: addi a0, a0, 128 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_umax_4: +; RV64IMZICOND: # %bb.0: +; RV64IMZICOND-NEXT: sext.w a1, a1 +; RV64IMZICOND-NEXT: sltiu a2, a1, 129 +; RV64IMZICOND-NEXT: addi a1, a1, -128 +; RV64IMZICOND-NEXT: czero.nez a1, a1, a2 +; RV64IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV64IMZICOND-NEXT: addi a0, a0, 128 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_umax_4: +; RV32IMBOTH: # %bb.0: +; RV32IMBOTH-NEXT: li a2, 128 +; RV32IMBOTH-NEXT: maxu a1, a1, a2 +; RV32IMBOTH-NEXT: addi a1, a1, -128 +; RV32IMBOTH-NEXT: czero.nez a0, a1, a0 +; RV32IMBOTH-NEXT: addi a0, a0, 128 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_umax_4: +; RV64IMBOTH: # %bb.0: +; RV64IMBOTH-NEXT: sext.w a1, a1 +; RV64IMBOTH-NEXT: li a2, 128 +; RV64IMBOTH-NEXT: maxu a1, a1, a2 +; RV64IMBOTH-NEXT: addi a1, a1, -128 +; RV64IMBOTH-NEXT: czero.nez a0, a1, a0 +; RV64IMBOTH-NEXT: addi a0, a0, 128 +; RV64IMBOTH-NEXT: ret + %minmax = call i32 @llvm.umax(i32 %x, i32 128) + %sel = select i1 %cond, i32 128, i32 %minmax + ret i32 %sel +} + +define i32 @select_smin_1(i1 zeroext %cond, i32 %a, i32 %b) { +; RV32IM-LABEL: select_smin_1: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: bge a1, a2, .LBB8_3 +; RV32IM-NEXT: # %bb.1: # %entry +; RV32IM-NEXT: beqz a0, .LBB8_4 +; RV32IM-NEXT: .LBB8_2: # %entry +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB8_3: # %entry +; RV32IM-NEXT: mv a1, a2 +; RV32IM-NEXT: bnez a0, .LBB8_2 +; RV32IM-NEXT: .LBB8_4: # %entry +; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_smin_1: +; RV64IM: # %bb.0: # %entry +; RV64IM-NEXT: sext.w a3, a2 +; RV64IM-NEXT: sext.w a1, a1 +; RV64IM-NEXT: bge a1, a3, .LBB8_3 +; RV64IM-NEXT: # %bb.1: # %entry +; RV64IM-NEXT: beqz a0, .LBB8_4 +; RV64IM-NEXT: .LBB8_2: # %entry +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB8_3: # %entry +; RV64IM-NEXT: mv a1, a3 
+; RV64IM-NEXT: bnez a0, .LBB8_2 +; RV64IM-NEXT: .LBB8_4: # %entry +; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_smin_1: +; RV32IMZBB: # %bb.0: # %entry +; RV32IMZBB-NEXT: beqz a0, .LBB8_2 +; RV32IMZBB-NEXT: # %bb.1: +; RV32IMZBB-NEXT: min a2, a1, a2 +; RV32IMZBB-NEXT: .LBB8_2: # %entry +; RV32IMZBB-NEXT: mv a0, a2 +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_smin_1: +; RV64IMZBB: # %bb.0: # %entry +; RV64IMZBB-NEXT: beqz a0, .LBB8_2 +; RV64IMZBB-NEXT: # %bb.1: +; RV64IMZBB-NEXT: sext.w a2, a2 +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: min a2, a1, a2 +; RV64IMZBB-NEXT: .LBB8_2: # %entry +; RV64IMZBB-NEXT: mv a0, a2 +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_smin_1: +; RV32IMZICOND: # %bb.0: # %entry +; RV32IMZICOND-NEXT: slt a3, a1, a2 +; RV32IMZICOND-NEXT: czero.nez a4, a2, a3 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a3 +; RV32IMZICOND-NEXT: or a1, a1, a4 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV32IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV32IMZICOND-NEXT: or a0, a1, a0 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_smin_1: +; RV64IMZICOND: # %bb.0: # %entry +; RV64IMZICOND-NEXT: sext.w a3, a2 +; RV64IMZICOND-NEXT: sext.w a1, a1 +; RV64IMZICOND-NEXT: slt a4, a1, a3 +; RV64IMZICOND-NEXT: czero.nez a3, a3, a4 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a4 +; RV64IMZICOND-NEXT: or a1, a1, a3 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV64IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV64IMZICOND-NEXT: or a0, a1, a0 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_smin_1: +; RV32IMBOTH: # %bb.0: # %entry +; RV32IMBOTH-NEXT: min a1, a1, a2 +; RV32IMBOTH-NEXT: czero.nez a2, a2, a0 +; RV32IMBOTH-NEXT: czero.eqz a0, a1, a0 +; RV32IMBOTH-NEXT: or a0, a0, a2 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_smin_1: +; RV64IMBOTH: # %bb.0: # %entry +; RV64IMBOTH-NEXT: sext.w a3, a2 +; RV64IMBOTH-NEXT: sext.w a1, a1 +; RV64IMBOTH-NEXT: min a1, a1, a3 +; RV64IMBOTH-NEXT: czero.nez a2, a2, a0 +; 
RV64IMBOTH-NEXT: czero.eqz a0, a1, a0 +; RV64IMBOTH-NEXT: or a0, a0, a2 +; RV64IMBOTH-NEXT: ret +entry: + %c = call i32 @llvm.smin(i32 %a, i32 %b) + %res = select i1 %cond, i32 %c, i32 %b + ret i32 %res +} + +define i32 @select_smin_2(i1 zeroext %cond, i32 %a, i32 %b) { +; RV32IM-LABEL: select_smin_2: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: mv a3, a1 +; RV32IM-NEXT: bge a1, a2, .LBB9_3 +; RV32IM-NEXT: # %bb.1: # %entry +; RV32IM-NEXT: beqz a0, .LBB9_4 +; RV32IM-NEXT: .LBB9_2: # %entry +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB9_3: # %entry +; RV32IM-NEXT: mv a3, a2 +; RV32IM-NEXT: bnez a0, .LBB9_2 +; RV32IM-NEXT: .LBB9_4: # %entry +; RV32IM-NEXT: mv a0, a3 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_smin_2: +; RV64IM: # %bb.0: # %entry +; RV64IM-NEXT: sext.w a3, a2 +; RV64IM-NEXT: sext.w a2, a1 +; RV64IM-NEXT: bge a2, a3, .LBB9_3 +; RV64IM-NEXT: # %bb.1: # %entry +; RV64IM-NEXT: beqz a0, .LBB9_4 +; RV64IM-NEXT: .LBB9_2: # %entry +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB9_3: # %entry +; RV64IM-NEXT: mv a2, a3 +; RV64IM-NEXT: bnez a0, .LBB9_2 +; RV64IM-NEXT: .LBB9_4: # %entry +; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_smin_2: +; RV32IMZBB: # %bb.0: # %entry +; RV32IMZBB-NEXT: bnez a0, .LBB9_2 +; RV32IMZBB-NEXT: # %bb.1: # %entry +; RV32IMZBB-NEXT: min a1, a1, a2 +; RV32IMZBB-NEXT: .LBB9_2: # %entry +; RV32IMZBB-NEXT: mv a0, a1 +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_smin_2: +; RV64IMZBB: # %bb.0: # %entry +; RV64IMZBB-NEXT: bnez a0, .LBB9_2 +; RV64IMZBB-NEXT: # %bb.1: # %entry +; RV64IMZBB-NEXT: sext.w a2, a2 +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: min a1, a1, a2 +; RV64IMZBB-NEXT: .LBB9_2: # %entry +; RV64IMZBB-NEXT: mv a0, a1 +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_smin_2: +; RV32IMZICOND: # %bb.0: # %entry +; RV32IMZICOND-NEXT: slt a3, a1, a2 +; RV32IMZICOND-NEXT: czero.nez a2, a2, a3 +; RV32IMZICOND-NEXT: czero.eqz a3, a1, a3 +; 
RV32IMZICOND-NEXT: or a2, a3, a2 +; RV32IMZICOND-NEXT: czero.nez a2, a2, a0 +; RV32IMZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32IMZICOND-NEXT: or a0, a0, a2 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_smin_2: +; RV64IMZICOND: # %bb.0: # %entry +; RV64IMZICOND-NEXT: sext.w a2, a2 +; RV64IMZICOND-NEXT: sext.w a3, a1 +; RV64IMZICOND-NEXT: slt a4, a3, a2 +; RV64IMZICOND-NEXT: czero.nez a2, a2, a4 +; RV64IMZICOND-NEXT: czero.eqz a3, a3, a4 +; RV64IMZICOND-NEXT: or a2, a3, a2 +; RV64IMZICOND-NEXT: czero.nez a2, a2, a0 +; RV64IMZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64IMZICOND-NEXT: or a0, a0, a2 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_smin_2: +; RV32IMBOTH: # %bb.0: # %entry +; RV32IMBOTH-NEXT: min a2, a1, a2 +; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV32IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV32IMBOTH-NEXT: or a0, a1, a0 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_smin_2: +; RV64IMBOTH: # %bb.0: # %entry +; RV64IMBOTH-NEXT: sext.w a2, a2 +; RV64IMBOTH-NEXT: sext.w a3, a1 +; RV64IMBOTH-NEXT: min a2, a3, a2 +; RV64IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV64IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV64IMBOTH-NEXT: or a0, a1, a0 +; RV64IMBOTH-NEXT: ret +entry: + %c = call i32 @llvm.smin(i32 %a, i32 %b) + %res = select i1 %cond, i32 %a, i32 %c + ret i32 %res +} + +define i32 @select_smin_3(i1 zeroext %cond, i32 %a) { +; RV32IM-LABEL: select_smin_3: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: li a3, 32 +; RV32IM-NEXT: mv a2, a1 +; RV32IM-NEXT: bge a1, a3, .LBB10_3 +; RV32IM-NEXT: # %bb.1: # %entry +; RV32IM-NEXT: beqz a0, .LBB10_4 +; RV32IM-NEXT: .LBB10_2: # %entry +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB10_3: # %entry +; RV32IM-NEXT: li a2, 32 +; RV32IM-NEXT: bnez a0, .LBB10_2 +; RV32IM-NEXT: .LBB10_4: # %entry +; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_smin_3: +; RV64IM: # %bb.0: # %entry +; RV64IM-NEXT: sext.w a2, a1 +; RV64IM-NEXT: li a3, 32 +; RV64IM-NEXT: bge a2, a3, .LBB10_3 +; 
RV64IM-NEXT: # %bb.1: # %entry +; RV64IM-NEXT: beqz a0, .LBB10_4 +; RV64IM-NEXT: .LBB10_2: # %entry +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB10_3: # %entry +; RV64IM-NEXT: li a2, 32 +; RV64IM-NEXT: bnez a0, .LBB10_2 +; RV64IM-NEXT: .LBB10_4: # %entry +; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_smin_3: +; RV32IMZBB: # %bb.0: # %entry +; RV32IMZBB-NEXT: bnez a0, .LBB10_2 +; RV32IMZBB-NEXT: # %bb.1: # %entry +; RV32IMZBB-NEXT: li a0, 32 +; RV32IMZBB-NEXT: min a1, a1, a0 +; RV32IMZBB-NEXT: .LBB10_2: # %entry +; RV32IMZBB-NEXT: mv a0, a1 +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_smin_3: +; RV64IMZBB: # %bb.0: # %entry +; RV64IMZBB-NEXT: bnez a0, .LBB10_2 +; RV64IMZBB-NEXT: # %bb.1: # %entry +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: li a0, 32 +; RV64IMZBB-NEXT: min a1, a1, a0 +; RV64IMZBB-NEXT: .LBB10_2: # %entry +; RV64IMZBB-NEXT: mv a0, a1 +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_smin_3: +; RV32IMZICOND: # %bb.0: # %entry +; RV32IMZICOND-NEXT: slti a2, a1, 32 +; RV32IMZICOND-NEXT: addi a3, a1, -32 +; RV32IMZICOND-NEXT: czero.eqz a2, a3, a2 +; RV32IMZICOND-NEXT: addi a2, a2, 32 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV32IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV32IMZICOND-NEXT: or a0, a1, a0 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_smin_3: +; RV64IMZICOND: # %bb.0: # %entry +; RV64IMZICOND-NEXT: sext.w a2, a1 +; RV64IMZICOND-NEXT: slti a3, a2, 32 +; RV64IMZICOND-NEXT: addi a2, a2, -32 +; RV64IMZICOND-NEXT: czero.eqz a2, a2, a3 +; RV64IMZICOND-NEXT: addi a2, a2, 32 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV64IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV64IMZICOND-NEXT: or a0, a1, a0 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_smin_3: +; RV32IMBOTH: # %bb.0: # %entry +; RV32IMBOTH-NEXT: li a2, 32 +; RV32IMBOTH-NEXT: min a2, a1, a2 +; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV32IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV32IMBOTH-NEXT: or a0, a1, 
a0 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_smin_3: +; RV64IMBOTH: # %bb.0: # %entry +; RV64IMBOTH-NEXT: sext.w a2, a1 +; RV64IMBOTH-NEXT: li a3, 32 +; RV64IMBOTH-NEXT: min a2, a2, a3 +; RV64IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV64IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV64IMBOTH-NEXT: or a0, a1, a0 +; RV64IMBOTH-NEXT: ret +entry: + %c = call i32 @llvm.smin(i32 %a, i32 32) + %res = select i1 %cond, i32 %a, i32 %c + ret i32 %res +} + +define i32 @select_smin_4(i1 zeroext %cond, i32 %x) { +; RV32IM-LABEL: select_smin_4: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a2, 128 +; RV32IM-NEXT: bge a1, a2, .LBB11_3 +; RV32IM-NEXT: # %bb.1: +; RV32IM-NEXT: beqz a0, .LBB11_4 +; RV32IM-NEXT: .LBB11_2: +; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB11_3: +; RV32IM-NEXT: li a1, 128 +; RV32IM-NEXT: bnez a0, .LBB11_2 +; RV32IM-NEXT: .LBB11_4: +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_smin_4: +; RV64IM: # %bb.0: +; RV64IM-NEXT: sext.w a2, a1 +; RV64IM-NEXT: li a1, 128 +; RV64IM-NEXT: bge a2, a1, .LBB11_3 +; RV64IM-NEXT: # %bb.1: +; RV64IM-NEXT: beqz a0, .LBB11_4 +; RV64IM-NEXT: .LBB11_2: +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB11_3: +; RV64IM-NEXT: li a2, 128 +; RV64IM-NEXT: bnez a0, .LBB11_2 +; RV64IM-NEXT: .LBB11_4: +; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_smin_4: +; RV32IMZBB: # %bb.0: +; RV32IMZBB-NEXT: mv a2, a0 +; RV32IMZBB-NEXT: li a0, 128 +; RV32IMZBB-NEXT: bnez a2, .LBB11_2 +; RV32IMZBB-NEXT: # %bb.1: +; RV32IMZBB-NEXT: min a0, a1, a0 +; RV32IMZBB-NEXT: .LBB11_2: +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_smin_4: +; RV64IMZBB: # %bb.0: +; RV64IMZBB-NEXT: mv a2, a0 +; RV64IMZBB-NEXT: li a0, 128 +; RV64IMZBB-NEXT: bnez a2, .LBB11_2 +; RV64IMZBB-NEXT: # %bb.1: +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: min a0, a1, a0 +; RV64IMZBB-NEXT: .LBB11_2: +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_smin_4: +; RV32IMZICOND: # %bb.0: +; 
RV32IMZICOND-NEXT: slti a2, a1, 128 +; RV32IMZICOND-NEXT: addi a1, a1, -128 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a2 +; RV32IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV32IMZICOND-NEXT: addi a0, a0, 128 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_smin_4: +; RV64IMZICOND: # %bb.0: +; RV64IMZICOND-NEXT: sext.w a1, a1 +; RV64IMZICOND-NEXT: slti a2, a1, 128 +; RV64IMZICOND-NEXT: addi a1, a1, -128 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a2 +; RV64IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV64IMZICOND-NEXT: addi a0, a0, 128 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_smin_4: +; RV32IMBOTH: # %bb.0: +; RV32IMBOTH-NEXT: li a2, 128 +; RV32IMBOTH-NEXT: min a1, a1, a2 +; RV32IMBOTH-NEXT: addi a1, a1, -128 +; RV32IMBOTH-NEXT: czero.nez a0, a1, a0 +; RV32IMBOTH-NEXT: addi a0, a0, 128 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_smin_4: +; RV64IMBOTH: # %bb.0: +; RV64IMBOTH-NEXT: sext.w a1, a1 +; RV64IMBOTH-NEXT: li a2, 128 +; RV64IMBOTH-NEXT: min a1, a1, a2 +; RV64IMBOTH-NEXT: addi a1, a1, -128 +; RV64IMBOTH-NEXT: czero.nez a0, a1, a0 +; RV64IMBOTH-NEXT: addi a0, a0, 128 +; RV64IMBOTH-NEXT: ret + %minmax = call i32 @llvm.smin(i32 %x, i32 128) + %sel = select i1 %cond, i32 128, i32 %minmax + ret i32 %sel +} + +define i32 @select_smax_1(i1 zeroext %cond, i32 %a, i32 %b) { +; RV32IM-LABEL: select_smax_1: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: bge a2, a1, .LBB12_3 +; RV32IM-NEXT: # %bb.1: # %entry +; RV32IM-NEXT: beqz a0, .LBB12_4 +; RV32IM-NEXT: .LBB12_2: # %entry +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB12_3: # %entry +; RV32IM-NEXT: mv a1, a2 +; RV32IM-NEXT: bnez a0, .LBB12_2 +; RV32IM-NEXT: .LBB12_4: # %entry +; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_smax_1: +; RV64IM: # %bb.0: # %entry +; RV64IM-NEXT: sext.w a1, a1 +; RV64IM-NEXT: sext.w a3, a2 +; RV64IM-NEXT: bge a3, a1, .LBB12_3 +; RV64IM-NEXT: # %bb.1: # %entry +; RV64IM-NEXT: beqz a0, .LBB12_4 +; RV64IM-NEXT: .LBB12_2: # %entry 
+; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB12_3: # %entry +; RV64IM-NEXT: mv a1, a3 +; RV64IM-NEXT: bnez a0, .LBB12_2 +; RV64IM-NEXT: .LBB12_4: # %entry +; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_smax_1: +; RV32IMZBB: # %bb.0: # %entry +; RV32IMZBB-NEXT: beqz a0, .LBB12_2 +; RV32IMZBB-NEXT: # %bb.1: +; RV32IMZBB-NEXT: max a2, a1, a2 +; RV32IMZBB-NEXT: .LBB12_2: # %entry +; RV32IMZBB-NEXT: mv a0, a2 +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_smax_1: +; RV64IMZBB: # %bb.0: # %entry +; RV64IMZBB-NEXT: beqz a0, .LBB12_2 +; RV64IMZBB-NEXT: # %bb.1: +; RV64IMZBB-NEXT: sext.w a2, a2 +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: max a2, a1, a2 +; RV64IMZBB-NEXT: .LBB12_2: # %entry +; RV64IMZBB-NEXT: mv a0, a2 +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_smax_1: +; RV32IMZICOND: # %bb.0: # %entry +; RV32IMZICOND-NEXT: slt a3, a2, a1 +; RV32IMZICOND-NEXT: czero.nez a4, a2, a3 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a3 +; RV32IMZICOND-NEXT: or a1, a1, a4 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV32IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV32IMZICOND-NEXT: or a0, a1, a0 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_smax_1: +; RV64IMZICOND: # %bb.0: # %entry +; RV64IMZICOND-NEXT: sext.w a1, a1 +; RV64IMZICOND-NEXT: sext.w a3, a2 +; RV64IMZICOND-NEXT: slt a4, a3, a1 +; RV64IMZICOND-NEXT: czero.nez a3, a3, a4 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a4 +; RV64IMZICOND-NEXT: or a1, a1, a3 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV64IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV64IMZICOND-NEXT: or a0, a1, a0 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_smax_1: +; RV32IMBOTH: # %bb.0: # %entry +; RV32IMBOTH-NEXT: max a1, a1, a2 +; RV32IMBOTH-NEXT: czero.nez a2, a2, a0 +; RV32IMBOTH-NEXT: czero.eqz a0, a1, a0 +; RV32IMBOTH-NEXT: or a0, a0, a2 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_smax_1: +; RV64IMBOTH: # %bb.0: # %entry +; RV64IMBOTH-NEXT: sext.w a3, a2 +; 
RV64IMBOTH-NEXT: sext.w a1, a1 +; RV64IMBOTH-NEXT: max a1, a1, a3 +; RV64IMBOTH-NEXT: czero.nez a2, a2, a0 +; RV64IMBOTH-NEXT: czero.eqz a0, a1, a0 +; RV64IMBOTH-NEXT: or a0, a0, a2 +; RV64IMBOTH-NEXT: ret +entry: + %c = call i32 @llvm.smax(i32 %a, i32 %b) + %res = select i1 %cond, i32 %c, i32 %b + ret i32 %res +} + +define i32 @select_smax_2(i1 zeroext %cond, i32 %a, i32 %b) { +; RV32IM-LABEL: select_smax_2: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: mv a3, a1 +; RV32IM-NEXT: bge a2, a1, .LBB13_3 +; RV32IM-NEXT: # %bb.1: # %entry +; RV32IM-NEXT: beqz a0, .LBB13_4 +; RV32IM-NEXT: .LBB13_2: # %entry +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB13_3: # %entry +; RV32IM-NEXT: mv a3, a2 +; RV32IM-NEXT: bnez a0, .LBB13_2 +; RV32IM-NEXT: .LBB13_4: # %entry +; RV32IM-NEXT: mv a0, a3 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_smax_2: +; RV64IM: # %bb.0: # %entry +; RV64IM-NEXT: sext.w a3, a1 +; RV64IM-NEXT: sext.w a2, a2 +; RV64IM-NEXT: bge a2, a3, .LBB13_3 +; RV64IM-NEXT: # %bb.1: # %entry +; RV64IM-NEXT: beqz a0, .LBB13_4 +; RV64IM-NEXT: .LBB13_2: # %entry +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB13_3: # %entry +; RV64IM-NEXT: mv a3, a2 +; RV64IM-NEXT: bnez a0, .LBB13_2 +; RV64IM-NEXT: .LBB13_4: # %entry +; RV64IM-NEXT: mv a0, a3 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_smax_2: +; RV32IMZBB: # %bb.0: # %entry +; RV32IMZBB-NEXT: bnez a0, .LBB13_2 +; RV32IMZBB-NEXT: # %bb.1: # %entry +; RV32IMZBB-NEXT: max a1, a1, a2 +; RV32IMZBB-NEXT: .LBB13_2: # %entry +; RV32IMZBB-NEXT: mv a0, a1 +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_smax_2: +; RV64IMZBB: # %bb.0: # %entry +; RV64IMZBB-NEXT: bnez a0, .LBB13_2 +; RV64IMZBB-NEXT: # %bb.1: # %entry +; RV64IMZBB-NEXT: sext.w a2, a2 +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: max a1, a1, a2 +; RV64IMZBB-NEXT: .LBB13_2: # %entry +; RV64IMZBB-NEXT: mv a0, a1 +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_smax_2: +; RV32IMZICOND: # %bb.0: # %entry +; 
RV32IMZICOND-NEXT: slt a3, a2, a1 +; RV32IMZICOND-NEXT: czero.nez a2, a2, a3 +; RV32IMZICOND-NEXT: czero.eqz a3, a1, a3 +; RV32IMZICOND-NEXT: or a2, a3, a2 +; RV32IMZICOND-NEXT: czero.nez a2, a2, a0 +; RV32IMZICOND-NEXT: czero.eqz a0, a1, a0 +; RV32IMZICOND-NEXT: or a0, a0, a2 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_smax_2: +; RV64IMZICOND: # %bb.0: # %entry +; RV64IMZICOND-NEXT: sext.w a3, a1 +; RV64IMZICOND-NEXT: sext.w a2, a2 +; RV64IMZICOND-NEXT: slt a4, a2, a3 +; RV64IMZICOND-NEXT: czero.nez a2, a2, a4 +; RV64IMZICOND-NEXT: czero.eqz a3, a3, a4 +; RV64IMZICOND-NEXT: or a2, a3, a2 +; RV64IMZICOND-NEXT: czero.nez a2, a2, a0 +; RV64IMZICOND-NEXT: czero.eqz a0, a1, a0 +; RV64IMZICOND-NEXT: or a0, a0, a2 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_smax_2: +; RV32IMBOTH: # %bb.0: # %entry +; RV32IMBOTH-NEXT: max a2, a1, a2 +; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV32IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV32IMBOTH-NEXT: or a0, a1, a0 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_smax_2: +; RV64IMBOTH: # %bb.0: # %entry +; RV64IMBOTH-NEXT: sext.w a2, a2 +; RV64IMBOTH-NEXT: sext.w a3, a1 +; RV64IMBOTH-NEXT: max a2, a3, a2 +; RV64IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV64IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV64IMBOTH-NEXT: or a0, a1, a0 +; RV64IMBOTH-NEXT: ret +entry: + %c = call i32 @llvm.smax(i32 %a, i32 %b) + %res = select i1 %cond, i32 %a, i32 %c + ret i32 %res +} + +define i32 @select_smax_3(i1 zeroext %cond, i32 %a) { +; RV32IM-LABEL: select_smax_3: +; RV32IM: # %bb.0: # %entry +; RV32IM-NEXT: li a3, 32 +; RV32IM-NEXT: mv a2, a1 +; RV32IM-NEXT: bge a3, a1, .LBB14_3 +; RV32IM-NEXT: # %bb.1: # %entry +; RV32IM-NEXT: beqz a0, .LBB14_4 +; RV32IM-NEXT: .LBB14_2: # %entry +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB14_3: # %entry +; RV32IM-NEXT: li a2, 32 +; RV32IM-NEXT: bnez a0, .LBB14_2 +; RV32IM-NEXT: .LBB14_4: # %entry +; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_smax_3: 
+; RV64IM: # %bb.0: # %entry +; RV64IM-NEXT: sext.w a2, a1 +; RV64IM-NEXT: li a3, 32 +; RV64IM-NEXT: bge a3, a2, .LBB14_3 +; RV64IM-NEXT: # %bb.1: # %entry +; RV64IM-NEXT: beqz a0, .LBB14_4 +; RV64IM-NEXT: .LBB14_2: # %entry +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB14_3: # %entry +; RV64IM-NEXT: li a2, 32 +; RV64IM-NEXT: bnez a0, .LBB14_2 +; RV64IM-NEXT: .LBB14_4: # %entry +; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_smax_3: +; RV32IMZBB: # %bb.0: # %entry +; RV32IMZBB-NEXT: bnez a0, .LBB14_2 +; RV32IMZBB-NEXT: # %bb.1: # %entry +; RV32IMZBB-NEXT: li a0, 32 +; RV32IMZBB-NEXT: max a1, a1, a0 +; RV32IMZBB-NEXT: .LBB14_2: # %entry +; RV32IMZBB-NEXT: mv a0, a1 +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_smax_3: +; RV64IMZBB: # %bb.0: # %entry +; RV64IMZBB-NEXT: bnez a0, .LBB14_2 +; RV64IMZBB-NEXT: # %bb.1: # %entry +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: li a0, 32 +; RV64IMZBB-NEXT: max a1, a1, a0 +; RV64IMZBB-NEXT: .LBB14_2: # %entry +; RV64IMZBB-NEXT: mv a0, a1 +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_smax_3: +; RV32IMZICOND: # %bb.0: # %entry +; RV32IMZICOND-NEXT: slti a2, a1, 33 +; RV32IMZICOND-NEXT: addi a3, a1, -32 +; RV32IMZICOND-NEXT: czero.nez a2, a3, a2 +; RV32IMZICOND-NEXT: addi a2, a2, 32 +; RV32IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV32IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV32IMZICOND-NEXT: or a0, a1, a0 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_smax_3: +; RV64IMZICOND: # %bb.0: # %entry +; RV64IMZICOND-NEXT: sext.w a2, a1 +; RV64IMZICOND-NEXT: slti a3, a2, 33 +; RV64IMZICOND-NEXT: addi a2, a2, -32 +; RV64IMZICOND-NEXT: czero.nez a2, a2, a3 +; RV64IMZICOND-NEXT: addi a2, a2, 32 +; RV64IMZICOND-NEXT: czero.eqz a1, a1, a0 +; RV64IMZICOND-NEXT: czero.nez a0, a2, a0 +; RV64IMZICOND-NEXT: or a0, a1, a0 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_smax_3: +; RV32IMBOTH: # %bb.0: # %entry +; RV32IMBOTH-NEXT: li a2, 32 +; RV32IMBOTH-NEXT: max 
a2, a1, a2 +; RV32IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV32IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV32IMBOTH-NEXT: or a0, a1, a0 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_smax_3: +; RV64IMBOTH: # %bb.0: # %entry +; RV64IMBOTH-NEXT: sext.w a2, a1 +; RV64IMBOTH-NEXT: li a3, 32 +; RV64IMBOTH-NEXT: max a2, a2, a3 +; RV64IMBOTH-NEXT: czero.eqz a1, a1, a0 +; RV64IMBOTH-NEXT: czero.nez a0, a2, a0 +; RV64IMBOTH-NEXT: or a0, a1, a0 +; RV64IMBOTH-NEXT: ret +entry: + %c = call i32 @llvm.smax(i32 %a, i32 32) + %res = select i1 %cond, i32 %a, i32 %c + ret i32 %res +} + +define i32 @select_smax_4(i1 zeroext %cond, i32 %x) { +; RV32IM-LABEL: select_smax_4: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a2, 128 +; RV32IM-NEXT: bge a2, a1, .LBB15_3 +; RV32IM-NEXT: # %bb.1: +; RV32IM-NEXT: beqz a0, .LBB15_4 +; RV32IM-NEXT: .LBB15_2: +; RV32IM-NEXT: mv a0, a2 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB15_3: +; RV32IM-NEXT: li a1, 128 +; RV32IM-NEXT: bnez a0, .LBB15_2 +; RV32IM-NEXT: .LBB15_4: +; RV32IM-NEXT: mv a0, a1 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_smax_4: +; RV64IM: # %bb.0: +; RV64IM-NEXT: sext.w a2, a1 +; RV64IM-NEXT: li a1, 128 +; RV64IM-NEXT: bge a1, a2, .LBB15_3 +; RV64IM-NEXT: # %bb.1: +; RV64IM-NEXT: beqz a0, .LBB15_4 +; RV64IM-NEXT: .LBB15_2: +; RV64IM-NEXT: mv a0, a1 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB15_3: +; RV64IM-NEXT: li a2, 128 +; RV64IM-NEXT: bnez a0, .LBB15_2 +; RV64IM-NEXT: .LBB15_4: +; RV64IM-NEXT: mv a0, a2 +; RV64IM-NEXT: ret +; +; RV32IMZBB-LABEL: select_smax_4: +; RV32IMZBB: # %bb.0: +; RV32IMZBB-NEXT: mv a2, a0 +; RV32IMZBB-NEXT: li a0, 128 +; RV32IMZBB-NEXT: bnez a2, .LBB15_2 +; RV32IMZBB-NEXT: # %bb.1: +; RV32IMZBB-NEXT: max a0, a1, a0 +; RV32IMZBB-NEXT: .LBB15_2: +; RV32IMZBB-NEXT: ret +; +; RV64IMZBB-LABEL: select_smax_4: +; RV64IMZBB: # %bb.0: +; RV64IMZBB-NEXT: mv a2, a0 +; RV64IMZBB-NEXT: li a0, 128 +; RV64IMZBB-NEXT: bnez a2, .LBB15_2 +; RV64IMZBB-NEXT: # %bb.1: +; RV64IMZBB-NEXT: sext.w a1, a1 +; RV64IMZBB-NEXT: max a0, a1, a0 +; 
RV64IMZBB-NEXT: .LBB15_2: +; RV64IMZBB-NEXT: ret +; +; RV32IMZICOND-LABEL: select_smax_4: +; RV32IMZICOND: # %bb.0: +; RV32IMZICOND-NEXT: slti a2, a1, 129 +; RV32IMZICOND-NEXT: addi a1, a1, -128 +; RV32IMZICOND-NEXT: czero.nez a1, a1, a2 +; RV32IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV32IMZICOND-NEXT: addi a0, a0, 128 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_smax_4: +; RV64IMZICOND: # %bb.0: +; RV64IMZICOND-NEXT: sext.w a1, a1 +; RV64IMZICOND-NEXT: slti a2, a1, 129 +; RV64IMZICOND-NEXT: addi a1, a1, -128 +; RV64IMZICOND-NEXT: czero.nez a1, a1, a2 +; RV64IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV64IMZICOND-NEXT: addi a0, a0, 128 +; RV64IMZICOND-NEXT: ret +; +; RV32IMBOTH-LABEL: select_smax_4: +; RV32IMBOTH: # %bb.0: +; RV32IMBOTH-NEXT: li a2, 128 +; RV32IMBOTH-NEXT: max a1, a1, a2 +; RV32IMBOTH-NEXT: addi a1, a1, -128 +; RV32IMBOTH-NEXT: czero.nez a0, a1, a0 +; RV32IMBOTH-NEXT: addi a0, a0, 128 +; RV32IMBOTH-NEXT: ret +; +; RV64IMBOTH-LABEL: select_smax_4: +; RV64IMBOTH: # %bb.0: +; RV64IMBOTH-NEXT: sext.w a1, a1 +; RV64IMBOTH-NEXT: li a2, 128 +; RV64IMBOTH-NEXT: max a1, a1, a2 +; RV64IMBOTH-NEXT: addi a1, a1, -128 +; RV64IMBOTH-NEXT: czero.nez a0, a1, a0 +; RV64IMBOTH-NEXT: addi a0, a0, 128 +; RV64IMBOTH-NEXT: ret + %minmax = call i32 @llvm.smax(i32 %x, i32 128) + %sel = select i1 %cond, i32 128, i32 %minmax + ret i32 %sel +} diff --git a/llvm/test/FileCheck/var-scope.txt b/llvm/test/FileCheck/var-scope.txt index 9b3ea0e..b65eddb6 100644 --- a/llvm/test/FileCheck/var-scope.txt +++ b/llvm/test/FileCheck/var-scope.txt @@ -3,15 +3,15 @@ ; Reference run: variables remain defined at all time when not using ; --enable-var-scope option. 
-RUN: FileCheck --check-prefixes CHECK,LOCAL3,GLOBAL --input-file %s %s +RUN: FileCheck --check-prefixes CHECK,CHECK-LOCAL-BOTH,CHECK-GLOBAL --input-file %s %s -RUN: FileCheck --check-prefixes CHECK,GLOBAL --enable-var-scope --input-file %s %s -RUN: %ProtectFileCheckOutput not FileCheck --check-prefixes CHECK,LOCAL1 --enable-var-scope --input-file %s %s 2>&1 \ -RUN: | FileCheck --check-prefix ERRUNDEFLOCAL %s -RUN: %ProtectFileCheckOutput not FileCheck --check-prefixes CHECK,LOCAL2 --enable-var-scope --input-file %s %s 2>&1 \ -RUN: | FileCheck --check-prefix ERRUNDEFLOCNUM %s -RUN: %ProtectFileCheckOutput not FileCheck --check-prefixes CHECK,LOCAL3 --enable-var-scope --input-file %s %s 2>&1 \ -RUN: | FileCheck --check-prefixes ERRUNDEFLOCAL,ERRUNDEFLOCNUM %s +RUN: FileCheck --check-prefixes CHECK,CHECK-GLOBAL --enable-var-scope --input-file %s %s +RUN: %ProtectFileCheckOutput not FileCheck --check-prefixes CHECK,CHECK-LOCAL-TEXT --enable-var-scope --input-file %s %s 2>&1 \ +RUN: | FileCheck --implicit-check-not "undefined variable:" --check-prefixes ERRUNDEF,ERRUNDEF-LOCAL %s +RUN: %ProtectFileCheckOutput not FileCheck --check-prefixes CHECK,CHECK-LOCAL-NUM --enable-var-scope --input-file %s %s 2>&1 \ +RUN: | FileCheck --implicit-check-not "undefined variable:" --check-prefixes ERRUNDEF,ERRUNDEF-LOCNUM %s +RUN: %ProtectFileCheckOutput not FileCheck --check-prefixes CHECK,CHECK-LOCAL-BOTH --enable-var-scope --input-file %s %s 2>&1 \ +RUN: | FileCheck --implicit-check-not "undefined variable:" --check-prefixes ERRUNDEF,ERRUNDEF-LOCAL,ERRUNDEF-LOCNUM %s local1 global1 @@ -23,15 +23,47 @@ global2 CHECK: [[LOCAL]][[#LOCNUM+1]] CHECK: [[$GLOBAL]][[#$GLOBNUM+1]] -barrier: -CHECK-LABEL: barrier +// Barrier to clear local variables +barrier1: +CHECK-LABEL: barrier1 local3 global3 -LOCAL1: [[LOCAL]]3 -LOCAL2: local[[#LOCNUM+2]] -LOCAL3: [[LOCAL]][[#LOCNUM+2]] -GLOBAL: [[$GLOBAL]][[#$GLOBNUM+2]] +CHECK-LOCAL-TEXT: [[LOCAL]]3 +CHECK-LOCAL-NUM: local[[#LOCNUM+2]] 
+CHECK-LOCAL-BOTH: [[LOCAL]][[#LOCNUM+2]] +CHECK-GLOBAL: [[$GLOBAL]][[#$GLOBNUM+2]] -ERRUNDEFLOCAL: undefined variable: LOCAL -ERRUNDEFLOCNUM: undefined variable: LOCNUM +// Barrier to continue FileCheck execution even after the first fail +barrier2: +CHECK-LABEL: barrier2 + +// Reassign the variables to check that clearing-after-reassigning works +local4 +global4 +CHECK: [[LOCAL:loc[^[:digit:]]*]][[#LOCNUM:]] +CHECK: [[$GLOBAL:glo[^[:digit:]]*]][[#$GLOBNUM:]] + +// Barrier to clear local variables +barrier3: +CHECK-LABEL: barrier3 + +local5 +global5 +CHECK-LOCAL-TEXT: [[LOCAL]]5 +CHECK-LOCAL-NUM: local[[#LOCNUM+1]] +CHECK-LOCAL-BOTH: [[LOCAL]][[#LOCNUM+1]] +CHECK-GLOBAL: [[$GLOBAL]][[#$GLOBNUM+1]] + + +// Check that the tests fail as expected +ERRUNDEF-LOCAL: undefined variable: LOCAL +ERRUNDEF-LOCNUM: undefined variable: LOCNUM +ERRUNDEF-LOCAL: undefined variable: LOCAL +ERRUNDEF-LOCNUM: undefined variable: LOCNUM + +// Look for "Input was:" to only match the error messages before the input-context. 
+// +// The regex /([[:space:]]|.)*/ matches all remaining characters, +// to avoid fails due to --implicit-check-not +ERRUNDEF: {{^Input was:([[:space:]]|.)*}} diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll index 9181cce6..9ee5484 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll @@ -97,14 +97,12 @@ define i8 @pr141968(i1 %cond, i8 %v) { ; CHECK: [[PRED_SDIV_IF29]]: ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE30]] ; CHECK: [[PRED_SDIV_CONTINUE30]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT31:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT32:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT31]], <16 x i8> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[BROADCAST_SPLAT]], <16 x i8> zeroinitializer, <16 x i8> [[BROADCAST_SPLAT32]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[BROADCAST_SPLAT]], i32 0 +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP18]], i8 0, i8 [[V]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 15 ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] @@ -121,7 +119,7 @@ define i8 @pr141968(i1 %cond, i8 %v) { ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i8 [[IV_NEXT]], 0 ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RET_LCSSA:%.*]] = phi i8 [ [[RET]], %[[LOOP_LATCH]] ], [ [[TMP18]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: 
[[RET_LCSSA:%.*]] = phi i8 [ [[RET]], %[[LOOP_LATCH]] ], [ [[PREDPHI]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i8 [[RET_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll index fe7d725..0c3a7c6 100644 --- a/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll +++ b/llvm/test/Transforms/PreISelIntrinsicLowering/expand-vp.ll @@ -69,6 +69,7 @@ define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i3 %rF = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) %r10 = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) %r11 = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %i0, <8 x i32> %i1, i32 %n) + %r12 = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %i0, <8 x i32> %i1, i32 %n) ret void } @@ -113,6 +114,7 @@ define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, %rF = call <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n) %r10 = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n) %r11 = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %n) + %r12 = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %n) ret void } @@ -325,6 +327,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; LEGAL_LEGAL-NEXT: %rF = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) ; LEGAL_LEGAL-NEXT: %r10 = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n) ; LEGAL_LEGAL-NEXT: %r11 = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %i0, <8 x i32> %i1, i32 %n) +; 
LEGAL_LEGAL-NEXT: %r12 = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %i0, <8 x i32> %i1, i32 %n) ; LEGAL_LEGAL-NEXT: ret void ; LEGAL_LEGAL:define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) { @@ -346,6 +349,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; LEGAL_LEGAL-NEXT: %rF = call <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n) ; LEGAL_LEGAL-NEXT: %r10 = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n) ; LEGAL_LEGAL-NEXT: %r11 = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %n) +; LEGAL_LEGAL-NEXT: %r12 = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %n) ; LEGAL_LEGAL-NEXT: ret void ; LEGAL_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) { @@ -424,6 +428,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; DISCARD_LEGAL-NEXT: [[EVLMASK2:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT2]] ; DISCARD_LEGAL-NEXT: [[NEWMASK2:%.+]] = and <8 x i1> [[EVLMASK2]], %m ; DISCARD_LEGAL-NEXT: %r11 = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> [[NEWMASK2]], <8 x i32> %i0, <8 x i32> %i1, i32 8) +; DISCARD_LEGAL-NEXT: %r12 = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %i0, <8 x i32> %i1, i32 8) ; DISCARD_LEGAL-NEXT: ret void ; TODO compute vscale only once and use caching. 
@@ -441,6 +446,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; DISCARD_LEGAL: %r3 = call <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> [[NEWM]], i32 %scalable_size{{.*}}) ; DISCARD_LEGAL-NOT: %{{.+}} = call <vscale x 4 x i32> @llvm.vp.{{.*}}, i32 %n) ; DISCARD_LEGAL: %r11 = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %scalable_size{{.*}}) +; DISCARD_LEGAL: %r12 = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %scalable_size{{.*}}) ; DISCARD_LEGAL-NEXT: ret void ; DISCARD_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) { @@ -514,6 +520,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) ; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8) ; CONVERT_LEGAL: %r11 = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %{{.*}}, <8 x i32> %i0, <8 x i32> %i1, i32 8) +; CONVERT_LEGAL: %r12 = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %{{.*}}, <8 x i32> %i0, <8 x i32> %i1, i32 8) ; CONVERT_LEGAL: ret void ; Similar to %evl discard, %mask legal but make sure the first VP intrinsic has a legal expansion @@ -525,6 +532,7 @@ define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x ; CONVERT_LEGAL-NEXT: %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> [[NEWM]], i32 %scalable_size) ; CONVERT_LEGAL-NOT: %{{.*}} = call <vscale x 4 x i32> @llvm.vp.{{.*}}, i32 %n) ; CONVERT_LEGAL: %r11 = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> %i0, <vscale x 4 x 
i32> %i1, i32 %scalable_size{{.*}}) +; CONVERT_LEGAL: %r12 = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %{{.*}}, <vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, i32 %scalable_size{{.*}}) ; CONVERT_LEGAL: ret void ; CONVERT_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) { |