Diffstat (limited to 'llvm')
-rw-r--r--  llvm/docs/DirectX/DXContainer.rst | 24
-rw-r--r--  llvm/include/llvm/CodeGen/TargetLowering.h | 17
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 6
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsAArch64.td | 10
-rw-r--r--  llvm/include/llvm/Support/Jobserver.h | 162
-rw-r--r--  llvm/include/llvm/Support/ThreadPool.h | 4
-rw-r--r--  llvm/include/llvm/Support/Threading.h | 18
-rw-r--r--  llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h | 6
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 7
-rw-r--r--  llvm/lib/Support/CMakeLists.txt | 1
-rw-r--r--  llvm/lib/Support/Jobserver.cpp | 259
-rw-r--r--  llvm/lib/Support/Parallel.cpp | 98
-rw-r--r--  llvm/lib/Support/ThreadPool.cpp | 108
-rw-r--r--  llvm/lib/Support/Threading.cpp | 5
-rw-r--r--  llvm/lib/Support/Unix/Jobserver.inc | 195
-rw-r--r--  llvm/lib/Support/Windows/Jobserver.inc | 79
-rw-r--r--  llvm/lib/TableGen/Error.cpp | 58
-rw-r--r--  llvm/lib/TableGen/Main.cpp | 4
-rw-r--r--  llvm/lib/TableGen/Record.cpp | 6
-rw-r--r--  llvm/lib/TableGen/TGParser.cpp | 6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 15
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrFormats.td | 104
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 4
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/AArch64/SMEInstrFormats.td | 12
-rw-r--r--  llvm/lib/Target/AArch64/SVEInstrFormats.td | 106
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 20
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 25
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.h | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 26
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h | 1
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVGISel.td | 12
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 16
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp | 25
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 10
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 10
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 20
-rw-r--r--  llvm/lib/Transforms/Scalar/NewGVN.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 7
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll | 4
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir | 3
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir | 9
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir | 3
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir | 6
-rw-r--r--  llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll | 6
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll | 204
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll | 12
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-vabs.ll | 24
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-vadd.ll | 341
-rw-r--r--  llvm/test/CodeGen/AArch64/combine-sdiv.ll | 137
-rw-r--r--  llvm/test/CodeGen/AArch64/extract-vector-elt.ll | 4
-rw-r--r--  llvm/test/CodeGen/AArch64/fcmp.ll | 18
-rw-r--r--  llvm/test/CodeGen/AArch64/fpclamptosat.ll | 55
-rw-r--r--  llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll | 101
-rw-r--r--  llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll | 12
-rw-r--r--  llvm/test/CodeGen/AArch64/neon-compare-instructions.ll | 113
-rw-r--r--  llvm/test/CodeGen/AArch64/neon-shift-left-long.ll | 2
-rw-r--r--  llvm/test/CodeGen/AArch64/select_cc.ll | 4
-rw-r--r--  llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll | 8
-rw-r--r--  llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll | 24
-rw-r--r--  llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll | 4
-rw-r--r--  llvm/test/CodeGen/AMDGPU/true16-fold.mir | 25
-rw-r--r--  llvm/test/CodeGen/ARM/fpclamptosat.ll | 48
-rw-r--r--  llvm/test/CodeGen/ARM/fpclamptosat_vec.ll | 107
-rw-r--r--  llvm/test/CodeGen/Hexagon/inst_setcc_uno_uo.ll | 93
-rw-r--r--  llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll | 12
-rw-r--r--  llvm/test/CodeGen/RISCV/fpclamptosat.ll | 58
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 202
-rw-r--r--  llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll | 28
-rw-r--r--  llvm/test/CodeGen/WebAssembly/fpclamptosat.ll | 89
-rw-r--r--  llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll | 78
-rw-r--r--  llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll | 6
-rw-r--r--  llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll | 6
-rw-r--r--  llvm/test/CodeGen/X86/fpclamptosat.ll | 45
-rw-r--r--  llvm/test/CodeGen/X86/fpclamptosat_vec.ll | 105
-rw-r--r--  llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/vec-strict-cmp-128.ll | 32
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll | 10
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshl-sub128.ll | 10
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll | 10
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshr-sub128.ll | 10
-rw-r--r--  llvm/test/CodeGen/X86/vector-sext.ll | 4
-rw-r--r--  llvm/test/CodeGen/X86/vector-zext.ll | 6
-rw-r--r--  llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll | 8
-rw-r--r--  llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll | 8
-rw-r--r--  llvm/test/Transforms/InstCombine/masked_intrinsics.ll | 58
-rw-r--r--  llvm/test/Transforms/InstCombine/pr83947.ll | 2
-rw-r--r--  llvm/test/Transforms/InstCombine/select-and-cmp.ll | 44
-rw-r--r--  llvm/test/Transforms/InstCombine/select-or-cmp.ll | 50
-rw-r--r--  llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll | 126
-rw-r--r--  llvm/test/Transforms/NewGVN/pr159918.ll | 21
-rw-r--r--  llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll | 101
-rw-r--r--  llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp | 6
-rw-r--r--  llvm/unittests/Analysis/IR2VecTest.cpp | 4
-rw-r--r--  llvm/unittests/Support/CMakeLists.txt | 1
-rw-r--r--  llvm/unittests/Support/JobserverTest.cpp | 442
-rw-r--r--  llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp | 6
-rw-r--r--  llvm/utils/TableGen/Common/CodeGenRegisters.cpp | 11
-rw-r--r--  llvm/utils/TableGen/Common/InfoByHwMode.cpp | 8
-rw-r--r--  llvm/utils/TableGen/Common/PredicateExpander.cpp | 4
-rw-r--r--  llvm/utils/TableGen/DXILEmitter.cpp | 22
-rw-r--r--  llvm/utils/TableGen/DecoderEmitter.cpp | 4
-rw-r--r--  llvm/utils/TableGen/ExegesisEmitter.cpp | 22
-rw-r--r--  llvm/utils/TableGen/FastISelEmitter.cpp | 6
-rw-r--r--  llvm/utils/TableGen/X86DisassemblerShared.h | 4
-rw-r--r--  llvm/utils/TableGen/X86FoldTablesEmitter.cpp | 18
-rw-r--r--  llvm/utils/TableGen/X86InstrMappingEmitter.cpp | 4
-rw-r--r--  llvm/utils/TableGen/X86MnemonicTables.cpp | 3
-rw-r--r--  llvm/utils/TableGen/X86ModRMFilters.h | 8
-rw-r--r--  llvm/utils/TableGen/X86RecognizableInstr.h | 4
-rw-r--r--  llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni | 2
119 files changed, 3314 insertions, 1297 deletions
diff --git a/llvm/docs/DirectX/DXContainer.rst b/llvm/docs/DirectX/DXContainer.rst
index 17452d9..4473f4e 100644
--- a/llvm/docs/DirectX/DXContainer.rst
+++ b/llvm/docs/DirectX/DXContainer.rst
@@ -530,7 +530,7 @@ but adds a 32-bit access flag.
.. code-block:: c
struct DescriptorRange_V1_0 {
- uint32_t RangeType;
+ dxil::ResourceClass RangeType;
uint32_t NumDescriptors;
uint32_t BaseShaderRegister;
uint32_t RegisterSpace;
@@ -538,12 +538,12 @@ but adds a 32-bit access flag.
};
struct DescriptorRange_V1_1 {
- dxbc::DescriptorRangeType RangeType;
+ dxil::ResourceClass RangeType;
uint32_t NumDescriptors;
uint32_t BaseShaderRegister;
uint32_t RegisterSpace;
- uint32_t OffsetInDescriptorsFromTableStart;
uint32_t Flags;
+ uint32_t OffsetInDescriptorsFromTableStart;
};
Static Samplers
@@ -556,22 +556,26 @@ This section also has a variable size, since it can contain multiple static
sampler definitions. However, each definition is a fixed-size struct,
containing 13 32-bit fields of various enum, float, and integer values.
+In version 1.2, the static sampler matches the version 1.0 static sampler but
+adds a 32-bit access flag, for 14 32-bit fields in total. Version 1.1 is
+identical to version 1.0.
+
.. code-block:: c
struct StaticSamplerDesc {
- FilterMode Filter;
- TextureAddressMode AddressU;
- TextureAddressMode AddressV;
- TextureAddressMode AddressW;
+ dxbc::FilterMode Filter;
+ dxbc::TextureAddressMode AddressU;
+ dxbc::TextureAddressMode AddressV;
+ dxbc::TextureAddressMode AddressW;
float MipLODBias;
uint32_t MaxAnisotropy;
- ComparisonFunc ComparisonFunc;
- StaticBorderColor BorderColor;
+ dxbc::ComparisonFunc ComparisonFunc;
+ dxbc::StaticBorderColor BorderColor;
float MinLOD;
float MaxLOD;
uint32_t ShaderRegister;
uint32_t RegisterSpace;
- ShaderVisibility ShaderVisibility;
+ dxbc::ShaderVisibility ShaderVisibility;
};
SFI0 Part
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 7bbad17..88691b9 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4654,23 +4654,6 @@ public:
return false;
}
- /// Allows the target to handle physreg-carried dependency
- /// in target-specific way. Used from the ScheduleDAGSDNodes to decide whether
- /// to add the edge to the dependency graph.
- /// Def - input: Selection DAG node defining physical register
- /// User - input: Selection DAG node using physical register
- /// Op - input: Number of User operand
- /// PhysReg - inout: set to the physical register if the edge is
- /// necessary, unchanged otherwise
- /// Cost - inout: physical register copy cost.
- /// Returns 'true' if the edge is necessary, 'false' otherwise
- virtual bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
- const TargetRegisterInfo *TRI,
- const TargetInstrInfo *TII,
- MCRegister &PhysReg, int &Cost) const {
- return false;
- }
-
/// Target-specific combining of register parts into its original value
virtual SDValue
joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 0a11617..5331cb5 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -4001,15 +4001,17 @@ public:
/// Keeps track of value of iteration variable for input/scan loop to be
/// used for Scan directive lowering
- llvm::Value *IV;
+ llvm::Value *IV = nullptr;
/// Stores the span of canonical loop being lowered to be used for temporary
/// buffer allocation or Finalization.
- llvm::Value *Span;
+ llvm::Value *Span = nullptr;
ScanInfo() {
ScanBuffPtrs = new llvm::SmallDenseMap<llvm::Value *, llvm::Value *>();
}
+ ScanInfo(ScanInfo &) = delete;
+ ScanInfo &operator=(const ScanInfo &) = delete;
~ScanInfo() { delete (ScanBuffPtrs); }
};
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index fbc92d7..b0269ee 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -162,7 +162,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_2Arg_Scalar_Narrow_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMExtendedType<0>, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
class AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMTruncatedType<0>],
@@ -187,13 +187,13 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_3VectorArg_Scalar_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
class AdvSIMD_CvtFxToFP_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
class AdvSIMD_CvtFPToFx_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
class AdvSIMD_1Arg_Intrinsic
: DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem]>;
@@ -221,7 +221,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
// Arithmetic ops
-let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
+let TargetPrefix = "aarch64" in {
// Vector Add Across Lanes
def int_aarch64_neon_saddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
def int_aarch64_neon_uaddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
diff --git a/llvm/include/llvm/Support/Jobserver.h b/llvm/include/llvm/Support/Jobserver.h
new file mode 100644
index 0000000..6bee3b5
--- /dev/null
+++ b/llvm/include/llvm/Support/Jobserver.h
@@ -0,0 +1,162 @@
+//===- llvm/Support/Jobserver.h - Jobserver Client --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a client for the GNU Make jobserver protocol. This allows
+// LLVM tools to coordinate parallel execution with a parent `make` process.
+//
+// The jobserver protocol is a mechanism for GNU Make to share its pool of
+// available "job slots" with the subprocesses it invokes. This is particularly
+// useful for tools that can perform parallel operations themselves (e.g., a
+// multi-threaded linker or compiler). By participating in this protocol, a
+// tool can ensure the total number of concurrent jobs does not exceed the
+// limit specified by the user (e.g., `make -j8`).
+//
+// How it works:
+//
+// 1. Establishment:
+// A child process discovers the jobserver by inspecting the `MAKEFLAGS`
+// environment variable. If a jobserver is active, this variable will
+// contain a `--jobserver-auth=<value>` argument. The format of `<value>`
+// determines how to communicate with the server.
+//
+// 2. The Implicit Slot:
+// Every command invoked by `make` is granted one "implicit" job slot. This
+// means a tool can always perform at least one unit of work without needing
+// to communicate with the jobserver. This implicit slot should NEVER be
+// released back to the jobserver.
+//
+// 3. Acquiring and Releasing Slots:
+// On POSIX systems, the jobserver is implemented as a pipe. The
+// `--jobserver-auth` value specifies either a path to a named pipe
+// (`fifo:PATH`) or a pair of file descriptors (`R,W`). The pipe is
+// pre-loaded with single-character tokens, one for each available job slot.
+//
+// - To acquire an additional slot, a client reads a single-character token
+// from the pipe.
+// - To release a slot, the client must write the *exact same* character
+// token back to the pipe.
+//
+// It is critical that a client releases all acquired slots before it exits,
+// even in cases of error, to avoid deadlocking the build.
+//
+// Example:
+// A multi-threaded linker invoked by `make -j8` wants to use multiple
+// threads. It first checks for the jobserver. It knows it has one implicit
+// slot, so it can use one thread. It then tries to acquire 7 more slots by
+// reading 7 tokens from the jobserver pipe. If it only receives 3 tokens,
+// it knows it can use a total of 1 (implicit) + 3 (acquired) = 4 threads.
+// Before exiting, it must write the 3 tokens it read back to the pipe.
+//
+// For more context, see:
+// - GNU Make manual on job slots:
+// https://www.gnu.org/software/make/manual/html_node/Job-Slots.html
+// - LLVM RFC discussion on jobserver support:
+// https://discourse.llvm.org/t/rfc-adding-gnu-make-jobserver-
+// support-to-llvm-for-coordinated-parallelism/87034
+// - Ninja’s jobserver support PR:
+// https://github.com/ninja-build/ninja/pull/2506
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_JOBSERVER_H
+#define LLVM_SUPPORT_JOBSERVER_H
+
+#include "llvm/ADT/StringRef.h"
+#include <memory>
+#include <string>
+
+namespace llvm {
+
+/// A JobSlot represents a single job slot that can be acquired from or released
+/// to a jobserver pool. This class is move-only.
+class JobSlot {
+public:
+ /// Default constructor creates an invalid instance.
+ JobSlot() = default;
+
+ // Move operations are allowed.
+ JobSlot(JobSlot &&Other) noexcept : Value(Other.Value) {
+ Other.Value = kInvalidValue;
+ }
+ JobSlot &operator=(JobSlot &&Other) noexcept {
+ if (this != &Other) {
+ this->Value = Other.Value;
+ Other.Value = kInvalidValue;
+ }
+ return *this;
+ }
+
+ // Copy operations are disallowed.
+ JobSlot(const JobSlot &) = delete;
+ JobSlot &operator=(const JobSlot &) = delete;
+
+ /// Returns true if this instance is valid (either implicit or explicit).
+ bool isValid() const { return Value >= 0; }
+
+ /// Returns true if this instance represents the implicit job slot.
+ bool isImplicit() const { return Value == kImplicitValue; }
+
+ static JobSlot createExplicit(uint8_t V) {
+ return JobSlot(static_cast<int16_t>(V));
+ }
+
+ static JobSlot createImplicit() { return JobSlot(kImplicitValue); }
+
+ uint8_t getExplicitValue() const;
+ bool isExplicit() const { return isValid() && !isImplicit(); }
+
+private:
+ friend class JobserverClient;
+ friend class JobserverClientImpl;
+
+ JobSlot(int16_t V) : Value(V) {}
+
+ /// The jobserver pipe carries explicit tokens (bytes 0–255). We reserve two
+ /// sentinels in Value for special cases:
+ /// kInvalidValue (-1): no slot held
+ /// kImplicitValue (INT16_MAX): implicit slot granted at startup (no pipe
+ /// I/O)
+ ///
+ /// We use int16_t so Value can store all 0–255 explicit tokens plus both
+ /// sentinels without overflow; it also enforces a fixed 16-bit width and
+ /// avoids unsigned/signed mix-ups.
+ static constexpr int16_t kInvalidValue = -1;
+ static constexpr int16_t kImplicitValue = INT16_MAX;
+ int16_t Value = kInvalidValue;
+};
+
+/// The public interface for a jobserver client.
+/// This client is a lazy-initialized singleton that is created on first use.
+class JobserverClient {
+public:
+ virtual ~JobserverClient();
+
+ /// Tries to acquire a job slot from the pool. On failure (e.g., if the pool
+ /// is empty), this returns an invalid JobSlot instance. The first successful
+ /// call will always return the implicit slot.
+ virtual JobSlot tryAcquire() = 0;
+
+ /// Releases a job slot back to the pool.
+ virtual void release(JobSlot Slot) = 0;
+
+ /// Returns the number of job slots available, as determined on first use.
+ /// This value is cached. Returns 0 if no jobserver is active.
+ virtual unsigned getNumJobs() const = 0;
+
+ /// Returns the singleton instance of the JobserverClient.
+ /// The instance is created on the first call to this function.
+ /// Returns a nullptr if no jobserver is configured or an error occurs.
+ static JobserverClient *getInstance();
+
+ /// Resets the singleton instance. For testing purposes only.
+ static void resetForTesting();
+};
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_JOBSERVER_H
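To make the acquire/release contract concrete, here is a minimal sketch of the "probe for slots, then return them" pattern from the linker example in the header comment above. It is illustrative only: countUsableThreads is a hypothetical helper, and only the JobserverClient/JobSlot API comes from this header.

    // Hedged sketch: count how many job slots are currently available.
    #include "llvm/Support/Jobserver.h"
    #include <vector>

    unsigned countUsableThreads(unsigned Wanted) {
      llvm::JobserverClient *Client = llvm::JobserverClient::getInstance();
      if (!Client)
        return Wanted; // No jobserver configured; no external limit applies.

      std::vector<llvm::JobSlot> Held;
      // The first successful tryAcquire() returns the implicit slot, so at
      // least one unit of work is always possible.
      for (unsigned I = 0; I < Wanted; ++I) {
        llvm::JobSlot S = Client->tryAcquire();
        if (!S.isValid())
          break;
        Held.push_back(std::move(S));
      }
      unsigned Usable = static_cast<unsigned>(Held.size());
      // Return every slot before doing real work elsewhere; explicit tokens
      // go back to the pipe, releasing the implicit slot resets a flag.
      for (llvm::JobSlot &S : Held)
        Client->release(std::move(S));
      return Usable;
    }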
diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h
index c26681c..c20efc7 100644
--- a/llvm/include/llvm/Support/ThreadPool.h
+++ b/llvm/include/llvm/Support/ThreadPool.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Jobserver.h"
#include "llvm/Support/RWMutex.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/thread.h"
@@ -180,6 +181,7 @@ private:
void grow(int requested);
void processTasks(ThreadPoolTaskGroup *WaitingForGroup);
+ void processTasksWithJobserver();
/// Threads in flight
std::vector<llvm::thread> Threads;
@@ -208,6 +210,8 @@ private:
/// Maximum number of threads to potentially grow this pool to.
const unsigned MaxThreadCount;
+
+ JobserverClient *TheJobserver = nullptr;
};
#endif // LLVM_ENABLE_THREADS
diff --git a/llvm/include/llvm/Support/Threading.h b/llvm/include/llvm/Support/Threading.h
index d3fe0a5..8884680 100644
--- a/llvm/include/llvm/Support/Threading.h
+++ b/llvm/include/llvm/Support/Threading.h
@@ -142,6 +142,11 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }
/// the thread shall remain on the actual CPU socket.
LLVM_ABI std::optional<unsigned>
compute_cpu_socket(unsigned ThreadPoolNum) const;
+
+ /// If true, the thread pool will attempt to coordinate with a GNU Make
+ /// jobserver, acquiring a job slot before processing a task. If no
+ /// jobserver is found in the environment, this is ignored.
+ bool UseJobserver = false;
};
/// Build a strategy from a number of threads as a string provided in \p Num.
@@ -210,6 +215,19 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }
return S;
}
+ /// Returns a thread strategy that attempts to coordinate with a GNU Make
+ /// jobserver. The number of active threads will be limited by the number of
+ /// available job slots. If no jobserver is detected in the environment, this
+ /// strategy falls back to the default hardware_concurrency() behavior.
+ inline ThreadPoolStrategy jobserver_concurrency() {
+ ThreadPoolStrategy S;
+ S.UseJobserver = true;
+ // We can still request all threads be created, as they will simply
+ // block waiting for a job slot if the jobserver is the limiting factor.
+ S.ThreadsRequested = 0; // 0 means 'use all available'
+ return S;
+ }
+
/// Return the current thread id, as used in various OS system calls.
/// Note that not all platforms guarantee that the value returned will be
/// unique across the entire system, so portable code should not assume
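A short usage sketch for the new strategy (hedged: it assumes the standard pool types from llvm/Support/ThreadPool.h, and the loop body is a placeholder):

    #include "llvm/Support/ThreadPool.h"
    #include "llvm/Support/Threading.h"

    void runCoordinated() {
      // If MAKEFLAGS names no jobserver, this behaves like the default
      // hardware_concurrency() strategy.
      llvm::DefaultThreadPool Pool(llvm::jobserver_concurrency());
      for (int I = 0; I < 100; ++I)
        Pool.async([I] { /* one unit of work */ });
      Pool.wait(); // All tasks ran, throttled by the available job slots.
    }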
diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
index 1e07fbe..faaff4a 100644
--- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -18,8 +18,7 @@
#include "llvm/Support/DataTypes.h"
-namespace llvm {
-namespace X86Disassembler {
+namespace llvm::X86Disassembler {
#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
#define CONTEXTS_SYM x86DisassemblerContexts
@@ -541,7 +540,6 @@ static const unsigned X86_MAX_OPERANDS = 6;
/// respectively.
enum DisassemblerMode { MODE_16BIT, MODE_32BIT, MODE_64BIT };
-} // namespace X86Disassembler
-} // namespace llvm
+} // namespace llvm::X86Disassembler
#endif
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 558c5a0..309f1be 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6046,7 +6046,7 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
return N02;
}
- if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
+ if (MaxC == 0 && MinC != 0 && MinCPlus1.isPowerOf2()) {
BW = MinCPlus1.exactLogBase2();
Unsigned = true;
return N02;
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 7902229..4f4fb9c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -111,15 +111,11 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
const TargetRegisterInfo *TRI,
const TargetInstrInfo *TII,
- const TargetLowering &TLI,
MCRegister &PhysReg, int &Cost) {
if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
return;
Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost))
- return;
-
if (Reg.isVirtual())
return;
@@ -490,8 +486,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
MCRegister PhysReg;
int Cost = 1;
// Determine if this is a physical register dependency.
- const TargetLowering &TLI = DAG->getTargetLoweringInfo();
- CheckForPhysRegDependency(OpN, N, i, TRI, TII, TLI, PhysReg, Cost);
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
assert((!PhysReg || !isChain) && "Chain dependence via physreg data?");
// FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
// emits a copy from the physical register to a virtual register unless
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 7da972f..42b21b5 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -207,6 +207,7 @@ add_llvm_component_library(LLVMSupport
InstructionCost.cpp
IntEqClasses.cpp
IntervalMap.cpp
+ Jobserver.cpp
JSON.cpp
KnownBits.cpp
KnownFPClass.cpp
diff --git a/llvm/lib/Support/Jobserver.cpp b/llvm/lib/Support/Jobserver.cpp
new file mode 100644
index 0000000..9f726eb
--- /dev/null
+++ b/llvm/lib/Support/Jobserver.cpp
@@ -0,0 +1,259 @@
+//===- llvm/Support/Jobserver.cpp - Jobserver Client Implementation -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Jobserver.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <atomic>
+#include <memory>
+#include <mutex>
+#include <new>
+
+#define DEBUG_TYPE "jobserver"
+
+using namespace llvm;
+
+namespace {
+struct FdPair {
+ int Read = -1;
+ int Write = -1;
+ bool isValid() const { return Read >= 0 && Write >= 0; }
+};
+
+struct JobserverConfig {
+ enum Mode {
+ None,
+ PosixFifo,
+ PosixPipe,
+ Win32Semaphore,
+ };
+ Mode TheMode = None;
+ std::string Path;
+ FdPair PipeFDs;
+};
+
+/// A helper function that checks if `Input` starts with `Prefix`.
+/// If it does, it removes the prefix from `Input`, assigns the remainder to
+/// `Value`, and returns true. Otherwise, it returns false.
+bool getPrefixedValue(StringRef Input, StringRef Prefix, StringRef &Value) {
+ if (Input.consume_front(Prefix)) {
+ Value = Input;
+ return true;
+ }
+ return false;
+}
+
+/// A helper function to parse a string in the format "R,W", where R and W
+/// are non-negative integers representing file descriptors. Returns the
+/// parsed pair on success, or std::nullopt on failure.
+static std::optional<FdPair> getFileDescriptorPair(StringRef Input) {
+ FdPair FDs;
+ if (Input.consumeInteger(10, FDs.Read))
+ return std::nullopt;
+ if (!Input.consume_front(","))
+ return std::nullopt;
+ if (Input.consumeInteger(10, FDs.Write))
+ return std::nullopt;
+ if (!Input.empty() || !FDs.isValid())
+ return std::nullopt;
+ return FDs;
+}
+
+/// Parses the `MAKEFLAGS` environment variable string to find jobserver
+/// arguments. It splits the string into space-separated arguments and searches
+/// for `--jobserver-auth` or `--jobserver-fds`. Based on the value of these
+/// arguments, it determines the jobserver mode (Pipe, FIFO, or Semaphore) and
+/// connection details (file descriptors or path).
+Expected<JobserverConfig> parseNativeMakeFlags(StringRef MakeFlags) {
+ JobserverConfig Config;
+ if (MakeFlags.empty())
+ return Config;
+
+ // Split the MAKEFLAGS string into arguments.
+ SmallVector<StringRef, 8> Args;
+ SplitString(MakeFlags, Args);
+
+ // GNU Make may pass its single-letter options as a leading word with no
+ // '-' prefix; if that word contains 'n' (dry-run), disable the jobserver.
+ if (!Args.empty() && !Args[0].starts_with("-") && Args[0].contains('n'))
+ return Config;
+
+ // Iterate through arguments to find jobserver flags.
+ // Note that make may pass multiple --jobserver-auth flags; the last one wins.
+ for (StringRef Arg : Args) {
+ StringRef Value;
+ if (getPrefixedValue(Arg, "--jobserver-auth=", Value)) {
+ // Try to parse as a file descriptor pair first.
+ if (auto FDPair = getFileDescriptorPair(Value)) {
+ Config.TheMode = JobserverConfig::PosixPipe;
+ Config.PipeFDs = *FDPair;
+ } else {
+ StringRef FifoPath;
+ // If not FDs, try to parse as a named pipe (fifo).
+ if (getPrefixedValue(Value, "fifo:", FifoPath)) {
+ Config.TheMode = JobserverConfig::PosixFifo;
+ Config.Path = FifoPath.str();
+ } else {
+ // Otherwise, assume it's a Windows semaphore.
+ Config.TheMode = JobserverConfig::Win32Semaphore;
+ Config.Path = Value.str();
+ }
+ }
+ } else if (getPrefixedValue(Arg, "--jobserver-fds=", Value)) {
+ // This is an alternative, older syntax for the pipe-based server.
+ if (auto FDPair = getFileDescriptorPair(Value)) {
+ Config.TheMode = JobserverConfig::PosixPipe;
+ Config.PipeFDs = *FDPair;
+ } else {
+ return createStringError(inconvertibleErrorCode(),
+ "Invalid file descriptor pair in MAKEFLAGS");
+ }
+ }
+ }
+
+// Perform platform-specific validation.
+#ifdef _WIN32
+ if (Config.TheMode == JobserverConfig::PosixFifo ||
+ Config.TheMode == JobserverConfig::PosixPipe)
+ return createStringError(
+ inconvertibleErrorCode(),
+ "FIFO/Pipe-based jobserver is not supported on Windows");
+#else
+ if (Config.TheMode == JobserverConfig::Win32Semaphore)
+ return createStringError(
+ inconvertibleErrorCode(),
+ "Semaphore-based jobserver is not supported on this platform");
+#endif
+ return Config;
+}
+
+std::once_flag GJobserverOnceFlag;
+JobserverClient *GJobserver = nullptr;
+
+} // namespace
+
+namespace llvm {
+class JobserverClientImpl : public JobserverClient {
+ bool IsInitialized = false;
+ std::atomic<bool> HasImplicitSlot{true};
+ unsigned NumJobs = 0;
+
+public:
+ JobserverClientImpl(const JobserverConfig &Config);
+ ~JobserverClientImpl() override;
+
+ JobSlot tryAcquire() override;
+ void release(JobSlot Slot) override;
+ unsigned getNumJobs() const override { return NumJobs; }
+
+ bool isValid() const { return IsInitialized; }
+
+private:
+#if defined(LLVM_ON_UNIX)
+ int ReadFD = -1;
+ int WriteFD = -1;
+ std::string FifoPath;
+#elif defined(_WIN32)
+ void *Semaphore = nullptr;
+#endif
+};
+} // namespace llvm
+
+// Include the platform-specific parts of the class.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/Jobserver.inc"
+#elif defined(_WIN32)
+#include "Windows/Jobserver.inc"
+#else
+// Dummy implementation for unsupported platforms.
+JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) {}
+JobserverClientImpl::~JobserverClientImpl() = default;
+JobSlot JobserverClientImpl::tryAcquire() { return JobSlot(); }
+void JobserverClientImpl::release(JobSlot Slot) {}
+#endif
+
+namespace llvm {
+JobserverClient::~JobserverClient() = default;
+
+uint8_t JobSlot::getExplicitValue() const {
+ assert(isExplicit() && "Cannot get value of implicit or invalid slot");
+ return static_cast<uint8_t>(Value);
+}
+
+/// This is the main entry point for acquiring a jobserver client. It uses a
+/// std::call_once to ensure the singleton `GJobserver` instance is created
+/// safely in a multi-threaded environment. On first call, it reads the
+/// `MAKEFLAGS` environment variable, parses it, and attempts to construct and
+/// initialize a `JobserverClientImpl`. If successful, the global instance is
+/// stored in `GJobserver`. Subsequent calls will return the existing instance.
+JobserverClient *JobserverClient::getInstance() {
+ std::call_once(GJobserverOnceFlag, []() {
+ LLVM_DEBUG(
+ dbgs()
+ << "JobserverClient::getInstance() called for the first time.\n");
+ const char *MakeFlagsEnv = getenv("MAKEFLAGS");
+ if (!MakeFlagsEnv) {
+ errs() << "Warning: failed to create jobserver client due to MAKEFLAGS "
+ "environment variable not found\n";
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "Found MAKEFLAGS = \"" << MakeFlagsEnv << "\"\n");
+
+ auto ConfigOrErr = parseNativeMakeFlags(MakeFlagsEnv);
+ if (Error Err = ConfigOrErr.takeError()) {
+ errs() << "Warning: failed to create jobserver client due to invalid "
+ "MAKEFLAGS environment variable: "
+ << toString(std::move(Err)) << "\n";
+ return;
+ }
+
+ JobserverConfig Config = *ConfigOrErr;
+ if (Config.TheMode == JobserverConfig::None) {
+ errs() << "Warning: failed to create jobserver client due to jobserver "
+ "mode missing in MAKEFLAGS environment variable\n";
+ return;
+ }
+
+ if (Config.TheMode == JobserverConfig::PosixPipe) {
+#if defined(LLVM_ON_UNIX)
+ if (!areFdsValid(Config.PipeFDs.Read, Config.PipeFDs.Write)) {
+ errs() << "Warning: failed to create jobserver client due to invalid "
+ "Pipe FDs in MAKEFLAGS environment variable\n";
+ return;
+ }
+#endif
+ }
+
+ auto Client = std::make_unique<JobserverClientImpl>(Config);
+ if (Client->isValid()) {
+ LLVM_DEBUG(dbgs() << "Jobserver client created successfully!\n");
+ GJobserver = Client.release();
+ } else
+ errs() << "Warning: jobserver client initialization failed.\n";
+ });
+ return GJobserver;
+}
+
+/// For testing purposes only. This function resets the singleton instance by
+/// destroying the existing client and re-initializing the `std::once_flag`.
+/// This allows tests to simulate the first-time initialization of the
+/// jobserver client multiple times.
+void JobserverClient::resetForTesting() {
+ delete GJobserver;
+ GJobserver = nullptr;
+ // Re-construct the std::once_flag in place to reset the singleton state.
+ new (&GJobserverOnceFlag) std::once_flag();
+}
+} // namespace llvm
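For orientation, a hedged sketch of what a client tool observes at first use; the MAKEFLAGS values in the comments are illustrative, not captured from a real build:

    #include "llvm/Support/Jobserver.h"
    #include "llvm/Support/raw_ostream.h"

    void reportJobserver() {
      // Typical shapes the parser above accepts (hypothetical examples):
      //   "-j8 --jobserver-auth=3,4"                -> PosixPipe on FDs 3,4
      //   "-j8 --jobserver-auth=fifo:/tmp/GMfifo42" -> PosixFifo at that path
      //   "n -j8 --jobserver-auth=3,4"              -> dry run, disabled
      llvm::JobserverClient *Client = llvm::JobserverClient::getInstance();
      if (!Client) {
        llvm::errs() << "no usable jobserver; using local limits\n";
        return;
      }
      llvm::outs() << "jobserver grants " << Client->getNumJobs() << " jobs\n";
    }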
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 3ac6fc7..8e0c724 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -7,12 +7,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Parallel.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/ExponentialBackoff.h"
+#include "llvm/Support/Jobserver.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Threading.h"
#include <atomic>
#include <future>
+#include <memory>
+#include <mutex>
#include <thread>
#include <vector>
@@ -49,6 +54,9 @@ public:
class ThreadPoolExecutor : public Executor {
public:
explicit ThreadPoolExecutor(ThreadPoolStrategy S) {
+ if (S.UseJobserver)
+ TheJobserver = JobserverClient::getInstance();
+
ThreadCount = S.compute_thread_count();
// Spawn all but one of the threads in another thread as spawning threads
// can take a while.
@@ -69,6 +77,10 @@ public:
});
}
+ // Ensure the thread pool executor can only be created with an explicit
+ // parallel strategy.
+ ThreadPoolExecutor() = delete;
+
void stop() {
{
std::lock_guard<std::mutex> Lock(Mutex);
@@ -111,15 +123,62 @@ private:
void work(ThreadPoolStrategy S, unsigned ThreadID) {
threadIndex = ThreadID;
S.apply_thread_strategy(ThreadID);
+ // Note on jobserver deadlock avoidance:
+ // GNU Make grants each invoked process one implicit job slot. Our
+ // JobserverClient models this by returning an implicit JobSlot on the
+ // first successful tryAcquire() in a process. This guarantees forward
+ // progress without requiring a dedicated "always-on" thread here.
+
while (true) {
- std::unique_lock<std::mutex> Lock(Mutex);
- Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); });
- if (Stop)
- break;
- auto Task = std::move(WorkStack.back());
- WorkStack.pop_back();
- Lock.unlock();
- Task();
+ if (TheJobserver) {
+ // Jobserver-mode scheduling:
+ // - Acquire one job slot (with exponential backoff to avoid busy-wait).
+ // - While holding the slot, drain and run tasks from the local queue.
+ // - Release the slot when the queue is empty or when shutting down.
+ // Rationale: Holding a slot amortizes acquire/release overhead over
+ // multiple tasks and avoids requeue/yield churn, while still enforcing
+ // the jobserver’s global concurrency limit. With K available slots,
+ // up to K workers run tasks in parallel; within each worker tasks run
+ // sequentially until the local queue is empty.
+ ExponentialBackoff Backoff(std::chrono::hours(24));
+ JobSlot Slot;
+ do {
+ if (Stop)
+ return;
+ Slot = TheJobserver->tryAcquire();
+ if (Slot.isValid())
+ break;
+ } while (Backoff.waitForNextAttempt());
+
+ auto SlotReleaser = llvm::make_scope_exit(
+ [&] { TheJobserver->release(std::move(Slot)); });
+
+ while (true) {
+ std::function<void()> Task;
+ {
+ std::unique_lock<std::mutex> Lock(Mutex);
+ Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); });
+ if (Stop && WorkStack.empty())
+ return;
+ if (WorkStack.empty())
+ break;
+ Task = std::move(WorkStack.back());
+ WorkStack.pop_back();
+ }
+ Task();
+ }
+ } else {
+ std::unique_lock<std::mutex> Lock(Mutex);
+ Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); });
+ if (Stop)
+ break;
+ auto Task = std::move(WorkStack.back());
+ WorkStack.pop_back();
+ Lock.unlock();
+ Task();
+ }
}
}
@@ -130,9 +189,20 @@ private:
std::promise<void> ThreadsCreated;
std::vector<std::thread> Threads;
unsigned ThreadCount;
+
+ JobserverClient *TheJobserver = nullptr;
};
-Executor *Executor::getDefaultExecutor() {
+// A global raw pointer to the executor. Lifetime is managed by the
+// objects created within createExecutor().
+static Executor *TheExec = nullptr;
+static std::once_flag Flag;
+
+// This function will be called exactly once to create the executor.
+// It contains the necessary platform-specific logic. Since functions
+// called by std::call_once cannot return a value, we have to store the
+// executor in a global variable.
+void createExecutor() {
#ifdef _WIN32
// The ManagedStatic enables the ThreadPoolExecutor to be stopped via
// llvm_shutdown() which allows a "clean" fast exit, e.g. via _exit(). This
@@ -156,16 +226,22 @@ Executor *Executor::getDefaultExecutor() {
ThreadPoolExecutor::Deleter>
ManagedExec;
static std::unique_ptr<ThreadPoolExecutor> Exec(&(*ManagedExec));
- return Exec.get();
+ TheExec = Exec.get();
#else
// ManagedStatic is not desired on other platforms. When `Exec` is destroyed
// by llvm_shutdown(), worker threads will clean up and invoke TLS
// destructors. This can lead to race conditions if other threads attempt to
// access TLS objects that have already been destroyed.
static ThreadPoolExecutor Exec(strategy);
- return &Exec;
+ TheExec = &Exec;
#endif
}
+
+Executor *Executor::getDefaultExecutor() {
+ // Use std::call_once to lazily and safely initialize the executor.
+ std::call_once(Flag, createExecutor);
+ return TheExec;
+}
} // namespace
} // namespace detail
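The acquire loops above lean on llvm::ExponentialBackoff. A stripped-down sketch of that idiom follows; PollOnce is a hypothetical stand-in for JobserverClient::tryAcquire():

    #include "llvm/Support/ExponentialBackoff.h"
    #include <chrono>
    #include <functional>

    bool pollWithBackoff(const std::function<bool()> &PollOnce) {
      // Overall deadline of 5 seconds; the sleep between attempts grows
      // exponentially up to ExponentialBackoff's internal cap.
      llvm::ExponentialBackoff Backoff(std::chrono::seconds(5));
      do {
        if (PollOnce())
          return true;
      } while (Backoff.waitForNextAttempt());
      return false; // Deadline expired without a successful poll.
    }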
diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
index c304f0f..6960268 100644
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
+//
// This file implements a crude C++11 based thread pool.
//
//===----------------------------------------------------------------------===//
@@ -14,6 +15,8 @@
#include "llvm/Config/llvm-config.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Support/ExponentialBackoff.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/raw_ostream.h"
@@ -33,7 +36,10 @@ ThreadPoolInterface::~ThreadPoolInterface() = default;
#if LLVM_ENABLE_THREADS
StdThreadPool::StdThreadPool(ThreadPoolStrategy S)
- : Strategy(S), MaxThreadCount(S.compute_thread_count()) {}
+ : Strategy(S), MaxThreadCount(S.compute_thread_count()) {
+ if (Strategy.UseJobserver)
+ TheJobserver = JobserverClient::getInstance();
+}
void StdThreadPool::grow(int requested) {
llvm::sys::ScopedWriter LockGuard(ThreadsLock);
@@ -45,7 +51,15 @@ void StdThreadPool::grow(int requested) {
Threads.emplace_back([this, ThreadID] {
set_thread_name(formatv("llvm-worker-{0}", ThreadID));
Strategy.apply_thread_strategy(ThreadID);
- processTasks(nullptr);
+ // Note on jobserver deadlock avoidance:
+ // GNU Make grants each invoked process one implicit job slot.
+ // JobserverClient::tryAcquire() returns that implicit slot on the first
+ // successful call in a process, ensuring forward progress without a
+ // dedicated "always-on" thread.
+ if (TheJobserver)
+ processTasksWithJobserver();
+ else
+ processTasks(nullptr);
});
}
}
@@ -133,6 +147,96 @@ void StdThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) {
}
}
+/// Main loop for worker threads when using a jobserver.
+/// This function uses a two-level scheme: it first acquires a job slot from
+/// the external jobserver, then retrieves a task from the internal queue.
+/// This allows the thread pool to cooperate with build systems like `make -j`.
+void StdThreadPool::processTasksWithJobserver() {
+ while (true) {
+ // Acquire a job slot from the external jobserver.
+ // This polls for a slot and yields the thread to avoid a high-CPU wait.
+ JobSlot Slot;
+ // The timeout for the backoff can be very long, as the shutdown
+ // is checked on each iteration. The sleep duration is capped by MaxWait
+ // in ExponentialBackoff, so shutdown latency is not a problem.
+ ExponentialBackoff Backoff(std::chrono::hours(24));
+ bool AcquiredToken = false;
+ do {
+ // Return if the thread pool is shutting down.
+ {
+ std::unique_lock<std::mutex> LockGuard(QueueLock);
+ if (!EnableFlag)
+ return;
+ }
+
+ Slot = TheJobserver->tryAcquire();
+ if (Slot.isValid()) {
+ AcquiredToken = true;
+ break;
+ }
+ } while (Backoff.waitForNextAttempt());
+
+ if (!AcquiredToken) {
+ // This is practically unreachable with a 24h timeout and indicates a
+ // deeper problem if hit.
+ report_fatal_error("Timed out waiting for jobserver token.");
+ }
+
+ // `make_scope_exit` guarantees the job slot is released, even if the
+ // task throws or we exit early. This prevents deadlocking the build.
+ auto SlotReleaser =
+ make_scope_exit([&] { TheJobserver->release(std::move(Slot)); });
+
+ // While we hold a job slot, process tasks from the internal queue.
+ while (true) {
+ std::function<void()> Task;
+ ThreadPoolTaskGroup *GroupOfTask = nullptr;
+
+ {
+ std::unique_lock<std::mutex> LockGuard(QueueLock);
+
+ // Wait until a task is available or the pool is shutting down.
+ QueueCondition.wait(LockGuard,
+ [&] { return !EnableFlag || !Tasks.empty(); });
+
+ // If shutting down and the queue is empty, the thread can terminate.
+ if (!EnableFlag && Tasks.empty())
+ return;
+
+ // If the queue is empty, we're done processing tasks for now.
+ // Break the inner loop to release the job slot.
+ if (Tasks.empty())
+ break;
+
+ // A task is available. Mark it as active before releasing the lock
+ // to prevent race conditions with `wait()`.
+ ++ActiveThreads;
+ Task = std::move(Tasks.front().first);
+ GroupOfTask = Tasks.front().second;
+ if (GroupOfTask != nullptr)
+ ++ActiveGroups[GroupOfTask];
+ Tasks.pop_front();
+ } // The queue lock is released.
+
+ // Run the task. The job slot remains acquired during execution.
+ Task();
+
+ // The task has finished. Update the active count and notify any waiters.
+ {
+ std::lock_guard<std::mutex> LockGuard(QueueLock);
+ --ActiveThreads;
+ if (GroupOfTask != nullptr) {
+ auto A = ActiveGroups.find(GroupOfTask);
+ if (--(A->second) == 0)
+ ActiveGroups.erase(A);
+ }
+ // If all tasks are complete, notify any waiting threads.
+ if (workCompletedUnlocked(nullptr))
+ CompletionCondition.notify_all();
+ }
+ }
+ }
+}
bool StdThreadPool::workCompletedUnlocked(ThreadPoolTaskGroup *Group) const {
if (Group == nullptr)
return !ActiveThreads && Tasks.empty();
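Task groups compose with the jobserver loop above, since group bookkeeping happens under QueueLock in both scheduling modes. A hedged usage sketch (assumes LLVM_ENABLE_THREADS; the per-unit work is a placeholder):

    #include "llvm/Support/ThreadPool.h"
    #include "llvm/Support/Threading.h"

    void compileAll(unsigned NumUnits) {
      llvm::StdThreadPool Pool(llvm::jobserver_concurrency());
      llvm::ThreadPoolTaskGroup Group(Pool);
      for (unsigned I = 0; I < NumUnits; ++I)
        Group.async([I] { /* process unit I */ });
      // Workers hold a job slot only while actually running tasks; wait()
      // returns once the whole group has finished.
      Group.wait();
    }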
diff --git a/llvm/lib/Support/Threading.cpp b/llvm/lib/Support/Threading.cpp
index 693de0e..9da357a 100644
--- a/llvm/lib/Support/Threading.cpp
+++ b/llvm/lib/Support/Threading.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/Threading.h"
#include "llvm/Config/config.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Jobserver.h"
#include <cassert>
#include <optional>
@@ -51,6 +52,10 @@ int llvm::get_physical_cores() { return -1; }
static int computeHostNumHardwareThreads();
unsigned llvm::ThreadPoolStrategy::compute_thread_count() const {
+ if (UseJobserver)
+ if (auto JS = JobserverClient::getInstance())
+ return JS->getNumJobs();
+
int MaxThreadCount =
UseHyperThreads ? computeHostNumHardwareThreads() : get_physical_cores();
if (MaxThreadCount <= 0)
diff --git a/llvm/lib/Support/Unix/Jobserver.inc b/llvm/lib/Support/Unix/Jobserver.inc
new file mode 100644
index 0000000..53bf7f2
--- /dev/null
+++ b/llvm/lib/Support/Unix/Jobserver.inc
@@ -0,0 +1,195 @@
+//===- llvm/Support/Unix/Jobserver.inc - Unix Jobserver Impl ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the UNIX-specific parts of the JobserverClient class.
+//
+//===----------------------------------------------------------------------===//
+
+#include <atomic>
+#include <cassert>
+#include <cerrno>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+namespace {
+/// Returns true if the given file descriptor is a FIFO (named pipe).
+bool isFifo(int FD) {
+ struct stat StatBuf;
+ if (::fstat(FD, &StatBuf) != 0)
+ return false;
+ return S_ISFIFO(StatBuf.st_mode);
+}
+
+/// Returns true if the given file descriptors are valid.
+bool areFdsValid(int ReadFD, int WriteFD) {
+ if (ReadFD == -1 || WriteFD == -1)
+ return false;
+ // Check if the file descriptors are actually valid by checking their flags.
+ return ::fcntl(ReadFD, F_GETFD) != -1 && ::fcntl(WriteFD, F_GETFD) != -1;
+}
+} // namespace
+
+/// The constructor sets up the client based on the provided configuration.
+/// For pipe-based jobservers, it duplicates the inherited file descriptors,
+/// sets them to close-on-exec, and makes the read descriptor non-blocking.
+/// For FIFO-based jobservers, it opens the named pipe. After setup, it drains
+/// all available tokens from the jobserver to determine the total number of
+/// available jobs (`NumJobs`), then immediately releases them back.
+JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) {
+ switch (Config.TheMode) {
+ case JobserverConfig::PosixPipe: {
+ // Duplicate the read and write file descriptors.
+ int NewReadFD = ::dup(Config.PipeFDs.Read);
+ if (NewReadFD < 0)
+ return;
+ int NewWriteFD = ::dup(Config.PipeFDs.Write);
+ if (NewWriteFD < 0) {
+ ::close(NewReadFD);
+ return;
+ }
+ // Set the new descriptors to be closed automatically on exec().
+ if (::fcntl(NewReadFD, F_SETFD, FD_CLOEXEC) == -1 ||
+ ::fcntl(NewWriteFD, F_SETFD, FD_CLOEXEC) == -1) {
+ ::close(NewReadFD);
+ ::close(NewWriteFD);
+ return;
+ }
+ // Set the read descriptor to non-blocking.
+ int flags = ::fcntl(NewReadFD, F_GETFL, 0);
+ if (flags == -1 || ::fcntl(NewReadFD, F_SETFL, flags | O_NONBLOCK) == -1) {
+ ::close(NewReadFD);
+ ::close(NewWriteFD);
+ return;
+ }
+ ReadFD = NewReadFD;
+ WriteFD = NewWriteFD;
+ break;
+ }
+ case JobserverConfig::PosixFifo:
+ // Open the FIFO for reading. It must be non-blocking and close-on-exec.
+ ReadFD = ::open(Config.Path.c_str(), O_RDONLY | O_NONBLOCK | O_CLOEXEC);
+ if (ReadFD < 0 || !isFifo(ReadFD)) {
+ if (ReadFD >= 0)
+ ::close(ReadFD);
+ ReadFD = -1;
+ return;
+ }
+ FifoPath = Config.Path;
+ // The write FD is opened on-demand in release().
+ WriteFD = -1;
+ break;
+ default:
+ return;
+ }
+
+ IsInitialized = true;
+ // Determine the total number of jobs by acquiring all available slots and
+ // then immediately releasing them.
+ SmallVector<JobSlot, 8> Slots;
+ while (true) {
+ auto S = tryAcquire();
+ if (!S.isValid())
+ break;
+ Slots.push_back(std::move(S));
+ }
+ NumJobs = Slots.size();
+ assert(NumJobs >= 1 && "Invalid number of jobs");
+ for (auto &S : Slots)
+ release(std::move(S));
+}
+
+/// The destructor closes any open file descriptors.
+JobserverClientImpl::~JobserverClientImpl() {
+ if (ReadFD >= 0)
+ ::close(ReadFD);
+ if (WriteFD >= 0)
+ ::close(WriteFD);
+}
+
+/// Tries to acquire a job slot. The first call to this function will always
+/// successfully acquire the single "implicit" slot that is granted to every
+/// process started by `make`. Subsequent calls attempt to read a one-byte
+/// token from the jobserver's read pipe. A successful read grants one
+/// explicit job slot. The read is non-blocking; if no token is available,
+/// it fails and returns an invalid JobSlot.
+JobSlot JobserverClientImpl::tryAcquire() {
+ if (!IsInitialized)
+ return JobSlot();
+
+ // The first acquisition is always for the implicit slot.
+ if (HasImplicitSlot.exchange(false, std::memory_order_acquire)) {
+ LLVM_DEBUG(dbgs() << "Acquired implicit job slot.\n");
+ return JobSlot::createImplicit();
+ }
+
+ char Token;
+ ssize_t Ret;
+ LLVM_DEBUG(dbgs() << "Attempting to read token from FD " << ReadFD << ".\n");
+ // Loop to retry on EINTR (interrupted system call).
+ do {
+ Ret = ::read(ReadFD, &Token, 1);
+ } while (Ret < 0 && errno == EINTR);
+
+ if (Ret == 1) {
+ LLVM_DEBUG(dbgs() << "Acquired explicit token '" << Token << "'.\n");
+ return JobSlot::createExplicit(static_cast<uint8_t>(Token));
+ }
+
+ LLVM_DEBUG(dbgs() << "Failed to acquire job slot, read returned " << Ret
+ << ".\n");
+ return JobSlot();
+}
+
+/// Releases a job slot back to the pool. If the slot is implicit, it simply
+/// resets a flag. If the slot is explicit, it writes the character token
+/// associated with the slot back into the jobserver's write pipe. For FIFO
+/// jobservers, this may require opening the FIFO for writing if it hasn't
+/// been already.
+void JobserverClientImpl::release(JobSlot Slot) {
+ if (!Slot.isValid())
+ return;
+
+ // Releasing the implicit slot just makes it available for the next acquire.
+ if (Slot.isImplicit()) {
+ LLVM_DEBUG(dbgs() << "Released implicit job slot.\n");
+ [[maybe_unused]] bool was_already_released =
+ HasImplicitSlot.exchange(true, std::memory_order_release);
+ assert(!was_already_released && "Implicit slot released twice");
+ return;
+ }
+
+ uint8_t Token = Slot.getExplicitValue();
+ LLVM_DEBUG(dbgs() << "Releasing explicit token '" << (char)Token << "' to FD "
+ << WriteFD << ".\n");
+
+ // For FIFO-based jobservers, the write FD might not be open yet.
+ // Open it on the first release.
+ if (WriteFD < 0) {
+ LLVM_DEBUG(dbgs() << "WriteFD is invalid, opening FIFO: " << FifoPath
+ << "\n");
+ WriteFD = ::open(FifoPath.c_str(), O_WRONLY | O_CLOEXEC);
+ if (WriteFD < 0) {
+ LLVM_DEBUG(dbgs() << "Failed to open FIFO for writing.\n");
+ return;
+ }
+ LLVM_DEBUG(dbgs() << "Opened FIFO as new WriteFD: " << WriteFD << "\n");
+ }
+
+ ssize_t Written;
+ // Loop to retry on EINTR (interrupted system call).
+ do {
+ Written = ::write(WriteFD, &Token, 1);
+ } while (Written < 0 && errno == EINTR);
+
+ if (Written <= 0) {
+ LLVM_DEBUG(dbgs() << "Failed to write token to pipe, write returned "
+ << Written << "\n");
+ }
+}
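Stripped of descriptor management, the raw POSIX protocol the implementation above wraps is one byte per slot. A hedged sketch, assuming the FDs came from --jobserver-auth parsing and the read end is O_NONBLOCK:

    #include <cerrno>
    #include <unistd.h>

    bool acquireToken(int ReadFD, char &Token) {
      ssize_t N;
      do {
        N = ::read(ReadFD, &Token, 1); // One byte == one job slot.
      } while (N < 0 && errno == EINTR);
      return N == 1; // 0 or -1 (EAGAIN) means no slot is available now.
    }

    void releaseToken(int WriteFD, char Token) {
      ssize_t N;
      do {
        N = ::write(WriteFD, &Token, 1); // Must be the exact byte read.
      } while (N < 0 && errno == EINTR);
    }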
diff --git a/llvm/lib/Support/Windows/Jobserver.inc b/llvm/lib/Support/Windows/Jobserver.inc
new file mode 100644
index 0000000..79028ee
--- /dev/null
+++ b/llvm/lib/Support/Windows/Jobserver.inc
@@ -0,0 +1,79 @@
+//==- llvm/Support/Windows/Jobserver.inc - Windows Jobserver Impl -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Windows-specific parts of the JobserverClient class.
+// On Windows, the jobserver is implemented using a named semaphore.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Windows/WindowsSupport.h"
+#include <atomic>
+#include <cassert>
+
+namespace llvm {
+/// The constructor for the Windows jobserver client. It attempts to open a
+/// handle to an existing named semaphore, the name of which is provided by
+/// GNU make in the --jobserver-auth argument. If the semaphore is opened
+/// successfully, the client is marked as initialized.
+JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) {
+ Semaphore = (void *)::OpenSemaphoreA(SEMAPHORE_MODIFY_STATE | SYNCHRONIZE,
+ FALSE, Config.Path.c_str());
+ if (Semaphore != nullptr)
+ IsInitialized = true;
+}
+
+/// The destructor closes the handle to the semaphore, releasing the resource.
+JobserverClientImpl::~JobserverClientImpl() {
+ if (Semaphore != nullptr)
+ ::CloseHandle((HANDLE)Semaphore);
+}
+
+/// Tries to acquire a job slot. The first call always returns the implicit
+/// slot. Subsequent calls use a non-blocking wait on the semaphore
+/// (`WaitForSingleObject` with a timeout of 0). If the wait succeeds, the
+/// semaphore's count is decremented, and an explicit job slot is acquired.
+/// If the wait times out, it means no slots are available, and an invalid
+/// slot is returned.
+JobSlot JobserverClientImpl::tryAcquire() {
+ if (!IsInitialized)
+ return JobSlot();
+
+ // First, grant the implicit slot.
+ if (HasImplicitSlot.exchange(false, std::memory_order_acquire)) {
+ return JobSlot::createImplicit();
+ }
+
+ // Try to acquire a slot from the semaphore without blocking.
+ if (::WaitForSingleObject((HANDLE)Semaphore, 0) == WAIT_OBJECT_0) {
+ // The explicit token value is arbitrary on Windows, as the semaphore
+ // count is the real resource.
+ return JobSlot::createExplicit(1);
+ }
+
+ return JobSlot(); // Invalid slot
+}
+
+/// Releases a job slot back to the pool. If the slot is implicit, it simply
+/// resets a flag. For an explicit slot, it increments the semaphore's count
+/// by one using `ReleaseSemaphore`, making the slot available to other
+/// processes.
+void JobserverClientImpl::release(JobSlot Slot) {
+ if (!IsInitialized || !Slot.isValid())
+ return;
+
+ if (Slot.isImplicit()) {
+ [[maybe_unused]] bool was_already_released =
+ HasImplicitSlot.exchange(true, std::memory_order_release);
+ assert(!was_already_released && "Implicit slot released twice");
+ return;
+ }
+
+ // Release the slot by incrementing the semaphore count.
+ (void)::ReleaseSemaphore((HANDLE)Semaphore, 1, NULL);
+}
+} // namespace llvm
diff --git a/llvm/lib/TableGen/Error.cpp b/llvm/lib/TableGen/Error.cpp
index de0c4c9..3ba2c6c 100644
--- a/llvm/lib/TableGen/Error.cpp
+++ b/llvm/lib/TableGen/Error.cpp
@@ -19,10 +19,10 @@
#include "llvm/TableGen/Record.h"
#include <cstdlib>
-namespace llvm {
+using namespace llvm;
-SourceMgr SrcMgr;
-unsigned ErrorsPrinted = 0;
+SourceMgr llvm::SrcMgr;
+unsigned llvm::ErrorsPrinted = 0;
static void PrintMessage(ArrayRef<SMLoc> Locs, SourceMgr::DiagKind Kind,
const Twine &Msg) {
@@ -49,118 +49,118 @@ static void PrintMessage(ArrayRef<SMLoc> Locs, SourceMgr::DiagKind Kind,
// Functions to print notes.
-void PrintNote(const Twine &Msg) {
- WithColor::note() << Msg << "\n";
-}
+void llvm::PrintNote(const Twine &Msg) { WithColor::note() << Msg << "\n"; }
-void PrintNote(function_ref<void(raw_ostream &OS)> PrintMsg) {
+void llvm::PrintNote(function_ref<void(raw_ostream &OS)> PrintMsg) {
PrintMsg(WithColor::note());
}
-void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
+void llvm::PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
PrintMessage(NoteLoc, SourceMgr::DK_Note, Msg);
}
// Functions to print fatal notes.
-void PrintFatalNote(const Twine &Msg) {
+void llvm::PrintFatalNote(const Twine &Msg) {
PrintNote(Msg);
fatal_exit();
}
-void PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
+void llvm::PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
PrintNote(NoteLoc, Msg);
fatal_exit();
}
// This method takes a Record and uses the source location
// stored in it.
-void PrintFatalNote(const Record *Rec, const Twine &Msg) {
+void llvm::PrintFatalNote(const Record *Rec, const Twine &Msg) {
PrintNote(Rec->getLoc(), Msg);
fatal_exit();
}
// This method takes a RecordVal and uses the source location
// stored in it.
-void PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) {
+void llvm::PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) {
PrintNote(RecVal->getLoc(), Msg);
fatal_exit();
}
// Functions to print warnings.
-void PrintWarning(const Twine &Msg) { WithColor::warning() << Msg << "\n"; }
+void llvm::PrintWarning(const Twine &Msg) {
+ WithColor::warning() << Msg << "\n";
+}
-void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) {
+void llvm::PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) {
PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
}
-void PrintWarning(const char *Loc, const Twine &Msg) {
+void llvm::PrintWarning(const char *Loc, const Twine &Msg) {
SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Warning, Msg);
}
// Functions to print errors.
-void PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; }
+void llvm::PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; }
-void PrintError(function_ref<void(raw_ostream &OS)> PrintMsg) {
+void llvm::PrintError(function_ref<void(raw_ostream &OS)> PrintMsg) {
PrintMsg(WithColor::error());
}
-void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
+void llvm::PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
}
-void PrintError(const char *Loc, const Twine &Msg) {
+void llvm::PrintError(const char *Loc, const Twine &Msg) {
SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg);
}
// This method takes a Record and uses the source location
// stored in it.
-void PrintError(const Record *Rec, const Twine &Msg) {
+void llvm::PrintError(const Record *Rec, const Twine &Msg) {
PrintMessage(Rec->getLoc(), SourceMgr::DK_Error, Msg);
}
// This method takes a RecordVal and uses the source location
// stored in it.
-void PrintError(const RecordVal *RecVal, const Twine &Msg) {
+void llvm::PrintError(const RecordVal *RecVal, const Twine &Msg) {
PrintMessage(RecVal->getLoc(), SourceMgr::DK_Error, Msg);
}
// Functions to print fatal errors.
-void PrintFatalError(const Twine &Msg) {
+void llvm::PrintFatalError(const Twine &Msg) {
PrintError(Msg);
fatal_exit();
}
-void PrintFatalError(function_ref<void(raw_ostream &OS)> PrintMsg) {
+void llvm::PrintFatalError(function_ref<void(raw_ostream &OS)> PrintMsg) {
PrintError(PrintMsg);
fatal_exit();
}
-void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
+void llvm::PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
PrintError(ErrorLoc, Msg);
fatal_exit();
}
// This method takes a Record and uses the source location
// stored in it.
-void PrintFatalError(const Record *Rec, const Twine &Msg) {
+void llvm::PrintFatalError(const Record *Rec, const Twine &Msg) {
PrintError(Rec->getLoc(), Msg);
fatal_exit();
}
// This method takes a RecordVal and uses the source location
// stored in it.
-void PrintFatalError(const RecordVal *RecVal, const Twine &Msg) {
+void llvm::PrintFatalError(const RecordVal *RecVal, const Twine &Msg) {
PrintError(RecVal->getLoc(), Msg);
fatal_exit();
}
// Check an assertion: Obtain the condition value and be sure it is true.
// If not, print a nonfatal error along with the message.
-bool CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) {
+bool llvm::CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) {
auto *CondValue = dyn_cast_or_null<IntInit>(Condition->convertInitializerTo(
IntRecTy::get(Condition->getRecordKeeper())));
if (!CondValue) {
@@ -178,11 +178,9 @@ bool CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) {
}
// Dump a message to stderr.
-void dumpMessage(SMLoc Loc, const Init *Message) {
+void llvm::dumpMessage(SMLoc Loc, const Init *Message) {
if (auto *MessageInit = dyn_cast<StringInit>(Message))
PrintNote(Loc, MessageInit->getValue());
else
PrintError(Loc, "dump value is not of type string");
}
-
-} // end namespace llvm
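The mechanical change above follows the LLVM preference for qualified definitions over reopening `namespace llvm`: a qualified definition must match a declaration already visible in the namespace, so a signature mismatch becomes a compile error rather than silently minting a new overload. A minimal sketch with hypothetical names:

// header.h
namespace llvm {
void PrintThing(const Twine &Msg);
}

// impl.cpp
using namespace llvm;

// Rejected unless an identical declaration exists in namespace llvm;
// defining `void llvm::PrintThing(const char *)` here would be a hard
// error, not a quietly added overload.
void llvm::PrintThing(const Twine &Msg) { /* ... */ }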
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index f545706..42043f7 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -64,14 +64,12 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed"));
static cl::opt<bool>
TimePhases("time-phases", cl::desc("Time phases of parser and backend"));
-namespace llvm {
-cl::opt<bool> EmitLongStrLiterals(
+cl::opt<bool> llvm::EmitLongStrLiterals(
"long-string-literals",
cl::desc("when emitting large string tables, prefer string literals over "
"comma-separated char literals. This can be a readability and "
"compile-time performance win, but upsets some compilers"),
cl::Hidden, cl::init(true));
-} // end namespace llvm
static cl::opt<bool> NoWarnOnUnusedTemplateArgs(
"no-warn-on-unused-template-args",
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 051a896..2ea3a24 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -46,8 +46,7 @@ using namespace llvm;
// Context
//===----------------------------------------------------------------------===//
-namespace llvm {
-namespace detail {
+namespace llvm::detail {
/// This class represents the internal implementation of the RecordKeeper.
/// It contains all of the contextual static state of the Record classes. It is
/// kept out-of-line to simplify dependencies, and also make it easier for
@@ -100,8 +99,7 @@ struct RecordKeeperImpl {
void dumpAllocationStats(raw_ostream &OS) const;
};
-} // namespace detail
-} // namespace llvm
+} // namespace llvm::detail
void detail::RecordKeeperImpl::dumpAllocationStats(raw_ostream &OS) const {
// Dump memory allocation related stats.
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index f928ded..3d31d8e 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -31,8 +31,6 @@ using namespace llvm;
// Support Code for the Semantic Actions.
//===----------------------------------------------------------------------===//
-namespace llvm {
-
RecordsEntry::RecordsEntry(std::unique_ptr<Record> Rec) : Rec(std::move(Rec)) {}
RecordsEntry::RecordsEntry(std::unique_ptr<ForeachLoop> Loop)
: Loop(std::move(Loop)) {}
@@ -41,6 +39,7 @@ RecordsEntry::RecordsEntry(std::unique_ptr<Record::AssertionInfo> Assertion)
RecordsEntry::RecordsEntry(std::unique_ptr<Record::DumpInfo> Dump)
: Dump(std::move(Dump)) {}
+namespace llvm {
struct SubClassReference {
SMRange RefRange;
const Record *Rec = nullptr;
@@ -61,6 +60,7 @@ struct SubMultiClassReference {
bool isInvalid() const { return MC == nullptr; }
void dump() const;
};
+} // end namespace llvm
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SubMultiClassReference::dump() const {
@@ -74,8 +74,6 @@ LLVM_DUMP_METHOD void SubMultiClassReference::dump() const {
}
#endif
-} // end namespace llvm
-
static bool checkBitsConcrete(Record &R, const RecordVal &RV) {
const auto *BV = cast<BitsInit>(RV.getValue());
for (unsigned i = 0, e = BV->getNumBits(); i != e; ++i) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 70d5ad7d..dc8e7c8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16461,7 +16461,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
- DAG.getConstant(Cnt, DL, MVT::i32));
+ DAG.getTargetConstant(Cnt, DL, MVT::i32));
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
MVT::i32),
@@ -16491,7 +16491,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
unsigned Opc =
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
- DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags());
+ DAG.getTargetConstant(Cnt, DL, MVT::i32),
+ Op->getFlags());
}
// Right shift register. Note, there is not a shift right register
@@ -19973,7 +19974,7 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
SDValue FixConv =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
- Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
+ Op->getOperand(0), DAG.getTargetConstant(C, DL, MVT::i32));
// We can handle smaller integers by generating an extra trunc.
if (IntBits < FloatBits)
FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
@@ -20696,7 +20697,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
N100 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N100);
SDValue Uzp = DAG.getNode(AArch64ISD::UZP2, DL, VT, N000, N100);
SDValue NewShiftConstant =
- DAG.getConstant(N001ConstVal - NScalarSize, DL, MVT::i32);
+ DAG.getTargetConstant(N001ConstVal - NScalarSize, DL, MVT::i32);
return DAG.getNode(AArch64ISD::VLSHR, DL, VT, Uzp, NewShiftConstant);
}
@@ -22373,14 +22374,14 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
Op = DAG.getNode(Opcode, DL, VT, Op,
- DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32));
+ DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32, true));
if (N->getValueType(0) == MVT::i64)
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op,
DAG.getConstant(0, DL, MVT::i64));
return Op;
} else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
Op = DAG.getNode(Opcode, DL, VT, Op,
- DAG.getConstant(ShiftAmount, DL, MVT::i32));
+ DAG.getTargetConstant(ShiftAmount, DL, MVT::i32));
if (N->getValueType(0) == MVT::i64)
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op,
DAG.getConstant(0, DL, MVT::i64));
@@ -23198,7 +23199,7 @@ static SDValue performZExtUZPCombine(SDNode *N, SelectionDAG &DAG) {
Op.getOperand(ExtOffset == 0 ? 0 : 1));
if (Shift != 0)
BC = DAG.getNode(AArch64ISD::VLSHR, DL, VT, BC,
- DAG.getConstant(Shift, DL, MVT::i32));
+ DAG.getTargetConstant(Shift, DL, MVT::i32));
return DAG.getNode(ISD::AND, DL, VT, BC, DAG.getConstant(Mask, DL, VT));
}
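The `getConstant` to `getTargetConstant` swaps above pair with the `ImmLeaf` to `TImmLeaf` operand changes in the `.td` files below: an `ISD::Constant` node may be legalized, CSE'd, or moved into a register before instruction selection, while an `ISD::TargetConstant` is opaque to those passes and must remain an inline immediate. A sketch of the distinction, assuming `DAG`, `DL`, and `Cnt` are in scope:

// May be CSE'd, legalized, or materialized in a register, so a TImmLeaf
// pattern will not match it.
SDValue C = DAG.getConstant(Cnt, DL, MVT::i32);

// Guaranteed to stay an immediate operand through selection; this is what
// the vecshift* operands (now TImmLeaf) require.
SDValue TC = DAG.getTargetConstant(Cnt, DL, MVT::i32);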
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 6ef0a95..09ce713 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -812,49 +812,49 @@ def fixedpoint_recip_f16_i64 : fixedpoint_recip_i64<f16>;
def fixedpoint_recip_f32_i64 : fixedpoint_recip_i64<f32>;
def fixedpoint_recip_f64_i64 : fixedpoint_recip_i64<f64>;
-def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR8 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
}]> {
let EncoderMethod = "getVecShiftR8OpValue";
let DecoderMethod = "DecodeVecShiftR8Imm";
let ParserMatchClass = Imm1_8Operand;
}
-def vecshiftR16 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR16 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
}]> {
let EncoderMethod = "getVecShiftR16OpValue";
let DecoderMethod = "DecodeVecShiftR16Imm";
let ParserMatchClass = Imm1_16Operand;
}
-def vecshiftR16Narrow : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR16Narrow : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
}]> {
let EncoderMethod = "getVecShiftR16OpValue";
let DecoderMethod = "DecodeVecShiftR16ImmNarrow";
let ParserMatchClass = Imm1_8Operand;
}
-def vecshiftR32 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR32 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
}]> {
let EncoderMethod = "getVecShiftR32OpValue";
let DecoderMethod = "DecodeVecShiftR32Imm";
let ParserMatchClass = Imm1_32Operand;
}
-def vecshiftR32Narrow : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR32Narrow : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
}]> {
let EncoderMethod = "getVecShiftR32OpValue";
let DecoderMethod = "DecodeVecShiftR32ImmNarrow";
let ParserMatchClass = Imm1_16Operand;
}
-def vecshiftR64 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR64 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65);
}]> {
let EncoderMethod = "getVecShiftR64OpValue";
let DecoderMethod = "DecodeVecShiftR64Imm";
let ParserMatchClass = Imm1_64Operand;
}
-def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR64Narrow : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
}]> {
let EncoderMethod = "getVecShiftR64OpValue";
@@ -862,37 +862,6 @@ def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm1_32Operand;
}
-// Same as vecshiftR#N, but use TargetConstant (TimmLeaf) instead of Constant
-// (ImmLeaf)
-def tvecshiftR8 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
-}]> {
- let EncoderMethod = "getVecShiftR8OpValue";
- let DecoderMethod = "DecodeVecShiftR8Imm";
- let ParserMatchClass = Imm1_8Operand;
-}
-def tvecshiftR16 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
-}]> {
- let EncoderMethod = "getVecShiftR16OpValue";
- let DecoderMethod = "DecodeVecShiftR16Imm";
- let ParserMatchClass = Imm1_16Operand;
-}
-def tvecshiftR32 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
-}]> {
- let EncoderMethod = "getVecShiftR32OpValue";
- let DecoderMethod = "DecodeVecShiftR32Imm";
- let ParserMatchClass = Imm1_32Operand;
-}
-def tvecshiftR64 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65);
-}]> {
- let EncoderMethod = "getVecShiftR64OpValue";
- let DecoderMethod = "DecodeVecShiftR64Imm";
- let ParserMatchClass = Imm1_64Operand;
-}
-
def Imm0_0Operand : AsmImmRange<0, 0>;
def Imm0_1Operand : AsmImmRange<0, 1>;
def Imm1_1Operand : AsmImmRange<1, 1>;
@@ -904,28 +873,28 @@ def Imm0_15Operand : AsmImmRange<0, 15>;
def Imm0_31Operand : AsmImmRange<0, 31>;
def Imm0_63Operand : AsmImmRange<0, 63>;
-def vecshiftL8 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL8 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 8);
}]> {
let EncoderMethod = "getVecShiftL8OpValue";
let DecoderMethod = "DecodeVecShiftL8Imm";
let ParserMatchClass = Imm0_7Operand;
}
-def vecshiftL16 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL16 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 16);
}]> {
let EncoderMethod = "getVecShiftL16OpValue";
let DecoderMethod = "DecodeVecShiftL16Imm";
let ParserMatchClass = Imm0_15Operand;
}
-def vecshiftL32 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL32 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 32);
}]> {
let EncoderMethod = "getVecShiftL32OpValue";
let DecoderMethod = "DecodeVecShiftL32Imm";
let ParserMatchClass = Imm0_31Operand;
}
-def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL64 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 64);
}]> {
let EncoderMethod = "getVecShiftL64OpValue";
@@ -933,36 +902,6 @@ def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_63Operand;
}
-// Same as vecshiftL#N, but use TargetConstant (TimmLeaf) instead of Constant
-// (ImmLeaf)
-def tvecshiftL8 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 8);
-}]> {
- let EncoderMethod = "getVecShiftL8OpValue";
- let DecoderMethod = "DecodeVecShiftL8Imm";
- let ParserMatchClass = Imm0_7Operand;
-}
-def tvecshiftL16 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 16);
-}]> {
- let EncoderMethod = "getVecShiftL16OpValue";
- let DecoderMethod = "DecodeVecShiftL16Imm";
- let ParserMatchClass = Imm0_15Operand;
-}
-def tvecshiftL32 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 32);
-}]> {
- let EncoderMethod = "getVecShiftL32OpValue";
- let DecoderMethod = "DecodeVecShiftL32Imm";
- let ParserMatchClass = Imm0_31Operand;
-}
-def tvecshiftL64 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 64);
-}]> {
- let EncoderMethod = "getVecShiftL64OpValue";
- let DecoderMethod = "DecodeVecShiftL64Imm";
- let ParserMatchClass = Imm0_63Operand;
-}
// Crazy immediate formats used by 32-bit and 64-bit logical immediate
// instructions for splatting repeating bit patterns across the immediate.
@@ -10232,7 +10171,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
V64, V64, vecshiftR16,
asm, ".4h", ".4h",
- [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
@@ -10240,15 +10179,16 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
V128, V128, vecshiftR16,
asm, ".8h", ".8h",
- [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
} // Predicates = [HasNEON, HasFullFP16]
+
def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
V64, V64, vecshiftR32,
asm, ".2s", ".2s",
- [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10256,7 +10196,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
V128, V128, vecshiftR32,
asm, ".4s", ".4s",
- [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10264,7 +10204,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
V128, V128, vecshiftR64,
asm, ".2d", ".2d",
- [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 vecshiftR64:$imm)))]> {
bits<6> imm;
let Inst{21-16} = imm;
}
@@ -10276,7 +10216,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
V64, V64, vecshiftR16,
asm, ".4h", ".4h",
- [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
@@ -10284,7 +10224,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
V128, V128, vecshiftR16,
asm, ".8h", ".8h",
- [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
@@ -10293,7 +10233,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
V64, V64, vecshiftR32,
asm, ".2s", ".2s",
- [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10301,7 +10241,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
V128, V128, vecshiftR32,
asm, ".4s", ".4s",
- [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10309,7 +10249,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
V128, V128, vecshiftR64,
asm, ".2d", ".2d",
- [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 vecshiftR64:$imm)))]> {
bits<6> imm;
let Inst{21-16} = imm;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 96cc3f3..3e55b76 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2957,9 +2957,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
// Need special instructions for atomics that affect ordering.
- if (Order != AtomicOrdering::NotAtomic &&
- Order != AtomicOrdering::Unordered &&
- Order != AtomicOrdering::Monotonic) {
+ if (isStrongerThanMonotonic(Order)) {
assert(!isa<GZExtLoad>(LdSt));
assert(MemSizeInBytes <= 8 &&
"128-bit atomics should already be custom-legalized");
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 6025f1c..63313da 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -556,8 +556,7 @@ void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
unsigned NewOpc =
Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
MachineIRBuilder MIB(MI);
- auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
- MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
+ MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1)}).addImm(Imm);
MI.eraseFromParent();
}
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 539470d..be44b8f 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -4967,7 +4967,7 @@ multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> {
//===----------------------------------------------------------------------===//
// SME2 multi-vec saturating shift right narrow
class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
+ : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
mnemonic, "\t$Zd, $Zn, $imm4",
"", []>, Sched<[]> {
bits<4> imm4;
@@ -4985,7 +4985,7 @@ class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u, SDPatternOperator intrinsic> {
def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>;
- def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>;
+ def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, vecshiftR16>;
}
class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
@@ -5008,20 +5008,20 @@ class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
}
multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
- def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32,
+ def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, vecshiftR32,
mnemonic>{
bits<5> imm;
let Inst{20-16} = imm;
}
- def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64,
+ def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, vecshiftR64,
mnemonic> {
bits<6> imm;
let Inst{22} = imm{5};
let Inst{20-16} = imm{4-0};
}
- def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, tvecshiftR32>;
- def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, tvecshiftR64>;
+ def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, vecshiftR32>;
+ def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, vecshiftR64>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 9a23c35..3cdd505 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4436,9 +4436,9 @@ multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm,
ZPR64, ZPR32, vecshiftL32> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -4481,10 +4481,10 @@ multiclass sve2_int_bin_shift_imm_left<bit opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftL64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve2_int_bin_shift_imm_right<bit opc, string asm,
@@ -4501,10 +4501,10 @@ multiclass sve2_int_bin_shift_imm_right<bit opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
@@ -4546,10 +4546,10 @@ multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
def : SVE_Shift_Add_All_Active_Pat<nxv16i8, shift_op, nxv16i1, nxv16i8, nxv16i8, i32, !cast<Instruction>(NAME # _B)>;
def : SVE_Shift_Add_All_Active_Pat<nxv8i16, shift_op, nxv8i1, nxv8i16, nxv8i16, i32, !cast<Instruction>(NAME # _H)>;
@@ -4676,18 +4676,18 @@ class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm,
SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16,
- tvecshiftR8>;
+ vecshiftR8>;
def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32,
- tvecshiftR16> {
+ vecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
- tvecshiftR32> {
+ vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
@@ -4717,18 +4717,18 @@ class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm,
SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16,
- tvecshiftR8>;
+ vecshiftR8>;
def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32,
- tvecshiftR16> {
+ vecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
- tvecshiftR32> {
+ vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
@@ -5461,10 +5461,10 @@ multiclass sve2_int_rotate_right_imm<string asm, SDPatternOperator op> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -6443,10 +6443,10 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps,
let Inst{9-8} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftL64, !cast<Instruction>(NAME # _D)>;
}
// As above, but the shift amount takes the form of a "vector immediate".
@@ -6460,15 +6460,15 @@ multiclass sve_int_bin_pred_shift_imm_left_dup<bits<4> opc, string asm,
}
multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> {
- def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>;
- def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
- def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesZero>;
- def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesZero>;
+ def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftL8, FalseLanesZero>;
+ def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftL16, FalseLanesZero>;
+ def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftL32, FalseLanesZero>;
+ def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftL64, FalseLanesZero>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, vecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, vecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, vecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>;
}
multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
@@ -6489,10 +6489,10 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
let Inst{9-8} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}
// As above, but the shift amount takes the form of a "vector immediate".
@@ -6511,10 +6511,10 @@ multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op =
def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftR32, FalseLanesZero>;
def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftR64, FalseLanesZero>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, vecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, vecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, vecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>;
}
class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
@@ -10031,7 +10031,7 @@ multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatte
// SVE2 multi-vec shift narrow
class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
+ : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
mnemonic, "\t$Zd, $Zn, $imm4",
"", []>, Sched<[]> {
bits<5> Zd;
@@ -10055,7 +10055,7 @@ class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> {
def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, opc, 0b01>;
- def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>;
+ def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, vecshiftR16>;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 2ba3156..9dd64e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -131,10 +131,8 @@ static bool isDSAddress(const Constant *C) {
return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}
-/// Returns true if the function requires the implicit argument be passed
-/// regardless of the function contents.
-static bool funcRequiresHostcallPtr(const Function &F) {
- // Sanitizers require the hostcall buffer passed in the implicit arguments.
+/// Returns true if sanitizer attributes are present on a function.
+static bool hasSanitizerAttributes(const Function &F) {
return F.hasFnAttribute(Attribute::SanitizeAddress) ||
F.hasFnAttribute(Attribute::SanitizeThread) ||
F.hasFnAttribute(Attribute::SanitizeMemory) ||
@@ -469,15 +467,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
// If the function requires the implicit arg pointer due to sanitizers,
// assume it's needed even if explicitly marked as not requiring it.
- const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
- if (NeedsHostcall) {
+ // Flat scratch initialization is needed because `asan_malloc_impl`
+ // calls introduced later in the pipeline will have flat scratch accesses.
+ // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs
+ // implementation for `asan_malloc_impl` is updated.
+ const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
+ if (HasSanitizerAttrs) {
removeAssumedBits(IMPLICIT_ARG_PTR);
removeAssumedBits(HOSTCALL_PTR);
+ removeAssumedBits(FLAT_SCRATCH_INIT);
}
for (auto Attr : ImplicitAttrs) {
- if (NeedsHostcall &&
- (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
+ if (HasSanitizerAttrs &&
+ (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
+ Attr.first == FLAT_SCRATCH_INIT))
continue;
if (F->hasFnAttribute(Attr.second))
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 82789bc..90c828b 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -932,7 +932,9 @@ static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII,
for (MachineInstr *SubDef = MRI.getVRegDef(SrcReg);
SubDef && TII.isFoldableCopy(*SubDef);
SubDef = MRI.getVRegDef(Sub->getReg())) {
- MachineOperand &SrcOp = SubDef->getOperand(1);
+ unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef);
+ MachineOperand &SrcOp = SubDef->getOperand(SrcIdx);
+
if (SrcOp.isImm())
return &SrcOp;
if (!SrcOp.isReg() || SrcOp.getReg().isPhysical())
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 79876ff..e233457 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -18860,31 +18860,6 @@ SITargetLowering::getTargetMMOFlags(const Instruction &I) const {
return Flags;
}
-bool SITargetLowering::checkForPhysRegDependency(
- SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI,
- const TargetInstrInfo *TII, MCRegister &PhysReg, int &Cost) const {
- if (User->getOpcode() != ISD::CopyToReg)
- return false;
- if (!Def->isMachineOpcode())
- return false;
- MachineSDNode *MDef = dyn_cast<MachineSDNode>(Def);
- if (!MDef)
- return false;
-
- unsigned ResNo = User->getOperand(Op).getResNo();
- if (User->getOperand(Op)->getValueType(ResNo) != MVT::i1)
- return false;
- const MCInstrDesc &II = TII->get(MDef->getMachineOpcode());
- if (II.isCompare() && II.hasImplicitDefOfPhysReg(AMDGPU::SCC)) {
- PhysReg = AMDGPU::SCC;
- const TargetRegisterClass *RC =
- TRI->getMinimalPhysRegClass(PhysReg, Def->getSimpleValueType(ResNo));
- Cost = RC->expensiveOrImpossibleToCopy() ? -1 : RC->getCopyCost();
- return true;
- }
- return false;
-}
-
void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
Instruction *AI) const {
// Given: atomicrmw fadd ptr %addr, float %val ordering
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index a474dab..74e58f4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -561,11 +561,6 @@ public:
bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const;
- bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
- const TargetRegisterInfo *TRI,
- const TargetInstrInfo *TII,
- MCRegister &PhysReg, int &Cost) const override;
-
bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index cda8069..46757cf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3433,6 +3433,32 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
}
}
+unsigned SIInstrInfo::getFoldableCopySrcIdx(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::V_MOV_B16_t16_e32:
+ case AMDGPU::V_MOV_B16_t16_e64:
+ return 2;
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::V_MOV_B64_e32:
+ case AMDGPU::V_MOV_B64_e64:
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::S_MOV_B64_IMM_PSEUDO:
+ case AMDGPU::COPY:
+ case AMDGPU::WWM_COPY:
+ case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
+ case AMDGPU::V_ACCVGPR_READ_B32_e64:
+ case AMDGPU::V_ACCVGPR_MOV_B32:
+ case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
+ case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
+ return 1;
+ default:
+ llvm_unreachable("MI is not a foldable copy");
+ }
+}
+
static constexpr AMDGPU::OpName ModifierOpNames[] = {
AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index a21089f..cc59acf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -417,6 +417,7 @@ public:
const MachineInstr &MIb) const override;
static bool isFoldableCopy(const MachineInstr &MI);
+ static unsigned getFoldableCopySrcIdx(const MachineInstr &MI);
void removeModOperands(MachineInstr &MI) const;
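The new hook exists because not every foldable copy keeps its source at operand 1; the true16 moves carry a `src0_modifiers` operand first. The layouts below are inferred from the switch in `getFoldableCopySrcIdx` (an assumption, not a dump of the real MCInstrDescs), and the call shape mirrors the `SIFoldOperands` use above:

// Inferred operand layouts:
//   V_MOV_B16_t16_e64: dst, src0_modifiers, src0  -> source at index 2
//   V_MOV_B32_e32:     dst, src0                  -> source at index 1
MachineOperand &SrcOp =
    SubDef->getOperand(SIInstrInfo::getFoldableCopySrcIdx(*SubDef));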
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 0040504..a94e131 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -359,6 +359,8 @@ HexagonTargetLowering::initializeHVXLowering() {
setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
@@ -372,6 +374,8 @@ HexagonTargetLowering::initializeHVXLowering() {
setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);
// Boolean vectors.
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 7d4535a..b37b740 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1560,7 +1560,7 @@ static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
// If it's not a grouped vector register, it doesn't have a subregister, so
// the base register is just the register itself.
- if (BaseReg == RISCV::NoRegister)
+ if (!BaseReg.isValid())
BaseReg = Reg;
return BaseReg;
}
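This and the RISC-V cleanups below rely on `Register`/`MCRegister` wrapping an unsigned whose zero value means "no register", so `isValid()` is the target-independent spelling of the old comparison:

// The two predicates are equivalent; RISCV::NoRegister is 0.
MCRegister R = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
bool Old = (R == RISCV::NoRegister); // target-specific spelling
bool New = !R.isValid();             // preferred idiom
assert(Old == New);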
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index cf6f83a..7f5d0af 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -126,13 +126,6 @@ let Predicates = [HasAtomicLdSt, IsRV64] in {
// RV64 i32 patterns not used by SelectionDAG
//===----------------------------------------------------------------------===//
-def uimm5i32 : ImmLeaf<i32, [{return isUInt<5>(Imm);}]>;
-
-def zext_is_sext : PatFrag<(ops node:$src), (zext node:$src), [{
- KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0), 0);
- return Known.isNonNegative();
-}]>;
-
let Predicates = [IsRV64] in {
def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb.
def : LdPat<extloadi16, LH, i32>;
@@ -140,15 +133,10 @@ def : LdPat<extloadi16, LH, i32>;
def : StPat<truncstorei8, SB, GPR, i32>;
def : StPat<truncstorei16, SH, GPR, i32>;
-def : Pat<(anyext (i32 GPR:$src)), (COPY GPR:$src)>;
def : Pat<(sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>;
-def : Pat<(i32 (trunc GPR:$src)), (COPY GPR:$src)>;
def : Pat<(sext_inreg (i64 (add GPR:$rs1, simm12_lo:$imm)), i32),
(ADDIW GPR:$rs1, simm12_lo:$imm)>;
-
-// Use sext if the sign bit of the input is 0.
-def : Pat<(zext_is_sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>;
}
let Predicates = [IsRV64, NoStdExtZba] in
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 6a6ead2..cf8d120 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -128,7 +128,7 @@ static bool hasUndefinedPassthru(const MachineInstr &MI) {
// All undefined passthrus should be $noreg: see
// RISCVDAGToDAGISel::doPeepholeNoRegPassThru
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
- return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
+ return !UseMO.getReg().isValid() || UseMO.isUndef();
}
/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
@@ -1454,7 +1454,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
Register Reg = VLOp.getReg();
// Erase the AVL operand from the instruction.
- VLOp.setReg(RISCV::NoRegister);
+ VLOp.setReg(Register());
VLOp.setIsKill(false);
if (LIS) {
LiveInterval &LI = LIS->getInterval(Reg);
@@ -1663,7 +1663,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
if (!MO.isReg() || !MO.getReg().isVirtual())
return;
Register OldVLReg = MO.getReg();
- MO.setReg(RISCV::NoRegister);
+ MO.setReg(Register());
if (LIS)
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 1e6b04f8..7db4832 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1364,7 +1364,7 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
/*RestoreAfter=*/false, /*SpAdj=*/0,
/*AllowSpill=*/false);
- if (TmpGPR != RISCV::NoRegister)
+ if (TmpGPR.isValid())
RS->setRegUsed(TmpGPR);
else {
// The case when there is no scavenged register needs special handling.
@@ -3021,7 +3021,7 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo = "Invalid operand type for VL operand";
return false;
}
- if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
+ if (Op.isReg() && Op.getReg().isValid()) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
auto *RC = MRI.getRegClass(Op.getReg());
if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
index 1674c95..1dd7332 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
@@ -26,7 +26,7 @@ class LAQ_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
class SRL_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
: RVInstRAtomic<0b00111, aq, rl, funct3, OPC_AMO,
- (outs ), (ins GPRMemZeroOffset:$rs1, GPR:$rs2),
+ (outs), (ins GPR:$rs2, GPRMemZeroOffset:$rs1),
opcodestr, "$rs2, $rs1"> {
let rd = 0;
}
@@ -71,7 +71,7 @@ class PatLAQ<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
// while atomic_store has data, addr
class PatSRL<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
: Pat<(OpNode (vt GPR:$rs2), (XLenVT GPRMemZeroOffset:$rs1)),
- (Inst GPRMemZeroOffset:$rs1, GPR:$rs2)>;
+ (Inst GPR:$rs2, GPRMemZeroOffset:$rs1)>;
let Predicates = [HasStdExtZalasr] in {
diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
index f8d33ae..54569b1 100644
--- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -259,7 +259,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
if (isCompressibleLoad(MI) || isCompressibleStore(MI)) {
const MachineOperand &MOImm = MI.getOperand(2);
if (!MOImm.isImm())
- return RegImmPair(RISCV::NoRegister, 0);
+ return RegImmPair(Register(), 0);
int64_t Offset = MOImm.getImm();
int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
@@ -292,7 +292,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
}
}
}
- return RegImmPair(RISCV::NoRegister, 0);
+ return RegImmPair(Register(), 0);
}
// Check all uses after FirstMI of the given register, keeping a vector of
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index ffba284..fdf9a4f 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -382,7 +382,7 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
// register class for the destination and passthru operands e.g. VRNoV0 -> VR
MRI->recomputeRegClass(MI.getOperand(0).getReg());
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
MRI->recomputeRegClass(MI.getOperand(1).getReg());
return true;
}
@@ -448,7 +448,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
Register FalseReg = MI.getOperand(2).getReg();
if (TruePassthruReg != FalseReg) {
// If True's passthru is undef see if we can change it to False
- if (TruePassthruReg != RISCV::NoRegister ||
+ if (TruePassthruReg.isValid() ||
!MRI->hasOneUse(MI.getOperand(3).getReg()) ||
!ensureDominates(MI.getOperand(2), *True))
return false;
@@ -467,7 +467,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
// register class for the destination and passthru operands e.g. VRNoV0 -> VR
MRI->recomputeRegClass(MI.getOperand(0).getReg());
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
MRI->recomputeRegClass(MI.getOperand(1).getReg());
return true;
}
@@ -517,7 +517,7 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
if (RISCVII::isFirstDefTiedToFirstUse(MaskedMCID)) {
unsigned PassthruOpIdx = MI.getNumExplicitDefs();
if (HasPassthru) {
- if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(PassthruOpIdx).getReg().isValid())
MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg());
} else
MI.removeOperand(PassthruOpIdx);
@@ -576,7 +576,7 @@ static bool dominates(MachineBasicBlock::const_iterator A,
bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
MachineInstr &Src) const {
assert(MO.getParent()->getParent() == Src.getParent());
- if (!MO.isReg() || MO.getReg() == RISCV::NoRegister)
+ if (!MO.isReg() || !MO.getReg().isValid())
return true;
MachineInstr *Def = MRI->getVRegDef(MO.getReg());
@@ -593,7 +593,7 @@ bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) {
if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V)
return false;
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
return false;
// If the input was a pseudo with a policy operand, we can give it a tail
@@ -654,7 +654,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
// Src needs to have the same passthru as VMV_V_V
MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
- if (SrcPassthru.getReg() != RISCV::NoRegister &&
+ if (SrcPassthru.getReg().isValid() &&
SrcPassthru.getReg() != Passthru.getReg())
return false;
@@ -672,7 +672,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
if (SrcPassthru.getReg() != Passthru.getReg()) {
SrcPassthru.setReg(Passthru.getReg());
// If Src is masked then its passthru needs to be in VRNoV0.
- if (Passthru.getReg() != RISCV::NoRegister)
+ if (Passthru.getReg().isValid())
MRI->constrainRegClass(
Passthru.getReg(),
TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo(), TRI));
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
index 7505507..ebd957c 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
@@ -188,8 +188,31 @@ class SPIRVLegalizePointerCast : public FunctionPass {
FixedVectorType *SrcType = cast<FixedVectorType>(Src->getType());
FixedVectorType *DstType =
cast<FixedVectorType>(GR->findDeducedElementType(Dst));
- assert(DstType->getNumElements() >= SrcType->getNumElements());
+ auto DstNumElements = DstType->getNumElements();
+ auto SrcNumElements = SrcType->getNumElements();
+
+ // If the element types differ, the store needs a bitcast.
+ if (DstType->getElementType() != SrcType->getElementType()) {
+ // Support bitcasts between vectors of different element counts only if
+ // the total bit width is the same.
+ auto DstBitWidth =
+ DstType->getElementType()->getScalarSizeInBits() * DstNumElements;
+ auto SrcBitWidth =
+ SrcType->getElementType()->getScalarSizeInBits() * SrcNumElements;
+ assert(DstBitWidth == SrcBitWidth &&
+ "Unsupported bitcast between vectors of different sizes.");
+
+ Src =
+ B.CreateIntrinsic(Intrinsic::spv_bitcast, {DstType, SrcType}, {Src});
+ buildAssignType(B, DstType, Src);
+ SrcType = DstType;
+
+ StoreInst *SI = B.CreateStore(Src, Dst);
+ SI->setAlignment(Alignment);
+ return SI;
+ }
+ assert(DstNumElements >= SrcNumElements);
LoadInst *LI = B.CreateLoad(DstType, Dst);
LI->setAlignment(Alignment);
Value *OldValues = LI;
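A worked instance of the bit-width guard added above, with illustrative types: `<4 x i32>` and `<8 x i16>` both span 128 bits, so the store is legalized through a single `spv_bitcast` instead of tripping the element-count assert.

LLVMContext Ctx;
auto *SrcTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4); // 4 x 32 bits
auto *DstTy = FixedVectorType::get(Type::getInt16Ty(Ctx), 8); // 8 x 16 bits
unsigned SrcBits = SrcTy->getScalarSizeInBits() * SrcTy->getNumElements();
unsigned DstBits = DstTy->getScalarSizeInBits() * DstTy->getNumElements();
assert(SrcBits == DstBits && SrcBits == 128 && "equal width -> bitcast ok");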
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 02b20b3..931a10b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13783,10 +13783,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// so prevents folding a load into this instruction or making a copy.
const int UnpackLoMask[] = {0, 0, 1, 1};
const int UnpackHiMask[] = {2, 2, 3, 3};
- if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
- Mask = UnpackLoMask;
- else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
- Mask = UnpackHiMask;
+ if (!isSingleElementRepeatedMask(Mask)) {
+ if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
+ Mask = UnpackLoMask;
+ else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
+ Mask = UnpackHiMask;
+ }
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cf6d0ec..e1e24a9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -318,18 +318,18 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
// * Single constant active lane -> store
// * Narrow width by halfs excluding zero/undef lanes
Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
+ Value *StorePtr = II.getArgOperand(1);
+ Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
if (!ConstMask)
return nullptr;
// If the mask is all zeros, this instruction does nothing.
- if (ConstMask->isNullValue())
+ if (maskIsAllZeroOrUndef(ConstMask))
return eraseInstFromFunction(II);
// If the mask is all ones, this is a plain vector store of the 1st argument.
- if (ConstMask->isAllOnesValue()) {
- Value *StorePtr = II.getArgOperand(1);
- Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
+ if (maskIsAllOneOrUndef(ConstMask)) {
StoreInst *S =
new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
S->copyMetadata(II);
@@ -389,7 +389,7 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
return nullptr;
// If the mask is all zeros, a scatter does nothing.
- if (ConstMask->isNullValue())
+ if (maskIsAllZeroOrUndef(ConstMask))
return eraseInstFromFunction(II);
// Vector splat address -> scalar store
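The masked-store and masked-scatter folds now tolerate undef lanes: a constant mask whose defined lanes are all zero erases the intrinsic, and one whose defined lanes are all ones degrades to an ordinary store. A scalar model of the two predicates, with -1 standing in for an undef lane (the real code walks Constant vector elements):

    #include <cassert>
    #include <vector>

    // Lane values: 0, 1, or -1 for undef.
    static bool maskIsAllZeroOrUndef(const std::vector<int> &Mask) {
      for (int L : Mask)
        if (L == 1)
          return false; // a definitely-enabled lane blocks the fold
      return true;
    }

    static bool maskIsAllOneOrUndef(const std::vector<int> &Mask) {
      for (int L : Mask)
        if (L == 0)
          return false; // a definitely-disabled lane blocks the fold
      return true;
    }

    int main() {
      assert(maskIsAllZeroOrUndef({0, -1, 0, 0})); // store erased
      assert(maskIsAllOneOrUndef({1, 1, -1, 1}));  // plain unconditional store
      assert(!maskIsAllOneOrUndef({1, 0, 1, 1}));  // mixed mask: kept as-is
      return 0;
    }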
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 87000a1..3df448d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -50,6 +50,9 @@
using namespace llvm;
using namespace PatternMatch;
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+}
/// Replace a select operand based on an equality comparison with the identity
/// constant of a binop.
@@ -4492,8 +4495,21 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
auto FoldSelectWithAndOrCond = [&](bool IsAnd, Value *A,
Value *B) -> Instruction * {
if (Value *V = simplifySelectInst(B, TrueVal, FalseVal,
- SQ.getWithInstruction(&SI)))
- return SelectInst::Create(A, IsAnd ? V : TrueVal, IsAnd ? FalseVal : V);
+ SQ.getWithInstruction(&SI))) {
+ Value *NewTrueVal = IsAnd ? V : TrueVal;
+ Value *NewFalseVal = IsAnd ? FalseVal : V;
+
+ // If the True and False values don't change, then preserve the branch
+ // metadata of the original select as the net effect of this change is to
+ // simplify the conditional.
+ Instruction *MDFrom = nullptr;
+ if (NewTrueVal == TrueVal && NewFalseVal == FalseVal &&
+ !ProfcheckDisableMetadataFixes) {
+ MDFrom = &SI;
+ }
+ return SelectInst::Create(A, NewTrueVal, NewFalseVal, "", nullptr,
+ MDFrom);
+ }
// Is (select B, T, F) a SPF?
if (CondVal->hasOneUse() && SelType->isIntOrIntVectorTy()) {
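Branch-weight metadata is carried over only when both select arms survive unchanged, since then the new select makes the same choice under a simpler condition and the old !prof weights still describe it; if an arm was replaced by the simplified value, the weights could be stale. A reduced model of that decision (types and names simplified from the patch):

    #include <cassert>

    struct Value {};

    // Profile metadata from the original select is reusable only when the
    // rewritten select keeps both of the original arms.
    static bool canReuseProfMetadata(const Value *TrueVal, const Value *FalseVal,
                                     const Value *NewTrueVal,
                                     const Value *NewFalseVal,
                                     bool ProfcheckDisableMetadataFixes) {
      return NewTrueVal == TrueVal && NewFalseVal == FalseVal &&
             !ProfcheckDisableMetadataFixes;
    }

    int main() {
      Value T, F, V;
      assert(canReuseProfMetadata(&T, &F, &T, &F, false));  // arms unchanged
      assert(!canReuseProfMetadata(&T, &F, &V, &F, false)); // arm simplified away
      return 0;
    }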
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 9d4fb79..d6b7633 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1646,10 +1646,6 @@ NewGVN::performSymbolicPredicateInfoEvaluation(BitCastInst *I) const {
// Evaluate read only and pure calls, and create an expression result.
NewGVN::ExprResult NewGVN::performSymbolicCallEvaluation(Instruction *I) const {
auto *CI = cast<CallInst>(I);
- if (auto *II = dyn_cast<IntrinsicInst>(I)) {
- if (auto *ReturnedValue = II->getReturnedArgOperand())
- return ExprResult::some(createVariableOrConstant(ReturnedValue));
- }
// FIXME: Currently the calls which may access the thread id may
// be considered as not accessing the memory. But this is
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 43d61f2..a88cffc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3298,10 +3298,11 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF);
-
+ bool UsedByLoadStoreAddress = isUsedByLoadStoreAddress(this);
InstructionCost ScalarCost =
ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
- PtrTy, &Ctx.SE, nullptr, Ctx.CostKind);
+ PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE,
+ nullptr, Ctx.CostKind);
if (isSingleScalar())
return ScalarCost;
@@ -3312,7 +3313,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
// vectorized addressing or the loaded value is used as part of an address
// of another load or store.
bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
- if (PreferVectorizedAddressing || !isUsedByLoadStoreAddress(this)) {
+ if (PreferVectorizedAddressing || !UsedByLoadStoreAddress) {
bool EfficientVectorLoadStore =
Ctx.TTI.supportsEfficientVectorElementLoadStore();
if (!(IsLoad && !PreferVectorizedAddressing) &&
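Two effects are folded into this hunk: the isUsedByLoadStoreAddress query now runs once instead of twice, and when the replicated value does feed another memory address the address-computation cost is taken without ScalarEvolution, so the TTI hook falls back to its stride-independent default. A control-flow sketch with invented placeholder costs:

    #include <cassert>

    // Placeholder for TTI::getAddressComputationCost: with SCEV available a
    // target may charge a stride-dependent cost, without it a flat default.
    static int addressComputationCost(bool HaveSCEV) { return HaveSCEV ? 2 : 1; }

    static int scalarCost(bool UsedByLoadStoreAddress, int MemOpCost) {
      // Mirrors the patch: consult SCEV only when the value is not itself
      // part of another load/store address.
      return MemOpCost + addressComputationCost(!UsedByLoadStoreAddress);
    }

    int main() {
      assert(scalarCost(/*UsedByLoadStoreAddress=*/true, 4) == 5);
      assert(scalarCost(/*UsedByLoadStoreAddress=*/false, 4) == 6);
      return 0;
    }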
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
index 7872c02..461a7ef 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -177,7 +177,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
; GISEL-NEXT: neg v2.16b, v3.16b
; GISEL-NEXT: shl v3.16b, v4.16b, #7
; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b
-; GISEL-NEXT: sshr v2.16b, v3.16b, #7
+; GISEL-NEXT: cmlt v2.16b, v3.16b, #0
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; GISEL-NEXT: ret
%div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -229,7 +229,7 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
; GISEL-NEXT: neg v2.8h, v4.8h
; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
-; GISEL-NEXT: sshr v2.8h, v3.8h, #15
+; GISEL-NEXT: cmlt v2.8h, v3.8h, #0
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; GISEL-NEXT: ret
%1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>
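This and the following AArch64 test updates track a codegen change that materializes per-lane sign masks with cmlt #0 instead of sshr #(lane bits - 1); the two forms are bit-for-bit identical, since an arithmetic shift by the full lane width minus one broadcasts the sign bit. An exhaustive check at i16 width (relies on arithmetic right shift for signed values, guaranteed from C++20 onward):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t V = INT16_MIN; V <= INT16_MAX; ++V) {
        int16_t X = static_cast<int16_t>(V);
        int16_t Sshr = static_cast<int16_t>(X >> 15);    // sshr v, #15
        int16_t Cmlt = X < 0 ? int16_t(-1) : int16_t(0); // cmlt v, #0
        assert(Sshr == Cmlt);
      }
      return 0;
    }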
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir
index 0b950b7..76d4d29 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir
@@ -14,8 +14,7 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[C]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL %v1, [[DUP]](<4 x s32>)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: %sext:_(<4 x s32>) = G_VASHR [[SHL]], [[C1]](s32)
+ ; CHECK-NEXT: %sext:_(<4 x s32>) = G_VASHR [[SHL]], 16
; CHECK-NEXT: $q0 = COPY %sext(<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%v1:_(<4 x s32>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir
index b3fb5a4..dfaddba 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir
@@ -15,8 +15,7 @@ body: |
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK-NEXT: [[VASHR:%[0-9]+]]:_(<4 x s32>) = G_VASHR [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[VASHR:%[0-9]+]]:_(<4 x s32>) = G_VASHR [[COPY]], 5
; CHECK-NEXT: $q0 = COPY [[VASHR]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
@@ -39,8 +38,7 @@ body: |
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<4 x s32>) = G_VLSHR [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<4 x s32>) = G_VLSHR [[COPY]], 5
; CHECK-NEXT: $q0 = COPY [[VLSHR]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
@@ -63,8 +61,7 @@ body: |
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<8 x s16>) = G_VLSHR [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<8 x s16>) = G_VLSHR [[COPY]], 5
; CHECK-NEXT: $q0 = COPY [[VLSHR]](<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<8 x s16>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir
index c38e4a8..cf227cb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir
@@ -29,7 +29,6 @@ body: |
; CHECK-NEXT: [[UCVTFd:%[0-9]+]]:fpr64 = UCVTFd [[COPY]], 12
; CHECK-NEXT: $d1 = COPY [[UCVTFd]]
%0(s64) = COPY $d0
- %1(s32) = G_CONSTANT i32 12
- %2(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.vcvtfxu2fp.f64), %0, %1
+ %2(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.vcvtfxu2fp.f64), %0, 12
$d1 = COPY %2(s64)
...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
index 0706115..9fa6326 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
@@ -499,8 +499,7 @@ body: |
; CHECK-NEXT: $d0 = COPY [[SSHRv4i16_shift]]
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:fpr(<4 x s16>) = COPY $d0
- %1:gpr(s32) = G_CONSTANT i32 5
- %2:fpr(<4 x s16>) = G_VASHR %0, %1
+ %2:fpr(<4 x s16>) = G_VASHR %0, 5
$d0 = COPY %2(<4 x s16>)
RET_ReallyLR implicit $d0
...
@@ -520,8 +519,7 @@ body: |
; CHECK-NEXT: $d0 = COPY [[USHRv4i16_shift]]
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:fpr(<4 x s16>) = COPY $d0
- %1:gpr(s32) = G_CONSTANT i32 5
- %2:fpr(<4 x s16>) = G_VLSHR %0, %1
+ %2:fpr(<4 x s16>) = G_VLSHR %0, 5
$d0 = COPY %2(<4 x s16>)
RET_ReallyLR implicit $d0
...
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index cdde110..63c08dd 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -902,7 +902,7 @@ define void @sink_v8z16_0(ptr %p, ptr %d, i64 %n, <16 x i8> %a) {
; CHECK-GI-NEXT: subs x2, x2, #8
; CHECK-GI-NEXT: add x8, x8, #8
; CHECK-GI-NEXT: umull v1.8h, v1.8b, v0.8b
-; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15
+; CHECK-GI-NEXT: cmlt v1.8h, v1.8h, #0
; CHECK-GI-NEXT: xtn v1.8b, v1.8h
; CHECK-GI-NEXT: str d1, [x0], #32
; CHECK-GI-NEXT: b.ne .LBB8_1
@@ -967,8 +967,8 @@ define void @sink_v16s16_8(ptr %p, ptr %d, i64 %n, <16 x i8> %a) {
; CHECK-GI-NEXT: mov d2, v1.d[1]
; CHECK-GI-NEXT: smull v1.8h, v1.8b, v0.8b
; CHECK-GI-NEXT: smull v2.8h, v2.8b, v0.8b
-; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15
-; CHECK-GI-NEXT: sshr v2.8h, v2.8h, #15
+; CHECK-GI-NEXT: cmlt v1.8h, v1.8h, #0
+; CHECK-GI-NEXT: cmlt v2.8h, v2.8h, #0
; CHECK-GI-NEXT: uzp1 v1.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: str q1, [x0], #32
; CHECK-GI-NEXT: b.ne .LBB9_1
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
index 9bafc5b..2a8b3ce2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
@@ -999,16 +999,10 @@ entry:
}
define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: test_vaddhn_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vaddhn_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vaddhn_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <8 x i16> %a, %b
%vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1017,16 +1011,10 @@ entry:
}
define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SD-LABEL: test_vaddhn_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vaddhn_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vaddhn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <4 x i32> %a, %b
%vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1035,16 +1023,10 @@ entry:
}
define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SD-LABEL: test_vaddhn_s64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vaddhn_s64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vaddhn_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <2 x i64> %a, %b
%vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
@@ -1053,16 +1035,10 @@ entry:
}
define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: test_vaddhn_u16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vaddhn_u16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vaddhn_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <8 x i16> %a, %b
%vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1071,16 +1047,10 @@ entry:
}
define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SD-LABEL: test_vaddhn_u32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vaddhn_u32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vaddhn_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <4 x i32> %a, %b
%vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1089,16 +1059,10 @@ entry:
}
define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SD-LABEL: test_vaddhn_u64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vaddhn_u64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vaddhn_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <2 x i64> %a, %b
%vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
@@ -1115,9 +1079,8 @@ define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b)
;
; CHECK-GI-LABEL: test_vaddhn_high_s16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: addhn v1.8b, v1.8h, v2.8h
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1141,9 +1104,8 @@ define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b)
;
; CHECK-GI-LABEL: test_vaddhn_high_s32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: addhn v1.4h, v1.4s, v2.4s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1167,9 +1129,8 @@ define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b)
;
; CHECK-GI-LABEL: test_vaddhn_high_s64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: addhn v1.2s, v1.2d, v2.2d
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1193,9 +1154,8 @@ define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b)
;
; CHECK-GI-LABEL: test_vaddhn_high_u16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: addhn v1.8b, v1.8h, v2.8h
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1219,9 +1179,8 @@ define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b)
;
; CHECK-GI-LABEL: test_vaddhn_high_u32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: addhn v1.4h, v1.4s, v2.4s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1245,9 +1204,8 @@ define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b)
;
; CHECK-GI-LABEL: test_vaddhn_high_u64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: addhn v1.2s, v1.2d, v2.2d
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1461,16 +1419,10 @@ entry:
}
define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: test_vsubhn_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vsubhn_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vsubhn_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <8 x i16> %a, %b
%vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1479,16 +1431,10 @@ entry:
}
define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SD-LABEL: test_vsubhn_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vsubhn_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vsubhn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <4 x i32> %a, %b
%vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1497,16 +1443,10 @@ entry:
}
define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SD-LABEL: test_vsubhn_s64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vsubhn_s64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vsubhn_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <2 x i64> %a, %b
%vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
@@ -1515,16 +1455,10 @@ entry:
}
define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: test_vsubhn_u16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vsubhn_u16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vsubhn_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <8 x i16> %a, %b
%vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1533,16 +1467,10 @@ entry:
}
define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SD-LABEL: test_vsubhn_u32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vsubhn_u32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vsubhn_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <4 x i32> %a, %b
%vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1551,16 +1479,10 @@ entry:
}
define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SD-LABEL: test_vsubhn_u64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vsubhn_u64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vsubhn_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <2 x i64> %a, %b
%vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
@@ -1577,9 +1499,8 @@ define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b)
;
; CHECK-GI-LABEL: test_vsubhn_high_s16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: subhn v1.8b, v1.8h, v2.8h
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1603,9 +1524,8 @@ define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b)
;
; CHECK-GI-LABEL: test_vsubhn_high_s32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: subhn v1.4h, v1.4s, v2.4s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1629,9 +1549,8 @@ define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b)
;
; CHECK-GI-LABEL: test_vsubhn_high_s64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: subhn v1.2s, v1.2d, v2.2d
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1655,9 +1574,8 @@ define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b)
;
; CHECK-GI-LABEL: test_vsubhn_high_u16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: subhn v1.8b, v1.8h, v2.8h
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1681,9 +1599,8 @@ define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b)
;
; CHECK-GI-LABEL: test_vsubhn_high_u32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: subhn v1.4h, v1.4s, v2.4s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1707,9 +1624,8 @@ define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b)
;
; CHECK-GI-LABEL: test_vsubhn_high_u64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: subhn v1.2s, v1.2d, v2.2d
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
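The addhn/subhn updates in this file reflect GlobalISel now selecting the narrowing-high-half instructions directly, where it previously emitted a full-width add or sub followed by shrn. Per lane, addhn performs a wraparound add at the wide width and keeps the top half, which is exactly the add-then-lshr-then-trunc pattern the IR in these tests spells out. A scalar model of one i16-to-i8 lane:

    #include <cassert>
    #include <cstdint>

    // One lane of "addhn v0.8b, v1.8h, v2.8h": modular 16-bit add, keep the
    // high 8 bits. subhn is the same with subtraction.
    static uint8_t addhnLane(uint16_t A, uint16_t B) {
      uint16_t Sum = static_cast<uint16_t>(A + B); // wraps mod 2^16, like the hw
      return static_cast<uint8_t>(Sum >> 8);
    }

    int main() {
      assert(addhnLane(0x1234, 0x0100) == 0x13); // high half of 0x1334
      assert(addhnLane(0xFFFF, 0x0001) == 0x00); // wraps to 0x0000 first
      return 0;
    }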
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index 84879d1..03e6ca1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -524,8 +524,8 @@ define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
; CHECK-GI-NEXT: mov.b v1[15], w9
; CHECK-GI-NEXT: shl.16b v0, v0, #7
; CHECK-GI-NEXT: shl.16b v1, v1, #7
-; CHECK-GI-NEXT: sshr.16b v0, v0, #7
-; CHECK-GI-NEXT: sshr.16b v1, v1, #7
+; CHECK-GI-NEXT: cmlt.16b v0, v0, #0
+; CHECK-GI-NEXT: cmlt.16b v1, v1, #0
; CHECK-GI-NEXT: ret
%res = sext <32 x i1> %arg to <32 x i8>
ret <32 x i8> %res
@@ -934,10 +934,10 @@ define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
; CHECK-GI-NEXT: shl.16b v1, v1, #7
; CHECK-GI-NEXT: shl.16b v2, v2, #7
; CHECK-GI-NEXT: shl.16b v3, v3, #7
-; CHECK-GI-NEXT: sshr.16b v0, v0, #7
-; CHECK-GI-NEXT: sshr.16b v1, v1, #7
-; CHECK-GI-NEXT: sshr.16b v2, v2, #7
-; CHECK-GI-NEXT: sshr.16b v3, v3, #7
+; CHECK-GI-NEXT: cmlt.16b v0, v0, #0
+; CHECK-GI-NEXT: cmlt.16b v1, v1, #0
+; CHECK-GI-NEXT: cmlt.16b v2, v2, #0
+; CHECK-GI-NEXT: cmlt.16b v3, v3, #0
; CHECK-GI-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-GI-NEXT: ret
%res = sext <64 x i1> %arg to <64 x i8>
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index c408d7f..a3f4722 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -1914,21 +1914,13 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
}
define <8 x i16> @pr88784(<8 x i8> %l0, <8 x i8> %l1, <8 x i16> %l2) {
-; CHECK-SD-LABEL: pr88784:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: usubl.8h v0, v0, v1
-; CHECK-SD-NEXT: cmlt.8h v1, v2, #0
-; CHECK-SD-NEXT: ssra.8h v0, v2, #15
-; CHECK-SD-NEXT: eor.16b v0, v1, v0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: pr88784:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: usubl.8h v0, v0, v1
-; CHECK-GI-NEXT: sshr.8h v1, v2, #15
-; CHECK-GI-NEXT: ssra.8h v0, v2, #15
-; CHECK-GI-NEXT: eor.16b v0, v1, v0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: pr88784:
+; CHECK: // %bb.0:
+; CHECK-NEXT: usubl.8h v0, v0, v1
+; CHECK-NEXT: cmlt.8h v1, v2, #0
+; CHECK-NEXT: ssra.8h v0, v2, #15
+; CHECK-NEXT: eor.16b v0, v1, v0
+; CHECK-NEXT: ret
%l4 = zext <8 x i8> %l0 to <8 x i16>
%l5 = ashr <8 x i16> %l2, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%l6 = zext <8 x i8> %l1 to <8 x i16>
@@ -1947,7 +1939,7 @@ define <8 x i16> @pr88784_fixed(<8 x i8> %l0, <8 x i8> %l1, <8 x i16> %l2) {
; CHECK-GI-LABEL: pr88784_fixed:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: usubl.8h v0, v0, v1
-; CHECK-GI-NEXT: sshr.8h v1, v0, #15
+; CHECK-GI-NEXT: cmlt.8h v1, v0, #0
; CHECK-GI-NEXT: ssra.8h v0, v0, #15
; CHECK-GI-NEXT: eor.16b v0, v1, v0
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
index 11fb732..938712a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
@@ -1103,20 +1103,12 @@ define <2 x i64> @ssubl2_duplhs(i32 %lhs, <4 x i32> %rhs) {
}
define <8 x i8> @addhn8b_natural(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: addhn8b_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: ldr q1, [x1]
-; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: addhn8b_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: addhn8b_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%sum = add <8 x i16> %tmp1, %tmp2
@@ -1126,20 +1118,12 @@ define <8 x i8> @addhn8b_natural(ptr %A, ptr %B) nounwind {
}
define <4 x i16> @addhn4h_natural(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: addhn4h_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: ldr q1, [x1]
-; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: addhn4h_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: addhn4h_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%sum = add <4 x i32> %tmp1, %tmp2
@@ -1149,20 +1133,12 @@ define <4 x i16> @addhn4h_natural(ptr %A, ptr %B) nounwind {
}
define <2 x i32> @addhn2s_natural(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: addhn2s_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: ldr q1, [x1]
-; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: addhn2s_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: addhn2s_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i64>, ptr %B
%sum = add <2 x i64> %tmp1, %tmp2
@@ -1172,22 +1148,13 @@ define <2 x i32> @addhn2s_natural(ptr %A, ptr %B) nounwind {
}
define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: addhn2_16b_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q1, [x0]
-; CHECK-SD-NEXT: ldr q2, [x1]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: addhn2 v0.16b, v1.8h, v2.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: addhn2_16b_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h
-; CHECK-GI-NEXT: shrn2 v0.16b, v1.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: addhn2_16b_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: addhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%sum = add <8 x i16> %tmp1, %tmp2
@@ -1198,22 +1165,13 @@ define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
}
define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: addhn2_8h_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q1, [x0]
-; CHECK-SD-NEXT: ldr q2, [x1]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: addhn2 v0.8h, v1.4s, v2.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: addhn2_8h_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s
-; CHECK-GI-NEXT: shrn2 v0.8h, v1.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: addhn2_8h_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: addhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%sum = add <4 x i32> %tmp1, %tmp2
@@ -1224,22 +1182,13 @@ define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
}
define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: addhn2_4s_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q1, [x0]
-; CHECK-SD-NEXT: ldr q2, [x1]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: addhn2 v0.4s, v1.2d, v2.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: addhn2_4s_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: addhn2_4s_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i64>, ptr %B
%sum = add <2 x i64> %tmp1, %tmp2
@@ -1250,22 +1199,13 @@ define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
}
define <4 x i32> @addhn_addhn2_4s(ptr %A, ptr %B, ptr %C, ptr %D) nounwind {
-; CHECK-SD-LABEL: addhn_addhn2_4s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q1, [x0]
-; CHECK-SD-NEXT: ldr q2, [x1]
-; CHECK-SD-NEXT: addhn v0.2s, v1.2d, v2.2d
-; CHECK-SD-NEXT: addhn2 v0.4s, v1.2d, v2.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: addhn_addhn2_4s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: add v1.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: shrn v0.2s, v1.2d, #32
-; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: addhn_addhn2_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: addhn v0.2s, v1.2d, v2.2d
+; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i64>, ptr %B
%sum1 = add <2 x i64> %tmp1, %tmp2
@@ -1281,20 +1221,12 @@ define <4 x i32> @addhn_addhn2_4s(ptr %A, ptr %B, ptr %C, ptr %D) nounwind {
}
define <8 x i8> @subhn8b_natural(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: subhn8b_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: ldr q1, [x1]
-; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: subhn8b_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: subhn8b_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%diff = sub <8 x i16> %tmp1, %tmp2
@@ -1304,20 +1236,12 @@ define <8 x i8> @subhn8b_natural(ptr %A, ptr %B) nounwind {
}
define <4 x i16> @subhn4h_natural(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: subhn4h_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: ldr q1, [x1]
-; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: subhn4h_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: subhn4h_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%diff = sub <4 x i32> %tmp1, %tmp2
@@ -1327,20 +1251,12 @@ define <4 x i16> @subhn4h_natural(ptr %A, ptr %B) nounwind {
}
define <2 x i32> @subhn2s_natural(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: subhn2s_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: ldr q1, [x1]
-; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: subhn2s_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: subhn2s_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i64>, ptr %B
%diff = sub <2 x i64> %tmp1, %tmp2
@@ -1350,22 +1266,13 @@ define <2 x i32> @subhn2s_natural(ptr %A, ptr %B) nounwind {
}
define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: subhn2_16b_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q1, [x0]
-; CHECK-SD-NEXT: ldr q2, [x1]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: subhn2 v0.16b, v1.8h, v2.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: subhn2_16b_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h
-; CHECK-GI-NEXT: shrn2 v0.16b, v1.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: subhn2_16b_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: subhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%diff = sub <8 x i16> %tmp1, %tmp2
@@ -1376,22 +1283,13 @@ define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
}
define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: subhn2_8h_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q1, [x0]
-; CHECK-SD-NEXT: ldr q2, [x1]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: subhn2 v0.8h, v1.4s, v2.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: subhn2_8h_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s
-; CHECK-GI-NEXT: shrn2 v0.8h, v1.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: subhn2_8h_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: subhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%diff = sub <4 x i32> %tmp1, %tmp2
@@ -1402,22 +1300,13 @@ define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
}
define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: subhn2_4s_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q1, [x0]
-; CHECK-SD-NEXT: ldr q2, [x1]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: subhn2 v0.4s, v1.2d, v2.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: subhn2_4s_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d
-; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: subhn2_4s_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: subhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i64>, ptr %B
%diff = sub <2 x i64> %tmp1, %tmp2
@@ -1428,20 +1317,12 @@ define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
}
define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
-; CHECK-SD-LABEL: neg_narrow_i8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
-; CHECK-SD-NEXT: subhn v0.8b, v2.8h, v0.8h
-; CHECK-SD-NEXT: subhn2 v0.16b, v2.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: neg_narrow_i8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: mvn v1.16b, v1.16b
-; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
-; CHECK-GI-NEXT: shrn2 v0.16b, v1.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: neg_narrow_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff
+; CHECK-NEXT: subhn v0.8b, v2.8h, v0.8h
+; CHECK-NEXT: subhn2 v0.16b, v2.8h, v1.8h
+; CHECK-NEXT: ret
%not.i = xor <16 x i16> %a, splat (i16 -1)
%s = lshr <16 x i16> %not.i, splat (i16 8)
%vshrn_n = trunc nuw <16 x i16> %s to <16 x i8>
@@ -1449,20 +1330,12 @@ define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
}
define <8 x i16> @neg_narrow_i16(<8 x i32> %a) {
-; CHECK-SD-LABEL: neg_narrow_i16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
-; CHECK-SD-NEXT: subhn v0.4h, v2.4s, v0.4s
-; CHECK-SD-NEXT: subhn2 v0.8h, v2.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: neg_narrow_i16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: mvn v1.16b, v1.16b
-; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
-; CHECK-GI-NEXT: shrn2 v0.8h, v1.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: neg_narrow_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff
+; CHECK-NEXT: subhn v0.4h, v2.4s, v0.4s
+; CHECK-NEXT: subhn2 v0.8h, v2.4s, v1.4s
+; CHECK-NEXT: ret
%not.i = xor <8 x i32> %a, splat (i32 -1)
%s = lshr <8 x i32> %not.i, splat (i32 16)
%vshrn_n = trunc nuw <8 x i32> %s to <8 x i16>
@@ -1470,20 +1343,12 @@ define <8 x i16> @neg_narrow_i16(<8 x i32> %a) {
}
define <4 x i32> @neg_narrow_i32(<4 x i64> %a) {
-; CHECK-SD-LABEL: neg_narrow_i32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
-; CHECK-SD-NEXT: subhn v0.2s, v2.2d, v0.2d
-; CHECK-SD-NEXT: subhn2 v0.4s, v2.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: neg_narrow_i32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: mvn v1.16b, v1.16b
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: neg_narrow_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff
+; CHECK-NEXT: subhn v0.2s, v2.2d, v0.2d
+; CHECK-NEXT: subhn2 v0.4s, v2.2d, v1.2d
+; CHECK-NEXT: ret
%not.i = xor <4 x i64> %a, splat (i64 -1)
%s = lshr <4 x i64> %not.i, splat (i64 32)
%vshrn_n = trunc nuw <4 x i64> %s to <4 x i32>
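The neg_narrow tests exercise one more narrowing identity: since ~a equals (2^n - 1) - a in two's complement, taking the high half of that subtraction lets a single subhn against an all-ones vector replace the previous mvn-plus-shrn sequence. An exhaustive check of the identity at 16 bits:

    #include <cassert>
    #include <cstdint>

    int main() {
      // ~x == 0xFFFF - x (mod 2^16), so the high byte of the subtraction
      // (one subhn lane) matches NOT followed by shrn #8.
      for (uint32_t A = 0; A <= 0xFFFF; ++A) {
        uint16_t X = static_cast<uint16_t>(A);
        uint8_t ViaNot = static_cast<uint8_t>(static_cast<uint16_t>(~X) >> 8);
        uint8_t ViaSub =
            static_cast<uint8_t>(static_cast<uint16_t>(0xFFFF - X) >> 8);
        assert(ViaNot == ViaSub);
      }
      return 0;
    }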
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
index 9d0ade2..dc88f94 100644
--- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll
+++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
@@ -66,9 +66,9 @@ define <4 x i32> @combine_vec_sdiv_by_minsigned(<4 x i32> %x) {
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_minsigned:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v1.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v0.4s, #0
; CHECK-GI-NEXT: usra v0.4s, v1.4s, #1
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-GI-NEXT: neg v0.4s, v0.4s
; CHECK-GI-NEXT: ret
%1 = sdiv <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
@@ -176,7 +176,7 @@ define <4 x i32> @combine_vec_sdiv_by_pos1(<4 x i32> %x) {
; CHECK-GI-NEXT: mov v1.s[2], w9
; CHECK-GI-NEXT: mov v1.s[3], w9
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
@@ -185,39 +185,24 @@ define <4 x i32> @combine_vec_sdiv_by_pos1(<4 x i32> %x) {
}
define <4 x i32> @combine_vec_sdiv_by_pow2a(<4 x i32> %x) {
-; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2a:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v1.4s, v0.4s, #0
-; CHECK-SD-NEXT: usra v0.4s, v1.4s, #30
-; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #2
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2a:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v1.4s, v0.4s, #31
-; CHECK-GI-NEXT: usra v0.4s, v1.4s, #30
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #2
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: combine_vec_sdiv_by_pow2a:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
+; CHECK-NEXT: usra v0.4s, v1.4s, #30
+; CHECK-NEXT: sshr v0.4s, v0.4s, #2
+; CHECK-NEXT: ret
%1 = sdiv <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
ret <4 x i32> %1
}
define <4 x i32> @combine_vec_sdiv_by_pow2a_neg(<4 x i32> %x) {
-; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2a_neg:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v1.4s, v0.4s, #0
-; CHECK-SD-NEXT: usra v0.4s, v1.4s, #30
-; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #2
-; CHECK-SD-NEXT: neg v0.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2a_neg:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v1.4s, v0.4s, #31
-; CHECK-GI-NEXT: usra v0.4s, v1.4s, #30
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #2
-; CHECK-GI-NEXT: neg v0.4s, v0.4s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: combine_vec_sdiv_by_pow2a_neg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
+; CHECK-NEXT: usra v0.4s, v1.4s, #30
+; CHECK-NEXT: sshr v0.4s, v0.4s, #2
+; CHECK-NEXT: neg v0.4s, v0.4s
+; CHECK-NEXT: ret
%1 = sdiv <4 x i32> %x, <i32 -4, i32 -4, i32 -4, i32 -4>
ret <4 x i32> %1
}
@@ -240,7 +225,7 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI14_1
-; CHECK-GI-NEXT: sshr v2.16b, v0.16b, #7
+; CHECK-GI-NEXT: cmlt v2.16b, v0.16b, #0
; CHECK-GI-NEXT: adrp x9, .LCPI14_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_1]
; CHECK-GI-NEXT: adrp x8, .LCPI14_2
@@ -252,7 +237,7 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
; CHECK-GI-NEXT: neg v2.16b, v2.16b
; CHECK-GI-NEXT: add v1.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: sshl v1.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: sshr v2.16b, v3.16b, #7
+; CHECK-GI-NEXT: cmlt v2.16b, v3.16b, #0
; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: ret
%1 = sdiv <16 x i8> %x, <i8 1, i8 4, i8 2, i8 16, i8 8, i8 32, i8 64, i8 2, i8 1, i8 4, i8 2, i8 16, i8 8, i8 32, i8 64, i8 2>
@@ -278,7 +263,7 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI15_1
-; CHECK-GI-NEXT: sshr v2.8h, v0.8h, #15
+; CHECK-GI-NEXT: cmlt v2.8h, v0.8h, #0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI15_1]
; CHECK-GI-NEXT: adrp x8, .LCPI15_0
; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI15_0]
@@ -291,7 +276,7 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
; CHECK-GI-NEXT: add v1.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: shl v2.8h, v2.8h, #15
; CHECK-GI-NEXT: sshl v1.8h, v1.8h, v3.8h
-; CHECK-GI-NEXT: sshr v2.8h, v2.8h, #15
+; CHECK-GI-NEXT: cmlt v2.8h, v2.8h, #0
; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: ret
%1 = sdiv <8 x i16> %x, <i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2>
@@ -322,8 +307,8 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI16_1
-; CHECK-GI-NEXT: sshr v3.8h, v0.8h, #15
-; CHECK-GI-NEXT: sshr v4.8h, v1.8h, #15
+; CHECK-GI-NEXT: cmlt v3.8h, v0.8h, #0
+; CHECK-GI-NEXT: cmlt v4.8h, v1.8h, #0
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI16_1]
; CHECK-GI-NEXT: adrp x8, .LCPI16_0
; CHECK-GI-NEXT: ldr d5, [x8, :lo12:.LCPI16_0]
@@ -339,7 +324,7 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
; CHECK-GI-NEXT: add v2.8h, v1.8h, v2.8h
; CHECK-GI-NEXT: sshl v3.8h, v3.8h, v4.8h
; CHECK-GI-NEXT: sshl v2.8h, v2.8h, v4.8h
-; CHECK-GI-NEXT: sshr v4.8h, v5.8h, #15
+; CHECK-GI-NEXT: cmlt v4.8h, v5.8h, #0
; CHECK-GI-NEXT: bif v0.16b, v3.16b, v4.16b
; CHECK-GI-NEXT: bif v1.16b, v2.16b, v4.16b
; CHECK-GI-NEXT: ret
@@ -381,12 +366,12 @@ define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI17_1
-; CHECK-GI-NEXT: sshr v5.8h, v0.8h, #15
-; CHECK-GI-NEXT: sshr v6.8h, v1.8h, #15
+; CHECK-GI-NEXT: cmlt v5.8h, v0.8h, #0
+; CHECK-GI-NEXT: cmlt v6.8h, v1.8h, #0
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI17_1]
; CHECK-GI-NEXT: adrp x8, .LCPI17_0
-; CHECK-GI-NEXT: sshr v7.8h, v2.8h, #15
-; CHECK-GI-NEXT: sshr v16.8h, v3.8h, #15
+; CHECK-GI-NEXT: cmlt v7.8h, v2.8h, #0
+; CHECK-GI-NEXT: cmlt v16.8h, v3.8h, #0
; CHECK-GI-NEXT: ldr d17, [x8, :lo12:.LCPI17_0]
; CHECK-GI-NEXT: adrp x8, .LCPI17_2
; CHECK-GI-NEXT: neg v4.8h, v4.8h
@@ -402,7 +387,7 @@ define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
; CHECK-GI-NEXT: add v6.8h, v1.8h, v6.8h
; CHECK-GI-NEXT: add v7.8h, v2.8h, v7.8h
; CHECK-GI-NEXT: add v4.8h, v3.8h, v4.8h
-; CHECK-GI-NEXT: sshr v17.8h, v17.8h, #15
+; CHECK-GI-NEXT: cmlt v17.8h, v17.8h, #0
; CHECK-GI-NEXT: sshl v5.8h, v5.8h, v16.8h
; CHECK-GI-NEXT: sshl v6.8h, v6.8h, v16.8h
; CHECK-GI-NEXT: sshl v7.8h, v7.8h, v16.8h
@@ -436,7 +421,7 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_v4i32(<4 x i32> %x) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: mov w9, #0 // =0x0
-; CHECK-GI-NEXT: sshr v3.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v3.4s, v0.4s, #0
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: adrp x8, .LCPI18_0
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI18_0]
@@ -451,7 +436,7 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_v4i32(<4 x i32> %x) {
; CHECK-GI-NEXT: mov v1.s[3], w9
; CHECK-GI-NEXT: sshl v2.4s, v2.4s, v3.4s
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
%1 = sdiv <4 x i32> %x, <i32 1, i32 4, i32 8, i32 16>
@@ -483,10 +468,10 @@ define <8 x i32> @combine_vec_sdiv_by_pow2b_v8i32(<8 x i32> %x) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: mov w9, #0 // =0x0
-; CHECK-GI-NEXT: sshr v4.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v4.4s, v0.4s, #0
; CHECK-GI-NEXT: fmov s2, w8
; CHECK-GI-NEXT: adrp x8, .LCPI19_0
-; CHECK-GI-NEXT: sshr v5.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v5.4s, v1.4s, #0
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI19_0]
; CHECK-GI-NEXT: adrp x8, .LCPI19_1
; CHECK-GI-NEXT: mov v2.h[1], w9
@@ -503,7 +488,7 @@ define <8 x i32> @combine_vec_sdiv_by_pow2b_v8i32(<8 x i32> %x) {
; CHECK-GI-NEXT: sshl v3.4s, v3.4s, v5.4s
; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-GI-NEXT: shl v2.4s, v2.4s, #31
-; CHECK-GI-NEXT: sshr v2.4s, v2.4s, #31
+; CHECK-GI-NEXT: cmlt v2.4s, v2.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v4.16b, v2.16b
; CHECK-GI-NEXT: bif v1.16b, v3.16b, v2.16b
; CHECK-GI-NEXT: ret
@@ -546,13 +531,13 @@ define <16 x i32> @combine_vec_sdiv_by_pow2b_v16i32(<16 x i32> %x) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: mov w9, #0 // =0x0
-; CHECK-GI-NEXT: sshr v6.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v6.4s, v0.4s, #0
; CHECK-GI-NEXT: fmov s4, w8
; CHECK-GI-NEXT: adrp x8, .LCPI20_0
-; CHECK-GI-NEXT: sshr v7.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v7.4s, v1.4s, #0
; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI20_0]
-; CHECK-GI-NEXT: sshr v16.4s, v2.4s, #31
-; CHECK-GI-NEXT: sshr v17.4s, v3.4s, #31
+; CHECK-GI-NEXT: cmlt v16.4s, v2.4s, #0
+; CHECK-GI-NEXT: cmlt v17.4s, v3.4s, #0
; CHECK-GI-NEXT: adrp x8, .LCPI20_1
; CHECK-GI-NEXT: mov v4.h[1], w9
; CHECK-GI-NEXT: neg v5.4s, v5.4s
@@ -574,7 +559,7 @@ define <16 x i32> @combine_vec_sdiv_by_pow2b_v16i32(<16 x i32> %x) {
; CHECK-GI-NEXT: sshl v5.4s, v5.4s, v17.4s
; CHECK-GI-NEXT: ushll v4.4s, v4.4h, #0
; CHECK-GI-NEXT: shl v4.4s, v4.4s, #31
-; CHECK-GI-NEXT: sshr v4.4s, v4.4s, #31
+; CHECK-GI-NEXT: cmlt v4.4s, v4.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v6.16b, v4.16b
; CHECK-GI-NEXT: bif v1.16b, v7.16b, v4.16b
; CHECK-GI-NEXT: bif v2.16b, v16.16b, v4.16b
@@ -603,7 +588,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) {
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v2i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI21_1
-; CHECK-GI-NEXT: sshr v2.2d, v0.2d, #63
+; CHECK-GI-NEXT: cmlt v2.2d, v0.2d, #0
; CHECK-GI-NEXT: adrp x9, .LCPI21_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI21_1]
; CHECK-GI-NEXT: adrp x8, .LCPI21_2
@@ -615,7 +600,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) {
; CHECK-GI-NEXT: neg v2.2d, v2.2d
; CHECK-GI-NEXT: add v1.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: sshl v1.2d, v1.2d, v2.2d
-; CHECK-GI-NEXT: sshr v2.2d, v3.2d, #63
+; CHECK-GI-NEXT: cmlt v2.2d, v3.2d, #0
; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: ret
%1 = sdiv <2 x i64> %x, <i64 1, i64 4>
@@ -649,7 +634,7 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v4i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI22_2
-; CHECK-GI-NEXT: sshr v3.2d, v0.2d, #63
+; CHECK-GI-NEXT: cmlt v3.2d, v0.2d, #0
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI22_2]
; CHECK-GI-NEXT: adrp x8, .LCPI22_1
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI22_1]
@@ -662,13 +647,13 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
; CHECK-GI-NEXT: adrp x8, .LCPI22_3
; CHECK-GI-NEXT: neg v5.2d, v5.2d
; CHECK-GI-NEXT: ushl v2.2d, v3.2d, v2.2d
-; CHECK-GI-NEXT: sshr v3.2d, v1.2d, #63
+; CHECK-GI-NEXT: cmlt v3.2d, v1.2d, #0
; CHECK-GI-NEXT: shl v6.2d, v6.2d, #63
; CHECK-GI-NEXT: add v2.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: ushl v3.2d, v3.2d, v4.2d
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI22_3]
; CHECK-GI-NEXT: sshl v2.2d, v2.2d, v5.2d
-; CHECK-GI-NEXT: sshr v5.2d, v6.2d, #63
+; CHECK-GI-NEXT: cmlt v5.2d, v6.2d, #0
; CHECK-GI-NEXT: add v1.2d, v1.2d, v3.2d
; CHECK-GI-NEXT: neg v3.2d, v4.2d
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v5.16b
@@ -715,13 +700,13 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: mov w9, #0 // =0x0
-; CHECK-GI-NEXT: sshr v7.2d, v0.2d, #63
+; CHECK-GI-NEXT: cmlt v7.2d, v0.2d, #0
; CHECK-GI-NEXT: fmov s4, w8
; CHECK-GI-NEXT: adrp x8, .LCPI23_1
-; CHECK-GI-NEXT: sshr v16.2d, v1.2d, #63
+; CHECK-GI-NEXT: cmlt v16.2d, v1.2d, #0
; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI23_1]
-; CHECK-GI-NEXT: sshr v17.2d, v2.2d, #63
-; CHECK-GI-NEXT: sshr v18.2d, v3.2d, #63
+; CHECK-GI-NEXT: cmlt v17.2d, v2.2d, #0
+; CHECK-GI-NEXT: cmlt v18.2d, v3.2d, #0
; CHECK-GI-NEXT: adrp x8, .LCPI23_3
; CHECK-GI-NEXT: mov v4.h[1], w9
; CHECK-GI-NEXT: neg v5.2d, v5.2d
@@ -754,9 +739,9 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
; CHECK-GI-NEXT: shl v4.2d, v4.2d, #63
; CHECK-GI-NEXT: sshl v16.2d, v16.2d, v20.2d
; CHECK-GI-NEXT: sshl v6.2d, v6.2d, v20.2d
-; CHECK-GI-NEXT: sshr v17.2d, v17.2d, #63
-; CHECK-GI-NEXT: sshr v18.2d, v18.2d, #63
-; CHECK-GI-NEXT: sshr v4.2d, v4.2d, #63
+; CHECK-GI-NEXT: cmlt v17.2d, v17.2d, #0
+; CHECK-GI-NEXT: cmlt v18.2d, v18.2d, #0
+; CHECK-GI-NEXT: cmlt v4.2d, v4.2d, #0
; CHECK-GI-NEXT: bif v0.16b, v7.16b, v17.16b
; CHECK-GI-NEXT: bif v1.16b, v16.16b, v18.16b
; CHECK-GI-NEXT: bif v2.16b, v5.16b, v4.16b
@@ -792,7 +777,7 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) {
; CHECK-GI-NEXT: adrp x10, .LCPI24_0
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: ldr q2, [x10, :lo12:.LCPI24_0]
-; CHECK-GI-NEXT: sshr v3.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v3.4s, v0.4s, #0
; CHECK-GI-NEXT: fmov s4, w9
; CHECK-GI-NEXT: adrp x10, .LCPI24_1
; CHECK-GI-NEXT: neg v2.4s, v2.4s
@@ -807,10 +792,10 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) {
; CHECK-GI-NEXT: mov v1.s[3], w9
; CHECK-GI-NEXT: sshl v2.4s, v2.4s, v3.4s
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: shl v1.4s, v4.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: neg v2.4s, v0.4s
; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
@@ -871,7 +856,7 @@ define <16 x i8> @non_splat_minus_one_divisor_0(<16 x i8> %A) {
; CHECK-GI-NEXT: neg v2.16b, v0.16b
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI25_0]
; CHECK-GI-NEXT: shl v1.16b, v1.16b, #7
-; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7
+; CHECK-GI-NEXT: cmlt v1.16b, v1.16b, #0
; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
%div = sdiv <16 x i8> %A, <i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -901,7 +886,7 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; CHECK-GI-LABEL: non_splat_minus_one_divisor_1:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI26_2
-; CHECK-GI-NEXT: sshr v2.16b, v0.16b, #7
+; CHECK-GI-NEXT: cmlt v2.16b, v0.16b, #0
; CHECK-GI-NEXT: adrp x9, .LCPI26_1
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI26_2]
; CHECK-GI-NEXT: adrp x8, .LCPI26_3
@@ -914,11 +899,11 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; CHECK-GI-NEXT: neg v2.16b, v2.16b
; CHECK-GI-NEXT: add v1.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: sshl v1.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: sshr v2.16b, v3.16b, #7
+; CHECK-GI-NEXT: cmlt v2.16b, v3.16b, #0
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI26_0]
; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: shl v1.16b, v3.16b, #7
-; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7
+; CHECK-GI-NEXT: cmlt v1.16b, v1.16b, #0
; CHECK-GI-NEXT: neg v2.16b, v0.16b
; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
@@ -954,7 +939,7 @@ define <4 x i32> @non_splat_minus_one_divisor_2(<4 x i32> %A) {
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: ldr q2, [x9, :lo12:.LCPI27_0]
; CHECK-GI-NEXT: fmov s4, w8
-; CHECK-GI-NEXT: sshr v3.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v3.4s, v0.4s, #0
; CHECK-GI-NEXT: adrp x9, .LCPI27_1
; CHECK-GI-NEXT: neg v2.4s, v2.4s
; CHECK-GI-NEXT: mov v1.s[1], w8
@@ -969,10 +954,10 @@ define <4 x i32> @non_splat_minus_one_divisor_2(<4 x i32> %A) {
; CHECK-GI-NEXT: sshl v2.4s, v2.4s, v3.4s
; CHECK-GI-NEXT: mov v4.s[3], w8
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: shl v1.4s, v4.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: neg v2.4s, v0.4s
; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
@@ -1207,7 +1192,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform7(<8 x i16> %x) {
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: shl v1.8h, v1.8h, #15
-; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15
+; CHECK-GI-NEXT: cmlt v1.8h, v1.8h, #0
; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
%1 = sdiv <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 1, i16 1, i16 1, i16 1>
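
The hunks above, and most of the AArch64 hunks that follow, are the same mechanical change: where GlobalISel materializes an all-ones-or-all-zeros lane mask, an arithmetic shift right by the element width minus one is now selected as a compare-less-than-zero. A minimal IR sketch of the pattern (the RUN line and function name are illustrative, not taken from any test in this diff):

; RUN: llc -mtriple=aarch64 -global-isel < %s | FileCheck %s
define <4 x i32> @sign_mask(<4 x i32> %x) {
  ; Shifting each lane right by 31 smears the sign bit across the lane,
  ; producing the same mask as the comparison x < 0.
  ; Old selection: sshr v0.4s, v0.4s, #31
  ; New selection: cmlt v0.4s, v0.4s, #0
  %mask = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %mask
}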
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
index 121cc30..babb4ed 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -605,7 +605,7 @@ define i32 @extract_v4i32_select(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %c
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: and x8, x8, #0x3
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: str q0, [sp]
; CHECK-GI-NEXT: ldr w0, [x9, x8, lsl #2]
@@ -634,7 +634,7 @@ define i32 @extract_v4i32_select_const(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x
; CHECK-GI-NEXT: adrp x8, .LCPI23_0
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI23_0]
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: mov s0, v0.s[2]
; CHECK-GI-NEXT: fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll
index 6d673f1..30fb82e 100644
--- a/llvm/test/CodeGen/AArch64/fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/fcmp.ll
@@ -661,7 +661,7 @@ define <2 x double> @v2f128_double(<2 x fp128> %a, <2 x fp128> %b, <2 x double>
; CHECK-GI-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0
; CHECK-GI-NEXT: bsl v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: add sp, sp, #80
; CHECK-GI-NEXT: ret
@@ -1540,7 +1540,7 @@ define <7 x i32> @v7f16_i32(<7 x half> %a, <7 x half> %b, <7 x i32> %d, <7 x i32
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
; CHECK-GI-FP16-NEXT: mov v1.s[2], w8
; CHECK-GI-FP16-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-GI-FP16-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-FP16-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-GI-FP16-NEXT: fmov s4, w8
; CHECK-GI-FP16-NEXT: mov v4.s[1], w8
; CHECK-GI-FP16-NEXT: ushl v1.4s, v1.4s, v2.4s
@@ -1602,7 +1602,7 @@ define <4 x i32> @v4f16_i32(<4 x half> %a, <4 x half> %b, <4 x i32> %d, <4 x i32
; CHECK-GI-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h
; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-GI-FP16-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-FP16-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-GI-FP16-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-GI-FP16-NEXT: ret
entry:
@@ -1657,8 +1657,8 @@ define <8 x i32> @v8f16_i32(<8 x half> %a, <8 x half> %b, <8 x i32> %d, <8 x i32
; CHECK-GI-FP16-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-FP16-NEXT: shl v1.4s, v1.4s, #31
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-GI-FP16-NEXT: sshr v1.4s, v1.4s, #31
-; CHECK-GI-FP16-NEXT: sshr v6.4s, v0.4s, #31
+; CHECK-GI-FP16-NEXT: cmlt v1.4s, v1.4s, #0
+; CHECK-GI-FP16-NEXT: cmlt v6.4s, v0.4s, #0
; CHECK-GI-FP16-NEXT: mov v0.16b, v1.16b
; CHECK-GI-FP16-NEXT: mov v1.16b, v6.16b
; CHECK-GI-FP16-NEXT: bsl v0.16b, v2.16b, v4.16b
@@ -1748,10 +1748,10 @@ define <16 x i32> @v16f16_i32(<16 x half> %a, <16 x half> %b, <16 x i32> %d, <16
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
; CHECK-GI-FP16-NEXT: shl v3.4s, v3.4s, #31
; CHECK-GI-FP16-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-FP16-NEXT: sshr v2.4s, v2.4s, #31
-; CHECK-GI-FP16-NEXT: sshr v16.4s, v0.4s, #31
-; CHECK-GI-FP16-NEXT: sshr v3.4s, v3.4s, #31
-; CHECK-GI-FP16-NEXT: sshr v17.4s, v1.4s, #31
+; CHECK-GI-FP16-NEXT: cmlt v2.4s, v2.4s, #0
+; CHECK-GI-FP16-NEXT: cmlt v16.4s, v0.4s, #0
+; CHECK-GI-FP16-NEXT: cmlt v3.4s, v3.4s, #0
+; CHECK-GI-FP16-NEXT: cmlt v17.4s, v1.4s, #0
; CHECK-GI-FP16-NEXT: ldp q0, q1, [sp]
; CHECK-GI-FP16-NEXT: bit v0.16b, v4.16b, v2.16b
; CHECK-GI-FP16-NEXT: mov v2.16b, v3.16b
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll
index 00de153..24be923 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll
@@ -111,14 +111,14 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32(half %x) {
-; CHECK-CVT-LABEL: utesth_f16i32:
+define i32 @utest_f16i32(half %x) {
+; CHECK-CVT-LABEL: utest_f16i32:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvt s0, h0
; CHECK-CVT-NEXT: fcvtzu w0, s0
; CHECK-CVT-NEXT: ret
;
-; CHECK-FP16-LABEL: utesth_f16i32:
+; CHECK-FP16-LABEL: utest_f16i32:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fcvtzu w0, h0
; CHECK-FP16-NEXT: ret
@@ -298,8 +298,8 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16(half %x) {
-; CHECK-CVT-LABEL: utesth_f16i16:
+define i16 @utest_f16i16(half %x) {
+; CHECK-CVT-LABEL: utest_f16i16:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvt s0, h0
; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff
@@ -308,7 +308,7 @@ define i16 @utesth_f16i16(half %x) {
; CHECK-CVT-NEXT: csel w0, w8, w9, lo
; CHECK-CVT-NEXT: ret
;
-; CHECK-FP16-LABEL: utesth_f16i16:
+; CHECK-FP16-LABEL: utest_f16i16:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fcvtzu w8, h0
; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff
@@ -493,8 +493,8 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64(half %x) {
-; CHECK-LABEL: utesth_f16i64:
+define i64 @utest_f16i64(half %x) {
+; CHECK-LABEL: utest_f16i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
@@ -636,14 +636,14 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32_mm(half %x) {
-; CHECK-CVT-LABEL: utesth_f16i32_mm:
+define i32 @utest_f16i32_mm(half %x) {
+; CHECK-CVT-LABEL: utest_f16i32_mm:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvt s0, h0
; CHECK-CVT-NEXT: fcvtzu w0, s0
; CHECK-CVT-NEXT: ret
;
-; CHECK-FP16-LABEL: utesth_f16i32_mm:
+; CHECK-FP16-LABEL: utest_f16i32_mm:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fcvtzu w0, h0
; CHECK-FP16-NEXT: ret
@@ -808,8 +808,8 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16_mm(half %x) {
-; CHECK-CVT-LABEL: utesth_f16i16_mm:
+define i16 @utest_f16i16_mm(half %x) {
+; CHECK-CVT-LABEL: utest_f16i16_mm:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvt s0, h0
; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff
@@ -818,7 +818,7 @@ define i16 @utesth_f16i16_mm(half %x) {
; CHECK-CVT-NEXT: csel w0, w8, w9, lo
; CHECK-CVT-NEXT: ret
;
-; CHECK-FP16-LABEL: utesth_f16i16_mm:
+; CHECK-FP16-LABEL: utest_f16i16_mm:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fcvtzu w8, h0
; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff
@@ -986,8 +986,8 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64_mm(half %x) {
-; CHECK-LABEL: utesth_f16i64_mm:
+define i64 @utest_f16i64_mm(half %x) {
+; CHECK-LABEL: utest_f16i64_mm:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
@@ -1026,6 +1026,29 @@ entry:
ret i64 %conv6
}
+; i32 non-saturating
+
+define i32 @ustest_f16i32_nsat(half %x) {
+; CHECK-CVT-LABEL: ustest_f16i32_nsat:
+; CHECK-CVT: // %bb.0:
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: fcvtzs w8, s0
+; CHECK-CVT-NEXT: and w8, w8, w8, asr #31
+; CHECK-CVT-NEXT: bic w0, w8, w8, asr #31
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: ustest_f16i32_nsat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtzs w8, h0
+; CHECK-FP16-NEXT: and w8, w8, w8, asr #31
+; CHECK-FP16-NEXT: bic w0, w8, w8, asr #31
+; CHECK-FP16-NEXT: ret
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ ret i32 %spec.store.select7
+}
+
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
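
The new ustest_f16i32_nsat test pins down a clamp whose bounds are both zero and whose smin takes zero as its first operand, so it does not have the usual smin(smax(x, lo), hi) shape that gets matched to a saturating conversion; mathematically smax(smin(0, x), 0) is always zero, so the point is only that the lowering stays correct rather than forming a saturate. A reduced sketch of the shape, separated from the float-to-int conversion (names are illustrative):

declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)

define i32 @zero_zero_clamp(i32 %v) {
  %lo = call i32 @llvm.smin.i32(i32 0, i32 %v)  ; min(0, v) is always <= 0
  %r = call i32 @llvm.smax.i32(i32 %lo, i32 0)  ; max of a non-positive value and 0 is 0
  ret i32 %r
}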
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index b09a867..637c028 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -321,20 +321,20 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-CVT-SD-LABEL: utesth_f16i32:
+define <4 x i32> @utest_f16i32(<4 x half> %x) {
+; CHECK-CVT-SD-LABEL: utest_f16i32:
; CHECK-CVT-SD: // %bb.0: // %entry
; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: utesth_f16i32:
+; CHECK-FP16-SD-LABEL: utest_f16i32:
; CHECK-FP16-SD: // %bb.0: // %entry
; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h
; CHECK-FP16-SD-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-FP16-SD-NEXT: ret
;
-; CHECK-CVT-GI-LABEL: utesth_f16i32:
+; CHECK-CVT-GI-LABEL: utest_f16i32:
; CHECK-CVT-GI: // %bb.0: // %entry
; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-GI-NEXT: movi v1.2d, #0x000000ffffffff
@@ -349,7 +349,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s
; CHECK-CVT-GI-NEXT: ret
;
-; CHECK-FP16-GI-LABEL: utesth_f16i32:
+; CHECK-FP16-GI-LABEL: utest_f16i32:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP16-GI-NEXT: mov h2, v0.h[1]
@@ -614,8 +614,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-CVT-LABEL: utesth_f16i16:
+define <8 x i16> @utest_f16i16(<8 x half> %x) {
+; CHECK-CVT-LABEL: utest_f16i16:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
@@ -625,12 +625,12 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
; CHECK-CVT-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: utesth_f16i16:
+; CHECK-FP16-SD-LABEL: utest_f16i16:
; CHECK-FP16-SD: // %bb.0: // %entry
; CHECK-FP16-SD-NEXT: fcvtzu v0.8h, v0.8h
; CHECK-FP16-SD-NEXT: ret
;
-; CHECK-FP16-GI-LABEL: utesth_f16i16:
+; CHECK-FP16-GI-LABEL: utest_f16i16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h
@@ -1746,8 +1746,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64(<2 x half> %x) {
-; CHECK-CVT-SD-LABEL: utesth_f16i64:
+define <2 x i64> @utest_f16i64(<2 x half> %x) {
+; CHECK-CVT-SD-LABEL: utest_f16i64:
; CHECK-CVT-SD: // %bb.0: // %entry
; CHECK-CVT-SD-NEXT: sub sp, sp, #48
; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
@@ -1777,7 +1777,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-CVT-SD-NEXT: add sp, sp, #48
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: utesth_f16i64:
+; CHECK-FP16-SD-LABEL: utest_f16i64:
; CHECK-FP16-SD: // %bb.0: // %entry
; CHECK-FP16-SD-NEXT: sub sp, sp, #48
; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
@@ -1807,7 +1807,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-FP16-SD-NEXT: add sp, sp, #48
; CHECK-FP16-SD-NEXT: ret
;
-; CHECK-CVT-GI-LABEL: utesth_f16i64:
+; CHECK-CVT-GI-LABEL: utest_f16i64:
; CHECK-CVT-GI: // %bb.0: // %entry
; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
@@ -1819,7 +1819,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-CVT-GI-NEXT: mov v0.d[1], x9
; CHECK-CVT-GI-NEXT: ret
;
-; CHECK-FP16-GI-LABEL: utesth_f16i64:
+; CHECK-FP16-GI-LABEL: utest_f16i64:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
@@ -2307,20 +2307,20 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-CVT-SD-LABEL: utesth_f16i32_mm:
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) {
+; CHECK-CVT-SD-LABEL: utest_f16i32_mm:
; CHECK-CVT-SD: // %bb.0: // %entry
; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: utesth_f16i32_mm:
+; CHECK-FP16-SD-LABEL: utest_f16i32_mm:
; CHECK-FP16-SD: // %bb.0: // %entry
; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h
; CHECK-FP16-SD-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-FP16-SD-NEXT: ret
;
-; CHECK-CVT-GI-LABEL: utesth_f16i32_mm:
+; CHECK-CVT-GI-LABEL: utest_f16i32_mm:
; CHECK-CVT-GI: // %bb.0: // %entry
; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-GI-NEXT: movi v1.2d, #0x000000ffffffff
@@ -2335,7 +2335,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s
; CHECK-CVT-GI-NEXT: ret
;
-; CHECK-FP16-GI-LABEL: utesth_f16i32_mm:
+; CHECK-FP16-GI-LABEL: utest_f16i32_mm:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP16-GI-NEXT: mov h2, v0.h[1]
@@ -2585,8 +2585,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-CVT-LABEL: utesth_f16i16_mm:
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
+; CHECK-CVT-LABEL: utest_f16i16_mm:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
@@ -2596,12 +2596,12 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
; CHECK-CVT-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: utesth_f16i16_mm:
+; CHECK-FP16-SD-LABEL: utest_f16i16_mm:
; CHECK-FP16-SD: // %bb.0: // %entry
; CHECK-FP16-SD-NEXT: fcvtzu v0.8h, v0.8h
; CHECK-FP16-SD-NEXT: ret
;
-; CHECK-FP16-GI-LABEL: utesth_f16i16_mm:
+; CHECK-FP16-GI-LABEL: utest_f16i16_mm:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h
@@ -3694,8 +3694,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
-; CHECK-CVT-SD-LABEL: utesth_f16i64_mm:
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
+; CHECK-CVT-SD-LABEL: utest_f16i64_mm:
; CHECK-CVT-SD: // %bb.0: // %entry
; CHECK-CVT-SD-NEXT: sub sp, sp, #48
; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
@@ -3725,7 +3725,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-CVT-SD-NEXT: add sp, sp, #48
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: utesth_f16i64_mm:
+; CHECK-FP16-SD-LABEL: utest_f16i64_mm:
; CHECK-FP16-SD: // %bb.0: // %entry
; CHECK-FP16-SD-NEXT: sub sp, sp, #48
; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
@@ -3755,7 +3755,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-FP16-SD-NEXT: add sp, sp, #48
; CHECK-FP16-SD-NEXT: ret
;
-; CHECK-CVT-GI-LABEL: utesth_f16i64_mm:
+; CHECK-CVT-GI-LABEL: utest_f16i64_mm:
; CHECK-CVT-GI: // %bb.0: // %entry
; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
@@ -3767,7 +3767,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-CVT-GI-NEXT: mov v0.d[1], x9
; CHECK-CVT-GI-NEXT: ret
;
-; CHECK-FP16-GI-LABEL: utesth_f16i64_mm:
+; CHECK-FP16-GI-LABEL: utest_f16i64_mm:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
@@ -3941,6 +3941,51 @@ entry:
ret <2 x i64> %conv6
}
+; i32 non-saturating
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) {
+; CHECK-CVT-SD-LABEL: ustest_f16i32_nsat:
+; CHECK-CVT-SD: // %bb.0: // %entry
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: movi v1.2d, #0000000000000000
+; CHECK-CVT-SD-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-SD-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: ustest_f16i32_nsat:
+; CHECK-FP16-SD: // %bb.0: // %entry
+; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-FP16-SD-NEXT: movi v1.2d, #0000000000000000
+; CHECK-FP16-SD-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-FP16-SD-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-FP16-SD-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: ustest_f16i32_nsat:
+; CHECK-CVT-GI: // %bb.0: // %entry
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-CVT-GI-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-GI-NEXT: smin v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: ustest_f16i32_nsat:
+; CHECK-FP16-GI: // %bb.0: // %entry
+; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-FP16-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-FP16-GI-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-FP16-GI-NEXT: smin v0.4s, v1.4s, v0.4s
+; CHECK-FP16-GI-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-FP16-GI-NEXT: ret
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ ret <4 x i32> %spec.store.select7
+}
+
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 0c84468f..2026959 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -1110,7 +1110,7 @@ define <8 x i8> @vselect_constant_cond_zero_v8i8(<8 x i8> %a) {
; CHECK-GI-NEXT: adrp x8, .LCPI83_0
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI83_0]
; CHECK-GI-NEXT: shl v1.8b, v1.8b, #7
-; CHECK-GI-NEXT: sshr v1.8b, v1.8b, #7
+; CHECK-GI-NEXT: cmlt v1.8b, v1.8b, #0
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%b = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i8> %a, <8 x i8> zeroinitializer
@@ -1133,7 +1133,7 @@ define <4 x i16> @vselect_constant_cond_zero_v4i16(<4 x i16> %a) {
; CHECK-GI-NEXT: mov v1.h[2], w9
; CHECK-GI-NEXT: mov v1.h[3], w8
; CHECK-GI-NEXT: shl v1.4h, v1.4h, #15
-; CHECK-GI-NEXT: sshr v1.4h, v1.4h, #15
+; CHECK-GI-NEXT: cmlt v1.4h, v1.4h, #0
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%b = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> %a, <4 x i16> zeroinitializer
@@ -1157,7 +1157,7 @@ define <4 x i32> @vselect_constant_cond_zero_v4i32(<4 x i32> %a) {
; CHECK-GI-NEXT: mov v1.s[2], w9
; CHECK-GI-NEXT: mov v1.s[3], w8
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%b = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> zeroinitializer
@@ -1176,7 +1176,7 @@ define <8 x i8> @vselect_constant_cond_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-GI-NEXT: adrp x8, .LCPI86_0
; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI86_0]
; CHECK-GI-NEXT: shl v2.8b, v2.8b, #7
-; CHECK-GI-NEXT: sshr v2.8b, v2.8b, #7
+; CHECK-GI-NEXT: cmlt v2.8b, v2.8b, #0
; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: ret
%c = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i8> %a, <8 x i8> %b
@@ -1199,7 +1199,7 @@ define <4 x i16> @vselect_constant_cond_v4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-GI-NEXT: mov v2.h[2], w9
; CHECK-GI-NEXT: mov v2.h[3], w8
; CHECK-GI-NEXT: shl v2.4h, v2.4h, #15
-; CHECK-GI-NEXT: sshr v2.4h, v2.4h, #15
+; CHECK-GI-NEXT: cmlt v2.4h, v2.4h, #0
; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: ret
%c = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> %a, <4 x i16> %b
@@ -1223,7 +1223,7 @@ define <4 x i32> @vselect_constant_cond_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-GI-NEXT: mov v2.s[2], w9
; CHECK-GI-NEXT: mov v2.s[3], w8
; CHECK-GI-NEXT: shl v2.4s, v2.4s, #31
-; CHECK-GI-NEXT: sshr v2.4s, v2.4s, #31
+; CHECK-GI-NEXT: cmlt v2.4s, v2.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: ret
%c = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index fb8b721..11b3b62 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -966,7 +966,7 @@ define <8 x i8> @cmgez8xi8_alt(<8 x i8> %A) {
;
; CHECK-GI-LABEL: cmgez8xi8_alt:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.8b, v0.8b, #7
+; CHECK-GI-NEXT: cmlt v0.8b, v0.8b, #0
; CHECK-GI-NEXT: mvn v0.8b, v0.8b
; CHECK-GI-NEXT: ret
%sign = ashr <8 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -982,7 +982,7 @@ define <16 x i8> @cmgez16xi8_alt(<16 x i8> %A) {
;
; CHECK-GI-LABEL: cmgez16xi8_alt:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-GI-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-GI-NEXT: mvn v0.16b, v0.16b
; CHECK-GI-NEXT: ret
%sign = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -998,7 +998,7 @@ define <4 x i16> @cmgez4xi16_alt(<4 x i16> %A) {
;
; CHECK-GI-LABEL: cmgez4xi16_alt:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #15
+; CHECK-GI-NEXT: cmlt v0.4h, v0.4h, #0
; CHECK-GI-NEXT: mvn v0.8b, v0.8b
; CHECK-GI-NEXT: ret
%sign = ashr <4 x i16> %A, <i16 15, i16 15, i16 15, i16 15>
@@ -1014,7 +1014,7 @@ define <8 x i16> @cmgez8xi16_alt(<8 x i16> %A) {
;
; CHECK-GI-LABEL: cmgez8xi16_alt:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-GI-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-GI-NEXT: mvn v0.16b, v0.16b
; CHECK-GI-NEXT: ret
%sign = ashr <8 x i16> %A, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -1030,7 +1030,7 @@ define <2 x i32> @cmgez2xi32_alt(<2 x i32> %A) {
;
; CHECK-GI-LABEL: cmgez2xi32_alt:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31
+; CHECK-GI-NEXT: cmlt v0.2s, v0.2s, #0
; CHECK-GI-NEXT: mvn v0.8b, v0.8b
; CHECK-GI-NEXT: ret
%sign = ashr <2 x i32> %A, <i32 31, i32 31>
@@ -1046,7 +1046,7 @@ define <4 x i32> @cmgez4xi32_alt(<4 x i32> %A) {
;
; CHECK-GI-LABEL: cmgez4xi32_alt:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-GI-NEXT: mvn v0.16b, v0.16b
; CHECK-GI-NEXT: ret
%sign = ashr <4 x i32> %A, <i32 31, i32 31, i32 31, i32 31>
@@ -1062,7 +1062,7 @@ define <2 x i64> @cmgez2xi64_alt(<2 x i64> %A) {
;
; CHECK-GI-LABEL: cmgez2xi64_alt:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0
; CHECK-GI-NEXT: mvn v0.16b, v0.16b
; CHECK-GI-NEXT: ret
%sign = ashr <2 x i64> %A, <i64 63, i64 63>
@@ -1503,99 +1503,64 @@ entry:
}
define <8 x i8> @cmltz8xi8_alt(<8 x i8> %A) {
-; CHECK-SD-LABEL: cmltz8xi8_alt:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.8b, v0.8b, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmltz8xi8_alt:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.8b, v0.8b, #7
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmltz8xi8_alt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
+; CHECK-NEXT: ret
%A.lobit = ashr <8 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
ret <8 x i8> %A.lobit
}
define <16 x i8> @cmltz16xi8_alt(<16 x i8> %A) {
-; CHECK-SD-LABEL: cmltz16xi8_alt:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.16b, v0.16b, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmltz16xi8_alt:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmltz16xi8_alt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
+; CHECK-NEXT: ret
%A.lobit = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
ret <16 x i8> %A.lobit
}
define <4 x i16> @cmltz4xi16_alt(<4 x i16> %A) {
-; CHECK-SD-LABEL: cmltz4xi16_alt:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.4h, v0.4h, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmltz4xi16_alt:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #15
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmltz4xi16_alt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
+; CHECK-NEXT: ret
%A.lobit = ashr <4 x i16> %A, <i16 15, i16 15, i16 15, i16 15>
ret <4 x i16> %A.lobit
}
define <8 x i16> @cmltz8xi16_alt(<8 x i16> %A) {
-; CHECK-SD-LABEL: cmltz8xi16_alt:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.8h, v0.8h, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmltz8xi16_alt:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmltz8xi16_alt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
+; CHECK-NEXT: ret
%A.lobit = ashr <8 x i16> %A, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
ret <8 x i16> %A.lobit
}
define <2 x i32> @cmltz2xi32_alt(<2 x i32> %A) {
-; CHECK-SD-LABEL: cmltz2xi32_alt:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.2s, v0.2s, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmltz2xi32_alt:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmltz2xi32_alt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
+; CHECK-NEXT: ret
%A.lobit = ashr <2 x i32> %A, <i32 31, i32 31>
ret <2 x i32> %A.lobit
}
define <4 x i32> @cmltz4xi32_alt(<4 x i32> %A) {
-; CHECK-SD-LABEL: cmltz4xi32_alt:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.4s, v0.4s, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmltz4xi32_alt:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmltz4xi32_alt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
+; CHECK-NEXT: ret
%A.lobit = ashr <4 x i32> %A, <i32 31, i32 31, i32 31, i32 31>
ret <4 x i32> %A.lobit
}
define <2 x i64> @cmltz2xi64_alt(<2 x i64> %A) {
-; CHECK-SD-LABEL: cmltz2xi64_alt:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.2d, v0.2d, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmltz2xi64_alt:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmltz2xi64_alt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
+; CHECK-NEXT: ret
%A.lobit = ashr <2 x i64> %A, <i64 63, i64 63>
ret <2 x i64> %A.lobit
}
@@ -2523,7 +2488,7 @@ define <2 x i32> @fcmal2xfloat(<2 x float> %A, <2 x float> %B) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2s, #1
; CHECK-GI-NEXT: shl v0.2s, v0.2s, #31
-; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31
+; CHECK-GI-NEXT: cmlt v0.2s, v0.2s, #0
; CHECK-GI-NEXT: ret
%tmp3 = fcmp true <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2542,7 +2507,7 @@ define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) {
; CHECK-GI-NEXT: dup v0.2s, w8
; CHECK-GI-NEXT: mov v0.d[1], v0.d[0]
; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-GI-NEXT: ret
%tmp3 = fcmp true <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2559,7 +2524,7 @@ define <2 x i64> @fcmal2xdouble(<2 x double> %A, <2 x double> %B) {
; CHECK-GI-NEXT: adrp x8, .LCPI221_0
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI221_0]
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0
; CHECK-GI-NEXT: ret
%tmp3 = fcmp true <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2589,7 +2554,7 @@ define <4 x i32> @fcmnv4xfloat(<4 x float> %A, <4 x float> %B) {
; CHECK-GI-NEXT: mov v0.s[1], w8
; CHECK-GI-NEXT: mov v0.d[1], v0.d[0]
; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-GI-NEXT: ret
%tmp3 = fcmp false <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
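
Note the asymmetry in the compare tests above: in cmgez*_alt, GlobalISel computes x >= 0 as the complement of the sign mask, so the new cmlt is still followed by mvn while SelectionDAG matches cmge directly, whereas in cmltz*_alt both selectors now emit the same cmlt and the per-selector prefixes collapse into a shared CHECK. A sketch of the x >= 0 form (illustrative only):

define <4 x i32> @ge_zero_mask(<4 x i32> %x) {
  ; GISel: cmlt v0.4s, v0.4s, #0 followed by mvn v0.16b, v0.16b
  ; SDAG:  cmge v0.4s, v0.4s, #0
  %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %not
}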
diff --git a/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll b/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
index 282f437..a8c55b4 100644
--- a/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
+++ b/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
@@ -465,7 +465,7 @@ define <8 x i16> @test_ushll_cmp(<8 x i8> %a, <8 x i8> %b) #0 {
; CHECK-GI-NEXT: movi v1.2d, #0xff00ff00ff00ff
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: shl v0.8h, v0.8h, #15
-; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-GI-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%cmp.i = icmp eq <8 x i8> %a, %b
diff --git a/llvm/test/CodeGen/AArch64/select_cc.ll b/llvm/test/CodeGen/AArch64/select_cc.ll
index 483f6c2..b562340 100644
--- a/llvm/test/CodeGen/AArch64/select_cc.ll
+++ b/llvm/test/CodeGen/AArch64/select_cc.ll
@@ -98,7 +98,7 @@ define <2 x double> @select_olt_load_cmp(<2 x double> %a, ptr %src) {
; CHECK-GI-NEXT: fcmgt v1.2s, v1.2s, #0.0
; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-GI-NEXT: shl v1.2d, v1.2d, #63
-; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #63
+; CHECK-GI-NEXT: cmlt v1.2d, v1.2d, #0
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
@@ -136,7 +136,7 @@ define <4 x i32> @select_icmp_sgt(<4 x i32> %a, <4 x i8> %b) {
; CHECK-GI-NEXT: mov v2.s[2], w8
; CHECK-GI-NEXT: mov v2.s[3], w9
; CHECK-GI-NEXT: shl v1.4s, v2.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: bic v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
index 293b74ec..96a7a9d0 100644
--- a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
+++ b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
@@ -255,7 +255,7 @@ define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: shl v0.16b, v0.16b, #7
; CHECK-GI-NEXT: movi v1.16b, #128
-; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-GI-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%shl = select <16 x i1> %t, <16 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>, <16 x i8> zeroinitializer
@@ -277,7 +277,7 @@ define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) {
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: movi v1.8h, #128
; CHECK-GI-NEXT: shl v0.8h, v0.8h, #15
-; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-GI-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%shl= select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer
@@ -299,7 +299,7 @@ define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) {
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: movi v1.4s, #64
; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer
@@ -323,7 +323,7 @@ define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) {
; CHECK-GI-NEXT: adrp x8, .LCPI16_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0
; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll
new file mode 100644
index 0000000..0d68762
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes='amdgpu-attributor' %s -o - | FileCheck %s
+
+@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
+
+;.
+; CHECK: @lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
+;.
+define amdgpu_kernel void @k0() #0 {
+; CHECK: Function Attrs: sanitize_address
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: store i8 7, ptr addrspace(3) @lds_1, align 4
+; CHECK-NEXT: ret void
+;
+ store i8 7, ptr addrspace(3) @lds_1, align 4
+ ret void
+}
+
+attributes #0 = { sanitize_address }
+; "amdgpu-no-flat-scratch-init" attribute should not be present in attribute list
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index a688b6f..fb566e5 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -707,8 +707,8 @@ attributes #6 = { "enqueued-block" }
; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR15:[0-9]+]] = { nounwind "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/true16-fold.mir b/llvm/test/CodeGen/AMDGPU/true16-fold.mir
index 93cc12f..9484417 100644
--- a/llvm/test/CodeGen/AMDGPU/true16-fold.mir
+++ b/llvm/test/CodeGen/AMDGPU/true16-fold.mir
@@ -57,6 +57,7 @@ body: |
%4:vgpr_16 = COPY %3:sgpr_lo16
%5:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, %0:sreg_32, 0, killed %1:sreg_32, 0, killed %4:vgpr_16, 0, 0, implicit $exec
S_ENDPGM 0, implicit %5
+...
---
name: fold_16bit_madmix_clamp
@@ -207,3 +208,27 @@ body: |
$vgpr0 = COPY %4
S_ENDPGM 0, implicit $vgpr0
...
+
+---
+name: fold_imm16_across_reg_sequence
+tracksRegLiveness: true
+registers:
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: fold_imm16_across_reg_sequence
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MOV_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B16_t16_e64_1:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_MOV_B16_t16_e64_]], %subreg.lo16, [[V_MOV_B16_t16_e64_1]], %subreg.hi16
+ ; CHECK-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, -1, 0, -1, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F32_e64_]]
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
+ %0:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
+ %1:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
+ %2:vgpr_32 = REG_SEQUENCE %0, %subreg.lo16, %1, %subreg.hi16
+ %3:vgpr_32 = nofpexcept V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
+ $vgpr0 = COPY %3
+ S_ENDPGM 0, implicit $vgpr0
+...
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 8ab56b2..a6f0a03 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -383,8 +383,8 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32(half %x) {
-; SOFT-LABEL: utesth_f16i32:
+define i32 @utest_f16i32(half %x) {
+; SOFT-LABEL: utest_f16i32:
; SOFT: @ %bb.0: @ %entry
; SOFT-NEXT: .save {r7, lr}
; SOFT-NEXT: push {r7, lr}
@@ -400,7 +400,7 @@ define i32 @utesth_f16i32(half %x) {
; SOFT-NEXT: .LBB7_2: @ %entry
; SOFT-NEXT: pop {r7, pc}
;
-; VFP2-LABEL: utesth_f16i32:
+; VFP2-LABEL: utest_f16i32:
; VFP2: @ %bb.0: @ %entry
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
@@ -411,7 +411,7 @@ define i32 @utesth_f16i32(half %x) {
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: pop {r7, pc}
;
-; FULL-LABEL: utesth_f16i32:
+; FULL-LABEL: utest_f16i32:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.u32.f16 s0, s0
; FULL-NEXT: vmov r0, s0
@@ -3985,6 +3985,46 @@ entry:
ret i32 %spec.store.select7
}
+; i32 non-saturating
+
+define i32 @ustest_f16i32_nsat(half %x) {
+; SOFT-LABEL: ustest_f16i32_nsat:
+; SOFT: @ %bb.0:
+; SOFT-NEXT: .save {r7, lr}
+; SOFT-NEXT: push {r7, lr}
+; SOFT-NEXT: uxth r0, r0
+; SOFT-NEXT: bl __aeabi_h2f
+; SOFT-NEXT: bl __aeabi_f2iz
+; SOFT-NEXT: asrs r1, r0, #31
+; SOFT-NEXT: ands r0, r1
+; SOFT-NEXT: asrs r1, r0, #31
+; SOFT-NEXT: bics r0, r1
+; SOFT-NEXT: pop {r7, pc}
+;
+; VFP2-LABEL: ustest_f16i32_nsat:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r7, lr}
+; VFP2-NEXT: push {r7, lr}
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: bl __aeabi_h2f
+; VFP2-NEXT: vmov s0, r0
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: usat r0, #0, r0
+; VFP2-NEXT: pop {r7, pc}
+;
+; FULL-LABEL: ustest_f16i32_nsat:
+; FULL: @ %bb.0:
+; FULL-NEXT: vcvt.s32.f16 s0, s0
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: usat r0, #0, r0
+; FULL-NEXT: bx lr
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ ret i32 %spec.store.select7
+}
+
declare i32 @llvm.smin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
index 96f009a..ba31b35 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
@@ -748,8 +748,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i32:
+define <4 x i32> @utest_f16i32(<4 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i32:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
@@ -821,7 +821,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NEON-NEXT: vpop {d12, d13}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i32:
+; CHECK-FP16-LABEL: utest_f16i32:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
@@ -1366,8 +1366,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i16:
+define <8 x i16> @utest_f16i16(<8 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i16:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
@@ -1441,7 +1441,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i16:
+; CHECK-FP16-LABEL: utest_f16i16:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: vmovx.f16 s4, s0
; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0
@@ -2109,8 +2109,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64(<2 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i64:
+define <2 x i64> @utest_f16i64(<2 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i64:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, lr}
@@ -2148,7 +2148,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8}
; CHECK-NEON-NEXT: pop {r4, r5, r6, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i64:
+; CHECK-FP16-LABEL: utest_f16i64:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, lr}
@@ -2835,8 +2835,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i32_mm:
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i32_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
@@ -2881,7 +2881,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i32_mm:
+; CHECK-FP16-LABEL: utest_f16i32_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, lr}
@@ -3344,8 +3344,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i16_mm:
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i16_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
@@ -3419,7 +3419,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i16_mm:
+; CHECK-FP16-LABEL: utest_f16i16_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: vmovx.f16 s4, s0
; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0
@@ -4044,8 +4044,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i64_mm:
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i64_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, lr}
@@ -4083,7 +4083,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8}
; CHECK-NEON-NEXT: pop {r4, r5, r6, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i64_mm:
+; CHECK-FP16-LABEL: utest_f16i64_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, lr}
@@ -4215,6 +4215,77 @@ entry:
ret <2 x i64> %conv6
}
+; i32 non-saturating
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) {
+; CHECK-NEON-LABEL: ustest_f16i32_nsat:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, lr}
+; CHECK-NEON-NEXT: push {r4, lr}
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.f32 s16, s3
+; CHECK-NEON-NEXT: vmov.f32 s18, s2
+; CHECK-NEON-NEXT: vmov.f32 s20, s1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r4, r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s16, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r1, s20
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s18, r4
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d11[0], r0
+; CHECK-NEON-NEXT: mov r0, r1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s18
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s4, s16
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov.i32 q8, #0x0
+; CHECK-NEON-NEXT: vmov r0, s2
+; CHECK-NEON-NEXT: vmov.32 d10[0], r0
+; CHECK-NEON-NEXT: vmov r0, s4
+; CHECK-NEON-NEXT: vmov.32 d11[1], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d10[1], r0
+; CHECK-NEON-NEXT: vmin.s32 q9, q5, q8
+; CHECK-NEON-NEXT: vmax.s32 q0, q9, q8
+; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: pop {r4, pc}
+;
+; CHECK-FP16-LABEL: ustest_f16i32_nsat:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vmovx.f16 s2, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s1
+; CHECK-FP16-NEXT: vmovx.f16 s4, s1
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4
+; CHECK-FP16-NEXT: vcvt.s32.f16 s2, s2
+; CHECK-FP16-NEXT: vmov.i32 q9, #0x0
+; CHECK-FP16-NEXT: vmov.32 d17[0], r0
+; CHECK-FP16-NEXT: vmov r0, s6
+; CHECK-FP16-NEXT: vmov.32 d16[0], r0
+; CHECK-FP16-NEXT: vmov r0, s4
+; CHECK-FP16-NEXT: vmov.32 d17[1], r0
+; CHECK-FP16-NEXT: vmov r0, s2
+; CHECK-FP16-NEXT: vmov.32 d16[1], r0
+; CHECK-FP16-NEXT: vmin.s32 q8, q8, q9
+; CHECK-FP16-NEXT: vmax.s32 q0, q8, q9
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ ret <4 x i32> %spec.store.select7
+}
+
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/Hexagon/inst_setcc_uno_uo.ll b/llvm/test/CodeGen/Hexagon/inst_setcc_uno_uo.ll
new file mode 100644
index 0000000..8b121c5
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/inst_setcc_uno_uo.ll
@@ -0,0 +1,93 @@
+;; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b %s -o - | FileCheck %s
+
+define dso_local void @store_isnan_f32(ptr %a, ptr %b, ptr %isnan_cmp) local_unnamed_addr {
+entry:
+ %arrayidx_a = getelementptr inbounds nuw float, ptr %a, i32 0
+ %arrayidx_b = getelementptr inbounds nuw float, ptr %b, i32 0
+ %0 = load <32 x float>, ptr %arrayidx_a, align 4
+ %1 = load <32 x float>, ptr %arrayidx_b, align 4
+ %.vectorized = fcmp uno <32 x float> %0, %1
+ %.LS.instance = zext <32 x i1> %.vectorized to <32 x i32>
+ %arrayidx1 = getelementptr inbounds nuw i32, ptr %isnan_cmp, i32 0
+ store <32 x i32> %.LS.instance, ptr %arrayidx1, align 4
+ ret void
+}
+
+; CHECK-LABEL: store_isnan_f32
+; CHECK: [[RONE32:r[0-9]+]] = #1
+; CHECK: [[VOP2_F32:v[0-9]+]] = vxor([[VOP2_F32]],[[VOP2_F32]])
+; CHECK: [[VOP1_F32:v[0-9]+]] = vmemu(r0+#0)
+; CHECK: [[VONES32:v[0-9]+]] = vsplat([[RONE32]])
+; CHECK: [[Q1_F32:q[0-9]+]] = vcmp.eq([[VOP1_F32]].w,[[VOP1_F32]].w)
+; CHECK: [[VOP3_F32:v[0-9]+]] = vmemu(r1+#0)
+; CHECK: [[Q1_F32]] &= vcmp.eq([[VOP3_F32]].w,[[VOP3_F32]].w)
+; CHECK: [[VOUT_F32:v[0-9]+]] = vmux([[Q1_F32]],[[VOP2_F32]],[[VONES32]])
+; CHECK: vmemu(r2+#0) = [[VOUT_F32]]
+
+define dso_local void @store_isnan_f16(ptr %a, ptr %b, ptr %isnan_cmp) local_unnamed_addr {
+entry:
+ %arrayidx_a = getelementptr inbounds nuw half, ptr %a, i32 0
+ %arrayidx_b = getelementptr inbounds nuw half, ptr %b, i32 0
+ %0 = load <64 x half>, ptr %arrayidx_a, align 2
+ %1 = load <64 x half>, ptr %arrayidx_b, align 2
+ %.vectorized = fcmp uno <64 x half> %0, %1
+ %conv.LS.instance = zext <64 x i1> %.vectorized to <64 x i16>
+ %arrayidx1 = getelementptr inbounds nuw i16, ptr %isnan_cmp, i32 0
+ store <64 x i16> %conv.LS.instance, ptr %arrayidx1, align 2
+ ret void
+}
+; CHECK-LABEL: store_isnan_f16
+; CHECK: [[RONE16:r[0-9]+]] = #1
+; CHECK: [[VOP2_F16:v[0-9]+]] = vxor([[VOP2_F16]],[[VOP2_F16]])
+; CHECK: [[VOP1_F16:v[0-9]+]] = vmemu(r0+#0)
+; CHECK: [[VONES16:v[0-9]+]].h = vsplat([[RONE16]])
+; CHECK: [[Q1_F16:q[0-9]+]] = vcmp.eq([[VOP1_F16]].h,[[VOP1_F16]].h)
+; CHECK: [[VOP3_F16:v[0-9]+]] = vmemu(r1+#0)
+; CHECK: [[Q1_F16]] &= vcmp.eq([[VOP3_F16]].h,[[VOP3_F16]].h)
+; CHECK: [[VOUT_F16:v[0-9]+]] = vmux([[Q1_F16]],[[VOP2_F16]],[[VONES16]])
+; CHECK: vmemu(r2+#0) = [[VOUT_F16]]
+
+define dso_local void @store_isordered_f32(ptr %a, ptr %b, ptr %isordered_cmp) local_unnamed_addr {
+entry:
+ %arrayidx_a = getelementptr inbounds nuw float, ptr %a, i32 0
+ %arrayidx_b = getelementptr inbounds nuw float, ptr %b, i32 0
+ %0 = load <32 x float>, ptr %arrayidx_a, align 4
+ %1 = load <32 x float>, ptr %arrayidx_b, align 4
+ %.vectorized = fcmp ord <32 x float> %0, %1
+ %.LS.instance = zext <32 x i1> %.vectorized to <32 x i32>
+ %arrayidx1 = getelementptr inbounds nuw i32, ptr %isordered_cmp, i32 0
+ store <32 x i32> %.LS.instance, ptr %arrayidx1, align 4
+ ret void
+}
+; CHECK-LABEL: store_isordered_f32
+; CHECK: [[VOP2_ORD_F32:v[0-9]+]] = vxor([[VOP2_ORD_F32]],[[VOP2_ORD_F32]])
+; CHECK: [[VOP1_ORD_F32:v[0-9]+]] = vmemu(r0+#0)
+; CHECK: [[VONES_ORD_F32:v[0-9]+]] = vsplat([[RONE32]])
+; CHECK: [[Q1_ORD_F32:q[0-9]+]] = vcmp.eq([[VOP1_ORD_F32]].w,[[VOP1_ORD_F32]].w)
+; CHECK: [[VOP3_ORD_F32:v[0-9]+]] = vmemu(r1+#0)
+; CHECK: [[Q1_ORD_F32]] &= vcmp.eq([[VOP3_ORD_F32]].w,[[VOP3_ORD_F32]].w)
+; CHECK: [[VOUT_ORD_F32:v[0-9]+]] = vmux([[Q1_ORD_F32]],[[VONES_ORD_F32]],[[VOP2_ORD_F32]])
+; CHECK: vmemu(r2+#0) = [[VOUT_ORD_F32]]
+
+define dso_local void @store_isordered_f16(ptr %a, ptr %b, ptr %isordered_cmp) local_unnamed_addr {
+entry:
+ %arrayidx_a = getelementptr inbounds nuw half, ptr %a, i32 0
+ %arrayidx_b = getelementptr inbounds nuw half, ptr %b, i32 0
+ %0 = load <64 x half>, ptr %arrayidx_a, align 2
+ %1 = load <64 x half>, ptr %arrayidx_b, align 2
+ %.vectorized = fcmp ord <64 x half> %0, %1
+ %conv.LS.instance = zext <64 x i1> %.vectorized to <64 x i16>
+ %arrayidx1 = getelementptr inbounds nuw i16, ptr %isordered_cmp, i32 0
+ store <64 x i16> %conv.LS.instance, ptr %arrayidx1, align 2
+ ret void
+}
+; CHECK-LABEL: store_isordered_f16
+; CHECK: [[VOP2_ORD_F16:v[0-9]+]] = vxor([[VOP2_ORD_F16]],[[VOP2_ORD_F16]])
+; CHECK: [[VOP1_ORD_F16:v[0-9]+]] = vmemu(r0+#0)
+; CHECK: [[VONES_ORD_F16:v[0-9]+]].h = vsplat([[RONE16]])
+; CHECK: [[Q1_ORD_F16:q[0-9]+]] = vcmp.eq([[VOP1_ORD_F16]].h,[[VOP1_ORD_F16]].h)
+; CHECK: [[VOP3_ORD_F16:v[0-9]+]] = vmemu(r1+#0)
+; CHECK: [[Q1_ORD_F16]] &= vcmp.eq([[VOP3_ORD_F16]].h,[[VOP3_ORD_F16]].h)
+; CHECK: [[VOUT_ORD_F16:v[0-9]+]] = vmux([[Q1_ORD_F16]],[[VONES_ORD_F16]],[[VOP2_ORD_F16]])
+; CHECK: vmemu(r2+#0) = [[VOUT_ORD_F16]]
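
The HVX sequences checked above implement the ordered and unordered predicates without a dedicated NaN test: only a NaN compares unequal to itself, so ord(a, b) is (a == a) & (b == b), and uno(a, b) is the same predicate with the vmux select operands swapped. A scalar sketch of the identity (illustrative, not part of the test):

define i1 @is_ordered(float %a, float %b) {
  ; Each self-compare is false exactly when the corresponding input is NaN.
  %aa = fcmp oeq float %a, %a
  %bb = fcmp oeq float %b, %b
  %ord = and i1 %aa, %bb  ; equivalent to fcmp ord float %a, %b
  ret i1 %ord
}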
diff --git a/llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll b/llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll
index 5fa5023..fe0f7dd 100644
--- a/llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll
+++ b/llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=hexagon-unknown-elf < %s | FileCheck %s
; In ISelLowering, when folding nodes (or (shl xx, s), (zext y))
@@ -11,17 +12,18 @@ target triple = "hexagon"
; Function Attrs: nofree nosync nounwind memory(readwrite, inaccessiblemem: none)
define dso_local void @foo(i64* nocapture noundef %buf, i32 %a, i32 %b) local_unnamed_addr {
; CHECK-LABEL: foo:
-; CHECK: // %bb.0: // %entry
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: {
-; CHECK-NEXT: r[[REG0:[0-9]+]] = addasl(r2,r1,#1)
-; CHECK-NEXT: r[[REG2:[0-9]+]] = asl(r1,#1)
+; CHECK-NEXT: r2 = addasl(r2,r1,#1)
+; CHECK-NEXT: r3 = asl(r1,#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: r[[REG1:[0-9]+]] = addasl(r[[REG0]],r1,#1)
+; CHECK-NEXT: r2 = addasl(r2,r1,#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
-; CHECK-NEXT: memd(r0+#8) = r[[REG2]]:[[REG1]]
+; CHECK-NEXT: memd(r0+#8) = r3:2
; CHECK-NEXT: }
entry:
%arrayidx = getelementptr inbounds i64, i64* %buf, i32 1
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 18d071c..a0d1ecc 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -436,8 +436,8 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32(half %x) {
-; RV32-LABEL: utesth_f16i32:
+define i32 @utest_f16i32(half %x) {
+; RV32-LABEL: utest_f16i32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
@@ -456,7 +456,7 @@ define i32 @utesth_f16i32(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
-; RV64-LABEL: utesth_f16i32:
+; RV64-LABEL: utest_f16i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
@@ -974,8 +974,8 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16(half %x) {
-; RV32-LABEL: utesth_f16i16:
+define i16 @utest_f16i16(half %x) {
+; RV32-LABEL: utest_f16i16:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
@@ -995,7 +995,7 @@ define i16 @utesth_f16i16(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
-; RV64-LABEL: utesth_f16i16:
+; RV64-LABEL: utest_f16i16:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
@@ -3829,6 +3829,52 @@ entry:
ret i64 %conv6
}
+; i32 non-saturating: the smin/smax pattern below does not form a saturating conversion
+
+define i32 @ustest_f16i32_nsat(half %x) {
+; RV32-LABEL: ustest_f16i32_nsat:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __extendhfsf2
+; RV32-NEXT: fcvt.w.s a0, fa0, rtz
+; RV32-NEXT: srai a1, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: sgtz a1, a0
+; RV32-NEXT: neg a1, a1
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: .cfi_restore ra
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ustest_f16i32_nsat:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __extendhfsf2
+; RV64-NEXT: fcvt.l.s a0, fa0, rtz
+; RV64-NEXT: srai a1, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: sgtz a1, a0
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: .cfi_restore ra
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: .cfi_def_cfa_offset 0
+; RV64-NEXT: ret
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ ret i32 %spec.store.select7
+}
+
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index aba9d37..f5977625 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -519,8 +519,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i32:
+define <4 x i32> @utest_f16i32(<4 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i32:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: addi sp, sp, -64
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64
@@ -610,7 +610,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: bgeu a3, a1, .LBB7_4
; CHECK-NOV-NEXT: j .LBB7_5
;
-; CHECK-V-LABEL: utesth_f16i32:
+; CHECK-V-LABEL: utest_f16i32:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: addi sp, sp, -48
; CHECK-V-NEXT: .cfi_def_cfa_offset 48
@@ -1594,8 +1594,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i16:
+define <8 x i16> @utest_f16i16(<8 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i16:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: addi sp, sp, -128
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 128
@@ -1765,7 +1765,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: bgeu a7, a3, .LBB16_8
; CHECK-NOV-NEXT: j .LBB16_9
;
-; CHECK-V-LABEL: utesth_f16i16:
+; CHECK-V-LABEL: utest_f16i16:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: addi sp, sp, -80
; CHECK-V-NEXT: .cfi_def_cfa_offset 80
@@ -3332,8 +3332,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64(<2 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i64:
+define <2 x i64> @utest_f16i64(<2 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i64:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: addi sp, sp, -32
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 32
@@ -3373,7 +3373,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0
; CHECK-NOV-NEXT: ret
;
-; CHECK-V-LABEL: utesth_f16i64:
+; CHECK-V-LABEL: utest_f16i64:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: addi sp, sp, -32
; CHECK-V-NEXT: .cfi_def_cfa_offset 32
@@ -4074,8 +4074,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i32_mm:
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i32_mm:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: addi sp, sp, -64
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64
@@ -4165,7 +4165,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: bgeu a3, a1, .LBB34_4
; CHECK-NOV-NEXT: j .LBB34_5
;
-; CHECK-V-LABEL: utesth_f16i32_mm:
+; CHECK-V-LABEL: utest_f16i32_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: addi sp, sp, -48
; CHECK-V-NEXT: .cfi_def_cfa_offset 48
@@ -5134,8 +5134,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i16_mm:
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i16_mm:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: addi sp, sp, -128
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 128
@@ -5305,7 +5305,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: bgeu a7, a3, .LBB43_8
; CHECK-NOV-NEXT: j .LBB43_9
;
-; CHECK-V-LABEL: utesth_f16i16_mm:
+; CHECK-V-LABEL: utest_f16i16_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: addi sp, sp, -80
; CHECK-V-NEXT: .cfi_def_cfa_offset 80
@@ -6837,8 +6837,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i64_mm:
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i64_mm:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: addi sp, sp, -32
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 32
@@ -6877,7 +6877,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0
; CHECK-NOV-NEXT: ret
;
-; CHECK-V-LABEL: utesth_f16i64_mm:
+; CHECK-V-LABEL: utest_f16i64_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: addi sp, sp, -32
; CHECK-V-NEXT: .cfi_def_cfa_offset 32
@@ -7048,6 +7048,172 @@ entry:
ret <2 x i64> %conv6
}
+; i32 non-saturating: the smin/smax pattern below does not form a saturating conversion
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) {
+; CHECK-NOV-LABEL: ustest_f16i32_nsat:
+; CHECK-NOV: # %bb.0: # %entry
+; CHECK-NOV-NEXT: addi sp, sp, -64
+; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NOV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT: .cfi_offset ra, -8
+; CHECK-NOV-NEXT: .cfi_offset s0, -16
+; CHECK-NOV-NEXT: .cfi_offset s1, -24
+; CHECK-NOV-NEXT: .cfi_offset s2, -32
+; CHECK-NOV-NEXT: .cfi_offset s3, -40
+; CHECK-NOV-NEXT: .cfi_offset fs0, -48
+; CHECK-NOV-NEXT: .cfi_offset fs1, -56
+; CHECK-NOV-NEXT: lhu s1, 0(a1)
+; CHECK-NOV-NEXT: lhu s2, 8(a1)
+; CHECK-NOV-NEXT: lhu a2, 16(a1)
+; CHECK-NOV-NEXT: lhu s3, 24(a1)
+; CHECK-NOV-NEXT: mv s0, a0
+; CHECK-NOV-NEXT: fmv.w.x fa0, a2
+; CHECK-NOV-NEXT: call __extendhfsf2
+; CHECK-NOV-NEXT: fmv.s fs0, fa0
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
+; CHECK-NOV-NEXT: call __extendhfsf2
+; CHECK-NOV-NEXT: fmv.s fs1, fa0
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: call __extendhfsf2
+; CHECK-NOV-NEXT: fcvt.l.s s1, fa0, rtz
+; CHECK-NOV-NEXT: fcvt.l.s s2, fs1, rtz
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
+; CHECK-NOV-NEXT: fcvt.l.s s3, fs0, rtz
+; CHECK-NOV-NEXT: call __extendhfsf2
+; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
+; CHECK-NOV-NEXT: srai a1, s3, 63
+; CHECK-NOV-NEXT: and a1, a1, s3
+; CHECK-NOV-NEXT: srai a2, s2, 63
+; CHECK-NOV-NEXT: and a2, a2, s2
+; CHECK-NOV-NEXT: srai a3, s1, 63
+; CHECK-NOV-NEXT: and a3, a3, s1
+; CHECK-NOV-NEXT: srai a4, a0, 63
+; CHECK-NOV-NEXT: and a0, a4, a0
+; CHECK-NOV-NEXT: sgtz a4, a3
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a3, a4, a3
+; CHECK-NOV-NEXT: sgtz a4, a2
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a2, a4, a2
+; CHECK-NOV-NEXT: sgtz a4, a1
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a1, a4, a1
+; CHECK-NOV-NEXT: sgtz a4, a0
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a0, a4, a0
+; CHECK-NOV-NEXT: sw a3, 0(s0)
+; CHECK-NOV-NEXT: sw a2, 4(s0)
+; CHECK-NOV-NEXT: sw a1, 8(s0)
+; CHECK-NOV-NEXT: sw a0, 12(s0)
+; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT: .cfi_restore ra
+; CHECK-NOV-NEXT: .cfi_restore s0
+; CHECK-NOV-NEXT: .cfi_restore s1
+; CHECK-NOV-NEXT: .cfi_restore s2
+; CHECK-NOV-NEXT: .cfi_restore s3
+; CHECK-NOV-NEXT: .cfi_restore fs0
+; CHECK-NOV-NEXT: .cfi_restore fs1
+; CHECK-NOV-NEXT: addi sp, sp, 64
+; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NOV-NEXT: ret
+;
+; CHECK-V-LABEL: ustest_f16i32_nsat:
+; CHECK-V: # %bb.0: # %entry
+; CHECK-V-NEXT: addi sp, sp, -48
+; CHECK-V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: .cfi_offset ra, -8
+; CHECK-V-NEXT: .cfi_offset s0, -16
+; CHECK-V-NEXT: .cfi_offset s1, -24
+; CHECK-V-NEXT: .cfi_offset s2, -32
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: sub sp, sp, a1
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
+; CHECK-V-NEXT: lhu s0, 0(a0)
+; CHECK-V-NEXT: lhu s1, 8(a0)
+; CHECK-V-NEXT: lhu s2, 16(a0)
+; CHECK-V-NEXT: lhu a0, 24(a0)
+; CHECK-V-NEXT: fmv.w.x fa0, a0
+; CHECK-V-NEXT: call __extendhfsf2
+; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT: fmv.w.x fa0, s2
+; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; CHECK-V-NEXT: call __extendhfsf2
+; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v9, 1
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; CHECK-V-NEXT: fmv.w.x fa0, s1
+; CHECK-V-NEXT: call __extendhfsf2
+; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT: fmv.w.x fa0, s0
+; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; CHECK-V-NEXT: call __extendhfsf2
+; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v9, 1
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-V-NEXT: vslideup.vi v8, v9, 2
+; CHECK-V-NEXT: vmin.vx v8, v8, zero
+; CHECK-V-NEXT: vmax.vx v8, v8, zero
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: .cfi_def_cfa sp, 48
+; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: .cfi_restore ra
+; CHECK-V-NEXT: .cfi_restore s0
+; CHECK-V-NEXT: .cfi_restore s1
+; CHECK-V-NEXT: .cfi_restore s2
+; CHECK-V-NEXT: addi sp, sp, 48
+; CHECK-V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-V-NEXT: ret
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ ret <4 x i32> %spec.store.select7
+}
+
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll b/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll
new file mode 100644
index 0000000..8491328
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll
@@ -0,0 +1,28 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - | FileCheck %s --match-full-lines
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#v2_uint:]] = OpTypeVector %[[#uint]] 2
+; CHECK-DAG: %[[#double:]] = OpTypeFloat 64
+; CHECK-DAG: %[[#v2_double:]] = OpTypeVector %[[#double]] 2
+; CHECK-DAG: %[[#v4_uint:]] = OpTypeVector %[[#uint]] 4
+@.str = private unnamed_addr constant [3 x i8] c"In\00", align 1
+@.str.2 = private unnamed_addr constant [4 x i8] c"Out\00", align 1
+
+define void @main() local_unnamed_addr #0 {
+entry:
+ %0 = tail call target("spirv.VulkanBuffer", [0 x <2 x i32>], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v2i32_12_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
+ %1 = tail call target("spirv.VulkanBuffer", [0 x <2 x double>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v2f64_12_1t(i32 0, i32 2, i32 1, i32 0, ptr nonnull @.str.2)
+ %2 = tail call noundef align 8 dereferenceable(8) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2i32_12_0t(target("spirv.VulkanBuffer", [0 x <2 x i32>], 12, 0) %0, i32 0)
+ %3 = load <2 x i32>, ptr addrspace(11) %2, align 8
+ %4 = tail call noundef align 8 dereferenceable(8) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2i32_12_0t(target("spirv.VulkanBuffer", [0 x <2 x i32>], 12, 0) %0, i32 1)
+ %5 = load <2 x i32>, ptr addrspace(11) %4, align 8
+; CHECK: %[[#tmp:]] = OpVectorShuffle %[[#v4_uint]] {{%[0-9]+}} {{%[0-9]+}} 0 2 1 3
+ %6 = shufflevector <2 x i32> %3, <2 x i32> %5, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK: %[[#access:]] = OpAccessChain {{.*}}
+ %7 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2f64_12_1t(target("spirv.VulkanBuffer", [0 x <2 x double>], 12, 1) %1, i32 0)
+; CHECK: %[[#bitcast:]] = OpBitcast %[[#v2_double]] %[[#tmp]]
+; CHECK: OpStore %[[#access]] %[[#bitcast]] Aligned 16
+ store <4 x i32> %6, ptr addrspace(11) %7, align 16
+ ret void
+}
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
index 137994ce..59f3edc 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
@@ -136,9 +136,9 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32(half %x) {
-; CHECK-LABEL: utesth_f16i32:
-; CHECK: .functype utesth_f16i32 (f32) -> (i32)
+define i32 @utest_f16i32(half %x) {
+; CHECK-LABEL: utest_f16i32:
+; CHECK: .functype utest_f16i32 (f32) -> (i32)
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
@@ -153,9 +153,9 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32_cse(half %x) {
-; CHECK-LABEL: utesth_f16i32_cse:
-; CHECK: .functype utesth_f16i32_cse (f32) -> (i32)
+define i32 @utest_f16i32_cse(half %x) {
+; CHECK-LABEL: utest_f16i32_cse:
+; CHECK: .functype utest_f16i32_cse (f32) -> (i32)
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
@@ -403,9 +403,9 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16(half %x) {
-; CHECK-LABEL: utesth_f16i16:
-; CHECK: .functype utesth_f16i16 (f32) -> (i32)
+define i16 @utest_f16i16(half %x) {
+; CHECK-LABEL: utest_f16i16:
+; CHECK: .functype utest_f16i16 (f32) -> (i32)
; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
@@ -427,9 +427,9 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16_cse(half %x) {
-; CHECK-LABEL: utesth_f16i16_cse:
-; CHECK: .functype utesth_f16i16_cse (f32) -> (i32)
+define i16 @utest_f16i16_cse(half %x) {
+; CHECK-LABEL: utest_f16i16_cse:
+; CHECK: .functype utest_f16i16_cse (f32) -> (i32)
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
@@ -880,9 +880,9 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64(half %x) {
-; CHECK-LABEL: utesth_f16i64:
-; CHECK: .functype utesth_f16i64 (f32) -> (i64)
+define i64 @utest_f16i64(half %x) {
+; CHECK-LABEL: utest_f16i64:
+; CHECK: .functype utest_f16i64 (f32) -> (i64)
; CHECK-NEXT: .local i32, i64, i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: global.get __stack_pointer
@@ -919,9 +919,9 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64_cse(half %x) {
-; CHECK-LABEL: utesth_f16i64_cse:
-; CHECK: .functype utesth_f16i64_cse (f32) -> (i64)
+define i64 @utest_f16i64_cse(half %x) {
+; CHECK-LABEL: utest_f16i64_cse:
+; CHECK: .functype utest_f16i64_cse (f32) -> (i64)
; CHECK-NEXT: .local i32, i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: global.get __stack_pointer
@@ -1118,9 +1118,9 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32_mm(half %x) {
-; CHECK-LABEL: utesth_f16i32_mm:
-; CHECK: .functype utesth_f16i32_mm (f32) -> (i32)
+define i32 @utest_f16i32_mm(half %x) {
+; CHECK-LABEL: utest_f16i32_mm:
+; CHECK: .functype utest_f16i32_mm (f32) -> (i32)
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
@@ -1353,9 +1353,9 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16_mm(half %x) {
-; CHECK-LABEL: utesth_f16i16_mm:
-; CHECK: .functype utesth_f16i16_mm (f32) -> (i32)
+define i16 @utest_f16i16_mm(half %x) {
+; CHECK-LABEL: utest_f16i16_mm:
+; CHECK: .functype utest_f16i16_mm (f32) -> (i32)
; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
@@ -1637,9 +1637,9 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64_mm(half %x) {
-; CHECK-LABEL: utesth_f16i64_mm:
-; CHECK: .functype utesth_f16i64_mm (f32) -> (i64)
+define i64 @utest_f16i64_mm(half %x) {
+; CHECK-LABEL: utest_f16i64_mm:
+; CHECK: .functype utest_f16i64_mm (f32) -> (i64)
; CHECK-NEXT: .local i32, i64, i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: global.get __stack_pointer
@@ -1724,9 +1724,9 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64_mm_cse(half %x) {
-; CHECK-LABEL: utesth_f16i64_mm_cse:
-; CHECK: .functype utesth_f16i64_mm_cse (f32) -> (i64)
+define i64 @utest_f16i64_mm_cse(half %x) {
+; CHECK-LABEL: utest_f16i64_mm_cse:
+; CHECK: .functype utest_f16i64_mm_cse (f32) -> (i64)
; CHECK-NEXT: .local i32, i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: global.get __stack_pointer
@@ -1754,6 +1754,35 @@ entry:
ret i64 %conv6
}
+; i32 non-saturating: the smin/smax pattern below does not form a saturating conversion
+
+define i32 @ustest_f16i32_nsat(half %x) {
+; CHECK-LABEL: ustest_f16i32_nsat:
+; CHECK: .functype ustest_f16i32_nsat (f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 31
+; CHECK-NEXT: i32.shr_s
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.and
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.gt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ ret i32 %spec.store.select7
+}
+
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index 7190e16..52f57dc 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -209,9 +209,9 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-LABEL: utesth_f16i32:
-; CHECK: .functype utesth_f16i32 (f32, f32, f32, f32) -> (v128)
+define <4 x i32> @utest_f16i32(<4 x half> %x) {
+; CHECK-LABEL: utest_f16i32:
+; CHECK: .functype utest_f16i32 (f32, f32, f32, f32) -> (v128)
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 1
; CHECK-NEXT: call __truncsfhf2
@@ -513,9 +513,9 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-LABEL: utesth_f16i16:
-; CHECK: .functype utesth_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
+define <8 x i16> @utest_f16i16(<8 x half> %x) {
+; CHECK-LABEL: utest_f16i16:
+; CHECK: .functype utest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
@@ -1295,9 +1295,9 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64(<2 x half> %x) {
-; CHECK-LABEL: utesth_f16i64:
-; CHECK: .functype utesth_f16i64 (f32, f32) -> (v128)
+define <2 x i64> @utest_f16i64(<2 x half> %x) {
+; CHECK-LABEL: utest_f16i64:
+; CHECK: .functype utest_f16i64 (f32, f32) -> (v128)
; CHECK-NEXT: .local i32, i64, i64, i64, i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: global.get __stack_pointer
@@ -1649,9 +1649,9 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-LABEL: utesth_f16i32_mm:
-; CHECK: .functype utesth_f16i32_mm (f32, f32, f32, f32) -> (v128)
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) {
+; CHECK-LABEL: utest_f16i32_mm:
+; CHECK: .functype utest_f16i32_mm (f32, f32, f32, f32) -> (v128)
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 1
; CHECK-NEXT: call __truncsfhf2
@@ -1938,9 +1938,9 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-LABEL: utesth_f16i16_mm:
-; CHECK: .functype utesth_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
+; CHECK-LABEL: utest_f16i16_mm:
+; CHECK: .functype utest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
@@ -2673,9 +2673,9 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
-; CHECK-LABEL: utesth_f16i64_mm:
-; CHECK: .functype utesth_f16i64_mm (f32, f32) -> (v128)
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: utest_f16i64_mm:
+; CHECK: .functype utest_f16i64_mm (f32, f32) -> (v128)
; CHECK-NEXT: .local i32, i64, i64, i64, i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: global.get __stack_pointer
@@ -2810,6 +2810,48 @@ entry:
ret <2 x i64> %conv6
}
+; i32 non-saturating: the smin/smax pattern below does not form a saturating conversion
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) {
+; CHECK-LABEL: ustest_f16i32_nsat:
+; CHECK: .functype ustest_f16i32_nsat (f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT: .local v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: v128.const 0, 0, 0, 0
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ ret <4 x i32> %spec.store.select7
+}
+
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
index dec829f..44cf4e8 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
@@ -911,7 +911,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 16(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: paddb (%rdx), %xmm0
; SSE2-NEXT: movdqa %xmm0, (%rcx)
@@ -1898,7 +1898,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 32(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
@@ -4155,7 +4155,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 48(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: paddb (%rdx), %xmm2
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
index 3d4cddb..89b5c33 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -769,7 +769,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
; SSE2-LABEL: vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: paddb (%rsi), %xmm1
; SSE2-NEXT: movdqa %xmm1, (%rdx)
@@ -1522,7 +1522,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; SSE2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
@@ -3335,7 +3335,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: paddb (%rsi), %xmm2
diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll
index 3f5ec7b..67483be 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat.ll
@@ -161,8 +161,8 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i32:
+define i32 @utest_f16i32(half %x) nounwind {
+; CHECK-LABEL: utest_f16i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __extendhfsf2@PLT
@@ -360,8 +360,8 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i16:
+define i16 @utest_f16i16(half %x) nounwind {
+; CHECK-LABEL: utest_f16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __extendhfsf2@PLT
@@ -566,8 +566,8 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i64:
+define i64 @utest_f16i64(half %x) nounwind {
+; CHECK-LABEL: utest_f16i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __fixunshfti@PLT
@@ -762,8 +762,8 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32_mm(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i32_mm:
+define i32 @utest_f16i32_mm(half %x) nounwind {
+; CHECK-LABEL: utest_f16i32_mm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __extendhfsf2@PLT
@@ -946,8 +946,8 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16_mm(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i16_mm:
+define i16 @utest_f16i16_mm(half %x) nounwind {
+; CHECK-LABEL: utest_f16i16_mm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __extendhfsf2@PLT
@@ -1131,8 +1131,8 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64_mm(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i64_mm:
+define i64 @utest_f16i64_mm(half %x) nounwind {
+; CHECK-LABEL: utest_f16i64_mm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __fixunshfti@PLT
@@ -1170,6 +1170,27 @@ entry:
ret i64 %conv6
}
+; i32 non-saturating: the smin/smax pattern below does not form a saturating conversion
+
+define i32 @ustest_f16i32_nsat(half %x) nounwind {
+; CHECK-LABEL: ustest_f16i32_nsat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq __extendhfsf2@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %ecx
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: sarl $31, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: andl %ecx, %eax
+; CHECK-NEXT: cmovlel %edx, %eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: retq
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ ret i32 %spec.store.select7
+}
+
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
index 1a2cfd6..991ce33 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
@@ -747,8 +747,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i32:
+define <4 x i32> @utest_f16i32(<4 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: subq $72, %rsp
; SSE-NEXT: movaps %xmm0, %xmm1
@@ -835,7 +835,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
; SSE-NEXT: addq $72, %rsp
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i32:
+; AVX2-LABEL: utest_f16i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2
@@ -893,7 +893,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i32:
+; AVX512-LABEL: utest_f16i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0
@@ -1338,8 +1338,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i16:
+define <8 x i16> @utest_f16i16(<8 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i16:
; SSE: # %bb.0: # %entry
; SSE-NEXT: subq $72, %rsp
; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
@@ -1436,7 +1436,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind {
; SSE-NEXT: addq $72, %rsp
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i16:
+; AVX2-LABEL: utest_f16i16:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
@@ -1453,7 +1453,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i16:
+; AVX512-LABEL: utest_f16i16:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
; AVX512-NEXT: vcvttps2udq %ymm0, %ymm0
@@ -2456,8 +2456,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i64:
+define <2 x i64> @utest_f16i64(<2 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i64:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
@@ -2483,7 +2483,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind {
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i64:
+; AVX2-LABEL: utest_f16i64:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: pushq %rbx
@@ -2508,7 +2508,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind {
; AVX2-NEXT: popq %r14
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i64:
+; AVX512-LABEL: utest_f16i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: pushq %r14
; AVX512-NEXT: pushq %rbx
@@ -3359,8 +3359,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i32_mm:
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i32_mm:
; SSE: # %bb.0: # %entry
; SSE-NEXT: subq $72, %rsp
; SSE-NEXT: movaps %xmm0, %xmm1
@@ -3447,7 +3447,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
; SSE-NEXT: addq $72, %rsp
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i32_mm:
+; AVX2-LABEL: utest_f16i32_mm:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2
@@ -3505,7 +3505,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i32_mm:
+; AVX512-LABEL: utest_f16i32_mm:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0
@@ -3935,8 +3935,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i16_mm:
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i16_mm:
; SSE: # %bb.0: # %entry
; SSE-NEXT: subq $72, %rsp
; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
@@ -4033,7 +4033,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind {
; SSE-NEXT: addq $72, %rsp
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i16_mm:
+; AVX2-LABEL: utest_f16i16_mm:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
@@ -4050,7 +4050,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i16_mm:
+; AVX512-LABEL: utest_f16i16_mm:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
; AVX512-NEXT: vcvttps2udq %ymm0, %ymm0
@@ -4820,8 +4820,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i64_mm:
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i64_mm:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
@@ -4847,7 +4847,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind {
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i64_mm:
+; AVX2-LABEL: utest_f16i64_mm:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: pushq %rbx
@@ -4872,7 +4872,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind {
; AVX2-NEXT: popq %r14
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i64_mm:
+; AVX512-LABEL: utest_f16i64_mm:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: pushq %r14
; AVX512-NEXT: pushq %rbx
@@ -4974,6 +4974,63 @@ entry:
ret <2 x i64> %conv6
}
+; i32 non-saturating: the smin/smax pattern below does not form a saturating conversion
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) nounwind {
+; SSE-LABEL: ustest_f16i32_nsat:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: subq $72, %rsp
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
+; SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
+; SSE-NEXT: psrlq $48, %xmm0
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: cvttps2dq %xmm1, %xmm0
+; SSE-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT: # xmm0 = xmm0[0],mem[0]
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm2
+; SSE-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE-NEXT: pand %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: addq $72, %rsp
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ustest_f16i32_nsat:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ ret <4 x i32> %spec.store.select7
+}
+
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll b/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
index ecd9435..1766b4d 100644
--- a/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
+++ b/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
@@ -58,7 +58,7 @@ define <8 x float> @foo8(<8 x float> %v, ptr%p) nounwind {
define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask:
; AVX2: # %bb.0:
-; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -68,7 +68,7 @@ define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind {
define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask2:
; AVX2: # %bb.0:
-; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -78,7 +78,7 @@ define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind {
define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask3:
; AVX2: # %bb.0:
-; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 3>
@@ -88,7 +88,7 @@ define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind {
define <4 x i32> @undef_splatmask4(<4 x i32> %v, ptr %p) nounwind {
; AVX2-LABEL: undef_splatmask4:
; AVX2: # %bb.0:
-; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,2,2]
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX2-NEXT: vmovaps %xmm0, (%rdi)
; AVX2-NEXT: vmovaps %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll b/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll
index 209d6a5..93a692c 100644
--- a/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll
@@ -1911,13 +1911,13 @@ define <2 x i64> @test_v2f64_ogt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmoval %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
; SSE-32-NEXT: cmoval %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2031,13 +2031,13 @@ define <2 x i64> @test_v2f64_oge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovael %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
; SSE-32-NEXT: cmovael %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2151,13 +2151,13 @@ define <2 x i64> @test_v2f64_olt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmoval %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
; SSE-32-NEXT: cmoval %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2269,13 +2269,13 @@ define <2 x i64> @test_v2f64_ole_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovael %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
; SSE-32-NEXT: cmovael %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2680,13 +2680,13 @@ define <2 x i64> @test_v2f64_ugt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovbl %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
; SSE-32-NEXT: cmovbl %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2798,13 +2798,13 @@ define <2 x i64> @test_v2f64_uge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovbel %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
; SSE-32-NEXT: cmovbel %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2916,13 +2916,13 @@ define <2 x i64> @test_v2f64_ult_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovbl %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
; SSE-32-NEXT: cmovbl %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -3036,13 +3036,13 @@ define <2 x i64> @test_v2f64_ule_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovbel %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
; SSE-32-NEXT: cmovbel %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
index 9ecc629..b378dce 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
@@ -162,7 +162,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: pslld $23, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -182,7 +182,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: pslld $23, %xmm1
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -200,7 +200,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
@@ -277,7 +277,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
@@ -289,7 +289,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X86-SSE2-NEXT: pslld $23, %xmm1
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll
index 322ebe2..06ff7e7 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll
@@ -250,7 +250,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt)
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: pandn %xmm4, %xmm5
@@ -286,7 +286,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm3 = [31,31,31,31]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pandn %xmm3, %xmm4
@@ -316,7 +316,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
@@ -423,7 +423,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0
@@ -450,7 +450,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
; X86-SSE2-NEXT: pandn %xmm4, %xmm5
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
index 178c02f..ef5ffe4 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
@@ -172,7 +172,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: psubd %xmm1, %xmm2
; SSE2-NEXT: pslld $23, %xmm2
@@ -194,7 +194,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: psubd %xmm1, %xmm2
; SSE41-NEXT: pslld $23, %xmm2
@@ -214,7 +214,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
@@ -293,7 +293,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
@@ -309,7 +309,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X86-SSE2-NEXT: pxor %xmm2, %xmm2
; X86-SSE2-NEXT: psubd %xmm1, %xmm2
; X86-SSE2-NEXT: pslld $23, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
index 372deb05..2d8670a 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
@@ -251,7 +251,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt)
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: pand %xmm4, %xmm5
@@ -287,7 +287,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm3 = [31,31,31,31]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pand %xmm3, %xmm4
@@ -317,7 +317,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
@@ -425,7 +425,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
@@ -452,7 +452,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
; X86-SSE2-NEXT: pand %xmm4, %xmm5
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index f57efb4..1e11ea9 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -1409,11 +1409,11 @@ define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) {
; X86-SSE2-NEXT: movzbl %al, %eax
; X86-SSE2-NEXT: negl %eax
; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; X86-SSE2-NEXT: andl $1, %ecx
; X86-SSE2-NEXT: negl %ecx
; X86-SSE2-NEXT: movd %ecx, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index bd1a48b..7b0f1c9 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -2555,7 +2555,7 @@ entry:
define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
; SSE2-LABEL: splatshuf_zext_v4i64:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -2563,7 +2563,7 @@ define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
;
; SSSE3-LABEL: splatshuf_zext_v4i64:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movdqa %xmm0, %xmm1
@@ -2571,7 +2571,7 @@ define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
;
; SSE41-LABEL: splatshuf_zext_v4i64:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
index 19a31a6..31ed745 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
@@ -911,7 +911,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 16(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: paddb (%rdx), %xmm0
; SSE2-NEXT: movdqa %xmm0, (%rcx)
@@ -1898,7 +1898,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 32(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
@@ -4610,7 +4610,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 48(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pxor %xmm1, %xmm1
@@ -6544,7 +6544,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movdqa 16(%rdx), %xmm1
; SSE2-NEXT: paddb %xmm0, %xmm1
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
index 239472c..5b4cdd2 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -769,7 +769,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
; SSE2-LABEL: vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: paddb (%rsi), %xmm1
; SSE2-NEXT: movdqa %xmm1, (%rdx)
@@ -1522,7 +1522,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; SSE2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
@@ -3660,7 +3660,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; SSE2-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-NEXT: pxor %xmm0, %xmm0
@@ -5250,7 +5250,7 @@ define void @vec512_i16_widen_to_i256_factor16_broadcast_to_v2i256_factor2(ptr %
define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.elt.ptr, ptr %out.vec.bias.ptr, ptr %out.vec.ptr) nounwind {
; SSE2-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movdqa 16(%rsi), %xmm1
diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
index 8f76834..67ab167 100644
--- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -16,6 +16,14 @@ define <2 x double> @load_zeromask(ptr %ptr, <2 x double> %passthru) {
ret <2 x double> %res
}
+define <2 x double> @load_zero_withpoison_mask(ptr %ptr, <2 x double> %passthru) {
+; CHECK-LABEL: @load_zero_withpoison_mask(
+; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]]
+;
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %ptr, i32 1, <2 x i1> <i1 0, i1 poison>, <2 x double> %passthru)
+ ret <2 x double> %res
+}
+
define <2 x double> @load_onemask(ptr %ptr, <2 x double> %passthru) {
; CHECK-LABEL: @load_onemask(
; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, ptr [[PTR:%.*]], align 2
@@ -150,6 +158,14 @@ define void @store_zeromask(ptr %ptr, <2 x double> %val) {
ret void
}
+define void @store_poisonmask(ptr %ptr, <2 x double> %val) {
+; CHECK-LABEL: @store_poisonmask(
+; CHECK-NEXT: ret void
+;
+ call void @llvm.masked.store.v2f64.p0(<2 x double> %val, ptr %ptr, i32 4, <2 x i1> splat(i1 poison))
+ ret void
+}
+
define void @store_onemask(ptr %ptr, <2 x double> %val) {
; CHECK-LABEL: @store_onemask(
; CHECK-NEXT: store <2 x double> [[VAL:%.*]], ptr [[PTR:%.*]], align 4
@@ -159,6 +175,15 @@ define void @store_onemask(ptr %ptr, <2 x double> %val) {
ret void
}
+define void @store_one_withpoison_mask(ptr %ptr, <2 x double> %val) {
+; CHECK-LABEL: @store_one_withpoison_mask(
+; CHECK-NEXT: store <2 x double> [[VAL:%.*]], ptr [[PTR:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ call void @llvm.masked.store.v2f64.p0(<2 x double> %val, ptr %ptr, i32 4, <2 x i1> <i1 1, i1 poison>)
+ ret void
+}
+
define void @store_demandedelts(ptr %ptr, double %val) {
; CHECK-LABEL: @store_demandedelts(
; CHECK-NEXT: [[VALVEC1:%.*]] = insertelement <2 x double> poison, double [[VAL:%.*]], i64 0
@@ -189,6 +214,13 @@ define <2 x double> @gather_zeromask(<2 x ptr> %ptrs, <2 x double> %passthru) {
ret <2 x double> %res
}
+define <2 x double> @gather_zero_withpoison_mask(<2 x ptr> %ptrs, <2 x double> %passthru) {
+; CHECK-LABEL: @gather_zero_withpoison_mask(
+; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]]
+;
+ %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> <i1 0, i1 poison>, <2 x double> %passthru)
+ ret <2 x double> %res
+}
define <2 x double> @gather_onemask(<2 x ptr> %ptrs, <2 x double> %passthru) {
; CHECK-LABEL: @gather_onemask(
@@ -199,6 +231,15 @@ define <2 x double> @gather_onemask(<2 x ptr> %ptrs, <2 x double> %passthru) {
ret <2 x double> %res
}
+define <2 x double> @gather_one_withpoisonmask(<2 x ptr> %ptrs, <2 x double> %passthru) {
+; CHECK-LABEL: @gather_one_withpoisonmask(
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[PTRS:%.*]], i32 4, <2 x i1> <i1 true, i1 poison>, <2 x double> [[PASSTHRU:%.*]])
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> <i1 true, i1 poison>, <2 x double> %passthru)
+ ret <2 x double> %res
+}
+
define <4 x double> @gather_lane2(ptr %base, double %pt) {
; CHECK-LABEL: @gather_lane2(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, ptr [[BASE:%.*]], <4 x i64> <i64 poison, i64 poison, i64 2, i64 poison>
@@ -257,6 +298,23 @@ define void @scatter_zeromask(<2 x ptr> %ptrs, <2 x double> %val) {
ret void
}
+define void @scatter_zero_withpoison_mask(<2 x ptr> %ptrs, <2 x double> %val) {
+; CHECK-LABEL: @scatter_zero_withpoison_mask(
+; CHECK-NEXT: ret void
+;
+ call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> <i1 0, i1 poison>)
+ ret void
+}
+
+define void @scatter_one_withpoison_mask(<2 x ptr> %ptrs, <2 x double> %val) {
+; CHECK-LABEL: @scatter_one_withpoison_mask(
+; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[VAL:%.*]], <2 x ptr> [[PTRS:%.*]], i32 8, <2 x i1> <i1 true, i1 poison>)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> <i1 1, i1 poison>)
+ ret void
+}
+
define void @scatter_demandedelts(ptr %ptr, double %val) {
; CHECK-LABEL: @scatter_demandedelts(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, ptr [[PTR:%.*]], <2 x i64> <i64 0, i64 poison>
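
The new *_withpoison_mask tests pin down how InstCombine resolves poison mask
lanes: a false-plus-poison mask folds the whole operation away (the load
returns its passthru, the store and scatter vanish), a true-plus-poison mask
folds masked load/store to a plain memory access, and gathers/scatters with a
true-plus-poison mask are left untouched. A hypothetical helper, not the
actual InstCombine code, capturing the lane check these folds need:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DerivedTypes.h"
  using namespace llvm;

  // Is every lane of a constant i1 mask equal to Want, treating poison lanes
  // as wildcards the optimizer may resolve to Want?
  static bool maskIsAll(Constant *Mask, bool Want) {
    auto *VTy = cast<FixedVectorType>(Mask->getType());
    for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
      Constant *Elt = Mask->getAggregateElement(I);
      if (!Elt)
        return false;
      if (isa<PoisonValue>(Elt))
        continue; // a poison lane can be chosen to match Want
      auto *CI = dyn_cast<ConstantInt>(Elt);
      if (!CI || CI->isOne() != Want)
        return false;
    }
    return true;
  }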
diff --git a/llvm/test/Transforms/InstCombine/pr83947.ll b/llvm/test/Transforms/InstCombine/pr83947.ll
index 1906502..679230a4 100644
--- a/llvm/test/Transforms/InstCombine/pr83947.ll
+++ b/llvm/test/Transforms/InstCombine/pr83947.ll
@@ -24,7 +24,6 @@ define void @masked_scatter2() {
define void @masked_scatter3() {
; CHECK-LABEL: define void @masked_scatter3() {
-; CHECK-NEXT: store i32 0, ptr @c, align 4
; CHECK-NEXT: ret void
;
call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> zeroinitializer, <2 x ptr> splat (ptr @c), i32 4, <2 x i1> undef)
@@ -50,7 +49,6 @@ define void @masked_scatter5() {
define void @masked_scatter6() {
; CHECK-LABEL: define void @masked_scatter6() {
-; CHECK-NEXT: store i32 0, ptr @c, align 4
; CHECK-NEXT: ret void
;
call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> zeroinitializer, <2 x ptr> splat (ptr @c), i32 4, <2 x i1> <i1 undef, i1 false>)
diff --git a/llvm/test/Transforms/InstCombine/select-and-cmp.ll b/llvm/test/Transforms/InstCombine/select-and-cmp.ll
index 50e1493..26c04ad 100644
--- a/llvm/test/Transforms/InstCombine/select-and-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/select-and-cmp.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
define i32 @select_and_icmp(i32 %x, i32 %y, i32 %z) {
@@ -114,34 +114,34 @@ define i32 @select_and_icmp_inv(i32 %x, i32 %y, i32 %z) {
; Below used to be negative tests in InstSimplify, but they are no longer negative cases here
-define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) {
+define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) !prof !0 {
; CHECK-LABEL: @select_and_icmp_pred_bad_1(
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 [[X:%.*]]
;
%A = icmp eq i32 %x, %z
%B = icmp ne i32 %y, %z
%C = and i1 %A, %B
- %D = select i1 %C, i32 %z, i32 %x
+ %D = select i1 %C, i32 %z, i32 %x, !prof !1
ret i32 %D
}
-define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) {
+define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) !prof !0 {
; CHECK-LABEL: @select_and_icmp_pred_bad_2(
; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp ne i32 %x, %z
%B = icmp eq i32 %y, %z
%C = and i1 %A, %B
- %D = select i1 %C, i32 %z, i32 %x
+ %D = select i1 %C, i32 %z, i32 %x, !prof !1
ret i32 %D
}
define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_and_icmp_pred_bad_3(
-; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X:%.*]], i32 [[Z]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp ne i32 %x, %z
@@ -153,8 +153,8 @@ define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) {
define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_and_icmp_pred_bad_4(
-; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X:%.*]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp eq i32 %x, %z
@@ -166,7 +166,7 @@ define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) {
define i32 @select_and_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_and_icmp_alt_bad_1(
-; CHECK-NEXT: ret i32 [[Z]]
+; CHECK-NEXT: ret i32 [[Z:%.*]]
;
%A = icmp eq i32 %x, %z
%B = icmp ne i32 %y, %z
@@ -177,8 +177,8 @@ define i32 @select_and_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) {
define i32 @select_and_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_and_icmp_alt_bad_2(
-; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X:%.*]], i32 [[Z]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp ne i32 %x, %z
@@ -191,8 +191,8 @@ define i32 @select_and_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) {
define i32 @select_and_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_and_icmp_alt_bad_3(
-; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X:%.*]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp ne i32 %x, %z
@@ -204,8 +204,8 @@ define i32 @select_and_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) {
define i32 @select_and_icmp_alt_bad_4(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_and_icmp_alt_bad_4(
-; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X:%.*]], i32 [[Z]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp eq i32 %x, %z
@@ -322,3 +322,11 @@ define i32 @select_and_icmp_alt_bad_false_val(i32 %x, i32 %y, i32 %z, i32 %k) {
%D = select i1 %C, i32 %x, i32 %k
ret i32 %D
}
+
+!0 = !{!"function_entry_count", i64 1000}
+!1 = !{!"branch_weights", i32 2, i32 3}
+
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+;.
diff --git a/llvm/test/Transforms/InstCombine/select-or-cmp.ll b/llvm/test/Transforms/InstCombine/select-or-cmp.ll
index 72a3747..82b069b 100644
--- a/llvm/test/Transforms/InstCombine/select-or-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/select-or-cmp.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
define i32 @select_or_icmp(i32 %x, i32 %y, i32 %z) {
@@ -114,47 +114,47 @@ define i32 @select_or_icmp_inv(i32 %x, i32 %y, i32 %z) {
; Below used to be negative tests in InstSimplify, but they are no longer negative cases here
-define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) {
+define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) !prof !0 {
; CHECK-LABEL: @select_and_icmp_pred_bad_1(
-; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X:%.*]], i32 [[Z]], !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp eq i32 %x, %z
%B = icmp ne i32 %y, %z
%C = or i1 %A, %B
- %D = select i1 %C, i32 %z, i32 %x
+ %D = select i1 %C, i32 %z, i32 %x, !prof !1
ret i32 %D
}
-define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) {
+define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) !prof !0 {
; CHECK-LABEL: @select_and_icmp_pred_bad_2(
-; CHECK-NEXT: ret i32 [[Z]]
+; CHECK-NEXT: ret i32 [[Z:%.*]]
;
%A = icmp ne i32 %x, %z
%B = icmp eq i32 %y, %z
%C = or i1 %A, %B
- %D = select i1 %C, i32 %z, i32 %x
+ %D = select i1 %C, i32 %z, i32 %x, !prof !1
ret i32 %D
}
-define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) {
+define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) !prof !0 {
; CHECK-LABEL: @select_and_icmp_pred_bad_3(
-; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp eq i32 %x, %z
%B = icmp eq i32 %y, %z
%C = or i1 %A, %B
- %D = select i1 %C, i32 %z, i32 %x
+ %D = select i1 %C, i32 %z, i32 %x, !prof !1
ret i32 %D
}
define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_and_icmp_pred_bad_4(
-; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X:%.*]], i32 [[Z]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp ne i32 %x, %z
@@ -166,8 +166,8 @@ define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) {
define i32 @select_or_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_or_icmp_alt_bad_1(
-; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X:%.*]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp eq i32 %x, %z
@@ -179,7 +179,7 @@ define i32 @select_or_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) {
define i32 @select_or_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_or_icmp_alt_bad_2(
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 [[X:%.*]]
;
%A = icmp ne i32 %x, %z
%B = icmp eq i32 %y, %z
@@ -190,8 +190,8 @@ define i32 @select_or_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) {
define i32 @select_or_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_or_icmp_alt_bad_3(
-; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X:%.*]], i32 [[Z]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp eq i32 %x, %z
@@ -203,8 +203,8 @@ define i32 @select_or_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) {
define i32 @select_or_icmp_alt_bad_4(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_or_icmp_alt_bad_4(
-; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X:%.*]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp ne i32 %x, %z
@@ -321,3 +321,11 @@ define i32 @select_or_icmp_alt_bad_false_val(i32 %x, i32 %y, i32 %z, i32 %k) {
%D = select i1 %C, i32 %x, i32 %k
ret i32 %D
}
+
+!0 = !{!"function_entry_count", i64 1000}
+!1 = !{!"branch_weights", i32 2, i32 3}
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 2}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 2, i32 3}
+;.
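
Note the checked [[PROF1]] above is 3, 2, the reverse of the input !1: when a
fold canonicalizes the condition and swaps the select arms, the branch_weights
must swap with them, which is exactly what these regenerated checks guard. A
sketch of that step, assuming the existing IR helpers SelectInst::swapValues
and Instruction::swapProfMetadata rather than the fold's actual plumbing:

  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Rewrite `select C, X, Y` into `select !C, Y, X` without desynchronizing
  // the profile: swapValues() exchanges only the arms, so the branch_weights
  // pair must be swapped separately.
  static void invertSelect(SelectInst &Sel, Value *NotCond) {
    Sel.setCondition(NotCond);
    Sel.swapValues();
    Sel.swapProfMetadata();
  }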
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
index 8784873..f5329cf 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
@@ -454,6 +454,132 @@ exit:
ret void
}
+declare i1 @cond()
+
+define double @test_load_used_by_other_load_scev(ptr %ptr.a, ptr %ptr.b, ptr %ptr.c) {
+; I64-LABEL: define double @test_load_used_by_other_load_scev(
+; I64-SAME: ptr [[PTR_A:%.*]], ptr [[PTR_B:%.*]], ptr [[PTR_C:%.*]]) {
+; I64-NEXT: [[ENTRY:.*]]:
+; I64-NEXT: br label %[[OUTER_LOOP:.*]]
+; I64: [[OUTER_LOOP_LOOPEXIT:.*]]:
+; I64-NEXT: br label %[[OUTER_LOOP]]
+; I64: [[OUTER_LOOP]]:
+; I64-NEXT: [[ACCUM:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP29:%.*]], %[[OUTER_LOOP_LOOPEXIT]] ]
+; I64-NEXT: [[COND:%.*]] = call i1 @cond()
+; I64-NEXT: br i1 [[COND]], label %[[INNER_LOOP_PREHEADER:.*]], label %[[EXIT:.*]]
+; I64: [[INNER_LOOP_PREHEADER]]:
+; I64-NEXT: br label %[[VECTOR_PH:.*]]
+; I64: [[VECTOR_PH]]:
+; I64-NEXT: br label %[[VECTOR_BODY:.*]]
+; I64: [[VECTOR_BODY]]:
+; I64-NEXT: [[TMP0:%.*]] = add i64 0, 1
+; I64-NEXT: [[TMP1:%.*]] = add i64 1, 1
+; I64-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[TMP0]]
+; I64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[TMP1]]
+; I64-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[TMP0]]
+; I64-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[TMP1]]
+; I64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
+; I64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
+; I64-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[TMP6]]
+; I64-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[TMP7]]
+; I64-NEXT: [[TMP10:%.*]] = load double, ptr [[PTR_A]], align 8
+; I64-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i64 0
+; I64-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; I64-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[BROADCAST_SPLAT]], zeroinitializer
+; I64-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP2]], i64 8
+; I64-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8
+; I64-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP12]], align 8
+; I64-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP13]], align 8
+; I64-NEXT: [[TMP16:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i32 0
+; I64-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[TMP15]], i32 1
+; I64-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP11]], zeroinitializer
+; I64-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x double> poison, double [[ACCUM]], i64 0
+; I64-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT1]], <2 x double> poison, <2 x i32> zeroinitializer
+; I64-NEXT: [[TMP19:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLAT2]], <2 x double> [[TMP18]], <2 x i32> <i32 1, i32 2>
+; I64-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP17]], zeroinitializer
+; I64-NEXT: [[TMP21:%.*]] = fadd <2 x double> [[TMP20]], zeroinitializer
+; I64-NEXT: [[TMP22:%.*]] = fadd <2 x double> [[TMP21]], splat (double 1.000000e+00)
+; I64-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP8]], align 8
+; I64-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP9]], align 8
+; I64-NEXT: [[TMP25:%.*]] = insertelement <2 x double> poison, double [[TMP23]], i32 0
+; I64-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[TMP25]], double [[TMP24]], i32 1
+; I64-NEXT: [[TMP27:%.*]] = fdiv <2 x double> [[TMP26]], [[TMP22]]
+; I64-NEXT: [[TMP28:%.*]] = fsub <2 x double> [[TMP19]], [[TMP27]]
+; I64-NEXT: br label %[[MIDDLE_BLOCK:.*]]
+; I64: [[MIDDLE_BLOCK]]:
+; I64-NEXT: [[TMP29]] = extractelement <2 x double> [[TMP28]], i32 1
+; I64-NEXT: br label %[[OUTER_LOOP_LOOPEXIT]]
+; I64: [[EXIT]]:
+; I64-NEXT: ret double [[ACCUM]]
+;
+; I32-LABEL: define double @test_load_used_by_other_load_scev(
+; I32-SAME: ptr [[PTR_A:%.*]], ptr [[PTR_B:%.*]], ptr [[PTR_C:%.*]]) {
+; I32-NEXT: [[ENTRY:.*]]:
+; I32-NEXT: br label %[[OUTER_LOOP:.*]]
+; I32: [[OUTER_LOOP]]:
+; I32-NEXT: [[ACCUM:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[RESULT:%.*]], %[[INNER_LOOP:.*]] ]
+; I32-NEXT: [[COND:%.*]] = call i1 @cond()
+; I32-NEXT: br i1 [[COND]], label %[[INNER_LOOP]], label %[[EXIT:.*]]
+; I32: [[INNER_LOOP]]:
+; I32-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[OUTER_LOOP]] ], [ [[IV_NEXT:%.*]], %[[INNER_LOOP]] ]
+; I32-NEXT: [[ACCUM_INNER:%.*]] = phi double [ [[ACCUM]], %[[OUTER_LOOP]] ], [ [[MUL1:%.*]], %[[INNER_LOOP]] ]
+; I32-NEXT: [[IDX_PLUS1:%.*]] = add i64 [[IV]], 1
+; I32-NEXT: [[GEP_C:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[IDX_PLUS1]]
+; I32-NEXT: [[GEP_A_I64:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[IDX_PLUS1]]
+; I32-NEXT: [[LOAD_IDX:%.*]] = load i64, ptr [[GEP_A_I64]], align 8
+; I32-NEXT: [[GEP_B:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[LOAD_IDX]]
+; I32-NEXT: [[LOAD_A:%.*]] = load double, ptr [[PTR_A]], align 8
+; I32-NEXT: [[ADD1:%.*]] = fadd double [[LOAD_A]], 0.000000e+00
+; I32-NEXT: [[GEP_C_OFFSET:%.*]] = getelementptr i8, ptr [[GEP_C]], i64 8
+; I32-NEXT: [[LOAD_C:%.*]] = load double, ptr [[GEP_C_OFFSET]], align 8
+; I32-NEXT: [[MUL1]] = fmul double [[ADD1]], 0.000000e+00
+; I32-NEXT: [[MUL2:%.*]] = fmul double [[LOAD_C]], 0.000000e+00
+; I32-NEXT: [[ADD2:%.*]] = fadd double [[MUL2]], 0.000000e+00
+; I32-NEXT: [[ADD3:%.*]] = fadd double [[ADD2]], 1.000000e+00
+; I32-NEXT: [[LOAD_B:%.*]] = load double, ptr [[GEP_B]], align 8
+; I32-NEXT: [[DIV:%.*]] = fdiv double [[LOAD_B]], [[ADD3]]
+; I32-NEXT: [[RESULT]] = fsub double [[ACCUM_INNER]], [[DIV]]
+; I32-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; I32-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1
+; I32-NEXT: br i1 [[EXITCOND]], label %[[OUTER_LOOP]], label %[[INNER_LOOP]]
+; I32: [[EXIT]]:
+; I32-NEXT: ret double [[ACCUM]]
+;
+entry:
+ br label %outer.loop
+
+outer.loop:
+ %accum = phi double [ 0.0, %entry ], [ %result, %inner.loop ]
+ %cond = call i1 @cond()
+ br i1 %cond, label %inner.loop, label %exit
+
+inner.loop:
+ %iv = phi i64 [ 0, %outer.loop ], [ %iv.next, %inner.loop ]
+ %accum.inner = phi double [ %accum, %outer.loop ], [ %mul1, %inner.loop ]
+ %idx.plus1 = add i64 %iv, 1
+ %gep.c = getelementptr i8, ptr %ptr.c, i64 %idx.plus1
+ %gep.a.i64 = getelementptr i64, ptr %ptr.a, i64 %idx.plus1
+ %load.idx = load i64, ptr %gep.a.i64, align 8
+ %gep.b = getelementptr double, ptr %ptr.b, i64 %load.idx
+ %load.a = load double, ptr %ptr.a, align 8
+ %add1 = fadd double %load.a, 0.000000e+00
+ %gep.c.offset = getelementptr i8, ptr %gep.c, i64 8
+ %load.c = load double, ptr %gep.c.offset, align 8
+ %mul1 = fmul double %add1, 0.000000e+00
+ %mul2 = fmul double %load.c, 0.000000e+00
+ %add2 = fadd double %mul2, 0.000000e+00
+ %add3 = fadd double %add2, 1.000000e+00
+ %load.b = load double, ptr %gep.b, align 8
+ %div = fdiv double %load.b, %add3
+ %result = fsub double %accum.inner, %div
+ %iv.next = add i64 %iv, 1
+ %exitcond = icmp eq i64 %iv, 1
+ br i1 %exitcond, label %outer.loop, label %inner.loop
+
+exit:
+ ret double %accum
+}
+
attributes #0 = { "target-cpu"="znver2" }
!0 = distinct !{!0, !1}
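
The new test above stresses the replicating-load cost model: the i64 loaded
from %ptr.a immediately feeds the address of the %ptr.b load, so under the I64
configuration the vectorizer emits per-lane scalar loads plus insertelements
rather than one wide load, while the I32 configuration stays scalar. A
hand-reduced sketch of the access shape, not the test itself:

  // The value loaded from A is the index of the B load, so B[A[I]] must be
  // costed as replicated scalar loads (or a gather), never a single wide load.
  double sumIndirect(const long *A, const double *B, long N) {
    double S = 0.0;
    for (long I = 0; I < N; ++I)
      S += B[A[I]];
    return S;
  }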
diff --git a/llvm/test/Transforms/NewGVN/pr159918.ll b/llvm/test/Transforms/NewGVN/pr159918.ll
new file mode 100644
index 0000000..3fad6e6
--- /dev/null
+++ b/llvm/test/Transforms/NewGVN/pr159918.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=newgvn < %s | FileCheck %s
+
+; Don't use returned argument in memory defining intrinsics.
+define void @wombat(ptr %arg) {
+; CHECK-LABEL: define void @wombat(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[ARG]], align 8
+; CHECK-NEXT: [[CALL:%.*]] = call ptr @llvm.objc.retain(ptr [[LOAD]])
+; CHECK-NEXT: store ptr [[CALL]], ptr [[ARG]], align 8
+; CHECK-NEXT: ret void
+;
+ %load = load ptr, ptr %arg, align 8
+ %call = call ptr @llvm.objc.retain(ptr %load)
+ store ptr %call, ptr %arg, align 8
+ ret void
+}
+
+declare ptr @llvm.objc.retain(ptr returned) #0
+
+attributes #0 = { nounwind }
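
The `returned` attribute guarantees @llvm.objc.retain returns its argument,
which tempts a value-numbering pass to treat %call and %load as the same
value; but the intrinsic also defines memory, and the test pins that the
store of %call back to %arg must survive. A hypothetical guard, not the
committed fix, illustrating the shape of the restriction:

  #include "llvm/IR/InstrTypes.h"
  using namespace llvm;

  // Only collapse a call to its `returned` argument when the call cannot
  // write memory; otherwise later memory operations on the merged value may
  // be wrongly judged redundant.
  static bool canUseReturnedArg(const CallBase &CB) {
    return CB.getReturnedArgOperand() && !CB.mayWriteToMemory();
  }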
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
index ed0bd3f..cf62fd5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
@@ -55,6 +55,54 @@ entry:
ret void
}
+define void @test_add_udiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: @test_add_udiv(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2
+; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
+; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
+; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
+; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
+; CHECK-NEXT: [[RES2:%.*]] = udiv i32 [[V2]], [[Y2]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
+; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %gep1.1 = getelementptr i32, ptr %arr1, i32 1
+ %gep1.2 = getelementptr i32, ptr %arr1, i32 2
+ %gep1.3 = getelementptr i32, ptr %arr1, i32 3
+ %gep2.1 = getelementptr i32, ptr %arr2, i32 1
+ %gep2.2 = getelementptr i32, ptr %arr2, i32 2
+ %gep2.3 = getelementptr i32, ptr %arr2, i32 3
+ %v0 = load i32, ptr %arr1
+ %v1 = load i32, ptr %gep1.1
+ %v2 = load i32, ptr %gep1.2
+ %v3 = load i32, ptr %gep1.3
+ %y0 = add nsw i32 %a0, 1146
+ %y1 = add nsw i32 %a1, 146
+ %y2 = add nsw i32 %a2, 42
+ %y3 = add nsw i32 %a3, 0
+ %res0 = add nsw i32 %v0, %y0
+ %res1 = add nsw i32 %v1, %y1
+ %res2 = udiv i32 %v2, %y2
+ %res3 = add nsw i32 %v3, %y3
+ store i32 %res0, ptr %arr2
+ store i32 %res1, ptr %gep2.1
+ store i32 %res2, ptr %gep2.2
+ store i32 %res3, ptr %gep2.3
+ ret void
+}
+
;; Similar test, but now div/rem is main opcode and not the alternate one. Same issue.
define void @test_urem_add(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: @test_urem_add(
@@ -114,3 +162,56 @@ entry:
store i32 %res3, ptr %gep2.3
ret void
}
+
+define void @test_srem_add(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: @test_srem_add(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 1
+; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1]], i32 2
+; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
+; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 1
+; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2]], i32 2
+; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3
+; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[ARR1]], align 4
+; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[GEP1_1]], align 4
+; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
+; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
+; CHECK-NEXT: [[Y0:%.*]] = add nsw i32 [[A0:%.*]], 1146
+; CHECK-NEXT: [[Y1:%.*]] = add nsw i32 [[A1:%.*]], 146
+; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
+; CHECK-NEXT: [[Y3:%.*]] = add nsw i32 [[A3:%.*]], 0
+; CHECK-NEXT: [[RES0:%.*]] = srem i32 [[V0]], [[Y0]]
+; CHECK-NEXT: [[RES1:%.*]] = srem i32 [[V1]], [[Y1]]
+; CHECK-NEXT: [[RES2:%.*]] = srem i32 [[V2]], [[Y2]]
+; CHECK-NEXT: [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
+; CHECK-NEXT: store i32 [[RES0]], ptr [[ARR2]], align 4
+; CHECK-NEXT: store i32 [[RES1]], ptr [[GEP2_1]], align 4
+; CHECK-NEXT: store i32 [[RES2]], ptr [[GEP2_2]], align 4
+; CHECK-NEXT: store i32 [[RES3]], ptr [[GEP2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %gep1.1 = getelementptr i32, ptr %arr1, i32 1
+ %gep1.2 = getelementptr i32, ptr %arr1, i32 2
+ %gep1.3 = getelementptr i32, ptr %arr1, i32 3
+ %gep2.1 = getelementptr i32, ptr %arr2, i32 1
+ %gep2.2 = getelementptr i32, ptr %arr2, i32 2
+ %gep2.3 = getelementptr i32, ptr %arr2, i32 3
+ %v0 = load i32, ptr %arr1
+ %v1 = load i32, ptr %gep1.1
+ %v2 = load i32, ptr %gep1.2
+ %v3 = load i32, ptr %gep1.3
+ %y0 = add nsw i32 %a0, 1146
+ %y1 = add nsw i32 %a1, 146
+ %y2 = add nsw i32 %a2, 42
+ %y3 = add nsw i32 %a3, 0
+ %res0 = srem i32 %v0, %y0
+ %res1 = srem i32 %v1, %y1
+ %res2 = srem i32 %v2, %y2
+ %res3 = add nsw i32 %v3, %y3
+ store i32 %res0, ptr %arr2
+ store i32 %res1, ptr %gep2.1
+ store i32 %res2, ptr %gep2.2
+ store i32 %res3, ptr %gep2.3
+ ret void
+}
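
Both new tests check that SLP keeps udiv/srem lanes scalar instead of forming
an add/div "alternate opcode" vector: vectorizing would execute the division
on every lane, and division has immediate UB on a zero divisor, so it must not
run speculatively in lanes that originally performed the add. A hypothetical
predicate, not the actual SLP code, naming the opcodes that disqualify:

  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  // Division and remainder may trap or invoke UB, so they cannot be
  // speculated into lanes of an alternate-opcode shuffle.
  static bool safeAsAlternateOpcode(unsigned Opc) {
    switch (Opc) {
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      return false;
    default:
      return true;
    }
  }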
diff --git a/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp b/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp
index b6e8567..497da8f 100644
--- a/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp
+++ b/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp
@@ -46,8 +46,8 @@ public:
MAM.registerPass([VocabVector = std::move(VocabVector)]() mutable {
return IR2VecVocabAnalysis(std::move(VocabVector));
});
- IR2VecVocab =
- new ir2vec::Vocabulary(ir2vec::Vocabulary::createDummyVocabForTest(1));
+ IR2VecVocab = std::make_unique<ir2vec::Vocabulary>(
+ ir2vec::Vocabulary::createDummyVocabForTest(1));
MAM.registerPass([&] { return PassInstrumentationAnalysis(); });
FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); });
FAM.registerPass([&] { return DominatorTreeAnalysis(); });
@@ -69,7 +69,7 @@ protected:
std::unique_ptr<LoopInfo> LI;
FunctionAnalysisManager FAM;
ModuleAnalysisManager MAM;
- ir2vec::Vocabulary *IR2VecVocab;
+ std::unique_ptr<ir2vec::Vocabulary> IR2VecVocab;
void TearDown() override {
// Restore original IR2Vec weights
diff --git a/llvm/unittests/Analysis/IR2VecTest.cpp b/llvm/unittests/Analysis/IR2VecTest.cpp
index 743628f..d136cb6 100644
--- a/llvm/unittests/Analysis/IR2VecTest.cpp
+++ b/llvm/unittests/Analysis/IR2VecTest.cpp
@@ -295,7 +295,7 @@ TEST(IR2VecTest, ZeroDimensionEmbedding) {
// Fixture for IR2Vec tests requiring IR setup.
class IR2VecTestFixture : public ::testing::Test {
protected:
- Vocabulary *V;
+ std::unique_ptr<Vocabulary> V;
LLVMContext Ctx;
std::unique_ptr<Module> M;
Function *F = nullptr;
@@ -304,7 +304,7 @@ protected:
Instruction *RetInst = nullptr;
void SetUp() override {
- V = new Vocabulary(Vocabulary::createDummyVocabForTest(2));
+ V = std::make_unique<Vocabulary>(Vocabulary::createDummyVocabForTest(2));
// Setup IR
M = std::make_unique<Module>("TestM", Ctx);
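
Both unittest fixtures above replace a raw owning pointer that was never
deleted with std::unique_ptr, so the vocabulary dies with the fixture. A
minimal sketch of the resulting ownership pattern, with invented names:

  #include <memory>

  struct Fixture {
    std::unique_ptr<int> Owned; // owned member, freed by ~Fixture()
    void SetUp() { Owned = std::make_unique<int>(42); }
    // no manual delete in TearDown() is needed, and a repeated SetUp()
    // releases the previous allocation instead of leaking it
  };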
diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt
index d1dfb1d..25efa00 100644
--- a/llvm/unittests/Support/CMakeLists.txt
+++ b/llvm/unittests/Support/CMakeLists.txt
@@ -52,6 +52,7 @@ add_llvm_unittest(SupportTests
IndexedAccessorTest.cpp
InstructionCostTest.cpp
InterleavedRangeTest.cpp
+ JobserverTest.cpp
JSONTest.cpp
KnownBitsTest.cpp
LEB128Test.cpp
diff --git a/llvm/unittests/Support/JobserverTest.cpp b/llvm/unittests/Support/JobserverTest.cpp
new file mode 100644
index 0000000..ddee023
--- /dev/null
+++ b/llvm/unittests/Support/JobserverTest.cpp
@@ -0,0 +1,442 @@
+//===- llvm/unittest/Support/JobserverTest.cpp ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Jobserver.h unit tests.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Jobserver.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Parallel.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include <future>
+#include <random>
+#include <stdlib.h>
+
+#if defined(LLVM_ON_UNIX)
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FileSystem.h"
+#include <atomic>
+#include <condition_variable>
+#include <fcntl.h>
+#include <mutex>
+#include <sys/stat.h>
+#include <thread>
+#include <unistd.h>
+#elif defined(_WIN32)
+#include <windows.h>
+#endif
+
+#define DEBUG_TYPE "jobserver-test"
+
+using namespace llvm;
+
+namespace {
+
+// RAII helper to set an environment variable for the duration of a test.
+class ScopedEnvironment {
+ std::string Name;
+ std::string OldValue;
+ bool HadOldValue;
+
+public:
+ ScopedEnvironment(const char *Name, const char *Value) : Name(Name) {
+#if defined(_WIN32)
+ char *Old = nullptr;
+ size_t OldLen;
+ errno_t err = _dupenv_s(&Old, &OldLen, Name);
+ if (err == 0 && Old != nullptr) {
+ HadOldValue = true;
+ OldValue = Old;
+ free(Old);
+ } else {
+ HadOldValue = false;
+ }
+ _putenv_s(Name, Value);
+#else
+ const char *Old = getenv(Name);
+ if (Old) {
+ HadOldValue = true;
+ OldValue = Old;
+ } else {
+ HadOldValue = false;
+ }
+ setenv(Name, Value, 1);
+#endif
+ }
+
+ ~ScopedEnvironment() {
+#if defined(_WIN32)
+ if (HadOldValue)
+ _putenv_s(Name.c_str(), OldValue.c_str());
+ else
+ // On Windows, setting an environment variable to an empty string
+ // unsets it, making getenv() return NULL.
+ _putenv_s(Name.c_str(), "");
+#else
+ if (HadOldValue)
+ setenv(Name.c_str(), OldValue.c_str(), 1);
+ else
+ unsetenv(Name.c_str());
+#endif
+ }
+};
+
+TEST(Jobserver, Slot) {
+ // Default constructor creates an invalid slot.
+ JobSlot S1;
+ EXPECT_FALSE(S1.isValid());
+ EXPECT_FALSE(S1.isImplicit());
+
+ // Create an implicit slot.
+ JobSlot S2 = JobSlot::createImplicit();
+ EXPECT_TRUE(S2.isValid());
+ EXPECT_TRUE(S2.isImplicit());
+
+ // Create an explicit slot.
+ JobSlot S3 = JobSlot::createExplicit(42);
+ EXPECT_TRUE(S3.isValid());
+ EXPECT_FALSE(S3.isImplicit());
+
+ // Test move construction.
+ JobSlot S4 = std::move(S2);
+ EXPECT_TRUE(S4.isValid());
+ EXPECT_TRUE(S4.isImplicit());
+ EXPECT_FALSE(S2.isValid()); // S2 is now invalid.
+
+ // Test move assignment.
+ S1 = std::move(S3);
+ EXPECT_TRUE(S1.isValid());
+ EXPECT_FALSE(S1.isImplicit());
+ EXPECT_FALSE(S3.isValid()); // S3 is now invalid.
+}
+
+// Test fixture for parsing tests to ensure the singleton state is
+// reset between each test case.
+class JobserverParsingTest : public ::testing::Test {
+protected:
+ void TearDown() override { JobserverClient::resetForTesting(); }
+};
+
+TEST_F(JobserverParsingTest, NoMakeflags) {
+ // No MAKEFLAGS, should be null.
+ ScopedEnvironment Env("MAKEFLAGS", "");
+ // On Unix, setting an env var to "" makes getenv() return an empty
+ // string, not NULL. We must call unsetenv() to test the case where
+ // the variable is truly not present.
+#if !defined(_WIN32)
+ unsetenv("MAKEFLAGS");
+#endif
+ EXPECT_EQ(JobserverClient::getInstance(), nullptr);
+}
+
+TEST_F(JobserverParsingTest, EmptyMakeflags) {
+ // Empty MAKEFLAGS, should be null.
+ ScopedEnvironment Env("MAKEFLAGS", "");
+ EXPECT_EQ(JobserverClient::getInstance(), nullptr);
+}
+
+TEST_F(JobserverParsingTest, DryRunFlag) {
+ // Dry-run flag 'n', should be null.
+ ScopedEnvironment Env("MAKEFLAGS", "n -j --jobserver-auth=fifo:/tmp/foo");
+ EXPECT_EQ(JobserverClient::getInstance(), nullptr);
+}
+
+// Separate fixture for non-threaded client tests.
+class JobserverClientTest : public JobserverParsingTest {};
+
+#if defined(LLVM_ON_UNIX)
+// RAII helper to create and clean up a temporary FIFO file.
+class ScopedFifo {
+ SmallString<128> Path;
+ bool IsValid = false;
+
+public:
+ ScopedFifo() {
+ // To get a unique, non-colliding name for a FIFO, we use the
+ // createTemporaryFile function to reserve a name in the filesystem.
+ std::error_code EC =
+ sys::fs::createTemporaryFile("jobserver-test", "fifo", Path);
+ if (EC)
+ return;
+ // Then we immediately remove the regular file it created, but keep the
+ // unique path.
+ sys::fs::remove(Path);
+ // Finally, we create the FIFO at that safe, unique path.
+ if (mkfifo(Path.c_str(), 0600) != 0)
+ return;
+ IsValid = true;
+ }
+
+ ~ScopedFifo() {
+ if (IsValid)
+ sys::fs::remove(Path);
+ }
+
+ const char *c_str() const { return Path.data(); }
+ bool isValid() const { return IsValid; }
+};
+
+TEST_F(JobserverClientTest, UnixClientFifo) {
+ // This test covers basic FIFO client creation and behavior with an empty
+ // FIFO. No job tokens are available.
+ ScopedFifo F;
+ ASSERT_TRUE(F.isValid());
+
+ // Intentionally inserted \t in environment string.
+ std::string Makeflags = " \t -j4\t \t--jobserver-auth=fifo:";
+ Makeflags += F.c_str();
+ ScopedEnvironment Env("MAKEFLAGS", Makeflags.c_str());
+
+ JobserverClient *Client = JobserverClient::getInstance();
+ ASSERT_NE(Client, nullptr);
+
+ // Get the implicit token.
+ JobSlot S1 = Client->tryAcquire();
+ EXPECT_TRUE(S1.isValid());
+ EXPECT_TRUE(S1.isImplicit());
+
+ // FIFO is empty, next acquire fails.
+ JobSlot S2 = Client->tryAcquire();
+ EXPECT_FALSE(S2.isValid());
+
+ // Release does not write to the pipe for the implicit token.
+ Client->release(std::move(S1));
+
+ // Re-acquire the implicit token.
+ S1 = Client->tryAcquire();
+ EXPECT_TRUE(S1.isValid());
+}
+
+#if LLVM_ENABLE_THREADS
+// Test fixture for tests that use the jobserver strategy. It creates a
+// temporary FIFO, sets MAKEFLAGS, and provides a helper to pre-load the FIFO
+// with job tokens, simulating `make -jN`.
+class JobserverStrategyTest : public JobserverParsingTest {
+protected:
+ std::unique_ptr<ScopedFifo> TheFifo;
+ std::thread MakeThread;
+ std::atomic<bool> StopMakeThread{false};
+ // Save and restore the global parallel strategy to avoid interfering with
+ // other tests in the same process.
+ ThreadPoolStrategy SavedStrategy;
+
+ void SetUp() override {
+ SavedStrategy = parallel::strategy;
+ TheFifo = std::make_unique<ScopedFifo>();
+ ASSERT_TRUE(TheFifo->isValid());
+
+ std::string MakeFlags = "--jobserver-auth=fifo:";
+ MakeFlags += TheFifo->c_str();
+ setenv("MAKEFLAGS", MakeFlags.c_str(), 1);
+ }
+
+ void TearDown() override {
+ if (MakeThread.joinable()) {
+ StopMakeThread = true;
+ MakeThread.join();
+ }
+ unsetenv("MAKEFLAGS");
+ TheFifo.reset();
+ // Restore the original strategy to ensure subsequent tests are unaffected.
+ parallel::strategy = SavedStrategy;
+ }
+
+ // Starts a background thread that emulates `make`. It populates the FIFO
+ // with initial tokens and then recycles tokens released by clients.
+ void startMakeProxy(int NumInitialJobs) {
+ MakeThread = std::thread([this, NumInitialJobs]() {
+ LLVM_DEBUG(dbgs() << "[MakeProxy] Thread started.\n");
+ // Open the FIFO for reading and writing. This call does not block.
+ int RWFd = open(TheFifo->c_str(), O_RDWR);
+ LLVM_DEBUG(dbgs() << "[MakeProxy] Opened FIFO " << TheFifo->c_str()
+ << " with O_RDWR, FD=" << RWFd << "\n");
+ if (RWFd == -1) {
+ LLVM_DEBUG(
+ dbgs()
+ << "[MakeProxy] ERROR: Failed to open FIFO with O_RDWR. Errno: "
+ << errno << "\n");
+ return;
+ }
+
+ // Populate with initial jobs.
+ LLVM_DEBUG(dbgs() << "[MakeProxy] Writing " << NumInitialJobs
+ << " initial tokens.\n");
+ for (int i = 0; i < NumInitialJobs; ++i) {
+ if (write(RWFd, "+", 1) != 1) {
+ LLVM_DEBUG(dbgs()
+ << "[MakeProxy] ERROR: Failed to write initial token " << i
+ << ".\n");
+ close(RWFd);
+ return;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "[MakeProxy] Finished writing initial tokens.\n");
+
+ // Make the read non-blocking so we can periodically check StopMakeThread.
+ int flags = fcntl(RWFd, F_GETFL, 0);
+ fcntl(RWFd, F_SETFL, flags | O_NONBLOCK);
+
+ while (!StopMakeThread) {
+ char Token;
+ ssize_t Ret = read(RWFd, &Token, 1);
+ if (Ret == 1) {
+ LLVM_DEBUG(dbgs() << "[MakeProxy] Read token '" << Token
+ << "' to recycle.\n");
+ // A client released a token, 'make' makes it available again.
+ std::this_thread::sleep_for(std::chrono::microseconds(100));
+ ssize_t WRet;
+ do {
+ WRet = write(RWFd, &Token, 1);
+ } while (WRet < 0 && errno == EINTR);
+ if (WRet <= 0) {
+ LLVM_DEBUG(
+ dbgs()
+ << "[MakeProxy] ERROR: Failed to write recycled token.\n");
+ break; // Error, stop the proxy.
+ }
+ LLVM_DEBUG(dbgs()
+ << "[MakeProxy] Wrote token '" << Token << "' back.\n");
+ } else if (Ret < 0 && errno != EAGAIN && errno != EWOULDBLOCK) {
+ LLVM_DEBUG(dbgs() << "[MakeProxy] ERROR: Read failed with errno "
+ << errno << ".\n");
+ break; // Error, stop the proxy.
+ }
+ // Yield to prevent this thread from busy-waiting.
+ std::this_thread::sleep_for(std::chrono::milliseconds(1));
+ }
+ LLVM_DEBUG(dbgs() << "[MakeProxy] Thread stopping.\n");
+ close(RWFd);
+ });
+
+ // Give the proxy thread a moment to start and populate the FIFO.
+ // This is a simple way to avoid a race condition where the client starts
+ // before the initial tokens are in the pipe.
+ std::this_thread::sleep_for(std::chrono::milliseconds(50));
+ }
+};
+
+TEST_F(JobserverStrategyTest, ThreadPoolConcurrencyIsLimited) {
+ // This test simulates `make -j3`. We will have 1 implicit job slot and
+ // we will add 2 explicit job tokens to the FIFO, for a total of 3.
+ const int NumExplicitJobs = 2;
+ const int ConcurrencyLimit = NumExplicitJobs + 1; // +1 for the implicit slot
+ const int NumTasks = 8; // More tasks than available slots.
+
+ LLVM_DEBUG(dbgs() << "Calling startMakeProxy with " << NumExplicitJobs
+ << " jobs.\n");
+ startMakeProxy(NumExplicitJobs);
+ LLVM_DEBUG(dbgs() << "MakeProxy is running.\n");
+
+ // Create the thread pool. Its constructor will call jobserver_concurrency()
+ // and create a client that reads from our pre-loaded FIFO.
+ StdThreadPool Pool(jobserver_concurrency());
+
+ std::atomic<int> ActiveTasks{0};
+ std::atomic<int> MaxActiveTasks{0};
+ std::atomic<int> CompletedTasks{0};
+ std::mutex M;
+ std::condition_variable CV;
+
+ // Dispatch more tasks than there are job slots. The pool should block
+ // and only run up to `ConcurrencyLimit` tasks at once.
+ for (int i = 0; i < NumTasks; ++i) {
+ Pool.async([&, i] {
+ // Track the number of concurrently running tasks.
+ int CurrentActive = ++ActiveTasks;
+ LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive
+ << "\n");
+ int OldMax = MaxActiveTasks.load();
+ while (CurrentActive > OldMax)
+ MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive);
+
+ std::this_thread::sleep_for(std::chrono::milliseconds(25));
+
+ --ActiveTasks;
+ if (++CompletedTasks == NumTasks) {
+ std::lock_guard<std::mutex> Lock(M);
+ CV.notify_one();
+ }
+ });
+ }
+
+ // Wait for all tasks to complete.
+ std::unique_lock<std::mutex> Lock(M);
+ CV.wait(Lock, [&] { return CompletedTasks == NumTasks; });
+
+ LLVM_DEBUG(dbgs() << "Test finished. Max active tasks was " << MaxActiveTasks
+ << ".\n");
+ // The key assertion: the maximum number of concurrent tasks should
+ // not have exceeded the limit imposed by the jobserver.
+ EXPECT_LE(MaxActiveTasks, ConcurrencyLimit);
+ EXPECT_EQ(CompletedTasks, NumTasks);
+}
+
+TEST_F(JobserverStrategyTest, ParallelForIsLimited) {
+ // This test verifies that llvm::parallelFor respects the jobserver limit.
+ const int NumExplicitJobs = 3;
+ const int ConcurrencyLimit = NumExplicitJobs + 1; // +1 implicit
+ const int NumTasks = 20;
+
+ LLVM_DEBUG(dbgs() << "Calling startMakeProxy with " << NumExplicitJobs
+ << " jobs.\n");
+ startMakeProxy(NumExplicitJobs);
+ LLVM_DEBUG(dbgs() << "MakeProxy is running.\n");
+
+ // Set the global strategy. parallelFor will use this.
+ parallel::strategy = jobserver_concurrency();
+
+ std::atomic<int> ActiveTasks{0};
+ std::atomic<int> MaxActiveTasks{0};
+
+ parallelFor(0, NumTasks, [&](int i) {
+ int CurrentActive = ++ActiveTasks;
+ LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive
+ << "\n");
+ int OldMax = MaxActiveTasks.load();
+ while (CurrentActive > OldMax)
+ MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive);
+
+ std::this_thread::sleep_for(std::chrono::milliseconds(20));
+ --ActiveTasks;
+ });
+
+ LLVM_DEBUG(dbgs() << "ParallelFor finished. Max active tasks was "
+ << MaxActiveTasks << ".\n");
+ EXPECT_LE(MaxActiveTasks, ConcurrencyLimit);
+}
+
+TEST_F(JobserverStrategyTest, ParallelSortIsLimited) {
+ // This test serves as an integration test to ensure parallelSort completes
+ // correctly when running under the jobserver strategy. It doesn't directly
+ // measure concurrency but verifies correctness.
+ const int NumExplicitJobs = 3;
+ startMakeProxy(NumExplicitJobs);
+
+ parallel::strategy = jobserver_concurrency();
+
+ std::vector<int> V(1024);
+ // Fill with random data
+ std::mt19937 randEngine;
+ std::uniform_int_distribution<int> dist;
+ for (int &i : V)
+ i = dist(randEngine);
+
+ parallelSort(V.begin(), V.end());
+ ASSERT_TRUE(llvm::is_sorted(V));
+}
+
+#endif // LLVM_ENABLE_THREADS
+
+#endif // defined(LLVM_ON_UNIX)
+
+} // end anonymous namespace
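
Based only on the API the tests above exercise (getInstance, tryAcquire,
release, and JobSlot validity), a hedged sketch of per-task token handling;
the real thread-pool integration goes through jobserver_concurrency() instead:

  #include "llvm/Support/Jobserver.h"
  using namespace llvm;

  // Acquire a slot per unit of work and return the token to the jobserver
  // pipe when done. getInstance() is null when MAKEFLAGS names no jobserver.
  void runOneJob(void (*Work)()) {
    JobserverClient *Client = JobserverClient::getInstance();
    if (!Client) {
      Work();
      return;
    }
    JobSlot Slot = Client->tryAcquire();
    if (!Slot.isValid())
      return; // no token free right now; the caller should retry later
    Work();
    Client->release(std::move(Slot));
  }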
diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
index 75bea77..8076ce2 100644
--- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
@@ -246,16 +246,14 @@ bool TypeSetByHwMode::operator==(const TypeSetByHwMode &VTS) const {
return true;
}
-namespace llvm {
-raw_ostream &operator<<(raw_ostream &OS, const MachineValueTypeSet &T) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineValueTypeSet &T) {
T.writeToStream(OS);
return OS;
}
-raw_ostream &operator<<(raw_ostream &OS, const TypeSetByHwMode &T) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const TypeSetByHwMode &T) {
T.writeToStream(OS);
return OS;
}
-} // namespace llvm
LLVM_DUMP_METHOD
void TypeSetByHwMode::dump() const { dbgs() << *this << '\n'; }
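
This and the following TableGen files apply one cleanup: out-of-line operator
definitions move from reopened `namespace llvm { ... }` blocks to qualified
definitions. A qualified definition must match a prior declaration, so
signature drift becomes a compile error instead of a silent extra overload.
An illustration with invented types, not the real ones:

  #include <ostream>

  namespace llvm {
  struct RegSize { unsigned Bits; };
  std::ostream &operator<<(std::ostream &OS, const RegSize &R); // declaration
  } // namespace llvm

  // Qualified definition: the compiler checks this against the declaration.
  std::ostream &llvm::operator<<(std::ostream &OS, const RegSize &R) {
    return OS << R.Bits << " bits";
  }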
diff --git a/llvm/utils/TableGen/Common/CodeGenRegisters.cpp b/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
index 294f3af..8d0ec9a 100644
--- a/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
+++ b/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
@@ -857,17 +857,6 @@ unsigned CodeGenRegisterClass::getWeight(const CodeGenRegBank &RegBank) const {
return (*Members.begin())->getWeight(RegBank);
}
-namespace llvm {
-
-raw_ostream &operator<<(raw_ostream &OS, const CodeGenRegisterClass::Key &K) {
- OS << "{ " << K.RSI;
- for (const auto R : *K.Members)
- OS << ", " << R->getName();
- return OS << " }";
-}
-
-} // end namespace llvm
-
// This is a simple lexicographical order that can be used to search for sets.
// It is not the same as the topological order provided by TopoOrderRC.
bool CodeGenRegisterClass::Key::operator<(
diff --git a/llvm/utils/TableGen/Common/InfoByHwMode.cpp b/llvm/utils/TableGen/Common/InfoByHwMode.cpp
index a6e2fc4..4c8197d 100644
--- a/llvm/utils/TableGen/Common/InfoByHwMode.cpp
+++ b/llvm/utils/TableGen/Common/InfoByHwMode.cpp
@@ -227,19 +227,17 @@ EncodingInfoByHwMode::EncodingInfoByHwMode(const Record *R,
}
}
-namespace llvm {
-raw_ostream &operator<<(raw_ostream &OS, const ValueTypeByHwMode &T) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const ValueTypeByHwMode &T) {
T.writeToStream(OS);
return OS;
}
-raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfo &T) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const RegSizeInfo &T) {
T.writeToStream(OS);
return OS;
}
-raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfoByHwMode &T) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const RegSizeInfoByHwMode &T) {
T.writeToStream(OS);
return OS;
}
-} // namespace llvm
diff --git a/llvm/utils/TableGen/Common/PredicateExpander.cpp b/llvm/utils/TableGen/Common/PredicateExpander.cpp
index 09d9538..03252ed 100644
--- a/llvm/utils/TableGen/Common/PredicateExpander.cpp
+++ b/llvm/utils/TableGen/Common/PredicateExpander.cpp
@@ -14,7 +14,7 @@
#include "CodeGenSchedule.h" // Definition of STIPredicateFunction.
#include "llvm/TableGen/Record.h"
-namespace llvm {
+using namespace llvm;
void PredicateExpander::expandTrue(raw_ostream &OS) { OS << "true"; }
void PredicateExpander::expandFalse(raw_ostream &OS) { OS << "false"; }
@@ -553,5 +553,3 @@ void STIPredicateExpander::expandSTIPredicate(raw_ostream &OS,
expandEpilogue(OS, Fn);
}
}
-
-} // namespace llvm
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 09ce9f3..9471959 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -37,15 +37,6 @@ struct DXILIntrinsicSelect {
SmallVector<const Record *> ArgSelectRecords;
};
-static StringRef StripIntrinArgSelectTypePrefix(StringRef Type) {
- StringRef Prefix = "IntrinArgSelect_";
- if (!Type.starts_with(Prefix)) {
- PrintFatalError("IntrinArgSelectType definintion must be prefixed with "
- "'IntrinArgSelect_'");
- }
- return Type.substr(Prefix.size());
-}
-
struct DXILOperationDesc {
std::string OpName; // name of DXIL operation
int OpCode; // ID of DXIL operation
@@ -66,6 +57,15 @@ struct DXILOperationDesc {
};
} // end anonymous namespace
+static StringRef stripIntrinArgSelectTypePrefix(StringRef Type) {
+ StringRef Prefix = "IntrinArgSelect_";
+ if (!Type.starts_with(Prefix)) {
+ PrintFatalError("IntrinArgSelectType definintion must be prefixed with "
+ "'IntrinArgSelect_'");
+ }
+ return Type.substr(Prefix.size());
+}
+
/// In-place sort TableGen records of class with a field
/// Version dxil_version
/// in the ascending version order.
@@ -449,7 +449,7 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
ArgSelect->getValueAsDef("type")->getNameInitAsString();
int Value = ArgSelect->getValueAsInt("value");
OS << "(IntrinArgSelect{"
- << "IntrinArgSelect::Type::" << StripIntrinArgSelectTypePrefix(Type)
+ << "IntrinArgSelect::Type::" << stripIntrinArgSelectTypePrefix(Type)
<< "," << Value << "}), ";
}
OS << ")\n";
@@ -466,7 +466,7 @@ static void emitDXILIntrinsicArgSelectTypes(const RecordKeeper &Records,
OS << "#ifdef DXIL_OP_INTRINSIC_ARG_SELECT_TYPE\n";
for (const Record *Records :
Records.getAllDerivedDefinitions("IntrinArgSelectType")) {
- StringRef StrippedName = StripIntrinArgSelectTypePrefix(Records->getName());
+ StringRef StrippedName = stripIntrinArgSelectTypePrefix(Records->getName());
OS << "DXIL_OP_INTRINSIC_ARG_SELECT_TYPE(" << StrippedName << ")\n";
}
OS << "#undef DXIL_OP_INTRINSIC_ARG_SELECT_TYPE\n";
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 961dc28..5d41b7d 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -194,10 +194,6 @@ private:
void parseInstructionEncodings();
};
-} // end anonymous namespace
-
-namespace {
-
struct EncodingIsland {
unsigned StartBit;
unsigned NumBits;
diff --git a/llvm/utils/TableGen/ExegesisEmitter.cpp b/llvm/utils/TableGen/ExegesisEmitter.cpp
index 1b4b072..bd69919 100644
--- a/llvm/utils/TableGen/ExegesisEmitter.cpp
+++ b/llvm/utils/TableGen/ExegesisEmitter.cpp
@@ -58,6 +58,14 @@ private:
const std::map<llvm::StringRef, unsigned> PfmCounterNameTable;
};
+struct ValidationCounterInfo {
+ int64_t EventNumber;
+ StringRef EventName;
+ unsigned PfmCounterID;
+};
+
+} // namespace
+
static std::map<llvm::StringRef, unsigned>
collectPfmCounters(const RecordKeeper &Records) {
std::map<llvm::StringRef, unsigned> PfmCounterNameTable;
@@ -106,14 +114,8 @@ ExegesisEmitter::ExegesisEmitter(const RecordKeeper &RK)
Target = Targets[0]->getName().str();
}
-struct ValidationCounterInfo {
- int64_t EventNumber;
- StringRef EventName;
- unsigned PfmCounterID;
-};
-
-bool EventNumberLess(const ValidationCounterInfo &LHS,
- const ValidationCounterInfo &RHS) {
+static bool EventNumberLess(const ValidationCounterInfo &LHS,
+ const ValidationCounterInfo &RHS) {
return LHS.EventNumber < RHS.EventNumber;
}
@@ -221,7 +223,7 @@ void ExegesisEmitter::emitPfmCounters(raw_ostream &OS) const {
emitPfmCountersInfo(*Def, IssueCountersTableOffset, OS);
OS << "\n";
-} // namespace
+}
void ExegesisEmitter::emitPfmCountersLookupTable(raw_ostream &OS) const {
std::vector<const Record *> Bindings =
@@ -249,7 +251,5 @@ void ExegesisEmitter::run(raw_ostream &OS) const {
emitPfmCountersLookupTable(OS);
}
-} // end anonymous namespace
-
static TableGen::Emitter::OptClass<ExegesisEmitter>
X("gen-exegesis", "Generate llvm-exegesis tables");
diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp
index 694d89a..dba8bde 100644
--- a/llvm/utils/TableGen/FastISelEmitter.cpp
+++ b/llvm/utils/TableGen/FastISelEmitter.cpp
@@ -52,11 +52,9 @@ struct InstructionMemo {
InstructionMemo(const InstructionMemo &Other) = delete;
InstructionMemo(InstructionMemo &&Other) = default;
};
-} // End anonymous namespace
/// ImmPredicateSet - This uniques predicates (represented as a string) and
/// gives them unique (small) integer ID's that start at 0.
-namespace {
class ImmPredicateSet {
DenseMap<TreePattern *, unsigned> ImmIDs;
std::vector<TreePredicateFn> PredsByName;
@@ -77,12 +75,10 @@ public:
iterator begin() const { return PredsByName.begin(); }
iterator end() const { return PredsByName.end(); }
};
-} // End anonymous namespace
/// OperandsSignature - This class holds a description of a list of operand
/// types. It has utility methods for emitting text based on the operands.
///
-namespace {
struct OperandsSignature {
class OpKind {
enum { OK_Reg, OK_FP, OK_Imm, OK_Invalid = -1 };
@@ -366,9 +362,7 @@ struct OperandsSignature {
Opnd.printManglingSuffix(OS, ImmPredicates, StripImmCodes);
}
};
-} // End anonymous namespace
-namespace {
class FastISelMap {
// A multimap is needed instead of a "plain" map because the key is
// the instruction's complexity (an int) and they are not unique.
diff --git a/llvm/utils/TableGen/X86DisassemblerShared.h b/llvm/utils/TableGen/X86DisassemblerShared.h
index f60fd47..d5f936d 100644
--- a/llvm/utils/TableGen/X86DisassemblerShared.h
+++ b/llvm/utils/TableGen/X86DisassemblerShared.h
@@ -14,6 +14,8 @@
#include "llvm/Support/X86DisassemblerDecoderCommon.h"
+namespace llvm::X86Disassembler {
+
struct InstructionSpecifier {
llvm::X86Disassembler::OperandSpecifier
operands[llvm::X86Disassembler::X86_MAX_OPERANDS];
@@ -52,4 +54,6 @@ struct ContextDecision {
ContextDecision() { memset(opcodeDecisions, 0, sizeof(opcodeDecisions)); }
};
+} // namespace llvm::X86Disassembler
+
#endif
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 1e1e4ab..6f523b5 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -30,22 +30,23 @@ struct ManualMapEntry {
const char *MemInstStr;
uint16_t Strategy;
};
+} // namespace
// List of instructions requiring explicitly aligned memory.
-const char *ExplicitAlign[] = {"MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS",
- "MOVNTPD", "MOVNTDQ", "MOVNTDQA"};
+static constexpr const char *ExplicitAlign[] = {
+ "MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS", "MOVNTPD", "MOVNTDQ", "MOVNTDQA"};
// List of instructions NOT requiring explicit memory alignment.
-const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD",
- "PCMPESTRM", "PCMPESTRI", "PCMPISTRM",
- "PCMPISTRI"};
+static constexpr const char *ExplicitUnalign[] = {
+ "MOVDQU", "MOVUPS", "MOVUPD", "PCMPESTRM",
+ "PCMPESTRI", "PCMPISTRM", "PCMPISTRI"};
-const ManualMapEntry ManualMapSet[] = {
+static const ManualMapEntry ManualMapSet[] = {
#define ENTRY(REG, MEM, FLAGS) {#REG, #MEM, FLAGS},
#include "X86ManualFoldTables.def"
};
-const std::set<StringRef> NoFoldSet = {
+static const std::set<StringRef> NoFoldSet = {
#define NOFOLD(INSN) #INSN,
#include "X86ManualFoldTables.def"
};
@@ -62,6 +63,7 @@ static bool isExplicitUnalign(const CodeGenInstruction *Inst) {
});
}
+namespace {
class X86FoldTablesEmitter {
const RecordKeeper &Records;
const CodeGenTarget Target;
@@ -230,6 +232,7 @@ private:
OS << "};\n\n";
}
};
+} // namespace
// Return true if one of the instruction's operands is a RST register class
static bool hasRSTRegClass(const CodeGenInstruction *Inst) {
@@ -318,6 +321,7 @@ static bool isNOREXRegClass(const Record *Op) {
// Function object - Operator() returns true if the given Reg instruction
// matches the Mem instruction of this object.
+namespace {
class IsMatch {
const CodeGenInstruction *MemInst;
const X86Disassembler::RecognizableInstrBase MemRI;
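In the fold-tables hunks above, the lookup tables gain `static` for internal linkage, and the string-literal arrays additionally become `constexpr`, so the pointer arrays themselves are compile-time constants. A small sketch of the pattern (the two-entry table and `needsAlignment` are illustrative, not the emitter's real logic):

  // table_linkage.cpp -- sketch only.
  #include <cstring>

  // `static` keeps the table out of other translation units; `constexpr`
  // guarantees compile-time initialization of the pointer array.
  static constexpr const char *ExplicitAlign[] = {"MOVDQA", "MOVAPS"};

  static bool needsAlignment(const char *Mnemonic) {
    for (const char *Prefix : ExplicitAlign)
      if (std::strncmp(Mnemonic, Prefix, std::strlen(Prefix)) == 0)
        return true;
    return false;
  }

  int main() { return needsAlignment("MOVDQA") ? 0 : 1; }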
diff --git a/llvm/utils/TableGen/X86InstrMappingEmitter.cpp b/llvm/utils/TableGen/X86InstrMappingEmitter.cpp
index be5e2a7..2745ba7 100644
--- a/llvm/utils/TableGen/X86InstrMappingEmitter.cpp
+++ b/llvm/utils/TableGen/X86InstrMappingEmitter.cpp
@@ -66,6 +66,7 @@ private:
void printTable(ArrayRef<Entry> Table, StringRef Name, StringRef Macro,
raw_ostream &OS);
};
+} // namespace
void X86InstrMappingEmitter::printClassDef(raw_ostream &OS) {
OS << "struct X86TableEntry {\n"
@@ -106,6 +107,7 @@ void X86InstrMappingEmitter::printTable(ArrayRef<Entry> Table, StringRef Name,
printMacroEnd(Macro, OS);
}
+namespace {
class IsMatch {
const CodeGenInstruction *OldInst;
@@ -146,6 +148,7 @@ public:
return true;
}
};
+} // namespace
static bool isInteresting(const Record *Rec) {
// _REV instruction should not appear before encoding optimization
@@ -368,7 +371,6 @@ void X86InstrMappingEmitter::run(raw_ostream &OS) {
emitND2NonNDTable(Insts, OS);
emitSSE2AVXTable(Insts, OS);
}
-} // namespace
static TableGen::Emitter::OptClass<X86InstrMappingEmitter>
X("gen-x86-instr-mapping", "Generate X86 instruction mapping");
diff --git a/llvm/utils/TableGen/X86MnemonicTables.cpp b/llvm/utils/TableGen/X86MnemonicTables.cpp
index 85bd4df..7851919 100644
--- a/llvm/utils/TableGen/X86MnemonicTables.cpp
+++ b/llvm/utils/TableGen/X86MnemonicTables.cpp
@@ -30,6 +30,7 @@ public:
// Output X86 mnemonic tables.
void run(raw_ostream &OS);
};
+} // namespace
void X86MnemonicTablesEmitter::run(raw_ostream &OS) {
emitSourceFileHeader("X86 Mnemonic tables", OS);
@@ -83,7 +84,5 @@ void X86MnemonicTablesEmitter::run(raw_ostream &OS) {
OS << "} // end namespace X86\n} // end namespace llvm";
}
-} // namespace
-
static TableGen::Emitter::OptClass<X86MnemonicTablesEmitter>
X("gen-x86-mnemonic-tables", "Generate X86 mnemonic tables");
diff --git a/llvm/utils/TableGen/X86ModRMFilters.h b/llvm/utils/TableGen/X86ModRMFilters.h
index b579f22..7bf111f 100644
--- a/llvm/utils/TableGen/X86ModRMFilters.h
+++ b/llvm/utils/TableGen/X86ModRMFilters.h
@@ -19,9 +19,7 @@
#include <cstdint>
-namespace llvm {
-
-namespace X86Disassembler {
+namespace llvm::X86Disassembler {
/// ModRMFilter - Abstract base class for classes that recognize patterns in
/// ModR/M bytes.
@@ -135,8 +133,6 @@ public:
bool accepts(uint8_t modRM) const override { return (ModRM == modRM); }
};
-} // namespace X86Disassembler
-
-} // namespace llvm
+} // namespace llvm::X86Disassembler
#endif
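Both disassembler headers in this patch fold the nested `namespace llvm { namespace X86Disassembler {` pair into the C++17 nested-namespace definition, which is semantically identical but drops one level of bracing and one closing comment. For illustration (`outer::inner` and `Filter` are placeholder names):

  // nested_namespace.cpp -- sketch of the C++17 syntax.
  #include <iostream>

  // Pre-C++17 spelling: namespace outer { namespace inner { ... } }
  // C++17 nested-namespace definition, as used in the headers above:
  namespace outer::inner {
  struct Filter {
    bool accepts(unsigned char ModRM) const { return ModRM == 0xC0; }
  };
  } // namespace outer::inner

  int main() { std::cout << outer::inner::Filter{}.accepts(0xC0) << '\n'; }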
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h
index b74e74d..52f9538 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -22,8 +22,6 @@
#include <string>
#include <vector>
-struct InstructionSpecifier;
-
namespace llvm {
class Record;
#define X86_INSTR_MRM_MAPPING \
@@ -179,6 +177,8 @@ enum { ExplicitREX2 = 1, ExplicitEVEX = 3 };
namespace X86Disassembler {
class DisassemblerTables;
+struct InstructionSpecifier;
+
/// Extract common fields of a single X86 instruction from a CodeGenInstruction
struct RecognizableInstrBase {
/// The OpPrefix field from the record
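Relocating the `InstructionSpecifier` forward declaration matters for correctness: at global scope it declared a distinct type `::InstructionSpecifier`, unrelated to the `llvm::X86Disassembler::InstructionSpecifier` that X86DisassemblerShared.h now defines, so a pointer declared against it could never bind to the real type. A compact illustration with a hypothetical `lib::Spec`:

  // fwd_decl.cpp -- sketch; Spec stands in for InstructionSpecifier.
  namespace lib {
  struct Spec; // correct: declares lib::Spec, completed below; a global-scope
               // `struct Spec;` would name a different, unrelated type

  const Spec *Current = nullptr; // pointers need only the forward declaration

  struct Spec {
    int Opcode;
  };
  } // namespace lib

  int main() {
    lib::Spec S{7};
    lib::Current = &S;
    return lib::Current->Opcode == 7 ? 0 : 1;
  }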
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni
index 2ab2a0e..5d1fb02 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni
@@ -529,7 +529,7 @@ if (current_cpu == "ve") {
if (current_cpu == "wasm") {
builtins_sources += [
"wasm/__c_longjmp.S",
- "wasm/__cpp_exceptions.S",
+ "wasm/__cpp_exception.S",
]
}