-rw-r--r--  .github/workflows/libcxx-build-and-test.yaml  4
-rw-r--r--  clang/include/clang/CIR/Dialect/IR/CIROps.td  17
-rw-r--r--  clang/include/clang/CIR/MissingFeatures.h  7
-rw-r--r--  clang/include/clang/Driver/Options.td  5
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp  26
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp  82
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenExpr.cpp  20
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenFunction.cpp  5
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenFunction.h  23
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenModule.cpp  61
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenModule.h  9
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenStmt.cpp  1
-rw-r--r--  clang/lib/CIR/CodeGen/CMakeLists.txt  1
-rw-r--r--  clang/lib/CIR/Dialect/IR/CIRDialect.cpp  13
-rw-r--r--  clang/lib/Driver/ToolChains/Clang.cpp  22
-rw-r--r--  clang/lib/Format/Format.cpp  2
-rw-r--r--  clang/lib/Format/FormatToken.h  11
-rw-r--r--  clang/lib/Format/TokenAnnotator.cpp  21
-rw-r--r--  clang/lib/StaticAnalyzer/Core/CMakeLists.txt  1
-rw-r--r--  clang/lib/Tooling/DependencyScanning/CMakeLists.txt  1
-rw-r--r--  clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp  279
-rw-r--r--  clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h  84
-rw-r--r--  clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp  189
-rw-r--r--  clang/test/CIR/CodeGen/coro-task.cpp  123
-rw-r--r--  clang/test/CIR/CodeGen/predefined-expr.c  71
-rw-r--r--  clang/test/CIR/IR/func.cir  11
-rw-r--r--  clang/test/CodeGenHLSL/resources/AppendStructuredBuffer-elementtype.hlsl  54
-rw-r--r--  clang/test/CodeGenHLSL/resources/ConsumeStructuredBuffer-elementtype.hlsl  54
-rw-r--r--  clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl  74
-rw-r--r--  clang/test/CodeGenHLSL/resources/StructuredBuffer-elementtype.hlsl  61
-rw-r--r--  clang/test/CodeGenHLSL/resources/StructuredBuffers-elementtype.hlsl  113
-rw-r--r--  clang/test/Driver/dxc_frs.hlsl  3
-rw-r--r--  clang/test/Driver/dxc_rootsignature_target.hlsl  2
-rw-r--r--  clang/test/Driver/hip-options.hip  6
-rw-r--r--  clang/test/Driver/linker-wrapper.c  2
-rw-r--r--  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp  18
-rw-r--r--  clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td  3
-rw-r--r--  clang/unittests/Format/FormatTestObjC.cpp  9
-rw-r--r--  clang/unittests/Format/TokenAnnotatorTest.cpp  31
-rw-r--r--  compiler-rt/lib/builtins/CMakeLists.txt  2
-rw-r--r--  flang-rt/lib/runtime/CMakeLists.txt  3
-rw-r--r--  flang-rt/lib/runtime/extrema.cpp  9
-rw-r--r--  flang-rt/lib/runtime/findloc.cpp  11
-rw-r--r--  flang/include/flang/Evaluate/characteristics.h  9
-rw-r--r--  flang/include/flang/Parser/parse-tree.h  1
-rw-r--r--  flang/include/flang/Semantics/scope.h  2
-rw-r--r--  flang/include/flang/Semantics/symbol.h  6
-rw-r--r--  flang/include/flang/Semantics/tools.h  2
-rw-r--r--  flang/include/flang/Semantics/type.h  3
-rw-r--r--  flang/lib/Evaluate/characteristics.cpp  14
-rw-r--r--  flang/lib/Evaluate/tools.cpp  9
-rw-r--r--  flang/lib/Lower/OpenACC.cpp  3
-rw-r--r--  flang/lib/Parser/Fortran-parsers.cpp  4
-rw-r--r--  flang/lib/Semantics/check-call.cpp  71
-rw-r--r--  flang/lib/Semantics/check-declarations.cpp  136
-rw-r--r--  flang/lib/Semantics/expression.cpp  2
-rw-r--r--  flang/lib/Semantics/resolve-directives.cpp  2
-rw-r--r--  flang/lib/Semantics/resolve-names.cpp  179
-rw-r--r--  flang/lib/Semantics/scope.cpp  5
-rw-r--r--  flang/lib/Semantics/semantics.cpp  6
-rw-r--r--  flang/lib/Semantics/tools.cpp  5
-rw-r--r--  flang/lib/Semantics/type.cpp  23
-rw-r--r--  flang/test/Semantics/boz-literal-constants.f90  2
-rw-r--r--  flang/test/Semantics/call13.f90  2
-rw-r--r--  flang/test/Semantics/cuf24.cuf  40
-rw-r--r--  flang/test/Semantics/declarations01.f90  2
-rw-r--r--  flang/test/Semantics/declarations08.f90  2
-rw-r--r--  flang/test/Semantics/modfile80.F90  25
-rw-r--r--  flang/test/Semantics/null01.f90  4
-rw-r--r--  flang/test/Semantics/resolve42.f90  38
-rw-r--r--  lld/MachO/Config.h  1
-rw-r--r--  lld/MachO/Driver.cpp  2
-rw-r--r--  lld/MachO/Options.td  4
-rw-r--r--  lld/MachO/SyntheticSections.cpp  60
-rw-r--r--  lld/docs/ReleaseNotes.rst  2
-rw-r--r--  lld/test/MachO/cstring-tailmerge-objc.s  144
-rw-r--r--  lld/test/MachO/cstring-tailmerge.s  85
-rw-r--r--  lld/test/MachO/order-file-cstring-tailmerge.s  56
-rw-r--r--  lldb/include/lldb/Host/JSONTransport.h  626
-rw-r--r--  lldb/include/lldb/Protocol/MCP/MCPError.h  5
-rw-r--r--  lldb/include/lldb/Protocol/MCP/Protocol.h  5
-rw-r--r--  lldb/include/lldb/Protocol/MCP/Server.h  72
-rw-r--r--  lldb/include/lldb/Protocol/MCP/Transport.h  60
-rw-r--r--  lldb/include/lldb/Target/Language.h  7
-rw-r--r--  lldb/packages/Python/lldbsuite/test/dotest.py  5
-rw-r--r--  lldb/source/Host/common/JSONTransport.cpp  26
-rw-r--r--  lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp  52
-rw-r--r--  lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.h  20
-rw-r--r--  lldb/source/Protocol/MCP/MCPError.cpp  9
-rw-r--r--  lldb/source/Protocol/MCP/Server.cpp  209
-rw-r--r--  lldb/source/Target/Language.cpp  30
-rw-r--r--  lldb/test/API/tools/lldb-server/TestLldbGdbServer.py  4
-rw-r--r--  lldb/tools/lldb-dap/DAP.h  6
-rw-r--r--  lldb/tools/lldb-dap/Protocol/ProtocolBase.h  6
-rw-r--r--  lldb/tools/lldb-dap/Transport.h  11
-rw-r--r--  lldb/unittests/DAP/DAPTest.cpp  20
-rw-r--r--  lldb/unittests/DAP/Handler/DisconnectTest.cpp  4
-rw-r--r--  lldb/unittests/DAP/TestBase.cpp  42
-rw-r--r--  lldb/unittests/DAP/TestBase.h  123
-rw-r--r--  lldb/unittests/Host/JSONTransportTest.cpp  478
-rw-r--r--  lldb/unittests/Host/posix/HostTest.cpp  19
-rw-r--r--  lldb/unittests/Protocol/ProtocolMCPServerTest.cpp  307
-rw-r--r--  lldb/unittests/Target/CMakeLists.txt  1
-rw-r--r--  lldb/unittests/Target/Language.cpp  69
-rw-r--r--  lldb/unittests/TestingSupport/Host/JSONTransportTestUtilities.h  100
-rw-r--r--  llvm/docs/DirectX/DXContainer.rst  24
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAG.h  8
-rw-r--r--  llvm/include/llvm/CodeGen/TargetLowering.h  17
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h  6
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsAArch64.td  10
-rw-r--r--  llvm/include/llvm/Support/Jobserver.h  162
-rw-r--r--  llvm/include/llvm/Support/ThreadPool.h  4
-rw-r--r--  llvm/include/llvm/Support/Threading.h  18
-rw-r--r--  llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h  6
-rw-r--r--  llvm/include/llvm/Target/GenericOpcodes.td  2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp  7
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp  39
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp  15
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp  25
-rw-r--r--  llvm/lib/Support/CMakeLists.txt  1
-rw-r--r--  llvm/lib/Support/Jobserver.cpp  259
-rw-r--r--  llvm/lib/Support/Parallel.cpp  98
-rw-r--r--  llvm/lib/Support/ThreadPool.cpp  108
-rw-r--r--  llvm/lib/Support/Threading.cpp  5
-rw-r--r--  llvm/lib/Support/Unix/Jobserver.inc  195
-rw-r--r--  llvm/lib/Support/Windows/Jobserver.inc  79
-rw-r--r--  llvm/lib/TableGen/Error.cpp  58
-rw-r--r--  llvm/lib/TableGen/Main.cpp  4
-rw-r--r--  llvm/lib/TableGen/Record.cpp  6
-rw-r--r--  llvm/lib/TableGen/TGParser.cpp  6
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp  15
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrFormats.td  104
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp  4
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp  3
-rw-r--r--  llvm/lib/Target/AArch64/SMEInstrFormats.td  12
-rw-r--r--  llvm/lib/Target/AArch64/SVEInstrFormats.td  106
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp  20
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp  4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp  25
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.h  5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp  26
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h  1
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp  4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFrameLowering.cpp  2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVGISel.td  12
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp  6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp  4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td  4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp  4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp  16
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp  25
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp  10
-rw-r--r--  llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp  178
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp  10
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp  20
-rw-r--r--  llvm/lib/Transforms/Scalar/NewGVN.cpp  4
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp  7
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll  4
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir  3
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir  9
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir  3
-rw-r--r--  llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir  6
-rw-r--r--  llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll  6
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll  204
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll  12
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-vabs.ll  24
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-vadd.ll  341
-rw-r--r--  llvm/test/CodeGen/AArch64/combine-sdiv.ll  137
-rw-r--r--  llvm/test/CodeGen/AArch64/extract-vector-elt.ll  4
-rw-r--r--  llvm/test/CodeGen/AArch64/fcmp.ll  18
-rw-r--r--  llvm/test/CodeGen/AArch64/fpclamptosat.ll  55
-rw-r--r--  llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll  101
-rw-r--r--  llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll  12
-rw-r--r--  llvm/test/CodeGen/AArch64/neon-compare-instructions.ll  113
-rw-r--r--  llvm/test/CodeGen/AArch64/neon-shift-left-long.ll  2
-rw-r--r--  llvm/test/CodeGen/AArch64/select_cc.ll  4
-rw-r--r--  llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll  8
-rw-r--r--  llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll  24
-rw-r--r--  llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll  4
-rw-r--r--  llvm/test/CodeGen/AMDGPU/true16-fold.mir  25
-rw-r--r--  llvm/test/CodeGen/ARM/fpclamptosat.ll  48
-rw-r--r--  llvm/test/CodeGen/ARM/fpclamptosat_vec.ll  107
-rw-r--r--  llvm/test/CodeGen/Hexagon/inst_setcc_uno_uo.ll  93
-rw-r--r--  llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll  12
-rw-r--r--  llvm/test/CodeGen/RISCV/fpclamptosat.ll  58
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll  202
-rw-r--r--  llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll  28
-rw-r--r--  llvm/test/CodeGen/WebAssembly/fpclamptosat.ll  89
-rw-r--r--  llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll  78
-rw-r--r--  llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll  6
-rw-r--r--  llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll  6
-rw-r--r--  llvm/test/CodeGen/X86/fpclamptosat.ll  45
-rw-r--r--  llvm/test/CodeGen/X86/fpclamptosat_vec.ll  105
-rw-r--r--  llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll  8
-rw-r--r--  llvm/test/CodeGen/X86/vec-strict-cmp-128.ll  32
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll  10
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshl-sub128.ll  10
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll  10
-rw-r--r--  llvm/test/CodeGen/X86/vector-fshr-sub128.ll  10
-rw-r--r--  llvm/test/CodeGen/X86/vector-sext.ll  4
-rw-r--r--  llvm/test/CodeGen/X86/vector-zext.ll  6
-rw-r--r--  llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll  8
-rw-r--r--  llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll  8
-rw-r--r--  llvm/test/ThinLTO/X86/memprof-dups.ll  138
-rw-r--r--  llvm/test/ThinLTO/X86/memprof_imported_internal.ll  10
-rw-r--r--  llvm/test/Transforms/InstCombine/masked_intrinsics.ll  58
-rw-r--r--  llvm/test/Transforms/InstCombine/pr83947.ll  2
-rw-r--r--  llvm/test/Transforms/InstCombine/select-and-cmp.ll  44
-rw-r--r--  llvm/test/Transforms/InstCombine/select-or-cmp.ll  50
-rw-r--r--  llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll  126
-rw-r--r--  llvm/test/Transforms/NewGVN/pr159918.ll  21
-rw-r--r--  llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll  101
-rw-r--r--  llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp  6
-rw-r--r--  llvm/unittests/Analysis/IR2VecTest.cpp  4
-rw-r--r--  llvm/unittests/Support/CMakeLists.txt  1
-rw-r--r--  llvm/unittests/Support/JobserverTest.cpp  442
-rw-r--r--  llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp  6
-rw-r--r--  llvm/utils/TableGen/Common/CodeGenRegisters.cpp  11
-rw-r--r--  llvm/utils/TableGen/Common/InfoByHwMode.cpp  8
-rw-r--r--  llvm/utils/TableGen/Common/PredicateExpander.cpp  4
-rw-r--r--  llvm/utils/TableGen/DXILEmitter.cpp  22
-rw-r--r--  llvm/utils/TableGen/DecoderEmitter.cpp  4
-rw-r--r--  llvm/utils/TableGen/ExegesisEmitter.cpp  22
-rw-r--r--  llvm/utils/TableGen/FastISelEmitter.cpp  6
-rw-r--r--  llvm/utils/TableGen/X86DisassemblerShared.h  4
-rw-r--r--  llvm/utils/TableGen/X86FoldTablesEmitter.cpp  18
-rw-r--r--  llvm/utils/TableGen/X86InstrMappingEmitter.cpp  4
-rw-r--r--  llvm/utils/TableGen/X86MnemonicTables.cpp  3
-rw-r--r--  llvm/utils/TableGen/X86ModRMFilters.h  8
-rw-r--r--  llvm/utils/TableGen/X86RecognizableInstr.h  4
-rw-r--r--  llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni  2
-rw-r--r--  llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn  1
-rw-r--r--  llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn  1
-rw-r--r--  mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td  53
-rw-r--r--  mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td  4
-rw-r--r--  mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp  23
-rw-r--r--  mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp  2
-rw-r--r--  mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp  67
-rw-r--r--  mlir/test/Dialect/OpenACC/ops.mlir  36
-rw-r--r--  mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir  575
-rw-r--r--  mlir/test/Dialect/XeGPU/subgroup-distribute.mlir  570
-rw-r--r--  mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp  30
-rw-r--r--  openmp/runtime/test/transform/tile/intfor.F90 (renamed from openmp/runtime/test/transform/tile/intfor.f90)  1
-rw-r--r--  utils/bazel/llvm-project-overlay/clang/BUILD.bazel  1
245 files changed, 7957 insertions, 3244 deletions
diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml
index 1c07a0a..77f79a8 100644
--- a/.github/workflows/libcxx-build-and-test.yaml
+++ b/.github/workflows/libcxx-build-and-test.yaml
@@ -281,6 +281,10 @@ jobs:
- name: Set up the MSVC dev environment
if: ${{ matrix.mingw != true }}
uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0
+ - name: Add the installed Clang at the start of the path
+ if: ${{ matrix.mingw != true }}
+ run: |
+ echo "c:\Program Files\LLVM\bin" | Out-File -FilePath $Env:GITHUB_PATH -Encoding utf8 -Append
- name: Build and test
run: |
bash libcxx/utils/ci/run-buildbot ${{ matrix.config }}
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 0a78492..7f2e55d 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -2341,6 +2341,12 @@ def CIR_FuncOp : CIR_Op<"func", [
The function linkage information is specified by `linkage`, as defined by
`GlobalLinkageKind` attribute.
+ A compiler builtin function must be marked as `builtin` for further
+ processing when lowering from CIR.
+
+ The `coroutine` keyword is used to mark a coroutine function, which requires
+ at least one `cir.await` instruction to be used in its body.
+
The `lambda` translates to a C++ `operator()` that implements a lambda, this
allow callsites to make certain assumptions about the real function nature
when writing analysis.
@@ -2362,11 +2368,22 @@ def CIR_FuncOp : CIR_Op<"func", [
// Linkage information
cir.func linkonce_odr @some_method(...)
```
+ // Builtin function
+ cir.func builtin @__builtin_coro_end(!cir.ptr<i8>, !cir.bool) -> !cir.bool
+ // Coroutine
+ cir.func coroutine @_Z10silly_taskv() -> !CoroTask {
+ ...
+ cir.await(...)
+ ...
+ }
+ ```
}];
let arguments = (ins SymbolNameAttr:$sym_name,
CIR_VisibilityAttr:$global_visibility,
TypeAttrOf<CIR_FuncType>:$function_type,
+ UnitAttr:$builtin,
+ UnitAttr:$coroutine,
UnitAttr:$lambda,
UnitAttr:$no_proto,
UnitAttr:$dso_local,
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 3dfcafc..0e7cec4 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -136,6 +136,13 @@ struct MissingFeatures {
static bool recordZeroInitPadding() { return false; }
static bool zeroSizeRecordMembers() { return false; }
+ // Coroutines
+ static bool coroAllocBuiltinCall() { return false; }
+ static bool coroBeginBuiltinCall() { return false; }
+ static bool coroEndBuiltinCall() { return false; }
+ static bool coroSizeBuiltinCall() { return false; }
+ static bool coroutineFrame() { return false; }
+
// Various handling of deferred processing in CIRGenModule.
static bool cgmRelease() { return false; }
static bool deferredVtables() { return false; }
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 2ef6098..5a48f0b 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1258,8 +1258,9 @@ def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">,
HelpText<"Compression level for offload device binaries (HIP only)">;
def offload_jobs_EQ : Joined<["--"], "offload-jobs=">,
- HelpText<"Specify the number of threads to use for device offloading tasks"
- " during compilation.">;
+ HelpText<"Specify the number of threads to use for device offloading tasks "
+ "during compilation. Can be a positive integer or the string "
+ "'jobserver' to use the make-style jobserver from the environment.">;
defm offload_via_llvm : BoolFOption<"offload-via-llvm",
LangOpts<"OffloadViaLLVM">, DefaultFalse,
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index cf17de1..4cfa91e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -428,6 +428,32 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
return emitUnaryFPBuiltin<cir::ATanOp>(*this, *e);
case Builtin::BI__builtin_elementwise_cos:
return emitUnaryFPBuiltin<cir::CosOp>(*this, *e);
+ case Builtin::BI__builtin_coro_id:
+ case Builtin::BI__builtin_coro_promise:
+ case Builtin::BI__builtin_coro_resume:
+ case Builtin::BI__builtin_coro_noop:
+ case Builtin::BI__builtin_coro_destroy:
+ case Builtin::BI__builtin_coro_done:
+ case Builtin::BI__builtin_coro_alloc:
+ case Builtin::BI__builtin_coro_begin:
+ case Builtin::BI__builtin_coro_end:
+ case Builtin::BI__builtin_coro_suspend:
+ case Builtin::BI__builtin_coro_align:
+ cgm.errorNYI(e->getSourceRange(), "BI__builtin_coro_id like NYI");
+ return getUndefRValue(e->getType());
+
+ case Builtin::BI__builtin_coro_frame: {
+ cgm.errorNYI(e->getSourceRange(), "BI__builtin_coro_frame NYI");
+ assert(!cir::MissingFeatures::coroutineFrame());
+ return getUndefRValue(e->getType());
+ }
+ case Builtin::BI__builtin_coro_free:
+ case Builtin::BI__builtin_coro_size: {
+ cgm.errorNYI(e->getSourceRange(),
+ "BI__builtin_coro_free, BI__builtin_coro_size NYI");
+ assert(!cir::MissingFeatures::coroSizeBuiltinCall());
+ return getUndefRValue(e->getType());
+ }
}
// If this is an alias for a lib function (e.g. __builtin_sin), emit
diff --git a/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp
new file mode 100644
index 0000000..c25cce4
--- /dev/null
+++ b/clang/lib/CIR/CodeGen/CIRGenCoroutine.cpp
@@ -0,0 +1,82 @@
+//===----- CIRGenCoroutine.cpp - Emit CIR Code for C++ coroutines ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code dealing with C++ code generation of coroutines.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CIRGenFunction.h"
+#include "mlir/Support/LLVM.h"
+#include "clang/AST/StmtCXX.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/CIR/Dialect/IR/CIRTypes.h"
+
+using namespace clang;
+using namespace clang::CIRGen;
+
+struct clang::CIRGen::CGCoroData {
+ // Stores the __builtin_coro_id emitted in the function so that we can supply
+ // it as the first argument to other builtins.
+ cir::CallOp coroId = nullptr;
+};
+
+// Defining these here allows us to keep CGCoroData private to this file.
+CIRGenFunction::CGCoroInfo::CGCoroInfo() {}
+CIRGenFunction::CGCoroInfo::~CGCoroInfo() {}
+
+static void createCoroData(CIRGenFunction &cgf,
+ CIRGenFunction::CGCoroInfo &curCoro,
+ cir::CallOp coroId) {
+ assert(!curCoro.data && "EmitCoroutineBodyStatement called twice?");
+
+ curCoro.data = std::make_unique<CGCoroData>();
+ curCoro.data->coroId = coroId;
+}
+
+cir::CallOp CIRGenFunction::emitCoroIDBuiltinCall(mlir::Location loc,
+ mlir::Value nullPtr) {
+ cir::IntType int32Ty = builder.getUInt32Ty();
+
+ const TargetInfo &ti = cgm.getASTContext().getTargetInfo();
+ unsigned newAlign = ti.getNewAlign() / ti.getCharWidth();
+
+ mlir::Operation *builtin = cgm.getGlobalValue(cgm.builtinCoroId);
+
+ cir::FuncOp fnOp;
+ if (!builtin) {
+ fnOp = cgm.createCIRBuiltinFunction(
+ loc, cgm.builtinCoroId,
+ cir::FuncType::get({int32Ty, VoidPtrTy, VoidPtrTy, VoidPtrTy}, int32Ty),
+ /*FD=*/nullptr);
+ assert(fnOp && "should always succeed");
+ } else {
+ fnOp = cast<cir::FuncOp>(builtin);
+ }
+
+ return builder.createCallOp(loc, fnOp,
+ mlir::ValueRange{builder.getUInt32(newAlign, loc),
+ nullPtr, nullPtr, nullPtr});
+}
+
+mlir::LogicalResult
+CIRGenFunction::emitCoroutineBody(const CoroutineBodyStmt &s) {
+ mlir::Location openCurlyLoc = getLoc(s.getBeginLoc());
+ cir::ConstantOp nullPtrCst = builder.getNullPtr(VoidPtrTy, openCurlyLoc);
+
+ auto fn = mlir::cast<cir::FuncOp>(curFn);
+ fn.setCoroutine(true);
+ cir::CallOp coroId = emitCoroIDBuiltinCall(openCurlyLoc, nullPtrCst);
+ createCoroData(*this, curCoro, coroId);
+
+ assert(!cir::MissingFeatures::coroAllocBuiltinCall());
+
+ assert(!cir::MissingFeatures::coroBeginBuiltinCall());
+
+ assert(!cir::MissingFeatures::generateDebugInfo());
+ return mlir::success();
+}
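
For orientation, the new emitCoroutineBody path is driven by any C++20 coroutine; the commit's coro-task.cpp test exercises a similar shape. Below is an illustrative stand-in, not code from the commit (the `task` type is hypothetical):

// Illustrative only: a minimal C++20 coroutine that reaches
// CIRGenFunction::emitCoroutineBody. The `task` type is a made-up stand-in.
#include <coroutine>

struct task {
  struct promise_type {
    task get_return_object() { return {}; }
    std::suspend_always initial_suspend() { return {}; }
    std::suspend_always final_suspend() noexcept { return {}; }
    void return_void() {}
    void unhandled_exception() {}
  };
};

// CIRGen marks the resulting cir.func with the `coroutine` unit attribute
// and emits a cir.call to @__builtin_coro_id at the top of the body.
task silly_task() { co_return; }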
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index fa68ad9..b4c8924 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -1108,8 +1108,9 @@ CIRGenFunction::emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e) {
return lv;
}
-LValue CIRGenFunction::emitStringLiteralLValue(const StringLiteral *e) {
- cir::GlobalOp globalOp = cgm.getGlobalForStringLiteral(e);
+LValue CIRGenFunction::emitStringLiteralLValue(const StringLiteral *e,
+ llvm::StringRef name) {
+ cir::GlobalOp globalOp = cgm.getGlobalForStringLiteral(e, name);
assert(globalOp.getAlignment() && "expected alignment for string literal");
unsigned align = *(globalOp.getAlignment());
mlir::Value addr =
@@ -2372,6 +2373,21 @@ mlir::Value CIRGenFunction::emitScalarConstant(
return builder.getConstant(getLoc(e->getSourceRange()), constant.getValue());
}
+LValue CIRGenFunction::emitPredefinedLValue(const PredefinedExpr *e) {
+ const StringLiteral *sl = e->getFunctionName();
+ assert(sl != nullptr && "No StringLiteral name in PredefinedExpr");
+ auto fn = cast<cir::FuncOp>(curFn);
+ StringRef fnName = fn.getName();
+ fnName.consume_front("\01");
+ std::array<StringRef, 2> nameItems = {
+ PredefinedExpr::getIdentKindName(e->getIdentKind()), fnName};
+ std::string gvName = llvm::join(nameItems, ".");
+ if (isa_and_nonnull<BlockDecl>(curCodeDecl))
+ cgm.errorNYI(e->getSourceRange(), "predefined lvalue in block");
+
+ return emitStringLiteralLValue(sl, gvName);
+}
+
/// An LValue is a candidate for having its loads and stores be made atomic if
/// we are operating under /volatile:ms *and* the LValue itself is volatile and
/// performing such an operation can be performed without a libcall.
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index b26b4f2..52fb0d7 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -342,6 +342,9 @@ void CIRGenFunction::LexicalScope::cleanup() {
cir::ReturnOp CIRGenFunction::LexicalScope::emitReturn(mlir::Location loc) {
CIRGenBuilderTy &builder = cgf.getBuilder();
+ // If we are on a coroutine, add the coro_end builtin call.
+ assert(!cir::MissingFeatures::coroEndBuiltinCall());
+
auto fn = dyn_cast<cir::FuncOp>(cgf.curFn);
assert(fn && "emitReturn from non-function");
if (!fn.getFunctionType().hasVoidReturn()) {
@@ -815,6 +818,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) {
return emitMemberExpr(cast<MemberExpr>(e));
case Expr::CompoundLiteralExprClass:
return emitCompoundLiteralLValue(cast<CompoundLiteralExpr>(e));
+ case Expr::PredefinedExprClass:
+ return emitPredefinedLValue(cast<PredefinedExpr>(e));
case Expr::BinaryOperatorClass:
return emitBinaryOperatorLValue(cast<BinaryOperator>(e));
case Expr::CompoundAssignOperatorClass: {
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index cb7cf98..dfd9d2c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -47,6 +47,8 @@ class LoopOp;
namespace clang::CIRGen {
+struct CGCoroData;
+
class CIRGenFunction : public CIRGenTypeCache {
public:
CIRGenModule &cgm;
@@ -66,6 +68,18 @@ public:
/// The compiler-generated variable that holds the return value.
std::optional<mlir::Value> fnRetAlloca;
+ // Holds coroutine data if the current function is a coroutine. We use a
+ // wrapper to manage its lifetime, so that we don't have to define CGCoroData
+ // in this header.
+ struct CGCoroInfo {
+ std::unique_ptr<CGCoroData> data;
+ CGCoroInfo();
+ ~CGCoroInfo();
+ };
+ CGCoroInfo curCoro;
+
+ bool isCoroutine() const { return curCoro.data != nullptr; }
+
/// The temporary alloca to hold the return value. This is
/// invalid iff the function has no return value.
Address returnValue = Address::invalid();
@@ -1174,6 +1188,10 @@ public:
void emitConstructorBody(FunctionArgList &args);
+ mlir::LogicalResult emitCoroutineBody(const CoroutineBodyStmt &s);
+ cir::CallOp emitCoroEndBuiltinCall(mlir::Location loc, mlir::Value nullPtr);
+ cir::CallOp emitCoroIDBuiltinCall(mlir::Location loc, mlir::Value nullPtr);
+
void emitDestroy(Address addr, QualType type, Destroyer *destroyer);
void emitDestructorBody(FunctionArgList &args);
@@ -1279,6 +1297,8 @@ public:
void emitInitializerForField(clang::FieldDecl *field, LValue lhs,
clang::Expr *init);
+ LValue emitPredefinedLValue(const PredefinedExpr *e);
+
mlir::Value emitPromotedComplexExpr(const Expr *e, QualType promotionType);
mlir::Value emitPromotedScalarExpr(const Expr *e, QualType promotionType);
@@ -1473,7 +1493,8 @@ public:
mlir::Value emitStoreThroughBitfieldLValue(RValue src, LValue dstresult);
- LValue emitStringLiteralLValue(const StringLiteral *e);
+ LValue emitStringLiteralLValue(const StringLiteral *e,
+ llvm::StringRef name = ".str");
mlir::LogicalResult emitSwitchBody(const clang::Stmt *s);
mlir::LogicalResult emitSwitchCase(const clang::SwitchCase &s,
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 2bd2729..8485564 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -1343,32 +1343,36 @@ cir::GlobalOp CIRGenModule::getGlobalForStringLiteral(const StringLiteral *s,
mlir::Attribute c = getConstantArrayFromStringLiteral(s);
- if (getLangOpts().WritableStrings) {
- errorNYI(s->getSourceRange(),
- "getGlobalForStringLiteral: Writable strings");
- }
-
- // Mangle the string literal if that's how the ABI merges duplicate strings.
- // Don't do it if they are writable, since we don't want writes in one TU to
- // affect strings in another.
- if (getCXXABI().getMangleContext().shouldMangleStringLiteral(s) &&
- !getLangOpts().WritableStrings) {
- errorNYI(s->getSourceRange(),
- "getGlobalForStringLiteral: mangle string literals");
- }
-
- // Unlike LLVM IR, CIR doesn't automatically unique names for globals, so
- // we need to do that explicitly.
- std::string uniqueName = getUniqueGlobalName(name.str());
- mlir::Location loc = getLoc(s->getSourceRange());
- auto typedC = llvm::cast<mlir::TypedAttr>(c);
- cir::GlobalOp gv =
- generateStringLiteral(loc, typedC, cir::GlobalLinkageKind::PrivateLinkage,
- *this, uniqueName, alignment);
- setDSOLocal(static_cast<mlir::Operation *>(gv));
+ cir::GlobalOp gv;
+ if (!getLangOpts().WritableStrings && constantStringMap.count(c)) {
+ gv = constantStringMap[c];
+ // The bigger alignment always wins.
+ if (!gv.getAlignment() ||
+ uint64_t(alignment.getQuantity()) > *gv.getAlignment())
+ gv.setAlignmentAttr(getSize(alignment));
+ } else {
+ // Mangle the string literal if that's how the ABI merges duplicate strings.
+ // Don't do it if they are writable, since we don't want writes in one TU to
+ // affect strings in another.
+ if (getCXXABI().getMangleContext().shouldMangleStringLiteral(s) &&
+ !getLangOpts().WritableStrings) {
+ errorNYI(s->getSourceRange(),
+ "getGlobalForStringLiteral: mangle string literals");
+ }
- assert(!cir::MissingFeatures::sanitizers());
+ // Unlike LLVM IR, CIR doesn't automatically unique names for globals, so
+ // we need to do that explicitly.
+ std::string uniqueName = getUniqueGlobalName(name.str());
+ mlir::Location loc = getLoc(s->getSourceRange());
+ auto typedC = llvm::cast<mlir::TypedAttr>(c);
+ gv = generateStringLiteral(loc, typedC,
+ cir::GlobalLinkageKind::PrivateLinkage, *this,
+ uniqueName, alignment);
+ setDSOLocal(static_cast<mlir::Operation *>(gv));
+ constantStringMap[c] = gv;
+ assert(!cir::MissingFeatures::sanitizers());
+ }
return gv;
}
@@ -2065,6 +2069,15 @@ CIRGenModule::createCIRFunction(mlir::Location loc, StringRef name,
return func;
}
+cir::FuncOp
+CIRGenModule::createCIRBuiltinFunction(mlir::Location loc, StringRef name,
+ cir::FuncType ty,
+ const clang::FunctionDecl *fd) {
+ cir::FuncOp fnOp = createCIRFunction(loc, name, ty, fd);
+ fnOp.setBuiltin(true);
+ return fnOp;
+}
+
mlir::SymbolTable::Visibility
CIRGenModule::getMLIRVisibility(cir::GlobalOp op) {
// MLIR doesn't accept public symbols declarations (only
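
The constantStringMap change above gives CIR the string-literal uniquing that LLVM IR performs implicitly. A minimal sketch of the same policy follows, with plain standard-library types standing in for mlir::Attribute and cir::GlobalOp (all names here are illustrative):

// Sketch of the uniquing policy: one global per distinct constant, and
// the largest requested alignment wins on reuse.
#include <algorithm>
#include <cstdint>
#include <map>
#include <string>

struct Global {
  std::string name;
  uint64_t align = 0;
};

std::map<std::string, Global> constantStringMap;
unsigned nextId = 0;

Global getGlobalFor(const std::string &constant, uint64_t align,
                    bool writableStrings) {
  if (!writableStrings) {
    auto it = constantStringMap.find(constant);
    if (it != constantStringMap.end()) {
      // Reuse the existing global; the bigger alignment always wins.
      it->second.align = std::max(it->second.align, align);
      return it->second;
    }
  }
  // First sighting, or writable strings (which must never be shared
  // across uses): create a fresh, uniquely named global.
  Global g{".str." + std::to_string(nextId++), align};
  if (!writableStrings)
    constantStringMap.emplace(constant, g);
  return g;
}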
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h
index 2c4c6dd..c6a6681 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -274,6 +274,8 @@ public:
llvm_unreachable("unknown visibility!");
}
+ llvm::DenseMap<mlir::Attribute, cir::GlobalOp> constantStringMap;
+
/// Return a constant array for the given string.
mlir::Attribute getConstantArrayFromStringLiteral(const StringLiteral *e);
@@ -473,6 +475,13 @@ public:
cir::FuncType funcType,
const clang::FunctionDecl *funcDecl);
+ /// Create a CIR function with builtin attribute set.
+ cir::FuncOp createCIRBuiltinFunction(mlir::Location loc, llvm::StringRef name,
+ cir::FuncType ty,
+ const clang::FunctionDecl *fd);
+
+ static constexpr const char *builtinCoroId = "__builtin_coro_id";
+
/// Given a builtin id for a function like "__builtin_fabsf", return a
/// Function* for "fabsf".
cir::FuncOp getBuiltinLibFunction(const FunctionDecl *fd, unsigned builtinID);
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
index 644c383..0b8f8bf 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
@@ -197,6 +197,7 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s,
case Stmt::SEHLeaveStmtClass:
case Stmt::SYCLKernelCallStmtClass:
case Stmt::CoroutineBodyStmtClass:
+ return emitCoroutineBody(cast<CoroutineBodyStmt>(*s));
case Stmt::CoreturnStmtClass:
case Stmt::CXXTryStmtClass:
case Stmt::IndirectGotoStmtClass:
diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt
index 3ebf460..36db4bd 100644
--- a/clang/lib/CIR/CodeGen/CMakeLists.txt
+++ b/clang/lib/CIR/CodeGen/CMakeLists.txt
@@ -14,6 +14,7 @@ add_clang_library(clangCIR
CIRGenCall.cpp
CIRGenClass.cpp
CIRGenCleanup.cpp
+ CIRGenCoroutine.cpp
CIRGenCXX.cpp
CIRGenCXXABI.cpp
CIRGenBuiltin.cpp
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 6b5cc80..fba094f 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -1632,12 +1632,19 @@ ParseResult cir::FuncOp::parse(OpAsmParser &parser, OperationState &state) {
llvm::SMLoc loc = parser.getCurrentLocation();
mlir::Builder &builder = parser.getBuilder();
+ mlir::StringAttr builtinNameAttr = getBuiltinAttrName(state.name);
+ mlir::StringAttr coroutineNameAttr = getCoroutineAttrName(state.name);
mlir::StringAttr lambdaNameAttr = getLambdaAttrName(state.name);
mlir::StringAttr noProtoNameAttr = getNoProtoAttrName(state.name);
mlir::StringAttr visNameAttr = getSymVisibilityAttrName(state.name);
mlir::StringAttr visibilityNameAttr = getGlobalVisibilityAttrName(state.name);
mlir::StringAttr dsoLocalNameAttr = getDsoLocalAttrName(state.name);
+ if (::mlir::succeeded(parser.parseOptionalKeyword(builtinNameAttr.strref())))
+ state.addAttribute(builtinNameAttr, parser.getBuilder().getUnitAttr());
+ if (::mlir::succeeded(
+ parser.parseOptionalKeyword(coroutineNameAttr.strref())))
+ state.addAttribute(coroutineNameAttr, parser.getBuilder().getUnitAttr());
if (::mlir::succeeded(parser.parseOptionalKeyword(lambdaNameAttr.strref())))
state.addAttribute(lambdaNameAttr, parser.getBuilder().getUnitAttr());
if (parser.parseOptionalKeyword(noProtoNameAttr).succeeded())
@@ -1747,6 +1754,12 @@ mlir::Region *cir::FuncOp::getCallableRegion() {
}
void cir::FuncOp::print(OpAsmPrinter &p) {
+ if (getBuiltin())
+ p << " builtin";
+
+ if (getCoroutine())
+ p << " coroutine";
+
if (getLambda())
p << " lambda";
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 412a176..684cc09 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -9224,14 +9224,20 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
addOffloadCompressArgs(Args, CmdArgs);
if (Arg *A = Args.getLastArg(options::OPT_offload_jobs_EQ)) {
- int NumThreads;
- if (StringRef(A->getValue()).getAsInteger(10, NumThreads) ||
- NumThreads <= 0)
- C.getDriver().Diag(diag::err_drv_invalid_int_value)
- << A->getAsString(Args) << A->getValue();
- else
- CmdArgs.push_back(
- Args.MakeArgString("--wrapper-jobs=" + Twine(NumThreads)));
+ StringRef Val = A->getValue();
+
+ if (Val.equals_insensitive("jobserver"))
+ CmdArgs.push_back(Args.MakeArgString("--wrapper-jobs=jobserver"));
+ else {
+ int NumThreads;
+ if (Val.getAsInteger(10, NumThreads) || NumThreads <= 0) {
+ C.getDriver().Diag(diag::err_drv_invalid_int_value)
+ << A->getAsString(Args) << Val;
+ } else {
+ CmdArgs.push_back(
+ Args.MakeArgString("--wrapper-jobs=" + Twine(NumThreads)));
+ }
+ }
}
const char *Exec =
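
The `jobserver` value defers concurrency control to GNU make's jobserver, which the new llvm/Support/Jobserver files implement. As background, here is a sketch of the classic POSIX pipe flavor of that protocol; it is independent of the actual llvm::Jobserver API, and newer makes may advertise `fifo:PATH` instead of a pipe fd pair:

// Background sketch of the make jobserver handshake (pipe flavor): make
// exports "--jobserver-auth=R,W" inside MAKEFLAGS; each extra parallel job
// reads one token byte before starting and writes it back when done.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <unistd.h>

static bool parseJobserverFds(int &rfd, int &wfd) {
  const char *flags = std::getenv("MAKEFLAGS");
  if (!flags)
    return false;
  const char *auth = std::strstr(flags, "--jobserver-auth=");
  return auth && std::sscanf(auth, "--jobserver-auth=%d,%d", &rfd, &wfd) == 2;
}

int main() {
  int rfd, wfd;
  if (!parseJobserverFds(rfd, wfd))
    return 1; // No jobserver in the environment; run serially instead.
  char token;
  if (read(rfd, &token, 1) == 1) { // Block until a job slot is free.
    // ... run one unit of parallel work here ...
    write(wfd, &token, 1);         // Return the slot to the pool.
  }
  return 0;
}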
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 2bf6244..686e541 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -3199,7 +3199,7 @@ private:
Keywords.kw_NS_OPTIONS, TT_ObjCBlockLBrace,
TT_ObjCBlockLParen, TT_ObjCDecl, TT_ObjCForIn,
TT_ObjCMethodExpr, TT_ObjCMethodSpecifier,
- TT_ObjCProperty)) {
+ TT_ObjCProperty, TT_ObjCSelector)) {
LLVM_DEBUG(llvm::dbgs()
<< "Detected ObjC at location "
<< FormatTok->Tok.getLocation().printToString(
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index e4ddd61..f015d27 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -127,9 +127,17 @@ namespace format {
TYPE(ObjCBlockLParen) \
TYPE(ObjCDecl) \
TYPE(ObjCForIn) \
+ /* The square brackets surrounding a method call, the colon separating the \
+ * method or parameter name and the argument inside the square brackets, and \
+ * the colon separating the method or parameter name and the type inside the \
+ * method declaration. */ \
TYPE(ObjCMethodExpr) \
+ /* The '+' or '-' at the start of the line. */ \
TYPE(ObjCMethodSpecifier) \
TYPE(ObjCProperty) \
+ /* The parentheses following '@selector' and the colon following the method \
+ * or parameter name inside the parentheses. */ \
+ TYPE(ObjCSelector) \
TYPE(ObjCStringLiteral) \
TYPE(OverloadedOperator) \
TYPE(OverloadedOperatorLParen) \
@@ -146,6 +154,9 @@ namespace format {
TYPE(RequiresExpression) \
TYPE(RequiresExpressionLBrace) \
TYPE(RequiresExpressionLParen) \
+ /* The hash key in languages that have hash literals, not including the \
+ * field name in the C++ struct literal. Also the method or parameter name \
+ * in the Objective-C method declaration or call. */ \
TYPE(SelectorName) \
TYPE(StartOfName) \
TYPE(StatementAttributeLikeMacro) \
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 59f81b3..5b784ed 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -321,13 +321,13 @@ private:
return parseUntouchableParens();
}
- bool StartsObjCMethodExpr = false;
+ bool StartsObjCSelector = false;
if (!Style.isVerilog()) {
if (FormatToken *MaybeSel = OpeningParen.Previous) {
// @selector( starts a selector.
if (MaybeSel->is(tok::objc_selector) && MaybeSel->Previous &&
MaybeSel->Previous->is(tok::at)) {
- StartsObjCMethodExpr = true;
+ StartsObjCSelector = true;
}
}
}
@@ -451,10 +451,8 @@ private:
}
}
- if (StartsObjCMethodExpr) {
- Contexts.back().ColonIsObjCMethodExpr = true;
- OpeningParen.setType(TT_ObjCMethodExpr);
- }
+ if (StartsObjCSelector)
+ OpeningParen.setType(TT_ObjCSelector);
// MightBeFunctionType and ProbablyFunctionType are used for
// function pointer and reference types as well as Objective-C
@@ -513,8 +511,8 @@ private:
}
}
- if (StartsObjCMethodExpr) {
- CurrentToken->setType(TT_ObjCMethodExpr);
+ if (StartsObjCSelector) {
+ CurrentToken->setType(TT_ObjCSelector);
if (Contexts.back().FirstObjCSelectorName) {
Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
Contexts.back().LongestObjCSelectorName;
@@ -1449,7 +1447,7 @@ private:
Next->Next->is(tok::colon)))) {
// This handles a special macro in ObjC code where selectors including
// the colon are passed as macro arguments.
- Tok->setType(TT_ObjCMethodExpr);
+ Tok->setType(TT_ObjCSelector);
}
break;
case tok::pipe:
@@ -4608,7 +4606,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
return false;
}
if (Left.is(tok::colon))
- return Left.isNot(TT_ObjCMethodExpr);
+ return Left.isNoneOf(TT_ObjCSelector, TT_ObjCMethodExpr);
if (Left.is(tok::coloncolon))
return false;
if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
@@ -5464,7 +5462,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
// `private:` and `public:`.
if (!Right.getNextNonComment())
return false;
- if (Right.is(TT_ObjCMethodExpr))
+ if (Right.isOneOf(TT_ObjCSelector, TT_ObjCMethodExpr))
return false;
if (Left.is(tok::question))
return false;
@@ -6288,6 +6286,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon;
if (Right.is(TT_InheritanceColon))
return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon;
+ // When the method parameter has no name, allow breaking before the colon.
if (Right.is(TT_ObjCMethodExpr) && Right.isNot(tok::r_square) &&
Left.isNot(TT_SelectorName)) {
return true;
diff --git a/clang/lib/StaticAnalyzer/Core/CMakeLists.txt b/clang/lib/StaticAnalyzer/Core/CMakeLists.txt
index d0a9b20..b8095a5 100644
--- a/clang/lib/StaticAnalyzer/Core/CMakeLists.txt
+++ b/clang/lib/StaticAnalyzer/Core/CMakeLists.txt
@@ -61,6 +61,7 @@ add_clang_library(clangStaticAnalyzerCore
clangBasic
clangCrossTU
clangFrontend
+ clangIndex
clangLex
clangRewrite
clangToolingCore
diff --git a/clang/lib/Tooling/DependencyScanning/CMakeLists.txt b/clang/lib/Tooling/DependencyScanning/CMakeLists.txt
index 53a2728..76bdc50 100644
--- a/clang/lib/Tooling/DependencyScanning/CMakeLists.txt
+++ b/clang/lib/Tooling/DependencyScanning/CMakeLists.txt
@@ -24,6 +24,5 @@ add_clang_library(clangDependencyScanning
clangFrontend
clangLex
clangSerialization
- clangTooling
${LLVM_PTHREAD_LIB}
)
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
index 010380d..e1f4d0d 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
@@ -9,8 +9,10 @@
#include "DependencyScannerImpl.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "clang/Basic/DiagnosticSerialization.h"
+#include "clang/Driver/Driver.h"
#include "clang/Frontend/FrontendActions.h"
#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
+#include "llvm/TargetParser/Host.h"
using namespace clang;
using namespace tooling;
@@ -332,11 +334,9 @@ public:
return DepFS->getDirectiveTokens(File.getName());
}
};
-} // namespace
/// Sanitize diagnostic options for dependency scan.
-void clang::tooling::dependencies::sanitizeDiagOpts(
- DiagnosticOptions &DiagOpts) {
+void sanitizeDiagOpts(DiagnosticOptions &DiagOpts) {
// Don't print 'X warnings and Y errors generated'.
DiagOpts.ShowCarets = false;
// Don't write out diagnostic file.
@@ -355,44 +355,146 @@ void clang::tooling::dependencies::sanitizeDiagOpts(
.Default(true);
});
}
+} // namespace
-bool DependencyScanningAction::runInvocation(
- std::shared_ptr<CompilerInvocation> Invocation,
- IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
- std::shared_ptr<PCHContainerOperations> PCHContainerOps,
- DiagnosticConsumer *DiagConsumer) {
- // Making sure that we canonicalize the defines before we create the deep
- // copy to avoid unnecessary variants in the scanner and in the resulting
- // explicit command lines.
- if (any(Service.getOptimizeArgs() & ScanningOptimizations::Macros))
- canonicalizeDefines(Invocation->getPreprocessorOpts());
+namespace clang::tooling::dependencies {
+std::unique_ptr<DiagnosticOptions>
+createDiagOptions(ArrayRef<std::string> CommandLine) {
+ std::vector<const char *> CLI;
+ for (const std::string &Arg : CommandLine)
+ CLI.push_back(Arg.c_str());
+ auto DiagOpts = CreateAndPopulateDiagOpts(CLI);
+ sanitizeDiagOpts(*DiagOpts);
+ return DiagOpts;
+}
- // Make a deep copy of the original Clang invocation.
- CompilerInvocation OriginalInvocation(*Invocation);
+DignosticsEngineWithDiagOpts::DignosticsEngineWithDiagOpts(
+ ArrayRef<std::string> CommandLine,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS, DiagnosticConsumer &DC) {
+ std::vector<const char *> CCommandLine(CommandLine.size(), nullptr);
+ llvm::transform(CommandLine, CCommandLine.begin(),
+ [](const std::string &Str) { return Str.c_str(); });
+ DiagOpts = CreateAndPopulateDiagOpts(CCommandLine);
+ sanitizeDiagOpts(*DiagOpts);
+ DiagEngine = CompilerInstance::createDiagnostics(*FS, *DiagOpts, &DC,
+ /*ShouldOwnClient=*/false);
+}
- if (Scanned) {
- // Scanning runs once for the first -cc1 invocation in a chain of driver
- // jobs. For any dependent jobs, reuse the scanning result and just
- // update the LastCC1Arguments to correspond to the new invocation.
- // FIXME: to support multi-arch builds, each arch requires a separate scan
- setLastCC1Arguments(std::move(OriginalInvocation));
- return true;
+std::pair<std::unique_ptr<driver::Driver>, std::unique_ptr<driver::Compilation>>
+buildCompilation(ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) {
+ SmallVector<const char *, 256> Argv;
+ Argv.reserve(ArgStrs.size());
+ for (const std::string &Arg : ArgStrs)
+ Argv.push_back(Arg.c_str());
+
+ std::unique_ptr<driver::Driver> Driver = std::make_unique<driver::Driver>(
+ Argv[0], llvm::sys::getDefaultTargetTriple(), Diags,
+ "clang LLVM compiler", FS);
+ Driver->setTitle("clang_based_tool");
+
+ llvm::BumpPtrAllocator Alloc;
+ bool CLMode = driver::IsClangCL(
+ driver::getDriverMode(Argv[0], ArrayRef(Argv).slice(1)));
+
+ if (llvm::Error E =
+ driver::expandResponseFiles(Argv, CLMode, Alloc, FS.get())) {
+ Diags.Report(diag::err_drv_expand_response_file)
+ << llvm::toString(std::move(E));
+ return std::make_pair(nullptr, nullptr);
}
- Scanned = true;
+ std::unique_ptr<driver::Compilation> Compilation(
+ Driver->BuildCompilation(Argv));
+ if (!Compilation)
+ return std::make_pair(nullptr, nullptr);
- // Create a compiler instance to handle the actual work.
- auto ModCache = makeInProcessModuleCache(Service.getModuleCacheEntries());
- ScanInstanceStorage.emplace(std::move(Invocation), std::move(PCHContainerOps),
- ModCache.get());
- CompilerInstance &ScanInstance = *ScanInstanceStorage;
+ if (Compilation->containsError())
+ return std::make_pair(nullptr, nullptr);
+
+ return std::make_pair(std::move(Driver), std::move(Compilation));
+}
+
+std::unique_ptr<CompilerInvocation>
+createCompilerInvocation(ArrayRef<std::string> CommandLine,
+ DiagnosticsEngine &Diags) {
+ llvm::opt::ArgStringList Argv;
+ for (const std::string &Str : ArrayRef(CommandLine).drop_front())
+ Argv.push_back(Str.c_str());
+
+ auto Invocation = std::make_unique<CompilerInvocation>();
+ if (!CompilerInvocation::CreateFromArgs(*Invocation, Argv, Diags)) {
+ // FIXME: Should we just go on like cc1_main does?
+ return nullptr;
+ }
+ return Invocation;
+}
+
+std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>>
+initVFSForTUBuferScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
+ ArrayRef<std::string> CommandLine,
+ StringRef WorkingDirectory,
+ llvm::MemoryBufferRef TUBuffer) {
+ // Reset what might have been modified in the previous worker invocation.
+ BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
+
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS;
+ auto OverlayFS =
+ llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
+ auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+ InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
+ auto InputPath = TUBuffer.getBufferIdentifier();
+ InMemoryFS->addFile(
+ InputPath, 0, llvm::MemoryBuffer::getMemBufferCopy(TUBuffer.getBuffer()));
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
+
+ OverlayFS->pushOverlay(InMemoryOverlay);
+ ModifiedFS = OverlayFS;
+ std::vector<std::string> ModifiedCommandLine(CommandLine);
+ ModifiedCommandLine.emplace_back(InputPath);
+
+ return std::make_pair(ModifiedFS, ModifiedCommandLine);
+}
+
+std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>>
+initVFSForByNameScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
+ ArrayRef<std::string> CommandLine,
+ StringRef WorkingDirectory, StringRef ModuleName) {
+ // Reset what might have been modified in the previous worker invocation.
+ BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
+
+ // If we're scanning based on a module name alone, we don't expect the client
+ // to provide us with an input file. However, the driver really wants to have
+ // one. Let's just make it up to make the driver happy.
+ auto OverlayFS =
+ llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
+ auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+ InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
+ SmallString<128> FakeInputPath;
+ // TODO: We should retry the creation if the path already exists.
+ llvm::sys::fs::createUniquePath(ModuleName + "-%%%%%%%%.input", FakeInputPath,
+ /*MakeAbsolute=*/false);
+ InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(""));
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
+ OverlayFS->pushOverlay(InMemoryOverlay);
+
+ std::vector<std::string> ModifiedCommandLine(CommandLine);
+ ModifiedCommandLine.emplace_back(FakeInputPath);
+
+ return std::make_pair(OverlayFS, ModifiedCommandLine);
+}
+
+bool initializeScanCompilerInstance(
+ CompilerInstance &ScanInstance,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
+ DiagnosticConsumer *DiagConsumer, DependencyScanningService &Service,
+ IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS) {
ScanInstance.setBuildingModule(false);
ScanInstance.createVirtualFileSystem(FS, DiagConsumer);
// Create the compiler's actual diagnostics engine.
sanitizeDiagOpts(ScanInstance.getDiagnosticOpts());
- assert(!DiagConsumerFinished && "attempt to reuse finished consumer");
ScanInstance.createDiagnostics(DiagConsumer, /*ShouldOwnClient=*/false);
if (!ScanInstance.hasDiagnostics())
return false;
@@ -435,6 +537,26 @@ bool DependencyScanningAction::runInvocation(
ScanInstance.createSourceManager();
+ // Consider different header search and diagnostic options to create
+ // different modules. This avoids the unsound aliasing of module PCMs.
+ //
+ // TODO: Implement diagnostic bucketing to reduce the impact of strict
+ // context hashing.
+ ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true;
+ ScanInstance.getHeaderSearchOpts().ModulesSerializeOnlyPreprocessor = true;
+ ScanInstance.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
+ ScanInstance.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
+ ScanInstance.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true;
+ ScanInstance.getHeaderSearchOpts().ModulesForceValidateUserHeaders = false;
+
+ // Avoid some checks and module map parsing when loading PCM files.
+ ScanInstance.getPreprocessorOpts().ModulesCheckRelocated = false;
+
+ return true;
+}
+
+llvm::SmallVector<StringRef>
+getInitialStableDirs(const CompilerInstance &ScanInstance) {
// Create a collection of stable directories derived from the ScanInstance
// for determining whether module dependencies would fully resolve from
// those directories.
@@ -442,7 +564,12 @@ bool DependencyScanningAction::runInvocation(
const StringRef Sysroot = ScanInstance.getHeaderSearchOpts().Sysroot;
if (!Sysroot.empty() && (llvm::sys::path::root_directory(Sysroot) != Sysroot))
StableDirs = {Sysroot, ScanInstance.getHeaderSearchOpts().ResourceDir};
+ return StableDirs;
+}
+std::optional<PrebuiltModulesAttrsMap>
+computePrebuiltModulesASTMap(CompilerInstance &ScanInstance,
+ llvm::SmallVector<StringRef> &StableDirs) {
// Store a mapping of prebuilt module files and their properties like header
// search options. This will prevent the implicit build to create duplicate
// modules and will force reuse of the existing prebuilt module files
@@ -454,12 +581,14 @@ bool DependencyScanningAction::runInvocation(
ScanInstance.getPreprocessorOpts().ImplicitPCHInclude, ScanInstance,
ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles,
PrebuiltModulesASTMap, ScanInstance.getDiagnostics(), StableDirs))
- return false;
+ return {};
- // Create the dependency collector that will collect the produced
- // dependencies.
- //
- // This also moves the existing dependency output options from the
+ return PrebuiltModulesASTMap;
+}
+
+std::unique_ptr<DependencyOutputOptions>
+takeDependencyOutputOptionsFrom(CompilerInstance &ScanInstance) {
+ // This function moves the existing dependency output options from the
// invocation to the collector. The options in the invocation are reset,
// which ensures that the compiler won't create new dependency collectors,
// and thus won't write out the extra '.d' files to disk.
@@ -472,35 +601,85 @@ bool DependencyScanningAction::runInvocation(
ScanInstance.getFrontendOpts().Inputs)};
Opts->IncludeSystemHeaders = true;
+ return Opts;
+}
+
+std::shared_ptr<ModuleDepCollector> initializeScanInstanceDependencyCollector(
+ CompilerInstance &ScanInstance,
+ std::unique_ptr<DependencyOutputOptions> DepOutputOpts,
+ StringRef WorkingDirectory, DependencyConsumer &Consumer,
+ DependencyScanningService &Service, CompilerInvocation &Inv,
+ DependencyActionController &Controller,
+ PrebuiltModulesAttrsMap PrebuiltModulesASTMap,
+ llvm::SmallVector<StringRef> &StableDirs) {
+ std::shared_ptr<ModuleDepCollector> MDC;
switch (Service.getFormat()) {
case ScanningOutputFormat::Make:
ScanInstance.addDependencyCollector(
std::make_shared<DependencyConsumerForwarder>(
- std::move(Opts), WorkingDirectory, Consumer));
+ std::move(DepOutputOpts), WorkingDirectory, Consumer));
break;
case ScanningOutputFormat::P1689:
case ScanningOutputFormat::Full:
MDC = std::make_shared<ModuleDepCollector>(
- Service, std::move(Opts), ScanInstance, Consumer, Controller,
- OriginalInvocation, std::move(PrebuiltModulesASTMap), StableDirs);
+ Service, std::move(DepOutputOpts), ScanInstance, Consumer, Controller,
+ Inv, std::move(PrebuiltModulesASTMap), StableDirs);
ScanInstance.addDependencyCollector(MDC);
break;
}
- // Consider different header search and diagnostic options to create
- // different modules. This avoids the unsound aliasing of module PCMs.
- //
- // TODO: Implement diagnostic bucketing to reduce the impact of strict
- // context hashing.
- ScanInstance.getHeaderSearchOpts().ModulesStrictContextHash = true;
- ScanInstance.getHeaderSearchOpts().ModulesSerializeOnlyPreprocessor = true;
- ScanInstance.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = true;
- ScanInstance.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = true;
- ScanInstance.getHeaderSearchOpts().ModulesSkipPragmaDiagnosticMappings = true;
- ScanInstance.getHeaderSearchOpts().ModulesForceValidateUserHeaders = false;
+ return MDC;
+}
+} // namespace clang::tooling::dependencies
- // Avoid some checks and module map parsing when loading PCM files.
- ScanInstance.getPreprocessorOpts().ModulesCheckRelocated = false;
+bool DependencyScanningAction::runInvocation(
+ std::unique_ptr<CompilerInvocation> Invocation,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ DiagnosticConsumer *DiagConsumer) {
+ // Making sure that we canonicalize the defines before we create the deep
+ // copy to avoid unnecessary variants in the scanner and in the resulting
+ // explicit command lines.
+ if (any(Service.getOptimizeArgs() & ScanningOptimizations::Macros))
+ canonicalizeDefines(Invocation->getPreprocessorOpts());
+
+ // Make a deep copy of the original Clang invocation.
+ CompilerInvocation OriginalInvocation(*Invocation);
+
+ if (Scanned) {
+ // Scanning runs once for the first -cc1 invocation in a chain of driver
+ // jobs. For any dependent jobs, reuse the scanning result and just
+ // update the LastCC1Arguments to correspond to the new invocation.
+ // FIXME: to support multi-arch builds, each arch requires a separate scan
+ setLastCC1Arguments(std::move(OriginalInvocation));
+ return true;
+ }
+
+ Scanned = true;
+
+ // Create a compiler instance to handle the actual work.
+ auto ModCache = makeInProcessModuleCache(Service.getModuleCacheEntries());
+ ScanInstanceStorage.emplace(std::move(Invocation), std::move(PCHContainerOps),
+ ModCache.get());
+ CompilerInstance &ScanInstance = *ScanInstanceStorage;
+
+ assert(!DiagConsumerFinished && "attempt to reuse finished consumer");
+ if (!initializeScanCompilerInstance(ScanInstance, FS, DiagConsumer, Service,
+ DepFS))
+ return false;
+
+ llvm::SmallVector<StringRef> StableDirs = getInitialStableDirs(ScanInstance);
+ auto MaybePrebuiltModulesASTMap =
+ computePrebuiltModulesASTMap(ScanInstance, StableDirs);
+ if (!MaybePrebuiltModulesASTMap)
+ return false;
+
+ auto DepOutputOpts = takeDependencyOutputOptionsFrom(ScanInstance);
+
+ MDC = initializeScanInstanceDependencyCollector(
+ ScanInstance, std::move(DepOutputOpts), WorkingDirectory, Consumer,
+ Service, OriginalInvocation, Controller, *MaybePrebuiltModulesASTMap,
+ StableDirs);
std::unique_ptr<FrontendAction> Action;
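
Both new initVFSFor*Scanning helpers above rely on the same llvm::vfs layering trick: an InMemoryFileSystem overlay on top of the real filesystem makes a synthetic input file visible to the driver. A minimal, self-contained sketch of that pattern (the helper name and arguments are illustrative):

// Sketch of the overlay trick: layer an in-memory file over the base
// filesystem so a synthetic input path resolves like a real file.
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"

llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>
makeOverlayWithFakeInput(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> Base,
                         llvm::StringRef FakePath, llvm::StringRef Contents) {
  auto Overlay = llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(Base);
  auto InMemory = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
  // Entries in the pushed overlay shadow the base FS for the same path.
  InMemory->addFile(FakePath, /*ModificationTime=*/0,
                    llvm::MemoryBuffer::getMemBufferCopy(Contents));
  Overlay->pushOverlay(InMemory);
  return Overlay;
}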
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h
index 32fbcff..71c6731 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.h
@@ -9,8 +9,10 @@
#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNER_H
#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNER_H
+#include "clang/Driver/Compilation.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
#include "clang/Serialization/ObjectFilePCHContainerReader.h"
#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
@@ -30,12 +32,12 @@ public:
DependencyScanningAction(
DependencyScanningService &Service, StringRef WorkingDirectory,
DependencyConsumer &Consumer, DependencyActionController &Controller,
- llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS,
+ IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS,
std::optional<StringRef> ModuleName = std::nullopt)
: Service(Service), WorkingDirectory(WorkingDirectory),
Consumer(Consumer), Controller(Controller), DepFS(std::move(DepFS)),
ModuleName(ModuleName) {}
- bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
+ bool runInvocation(std::unique_ptr<CompilerInvocation> Invocation,
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
std::shared_ptr<PCHContainerOperations> PCHContainerOps,
DiagnosticConsumer *DiagConsumer);
@@ -63,7 +65,7 @@ private:
StringRef WorkingDirectory;
DependencyConsumer &Consumer;
DependencyActionController &Controller;
- llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
+ IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
std::optional<StringRef> ModuleName;
std::optional<CompilerInstance> ScanInstanceStorage;
std::shared_ptr<ModuleDepCollector> MDC;
@@ -72,9 +74,81 @@ private:
bool DiagConsumerFinished = false;
};
-// Helper functions
-void sanitizeDiagOpts(DiagnosticOptions &DiagOpts);
+// Helper functions and data types.
+std::unique_ptr<DiagnosticOptions>
+createDiagOptions(ArrayRef<std::string> CommandLine);
+struct DignosticsEngineWithDiagOpts {
+  // The DiagOpts used to create the DiagnosticsEngine must live at least
+  // as long as the engine itself, so the two are bundled together here.
+ std::unique_ptr<DiagnosticOptions> DiagOpts;
+ IntrusiveRefCntPtr<DiagnosticsEngine> DiagEngine;
+
+ DignosticsEngineWithDiagOpts(ArrayRef<std::string> CommandLine,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
+ DiagnosticConsumer &DC);
+};
+
+struct TextDiagnosticsPrinterWithOutput {
+  // The output buffer and options that back the DiagPrinter must share
+  // its lifetime, so they are bundled together here.
+ std::string DiagnosticOutput;
+ llvm::raw_string_ostream DiagnosticsOS;
+ std::unique_ptr<DiagnosticOptions> DiagOpts;
+ TextDiagnosticPrinter DiagPrinter;
+
+ TextDiagnosticsPrinterWithOutput(ArrayRef<std::string> CommandLine)
+ : DiagnosticsOS(DiagnosticOutput),
+ DiagOpts(createDiagOptions(CommandLine)),
+ DiagPrinter(DiagnosticsOS, *DiagOpts) {}
+};
+
+std::pair<std::unique_ptr<driver::Driver>, std::unique_ptr<driver::Compilation>>
+buildCompilation(ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS);
+
+std::unique_ptr<CompilerInvocation>
+createCompilerInvocation(ArrayRef<std::string> CommandLine,
+ DiagnosticsEngine &Diags);
+
+std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>>
+initVFSForTUBuferScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
+ ArrayRef<std::string> CommandLine,
+ StringRef WorkingDirectory,
+ llvm::MemoryBufferRef TUBuffer);
+
+std::pair<IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::vector<std::string>>
+initVFSForByNameScanning(IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
+ ArrayRef<std::string> CommandLine,
+ StringRef WorkingDirectory, StringRef ModuleName);
+
+bool initializeScanCompilerInstance(
+ CompilerInstance &ScanInstance,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
+ DiagnosticConsumer *DiagConsumer, DependencyScanningService &Service,
+ IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS);
+
+SmallVector<StringRef>
+getInitialStableDirs(const CompilerInstance &ScanInstance);
+
+std::optional<PrebuiltModulesAttrsMap>
+computePrebuiltModulesASTMap(CompilerInstance &ScanInstance,
+ SmallVector<StringRef> &StableDirs);
+
+std::unique_ptr<DependencyOutputOptions>
+takeDependencyOutputOptionsFrom(CompilerInstance &ScanInstance);
+
+/// Create the dependency collector that will gather the produced
+/// dependencies. Returns the created ModuleDepCollector, or null,
+/// depending on the scanning format.
+std::shared_ptr<ModuleDepCollector> initializeScanInstanceDependencyCollector(
+ CompilerInstance &ScanInstance,
+ std::unique_ptr<DependencyOutputOptions> DepOutputOpts,
+ StringRef WorkingDirectory, DependencyConsumer &Consumer,
+ DependencyScanningService &Service, CompilerInvocation &Inv,
+ DependencyActionController &Controller,
+ PrebuiltModulesAttrsMap PrebuiltModulesASTMap,
+ llvm::SmallVector<StringRef> &StableDirs);
} // namespace dependencies
} // namespace tooling
} // namespace clang
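The two bundling structs declared above, DignosticsEngineWithDiagOpts and TextDiagnosticsPrinterWithOutput, apply the same idiom: an object that a consumer holds a non-owning reference to is stored next to that consumer in a single aggregate, so member declaration order guarantees the dependency is constructed first and destroyed last. Below is a minimal self-contained sketch of the idiom; Options and Printer are hypothetical stand-ins for DiagnosticOptions and TextDiagnosticPrinter, not clang types.

#include <memory>
#include <string>

struct Options { std::string Flags; };

struct Printer {
  explicit Printer(const Options &Opts) : Opts(Opts) {}
  const Options &Opts; // non-owning: Opts must outlive this Printer
};

struct PrinterWithOptions {
  // Members are constructed in declaration order and destroyed in
  // reverse, so Opts is alive for the Printer's entire lifetime.
  std::unique_ptr<Options> Opts;
  Printer P;

  PrinterWithOptions() : Opts(std::make_unique<Options>()), P(*Opts) {}
};

int main() {
  PrinterWithOptions PWO; // safe: P's referent lives in the same object
  (void)PWO;
}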
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 796e587..9515421 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -8,29 +8,9 @@
#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
#include "DependencyScannerImpl.h"
-#include "clang/Basic/DiagnosticDriver.h"
#include "clang/Basic/DiagnosticFrontend.h"
-#include "clang/Basic/DiagnosticSerialization.h"
-#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
-#include "clang/Driver/Job.h"
#include "clang/Driver/Tool.h"
-#include "clang/Frontend/CompilerInstance.h"
-#include "clang/Frontend/CompilerInvocation.h"
-#include "clang/Frontend/FrontendActions.h"
-#include "clang/Frontend/TextDiagnosticPrinter.h"
-#include "clang/Frontend/Utils.h"
-#include "clang/Lex/PreprocessorOptions.h"
-#include "clang/Serialization/ObjectFilePCHContainerReader.h"
-#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
-#include "clang/Tooling/DependencyScanning/InProcessModuleCache.h"
-#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/TargetParser/Host.h"
-#include <optional>
using namespace clang;
using namespace tooling;
@@ -63,32 +43,19 @@ DependencyScanningWorker::DependencyScanningWorker(
}
}
-static std::unique_ptr<DiagnosticOptions>
-createDiagOptions(const std::vector<std::string> &CommandLine) {
- std::vector<const char *> CLI;
- for (const std::string &Arg : CommandLine)
- CLI.push_back(Arg.c_str());
- auto DiagOpts = CreateAndPopulateDiagOpts(CLI);
- sanitizeDiagOpts(*DiagOpts);
- return DiagOpts;
-}
-
llvm::Error DependencyScanningWorker::computeDependencies(
StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
DependencyConsumer &Consumer, DependencyActionController &Controller,
std::optional<llvm::MemoryBufferRef> TUBuffer) {
// Capture the emitted diagnostics and report them to the client
// in the case of a failure.
- std::string DiagnosticOutput;
- llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput);
- auto DiagOpts = createDiagOptions(CommandLine);
- TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, *DiagOpts);
+ TextDiagnosticsPrinterWithOutput DiagPrinterWithOS(CommandLine);
if (computeDependencies(WorkingDirectory, CommandLine, Consumer, Controller,
- DiagPrinter, TUBuffer))
+ DiagPrinterWithOS.DiagPrinter, TUBuffer))
return llvm::Error::success();
- return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(),
- llvm::inconvertibleErrorCode());
+ return llvm::make_error<llvm::StringError>(
+ DiagPrinterWithOS.DiagnosticsOS.str(), llvm::inconvertibleErrorCode());
}
llvm::Error DependencyScanningWorker::computeDependencies(
@@ -97,51 +64,24 @@ llvm::Error DependencyScanningWorker::computeDependencies(
StringRef ModuleName) {
// Capture the emitted diagnostics and report them to the client
// in the case of a failure.
- std::string DiagnosticOutput;
- llvm::raw_string_ostream DiagnosticsOS(DiagnosticOutput);
- auto DiagOpts = createDiagOptions(CommandLine);
- TextDiagnosticPrinter DiagPrinter(DiagnosticsOS, *DiagOpts);
+ TextDiagnosticsPrinterWithOutput DiagPrinterWithOS(CommandLine);
if (computeDependencies(WorkingDirectory, CommandLine, Consumer, Controller,
- DiagPrinter, ModuleName))
+ DiagPrinterWithOS.DiagPrinter, ModuleName))
return llvm::Error::success();
- return llvm::make_error<llvm::StringError>(DiagnosticsOS.str(),
- llvm::inconvertibleErrorCode());
+ return llvm::make_error<llvm::StringError>(
+ DiagPrinterWithOS.DiagnosticsOS.str(), llvm::inconvertibleErrorCode());
}
static bool forEachDriverJob(
ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags,
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
llvm::function_ref<bool(const driver::Command &Cmd)> Callback) {
- SmallVector<const char *, 256> Argv;
- Argv.reserve(ArgStrs.size());
- for (const std::string &Arg : ArgStrs)
- Argv.push_back(Arg.c_str());
-
- std::unique_ptr<driver::Driver> Driver = std::make_unique<driver::Driver>(
- Argv[0], llvm::sys::getDefaultTargetTriple(), Diags,
- "clang LLVM compiler", FS);
- Driver->setTitle("clang_based_tool");
-
- llvm::BumpPtrAllocator Alloc;
- bool CLMode = driver::IsClangCL(
- driver::getDriverMode(Argv[0], ArrayRef(Argv).slice(1)));
-
- if (llvm::Error E =
- driver::expandResponseFiles(Argv, CLMode, Alloc, FS.get())) {
- Diags.Report(diag::err_drv_expand_response_file)
- << llvm::toString(std::move(E));
- return false;
- }
-
- const std::unique_ptr<driver::Compilation> Compilation(
- Driver->BuildCompilation(llvm::ArrayRef(Argv)));
+  // Compilation holds a non-owning reference to the Driver, so the Driver
+  // must be kept alive for as long as the Compilation is used.
+ auto [Driver, Compilation] = buildCompilation(ArgStrs, Diags, FS);
if (!Compilation)
return false;
-
- if (Compilation->containsError())
- return false;
-
for (const driver::Command &Job : Compilation->getJobs()) {
if (!Callback(Job))
return false;
@@ -150,30 +90,21 @@ static bool forEachDriverJob(
}
static bool createAndRunToolInvocation(
- std::vector<std::string> CommandLine, DependencyScanningAction &Action,
+ const std::vector<std::string> &CommandLine,
+ DependencyScanningAction &Action,
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
std::shared_ptr<clang::PCHContainerOperations> &PCHContainerOps,
DiagnosticsEngine &Diags, DependencyConsumer &Consumer) {
-
- // Save executable path before providing CommandLine to ToolInvocation
- std::string Executable = CommandLine[0];
-
- llvm::opt::ArgStringList Argv;
- for (const std::string &Str : ArrayRef(CommandLine).drop_front())
- Argv.push_back(Str.c_str());
-
- auto Invocation = std::make_shared<CompilerInvocation>();
- if (!CompilerInvocation::CreateFromArgs(*Invocation, Argv, Diags)) {
- // FIXME: Should we just go on like cc1_main does?
+ auto Invocation = createCompilerInvocation(CommandLine, Diags);
+ if (!Invocation)
return false;
- }
if (!Action.runInvocation(std::move(Invocation), std::move(FS),
PCHContainerOps, Diags.getClient()))
return false;
std::vector<std::string> Args = Action.takeLastCC1Arguments();
- Consumer.handleBuildCommand({std::move(Executable), std::move(Args)});
+ Consumer.handleBuildCommand({CommandLine[0], std::move(Args)});
return true;
}
@@ -182,24 +113,19 @@ bool DependencyScanningWorker::scanDependencies(
DependencyConsumer &Consumer, DependencyActionController &Controller,
DiagnosticConsumer &DC, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
std::optional<StringRef> ModuleName) {
- std::vector<const char *> CCommandLine(CommandLine.size(), nullptr);
- llvm::transform(CommandLine, CCommandLine.begin(),
- [](const std::string &Str) { return Str.c_str(); });
- auto DiagOpts = CreateAndPopulateDiagOpts(CCommandLine);
- sanitizeDiagOpts(*DiagOpts);
- auto Diags = CompilerInstance::createDiagnostics(*FS, *DiagOpts, &DC,
- /*ShouldOwnClient=*/false);
-
+ DignosticsEngineWithDiagOpts DiagEngineWithCmdAndOpts(CommandLine, FS, DC);
DependencyScanningAction Action(Service, WorkingDirectory, Consumer,
Controller, DepFS, ModuleName);
bool Success = false;
if (CommandLine[1] == "-cc1") {
- Success = createAndRunToolInvocation(CommandLine, Action, FS,
- PCHContainerOps, *Diags, Consumer);
+ Success = createAndRunToolInvocation(
+ CommandLine, Action, FS, PCHContainerOps,
+ *DiagEngineWithCmdAndOpts.DiagEngine, Consumer);
} else {
Success = forEachDriverJob(
- CommandLine, *Diags, FS, [&](const driver::Command &Cmd) {
+ CommandLine, *DiagEngineWithCmdAndOpts.DiagEngine, FS,
+ [&](const driver::Command &Cmd) {
if (StringRef(Cmd.getCreator().getName()) != "clang") {
// Non-clang command. Just pass through to the dependency
// consumer.
@@ -218,13 +144,15 @@ bool DependencyScanningWorker::scanDependencies(
// system to ensure that any file system requests that
// are made by the driver do not go through the
// dependency scanning filesystem.
- return createAndRunToolInvocation(std::move(Argv), Action, FS,
- PCHContainerOps, *Diags, Consumer);
+ return createAndRunToolInvocation(
+ std::move(Argv), Action, FS, PCHContainerOps,
+ *DiagEngineWithCmdAndOpts.DiagEngine, Consumer);
});
}
if (Success && !Action.hasScanned())
- Diags->Report(diag::err_fe_expected_compiler_job)
+ DiagEngineWithCmdAndOpts.DiagEngine->Report(
+ diag::err_fe_expected_compiler_job)
<< llvm::join(CommandLine, " ");
// Ensure finish() is called even if we never reached ExecuteAction().
@@ -238,66 +166,25 @@ bool DependencyScanningWorker::computeDependencies(
StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
DependencyConsumer &Consumer, DependencyActionController &Controller,
DiagnosticConsumer &DC, std::optional<llvm::MemoryBufferRef> TUBuffer) {
- // Reset what might have been modified in the previous worker invocation.
- BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
-
- std::optional<std::vector<std::string>> ModifiedCommandLine;
- llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> ModifiedFS;
-
- // If we're scanning based on a module name alone, we don't expect the client
- // to provide us with an input file. However, the driver really wants to have
- // one. Let's just make it up to make the driver happy.
if (TUBuffer) {
- auto OverlayFS =
- llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
- auto InMemoryFS =
- llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
- InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
- auto InputPath = TUBuffer->getBufferIdentifier();
- InMemoryFS->addFile(
- InputPath, 0,
- llvm::MemoryBuffer::getMemBufferCopy(TUBuffer->getBuffer()));
- llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay =
- InMemoryFS;
-
- OverlayFS->pushOverlay(InMemoryOverlay);
- ModifiedFS = OverlayFS;
- ModifiedCommandLine = CommandLine;
- ModifiedCommandLine->emplace_back(InputPath);
+ auto [FinalFS, FinalCommandLine] = initVFSForTUBuferScanning(
+ BaseFS, CommandLine, WorkingDirectory, *TUBuffer);
+ return scanDependencies(WorkingDirectory, FinalCommandLine, Consumer,
+ Controller, DC, FinalFS,
+ /*ModuleName=*/std::nullopt);
+ } else {
+ BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
+ return scanDependencies(WorkingDirectory, CommandLine, Consumer, Controller,
+ DC, BaseFS, /*ModuleName=*/std::nullopt);
}
-
- const std::vector<std::string> &FinalCommandLine =
- ModifiedCommandLine ? *ModifiedCommandLine : CommandLine;
- auto &FinalFS = ModifiedFS ? ModifiedFS : BaseFS;
-
- return scanDependencies(WorkingDirectory, FinalCommandLine, Consumer,
- Controller, DC, FinalFS, /*ModuleName=*/std::nullopt);
}
bool DependencyScanningWorker::computeDependencies(
StringRef WorkingDirectory, const std::vector<std::string> &CommandLine,
DependencyConsumer &Consumer, DependencyActionController &Controller,
DiagnosticConsumer &DC, StringRef ModuleName) {
- // Reset what might have been modified in the previous worker invocation.
- BaseFS->setCurrentWorkingDirectory(WorkingDirectory);
-
- // If we're scanning based on a module name alone, we don't expect the client
- // to provide us with an input file. However, the driver really wants to have
- // one. Let's just make it up to make the driver happy.
- auto OverlayFS =
- llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
- auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
- InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
- SmallString<128> FakeInputPath;
- // TODO: We should retry the creation if the path already exists.
- llvm::sys::fs::createUniquePath(ModuleName + "-%%%%%%%%.input", FakeInputPath,
- /*MakeAbsolute=*/false);
- InMemoryFS->addFile(FakeInputPath, 0, llvm::MemoryBuffer::getMemBuffer(""));
- llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> InMemoryOverlay = InMemoryFS;
-
- OverlayFS->pushOverlay(InMemoryOverlay);
- auto ModifiedCommandLine = CommandLine;
- ModifiedCommandLine.emplace_back(FakeInputPath);
+ auto [OverlayFS, ModifiedCommandLine] = initVFSForByNameScanning(
+ BaseFS, CommandLine, WorkingDirectory, ModuleName);
return scanDependencies(WorkingDirectory, ModifiedCommandLine, Consumer,
Controller, DC, OverlayFS, ModuleName);
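The deleted lines above show exactly what initVFSForTUBuferScanning and initVFSForByNameScanning now hide: the driver insists on having an input file, so scanning by module name fabricates one in an in-memory filesystem layered over the real one. The sketch below condenses that overlay setup from the llvm::vfs calls visible in the removed code; overlayFakeInput is a hypothetical name, and the unique-path generation and error handling are elided.

#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"

llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>
overlayFakeInput(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS,
                 llvm::StringRef WorkingDirectory, llvm::StringRef FakePath) {
  auto OverlayFS =
      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(BaseFS);
  auto InMemoryFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
  InMemoryFS->setCurrentWorkingDirectory(WorkingDirectory);
  // An empty buffer is enough; the driver only needs the file to exist.
  InMemoryFS->addFile(FakePath, /*ModificationTime=*/0,
                      llvm::MemoryBuffer::getMemBuffer(""));
  OverlayFS->pushOverlay(InMemoryFS);
  return OverlayFS;
}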
diff --git a/clang/test/CIR/CodeGen/coro-task.cpp b/clang/test/CIR/CodeGen/coro-task.cpp
new file mode 100644
index 0000000..1fc7d77
--- /dev/null
+++ b/clang/test/CIR/CodeGen/coro-task.cpp
@@ -0,0 +1,123 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+
+namespace std {
+
+template<typename T> struct remove_reference { typedef T type; };
+template<typename T> struct remove_reference<T &> { typedef T type; };
+template<typename T> struct remove_reference<T &&> { typedef T type; };
+
+template<typename T>
+typename remove_reference<T>::type &&move(T &&t) noexcept;
+
+template <class Ret, typename... T>
+struct coroutine_traits { using promise_type = typename Ret::promise_type; };
+
+template <class Promise = void>
+struct coroutine_handle {
+ static coroutine_handle from_address(void *) noexcept;
+};
+template <>
+struct coroutine_handle<void> {
+ template <class PromiseType>
+ coroutine_handle(coroutine_handle<PromiseType>) noexcept;
+ static coroutine_handle from_address(void *);
+};
+
+struct suspend_always {
+ bool await_ready() noexcept { return false; }
+ void await_suspend(coroutine_handle<>) noexcept {}
+ void await_resume() noexcept {}
+};
+
+struct suspend_never {
+ bool await_ready() noexcept { return true; }
+ void await_suspend(coroutine_handle<>) noexcept {}
+ void await_resume() noexcept {}
+};
+
+} // namespace std
+
+namespace folly {
+namespace coro {
+
+using std::suspend_always;
+using std::suspend_never;
+using std::coroutine_handle;
+
+using SemiFuture = int;
+
+template<class T>
+struct Task {
+ struct promise_type {
+ Task<T> get_return_object() noexcept;
+ suspend_always initial_suspend() noexcept;
+ suspend_always final_suspend() noexcept;
+ void return_value(T);
+ void unhandled_exception();
+ auto yield_value(Task<T>) noexcept { return final_suspend(); }
+ };
+ bool await_ready() noexcept { return false; }
+ void await_suspend(coroutine_handle<>) noexcept {}
+ T await_resume();
+};
+
+template<>
+struct Task<void> {
+ struct promise_type {
+ Task<void> get_return_object() noexcept;
+ suspend_always initial_suspend() noexcept;
+ suspend_always final_suspend() noexcept;
+ void return_void() noexcept;
+ void unhandled_exception() noexcept;
+ auto yield_value(Task<void>) noexcept { return final_suspend(); }
+ };
+ bool await_ready() noexcept { return false; }
+ void await_suspend(coroutine_handle<>) noexcept {}
+ void await_resume() noexcept {}
+ SemiFuture semi();
+};
+
+// FIXME: add CIRGen support here.
+// struct blocking_wait_fn {
+// template <typename T>
+// T operator()(Task<T>&& awaitable) const {
+// return T();
+// }
+// };
+
+// inline constexpr blocking_wait_fn blocking_wait{};
+// static constexpr blocking_wait_fn const& blockingWait = blocking_wait;
+
+struct co_invoke_fn {
+ template <typename F, typename... A>
+ Task<void> operator()(F&& f, A&&... a) const {
+ return Task<void>();
+ }
+};
+
+co_invoke_fn co_invoke;
+
+}} // namespace folly::coro
+
+// CIR-DAG: ![[VoidTask:.*]] = !cir.record<struct "folly::coro::Task<void>" padded {!u8i}>
+
+// CIR: module {{.*}} {
+// CIR-NEXT: cir.global external @_ZN5folly4coro9co_invokeE = #cir.zero : !rec_folly3A3Acoro3A3Aco_invoke_fn
+
+// CIR: cir.func builtin private @__builtin_coro_id(!u32i, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>) -> !u32i
+
+using VoidTask = folly::coro::Task<void>;
+
+VoidTask silly_task() {
+ co_await std::suspend_always();
+}
+
+// CIR: cir.func coroutine dso_local @_Z10silly_taskv() -> ![[VoidTask]]
+// CIR: %[[#VoidTaskAddr:]] = cir.alloca ![[VoidTask]], {{.*}}, ["__retval"]
+
+// Get coroutine id with __builtin_coro_id.
+
+// CIR: %[[NullPtr:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!void>
+// CIR: %[[Align:.*]] = cir.const #cir.int<16> : !u32i
+// CIR: %[[CoroId:.*]] = cir.call @__builtin_coro_id(%[[Align]], %[[NullPtr]], %[[NullPtr]], %[[NullPtr]])
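The std and folly::coro declarations in the test above are mocks, kept minimal so CIRGen can be exercised without real headers. For comparison, here is the same minimal promise_type contract written against the real <coroutine> header, compilable and runnable as an ordinary C++20 program; the suspend points are switched to suspend_never so the coroutine completes without needing a handle. This is an illustration, not part of the test.

#include <coroutine>

struct VoidTask {
  struct promise_type {
    VoidTask get_return_object() { return {}; }
    std::suspend_never initial_suspend() noexcept { return {}; }
    std::suspend_never final_suspend() noexcept { return {}; }
    void return_void() noexcept {}
    void unhandled_exception() noexcept {}
  };
};

// Runs eagerly to completion and frees its own frame, because both
// suspend points use std::suspend_never.
VoidTask trivial_coro() { co_return; }

int main() { trivial_coro(); }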
diff --git a/clang/test/CIR/CodeGen/predefined-expr.c b/clang/test/CIR/CodeGen/predefined-expr.c
new file mode 100644
index 0000000..674c9bd0
--- /dev/null
+++ b/clang/test/CIR/CodeGen/predefined-expr.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t-cir.ll
+// RUN: FileCheck %s --input-file=%t-cir.ll --check-prefix=LLVM
+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=OGCG
+
+// CIR: cir.global "private" constant cir_private dso_local @__func__.plainFunction = #cir.const_array<"plainFunction\00" : !cir.array<!s8i x 14>>
+// CIR: cir.global "private" constant cir_private dso_local @__PRETTY_FUNCTION__.plainFunction = #cir.const_array<"void plainFunction(void)\00" : !cir.array<!s8i x 25>>
+// CIR: cir.global "private" constant cir_private dso_local @__func__.externFunction = #cir.const_array<"externFunction\00" : !cir.array<!s8i x 15>>
+// CIR: cir.global "private" constant cir_private dso_local @__PRETTY_FUNCTION__.externFunction = #cir.const_array<"void externFunction(void)\00" : !cir.array<!s8i x 26>>
+// CIR: cir.global "private" constant cir_private dso_local @__func__.privateExternFunction = #cir.const_array<"privateExternFunction\00" : !cir.array<!s8i x 22>>
+// CIR: cir.global "private" constant cir_private dso_local @__PRETTY_FUNCTION__.privateExternFunction = #cir.const_array<"void privateExternFunction(void)\00" : !cir.array<!s8i x 33>>
+// CIR: cir.global "private" constant cir_private dso_local @__func__.staticFunction = #cir.const_array<"staticFunction\00" : !cir.array<!s8i x 15>>
+// CIR: cir.global "private" constant cir_private dso_local @__PRETTY_FUNCTION__.staticFunction = #cir.const_array<"void staticFunction(void)\00" : !cir.array<!s8i x 26>>
+
+// TODO(cir): These should be unnamed_addr
+// LLVM: @__func__.plainFunction = private constant [14 x i8] c"plainFunction\00"
+// LLVM: @__PRETTY_FUNCTION__.plainFunction = private constant [25 x i8] c"void plainFunction(void)\00"
+// LLVM: @__func__.externFunction = private constant [15 x i8] c"externFunction\00"
+// LLVM: @__PRETTY_FUNCTION__.externFunction = private constant [26 x i8] c"void externFunction(void)\00"
+// LLVM: @__func__.privateExternFunction = private constant [22 x i8] c"privateExternFunction\00"
+// LLVM: @__PRETTY_FUNCTION__.privateExternFunction = private constant [33 x i8] c"void privateExternFunction(void)\00"
+// LLVM: @__func__.staticFunction = private constant [15 x i8] c"staticFunction\00"
+// LLVM: @__PRETTY_FUNCTION__.staticFunction = private constant [26 x i8] c"void staticFunction(void)\00"
+
+// OGCG: @__func__.plainFunction = private unnamed_addr constant [14 x i8] c"plainFunction\00"
+// OGCG: @__PRETTY_FUNCTION__.plainFunction = private unnamed_addr constant [25 x i8] c"void plainFunction(void)\00"
+// OGCG: @__func__.externFunction = private unnamed_addr constant [15 x i8] c"externFunction\00"
+// OGCG: @__PRETTY_FUNCTION__.externFunction = private unnamed_addr constant [26 x i8] c"void externFunction(void)\00"
+// OGCG: @__func__.privateExternFunction = private unnamed_addr constant [22 x i8] c"privateExternFunction\00"
+// OGCG: @__PRETTY_FUNCTION__.privateExternFunction = private unnamed_addr constant [33 x i8] c"void privateExternFunction(void)\00"
+// OGCG: @__func__.staticFunction = private unnamed_addr constant [15 x i8] c"staticFunction\00"
+// OGCG: @__PRETTY_FUNCTION__.staticFunction = private unnamed_addr constant [26 x i8] c"void staticFunction(void)\00"
+
+int printf(const char *, ...);
+
+void plainFunction(void) {
+ printf("__func__ %s\n", __func__);
+ printf("__FUNCTION__ %s\n", __FUNCTION__);
+ printf("__PRETTY_FUNCTION__ %s\n\n", __PRETTY_FUNCTION__);
+}
+
+extern void externFunction(void) {
+ printf("__func__ %s\n", __func__);
+ printf("__FUNCTION__ %s\n", __FUNCTION__);
+ printf("__PRETTY_FUNCTION__ %s\n\n", __PRETTY_FUNCTION__);
+}
+
+__private_extern__ void privateExternFunction(void) {
+ printf("__func__ %s\n", __func__);
+ printf("__FUNCTION__ %s\n", __FUNCTION__);
+ printf("__PRETTY_FUNCTION__ %s\n\n", __PRETTY_FUNCTION__);
+}
+
+// TODO(cir): Add support for __captured_stmt
+
+static void staticFunction(void) {
+ printf("__func__ %s\n", __func__);
+ printf("__FUNCTION__ %s\n", __FUNCTION__);
+ printf("__PRETTY_FUNCTION__ %s\n\n", __PRETTY_FUNCTION__);
+}
+
+int main(void) {
+ plainFunction();
+ externFunction();
+ privateExternFunction();
+ staticFunction();
+
+ return 0;
+}
diff --git a/clang/test/CIR/IR/func.cir b/clang/test/CIR/IR/func.cir
index 9532859..d7e8184 100644
--- a/clang/test/CIR/IR/func.cir
+++ b/clang/test/CIR/IR/func.cir
@@ -99,4 +99,15 @@ cir.func @ullfunc() -> !u64i {
// CHECK: %[[VAL:.*]] = cir.const #cir.int<42> : !u64i
// CHECK: cir.return %[[VAL:.*]] : !u64i
// CHECK: }
+
+cir.func coroutine @coro() {
+ cir.return
+}
+// CHECK: cir.func{{.*}} coroutine @coro()
+
+cir.func builtin @builtin() {
+ cir.return
+}
+// CHECK: cir.func{{.*}} builtin @builtin()
+
}
diff --git a/clang/test/CodeGenHLSL/resources/AppendStructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/AppendStructuredBuffer-elementtype.hlsl
deleted file mode 100644
index 094006f..0000000
--- a/clang/test/CodeGenHLSL/resources/AppendStructuredBuffer-elementtype.hlsl
+++ /dev/null
@@ -1,54 +0,0 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=DXIL
-// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPV
-
-struct MyStruct {
- float4 a;
- int2 b;
-};
-
-// DXIL: %"class.hlsl::AppendStructuredBuffer" = type { target("dx.RawBuffer", i16, 1, 0)
-// DXIL: %"class.hlsl::AppendStructuredBuffer.0" = type { target("dx.RawBuffer", i16, 1, 0)
-// DXIL: %"class.hlsl::AppendStructuredBuffer.1" = type { target("dx.RawBuffer", i32, 1, 0)
-// DXIL: %"class.hlsl::AppendStructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 0)
-// DXIL: %"class.hlsl::AppendStructuredBuffer.3" = type { target("dx.RawBuffer", i64, 1, 0)
-// DXIL: %"class.hlsl::AppendStructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 0)
-// DXIL: %"class.hlsl::AppendStructuredBuffer.5" = type { target("dx.RawBuffer", half, 1, 0)
-// DXIL: %"class.hlsl::AppendStructuredBuffer.6" = type { target("dx.RawBuffer", float, 1, 0)
-// DXIL: %"class.hlsl::AppendStructuredBuffer.7" = type { target("dx.RawBuffer", double, 1, 0)
-// DXIL: %"class.hlsl::AppendStructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 1, 0)
-// DXIL: %"class.hlsl::AppendStructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 1, 0)
-// DXIL: %"class.hlsl::AppendStructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 1, 0)
-// DXIL: %"class.hlsl::AppendStructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 1, 0)
-// DXIL: %"class.hlsl::AppendStructuredBuffer.12" = type { target("dx.RawBuffer", %struct.MyStruct, 1, 0)
-// DXIL: %struct.MyStruct = type <{ <4 x float>, <2 x i32> }>
-// DXIL: %"class.hlsl::AppendStructuredBuffer.13" = type { target("dx.RawBuffer", i32, 1, 0)
-// SPV: %"class.hlsl::AppendStructuredBuffer.13" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1)
-// DXIL: %"class.hlsl::AppendStructuredBuffer.14" = type { target("dx.RawBuffer", <4 x i32>, 1, 0)
-// SPV: %"class.hlsl::AppendStructuredBuffer.14" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1)
-
-AppendStructuredBuffer<int16_t> BufI16;
-AppendStructuredBuffer<uint16_t> BufU16;
-AppendStructuredBuffer<int> BufI32;
-AppendStructuredBuffer<uint> BufU32;
-AppendStructuredBuffer<int64_t> BufI64;
-AppendStructuredBuffer<uint64_t> BufU64;
-AppendStructuredBuffer<half> BufF16;
-AppendStructuredBuffer<float> BufF32;
-AppendStructuredBuffer<double> BufF64;
-AppendStructuredBuffer< vector<int16_t, 4> > BufI16x4;
-AppendStructuredBuffer< vector<uint, 3> > BufU32x3;
-AppendStructuredBuffer<half2> BufF16x2;
-AppendStructuredBuffer<float3> BufF32x3;
-// TODO: AppendStructuredBuffer<snorm half> BufSNormF16;
-// TODO: AppendStructuredBuffer<unorm half> BufUNormF16;
-// TODO: AppendStructuredBuffer<snorm float> BufSNormF32;
-// TODO: AppendStructuredBuffer<unorm float> BufUNormF32;
-// TODO: AppendStructuredBuffer<snorm double> BufSNormF64;
-// TODO: AppendStructuredBuffer<unorm double> BufUNormF64;
-AppendStructuredBuffer<MyStruct> BufMyStruct;
-AppendStructuredBuffer<bool> BufBool;
-AppendStructuredBuffer<bool4> BufBoolVec;
-
-[numthreads(1,1,1)]
-void main(int GI : SV_GroupIndex) {
-}
diff --git a/clang/test/CodeGenHLSL/resources/ConsumeStructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/ConsumeStructuredBuffer-elementtype.hlsl
deleted file mode 100644
index 632fd91..0000000
--- a/clang/test/CodeGenHLSL/resources/ConsumeStructuredBuffer-elementtype.hlsl
+++ /dev/null
@@ -1,54 +0,0 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=DXIL
-// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPV
-
-struct MyStruct {
- float4 a;
- int2 b;
-};
-
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer" = type { target("dx.RawBuffer", i16, 1, 0)
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.0" = type { target("dx.RawBuffer", i16, 1, 0)
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.1" = type { target("dx.RawBuffer", i32, 1, 0)
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 0)
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.3" = type { target("dx.RawBuffer", i64, 1, 0)
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 0)
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.5" = type { target("dx.RawBuffer", half, 1, 0)
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.6" = type { target("dx.RawBuffer", float, 1, 0)
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.7" = type { target("dx.RawBuffer", double, 1, 0)
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 1, 0)
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 1, 0)
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 1, 0)
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 1, 0)
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.12" = type { target("dx.RawBuffer", %struct.MyStruct, 1, 0)
-// DXIL: %struct.MyStruct = type <{ <4 x float>, <2 x i32> }>
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.13" = type { target("dx.RawBuffer", i32, 1, 0)
-// SPV: %"class.hlsl::ConsumeStructuredBuffer.13" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1)
-// DXIL: %"class.hlsl::ConsumeStructuredBuffer.14" = type { target("dx.RawBuffer", <4 x i32>, 1, 0)
-// SPV: %"class.hlsl::ConsumeStructuredBuffer.14" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1)
-
-ConsumeStructuredBuffer<int16_t> BufI16;
-ConsumeStructuredBuffer<uint16_t> BufU16;
-ConsumeStructuredBuffer<int> BufI32;
-ConsumeStructuredBuffer<uint> BufU32;
-ConsumeStructuredBuffer<int64_t> BufI64;
-ConsumeStructuredBuffer<uint64_t> BufU64;
-ConsumeStructuredBuffer<half> BufF16;
-ConsumeStructuredBuffer<float> BufF32;
-ConsumeStructuredBuffer<double> BufF64;
-ConsumeStructuredBuffer< vector<int16_t, 4> > BufI16x4;
-ConsumeStructuredBuffer< vector<uint, 3> > BufU32x3;
-ConsumeStructuredBuffer<half2> BufF16x2;
-ConsumeStructuredBuffer<float3> BufF32x3;
-// TODO: ConsumeStructuredBuffer<snorm half> BufSNormF16;
-// TODO: ConsumeStructuredBuffer<unorm half> BufUNormF16;
-// TODO: ConsumeStructuredBuffer<snorm float> BufSNormF32;
-// TODO: ConsumeStructuredBuffer<unorm float> BufUNormF32;
-// TODO: ConsumeStructuredBuffer<snorm double> BufSNormF64;
-// TODO: ConsumeStructuredBuffer<unorm double> BufUNormF64;
-ConsumeStructuredBuffer<MyStruct> BufMyStruct;
-ConsumeStructuredBuffer<bool> BufBool;
-ConsumeStructuredBuffer<bool4> BufBoolVec;
-
-[numthreads(1,1,1)]
-void main(int GI : SV_GroupIndex) {
-}
diff --git a/clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl
deleted file mode 100644
index 9f0a5b7..0000000
--- a/clang/test/CodeGenHLSL/resources/RWStructuredBuffer-elementtype.hlsl
+++ /dev/null
@@ -1,74 +0,0 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK
-// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPV
-
-// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i16, 1, 0), target("dx.RawBuffer", i16, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.0" = type { target("dx.RawBuffer", i16, 1, 0), target("dx.RawBuffer", i16, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.0" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.1" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.1" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.2" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.2" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.3" = type { target("dx.RawBuffer", i64, 1, 0), target("dx.RawBuffer", i64, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.3" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.4" = type { target("dx.RawBuffer", i64, 1, 0), target("dx.RawBuffer", i64, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.4" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.5" = type { target("dx.RawBuffer", half, 1, 0), target("dx.RawBuffer", half, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.5" = type { target("spirv.VulkanBuffer", [0 x half], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.6" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.6" = type { target("spirv.VulkanBuffer", [0 x float], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.7" = type { target("dx.RawBuffer", double, 1, 0), target("dx.RawBuffer", double, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.7" = type { target("spirv.VulkanBuffer", [0 x double], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 1, 0), target("dx.RawBuffer", <4 x i16>, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.8" = type { target("spirv.VulkanBuffer", [0 x <4 x i16>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 1, 0), target("dx.RawBuffer", <3 x i32>, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.9" = type { target("spirv.VulkanBuffer", [0 x <3 x i32>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 1, 0), target("dx.RawBuffer", <2 x half>, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.10" = type { target("spirv.VulkanBuffer", [0 x <2 x half>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 1, 0), target("dx.RawBuffer", <3 x float>, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.11" = type { target("spirv.VulkanBuffer", [0 x <3 x float>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.12" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.12" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-// CHECK: %"class.hlsl::RWStructuredBuffer.13" = type { target("dx.RawBuffer", <4 x i32>, 1, 0), target("dx.RawBuffer", <4 x i32>, 1, 0) }
-// SPV: %"class.hlsl::RWStructuredBuffer.13" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-
-RWStructuredBuffer<int16_t> BufI16;
-RWStructuredBuffer<uint16_t> BufU16;
-RWStructuredBuffer<int> BufI32;
-RWStructuredBuffer<uint> BufU32;
-RWStructuredBuffer<int64_t> BufI64;
-RWStructuredBuffer<uint64_t> BufU64;
-RWStructuredBuffer<half> BufF16;
-RWStructuredBuffer<float> BufF32;
-RWStructuredBuffer<double> BufF64;
-RWStructuredBuffer< vector<int16_t, 4> > BufI16x4;
-RWStructuredBuffer< vector<uint, 3> > BufU32x3;
-RWStructuredBuffer<half2> BufF16x2;
-RWStructuredBuffer<float3> BufF32x3;
-RWStructuredBuffer<bool> BufBool;
-RWStructuredBuffer<bool4> BufBoolVec;
-// TODO: RWStructuredBuffer<snorm half> BufSNormF16;
-// TODO: RWStructuredBuffer<unorm half> BufUNormF16;
-// TODO: RWStructuredBuffer<snorm float> BufSNormF32;
-// TODO: RWStructuredBuffer<unorm float> BufUNormF32;
-// TODO: RWStructuredBuffer<snorm double> BufSNormF64;
-// TODO: RWStructuredBuffer<unorm double> BufUNormF64;
-
-[numthreads(1,1,1)]
-void main(int GI : SV_GroupIndex) {
- BufI16[GI] = 0;
- BufU16[GI] = 0;
- BufI32[GI] = 0;
- BufU32[GI] = 0;
- BufI64[GI] = 0;
- BufU64[GI] = 0;
- BufF16[GI] = 0;
- BufF32[GI] = 0;
- BufF64[GI] = 0;
- BufI16x4[GI] = 0;
- BufU32x3[GI] = 0;
- BufF16x2[GI] = 0;
- BufF32x3[GI] = 0;
- BufBool[GI] = false;
- BufBool[GI] = false;
-}
diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffer-elementtype.hlsl
deleted file mode 100644
index 00216df..0000000
--- a/clang/test/CodeGenHLSL/resources/StructuredBuffer-elementtype.hlsl
+++ /dev/null
@@ -1,61 +0,0 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK
-// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type -emit-llvm -o - %s | FileCheck %s -check-prefixes=SPV
-
-// CHECK: %"class.hlsl::StructuredBuffer" = type { target("dx.RawBuffer", i16, 0, 0) }
-// CHECK: %"class.hlsl::StructuredBuffer.0" = type { target("dx.RawBuffer", i16, 0, 0) }
-// CHECK: %"class.hlsl::StructuredBuffer.1" = type { target("dx.RawBuffer", i32, 0, 0) }
-// CHECK: %"class.hlsl::StructuredBuffer.2" = type { target("dx.RawBuffer", i32, 0, 0) }
-// CHECK: %"class.hlsl::StructuredBuffer.3" = type { target("dx.RawBuffer", i64, 0, 0) }
-// CHECK: %"class.hlsl::StructuredBuffer.4" = type { target("dx.RawBuffer", i64, 0, 0) }
-// CHECK: %"class.hlsl::StructuredBuffer.5" = type { target("dx.RawBuffer", half, 0, 0) }
-// CHECK: %"class.hlsl::StructuredBuffer.6" = type { target("dx.RawBuffer", float, 0, 0) }
-// CHECK: %"class.hlsl::StructuredBuffer.7" = type { target("dx.RawBuffer", double, 0, 0) }
-// CHECK: %"class.hlsl::StructuredBuffer.8" = type { target("dx.RawBuffer", <4 x i16>, 0, 0) }
-// CHECK: %"class.hlsl::StructuredBuffer.9" = type { target("dx.RawBuffer", <3 x i32>, 0, 0) }
-// CHECK: %"class.hlsl::StructuredBuffer.10" = type { target("dx.RawBuffer", <2 x half>, 0, 0) }
-// CHECK: %"class.hlsl::StructuredBuffer.11" = type { target("dx.RawBuffer", <3 x float>, 0, 0) }
-// CHECK: %"class.hlsl::StructuredBuffer.12" = type { target("dx.RawBuffer", i32, 0, 0) }
-// SPV: %"class.hlsl::StructuredBuffer.12" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 0)
-// CHECK: %"class.hlsl::StructuredBuffer.13" = type { target("dx.RawBuffer", <4 x i32>, 0, 0) }
-// SPV: %"class.hlsl::StructuredBuffer.13" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 0)
-
-StructuredBuffer<int16_t> BufI16;
-StructuredBuffer<uint16_t> BufU16;
-StructuredBuffer<int> BufI32;
-StructuredBuffer<uint> BufU32;
-StructuredBuffer<int64_t> BufI64;
-StructuredBuffer<uint64_t> BufU64;
-StructuredBuffer<half> BufF16;
-StructuredBuffer<float> BufF32;
-StructuredBuffer<double> BufF64;
-StructuredBuffer< vector<int16_t, 4> > BufI16x4;
-StructuredBuffer< vector<uint, 3> > BufU32x3;
-StructuredBuffer<half2> BufF16x2;
-StructuredBuffer<float3> BufF32x3;
-StructuredBuffer<bool> BufBool;
-StructuredBuffer<bool4> BufBoolVec;
-// TODO: StructuredBuffer<snorm half> BufSNormF16;
-// TODO: StructuredBuffer<unorm half> BufUNormF16;
-// TODO: StructuredBuffer<snorm float> BufSNormF32;
-// TODO: StructuredBuffer<unorm float> BufUNormF32;
-// TODO: StructuredBuffer<snorm double> BufSNormF64;
-// TODO: StructuredBuffer<unorm double> BufUNormF64;
-
-[numthreads(1,1,1)]
-void main(int GI : SV_GroupIndex) {
- int16_t v1 = BufI16[GI];
- uint16_t v2 = BufU16[GI];
- int v3 = BufI32[GI];
- uint v4 = BufU32[GI];
- int64_t v5 = BufI64[GI];
- uint64_t v6 = BufU64[GI];
- half v7 = BufF16[GI];
- float v8 = BufF32[GI];
- double v9 = BufF64[GI];
- vector<int16_t,4> v10 = BufI16x4[GI];
- vector<int, 3> v11 = BufU32x3[GI];
- half2 v12 = BufF16x2[GI];
- float3 v13 = BufF32x3[GI];
- bool v14 = BufBool[GI];
- bool4 v15 = BufBoolVec[GI];
-}
diff --git a/clang/test/CodeGenHLSL/resources/StructuredBuffers-elementtype.hlsl b/clang/test/CodeGenHLSL/resources/StructuredBuffers-elementtype.hlsl
new file mode 100644
index 0000000..2b286bd
--- /dev/null
+++ b/clang/test/CodeGenHLSL/resources/StructuredBuffers-elementtype.hlsl
@@ -0,0 +1,113 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=StructuredBuffer %s | FileCheck %s -DRESOURCE=StructuredBuffer -check-prefixes=DXIL-RO
+
+// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=StructuredBuffer %s | FileCheck %s -DRESOURCE=StructuredBuffer -check-prefixes=SPV-RO
+
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=RWStructuredBuffer %s | FileCheck %s -DRESOURCE=RWStructuredBuffer -check-prefixes=DXIL-RW
+
+// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=RWStructuredBuffer %s | FileCheck %s -DRESOURCE=RWStructuredBuffer -check-prefixes=SPV-RW
+
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=AppendStructuredBuffer %s | FileCheck %s -DRESOURCE=AppendStructuredBuffer -check-prefixes=DXIL-RW
+
+// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=AppendStructuredBuffer %s | FileCheck %s -DRESOURCE=AppendStructuredBuffer -check-prefixes=SPV-RW
+
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=ConsumeStructuredBuffer %s | FileCheck %s -DRESOURCE=ConsumeStructuredBuffer -check-prefixes=DXIL-RW
+
+// RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-compute -finclude-default-header -fnative-half-type \
+// RUN: -emit-llvm -o - -DRESOURCE=ConsumeStructuredBuffer %s | FileCheck %s -DRESOURCE=ConsumeStructuredBuffer -check-prefixes=SPV-RW
+
+// DXIL-RO: %"class.hlsl::[[RESOURCE]]" = type { target("dx.RawBuffer", i16, 0, 0) }
+// DXIL-RO: %"class.hlsl::[[RESOURCE]].0" = type { target("dx.RawBuffer", i16, 0, 0) }
+// DXIL-RO: %"class.hlsl::[[RESOURCE]].1" = type { target("dx.RawBuffer", i32, 0, 0) }
+// DXIL-RO: %"class.hlsl::[[RESOURCE]].2" = type { target("dx.RawBuffer", i32, 0, 0) }
+// DXIL-RO: %"class.hlsl::[[RESOURCE]].3" = type { target("dx.RawBuffer", i64, 0, 0) }
+// DXIL-RO: %"class.hlsl::[[RESOURCE]].4" = type { target("dx.RawBuffer", i64, 0, 0) }
+// DXIL-RO: %"class.hlsl::[[RESOURCE]].5" = type { target("dx.RawBuffer", half, 0, 0) }
+// DXIL-RO: %"class.hlsl::[[RESOURCE]].6" = type { target("dx.RawBuffer", float, 0, 0) }
+// DXIL-RO: %"class.hlsl::[[RESOURCE]].7" = type { target("dx.RawBuffer", double, 0, 0) }
+// DXIL-RO: %"class.hlsl::[[RESOURCE]].8" = type { target("dx.RawBuffer", <4 x i16>, 0, 0) }
+// DXIL-RO: %"class.hlsl::[[RESOURCE]].9" = type { target("dx.RawBuffer", <3 x i32>, 0, 0) }
+// DXIL-RO: %"class.hlsl::[[RESOURCE]].10" = type { target("dx.RawBuffer", <2 x half>, 0, 0) }
+// DXIL-RO: %"class.hlsl::[[RESOURCE]].11" = type { target("dx.RawBuffer", <3 x float>, 0, 0) }
+// DXIL-RO: %"class.hlsl::[[RESOURCE]].12" = type { target("dx.RawBuffer", i32, 0, 0) }
+// DXIL-RO: %"class.hlsl::[[RESOURCE]].13" = type { target("dx.RawBuffer", <4 x i32>, 0, 0) }
+
+// DXIL-RW: %"class.hlsl::[[RESOURCE]]" = type { target("dx.RawBuffer", i16, 1, 0), target("dx.RawBuffer", i16, 1, 0) }
+// DXIL-RW: %"class.hlsl::[[RESOURCE]].0" = type { target("dx.RawBuffer", i16, 1, 0), target("dx.RawBuffer", i16, 1, 0) }
+// DXIL-RW: %"class.hlsl::[[RESOURCE]].1" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) }
+// DXIL-RW: %"class.hlsl::[[RESOURCE]].2" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) }
+// DXIL-RW: %"class.hlsl::[[RESOURCE]].3" = type { target("dx.RawBuffer", i64, 1, 0), target("dx.RawBuffer", i64, 1, 0) }
+// DXIL-RW: %"class.hlsl::[[RESOURCE]].4" = type { target("dx.RawBuffer", i64, 1, 0), target("dx.RawBuffer", i64, 1, 0) }
+// DXIL-RW: %"class.hlsl::[[RESOURCE]].5" = type { target("dx.RawBuffer", half, 1, 0), target("dx.RawBuffer", half, 1, 0) }
+// DXIL-RW: %"class.hlsl::[[RESOURCE]].6" = type { target("dx.RawBuffer", float, 1, 0), target("dx.RawBuffer", float, 1, 0) }
+// DXIL-RW: %"class.hlsl::[[RESOURCE]].7" = type { target("dx.RawBuffer", double, 1, 0), target("dx.RawBuffer", double, 1, 0) }
+// DXIL-RW: %"class.hlsl::[[RESOURCE]].8" = type { target("dx.RawBuffer", <4 x i16>, 1, 0), target("dx.RawBuffer", <4 x i16>, 1, 0) }
+// DXIL-RW: %"class.hlsl::[[RESOURCE]].9" = type { target("dx.RawBuffer", <3 x i32>, 1, 0), target("dx.RawBuffer", <3 x i32>, 1, 0) }
+// DXIL-RW: %"class.hlsl::[[RESOURCE]].10" = type { target("dx.RawBuffer", <2 x half>, 1, 0), target("dx.RawBuffer", <2 x half>, 1, 0) }
+// DXIL-RW: %"class.hlsl::[[RESOURCE]].11" = type { target("dx.RawBuffer", <3 x float>, 1, 0), target("dx.RawBuffer", <3 x float>, 1, 0) }
+// DXIL-RW: %"class.hlsl::[[RESOURCE]].12" = type { target("dx.RawBuffer", i32, 1, 0), target("dx.RawBuffer", i32, 1, 0) }
+// DXIL-RW: %"class.hlsl::[[RESOURCE]].13" = type { target("dx.RawBuffer", <4 x i32>, 1, 0), target("dx.RawBuffer", <4 x i32>, 1, 0) }
+
+// SPV-RO: %"class.hlsl::[[RESOURCE]]" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].0" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].1" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].2" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].3" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].4" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].5" = type { target("spirv.VulkanBuffer", [0 x half], 12, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].6" = type { target("spirv.VulkanBuffer", [0 x float], 12, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].7" = type { target("spirv.VulkanBuffer", [0 x double], 12, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].8" = type { target("spirv.VulkanBuffer", [0 x <4 x i16>], 12, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].9" = type { target("spirv.VulkanBuffer", [0 x <3 x i32>], 12, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].10" = type { target("spirv.VulkanBuffer", [0 x <2 x half>], 12, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].11" = type { target("spirv.VulkanBuffer", [0 x <3 x float>], 12, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].12" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 0) }
+// SPV-RO: %"class.hlsl::[[RESOURCE]].13" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 0) }
+
+// SPV-RW: %"class.hlsl::[[RESOURCE]]" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].0" = type { target("spirv.VulkanBuffer", [0 x i16], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].1" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].2" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].3" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].4" = type { target("spirv.VulkanBuffer", [0 x i64], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].5" = type { target("spirv.VulkanBuffer", [0 x half], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].6" = type { target("spirv.VulkanBuffer", [0 x float], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].7" = type { target("spirv.VulkanBuffer", [0 x double], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].8" = type { target("spirv.VulkanBuffer", [0 x <4 x i16>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].9" = type { target("spirv.VulkanBuffer", [0 x <3 x i32>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].10" = type { target("spirv.VulkanBuffer", [0 x <2 x half>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].11" = type { target("spirv.VulkanBuffer", [0 x <3 x float>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].12" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+// SPV-RW: %"class.hlsl::[[RESOURCE]].13" = type { target("spirv.VulkanBuffer", [0 x <4 x i32>], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+
+RESOURCE<int16_t> BufI16;
+RESOURCE<uint16_t> BufU16;
+RESOURCE<int> BufI32;
+RESOURCE<uint> BufU32;
+RESOURCE<int64_t> BufI64;
+RESOURCE<uint64_t> BufU64;
+RESOURCE<half> BufF16;
+RESOURCE<float> BufF32;
+RESOURCE<double> BufF64;
+RESOURCE< vector<int16_t, 4> > BufI16x4;
+RESOURCE< vector<uint, 3> > BufU32x3;
+RESOURCE<half2> BufF16x2;
+RESOURCE<float3> BufF32x3;
+RESOURCE<bool> BufBool;
+RESOURCE<bool4> BufBoolVec;
+// TODO: RESOURCE<snorm half> BufSNormF16;
+// TODO: RESOURCE<unorm half> BufUNormF16;
+// TODO: RESOURCE<snorm float> BufSNormF32;
+// TODO: RESOURCE<unorm float> BufUNormF32;
+// TODO: RESOURCE<snorm double> BufSNormF64;
+// TODO: RESOURCE<unorm double> BufUNormF64;
+
+[numthreads(1,1,1)]
+void main() {
+}
diff --git a/clang/test/Driver/dxc_frs.hlsl b/clang/test/Driver/dxc_frs.hlsl
index 767cab6..ffc3886 100644
--- a/clang/test/Driver/dxc_frs.hlsl
+++ b/clang/test/Driver/dxc_frs.hlsl
@@ -1,10 +1,9 @@
-// RUN: %clang_dxc -T cs_6_0 /Fo %t.dxo /Frs %t.rs.dxo -### %s 2>&1 | FileCheck %s
+// RUN: %clang_dxc -Vd -T cs_6_0 /Fo %t.dxo /Frs %t.rs.dxo -### %s 2>&1 | FileCheck %s
// Test to demonstrate extracting the root signature to the specified
// output file with /Frs.
// CHECK: "{{.*}}llvm-objcopy{{(.exe)?}}" "{{.*}}.obj" "{{.*}}.dxo" "--extract-section=RTS0={{.*}}.rs.dxo"
-
[shader("compute"), RootSignature("")]
[numthreads(1,1,1)]
void EmptyEntry() {}
diff --git a/clang/test/Driver/dxc_rootsignature_target.hlsl b/clang/test/Driver/dxc_rootsignature_target.hlsl
index 08cd1ab..bb48063 100644
--- a/clang/test/Driver/dxc_rootsignature_target.hlsl
+++ b/clang/test/Driver/dxc_rootsignature_target.hlsl
@@ -1,4 +1,4 @@
-// RUN: %clang_dxc -E EntryRS -T rootsig_1_1 /Fo %t.dxo -### %s 2>&1 | FileCheck %s --check-prefix=CMDS
+// RUN: %clang_dxc -Vd -E EntryRS -T rootsig_1_1 /Fo %t.dxo -### %s 2>&1 | FileCheck %s --check-prefix=CMDS
// CMDS: "{{.*}}clang{{.*}}" "-cc1"
// CMDS-SAME: "-triple" "dxilv1.1-unknown-shadermodel1.1-rootsignature"
diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip
index 6206020..09f1ffa 100644
--- a/clang/test/Driver/hip-options.hip
+++ b/clang/test/Driver/hip-options.hip
@@ -254,3 +254,9 @@
// RUN: --offload-arch=gfx1100 --offload-new-driver --offload-jobs=0x4 %s 2>&1 | \
// RUN: FileCheck -check-prefix=INVJOBS %s
// INVJOBS: clang: error: invalid integral value '0x4' in '--offload-jobs=0x4'
+
+// RUN: %clang -### -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
+// RUN: --offload-arch=gfx1100 --offload-new-driver --offload-jobs=jobserver %s 2>&1 | \
+// RUN: FileCheck -check-prefix=JOBSV %s
+// JOBSV: clang-linker-wrapper{{.*}} "--wrapper-jobs=jobserver"
+
diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c
index c060dae..1c0fb96 100644
--- a/clang/test/Driver/linker-wrapper.c
+++ b/clang/test/Driver/linker-wrapper.c
@@ -114,6 +114,8 @@ __attribute__((visibility("protected"), used)) int x;
// RUN: -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --wrapper-jobs=4 \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-PAR
+// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --wrapper-jobs=jobserver \
+// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-PAR
// CUDA-PAR: fatbinary{{.*}}-64 --create {{.*}}.fatbin
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index 1419b8c..4d5b956 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -1295,12 +1295,18 @@ int main(int Argc, char **Argv) {
parallel::strategy = hardware_concurrency(1);
if (auto *Arg = Args.getLastArg(OPT_wrapper_jobs)) {
- unsigned Threads = 0;
- if (!llvm::to_integer(Arg->getValue(), Threads) || Threads == 0)
- reportError(createStringError("%s: expected a positive integer, got '%s'",
- Arg->getSpelling().data(),
- Arg->getValue()));
- parallel::strategy = hardware_concurrency(Threads);
+ StringRef Val = Arg->getValue();
+ if (Val.equals_insensitive("jobserver"))
+ parallel::strategy = jobserver_concurrency();
+ else {
+ unsigned Threads = 0;
+ if (!llvm::to_integer(Val, Threads) || Threads == 0)
+ reportError(createStringError(
+ "%s: expected a positive integer or 'jobserver', got '%s'",
+ Arg->getSpelling().data(), Val.data()));
+ else
+ parallel::strategy = hardware_concurrency(Threads);
+ }
}
if (Args.hasArg(OPT_wrapper_time_trace_eq)) {
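The new branch above accepts either the literal 'jobserver' or a positive integer for --wrapper-jobs. Below is a standalone sketch of that value parsing; parseWrapperJobs is a hypothetical helper, while llvm::to_integer and StringRef::equals_insensitive are the same calls the diff uses.

#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include <optional>

// std::nullopt selects the jobserver strategy; a value is an explicit
// thread count. Malformed input is reported through Malformed.
std::optional<unsigned> parseWrapperJobs(llvm::StringRef Val,
                                         bool &Malformed) {
  Malformed = false;
  if (Val.equals_insensitive("jobserver"))
    return std::nullopt;
  unsigned Threads = 0;
  if (!llvm::to_integer(Val, Threads) || Threads == 0) {
    Malformed = true;
    return std::nullopt;
  }
  return Threads;
}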
diff --git a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
index fa73e02..87f911c 100644
--- a/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
+++ b/clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
@@ -53,7 +53,8 @@ def wrapper_time_trace_granularity : Joined<["--"], "wrapper-time-trace-granular
def wrapper_jobs : Joined<["--"], "wrapper-jobs=">,
Flags<[WrapperOnlyOption]>, MetaVarName<"<number>">,
- HelpText<"Sets the number of parallel jobs to use for device linking">;
+ HelpText<"Sets the number of parallel jobs for device linking. Can be a "
+ "positive integer or 'jobserver'.">;
def override_image : Joined<["--"], "override-image=">,
Flags<[WrapperOnlyOption]>, MetaVarName<"<kind=file>">,
diff --git a/clang/unittests/Format/FormatTestObjC.cpp b/clang/unittests/Format/FormatTestObjC.cpp
index f7f73db..700d7cf8 100644
--- a/clang/unittests/Format/FormatTestObjC.cpp
+++ b/clang/unittests/Format/FormatTestObjC.cpp
@@ -763,6 +763,15 @@ TEST_F(FormatTestObjC, FormatObjCMethodExpr) {
" backing:NSBackingStoreBuffered\n"
" defer:NO]);\n"
"}");
+ Style.ColumnLimit = 63;
+ verifyFormat(
+ "- (void)test {\n"
+ " if ([object\n"
+ " respondsToSelector:@selector(\n"
+ " selectorName:param1:param2:)])\n"
+ " return;\n"
+ "}");
+ Style.ColumnLimit = PreviousColumnLimit;
verifyFormat("[contentsContainer replaceSubview:[subviews objectAtIndex:0]\n"
" with:contentsNativeView];");
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 4a8f27f..c21b118 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -1929,6 +1929,37 @@ TEST_F(TokenAnnotatorTest, UnderstandsObjCMethodExpr) {
ASSERT_EQ(Tokens.size(), 20u) << Tokens;
EXPECT_TOKEN(Tokens[9], tok::l_square, TT_ObjCMethodExpr);
EXPECT_TOKEN(Tokens[15], tok::greater, TT_BinaryOperator);
+
+ Tokens = annotate("a = @selector(name:);");
+ ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+ EXPECT_TOKEN(Tokens[4], tok::l_paren, TT_ObjCSelector);
+ EXPECT_TOKEN(Tokens[6], tok::colon, TT_ObjCSelector);
+ EXPECT_TOKEN(Tokens[7], tok::r_paren, TT_ObjCSelector);
+
+ Tokens =
+ annotate("[object respondsToSelector:@selector(name:param1:param2:)\n"
+ " respondsToSelector:@selector(name:param1:param2:)];");
+ ASSERT_EQ(Tokens.size(), 29u) << Tokens;
+ EXPECT_TOKEN(Tokens[0], tok::l_square, TT_ObjCMethodExpr);
+ EXPECT_TOKEN(Tokens[3], tok::colon, TT_ObjCMethodExpr);
+ EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_ObjCSelector);
+ EXPECT_TOKEN(Tokens[8], tok::colon, TT_ObjCSelector);
+ EXPECT_TOKEN(Tokens[10], tok::colon, TT_ObjCSelector);
+ EXPECT_TOKEN(Tokens[12], tok::colon, TT_ObjCSelector);
+ EXPECT_TOKEN(Tokens[13], tok::r_paren, TT_ObjCSelector);
+ EXPECT_TOKEN(Tokens[15], tok::colon, TT_ObjCMethodExpr);
+ EXPECT_TOKEN(Tokens[18], tok::l_paren, TT_ObjCSelector);
+ EXPECT_TOKEN(Tokens[20], tok::colon, TT_ObjCSelector);
+ EXPECT_TOKEN(Tokens[22], tok::colon, TT_ObjCSelector);
+ EXPECT_TOKEN(Tokens[24], tok::colon, TT_ObjCSelector);
+ EXPECT_TOKEN(Tokens[25], tok::r_paren, TT_ObjCSelector);
+ EXPECT_TOKEN(Tokens[26], tok::r_square, TT_ObjCMethodExpr);
+
+ Tokens = annotate("[a b:c];");
+ ASSERT_EQ(Tokens.size(), 8u) << Tokens;
+ EXPECT_TOKEN(Tokens[0], tok::l_square, TT_ObjCMethodExpr);
+ EXPECT_TOKEN(Tokens[3], tok::colon, TT_ObjCMethodExpr);
+ EXPECT_TOKEN(Tokens[5], tok::r_square, TT_ObjCMethodExpr);
}
TEST_F(TokenAnnotatorTest, UnderstandsObjCMethodDecl) {
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 9095b05..6c226aa 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -819,7 +819,7 @@ set(s390x_SOURCES
set(wasm_SOURCES
wasm/__c_longjmp.S
- wasm/__cpp_exceptions.S
+ wasm/__cpp_exception.S
${GENERIC_TF_SOURCES}
${GENERIC_SOURCES}
)
diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt
index 6548ec9..e8f70bd 100644
--- a/flang-rt/lib/runtime/CMakeLists.txt
+++ b/flang-rt/lib/runtime/CMakeLists.txt
@@ -178,9 +178,6 @@ endif ()
if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn|^nvptx")
set(sources ${gpu_sources})
elseif(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA")
- # findloc.cpp has some issues with higher compute capability. Remove it
- # from CUDA build until we can lower its memory footprint.
- list(REMOVE_ITEM supported_sources findloc.cpp)
set(sources ${supported_sources})
else ()
set(sources ${supported_sources} ${host_sources} ${f128_sources})
diff --git a/flang-rt/lib/runtime/extrema.cpp b/flang-rt/lib/runtime/extrema.cpp
index 9846529..c4575cc 100644
--- a/flang-rt/lib/runtime/extrema.cpp
+++ b/flang-rt/lib/runtime/extrema.cpp
@@ -397,9 +397,12 @@ template <TypeCategory CAT, bool IS_MAX,
template <typename, bool, bool> class COMPARE>
struct DoPartialMaxOrMinLocHelper {
template <int KIND> struct Functor {
- RT_API_ATTRS void operator()(const char *intrinsic, Descriptor &result,
- const Descriptor &x, int kind, int dim, const Descriptor *mask,
- bool back, Terminator &terminator) const {
+    // NVCC inlines more aggressively, and inlining too many specializations
+    // of this function leads to compiler timeouts. Mark it noinline so that
+    // compilation can complete.
+ RT_API_ATTRS RT_DEVICE_NOINLINE void operator()(const char *intrinsic,
+ Descriptor &result, const Descriptor &x, int kind, int dim,
+ const Descriptor *mask, bool back, Terminator &terminator) const {
DoPartialMaxOrMinLoc<CAT, KIND, IS_MAX, COMPARE>(
intrinsic, result, x, kind, dim, mask, back, terminator);
}
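
`RT_DEVICE_NOINLINE` comes from the runtime's API-attribute headers and conceptually expands to a no-inline attribute only under device compilation. A hedged sketch of how such a macro is typically defined (the preprocessor guards below are assumptions for illustration, not the exact flang-rt spelling):

```cpp
// Illustrative definition of a device-only noinline attribute macro.
// The actual macro lives in flang's runtime API-attribute header; the
// guard shown here is an assumption.
#ifdef __CUDACC__
#define RT_DEVICE_NOINLINE __attribute__((noinline))
#else
#define RT_DEVICE_NOINLINE
#endif
```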
diff --git a/flang-rt/lib/runtime/findloc.cpp b/flang-rt/lib/runtime/findloc.cpp
index 5485f4b..b5031ec 100644
--- a/flang-rt/lib/runtime/findloc.cpp
+++ b/flang-rt/lib/runtime/findloc.cpp
@@ -153,10 +153,13 @@ template <TypeCategory CAT,
class HELPER>
struct NumericFindlocHelper {
template <int KIND> struct Functor {
- RT_API_ATTRS void operator()(TypeCategory targetCat, int targetKind,
- Descriptor &result, const Descriptor &x, const Descriptor &target,
- int kind, int dim, const Descriptor *mask, bool back,
- Terminator &terminator) const {
+    // NVCC inlines more aggressively, and inlining too many specializations
+    // of this function leads to compiler timeouts. Mark it noinline so that
+    // compilation can complete.
+ RT_API_ATTRS RT_DEVICE_NOINLINE void operator()(TypeCategory targetCat,
+ int targetKind, Descriptor &result, const Descriptor &x,
+ const Descriptor &target, int kind, int dim, const Descriptor *mask,
+ bool back, Terminator &terminator) const {
switch (targetCat) {
case TypeCategory::Integer:
case TypeCategory::Unsigned:
diff --git a/flang/include/flang/Evaluate/characteristics.h b/flang/include/flang/Evaluate/characteristics.h
index b6a9ebe..4cf82e7 100644
--- a/flang/include/flang/Evaluate/characteristics.h
+++ b/flang/include/flang/Evaluate/characteristics.h
@@ -251,7 +251,8 @@ struct DummyDataObject {
std::optional<std::string> *warning = nullptr) const;
static std::optional<DummyDataObject> Characterize(
const semantics::Symbol &, FoldingContext &);
- bool CanBePassedViaImplicitInterface(std::string *whyNot = nullptr) const;
+ bool CanBePassedViaImplicitInterface(
+ std::string *whyNot = nullptr, bool checkCUDA = true) const;
bool IsPassedByDescriptor(bool isBindC) const;
llvm::raw_ostream &Dump(llvm::raw_ostream &) const;
@@ -307,7 +308,8 @@ struct DummyArgument {
void SetOptional(bool = true);
common::Intent GetIntent() const;
void SetIntent(common::Intent);
- bool CanBePassedViaImplicitInterface(std::string *whyNot = nullptr) const;
+ bool CanBePassedViaImplicitInterface(
+ std::string *whyNot = nullptr, bool checkCUDA = true) const;
bool IsTypelessIntrinsicDummy() const;
bool IsCompatibleWith(const DummyArgument &, std::string *whyNot = nullptr,
std::optional<std::string> *warning = nullptr) const;
@@ -402,7 +404,8 @@ struct Procedure {
return !attrs.test(Attr::ImplicitInterface);
}
std::optional<int> FindPassIndex(std::optional<parser::CharBlock>) const;
- bool CanBeCalledViaImplicitInterface(std::string *whyNot = nullptr) const;
+ bool CanBeCalledViaImplicitInterface(
+ std::string *whyNot = nullptr, bool checkCUDA = true) const;
bool CanOverride(const Procedure &, std::optional<int> passIndex) const;
bool IsCompatibleWith(const Procedure &, bool ignoreImplicitVsExplicit,
std::string *whyNot = nullptr, const SpecificIntrinsic * = nullptr,
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index 325ca9b..1443e93 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -1639,6 +1639,7 @@ struct CommonStmt {
BOILERPLATE(CommonStmt);
CommonStmt(std::optional<Name> &&, std::list<CommonBlockObject> &&,
std::list<Block> &&);
+ CharBlock source;
std::list<Block> blocks;
};
diff --git a/flang/include/flang/Semantics/scope.h b/flang/include/flang/Semantics/scope.h
index b404683..3195892 100644
--- a/flang/include/flang/Semantics/scope.h
+++ b/flang/include/flang/Semantics/scope.h
@@ -188,7 +188,7 @@ public:
void add_crayPointer(const SourceName &, Symbol &);
mapType &commonBlocks() { return commonBlocks_; }
const mapType &commonBlocks() const { return commonBlocks_; }
- Symbol &MakeCommonBlock(const SourceName &);
+ Symbol &MakeCommonBlock(SourceName, SourceName location);
Symbol *FindCommonBlock(const SourceName &) const;
/// Make a Symbol but don't add it to the scope.
diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h
index a0d5ae7..975423b 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -570,17 +570,21 @@ private:
class CommonBlockDetails : public WithBindName {
public:
+ explicit CommonBlockDetails(SourceName location)
+ : sourceLocation_{location} {}
+ SourceName sourceLocation() const { return sourceLocation_; }
MutableSymbolVector &objects() { return objects_; }
const MutableSymbolVector &objects() const { return objects_; }
void add_object(Symbol &object) { objects_.emplace_back(object); }
void replace_object(Symbol &object, unsigned index) {
- CHECK(index < (unsigned)objects_.size());
+ CHECK(index < objects_.size());
objects_[index] = object;
}
std::size_t alignment() const { return alignment_; }
void set_alignment(std::size_t alignment) { alignment_ = alignment; }
private:
+ SourceName sourceLocation_;
MutableSymbolVector objects_;
std::size_t alignment_{0}; // required alignment in bytes
};
diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index db73a85..b977fb8 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -770,5 +770,7 @@ std::string GetCommonBlockObjectName(const Symbol &, bool underscoring);
// Check for ambiguous USE associations
bool HadUseError(SemanticsContext &, SourceName at, const Symbol *);
+bool AreSameModuleSymbol(const Symbol &, const Symbol &);
+
} // namespace Fortran::semantics
#endif // FORTRAN_SEMANTICS_TOOLS_H_
diff --git a/flang/include/flang/Semantics/type.h b/flang/include/flang/Semantics/type.h
index 5d96f1e..3bd638b 100644
--- a/flang/include/flang/Semantics/type.h
+++ b/flang/include/flang/Semantics/type.h
@@ -285,6 +285,9 @@ public:
bool IsForwardReferenced() const;
bool HasDefaultInitialization(
bool ignoreAllocatable = false, bool ignorePointer = true) const;
+ std::optional<std::string> // component path suitable for error messages
+ ComponentWithDefaultInitialization(
+ bool ignoreAllocatable = false, bool ignorePointer = true) const;
bool HasDestruction() const;
// The "raw" type parameter list is a simple transcription from the
diff --git a/flang/lib/Evaluate/characteristics.cpp b/flang/lib/Evaluate/characteristics.cpp
index 37c62c9..542f122 100644
--- a/flang/lib/Evaluate/characteristics.cpp
+++ b/flang/lib/Evaluate/characteristics.cpp
@@ -458,7 +458,7 @@ std::optional<DummyDataObject> DummyDataObject::Characterize(
}
bool DummyDataObject::CanBePassedViaImplicitInterface(
- std::string *whyNot) const {
+ std::string *whyNot, bool checkCUDA) const {
if ((attrs &
Attrs{Attr::Allocatable, Attr::Asynchronous, Attr::Optional,
Attr::Pointer, Attr::Target, Attr::Value, Attr::Volatile})
@@ -482,7 +482,7 @@ bool DummyDataObject::CanBePassedViaImplicitInterface(
*whyNot = "a dummy argument is polymorphic";
}
return false; // 15.4.2.2(3)(f)
- } else if (cudaDataAttr) {
+ } else if (checkCUDA && cudaDataAttr) {
if (whyNot) {
*whyNot = "a dummy argument has a CUDA data attribute";
}
@@ -1012,9 +1012,10 @@ common::Intent DummyArgument::GetIntent() const {
u);
}
-bool DummyArgument::CanBePassedViaImplicitInterface(std::string *whyNot) const {
+bool DummyArgument::CanBePassedViaImplicitInterface(
+ std::string *whyNot, bool checkCUDA) const {
if (const auto *object{std::get_if<DummyDataObject>(&u)}) {
- return object->CanBePassedViaImplicitInterface(whyNot);
+ return object->CanBePassedViaImplicitInterface(whyNot, checkCUDA);
} else if (const auto *proc{std::get_if<DummyProcedure>(&u)}) {
return proc->CanBePassedViaImplicitInterface(whyNot);
} else {
@@ -1501,7 +1502,8 @@ std::optional<Procedure> Procedure::FromActuals(const ProcedureDesignator &proc,
return callee;
}
-bool Procedure::CanBeCalledViaImplicitInterface(std::string *whyNot) const {
+bool Procedure::CanBeCalledViaImplicitInterface(
+ std::string *whyNot, bool checkCUDA) const {
if (attrs.test(Attr::Elemental)) {
if (whyNot) {
*whyNot = "the procedure is elemental";
@@ -1524,7 +1526,7 @@ bool Procedure::CanBeCalledViaImplicitInterface(std::string *whyNot) const {
return false;
} else {
for (const DummyArgument &arg : dummyArguments) {
- if (!arg.CanBePassedViaImplicitInterface(whyNot)) {
+ if (!arg.CanBePassedViaImplicitInterface(whyNot, checkCUDA)) {
return false;
}
}
diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp
index 3cfad03..b927fa3 100644
--- a/flang/lib/Evaluate/tools.cpp
+++ b/flang/lib/Evaluate/tools.cpp
@@ -1209,6 +1209,15 @@ parser::Message *AttachDeclaration(
message.Attach(use->location(),
"'%s' is USE-associated with '%s' in module '%s'"_en_US, symbol.name(),
unhosted->name(), GetUsedModule(*use).name());
+ } else if (const auto *common{
+ unhosted->detailsIf<semantics::CommonBlockDetails>()}) {
+ parser::CharBlock at{unhosted->name()};
+ if (at.empty()) { // blank COMMON, with or without //
+ at = common->sourceLocation();
+ }
+ if (!at.empty()) {
+ message.Attach(at, "Declaration of /%s/"_en_US, unhosted->name());
+ }
} else {
message.Attach(
unhosted->name(), "Declaration of '%s'"_en_US, unhosted->name());
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index f9b9b850..4a9e494 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -2222,6 +2222,9 @@ buildACCLoopOp(Fortran::lower::AbstractConverter &converter,
addOperands(operands, operandSegments, tileOperands);
addOperands(operands, operandSegments, cacheOperands);
addOperands(operands, operandSegments, privateOperands);
+  // Add an empty firstprivate operand list, since firstprivate is not
+  // permitted here from the OpenACC language perspective.
+ addOperands(operands, operandSegments, {});
addOperands(operands, operandSegments, reductionOperands);
auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>(
diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp
index fbe629a..d33a18f 100644
--- a/flang/lib/Parser/Fortran-parsers.cpp
+++ b/flang/lib/Parser/Fortran-parsers.cpp
@@ -1100,14 +1100,14 @@ TYPE_PARSER(construct<EquivalenceObject>(indirect(designator)))
// R873 common-stmt ->
// COMMON [/ [common-block-name] /] common-block-object-list
// [[,] / [common-block-name] / common-block-object-list]...
-TYPE_PARSER(
+TYPE_PARSER(sourced(
construct<CommonStmt>("COMMON" >> defaulted("/" >> maybe(name) / "/"),
nonemptyList("expected COMMON block objects"_err_en_US,
Parser<CommonBlockObject>{}),
many(maybe(","_tok) >>
construct<CommonStmt::Block>("/" >> maybe(name) / "/",
nonemptyList("expected COMMON block objects"_err_en_US,
- Parser<CommonBlockObject>{})))))
+ Parser<CommonBlockObject>{}))))))
// R874 common-block-object -> variable-name [( array-spec )]
TYPE_PARSER(construct<CommonBlockObject>(name, maybe(arraySpec)))
diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp
index 4939d8d..81c53aa 100644
--- a/flang/lib/Semantics/check-call.cpp
+++ b/flang/lib/Semantics/check-call.cpp
@@ -56,28 +56,44 @@ static void CheckImplicitInterfaceArg(evaluate::ActualArgument &arg,
"%VAL argument must be a scalar numeric or logical expression"_err_en_US);
}
if (const auto *expr{arg.UnwrapExpr()}) {
- if (const Symbol * base{GetFirstSymbol(*expr)};
- base && IsFunctionResult(*base)) {
- context.NoteDefinedSymbol(*base);
+ if (const Symbol *base{GetFirstSymbol(*expr)}) {
+ const Symbol &symbol{GetAssociationRoot(*base)};
+ if (IsFunctionResult(symbol)) {
+ context.NoteDefinedSymbol(symbol);
+ }
}
if (IsBOZLiteral(*expr)) {
- messages.Say("BOZ argument requires an explicit interface"_err_en_US);
+ messages.Say("BOZ argument %s requires an explicit interface"_err_en_US,
+ expr->AsFortran());
} else if (evaluate::IsNullPointerOrAllocatable(expr)) {
messages.Say(
- "Null pointer argument requires an explicit interface"_err_en_US);
+ "Null pointer argument '%s' requires an explicit interface"_err_en_US,
+ expr->AsFortran());
} else if (auto named{evaluate::ExtractNamedEntity(*expr)}) {
- const Symbol &symbol{named->GetLastSymbol()};
- if (IsAssumedRank(symbol)) {
+ const Symbol &resolved{ResolveAssociations(named->GetLastSymbol())};
+ if (IsAssumedRank(resolved)) {
messages.Say(
- "Assumed rank argument requires an explicit interface"_err_en_US);
+ "Assumed rank argument '%s' requires an explicit interface"_err_en_US,
+ expr->AsFortran());
}
+ const Symbol &symbol{GetAssociationRoot(resolved)};
if (symbol.attrs().test(Attr::ASYNCHRONOUS)) {
messages.Say(
- "ASYNCHRONOUS argument requires an explicit interface"_err_en_US);
+ "ASYNCHRONOUS argument '%s' requires an explicit interface"_err_en_US,
+ expr->AsFortran());
}
if (symbol.attrs().test(Attr::VOLATILE)) {
messages.Say(
- "VOLATILE argument requires an explicit interface"_err_en_US);
+ "VOLATILE argument '%s' requires an explicit interface"_err_en_US,
+ expr->AsFortran());
+ }
+ if (const auto *object{symbol.detailsIf<ObjectEntityDetails>()}) {
+ if (object->cudaDataAttr()) {
+ messages.Warn(/*inModuleFile=*/false, context.languageFeatures(),
+ common::UsageWarning::CUDAUsage,
+ "Actual argument '%s' with CUDA data attributes should be passed via an explicit interface"_warn_en_US,
+ expr->AsFortran());
+ }
}
} else if (auto argChars{characteristics::DummyArgument::FromActual(
"actual argument", *expr, context.foldingContext(),
@@ -2387,44 +2403,51 @@ bool CheckArguments(const characteristics::Procedure &proc,
evaluate::FoldingContext foldingContext{context.foldingContext()};
parser::ContextualMessages &messages{foldingContext.messages()};
bool allowArgumentConversions{true};
+ parser::Messages implicitBuffer;
if (!explicitInterface || treatingExternalAsImplicit) {
- parser::Messages buffer;
{
- auto restorer{messages.SetMessages(buffer)};
+ auto restorer{messages.SetMessages(implicitBuffer)};
for (auto &actual : actuals) {
if (actual) {
CheckImplicitInterfaceArg(*actual, messages, context);
}
}
}
- if (!buffer.empty()) {
+ if (implicitBuffer.AnyFatalError()) {
if (auto *msgs{messages.messages()}) {
- msgs->Annex(std::move(buffer));
+ msgs->Annex(std::move(implicitBuffer));
}
return false; // don't pile on
}
allowArgumentConversions = false;
}
if (explicitInterface) {
- auto buffer{CheckExplicitInterface(proc, actuals, context, &scope,
+ auto explicitBuffer{CheckExplicitInterface(proc, actuals, context, &scope,
intrinsic, allowArgumentConversions,
/*extentErrors=*/true, ignoreImplicitVsExplicit)};
- if (!buffer.empty()) {
+ if (!explicitBuffer.empty()) {
if (treatingExternalAsImplicit) {
- if (auto *msg{foldingContext.Warn(
+ // Combine all messages into one warning
+ if (auto *warning{messages.Warn(/*inModuleFile=*/false,
+ context.languageFeatures(),
common::UsageWarning::KnownBadImplicitInterface,
"If the procedure's interface were explicit, this reference would be in error"_warn_en_US)}) {
- buffer.AttachTo(*msg, parser::Severity::Because);
- } else {
- buffer.clear();
+ explicitBuffer.AttachTo(*warning, parser::Severity::Because);
}
+ } else if (auto *msgs{messages.messages()}) {
+ msgs->Annex(std::move(explicitBuffer));
}
- if (auto *msgs{messages.messages()}) {
- msgs->Annex(std::move(buffer));
- }
+ // These messages override any in implicitBuffer.
return false;
}
}
- return true;
+ if (!implicitBuffer.empty()) {
+ if (auto *msgs{messages.messages()}) {
+ msgs->Annex(std::move(implicitBuffer));
+ }
+ return false;
+ } else {
+ return true; // no messages
+ }
}
} // namespace Fortran::semantics
diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp
index 7b88100..7593424 100644
--- a/flang/lib/Semantics/check-declarations.cpp
+++ b/flang/lib/Semantics/check-declarations.cpp
@@ -512,39 +512,111 @@ void CheckHelper::Check(const Symbol &symbol) {
}
void CheckHelper::CheckCommonBlock(const Symbol &symbol) {
- auto restorer{messages_.SetLocation(symbol.name())};
CheckGlobalName(symbol);
- if (symbol.attrs().test(Attr::BIND_C)) {
+ const auto &common{symbol.get<CommonBlockDetails>()};
+ SourceName location{symbol.name()};
+ if (location.empty()) {
+ location = common.sourceLocation();
+ }
+ bool isBindCCommon{symbol.attrs().test(Attr::BIND_C)};
+ if (isBindCCommon) {
CheckBindC(symbol);
- for (auto ref : symbol.get<CommonBlockDetails>().objects()) {
- if (ref->has<ObjectEntityDetails>()) {
- if (auto msgs{WhyNotInteroperableObject(*ref,
- /*allowInteroperableType=*/false, /*forCommonBlock=*/true)};
- !msgs.empty()) {
- parser::Message &reason{msgs.messages().front()};
- parser::Message *msg{nullptr};
- if (reason.IsFatal()) {
- msg = messages_.Say(symbol.name(),
- "'%s' may not be a member of BIND(C) COMMON block /%s/"_err_en_US,
- ref->name(), symbol.name());
- } else {
- msg = messages_.Say(symbol.name(),
- "'%s' should not be a member of BIND(C) COMMON block /%s/"_warn_en_US,
- ref->name(), symbol.name());
- }
- if (msg) {
- msg->Attach(
- std::move(reason.set_severity(parser::Severity::Because)));
- }
+ }
+ for (auto ref : symbol.get<CommonBlockDetails>().objects()) {
+ auto restorer{
+ messages_.SetLocation(location.empty() ? ref->name() : location)};
+ if (isBindCCommon && ref->has<ObjectEntityDetails>()) {
+ if (auto msgs{WhyNotInteroperableObject(*ref,
+ /*allowInteroperableType=*/false, /*forCommonBlock=*/true)};
+ !msgs.empty()) {
+ parser::Message &reason{msgs.messages().front()};
+ parser::Message *msg{nullptr};
+ if (reason.IsFatal()) {
+ msg = messages_.Say(
+ "'%s' may not be a member of BIND(C) COMMON block /%s/"_err_en_US,
+ ref->name(), symbol.name());
+ } else {
+ msg = messages_.Say(
+ "'%s' should not be a member of BIND(C) COMMON block /%s/"_warn_en_US,
+ ref->name(), symbol.name());
}
+ if (msg) {
+ msg = &msg->Attach(
+ std::move(reason.set_severity(parser::Severity::Because)));
+ }
+ evaluate::AttachDeclaration(msg, *ref);
}
}
- }
- for (auto ref : symbol.get<CommonBlockDetails>().objects()) {
if (ref->test(Symbol::Flag::CrayPointee)) {
- messages_.Say(ref->name(),
- "Cray pointee '%s' may not be a member of a COMMON block"_err_en_US,
- ref->name());
+ evaluate::AttachDeclaration(
+ messages_.Say(
+ "Cray pointee '%s' may not be a member of COMMON block /%s/"_err_en_US,
+ ref->name(), symbol.name()),
+ *ref);
+ }
+ if (IsAllocatable(*ref)) {
+ evaluate::AttachDeclaration(
+ messages_.Say(
+ "ALLOCATABLE object '%s' may not appear in COMMON block /%s/"_err_en_US,
+ ref->name(), symbol.name()),
+ *ref);
+ }
+ if (ref->attrs().test(Attr::BIND_C)) {
+ evaluate::AttachDeclaration(
+ messages_.Say(
+ "BIND(C) object '%s' may not appear in COMMON block /%s/"_err_en_US,
+ ref->name(), symbol.name()),
+ *ref);
+ }
+ if (IsNamedConstant(*ref)) {
+ evaluate::AttachDeclaration(
+ messages_.Say(
+ "Named constant '%s' may not appear in COMMON block /%s/"_err_en_US,
+ ref->name(), symbol.name()),
+ *ref);
+ }
+ if (IsDummy(*ref)) {
+ evaluate::AttachDeclaration(
+ messages_.Say(
+ "Dummy argument '%s' may not appear in COMMON block /%s/"_err_en_US,
+ ref->name(), symbol.name()),
+ *ref);
+ }
+ if (ref->IsFuncResult()) {
+ evaluate::AttachDeclaration(
+ messages_.Say(
+ "Function result '%s' may not appear in COMMON block /%s/"_err_en_US,
+ ref->name(), symbol.name()),
+ *ref);
+ }
+ if (const auto *type{ref->GetType()}) {
+ if (type->category() == DeclTypeSpec::ClassStar) {
+ evaluate::AttachDeclaration(
+ messages_.Say(
+ "Unlimited polymorphic pointer '%s' may not appear in COMMON block /%s/"_err_en_US,
+ ref->name(), symbol.name()),
+ *ref);
+ } else if (const auto *derived{type->AsDerived()}) {
+ if (!IsSequenceOrBindCType(derived)) {
+ evaluate::AttachDeclaration(
+ evaluate::AttachDeclaration(
+ messages_.Say(
+ "Object '%s' whose derived type '%s' is neither SEQUENCE nor BIND(C) may not appear in COMMON block /%s/"_err_en_US,
+ ref->name(), derived->name(), symbol.name()),
+ derived->typeSymbol()),
+ *ref);
+ } else if (auto componentPath{
+ derived->ComponentWithDefaultInitialization()}) {
+ evaluate::AttachDeclaration(
+ evaluate::AttachDeclaration(
+ messages_.Say(
+ "COMMON block /%s/ may not have the member '%s' whose derived type '%s' has a component '%s' that is ALLOCATABLE or has default initialization"_err_en_US,
+ symbol.name(), ref->name(), derived->name(),
+ *componentPath),
+ derived->typeSymbol()),
+ *ref);
+ }
+ }
}
}
}
@@ -2976,14 +3048,6 @@ static std::optional<std::string> DefinesGlobalName(const Symbol &symbol) {
return std::nullopt;
}
-static bool IsSameSymbolFromHermeticModule(
- const Symbol &symbol, const Symbol &other) {
- return symbol.name() == other.name() && symbol.owner().IsModule() &&
- other.owner().IsModule() && symbol.owner() != other.owner() &&
- symbol.owner().GetName() &&
- symbol.owner().GetName() == other.owner().GetName();
-}
-
// 19.2 p2
void CheckHelper::CheckGlobalName(const Symbol &symbol) {
if (auto global{DefinesGlobalName(symbol)}) {
@@ -3001,7 +3065,7 @@ void CheckHelper::CheckGlobalName(const Symbol &symbol) {
(!IsExternalProcedureDefinition(symbol) ||
!IsExternalProcedureDefinition(other))) {
// both are procedures/BLOCK DATA, not both definitions
- } else if (IsSameSymbolFromHermeticModule(symbol, other)) {
+ } else if (AreSameModuleSymbol(symbol, other)) {
// Both symbols are the same thing.
} else if (symbol.has<ModuleDetails>()) {
Warn(common::LanguageFeature::BenignNameClash, symbol.name(),
diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp
index 8365001..fc26888 100644
--- a/flang/lib/Semantics/expression.cpp
+++ b/flang/lib/Semantics/expression.cpp
@@ -3628,7 +3628,7 @@ std::optional<characteristics::Procedure> ExpressionAnalyzer::CheckCall(
if (chars) {
std::string whyNot;
if (treatExternalAsImplicit &&
- !chars->CanBeCalledViaImplicitInterface(&whyNot)) {
+ !chars->CanBeCalledViaImplicitInterface(&whyNot, /*checkCUDA=*/false)) {
if (auto *msg{Say(callSite,
"References to the procedure '%s' require an explicit interface"_err_en_US,
DEREF(procSymbol).name())};
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 02fcf02..18fc638 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -625,7 +625,7 @@ public:
for (const parser::OmpObject &obj : x.v) {
auto *name{std::get_if<parser::Name>(&obj.u)};
if (name && !name->symbol) {
- Resolve(*name, currScope().MakeCommonBlock(name->source));
+ Resolve(*name, currScope().MakeCommonBlock(name->source, name->source));
}
}
}
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 5041a6a..b7c7603d 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -1106,8 +1106,9 @@ protected:
// or nullptr on error.
Symbol *DeclareStatementEntity(const parser::DoVariable &,
const std::optional<parser::IntegerTypeSpec> &);
- Symbol &MakeCommonBlockSymbol(const parser::Name &);
- Symbol &MakeCommonBlockSymbol(const std::optional<parser::Name> &);
+ Symbol &MakeCommonBlockSymbol(const parser::Name &, SourceName);
+ Symbol &MakeCommonBlockSymbol(
+ const std::optional<parser::Name> &, SourceName);
bool CheckUseError(const parser::Name &);
void CheckAccessibility(const SourceName &, bool, Symbol &);
void CheckCommonBlocks();
@@ -1244,8 +1245,6 @@ private:
bool OkToAddComponent(const parser::Name &, const Symbol *extends = nullptr);
ParamValue GetParamValue(
const parser::TypeParamValue &, common::TypeParamAttr attr);
- void CheckCommonBlockDerivedType(
- const SourceName &, const Symbol &, UnorderedSymbolSet &);
Attrs HandleSaveName(const SourceName &, Attrs);
void AddSaveName(std::set<SourceName> &, const SourceName &);
bool HandleUnrestrictedSpecificIntrinsicFunction(const parser::Name &);
@@ -3963,8 +3962,26 @@ void ModuleVisitor::DoAddUse(SourceName location, SourceName localName,
}
}
+ auto AreSameModuleProcOrBothInterfaces{[](const Symbol &p1,
+ const Symbol &p2) {
+ if (IsProcedure(p1) && !IsPointer(p1) && IsProcedure(p2) &&
+ !IsPointer(p2)) {
+ auto classification{ClassifyProcedure(p1)};
+ if (classification == ClassifyProcedure(p2)) {
+ if (classification == ProcedureDefinitionClass::External) {
+ const auto *subp1{p1.detailsIf<SubprogramDetails>()};
+ const auto *subp2{p2.detailsIf<SubprogramDetails>()};
+ return subp1 && subp1->isInterface() && subp2 && subp2->isInterface();
+ } else if (classification == ProcedureDefinitionClass::Module) {
+ return AreSameModuleSymbol(p1, p2);
+ }
+ }
+ }
+ return false;
+ }};
+
auto AreSameProcedure{[&](const Symbol &p1, const Symbol &p2) {
- if (&p1 == &p2) {
+ if (&p1.GetUltimate() == &p2.GetUltimate()) {
return true;
} else if (p1.name() != p2.name()) {
return false;
@@ -3972,31 +3989,16 @@ void ModuleVisitor::DoAddUse(SourceName location, SourceName localName,
p2.attrs().test(Attr::INTRINSIC)) {
return p1.attrs().test(Attr::INTRINSIC) &&
p2.attrs().test(Attr::INTRINSIC);
- } else if (!IsProcedure(p1) || !IsProcedure(p2)) {
- return false;
- } else if (IsPointer(p1) || IsPointer(p2)) {
- return false;
- } else if (const auto *subp{p1.detailsIf<SubprogramDetails>()};
- subp && !subp->isInterface()) {
- return false; // defined in module, not an external
- } else if (const auto *subp{p2.detailsIf<SubprogramDetails>()};
- subp && !subp->isInterface()) {
- return false; // defined in module, not an external
+ } else if (AreSameModuleProcOrBothInterfaces(p1, p2)) {
+ // Both are external interfaces, perhaps to the same procedure,
+ // or both are module procedures from modules with the same name.
+ auto p1Chars{evaluate::characteristics::Procedure::Characterize(
+ p1, GetFoldingContext())};
+ auto p2Chars{evaluate::characteristics::Procedure::Characterize(
+ p2, GetFoldingContext())};
+ return p1Chars && p2Chars && *p1Chars == *p2Chars;
} else {
- // Both are external interfaces, perhaps to the same procedure
- auto class1{ClassifyProcedure(p1)};
- auto class2{ClassifyProcedure(p2)};
- if (class1 == ProcedureDefinitionClass::External &&
- class2 == ProcedureDefinitionClass::External) {
- auto chars1{evaluate::characteristics::Procedure::Characterize(
- p1, GetFoldingContext())};
- auto chars2{evaluate::characteristics::Procedure::Characterize(
- p2, GetFoldingContext())};
- // same procedure interface defined identically in two modules?
- return chars1 && chars2 && *chars1 == *chars2;
- } else {
- return false;
- }
+ return false;
}
}};
@@ -4097,13 +4099,32 @@ void ModuleVisitor::DoAddUse(SourceName location, SourceName localName,
localSymbol = &newSymbol;
}
if (useGeneric) {
- // Combine two use-associated generics
+ // Combine two use-associated generics.
localSymbol->attrs() =
useSymbol.attrs() & ~Attrs{Attr::PUBLIC, Attr::PRIVATE};
localSymbol->flags() = useSymbol.flags();
AddGenericUse(*localGeneric, localName, useUltimate);
- localGeneric->clear_derivedType();
- localGeneric->CopyFrom(*useGeneric);
+ // Don't duplicate specific procedures.
+ std::size_t originalLocalSpecifics{localGeneric->specificProcs().size()};
+ std::size_t useSpecifics{useGeneric->specificProcs().size()};
+ CHECK(originalLocalSpecifics == localGeneric->bindingNames().size());
+ CHECK(useSpecifics == useGeneric->bindingNames().size());
+ std::size_t j{0};
+ for (const Symbol &useSpecific : useGeneric->specificProcs()) {
+ SourceName useBindingName{useGeneric->bindingNames()[j++]};
+ bool isDuplicate{false};
+ std::size_t k{0};
+ for (const Symbol &localSpecific : localGeneric->specificProcs()) {
+ if (localGeneric->bindingNames()[k++] == useBindingName &&
+ AreSameProcedure(localSpecific, useSpecific)) {
+ isDuplicate = true;
+ break;
+ }
+ }
+ if (!isDuplicate) {
+ localGeneric->AddSpecificProc(useSpecific, useBindingName);
+ }
+ }
}
localGeneric->clear_derivedType();
if (combinedDerivedType) {
@@ -5564,7 +5585,7 @@ bool DeclarationVisitor::Pre(const parser::BindEntity &x) {
if (kind == parser::BindEntity::Kind::Object) {
symbol = &HandleAttributeStmt(Attr::BIND_C, name);
} else {
- symbol = &MakeCommonBlockSymbol(name);
+ symbol = &MakeCommonBlockSymbol(name, name.source);
SetExplicitAttr(*symbol, Attr::BIND_C);
}
// 8.6.4(1)
@@ -7147,7 +7168,7 @@ bool DeclarationVisitor::Pre(const parser::SaveStmt &x) {
auto kind{std::get<parser::SavedEntity::Kind>(y.t)};
const auto &name{std::get<parser::Name>(y.t)};
if (kind == parser::SavedEntity::Kind::Common) {
- MakeCommonBlockSymbol(name);
+ MakeCommonBlockSymbol(name, name.source);
AddSaveName(specPartState_.saveInfo.commons, name.source);
} else {
HandleAttributeStmt(Attr::SAVE, name);
@@ -7227,59 +7248,22 @@ void DeclarationVisitor::CheckCommonBlocks() {
if (symbol.get<CommonBlockDetails>().objects().empty() &&
symbol.attrs().test(Attr::BIND_C)) {
Say(symbol.name(),
- "'%s' appears as a COMMON block in a BIND statement but not in"
- " a COMMON statement"_err_en_US);
- }
- }
- // check objects in common blocks
- for (const auto &name : specPartState_.commonBlockObjects) {
- const auto *symbol{currScope().FindSymbol(name)};
- if (!symbol) {
- continue;
- }
- const auto &attrs{symbol->attrs()};
- if (attrs.test(Attr::ALLOCATABLE)) {
- Say(name,
- "ALLOCATABLE object '%s' may not appear in a COMMON block"_err_en_US);
- } else if (attrs.test(Attr::BIND_C)) {
- Say(name,
- "Variable '%s' with BIND attribute may not appear in a COMMON block"_err_en_US);
- } else if (IsNamedConstant(*symbol)) {
- Say(name,
- "A named constant '%s' may not appear in a COMMON block"_err_en_US);
- } else if (IsDummy(*symbol)) {
- Say(name,
- "Dummy argument '%s' may not appear in a COMMON block"_err_en_US);
- } else if (symbol->IsFuncResult()) {
- Say(name,
- "Function result '%s' may not appear in a COMMON block"_err_en_US);
- } else if (const DeclTypeSpec * type{symbol->GetType()}) {
- if (type->category() == DeclTypeSpec::ClassStar) {
- Say(name,
- "Unlimited polymorphic pointer '%s' may not appear in a COMMON block"_err_en_US);
- } else if (const auto *derived{type->AsDerived()}) {
- if (!IsSequenceOrBindCType(derived)) {
- Say(name,
- "Derived type '%s' in COMMON block must have the BIND or"
- " SEQUENCE attribute"_err_en_US);
- }
- UnorderedSymbolSet typeSet;
- CheckCommonBlockDerivedType(name, derived->typeSymbol(), typeSet);
- }
+ "'%s' appears as a COMMON block in a BIND statement but not in a COMMON statement"_err_en_US);
}
}
specPartState_.commonBlockObjects = {};
}
-Symbol &DeclarationVisitor::MakeCommonBlockSymbol(const parser::Name &name) {
- return Resolve(name, currScope().MakeCommonBlock(name.source));
+Symbol &DeclarationVisitor::MakeCommonBlockSymbol(
+ const parser::Name &name, SourceName location) {
+ return Resolve(name, currScope().MakeCommonBlock(name.source, location));
}
Symbol &DeclarationVisitor::MakeCommonBlockSymbol(
- const std::optional<parser::Name> &name) {
+ const std::optional<parser::Name> &name, SourceName location) {
if (name) {
- return MakeCommonBlockSymbol(*name);
+ return MakeCommonBlockSymbol(*name, location);
} else {
- return MakeCommonBlockSymbol(parser::Name{});
+ return MakeCommonBlockSymbol(parser::Name{}, location);
}
}
@@ -7287,43 +7271,6 @@ bool DeclarationVisitor::NameIsKnownOrIntrinsic(const parser::Name &name) {
return FindSymbol(name) || HandleUnrestrictedSpecificIntrinsicFunction(name);
}
-// Check if this derived type can be in a COMMON block.
-void DeclarationVisitor::CheckCommonBlockDerivedType(const SourceName &name,
- const Symbol &typeSymbol, UnorderedSymbolSet &typeSet) {
- if (auto iter{typeSet.find(SymbolRef{typeSymbol})}; iter != typeSet.end()) {
- return;
- }
- typeSet.emplace(typeSymbol);
- if (const auto *scope{typeSymbol.scope()}) {
- for (const auto &pair : *scope) {
- const Symbol &component{*pair.second};
- if (component.attrs().test(Attr::ALLOCATABLE)) {
- Say2(name,
- "Derived type variable '%s' may not appear in a COMMON block"
- " due to ALLOCATABLE component"_err_en_US,
- component.name(), "Component with ALLOCATABLE attribute"_en_US);
- return;
- }
- const auto *details{component.detailsIf<ObjectEntityDetails>()};
- if (component.test(Symbol::Flag::InDataStmt) ||
- (details && details->init())) {
- Say2(name,
- "Derived type variable '%s' may not appear in a COMMON block due to component with default initialization"_err_en_US,
- component.name(), "Component with default initialization"_en_US);
- return;
- }
- if (details) {
- if (const auto *type{details->type()}) {
- if (const auto *derived{type->AsDerived()}) {
- const Symbol &derivedTypeSymbol{derived->typeSymbol()};
- CheckCommonBlockDerivedType(name, derivedTypeSymbol, typeSet);
- }
- }
- }
- }
- }
-}
-
bool DeclarationVisitor::HandleUnrestrictedSpecificIntrinsicFunction(
const parser::Name &name) {
if (auto interface{context().intrinsics().IsSpecificIntrinsicFunction(
@@ -9655,7 +9602,7 @@ void ResolveNamesVisitor::CreateCommonBlockSymbols(
const parser::CommonStmt &commonStmt) {
for (const parser::CommonStmt::Block &block : commonStmt.blocks) {
const auto &[name, objects] = block.t;
- Symbol &commonBlock{MakeCommonBlockSymbol(name)};
+ Symbol &commonBlock{MakeCommonBlockSymbol(name, commonStmt.source)};
for (const auto &object : objects) {
Symbol &obj{DeclareObjectEntity(std::get<parser::Name>(object.t))};
if (auto *details{obj.detailsIf<ObjectEntityDetails>()}) {
diff --git a/flang/lib/Semantics/scope.cpp b/flang/lib/Semantics/scope.cpp
index 9c5682b..4af371f 100644
--- a/flang/lib/Semantics/scope.cpp
+++ b/flang/lib/Semantics/scope.cpp
@@ -143,12 +143,13 @@ void Scope::add_crayPointer(const SourceName &name, Symbol &pointer) {
crayPointers_.emplace(name, pointer);
}
-Symbol &Scope::MakeCommonBlock(const SourceName &name) {
+Symbol &Scope::MakeCommonBlock(SourceName name, SourceName location) {
const auto it{commonBlocks_.find(name)};
if (it != commonBlocks_.end()) {
return *it->second;
} else {
- Symbol &symbol{MakeSymbol(name, Attrs{}, CommonBlockDetails{})};
+ Symbol &symbol{MakeSymbol(
+ name, Attrs{}, CommonBlockDetails{name.empty() ? location : name})};
commonBlocks_.emplace(name, symbol);
return symbol;
}
diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp
index 6db11aa..bdb5377 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -313,15 +313,13 @@ private:
/// Return the symbol of an initialized member if a COMMON block
/// is initialized. Otherwise, return nullptr.
static Symbol *CommonBlockIsInitialized(const Symbol &common) {
- const auto &commonDetails =
- common.get<Fortran::semantics::CommonBlockDetails>();
-
+ const auto &commonDetails{
+ common.get<Fortran::semantics::CommonBlockDetails>()};
for (const auto &member : commonDetails.objects()) {
if (IsInitialized(*member)) {
return &*member;
}
}
-
// Common block may be initialized via initialized variables that are in an
// equivalence with the common block members.
for (const Fortran::semantics::EquivalenceSet &set :
diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp
index 28829d3..8eddd03 100644
--- a/flang/lib/Semantics/tools.cpp
+++ b/flang/lib/Semantics/tools.cpp
@@ -1870,4 +1870,9 @@ bool HadUseError(
}
}
+bool AreSameModuleSymbol(const Symbol &symbol, const Symbol &other) {
+ return symbol.name() == other.name() && symbol.owner().IsModule() &&
+ other.owner().IsModule() && symbol.owner().GetName() &&
+ symbol.owner().GetName() == other.owner().GetName();
+}
} // namespace Fortran::semantics
diff --git a/flang/lib/Semantics/type.cpp b/flang/lib/Semantics/type.cpp
index 964a37e..69e6ffa 100644
--- a/flang/lib/Semantics/type.cpp
+++ b/flang/lib/Semantics/type.cpp
@@ -206,14 +206,25 @@ bool DerivedTypeSpec::IsForwardReferenced() const {
return typeSymbol_.get<DerivedTypeDetails>().isForwardReferenced();
}
-bool DerivedTypeSpec::HasDefaultInitialization(
+std::optional<std::string> DerivedTypeSpec::ComponentWithDefaultInitialization(
bool ignoreAllocatable, bool ignorePointer) const {
DirectComponentIterator components{*this};
- return bool{std::find_if(
- components.begin(), components.end(), [&](const Symbol &component) {
- return IsInitialized(component, /*ignoreDataStatements=*/true,
- ignoreAllocatable, ignorePointer);
- })};
+ if (auto it{std::find_if(components.begin(), components.end(),
+ [ignoreAllocatable, ignorePointer](const Symbol &component) {
+ return (!ignoreAllocatable && IsAllocatable(component)) ||
+ (!ignorePointer && IsPointer(component)) ||
+ HasDeclarationInitializer(component);
+ })}) {
+ return it.BuildResultDesignatorName();
+ } else {
+ return std::nullopt;
+ }
+}
+
+bool DerivedTypeSpec::HasDefaultInitialization(
+ bool ignoreAllocatable, bool ignorePointer) const {
+ return ComponentWithDefaultInitialization(ignoreAllocatable, ignorePointer)
+ .has_value();
}
bool DerivedTypeSpec::HasDestruction() const {
diff --git a/flang/test/Semantics/boz-literal-constants.f90 b/flang/test/Semantics/boz-literal-constants.f90
index 4d957d1..67e9ce7 100644
--- a/flang/test/Semantics/boz-literal-constants.f90
+++ b/flang/test/Semantics/boz-literal-constants.f90
@@ -120,7 +120,7 @@ subroutine bozchecks
!ERROR: Actual argument 'z'55'' associated with dummy argument 'c=' is not a variable or typed expression
call explicit(z'deadbeef', o'666', b'01010101')
- !ERROR: BOZ argument requires an explicit interface
+ !ERROR: BOZ argument z'12345' requires an explicit interface
call implictSub(Z'12345')
!ERROR: Output item must not be a BOZ literal constant
diff --git a/flang/test/Semantics/call13.f90 b/flang/test/Semantics/call13.f90
index 3f7fb2e..90e1918 100644
--- a/flang/test/Semantics/call13.f90
+++ b/flang/test/Semantics/call13.f90
@@ -20,7 +20,7 @@ subroutine s(assumedRank, coarray, class, classStar, typeStar)
real :: array(implicit01()) ! 15.4.2.2(2)
!ERROR: Keyword 'keyword=' may not appear in a reference to a procedure with an implicit interface
call implicit10(1, 2, keyword=3) ! 15.4.2.2(1)
- !ERROR: Assumed rank argument requires an explicit interface
+ !ERROR: Assumed rank argument 'assumedrank' requires an explicit interface
call implicit11(assumedRank) ! 15.4.2.2(3)(c)
call implicit12(coarray) ! ok
call implicit12a(coarray[1]) ! ok
diff --git a/flang/test/Semantics/cuf24.cuf b/flang/test/Semantics/cuf24.cuf
new file mode 100644
index 0000000..67c9d5d
--- /dev/null
+++ b/flang/test/Semantics/cuf24.cuf
@@ -0,0 +1,40 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenacc
+
+subroutine implicitDeviceInSameFile(v)
+ real, device :: v(10)
+end
+
+subroutine implicitNonDeviceInSameFile(v)
+ real :: v(10)
+end
+
+program p
+ real, device :: dev(10)
+ real :: host(10)
+ interface
+ subroutine explicitDevice(v)
+ real, device :: v(10)
+ end
+ subroutine explicitNonDevice(v)
+ real :: v(10)
+ end
+ end interface
+ !WARNING: Actual argument 'dev' with CUDA data attributes should be passed via an explicit interface [-Wcuda-usage]
+ call implicit1(dev)
+ call implicit2(host)
+ !WARNING: Actual argument 'dev' with CUDA data attributes should be passed via an explicit interface [-Wcuda-usage]
+ call implicitDeviceInSameFile(dev)
+ !WARNING: If the procedure's interface were explicit, this reference would be in error [-Wknown-bad-implicit-interface]
+ !BECAUSE: dummy argument 'v=' has ATTRIBUTES(DEVICE) but its associated actual argument has no CUDA data attribute
+ call implicitDeviceInSameFile(host)
+ !WARNING: If the procedure's interface were explicit, this reference would be in error [-Wknown-bad-implicit-interface]
+ !BECAUSE: dummy argument 'v=' has no CUDA data attribute but its associated actual argument has ATTRIBUTES(DEVICE)
+ call implicitNonDeviceInSameFile(dev)
+ call implicitNonDeviceInSameFile(host)
+ call explicitDevice(dev)
+ !ERROR: dummy argument 'v=' has ATTRIBUTES(DEVICE) but its associated actual argument has no CUDA data attribute
+ call explicitDevice(host)
+ !ERROR: dummy argument 'v=' has no CUDA data attribute but its associated actual argument has ATTRIBUTES(DEVICE)
+ call explicitNonDevice(dev)
+ call explicitNonDevice(host)
+end
diff --git a/flang/test/Semantics/declarations01.f90 b/flang/test/Semantics/declarations01.f90
index 77cb6b4..3d8754e 100644
--- a/flang/test/Semantics/declarations01.f90
+++ b/flang/test/Semantics/declarations01.f90
@@ -7,7 +7,7 @@ function f1() result(x)
integer, parameter :: x2 = 1
integer :: x3
- !ERROR: A named constant 'x2' may not appear in a COMMON block
+ !ERROR: Named constant 'x2' may not appear in COMMON block /blk/
common /blk/ x2, x3
end
diff --git a/flang/test/Semantics/declarations08.f90 b/flang/test/Semantics/declarations08.f90
index 2c4027d..de7d5d7 100644
--- a/flang/test/Semantics/declarations08.f90
+++ b/flang/test/Semantics/declarations08.f90
@@ -2,7 +2,7 @@
pointer(p,x)
!ERROR: Cray pointee 'y' may not be a member of an EQUIVALENCE group
pointer(p,y)
-!ERROR: Cray pointee 'x' may not be a member of a COMMON block
+!ERROR: Cray pointee 'x' may not be a member of COMMON block //
common x
equivalence(y,z)
!ERROR: Cray pointee 'v' may not be initialized
diff --git a/flang/test/Semantics/modfile80.F90 b/flang/test/Semantics/modfile80.F90
new file mode 100644
index 0000000..425847e
--- /dev/null
+++ b/flang/test/Semantics/modfile80.F90
@@ -0,0 +1,25 @@
+!RUN: %flang_fc1 -DPART1 %s
+!RUN: %flang_fc1 -DPART2 -fhermetic-module-files %s
+!RUN: %flang_fc1 -DPART3 %s 2>&1 | FileCheck --allow-empty %s
+!CHECK-NOT: error:
+
+#if defined PART1
+module modfile80a
+ interface generic
+ module procedure specific
+ end interface
+ contains
+ subroutine specific
+ end
+end
+#elif defined PART2
+module modfile80b
+ use modfile80a
+end
+#else
+program test
+ use modfile80a
+ use modfile80b
+ call generic
+end
+#endif
diff --git a/flang/test/Semantics/null01.f90 b/flang/test/Semantics/null01.f90
index 64c9881..ccf6179 100644
--- a/flang/test/Semantics/null01.f90
+++ b/flang/test/Semantics/null01.f90
@@ -116,9 +116,9 @@ subroutine test
call optionalAllocatable(null(mold=ip0))
call optionalAllocatable(null(mold=ia0)) ! fine
call optionalAllocatable(null()) ! fine
- !ERROR: Null pointer argument requires an explicit interface
+ !ERROR: Null pointer argument 'NULL()' requires an explicit interface
call implicit(null())
- !ERROR: Null pointer argument requires an explicit interface
+ !ERROR: Null pointer argument 'null(mold=ip0)' requires an explicit interface
call implicit(null(mold=ip0))
!ERROR: A NULL() pointer is not allowed for 'x=' intrinsic argument
print *, sin(null(rp0))
diff --git a/flang/test/Semantics/resolve42.f90 b/flang/test/Semantics/resolve42.f90
index 5a433d0..13caff0 100644
--- a/flang/test/Semantics/resolve42.f90
+++ b/flang/test/Semantics/resolve42.f90
@@ -28,17 +28,17 @@ subroutine s5
end
function f6(x) result(r)
- !ERROR: ALLOCATABLE object 'y' may not appear in a COMMON block
- !ERROR: Dummy argument 'x' may not appear in a COMMON block
+ !ERROR: ALLOCATABLE object 'y' may not appear in COMMON block //
+ !ERROR: Dummy argument 'x' may not appear in COMMON block //
+ !ERROR: Function result 'r' may not appear in COMMON block //
common y,x,z
allocatable y
- !ERROR: Function result 'r' may not appear in a COMMON block
common r
end
module m7
- !ERROR: Variable 'w' with BIND attribute may not appear in a COMMON block
- !ERROR: Variable 'z' with BIND attribute may not appear in a COMMON block
+ !ERROR: BIND(C) object 'w' may not appear in COMMON block //
+ !ERROR: BIND(C) object 'z' may not appear in COMMON block //
common w,z
integer, bind(c) :: z
integer, bind(c,name="w") :: w
@@ -48,8 +48,8 @@ module m8
type t
end type
class(*), pointer :: x
- !ERROR: Unlimited polymorphic pointer 'x' may not appear in a COMMON block
- !ERROR: Unlimited polymorphic pointer 'y' may not appear in a COMMON block
+ !ERROR: Unlimited polymorphic pointer 'x' may not appear in COMMON block //
+ !ERROR: Unlimited polymorphic pointer 'y' may not appear in COMMON block //
common x, y
class(*), pointer :: y
end
@@ -67,7 +67,7 @@ module m10
type t
end type
type(t) :: x
- !ERROR: Derived type 'x' in COMMON block must have the BIND or SEQUENCE attribute
+ !ERROR: Object 'x' whose derived type 't' is neither SEQUENCE nor BIND(C) may not appear in COMMON block //
common x
end
@@ -82,7 +82,7 @@ module m11
integer:: c
end type
type(t2) :: x2
- !ERROR: Derived type variable 'x2' may not appear in a COMMON block due to ALLOCATABLE component
+ !ERROR: COMMON block /c2/ may not have the member 'x2' whose derived type 't2' has a component '%b%a' that is ALLOCATABLE or has default initialization
common /c2/ x2
end
@@ -97,7 +97,7 @@ module m12
integer:: c
end type
type(t2) :: x2
- !ERROR: Derived type variable 'x2' may not appear in a COMMON block due to component with default initialization
+ !ERROR: COMMON block /c3/ may not have the member 'x2' whose derived type 't2' has a component '%b%a' that is ALLOCATABLE or has default initialization
common /c3/ x2
end
@@ -112,3 +112,21 @@ subroutine s14
!ERROR: 'c' appears as a COMMON block in a BIND statement but not in a COMMON statement
bind(c) :: /c/
end
+
+module m15
+ interface
+ subroutine sub
+ end subroutine
+ end interface
+ type t1
+ sequence
+ procedure(sub), pointer, nopass :: pp => sub
+ end type
+ type t2
+ sequence
+ type(t1) :: a
+ end type
+ type(t2) :: x2
+ !ERROR: COMMON block /c4/ may not have the member 'x2' whose derived type 't2' has a component '%a%pp' that is ALLOCATABLE or has default initialization
+ common /c4/ x2
+end
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 51b1363..a2ca577 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -223,6 +223,7 @@ struct Configuration {
bool warnThinArchiveMissingMembers;
bool disableVerify;
bool separateCstringLiteralSections;
+ bool tailMergeStrings;
bool callGraphProfileSort = false;
llvm::StringRef printSymbolOrder;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 7ce987e..94f441b 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1986,6 +1986,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
config->separateCstringLiteralSections =
args.hasFlag(OPT_separate_cstring_literal_sections,
OPT_no_separate_cstring_literal_sections, false);
+ config->tailMergeStrings =
+ args.hasFlag(OPT_tail_merge_strings, OPT_no_tail_merge_strings, false);
auto IncompatWithCGSort = [&](StringRef firstArgStr) {
// Throw an error only if --call-graph-profile-sort is explicitly specified
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 4eeb8fb..be1a1cc 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -1091,6 +1091,10 @@ defm separate_cstring_literal_sections
"Emit all cstring literals into the __cstring section. As a special "
"case, the __objc_methname section will still be emitted. (default)">,
Group<grp_rare>;
+defm tail_merge_strings
+ : BB<"tail-merge-strings", "Enable string tail merging",
+ "Disable string tail merging to improve link-time performance">,
+ Group<grp_rare>;
def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">;
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 903ba78..187cccb 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1746,6 +1746,8 @@ void CStringSection::finalizeContents() {
void DeduplicatedCStringSection::finalizeContents() {
// Find the largest alignment required for each string.
DenseMap<CachedHashStringRef, Align> strToAlignment;
+ // Used for tail merging only
+ std::vector<CachedHashStringRef> deduplicatedStrs;
for (const CStringInputSection *isec : inputs) {
for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) {
if (!piece.live)
@@ -1754,17 +1756,66 @@ void DeduplicatedCStringSection::finalizeContents() {
assert(isec->align != 0);
auto align = getStringPieceAlignment(isec, piece);
auto [it, wasInserted] = strToAlignment.try_emplace(s, align);
+ if (config->tailMergeStrings && wasInserted)
+ deduplicatedStrs.push_back(s);
if (!wasInserted && it->second < align)
it->second = align;
}
}
+  // Like a lexicographical sort, except that we compare strings in reverse
+  // and order the longest string first.
+  // TODO: We could improve performance by implementing our own sort that
+  // avoids comparing characters we know to be the same. See
+  // StringTableBuilder::multikeySort() for details.
+ llvm::sort(deduplicatedStrs, [](const auto &left, const auto &right) {
+ for (const auto &[leftChar, rightChar] :
+ llvm::zip(llvm::reverse(left.val()), llvm::reverse(right.val()))) {
+ if (leftChar == rightChar)
+ continue;
+ return leftChar < rightChar;
+ }
+ return left.size() > right.size();
+ });
+ std::optional<CachedHashStringRef> mergeCandidate;
+ DenseMap<CachedHashStringRef, std::pair<CachedHashStringRef, uint64_t>>
+ tailMergeMap;
+ for (auto &s : deduplicatedStrs) {
+ if (!mergeCandidate || !mergeCandidate->val().ends_with(s.val())) {
+ mergeCandidate = s;
+ continue;
+ }
+ uint64_t tailMergeOffset = mergeCandidate->size() - s.size();
+    // TODO: If the tail offset is incompatible with this string's alignment,
+    // we might be able to find another superstring with a compatible tail
+    // offset. The difficulty is doing this efficiently.
+ const auto &align = strToAlignment.at(s);
+ if (!isAligned(align, tailMergeOffset))
+ continue;
+ auto &mergeCandidateAlign = strToAlignment[*mergeCandidate];
+ if (align > mergeCandidateAlign)
+ mergeCandidateAlign = align;
+ tailMergeMap.try_emplace(s, *mergeCandidate, tailMergeOffset);
+ }
+
// Sort the strings for performance and compression size win, and then
// assign an offset for each string and save it to the corresponding
// StringPieces for easy access.
for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
auto &piece = isec->pieces[i];
auto s = isec->getCachedHashStringRef(i);
+ // Any string can be tail merged with itself with an offset of zero
+ uint64_t tailMergeOffset = 0;
+ auto mergeIt =
+ config->tailMergeStrings ? tailMergeMap.find(s) : tailMergeMap.end();
+ if (mergeIt != tailMergeMap.end()) {
+ auto &[superString, offset] = mergeIt->second;
+      // s can be tail merged with superString. Do not lay out s; instead,
+      // lay out superString if we haven't already.
+ assert(superString.val().ends_with(s.val()));
+ s = superString;
+ tailMergeOffset = offset;
+ }
auto [it, wasInserted] = stringOffsetMap.try_emplace(s, /*placeholder*/ 0);
if (wasInserted) {
// Avoid computing the offset until we are sure we will need to
@@ -1772,9 +1823,12 @@ void DeduplicatedCStringSection::finalizeContents() {
it->second = offset;
size = offset + s.size() + 1; // account for null terminator
}
- // If the string was already in stringOffsetMap, it is a duplicate and we
- // only need to assign the offset.
- piece.outSecOff = it->second;
+ piece.outSecOff = it->second + tailMergeOffset;
+ if (mergeIt != tailMergeMap.end()) {
+ auto &tailMergedString = mergeIt->first;
+ stringOffsetMap[tailMergedString] = piece.outSecOff;
+ assert(isAligned(strToAlignment.at(tailMergedString), piece.outSecOff));
+ }
}
for (CStringInputSection *isec : inputs)
isec->isFinal = true;
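
The reversed-lexicographic ordering is what reduces suffix detection to a check against the current merge candidate: a string's potential superstrings sort immediately before it. A standalone sketch of that ordering and the suffix test, using the method names from the ObjC test below (simplified to an adjacent-pair check; all names hypothetical):

```cpp
// Demonstrates why reverse-lexicographic, longest-first ordering lets a
// single linear pass find tail-merge opportunities between neighbors.
#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> strs = {"error:", "withBar:error:", "bar:"};
  std::sort(strs.begin(), strs.end(),
            [](const std::string &a, const std::string &b) {
              // Compare characters from the back; ties go to the longer
              // string so superstrings come first.
              auto ai = a.rbegin(), bi = b.rbegin();
              for (; ai != a.rend() && bi != b.rend(); ++ai, ++bi)
                if (*ai != *bi)
                  return *ai < *bi;
              return a.size() > b.size();
            });
  // Order is now {"bar:", "withBar:error:", "error:"}; "error:" sits right
  // after its superstring, so one pass over neighbors finds the merge.
  for (size_t i = 1; i < strs.size(); ++i)
    if (strs[i - 1].size() > strs[i].size() &&
        strs[i - 1].compare(strs[i - 1].size() - strs[i].size(),
                            strs[i].size(), strs[i]) == 0)
      std::printf("\"%s\" merges into \"%s\" at offset %zu\n",
                  strs[i].c_str(), strs[i - 1].c_str(),
                  strs[i - 1].size() - strs[i].size());
  return 0; // prints: "error:" merges into "withBar:error:" at offset 8
}
```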
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 566dde6..29db1cd 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -46,6 +46,8 @@ MachO Improvements
* ``--separate-cstring-literal-sections`` emits cstring literal sections into sections defined by their section name.
(`#158720 <https://github.com/llvm/llvm-project/pull/158720>`_)
+* ``--tail-merge-strings`` enables tail merging of cstring literals.
+ (`#161262 <https://github.com/llvm/llvm-project/pull/161262>`_)
WebAssembly Improvements
------------------------
diff --git a/lld/test/MachO/cstring-tailmerge-objc.s b/lld/test/MachO/cstring-tailmerge-objc.s
new file mode 100644
index 0000000..46b2bbf
--- /dev/null
+++ b/lld/test/MachO/cstring-tailmerge-objc.s
@@ -0,0 +1,144 @@
+; REQUIRES: aarch64
+; RUN: rm -rf %t && split-file %s %t
+
+; Test that ObjC method names are tail merged and
+; ObjCSelRefsHelper::makeSelRef() still works correctly
+
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
+; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/a.o -o %t/a
+; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/a | FileCheck %s --implicit-check-not=error
+
+; RUN: %lld -dylib -arch arm64 --no-tail-merge-strings %t/a.o -o %t/nomerge
+; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/nomerge | FileCheck %s --check-prefixes=CHECK,NOMERGE --implicit-check-not=error
+
+; CHECK: withBar:error:
+; NOMERGE: error:
+
+;--- a.mm
+__attribute__((objc_root_class))
+@interface Foo
+- (void)withBar:(int)bar error:(int)error;
+- (void)error:(int)error;
+@end
+
+@implementation Foo
+- (void)withBar:(int)bar error:(int)error {}
+- (void)error:(int)error {}
+@end
+
+void *_objc_empty_cache;
+void *_objc_empty_vtable;
+;--- gen
+clang -Oz -target arm64-apple-darwin a.mm -S -o -
+;--- a.s
+ .build_version macos, 11, 0
+ .section __TEXT,__text,regular,pure_instructions
+ .p2align 2 ; -- Begin function -[Foo withBar:error:]
+"-[Foo withBar:error:]": ; @"\01-[Foo withBar:error:]"
+ .cfi_startproc
+; %bb.0:
+ ret
+ .cfi_endproc
+ ; -- End function
+ .p2align 2 ; -- Begin function -[Foo error:]
+"-[Foo error:]": ; @"\01-[Foo error:]"
+ .cfi_startproc
+; %bb.0:
+ ret
+ .cfi_endproc
+ ; -- End function
+ .globl __objc_empty_vtable ; @_objc_empty_vtable
+.zerofill __DATA,__common,__objc_empty_vtable,8,3
+ .section __DATA,__objc_data
+ .globl _OBJC_CLASS_$_Foo ; @"OBJC_CLASS_$_Foo"
+ .p2align 3, 0x0
+_OBJC_CLASS_$_Foo:
+ .quad _OBJC_METACLASS_$_Foo
+ .quad 0
+ .quad __objc_empty_cache
+ .quad __objc_empty_vtable
+ .quad __OBJC_CLASS_RO_$_Foo
+
+ .globl _OBJC_METACLASS_$_Foo ; @"OBJC_METACLASS_$_Foo"
+ .p2align 3, 0x0
+_OBJC_METACLASS_$_Foo:
+ .quad _OBJC_METACLASS_$_Foo
+ .quad _OBJC_CLASS_$_Foo
+ .quad __objc_empty_cache
+ .quad __objc_empty_vtable
+ .quad __OBJC_METACLASS_RO_$_Foo
+
+ .section __TEXT,__objc_classname,cstring_literals
+l_OBJC_CLASS_NAME_: ; @OBJC_CLASS_NAME_
+ .asciz "Foo"
+
+ .section __DATA,__objc_const
+ .p2align 3, 0x0 ; @"_OBJC_METACLASS_RO_$_Foo"
+__OBJC_METACLASS_RO_$_Foo:
+ .long 3 ; 0x3
+ .long 40 ; 0x28
+ .long 40 ; 0x28
+ .space 4
+ .quad 0
+ .quad l_OBJC_CLASS_NAME_
+ .quad 0
+ .quad 0
+ .quad 0
+ .quad 0
+ .quad 0
+
+ .section __TEXT,__objc_methname,cstring_literals
+l_OBJC_METH_VAR_NAME_: ; @OBJC_METH_VAR_NAME_
+ .asciz "withBar:error:"
+
+ .section __TEXT,__objc_methtype,cstring_literals
+l_OBJC_METH_VAR_TYPE_: ; @OBJC_METH_VAR_TYPE_
+ .asciz "v24@0:8i16i20"
+
+ .section __TEXT,__objc_methname,cstring_literals
+l_OBJC_METH_VAR_NAME_.1: ; @OBJC_METH_VAR_NAME_.1
+ .asciz "error:"
+
+ .section __TEXT,__objc_methtype,cstring_literals
+l_OBJC_METH_VAR_TYPE_.2: ; @OBJC_METH_VAR_TYPE_.2
+ .asciz "v20@0:8i16"
+
+ .section __DATA,__objc_const
+ .p2align 3, 0x0 ; @"_OBJC_$_INSTANCE_METHODS_Foo"
+__OBJC_$_INSTANCE_METHODS_Foo:
+ .long 24 ; 0x18
+ .long 2 ; 0x2
+ .quad l_OBJC_METH_VAR_NAME_
+ .quad l_OBJC_METH_VAR_TYPE_
+ .quad "-[Foo withBar:error:]"
+ .quad l_OBJC_METH_VAR_NAME_.1
+ .quad l_OBJC_METH_VAR_TYPE_.2
+ .quad "-[Foo error:]"
+
+ .p2align 3, 0x0 ; @"_OBJC_CLASS_RO_$_Foo"
+__OBJC_CLASS_RO_$_Foo:
+ .long 2 ; 0x2
+ .long 0 ; 0x0
+ .long 0 ; 0x0
+ .space 4
+ .quad 0
+ .quad l_OBJC_CLASS_NAME_
+ .quad __OBJC_$_INSTANCE_METHODS_Foo
+ .quad 0
+ .quad 0
+ .quad 0
+ .quad 0
+
+ .globl __objc_empty_cache ; @_objc_empty_cache
+.zerofill __DATA,__common,__objc_empty_cache,8,3
+ .section __DATA,__objc_classlist,regular,no_dead_strip
+ .p2align 3, 0x0 ; @"OBJC_LABEL_CLASS_$"
+l_OBJC_LABEL_CLASS_$:
+ .quad _OBJC_CLASS_$_Foo
+
+ .section __DATA,__objc_imageinfo,regular,no_dead_strip
+L_OBJC_IMAGE_INFO:
+ .long 0
+ .long 64
+
+.subsections_via_symbols
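
What the CHECK and NOMERGE lines above verify, in miniature: with --tail-merge-strings the selector "error:" is no longer emitted as its own literal in __objc_methname; selector references resolve to a pointer into the tail of "withBar:error:", which is still a valid NUL-terminated C string. A tiny self-contained sketch of that invariant:

#include <cassert>
#include <cstring>

int main() {
  const char *merged = "withBar:error:";                 // single surviving copy
  const char *error = merged + std::strlen("withBar:");  // tail offset 8
  assert(std::strcmp(error, "error:") == 0);             // same bytes, same NUL
}
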
diff --git a/lld/test/MachO/cstring-tailmerge.s b/lld/test/MachO/cstring-tailmerge.s
new file mode 100644
index 0000000..740f971
--- /dev/null
+++ b/lld/test/MachO/cstring-tailmerge.s
@@ -0,0 +1,85 @@
+; REQUIRES: aarch64
+; RUN: rm -rf %t && split-file %s %t
+
+; RUN: sed "s/<ALIGN>/0/g" %t/align.s.template > %t/align-1.s
+; RUN: sed "s/<ALIGN>/1/g" %t/align.s.template > %t/align-2.s
+; RUN: sed "s/<ALIGN>/2/g" %t/align.s.template > %t/align-4.s
+
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/first.s -o %t/first.o
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-1.s -o %t/align-1.o
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-2.s -o %t/align-2.o
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-4.s -o %t/align-4.o
+
+; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-1.o -o %t/align-1
+; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1
+
+; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-2.o -o %t/align-2
+; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2
+
+; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-4.o -o %t/align-4
+; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4
+
+; CHECK: Contents of (__TEXT,__cstring) section
+; CHECK: [[#%.16x,START:]] get awkward offset{{$}}
+
+; ALIGN1: [[#%.16x,START+19]] myotherlongstr{{$}}
+; ALIGN1: [[#%.16x,START+19+15]] otherstr{{$}}
+
+; ALIGN2: [[#%.16x,START+20]] myotherlongstr{{$}}
+; ALIGN2: [[#%.16x,START+20+16]] longstr{{$}}
+; ALIGN2: [[#%.16x,START+20+16+8]] otherstr{{$}}
+; ALIGN2: [[#%.16x,START+20+16+8+10]] str{{$}}
+
+; ALIGN4: [[#%.16x,START+20]] myotherlongstr{{$}}
+; ALIGN4: [[#%.16x,START+20+16]] otherlongstr{{$}}
+; ALIGN4: [[#%.16x,START+20+16+16]] longstr{{$}}
+; ALIGN4: [[#%.16x,START+20+16+16+8]] otherstr{{$}}
+; ALIGN4: [[#%.16x,START+20+16+16+8+12]] str{{$}}
+
+; CHECK: SYMBOL TABLE:
+
+; ALIGN1: [[#%.16x,START+19]] l O __TEXT,__cstring _myotherlongstr
+; ALIGN1: [[#%.16x,START+21]] l O __TEXT,__cstring _otherlongstr
+; ALIGN1: [[#%.16x,START+26]] l O __TEXT,__cstring _longstr
+; ALIGN1: [[#%.16x,START+34]] l O __TEXT,__cstring _otherstr
+; ALIGN1: [[#%.16x,START+39]] l O __TEXT,__cstring _str
+
+; ALIGN2: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr
+; ALIGN2: [[#%.16x,START+20+2]] l O __TEXT,__cstring _otherlongstr
+; ALIGN2: [[#%.16x,START+20+16]] l O __TEXT,__cstring _longstr
+; ALIGN2: [[#%.16x,START+20+16+8]] l O __TEXT,__cstring _otherstr
+; ALIGN2: [[#%.16x,START+20+16+8+10]] l O __TEXT,__cstring _str
+
+; ALIGN4: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr
+; ALIGN4: [[#%.16x,START+20+16]] l O __TEXT,__cstring _otherlongstr
+; ALIGN4: [[#%.16x,START+20+16+16]] l O __TEXT,__cstring _longstr
+; ALIGN4: [[#%.16x,START+20+16+16+8]] l O __TEXT,__cstring _otherstr
+; ALIGN4: [[#%.16x,START+20+16+16+8+12]] l O __TEXT,__cstring _str
+
+;--- first.s
+.cstring
+.p2align 2
+.asciz "get awkward offset" ; length = 19
+
+;--- align.s.template
+.cstring
+
+.p2align <ALIGN>
+ _myotherlongstr:
+.asciz "myotherlongstr" ; length = 15
+
+.p2align <ALIGN>
+ _otherlongstr:
+.asciz "otherlongstr" ; length = 13, tail offset = 2
+
+.p2align <ALIGN>
+ _longstr:
+.asciz "longstr" ; length = 8, tail offset = 7
+
+.p2align <ALIGN>
+ _otherstr:
+.asciz "otherstr" ; length = 9
+
+.p2align <ALIGN>
+ _str:
+.asciz "str" ; length = 4, tail offset = 5
diff --git a/lld/test/MachO/order-file-cstring-tailmerge.s b/lld/test/MachO/order-file-cstring-tailmerge.s
new file mode 100644
index 0000000..20a4d16
--- /dev/null
+++ b/lld/test/MachO/order-file-cstring-tailmerge.s
@@ -0,0 +1,56 @@
+; REQUIRES: aarch64
+; RUN: rm -rf %t && split-file %s %t
+
+; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
+; RUN: %lld -dylib -arch arm64 --no-tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s
+; RUN: %lld -dylib -arch arm64 --tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=MERGED
+
+; CHECK: _str2
+; CHECK: _str1
+; CHECK: _superstr2
+; CHECK: _superstr3
+; CHECK: _superstr1
+; CHECK: _str3
+
; str1 has a higher priority than superstr3, so the merged superstr1 must be
; laid out before superstr3, even though superstr3 comes before superstr1 in
; the orderfile.
+
+; MERGED: _superstr2
+; MERGED: _str2
+; MERGED: _superstr1
+; MERGED: _str1
+; MERGED: _superstr3
+; MERGED: _str3
+
+;--- a.s
+.cstring
+ _superstr1:
+.asciz "superstr1"
+ _str1:
+.asciz "str1"
+ _superstr2:
+.asciz "superstr2"
+ _str2:
+.asciz "str2"
+ _superstr3:
+.asciz "superstr3"
+ _str3:
+.asciz "str3"
+
+; TODO: We could use update_test_body.py to generate the hashes for the
+; orderfile. Unfortunately, it seems that LLVM has a different hash
+; implementation than the xxh64sum tool. See
+; DeduplicatedCStringSection::getStringOffset() for hash details.
+;
+; while IFS="" read -r line; do
+; echo -n $line | xxh64sum | awk '{printf "CSTR;%010d", and(strtonum("0x"$1), 0x7FFFFFFF)}'
+; echo " # $line"
+; done < orderfile.txt.template
+
+;--- orderfile.txt
+CSTR;1236462241 # str2
+CSTR;1526669509 # str1
+CSTR;1563550684 # superstr2
+CSTR;1044337806 # superstr3
+CSTR;262417687 # superstr1
+CSTR;717161398 # str3
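
The MERGED ordering falls out of the layout loop in finalizeContents(): when a high-priority string is tail-mergeable, its superstring is laid out in that string's slot, so the superstring inherits the suffix's priority. A simplified sketch with this test's strings (plain std types stand in for lld's; each strN happens to be a suffix of superstrN):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  // Priority order from the orderfile, highest first.
  std::vector<std::string> priorityOrder = {"str2",      "str1",
                                            "superstr2", "superstr3",
                                            "superstr1", "str3"};
  // Suffix -> superstring relation discovered by the tail-merge scan.
  std::map<std::string, std::string> tailMergeMap = {
      {"str1", "superstr1"}, {"str2", "superstr2"}, {"str3", "superstr3"}};

  std::map<std::string, uint64_t> offsets;
  uint64_t size = 0;
  for (std::string s : priorityOrder) {
    auto it = tailMergeMap.find(s);
    if (it != tailMergeMap.end())
      s = it->second; // lay out the superstring at this priority slot
    if (offsets.emplace(s, size).second) {
      std::cout << s << "\n";
      size += s.size() + 1; // account for the NUL terminator
    }
  }
  // Prints superstr2, superstr1, superstr3: each superstring is placed at
  // its suffix's priority, matching the MERGED lines above.
}
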
diff --git a/lldb/include/lldb/Host/JSONTransport.h b/lldb/include/lldb/Host/JSONTransport.h
index c73021d..1453316 100644
--- a/lldb/include/lldb/Host/JSONTransport.h
+++ b/lldb/include/lldb/Host/JSONTransport.h
@@ -18,6 +18,7 @@
#include "lldb/Utility/IOObject.h"
#include "lldb/Utility/Status.h"
#include "lldb/lldb-forward.h"
+#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
@@ -25,13 +26,23 @@
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"
+#include <atomic>
+#include <functional>
+#include <map>
+#include <mutex>
+#include <optional>
#include <string>
#include <system_error>
+#include <type_traits>
+#include <utility>
#include <variant>
#include <vector>
+#if __cplusplus >= 202002L
+#include <concepts>
+#endif
-namespace lldb_private {
+namespace lldb_private::transport {
+/// An error to indicate that the transport reached EOF but there were still
+/// unhandled contents in the read buffer.
class TransportUnhandledContentsError
: public llvm::ErrorInfo<TransportUnhandledContentsError> {
public:
@@ -50,17 +61,75 @@ private:
std::string m_unhandled_contents;
};
+/// An error to indicate that the parameters of a Req, Resp or Evt could not be
+/// deserialized.
+class InvalidParams : public llvm::ErrorInfo<InvalidParams> {
+public:
+ static char ID;
+
+ explicit InvalidParams(std::string method, std::string context)
+ : m_method(std::move(method)), m_context(std::move(context)) {}
+
+ void log(llvm::raw_ostream &OS) const override;
+ std::error_code convertToErrorCode() const override;
+
+private:
+ /// The JSONRPC remote method call.
+ std::string m_method;
+
+ /// Additional context from the parsing failure, e.g. "missing value at
+ /// (root)[1].str".
+ std::string m_context;
+};
+
+/// An error to indicate that no handler was registered for a given method.
+class MethodNotFound : public llvm::ErrorInfo<MethodNotFound> {
+public:
+ static char ID;
+
+ static constexpr int kErrorCode = -32601;
+
+ explicit MethodNotFound(std::string method) : m_method(std::move(method)) {}
+
+ void log(llvm::raw_ostream &OS) const override;
+ std::error_code convertToErrorCode() const override;
+
+private:
+ std::string m_method;
+};
+
+#if __cplusplus >= 202002L
+/// A ProtocolDescriptor details the types used in a JSONTransport for handling
+/// transport communication.
+template <typename T>
+concept ProtocolDescriptor = requires {
+ typename T::Id;
+ typename T::Req;
+ typename T::Resp;
+ typename T::Evt;
+};
+#endif
+
/// A transport is responsible for maintaining the connection to a client
/// application, and reading/writing structured messages to it.
///
-/// Transports have limited thread safety requirements:
+/// JSONTransport has limited thread safety requirements:
/// - Messages will not be sent concurrently.
/// - Messages MAY be sent while Run() is reading, or its callback is active.
-template <typename Req, typename Resp, typename Evt> class Transport {
+///
+#if __cplusplus >= 202002L
+template <ProtocolDescriptor Proto>
+#else
+template <typename Proto>
+#endif
+class JSONTransport {
public:
+ using Req = typename Proto::Req;
+ using Resp = typename Proto::Resp;
+ using Evt = typename Proto::Evt;
using Message = std::variant<Req, Resp, Evt>;
- virtual ~Transport() = default;
+ virtual ~JSONTransport() = default;
/// Sends an event, a message that does not require a response.
virtual llvm::Error Send(const Evt &) = 0;
@@ -69,7 +138,8 @@ public:
/// Sends a response to a specific request.
virtual llvm::Error Send(const Resp &) = 0;
- /// Implemented to handle incoming messages. (See Run() below).
+ /// Implemented to handle incoming messages. (See `RegisterMessageHandler()`
+ /// below).
class MessageHandler {
public:
virtual ~MessageHandler() = default;
@@ -90,8 +160,6 @@ public:
virtual void OnClosed() = 0;
};
- using MessageHandlerSP = std::shared_ptr<MessageHandler>;
-
/// RegisterMessageHandler registers the Transport with the given MainLoop and
/// handles any incoming messages using the given MessageHandler.
///
@@ -108,18 +176,23 @@ protected:
};
/// An IOTransport sends and receives messages using an IOObject.
-template <typename Req, typename Resp, typename Evt>
-class IOTransport : public Transport<Req, Resp, Evt> {
+template <typename Proto> class IOTransport : public JSONTransport<Proto> {
public:
- using Transport<Req, Resp, Evt>::Transport;
- using MessageHandler = typename Transport<Req, Resp, Evt>::MessageHandler;
+ using Message = typename JSONTransport<Proto>::Message;
+ using MessageHandler = typename JSONTransport<Proto>::MessageHandler;
IOTransport(lldb::IOObjectSP in, lldb::IOObjectSP out)
: m_in(in), m_out(out) {}
- llvm::Error Send(const Evt &evt) override { return Write(evt); }
- llvm::Error Send(const Req &req) override { return Write(req); }
- llvm::Error Send(const Resp &resp) override { return Write(resp); }
+ llvm::Error Send(const typename Proto::Evt &evt) override {
+ return Write(evt);
+ }
+ llvm::Error Send(const typename Proto::Req &req) override {
+ return Write(req);
+ }
+ llvm::Error Send(const typename Proto::Resp &resp) override {
+ return Write(resp);
+ }
llvm::Expected<MainLoop::ReadHandleUP>
RegisterMessageHandler(MainLoop &loop, MessageHandler &handler) override {
@@ -139,7 +212,7 @@ public:
/// detail.
static constexpr size_t kReadBufferSize = 1024;
- // FIXME: Write should be protected.
+protected:
llvm::Error Write(const llvm::json::Value &message) {
this->Logv("<-- {0}", message);
std::string output = Encode(message);
@@ -147,7 +220,6 @@ public:
return m_out->Write(output.data(), bytes_written).takeError();
}
-protected:
virtual llvm::Expected<std::vector<std::string>> Parse() = 0;
virtual std::string Encode(const llvm::json::Value &message) = 0;
@@ -174,9 +246,8 @@ private:
}
for (const std::string &raw_message : *raw_messages) {
- llvm::Expected<typename Transport<Req, Resp, Evt>::Message> message =
- llvm::json::parse<typename Transport<Req, Resp, Evt>::Message>(
- raw_message);
+ llvm::Expected<Message> message =
+ llvm::json::parse<Message>(raw_message);
if (!message) {
handler.OnError(message.takeError());
return;
@@ -201,10 +272,14 @@ private:
};
/// A transport class for JSON with a HTTP header.
-template <typename Req, typename Resp, typename Evt>
-class HTTPDelimitedJSONTransport : public IOTransport<Req, Resp, Evt> {
+#if __cplusplus >= 202002L
+template <ProtocolDescriptor Proto>
+#else
+template <typename Proto>
+#endif
+class HTTPDelimitedJSONTransport : public IOTransport<Proto> {
public:
- using IOTransport<Req, Resp, Evt>::IOTransport;
+ using IOTransport<Proto>::IOTransport;
protected:
/// Encodes messages based on
@@ -230,8 +305,8 @@ protected:
for (const llvm::StringRef &header :
llvm::split(headers, kHeaderSeparator)) {
auto [key, value] = header.split(kHeaderFieldSeparator);
- // 'Content-Length' is the only meaningful key at the moment. Others are
- // ignored.
+ // 'Content-Length' is the only meaningful key at the moment. Others
+ // are ignored.
if (!key.equals_insensitive(kHeaderContentLength))
continue;
@@ -268,10 +343,14 @@ protected:
};
/// A transport class for JSON RPC.
-template <typename Req, typename Resp, typename Evt>
-class JSONRPCTransport : public IOTransport<Req, Resp, Evt> {
+#if __cplusplus >= 202002L
+template <ProtocolDescriptor Proto>
+#else
+template <typename Proto>
+#endif
+class JSONRPCTransport : public IOTransport<Proto> {
public:
- using IOTransport<Req, Resp, Evt>::IOTransport;
+ using IOTransport<Proto>::IOTransport;
protected:
std::string Encode(const llvm::json::Value &message) override {
@@ -297,6 +376,497 @@ protected:
static constexpr llvm::StringLiteral kMessageSeparator = "\n";
};
-} // namespace lldb_private
+/// A handler for the response to an outgoing request.
+template <typename T>
+using Reply =
+ std::conditional_t<std::is_void_v<T>,
+ llvm::unique_function<void(llvm::Error)>,
+ llvm::unique_function<void(llvm::Expected<T>)>>;
+
+namespace detail {
+template <typename R, typename P> struct request_t final {
+ using type = llvm::unique_function<void(const P &, Reply<R>)>;
+};
+template <typename R> struct request_t<R, void> final {
+ using type = llvm::unique_function<void(Reply<R>)>;
+};
+template <typename P> struct event_t final {
+ using type = llvm::unique_function<void(const P &)>;
+};
+template <> struct event_t<void> final {
+ using type = llvm::unique_function<void()>;
+};
+} // namespace detail
+
+template <typename R, typename P>
+using OutgoingRequest = typename detail::request_t<R, P>::type;
+
+/// A function to send an outgoing event.
+template <typename P> using OutgoingEvent = typename detail::event_t<P>::type;
+
+#if __cplusplus >= 202002L
+/// This represents a protocol description that includes additional helpers
+/// for constructing requests, responses and events to work with `Binder`.
+template <typename T>
+concept BindingBuilder =
+ ProtocolDescriptor<T> &&
+ requires(T::Id id, T::Req req, T::Resp resp, T::Evt evt,
+ llvm::StringRef method, std::optional<llvm::json::Value> params,
+ std::optional<llvm::json::Value> result, llvm::Error err) {
+ /// For initializing the unique sequence identifier;
+ { T::InitialId() } -> std::same_as<typename T::Id>;
+ /// Incrementing the sequence identifier.
+ { id++ } -> std::same_as<typename T::Id>;
+
+ /// Constructing protocol types
+ /// @{
+ /// Construct a new request.
+ { T::Make(id, method, params) } -> std::same_as<typename T::Req>;
+ /// Construct a new error response.
+ { T::Make(req, std::move(err)) } -> std::same_as<typename T::Resp>;
+ /// Construct a new success response.
+ { T::Make(req, result) } -> std::same_as<typename T::Resp>;
+ /// Construct a new event.
+ { T::Make(method, params) } -> std::same_as<typename T::Evt>;
+ /// @}
+
+ /// Keys for associated types.
+ /// @{
+ /// Looking up in flight responses.
+ { T::KeyFor(resp) } -> std::same_as<typename T::Id>;
+ /// Extract method from request.
+ { T::KeyFor(req) } -> std::same_as<std::string>;
+ /// Extract method from event.
+ { T::KeyFor(evt) } -> std::same_as<std::string>;
+ /// @}
+
+ /// Extracting information from associated types.
+ /// @{
+ /// Extract parameters from a request.
+ { T::Extract(req) } -> std::same_as<std::optional<llvm::json::Value>>;
+ /// Extract result from a response.
+ { T::Extract(resp) } -> std::same_as<llvm::Expected<llvm::json::Value>>;
+ /// Extract parameters from an event.
+ { T::Extract(evt) } -> std::same_as<std::optional<llvm::json::Value>>;
+ /// @}
+ };
+#endif
+
+/// Binder collects a table of functions that handle calls.
+///
+/// The wrapper takes care of parsing/serializing responses.
+///
+/// This allows a JSONTransport to handle incoming and outgoing requests and
+/// events.
+///
+/// Example of binding an incoming request to a lambda:
+/// \code{cpp}
+/// Binder binder{transport};
+/// binder.Bind<int, vector<int>>("adder", [](const vector<int> &params) {
+/// int sum = 0;
+/// for (int v : params)
+/// sum += v;
+/// return sum;
+/// });
+/// \endcode
+///
+/// Example of binding an outgoing request:
+/// \code{cpp}
+/// OutgoingRequest<int, vector<int>> call_add =
+/// binder.Bind<int, vector<int>>("add");
+/// call_add({1,2,3}, [](Expected<int> result) {
+/// cout << *result << "\n";
+/// });
+/// \endcode
+#if __cplusplus >= 202002L
+template <BindingBuilder Proto>
+#else
+template <typename Proto>
+#endif
+class Binder : public JSONTransport<Proto>::MessageHandler {
+ using Req = typename Proto::Req;
+ using Resp = typename Proto::Resp;
+ using Evt = typename Proto::Evt;
+ using Id = typename Proto::Id;
+ using Transport = JSONTransport<Proto>;
+ using MessageHandler = typename Transport::MessageHandler;
+
+public:
+ explicit Binder(Transport &transport)
+ : m_transport(transport), m_seq(Proto::InitialId()) {}
+
+ Binder(const Binder &) = delete;
+ Binder &operator=(const Binder &) = delete;
+
+ /// Bind a handler on transport disconnect.
+ template <typename Fn, typename... Args>
+ void OnDisconnect(Fn &&fn, Args &&...args);
+
+ /// Bind a handler on error when communicating with the transport.
+ template <typename Fn, typename... Args>
+ void OnError(Fn &&fn, Args &&...args);
+
+ /// Bind a handler for an incoming request.
+ /// e.g. `bind("peek", &ThisModule::peek, this);`.
+ /// Handler should be e.g. `Expected<PeekResult> peek(const PeekParams&);`
+ /// PeekParams must be JSON parsable and PeekResult must be serializable.
+ template <typename Result, typename Params, typename Fn, typename... Args>
+ void Bind(llvm::StringLiteral method, Fn &&fn, Args &&...args);
+
+ /// Bind a handler for an incoming event.
+ /// e.g. `bind("peek", &ThisModule::peek, this);`
+ /// Handler should be e.g. `void peek(const PeekParams&);`
+ /// PeekParams must be JSON parsable.
+ template <typename Params, typename Fn, typename... Args>
+ void Bind(llvm::StringLiteral method, Fn &&fn, Args &&...args);
+
+ /// Bind a function object to be used for outgoing requests.
+ /// e.g. `OutgoingRequest<Result, Params> Edit = Bind<Result, Params>("edit");`
+ /// Params must be JSON-serializable, Result must be parsable.
+ template <typename Result, typename Params>
+ OutgoingRequest<Result, Params> Bind(llvm::StringLiteral method);
+
+ /// Bind a function object to be used for outgoing events.
+ /// e.g. `OutgoingEvent<LogParams> Log = Bind<LogParams>("log");`
+ /// LogParams must be JSON-serializable.
+ template <typename Params>
+ OutgoingEvent<Params> Bind(llvm::StringLiteral method);
+
+ void Received(const Evt &evt) override {
+ std::scoped_lock<std::recursive_mutex> guard(m_mutex);
+ auto it = m_event_handlers.find(Proto::KeyFor(evt));
+ if (it == m_event_handlers.end()) {
+ OnError(llvm::createStringError(
+ llvm::formatv("no handled for event {0}", toJSON(evt))));
+ return;
+ }
+ it->second(evt);
+ }
+
+ void Received(const Req &req) override {
+ ReplyOnce reply(req, &m_transport, this);
+
+ std::scoped_lock<std::recursive_mutex> guard(m_mutex);
+ auto it = m_request_handlers.find(Proto::KeyFor(req));
+ if (it == m_request_handlers.end()) {
+ reply(Proto::Make(
+ req, llvm::make_error<MethodNotFound>(Proto::KeyFor(req))));
+ return;
+ }
+
+ it->second(req, std::move(reply));
+ }
+
+ void Received(const Resp &resp) override {
+ std::scoped_lock<std::recursive_mutex> guard(m_mutex);
+
+ Id id = Proto::KeyFor(resp);
+ auto it = m_pending_responses.find(id);
+ if (it == m_pending_responses.end()) {
+ OnError(llvm::createStringError(
+ llvm::formatv("no pending request for {0}", toJSON(resp))));
+ return;
+ }
+
+ it->second(resp);
+ m_pending_responses.erase(it);
+ }
+
+ void OnError(llvm::Error err) override {
+ std::scoped_lock<std::recursive_mutex> guard(m_mutex);
+ if (m_error_handler)
+ m_error_handler(std::move(err));
+ }
+
+ void OnClosed() override {
+ std::scoped_lock<std::recursive_mutex> guard(m_mutex);
+ if (m_disconnect_handler)
+ m_disconnect_handler();
+ }
+
+private:
+ template <typename T>
+ static llvm::Expected<T> Parse(const llvm::json::Value &raw,
+ llvm::StringRef method);
+
+ template <typename T> using Callback = llvm::unique_function<T>;
+
+ std::recursive_mutex m_mutex;
+ Transport &m_transport;
+ Id m_seq;
+ std::map<Id, Callback<void(const Resp &)>> m_pending_responses;
+ llvm::StringMap<Callback<void(const Req &, Callback<void(const Resp &)>)>>
+ m_request_handlers;
+ llvm::StringMap<Callback<void(const Evt &)>> m_event_handlers;
+ Callback<void()> m_disconnect_handler;
+ Callback<void(llvm::Error)> m_error_handler;
+
+ /// Function object to reply to a call.
+ /// Each instance must be called exactly once, otherwise:
+ /// - the bug is logged, and (in debug mode) an assert will fire
+ /// - if there was no reply, an error reply is sent
+ /// - if there were multiple replies, only the first is sent
+ class ReplyOnce {
+ std::atomic<bool> replied = {false};
+ const Req req;
+ Transport *transport; // Null when moved-from.
+ MessageHandler *handler; // Null when moved-from.
+
+ public:
+ ReplyOnce(const Req req, Transport *transport, MessageHandler *handler)
+ : req(req), transport(transport), handler(handler) {
+ assert(handler);
+ }
+ ReplyOnce(ReplyOnce &&other)
+ : replied(other.replied.load()), req(other.req),
+ transport(other.transport), handler(other.handler) {
+ other.transport = nullptr;
+ other.handler = nullptr;
+ }
+ ReplyOnce &operator=(ReplyOnce &&) = delete;
+ ReplyOnce(const ReplyOnce &) = delete;
+ ReplyOnce &operator=(const ReplyOnce &) = delete;
+
+ ~ReplyOnce() {
+ if (transport && handler && !replied) {
+ assert(false && "must reply to all calls!");
+ (*this)(Proto::Make(req, llvm::createStringError("failed to reply")));
+ }
+ }
+
+ void operator()(const Resp &resp) {
+ assert(transport && handler && "moved-from!");
+ if (replied.exchange(true)) {
+ assert(false && "must reply to each call only once!");
+ return;
+ }
+
+ if (llvm::Error error = transport->Send(resp))
+ handler->OnError(std::move(error));
+ }
+ };
+};
+
+#if __cplusplus >= 202002L
+template <BindingBuilder Proto>
+#else
+template <typename Proto>
+#endif
+template <typename Fn, typename... Args>
+void Binder<Proto>::OnDisconnect(Fn &&fn, Args &&...args) {
+ m_disconnect_handler = [fn, args...]() mutable {
+ std::invoke(std::forward<Fn>(fn), std::forward<Args>(args)...);
+ };
+}
+
+#if __cplusplus >= 202002L
+template <BindingBuilder Proto>
+#else
+template <typename Proto>
+#endif
+template <typename Fn, typename... Args>
+void Binder<Proto>::OnError(Fn &&fn, Args &&...args) {
+ m_error_handler = [fn, args...](llvm::Error error) mutable {
+ std::invoke(std::forward<Fn>(fn), std::forward<Args>(args)...,
+ std::move(error));
+ };
+}
+
+#if __cplusplus >= 202002L
+template <BindingBuilder Proto>
+#else
+template <typename Proto>
+#endif
+template <typename Result, typename Params, typename Fn, typename... Args>
+void Binder<Proto>::Bind(llvm::StringLiteral method, Fn &&fn, Args &&...args) {
+ assert(m_request_handlers.find(method) == m_request_handlers.end() &&
+ "request already bound");
+ if constexpr (std::is_void_v<Result> && std::is_void_v<Params>) {
+ m_request_handlers[method] =
+ [fn, args...](const Req &req,
+ llvm::unique_function<void(const Resp &)> reply) mutable {
+ llvm::Error result =
+ std::invoke(std::forward<Fn>(fn), std::forward<Args>(args)...);
+ reply(Proto::Make(req, std::move(result)));
+ };
+ } else if constexpr (std::is_void_v<Params>) {
+ m_request_handlers[method] =
+ [fn, args...](const Req &req,
+ llvm::unique_function<void(const Resp &)> reply) mutable {
+ llvm::Expected<Result> result =
+ std::invoke(std::forward<Fn>(fn), std::forward<Args>(args)...);
+ if (!result)
+ return reply(Proto::Make(req, result.takeError()));
+ reply(Proto::Make(req, toJSON(*result)));
+ };
+ } else if constexpr (std::is_void_v<Result>) {
+ m_request_handlers[method] =
+ [method, fn,
+ args...](const Req &req,
+ llvm::unique_function<void(const Resp &)> reply) mutable {
+ llvm::Expected<Params> params =
+ Parse<Params>(Proto::Extract(req), method);
+ if (!params)
+ return reply(Proto::Make(req, params.takeError()));
+
+ llvm::Error result = std::invoke(
+ std::forward<Fn>(fn), std::forward<Args>(args)..., *params);
+ reply(Proto::Make(req, std::move(result)));
+ };
+ } else {
+ m_request_handlers[method] =
+ [method, fn,
+ args...](const Req &req,
+ llvm::unique_function<void(const Resp &)> reply) mutable {
+ llvm::Expected<Params> params =
+ Parse<Params>(Proto::Extract(req), method);
+ if (!params)
+ return reply(Proto::Make(req, params.takeError()));
+
+ llvm::Expected<Result> result = std::invoke(
+ std::forward<Fn>(fn), std::forward<Args>(args)..., *params);
+ if (!result)
+ return reply(Proto::Make(req, result.takeError()));
+
+ reply(Proto::Make(req, toJSON(*result)));
+ };
+ }
+}
+
+#if __cplusplus >= 202002L
+template <BindingBuilder Proto>
+#else
+template <typename Proto>
+#endif
+template <typename Params, typename Fn, typename... Args>
+void Binder<Proto>::Bind(llvm::StringLiteral method, Fn &&fn, Args &&...args) {
+ assert(m_event_handlers.find(method) == m_event_handlers.end() &&
+ "event already bound");
+ if constexpr (std::is_void_v<Params>) {
+ m_event_handlers[method] = [fn, args...](const Evt &) mutable {
+ std::invoke(std::forward<Fn>(fn), std::forward<Args>(args)...);
+ };
+ } else {
+ m_event_handlers[method] = [this, method, fn,
+ args...](const Evt &evt) mutable {
+ llvm::Expected<Params> params =
+ Parse<Params>(Proto::Extract(evt), method);
+ if (!params)
+ return OnError(params.takeError());
+ std::invoke(std::forward<Fn>(fn), std::forward<Args>(args)..., *params);
+ };
+ }
+}
+
+#if __cplusplus >= 202002L
+template <BindingBuilder Proto>
+#else
+template <typename Proto>
+#endif
+template <typename Result, typename Params>
+OutgoingRequest<Result, Params>
+Binder<Proto>::Bind(llvm::StringLiteral method) {
+ if constexpr (std::is_void_v<Result> && std::is_void_v<Params>) {
+ return [this, method](Reply<Result> fn) {
+ std::scoped_lock<std::recursive_mutex> guard(m_mutex);
+ Id id = ++m_seq;
+ Req req = Proto::Make(id, method, std::nullopt);
+ m_pending_responses[id] = [fn = std::move(fn)](const Resp &resp) mutable {
+ llvm::Expected<llvm::json::Value> result = Proto::Extract(resp);
+ if (!result)
+ return fn(result.takeError());
+ fn(llvm::Error::success());
+ };
+ if (llvm::Error error = m_transport.Send(req))
+ OnError(std::move(error));
+ };
+ } else if constexpr (std::is_void_v<Params>) {
+ return [this, method](Reply<Result> fn) {
+ std::scoped_lock<std::recursive_mutex> guard(m_mutex);
+ Id id = ++m_seq;
+ Req req = Proto::Make(id, method, std::nullopt);
+ m_pending_responses[id] = [fn = std::move(fn),
+ method](const Resp &resp) mutable {
+ llvm::Expected<llvm::json::Value> result = Proto::Extract(resp);
+ if (!result)
+ return fn(result.takeError());
+ fn(Parse<Result>(*result, method));
+ };
+ if (llvm::Error error = m_transport.Send(req))
+ OnError(std::move(error));
+ };
+ } else if constexpr (std::is_void_v<Result>) {
+ return [this, method](const Params &params, Reply<Result> fn) {
+ std::scoped_lock<std::recursive_mutex> guard(m_mutex);
+ Id id = ++m_seq;
+ Req req = Proto::Make(id, method, llvm::json::Value(params));
+ m_pending_responses[id] = [fn = std::move(fn)](const Resp &resp) mutable {
+ llvm::Expected<llvm::json::Value> result = Proto::Extract(resp);
+ if (!result)
+ return fn(result.takeError());
+ fn(llvm::Error::success());
+ };
+ if (llvm::Error error = m_transport.Send(req))
+ OnError(std::move(error));
+ };
+ } else {
+ return [this, method](const Params &params, Reply<Result> fn) {
+ std::scoped_lock<std::recursive_mutex> guard(m_mutex);
+ Id id = ++m_seq;
+ Req req = Proto::Make(id, method, llvm::json::Value(params));
+ m_pending_responses[id] = [fn = std::move(fn),
+ method](const Resp &resp) mutable {
+ llvm::Expected<llvm::json::Value> result = Proto::Extract(resp);
+ if (llvm::Error err = result.takeError())
+ return fn(std::move(err));
+ fn(Parse<Result>(*result, method));
+ };
+ if (llvm::Error error = m_transport.Send(req))
+ OnError(std::move(error));
+ };
+ }
+}
+
+#if __cplusplus >= 202002L
+template <BindingBuilder Proto>
+#else
+template <typename Proto>
+#endif
+template <typename Params>
+OutgoingEvent<Params> Binder<Proto>::Bind(llvm::StringLiteral method) {
+ if constexpr (std::is_void_v<Params>) {
+ return [this, method]() {
+ if (llvm::Error error =
+ m_transport.Send(Proto::Make(method, std::nullopt)))
+ OnError(std::move(error));
+ };
+ } else {
+ return [this, method](const Params &params) {
+ if (llvm::Error error =
+ m_transport.Send(Proto::Make(method, toJSON(params))))
+ OnError(std::move(error));
+ };
+ }
+}
+
+#if __cplusplus >= 202002L
+template <BindingBuilder Proto>
+#else
+template <typename Proto>
+#endif
+template <typename T>
+llvm::Expected<T> Binder<Proto>::Parse(const llvm::json::Value &raw,
+ llvm::StringRef method) {
+ T result;
+ llvm::json::Path::Root root;
+ if (!fromJSON(raw, result, root)) {
+ // Dump the relevant parts of the broken message.
+ std::string context;
+ llvm::raw_string_ostream OS(context);
+ root.printErrorContext(raw, OS);
+ return llvm::make_error<InvalidParams>(method.str(), context);
+ }
+ return std::move(result);
+}
+
+} // namespace lldb_private::transport
#endif
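
The Reply<T> and OutgoingRequest<R, P> aliases above lean on std::conditional_t plus a partial specialization so that a void type parameter simply drops the corresponding argument. A self-contained sketch of just that trick, deliberately simplified: plain values replace llvm::Expected, std::function replaces llvm::unique_function, and the void reply drops the llvm::Error argument the real header keeps:

#include <functional>
#include <iostream>
#include <string>
#include <type_traits>

template <typename T>
using Reply = std::conditional_t<std::is_void_v<T>,
                                 std::function<void()>,           // no payload
                                 std::function<void(const T &)>>; // payload

template <typename R, typename P> struct request_t {
  using type = std::function<void(const P &, Reply<R>)>;
};
template <typename R> struct request_t<R, void> { // void params: no argument
  using type = std::function<void(Reply<R>)>;
};
template <typename R, typename P>
using OutgoingRequest = typename request_t<R, P>::type;

int main() {
  OutgoingRequest<int, std::string> echoLen =
      [](const std::string &s, Reply<int> reply) {
        reply(static_cast<int>(s.size()));
      };
  echoLen("hello", [](const int &n) { std::cout << n << "\n"; }); // prints 5

  OutgoingRequest<void, void> ping = [](Reply<void> done) { done(); };
  ping([] { std::cout << "pong\n"; }); // both type parameters collapsed away
}
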
diff --git a/lldb/include/lldb/Protocol/MCP/MCPError.h b/lldb/include/lldb/Protocol/MCP/MCPError.h
index 55dd40f..609a173 100644
--- a/lldb/include/lldb/Protocol/MCP/MCPError.h
+++ b/lldb/include/lldb/Protocol/MCP/MCPError.h
@@ -9,7 +9,6 @@
#ifndef LLDB_PROTOCOL_MCP_MCPERROR_H
#define LLDB_PROTOCOL_MCP_MCPERROR_H
-#include "lldb/Protocol/MCP/Protocol.h"
#include "llvm/Support/Error.h"
#include <string>
@@ -26,14 +25,12 @@ public:
const std::string &getMessage() const { return m_message; }
- lldb_protocol::mcp::Error toProtocolError() const;
-
static constexpr int64_t kResourceNotFound = -32002;
static constexpr int64_t kInternalError = -32603;
private:
std::string m_message;
- int64_t m_error_code;
+ int m_error_code;
};
class UnsupportedURI : public llvm::ErrorInfo<UnsupportedURI> {
diff --git a/lldb/include/lldb/Protocol/MCP/Protocol.h b/lldb/include/lldb/Protocol/MCP/Protocol.h
index 6e1ffcb..a0ba865 100644
--- a/lldb/include/lldb/Protocol/MCP/Protocol.h
+++ b/lldb/include/lldb/Protocol/MCP/Protocol.h
@@ -14,6 +14,7 @@
#ifndef LLDB_PROTOCOL_MCP_PROTOCOL_H
#define LLDB_PROTOCOL_MCP_PROTOCOL_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/JSON.h"
#include <optional>
#include <string>
@@ -322,6 +323,10 @@ struct CallToolResult {
llvm::json::Value toJSON(const CallToolResult &);
bool fromJSON(const llvm::json::Value &, CallToolResult &, llvm::json::Path);
+lldb_protocol::mcp::Request
+MakeRequest(int64_t id, llvm::StringRef method,
+ std::optional<llvm::json::Value> params);
+
} // namespace lldb_protocol::mcp
#endif
diff --git a/lldb/include/lldb/Protocol/MCP/Server.h b/lldb/include/lldb/Protocol/MCP/Server.h
index 970980d..f185d51 100644
--- a/lldb/include/lldb/Protocol/MCP/Server.h
+++ b/lldb/include/lldb/Protocol/MCP/Server.h
@@ -9,7 +9,6 @@
#ifndef LLDB_PROTOCOL_MCP_SERVER_H
#define LLDB_PROTOCOL_MCP_SERVER_H
-#include "lldb/Host/JSONTransport.h"
#include "lldb/Host/MainLoop.h"
#include "lldb/Protocol/MCP/Protocol.h"
#include "lldb/Protocol/MCP/Resource.h"
@@ -19,75 +18,66 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/Signals.h"
-#include <functional>
#include <memory>
#include <string>
#include <vector>
namespace lldb_protocol::mcp {
-class Server : public MCPTransport::MessageHandler {
- using ClosedCallback = llvm::unique_function<void()>;
+class Server {
+
+ using MCPTransportUP = std::unique_ptr<lldb_protocol::mcp::MCPTransport>;
+
+ using ReadHandleUP = lldb_private::MainLoop::ReadHandleUP;
public:
- Server(std::string name, std::string version, MCPTransport &client,
- LogCallback log_callback = {}, ClosedCallback closed_callback = {});
+ Server(std::string name, std::string version, LogCallback log_callback = {});
~Server() = default;
- using NotificationHandler = std::function<void(const Notification &)>;
-
void AddTool(std::unique_ptr<Tool> tool);
void AddResourceProvider(std::unique_ptr<ResourceProvider> resource_provider);
- void AddNotificationHandler(llvm::StringRef method,
- NotificationHandler handler);
-
-protected:
- ServerCapabilities GetCapabilities();
-
- using RequestHandler =
- std::function<llvm::Expected<Response>(const Request &)>;
- void AddRequestHandlers();
+ llvm::Error Accept(lldb_private::MainLoop &, MCPTransportUP);
- void AddRequestHandler(llvm::StringRef method, RequestHandler handler);
-
- llvm::Expected<std::optional<Message>> HandleData(llvm::StringRef data);
-
- llvm::Expected<Response> Handle(const Request &request);
- void Handle(const Notification &notification);
+protected:
+ MCPBinderUP Bind(MCPTransport &);
- llvm::Expected<Response> InitializeHandler(const Request &);
+ ServerCapabilities GetCapabilities();
- llvm::Expected<Response> ToolsListHandler(const Request &);
- llvm::Expected<Response> ToolsCallHandler(const Request &);
+ llvm::Expected<InitializeResult> InitializeHandler(const InitializeParams &);
- llvm::Expected<Response> ResourcesListHandler(const Request &);
- llvm::Expected<Response> ResourcesReadHandler(const Request &);
+ llvm::Expected<ListToolsResult> ToolsListHandler();
+ llvm::Expected<CallToolResult> ToolsCallHandler(const CallToolParams &);
- void Received(const Request &) override;
- void Received(const Response &) override;
- void Received(const Notification &) override;
- void OnError(llvm::Error) override;
- void OnClosed() override;
+ llvm::Expected<ListResourcesResult> ResourcesListHandler();
+ llvm::Expected<ReadResourceResult>
+ ResourcesReadHandler(const ReadResourceParams &);
-protected:
- void Log(llvm::StringRef);
+ template <typename... Ts> inline auto Logv(const char *Fmt, Ts &&...Vals) {
+ Log(llvm::formatv(Fmt, std::forward<Ts>(Vals)...).str());
+ }
+ void Log(llvm::StringRef message) {
+ if (m_log_callback)
+ m_log_callback(message);
+ }
private:
const std::string m_name;
const std::string m_version;
- MCPTransport &m_client;
LogCallback m_log_callback;
- ClosedCallback m_closed_callback;
+ struct Client {
+ ReadHandleUP handle;
+ MCPTransportUP transport;
+ MCPBinderUP binder;
+ };
+ std::map<MCPTransport *, Client> m_instances;
llvm::StringMap<std::unique_ptr<Tool>> m_tools;
std::vector<std::unique_ptr<ResourceProvider>> m_resource_providers;
-
- llvm::StringMap<RequestHandler> m_request_handlers;
- llvm::StringMap<NotificationHandler> m_notification_handlers;
};
class ServerInfoHandle;
@@ -121,7 +111,7 @@ public:
ServerInfoHandle &operator=(const ServerInfoHandle &) = delete;
/// @}
- /// Remove the file.
+ /// Remove the file on disk, if one is tracked.
void Remove();
private:
diff --git a/lldb/include/lldb/Protocol/MCP/Transport.h b/lldb/include/lldb/Protocol/MCP/Transport.h
index 47c2ccf..b7a1eb7 100644
--- a/lldb/include/lldb/Protocol/MCP/Transport.h
+++ b/lldb/include/lldb/Protocol/MCP/Transport.h
@@ -10,22 +10,78 @@
#define LLDB_PROTOCOL_MCP_TRANSPORT_H
#include "lldb/Host/JSONTransport.h"
+#include "lldb/Protocol/MCP/MCPError.h"
#include "lldb/Protocol/MCP/Protocol.h"
#include "lldb/lldb-forward.h"
#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include <sys/types.h>
namespace lldb_protocol::mcp {
+struct ProtocolDescriptor {
+ using Id = int64_t;
+ using Req = Request;
+ using Resp = Response;
+ using Evt = Notification;
+
+ static inline Id InitialId() { return 0; }
+ static inline Request Make(Id id, llvm::StringRef method,
+ std::optional<llvm::json::Value> params) {
+ return Request{id, method.str(), params};
+ }
+ static inline Notification Make(llvm::StringRef method,
+ std::optional<llvm::json::Value> params) {
+ return Notification{method.str(), params};
+ }
+ static inline Response Make(Req req, llvm::Error error) {
+ lldb_protocol::mcp::Error protocol_error;
+ llvm::handleAllErrors(
+ std::move(error), [&](const llvm::ErrorInfoBase &err) {
+ std::error_code cerr = err.convertToErrorCode();
+ protocol_error.code =
+ cerr == llvm::inconvertibleErrorCode()
+ ? lldb_protocol::mcp::eErrorCodeInternalError
+ : cerr.value();
+ protocol_error.message = err.message();
+ });
+
+ return Response{req.id, std::move(protocol_error)};
+ }
+ static inline Response Make(Req req,
+ std::optional<llvm::json::Value> result) {
+ return Response{req.id, std::move(result)};
+ }
+ static inline Id KeyFor(Response r) { return std::get<Id>(r.id); }
+ static inline std::string KeyFor(Request r) { return r.method; }
+ static inline std::string KeyFor(Notification n) { return n.method; }
+ static inline std::optional<llvm::json::Value> Extract(Request r) {
+ return r.params;
+ }
+ static inline llvm::Expected<llvm::json::Value> Extract(Response r) {
+ if (const lldb_protocol::mcp::Error *error =
+ std::get_if<lldb_protocol::mcp::Error>(&r.result))
+ return llvm::make_error<lldb_protocol::mcp::MCPError>(error->message,
+ error->code);
+ return std::get<llvm::json::Value>(r.result);
+ }
+ static inline std::optional<llvm::json::Value> Extract(Notification n) {
+ return n.params;
+ }
+};
+
/// Generic transport that uses the MCP protocol.
-using MCPTransport = lldb_private::Transport<Request, Response, Notification>;
+using MCPTransport = lldb_private::transport::JSONTransport<ProtocolDescriptor>;
+using MCPBinder = lldb_private::transport::Binder<ProtocolDescriptor>;
+using MCPBinderUP = std::unique_ptr<MCPBinder>;
/// Generic logging callback, to allow the MCP server / client / transport layer
/// to be independent of the lldb log implementation.
using LogCallback = llvm::unique_function<void(llvm::StringRef message)>;
class Transport final
- : public lldb_private::JSONRPCTransport<Request, Response, Notification> {
+ : public lldb_private::transport::JSONRPCTransport<ProtocolDescriptor> {
public:
Transport(lldb::IOObjectSP in, lldb::IOObjectSP out,
LogCallback log_callback = {});
diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h
index 6f20a02..9958b6e 100644
--- a/lldb/include/lldb/Target/Language.h
+++ b/lldb/include/lldb/Target/Language.h
@@ -404,8 +404,15 @@ public:
GetLanguageTypeFromString(const char *string) = delete;
static lldb::LanguageType GetLanguageTypeFromString(llvm::StringRef string);
+ /// Returns the internal LLDB name for the specified language. When presenting
+ /// the language name to users, use \ref GetDisplayNameForLanguageType
+ /// instead.
static const char *GetNameForLanguageType(lldb::LanguageType language);
+ /// Returns a user-friendly name for the specified language.
+ static llvm::StringRef
+ GetDisplayNameForLanguageType(lldb::LanguageType language);
+
static void PrintAllLanguages(Stream &s, const char *prefix,
const char *suffix);
diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py
index 2966ac0..e30d549 100644
--- a/lldb/packages/Python/lldbsuite/test/dotest.py
+++ b/lldb/packages/Python/lldbsuite/test/dotest.py
@@ -280,9 +280,6 @@ def parseOptionsAndInitTestdirs():
configuration.llvm_tools_dir = args.llvm_tools_dir
configuration.filecheck = shutil.which("FileCheck", path=args.llvm_tools_dir)
configuration.yaml2obj = shutil.which("yaml2obj", path=args.llvm_tools_dir)
- configuration.yaml2macho_core = shutil.which(
- "yaml2macho-core", path=args.llvm_tools_dir
- )
if not configuration.get_filecheck_path():
logging.warning("No valid FileCheck executable; some tests may fail...")
@@ -563,6 +560,8 @@ def setupSysPath():
if is_exe(lldbDAPExec):
os.environ["LLDBDAP_EXEC"] = lldbDAPExec
+ configuration.yaml2macho_core = shutil.which("yaml2macho-core", path=lldbDir)
+
lldbPythonDir = None # The directory that contains 'lldb/__init__.py'
# If our lldb supports the -P option, use it to find the python path:
diff --git a/lldb/source/Host/common/JSONTransport.cpp b/lldb/source/Host/common/JSONTransport.cpp
index c4b42ea..22de7fa 100644
--- a/lldb/source/Host/common/JSONTransport.cpp
+++ b/lldb/source/Host/common/JSONTransport.cpp
@@ -14,8 +14,7 @@
#include <string>
using namespace llvm;
-using namespace lldb;
-using namespace lldb_private;
+using namespace lldb_private::transport;
char TransportUnhandledContentsError::ID;
@@ -23,10 +22,31 @@ TransportUnhandledContentsError::TransportUnhandledContentsError(
std::string unhandled_contents)
: m_unhandled_contents(unhandled_contents) {}
-void TransportUnhandledContentsError::log(llvm::raw_ostream &OS) const {
+void TransportUnhandledContentsError::log(raw_ostream &OS) const {
OS << "transport EOF with unhandled contents: '" << m_unhandled_contents
<< "'";
}
std::error_code TransportUnhandledContentsError::convertToErrorCode() const {
return std::make_error_code(std::errc::bad_message);
}
+
+char InvalidParams::ID;
+
+void InvalidParams::log(raw_ostream &OS) const {
+ OS << "invalid parameters for method '" << m_method << "': '" << m_context
+ << "'";
+}
+std::error_code InvalidParams::convertToErrorCode() const {
+ return std::make_error_code(std::errc::invalid_argument);
+}
+
+char MethodNotFound::ID;
+
+void MethodNotFound::log(raw_ostream &OS) const {
+ OS << "method not found: '" << m_method << "'";
+}
+
+std::error_code MethodNotFound::convertToErrorCode() const {
+ // JSON-RPC Method not found
+ return std::error_code(MethodNotFound::kErrorCode, std::generic_category());
+}
diff --git a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp
index d7293fc..33bdd5e 100644
--- a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp
+++ b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp
@@ -52,11 +52,6 @@ llvm::StringRef ProtocolServerMCP::GetPluginDescriptionStatic() {
}
void ProtocolServerMCP::Extend(lldb_protocol::mcp::Server &server) const {
- server.AddNotificationHandler("notifications/initialized",
- [](const lldb_protocol::mcp::Notification &) {
- LLDB_LOG(GetLog(LLDBLog::Host),
- "MCP initialization complete");
- });
server.AddTool(
std::make_unique<CommandTool>("command", "Run an lldb command."));
server.AddTool(std::make_unique<DebuggerListTool>(
@@ -74,26 +69,9 @@ void ProtocolServerMCP::AcceptCallback(std::unique_ptr<Socket> socket) {
io_sp, io_sp, [client_name](llvm::StringRef message) {
LLDB_LOG(GetLog(LLDBLog::Host), "{0}: {1}", client_name, message);
});
- MCPTransport *transport_ptr = transport_up.get();
- auto instance_up = std::make_unique<lldb_protocol::mcp::Server>(
- std::string(kName), std::string(kVersion), *transport_up,
- /*log_callback=*/
- [client_name](llvm::StringRef message) {
- LLDB_LOG(GetLog(LLDBLog::Host), "{0} Server: {1}", client_name,
- message);
- },
- /*closed_callback=*/
- [this, transport_ptr]() { m_instances.erase(transport_ptr); });
- Extend(*instance_up);
- llvm::Expected<MainLoop::ReadHandleUP> handle =
- transport_up->RegisterMessageHandler(m_loop, *instance_up);
- if (!handle) {
- LLDB_LOG_ERROR(log, handle.takeError(), "Failed to run MCP server: {0}");
- return;
- }
- m_instances[transport_ptr] =
- std::make_tuple<ServerUP, ReadHandleUP, TransportUP>(
- std::move(instance_up), std::move(*handle), std::move(transport_up));
+
+ if (auto error = m_server->Accept(m_loop, std::move(transport_up)))
+ LLDB_LOG_ERROR(log, std::move(error), "{0}");
}
llvm::Error ProtocolServerMCP::Start(ProtocolServer::Connection connection) {
@@ -124,14 +102,21 @@ llvm::Error ProtocolServerMCP::Start(ProtocolServer::Connection connection) {
llvm::join(m_listener->GetListeningConnectionURI(), ", ");
ServerInfo info{listening_uris[0]};
- llvm::Expected<ServerInfoHandle> handle = ServerInfo::Write(info);
- if (!handle)
- return handle.takeError();
+ llvm::Expected<ServerInfoHandle> server_info_handle = ServerInfo::Write(info);
+ if (!server_info_handle)
+ return server_info_handle.takeError();
+
+ m_client_count = 0;
+ m_server = std::make_unique<lldb_protocol::mcp::Server>(
+ std::string(kName), std::string(kVersion), [](StringRef message) {
+ LLDB_LOG(GetLog(LLDBLog::Host), "MCP Server: {0}", message);
+ });
+ Extend(*m_server);
m_running = true;
- m_server_info_handle = std::move(*handle);
- m_listen_handlers = std::move(*handles);
- m_loop_thread = std::thread([=] {
+ m_server_info_handle = std::move(*server_info_handle);
+ m_accept_handles = std::move(*handles);
+ m_loop_thread = std::thread([this] {
llvm::set_thread_name("protocol-server.mcp");
m_loop.Run();
});
@@ -155,9 +140,10 @@ llvm::Error ProtocolServerMCP::Stop() {
if (m_loop_thread.joinable())
m_loop_thread.join();
+ m_accept_handles.clear();
+
+ m_server.reset(nullptr);
m_server_info_handle.Remove();
- m_listen_handlers.clear();
- m_instances.clear();
return llvm::Error::success();
}
diff --git a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.h b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.h
index b325a36..e0f2a6c 100644
--- a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.h
+++ b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.h
@@ -23,16 +23,17 @@
namespace lldb_private::mcp {
class ProtocolServerMCP : public ProtocolServer {
- using ReadHandleUP = MainLoopBase::ReadHandleUP;
- using TransportUP = std::unique_ptr<lldb_protocol::mcp::MCPTransport>;
+
using ServerUP = std::unique_ptr<lldb_protocol::mcp::Server>;
+ using ReadHandleUP = MainLoop::ReadHandleUP;
+
public:
ProtocolServerMCP();
- virtual ~ProtocolServerMCP() override;
+ ~ProtocolServerMCP() override;
- virtual llvm::Error Start(ProtocolServer::Connection connection) override;
- virtual llvm::Error Stop() override;
+ llvm::Error Start(ProtocolServer::Connection connection) override;
+ llvm::Error Stop() override;
static void Initialize();
static void Terminate();
@@ -56,19 +57,18 @@ private:
bool m_running = false;
- lldb_protocol::mcp::ServerInfoHandle m_server_info_handle;
lldb_private::MainLoop m_loop;
std::thread m_loop_thread;
std::mutex m_mutex;
size_t m_client_count = 0;
std::unique_ptr<Socket> m_listener;
+ std::vector<ReadHandleUP> m_accept_handles;
- std::vector<ReadHandleUP> m_listen_handlers;
- std::map<lldb_protocol::mcp::MCPTransport *,
- std::tuple<ServerUP, ReadHandleUP, TransportUP>>
- m_instances;
+ ServerUP m_server;
+ lldb_protocol::mcp::ServerInfoHandle m_server_info_handle;
};
+
} // namespace lldb_private::mcp
#endif
diff --git a/lldb/source/Protocol/MCP/MCPError.cpp b/lldb/source/Protocol/MCP/MCPError.cpp
index e140d11..cfac055 100644
--- a/lldb/source/Protocol/MCP/MCPError.cpp
+++ b/lldb/source/Protocol/MCP/MCPError.cpp
@@ -22,14 +22,7 @@ MCPError::MCPError(std::string message, int64_t error_code)
void MCPError::log(llvm::raw_ostream &OS) const { OS << m_message; }
std::error_code MCPError::convertToErrorCode() const {
- return llvm::inconvertibleErrorCode();
-}
-
-lldb_protocol::mcp::Error MCPError::toProtocolError() const {
- lldb_protocol::mcp::Error error;
- error.code = m_error_code;
- error.message = m_message;
- return error;
+ return std::error_code(m_error_code, std::generic_category());
}
UnsupportedURI::UnsupportedURI(std::string uri) : m_uri(uri) {}
diff --git a/lldb/source/Protocol/MCP/Server.cpp b/lldb/source/Protocol/MCP/Server.cpp
index 19030a3..71323ad 100644
--- a/lldb/source/Protocol/MCP/Server.cpp
+++ b/lldb/source/Protocol/MCP/Server.cpp
@@ -12,6 +12,7 @@
#include "lldb/Host/HostInfo.h"
#include "lldb/Protocol/MCP/MCPError.h"
#include "lldb/Protocol/MCP/Protocol.h"
+#include "lldb/Protocol/MCP/Transport.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
@@ -108,48 +109,9 @@ Expected<std::vector<ServerInfo>> ServerInfo::Load() {
return infos;
}
-Server::Server(std::string name, std::string version, MCPTransport &client,
- LogCallback log_callback, ClosedCallback closed_callback)
- : m_name(std::move(name)), m_version(std::move(version)), m_client(client),
- m_log_callback(std::move(log_callback)),
- m_closed_callback(std::move(closed_callback)) {
- AddRequestHandlers();
-}
-
-void Server::AddRequestHandlers() {
- AddRequestHandler("initialize", std::bind(&Server::InitializeHandler, this,
- std::placeholders::_1));
- AddRequestHandler("tools/list", std::bind(&Server::ToolsListHandler, this,
- std::placeholders::_1));
- AddRequestHandler("tools/call", std::bind(&Server::ToolsCallHandler, this,
- std::placeholders::_1));
- AddRequestHandler("resources/list", std::bind(&Server::ResourcesListHandler,
- this, std::placeholders::_1));
- AddRequestHandler("resources/read", std::bind(&Server::ResourcesReadHandler,
- this, std::placeholders::_1));
-}
-
-llvm::Expected<Response> Server::Handle(const Request &request) {
- auto it = m_request_handlers.find(request.method);
- if (it != m_request_handlers.end()) {
- llvm::Expected<Response> response = it->second(request);
- if (!response)
- return response;
- response->id = request.id;
- return *response;
- }
-
- return llvm::make_error<MCPError>(
- llvm::formatv("no handler for request: {0}", request.method).str());
-}
-
-void Server::Handle(const Notification &notification) {
- auto it = m_notification_handlers.find(notification.method);
- if (it != m_notification_handlers.end()) {
- it->second(notification);
- return;
- }
-}
+Server::Server(std::string name, std::string version, LogCallback log_callback)
+ : m_name(std::move(name)), m_version(std::move(version)),
+ m_log_callback(std::move(log_callback)) {}
void Server::AddTool(std::unique_ptr<Tool> tool) {
if (!tool)
@@ -164,48 +126,64 @@ void Server::AddResourceProvider(
m_resource_providers.push_back(std::move(resource_provider));
}
-void Server::AddRequestHandler(llvm::StringRef method, RequestHandler handler) {
- m_request_handlers[method] = std::move(handler);
-}
-
-void Server::AddNotificationHandler(llvm::StringRef method,
- NotificationHandler handler) {
- m_notification_handlers[method] = std::move(handler);
-}
-
-llvm::Expected<Response> Server::InitializeHandler(const Request &request) {
- Response response;
+MCPBinderUP Server::Bind(MCPTransport &transport) {
+ MCPBinderUP binder_up = std::make_unique<MCPBinder>(transport);
+ binder_up->Bind<InitializeResult, InitializeParams>(
+ "initialize", &Server::InitializeHandler, this);
+ binder_up->Bind<ListToolsResult, void>("tools/list",
+ &Server::ToolsListHandler, this);
+ binder_up->Bind<CallToolResult, CallToolParams>(
+ "tools/call", &Server::ToolsCallHandler, this);
+ binder_up->Bind<ListResourcesResult, void>(
+ "resources/list", &Server::ResourcesListHandler, this);
+ binder_up->Bind<ReadResourceResult, ReadResourceParams>(
+ "resources/read", &Server::ResourcesReadHandler, this);
+ binder_up->Bind<void>("notifications/initialized",
+ [this]() { Log("MCP initialization complete"); });
+ return binder_up;
+}
+
+llvm::Error Server::Accept(MainLoop &loop, MCPTransportUP transport) {
+ MCPBinderUP binder = Bind(*transport);
+ MCPTransport *transport_ptr = transport.get();
+ binder->OnDisconnect([this, transport_ptr]() {
+ assert(m_instances.find(transport_ptr) != m_instances.end() &&
+ "Client not found in m_instances");
+ m_instances.erase(transport_ptr);
+ });
+ binder->OnError([this](llvm::Error err) {
+ Logv("Transport error: {0}", llvm::toString(std::move(err)));
+ });
+
+ auto handle = transport->RegisterMessageHandler(loop, *binder);
+ if (!handle)
+ return handle.takeError();
+
+ m_instances[transport_ptr] =
+ Client{std::move(*handle), std::move(transport), std::move(binder)};
+ return llvm::Error::success();
+}
+
+Expected<InitializeResult>
+Server::InitializeHandler(const InitializeParams &request) {
InitializeResult result;
result.protocolVersion = mcp::kProtocolVersion;
result.capabilities = GetCapabilities();
result.serverInfo.name = m_name;
result.serverInfo.version = m_version;
- response.result = std::move(result);
- return response;
+ return result;
}
-llvm::Expected<Response> Server::ToolsListHandler(const Request &request) {
- Response response;
-
+llvm::Expected<ListToolsResult> Server::ToolsListHandler() {
ListToolsResult result;
for (const auto &tool : m_tools)
result.tools.emplace_back(tool.second->GetDefinition());
- response.result = std::move(result);
-
- return response;
+ return result;
}
-llvm::Expected<Response> Server::ToolsCallHandler(const Request &request) {
- Response response;
-
- if (!request.params)
- return llvm::createStringError("no tool parameters");
- CallToolParams params;
- json::Path::Root root("params");
- if (!fromJSON(request.params, params, root))
- return root.getError();
-
+llvm::Expected<CallToolResult>
+Server::ToolsCallHandler(const CallToolParams &params) {
llvm::StringRef tool_name = params.name;
if (tool_name.empty())
return llvm::createStringError("no tool name");
@@ -222,113 +200,50 @@ llvm::Expected<Response> Server::ToolsCallHandler(const Request &request) {
if (!text_result)
return text_result.takeError();
- response.result = toJSON(*text_result);
-
- return response;
+ return text_result;
}
-llvm::Expected<Response> Server::ResourcesListHandler(const Request &request) {
- Response response;
-
+llvm::Expected<ListResourcesResult> Server::ResourcesListHandler() {
ListResourcesResult result;
for (std::unique_ptr<ResourceProvider> &resource_provider_up :
m_resource_providers)
for (const Resource &resource : resource_provider_up->GetResources())
result.resources.push_back(resource);
- response.result = std::move(result);
-
- return response;
+ return result;
}
-llvm::Expected<Response> Server::ResourcesReadHandler(const Request &request) {
- Response response;
-
- if (!request.params)
- return llvm::createStringError("no resource parameters");
-
- ReadResourceParams params;
- json::Path::Root root("params");
- if (!fromJSON(request.params, params, root))
- return root.getError();
-
- llvm::StringRef uri_str = params.uri;
+Expected<ReadResourceResult>
+Server::ResourcesReadHandler(const ReadResourceParams &params) {
+ StringRef uri_str = params.uri;
if (uri_str.empty())
- return llvm::createStringError("no resource uri");
+ return createStringError("no resource uri");
for (std::unique_ptr<ResourceProvider> &resource_provider_up :
m_resource_providers) {
- llvm::Expected<ReadResourceResult> result =
+ Expected<ReadResourceResult> result =
resource_provider_up->ReadResource(uri_str);
if (result.errorIsA<UnsupportedURI>()) {
- llvm::consumeError(result.takeError());
+ consumeError(result.takeError());
continue;
}
if (!result)
return result.takeError();
- Response response;
- response.result = std::move(*result);
- return response;
+ return *result;
}
return make_error<MCPError>(
- llvm::formatv("no resource handler for uri: {0}", uri_str).str(),
+ formatv("no resource handler for uri: {0}", uri_str).str(),
MCPError::kResourceNotFound);
}
ServerCapabilities Server::GetCapabilities() {
lldb_protocol::mcp::ServerCapabilities capabilities;
capabilities.supportsToolsList = true;
+ capabilities.supportsResourcesList = true;
// FIXME: Support sending notifications when a debugger/target are
// added/removed.
- capabilities.supportsResourcesList = false;
+ capabilities.supportsResourcesSubscribe = false;
return capabilities;
}
-
-void Server::Log(llvm::StringRef message) {
- if (m_log_callback)
- m_log_callback(message);
-}
-
-void Server::Received(const Request &request) {
- auto SendResponse = [this](const Response &response) {
- if (llvm::Error error = m_client.Send(response))
- Log(llvm::toString(std::move(error)));
- };
-
- llvm::Expected<Response> response = Handle(request);
- if (response)
- return SendResponse(*response);
-
- lldb_protocol::mcp::Error protocol_error;
- llvm::handleAllErrors(
- response.takeError(),
- [&](const MCPError &err) { protocol_error = err.toProtocolError(); },
- [&](const llvm::ErrorInfoBase &err) {
- protocol_error.code = MCPError::kInternalError;
- protocol_error.message = err.message();
- });
- Response error_response;
- error_response.id = request.id;
- error_response.result = std::move(protocol_error);
- SendResponse(error_response);
-}
-
-void Server::Received(const Response &response) {
- Log("unexpected MCP message: response");
-}
-
-void Server::Received(const Notification &notification) {
- Handle(notification);
-}
-
-void Server::OnError(llvm::Error error) {
- Log(llvm::toString(std::move(error)));
-}
-
-void Server::OnClosed() {
- Log("EOF");
- if (m_closed_callback)
- m_closed_callback();
-}
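The Server refactor above replaces the hand-rolled Request/Response plumbing with typed handlers bound through an MCPBinder. A minimal usage sketch, assuming a caller that already owns a MainLoop and an MCPTransportUP; the tool type `EchoTool` and the `transport` variable are hypothetical placeholders:

    lldb_private::MainLoop loop;
    lldb_protocol::mcp::Server server(
        "lldb-mcp", "0.1.0",
        /*log_callback=*/[](llvm::StringRef msg) { llvm::errs() << msg << '\n'; });
    server.AddTool(std::make_unique<EchoTool>("echo", "echoes its argument"));
    // Accept() binds the typed handlers ("initialize", "tools/list",
    // "tools/call", ...) to the transport and tracks the client in
    // m_instances until the transport disconnects.
    // `transport` is an MCPTransportUP obtained elsewhere (hypothetical).
    if (llvm::Error err = server.Accept(loop, std::move(transport)))
      llvm::logAllUnhandledErrors(std::move(err), llvm::errs(), "accept: ");
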
diff --git a/lldb/source/Target/Language.cpp b/lldb/source/Target/Language.cpp
index d4a9268..395718e 100644
--- a/lldb/source/Target/Language.cpp
+++ b/lldb/source/Target/Language.cpp
@@ -271,6 +271,10 @@ const char *Language::GetNameForLanguageType(LanguageType language) {
return language_names[eLanguageTypeUnknown].name;
}
+llvm::StringRef Language::GetDisplayNameForLanguageType(LanguageType language) {
+ return SourceLanguage(language).GetDescription();
+}
+
void Language::PrintSupportedLanguagesForExpressions(Stream &s,
llvm::StringRef prefix,
llvm::StringRef suffix) {
@@ -543,9 +547,26 @@ Language::Language() = default;
// Destructor
Language::~Language() = default;
+static std::optional<llvm::dwarf::SourceLanguage>
+ToDwarfSourceLanguage(lldb::LanguageType language_type) {
+ if (language_type < lldb::eLanguageTypeLastStandardLanguage)
+ return static_cast<llvm::dwarf::SourceLanguage>(language_type);
+
+ switch (language_type) {
+ case eLanguageTypeMipsAssembler:
+ return llvm::dwarf::DW_LANG_Mips_Assembler;
+ default:
+ return std::nullopt;
+ }
+}
+
SourceLanguage::SourceLanguage(lldb::LanguageType language_type) {
- auto lname =
- llvm::dwarf::toDW_LNAME((llvm::dwarf::SourceLanguage)language_type);
+ std::optional<llvm::dwarf::SourceLanguage> dwarf_lang =
+ ToDwarfSourceLanguage(language_type);
+ if (!dwarf_lang)
+ return;
+
+ auto lname = llvm::dwarf::toDW_LNAME(*dwarf_lang);
if (!lname)
return;
name = lname->first;
@@ -560,11 +581,8 @@ lldb::LanguageType SourceLanguage::AsLanguageType() const {
}
llvm::StringRef SourceLanguage::GetDescription() const {
- LanguageType type = AsLanguageType();
- if (type)
- return Language::GetNameForLanguageType(type);
return llvm::dwarf::LanguageDescription(
- (llvm::dwarf::SourceLanguageName)name);
+ static_cast<llvm::dwarf::SourceLanguageName>(name));
}
bool SourceLanguage::IsC() const { return name == llvm::dwarf::DW_LNAME_C; }
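With this change, SourceLanguage::GetDescription() reports the DWARF DW_LNAME family description directly instead of round-tripping through LanguageType, and vendor codes are mapped explicitly via ToDwarfSourceLanguage(). A sketch of the observable behavior, mirroring the new unit test added later in this commit:

    using namespace lldb_private;
    // Versioned C++ dialects collapse to the same DW_LNAME description.
    assert(SourceLanguage(lldb::eLanguageTypeC_plus_plus_17).GetDescription() ==
           "ISO C++");
    // eLanguageTypeMipsAssembler is a vendor code outside the standard DWARF
    // range; ToDwarfSourceLanguage() maps it to DW_LANG_Mips_Assembler, which
    // reads back as the generic assembly language.
    assert(SourceLanguage(lldb::eLanguageTypeMipsAssembler).AsLanguageType() ==
           lldb::eLanguageTypeAssembly);
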
diff --git a/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py b/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py
index c01f6d8..f1c0519 100644
--- a/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py
+++ b/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py
@@ -22,7 +22,9 @@ from lldbsuite.test.lldbtest import *
from lldbsuite.test.lldbdwarf import *
from lldbsuite.test import lldbutil, lldbplatformutil
-
+# On Linux systems with Yama ptrace_scope = 1 there is a race condition when the
+# debuggee enables tracing. See https://github.com/llvm/llvm-project/issues/161510.
+@skipIfLinux
class LldbGdbServerTestCase(
gdbremote_testcase.GdbRemoteTestCaseBase, DwarfOpcodeParser
):
diff --git a/lldb/tools/lldb-dap/DAP.h b/lldb/tools/lldb-dap/DAP.h
index 71681fd..a90ddf5 100644
--- a/lldb/tools/lldb-dap/DAP.h
+++ b/lldb/tools/lldb-dap/DAP.h
@@ -78,11 +78,9 @@ enum DAPBroadcasterBits {
enum class ReplMode { Variable = 0, Command, Auto };
-using DAPTransport =
- lldb_private::Transport<protocol::Request, protocol::Response,
- protocol::Event>;
+using DAPTransport = lldb_private::transport::JSONTransport<ProtocolDescriptor>;
-struct DAP final : private DAPTransport::MessageHandler {
+struct DAP final : public DAPTransport::MessageHandler {
/// Path to the lldb-dap binary itself.
static llvm::StringRef debug_adapter_path;
diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolBase.h b/lldb/tools/lldb-dap/Protocol/ProtocolBase.h
index 0a9ef53..92e41b1 100644
--- a/lldb/tools/lldb-dap/Protocol/ProtocolBase.h
+++ b/lldb/tools/lldb-dap/Protocol/ProtocolBase.h
@@ -30,6 +30,8 @@ namespace lldb_dap::protocol {
// MARK: Base Protocol
+using Id = int64_t;
+
/// A client or debug adapter initiated request.
struct Request {
/// Sequence number of the message (also known as message ID). The `seq` for
@@ -39,7 +41,7 @@ struct Request {
/// associate requests with their corresponding responses. For protocol
/// messages of type `request` the sequence number can be used to cancel the
/// request.
- int64_t seq;
+ Id seq;
/// The command to execute.
std::string command;
@@ -76,7 +78,7 @@ enum ResponseMessage : unsigned {
/// Response for a request.
struct Response {
/// Sequence number of the corresponding request.
- int64_t request_seq;
+ Id request_seq;
/// The command requested.
std::string command;
diff --git a/lldb/tools/lldb-dap/Transport.h b/lldb/tools/lldb-dap/Transport.h
index 4a9dd76..58c48c1 100644
--- a/lldb/tools/lldb-dap/Transport.h
+++ b/lldb/tools/lldb-dap/Transport.h
@@ -22,11 +22,18 @@
namespace lldb_dap {
+struct ProtocolDescriptor {
+ using Id = protocol::Id;
+ using Req = protocol::Request;
+ using Resp = protocol::Response;
+ using Evt = protocol::Event;
+};
+
/// A transport class that performs the Debug Adapter Protocol communication
/// with the client.
class Transport final
- : public lldb_private::HTTPDelimitedJSONTransport<
- protocol::Request, protocol::Response, protocol::Event> {
+ : public lldb_private::transport::HTTPDelimitedJSONTransport<
+ ProtocolDescriptor> {
public:
Transport(llvm::StringRef client_name, lldb_dap::Log *log,
lldb::IOObjectSP input, lldb::IOObjectSP output);
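The ProtocolDescriptor struct above captures the pattern behind this refactor: the transport templates now take a single descriptor type that groups the protocol's associated types, rather than three separate template parameters. A sketch with placeholder types (`MyReq`, `MyResp`, `MyEvt` are hypothetical):

    struct MyProtocol {
      using Id = int64_t; // correlates responses with their requests
      using Req = MyReq;
      using Resp = MyResp;
      using Evt = MyEvt;
    };
    // One type parameter instead of <Req, Resp, Evt>; adding an associated
    // piece (like Id) no longer touches every instantiation site.
    using MyTransport =
        lldb_private::transport::HTTPDelimitedJSONTransport<MyProtocol>;
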
diff --git a/lldb/unittests/DAP/DAPTest.cpp b/lldb/unittests/DAP/DAPTest.cpp
index 2090fe6..4fd6cd5 100644
--- a/lldb/unittests/DAP/DAPTest.cpp
+++ b/lldb/unittests/DAP/DAPTest.cpp
@@ -9,13 +9,10 @@
#include "DAP.h"
#include "Protocol/ProtocolBase.h"
#include "TestBase.h"
-#include "llvm/Testing/Support/Error.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <optional>
-using namespace llvm;
-using namespace lldb;
using namespace lldb_dap;
using namespace lldb_dap_tests;
using namespace lldb_dap::protocol;
@@ -24,18 +21,7 @@ using namespace testing;
class DAPTest : public TransportBase {};
TEST_F(DAPTest, SendProtocolMessages) {
- DAP dap{
- /*log=*/nullptr,
- /*default_repl_mode=*/ReplMode::Auto,
- /*pre_init_commands=*/{},
- /*no_lldbinit=*/false,
- /*client_name=*/"test_client",
- /*transport=*/*transport,
- /*loop=*/loop,
- };
- dap.Send(Event{/*event=*/"my-event", /*body=*/std::nullopt});
- loop.AddPendingCallback(
- [](lldb_private::MainLoopBase &loop) { loop.RequestTermination(); });
- EXPECT_CALL(client, Received(IsEvent("my-event", std::nullopt)));
- ASSERT_THAT_ERROR(dap.Loop(), llvm::Succeeded());
+ dap->Send(Event{/*event=*/"my-event", /*body=*/std::nullopt});
+ EXPECT_CALL(client, Received(IsEvent("my-event")));
+ Run();
}
diff --git a/lldb/unittests/DAP/Handler/DisconnectTest.cpp b/lldb/unittests/DAP/Handler/DisconnectTest.cpp
index c6ff1f9..88d6e9a 100644
--- a/lldb/unittests/DAP/Handler/DisconnectTest.cpp
+++ b/lldb/unittests/DAP/Handler/DisconnectTest.cpp
@@ -31,7 +31,7 @@ TEST_F(DisconnectRequestHandlerTest, DisconnectTriggersTerminated) {
DisconnectRequestHandler handler(*dap);
ASSERT_THAT_ERROR(handler.Run(std::nullopt), Succeeded());
EXPECT_CALL(client, Received(IsEvent("terminated", _)));
- RunOnce();
+ Run();
}
TEST_F(DisconnectRequestHandlerTest, DisconnectTriggersTerminateCommands) {
@@ -53,5 +53,5 @@ TEST_F(DisconnectRequestHandlerTest, DisconnectTriggersTerminateCommands) {
EXPECT_CALL(client, Received(Output("(lldb) script print(2)\n")));
EXPECT_CALL(client, Received(Output("Running terminateCommands:\n")));
EXPECT_CALL(client, Received(IsEvent("terminated", _)));
- RunOnce();
+ Run();
}
diff --git a/lldb/unittests/DAP/TestBase.cpp b/lldb/unittests/DAP/TestBase.cpp
index ba7baf2..3721e09 100644
--- a/lldb/unittests/DAP/TestBase.cpp
+++ b/lldb/unittests/DAP/TestBase.cpp
@@ -32,23 +32,9 @@ using lldb_private::FileSystem;
using lldb_private::MainLoop;
using lldb_private::Pipe;
-Expected<MainLoop::ReadHandleUP>
-TestTransport::RegisterMessageHandler(MainLoop &loop, MessageHandler &handler) {
- Expected<lldb::FileUP> dummy_file = FileSystem::Instance().Open(
- FileSpec(FileSystem::DEV_NULL), File::eOpenOptionReadWrite);
- if (!dummy_file)
- return dummy_file.takeError();
- m_dummy_file = std::move(*dummy_file);
- lldb_private::Status status;
- auto handle = loop.RegisterReadObject(
- m_dummy_file, [](lldb_private::MainLoopBase &) {}, status);
- if (status.Fail())
- return status.takeError();
- return handle;
-}
+void TransportBase::SetUp() {
+ std::tie(to_client, to_server) = TestDAPTransport::createPair();
-void DAPTestBase::SetUp() {
- TransportBase::SetUp();
std::error_code EC;
log = std::make_unique<Log>("-", EC);
dap = std::make_unique<DAP>(
@@ -57,16 +43,30 @@ void DAPTestBase::SetUp() {
/*pre_init_commands=*/std::vector<std::string>(),
/*no_lldbinit=*/false,
/*client_name=*/"test_client",
- /*transport=*/*transport, /*loop=*/loop);
+ /*transport=*/*to_client, /*loop=*/loop);
+
+ auto server_handle = to_server->RegisterMessageHandler(loop, *dap.get());
+ EXPECT_THAT_EXPECTED(server_handle, Succeeded());
+ handles[0] = std::move(*server_handle);
+
+ auto client_handle = to_client->RegisterMessageHandler(loop, client);
+ EXPECT_THAT_EXPECTED(client_handle, Succeeded());
+ handles[1] = std::move(*client_handle);
}
+void TransportBase::Run() {
+ loop.AddPendingCallback(
+ [](lldb_private::MainLoopBase &loop) { loop.RequestTermination(); });
+ EXPECT_THAT_ERROR(loop.Run().takeError(), llvm::Succeeded());
+}
+
+void DAPTestBase::SetUp() { TransportBase::SetUp(); }
+
void DAPTestBase::TearDown() {
- if (core) {
+ if (core)
ASSERT_THAT_ERROR(core->discard(), Succeeded());
- }
- if (binary) {
+ if (binary)
ASSERT_THAT_ERROR(binary->discard(), Succeeded());
- }
}
void DAPTestBase::SetUpTestSuite() {
diff --git a/lldb/unittests/DAP/TestBase.h b/lldb/unittests/DAP/TestBase.h
index c19eead..c32f3a7 100644
--- a/lldb/unittests/DAP/TestBase.h
+++ b/lldb/unittests/DAP/TestBase.h
@@ -7,73 +7,48 @@
//===----------------------------------------------------------------------===//
#include "DAP.h"
+#include "DAPLog.h"
#include "Protocol/ProtocolBase.h"
#include "TestingSupport/Host/JSONTransportTestUtilities.h"
#include "TestingSupport/SubsystemRAII.h"
+#include "Transport.h"
#include "lldb/Host/FileSystem.h"
#include "lldb/Host/HostInfo.h"
#include "lldb/Host/MainLoop.h"
#include "lldb/Host/MainLoopBase.h"
-#include "lldb/lldb-forward.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
-#include "llvm/Testing/Support/Error.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <memory>
+#include <optional>
+
+/// Helpers for gtest printing.
+namespace lldb_dap::protocol {
+
+inline void PrintTo(const Request &req, std::ostream *os) {
+ *os << llvm::formatv("{0}", toJSON(req)).str();
+}
+
+inline void PrintTo(const Response &resp, std::ostream *os) {
+ *os << llvm::formatv("{0}", toJSON(resp)).str();
+}
+
+inline void PrintTo(const Event &evt, std::ostream *os) {
+ *os << llvm::formatv("{0}", toJSON(evt)).str();
+}
+
+inline void PrintTo(const Message &message, std::ostream *os) {
+ return std::visit([os](auto &&message) { return PrintTo(message, os); },
+ message);
+}
+
+} // namespace lldb_dap::protocol
namespace lldb_dap_tests {
-class TestTransport final
- : public lldb_private::Transport<lldb_dap::protocol::Request,
- lldb_dap::protocol::Response,
- lldb_dap::protocol::Event> {
-public:
- using Message = lldb_private::Transport<lldb_dap::protocol::Request,
- lldb_dap::protocol::Response,
- lldb_dap::protocol::Event>::Message;
-
- TestTransport(lldb_private::MainLoop &loop, MessageHandler &handler)
- : m_loop(loop), m_handler(handler) {}
-
- llvm::Error Send(const lldb_dap::protocol::Event &e) override {
- m_loop.AddPendingCallback([this, e](lldb_private::MainLoopBase &) {
- this->m_handler.Received(e);
- });
- return llvm::Error::success();
- }
-
- llvm::Error Send(const lldb_dap::protocol::Request &r) override {
- m_loop.AddPendingCallback([this, r](lldb_private::MainLoopBase &) {
- this->m_handler.Received(r);
- });
- return llvm::Error::success();
- }
-
- llvm::Error Send(const lldb_dap::protocol::Response &r) override {
- m_loop.AddPendingCallback([this, r](lldb_private::MainLoopBase &) {
- this->m_handler.Received(r);
- });
- return llvm::Error::success();
- }
-
- llvm::Expected<lldb_private::MainLoop::ReadHandleUP>
- RegisterMessageHandler(lldb_private::MainLoop &loop,
- MessageHandler &handler) override;
-
- void Log(llvm::StringRef message) override {
- log_messages.emplace_back(message);
- }
-
- std::vector<std::string> log_messages;
-
-private:
- lldb_private::MainLoop &m_loop;
- MessageHandler &m_handler;
- lldb::FileSP m_dummy_file;
-};
+using TestDAPTransport = TestTransport<lldb_dap::ProtocolDescriptor>;
/// A base class for tests that need transport configured for communicating DAP
/// messages.
@@ -82,22 +57,36 @@ protected:
lldb_private::SubsystemRAII<lldb_private::FileSystem, lldb_private::HostInfo>
subsystems;
lldb_private::MainLoop loop;
- std::unique_ptr<TestTransport> transport;
- MockMessageHandler<lldb_dap::protocol::Request, lldb_dap::protocol::Response,
- lldb_dap::protocol::Event>
- client;
-
- void SetUp() override {
- transport = std::make_unique<TestTransport>(loop, client);
- }
+ lldb_private::MainLoop::ReadHandleUP handles[2];
+
+ std::unique_ptr<lldb_dap::Log> log;
+
+ std::unique_ptr<TestDAPTransport> to_client;
+ MockMessageHandler<lldb_dap::ProtocolDescriptor> client;
+
+ std::unique_ptr<TestDAPTransport> to_server;
+ std::unique_ptr<lldb_dap::DAP> dap;
+
+ void SetUp() override;
+
+ void Run();
};
/// A matcher for a DAP event.
-template <typename M1, typename M2>
+template <typename EventMatcher, typename BodyMatcher>
inline testing::Matcher<const lldb_dap::protocol::Event &>
-IsEvent(const M1 &m1, const M2 &m2) {
- return testing::AllOf(testing::Field(&lldb_dap::protocol::Event::event, m1),
- testing::Field(&lldb_dap::protocol::Event::body, m2));
+IsEvent(const EventMatcher &event_matcher, const BodyMatcher &body_matcher) {
+ return testing::AllOf(
+ testing::Field(&lldb_dap::protocol::Event::event, event_matcher),
+ testing::Field(&lldb_dap::protocol::Event::body, body_matcher));
+}
+
+template <typename EventMatcher>
+inline testing::Matcher<const lldb_dap::protocol::Event &>
+IsEvent(const EventMatcher &event_matcher) {
+ return testing::AllOf(
+ testing::Field(&lldb_dap::protocol::Event::event, event_matcher),
+ testing::Field(&lldb_dap::protocol::Event::body, std::nullopt));
}
/// Matches an "output" event.
@@ -110,8 +99,6 @@ inline auto Output(llvm::StringRef o, llvm::StringRef cat = "console") {
/// A base class for tests that interact with a `lldb_dap::DAP` instance.
class DAPTestBase : public TransportBase {
protected:
- std::unique_ptr<lldb_dap::Log> log;
- std::unique_ptr<lldb_dap::DAP> dap;
std::optional<llvm::sys::fs::TempFile> core;
std::optional<llvm::sys::fs::TempFile> binary;
@@ -126,12 +113,6 @@ protected:
bool GetDebuggerSupportsTarget(llvm::StringRef platform);
void CreateDebugger();
void LoadCore();
-
- void RunOnce() {
- loop.AddPendingCallback(
- [](lldb_private::MainLoopBase &loop) { loop.RequestTermination(); });
- ASSERT_THAT_ERROR(dap->Loop(), llvm::Succeeded());
- }
};
} // namespace lldb_dap_tests
diff --git a/lldb/unittests/Host/JSONTransportTest.cpp b/lldb/unittests/Host/JSONTransportTest.cpp
index 3a36bf2..7db6508 100644
--- a/lldb/unittests/Host/JSONTransportTest.cpp
+++ b/lldb/unittests/Host/JSONTransportTest.cpp
@@ -9,6 +9,7 @@
#include "lldb/Host/JSONTransport.h"
#include "TestingSupport/Host/JSONTransportTestUtilities.h"
#include "TestingSupport/Host/PipeTestUtilities.h"
+#include "TestingSupport/SubsystemRAII.h"
#include "lldb/Host/File.h"
#include "lldb/Host/MainLoop.h"
#include "lldb/Host/MainLoopBase.h"
@@ -25,27 +26,45 @@
#include <chrono>
#include <cstddef>
#include <memory>
+#include <optional>
#include <string>
+#include <system_error>
using namespace llvm;
using namespace lldb_private;
+using namespace lldb_private::transport;
using testing::_;
using testing::HasSubstr;
using testing::InSequence;
+using testing::Ref;
+
+namespace llvm::json {
+static bool fromJSON(const Value &V, Value &T, Path P) {
+ T = V;
+ return true;
+}
+} // namespace llvm::json
namespace {
namespace test_protocol {
struct Req {
+ int id = 0;
std::string name;
+ std::optional<json::Value> params;
};
-json::Value toJSON(const Req &T) { return json::Object{{"req", T.name}}; }
+json::Value toJSON(const Req &T) {
+ return json::Object{{"name", T.name}, {"id", T.id}, {"params", T.params}};
+}
bool fromJSON(const json::Value &V, Req &T, json::Path P) {
json::ObjectMapper O(V, P);
- return O && O.map("req", T.name);
+ return O && O.map("name", T.name) && O.map("id", T.id) &&
+ O.map("params", T.params);
+}
+bool operator==(const Req &a, const Req &b) {
+ return a.name == b.name && a.id == b.id && a.params == b.params;
}
-bool operator==(const Req &a, const Req &b) { return a.name == b.name; }
inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Req &V) {
OS << toJSON(V);
return OS;
@@ -58,14 +77,22 @@ void PrintTo(const Req &message, std::ostream *os) {
}
struct Resp {
- std::string name;
+ int id = 0;
+ int errorCode = 0;
+ std::optional<json::Value> result;
};
-json::Value toJSON(const Resp &T) { return json::Object{{"resp", T.name}}; }
+json::Value toJSON(const Resp &T) {
+ return json::Object{
+ {"id", T.id}, {"errorCode", T.errorCode}, {"result", T.result}};
+}
bool fromJSON(const json::Value &V, Resp &T, json::Path P) {
json::ObjectMapper O(V, P);
- return O && O.map("resp", T.name);
+ return O && O.map("id", T.id) && O.mapOptional("errorCode", T.errorCode) &&
+ O.map("result", T.result);
+}
+bool operator==(const Resp &a, const Resp &b) {
+ return a.id == b.id && a.errorCode == b.errorCode && a.result == b.result;
}
-bool operator==(const Resp &a, const Resp &b) { return a.name == b.name; }
inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Resp &V) {
OS << toJSON(V);
return OS;
@@ -79,11 +106,14 @@ void PrintTo(const Resp &message, std::ostream *os) {
struct Evt {
std::string name;
+ std::optional<json::Value> params;
};
-json::Value toJSON(const Evt &T) { return json::Object{{"evt", T.name}}; }
+json::Value toJSON(const Evt &T) {
+ return json::Object{{"name", T.name}, {"params", T.params}};
+}
bool fromJSON(const json::Value &V, Evt &T, json::Path P) {
json::ObjectMapper O(V, P);
- return O && O.map("evt", T.name);
+ return O && O.map("name", T.name) && O.map("params", T.params);
}
bool operator==(const Evt &a, const Evt &b) { return a.name == b.name; }
inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Evt &V) {
@@ -107,41 +137,114 @@ bool fromJSON(const json::Value &V, Message &msg, json::Path P) {
P.report("expected object");
return false;
}
- if (O->get("req")) {
- Req R;
- if (!fromJSON(V, R, P))
+
+ if (O->find("id") == O->end()) {
+ Evt E;
+ if (!fromJSON(V, E, P))
return false;
- msg = std::move(R);
+ msg = std::move(E);
return true;
}
- if (O->get("resp")) {
- Resp R;
+
+ if (O->get("name")) {
+ Req R;
if (!fromJSON(V, R, P))
return false;
msg = std::move(R);
return true;
}
- if (O->get("evt")) {
- Evt E;
- if (!fromJSON(V, E, P))
- return false;
- msg = std::move(E);
- return true;
- }
- P.report("unknown message type");
- return false;
+ Resp R;
+ if (!fromJSON(V, R, P))
+ return false;
+
+ msg = std::move(R);
+ return true;
}
-} // namespace test_protocol
+struct MyFnParams {
+ int a = 0;
+ int b = 0;
+};
+json::Value toJSON(const MyFnParams &T) {
+ return json::Object{{"a", T.a}, {"b", T.b}};
+}
+bool fromJSON(const json::Value &V, MyFnParams &T, json::Path P) {
+ json::ObjectMapper O(V, P);
+ return O && O.map("a", T.a) && O.map("b", T.b);
+}
+
+struct MyFnResult {
+ int c = 0;
+};
+json::Value toJSON(const MyFnResult &T) { return json::Object{{"c", T.c}}; }
+bool fromJSON(const json::Value &V, MyFnResult &T, json::Path P) {
+ json::ObjectMapper O(V, P);
+ return O && O.map("c", T.c);
+}
-template <typename T, typename Req, typename Resp, typename Evt>
-class JSONTransportTest : public PipePairTest {
+struct ProtoDesc {
+ using Id = int;
+ using Req = Req;
+ using Resp = Resp;
+ using Evt = Evt;
+ static inline Id InitialId() { return 0; }
+ static inline Req Make(Id id, llvm::StringRef method,
+ std::optional<llvm::json::Value> params) {
+ return Req{id, method.str(), params};
+ }
+ static inline Evt Make(llvm::StringRef method,
+ std::optional<llvm::json::Value> params) {
+ return Evt{method.str(), params};
+ }
+ static inline Resp Make(Req req, llvm::Error error) {
+ Resp resp;
+ resp.id = req.id;
+ llvm::handleAllErrors(
+ std::move(error), [&](const llvm::ErrorInfoBase &err) {
+ std::error_code cerr = err.convertToErrorCode();
+ resp.errorCode =
+ cerr == llvm::inconvertibleErrorCode() ? 1 : cerr.value();
+ resp.result = err.message();
+ });
+ return resp;
+ }
+ static inline Resp Make(Req req, std::optional<llvm::json::Value> result) {
+ return Resp{req.id, 0, std::move(result)};
+ }
+ static inline Id KeyFor(Resp r) { return r.id; }
+ static inline std::string KeyFor(Req r) { return r.name; }
+ static inline std::string KeyFor(Evt e) { return e.name; }
+ static inline std::optional<llvm::json::Value> Extract(Req r) {
+ return r.params;
+ }
+ static inline llvm::Expected<llvm::json::Value> Extract(Resp r) {
+ if (r.errorCode != 0)
+ return llvm::createStringError(
+ std::error_code(r.errorCode, std::generic_category()),
+ r.result && r.result->getAsString() ? *r.result->getAsString()
+ : "no-message");
+ return r.result;
+ }
+ static inline std::optional<llvm::json::Value> Extract(Evt e) {
+ return e.params;
+ }
+};
+
+using Transport = TestTransport<ProtoDesc>;
+using Binder = lldb_private::transport::Binder<ProtoDesc>;
+using MessageHandler = MockMessageHandler<ProtoDesc>;
+
+} // namespace test_protocol
+
+template <typename T> class JSONTransportTest : public PipePairTest {
protected:
- MockMessageHandler<Req, Resp, Evt> message_handler;
+ SubsystemRAII<FileSystem> subsystems;
+
+ test_protocol::MessageHandler message_handler;
std::unique_ptr<T> transport;
MainLoop loop;
@@ -191,8 +294,7 @@ protected:
};
class TestHTTPDelimitedJSONTransport final
- : public HTTPDelimitedJSONTransport<test_protocol::Req, test_protocol::Resp,
- test_protocol::Evt> {
+ : public HTTPDelimitedJSONTransport<test_protocol::ProtoDesc> {
public:
using HTTPDelimitedJSONTransport::HTTPDelimitedJSONTransport;
@@ -204,9 +306,7 @@ public:
};
class HTTPDelimitedJSONTransportTest
- : public JSONTransportTest<TestHTTPDelimitedJSONTransport,
- test_protocol::Req, test_protocol::Resp,
- test_protocol::Evt> {
+ : public JSONTransportTest<TestHTTPDelimitedJSONTransport> {
public:
using JSONTransportTest::JSONTransportTest;
@@ -222,8 +322,7 @@ public:
};
class TestJSONRPCTransport final
- : public JSONRPCTransport<test_protocol::Req, test_protocol::Resp,
- test_protocol::Evt> {
+ : public JSONRPCTransport<test_protocol::ProtoDesc> {
public:
using JSONRPCTransport::JSONRPCTransport;
@@ -234,9 +333,7 @@ public:
std::vector<std::string> log_messages;
};
-class JSONRPCTransportTest
- : public JSONTransportTest<TestJSONRPCTransport, test_protocol::Req,
- test_protocol::Resp, test_protocol::Evt> {
+class JSONRPCTransportTest : public JSONTransportTest<TestJSONRPCTransport> {
public:
using JSONTransportTest::JSONTransportTest;
@@ -248,6 +345,33 @@ public:
}
};
+class TransportBinderTest : public testing::Test {
+protected:
+ SubsystemRAII<FileSystem> subsystems;
+
+ std::unique_ptr<test_protocol::Transport> to_remote;
+ std::unique_ptr<test_protocol::Transport> from_remote;
+ std::unique_ptr<test_protocol::Binder> binder;
+ test_protocol::MessageHandler remote;
+ MainLoop loop;
+
+ void SetUp() override {
+ std::tie(to_remote, from_remote) = test_protocol::Transport::createPair();
+ binder = std::make_unique<test_protocol::Binder>(*to_remote);
+
+ auto binder_handle = to_remote->RegisterMessageHandler(loop, remote);
+ EXPECT_THAT_EXPECTED(binder_handle, Succeeded());
+
+ auto remote_handle = from_remote->RegisterMessageHandler(loop, *binder);
+ EXPECT_THAT_EXPECTED(remote_handle, Succeeded());
+ }
+
+ void Run() {
+ loop.AddPendingCallback([](auto &loop) { loop.RequestTermination(); });
+ EXPECT_THAT_ERROR(loop.Run().takeError(), Succeeded());
+ }
+};
+
} // namespace
// Failing on Windows, see https://github.com/llvm/llvm-project/issues/153446.
@@ -269,35 +393,45 @@ TEST_F(HTTPDelimitedJSONTransportTest, MalformedRequests) {
}
TEST_F(HTTPDelimitedJSONTransportTest, Read) {
- Write(Req{"foo"});
- EXPECT_CALL(message_handler, Received(Req{"foo"}));
+ Write(Req{6, "foo", std::nullopt});
+ EXPECT_CALL(message_handler, Received(Req{6, "foo", std::nullopt}));
ASSERT_THAT_ERROR(Run(), Succeeded());
}
TEST_F(HTTPDelimitedJSONTransportTest, ReadMultipleMessagesInSingleWrite) {
InSequence seq;
- Write(Message{Req{"one"}}, Message{Evt{"two"}}, Message{Resp{"three"}});
- EXPECT_CALL(message_handler, Received(Req{"one"}));
- EXPECT_CALL(message_handler, Received(Evt{"two"}));
- EXPECT_CALL(message_handler, Received(Resp{"three"}));
+ Write(
+ Message{
+ Req{6, "one", std::nullopt},
+ },
+ Message{
+ Evt{"two", std::nullopt},
+ },
+ Message{
+ Resp{2, 0, std::nullopt},
+ });
+ EXPECT_CALL(message_handler, Received(Req{6, "one", std::nullopt}));
+ EXPECT_CALL(message_handler, Received(Evt{"two", std::nullopt}));
+ EXPECT_CALL(message_handler, Received(Resp{2, 0, std::nullopt}));
ASSERT_THAT_ERROR(Run(), Succeeded());
}
TEST_F(HTTPDelimitedJSONTransportTest, ReadAcrossMultipleChunks) {
std::string long_str = std::string(
- HTTPDelimitedJSONTransport<Req, Resp, Evt>::kReadBufferSize * 2, 'x');
- Write(Req{long_str});
- EXPECT_CALL(message_handler, Received(Req{long_str}));
+ HTTPDelimitedJSONTransport<test_protocol::ProtoDesc>::kReadBufferSize * 2,
+ 'x');
+ Write(Req{5, long_str, std::nullopt});
+ EXPECT_CALL(message_handler, Received(Req{5, long_str, std::nullopt}));
ASSERT_THAT_ERROR(Run(), Succeeded());
}
TEST_F(HTTPDelimitedJSONTransportTest, ReadPartialMessage) {
- std::string message = Encode(Req{"foo"});
+ std::string message = Encode(Req{5, "foo", std::nullopt});
auto split_at = message.size() / 2;
std::string part1 = message.substr(0, split_at);
std::string part2 = message.substr(split_at);
- EXPECT_CALL(message_handler, Received(Req{"foo"}));
+ EXPECT_CALL(message_handler, Received(Req{5, "foo", std::nullopt}));
ASSERT_THAT_EXPECTED(input.Write(part1.data(), part1.size()), Succeeded());
loop.AddPendingCallback(
@@ -309,12 +443,12 @@ TEST_F(HTTPDelimitedJSONTransportTest, ReadPartialMessage) {
}
TEST_F(HTTPDelimitedJSONTransportTest, ReadWithZeroByteWrites) {
- std::string message = Encode(Req{"foo"});
+ std::string message = Encode(Req{6, "foo", std::nullopt});
auto split_at = message.size() / 2;
std::string part1 = message.substr(0, split_at);
std::string part2 = message.substr(split_at);
- EXPECT_CALL(message_handler, Received(Req{"foo"}));
+ EXPECT_CALL(message_handler, Received(Req{6, "foo", std::nullopt}));
ASSERT_THAT_EXPECTED(input.Write(part1.data(), part1.size()), Succeeded());
@@ -366,20 +500,21 @@ TEST_F(HTTPDelimitedJSONTransportTest, InvalidTransport) {
}
TEST_F(HTTPDelimitedJSONTransportTest, Write) {
- ASSERT_THAT_ERROR(transport->Send(Req{"foo"}), Succeeded());
- ASSERT_THAT_ERROR(transport->Send(Resp{"bar"}), Succeeded());
- ASSERT_THAT_ERROR(transport->Send(Evt{"baz"}), Succeeded());
+ ASSERT_THAT_ERROR(transport->Send(Req{7, "foo", std::nullopt}), Succeeded());
+ ASSERT_THAT_ERROR(transport->Send(Resp{5, 0, "bar"}), Succeeded());
+ ASSERT_THAT_ERROR(transport->Send(Evt{"baz", std::nullopt}), Succeeded());
output.CloseWriteFileDescriptor();
char buf[1024];
Expected<size_t> bytes_read =
output.Read(buf, sizeof(buf), std::chrono::milliseconds(1));
ASSERT_THAT_EXPECTED(bytes_read, Succeeded());
- ASSERT_EQ(StringRef(buf, *bytes_read), StringRef("Content-Length: 13\r\n\r\n"
- R"({"req":"foo"})"
- "Content-Length: 14\r\n\r\n"
- R"({"resp":"bar"})"
- "Content-Length: 13\r\n\r\n"
- R"({"evt":"baz"})"));
+ ASSERT_EQ(StringRef(buf, *bytes_read),
+ StringRef("Content-Length: 35\r\n\r\n"
+ R"({"id":7,"name":"foo","params":null})"
+ "Content-Length: 37\r\n\r\n"
+ R"({"errorCode":0,"id":5,"result":"bar"})"
+ "Content-Length: 28\r\n\r\n"
+ R"({"name":"baz","params":null})"));
}
TEST_F(JSONRPCTransportTest, MalformedRequests) {
@@ -395,37 +530,38 @@ TEST_F(JSONRPCTransportTest, MalformedRequests) {
}
TEST_F(JSONRPCTransportTest, Read) {
- Write(Message{Req{"foo"}});
- EXPECT_CALL(message_handler, Received(Req{"foo"}));
+ Write(Message{Req{1, "foo", std::nullopt}});
+ EXPECT_CALL(message_handler, Received(Req{1, "foo", std::nullopt}));
ASSERT_THAT_ERROR(Run(), Succeeded());
}
TEST_F(JSONRPCTransportTest, ReadMultipleMessagesInSingleWrite) {
InSequence seq;
- Write(Message{Req{"one"}}, Message{Evt{"two"}}, Message{Resp{"three"}});
- EXPECT_CALL(message_handler, Received(Req{"one"}));
- EXPECT_CALL(message_handler, Received(Evt{"two"}));
- EXPECT_CALL(message_handler, Received(Resp{"three"}));
+ Write(Message{Req{1, "one", std::nullopt}}, Message{Evt{"two", std::nullopt}},
+ Message{Resp{3, 0, "three"}});
+ EXPECT_CALL(message_handler, Received(Req{1, "one", std::nullopt}));
+ EXPECT_CALL(message_handler, Received(Evt{"two", std::nullopt}));
+ EXPECT_CALL(message_handler, Received(Resp{3, 0, "three"}));
ASSERT_THAT_ERROR(Run(), Succeeded());
}
TEST_F(JSONRPCTransportTest, ReadAcrossMultipleChunks) {
// Use a string longer than the chunk size to ensure we split the message
// across the chunk boundary.
- std::string long_str =
- std::string(IOTransport<Req, Resp, Evt>::kReadBufferSize * 2, 'x');
- Write(Req{long_str});
- EXPECT_CALL(message_handler, Received(Req{long_str}));
+ std::string long_str = std::string(
+ IOTransport<test_protocol::ProtoDesc>::kReadBufferSize * 2, 'x');
+ Write(Req{42, long_str, std::nullopt});
+ EXPECT_CALL(message_handler, Received(Req{42, long_str, std::nullopt}));
ASSERT_THAT_ERROR(Run(), Succeeded());
}
TEST_F(JSONRPCTransportTest, ReadPartialMessage) {
- std::string message = R"({"req": "foo"})"
+ std::string message = R"({"id":42,"name":"foo","params":null})"
"\n";
std::string part1 = message.substr(0, 7);
std::string part2 = message.substr(7);
- EXPECT_CALL(message_handler, Received(Req{"foo"}));
+ EXPECT_CALL(message_handler, Received(Req{42, "foo", std::nullopt}));
ASSERT_THAT_EXPECTED(input.Write(part1.data(), part1.size()), Succeeded());
loop.AddPendingCallback(
@@ -455,20 +591,21 @@ TEST_F(JSONRPCTransportTest, ReaderWithUnhandledData) {
}
TEST_F(JSONRPCTransportTest, Write) {
- ASSERT_THAT_ERROR(transport->Send(Req{"foo"}), Succeeded());
- ASSERT_THAT_ERROR(transport->Send(Resp{"bar"}), Succeeded());
- ASSERT_THAT_ERROR(transport->Send(Evt{"baz"}), Succeeded());
+ ASSERT_THAT_ERROR(transport->Send(Req{11, "foo", std::nullopt}), Succeeded());
+ ASSERT_THAT_ERROR(transport->Send(Resp{14, 0, "bar"}), Succeeded());
+ ASSERT_THAT_ERROR(transport->Send(Evt{"baz", std::nullopt}), Succeeded());
output.CloseWriteFileDescriptor();
char buf[1024];
Expected<size_t> bytes_read =
output.Read(buf, sizeof(buf), std::chrono::milliseconds(1));
ASSERT_THAT_EXPECTED(bytes_read, Succeeded());
- ASSERT_EQ(StringRef(buf, *bytes_read), StringRef(R"({"req":"foo"})"
- "\n"
- R"({"resp":"bar"})"
- "\n"
- R"({"evt":"baz"})"
- "\n"));
+ ASSERT_EQ(StringRef(buf, *bytes_read),
+ StringRef(R"({"id":11,"name":"foo","params":null})"
+ "\n"
+ R"({"errorCode":0,"id":14,"result":"bar"})"
+ "\n"
+ R"({"name":"baz","params":null})"
+ "\n"));
}
TEST_F(JSONRPCTransportTest, InvalidTransport) {
@@ -477,4 +614,183 @@ TEST_F(JSONRPCTransportTest, InvalidTransport) {
FailedWithMessage("IO object is not valid."));
}
+// Binding an out-bound request function.
+TEST_F(TransportBinderTest, OutBoundRequests) {
+ OutgoingRequest<MyFnResult, MyFnParams> addFn =
+ binder->Bind<MyFnResult, MyFnParams>("add");
+ bool replied = false;
+ addFn(MyFnParams{1, 2}, [&](Expected<MyFnResult> result) {
+ EXPECT_THAT_EXPECTED(result, Succeeded());
+ EXPECT_EQ(result->c, 3);
+ replied = true;
+ });
+ EXPECT_CALL(remote, Received(Req{1, "add", MyFnParams{1, 2}}));
+ EXPECT_THAT_ERROR(from_remote->Send(Resp{1, 0, toJSON(MyFnResult{3})}),
+ Succeeded());
+ Run();
+ EXPECT_TRUE(replied);
+}
+
+TEST_F(TransportBinderTest, OutBoundRequestsVoidParams) {
+ OutgoingRequest<MyFnResult, void> voidParamFn =
+ binder->Bind<MyFnResult, void>("voidParam");
+ bool replied = false;
+ voidParamFn([&](Expected<MyFnResult> result) {
+ EXPECT_THAT_EXPECTED(result, Succeeded());
+ EXPECT_EQ(result->c, 3);
+ replied = true;
+ });
+ EXPECT_CALL(remote, Received(Req{1, "voidParam", std::nullopt}));
+ EXPECT_THAT_ERROR(from_remote->Send(Resp{1, 0, toJSON(MyFnResult{3})}),
+ Succeeded());
+ Run();
+ EXPECT_TRUE(replied);
+}
+
+TEST_F(TransportBinderTest, OutBoundRequestsVoidResult) {
+ OutgoingRequest<void, MyFnParams> voidResultFn =
+ binder->Bind<void, MyFnParams>("voidResult");
+ bool replied = false;
+ voidResultFn(MyFnParams{4, 5}, [&](llvm::Error error) {
+ EXPECT_THAT_ERROR(std::move(error), Succeeded());
+ replied = true;
+ });
+ EXPECT_CALL(remote, Received(Req{1, "voidResult", MyFnParams{4, 5}}));
+ EXPECT_THAT_ERROR(from_remote->Send(Resp{1, 0, std::nullopt}), Succeeded());
+ Run();
+ EXPECT_TRUE(replied);
+}
+
+TEST_F(TransportBinderTest, OutBoundRequestsVoidParamsAndVoidResult) {
+ OutgoingRequest<void, void> voidParamAndResultFn =
+ binder->Bind<void, void>("voidParamAndResult");
+ bool replied = false;
+ voidParamAndResultFn([&](llvm::Error error) {
+ EXPECT_THAT_ERROR(std::move(error), Succeeded());
+ replied = true;
+ });
+ EXPECT_CALL(remote, Received(Req{1, "voidParamAndResult", std::nullopt}));
+ EXPECT_THAT_ERROR(from_remote->Send(Resp{1, 0, std::nullopt}), Succeeded());
+ Run();
+ EXPECT_TRUE(replied);
+}
+
+// Binding an in-bound request handler.
+TEST_F(TransportBinderTest, InBoundRequests) {
+ bool called = false;
+ binder->Bind<MyFnResult, MyFnParams>(
+ "add",
+ [&](const int captured_param,
+ const MyFnParams &params) -> Expected<MyFnResult> {
+ called = true;
+ return MyFnResult{params.a + params.b + captured_param};
+ },
+ 2);
+ EXPECT_THAT_ERROR(from_remote->Send(Req{1, "add", MyFnParams{3, 4}}),
+ Succeeded());
+
+ EXPECT_CALL(remote, Received(Resp{1, 0, MyFnResult{9}}));
+ Run();
+ EXPECT_TRUE(called);
+}
+
+TEST_F(TransportBinderTest, InBoundRequestsVoidParams) {
+ bool called = false;
+ binder->Bind<MyFnResult, void>(
+ "voidParam",
+ [&](const int captured_param) -> Expected<MyFnResult> {
+ called = true;
+ return MyFnResult{captured_param};
+ },
+ 2);
+ EXPECT_THAT_ERROR(from_remote->Send(Req{2, "voidParam", std::nullopt}),
+ Succeeded());
+ EXPECT_CALL(remote, Received(Resp{2, 0, MyFnResult{2}}));
+ Run();
+ EXPECT_TRUE(called);
+}
+
+TEST_F(TransportBinderTest, InBoundRequestsVoidResult) {
+ bool called = false;
+ binder->Bind<void, MyFnParams>(
+ "voidResult",
+ [&](const int captured_param, const MyFnParams &params) -> llvm::Error {
+ called = true;
+ EXPECT_EQ(captured_param, 2);
+ EXPECT_EQ(params.a, 3);
+ EXPECT_EQ(params.b, 4);
+ return llvm::Error::success();
+ },
+ 2);
+ EXPECT_THAT_ERROR(from_remote->Send(Req{3, "voidResult", MyFnParams{3, 4}}),
+ Succeeded());
+ EXPECT_CALL(remote, Received(Resp{3, 0, std::nullopt}));
+ Run();
+ EXPECT_TRUE(called);
+}
+TEST_F(TransportBinderTest, InBoundRequestsVoidParamsAndResult) {
+ bool called = false;
+ binder->Bind<void, void>(
+ "voidParamAndResult",
+ [&](const int captured_param) -> llvm::Error {
+ called = true;
+ EXPECT_EQ(captured_param, 2);
+ return llvm::Error::success();
+ },
+ 2);
+ EXPECT_THAT_ERROR(
+ from_remote->Send(Req{4, "voidParamAndResult", std::nullopt}),
+ Succeeded());
+ EXPECT_CALL(remote, Received(Resp{4, 0, std::nullopt}));
+ Run();
+ EXPECT_TRUE(called);
+}
+
+// Binding an out-bound event emitter.
+TEST_F(TransportBinderTest, OutBoundEvents) {
+ OutgoingEvent<MyFnParams> emitEvent = binder->Bind<MyFnParams>("evt");
+ emitEvent(MyFnParams{1, 2});
+ EXPECT_CALL(remote, Received(Evt{"evt", MyFnParams{1, 2}}));
+ Run();
+}
+
+TEST_F(TransportBinderTest, OutBoundEventsVoidParams) {
+ OutgoingEvent<void> emitEvent = binder->Bind<void>("evt");
+ emitEvent();
+ EXPECT_CALL(remote, Received(Evt{"evt", std::nullopt}));
+ Run();
+}
+
+// Binding an in-bound event handler.
+TEST_F(TransportBinderTest, InBoundEvents) {
+ bool called = false;
+ binder->Bind<MyFnParams>(
+ "evt",
+ [&](const int captured_arg, const MyFnParams &params) {
+ EXPECT_EQ(captured_arg, 42);
+ EXPECT_EQ(params.a, 3);
+ EXPECT_EQ(params.b, 4);
+ called = true;
+ },
+ 42);
+ EXPECT_THAT_ERROR(from_remote->Send(Evt{"evt", MyFnParams{3, 4}}),
+ Succeeded());
+ Run();
+ EXPECT_TRUE(called);
+}
+
+TEST_F(TransportBinderTest, InBoundEventsVoidParams) {
+ bool called = false;
+ binder->Bind<void>(
+ "evt",
+ [&](const int captured_arg) {
+ EXPECT_EQ(captured_arg, 42);
+ called = true;
+ },
+ 42);
+ EXPECT_THAT_ERROR(from_remote->Send(Evt{"evt", std::nullopt}), Succeeded());
+ Run();
+ EXPECT_TRUE(called);
+}
+
#endif
diff --git a/lldb/unittests/Host/posix/HostTest.cpp b/lldb/unittests/Host/posix/HostTest.cpp
index dc75b28..7135f26 100644
--- a/lldb/unittests/Host/posix/HostTest.cpp
+++ b/lldb/unittests/Host/posix/HostTest.cpp
@@ -15,10 +15,6 @@
#include <cerrno>
#include <sys/resource.h>
-#ifdef __linux__
-#include <linux/version.h>
-#endif // __linux__
-
using namespace lldb_private;
namespace {
@@ -120,12 +116,13 @@ TEST_F(HostTest, GetProcessInfoSetsPriority) {
ASSERT_TRUE(Info.IsZombie().has_value());
ASSERT_FALSE(Info.IsZombie().value());
- // CoreDumping was added in kernel version 4.15.
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0)
- ASSERT_TRUE(Info.IsCoreDumping().has_value());
- ASSERT_FALSE(Info.IsCoreDumping().value());
-#else
- ASSERT_FALSE(Info.IsCoreDumping().has_value());
-#endif
+ const llvm::VersionTuple host_version = HostInfo::GetOSVersion();
+ ASSERT_FALSE(host_version.empty());
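+  // The CoreDumping field was added in Linux kernel 4.15.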
+ if (host_version >= llvm::VersionTuple(4, 15, 0)) {
+ ASSERT_TRUE(Info.IsCoreDumping().has_value());
+ ASSERT_FALSE(Info.IsCoreDumping().value());
+ } else {
+ ASSERT_FALSE(Info.IsCoreDumping().has_value());
+ }
}
#endif
diff --git a/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp b/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp
index f3ca4cf..9628cbd 100644
--- a/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp
+++ b/lldb/unittests/Protocol/ProtocolMCPServerTest.cpp
@@ -6,9 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#include "ProtocolMCPTestUtilities.h"
+#include "ProtocolMCPTestUtilities.h" // IWYU pragma: keep
#include "TestingSupport/Host/JSONTransportTestUtilities.h"
-#include "TestingSupport/Host/PipeTestUtilities.h"
#include "TestingSupport/SubsystemRAII.h"
#include "lldb/Host/FileSystem.h"
#include "lldb/Host/HostInfo.h"
@@ -28,20 +27,22 @@
#include "llvm/Testing/Support/Error.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include <chrono>
-#include <condition_variable>
+#include <future>
+#include <memory>
+#include <optional>
+#include <system_error>
using namespace llvm;
using namespace lldb;
using namespace lldb_private;
+using namespace lldb_private::transport;
using namespace lldb_protocol::mcp;
namespace {
-class TestServer : public Server {
-public:
- using Server::Server;
-};
+template <typename T> Response make_response(T &&result, Id id = 1) {
+ return Response{id, std::forward<T>(result)};
+}
/// Test tool that returns its argument as text.
class TestTool : public Tool {
@@ -101,7 +102,9 @@ public:
using Tool::Tool;
llvm::Expected<CallToolResult> Call(const ToolArguments &args) override {
- return llvm::createStringError("error");
+ return llvm::createStringError(
+ std::error_code(eErrorCodeInternalError, std::generic_category()),
+ "error");
}
};
@@ -118,195 +121,207 @@ public:
}
};
-class ProtocolServerMCPTest : public PipePairTest {
+class TestServer : public Server {
+public:
+ using Server::Bind;
+ using Server::Server;
+};
+
+using Transport = TestTransport<lldb_protocol::mcp::ProtocolDescriptor>;
+
+class ProtocolServerMCPTest : public testing::Test {
public:
SubsystemRAII<FileSystem, HostInfo, Socket> subsystems;
MainLoop loop;
+ lldb_private::MainLoop::ReadHandleUP handles[2];
- std::unique_ptr<lldb_protocol::mcp::Transport> from_client;
- std::unique_ptr<lldb_protocol::mcp::Transport> to_client;
- MainLoopBase::ReadHandleUP handles[2];
-
+ std::unique_ptr<Transport> to_server;
+ MCPBinderUP binder;
std::unique_ptr<TestServer> server_up;
- MockMessageHandler<Request, Response, Notification> message_handler;
- llvm::Error Write(llvm::StringRef message) {
- llvm::Expected<json::Value> value = json::parse(message);
- if (!value)
- return value.takeError();
- return from_client->Write(*value);
- }
+ std::unique_ptr<Transport> to_client;
+ MockMessageHandler<lldb_protocol::mcp::ProtocolDescriptor> client;
- llvm::Error Write(json::Value value) { return from_client->Write(value); }
+ std::vector<std::string> logged_messages;
- /// Run the transport MainLoop and return any messages received.
- llvm::Error Run() {
- loop.AddCallback([](MainLoopBase &loop) { loop.RequestTermination(); },
- std::chrono::milliseconds(10));
- return loop.Run().takeError();
+ /// Runs the MainLoop a single time, executing any pending callbacks.
+ void Run() {
+ loop.AddPendingCallback(
+ [](MainLoopBase &loop) { loop.RequestTermination(); });
+ EXPECT_THAT_ERROR(loop.Run().takeError(), Succeeded());
}
void SetUp() override {
- PipePairTest::SetUp();
-
- from_client = std::make_unique<lldb_protocol::mcp::Transport>(
- std::make_shared<NativeFile>(input.GetReadFileDescriptor(),
- File::eOpenOptionReadOnly,
- NativeFile::Unowned),
- std::make_shared<NativeFile>(output.GetWriteFileDescriptor(),
- File::eOpenOptionWriteOnly,
- NativeFile::Unowned),
- [](StringRef message) {
- // Uncomment for debugging
- // llvm::errs() << "from_client: " << message << '\n';
- });
- to_client = std::make_unique<lldb_protocol::mcp::Transport>(
- std::make_shared<NativeFile>(output.GetReadFileDescriptor(),
- File::eOpenOptionReadOnly,
- NativeFile::Unowned),
- std::make_shared<NativeFile>(input.GetWriteFileDescriptor(),
- File::eOpenOptionWriteOnly,
- NativeFile::Unowned),
- [](StringRef message) {
- // Uncomment for debugging
- // llvm::errs() << "to_client: " << message << '\n';
- });
-
- server_up = std::make_unique<TestServer>("lldb-mcp", "0.1.0", *to_client,
- [](StringRef message) {
- // Uncomment for debugging
- // llvm::errs() << "server: " <<
- // message << '\n';
- });
-
- auto maybe_from_client_handle =
- from_client->RegisterMessageHandler(loop, message_handler);
- EXPECT_THAT_EXPECTED(maybe_from_client_handle, Succeeded());
- handles[0] = std::move(*maybe_from_client_handle);
-
- auto maybe_to_client_handle =
- to_client->RegisterMessageHandler(loop, *server_up);
- EXPECT_THAT_EXPECTED(maybe_to_client_handle, Succeeded());
- handles[1] = std::move(*maybe_to_client_handle);
+ std::tie(to_client, to_server) = Transport::createPair();
+
+ server_up = std::make_unique<TestServer>(
+ "lldb-mcp", "0.1.0",
+ [this](StringRef msg) { logged_messages.push_back(msg.str()); });
+ binder = server_up->Bind(*to_client);
+ auto server_handle = to_server->RegisterMessageHandler(loop, *binder);
+ EXPECT_THAT_EXPECTED(server_handle, Succeeded());
+ binder->OnError([](llvm::Error error) {
+ llvm::errs() << formatv("Server transport error: {0}", error);
+ });
+ handles[0] = std::move(*server_handle);
+
+ auto client_handle = to_client->RegisterMessageHandler(loop, client);
+ EXPECT_THAT_EXPECTED(client_handle, Succeeded());
+ handles[1] = std::move(*client_handle);
+ }
+
+ template <typename Result, typename Params>
+ Expected<json::Value> Call(StringRef method, const Params &params) {
+ std::promise<Response> promised_result;
+ Request req =
+ lldb_protocol::mcp::Request{/*id=*/1, method.str(), toJSON(params)};
+ EXPECT_THAT_ERROR(to_server->Send(req), Succeeded());
+ EXPECT_CALL(client, Received(testing::An<const Response &>()))
+ .WillOnce(
+ [&](const Response &resp) { promised_result.set_value(resp); });
+ Run();
+ Response resp = promised_result.get_future().get();
+ return toJSON(resp);
+ }
+
+ template <typename Result>
+ Expected<json::Value>
+ Capture(llvm::unique_function<void(Reply<Result>)> &fn) {
+ std::promise<llvm::Expected<Result>> promised_result;
+ fn([&promised_result](llvm::Expected<Result> result) {
+ promised_result.set_value(std::move(result));
+ });
+ Run();
+ llvm::Expected<Result> result = promised_result.get_future().get();
+ if (!result)
+ return result.takeError();
+ return toJSON(*result);
+ }
+
+ template <typename Result, typename Params>
+ Expected<json::Value>
+ Capture(llvm::unique_function<void(const Params &, Reply<Result>)> &fn,
+ const Params &params) {
+ std::promise<llvm::Expected<Result>> promised_result;
+ fn(params, [&promised_result](llvm::Expected<Result> result) {
+ promised_result.set_value(std::move(result));
+ });
+ Run();
+ llvm::Expected<Result> result = promised_result.get_future().get();
+ if (!result)
+ return result.takeError();
+ return toJSON(*result);
}
};
template <typename T>
-Request make_request(StringLiteral method, T &&params, Id id = 1) {
- return Request{id, method.str(), toJSON(std::forward<T>(params))};
-}
-
-template <typename T> Response make_response(T &&result, Id id = 1) {
- return Response{id, std::forward<T>(result)};
+inline testing::internal::EqMatcher<llvm::json::Value> HasJSON(T x) {
+ return testing::internal::EqMatcher<llvm::json::Value>(toJSON(x));
}
} // namespace
TEST_F(ProtocolServerMCPTest, Initialization) {
- Request request = make_request(
- "initialize", InitializeParams{/*protocolVersion=*/"2024-11-05",
- /*capabilities=*/{},
- /*clientInfo=*/{"lldb-unit", "0.1.0"}});
- Response response = make_response(
- InitializeResult{/*protocolVersion=*/"2024-11-05",
- /*capabilities=*/{/*supportsToolsList=*/true},
- /*serverInfo=*/{"lldb-mcp", "0.1.0"}});
-
- ASSERT_THAT_ERROR(Write(request), Succeeded());
- EXPECT_CALL(message_handler, Received(response));
- EXPECT_THAT_ERROR(Run(), Succeeded());
+ EXPECT_THAT_EXPECTED(
+ (Call<InitializeResult, InitializeParams>(
+ "initialize",
+ InitializeParams{/*protocolVersion=*/"2024-11-05",
+ /*capabilities=*/{},
+ /*clientInfo=*/{"lldb-unit", "0.1.0"}})),
+ HasValue(make_response(
+ InitializeResult{/*protocolVersion=*/"2024-11-05",
+ /*capabilities=*/
+ {
+ /*supportsToolsList=*/true,
+ /*supportsResourcesList=*/true,
+ },
+ /*serverInfo=*/{"lldb-mcp", "0.1.0"}})));
}
TEST_F(ProtocolServerMCPTest, ToolsList) {
server_up->AddTool(std::make_unique<TestTool>("test", "test tool"));
- Request request = make_request("tools/list", Void{}, /*id=*/"one");
-
ToolDefinition test_tool;
test_tool.name = "test";
test_tool.description = "test tool";
test_tool.inputSchema = json::Object{{"type", "object"}};
- Response response = make_response(ListToolsResult{{test_tool}}, /*id=*/"one");
-
- ASSERT_THAT_ERROR(Write(request), llvm::Succeeded());
- EXPECT_CALL(message_handler, Received(response));
- EXPECT_THAT_ERROR(Run(), Succeeded());
+ EXPECT_THAT_EXPECTED(Call<ListToolsResult>("tools/list", Void{}),
+ HasValue(make_response(ListToolsResult{{test_tool}})));
}
TEST_F(ProtocolServerMCPTest, ResourcesList) {
server_up->AddResourceProvider(std::make_unique<TestResourceProvider>());
- Request request = make_request("resources/list", Void{});
- Response response = make_response(ListResourcesResult{
- {{/*uri=*/"lldb://foo/bar", /*name=*/"name",
- /*description=*/"description", /*mimeType=*/"application/json"}}});
-
- ASSERT_THAT_ERROR(Write(request), llvm::Succeeded());
- EXPECT_CALL(message_handler, Received(response));
- EXPECT_THAT_ERROR(Run(), Succeeded());
+ EXPECT_THAT_EXPECTED(Call<ListResourcesResult>("resources/list", Void{}),
+ HasValue(make_response(ListResourcesResult{{
+ {
+ /*uri=*/"lldb://foo/bar",
+ /*name=*/"name",
+ /*description=*/"description",
+ /*mimeType=*/"application/json",
+ },
+ }})));
}
TEST_F(ProtocolServerMCPTest, ToolsCall) {
server_up->AddTool(std::make_unique<TestTool>("test", "test tool"));
- Request request = make_request(
- "tools/call", CallToolParams{/*name=*/"test", /*arguments=*/json::Object{
- {"arguments", "foo"},
- {"debugger_id", 0},
- }});
- Response response = make_response(CallToolResult{{{/*text=*/"foo"}}});
-
- ASSERT_THAT_ERROR(Write(request), llvm::Succeeded());
- EXPECT_CALL(message_handler, Received(response));
- EXPECT_THAT_ERROR(Run(), Succeeded());
+ EXPECT_THAT_EXPECTED(
+ (Call<CallToolResult, CallToolParams>("tools/call",
+ CallToolParams{
+ /*name=*/"test",
+ /*arguments=*/
+ json::Object{
+ {"arguments", "foo"},
+ {"debugger_id", 0},
+ },
+ })),
+ HasValue(make_response(CallToolResult{{{/*text=*/"foo"}}})));
}
TEST_F(ProtocolServerMCPTest, ToolsCallError) {
server_up->AddTool(std::make_unique<ErrorTool>("error", "error tool"));
- Request request = make_request(
- "tools/call", CallToolParams{/*name=*/"error", /*arguments=*/json::Object{
- {"arguments", "foo"},
- {"debugger_id", 0},
- }});
- Response response =
- make_response(lldb_protocol::mcp::Error{eErrorCodeInternalError,
- /*message=*/"error"});
-
- ASSERT_THAT_ERROR(Write(request), llvm::Succeeded());
- EXPECT_CALL(message_handler, Received(response));
- EXPECT_THAT_ERROR(Run(), Succeeded());
+ EXPECT_THAT_EXPECTED((Call<CallToolResult, CallToolParams>(
+ "tools/call", CallToolParams{
+ /*name=*/"error",
+ /*arguments=*/
+ json::Object{
+ {"arguments", "foo"},
+ {"debugger_id", 0},
+ },
+ })),
+ HasValue(make_response(lldb_protocol::mcp::Error{
+ eErrorCodeInternalError, "error"})));
}
TEST_F(ProtocolServerMCPTest, ToolsCallFail) {
server_up->AddTool(std::make_unique<FailTool>("fail", "fail tool"));
- Request request = make_request(
- "tools/call", CallToolParams{/*name=*/"fail", /*arguments=*/json::Object{
- {"arguments", "foo"},
- {"debugger_id", 0},
- }});
- Response response =
- make_response(CallToolResult{{{/*text=*/"failed"}}, /*isError=*/true});
-
- ASSERT_THAT_ERROR(Write(request), llvm::Succeeded());
- EXPECT_CALL(message_handler, Received(response));
- EXPECT_THAT_ERROR(Run(), Succeeded());
+ EXPECT_THAT_EXPECTED((Call<CallToolResult, CallToolParams>(
+ "tools/call", CallToolParams{
+ /*name=*/"fail",
+ /*arguments=*/
+ json::Object{
+ {"arguments", "foo"},
+ {"debugger_id", 0},
+ },
+ })),
+ HasValue(make_response(CallToolResult{
+ {{/*text=*/"failed"}},
+ /*isError=*/true,
+ })));
}
TEST_F(ProtocolServerMCPTest, NotificationInitialized) {
- bool handler_called = false;
- std::condition_variable cv;
-
- server_up->AddNotificationHandler(
- "notifications/initialized",
- [&](const Notification &notification) { handler_called = true; });
- llvm::StringLiteral request =
- R"json({"method":"notifications/initialized","jsonrpc":"2.0"})json";
-
- ASSERT_THAT_ERROR(Write(request), llvm::Succeeded());
- EXPECT_THAT_ERROR(Run(), Succeeded());
- EXPECT_TRUE(handler_called);
+ EXPECT_THAT_ERROR(to_server->Send(lldb_protocol::mcp::Notification{
+ "notifications/initialized",
+ std::nullopt,
+ }),
+ Succeeded());
+ Run();
+ EXPECT_THAT(logged_messages,
+ testing::Contains("MCP initialization complete"));
}
diff --git a/lldb/unittests/Target/CMakeLists.txt b/lldb/unittests/Target/CMakeLists.txt
index 3169339..0c79675 100644
--- a/lldb/unittests/Target/CMakeLists.txt
+++ b/lldb/unittests/Target/CMakeLists.txt
@@ -2,6 +2,7 @@ add_lldb_unittest(TargetTests
ABITest.cpp
DynamicRegisterInfoTest.cpp
ExecutionContextTest.cpp
+ Language.cpp
LocateModuleCallbackTest.cpp
MemoryRegionInfoTest.cpp
MemoryTest.cpp
diff --git a/lldb/unittests/Target/Language.cpp b/lldb/unittests/Target/Language.cpp
new file mode 100644
index 0000000..a00fda78
--- /dev/null
+++ b/lldb/unittests/Target/Language.cpp
@@ -0,0 +1,69 @@
+//===-- Language.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Target/Language.h"
+#include "lldb/lldb-enumerations.h"
+#include "gtest/gtest.h"
+
+using namespace lldb_private;
+using namespace lldb;
+
+namespace {
+class LanguageTest : public ::testing::Test {};
+} // namespace
+
+TEST_F(LanguageTest, SourceLanguage_GetDescription) {
+ for (uint32_t i = 1; i < lldb::eNumLanguageTypes; ++i) {
+ // 0x29 is unassigned
+ if (i == 0x29)
+ continue;
+
+ auto lang_type = static_cast<lldb::LanguageType>(i);
+ if (lang_type == lldb::eLanguageTypeLastStandardLanguage)
+ continue;
+
+ SourceLanguage lang(lang_type);
+
+ // eLanguageTypeHIP is not implemented as a DW_LNAME because of a conflict.
+ if (lang_type == lldb::eLanguageTypeHIP)
+ EXPECT_FALSE(lang);
+ else
+ EXPECT_TRUE(lang);
+ }
+
+ EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus).GetDescription(),
+ "ISO C++");
+ EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus_17).GetDescription(),
+ "ISO C++");
+ EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus_20).GetDescription(),
+ "ISO C++");
+
+ EXPECT_EQ(SourceLanguage(eLanguageTypeObjC).GetDescription(), "Objective C");
+ EXPECT_EQ(SourceLanguage(eLanguageTypeMipsAssembler).GetDescription(),
+ "Assembly");
+
+ auto next_vendor_language =
+ static_cast<lldb::LanguageType>(eLanguageTypeMipsAssembler + 1);
+ if (next_vendor_language < eNumLanguageTypes)
+ EXPECT_NE(SourceLanguage(next_vendor_language).GetDescription(), "Unknown");
+
+ EXPECT_EQ(SourceLanguage(eLanguageTypeUnknown).GetDescription(), "Unknown");
+}
+
+TEST_F(LanguageTest, SourceLanguage_AsLanguageType) {
+ EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus).AsLanguageType(),
+ eLanguageTypeC_plus_plus);
+ EXPECT_EQ(SourceLanguage(eLanguageTypeC_plus_plus_03).AsLanguageType(),
+ eLanguageTypeC_plus_plus_03);
+
+ // Vendor-specific language code.
+ EXPECT_EQ(SourceLanguage(eLanguageTypeMipsAssembler).AsLanguageType(),
+ eLanguageTypeAssembly);
+ EXPECT_EQ(SourceLanguage(eLanguageTypeUnknown).AsLanguageType(),
+ eLanguageTypeUnknown);
+}
diff --git a/lldb/unittests/TestingSupport/Host/JSONTransportTestUtilities.h b/lldb/unittests/TestingSupport/Host/JSONTransportTestUtilities.h
index 5a9eb8e..bacf8ca 100644
--- a/lldb/unittests/TestingSupport/Host/JSONTransportTestUtilities.h
+++ b/lldb/unittests/TestingSupport/Host/JSONTransportTestUtilities.h
@@ -6,19 +6,105 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLDB_UNITTESTS_TESTINGSUPPORT_HOST_NATIVEPROCESSTESTUTILS_H
-#define LLDB_UNITTESTS_TESTINGSUPPORT_HOST_NATIVEPROCESSTESTUTILS_H
+#ifndef LLDB_UNITTESTS_TESTINGSUPPORT_HOST_JSONTRANSPORTTESTUTILITIES_H
+#define LLDB_UNITTESTS_TESTINGSUPPORT_HOST_JSONTRANSPORTTESTUTILITIES_H
+#include "lldb/Host/FileSystem.h"
#include "lldb/Host/JSONTransport.h"
+#include "lldb/Host/MainLoop.h"
+#include "lldb/Utility/FileSpec.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Testing/Support/Error.h"
#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <cstddef>
+#include <memory>
+#include <utility>
-template <typename Req, typename Resp, typename Evt>
+template <typename Proto>
+class TestTransport final
+ : public lldb_private::transport::JSONTransport<Proto> {
+public:
+ using MessageHandler =
+ typename lldb_private::transport::JSONTransport<Proto>::MessageHandler;
+
+ static std::pair<std::unique_ptr<TestTransport<Proto>>,
+ std::unique_ptr<TestTransport<Proto>>>
+ createPair() {
+ std::unique_ptr<TestTransport<Proto>> transports[2] = {
+ std::make_unique<TestTransport<Proto>>(),
+ std::make_unique<TestTransport<Proto>>()};
+ return std::make_pair(std::move(transports[0]), std::move(transports[1]));
+ }
+
+ explicit TestTransport() {
+ llvm::Expected<lldb::FileUP> dummy_file =
+ lldb_private::FileSystem::Instance().Open(
+ lldb_private::FileSpec(lldb_private::FileSystem::DEV_NULL),
+ lldb_private::File::eOpenOptionReadWrite);
+ EXPECT_THAT_EXPECTED(dummy_file, llvm::Succeeded());
+ m_dummy_file = std::move(*dummy_file);
+ }
+
+ llvm::Error Send(const typename Proto::Evt &evt) override {
+ EXPECT_TRUE(m_loop && m_handler)
+ << "Send called before RegisterMessageHandler";
+ m_loop->AddPendingCallback([this, evt](lldb_private::MainLoopBase &) {
+ m_handler->Received(evt);
+ });
+ return llvm::Error::success();
+ }
+
+ llvm::Error Send(const typename Proto::Req &req) override {
+ EXPECT_TRUE(m_loop && m_handler)
+ << "Send called before RegisterMessageHandler";
+ m_loop->AddPendingCallback([this, req](lldb_private::MainLoopBase &) {
+ m_handler->Received(req);
+ });
+ return llvm::Error::success();
+ }
+
+ llvm::Error Send(const typename Proto::Resp &resp) override {
+ EXPECT_TRUE(m_loop && m_handler)
+ << "Send called before RegisterMessageHandler";
+ m_loop->AddPendingCallback([this, resp](lldb_private::MainLoopBase &) {
+ m_handler->Received(resp);
+ });
+ return llvm::Error::success();
+ }
+
+ llvm::Expected<lldb_private::MainLoop::ReadHandleUP>
+ RegisterMessageHandler(lldb_private::MainLoop &loop,
+ MessageHandler &handler) override {
+ if (!m_loop)
+ m_loop = &loop;
+ if (!m_handler)
+ m_handler = &handler;
+ lldb_private::Status status;
+ auto handle = loop.RegisterReadObject(
+ m_dummy_file, [](lldb_private::MainLoopBase &) {}, status);
+ if (status.Fail())
+ return status.takeError();
+ return handle;
+ }
+
+protected:
+ void Log(llvm::StringRef message) override {}
+
+private:
+ lldb_private::MainLoop *m_loop = nullptr;
+ MessageHandler *m_handler = nullptr;
+ // Dummy file for registering with the MainLoop.
+ lldb::FileSP m_dummy_file = nullptr;
+};
+
+template <typename Proto>
class MockMessageHandler final
- : public lldb_private::Transport<Req, Resp, Evt>::MessageHandler {
+ : public lldb_private::transport::JSONTransport<Proto>::MessageHandler {
public:
- MOCK_METHOD(void, Received, (const Evt &), (override));
- MOCK_METHOD(void, Received, (const Req &), (override));
- MOCK_METHOD(void, Received, (const Resp &), (override));
+ MOCK_METHOD(void, Received, (const typename Proto::Req &), (override));
+ MOCK_METHOD(void, Received, (const typename Proto::Resp &), (override));
+ MOCK_METHOD(void, Received, (const typename Proto::Evt &), (override));
MOCK_METHOD(void, OnError, (llvm::Error), (override));
MOCK_METHOD(void, OnClosed, (), (override));
};
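+
+// Illustrative usage sketch (`MyProto` is a hypothetical protocol type): pair
+// a TestTransport with a MockMessageHandler and set gMock expectations on it:
+//
+//   auto [to_server, to_client] = TestTransport<MyProto>::createPair();
+//   MockMessageHandler<MyProto> handler;
+//   EXPECT_CALL(handler, Received(testing::An<const MyProto::Req &>()));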
diff --git a/llvm/docs/DirectX/DXContainer.rst b/llvm/docs/DirectX/DXContainer.rst
index 17452d9..4473f4e 100644
--- a/llvm/docs/DirectX/DXContainer.rst
+++ b/llvm/docs/DirectX/DXContainer.rst
@@ -530,7 +530,7 @@ but adds a 32-bit access flag.
.. code-block:: c
struct DescriptorRange_V1_0 {
- uint32_t RangeType;
+ dxil::ResourceClass RangeType;
uint32_t NumDescriptors;
uint32_t BaseShaderRegister;
uint32_t RegisterSpace;
@@ -538,12 +538,12 @@ but adds a 32-bit access flag.
};
struct DescriptorRange_V1_1 {
- dxbc::DescriptorRangeType RangeType;
+ dxil::ResourceClass RangeType;
uint32_t NumDescriptors;
uint32_t BaseShaderRegister;
uint32_t RegisterSpace;
- uint32_t OffsetInDescriptorsFromTableStart;
uint32_t Flags;
+ uint32_t OffsetInDescriptorsFromTableStart;
};
Static Samplers
@@ -556,22 +556,26 @@ This section also has a variable size, since it can contain multiple static
sampler definitions. However, each definition is a fixed-size struct,
containing 13 32-bit fields of various enum, float, and integer values.
+In version 1.2, the static sampler is 17 bytes: it matches the version 1.0
+static sampler but adds a 32-bit access flag. In version 1.1, it matches the
+version 1.0 static sampler.
+
.. code-block:: c
struct StaticSamplerDesc {
- FilterMode Filter;
- TextureAddressMode AddressU;
- TextureAddressMode AddressV;
- TextureAddressMode AddressW;
+ dxbc::FilterMode Filter;
+ dxbc::TextureAddressMode AddressU;
+ dxbc::TextureAddressMode AddressV;
+ dxbc::TextureAddressMode AddressW;
float MipLODBias;
uint32_t MaxAnisotropy;
- ComparisonFunc ComparisonFunc;
- StaticBorderColor BorderColor;
+ dxbc::ComparisonFunc ComparisonFunc;
+ dxbc::StaticBorderColor BorderColor;
float MinLOD;
float MaxLOD;
uint32_t ShaderRegister;
uint32_t RegisterSpace;
- ShaderVisibility ShaderVisibility;
+ dxbc::ShaderVisibility ShaderVisibility;
};
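+
+An illustrative sketch of the version 1.2 layout (the struct and field names
+here are assumed for illustration, not taken from the sources):
+
+.. code-block:: c
+
+  struct StaticSamplerDesc_V1_2 {
+    StaticSamplerDesc Base;  // the 13 fields above
+    uint32_t SamplerFlags;   // 32-bit access flag added in version 1.2
+  };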
SFI0 Part
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index d9d6f0b..62c0806 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1959,6 +1959,10 @@ public:
LLVM_ABI SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
SDValue NewMemOp);
+ /// Get all the nodes in topological order without modifying any state.
+ LLVM_ABI void getTopologicallyOrderedNodes(
+ SmallVectorImpl<const SDNode *> &SortedNodes) const;
+
/// Topological-sort the AllNodes list and
/// assign a unique node id for each node in the DAG based on their
/// topological order. Returns the number of nodes.
@@ -2009,7 +2013,9 @@ public:
/// function mirrors \c llvm::salvageDebugInfo.
LLVM_ABI void salvageDebugInfo(SDNode &N);
- LLVM_ABI void dump() const;
+ /// Dump the textual format of this DAG. Print nodes in topologically
+ /// sorted order if \p Sorted is true.
+ LLVM_ABI void dump(bool Sorted = false) const;
/// In most cases this function returns the ABI alignment for a given type,
/// except for illegal vector types where the alignment exceeds that of the
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 7bbad17..88691b9 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4654,23 +4654,6 @@ public:
return false;
}
- /// Allows the target to handle physreg-carried dependency
- /// in target-specific way. Used from the ScheduleDAGSDNodes to decide whether
- /// to add the edge to the dependency graph.
- /// Def - input: Selection DAG node defininfg physical register
- /// User - input: Selection DAG node using physical register
- /// Op - input: Number of User operand
- /// PhysReg - inout: set to the physical register if the edge is
- /// necessary, unchanged otherwise
- /// Cost - inout: physical register copy cost.
- /// Returns 'true' is the edge is necessary, 'false' otherwise
- virtual bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
- const TargetRegisterInfo *TRI,
- const TargetInstrInfo *TII,
- MCRegister &PhysReg, int &Cost) const {
- return false;
- }
-
/// Target-specific combining of register parts into its original value
virtual SDValue
joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 0a11617..5331cb5 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -4001,15 +4001,17 @@ public:
/// Keeps track of value of iteration variable for input/scan loop to be
/// used for Scan directive lowering
- llvm::Value *IV;
+ llvm::Value *IV = nullptr;
/// Stores the span of canonical loop being lowered to be used for temporary
/// buffer allocation or Finalization.
- llvm::Value *Span;
+ llvm::Value *Span = nullptr;
ScanInfo() {
ScanBuffPtrs = new llvm::SmallDenseMap<llvm::Value *, llvm::Value *>();
}
+ ScanInfo(const ScanInfo &) = delete;
+ ScanInfo &operator=(const ScanInfo &) = delete;
~ScanInfo() { delete (ScanBuffPtrs); }
};
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index fbc92d7..b0269ee 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -162,7 +162,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_2Arg_Scalar_Narrow_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMExtendedType<0>, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
class AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMTruncatedType<0>],
@@ -187,13 +187,13 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_3VectorArg_Scalar_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
class AdvSIMD_CvtFxToFP_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
class AdvSIMD_CvtFPToFx_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
class AdvSIMD_1Arg_Intrinsic
: DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem]>;
@@ -221,7 +221,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
// Arithmetic ops
-let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
+let TargetPrefix = "aarch64" in {
// Vector Add Across Lanes
def int_aarch64_neon_saddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
def int_aarch64_neon_uaddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
diff --git a/llvm/include/llvm/Support/Jobserver.h b/llvm/include/llvm/Support/Jobserver.h
new file mode 100644
index 0000000..6bee3b5
--- /dev/null
+++ b/llvm/include/llvm/Support/Jobserver.h
@@ -0,0 +1,162 @@
+//===- llvm/Support/Jobserver.h - Jobserver Client --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a client for the GNU Make jobserver protocol. This allows
+// LLVM tools to coordinate parallel execution with a parent `make` process.
+//
+// The jobserver protocol is a mechanism for GNU Make to share its pool of
+// available "job slots" with the subprocesses it invokes. This is particularly
+// useful for tools that can perform parallel operations themselves (e.g., a
+// multi-threaded linker or compiler). By participating in this protocol, a
+// tool can ensure the total number of concurrent jobs does not exceed the
+// limit specified by the user (e.g., `make -j8`).
+//
+// How it works:
+//
+// 1. Establishment:
+// A child process discovers the jobserver by inspecting the `MAKEFLAGS`
+// environment variable. If a jobserver is active, this variable will
+// contain a `--jobserver-auth=<value>` argument. The format of `<value>`
+// determines how to communicate with the server.
+//
+// 2. The Implicit Slot:
+// Every command invoked by `make` is granted one "implicit" job slot. This
+// means a tool can always perform at least one unit of work without needing
+// to communicate with the jobserver. This implicit slot should NEVER be
+// released back to the jobserver.
+//
+// 3. Acquiring and Releasing Slots:
+// On POSIX systems, the jobserver is implemented as a pipe. The
+// `--jobserver-auth` value specifies either a path to a named pipe
+// (`fifo:PATH`) or a pair of file descriptors (`R,W`). The pipe is
+// pre-loaded with single-character tokens, one for each available job slot.
+//
+// - To acquire an additional slot, a client reads a single-character token
+// from the pipe.
+// - To release a slot, the client must write the *exact same* character
+// token back to the pipe.
+//
+// It is critical that a client releases all acquired slots before it exits,
+// even in cases of error, to avoid deadlocking the build.
+//
+// Example:
+// A multi-threaded linker invoked by `make -j8` wants to use multiple
+// threads. It first checks for the jobserver. It knows it has one implicit
+// slot, so it can use one thread. It then tries to acquire 7 more slots by
+// reading 7 tokens from the jobserver pipe. If it only receives 3 tokens,
+// it knows it can use a total of 1 (implicit) + 3 (acquired) = 4 threads.
+// Before exiting, it must write the 3 tokens it read back to the pipe.
+//
+// For more context, see:
+// - GNU Make manual on job slots:
+// https://www.gnu.org/software/make/manual/html_node/Job-Slots.html
+// - LLVM RFC discussion on jobserver support:
+//   https://discourse.llvm.org/t/rfc-adding-gnu-make-jobserver-support-to-llvm-for-coordinated-parallelism/87034
+// - Ninja’s jobserver support PR:
+// https://github.com/ninja-build/ninja/pull/2506
+//
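+// In terms of raw POSIX calls, acquiring and releasing one explicit slot
+// looks roughly like this (illustrative sketch; error handling and the
+// implicit slot are omitted):
+//
+//   char Token;
+//   if (read(ReadFD, &Token, 1) == 1) {  // acquire one explicit slot
+//     // ... run one extra unit of parallel work ...
+//     write(WriteFD, &Token, 1);         // release the *same* token
+//   }
+//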
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_JOBSERVER_H
+#define LLVM_SUPPORT_JOBSERVER_H
+
+#include "llvm/ADT/StringRef.h"
+#include <memory>
+#include <string>
+
+namespace llvm {
+
+/// A JobSlot represents a single job slot that can be acquired from or released
+/// to a jobserver pool. This class is move-only.
+class JobSlot {
+public:
+ /// Default constructor creates an invalid instance.
+ JobSlot() = default;
+
+ // Move operations are allowed.
+ JobSlot(JobSlot &&Other) noexcept : Value(Other.Value) {
+ Other.Value = kInvalidValue;
+ }
+ JobSlot &operator=(JobSlot &&Other) noexcept {
+ if (this != &Other) {
+ this->Value = Other.Value;
+ Other.Value = kInvalidValue;
+ }
+ return *this;
+ }
+
+ // Copy operations are disallowed.
+ JobSlot(const JobSlot &) = delete;
+ JobSlot &operator=(const JobSlot &) = delete;
+
+ /// Returns true if this instance is valid (either implicit or explicit).
+ bool isValid() const { return Value >= 0; }
+
+ /// Returns true if this instance represents the implicit job slot.
+ bool isImplicit() const { return Value == kImplicitValue; }
+
+ static JobSlot createExplicit(uint8_t V) {
+ return JobSlot(static_cast<int16_t>(V));
+ }
+
+ static JobSlot createImplicit() { return JobSlot(kImplicitValue); }
+
+ uint8_t getExplicitValue() const;
+ bool isExplicit() const { return isValid() && !isImplicit(); }
+
+private:
+ friend class JobserverClient;
+ friend class JobserverClientImpl;
+
+ JobSlot(int16_t V) : Value(V) {}
+
+ /// The jobserver pipe carries explicit tokens (bytes 0–255). We reserve two
+ /// sentinels in Value for special cases:
+ /// kInvalidValue (-1): no slot held
+ /// kImplicitValue (INT16_MAX): implicit slot granted at startup (no pipe
+ /// I/O)
+ ///
+ /// We use int16_t so that Value can store the explicit tokens 0–255 plus
+ /// the sentinels without overflow, has a fixed 16-bit width, and avoids
+ /// unsigned/signed mix-ups.
+ static constexpr int16_t kInvalidValue = -1;
+ static constexpr int16_t kImplicitValue = INT16_MAX;
+ int16_t Value = kInvalidValue;
+};
+
+/// The public interface for a jobserver client.
+/// This client is a lazy-initialized singleton that is created on first use.
+class JobserverClient {
+public:
+ virtual ~JobserverClient();
+
+ /// Tries to acquire a job slot from the pool. On failure (e.g., if the pool
+ /// is empty), this returns an invalid JobSlot instance. The first successful
+ /// call will always return the implicit slot.
+ virtual JobSlot tryAcquire() = 0;
+
+ /// Releases a job slot back to the pool.
+ virtual void release(JobSlot Slot) = 0;
+
+ /// Returns the number of job slots available, as determined on first use.
+ /// This value is cached. Returns 0 if no jobserver is active.
+ virtual unsigned getNumJobs() const = 0;
+
+ /// Returns the singleton instance of the JobserverClient.
+ /// The instance is created on the first call to this function.
+ /// Returns a nullptr if no jobserver is configured or an error occurs.
+ static JobserverClient *getInstance();
+
+ /// Resets the singleton instance. For testing purposes only.
+ static void resetForTesting();
+};
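+
+// Minimal usage sketch (illustrative only; assumes a jobserver was advertised
+// in MAKEFLAGS):
+//
+//   if (JobserverClient *Client = JobserverClient::getInstance()) {
+//     JobSlot Slot = Client->tryAcquire(); // first success: implicit slot
+//     if (Slot.isValid()) {
+//       // ... run one unit of work ...
+//       Client->release(std::move(Slot));
+//     }
+//   }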
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_JOBSERVER_H
diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h
index c26681c..c20efc7 100644
--- a/llvm/include/llvm/Support/ThreadPool.h
+++ b/llvm/include/llvm/Support/ThreadPool.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Jobserver.h"
#include "llvm/Support/RWMutex.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/thread.h"
@@ -180,6 +181,7 @@ private:
void grow(int requested);
void processTasks(ThreadPoolTaskGroup *WaitingForGroup);
+ void processTasksWithJobserver();
/// Threads in flight
std::vector<llvm::thread> Threads;
@@ -208,6 +210,8 @@ private:
/// Maximum number of threads to potentially grow this pool to.
const unsigned MaxThreadCount;
+
+ JobserverClient *TheJobserver = nullptr;
};
#endif // LLVM_ENABLE_THREADS
diff --git a/llvm/include/llvm/Support/Threading.h b/llvm/include/llvm/Support/Threading.h
index d3fe0a5..8884680 100644
--- a/llvm/include/llvm/Support/Threading.h
+++ b/llvm/include/llvm/Support/Threading.h
@@ -142,6 +142,11 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }
/// the thread shall remain on the actual CPU socket.
LLVM_ABI std::optional<unsigned>
compute_cpu_socket(unsigned ThreadPoolNum) const;
+
+ /// If true, the thread pool will attempt to coordinate with a GNU Make
+ /// jobserver, acquiring a job slot before processing a task. If no
+ /// jobserver is found in the environment, this is ignored.
+ bool UseJobserver = false;
};
/// Build a strategy from a number of threads as a string provided in \p Num.
@@ -210,6 +215,19 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }
return S;
}
+ /// Returns a thread strategy that attempts to coordinate with a GNU Make
+ /// jobserver. The number of active threads will be limited by the number of
+ /// available job slots. If no jobserver is detected in the environment, this
+ /// strategy falls back to the default hardware_concurrency() behavior.
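+ /// For example (an illustrative sketch; StdThreadPool is declared in
+ /// llvm/Support/ThreadPool.h):
+ /// \code
+ ///   StdThreadPool Pool(jobserver_concurrency());
+ ///   Pool.async([] { /* runs under the jobserver's concurrency limit */ });
+ ///   Pool.wait();
+ /// \endcode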
+ inline ThreadPoolStrategy jobserver_concurrency() {
+ ThreadPoolStrategy S;
+ S.UseJobserver = true;
+ // We can still request all threads be created, as they will simply
+ // block waiting for a job slot if the jobserver is the limiting factor.
+ S.ThreadsRequested = 0; // 0 means 'use all available'
+ return S;
+ }
+
/// Return the current thread id, as used in various OS system calls.
/// Note that not all platforms guarantee that the value returned will be
/// unique across the entire system, so portable code should not assume
diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
index 1e07fbe..faaff4a 100644
--- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -18,8 +18,7 @@
#include "llvm/Support/DataTypes.h"
-namespace llvm {
-namespace X86Disassembler {
+namespace llvm::X86Disassembler {
#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
#define CONTEXTS_SYM x86DisassemblerContexts
@@ -541,7 +540,6 @@ static const unsigned X86_MAX_OPERANDS = 6;
/// respectively.
enum DisassemblerMode { MODE_16BIT, MODE_32BIT, MODE_64BIT };
-} // namespace X86Disassembler
-} // namespace llvm
+} // namespace llvm::X86Disassembler
#endif
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index faf7788..e3f995d 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -126,7 +126,7 @@ def G_FRAME_INDEX : GenericInstruction {
}
def G_GLOBAL_VALUE : GenericInstruction {
- let OutOperandList = (outs type0:$dst);
+ let OutOperandList = (outs ptype0:$dst);
let InOperandList = (ins unknown:$src);
let hasSideEffects = false;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 558c5a0..309f1be 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6046,7 +6046,7 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
return N02;
}
- if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
+ if (MaxC == 0 && MinC != 0 && MinCPlus1.isPowerOf2()) {
BW = MinCPlus1.exactLogBase2();
Unsigned = true;
return N02;
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 7902229..4f4fb9c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -111,15 +111,11 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
const TargetRegisterInfo *TRI,
const TargetInstrInfo *TII,
- const TargetLowering &TLI,
MCRegister &PhysReg, int &Cost) {
if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
return;
Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost))
- return;
-
if (Reg.isVirtual())
return;
@@ -490,8 +486,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
MCRegister PhysReg;
int Cost = 1;
// Determine if this is a physical register dependency.
- const TargetLowering &TLI = DAG->getTargetLoweringInfo();
- CheckForPhysRegDependency(OpN, N, i, TRI, TII, TLI, PhysReg, Cost);
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
assert((!PhysReg || !isChain) && "Chain dependence via physreg data?");
// FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
// emits a copy from the physical register to a virtual register unless
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 95f53fe..6ea2e27 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12698,6 +12698,45 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
return DAGSize;
}
+void SelectionDAG::getTopologicallyOrderedNodes(
+ SmallVectorImpl<const SDNode *> &SortedNodes) const {
+ SortedNodes.clear();
+ // Node -> remaining number of outstanding operands.
+ DenseMap<const SDNode *, unsigned> RemainingOperands;
+
+ // Put nodes without any operands into SortedNodes first.
+ for (const SDNode &N : allnodes()) {
+ checkForCycles(&N, this);
+ unsigned NumOperands = N.getNumOperands();
+ if (NumOperands == 0)
+ SortedNodes.push_back(&N);
+ else
+ // Record their total number of outstanding operands.
+ RemainingOperands[&N] = NumOperands;
+ }
+
+ // A node is pushed into SortedNodes when all of its operands (predecessors in
+ // the graph) are also in SortedNodes.
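+ // For example (illustrative), with users EntryToken -> {A, B} and A, B -> C,
+ // the loop below appends A and B after EntryToken, and appends C only once
+ // both of its operands have been emitted: EntryToken, A, B, C.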
+ for (unsigned i = 0U; i < SortedNodes.size(); ++i) {
+ const SDNode *N = SortedNodes[i];
+ for (const SDNode *U : N->users()) {
+ unsigned &NumRemOperands = RemainingOperands[U];
+ assert(NumRemOperands && "Invalid number of remaining operands");
+ --NumRemOperands;
+ if (!NumRemOperands)
+ SortedNodes.push_back(U);
+ }
+ }
+
+ assert(SortedNodes.size() == AllNodes.size() && "Node count mismatch");
+ assert(SortedNodes.front()->getOpcode() == ISD::EntryToken &&
+ "First node in topological sort is not the entry token");
+ assert(SortedNodes.front()->getNumOperands() == 0 &&
+ "First node in topological sort has operands");
+ assert(SortedNodes.back()->use_empty() &&
+ "Last node in topologic sort has users");
+}
+
/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
/// value is produced by SD.
void SelectionDAG::AddDbgValue(SDDbgValue *DB, bool isParameter) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 4b2a00c..fcfbfe6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -1061,13 +1061,24 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
N->dump(G);
}
-LLVM_DUMP_METHOD void SelectionDAG::dump() const {
+LLVM_DUMP_METHOD void SelectionDAG::dump(bool Sorted) const {
dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n";
- for (const SDNode &N : allnodes()) {
+ auto dumpEachNode = [this](const SDNode &N) {
if (!N.hasOneUse() && &N != getRoot().getNode() &&
(!shouldPrintInline(N, this) || N.use_empty()))
DumpNodes(&N, 2, this);
+ };
+
+ if (Sorted) {
+ SmallVector<const SDNode *> SortedNodes;
+ SortedNodes.reserve(AllNodes.size());
+ getTopologicallyOrderedNodes(SortedNodes);
+ for (const SDNode *N : SortedNodes)
+ dumpEachNode(*N);
+ } else {
+ for (const SDNode &N : allnodes())
+ dumpEachNode(N);
}
if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index e61558c..c35f29d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -144,6 +144,11 @@ UseMBPI("use-mbpi",
cl::init(true), cl::Hidden);
#ifndef NDEBUG
+static cl::opt<bool>
+ DumpSortedDAG("dump-sorted-dags", cl::Hidden,
+ cl::desc("Print DAGs with sorted nodes in debug dump"),
+ cl::init(false));
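+
+// Illustrative use (asserts-enabled build):
+//   llc -debug -dump-sorted-dags input.ll
+// prints each selection DAG dump with nodes in topological order.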
+
static cl::opt<std::string>
FilterDAGBasicBlockName("filter-view-dags", cl::Hidden,
cl::desc("Only display the basic block whose name "
@@ -932,7 +937,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nInitial selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -952,7 +957,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nOptimized lowered selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -974,7 +979,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nType-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -998,7 +1003,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nOptimized type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -1016,7 +1021,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nVector-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -1032,7 +1037,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nVector/type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -1052,7 +1057,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nOptimized vector-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -1072,7 +1077,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nLegalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -1092,7 +1097,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nOptimized legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
#if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS
if (TTI->hasBranchDivergence())
@@ -1116,7 +1121,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ISEL_DUMP(dbgs() << "\nSelected selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
- CurDAG->dump());
+ CurDAG->dump(DumpSortedDAG));
if (ViewSchedDAGs && MatchFilterBB)
CurDAG->viewGraph("scheduler input for " + BlockName);
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 7da972f..42b21b5 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -207,6 +207,7 @@ add_llvm_component_library(LLVMSupport
InstructionCost.cpp
IntEqClasses.cpp
IntervalMap.cpp
+ Jobserver.cpp
JSON.cpp
KnownBits.cpp
KnownFPClass.cpp
diff --git a/llvm/lib/Support/Jobserver.cpp b/llvm/lib/Support/Jobserver.cpp
new file mode 100644
index 0000000..9f726eb
--- /dev/null
+++ b/llvm/lib/Support/Jobserver.cpp
@@ -0,0 +1,259 @@
+//===- llvm/Support/Jobserver.cpp - Jobserver Client Implementation -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Jobserver.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <atomic>
+#include <memory>
+#include <mutex>
+#include <new>
+
+#define DEBUG_TYPE "jobserver"
+
+using namespace llvm;
+
+namespace {
+struct FdPair {
+ int Read = -1;
+ int Write = -1;
+ bool isValid() const { return Read >= 0 && Write >= 0; }
+};
+
+struct JobserverConfig {
+ enum Mode {
+ None,
+ PosixFifo,
+ PosixPipe,
+ Win32Semaphore,
+ };
+ Mode TheMode = None;
+ std::string Path;
+ FdPair PipeFDs;
+};
+
+/// A helper function that checks if `Input` starts with `Prefix`.
+/// If it does, it removes the prefix from `Input`, assigns the remainder to
+/// `Value`, and returns true. Otherwise, it returns false.
+bool getPrefixedValue(StringRef Input, StringRef Prefix, StringRef &Value) {
+ if (Input.consume_front(Prefix)) {
+ Value = Input;
+ return true;
+ }
+ return false;
+}
+
+/// A helper function to parse a string in the format "R,W" where R and W are
+/// non-negative integers representing file descriptors. Returns the parsed
+/// pair on success, or std::nullopt on failure.
+static std::optional<FdPair> getFileDescriptorPair(StringRef Input) {
+ FdPair FDs;
+ if (Input.consumeInteger(10, FDs.Read))
+ return std::nullopt;
+ if (!Input.consume_front(","))
+ return std::nullopt;
+ if (Input.consumeInteger(10, FDs.Write))
+ return std::nullopt;
+ if (!Input.empty() || !FDs.isValid())
+ return std::nullopt;
+ return FDs;
+}
+
+/// Parses the `MAKEFLAGS` environment variable string to find jobserver
+/// arguments. It splits the string into space-separated arguments and searches
+/// for `--jobserver-auth` or `--jobserver-fds`. Based on the value of these
+/// arguments, it determines the jobserver mode (Pipe, FIFO, or Semaphore) and
+/// connection details (file descriptors or path).
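+///
+/// Example values this function accepts (illustrative):
+///   "-j4 --jobserver-auth=fifo:/tmp/GMfifo123"  -> PosixFifo
+///   " --jobserver-auth=3,4"                     -> PosixPipe (FDs 3 and 4)
+///   "--jobserver-fds=3,4"                       -> PosixPipe (legacy syntax)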
+Expected<JobserverConfig> parseNativeMakeFlags(StringRef MakeFlags) {
+ JobserverConfig Config;
+ if (MakeFlags.empty())
+ return Config;
+
+ // Split the MAKEFLAGS string into arguments.
+ SmallVector<StringRef, 8> Args;
+ SplitString(MakeFlags, Args);
+
+ // If the first word is a block of legacy single-letter flags (no leading
+ // '-') and it contains 'n' (dry-run), disable the jobserver.
+ if (!Args.empty() && !Args[0].starts_with("-") && Args[0].contains('n'))
+ return Config;
+
+ // Iterate through arguments to find jobserver flags.
+ // Note that make may pass multiple --jobserver-auth flags; the last one wins.
+ for (StringRef Arg : Args) {
+ StringRef Value;
+ if (getPrefixedValue(Arg, "--jobserver-auth=", Value)) {
+ // Try to parse as a file descriptor pair first.
+ if (auto FDPair = getFileDescriptorPair(Value)) {
+ Config.TheMode = JobserverConfig::PosixPipe;
+ Config.PipeFDs = *FDPair;
+ } else {
+ StringRef FifoPath;
+ // If not FDs, try to parse as a named pipe (fifo).
+ if (getPrefixedValue(Value, "fifo:", FifoPath)) {
+ Config.TheMode = JobserverConfig::PosixFifo;
+ Config.Path = FifoPath.str();
+ } else {
+ // Otherwise, assume it's a Windows semaphore.
+ Config.TheMode = JobserverConfig::Win32Semaphore;
+ Config.Path = Value.str();
+ }
+ }
+ } else if (getPrefixedValue(Arg, "--jobserver-fds=", Value)) {
+ // This is an alternative, older syntax for the pipe-based server.
+ if (auto FDPair = getFileDescriptorPair(Value)) {
+ Config.TheMode = JobserverConfig::PosixPipe;
+ Config.PipeFDs = *FDPair;
+ } else {
+ return createStringError(inconvertibleErrorCode(),
+ "Invalid file descriptor pair in MAKEFLAGS");
+ }
+ }
+ }
+
+// Perform platform-specific validation.
+#ifdef _WIN32
+ if (Config.TheMode == JobserverConfig::PosixFifo ||
+ Config.TheMode == JobserverConfig::PosixPipe)
+ return createStringError(
+ inconvertibleErrorCode(),
+ "FIFO/Pipe-based jobserver is not supported on Windows");
+#else
+ if (Config.TheMode == JobserverConfig::Win32Semaphore)
+ return createStringError(
+ inconvertibleErrorCode(),
+ "Semaphore-based jobserver is not supported on this platform");
+#endif
+ return Config;
+}
+
+std::once_flag GJobserverOnceFlag;
+JobserverClient *GJobserver = nullptr;
+
+} // namespace
+
+namespace llvm {
+class JobserverClientImpl : public JobserverClient {
+ bool IsInitialized = false;
+ std::atomic<bool> HasImplicitSlot{true};
+ unsigned NumJobs = 0;
+
+public:
+ JobserverClientImpl(const JobserverConfig &Config);
+ ~JobserverClientImpl() override;
+
+ JobSlot tryAcquire() override;
+ void release(JobSlot Slot) override;
+ unsigned getNumJobs() const override { return NumJobs; }
+
+ bool isValid() const { return IsInitialized; }
+
+private:
+#if defined(LLVM_ON_UNIX)
+ int ReadFD = -1;
+ int WriteFD = -1;
+ std::string FifoPath;
+#elif defined(_WIN32)
+ void *Semaphore = nullptr;
+#endif
+};
+} // namespace llvm
+
+// Include the platform-specific parts of the class.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/Jobserver.inc"
+#elif defined(_WIN32)
+#include "Windows/Jobserver.inc"
+#else
+// Dummy implementation for unsupported platforms.
+JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) {}
+JobserverClientImpl::~JobserverClientImpl() = default;
+JobSlot JobserverClientImpl::tryAcquire() { return JobSlot(); }
+void JobserverClientImpl::release(JobSlot Slot) {}
+#endif
+
+namespace llvm {
+JobserverClient::~JobserverClient() = default;
+
+uint8_t JobSlot::getExplicitValue() const {
+ assert(isExplicit() && "Cannot get value of implicit or invalid slot");
+ return static_cast<uint8_t>(Value);
+}
+
+/// This is the main entry point for acquiring a jobserver client. It uses a
+/// std::call_once to ensure the singleton `GJobserver` instance is created
+/// safely in a multi-threaded environment. On first call, it reads the
+/// `MAKEFLAGS` environment variable, parses it, and attempts to construct and
+/// initialize a `JobserverClientImpl`. If successful, the global instance is
+/// stored in `GJobserver`. Subsequent calls will return the existing instance.
+JobserverClient *JobserverClient::getInstance() {
+ std::call_once(GJobserverOnceFlag, []() {
+ LLVM_DEBUG(
+ dbgs()
+ << "JobserverClient::getInstance() called for the first time.\n");
+ const char *MakeFlagsEnv = getenv("MAKEFLAGS");
+ if (!MakeFlagsEnv) {
+ errs() << "Warning: failed to create jobserver client due to MAKEFLAGS "
+ "environment variable not found\n";
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << "Found MAKEFLAGS = \"" << MakeFlagsEnv << "\"\n");
+
+ auto ConfigOrErr = parseNativeMakeFlags(MakeFlagsEnv);
+ if (Error Err = ConfigOrErr.takeError()) {
+ errs() << "Warning: failed to create jobserver client due to invalid "
+ "MAKEFLAGS environment variable: "
+ << toString(std::move(Err)) << "\n";
+ return;
+ }
+
+ JobserverConfig Config = *ConfigOrErr;
+ if (Config.TheMode == JobserverConfig::None) {
+ errs() << "Warning: failed to create jobserver client due to jobserver "
+ "mode missing in MAKEFLAGS environment variable\n";
+ return;
+ }
+
+ if (Config.TheMode == JobserverConfig::PosixPipe) {
+#if defined(LLVM_ON_UNIX)
+ if (!areFdsValid(Config.PipeFDs.Read, Config.PipeFDs.Write)) {
+ errs() << "Warning: failed to create jobserver client due to invalid "
+ "Pipe FDs in MAKEFLAGS environment variable\n";
+ return;
+ }
+#endif
+ }
+
+ auto Client = std::make_unique<JobserverClientImpl>(Config);
+ if (Client->isValid()) {
+ LLVM_DEBUG(dbgs() << "Jobserver client created successfully!\n");
+ GJobserver = Client.release();
+ } else
+ errs() << "Warning: jobserver client initialization failed.\n";
+ });
+ return GJobserver;
+}
+
+/// For testing purposes only. This function resets the singleton instance by
+/// destroying the existing client and re-initializing the `std::once_flag`.
+/// This allows tests to simulate the first-time initialization of the
+/// jobserver client multiple times.
+void JobserverClient::resetForTesting() {
+ delete GJobserver;
+ GJobserver = nullptr;
+ // Re-construct the std::once_flag in place to reset the singleton state.
+ new (&GJobserverOnceFlag) std::once_flag();
+}
+} // namespace llvm
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 3ac6fc7..8e0c724 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -7,12 +7,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Parallel.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/ExponentialBackoff.h"
+#include "llvm/Support/Jobserver.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Threading.h"
#include <atomic>
#include <future>
+#include <memory>
+#include <mutex>
#include <thread>
#include <vector>
@@ -49,6 +54,9 @@ public:
class ThreadPoolExecutor : public Executor {
public:
explicit ThreadPoolExecutor(ThreadPoolStrategy S) {
+ if (S.UseJobserver)
+ TheJobserver = JobserverClient::getInstance();
+
ThreadCount = S.compute_thread_count();
// Spawn all but one of the threads in another thread as spawning threads
// can take a while.
@@ -69,6 +77,10 @@ public:
});
}
+ // Ensure a ThreadPoolExecutor can only be created with an explicit strategy.
+ ThreadPoolExecutor() = delete;
+
void stop() {
{
std::lock_guard<std::mutex> Lock(Mutex);
@@ -111,15 +123,62 @@ private:
void work(ThreadPoolStrategy S, unsigned ThreadID) {
threadIndex = ThreadID;
S.apply_thread_strategy(ThreadID);
+ // Note on jobserver deadlock avoidance:
+ // GNU Make grants each invoked process one implicit job slot. Our
+ // JobserverClient models this by returning an implicit JobSlot on the
+ // first successful tryAcquire() in a process. This guarantees forward
+ // progress without requiring a dedicated "always-on" thread here.
+
while (true) {
- std::unique_lock<std::mutex> Lock(Mutex);
- Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); });
- if (Stop)
- break;
- auto Task = std::move(WorkStack.back());
- WorkStack.pop_back();
- Lock.unlock();
- Task();
+ if (TheJobserver) {
+ // Jobserver-mode scheduling:
+ // - Acquire one job slot (with exponential backoff to avoid busy-wait).
+ // - While holding the slot, drain and run tasks from the local queue.
+ // - Release the slot when the queue is empty or when shutting down.
+ // Rationale: Holding a slot amortizes acquire/release overhead over
+ // multiple tasks and avoids requeue/yield churn, while still enforcing
+ // the jobserver’s global concurrency limit. With K available slots,
+ // up to K workers run tasks in parallel; within each worker tasks run
+ // sequentially until the local queue is empty.
+ ExponentialBackoff Backoff(std::chrono::hours(24));
+ JobSlot Slot;
+ do {
+ if (Stop)
+ return;
+ Slot = TheJobserver->tryAcquire();
+ if (Slot.isValid())
+ break;
+ } while (Backoff.waitForNextAttempt());
+
+ auto SlotReleaser = llvm::make_scope_exit(
+ [&] { TheJobserver->release(std::move(Slot)); });
+
+ while (true) {
+ std::function<void()> Task;
+ {
+ std::unique_lock<std::mutex> Lock(Mutex);
+ Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); });
+ if (Stop && WorkStack.empty())
+ return;
+ if (WorkStack.empty())
+ break;
+ Task = std::move(WorkStack.back());
+ WorkStack.pop_back();
+ }
+ Task();
+ }
+ } else {
+ std::unique_lock<std::mutex> Lock(Mutex);
+ Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); });
+ if (Stop)
+ break;
+ auto Task = std::move(WorkStack.back());
+ WorkStack.pop_back();
+ Lock.unlock();
+ Task();
+ }
}
}
@@ -130,9 +189,20 @@ private:
std::promise<void> ThreadsCreated;
std::vector<std::thread> Threads;
unsigned ThreadCount;
+
+ JobserverClient *TheJobserver = nullptr;
};
-Executor *Executor::getDefaultExecutor() {
+// A global raw pointer to the executor. Lifetime is managed by the
+// objects created within createExecutor().
+static Executor *TheExec = nullptr;
+static std::once_flag Flag;
+
+// This function will be called exactly once to create the executor.
+// It contains the necessary platform-specific logic. Since functions
+// called by std::call_once cannot return value, we have to set the
+// executor as a global variable.
+void createExecutor() {
#ifdef _WIN32
// The ManagedStatic enables the ThreadPoolExecutor to be stopped via
// llvm_shutdown() which allows a "clean" fast exit, e.g. via _exit(). This
@@ -156,16 +226,22 @@ Executor *Executor::getDefaultExecutor() {
ThreadPoolExecutor::Deleter>
ManagedExec;
static std::unique_ptr<ThreadPoolExecutor> Exec(&(*ManagedExec));
- return Exec.get();
+ TheExec = Exec.get();
#else
// ManagedStatic is not desired on other platforms. When `Exec` is destroyed
// by llvm_shutdown(), worker threads will clean up and invoke TLS
// destructors. This can lead to race conditions if other threads attempt to
// access TLS objects that have already been destroyed.
static ThreadPoolExecutor Exec(strategy);
- return &Exec;
+ TheExec = &Exec;
#endif
}
+
+Executor *Executor::getDefaultExecutor() {
+ // Use std::call_once to lazily and safely initialize the executor.
+ std::call_once(Flag, createExecutor);
+ return TheExec;
+}
} // namespace
} // namespace detail
diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
index c304f0f..6960268 100644
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
// This file implements a crude C++11 based thread pool.
//
//===----------------------------------------------------------------------===//
@@ -14,6 +15,8 @@
#include "llvm/Config/llvm-config.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Support/ExponentialBackoff.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/raw_ostream.h"
@@ -33,7 +36,10 @@ ThreadPoolInterface::~ThreadPoolInterface() = default;
#if LLVM_ENABLE_THREADS
StdThreadPool::StdThreadPool(ThreadPoolStrategy S)
- : Strategy(S), MaxThreadCount(S.compute_thread_count()) {}
+ : Strategy(S), MaxThreadCount(S.compute_thread_count()) {
+ if (Strategy.UseJobserver)
+ TheJobserver = JobserverClient::getInstance();
+}
void StdThreadPool::grow(int requested) {
llvm::sys::ScopedWriter LockGuard(ThreadsLock);
@@ -45,7 +51,15 @@ void StdThreadPool::grow(int requested) {
Threads.emplace_back([this, ThreadID] {
set_thread_name(formatv("llvm-worker-{0}", ThreadID));
Strategy.apply_thread_strategy(ThreadID);
- processTasks(nullptr);
+ // Note on jobserver deadlock avoidance:
+ // GNU Make grants each invoked process one implicit job slot.
+ // JobserverClient::tryAcquire() returns that implicit slot on the first
+ // successful call in a process, ensuring forward progress without a
+ // dedicated "always-on" thread.
+ if (TheJobserver)
+ processTasksWithJobserver();
+ else
+ processTasks(nullptr);
});
}
}
@@ -133,6 +147,96 @@ void StdThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) {
}
}
+/// Main loop for worker threads when using a jobserver.
+/// This function uses two levels of acquisition: it first acquires a job slot
+/// from the external jobserver, then retrieves a task from the internal queue.
+/// This allows the thread pool to cooperate with build systems like `make -j`.
+void StdThreadPool::processTasksWithJobserver() {
+ while (true) {
+ // Acquire a job slot from the external jobserver.
+ // This polls for a slot and yields the thread to avoid a high-CPU wait.
+ JobSlot Slot;
+ // The timeout for the backoff can be very long, as the shutdown
+ // is checked on each iteration. The sleep duration is capped by MaxWait
+ // in ExponentialBackoff, so shutdown latency is not a problem.
+ ExponentialBackoff Backoff(std::chrono::hours(24));
+ bool AcquiredToken = false;
+ do {
+ // Return if the thread pool is shutting down.
+ {
+ std::unique_lock<std::mutex> LockGuard(QueueLock);
+ if (!EnableFlag)
+ return;
+ }
+
+ Slot = TheJobserver->tryAcquire();
+ if (Slot.isValid()) {
+ AcquiredToken = true;
+ break;
+ }
+ } while (Backoff.waitForNextAttempt());
+
+ if (!AcquiredToken) {
+ // This is practically unreachable with a 24h timeout and indicates a
+ // deeper problem if hit.
+ report_fatal_error("Timed out waiting for jobserver token.");
+ }
+
+ // `make_scope_exit` guarantees the job slot is released, even if the
+ // task throws or we exit early. This prevents deadlocking the build.
+ auto SlotReleaser =
+ make_scope_exit([&] { TheJobserver->release(std::move(Slot)); });
+
+ // While we hold a job slot, process tasks from the internal queue.
+ while (true) {
+ std::function<void()> Task;
+ ThreadPoolTaskGroup *GroupOfTask = nullptr;
+
+ {
+ std::unique_lock<std::mutex> LockGuard(QueueLock);
+
+ // Wait until a task is available or the pool is shutting down.
+ QueueCondition.wait(LockGuard,
+ [&] { return !EnableFlag || !Tasks.empty(); });
+
+ // If shutting down and the queue is empty, the thread can terminate.
+ if (!EnableFlag && Tasks.empty())
+ return;
+
+ // If the queue is empty, we're done processing tasks for now.
+ // Break the inner loop to release the job slot.
+ if (Tasks.empty())
+ break;
+
+ // A task is available. Mark it as active before releasing the lock
+ // to prevent race conditions with `wait()`.
+ ++ActiveThreads;
+ Task = std::move(Tasks.front().first);
+ GroupOfTask = Tasks.front().second;
+ if (GroupOfTask != nullptr)
+ ++ActiveGroups[GroupOfTask];
+ Tasks.pop_front();
+ } // The queue lock is released.
+
+ // Run the task. The job slot remains acquired during execution.
+ Task();
+
+ // The task has finished. Update the active count and notify any waiters.
+ {
+ std::lock_guard<std::mutex> LockGuard(QueueLock);
+ --ActiveThreads;
+ if (GroupOfTask != nullptr) {
+ auto A = ActiveGroups.find(GroupOfTask);
+ if (--(A->second) == 0)
+ ActiveGroups.erase(A);
+ }
+ // If all tasks are complete, notify any waiting threads.
+ if (workCompletedUnlocked(nullptr))
+ CompletionCondition.notify_all();
+ }
+ }
+ }
+}
bool StdThreadPool::workCompletedUnlocked(ThreadPoolTaskGroup *Group) const {
if (Group == nullptr)
return !ActiveThreads && Tasks.empty();
diff --git a/llvm/lib/Support/Threading.cpp b/llvm/lib/Support/Threading.cpp
index 693de0e..9da357a 100644
--- a/llvm/lib/Support/Threading.cpp
+++ b/llvm/lib/Support/Threading.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/Threading.h"
#include "llvm/Config/config.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Jobserver.h"
#include <cassert>
#include <optional>
@@ -51,6 +52,10 @@ int llvm::get_physical_cores() { return -1; }
static int computeHostNumHardwareThreads();
unsigned llvm::ThreadPoolStrategy::compute_thread_count() const {
+ if (UseJobserver)
+ if (auto JS = JobserverClient::getInstance())
+ return JS->getNumJobs();
+
int MaxThreadCount =
UseHyperThreads ? computeHostNumHardwareThreads() : get_physical_cores();
if (MaxThreadCount <= 0)
diff --git a/llvm/lib/Support/Unix/Jobserver.inc b/llvm/lib/Support/Unix/Jobserver.inc
new file mode 100644
index 0000000..53bf7f2
--- /dev/null
+++ b/llvm/lib/Support/Unix/Jobserver.inc
@@ -0,0 +1,195 @@
+//===- llvm/Support/Unix/Jobserver.inc - Unix Jobserver Impl ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the UNIX-specific parts of the JobserverClient class.
+//
+//===----------------------------------------------------------------------===//
+
+#include <atomic>
+#include <cassert>
+#include <cerrno>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+namespace {
+/// Returns true if the given file descriptor is a FIFO (named pipe).
+bool isFifo(int FD) {
+ struct stat StatBuf;
+ if (::fstat(FD, &StatBuf) != 0)
+ return false;
+ return S_ISFIFO(StatBuf.st_mode);
+}
+
+/// Returns true if the given file descriptors are valid.
+bool areFdsValid(int ReadFD, int WriteFD) {
+ if (ReadFD == -1 || WriteFD == -1)
+ return false;
+ // Check if the file descriptors are actually valid by checking their flags.
+ return ::fcntl(ReadFD, F_GETFD) != -1 && ::fcntl(WriteFD, F_GETFD) != -1;
+}
+} // namespace
+
+/// The constructor sets up the client based on the provided configuration.
+/// For pipe-based jobservers, it duplicates the inherited file descriptors,
+/// sets them to close-on-exec, and makes the read descriptor non-blocking.
+/// For FIFO-based jobservers, it opens the named pipe. After setup, it drains
+/// all available tokens from the jobserver to determine the total number of
+/// available jobs (`NumJobs`), then immediately releases them back.
+JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) {
+ switch (Config.TheMode) {
+ case JobserverConfig::PosixPipe: {
+ // Duplicate the read and write file descriptors.
+ int NewReadFD = ::dup(Config.PipeFDs.Read);
+ if (NewReadFD < 0)
+ return;
+ int NewWriteFD = ::dup(Config.PipeFDs.Write);
+ if (NewWriteFD < 0) {
+ ::close(NewReadFD);
+ return;
+ }
+ // Set the new descriptors to be closed automatically on exec().
+ if (::fcntl(NewReadFD, F_SETFD, FD_CLOEXEC) == -1 ||
+ ::fcntl(NewWriteFD, F_SETFD, FD_CLOEXEC) == -1) {
+ ::close(NewReadFD);
+ ::close(NewWriteFD);
+ return;
+ }
+ // Set the read descriptor to non-blocking.
+ int flags = ::fcntl(NewReadFD, F_GETFL, 0);
+ if (flags == -1 || ::fcntl(NewReadFD, F_SETFL, flags | O_NONBLOCK) == -1) {
+ ::close(NewReadFD);
+ ::close(NewWriteFD);
+ return;
+ }
+ ReadFD = NewReadFD;
+ WriteFD = NewWriteFD;
+ break;
+ }
+ case JobserverConfig::PosixFifo:
+ // Open the FIFO for reading. It must be non-blocking and close-on-exec.
+ ReadFD = ::open(Config.Path.c_str(), O_RDONLY | O_NONBLOCK | O_CLOEXEC);
+ if (ReadFD < 0 || !isFifo(ReadFD)) {
+ if (ReadFD >= 0)
+ ::close(ReadFD);
+ ReadFD = -1;
+ return;
+ }
+ FifoPath = Config.Path;
+ // The write FD is opened on-demand in release().
+ WriteFD = -1;
+ break;
+ default:
+ return;
+ }
+
+ IsInitialized = true;
+ // Determine the total number of jobs by acquiring all available slots and
+ // then immediately releasing them.
+ SmallVector<JobSlot, 8> Slots;
+ while (true) {
+ auto S = tryAcquire();
+ if (!S.isValid())
+ break;
+ Slots.push_back(std::move(S));
+ }
+ NumJobs = Slots.size();
+ assert(NumJobs >= 1 && "Invalid number of jobs");
+ for (auto &S : Slots)
+ release(std::move(S));
+}
+
+/// The destructor closes any open file descriptors.
+JobserverClientImpl::~JobserverClientImpl() {
+ if (ReadFD >= 0)
+ ::close(ReadFD);
+ if (WriteFD >= 0)
+ ::close(WriteFD);
+}
+
+/// Tries to acquire a job slot. The first call to this function will always
+/// successfully acquire the single "implicit" slot that is granted to every
+/// process started by `make`. Subsequent calls attempt to read a one-byte
+/// token from the jobserver's read pipe. A successful read grants one
+/// explicit job slot. The read is non-blocking; if no token is available,
+/// it fails and returns an invalid JobSlot.
+JobSlot JobserverClientImpl::tryAcquire() {
+ if (!IsInitialized)
+ return JobSlot();
+
+ // The first acquisition is always for the implicit slot.
+ if (HasImplicitSlot.exchange(false, std::memory_order_acquire)) {
+ LLVM_DEBUG(dbgs() << "Acquired implicit job slot.\n");
+ return JobSlot::createImplicit();
+ }
+
+ char Token;
+ ssize_t Ret;
+ LLVM_DEBUG(dbgs() << "Attempting to read token from FD " << ReadFD << ".\n");
+ // Loop to retry on EINTR (interrupted system call).
+ do {
+ Ret = ::read(ReadFD, &Token, 1);
+ } while (Ret < 0 && errno == EINTR);
+
+ if (Ret == 1) {
+ LLVM_DEBUG(dbgs() << "Acquired explicit token '" << Token << "'.\n");
+ return JobSlot::createExplicit(static_cast<uint8_t>(Token));
+ }
+
+ LLVM_DEBUG(dbgs() << "Failed to acquire job slot, read returned " << Ret
+ << ".\n");
+ return JobSlot();
+}
+
+/// Releases a job slot back to the pool. If the slot is implicit, it simply
+/// resets a flag. If the slot is explicit, it writes the character token
+/// associated with the slot back into the jobserver's write pipe. For FIFO
+/// jobservers, this may require opening the FIFO for writing if it hasn't
+/// been already.
+void JobserverClientImpl::release(JobSlot Slot) {
+ if (!Slot.isValid())
+ return;
+
+ // Releasing the implicit slot just makes it available for the next acquire.
+ if (Slot.isImplicit()) {
+ LLVM_DEBUG(dbgs() << "Released implicit job slot.\n");
+ [[maybe_unused]] bool was_already_released =
+ HasImplicitSlot.exchange(true, std::memory_order_release);
+ assert(!was_already_released && "Implicit slot released twice");
+ return;
+ }
+
+ uint8_t Token = Slot.getExplicitValue();
+ LLVM_DEBUG(dbgs() << "Releasing explicit token '" << (char)Token << "' to FD "
+ << WriteFD << ".\n");
+
+ // For FIFO-based jobservers, the write FD might not be open yet.
+ // Open it on the first release.
+ if (WriteFD < 0) {
+ LLVM_DEBUG(dbgs() << "WriteFD is invalid, opening FIFO: " << FifoPath
+ << "\n");
+ WriteFD = ::open(FifoPath.c_str(), O_WRONLY | O_CLOEXEC);
+ if (WriteFD < 0) {
+ LLVM_DEBUG(dbgs() << "Failed to open FIFO for writing.\n");
+ return;
+ }
+ LLVM_DEBUG(dbgs() << "Opened FIFO as new WriteFD: " << WriteFD << "\n");
+ }
+
+ ssize_t Written;
+ // Loop to retry on EINTR (interrupted system call).
+ do {
+ Written = ::write(WriteFD, &Token, 1);
+ } while (Written < 0 && errno == EINTR);
+
+ if (Written <= 0) {
+ LLVM_DEBUG(dbgs() << "Failed to write token to pipe, write returned "
+ << Written << "\n");
+ }
+}
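End to end, a consumer of the POSIX client follows an acquire/work/release discipline. A hedged sketch of that loop; the driver function and its `Work` container are hypothetical, while `tryAcquire()`/`release()` are the functions defined above:

  #include "llvm/ADT/ArrayRef.h"
  #include <functional>

  // Hypothetical driver; the name runQueued and the Work list are
  // illustrative, not part of this patch.
  void runQueued(JobserverClientImpl &Client,
                 llvm::ArrayRef<std::function<void()>> Work) {
    for (const auto &Job : Work) {
      JobSlot Slot = Client.tryAcquire();
      if (!Slot.isValid())
        continue; // No capacity right now; a real scheduler would re-queue.
      Job();
      Client.release(std::move(Slot)); // Tokens must always be returned.
    }
  }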
diff --git a/llvm/lib/Support/Windows/Jobserver.inc b/llvm/lib/Support/Windows/Jobserver.inc
new file mode 100644
index 0000000..79028ee
--- /dev/null
+++ b/llvm/lib/Support/Windows/Jobserver.inc
@@ -0,0 +1,79 @@
+//==- llvm/Support/Windows/Jobserver.inc - Windows Jobserver Impl -*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Windows-specific parts of the JobserverClient class.
+// On Windows, the jobserver is implemented using a named semaphore.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Windows/WindowsSupport.h"
+#include <atomic>
+#include <cassert>
+
+namespace llvm {
+/// The constructor for the Windows jobserver client. It attempts to open a
+/// handle to an existing named semaphore, the name of which is provided by
+/// GNU make in the --jobserver-auth argument. If the semaphore is opened
+/// successfully, the client is marked as initialized.
+JobserverClientImpl::JobserverClientImpl(const JobserverConfig &Config) {
+ Semaphore = (void *)::OpenSemaphoreA(SEMAPHORE_MODIFY_STATE | SYNCHRONIZE,
+ FALSE, Config.Path.c_str());
+ if (Semaphore != nullptr)
+ IsInitialized = true;
+}
+
+/// The destructor closes the handle to the semaphore, releasing the resource.
+JobserverClientImpl::~JobserverClientImpl() {
+ if (Semaphore != nullptr)
+ ::CloseHandle((HANDLE)Semaphore);
+}
+
+/// Tries to acquire a job slot. The first call always returns the implicit
+/// slot. Subsequent calls use a non-blocking wait on the semaphore
+/// (`WaitForSingleObject` with a timeout of 0). If the wait succeeds, the
+/// semaphore's count is decremented, and an explicit job slot is acquired.
+/// If the wait times out, it means no slots are available, and an invalid
+/// slot is returned.
+JobSlot JobserverClientImpl::tryAcquire() {
+ if (!IsInitialized)
+ return JobSlot();
+
+ // First, grant the implicit slot.
+ if (HasImplicitSlot.exchange(false, std::memory_order_acquire)) {
+ return JobSlot::createImplicit();
+ }
+
+ // Try to acquire a slot from the semaphore without blocking.
+ if (::WaitForSingleObject((HANDLE)Semaphore, 0) == WAIT_OBJECT_0) {
+ // The explicit token value is arbitrary on Windows, as the semaphore
+ // count is the real resource.
+ return JobSlot::createExplicit(1);
+ }
+
+ return JobSlot(); // Invalid slot
+}
+
+/// Releases a job slot back to the pool. If the slot is implicit, it simply
+/// resets a flag. For an explicit slot, it increments the semaphore's count
+/// by one using `ReleaseSemaphore`, making the slot available to other
+/// processes.
+void JobserverClientImpl::release(JobSlot Slot) {
+ if (!IsInitialized || !Slot.isValid())
+ return;
+
+ if (Slot.isImplicit()) {
+ [[maybe_unused]] bool was_already_released =
+ HasImplicitSlot.exchange(true, std::memory_order_release);
+ assert(!was_already_released && "Implicit slot released twice");
+ return;
+ }
+
+ // Release the slot by incrementing the semaphore count.
+ (void)::ReleaseSemaphore((HANDLE)Semaphore, 1, NULL);
+}
+} // namespace llvm
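For ad-hoc testing of the Windows path, the `make` side can be approximated by pre-creating the named semaphore; a client whose `Config.Path` carries the same name will then open it successfully. A sketch under that assumption (the semaphore name is an arbitrary choice):

  #include <windows.h>

  // Model `make -j4`: 3 free explicit slots plus the client's implicit one.
  HANDLE Sem = ::CreateSemaphoreA(/*lpSemaphoreAttributes=*/nullptr,
                                  /*lInitialCount=*/3,
                                  /*lMaximumCount=*/3, "llvm_jobserver_test");
  // ... run the client with Config.Path == "llvm_jobserver_test" ...
  ::CloseHandle(Sem);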
diff --git a/llvm/lib/TableGen/Error.cpp b/llvm/lib/TableGen/Error.cpp
index de0c4c9..3ba2c6c 100644
--- a/llvm/lib/TableGen/Error.cpp
+++ b/llvm/lib/TableGen/Error.cpp
@@ -19,10 +19,10 @@
#include "llvm/TableGen/Record.h"
#include <cstdlib>
-namespace llvm {
+using namespace llvm;
-SourceMgr SrcMgr;
-unsigned ErrorsPrinted = 0;
+SourceMgr llvm::SrcMgr;
+unsigned llvm::ErrorsPrinted = 0;
static void PrintMessage(ArrayRef<SMLoc> Locs, SourceMgr::DiagKind Kind,
const Twine &Msg) {
@@ -49,118 +49,118 @@ static void PrintMessage(ArrayRef<SMLoc> Locs, SourceMgr::DiagKind Kind,
// Functions to print notes.
-void PrintNote(const Twine &Msg) {
- WithColor::note() << Msg << "\n";
-}
+void llvm::PrintNote(const Twine &Msg) { WithColor::note() << Msg << "\n"; }
-void PrintNote(function_ref<void(raw_ostream &OS)> PrintMsg) {
+void llvm::PrintNote(function_ref<void(raw_ostream &OS)> PrintMsg) {
PrintMsg(WithColor::note());
}
-void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
+void llvm::PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
PrintMessage(NoteLoc, SourceMgr::DK_Note, Msg);
}
// Functions to print fatal notes.
-void PrintFatalNote(const Twine &Msg) {
+void llvm::PrintFatalNote(const Twine &Msg) {
PrintNote(Msg);
fatal_exit();
}
-void PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
+void llvm::PrintFatalNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg) {
PrintNote(NoteLoc, Msg);
fatal_exit();
}
// This method takes a Record and uses the source location
// stored in it.
-void PrintFatalNote(const Record *Rec, const Twine &Msg) {
+void llvm::PrintFatalNote(const Record *Rec, const Twine &Msg) {
PrintNote(Rec->getLoc(), Msg);
fatal_exit();
}
// This method takes a RecordVal and uses the source location
// stored in it.
-void PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) {
+void llvm::PrintFatalNote(const RecordVal *RecVal, const Twine &Msg) {
PrintNote(RecVal->getLoc(), Msg);
fatal_exit();
}
// Functions to print warnings.
-void PrintWarning(const Twine &Msg) { WithColor::warning() << Msg << "\n"; }
+void llvm::PrintWarning(const Twine &Msg) {
+ WithColor::warning() << Msg << "\n";
+}
-void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) {
+void llvm::PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) {
PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
}
-void PrintWarning(const char *Loc, const Twine &Msg) {
+void llvm::PrintWarning(const char *Loc, const Twine &Msg) {
SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Warning, Msg);
}
// Functions to print errors.
-void PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; }
+void llvm::PrintError(const Twine &Msg) { WithColor::error() << Msg << "\n"; }
-void PrintError(function_ref<void(raw_ostream &OS)> PrintMsg) {
+void llvm::PrintError(function_ref<void(raw_ostream &OS)> PrintMsg) {
PrintMsg(WithColor::error());
}
-void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
+void llvm::PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
}
-void PrintError(const char *Loc, const Twine &Msg) {
+void llvm::PrintError(const char *Loc, const Twine &Msg) {
SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg);
}
// This method takes a Record and uses the source location
// stored in it.
-void PrintError(const Record *Rec, const Twine &Msg) {
+void llvm::PrintError(const Record *Rec, const Twine &Msg) {
PrintMessage(Rec->getLoc(), SourceMgr::DK_Error, Msg);
}
// This method takes a RecordVal and uses the source location
// stored in it.
-void PrintError(const RecordVal *RecVal, const Twine &Msg) {
+void llvm::PrintError(const RecordVal *RecVal, const Twine &Msg) {
PrintMessage(RecVal->getLoc(), SourceMgr::DK_Error, Msg);
}
// Functions to print fatal errors.
-void PrintFatalError(const Twine &Msg) {
+void llvm::PrintFatalError(const Twine &Msg) {
PrintError(Msg);
fatal_exit();
}
-void PrintFatalError(function_ref<void(raw_ostream &OS)> PrintMsg) {
+void llvm::PrintFatalError(function_ref<void(raw_ostream &OS)> PrintMsg) {
PrintError(PrintMsg);
fatal_exit();
}
-void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
+void llvm::PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) {
PrintError(ErrorLoc, Msg);
fatal_exit();
}
// This method takes a Record and uses the source location
// stored in it.
-void PrintFatalError(const Record *Rec, const Twine &Msg) {
+void llvm::PrintFatalError(const Record *Rec, const Twine &Msg) {
PrintError(Rec->getLoc(), Msg);
fatal_exit();
}
// This method takes a RecordVal and uses the source location
// stored in it.
-void PrintFatalError(const RecordVal *RecVal, const Twine &Msg) {
+void llvm::PrintFatalError(const RecordVal *RecVal, const Twine &Msg) {
PrintError(RecVal->getLoc(), Msg);
fatal_exit();
}
// Check an assertion: Obtain the condition value and be sure it is true.
// If not, print a nonfatal error along with the message.
-bool CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) {
+bool llvm::CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) {
auto *CondValue = dyn_cast_or_null<IntInit>(Condition->convertInitializerTo(
IntRecTy::get(Condition->getRecordKeeper())));
if (!CondValue) {
@@ -178,11 +178,9 @@ bool CheckAssert(SMLoc Loc, const Init *Condition, const Init *Message) {
}
// Dump a message to stderr.
-void dumpMessage(SMLoc Loc, const Init *Message) {
+void llvm::dumpMessage(SMLoc Loc, const Init *Message) {
if (auto *MessageInit = dyn_cast<StringInit>(Message))
PrintNote(Loc, MessageInit->getValue());
else
PrintError(Loc, "dump value is not of type string");
}
-
-} // end namespace llvm
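The whole file follows one mechanical pattern: drop the `namespace llvm { ... }` wrapper and qualify each definition with `llvm::` instead. Besides matching the LLVM coding standard's preference for qualified definitions in .cpp files, this turns signature drift into a compile error: a qualified definition must match an existing declaration, whereas a definition inside a reopened namespace silently introduces a new overload. Schematically:

  // Header: declaration inside the namespace.
  namespace llvm { void PrintNote(const Twine &Msg); }

  // .cpp: qualified definition; must match a prior declaration exactly.
  void llvm::PrintNote(const Twine &Msg) { WithColor::note() << Msg << "\n"; }

  // Changing a parameter type here now fails to compile instead of quietly
  // declaring an unrelated overload, as it would inside `namespace llvm {}`.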
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index f545706..42043f7 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -64,14 +64,12 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed"));
static cl::opt<bool>
TimePhases("time-phases", cl::desc("Time phases of parser and backend"));
-namespace llvm {
-cl::opt<bool> EmitLongStrLiterals(
+cl::opt<bool> llvm::EmitLongStrLiterals(
"long-string-literals",
cl::desc("when emitting large string tables, prefer string literals over "
"comma-separated char literals. This can be a readability and "
"compile-time performance win, but upsets some compilers"),
cl::Hidden, cl::init(true));
-} // end namespace llvm
static cl::opt<bool> NoWarnOnUnusedTemplateArgs(
"no-warn-on-unused-template-args",
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 051a896..2ea3a24 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -46,8 +46,7 @@ using namespace llvm;
// Context
//===----------------------------------------------------------------------===//
-namespace llvm {
-namespace detail {
+namespace llvm::detail {
/// This class represents the internal implementation of the RecordKeeper.
/// It contains all of the contextual static state of the Record classes. It is
/// kept out-of-line to simplify dependencies, and also make it easier for
@@ -100,8 +99,7 @@ struct RecordKeeperImpl {
void dumpAllocationStats(raw_ostream &OS) const;
};
-} // namespace detail
-} // namespace llvm
+} // namespace llvm::detail
void detail::RecordKeeperImpl::dumpAllocationStats(raw_ostream &OS) const {
// Dump memory allocation related stats.
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index f928ded..3d31d8e 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -31,8 +31,6 @@ using namespace llvm;
// Support Code for the Semantic Actions.
//===----------------------------------------------------------------------===//
-namespace llvm {
-
RecordsEntry::RecordsEntry(std::unique_ptr<Record> Rec) : Rec(std::move(Rec)) {}
RecordsEntry::RecordsEntry(std::unique_ptr<ForeachLoop> Loop)
: Loop(std::move(Loop)) {}
@@ -41,6 +39,7 @@ RecordsEntry::RecordsEntry(std::unique_ptr<Record::AssertionInfo> Assertion)
RecordsEntry::RecordsEntry(std::unique_ptr<Record::DumpInfo> Dump)
: Dump(std::move(Dump)) {}
+namespace llvm {
struct SubClassReference {
SMRange RefRange;
const Record *Rec = nullptr;
@@ -61,6 +60,7 @@ struct SubMultiClassReference {
bool isInvalid() const { return MC == nullptr; }
void dump() const;
};
+} // end namespace llvm
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SubMultiClassReference::dump() const {
@@ -74,8 +74,6 @@ LLVM_DUMP_METHOD void SubMultiClassReference::dump() const {
}
#endif
-} // end namespace llvm
-
static bool checkBitsConcrete(Record &R, const RecordVal &RV) {
const auto *BV = cast<BitsInit>(RV.getValue());
for (unsigned i = 0, e = BV->getNumBits(); i != e; ++i) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 70d5ad7d..dc8e7c8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16461,7 +16461,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
- DAG.getConstant(Cnt, DL, MVT::i32));
+ DAG.getTargetConstant(Cnt, DL, MVT::i32));
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
MVT::i32),
@@ -16491,7 +16491,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
unsigned Opc =
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
- DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags());
+ DAG.getTargetConstant(Cnt, DL, MVT::i32),
+ Op->getFlags());
}
// Right shift register. Note, there is not a shift right register
@@ -19973,7 +19974,7 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
SDValue FixConv =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
- Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
+ Op->getOperand(0), DAG.getTargetConstant(C, DL, MVT::i32));
// We can handle smaller integers by generating an extra trunc.
if (IntBits < FloatBits)
FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
@@ -20696,7 +20697,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
N100 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N100);
SDValue Uzp = DAG.getNode(AArch64ISD::UZP2, DL, VT, N000, N100);
SDValue NewShiftConstant =
- DAG.getConstant(N001ConstVal - NScalarSize, DL, MVT::i32);
+ DAG.getTargetConstant(N001ConstVal - NScalarSize, DL, MVT::i32);
return DAG.getNode(AArch64ISD::VLSHR, DL, VT, Uzp, NewShiftConstant);
}
@@ -22373,14 +22374,14 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
Op = DAG.getNode(Opcode, DL, VT, Op,
- DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32));
+ DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32, true));
if (N->getValueType(0) == MVT::i64)
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op,
DAG.getConstant(0, DL, MVT::i64));
return Op;
} else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
Op = DAG.getNode(Opcode, DL, VT, Op,
- DAG.getConstant(ShiftAmount, DL, MVT::i32));
+ DAG.getTargetConstant(ShiftAmount, DL, MVT::i32));
if (N->getValueType(0) == MVT::i64)
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op,
DAG.getConstant(0, DL, MVT::i64));
@@ -23198,7 +23199,7 @@ static SDValue performZExtUZPCombine(SDNode *N, SelectionDAG &DAG) {
Op.getOperand(ExtOffset == 0 ? 0 : 1));
if (Shift != 0)
BC = DAG.getNode(AArch64ISD::VLSHR, DL, VT, BC,
- DAG.getConstant(Shift, DL, MVT::i32));
+ DAG.getTargetConstant(Shift, DL, MVT::i32));
return DAG.getNode(ISD::AND, DL, VT, BC, DAG.getConstant(Mask, DL, VT));
}
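Every change in this file is the same substitution: shift amounts feeding AArch64ISD shift nodes move from `DAG.getConstant` to `DAG.getTargetConstant` (and the `true` added to the `getSignedConstant` call is its `isTarget` flag). A plain `Constant` is an ordinary value node the DAG may CSE or materialize into a register, matched by `imm`/`ImmLeaf` patterns; a `TargetConstant` stays pinned as an immediate operand of its user and is matched by `timm`/`TImmLeaf` patterns, which is what the operand-class changes below rely on. In sketch form:

  // Before: ordinary constant; only ImmLeaf (imm) patterns can match it.
  SDValue Old = DAG.getNode(AArch64ISD::VLSHR, DL, VT, Src,
                            DAG.getConstant(Cnt, DL, MVT::i32));
  // After: instruction immediate; TImmLeaf (timm) patterns match it, and
  // the value can never be separated from its user.
  SDValue New = DAG.getNode(AArch64ISD::VLSHR, DL, VT, Src,
                            DAG.getTargetConstant(Cnt, DL, MVT::i32));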
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 6ef0a95..09ce713 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -812,49 +812,49 @@ def fixedpoint_recip_f16_i64 : fixedpoint_recip_i64<f16>;
def fixedpoint_recip_f32_i64 : fixedpoint_recip_i64<f32>;
def fixedpoint_recip_f64_i64 : fixedpoint_recip_i64<f64>;
-def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR8 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
}]> {
let EncoderMethod = "getVecShiftR8OpValue";
let DecoderMethod = "DecodeVecShiftR8Imm";
let ParserMatchClass = Imm1_8Operand;
}
-def vecshiftR16 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR16 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
}]> {
let EncoderMethod = "getVecShiftR16OpValue";
let DecoderMethod = "DecodeVecShiftR16Imm";
let ParserMatchClass = Imm1_16Operand;
}
-def vecshiftR16Narrow : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR16Narrow : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
}]> {
let EncoderMethod = "getVecShiftR16OpValue";
let DecoderMethod = "DecodeVecShiftR16ImmNarrow";
let ParserMatchClass = Imm1_8Operand;
}
-def vecshiftR32 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR32 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
}]> {
let EncoderMethod = "getVecShiftR32OpValue";
let DecoderMethod = "DecodeVecShiftR32Imm";
let ParserMatchClass = Imm1_32Operand;
}
-def vecshiftR32Narrow : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR32Narrow : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
}]> {
let EncoderMethod = "getVecShiftR32OpValue";
let DecoderMethod = "DecodeVecShiftR32ImmNarrow";
let ParserMatchClass = Imm1_16Operand;
}
-def vecshiftR64 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR64 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65);
}]> {
let EncoderMethod = "getVecShiftR64OpValue";
let DecoderMethod = "DecodeVecShiftR64Imm";
let ParserMatchClass = Imm1_64Operand;
}
-def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR64Narrow : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
}]> {
let EncoderMethod = "getVecShiftR64OpValue";
@@ -862,37 +862,6 @@ def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm1_32Operand;
}
-// Same as vecshiftR#N, but use TargetConstant (TimmLeaf) instead of Constant
-// (ImmLeaf)
-def tvecshiftR8 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
-}]> {
- let EncoderMethod = "getVecShiftR8OpValue";
- let DecoderMethod = "DecodeVecShiftR8Imm";
- let ParserMatchClass = Imm1_8Operand;
-}
-def tvecshiftR16 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
-}]> {
- let EncoderMethod = "getVecShiftR16OpValue";
- let DecoderMethod = "DecodeVecShiftR16Imm";
- let ParserMatchClass = Imm1_16Operand;
-}
-def tvecshiftR32 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
-}]> {
- let EncoderMethod = "getVecShiftR32OpValue";
- let DecoderMethod = "DecodeVecShiftR32Imm";
- let ParserMatchClass = Imm1_32Operand;
-}
-def tvecshiftR64 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65);
-}]> {
- let EncoderMethod = "getVecShiftR64OpValue";
- let DecoderMethod = "DecodeVecShiftR64Imm";
- let ParserMatchClass = Imm1_64Operand;
-}
-
def Imm0_0Operand : AsmImmRange<0, 0>;
def Imm0_1Operand : AsmImmRange<0, 1>;
def Imm1_1Operand : AsmImmRange<1, 1>;
@@ -904,28 +873,28 @@ def Imm0_15Operand : AsmImmRange<0, 15>;
def Imm0_31Operand : AsmImmRange<0, 31>;
def Imm0_63Operand : AsmImmRange<0, 63>;
-def vecshiftL8 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL8 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 8);
}]> {
let EncoderMethod = "getVecShiftL8OpValue";
let DecoderMethod = "DecodeVecShiftL8Imm";
let ParserMatchClass = Imm0_7Operand;
}
-def vecshiftL16 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL16 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 16);
}]> {
let EncoderMethod = "getVecShiftL16OpValue";
let DecoderMethod = "DecodeVecShiftL16Imm";
let ParserMatchClass = Imm0_15Operand;
}
-def vecshiftL32 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL32 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 32);
}]> {
let EncoderMethod = "getVecShiftL32OpValue";
let DecoderMethod = "DecodeVecShiftL32Imm";
let ParserMatchClass = Imm0_31Operand;
}
-def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL64 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 64);
}]> {
let EncoderMethod = "getVecShiftL64OpValue";
@@ -933,36 +902,6 @@ def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_63Operand;
}
-// Same as vecshiftL#N, but use TargetConstant (TimmLeaf) instead of Constant
-// (ImmLeaf)
-def tvecshiftL8 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 8);
-}]> {
- let EncoderMethod = "getVecShiftL8OpValue";
- let DecoderMethod = "DecodeVecShiftL8Imm";
- let ParserMatchClass = Imm0_7Operand;
-}
-def tvecshiftL16 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 16);
-}]> {
- let EncoderMethod = "getVecShiftL16OpValue";
- let DecoderMethod = "DecodeVecShiftL16Imm";
- let ParserMatchClass = Imm0_15Operand;
-}
-def tvecshiftL32 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 32);
-}]> {
- let EncoderMethod = "getVecShiftL32OpValue";
- let DecoderMethod = "DecodeVecShiftL32Imm";
- let ParserMatchClass = Imm0_31Operand;
-}
-def tvecshiftL64 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 64);
-}]> {
- let EncoderMethod = "getVecShiftL64OpValue";
- let DecoderMethod = "DecodeVecShiftL64Imm";
- let ParserMatchClass = Imm0_63Operand;
-}
// Crazy immediate formats used by 32-bit and 64-bit logical immediate
// instructions for splatting repeating bit patterns across the immediate.
@@ -10232,7 +10171,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
V64, V64, vecshiftR16,
asm, ".4h", ".4h",
- [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
@@ -10240,15 +10179,16 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
V128, V128, vecshiftR16,
asm, ".8h", ".8h",
- [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
} // Predicates = [HasNEON, HasFullFP16]
+
def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
V64, V64, vecshiftR32,
asm, ".2s", ".2s",
- [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10256,7 +10196,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
V128, V128, vecshiftR32,
asm, ".4s", ".4s",
- [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10264,7 +10204,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
V128, V128, vecshiftR64,
asm, ".2d", ".2d",
- [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 vecshiftR64:$imm)))]> {
bits<6> imm;
let Inst{21-16} = imm;
}
@@ -10276,7 +10216,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
V64, V64, vecshiftR16,
asm, ".4h", ".4h",
- [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
@@ -10284,7 +10224,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
V128, V128, vecshiftR16,
asm, ".8h", ".8h",
- [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
@@ -10293,7 +10233,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
V64, V64, vecshiftR32,
asm, ".2s", ".2s",
- [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10301,7 +10241,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
V128, V128, vecshiftR32,
asm, ".4s", ".4s",
- [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10309,7 +10249,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
V128, V128, vecshiftR64,
asm, ".2d", ".2d",
- [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 vecshiftR64:$imm)))]> {
bits<6> imm;
let Inst{21-16} = imm;
}
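With the C++ producers now emitting `TargetConstant`s, the parallel `tvecshift*` operand classes lose their reason to exist: the base `vecshiftR*`/`vecshiftL*` operands flip from `ImmLeaf` to `TImmLeaf` and the `t`-prefixed duplicates are deleted, so the SME and SVE files below can reference a single set of names. One subtlety: a generic `(i32 imm:$imm)` pattern no longer matches a `TImmLeaf` operand, which is why the `SIMDVectorRShiftSD`/`SIMDVectorRShiftToFP` patterns are rewritten to name the operand class (`vecshiftR16:$imm` and friends). On the C++ side the predicate difference amounts to:

  // Illustrative check mirroring the vecshiftR16 TImmLeaf predicate:
  // a TargetConstant whose value lies in [1, 16].
  static bool matchesVecshiftR16(SDValue V) {
    if (V.getOpcode() != ISD::TargetConstant)
      return false;
    uint64_t Imm = cast<ConstantSDNode>(V)->getZExtValue();
    return Imm > 0 && Imm < 17;
  }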
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 96cc3f3..3e55b76 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2957,9 +2957,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
// Need special instructions for atomics that affect ordering.
- if (Order != AtomicOrdering::NotAtomic &&
- Order != AtomicOrdering::Unordered &&
- Order != AtomicOrdering::Monotonic) {
+ if (isStrongerThanMonotonic(Order)) {
assert(!isa<GZExtLoad>(LdSt));
assert(MemSizeInBytes <= 8 &&
"128-bit atomics should already be custom-legalized");
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 6025f1c..63313da 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -556,8 +556,7 @@ void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
unsigned NewOpc =
Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
MachineIRBuilder MIB(MI);
- auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
- MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
+ MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1)}).addImm(Imm);
MI.eraseFromParent();
}
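The GlobalISel change mirrors the SelectionDAG one: rather than materializing the shift amount as a `G_CONSTANT` in a virtual register, it is attached directly as an immediate machine operand, which is what the imported `TImmLeaf` patterns expect. Side by side (`Dst`/`Src` stand in for the operands in the hunk):

  // Before: the amount lives in a vreg defined by G_CONSTANT.
  auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
  MIB.buildInstr(NewOpc, {Dst}, {Src, ImmDef});

  // After: the amount is an inline immediate on the instruction itself.
  MIB.buildInstr(NewOpc, {Dst}, {Src}).addImm(Imm);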
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 539470d..be44b8f 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -4967,7 +4967,7 @@ multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> {
//===----------------------------------------------------------------------===//
// SME2 multi-vec saturating shift right narrow
class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
+ : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
mnemonic, "\t$Zd, $Zn, $imm4",
"", []>, Sched<[]> {
bits<4> imm4;
@@ -4985,7 +4985,7 @@ class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u, SDPatternOperator intrinsic> {
def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>;
- def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>;
+ def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, vecshiftR16>;
}
class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
@@ -5008,20 +5008,20 @@ class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
}
multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
- def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32,
+ def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, vecshiftR32,
mnemonic>{
bits<5> imm;
let Inst{20-16} = imm;
}
- def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64,
+ def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, vecshiftR64,
mnemonic> {
bits<6> imm;
let Inst{22} = imm{5};
let Inst{20-16} = imm{4-0};
}
- def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, tvecshiftR32>;
- def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, tvecshiftR64>;
+ def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, vecshiftR32>;
+ def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, vecshiftR64>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 9a23c35..3cdd505 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4436,9 +4436,9 @@ multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm,
ZPR64, ZPR32, vecshiftL32> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -4481,10 +4481,10 @@ multiclass sve2_int_bin_shift_imm_left<bit opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftL64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve2_int_bin_shift_imm_right<bit opc, string asm,
@@ -4501,10 +4501,10 @@ multiclass sve2_int_bin_shift_imm_right<bit opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
@@ -4546,10 +4546,10 @@ multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
def : SVE_Shift_Add_All_Active_Pat<nxv16i8, shift_op, nxv16i1, nxv16i8, nxv16i8, i32, !cast<Instruction>(NAME # _B)>;
def : SVE_Shift_Add_All_Active_Pat<nxv8i16, shift_op, nxv8i1, nxv8i16, nxv8i16, i32, !cast<Instruction>(NAME # _H)>;
@@ -4676,18 +4676,18 @@ class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm,
SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16,
- tvecshiftR8>;
+ vecshiftR8>;
def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32,
- tvecshiftR16> {
+ vecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
- tvecshiftR32> {
+ vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
@@ -4717,18 +4717,18 @@ class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm,
SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16,
- tvecshiftR8>;
+ vecshiftR8>;
def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32,
- tvecshiftR16> {
+ vecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
- tvecshiftR32> {
+ vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
@@ -5461,10 +5461,10 @@ multiclass sve2_int_rotate_right_imm<string asm, SDPatternOperator op> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -6443,10 +6443,10 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps,
let Inst{9-8} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftL64, !cast<Instruction>(NAME # _D)>;
}
// As above but shift amount takes the form of a "vector immediate".
@@ -6460,15 +6460,15 @@ multiclass sve_int_bin_pred_shift_imm_left_dup<bits<4> opc, string asm,
}
multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> {
- def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>;
- def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
- def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesZero>;
- def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesZero>;
+ def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftL8, FalseLanesZero>;
+ def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftL16, FalseLanesZero>;
+ def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftL32, FalseLanesZero>;
+ def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftL64, FalseLanesZero>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, vecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, vecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, vecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>;
}
multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
@@ -6489,10 +6489,10 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
let Inst{9-8} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}
// As above but shift amount takes the form of a "vector immediate".
@@ -6511,10 +6511,10 @@ multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op =
def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftR32, FalseLanesZero>;
def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftR64, FalseLanesZero>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, vecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, vecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, vecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>;
}
class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
@@ -10031,7 +10031,7 @@ multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatte
// SVE2 multi-vec shift narrow
class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
+ : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
mnemonic, "\t$Zd, $Zn, $imm4",
"", []>, Sched<[]> {
bits<5> Zd;
@@ -10055,7 +10055,7 @@ class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> {
def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, opc, 0b01>;
- def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>;
+ def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, vecshiftR16>;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 2ba3156..9dd64e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -131,10 +131,8 @@ static bool isDSAddress(const Constant *C) {
return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}
-/// Returns true if the function requires the implicit argument be passed
-/// regardless of the function contents.
-static bool funcRequiresHostcallPtr(const Function &F) {
- // Sanitizers require the hostcall buffer passed in the implicit arguments.
+/// Returns true if sanitizer attributes are present on a function.
+static bool hasSanitizerAttributes(const Function &F) {
return F.hasFnAttribute(Attribute::SanitizeAddress) ||
F.hasFnAttribute(Attribute::SanitizeThread) ||
F.hasFnAttribute(Attribute::SanitizeMemory) ||
@@ -469,15 +467,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
// If the function requires the implicit arg pointer due to sanitizers,
// assume it's needed even if explicitly marked as not requiring it.
- const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
- if (NeedsHostcall) {
+ // Flat scratch initialization is needed because `asan_malloc_impl`
+  // calls introduced later in the pipeline will have flat scratch accesses.
+  // FIXME: FLAT_SCRATCH_INIT will not be required here if the device-libs
+  // implementation of `asan_malloc_impl` is updated.
+ const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
+ if (HasSanitizerAttrs) {
removeAssumedBits(IMPLICIT_ARG_PTR);
removeAssumedBits(HOSTCALL_PTR);
+ removeAssumedBits(FLAT_SCRATCH_INIT);
}
for (auto Attr : ImplicitAttrs) {
- if (NeedsHostcall &&
- (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
+ if (HasSanitizerAttrs &&
+ (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
+ Attr.first == FLAT_SCRATCH_INIT))
continue;
if (F->hasFnAttribute(Attr.second))
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 82789bc..90c828b 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -932,7 +932,9 @@ static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII,
for (MachineInstr *SubDef = MRI.getVRegDef(SrcReg);
SubDef && TII.isFoldableCopy(*SubDef);
SubDef = MRI.getVRegDef(Sub->getReg())) {
- MachineOperand &SrcOp = SubDef->getOperand(1);
+ unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef);
+ MachineOperand &SrcOp = SubDef->getOperand(SrcIdx);
+
if (SrcOp.isImm())
return &SrcOp;
if (!SrcOp.isReg() || SrcOp.getReg().isPhysical())
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 79876ff..e233457 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -18860,31 +18860,6 @@ SITargetLowering::getTargetMMOFlags(const Instruction &I) const {
return Flags;
}
-bool SITargetLowering::checkForPhysRegDependency(
- SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI,
- const TargetInstrInfo *TII, MCRegister &PhysReg, int &Cost) const {
- if (User->getOpcode() != ISD::CopyToReg)
- return false;
- if (!Def->isMachineOpcode())
- return false;
- MachineSDNode *MDef = dyn_cast<MachineSDNode>(Def);
- if (!MDef)
- return false;
-
- unsigned ResNo = User->getOperand(Op).getResNo();
- if (User->getOperand(Op)->getValueType(ResNo) != MVT::i1)
- return false;
- const MCInstrDesc &II = TII->get(MDef->getMachineOpcode());
- if (II.isCompare() && II.hasImplicitDefOfPhysReg(AMDGPU::SCC)) {
- PhysReg = AMDGPU::SCC;
- const TargetRegisterClass *RC =
- TRI->getMinimalPhysRegClass(PhysReg, Def->getSimpleValueType(ResNo));
- Cost = RC->expensiveOrImpossibleToCopy() ? -1 : RC->getCopyCost();
- return true;
- }
- return false;
-}
-
void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
Instruction *AI) const {
// Given: atomicrmw fadd ptr %addr, float %val ordering
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index a474dab..74e58f4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -561,11 +561,6 @@ public:
bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const;
- bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
- const TargetRegisterInfo *TRI,
- const TargetInstrInfo *TII,
- MCRegister &PhysReg, int &Cost) const override;
-
bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index cda8069..46757cf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3433,6 +3433,32 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
}
}
+unsigned SIInstrInfo::getFoldableCopySrcIdx(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::V_MOV_B16_t16_e32:
+ case AMDGPU::V_MOV_B16_t16_e64:
+ return 2;
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::V_MOV_B64_e32:
+ case AMDGPU::V_MOV_B64_e64:
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::S_MOV_B64_IMM_PSEUDO:
+ case AMDGPU::COPY:
+ case AMDGPU::WWM_COPY:
+ case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
+ case AMDGPU::V_ACCVGPR_READ_B32_e64:
+ case AMDGPU::V_ACCVGPR_MOV_B32:
+ case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
+ case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
+ return 1;
+ default:
+ llvm_unreachable("MI is not a foldable copy");
+ }
+}
+
static constexpr AMDGPU::OpName ModifierOpNames[] = {
AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
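`getFoldableCopySrcIdx` exists because not every foldable copy keeps its source at operand 1: the true16 `V_MOV_B16_t16_*` forms carry an extra operand ahead of the source (hence index 2), while the rest use the plain `dst, src` layout. The `SIFoldOperands` hunk above shows the intended pairing with `isFoldableCopy`; in isolation:

  // Sketch: resolve the source operand of any foldable copy. Calling
  // getFoldableCopySrcIdx on a non-copy hits llvm_unreachable.
  if (SIInstrInfo::isFoldableCopy(MI)) {
    unsigned SrcIdx = SIInstrInfo::getFoldableCopySrcIdx(MI);
    const MachineOperand &Src = MI.getOperand(SrcIdx);
    // Src is the copied value regardless of the opcode's operand layout.
  }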
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index a21089f..cc59acf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -417,6 +417,7 @@ public:
const MachineInstr &MIb) const override;
static bool isFoldableCopy(const MachineInstr &MI);
+ static unsigned getFoldableCopySrcIdx(const MachineInstr &MI);
void removeModOperands(MachineInstr &MI) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 0040504..a94e131 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -359,6 +359,8 @@ HexagonTargetLowering::initializeHVXLowering() {
setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
@@ -372,6 +374,8 @@ HexagonTargetLowering::initializeHVXLowering() {
setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);
// Boolean vectors.
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 7d4535a..b37b740 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1560,7 +1560,7 @@ static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
// If it's not a grouped vector register, it doesn't have subregister, so
// the base register is just itself.
- if (BaseReg == RISCV::NoRegister)
+ if (!BaseReg.isValid())
BaseReg = Reg;
return BaseReg;
}
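This hunk and the RISC-V changes below share one cleanup: comparisons against `RISCV::NoRegister` become `Register::isValid()` checks (or a default-constructed `Register()` where a null register is stored). `llvm::Register` defines validity as "not the zero no-register value", so the target enum never needs to be spelled out; the two forms are equivalent:

  MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
  if (!BaseReg.isValid())   // same meaning as: BaseReg == RISCV::NoRegister
    BaseReg = Reg;          // an ungrouped register has no subregister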
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index cf6f83a..7f5d0af 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -126,13 +126,6 @@ let Predicates = [HasAtomicLdSt, IsRV64] in {
// RV64 i32 patterns not used by SelectionDAG
//===----------------------------------------------------------------------===//
-def uimm5i32 : ImmLeaf<i32, [{return isUInt<5>(Imm);}]>;
-
-def zext_is_sext : PatFrag<(ops node:$src), (zext node:$src), [{
- KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0), 0);
- return Known.isNonNegative();
-}]>;
-
let Predicates = [IsRV64] in {
def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb.
def : LdPat<extloadi16, LH, i32>;
@@ -140,15 +133,10 @@ def : LdPat<extloadi16, LH, i32>;
def : StPat<truncstorei8, SB, GPR, i32>;
def : StPat<truncstorei16, SH, GPR, i32>;
-def : Pat<(anyext (i32 GPR:$src)), (COPY GPR:$src)>;
def : Pat<(sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>;
-def : Pat<(i32 (trunc GPR:$src)), (COPY GPR:$src)>;
def : Pat<(sext_inreg (i64 (add GPR:$rs1, simm12_lo:$imm)), i32),
(ADDIW GPR:$rs1, simm12_lo:$imm)>;
-
-// Use sext if the sign bit of the input is 0.
-def : Pat<(zext_is_sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>;
}
let Predicates = [IsRV64, NoStdExtZba] in
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 6a6ead2..cf8d120 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -128,7 +128,7 @@ static bool hasUndefinedPassthru(const MachineInstr &MI) {
// All undefined passthrus should be $noreg: see
// RISCVDAGToDAGISel::doPeepholeNoRegPassThru
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
- return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
+ return !UseMO.getReg().isValid() || UseMO.isUndef();
}
/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
@@ -1454,7 +1454,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
Register Reg = VLOp.getReg();
// Erase the AVL operand from the instruction.
- VLOp.setReg(RISCV::NoRegister);
+ VLOp.setReg(Register());
VLOp.setIsKill(false);
if (LIS) {
LiveInterval &LI = LIS->getInterval(Reg);
@@ -1663,7 +1663,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
if (!MO.isReg() || !MO.getReg().isVirtual())
return;
Register OldVLReg = MO.getReg();
- MO.setReg(RISCV::NoRegister);
+ MO.setReg(Register());
if (LIS)
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 1e6b04f8..7db4832 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1364,7 +1364,7 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
/*RestoreAfter=*/false, /*SpAdj=*/0,
/*AllowSpill=*/false);
- if (TmpGPR != RISCV::NoRegister)
+ if (TmpGPR.isValid())
RS->setRegUsed(TmpGPR);
else {
// The case when there is no scavenged register needs special handling.
@@ -3021,7 +3021,7 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo = "Invalid operand type for VL operand";
return false;
}
- if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
+ if (Op.isReg() && Op.getReg().isValid()) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
auto *RC = MRI.getRegClass(Op.getReg());
if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
index 1674c95..1dd7332 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
@@ -26,7 +26,7 @@ class LAQ_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
class SRL_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
: RVInstRAtomic<0b00111, aq, rl, funct3, OPC_AMO,
- (outs ), (ins GPRMemZeroOffset:$rs1, GPR:$rs2),
+ (outs), (ins GPR:$rs2, GPRMemZeroOffset:$rs1),
opcodestr, "$rs2, $rs1"> {
let rd = 0;
}
@@ -71,7 +71,7 @@ class PatLAQ<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
// while atomic_store has data, addr
class PatSRL<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
: Pat<(OpNode (vt GPR:$rs2), (XLenVT GPRMemZeroOffset:$rs1)),
- (Inst GPRMemZeroOffset:$rs1, GPR:$rs2)>;
+ (Inst GPR:$rs2, GPRMemZeroOffset:$rs1)>;
let Predicates = [HasStdExtZalasr] in {
diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
index f8d33ae..54569b1 100644
--- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -259,7 +259,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
if (isCompressibleLoad(MI) || isCompressibleStore(MI)) {
const MachineOperand &MOImm = MI.getOperand(2);
if (!MOImm.isImm())
- return RegImmPair(RISCV::NoRegister, 0);
+ return RegImmPair(Register(), 0);
int64_t Offset = MOImm.getImm();
int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
@@ -292,7 +292,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
}
}
}
- return RegImmPair(RISCV::NoRegister, 0);
+ return RegImmPair(Register(), 0);
}
// Check all uses after FirstMI of the given register, keeping a vector of
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index ffba284..fdf9a4f 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -382,7 +382,7 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
// register class for the destination and passthru operands, e.g. VRNoV0 -> VR
MRI->recomputeRegClass(MI.getOperand(0).getReg());
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
MRI->recomputeRegClass(MI.getOperand(1).getReg());
return true;
}
@@ -448,7 +448,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
Register FalseReg = MI.getOperand(2).getReg();
if (TruePassthruReg != FalseReg) {
// If True's passthru is undef see if we can change it to False
- if (TruePassthruReg != RISCV::NoRegister ||
+ if (TruePassthruReg.isValid() ||
!MRI->hasOneUse(MI.getOperand(3).getReg()) ||
!ensureDominates(MI.getOperand(2), *True))
return false;
@@ -467,7 +467,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
// register class for the destination and passthru operands, e.g. VRNoV0 -> VR
MRI->recomputeRegClass(MI.getOperand(0).getReg());
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
MRI->recomputeRegClass(MI.getOperand(1).getReg());
return true;
}
@@ -517,7 +517,7 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
if (RISCVII::isFirstDefTiedToFirstUse(MaskedMCID)) {
unsigned PassthruOpIdx = MI.getNumExplicitDefs();
if (HasPassthru) {
- if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(PassthruOpIdx).getReg().isValid())
MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg());
} else
MI.removeOperand(PassthruOpIdx);
@@ -576,7 +576,7 @@ static bool dominates(MachineBasicBlock::const_iterator A,
bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
MachineInstr &Src) const {
assert(MO.getParent()->getParent() == Src.getParent());
- if (!MO.isReg() || MO.getReg() == RISCV::NoRegister)
+ if (!MO.isReg() || !MO.getReg().isValid())
return true;
MachineInstr *Def = MRI->getVRegDef(MO.getReg());
@@ -593,7 +593,7 @@ bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) {
if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V)
return false;
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
return false;
// If the input was a pseudo with a policy operand, we can give it a tail
@@ -654,7 +654,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
// Src needs to have the same passthru as VMV_V_V
MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
- if (SrcPassthru.getReg() != RISCV::NoRegister &&
+ if (SrcPassthru.getReg().isValid() &&
SrcPassthru.getReg() != Passthru.getReg())
return false;
@@ -672,7 +672,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
if (SrcPassthru.getReg() != Passthru.getReg()) {
SrcPassthru.setReg(Passthru.getReg());
// If Src is masked then its passthru needs to be in VRNoV0.
- if (Passthru.getReg() != RISCV::NoRegister)
+ if (Passthru.getReg().isValid())
MRI->constrainRegClass(
Passthru.getReg(),
TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo(), TRI));
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
index 7505507..ebd957c 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
@@ -188,8 +188,31 @@ class SPIRVLegalizePointerCast : public FunctionPass {
FixedVectorType *SrcType = cast<FixedVectorType>(Src->getType());
FixedVectorType *DstType =
cast<FixedVectorType>(GR->findDeducedElementType(Dst));
- assert(DstType->getNumElements() >= SrcType->getNumElements());
+ auto DstNumElements = DstType->getNumElements();
+ auto SrcNumElements = SrcType->getNumElements();
+
+ // If the element types differ, this is a bitcast.
+ if (DstType->getElementType() != SrcType->getElementType()) {
+ // Support bitcasts between vectors of different element counts only if
+ // the total bit width is the same.
+ auto DstBitWidth =
+ DstType->getElementType()->getScalarSizeInBits() * DstNumElements;
+ auto SrcBitWidth =
+ SrcType->getElementType()->getScalarSizeInBits() * SrcNumElements;
+ assert(DstBitWidth == SrcBitWidth &&
+ "Unsupported bitcast between vectors of different total bit widths.");
+
+ Src =
+ B.CreateIntrinsic(Intrinsic::spv_bitcast, {DstType, SrcType}, {Src});
+ buildAssignType(B, DstType, Src);
+ SrcType = DstType;
+
+ StoreInst *SI = B.CreateStore(Src, Dst);
+ SI->setAlignment(Alignment);
+ return SI;
+ }
+ assert(DstType->getNumElements() >= SrcType->getNumElements());
LoadInst *LI = B.CreateLoad(DstType, Dst);
LI->setAlignment(Alignment);
Value *OldValues = LI;
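The new SPIR-V path only legalizes a vector-to-vector bitcast when the total bit width matches; element count and element width may differ. A small self-contained illustration of the invariant the assert above enforces (hypothetical helper types, not the LLVM API):

#include <cassert>

struct VecType {
  unsigned ElemBits; // scalar size in bits
  unsigned NumElems;
  unsigned totalBits() const { return ElemBits * NumElems; }
};

// A <4 x i32> <-> <8 x i16> bitcast is fine (128 bits on both sides);
// <4 x i32> <-> <4 x i16> is not (128 vs. 64 bits).
bool bitcastIsLegal(VecType Dst, VecType Src) {
  return Dst.totalBits() == Src.totalBits();
}

int main() {
  assert(bitcastIsLegal({32, 4}, {16, 8}));  // 128 == 128
  assert(!bitcastIsLegal({32, 4}, {16, 4})); // 128 != 64
}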
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 02b20b3..931a10b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13783,10 +13783,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// so prevents folding a load into this instruction or making a copy.
const int UnpackLoMask[] = {0, 0, 1, 1};
const int UnpackHiMask[] = {2, 2, 3, 3};
- if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
- Mask = UnpackLoMask;
- else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
- Mask = UnpackHiMask;
+ if (!isSingleElementRepeatedMask(Mask)) {
+ if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
+ Mask = UnpackLoMask;
+ else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
+ Mask = UnpackHiMask;
+ }
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
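The new guard keeps splat-like shuffles (e.g. {1, 1, 1, 1}) from being rewritten into the unpack masks, since a single repeated lane has cheaper lowerings of its own. A hedged sketch of what a predicate like isSingleElementRepeatedMask presumably checks (the real helper lives elsewhere in X86ISelLowering):

#include <cassert>
#include <vector>

// Sketch: true if every non-sentinel mask element selects the same lane.
// (-1 is the conventional "undef" sentinel in shuffle masks.)
bool isSingleElementRepeatedMaskSketch(const std::vector<int> &Mask) {
  int Elt = -1;
  for (int M : Mask) {
    if (M < 0)
      continue; // undef lane, ignore
    if (Elt >= 0 && M != Elt)
      return false;
    Elt = M;
  }
  return Elt >= 0;
}

int main() {
  assert(isSingleElementRepeatedMaskSketch({1, 1, 1, 1}));  // splat
  assert(!isSingleElementRepeatedMaskSketch({0, 0, 1, 1})); // unpacklo
}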
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index ddb95a4..faeab95 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -40,6 +41,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InterleavedRange.h"
+#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
@@ -60,6 +62,9 @@ STATISTIC(FunctionClonesThinBackend,
"Number of function clones created during ThinLTO backend");
STATISTIC(FunctionsClonedThinBackend,
"Number of functions that had clones created during ThinLTO backend");
+STATISTIC(
+ FunctionCloneDuplicatesThinBackend,
+ "Number of function clone duplicates detected during ThinLTO backend");
STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
"cloned) during whole program analysis");
STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
@@ -5186,19 +5191,127 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
return Changed;
}
+// Compute a SHA1 hash of the callsite and alloc version information of clone I
+// in the summary, for use in detecting duplicate clones.
+uint64_t ComputeHash(const FunctionSummary *FS, unsigned I) {
+ SHA1 Hasher;
+ // Update hash with any callsites that call non-default (non-zero) callee
+ // versions.
+ for (auto &SN : FS->callsites()) {
+ // In theory all callsites and allocs in this function should have the same
+ // number of clone entries, but handle any discrepancies gracefully below
+ // for NDEBUG builds.
+ assert(
+ SN.Clones.size() > I &&
+ "Callsite summary has fewer entries than other summaries in function");
+ if (SN.Clones.size() <= I || !SN.Clones[I])
+ continue;
+ uint8_t Data[sizeof(SN.Clones[I])];
+ support::endian::write32le(Data, SN.Clones[I]);
+ Hasher.update(Data);
+ }
+ // Update hash with any allocs that have non-default (non-None) hints.
+ for (auto &AN : FS->allocs()) {
+ // In theory all callsites and allocs in this function should have the same
+ // number of clone entries, but handle any discrepancies gracefully below
+ // for NDEBUG builds.
+ assert(AN.Versions.size() > I &&
+ "Alloc summary has fewer entries than other summaries in function");
+ if (AN.Versions.size() <= I ||
+ (AllocationType)AN.Versions[I] == AllocationType::None)
+ continue;
+ Hasher.update(ArrayRef<uint8_t>(&AN.Versions[I], 1));
+ }
+ return support::endian::read64le(Hasher.result().data());
+}
+
static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
- &FuncToAliasMap) {
+ &FuncToAliasMap,
+ FunctionSummary *FS) {
+ auto TakeDeclNameAndReplace = [](GlobalValue *DeclGV, GlobalValue *NewGV) {
+ // We might have created this when adjusting callsite in another
+ // function. It should be a declaration.
+ assert(DeclGV->isDeclaration());
+ NewGV->takeName(DeclGV);
+ DeclGV->replaceAllUsesWith(NewGV);
+ DeclGV->eraseFromParent();
+ };
+
+ // Handle aliases to this function, and create analogous alias clones to the
+ // provided clone of this function.
+ auto CloneFuncAliases = [&](Function *NewF, unsigned I) {
+ if (!FuncToAliasMap.count(&F))
+ return;
+ for (auto *A : FuncToAliasMap[&F]) {
+ std::string AliasName = getMemProfFuncName(A->getName(), I);
+ auto *PrevA = M.getNamedAlias(AliasName);
+ auto *NewA = GlobalAlias::create(A->getValueType(),
+ A->getType()->getPointerAddressSpace(),
+ A->getLinkage(), AliasName, NewF);
+ NewA->copyAttributesFrom(A);
+ if (PrevA)
+ TakeDeclNameAndReplace(PrevA, NewA);
+ }
+ };
+
// The first "clone" is the original copy, we should only call this if we
// needed to create new clones.
assert(NumClones > 1);
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
VMaps.reserve(NumClones - 1);
FunctionsClonedThinBackend++;
+
+ // Map of hash of callsite/alloc versions to the instantiated function clone
+ // (possibly the original) implementing those calls. Used to avoid
+ // instantiating duplicate function clones.
+ // FIXME: Ideally the thin link would not generate such duplicate clones to
+ // start with, but right now it happens due to phase ordering in the function
+ // assignment and the new clones it can produce. We simply make each
+ // duplicate an alias to the matching instantiated clone recorded in the map
+ // (except for available_externally functions, which are made declarations,
+ // as they would be aliases in the prevailing module and available_externally
+ // aliases are not well supported right now).
+ DenseMap<uint64_t, Function *> HashToFunc;
+
+ // Save the hash of the original function version.
+ HashToFunc[ComputeHash(FS, 0)] = &F;
+
for (unsigned I = 1; I < NumClones; I++) {
VMaps.emplace_back(std::make_unique<ValueToValueMapTy>());
+ std::string Name = getMemProfFuncName(F.getName(), I);
+ auto Hash = ComputeHash(FS, I);
+ // If this clone would duplicate a previously seen clone, don't generate the
+ // duplicate clone body; just emit an alias to satisfy any (potentially
+ // cross-module) references.
+ if (HashToFunc.contains(Hash)) {
+ FunctionCloneDuplicatesThinBackend++;
+ auto *Func = HashToFunc[Hash];
+ if (Func->hasAvailableExternallyLinkage()) {
+ // Skip these as EliminateAvailableExternallyPass does not handle
+ // available_externally aliases correctly and we end up with an
+ // available_externally alias to a declaration. Just create a
+ // declaration for now as we know we will have a definition in another
+ // module.
+ auto Decl = M.getOrInsertFunction(Name, Func->getFunctionType());
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
+ << "created clone decl " << ore::NV("Decl", Decl.getCallee()));
+ continue;
+ }
+ auto *PrevF = M.getFunction(Name);
+ auto *Alias = GlobalAlias::create(Name, Func);
+ if (PrevF)
+ TakeDeclNameAndReplace(PrevF, Alias);
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
+ << "created clone alias " << ore::NV("Alias", Alias));
+
+ // Now handle aliases to this function, and clone those as well.
+ CloneFuncAliases(Func, I);
+ continue;
+ }
auto *NewF = CloneFunction(&F, *VMaps.back());
+ HashToFunc[Hash] = NewF;
FunctionClonesThinBackend++;
// Strip memprof and callsite metadata from clone as they are no longer
// needed.
@@ -5208,40 +5321,17 @@ static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
}
}
- std::string Name = getMemProfFuncName(F.getName(), I);
auto *PrevF = M.getFunction(Name);
- if (PrevF) {
- // We might have created this when adjusting callsite in another
- // function. It should be a declaration.
- assert(PrevF->isDeclaration());
- NewF->takeName(PrevF);
- PrevF->replaceAllUsesWith(NewF);
- PrevF->eraseFromParent();
- } else
+ if (PrevF)
+ TakeDeclNameAndReplace(PrevF, NewF);
+ else
NewF->setName(Name);
updateSubprogramLinkageName(NewF, Name);
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
<< "created clone " << ore::NV("NewFunction", NewF));
// Now handle aliases to this function, and clone those as well.
- if (!FuncToAliasMap.count(&F))
- continue;
- for (auto *A : FuncToAliasMap[&F]) {
- std::string Name = getMemProfFuncName(A->getName(), I);
- auto *PrevA = M.getNamedAlias(Name);
- auto *NewA = GlobalAlias::create(A->getValueType(),
- A->getType()->getPointerAddressSpace(),
- A->getLinkage(), Name, NewF);
- NewA->copyAttributesFrom(A);
- if (PrevA) {
- // We might have created this when adjusting callsite in another
- // function. It should be a declaration.
- assert(PrevA->isDeclaration());
- NewA->takeName(PrevA);
- PrevA->replaceAllUsesWith(NewA);
- PrevA->eraseFromParent();
- }
- }
+ CloneFuncAliases(NewF, I);
}
return VMaps;
}
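The dedup logic above boils down to a classic hash-consing loop: compute a key per clone, instantiate the body only on the key's first sighting, and alias every repeat to it. A stripped-down sketch of that shape (hypothetical names, standard containers in place of LLVM's):

#include <cassert>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

struct Clone { std::string Name; bool IsAlias; };

// Hash-cons function clones: only the first clone with a given behavior
// hash gets a real body; later duplicates become aliases to it.
std::vector<Clone> materializeClones(const std::vector<uint64_t> &Hashes) {
  std::unordered_map<uint64_t, size_t> Seen;
  std::vector<Clone> Out;
  for (size_t I = 0; I < Hashes.size(); ++I) {
    auto [It, Inserted] = Seen.try_emplace(Hashes[I], I);
    if (Inserted)
      Out.push_back({"clone." + std::to_string(I), /*IsAlias=*/false});
    else
      Out.push_back({Out[It->second].Name, /*IsAlias=*/true});
  }
  return Out;
}

int main() {
  auto Clones = materializeClones({0xAB, 0xCD, 0xAB}); // clone 2 repeats 0
  assert(!Clones[0].IsAlias && !Clones[1].IsAlias && Clones[2].IsAlias);
  assert(Clones[2].Name == "clone.0");
}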
@@ -5401,7 +5491,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
bool ClonesCreated = false;
unsigned NumClonesCreated = 0;
- auto CloneFuncIfNeeded = [&](unsigned NumClones) {
+ auto CloneFuncIfNeeded = [&](unsigned NumClones, FunctionSummary *FS) {
// We should at least have version 0, which is the original copy.
assert(NumClones > 0);
// If only one copy needed use original.
@@ -5415,7 +5505,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
assert(NumClonesCreated == NumClones);
return;
}
- VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap);
+ VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap, FS);
// The first "clone" is the original copy, which doesn't have a VMap.
assert(VMaps.size() == NumClones - 1);
Changed = true;
@@ -5424,9 +5514,9 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
};
auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
- Function *CalledFunction) {
+ Function *CalledFunction, FunctionSummary *FS) {
// Perform cloning if not yet done.
- CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
+ CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size(), FS);
assert(!isMemProfClone(*CalledFunction));
@@ -5448,6 +5538,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// below.
auto CalleeOrigName = CalledFunction->getName();
for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
+ // If the VMap is empty, this clone was a duplicate of another and was
+ // created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Do nothing if this version calls the original version of its
// callee.
if (!StackNode.Clones[J])
@@ -5591,7 +5685,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
#endif
// Perform cloning if not yet done.
- CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size());
+ CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size(), FS);
OrigAllocsThinBackend++;
AllocVersionsThinBackend += AllocNode.Versions.size();
@@ -5624,6 +5718,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// Update the allocation types per the summary info.
for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
+ // If the VMap is empty, this clone was a duplicate of another and
+ // was created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Ignore any that didn't get an assigned allocation type.
if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
continue;
@@ -5670,7 +5768,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
// we don't need to do ICP, but might need to clone this
// function as it is the target of other cloned calls.
if (NumClones)
- CloneFuncIfNeeded(NumClones);
+ CloneFuncIfNeeded(NumClones, FS);
}
else {
@@ -5690,7 +5788,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
}
#endif
- CloneCallsite(StackNode, CB, CalledFunction);
+ CloneCallsite(StackNode, CB, CalledFunction, FS);
}
} else if (CB->isTailCall() && CalledFunction) {
// Locate the synthesized callsite info for the callee VI, if any was
@@ -5700,7 +5798,7 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) {
auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI);
assert(Callsite != MapTailCallCalleeVIToCallsite.end());
- CloneCallsite(Callsite->second, CB, CalledFunction);
+ CloneCallsite(Callsite->second, CB, CalledFunction, FS);
}
}
}
@@ -5846,6 +5944,10 @@ void MemProfContextDisambiguation::performICP(
// check.
CallBase *CBClone = CB;
for (unsigned J = 0; J < NumClones; J++) {
+ // If the VMap is empty, this clone was a duplicate of another and was
+ // created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Copy 0 is the original function.
if (J > 0)
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
@@ -5891,6 +5993,10 @@ void MemProfContextDisambiguation::performICP(
// TotalCount and the number promoted.
CallBase *CBClone = CB;
for (unsigned J = 0; J < NumClones; J++) {
+ // If the VMap is empty, this clone was a duplicate of another and was
+ // created as an alias or a declaration.
+ if (J > 0 && VMaps[J - 1]->empty())
+ continue;
// Copy 0 is the original function.
if (J > 0)
CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
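Each loop over clones now re-checks VMaps[J - 1]->empty(): a clone that was deduplicated into an alias or declaration never had its body mapped, so its VMap stays empty and all per-clone work must be skipped. A tiny sketch of that convention:

#include <cassert>
#include <map>
#include <memory>
#include <vector>

// Clones 1..N-1 each carry a value map; an empty map marks a clone that
// was emitted as an alias/declaration instead of a real body.
unsigned countRealClones(
    const std::vector<std::unique_ptr<std::map<int, int>>> &VMaps) {
  unsigned Real = 1; // copy 0 is always the original function
  for (const auto &VM : VMaps)
    if (!VM->empty())
      ++Real;
  return Real;
}

int main() {
  std::vector<std::unique_ptr<std::map<int, int>>> VMaps;
  VMaps.push_back(std::make_unique<std::map<int, int>>()); // deduped clone
  VMaps.push_back(std::make_unique<std::map<int, int>>());
  (*VMaps[1])[1] = 2; // materialized clone
  assert(countRealClones(VMaps) == 2);
}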
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cf6d0ec..e1e24a9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -318,18 +318,18 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
// * Single constant active lane -> store
// * Narrow width by halfs excluding zero/undef lanes
Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
+ Value *StorePtr = II.getArgOperand(1);
+ Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
if (!ConstMask)
return nullptr;
// If the mask is all zeros, this instruction does nothing.
- if (ConstMask->isNullValue())
+ if (maskIsAllZeroOrUndef(ConstMask))
return eraseInstFromFunction(II);
// If the mask is all ones, this is a plain vector store of the 1st argument.
- if (ConstMask->isAllOnesValue()) {
- Value *StorePtr = II.getArgOperand(1);
- Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
+ if (maskIsAllOneOrUndef(ConstMask)) {
StoreInst *S =
new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
S->copyMetadata(II);
@@ -389,7 +389,7 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
return nullptr;
// If the mask is all zeros, a scatter does nothing.
- if (ConstMask->isNullValue())
+ if (maskIsAllZeroOrUndef(ConstMask))
return eraseInstFromFunction(II);
// Vector splat address -> scalar store
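maskIsAllZeroOrUndef/maskIsAllOneOrUndef strengthen the old isNullValue/isAllOnesValue tests by also accepting undef or poison lanes, which an effectively all-zero (or all-one) mask may legally contain. A behavioral sketch over plain tri-state lanes (the real helpers operate on llvm::Constant mask vectors):

#include <cassert>
#include <optional>
#include <vector>

// A mask lane: false, true, or nullopt for undef/poison.
using Lane = std::optional<bool>;

// All lanes are 0 or undef => the masked store/scatter does nothing.
bool maskIsAllZeroOrUndefSketch(const std::vector<Lane> &Mask) {
  for (Lane L : Mask)
    if (L.has_value() && *L)
      return false; // a definitely-active lane
  return true;
}

int main() {
  assert(maskIsAllZeroOrUndefSketch({false, std::nullopt, false}));
  assert(!maskIsAllZeroOrUndefSketch({false, true}));
}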
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 87000a1..3df448d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -50,6 +50,9 @@
using namespace llvm;
using namespace PatternMatch;
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+}
/// Replace a select operand based on an equality comparison with the identity
/// constant of a binop.
@@ -4492,8 +4495,21 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
auto FoldSelectWithAndOrCond = [&](bool IsAnd, Value *A,
Value *B) -> Instruction * {
if (Value *V = simplifySelectInst(B, TrueVal, FalseVal,
- SQ.getWithInstruction(&SI)))
- return SelectInst::Create(A, IsAnd ? V : TrueVal, IsAnd ? FalseVal : V);
+ SQ.getWithInstruction(&SI))) {
+ Value *NewTrueVal = IsAnd ? V : TrueVal;
+ Value *NewFalseVal = IsAnd ? FalseVal : V;
+
+ // If the True and False values don't change, preserve the branch metadata
+ // of the original select, since the net effect of this transform is only
+ // to simplify the condition.
+ Instruction *MDFrom = nullptr;
+ if (NewTrueVal == TrueVal && NewFalseVal == FalseVal &&
+ !ProfcheckDisableMetadataFixes) {
+ MDFrom = &SI;
+ }
+ return SelectInst::Create(A, NewTrueVal, NewFalseVal, "", nullptr,
+ MDFrom);
+ }
// Is (select B, T, F) a SPF?
if (CondVal->hasOneUse() && SelType->isIntOrIntVectorTy()) {
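The metadata-preserving fold above only carries branch weights over when both select arms survive unchanged, because then the rewrite merely swaps in a simpler condition and the old profile still describes the new select. A scalar demonstration of why the guard is sound under that assumption:

#include <cassert>

// The fold rewrites  select(A & B, T, F)  as  select(A, select(B, T, F), F).
// When select(B, T, F) simplifies to a value V and the outer arms are still
// exactly (T, F), the new select picks T exactly when the old one did, so
// the original branch weights still apply.
int origSel(bool A, bool B, int T, int F) { return (A && B) ? T : F; }
int newSel(bool A, int V, int F) { return A ? V : F; }

int main() {
  // Case where select(B, T, F) simplifies to T because B is known true:
  // the two selects agree for every A, and the arms (T, F) are unchanged.
  for (bool A : {false, true})
    assert(origSel(A, /*B=*/true, 1, 2) == newSel(A, /*V=*/1, 2));
}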
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 9d4fb79..d6b7633 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1646,10 +1646,6 @@ NewGVN::performSymbolicPredicateInfoEvaluation(BitCastInst *I) const {
// Evaluate read only and pure calls, and create an expression result.
NewGVN::ExprResult NewGVN::performSymbolicCallEvaluation(Instruction *I) const {
auto *CI = cast<CallInst>(I);
- if (auto *II = dyn_cast<IntrinsicInst>(I)) {
- if (auto *ReturnedValue = II->getReturnedArgOperand())
- return ExprResult::some(createVariableOrConstant(ReturnedValue));
- }
// FIXME: Currently the calls which may access the thread id may
// be considered as not accessing the memory. But this is
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 43d61f2..a88cffc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3298,10 +3298,11 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo);
Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF);
-
+ bool UsedByLoadStoreAddress = isUsedByLoadStoreAddress(this);
InstructionCost ScalarCost =
ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
- PtrTy, &Ctx.SE, nullptr, Ctx.CostKind);
+ PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE,
+ nullptr, Ctx.CostKind);
if (isSingleScalar())
return ScalarCost;
@@ -3312,7 +3313,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
// vectorized addressing or the loaded value is used as part of an address
// of another load or store.
bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
- if (PreferVectorizedAddressing || !isUsedByLoadStoreAddress(this)) {
+ if (PreferVectorizedAddressing || !UsedByLoadStoreAddress) {
bool EfficientVectorLoadStore =
Ctx.TTI.supportsEfficientVectorElementLoadStore();
if (!(IsLoad && !PreferVectorizedAddressing) &&
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
index 7872c02..461a7ef 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -177,7 +177,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
; GISEL-NEXT: neg v2.16b, v3.16b
; GISEL-NEXT: shl v3.16b, v4.16b, #7
; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b
-; GISEL-NEXT: sshr v2.16b, v3.16b, #7
+; GISEL-NEXT: cmlt v2.16b, v3.16b, #0
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; GISEL-NEXT: ret
%div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -229,7 +229,7 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
; GISEL-NEXT: neg v2.8h, v4.8h
; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
-; GISEL-NEXT: sshr v2.8h, v3.8h, #15
+; GISEL-NEXT: cmlt v2.8h, v3.8h, #0
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; GISEL-NEXT: ret
%1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>
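The sshr-to-cmlt churn in these tests reflects a single identity: arithmetic-shifting a signed lane right by (width - 1) bits broadcasts its sign bit, producing the same all-ones/all-zeros lane as a compare-less-than-zero. Verifiable in plain C++ (assuming arithmetic right shift for signed values, as AArch64 sshr provides):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X : {-32768, -1, 0, 1, 32767}) {
    int16_t V = static_cast<int16_t>(X);
    int16_t BySshr = static_cast<int16_t>(V >> 15);        // sshr v, #15
    int16_t ByCmlt = static_cast<int16_t>(V < 0 ? -1 : 0); // cmlt v, #0
    assert(BySshr == ByCmlt); // -1 for negative lanes, 0 otherwise
  }
}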
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir
index 0b950b7..76d4d29 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-sextinreg.mir
@@ -14,8 +14,7 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[C]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL %v1, [[DUP]](<4 x s32>)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: %sext:_(<4 x s32>) = G_VASHR [[SHL]], [[C1]](s32)
+ ; CHECK-NEXT: %sext:_(<4 x s32>) = G_VASHR [[SHL]], 16
; CHECK-NEXT: $q0 = COPY %sext(<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%v1:_(<4 x s32>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir
index b3fb5a4..dfaddba 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-vashr-vlshr.mir
@@ -15,8 +15,7 @@ body: |
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK-NEXT: [[VASHR:%[0-9]+]]:_(<4 x s32>) = G_VASHR [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[VASHR:%[0-9]+]]:_(<4 x s32>) = G_VASHR [[COPY]], 5
; CHECK-NEXT: $q0 = COPY [[VASHR]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
@@ -39,8 +38,7 @@ body: |
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<4 x s32>) = G_VLSHR [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<4 x s32>) = G_VLSHR [[COPY]], 5
; CHECK-NEXT: $q0 = COPY [[VLSHR]](<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
@@ -63,8 +61,7 @@ body: |
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<8 x s16>) = G_VLSHR [[COPY]], [[C]](s32)
+ ; CHECK-NEXT: [[VLSHR:%[0-9]+]]:_(<8 x s16>) = G_VLSHR [[COPY]], 5
; CHECK-NEXT: $q0 = COPY [[VLSHR]](<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<8 x s16>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir
index c38e4a8..cf227cb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir
@@ -29,7 +29,6 @@ body: |
; CHECK-NEXT: [[UCVTFd:%[0-9]+]]:fpr64 = UCVTFd [[COPY]], 12
; CHECK-NEXT: $d1 = COPY [[UCVTFd]]
%0(s64) = COPY $d0
- %1(s32) = G_CONSTANT i32 12
- %2(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.vcvtfxu2fp.f64), %0, %1
+ %2(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.vcvtfxu2fp.f64), %0, 12
$d1 = COPY %2(s64)
...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
index 0706115..9fa6326 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
@@ -499,8 +499,7 @@ body: |
; CHECK-NEXT: $d0 = COPY [[SSHRv4i16_shift]]
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:fpr(<4 x s16>) = COPY $d0
- %1:gpr(s32) = G_CONSTANT i32 5
- %2:fpr(<4 x s16>) = G_VASHR %0, %1
+ %2:fpr(<4 x s16>) = G_VASHR %0, 5
$d0 = COPY %2(<4 x s16>)
RET_ReallyLR implicit $d0
...
@@ -520,8 +519,7 @@ body: |
; CHECK-NEXT: $d0 = COPY [[USHRv4i16_shift]]
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:fpr(<4 x s16>) = COPY $d0
- %1:gpr(s32) = G_CONSTANT i32 5
- %2:fpr(<4 x s16>) = G_VLSHR %0, %1
+ %2:fpr(<4 x s16>) = G_VLSHR %0, 5
$d0 = COPY %2(<4 x s16>)
RET_ReallyLR implicit $d0
...
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index cdde110..63c08dd 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -902,7 +902,7 @@ define void @sink_v8z16_0(ptr %p, ptr %d, i64 %n, <16 x i8> %a) {
; CHECK-GI-NEXT: subs x2, x2, #8
; CHECK-GI-NEXT: add x8, x8, #8
; CHECK-GI-NEXT: umull v1.8h, v1.8b, v0.8b
-; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15
+; CHECK-GI-NEXT: cmlt v1.8h, v1.8h, #0
; CHECK-GI-NEXT: xtn v1.8b, v1.8h
; CHECK-GI-NEXT: str d1, [x0], #32
; CHECK-GI-NEXT: b.ne .LBB8_1
@@ -967,8 +967,8 @@ define void @sink_v16s16_8(ptr %p, ptr %d, i64 %n, <16 x i8> %a) {
; CHECK-GI-NEXT: mov d2, v1.d[1]
; CHECK-GI-NEXT: smull v1.8h, v1.8b, v0.8b
; CHECK-GI-NEXT: smull v2.8h, v2.8b, v0.8b
-; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15
-; CHECK-GI-NEXT: sshr v2.8h, v2.8h, #15
+; CHECK-GI-NEXT: cmlt v1.8h, v1.8h, #0
+; CHECK-GI-NEXT: cmlt v2.8h, v2.8h, #0
; CHECK-GI-NEXT: uzp1 v1.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: str q1, [x0], #32
; CHECK-GI-NEXT: b.ne .LBB9_1
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
index 9bafc5b..2a8b3ce2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
@@ -999,16 +999,10 @@ entry:
}
define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: test_vaddhn_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vaddhn_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vaddhn_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <8 x i16> %a, %b
%vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1017,16 +1011,10 @@ entry:
}
define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SD-LABEL: test_vaddhn_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vaddhn_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vaddhn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <4 x i32> %a, %b
%vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1035,16 +1023,10 @@ entry:
}
define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SD-LABEL: test_vaddhn_s64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vaddhn_s64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vaddhn_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <2 x i64> %a, %b
%vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
@@ -1053,16 +1035,10 @@ entry:
}
define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: test_vaddhn_u16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vaddhn_u16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vaddhn_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <8 x i16> %a, %b
%vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1071,16 +1047,10 @@ entry:
}
define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SD-LABEL: test_vaddhn_u32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vaddhn_u32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vaddhn_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <4 x i32> %a, %b
%vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1089,16 +1059,10 @@ entry:
}
define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SD-LABEL: test_vaddhn_u64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vaddhn_u64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vaddhn_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vaddhn.i = add <2 x i64> %a, %b
%vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
@@ -1115,9 +1079,8 @@ define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b)
;
; CHECK-GI-LABEL: test_vaddhn_high_s16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: addhn v1.8b, v1.8h, v2.8h
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1141,9 +1104,8 @@ define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b)
;
; CHECK-GI-LABEL: test_vaddhn_high_s32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: addhn v1.4h, v1.4s, v2.4s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1167,9 +1129,8 @@ define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b)
;
; CHECK-GI-LABEL: test_vaddhn_high_s64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: addhn v1.2s, v1.2d, v2.2d
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1193,9 +1154,8 @@ define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b)
;
; CHECK-GI-LABEL: test_vaddhn_high_u16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: addhn v1.8b, v1.8h, v2.8h
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1219,9 +1179,8 @@ define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b)
;
; CHECK-GI-LABEL: test_vaddhn_high_u32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: addhn v1.4h, v1.4s, v2.4s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1245,9 +1204,8 @@ define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b)
;
; CHECK-GI-LABEL: test_vaddhn_high_u64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: addhn v1.2s, v1.2d, v2.2d
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1461,16 +1419,10 @@ entry:
}
define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: test_vsubhn_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vsubhn_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vsubhn_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <8 x i16> %a, %b
%vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1479,16 +1431,10 @@ entry:
}
define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SD-LABEL: test_vsubhn_s32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vsubhn_s32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vsubhn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <4 x i32> %a, %b
%vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1497,16 +1443,10 @@ entry:
}
define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SD-LABEL: test_vsubhn_s64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vsubhn_s64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vsubhn_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <2 x i64> %a, %b
%vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
@@ -1515,16 +1455,10 @@ entry:
}
define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: test_vsubhn_u16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vsubhn_u16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vsubhn_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <8 x i16> %a, %b
%vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1533,16 +1467,10 @@ entry:
}
define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SD-LABEL: test_vsubhn_u32:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vsubhn_u32:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vsubhn_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <4 x i32> %a, %b
%vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1551,16 +1479,10 @@ entry:
}
define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SD-LABEL: test_vsubhn_u64:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vsubhn_u64:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vsubhn_u64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
entry:
%vsubhn.i = sub <2 x i64> %a, %b
%vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
@@ -1577,9 +1499,8 @@ define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b)
;
; CHECK-GI-LABEL: test_vsubhn_high_s16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: subhn v1.8b, v1.8h, v2.8h
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1603,9 +1524,8 @@ define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b)
;
; CHECK-GI-LABEL: test_vsubhn_high_s32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: subhn v1.4h, v1.4s, v2.4s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1629,9 +1549,8 @@ define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b)
;
; CHECK-GI-LABEL: test_vsubhn_high_s64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: subhn v1.2s, v1.2d, v2.2d
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1655,9 +1574,8 @@ define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b)
;
; CHECK-GI-LABEL: test_vsubhn_high_u16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: subhn v1.8b, v1.8h, v2.8h
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1681,9 +1599,8 @@ define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b)
;
; CHECK-GI-LABEL: test_vsubhn_high_u32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: subhn v1.4h, v1.4s, v2.4s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
@@ -1707,9 +1624,8 @@ define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b)
;
; CHECK-GI-LABEL: test_vsubhn_high_u64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: subhn v1.2s, v1.2d, v2.2d
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
; CHECK-GI-NEXT: fmov x8, d1
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
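The vaddhn/vsubhn test diffs collapse to a single CHECK prefix because GlobalISel now matches the "natural" pattern directly: addhn is exactly "add, then keep the high half of each lane", i.e. trunc((a + b) >> half_width), so the add+shrn sequence folds away. One lane modeled in scalar C++:

#include <cassert>
#include <cstdint>

// Scalar model of one addhn.8b lane: 16-bit add, keep the high 8 bits.
uint8_t addhnLane(uint16_t A, uint16_t B) {
  return static_cast<uint8_t>(static_cast<uint16_t>(A + B) >> 8);
}

int main() {
  assert(addhnLane(0x1234, 0x0101) == 0x13); // 0x1335 >> 8
  assert(addhnLane(0xFFFF, 0x0001) == 0x00); // sum wraps to 0x0000
}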
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index 84879d1..03e6ca1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -524,8 +524,8 @@ define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
; CHECK-GI-NEXT: mov.b v1[15], w9
; CHECK-GI-NEXT: shl.16b v0, v0, #7
; CHECK-GI-NEXT: shl.16b v1, v1, #7
-; CHECK-GI-NEXT: sshr.16b v0, v0, #7
-; CHECK-GI-NEXT: sshr.16b v1, v1, #7
+; CHECK-GI-NEXT: cmlt.16b v0, v0, #0
+; CHECK-GI-NEXT: cmlt.16b v1, v1, #0
; CHECK-GI-NEXT: ret
%res = sext <32 x i1> %arg to <32 x i8>
ret <32 x i8> %res
@@ -934,10 +934,10 @@ define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
; CHECK-GI-NEXT: shl.16b v1, v1, #7
; CHECK-GI-NEXT: shl.16b v2, v2, #7
; CHECK-GI-NEXT: shl.16b v3, v3, #7
-; CHECK-GI-NEXT: sshr.16b v0, v0, #7
-; CHECK-GI-NEXT: sshr.16b v1, v1, #7
-; CHECK-GI-NEXT: sshr.16b v2, v2, #7
-; CHECK-GI-NEXT: sshr.16b v3, v3, #7
+; CHECK-GI-NEXT: cmlt.16b v0, v0, #0
+; CHECK-GI-NEXT: cmlt.16b v1, v1, #0
+; CHECK-GI-NEXT: cmlt.16b v2, v2, #0
+; CHECK-GI-NEXT: cmlt.16b v3, v3, #0
; CHECK-GI-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-GI-NEXT: ret
%res = sext <64 x i1> %arg to <64 x i8>
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index c408d7f..a3f4722 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -1914,21 +1914,13 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
}
define <8 x i16> @pr88784(<8 x i8> %l0, <8 x i8> %l1, <8 x i16> %l2) {
-; CHECK-SD-LABEL: pr88784:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: usubl.8h v0, v0, v1
-; CHECK-SD-NEXT: cmlt.8h v1, v2, #0
-; CHECK-SD-NEXT: ssra.8h v0, v2, #15
-; CHECK-SD-NEXT: eor.16b v0, v1, v0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: pr88784:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: usubl.8h v0, v0, v1
-; CHECK-GI-NEXT: sshr.8h v1, v2, #15
-; CHECK-GI-NEXT: ssra.8h v0, v2, #15
-; CHECK-GI-NEXT: eor.16b v0, v1, v0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: pr88784:
+; CHECK: // %bb.0:
+; CHECK-NEXT: usubl.8h v0, v0, v1
+; CHECK-NEXT: cmlt.8h v1, v2, #0
+; CHECK-NEXT: ssra.8h v0, v2, #15
+; CHECK-NEXT: eor.16b v0, v1, v0
+; CHECK-NEXT: ret
%l4 = zext <8 x i8> %l0 to <8 x i16>
%l5 = ashr <8 x i16> %l2, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%l6 = zext <8 x i8> %l1 to <8 x i16>
@@ -1947,7 +1939,7 @@ define <8 x i16> @pr88784_fixed(<8 x i8> %l0, <8 x i8> %l1, <8 x i16> %l2) {
; CHECK-GI-LABEL: pr88784_fixed:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: usubl.8h v0, v0, v1
-; CHECK-GI-NEXT: sshr.8h v1, v0, #15
+; CHECK-GI-NEXT: cmlt.8h v1, v0, #0
; CHECK-GI-NEXT: ssra.8h v0, v0, #15
; CHECK-GI-NEXT: eor.16b v0, v1, v0
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
index 11fb732..938712a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
@@ -1103,20 +1103,12 @@ define <2 x i64> @ssubl2_duplhs(i32 %lhs, <4 x i32> %rhs) {
}
define <8 x i8> @addhn8b_natural(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: addhn8b_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: ldr q1, [x1]
-; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: addhn8b_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: addhn8b_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%sum = add <8 x i16> %tmp1, %tmp2
@@ -1126,20 +1118,12 @@ define <8 x i8> @addhn8b_natural(ptr %A, ptr %B) nounwind {
}
define <4 x i16> @addhn4h_natural(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: addhn4h_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: ldr q1, [x1]
-; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: addhn4h_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: addhn4h_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%sum = add <4 x i32> %tmp1, %tmp2
@@ -1149,20 +1133,12 @@ define <4 x i16> @addhn4h_natural(ptr %A, ptr %B) nounwind {
}
define <2 x i32> @addhn2s_natural(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: addhn2s_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: ldr q1, [x1]
-; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: addhn2s_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: addhn2s_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i64>, ptr %B
%sum = add <2 x i64> %tmp1, %tmp2
@@ -1172,22 +1148,13 @@ define <2 x i32> @addhn2s_natural(ptr %A, ptr %B) nounwind {
}
define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: addhn2_16b_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q1, [x0]
-; CHECK-SD-NEXT: ldr q2, [x1]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: addhn2 v0.16b, v1.8h, v2.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: addhn2_16b_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h
-; CHECK-GI-NEXT: shrn2 v0.16b, v1.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: addhn2_16b_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: addhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%sum = add <8 x i16> %tmp1, %tmp2
@@ -1198,22 +1165,13 @@ define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
}
define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: addhn2_8h_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q1, [x0]
-; CHECK-SD-NEXT: ldr q2, [x1]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: addhn2 v0.8h, v1.4s, v2.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: addhn2_8h_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s
-; CHECK-GI-NEXT: shrn2 v0.8h, v1.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: addhn2_8h_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: addhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%sum = add <4 x i32> %tmp1, %tmp2
@@ -1224,22 +1182,13 @@ define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
}
define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: addhn2_4s_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q1, [x0]
-; CHECK-SD-NEXT: ldr q2, [x1]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: addhn2 v0.4s, v1.2d, v2.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: addhn2_4s_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: addhn2_4s_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i64>, ptr %B
%sum = add <2 x i64> %tmp1, %tmp2
@@ -1250,22 +1199,13 @@ define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
}
define <4 x i32> @addhn_addhn2_4s(ptr %A, ptr %B, ptr %C, ptr %D) nounwind {
-; CHECK-SD-LABEL: addhn_addhn2_4s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q1, [x0]
-; CHECK-SD-NEXT: ldr q2, [x1]
-; CHECK-SD-NEXT: addhn v0.2s, v1.2d, v2.2d
-; CHECK-SD-NEXT: addhn2 v0.4s, v1.2d, v2.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: addhn_addhn2_4s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: add v1.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: shrn v0.2s, v1.2d, #32
-; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: addhn_addhn2_4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: addhn v0.2s, v1.2d, v2.2d
+; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i64>, ptr %B
%sum1 = add <2 x i64> %tmp1, %tmp2
@@ -1281,20 +1221,12 @@ define <4 x i32> @addhn_addhn2_4s(ptr %A, ptr %B, ptr %C, ptr %D) nounwind {
}
define <8 x i8> @subhn8b_natural(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: subhn8b_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: ldr q1, [x1]
-; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: subhn8b_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: subhn8b_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h
+; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%diff = sub <8 x i16> %tmp1, %tmp2
@@ -1304,20 +1236,12 @@ define <8 x i8> @subhn8b_natural(ptr %A, ptr %B) nounwind {
}
define <4 x i16> @subhn4h_natural(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: subhn4h_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: ldr q1, [x1]
-; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: subhn4h_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: subhn4h_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s
+; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%diff = sub <4 x i32> %tmp1, %tmp2
@@ -1327,20 +1251,12 @@ define <4 x i16> @subhn4h_natural(ptr %A, ptr %B) nounwind {
}
define <2 x i32> @subhn2s_natural(ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: subhn2s_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: ldr q1, [x1]
-; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: subhn2s_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q0, [x0]
-; CHECK-GI-NEXT: ldr q1, [x1]
-; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: subhn2s_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d
+; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i64>, ptr %B
%diff = sub <2 x i64> %tmp1, %tmp2
@@ -1350,22 +1266,13 @@ define <2 x i32> @subhn2s_natural(ptr %A, ptr %B) nounwind {
}
define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: subhn2_16b_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q1, [x0]
-; CHECK-SD-NEXT: ldr q2, [x1]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: subhn2 v0.16b, v1.8h, v2.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: subhn2_16b_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h
-; CHECK-GI-NEXT: shrn2 v0.16b, v1.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: subhn2_16b_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: subhn2 v0.16b, v1.8h, v2.8h
+; CHECK-NEXT: ret
%tmp1 = load <8 x i16>, ptr %A
%tmp2 = load <8 x i16>, ptr %B
%diff = sub <8 x i16> %tmp1, %tmp2
@@ -1376,22 +1283,13 @@ define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
}
define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: subhn2_8h_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q1, [x0]
-; CHECK-SD-NEXT: ldr q2, [x1]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: subhn2 v0.8h, v1.4s, v2.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: subhn2_8h_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s
-; CHECK-GI-NEXT: shrn2 v0.8h, v1.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: subhn2_8h_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: subhn2 v0.8h, v1.4s, v2.4s
+; CHECK-NEXT: ret
%tmp1 = load <4 x i32>, ptr %A
%tmp2 = load <4 x i32>, ptr %B
%diff = sub <4 x i32> %tmp1, %tmp2
@@ -1402,22 +1300,13 @@ define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
}
define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
-; CHECK-SD-LABEL: subhn2_4s_natural:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ldr q1, [x0]
-; CHECK-SD-NEXT: ldr q2, [x1]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: subhn2 v0.4s, v1.2d, v2.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: subhn2_4s_natural:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d
-; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: subhn2_4s_natural:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: subhn2 v0.4s, v1.2d, v2.2d
+; CHECK-NEXT: ret
%tmp1 = load <2 x i64>, ptr %A
%tmp2 = load <2 x i64>, ptr %B
%diff = sub <2 x i64> %tmp1, %tmp2
@@ -1428,20 +1317,12 @@ define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
}
define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
-; CHECK-SD-LABEL: neg_narrow_i8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
-; CHECK-SD-NEXT: subhn v0.8b, v2.8h, v0.8h
-; CHECK-SD-NEXT: subhn2 v0.16b, v2.8h, v1.8h
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: neg_narrow_i8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: mvn v1.16b, v1.16b
-; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
-; CHECK-GI-NEXT: shrn2 v0.16b, v1.8h, #8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: neg_narrow_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff
+; CHECK-NEXT: subhn v0.8b, v2.8h, v0.8h
+; CHECK-NEXT: subhn2 v0.16b, v2.8h, v1.8h
+; CHECK-NEXT: ret
%not.i = xor <16 x i16> %a, splat (i16 -1)
%s = lshr <16 x i16> %not.i, splat (i16 8)
%vshrn_n = trunc nuw <16 x i16> %s to <16 x i8>
@@ -1449,20 +1330,12 @@ define <16 x i8> @neg_narrow_i8(<16 x i16> %a) {
}
define <8 x i16> @neg_narrow_i16(<8 x i32> %a) {
-; CHECK-SD-LABEL: neg_narrow_i16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
-; CHECK-SD-NEXT: subhn v0.4h, v2.4s, v0.4s
-; CHECK-SD-NEXT: subhn2 v0.8h, v2.4s, v1.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: neg_narrow_i16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: mvn v1.16b, v1.16b
-; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
-; CHECK-GI-NEXT: shrn2 v0.8h, v1.4s, #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: neg_narrow_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff
+; CHECK-NEXT: subhn v0.4h, v2.4s, v0.4s
+; CHECK-NEXT: subhn2 v0.8h, v2.4s, v1.4s
+; CHECK-NEXT: ret
%not.i = xor <8 x i32> %a, splat (i32 -1)
%s = lshr <8 x i32> %not.i, splat (i32 16)
%vshrn_n = trunc nuw <8 x i32> %s to <8 x i16>
@@ -1470,20 +1343,12 @@ define <8 x i16> @neg_narrow_i16(<8 x i32> %a) {
}
define <4 x i32> @neg_narrow_i32(<4 x i64> %a) {
-; CHECK-SD-LABEL: neg_narrow_i32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0xffffffffffffffff
-; CHECK-SD-NEXT: subhn v0.2s, v2.2d, v0.2d
-; CHECK-SD-NEXT: subhn2 v0.4s, v2.2d, v1.2d
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: neg_narrow_i32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mvn v0.16b, v0.16b
-; CHECK-GI-NEXT: mvn v1.16b, v1.16b
-; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-GI-NEXT: shrn2 v0.4s, v1.2d, #32
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: neg_narrow_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff
+; CHECK-NEXT: subhn v0.2s, v2.2d, v0.2d
+; CHECK-NEXT: subhn2 v0.4s, v2.2d, v1.2d
+; CHECK-NEXT: ret
%not.i = xor <4 x i64> %a, splat (i64 -1)
%s = lshr <4 x i64> %not.i, splat (i64 32)
%vshrn_n = trunc nuw <4 x i64> %s to <4 x i32>
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
index 9d0ade2..dc88f94 100644
--- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll
+++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
@@ -66,9 +66,9 @@ define <4 x i32> @combine_vec_sdiv_by_minsigned(<4 x i32> %x) {
;
; CHECK-GI-LABEL: combine_vec_sdiv_by_minsigned:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v1.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v0.4s, #0
; CHECK-GI-NEXT: usra v0.4s, v1.4s, #1
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-GI-NEXT: neg v0.4s, v0.4s
; CHECK-GI-NEXT: ret
%1 = sdiv <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
@@ -176,7 +176,7 @@ define <4 x i32> @combine_vec_sdiv_by_pos1(<4 x i32> %x) {
; CHECK-GI-NEXT: mov v1.s[2], w9
; CHECK-GI-NEXT: mov v1.s[3], w9
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %x, <i32 255, i32 255, i32 255, i32 255>
@@ -185,39 +185,24 @@ define <4 x i32> @combine_vec_sdiv_by_pos1(<4 x i32> %x) {
}
define <4 x i32> @combine_vec_sdiv_by_pow2a(<4 x i32> %x) {
-; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2a:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v1.4s, v0.4s, #0
-; CHECK-SD-NEXT: usra v0.4s, v1.4s, #30
-; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #2
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2a:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v1.4s, v0.4s, #31
-; CHECK-GI-NEXT: usra v0.4s, v1.4s, #30
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #2
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: combine_vec_sdiv_by_pow2a:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
+; CHECK-NEXT: usra v0.4s, v1.4s, #30
+; CHECK-NEXT: sshr v0.4s, v0.4s, #2
+; CHECK-NEXT: ret
%1 = sdiv <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
ret <4 x i32> %1
}
define <4 x i32> @combine_vec_sdiv_by_pow2a_neg(<4 x i32> %x) {
-; CHECK-SD-LABEL: combine_vec_sdiv_by_pow2a_neg:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v1.4s, v0.4s, #0
-; CHECK-SD-NEXT: usra v0.4s, v1.4s, #30
-; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #2
-; CHECK-SD-NEXT: neg v0.4s, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2a_neg:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v1.4s, v0.4s, #31
-; CHECK-GI-NEXT: usra v0.4s, v1.4s, #30
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #2
-; CHECK-GI-NEXT: neg v0.4s, v0.4s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: combine_vec_sdiv_by_pow2a_neg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
+; CHECK-NEXT: usra v0.4s, v1.4s, #30
+; CHECK-NEXT: sshr v0.4s, v0.4s, #2
+; CHECK-NEXT: neg v0.4s, v0.4s
+; CHECK-NEXT: ret
%1 = sdiv <4 x i32> %x, <i32 -4, i32 -4, i32 -4, i32 -4>
ret <4 x i32> %1
}
@@ -240,7 +225,7 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI14_1
-; CHECK-GI-NEXT: sshr v2.16b, v0.16b, #7
+; CHECK-GI-NEXT: cmlt v2.16b, v0.16b, #0
; CHECK-GI-NEXT: adrp x9, .LCPI14_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_1]
; CHECK-GI-NEXT: adrp x8, .LCPI14_2
@@ -252,7 +237,7 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) {
; CHECK-GI-NEXT: neg v2.16b, v2.16b
; CHECK-GI-NEXT: add v1.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: sshl v1.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: sshr v2.16b, v3.16b, #7
+; CHECK-GI-NEXT: cmlt v2.16b, v3.16b, #0
; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: ret
%1 = sdiv <16 x i8> %x, <i8 1, i8 4, i8 2, i8 16, i8 8, i8 32, i8 64, i8 2, i8 1, i8 4, i8 2, i8 16, i8 8, i8 32, i8 64, i8 2>
@@ -278,7 +263,7 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v8i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI15_1
-; CHECK-GI-NEXT: sshr v2.8h, v0.8h, #15
+; CHECK-GI-NEXT: cmlt v2.8h, v0.8h, #0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI15_1]
; CHECK-GI-NEXT: adrp x8, .LCPI15_0
; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI15_0]
@@ -291,7 +276,7 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) {
; CHECK-GI-NEXT: add v1.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: shl v2.8h, v2.8h, #15
; CHECK-GI-NEXT: sshl v1.8h, v1.8h, v3.8h
-; CHECK-GI-NEXT: sshr v2.8h, v2.8h, #15
+; CHECK-GI-NEXT: cmlt v2.8h, v2.8h, #0
; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: ret
%1 = sdiv <8 x i16> %x, <i16 1, i16 4, i16 2, i16 16, i16 8, i16 32, i16 64, i16 2>
@@ -322,8 +307,8 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v16i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI16_1
-; CHECK-GI-NEXT: sshr v3.8h, v0.8h, #15
-; CHECK-GI-NEXT: sshr v4.8h, v1.8h, #15
+; CHECK-GI-NEXT: cmlt v3.8h, v0.8h, #0
+; CHECK-GI-NEXT: cmlt v4.8h, v1.8h, #0
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI16_1]
; CHECK-GI-NEXT: adrp x8, .LCPI16_0
; CHECK-GI-NEXT: ldr d5, [x8, :lo12:.LCPI16_0]
@@ -339,7 +324,7 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) {
; CHECK-GI-NEXT: add v2.8h, v1.8h, v2.8h
; CHECK-GI-NEXT: sshl v3.8h, v3.8h, v4.8h
; CHECK-GI-NEXT: sshl v2.8h, v2.8h, v4.8h
-; CHECK-GI-NEXT: sshr v4.8h, v5.8h, #15
+; CHECK-GI-NEXT: cmlt v4.8h, v5.8h, #0
; CHECK-GI-NEXT: bif v0.16b, v3.16b, v4.16b
; CHECK-GI-NEXT: bif v1.16b, v2.16b, v4.16b
; CHECK-GI-NEXT: ret
@@ -381,12 +366,12 @@ define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v32i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI17_1
-; CHECK-GI-NEXT: sshr v5.8h, v0.8h, #15
-; CHECK-GI-NEXT: sshr v6.8h, v1.8h, #15
+; CHECK-GI-NEXT: cmlt v5.8h, v0.8h, #0
+; CHECK-GI-NEXT: cmlt v6.8h, v1.8h, #0
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI17_1]
; CHECK-GI-NEXT: adrp x8, .LCPI17_0
-; CHECK-GI-NEXT: sshr v7.8h, v2.8h, #15
-; CHECK-GI-NEXT: sshr v16.8h, v3.8h, #15
+; CHECK-GI-NEXT: cmlt v7.8h, v2.8h, #0
+; CHECK-GI-NEXT: cmlt v16.8h, v3.8h, #0
; CHECK-GI-NEXT: ldr d17, [x8, :lo12:.LCPI17_0]
; CHECK-GI-NEXT: adrp x8, .LCPI17_2
; CHECK-GI-NEXT: neg v4.8h, v4.8h
@@ -402,7 +387,7 @@ define <32 x i16> @combine_vec_sdiv_by_pow2b_v32i16(<32 x i16> %x) {
; CHECK-GI-NEXT: add v6.8h, v1.8h, v6.8h
; CHECK-GI-NEXT: add v7.8h, v2.8h, v7.8h
; CHECK-GI-NEXT: add v4.8h, v3.8h, v4.8h
-; CHECK-GI-NEXT: sshr v17.8h, v17.8h, #15
+; CHECK-GI-NEXT: cmlt v17.8h, v17.8h, #0
; CHECK-GI-NEXT: sshl v5.8h, v5.8h, v16.8h
; CHECK-GI-NEXT: sshl v6.8h, v6.8h, v16.8h
; CHECK-GI-NEXT: sshl v7.8h, v7.8h, v16.8h
@@ -436,7 +421,7 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_v4i32(<4 x i32> %x) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: mov w9, #0 // =0x0
-; CHECK-GI-NEXT: sshr v3.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v3.4s, v0.4s, #0
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: adrp x8, .LCPI18_0
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI18_0]
@@ -451,7 +436,7 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_v4i32(<4 x i32> %x) {
; CHECK-GI-NEXT: mov v1.s[3], w9
; CHECK-GI-NEXT: sshl v2.4s, v2.4s, v3.4s
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
%1 = sdiv <4 x i32> %x, <i32 1, i32 4, i32 8, i32 16>
@@ -483,10 +468,10 @@ define <8 x i32> @combine_vec_sdiv_by_pow2b_v8i32(<8 x i32> %x) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: mov w9, #0 // =0x0
-; CHECK-GI-NEXT: sshr v4.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v4.4s, v0.4s, #0
; CHECK-GI-NEXT: fmov s2, w8
; CHECK-GI-NEXT: adrp x8, .LCPI19_0
-; CHECK-GI-NEXT: sshr v5.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v5.4s, v1.4s, #0
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI19_0]
; CHECK-GI-NEXT: adrp x8, .LCPI19_1
; CHECK-GI-NEXT: mov v2.h[1], w9
@@ -503,7 +488,7 @@ define <8 x i32> @combine_vec_sdiv_by_pow2b_v8i32(<8 x i32> %x) {
; CHECK-GI-NEXT: sshl v3.4s, v3.4s, v5.4s
; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-GI-NEXT: shl v2.4s, v2.4s, #31
-; CHECK-GI-NEXT: sshr v2.4s, v2.4s, #31
+; CHECK-GI-NEXT: cmlt v2.4s, v2.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v4.16b, v2.16b
; CHECK-GI-NEXT: bif v1.16b, v3.16b, v2.16b
; CHECK-GI-NEXT: ret
@@ -546,13 +531,13 @@ define <16 x i32> @combine_vec_sdiv_by_pow2b_v16i32(<16 x i32> %x) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: mov w9, #0 // =0x0
-; CHECK-GI-NEXT: sshr v6.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v6.4s, v0.4s, #0
; CHECK-GI-NEXT: fmov s4, w8
; CHECK-GI-NEXT: adrp x8, .LCPI20_0
-; CHECK-GI-NEXT: sshr v7.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v7.4s, v1.4s, #0
; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI20_0]
-; CHECK-GI-NEXT: sshr v16.4s, v2.4s, #31
-; CHECK-GI-NEXT: sshr v17.4s, v3.4s, #31
+; CHECK-GI-NEXT: cmlt v16.4s, v2.4s, #0
+; CHECK-GI-NEXT: cmlt v17.4s, v3.4s, #0
; CHECK-GI-NEXT: adrp x8, .LCPI20_1
; CHECK-GI-NEXT: mov v4.h[1], w9
; CHECK-GI-NEXT: neg v5.4s, v5.4s
@@ -574,7 +559,7 @@ define <16 x i32> @combine_vec_sdiv_by_pow2b_v16i32(<16 x i32> %x) {
; CHECK-GI-NEXT: sshl v5.4s, v5.4s, v17.4s
; CHECK-GI-NEXT: ushll v4.4s, v4.4h, #0
; CHECK-GI-NEXT: shl v4.4s, v4.4s, #31
-; CHECK-GI-NEXT: sshr v4.4s, v4.4s, #31
+; CHECK-GI-NEXT: cmlt v4.4s, v4.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v6.16b, v4.16b
; CHECK-GI-NEXT: bif v1.16b, v7.16b, v4.16b
; CHECK-GI-NEXT: bif v2.16b, v16.16b, v4.16b
@@ -603,7 +588,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) {
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v2i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI21_1
-; CHECK-GI-NEXT: sshr v2.2d, v0.2d, #63
+; CHECK-GI-NEXT: cmlt v2.2d, v0.2d, #0
; CHECK-GI-NEXT: adrp x9, .LCPI21_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI21_1]
; CHECK-GI-NEXT: adrp x8, .LCPI21_2
@@ -615,7 +600,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) {
; CHECK-GI-NEXT: neg v2.2d, v2.2d
; CHECK-GI-NEXT: add v1.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: sshl v1.2d, v1.2d, v2.2d
-; CHECK-GI-NEXT: sshr v2.2d, v3.2d, #63
+; CHECK-GI-NEXT: cmlt v2.2d, v3.2d, #0
; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: ret
%1 = sdiv <2 x i64> %x, <i64 1, i64 4>
@@ -649,7 +634,7 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
; CHECK-GI-LABEL: combine_vec_sdiv_by_pow2b_v4i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI22_2
-; CHECK-GI-NEXT: sshr v3.2d, v0.2d, #63
+; CHECK-GI-NEXT: cmlt v3.2d, v0.2d, #0
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI22_2]
; CHECK-GI-NEXT: adrp x8, .LCPI22_1
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI22_1]
@@ -662,13 +647,13 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
; CHECK-GI-NEXT: adrp x8, .LCPI22_3
; CHECK-GI-NEXT: neg v5.2d, v5.2d
; CHECK-GI-NEXT: ushl v2.2d, v3.2d, v2.2d
-; CHECK-GI-NEXT: sshr v3.2d, v1.2d, #63
+; CHECK-GI-NEXT: cmlt v3.2d, v1.2d, #0
; CHECK-GI-NEXT: shl v6.2d, v6.2d, #63
; CHECK-GI-NEXT: add v2.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: ushl v3.2d, v3.2d, v4.2d
; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI22_3]
; CHECK-GI-NEXT: sshl v2.2d, v2.2d, v5.2d
-; CHECK-GI-NEXT: sshr v5.2d, v6.2d, #63
+; CHECK-GI-NEXT: cmlt v5.2d, v6.2d, #0
; CHECK-GI-NEXT: add v1.2d, v1.2d, v3.2d
; CHECK-GI-NEXT: neg v3.2d, v4.2d
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v5.16b
@@ -715,13 +700,13 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: mov w9, #0 // =0x0
-; CHECK-GI-NEXT: sshr v7.2d, v0.2d, #63
+; CHECK-GI-NEXT: cmlt v7.2d, v0.2d, #0
; CHECK-GI-NEXT: fmov s4, w8
; CHECK-GI-NEXT: adrp x8, .LCPI23_1
-; CHECK-GI-NEXT: sshr v16.2d, v1.2d, #63
+; CHECK-GI-NEXT: cmlt v16.2d, v1.2d, #0
; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI23_1]
-; CHECK-GI-NEXT: sshr v17.2d, v2.2d, #63
-; CHECK-GI-NEXT: sshr v18.2d, v3.2d, #63
+; CHECK-GI-NEXT: cmlt v17.2d, v2.2d, #0
+; CHECK-GI-NEXT: cmlt v18.2d, v3.2d, #0
; CHECK-GI-NEXT: adrp x8, .LCPI23_3
; CHECK-GI-NEXT: mov v4.h[1], w9
; CHECK-GI-NEXT: neg v5.2d, v5.2d
@@ -754,9 +739,9 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
; CHECK-GI-NEXT: shl v4.2d, v4.2d, #63
; CHECK-GI-NEXT: sshl v16.2d, v16.2d, v20.2d
; CHECK-GI-NEXT: sshl v6.2d, v6.2d, v20.2d
-; CHECK-GI-NEXT: sshr v17.2d, v17.2d, #63
-; CHECK-GI-NEXT: sshr v18.2d, v18.2d, #63
-; CHECK-GI-NEXT: sshr v4.2d, v4.2d, #63
+; CHECK-GI-NEXT: cmlt v17.2d, v17.2d, #0
+; CHECK-GI-NEXT: cmlt v18.2d, v18.2d, #0
+; CHECK-GI-NEXT: cmlt v4.2d, v4.2d, #0
; CHECK-GI-NEXT: bif v0.16b, v7.16b, v17.16b
; CHECK-GI-NEXT: bif v1.16b, v16.16b, v18.16b
; CHECK-GI-NEXT: bif v2.16b, v5.16b, v4.16b
@@ -792,7 +777,7 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) {
; CHECK-GI-NEXT: adrp x10, .LCPI24_0
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: ldr q2, [x10, :lo12:.LCPI24_0]
-; CHECK-GI-NEXT: sshr v3.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v3.4s, v0.4s, #0
; CHECK-GI-NEXT: fmov s4, w9
; CHECK-GI-NEXT: adrp x10, .LCPI24_1
; CHECK-GI-NEXT: neg v2.4s, v2.4s
@@ -807,10 +792,10 @@ define <4 x i32> @combine_vec_sdiv_by_pow2b_PosAndNeg(<4 x i32> %x) {
; CHECK-GI-NEXT: mov v1.s[3], w9
; CHECK-GI-NEXT: sshl v2.4s, v2.4s, v3.4s
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: shl v1.4s, v4.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: neg v2.4s, v0.4s
; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
@@ -871,7 +856,7 @@ define <16 x i8> @non_splat_minus_one_divisor_0(<16 x i8> %A) {
; CHECK-GI-NEXT: neg v2.16b, v0.16b
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI25_0]
; CHECK-GI-NEXT: shl v1.16b, v1.16b, #7
-; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7
+; CHECK-GI-NEXT: cmlt v1.16b, v1.16b, #0
; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
%div = sdiv <16 x i8> %A, <i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 -1, i8 1, i8 -1, i8 -1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -901,7 +886,7 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; CHECK-GI-LABEL: non_splat_minus_one_divisor_1:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: adrp x8, .LCPI26_2
-; CHECK-GI-NEXT: sshr v2.16b, v0.16b, #7
+; CHECK-GI-NEXT: cmlt v2.16b, v0.16b, #0
; CHECK-GI-NEXT: adrp x9, .LCPI26_1
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI26_2]
; CHECK-GI-NEXT: adrp x8, .LCPI26_3
@@ -914,11 +899,11 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; CHECK-GI-NEXT: neg v2.16b, v2.16b
; CHECK-GI-NEXT: add v1.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: sshl v1.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: sshr v2.16b, v3.16b, #7
+; CHECK-GI-NEXT: cmlt v2.16b, v3.16b, #0
; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI26_0]
; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: shl v1.16b, v3.16b, #7
-; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7
+; CHECK-GI-NEXT: cmlt v1.16b, v1.16b, #0
; CHECK-GI-NEXT: neg v2.16b, v0.16b
; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
@@ -954,7 +939,7 @@ define <4 x i32> @non_splat_minus_one_divisor_2(<4 x i32> %A) {
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: ldr q2, [x9, :lo12:.LCPI27_0]
; CHECK-GI-NEXT: fmov s4, w8
-; CHECK-GI-NEXT: sshr v3.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v3.4s, v0.4s, #0
; CHECK-GI-NEXT: adrp x9, .LCPI27_1
; CHECK-GI-NEXT: neg v2.4s, v2.4s
; CHECK-GI-NEXT: mov v1.s[1], w8
@@ -969,10 +954,10 @@ define <4 x i32> @non_splat_minus_one_divisor_2(<4 x i32> %A) {
; CHECK-GI-NEXT: sshl v2.4s, v2.4s, v3.4s
; CHECK-GI-NEXT: mov v4.s[3], w8
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: shl v1.4s, v4.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: neg v2.4s, v0.4s
; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
@@ -1207,7 +1192,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform7(<8 x i16> %x) {
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: shl v1.8h, v1.8h, #15
-; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #15
+; CHECK-GI-NEXT: cmlt v1.8h, v1.8h, #0
; CHECK-GI-NEXT: bit v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
%1 = sdiv <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 1, i16 1, i16 1, i16 1>
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
index 121cc30..babb4ed 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -605,7 +605,7 @@ define i32 @extract_v4i32_select(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %c
; CHECK-GI-NEXT: mov w8, w0
; CHECK-GI-NEXT: and x8, x8, #0x3
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: str q0, [sp]
; CHECK-GI-NEXT: ldr w0, [x9, x8, lsl #2]
@@ -634,7 +634,7 @@ define i32 @extract_v4i32_select_const(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x
; CHECK-GI-NEXT: adrp x8, .LCPI23_0
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI23_0]
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: mov s0, v0.s[2]
; CHECK-GI-NEXT: fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll
index 6d673f1..30fb82e 100644
--- a/llvm/test/CodeGen/AArch64/fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/fcmp.ll
@@ -661,7 +661,7 @@ define <2 x double> @v2f128_double(<2 x fp128> %a, <2 x fp128> %b, <2 x double>
; CHECK-GI-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0
; CHECK-GI-NEXT: bsl v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: add sp, sp, #80
; CHECK-GI-NEXT: ret
@@ -1540,7 +1540,7 @@ define <7 x i32> @v7f16_i32(<7 x half> %a, <7 x half> %b, <7 x i32> %d, <7 x i32
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
; CHECK-GI-FP16-NEXT: mov v1.s[2], w8
; CHECK-GI-FP16-NEXT: mov w8, #-1 // =0xffffffff
-; CHECK-GI-FP16-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-FP16-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-GI-FP16-NEXT: fmov s4, w8
; CHECK-GI-FP16-NEXT: mov v4.s[1], w8
; CHECK-GI-FP16-NEXT: ushl v1.4s, v1.4s, v2.4s
@@ -1602,7 +1602,7 @@ define <4 x i32> @v4f16_i32(<4 x half> %a, <4 x half> %b, <4 x i32> %d, <4 x i32
; CHECK-GI-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h
; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-GI-FP16-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-FP16-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-GI-FP16-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-GI-FP16-NEXT: ret
entry:
@@ -1657,8 +1657,8 @@ define <8 x i32> @v8f16_i32(<8 x half> %a, <8 x half> %b, <8 x i32> %d, <8 x i32
; CHECK-GI-FP16-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-FP16-NEXT: shl v1.4s, v1.4s, #31
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-GI-FP16-NEXT: sshr v1.4s, v1.4s, #31
-; CHECK-GI-FP16-NEXT: sshr v6.4s, v0.4s, #31
+; CHECK-GI-FP16-NEXT: cmlt v1.4s, v1.4s, #0
+; CHECK-GI-FP16-NEXT: cmlt v6.4s, v0.4s, #0
; CHECK-GI-FP16-NEXT: mov v0.16b, v1.16b
; CHECK-GI-FP16-NEXT: mov v1.16b, v6.16b
; CHECK-GI-FP16-NEXT: bsl v0.16b, v2.16b, v4.16b
@@ -1748,10 +1748,10 @@ define <16 x i32> @v16f16_i32(<16 x half> %a, <16 x half> %b, <16 x i32> %d, <16
; CHECK-GI-FP16-NEXT: shl v0.4s, v0.4s, #31
; CHECK-GI-FP16-NEXT: shl v3.4s, v3.4s, #31
; CHECK-GI-FP16-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-FP16-NEXT: sshr v2.4s, v2.4s, #31
-; CHECK-GI-FP16-NEXT: sshr v16.4s, v0.4s, #31
-; CHECK-GI-FP16-NEXT: sshr v3.4s, v3.4s, #31
-; CHECK-GI-FP16-NEXT: sshr v17.4s, v1.4s, #31
+; CHECK-GI-FP16-NEXT: cmlt v2.4s, v2.4s, #0
+; CHECK-GI-FP16-NEXT: cmlt v16.4s, v0.4s, #0
+; CHECK-GI-FP16-NEXT: cmlt v3.4s, v3.4s, #0
+; CHECK-GI-FP16-NEXT: cmlt v17.4s, v1.4s, #0
; CHECK-GI-FP16-NEXT: ldp q0, q1, [sp]
; CHECK-GI-FP16-NEXT: bit v0.16b, v4.16b, v2.16b
; CHECK-GI-FP16-NEXT: mov v2.16b, v3.16b
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll
index 00de153..24be923 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll
@@ -111,14 +111,14 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32(half %x) {
-; CHECK-CVT-LABEL: utesth_f16i32:
+define i32 @utest_f16i32(half %x) {
+; CHECK-CVT-LABEL: utest_f16i32:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvt s0, h0
; CHECK-CVT-NEXT: fcvtzu w0, s0
; CHECK-CVT-NEXT: ret
;
-; CHECK-FP16-LABEL: utesth_f16i32:
+; CHECK-FP16-LABEL: utest_f16i32:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fcvtzu w0, h0
; CHECK-FP16-NEXT: ret
@@ -298,8 +298,8 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16(half %x) {
-; CHECK-CVT-LABEL: utesth_f16i16:
+define i16 @utest_f16i16(half %x) {
+; CHECK-CVT-LABEL: utest_f16i16:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvt s0, h0
; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff
@@ -308,7 +308,7 @@ define i16 @utesth_f16i16(half %x) {
; CHECK-CVT-NEXT: csel w0, w8, w9, lo
; CHECK-CVT-NEXT: ret
;
-; CHECK-FP16-LABEL: utesth_f16i16:
+; CHECK-FP16-LABEL: utest_f16i16:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fcvtzu w8, h0
; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff
@@ -493,8 +493,8 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64(half %x) {
-; CHECK-LABEL: utesth_f16i64:
+define i64 @utest_f16i64(half %x) {
+; CHECK-LABEL: utest_f16i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
@@ -636,14 +636,14 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32_mm(half %x) {
-; CHECK-CVT-LABEL: utesth_f16i32_mm:
+define i32 @utest_f16i32_mm(half %x) {
+; CHECK-CVT-LABEL: utest_f16i32_mm:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvt s0, h0
; CHECK-CVT-NEXT: fcvtzu w0, s0
; CHECK-CVT-NEXT: ret
;
-; CHECK-FP16-LABEL: utesth_f16i32_mm:
+; CHECK-FP16-LABEL: utest_f16i32_mm:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fcvtzu w0, h0
; CHECK-FP16-NEXT: ret
@@ -808,8 +808,8 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16_mm(half %x) {
-; CHECK-CVT-LABEL: utesth_f16i16_mm:
+define i16 @utest_f16i16_mm(half %x) {
+; CHECK-CVT-LABEL: utest_f16i16_mm:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvt s0, h0
; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff
@@ -818,7 +818,7 @@ define i16 @utesth_f16i16_mm(half %x) {
; CHECK-CVT-NEXT: csel w0, w8, w9, lo
; CHECK-CVT-NEXT: ret
;
-; CHECK-FP16-LABEL: utesth_f16i16_mm:
+; CHECK-FP16-LABEL: utest_f16i16_mm:
; CHECK-FP16: // %bb.0: // %entry
; CHECK-FP16-NEXT: fcvtzu w8, h0
; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff
@@ -986,8 +986,8 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64_mm(half %x) {
-; CHECK-LABEL: utesth_f16i64_mm:
+define i64 @utest_f16i64_mm(half %x) {
+; CHECK-LABEL: utest_f16i64_mm:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
@@ -1026,6 +1026,29 @@ entry:
ret i64 %conv6
}
+; i32 non-saturating
+
+define i32 @ustest_f16i32_nsat(half %x) {
+; CHECK-CVT-LABEL: ustest_f16i32_nsat:
+; CHECK-CVT: // %bb.0:
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: fcvtzs w8, s0
+; CHECK-CVT-NEXT: and w8, w8, w8, asr #31
+; CHECK-CVT-NEXT: bic w0, w8, w8, asr #31
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: ustest_f16i32_nsat:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: fcvtzs w8, h0
+; CHECK-FP16-NEXT: and w8, w8, w8, asr #31
+; CHECK-FP16-NEXT: bic w0, w8, w8, asr #31
+; CHECK-FP16-NEXT: ret
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ ret i32 %spec.store.select7
+}
+
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index b09a867..637c028 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -321,20 +321,20 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-CVT-SD-LABEL: utesth_f16i32:
+define <4 x i32> @utest_f16i32(<4 x half> %x) {
+; CHECK-CVT-SD-LABEL: utest_f16i32:
; CHECK-CVT-SD: // %bb.0: // %entry
; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: utesth_f16i32:
+; CHECK-FP16-SD-LABEL: utest_f16i32:
; CHECK-FP16-SD: // %bb.0: // %entry
; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h
; CHECK-FP16-SD-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-FP16-SD-NEXT: ret
;
-; CHECK-CVT-GI-LABEL: utesth_f16i32:
+; CHECK-CVT-GI-LABEL: utest_f16i32:
; CHECK-CVT-GI: // %bb.0: // %entry
; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-GI-NEXT: movi v1.2d, #0x000000ffffffff
@@ -349,7 +349,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s
; CHECK-CVT-GI-NEXT: ret
;
-; CHECK-FP16-GI-LABEL: utesth_f16i32:
+; CHECK-FP16-GI-LABEL: utest_f16i32:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP16-GI-NEXT: mov h2, v0.h[1]
@@ -614,8 +614,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-CVT-LABEL: utesth_f16i16:
+define <8 x i16> @utest_f16i16(<8 x half> %x) {
+; CHECK-CVT-LABEL: utest_f16i16:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
@@ -625,12 +625,12 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
; CHECK-CVT-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: utesth_f16i16:
+; CHECK-FP16-SD-LABEL: utest_f16i16:
; CHECK-FP16-SD: // %bb.0: // %entry
; CHECK-FP16-SD-NEXT: fcvtzu v0.8h, v0.8h
; CHECK-FP16-SD-NEXT: ret
;
-; CHECK-FP16-GI-LABEL: utesth_f16i16:
+; CHECK-FP16-GI-LABEL: utest_f16i16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h
@@ -1746,8 +1746,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64(<2 x half> %x) {
-; CHECK-CVT-SD-LABEL: utesth_f16i64:
+define <2 x i64> @utest_f16i64(<2 x half> %x) {
+; CHECK-CVT-SD-LABEL: utest_f16i64:
; CHECK-CVT-SD: // %bb.0: // %entry
; CHECK-CVT-SD-NEXT: sub sp, sp, #48
; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
@@ -1777,7 +1777,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-CVT-SD-NEXT: add sp, sp, #48
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: utesth_f16i64:
+; CHECK-FP16-SD-LABEL: utest_f16i64:
; CHECK-FP16-SD: // %bb.0: // %entry
; CHECK-FP16-SD-NEXT: sub sp, sp, #48
; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
@@ -1807,7 +1807,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-FP16-SD-NEXT: add sp, sp, #48
; CHECK-FP16-SD-NEXT: ret
;
-; CHECK-CVT-GI-LABEL: utesth_f16i64:
+; CHECK-CVT-GI-LABEL: utest_f16i64:
; CHECK-CVT-GI: // %bb.0: // %entry
; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
@@ -1819,7 +1819,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-CVT-GI-NEXT: mov v0.d[1], x9
; CHECK-CVT-GI-NEXT: ret
;
-; CHECK-FP16-GI-LABEL: utesth_f16i64:
+; CHECK-FP16-GI-LABEL: utest_f16i64:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
@@ -2307,20 +2307,20 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-CVT-SD-LABEL: utesth_f16i32_mm:
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) {
+; CHECK-CVT-SD-LABEL: utest_f16i32_mm:
; CHECK-CVT-SD: // %bb.0: // %entry
; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-SD-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: utesth_f16i32_mm:
+; CHECK-FP16-SD-LABEL: utest_f16i32_mm:
; CHECK-FP16-SD: // %bb.0: // %entry
; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h
; CHECK-FP16-SD-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-FP16-SD-NEXT: ret
;
-; CHECK-CVT-GI-LABEL: utesth_f16i32_mm:
+; CHECK-CVT-GI-LABEL: utest_f16i32_mm:
; CHECK-CVT-GI: // %bb.0: // %entry
; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-GI-NEXT: movi v1.2d, #0x000000ffffffff
@@ -2335,7 +2335,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-CVT-GI-NEXT: uzp1 v0.4s, v2.4s, v0.4s
; CHECK-CVT-GI-NEXT: ret
;
-; CHECK-FP16-GI-LABEL: utesth_f16i32_mm:
+; CHECK-FP16-GI-LABEL: utest_f16i32_mm:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP16-GI-NEXT: mov h2, v0.h[1]
@@ -2585,8 +2585,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-CVT-LABEL: utesth_f16i16_mm:
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
+; CHECK-CVT-LABEL: utest_f16i16_mm:
; CHECK-CVT: // %bb.0: // %entry
; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h
; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h
@@ -2596,12 +2596,12 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-CVT-NEXT: uqxtn2 v0.8h, v2.4s
; CHECK-CVT-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: utesth_f16i16_mm:
+; CHECK-FP16-SD-LABEL: utest_f16i16_mm:
; CHECK-FP16-SD: // %bb.0: // %entry
; CHECK-FP16-SD-NEXT: fcvtzu v0.8h, v0.8h
; CHECK-FP16-SD-NEXT: ret
;
-; CHECK-FP16-GI-LABEL: utesth_f16i16_mm:
+; CHECK-FP16-GI-LABEL: utest_f16i16_mm:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: fcvtl v1.4s, v0.4h
; CHECK-FP16-GI-NEXT: fcvtl2 v0.4s, v0.8h
@@ -3694,8 +3694,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
-; CHECK-CVT-SD-LABEL: utesth_f16i64_mm:
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
+; CHECK-CVT-SD-LABEL: utest_f16i64_mm:
; CHECK-CVT-SD: // %bb.0: // %entry
; CHECK-CVT-SD-NEXT: sub sp, sp, #48
; CHECK-CVT-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
@@ -3725,7 +3725,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-CVT-SD-NEXT: add sp, sp, #48
; CHECK-CVT-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: utesth_f16i64_mm:
+; CHECK-FP16-SD-LABEL: utest_f16i64_mm:
; CHECK-FP16-SD: // %bb.0: // %entry
; CHECK-FP16-SD-NEXT: sub sp, sp, #48
; CHECK-FP16-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
@@ -3755,7 +3755,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-FP16-SD-NEXT: add sp, sp, #48
; CHECK-FP16-SD-NEXT: ret
;
-; CHECK-CVT-GI-LABEL: utesth_f16i64_mm:
+; CHECK-CVT-GI-LABEL: utest_f16i64_mm:
; CHECK-CVT-GI: // %bb.0: // %entry
; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-CVT-GI-NEXT: mov h1, v0.h[1]
@@ -3767,7 +3767,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-CVT-GI-NEXT: mov v0.d[1], x9
; CHECK-CVT-GI-NEXT: ret
;
-; CHECK-FP16-GI-LABEL: utesth_f16i64_mm:
+; CHECK-FP16-GI-LABEL: utest_f16i64_mm:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP16-GI-NEXT: mov h1, v0.h[1]
@@ -3941,6 +3941,51 @@ entry:
ret <2 x i64> %conv6
}
+; i32 non-saturating
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) {
+; CHECK-CVT-SD-LABEL: ustest_f16i32_nsat:
+; CHECK-CVT-SD: // %bb.0: // %entry
+; CHECK-CVT-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT: movi v1.2d, #0000000000000000
+; CHECK-CVT-SD-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-SD-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: ustest_f16i32_nsat:
+; CHECK-FP16-SD: // %bb.0: // %entry
+; CHECK-FP16-SD-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-FP16-SD-NEXT: movi v1.2d, #0000000000000000
+; CHECK-FP16-SD-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-FP16-SD-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-FP16-SD-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-CVT-GI-LABEL: ustest_f16i32_nsat:
+; CHECK-CVT-GI: // %bb.0: // %entry
+; CHECK-CVT-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-CVT-GI-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-CVT-GI-NEXT: smin v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: ustest_f16i32_nsat:
+; CHECK-FP16-GI: // %bb.0: // %entry
+; CHECK-FP16-GI-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-FP16-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-FP16-GI-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-FP16-GI-NEXT: smin v0.4s, v1.4s, v0.4s
+; CHECK-FP16-GI-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-FP16-GI-NEXT: ret
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ ret <4 x i32> %spec.store.select7
+}
+
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 0c84468f..2026959 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -1110,7 +1110,7 @@ define <8 x i8> @vselect_constant_cond_zero_v8i8(<8 x i8> %a) {
; CHECK-GI-NEXT: adrp x8, .LCPI83_0
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI83_0]
; CHECK-GI-NEXT: shl v1.8b, v1.8b, #7
-; CHECK-GI-NEXT: sshr v1.8b, v1.8b, #7
+; CHECK-GI-NEXT: cmlt v1.8b, v1.8b, #0
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%b = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i8> %a, <8 x i8> zeroinitializer
@@ -1133,7 +1133,7 @@ define <4 x i16> @vselect_constant_cond_zero_v4i16(<4 x i16> %a) {
; CHECK-GI-NEXT: mov v1.h[2], w9
; CHECK-GI-NEXT: mov v1.h[3], w8
; CHECK-GI-NEXT: shl v1.4h, v1.4h, #15
-; CHECK-GI-NEXT: sshr v1.4h, v1.4h, #15
+; CHECK-GI-NEXT: cmlt v1.4h, v1.4h, #0
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%b = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> %a, <4 x i16> zeroinitializer
@@ -1157,7 +1157,7 @@ define <4 x i32> @vselect_constant_cond_zero_v4i32(<4 x i32> %a) {
; CHECK-GI-NEXT: mov v1.s[2], w9
; CHECK-GI-NEXT: mov v1.s[3], w8
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%b = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> zeroinitializer
@@ -1176,7 +1176,7 @@ define <8 x i8> @vselect_constant_cond_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-GI-NEXT: adrp x8, .LCPI86_0
; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI86_0]
; CHECK-GI-NEXT: shl v2.8b, v2.8b, #7
-; CHECK-GI-NEXT: sshr v2.8b, v2.8b, #7
+; CHECK-GI-NEXT: cmlt v2.8b, v2.8b, #0
; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: ret
%c = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i8> %a, <8 x i8> %b
@@ -1199,7 +1199,7 @@ define <4 x i16> @vselect_constant_cond_v4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-GI-NEXT: mov v2.h[2], w9
; CHECK-GI-NEXT: mov v2.h[3], w8
; CHECK-GI-NEXT: shl v2.4h, v2.4h, #15
-; CHECK-GI-NEXT: sshr v2.4h, v2.4h, #15
+; CHECK-GI-NEXT: cmlt v2.4h, v2.4h, #0
; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: ret
%c = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> %a, <4 x i16> %b
@@ -1223,7 +1223,7 @@ define <4 x i32> @vselect_constant_cond_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-GI-NEXT: mov v2.s[2], w9
; CHECK-GI-NEXT: mov v2.s[3], w8
; CHECK-GI-NEXT: shl v2.4s, v2.4s, #31
-; CHECK-GI-NEXT: sshr v2.4s, v2.4s, #31
+; CHECK-GI-NEXT: cmlt v2.4s, v2.4s, #0
; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: ret
%c = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index fb8b721..11b3b62 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -966,7 +966,7 @@ define <8 x i8> @cmgez8xi8_alt(<8 x i8> %A) {
;
; CHECK-GI-LABEL: cmgez8xi8_alt:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.8b, v0.8b, #7
+; CHECK-GI-NEXT: cmlt v0.8b, v0.8b, #0
; CHECK-GI-NEXT: mvn v0.8b, v0.8b
; CHECK-GI-NEXT: ret
%sign = ashr <8 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -982,7 +982,7 @@ define <16 x i8> @cmgez16xi8_alt(<16 x i8> %A) {
;
; CHECK-GI-LABEL: cmgez16xi8_alt:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-GI-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-GI-NEXT: mvn v0.16b, v0.16b
; CHECK-GI-NEXT: ret
%sign = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -998,7 +998,7 @@ define <4 x i16> @cmgez4xi16_alt(<4 x i16> %A) {
;
; CHECK-GI-LABEL: cmgez4xi16_alt:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #15
+; CHECK-GI-NEXT: cmlt v0.4h, v0.4h, #0
; CHECK-GI-NEXT: mvn v0.8b, v0.8b
; CHECK-GI-NEXT: ret
%sign = ashr <4 x i16> %A, <i16 15, i16 15, i16 15, i16 15>
@@ -1014,7 +1014,7 @@ define <8 x i16> @cmgez8xi16_alt(<8 x i16> %A) {
;
; CHECK-GI-LABEL: cmgez8xi16_alt:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-GI-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-GI-NEXT: mvn v0.16b, v0.16b
; CHECK-GI-NEXT: ret
%sign = ashr <8 x i16> %A, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -1030,7 +1030,7 @@ define <2 x i32> @cmgez2xi32_alt(<2 x i32> %A) {
;
; CHECK-GI-LABEL: cmgez2xi32_alt:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31
+; CHECK-GI-NEXT: cmlt v0.2s, v0.2s, #0
; CHECK-GI-NEXT: mvn v0.8b, v0.8b
; CHECK-GI-NEXT: ret
%sign = ashr <2 x i32> %A, <i32 31, i32 31>
@@ -1046,7 +1046,7 @@ define <4 x i32> @cmgez4xi32_alt(<4 x i32> %A) {
;
; CHECK-GI-LABEL: cmgez4xi32_alt:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-GI-NEXT: mvn v0.16b, v0.16b
; CHECK-GI-NEXT: ret
%sign = ashr <4 x i32> %A, <i32 31, i32 31, i32 31, i32 31>
@@ -1062,7 +1062,7 @@ define <2 x i64> @cmgez2xi64_alt(<2 x i64> %A) {
;
; CHECK-GI-LABEL: cmgez2xi64_alt:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0
; CHECK-GI-NEXT: mvn v0.16b, v0.16b
; CHECK-GI-NEXT: ret
%sign = ashr <2 x i64> %A, <i64 63, i64 63>
@@ -1503,99 +1503,64 @@ entry:
}
define <8 x i8> @cmltz8xi8_alt(<8 x i8> %A) {
-; CHECK-SD-LABEL: cmltz8xi8_alt:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.8b, v0.8b, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmltz8xi8_alt:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.8b, v0.8b, #7
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmltz8xi8_alt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
+; CHECK-NEXT: ret
%A.lobit = ashr <8 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
ret <8 x i8> %A.lobit
}
define <16 x i8> @cmltz16xi8_alt(<16 x i8> %A) {
-; CHECK-SD-LABEL: cmltz16xi8_alt:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.16b, v0.16b, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmltz16xi8_alt:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmltz16xi8_alt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
+; CHECK-NEXT: ret
%A.lobit = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
ret <16 x i8> %A.lobit
}
define <4 x i16> @cmltz4xi16_alt(<4 x i16> %A) {
-; CHECK-SD-LABEL: cmltz4xi16_alt:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.4h, v0.4h, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmltz4xi16_alt:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #15
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmltz4xi16_alt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
+; CHECK-NEXT: ret
%A.lobit = ashr <4 x i16> %A, <i16 15, i16 15, i16 15, i16 15>
ret <4 x i16> %A.lobit
}
define <8 x i16> @cmltz8xi16_alt(<8 x i16> %A) {
-; CHECK-SD-LABEL: cmltz8xi16_alt:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.8h, v0.8h, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmltz8xi16_alt:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmltz8xi16_alt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
+; CHECK-NEXT: ret
%A.lobit = ashr <8 x i16> %A, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
ret <8 x i16> %A.lobit
}
define <2 x i32> @cmltz2xi32_alt(<2 x i32> %A) {
-; CHECK-SD-LABEL: cmltz2xi32_alt:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.2s, v0.2s, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmltz2xi32_alt:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmltz2xi32_alt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
+; CHECK-NEXT: ret
%A.lobit = ashr <2 x i32> %A, <i32 31, i32 31>
ret <2 x i32> %A.lobit
}
define <4 x i32> @cmltz4xi32_alt(<4 x i32> %A) {
-; CHECK-SD-LABEL: cmltz4xi32_alt:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.4s, v0.4s, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmltz4xi32_alt:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmltz4xi32_alt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
+; CHECK-NEXT: ret
%A.lobit = ashr <4 x i32> %A, <i32 31, i32 31, i32 31, i32 31>
ret <4 x i32> %A.lobit
}
define <2 x i64> @cmltz2xi64_alt(<2 x i64> %A) {
-; CHECK-SD-LABEL: cmltz2xi64_alt:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.2d, v0.2d, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: cmltz2xi64_alt:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: cmltz2xi64_alt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
+; CHECK-NEXT: ret
%A.lobit = ashr <2 x i64> %A, <i64 63, i64 63>
ret <2 x i64> %A.lobit
}
@@ -2523,7 +2488,7 @@ define <2 x i32> @fcmal2xfloat(<2 x float> %A, <2 x float> %B) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v0.2s, #1
; CHECK-GI-NEXT: shl v0.2s, v0.2s, #31
-; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31
+; CHECK-GI-NEXT: cmlt v0.2s, v0.2s, #0
; CHECK-GI-NEXT: ret
%tmp3 = fcmp true <2 x float> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
@@ -2542,7 +2507,7 @@ define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) {
; CHECK-GI-NEXT: dup v0.2s, w8
; CHECK-GI-NEXT: mov v0.d[1], v0.d[0]
; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-GI-NEXT: ret
%tmp3 = fcmp true <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
@@ -2559,7 +2524,7 @@ define <2 x i64> @fcmal2xdouble(<2 x double> %A, <2 x double> %B) {
; CHECK-GI-NEXT: adrp x8, .LCPI221_0
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI221_0]
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0
; CHECK-GI-NEXT: ret
%tmp3 = fcmp true <2 x double> %A, %B
%tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
@@ -2589,7 +2554,7 @@ define <4 x i32> @fcmnv4xfloat(<4 x float> %A, <4 x float> %B) {
; CHECK-GI-NEXT: mov v0.s[1], w8
; CHECK-GI-NEXT: mov v0.d[1], v0.d[0]
; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-GI-NEXT: ret
%tmp3 = fcmp false <4 x float> %A, %B
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
diff --git a/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll b/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
index 282f437..a8c55b4 100644
--- a/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
+++ b/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll
@@ -465,7 +465,7 @@ define <8 x i16> @test_ushll_cmp(<8 x i8> %a, <8 x i8> %b) #0 {
; CHECK-GI-NEXT: movi v1.2d, #0xff00ff00ff00ff
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: shl v0.8h, v0.8h, #15
-; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-GI-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%cmp.i = icmp eq <8 x i8> %a, %b
diff --git a/llvm/test/CodeGen/AArch64/select_cc.ll b/llvm/test/CodeGen/AArch64/select_cc.ll
index 483f6c2..b562340 100644
--- a/llvm/test/CodeGen/AArch64/select_cc.ll
+++ b/llvm/test/CodeGen/AArch64/select_cc.ll
@@ -98,7 +98,7 @@ define <2 x double> @select_olt_load_cmp(<2 x double> %a, ptr %src) {
; CHECK-GI-NEXT: fcmgt v1.2s, v1.2s, #0.0
; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-GI-NEXT: shl v1.2d, v1.2d, #63
-; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #63
+; CHECK-GI-NEXT: cmlt v1.2d, v1.2d, #0
; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
@@ -136,7 +136,7 @@ define <4 x i32> @select_icmp_sgt(<4 x i32> %a, <4 x i8> %b) {
; CHECK-GI-NEXT: mov v2.s[2], w8
; CHECK-GI-NEXT: mov v2.s[3], w9
; CHECK-GI-NEXT: shl v1.4s, v2.4s, #31
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
+; CHECK-GI-NEXT: cmlt v1.4s, v1.4s, #0
; CHECK-GI-NEXT: bic v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
index 293b74ec..96a7a9d0 100644
--- a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
+++ b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
@@ -255,7 +255,7 @@ define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: shl v0.16b, v0.16b, #7
; CHECK-GI-NEXT: movi v1.16b, #128
-; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7
+; CHECK-GI-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%shl = select <16 x i1> %t, <16 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>, <16 x i8> zeroinitializer
@@ -277,7 +277,7 @@ define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) {
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: movi v1.8h, #128
; CHECK-GI-NEXT: shl v0.8h, v0.8h, #15
-; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #15
+; CHECK-GI-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%shl= select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer
@@ -299,7 +299,7 @@ define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) {
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: movi v1.4s, #64
; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer
@@ -323,7 +323,7 @@ define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) {
; CHECK-GI-NEXT: adrp x8, .LCPI16_0
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: cmlt v0.2d, v0.2d, #0
; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
%shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll
new file mode 100644
index 0000000..0d68762
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes='amdgpu-attributor' %s -o - | FileCheck %s
+
+@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
+
+;.
+; CHECK: @lds_1 = internal addrspace(3) global [1 x i8] poison, align 4
+;.
+define amdgpu_kernel void @k0() #0 {
+; CHECK: Function Attrs: sanitize_address
+; CHECK-LABEL: define amdgpu_kernel void @k0(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: store i8 7, ptr addrspace(3) @lds_1, align 4
+; CHECK-NEXT: ret void
+;
+ store i8 7, ptr addrspace(3) @lds_1, align 4
+ ret void
+}
+
+attributes #0 = { sanitize_address }
+; "amdgpu-no-flat-scratch-init" attribute should not be present in attribute list
+;.
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index a688b6f..fb566e5 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -707,8 +707,8 @@ attributes #6 = { "enqueued-block" }
; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR15:[0-9]+]] = { nounwind "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
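
Note on the expectation updates above: the "amdgpu-no-*" strings are inferred by the AMDGPU Attributor only when it can prove the corresponding feature is unused, and the regenerated ATTR17/ATTR18 lines (like the sanitize_address entry in the preceding file) no longer include "amdgpu-no-flat-scratch-init", i.e. the pass now conservatively assumes flat-scratch initialization may be required on these ASan-related paths. A minimal sketch of the kind of input involved (the kernel name is illustrative, not from this patch):

; A sanitized kernel: the attributor can still infer most "amdgpu-no-*"
; attributes here, but per the updated checks it no longer adds
; "amdgpu-no-flat-scratch-init" when sanitize_address is involved.
define amdgpu_kernel void @asan_kernel() sanitize_address {
  ret void
}
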
diff --git a/llvm/test/CodeGen/AMDGPU/true16-fold.mir b/llvm/test/CodeGen/AMDGPU/true16-fold.mir
index 93cc12f..9484417 100644
--- a/llvm/test/CodeGen/AMDGPU/true16-fold.mir
+++ b/llvm/test/CodeGen/AMDGPU/true16-fold.mir
@@ -57,6 +57,7 @@ body: |
%4:vgpr_16 = COPY %3:sgpr_lo16
%5:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, %0:sreg_32, 0, killed %1:sreg_32, 0, killed %4:vgpr_16, 0, 0, implicit $exec
S_ENDPGM 0, implicit %5
+...
---
name: fold_16bit_madmix_clamp
@@ -207,3 +208,27 @@ body: |
$vgpr0 = COPY %4
S_ENDPGM 0, implicit $vgpr0
...
+
+---
+name: fold_imm16_across_reg_sequence
+tracksRegLiveness: true
+registers:
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: fold_imm16_across_reg_sequence
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MOV_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B16_t16_e64_1:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_MOV_B16_t16_e64_]], %subreg.lo16, [[V_MOV_B16_t16_e64_1]], %subreg.hi16
+ ; CHECK-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, -1, 0, -1, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F32_e64_]]
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
+ %0:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
+ %1:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
+ %2:vgpr_32 = REG_SEQUENCE %0, %subreg.lo16, %1, %subreg.hi16
+ %3:vgpr_32 = nofpexcept V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
+ $vgpr0 = COPY %3
+ S_ENDPGM 0, implicit $vgpr0
+...
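
The new fold_imm16_across_reg_sequence body exercises folding 16-bit immediates through a REG_SEQUENCE into a 32-bit use: two V_MOV_B16 copies of -1 land in the lo16/hi16 subregisters, the packed 32-bit value is again -1, and the folded CHECK line shows V_MAX_F32 consuming it directly as an inline immediate. The packing arithmetic, spelled out as an IR sketch (the function name is illustrative):

define i32 @pack_halves() {
  %lo = zext i16 -1 to i32   ; 0x0000FFFF
  %hi = shl i32 %lo, 16      ; 0xFFFF0000
  %v  = or i32 %hi, %lo      ; 0xFFFFFFFF, i.e. i32 -1, a valid inline immediate
  ret i32 %v
}
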
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 8ab56b2..a6f0a03 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -383,8 +383,8 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32(half %x) {
-; SOFT-LABEL: utesth_f16i32:
+define i32 @utest_f16i32(half %x) {
+; SOFT-LABEL: utest_f16i32:
; SOFT: @ %bb.0: @ %entry
; SOFT-NEXT: .save {r7, lr}
; SOFT-NEXT: push {r7, lr}
@@ -400,7 +400,7 @@ define i32 @utesth_f16i32(half %x) {
; SOFT-NEXT: .LBB7_2: @ %entry
; SOFT-NEXT: pop {r7, pc}
;
-; VFP2-LABEL: utesth_f16i32:
+; VFP2-LABEL: utest_f16i32:
; VFP2: @ %bb.0: @ %entry
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
@@ -411,7 +411,7 @@ define i32 @utesth_f16i32(half %x) {
; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: pop {r7, pc}
;
-; FULL-LABEL: utesth_f16i32:
+; FULL-LABEL: utest_f16i32:
; FULL: @ %bb.0: @ %entry
; FULL-NEXT: vcvt.u32.f16 s0, s0
; FULL-NEXT: vmov r0, s0
@@ -3985,6 +3985,46 @@ entry:
ret i32 %spec.store.select7
}
+; i32 non-saturating
+
+define i32 @ustest_f16i32_nsat(half %x) {
+; SOFT-LABEL: ustest_f16i32_nsat:
+; SOFT: @ %bb.0:
+; SOFT-NEXT: .save {r7, lr}
+; SOFT-NEXT: push {r7, lr}
+; SOFT-NEXT: uxth r0, r0
+; SOFT-NEXT: bl __aeabi_h2f
+; SOFT-NEXT: bl __aeabi_f2iz
+; SOFT-NEXT: asrs r1, r0, #31
+; SOFT-NEXT: ands r0, r1
+; SOFT-NEXT: asrs r1, r0, #31
+; SOFT-NEXT: bics r0, r1
+; SOFT-NEXT: pop {r7, pc}
+;
+; VFP2-LABEL: ustest_f16i32_nsat:
+; VFP2: @ %bb.0:
+; VFP2-NEXT: .save {r7, lr}
+; VFP2-NEXT: push {r7, lr}
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: bl __aeabi_h2f
+; VFP2-NEXT: vmov s0, r0
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: usat r0, #0, r0
+; VFP2-NEXT: pop {r7, pc}
+;
+; FULL-LABEL: ustest_f16i32_nsat:
+; FULL: @ %bb.0:
+; FULL-NEXT: vcvt.s32.f16 s0, s0
+; FULL-NEXT: vmov r0, s0
+; FULL-NEXT: usat r0, #0, r0
+; FULL-NEXT: bx lr
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ ret i32 %spec.store.select7
+}
+
declare i32 @llvm.smin.i32(i32, i32)
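
A note on the new ustest_f16i32_nsat tests added here and in the other fpclamptosat files: smin(0, %conv) is never positive, so the following smax with 0 pins the result at 0. The bounds do not form a real saturation range (hence "nsat"), and the apparent point of the tests is that the backend must lower the min/max pair faithfully instead of matching it as a saturating conversion. A scalar sketch of the pattern (the function name is illustrative):

define i32 @empty_clamp(i32 %conv) {
  %lo = call i32 @llvm.smin.i32(i32 0, i32 %conv)  ; always <= 0
  %r  = call i32 @llvm.smax.i32(i32 %lo, i32 0)    ; max of (<= 0) and 0 is 0
  ret i32 %r
}
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
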
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
index 96f009a..ba31b35 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
@@ -748,8 +748,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i32:
+define <4 x i32> @utest_f16i32(<4 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i32:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
@@ -821,7 +821,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NEON-NEXT: vpop {d12, d13}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i32:
+; CHECK-FP16-LABEL: utest_f16i32:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
@@ -1366,8 +1366,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i16:
+define <8 x i16> @utest_f16i16(<8 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i16:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
@@ -1441,7 +1441,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i16:
+; CHECK-FP16-LABEL: utest_f16i16:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: vmovx.f16 s4, s0
; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0
@@ -2109,8 +2109,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64(<2 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i64:
+define <2 x i64> @utest_f16i64(<2 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i64:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, lr}
@@ -2148,7 +2148,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8}
; CHECK-NEON-NEXT: pop {r4, r5, r6, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i64:
+; CHECK-FP16-LABEL: utest_f16i64:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, lr}
@@ -2835,8 +2835,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i32_mm:
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i32_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
@@ -2881,7 +2881,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i32_mm:
+; CHECK-FP16-LABEL: utest_f16i32_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, lr}
@@ -3344,8 +3344,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i16_mm:
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i16_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
@@ -3419,7 +3419,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i16_mm:
+; CHECK-FP16-LABEL: utest_f16i16_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: vmovx.f16 s4, s0
; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0
@@ -4044,8 +4044,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i64_mm:
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i64_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, lr}
@@ -4083,7 +4083,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-NEON-NEXT: vpop {d8}
; CHECK-NEON-NEXT: pop {r4, r5, r6, pc}
;
-; CHECK-FP16-LABEL: utesth_f16i64_mm:
+; CHECK-FP16-LABEL: utest_f16i64_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, lr}
@@ -4215,6 +4215,77 @@ entry:
ret <2 x i64> %conv6
}
+; i32 non-saturating
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) {
+; CHECK-NEON-LABEL: ustest_f16i32_nsat:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, lr}
+; CHECK-NEON-NEXT: push {r4, lr}
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.f32 s16, s3
+; CHECK-NEON-NEXT: vmov.f32 s18, s2
+; CHECK-NEON-NEXT: vmov.f32 s20, s1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r4, r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s16, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r1, s20
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s18, r4
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d11[0], r0
+; CHECK-NEON-NEXT: mov r0, r1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s18
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s4, s16
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov.i32 q8, #0x0
+; CHECK-NEON-NEXT: vmov r0, s2
+; CHECK-NEON-NEXT: vmov.32 d10[0], r0
+; CHECK-NEON-NEXT: vmov r0, s4
+; CHECK-NEON-NEXT: vmov.32 d11[1], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d10[1], r0
+; CHECK-NEON-NEXT: vmin.s32 q9, q5, q8
+; CHECK-NEON-NEXT: vmax.s32 q0, q9, q8
+; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: pop {r4, pc}
+;
+; CHECK-FP16-LABEL: ustest_f16i32_nsat:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vmovx.f16 s2, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s1
+; CHECK-FP16-NEXT: vmovx.f16 s4, s1
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4
+; CHECK-FP16-NEXT: vcvt.s32.f16 s2, s2
+; CHECK-FP16-NEXT: vmov.i32 q9, #0x0
+; CHECK-FP16-NEXT: vmov.32 d17[0], r0
+; CHECK-FP16-NEXT: vmov r0, s6
+; CHECK-FP16-NEXT: vmov.32 d16[0], r0
+; CHECK-FP16-NEXT: vmov r0, s4
+; CHECK-FP16-NEXT: vmov.32 d17[1], r0
+; CHECK-FP16-NEXT: vmov r0, s2
+; CHECK-FP16-NEXT: vmov.32 d16[1], r0
+; CHECK-FP16-NEXT: vmin.s32 q8, q8, q9
+; CHECK-FP16-NEXT: vmax.s32 q0, q8, q9
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ ret <4 x i32> %spec.store.select7
+}
+
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/Hexagon/inst_setcc_uno_uo.ll b/llvm/test/CodeGen/Hexagon/inst_setcc_uno_uo.ll
new file mode 100644
index 0000000..8b121c5
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/inst_setcc_uno_uo.ll
@@ -0,0 +1,93 @@
+; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b %s -o - | FileCheck %s
+
+define dso_local void @store_isnan_f32(ptr %a, ptr %b, ptr %isnan_cmp) local_unnamed_addr {
+entry:
+ %arrayidx_a = getelementptr inbounds nuw float, ptr %a, i32 0
+ %arrayidx_b = getelementptr inbounds nuw float, ptr %b, i32 0
+ %0 = load <32 x float>, ptr %arrayidx_a, align 4
+ %1 = load <32 x float>, ptr %arrayidx_b, align 4
+ %.vectorized = fcmp uno <32 x float> %0, %1
+ %.LS.instance = zext <32 x i1> %.vectorized to <32 x i32>
+ %arrayidx1 = getelementptr inbounds nuw i32, ptr %isnan_cmp, i32 0
+ store <32 x i32> %.LS.instance, ptr %arrayidx1, align 4
+ ret void
+}
+
+; CHECK-LABEL: store_isnan_f32
+; CHECK: [[RONE32:r[0-9]+]] = #1
+; CHECK: [[VOP2_F32:v[0-9]+]] = vxor([[VOP2_F32]],[[VOP2_F32]])
+; CHECK: [[VOP1_F32:v[0-9]+]] = vmemu(r0+#0)
+; CHECK: [[VONES32:v[0-9]+]] = vsplat([[RONE32]])
+; CHECK: [[Q1_F32:q[0-9]+]] = vcmp.eq([[VOP1_F32]].w,[[VOP1_F32]].w)
+; CHECK: [[VOP3_F32:v[0-9]+]] = vmemu(r1+#0)
+; CHECK: [[Q1_F32]] &= vcmp.eq([[VOP3_F32]].w,[[VOP3_F32]].w)
+; CHECK: [[VOUT_F32:v[0-9]+]] = vmux([[Q1_F32]],[[VOP2_F32]],[[VONES32]])
+; CHECK: vmemu(r2+#0) = [[VOUT_F32]]
+
+define dso_local void @store_isnan_f16(ptr %a, ptr %b, ptr %isnan_cmp) local_unnamed_addr {
+entry:
+ %arrayidx_a = getelementptr inbounds nuw half, ptr %a, i32 0
+ %arrayidx_b = getelementptr inbounds nuw half, ptr %b, i32 0
+ %0 = load <64 x half>, ptr %arrayidx_a, align 2
+ %1 = load <64 x half>, ptr %arrayidx_b, align 2
+ %.vectorized = fcmp uno <64 x half> %0, %1
+ %conv.LS.instance = zext <64 x i1> %.vectorized to <64 x i16>
+ %arrayidx1 = getelementptr inbounds nuw i16, ptr %isnan_cmp, i32 0
+ store <64 x i16> %conv.LS.instance, ptr %arrayidx1, align 2
+ ret void
+}
+; CHECK-LABEL: store_isnan_f16
+; CHECK: [[RONE16:r[0-9]+]] = #1
+; CHECK: [[VOP2_F16:v[0-9]+]] = vxor([[VOP2_F16]],[[VOP2_F16]])
+; CHECK: [[VOP1_F16:v[0-9]+]] = vmemu(r0+#0)
+; CHECK: [[VONES16:v[0-9]+]].h = vsplat([[RONE16]])
+; CHECK: [[Q1_F16:q[0-9]+]] = vcmp.eq([[VOP1_F16]].h,[[VOP1_F16]].h)
+; CHECK: [[VOP3_F16:v[0-9]+]] = vmemu(r1+#0)
+; CHECK: [[Q1_F16]] &= vcmp.eq([[VOP3_F16]].h,[[VOP3_F16]].h)
+; CHECK: [[VOUT_F16:v[0-9]+]] = vmux([[Q1_F16]],[[VOP2_F16]],[[VONES16]])
+; CHECK: vmemu(r2+#0) = [[VOUT_F16]]
+
+define dso_local void @store_isordered_f32(ptr %a, ptr %b, ptr %isordered_cmp) local_unnamed_addr {
+entry:
+ %arrayidx_a = getelementptr inbounds nuw float, ptr %a, i32 0
+ %arrayidx_b = getelementptr inbounds nuw float, ptr %b, i32 0
+ %0 = load <32 x float>, ptr %arrayidx_a, align 4
+ %1 = load <32 x float>, ptr %arrayidx_b, align 4
+ %.vectorized = fcmp ord <32 x float> %0, %1
+ %.LS.instance = zext <32 x i1> %.vectorized to <32 x i32>
+ %arrayidx1 = getelementptr inbounds nuw i32, ptr %isordered_cmp, i32 0
+ store <32 x i32> %.LS.instance, ptr %arrayidx1, align 4
+ ret void
+}
+; CHECK-LABEL: store_isordered_f32
+; CHECK: [[VOP2_ORD_F32:v[0-9]+]] = vxor([[VOP2_ORD_F32]],[[VOP2_ORD_F32]])
+; CHECK: [[VOP1_ORD_F32:v[0-9]+]] = vmemu(r0+#0)
+; CHECK: [[VONES_ORD_F32:v[0-9]+]] = vsplat([[RONE32]])
+; CHECK: [[Q1_ORD_F32:q[0-9]+]] = vcmp.eq([[VOP1_ORD_F32]].w,[[VOP1_ORD_F32]].w)
+; CHECK: [[VOP3_ORD_F32:v[0-9]+]] = vmemu(r1+#0)
+; CHECK: [[Q1_ORD_F32]] &= vcmp.eq([[VOP3_ORD_F32]].w,[[VOP3_ORD_F32]].w)
+; CHECK: [[VOUT_ORD_F32:v[0-9]+]] = vmux([[Q1_ORD_F32]],[[VONES_ORD_F32]],[[VOP2_ORD_F32]])
+; CHECK: vmemu(r2+#0) = [[VOUT_ORD_F32]]
+
+
+define dso_local void @store_isordered_f16(ptr %a, ptr %b, ptr %isordered_cmp) local_unnamed_addr {
+entry:
+ %arrayidx_a = getelementptr inbounds nuw half, ptr %a, i32 0
+ %arrayidx_b = getelementptr inbounds nuw half, ptr %b, i32 0
+ %0 = load <64 x half>, ptr %arrayidx_a, align 2
+ %1 = load <64 x half>, ptr %arrayidx_b, align 2
+ %.vectorized = fcmp ord <64 x half> %0, %1
+ %conv.LS.instance = zext <64 x i1> %.vectorized to <64 x i16>
+ %arrayidx1 = getelementptr inbounds nuw i16, ptr %isordered_cmp, i32 0
+ store <64 x i16> %conv.LS.instance, ptr %arrayidx1, align 2
+ ret void
+}
+; CHECK-LABEL: store_isordered_f16
+; CHECK: [[VOP2_ORD_F16:v[0-9]+]] = vxor([[VOP2_ORD_F16]],[[VOP2_ORD_F16]])
+; CHECK: [[VOP1_ORD_F16:v[0-9]+]] = vmemu(r0+#0)
+; CHECK: [[VONES_ORD_F16:v[0-9]+]].h = vsplat([[RONE16]])
+; CHECK: [[Q1_ORD_F16:q[0-9]+]] = vcmp.eq([[VOP1_ORD_F16]].h,[[VOP1_ORD_F16]].h)
+; CHECK: [[VOP3_ORD_F16:v[0-9]+]] = vmemu(r1+#0)
+; CHECK: [[Q1_ORD_F16]] &= vcmp.eq([[VOP3_ORD_F16]].h,[[VOP3_ORD_F16]].h)
+; CHECK: [[VOUT_ORD_F16:v[0-9]+]] = vmux([[Q1_ORD_F16]],[[VONES_ORD_F16]],[[VOP2_ORD_F16]])
+; CHECK: vmemu(r2+#0) = [[VOUT_ORD_F16]]
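
The lowering checked above rests on the IEEE identity that a value is NaN exactly when it compares unequal to itself: HVX has no direct unordered compare, so the generated code builds a "both operands ordered" predicate from two self-equality vcmp.eq results and then selects 0 or 1 with vmux (with the mux operands swapped between the uno and ord variants). The scalar equivalent:

define i1 @is_unordered(float %a, float %b) {
  %aa  = fcmp oeq float %a, %a   ; false iff %a is NaN
  %bb  = fcmp oeq float %b, %b   ; false iff %b is NaN
  %ord = and i1 %aa, %bb         ; same result as fcmp ord %a, %b
  %uno = xor i1 %ord, true       ; same result as fcmp uno %a, %b
  ret i1 %uno
}
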
diff --git a/llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll b/llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll
index 5fa5023..fe0f7dd 100644
--- a/llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll
+++ b/llvm/test/CodeGen/Hexagon/isel-fold-shl-zext.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=hexagon-unknown-elf < %s | FileCheck %s
; In ISelLowering, when folding nodes (or (shl xx, s), (zext y))
@@ -11,17 +12,18 @@ target triple = "hexagon"
; Function Attrs: nofree nosync nounwind memory(readwrite, inaccessiblemem: none)
define dso_local void @foo(i64* nocapture noundef %buf, i32 %a, i32 %b) local_unnamed_addr {
; CHECK-LABEL: foo:
-; CHECK: // %bb.0: // %entry
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: {
-; CHECK-NEXT: r[[REG0:[0-9]+]] = addasl(r2,r1,#1)
-; CHECK-NEXT: r[[REG2:[0-9]+]] = asl(r1,#1)
+; CHECK-NEXT: r2 = addasl(r2,r1,#1)
+; CHECK-NEXT: r3 = asl(r1,#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: r[[REG1:[0-9]+]] = addasl(r[[REG0]],r1,#1)
+; CHECK-NEXT: r2 = addasl(r2,r1,#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
-; CHECK-NEXT: memd(r0+#8) = r[[REG2]]:[[REG1]]
+; CHECK-NEXT: memd(r0+#8) = r3:2
; CHECK-NEXT: }
entry:
%arrayidx = getelementptr inbounds i64, i64* %buf, i32 1
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 18d071c..a0d1ecc 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -436,8 +436,8 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32(half %x) {
-; RV32-LABEL: utesth_f16i32:
+define i32 @utest_f16i32(half %x) {
+; RV32-LABEL: utest_f16i32:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
@@ -456,7 +456,7 @@ define i32 @utesth_f16i32(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
-; RV64-LABEL: utesth_f16i32:
+; RV64-LABEL: utest_f16i32:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
@@ -974,8 +974,8 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16(half %x) {
-; RV32-LABEL: utesth_f16i16:
+define i16 @utest_f16i16(half %x) {
+; RV32-LABEL: utest_f16i16:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
@@ -995,7 +995,7 @@ define i16 @utesth_f16i16(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
-; RV64-LABEL: utesth_f16i16:
+; RV64-LABEL: utest_f16i16:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
@@ -3829,6 +3829,52 @@ entry:
ret i64 %conv6
}
+; i32 non-saturating
+
+define i32 @ustest_f16i32_nsat(half %x) {
+; RV32-LABEL: ustest_f16i32_nsat:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __extendhfsf2
+; RV32-NEXT: fcvt.w.s a0, fa0, rtz
+; RV32-NEXT: srai a1, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: sgtz a1, a0
+; RV32-NEXT: neg a1, a1
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: .cfi_restore ra
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ustest_f16i32_nsat:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __extendhfsf2
+; RV64-NEXT: fcvt.l.s a0, fa0, rtz
+; RV64-NEXT: srai a1, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: sgtz a1, a0
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: .cfi_restore ra
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: .cfi_def_cfa_offset 0
+; RV64-NEXT: ret
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ ret i32 %spec.store.select7
+}
+
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
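
The RV32/RV64 bodies above compute both zero clamps branchlessly: srai+and implements smin with zero, and sgtz+neg+and implements smax with zero. The identities, spelled out as IR (function names are illustrative):

define i32 @smin_with_zero(i32 %x) {
  %s = ashr i32 %x, 31   ; srai: all-ones mask iff %x is negative
  %r = and i32 %x, %s    ; %x when negative, else 0 == smin(%x, 0)
  ret i32 %r
}

define i32 @smax_with_zero(i32 %x) {
  %p = icmp sgt i32 %x, 0   ; sgtz
  %m = sext i1 %p to i32    ; neg: materializes a 0 or -1 mask
  %r = and i32 %x, %m       ; %x when positive, else 0 == smax(%x, 0)
  ret i32 %r
}
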
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index aba9d37..f5977625 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -519,8 +519,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i32:
+define <4 x i32> @utest_f16i32(<4 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i32:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: addi sp, sp, -64
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64
@@ -610,7 +610,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: bgeu a3, a1, .LBB7_4
; CHECK-NOV-NEXT: j .LBB7_5
;
-; CHECK-V-LABEL: utesth_f16i32:
+; CHECK-V-LABEL: utest_f16i32:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: addi sp, sp, -48
; CHECK-V-NEXT: .cfi_def_cfa_offset 48
@@ -1594,8 +1594,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i16:
+define <8 x i16> @utest_f16i16(<8 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i16:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: addi sp, sp, -128
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 128
@@ -1765,7 +1765,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: bgeu a7, a3, .LBB16_8
; CHECK-NOV-NEXT: j .LBB16_9
;
-; CHECK-V-LABEL: utesth_f16i16:
+; CHECK-V-LABEL: utest_f16i16:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: addi sp, sp, -80
; CHECK-V-NEXT: .cfi_def_cfa_offset 80
@@ -3332,8 +3332,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64(<2 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i64:
+define <2 x i64> @utest_f16i64(<2 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i64:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: addi sp, sp, -32
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 32
@@ -3373,7 +3373,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0
; CHECK-NOV-NEXT: ret
;
-; CHECK-V-LABEL: utesth_f16i64:
+; CHECK-V-LABEL: utest_f16i64:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: addi sp, sp, -32
; CHECK-V-NEXT: .cfi_def_cfa_offset 32
@@ -4074,8 +4074,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i32_mm:
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i32_mm:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: addi sp, sp, -64
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64
@@ -4165,7 +4165,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: bgeu a3, a1, .LBB34_4
; CHECK-NOV-NEXT: j .LBB34_5
;
-; CHECK-V-LABEL: utesth_f16i32_mm:
+; CHECK-V-LABEL: utest_f16i32_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: addi sp, sp, -48
; CHECK-V-NEXT: .cfi_def_cfa_offset 48
@@ -5134,8 +5134,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i16_mm:
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i16_mm:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: addi sp, sp, -128
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 128
@@ -5305,7 +5305,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: bgeu a7, a3, .LBB43_8
; CHECK-NOV-NEXT: j .LBB43_9
;
-; CHECK-V-LABEL: utesth_f16i16_mm:
+; CHECK-V-LABEL: utest_f16i16_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: addi sp, sp, -80
; CHECK-V-NEXT: .cfi_def_cfa_offset 80
@@ -6837,8 +6837,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i64_mm:
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i64_mm:
; CHECK-NOV: # %bb.0: # %entry
; CHECK-NOV-NEXT: addi sp, sp, -32
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 32
@@ -6877,7 +6877,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0
; CHECK-NOV-NEXT: ret
;
-; CHECK-V-LABEL: utesth_f16i64_mm:
+; CHECK-V-LABEL: utest_f16i64_mm:
; CHECK-V: # %bb.0: # %entry
; CHECK-V-NEXT: addi sp, sp, -32
; CHECK-V-NEXT: .cfi_def_cfa_offset 32
@@ -7048,6 +7048,172 @@ entry:
ret <2 x i64> %conv6
}
+; i32 non-saturating
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) {
+; CHECK-NOV-LABEL: ustest_f16i32_nsat:
+; CHECK-NOV: # %bb.0: # %entry
+; CHECK-NOV-NEXT: addi sp, sp, -64
+; CHECK-NOV-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NOV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT: .cfi_offset ra, -8
+; CHECK-NOV-NEXT: .cfi_offset s0, -16
+; CHECK-NOV-NEXT: .cfi_offset s1, -24
+; CHECK-NOV-NEXT: .cfi_offset s2, -32
+; CHECK-NOV-NEXT: .cfi_offset s3, -40
+; CHECK-NOV-NEXT: .cfi_offset fs0, -48
+; CHECK-NOV-NEXT: .cfi_offset fs1, -56
+; CHECK-NOV-NEXT: lhu s1, 0(a1)
+; CHECK-NOV-NEXT: lhu s2, 8(a1)
+; CHECK-NOV-NEXT: lhu a2, 16(a1)
+; CHECK-NOV-NEXT: lhu s3, 24(a1)
+; CHECK-NOV-NEXT: mv s0, a0
+; CHECK-NOV-NEXT: fmv.w.x fa0, a2
+; CHECK-NOV-NEXT: call __extendhfsf2
+; CHECK-NOV-NEXT: fmv.s fs0, fa0
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
+; CHECK-NOV-NEXT: call __extendhfsf2
+; CHECK-NOV-NEXT: fmv.s fs1, fa0
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: call __extendhfsf2
+; CHECK-NOV-NEXT: fcvt.l.s s1, fa0, rtz
+; CHECK-NOV-NEXT: fcvt.l.s s2, fs1, rtz
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
+; CHECK-NOV-NEXT: fcvt.l.s s3, fs0, rtz
+; CHECK-NOV-NEXT: call __extendhfsf2
+; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
+; CHECK-NOV-NEXT: srai a1, s3, 63
+; CHECK-NOV-NEXT: and a1, a1, s3
+; CHECK-NOV-NEXT: srai a2, s2, 63
+; CHECK-NOV-NEXT: and a2, a2, s2
+; CHECK-NOV-NEXT: srai a3, s1, 63
+; CHECK-NOV-NEXT: and a3, a3, s1
+; CHECK-NOV-NEXT: srai a4, a0, 63
+; CHECK-NOV-NEXT: and a0, a4, a0
+; CHECK-NOV-NEXT: sgtz a4, a3
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a3, a4, a3
+; CHECK-NOV-NEXT: sgtz a4, a2
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a2, a4, a2
+; CHECK-NOV-NEXT: sgtz a4, a1
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a1, a4, a1
+; CHECK-NOV-NEXT: sgtz a4, a0
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: and a0, a4, a0
+; CHECK-NOV-NEXT: sw a3, 0(s0)
+; CHECK-NOV-NEXT: sw a2, 4(s0)
+; CHECK-NOV-NEXT: sw a1, 8(s0)
+; CHECK-NOV-NEXT: sw a0, 12(s0)
+; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT: .cfi_restore ra
+; CHECK-NOV-NEXT: .cfi_restore s0
+; CHECK-NOV-NEXT: .cfi_restore s1
+; CHECK-NOV-NEXT: .cfi_restore s2
+; CHECK-NOV-NEXT: .cfi_restore s3
+; CHECK-NOV-NEXT: .cfi_restore fs0
+; CHECK-NOV-NEXT: .cfi_restore fs1
+; CHECK-NOV-NEXT: addi sp, sp, 64
+; CHECK-NOV-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NOV-NEXT: ret
+;
+; CHECK-V-LABEL: ustest_f16i32_nsat:
+; CHECK-V: # %bb.0: # %entry
+; CHECK-V-NEXT: addi sp, sp, -48
+; CHECK-V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: .cfi_offset ra, -8
+; CHECK-V-NEXT: .cfi_offset s0, -16
+; CHECK-V-NEXT: .cfi_offset s1, -24
+; CHECK-V-NEXT: .cfi_offset s2, -32
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: sub sp, sp, a1
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
+; CHECK-V-NEXT: lhu s0, 0(a0)
+; CHECK-V-NEXT: lhu s1, 8(a0)
+; CHECK-V-NEXT: lhu s2, 16(a0)
+; CHECK-V-NEXT: lhu a0, 24(a0)
+; CHECK-V-NEXT: fmv.w.x fa0, a0
+; CHECK-V-NEXT: call __extendhfsf2
+; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT: fmv.w.x fa0, s2
+; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; CHECK-V-NEXT: call __extendhfsf2
+; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v9, 1
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; CHECK-V-NEXT: fmv.w.x fa0, s1
+; CHECK-V-NEXT: call __extendhfsf2
+; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT: fmv.w.x fa0, s0
+; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; CHECK-V-NEXT: call __extendhfsf2
+; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT: vslideup.vi v8, v9, 1
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-V-NEXT: vslideup.vi v8, v9, 2
+; CHECK-V-NEXT: vmin.vx v8, v8, zero
+; CHECK-V-NEXT: vmax.vx v8, v8, zero
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: .cfi_def_cfa sp, 48
+; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: .cfi_restore ra
+; CHECK-V-NEXT: .cfi_restore s0
+; CHECK-V-NEXT: .cfi_restore s1
+; CHECK-V-NEXT: .cfi_restore s2
+; CHECK-V-NEXT: addi sp, sp, 48
+; CHECK-V-NEXT: .cfi_def_cfa_offset 0
+; CHECK-V-NEXT: ret
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ ret <4 x i32> %spec.store.select7
+}
+
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll b/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll
new file mode 100644
index 0000000..8491328
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/ptrcast-bitcast.ll
@@ -0,0 +1,28 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - | FileCheck %s --match-full-lines
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#v2_uint:]] = OpTypeVector %[[#uint]] 2
+; CHECK-DAG: %[[#double:]] = OpTypeFloat 64
+; CHECK-DAG: %[[#v2_double:]] = OpTypeVector %[[#double]] 2
+; CHECK-DAG: %[[#v4_uint:]] = OpTypeVector %[[#uint]] 4
+@.str = private unnamed_addr constant [3 x i8] c"In\00", align 1
+@.str.2 = private unnamed_addr constant [4 x i8] c"Out\00", align 1
+
+define void @main() local_unnamed_addr #0 {
+entry:
+ %0 = tail call target("spirv.VulkanBuffer", [0 x <2 x i32>], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v2i32_12_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
+ %1 = tail call target("spirv.VulkanBuffer", [0 x <2 x double>], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0v2f64_12_1t(i32 0, i32 2, i32 1, i32 0, ptr nonnull @.str.2)
+ %2 = tail call noundef align 8 dereferenceable(8) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2i32_12_0t(target("spirv.VulkanBuffer", [0 x <2 x i32>], 12, 0) %0, i32 0)
+ %3 = load <2 x i32>, ptr addrspace(11) %2, align 8
+ %4 = tail call noundef align 8 dereferenceable(8) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2i32_12_0t(target("spirv.VulkanBuffer", [0 x <2 x i32>], 12, 0) %0, i32 1)
+ %5 = load <2 x i32>, ptr addrspace(11) %4, align 8
+; CHECK: %[[#tmp:]] = OpVectorShuffle %[[#v4_uint]] {{%[0-9]+}} {{%[0-9]+}} 0 2 1 3
+ %6 = shufflevector <2 x i32> %3, <2 x i32> %5, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK: %[[#access:]] = OpAccessChain {{.*}}
+ %7 = tail call noundef align 16 dereferenceable(16) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0v2f64_12_1t(target("spirv.VulkanBuffer", [0 x <2 x double>], 12, 1) %1, i32 0)
+; CHECK: %[[#bitcast:]] = OpBitcast %[[#v2_double]] %[[#tmp]]
+; CHECK: OpStore %[[#access]] %[[#bitcast]] Aligned 16
+ store <4 x i32> %6, ptr addrspace(11) %7, align 16
+ ret void
+}
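
The checks above pin down the interesting legalization step: the IR stores a <4 x i32> through a handle whose declared element type is <2 x double>, and since SPIR-V requires the stored value's type to match the pointee, the backend materializes an OpBitcast before the OpStore. The bitcast it emits corresponds to this IR-level reinterpretation:

define <2 x double> @reinterpret(<4 x i32> %v) {
  %d = bitcast <4 x i32> %v to <2 x double>   ; same 128 bits, retyped
  ret <2 x double> %d
}
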
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
index 137994ce..59f3edc 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
@@ -136,9 +136,9 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32(half %x) {
-; CHECK-LABEL: utesth_f16i32:
-; CHECK: .functype utesth_f16i32 (f32) -> (i32)
+define i32 @utest_f16i32(half %x) {
+; CHECK-LABEL: utest_f16i32:
+; CHECK: .functype utest_f16i32 (f32) -> (i32)
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
@@ -153,9 +153,9 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32_cse(half %x) {
-; CHECK-LABEL: utesth_f16i32_cse:
-; CHECK: .functype utesth_f16i32_cse (f32) -> (i32)
+define i32 @utest_f16i32_cse(half %x) {
+; CHECK-LABEL: utest_f16i32_cse:
+; CHECK: .functype utest_f16i32_cse (f32) -> (i32)
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
@@ -403,9 +403,9 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16(half %x) {
-; CHECK-LABEL: utesth_f16i16:
-; CHECK: .functype utesth_f16i16 (f32) -> (i32)
+define i16 @utest_f16i16(half %x) {
+; CHECK-LABEL: utest_f16i16:
+; CHECK: .functype utest_f16i16 (f32) -> (i32)
; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
@@ -427,9 +427,9 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16_cse(half %x) {
-; CHECK-LABEL: utesth_f16i16_cse:
-; CHECK: .functype utesth_f16i16_cse (f32) -> (i32)
+define i16 @utest_f16i16_cse(half %x) {
+; CHECK-LABEL: utest_f16i16_cse:
+; CHECK: .functype utest_f16i16_cse (f32) -> (i32)
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
@@ -880,9 +880,9 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64(half %x) {
-; CHECK-LABEL: utesth_f16i64:
-; CHECK: .functype utesth_f16i64 (f32) -> (i64)
+define i64 @utest_f16i64(half %x) {
+; CHECK-LABEL: utest_f16i64:
+; CHECK: .functype utest_f16i64 (f32) -> (i64)
; CHECK-NEXT: .local i32, i64, i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: global.get __stack_pointer
@@ -919,9 +919,9 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64_cse(half %x) {
-; CHECK-LABEL: utesth_f16i64_cse:
-; CHECK: .functype utesth_f16i64_cse (f32) -> (i64)
+define i64 @utest_f16i64_cse(half %x) {
+; CHECK-LABEL: utest_f16i64_cse:
+; CHECK: .functype utest_f16i64_cse (f32) -> (i64)
; CHECK-NEXT: .local i32, i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: global.get __stack_pointer
@@ -1118,9 +1118,9 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32_mm(half %x) {
-; CHECK-LABEL: utesth_f16i32_mm:
-; CHECK: .functype utesth_f16i32_mm (f32) -> (i32)
+define i32 @utest_f16i32_mm(half %x) {
+; CHECK-LABEL: utest_f16i32_mm:
+; CHECK: .functype utest_f16i32_mm (f32) -> (i32)
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
; CHECK-NEXT: call __truncsfhf2
@@ -1353,9 +1353,9 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16_mm(half %x) {
-; CHECK-LABEL: utesth_f16i16_mm:
-; CHECK: .functype utesth_f16i16_mm (f32) -> (i32)
+define i16 @utest_f16i16_mm(half %x) {
+; CHECK-LABEL: utest_f16i16_mm:
+; CHECK: .functype utest_f16i16_mm (f32) -> (i32)
; CHECK-NEXT: .local i32
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 0
@@ -1637,9 +1637,9 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64_mm(half %x) {
-; CHECK-LABEL: utesth_f16i64_mm:
-; CHECK: .functype utesth_f16i64_mm (f32) -> (i64)
+define i64 @utest_f16i64_mm(half %x) {
+; CHECK-LABEL: utest_f16i64_mm:
+; CHECK: .functype utest_f16i64_mm (f32) -> (i64)
; CHECK-NEXT: .local i32, i64, i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: global.get __stack_pointer
@@ -1724,9 +1724,9 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64_mm_cse(half %x) {
-; CHECK-LABEL: utesth_f16i64_mm_cse:
-; CHECK: .functype utesth_f16i64_mm_cse (f32) -> (i64)
+define i64 @utest_f16i64_mm_cse(half %x) {
+; CHECK-LABEL: utest_f16i64_mm_cse:
+; CHECK: .functype utest_f16i64_mm_cse (f32) -> (i64)
; CHECK-NEXT: .local i32, i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: global.get __stack_pointer
@@ -1754,6 +1754,35 @@ entry:
ret i64 %conv6
}
+; i32 non-saturating
+
+define i32 @ustest_f16i32_nsat(half %x) {
+; CHECK-LABEL: ustest_f16i32_nsat:
+; CHECK: .functype ustest_f16i32_nsat (f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 31
+; CHECK-NEXT: i32.shr_s
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.and
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.gt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ ret i32 %spec.store.select7
+}
+
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
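
Two details of the WebAssembly output are worth spelling out: the __truncsfhf2/__extendhfsf2 round trip exists because wasm has no f16 value type (the half argument arrives as f32 and is squeezed through half precision by the libcall pair), while the integer conversion itself needs no libcall, since i32.trunc_sat_f32_s saturates natively and so matches the semantics of the saturating conversion intrinsic:

define i32 @sat_convert(float %f) {
  %r = call i32 @llvm.fptosi.sat.i32.f32(float %f)  ; NaN -> 0, clamps to the i32 range
  ret i32 %r
}
declare i32 @llvm.fptosi.sat.i32.f32(float)
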
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index 7190e16..52f57dc 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -209,9 +209,9 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-LABEL: utesth_f16i32:
-; CHECK: .functype utesth_f16i32 (f32, f32, f32, f32) -> (v128)
+define <4 x i32> @utest_f16i32(<4 x half> %x) {
+; CHECK-LABEL: utest_f16i32:
+; CHECK: .functype utest_f16i32 (f32, f32, f32, f32) -> (v128)
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 1
; CHECK-NEXT: call __truncsfhf2
@@ -513,9 +513,9 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-LABEL: utesth_f16i16:
-; CHECK: .functype utesth_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
+define <8 x i16> @utest_f16i16(<8 x half> %x) {
+; CHECK-LABEL: utest_f16i16:
+; CHECK: .functype utest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
@@ -1295,9 +1295,9 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64(<2 x half> %x) {
-; CHECK-LABEL: utesth_f16i64:
-; CHECK: .functype utesth_f16i64 (f32, f32) -> (v128)
+define <2 x i64> @utest_f16i64(<2 x half> %x) {
+; CHECK-LABEL: utest_f16i64:
+; CHECK: .functype utest_f16i64 (f32, f32) -> (v128)
; CHECK-NEXT: .local i32, i64, i64, i64, i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: global.get __stack_pointer
@@ -1649,9 +1649,9 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-LABEL: utesth_f16i32_mm:
-; CHECK: .functype utesth_f16i32_mm (f32, f32, f32, f32) -> (v128)
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) {
+; CHECK-LABEL: utest_f16i32_mm:
+; CHECK: .functype utest_f16i32_mm (f32, f32, f32, f32) -> (v128)
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 1
; CHECK-NEXT: call __truncsfhf2
@@ -1938,9 +1938,9 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-LABEL: utesth_f16i16_mm:
-; CHECK: .functype utesth_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
+; CHECK-LABEL: utest_f16i16_mm:
+; CHECK: .functype utest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
@@ -2673,9 +2673,9 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
-; CHECK-LABEL: utesth_f16i64_mm:
-; CHECK: .functype utesth_f16i64_mm (f32, f32) -> (v128)
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: utest_f16i64_mm:
+; CHECK: .functype utest_f16i64_mm (f32, f32) -> (v128)
; CHECK-NEXT: .local i32, i64, i64, i64, i64
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: global.get __stack_pointer
@@ -2810,6 +2810,48 @@ entry:
ret <2 x i64> %conv6
}
+; i32 non-saturating
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) {
+; CHECK-LABEL: ustest_f16i32_nsat:
+; CHECK: .functype ustest_f16i32_nsat (f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT: .local v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: v128.const 0, 0, 0, 0
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ ret <4 x i32> %spec.store.select7
+}
+
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
index dec829f..44cf4e8 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
@@ -911,7 +911,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 16(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: paddb (%rdx), %xmm0
; SSE2-NEXT: movdqa %xmm0, (%rcx)
@@ -1898,7 +1898,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 32(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
@@ -4155,7 +4155,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 48(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: paddb (%rdx), %xmm2
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
index 3d4cddb..89b5c33 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -769,7 +769,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
; SSE2-LABEL: vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: paddb (%rsi), %xmm1
; SSE2-NEXT: movdqa %xmm1, (%rdx)
@@ -1522,7 +1522,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; SSE2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
@@ -3335,7 +3335,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE2-NEXT: paddb (%rsi), %xmm2
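
The pshufd mask updates in these two files ([0,0,1,1] becoming [0,0,0,0]) are equivalence-preserving in context: the only consumer of that shuffle is a punpckldq, which reads just lanes 0 and 1 of the operand, and both masks place element 0 in those lanes; the splat form is simply the more canonical shuffle (the same canonicalization shows up in shuffle-of-splat-multiuses.ll below). In IR terms:

define void @masks(<4 x i32> %v, ptr %p, ptr %q) {
  %a = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %b = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> zeroinitializer
  store <4 x i32> %a, ptr %p   ; lanes: v0, v0, v1, v1
  store <4 x i32> %b, ptr %q   ; lanes: v0, v0, v0, v0 -- lanes 0 and 1 agree
  ret void
}
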
diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll
index 3f5ec7b..67483be 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat.ll
@@ -161,8 +161,8 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i32:
+define i32 @utest_f16i32(half %x) nounwind {
+; CHECK-LABEL: utest_f16i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __extendhfsf2@PLT
@@ -360,8 +360,8 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i16:
+define i16 @utest_f16i16(half %x) nounwind {
+; CHECK-LABEL: utest_f16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __extendhfsf2@PLT
@@ -566,8 +566,8 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i64:
+define i64 @utest_f16i64(half %x) nounwind {
+; CHECK-LABEL: utest_f16i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __fixunshfti@PLT
@@ -762,8 +762,8 @@ entry:
ret i32 %conv6
}
-define i32 @utesth_f16i32_mm(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i32_mm:
+define i32 @utest_f16i32_mm(half %x) nounwind {
+; CHECK-LABEL: utest_f16i32_mm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __extendhfsf2@PLT
@@ -946,8 +946,8 @@ entry:
ret i16 %conv6
}
-define i16 @utesth_f16i16_mm(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i16_mm:
+define i16 @utest_f16i16_mm(half %x) nounwind {
+; CHECK-LABEL: utest_f16i16_mm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __extendhfsf2@PLT
@@ -1131,8 +1131,8 @@ entry:
ret i64 %conv6
}
-define i64 @utesth_f16i64_mm(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i64_mm:
+define i64 @utest_f16i64_mm(half %x) nounwind {
+; CHECK-LABEL: utest_f16i64_mm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __fixunshfti@PLT
@@ -1170,6 +1170,27 @@ entry:
ret i64 %conv6
}
+; i32 non-saturating
+
+define i32 @ustest_f16i32_nsat(half %x) nounwind {
+; CHECK-LABEL: ustest_f16i32_nsat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq __extendhfsf2@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %ecx
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: sarl $31, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: andl %ecx, %eax
+; CHECK-NEXT: cmovlel %edx, %eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: retq
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ ret i32 %spec.store.select7
+}
+
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
index 1a2cfd6..991ce33 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
@@ -747,8 +747,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i32:
+define <4 x i32> @utest_f16i32(<4 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: subq $72, %rsp
; SSE-NEXT: movaps %xmm0, %xmm1
@@ -835,7 +835,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
; SSE-NEXT: addq $72, %rsp
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i32:
+; AVX2-LABEL: utest_f16i32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2
@@ -893,7 +893,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i32:
+; AVX512-LABEL: utest_f16i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0
@@ -1338,8 +1338,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i16:
+define <8 x i16> @utest_f16i16(<8 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i16:
; SSE: # %bb.0: # %entry
; SSE-NEXT: subq $72, %rsp
; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
@@ -1436,7 +1436,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind {
; SSE-NEXT: addq $72, %rsp
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i16:
+; AVX2-LABEL: utest_f16i16:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
@@ -1453,7 +1453,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i16:
+; AVX512-LABEL: utest_f16i16:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
; AVX512-NEXT: vcvttps2udq %ymm0, %ymm0
@@ -2456,8 +2456,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i64:
+define <2 x i64> @utest_f16i64(<2 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i64:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
@@ -2483,7 +2483,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind {
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i64:
+; AVX2-LABEL: utest_f16i64:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: pushq %rbx
@@ -2508,7 +2508,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind {
; AVX2-NEXT: popq %r14
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i64:
+; AVX512-LABEL: utest_f16i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: pushq %r14
; AVX512-NEXT: pushq %rbx
@@ -3359,8 +3359,8 @@ entry:
ret <4 x i32> %conv6
}
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i32_mm:
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i32_mm:
; SSE: # %bb.0: # %entry
; SSE-NEXT: subq $72, %rsp
; SSE-NEXT: movaps %xmm0, %xmm1
@@ -3447,7 +3447,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
; SSE-NEXT: addq $72, %rsp
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i32_mm:
+; AVX2-LABEL: utest_f16i32_mm:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrlq $48, %xmm0, %xmm1
; AVX2-NEXT: vcvtph2ps %xmm1, %xmm2
@@ -3505,7 +3505,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i32_mm:
+; AVX512-LABEL: utest_f16i32_mm:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0
@@ -3935,8 +3935,8 @@ entry:
ret <8 x i16> %conv6
}
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i16_mm:
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i16_mm:
; SSE: # %bb.0: # %entry
; SSE-NEXT: subq $72, %rsp
; SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
@@ -4033,7 +4033,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind {
; SSE-NEXT: addq $72, %rsp
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i16_mm:
+; AVX2-LABEL: utest_f16i16_mm:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
@@ -4050,7 +4050,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i16_mm:
+; AVX512-LABEL: utest_f16i16_mm:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
; AVX512-NEXT: vcvttps2udq %ymm0, %ymm0
@@ -4820,8 +4820,8 @@ entry:
ret <2 x i64> %conv6
}
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i64_mm:
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i64_mm:
; SSE: # %bb.0: # %entry
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
@@ -4847,7 +4847,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind {
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
;
-; AVX2-LABEL: utesth_f16i64_mm:
+; AVX2-LABEL: utest_f16i64_mm:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: pushq %rbx
@@ -4872,7 +4872,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind {
; AVX2-NEXT: popq %r14
; AVX2-NEXT: retq
;
-; AVX512-LABEL: utesth_f16i64_mm:
+; AVX512-LABEL: utest_f16i64_mm:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: pushq %r14
; AVX512-NEXT: pushq %rbx
@@ -4974,6 +4974,63 @@ entry:
ret <2 x i64> %conv6
}
+; i32 non-saturating
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) nounwind {
+; SSE-LABEL: ustest_f16i32_nsat:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: subq $72, %rsp
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
+; SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
+; SSE-NEXT: psrlq $48, %xmm0
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT: callq __extendhfsf2@PLT
+; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: cvttps2dq %xmm1, %xmm0
+; SSE-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT: # xmm0 = xmm0[0],mem[0]
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm2
+; SSE-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE-NEXT: pand %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: addq $72, %rsp
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ustest_f16i32_nsat:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ ret <4 x i32> %spec.store.select7
+}
+
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll b/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
index ecd9435..1766b4d 100644
--- a/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
+++ b/llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
@@ -58,7 +58,7 @@ define <8 x float> @foo8(<8 x float> %v, ptr%p) nounwind {
define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask:
; AVX2: # %bb.0:
-; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -68,7 +68,7 @@ define <4 x i32> @undef_splatmask(<4 x i32> %v) nounwind {
define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask2:
; AVX2: # %bb.0:
-; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
@@ -78,7 +78,7 @@ define <4 x i32> @undef_splatmask2(<4 x i32> %v) nounwind {
define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind {
; AVX2-LABEL: undef_splatmask3:
; AVX2: # %bb.0:
-; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,2,2,2]
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 2, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 3>
@@ -88,7 +88,7 @@ define <4 x i32> @undef_splatmask3(<4 x i32> %v) nounwind {
define <4 x i32> @undef_splatmask4(<4 x i32> %v, ptr %p) nounwind {
; AVX2-LABEL: undef_splatmask4:
; AVX2: # %bb.0:
-; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,2,2,2]
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX2-NEXT: vmovaps %xmm0, (%rdi)
; AVX2-NEXT: vmovaps %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll b/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll
index 209d6a5..93a692c 100644
--- a/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-cmp-128.ll
@@ -1911,13 +1911,13 @@ define <2 x i64> @test_v2f64_ogt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmoval %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
; SSE-32-NEXT: cmoval %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2031,13 +2031,13 @@ define <2 x i64> @test_v2f64_oge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovael %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
; SSE-32-NEXT: cmovael %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2151,13 +2151,13 @@ define <2 x i64> @test_v2f64_olt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmoval %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
; SSE-32-NEXT: cmoval %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2269,13 +2269,13 @@ define <2 x i64> @test_v2f64_ole_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovael %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
; SSE-32-NEXT: cmovael %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2680,13 +2680,13 @@ define <2 x i64> @test_v2f64_ugt_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovbl %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
; SSE-32-NEXT: cmovbl %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2798,13 +2798,13 @@ define <2 x i64> @test_v2f64_uge_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovbel %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: ucomisd %xmm2, %xmm4
; SSE-32-NEXT: cmovbel %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -2916,13 +2916,13 @@ define <2 x i64> @test_v2f64_ult_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovbl %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
; SSE-32-NEXT: cmovbl %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
@@ -3036,13 +3036,13 @@ define <2 x i64> @test_v2f64_ule_q(<2 x i64> %a, <2 x i64> %b, <2 x double> %f1,
; SSE-32-NEXT: movl $0, %edx
; SSE-32-NEXT: cmovbel %ecx, %edx
; SSE-32-NEXT: movd %edx, %xmm3
-; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-32-NEXT: ucomisd %xmm4, %xmm2
; SSE-32-NEXT: cmovbel %ecx, %eax
; SSE-32-NEXT: movd %eax, %xmm2
-; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE-32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; SSE-32-NEXT: pand %xmm3, %xmm0
; SSE-32-NEXT: pandn %xmm1, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
index 9ecc629..b378dce 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
@@ -162,7 +162,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: pslld $23, %xmm1
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -182,7 +182,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: pslld $23, %xmm1
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -200,7 +200,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
@@ -277,7 +277,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT: retq
;
@@ -289,7 +289,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X86-SSE2-NEXT: pslld $23, %xmm1
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll
index 322ebe2..06ff7e7 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll
@@ -250,7 +250,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt)
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: pandn %xmm4, %xmm5
@@ -286,7 +286,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm3 = [31,31,31,31]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pandn %xmm3, %xmm4
@@ -316,7 +316,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
@@ -423,7 +423,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpshld %xmm4, %xmm0, %xmm0
@@ -450,7 +450,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
; X86-SSE2-NEXT: pandn %xmm4, %xmm5
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
index 178c02f..ef5ffe4 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
@@ -172,7 +172,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: psubd %xmm1, %xmm2
; SSE2-NEXT: pslld $23, %xmm2
@@ -194,7 +194,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: psubd %xmm1, %xmm2
; SSE41-NEXT: pslld $23, %xmm2
@@ -214,7 +214,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
@@ -293,7 +293,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
@@ -309,7 +309,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X86-SSE2-NEXT: pxor %xmm2, %xmm2
; X86-SSE2-NEXT: psubd %xmm1, %xmm2
; X86-SSE2-NEXT: pslld $23, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
index 372deb05..2d8670a 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
@@ -251,7 +251,7 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt)
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: pand %xmm4, %xmm5
@@ -287,7 +287,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE41-NEXT: pmovsxbd {{.*#+}} xmm3 = [31,31,31,31]
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pand %xmm3, %xmm4
@@ -317,7 +317,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
@@ -425,7 +425,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [31,31,31,31]
; XOPAVX1-NEXT: vpandn %xmm3, %xmm2, %xmm4
; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
@@ -452,7 +452,7 @@ define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [31,31,31,31]
; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
; X86-SSE2-NEXT: pand %xmm4, %xmm5
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index f57efb4..1e11ea9 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -1409,11 +1409,11 @@ define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) {
; X86-SSE2-NEXT: movzbl %al, %eax
; X86-SSE2-NEXT: negl %eax
; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; X86-SSE2-NEXT: andl $1, %ecx
; X86-SSE2-NEXT: negl %ecx
; X86-SSE2-NEXT: movd %ecx, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index bd1a48b..7b0f1c9 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -2555,7 +2555,7 @@ entry:
define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
; SSE2-LABEL: splatshuf_zext_v4i64:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -2563,7 +2563,7 @@ define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
;
; SSSE3-LABEL: splatshuf_zext_v4i64:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movdqa %xmm0, %xmm1
@@ -2571,7 +2571,7 @@ define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
;
; SSE41-LABEL: splatshuf_zext_v4i64:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
index 19a31a6..31ed745 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
@@ -911,7 +911,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 16(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: paddb (%rdx), %xmm0
; SSE2-NEXT: movdqa %xmm0, (%rcx)
@@ -1898,7 +1898,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 32(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
@@ -4610,7 +4610,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: paddb 48(%rsi), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pxor %xmm1, %xmm1
@@ -6544,7 +6544,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: paddb (%rsi), %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movdqa 16(%rdx), %xmm1
; SSE2-NEXT: paddb %xmm0, %xmm1
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
index 239472c..5b4cdd2 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -769,7 +769,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
; SSE2-LABEL: vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: paddb (%rsi), %xmm1
; SSE2-NEXT: movdqa %xmm1, (%rdx)
@@ -1522,7 +1522,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
; SSE2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
@@ -3660,7 +3660,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
; SSE2-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = mem[0,0,0,0]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-NEXT: pxor %xmm0, %xmm0
@@ -5250,7 +5250,7 @@ define void @vec512_i16_widen_to_i256_factor16_broadcast_to_v2i256_factor2(ptr %
define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.elt.ptr, ptr %out.vec.bias.ptr, ptr %out.vec.ptr) nounwind {
; SSE2-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,1,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movdqa 16(%rsi), %xmm1
diff --git a/llvm/test/ThinLTO/X86/memprof-dups.ll b/llvm/test/ThinLTO/X86/memprof-dups.ll
new file mode 100644
index 0000000..8accc83
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-dups.ll
@@ -0,0 +1,138 @@
+;; Check that spurious duplicate (identical) clones are simply created as
+;; aliases to the first identical copy, rather than creating multiple clones
+;; that call the same callee clones or have the same allocation types. This
+;; currently happens in some cases due to additional cloning performed during
+;; function assignment.
+;;
+;; The ThinLTO combined summary was manually modified, as described in the
+;; comments below, to force multiple identical copies of various functions.
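+;;
+;; For illustration, a minimal sketch of the expected shape (hypothetical
+;; names, not checked by this test): given two identical clones of @f, the
+;; duplicate is emitted as an alias to the first materialized copy, e.g.
+;;   define ptr @f.memprof.1() { ... }
+;;   @f.memprof.2 = alias ptr (), ptr @f.memprof.1
+;; rather than as a second identical function body.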
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+; RUN: rm -rf %t && split-file %s %t && cd %t
+; RUN: llvm-as src.ll -o src.o
+; RUN: llvm-as src.o.thinlto.ll -o src.o.thinlto.bc
+; RUN: opt -passes=memprof-context-disambiguation -stats \
+; RUN: -memprof-import-summary=src.o.thinlto.bc \
+; RUN: -pass-remarks=memprof-context-disambiguation \
+; RUN: src.o -S 2>&1 | FileCheck %s
+
+; CHECK: created clone bar.memprof.1
+;; Duplicates of bar are created as declarations since bar is available_externally,
+;; and the compiler does not fully support available_externally aliases.
+; CHECK: created clone decl bar.memprof.2
+; CHECK: created clone decl bar.memprof.3
+; CHECK: created clone _Z3foov.memprof.1
+;; Duplicates of _Z3foov are created as aliases to the appropriate materialized
+;; clone of _Z3foov.
+; CHECK: created clone alias _Z3foov.memprof.2
+; CHECK: created clone alias _Z3foov.memprof.3
+
+;--- src.ll
+source_filename = "memprof-distrib-alias.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@_Z8fooAliasv = alias ptr (...), ptr @_Z3foov
+
+;; Original alias is unchanged.
+; CHECK: @_Z8fooAliasv = alias ptr (...), ptr @_Z3foov{{$}}
+;; We create an equivalent alias for the cloned def @_Z3foov.memprof.1.
+; CHECK: @_Z8fooAliasv.memprof.1 = alias ptr (...), ptr @_Z3foov.memprof.1
+
+;; We should also create aliases for the duplicate clones of _Z3foov
+;; (_Z3foov.memprof.2 and _Z3foov.memprof.3) to the versions they are duplicates
+;; of, and ditto for the associated @_Z8fooAliasv clones.
+;;
+;; _Z3foov.memprof.2 is a duplicate of original _Z3foov, and thus so is _Z8fooAliasv.memprof.2
+; CHECK: @_Z3foov.memprof.2 = alias ptr (), ptr @_Z3foov{{$}}
+; CHECK: @_Z8fooAliasv.memprof.2 = alias ptr (...), ptr @_Z3foov{{$}}
+;; _Z3foov.memprof.3 is a duplicate of _Z3foov.memprof.1, and thus so is _Z8fooAliasv.memprof.3
+; CHECK: @_Z3foov.memprof.3 = alias ptr (), ptr @_Z3foov.memprof.1
+; CHECK: @_Z8fooAliasv.memprof.3 = alias ptr (...), ptr @_Z3foov.memprof.1
+
+; CHECK-LABEL: define i32 @main()
+define i32 @main() #0 {
+entry:
+ ;; The first call to bar does not allocate cold memory. It should call
+ ;; the original function, which eventually calls the original allocation
+ ;; decorated with a "notcold" attribute.
+ ; CHECK: call {{.*}} @bar()
+ %call = call ptr @bar(), !callsite !0
+ ;; The second call to bar allocates cold memory. It should call the cloned
+ ;; function which eventually calls a cloned allocation decorated with a
+ ;; "cold" attribute.
+ ; CHECK: call {{.*}} @bar.memprof.1()
+ %call1 = call ptr @bar(), !callsite !1
+ ret i32 0
+}
+
+; CHECK-LABEL: define available_externally i32 @bar()
+define available_externally i32 @bar() #0 {
+entry:
+ ; CHECK: call {{.*}} @_Z8fooAliasv()
+ %call = call ptr @_Z8fooAliasv(), !callsite !8
+ ret i32 0
+}
+
+declare ptr @_Znam(i64)
+
+; CHECK-LABEL: define ptr @_Z3foov()
+define ptr @_Z3foov() #0 {
+entry:
+ ; CHECK: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
+ %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7
+ ret ptr null
+}
+
+; We create an actual clone for bar.memprof.1.
+; CHECK: define available_externally i32 @bar.memprof.1()
+; CHECK: call {{.*}} @_Z3foov.memprof.1()
+
+;; bar.memprof.2 and bar.memprof.3 are duplicates (of original bar and
+;; bar.memprof.1, respectively). However, they are available externally,
+;; so rather than create an alias we simply create a declaration, since the
+;; compiler does not fully support available_externally aliases.
+; CHECK: declare i32 @bar.memprof.2
+; CHECK: declare i32 @bar.memprof.3
+
+; We create an actual clone for _Z3foov.memprof.1.
+; CHECK: define {{.*}} @_Z3foov.memprof.1()
+; CHECK: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
+
+; CHECK: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
+; CHECK: attributes #[[COLD]] = { "memprof"="cold" }
+
+; CHECK: 4 memprof-context-disambiguation - Number of function clone duplicates detected during ThinLTO backend
+; CHECK: 2 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
+
+attributes #0 = { noinline optnone }
+
+!0 = !{i64 8632435727821051414}
+!1 = !{i64 -3421689549917153178}
+!2 = !{!3, !5}
+!3 = !{!4, !"notcold"}
+!4 = !{i64 9086428284934609951, i64 1234, i64 8632435727821051414}
+!5 = !{!6, !"cold"}
+!6 = !{i64 9086428284934609951, i64 1234, i64 -3421689549917153178}
+!7 = !{i64 9086428284934609951}
+!8 = !{i64 1234}
+
+;--- src.o.thinlto.ll
+; ModuleID = 'src.o.thinlto.ll'
+source_filename = "src.o.thinlto.bc"
+
+^0 = module: (path: "src.o", hash: (1720506022, 1575514144, 2506794664, 3599359797, 3160884478))
+^1 = gv: (guid: 6583049656999245004, summaries: (alias: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), aliasee: ^2)))
+;; Summary for _Z3foov, where the allocs part has been manually modified to add
+;; two additional clones that are the same as the prior versions:
+;; ... allocs: ((versions: (notcold, cold, notcold, cold), ...
+^2 = gv: (guid: 9191153033785521275, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (notcold, cold, notcold, cold), memProf: ((type: notcold, stackIds: (1234, 8632435727821051414)), (type: cold, stackIds: (1234, 15025054523792398438))))))))
+^3 = gv: (guid: 15822663052811949562, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 3, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^4)), callsites: ((callee: ^4, clones: (0), stackIds: (8632435727821051414)), (callee: ^4, clones: (1), stackIds: (15025054523792398438))))))
+;; Summary for bar, where the callsites part has been manually modified to add
+;; two additional clones that are the same as the prior clones:
+;; ... callsites: ((callee: ^1, clones: (0, 1, 0, 1), ...
+^4 = gv: (guid: 16434608426314478903, summaries: (function: (module: ^0, flags: (linkage: available_externally, visibility: default, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0, importType: definition), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0, 1, 0, 1), stackIds: (1234))))))
+^6 = flags: 353
+^7 = blockcount: 0
diff --git a/llvm/test/ThinLTO/X86/memprof_imported_internal.ll b/llvm/test/ThinLTO/X86/memprof_imported_internal.ll
index a6e254c..09784f8 100644
--- a/llvm/test/ThinLTO/X86/memprof_imported_internal.ll
+++ b/llvm/test/ThinLTO/X86/memprof_imported_internal.ll
@@ -63,14 +63,14 @@
; CHECK: tail call void @_ZL9internal1v.llvm.3267420853450984672()
; CHECK: tail call void @_ZL9internal2v.llvm.3267420853450984672.memprof.1()
; CHECK-LABEL: declare void @_ZL9internal2v.llvm.3267420853450984672.memprof.1()
-;; We should have 2 clones of src2.cc's internal1 function, calling a single
-;; clone of external2.
+;; We should have one clone of src2.cc's internal1 function, calling a single
+;; clone of external2, and a second clone that was detected to be a duplicate
+;; of the first and therefore becomes a declaration (since this function is
+;; available_externally; in the module with the prevailing copy it would be
+;; an alias to clone 1).
; CHECK-LABEL: define available_externally void @_ZL9internal1v.llvm.3267420853450984672.memprof.1()
; CHECK: tail call void @_Z9external2v.memprof.1()
; CHECK: tail call void @_Z9external2v.memprof.1()
-; CHECK-LABEL: define available_externally void @_ZL9internal1v.llvm.3267420853450984672.memprof.2()
-; CHECK: tail call void @_Z9external2v.memprof.1()
-; CHECK: tail call void @_Z9external2v.memprof.1()
+; CHECK: declare void @_ZL9internal1v.llvm.3267420853450984672.memprof.2()
; CHECK-NOT: memprof
;--- src1.ll
diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
index 8f76834..67ab167 100644
--- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -16,6 +16,14 @@ define <2 x double> @load_zeromask(ptr %ptr, <2 x double> %passthru) {
ret <2 x double> %res
}
+define <2 x double> @load_zero_withpoison_mask(ptr %ptr, <2 x double> %passthru) {
+; CHECK-LABEL: @load_zero_withpoison_mask(
+; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]]
+;
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %ptr, i32 1, <2 x i1> <i1 0, i1 poison>, <2 x double> %passthru)
+ ret <2 x double> %res
+}
+
define <2 x double> @load_onemask(ptr %ptr, <2 x double> %passthru) {
; CHECK-LABEL: @load_onemask(
; CHECK-NEXT: [[UNMASKEDLOAD:%.*]] = load <2 x double>, ptr [[PTR:%.*]], align 2
@@ -150,6 +158,14 @@ define void @store_zeromask(ptr %ptr, <2 x double> %val) {
ret void
}
+define void @store_poisonmask(ptr %ptr, <2 x double> %val) {
+; CHECK-LABEL: @store_poisonmask(
+; CHECK-NEXT: ret void
+;
+ call void @llvm.masked.store.v2f64.p0(<2 x double> %val, ptr %ptr, i32 4, <2 x i1> splat(i1 poison))
+ ret void
+}
+
define void @store_onemask(ptr %ptr, <2 x double> %val) {
; CHECK-LABEL: @store_onemask(
; CHECK-NEXT: store <2 x double> [[VAL:%.*]], ptr [[PTR:%.*]], align 4
@@ -159,6 +175,15 @@ define void @store_onemask(ptr %ptr, <2 x double> %val) {
ret void
}
+define void @store_one_withpoison_mask(ptr %ptr, <2 x double> %val) {
+; CHECK-LABEL: @store_one_withpoison_mask(
+; CHECK-NEXT: store <2 x double> [[VAL:%.*]], ptr [[PTR:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ call void @llvm.masked.store.v2f64.p0(<2 x double> %val, ptr %ptr, i32 4, <2 x i1> <i1 1, i1 poison>)
+ ret void
+}
+
define void @store_demandedelts(ptr %ptr, double %val) {
; CHECK-LABEL: @store_demandedelts(
; CHECK-NEXT: [[VALVEC1:%.*]] = insertelement <2 x double> poison, double [[VAL:%.*]], i64 0
@@ -189,6 +214,13 @@ define <2 x double> @gather_zeromask(<2 x ptr> %ptrs, <2 x double> %passthru) {
ret <2 x double> %res
}
+define <2 x double> @gather_zero_withpoison_mask(<2 x ptr> %ptrs, <2 x double> %passthru) {
+; CHECK-LABEL: @gather_zero_withpoison_mask(
+; CHECK-NEXT: ret <2 x double> [[PASSTHRU:%.*]]
+;
+ %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> <i1 0, i1 poison>, <2 x double> %passthru)
+ ret <2 x double> %res
+}
define <2 x double> @gather_onemask(<2 x ptr> %ptrs, <2 x double> %passthru) {
; CHECK-LABEL: @gather_onemask(
@@ -199,6 +231,15 @@ define <2 x double> @gather_onemask(<2 x ptr> %ptrs, <2 x double> %passthru) {
ret <2 x double> %res
}
+define <2 x double> @gather_one_withpoisonmask(<2 x ptr> %ptrs, <2 x double> %passthru) {
+; CHECK-LABEL: @gather_one_withpoisonmask(
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[PTRS:%.*]], i32 4, <2 x i1> <i1 true, i1 poison>, <2 x double> [[PASSTHRU:%.*]])
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> <i1 true, i1 poison>, <2 x double> %passthru)
+ ret <2 x double> %res
+}
+
define <4 x double> @gather_lane2(ptr %base, double %pt) {
; CHECK-LABEL: @gather_lane2(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, ptr [[BASE:%.*]], <4 x i64> <i64 poison, i64 poison, i64 2, i64 poison>
@@ -257,6 +298,23 @@ define void @scatter_zeromask(<2 x ptr> %ptrs, <2 x double> %val) {
ret void
}
+define void @scatter_zero_withpoison_mask(<2 x ptr> %ptrs, <2 x double> %val) {
+; CHECK-LABEL: @scatter_zero_withpoison_mask(
+; CHECK-NEXT: ret void
+;
+ call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> <i1 0, i1 poison>)
+ ret void
+}
+
+define void @scatter_one_withpoison_mask(<2 x ptr> %ptrs, <2 x double> %val) {
+; CHECK-LABEL: @scatter_one_withpoison_mask(
+; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[VAL:%.*]], <2 x ptr> [[PTRS:%.*]], i32 8, <2 x i1> <i1 true, i1 poison>)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> <i1 1, i1 poison>)
+ ret void
+}
+
define void @scatter_demandedelts(ptr %ptr, double %val) {
; CHECK-LABEL: @scatter_demandedelts(
; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, ptr [[PTR:%.*]], <2 x i64> <i64 0, i64 poison>
diff --git a/llvm/test/Transforms/InstCombine/pr83947.ll b/llvm/test/Transforms/InstCombine/pr83947.ll
index 1906502..679230a4 100644
--- a/llvm/test/Transforms/InstCombine/pr83947.ll
+++ b/llvm/test/Transforms/InstCombine/pr83947.ll
@@ -24,7 +24,6 @@ define void @masked_scatter2() {
define void @masked_scatter3() {
; CHECK-LABEL: define void @masked_scatter3() {
-; CHECK-NEXT: store i32 0, ptr @c, align 4
; CHECK-NEXT: ret void
;
call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> zeroinitializer, <2 x ptr> splat (ptr @c), i32 4, <2 x i1> undef)
@@ -50,7 +49,6 @@ define void @masked_scatter5() {
define void @masked_scatter6() {
; CHECK-LABEL: define void @masked_scatter6() {
-; CHECK-NEXT: store i32 0, ptr @c, align 4
; CHECK-NEXT: ret void
;
call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> zeroinitializer, <2 x ptr> splat (ptr @c), i32 4, <2 x i1> <i1 undef, i1 false>)
diff --git a/llvm/test/Transforms/InstCombine/select-and-cmp.ll b/llvm/test/Transforms/InstCombine/select-and-cmp.ll
index 50e1493..26c04ad 100644
--- a/llvm/test/Transforms/InstCombine/select-and-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/select-and-cmp.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
define i32 @select_and_icmp(i32 %x, i32 %y, i32 %z) {
@@ -114,34 +114,34 @@ define i32 @select_and_icmp_inv(i32 %x, i32 %y, i32 %z) {
; Below used to be negative tests in InstSimplify, but they are no longer negative cases here.
-define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) {
+define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) !prof !0 {
; CHECK-LABEL: @select_and_icmp_pred_bad_1(
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 [[X:%.*]]
;
%A = icmp eq i32 %x, %z
%B = icmp ne i32 %y, %z
%C = and i1 %A, %B
- %D = select i1 %C, i32 %z, i32 %x
+ %D = select i1 %C, i32 %z, i32 %x, !prof !1
ret i32 %D
}
-define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) {
+define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) !prof !0 {
; CHECK-LABEL: @select_and_icmp_pred_bad_2(
; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp ne i32 %x, %z
%B = icmp eq i32 %y, %z
%C = and i1 %A, %B
- %D = select i1 %C, i32 %z, i32 %x
+ %D = select i1 %C, i32 %z, i32 %x, !prof !1
ret i32 %D
}
define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_and_icmp_pred_bad_3(
-; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X:%.*]], i32 [[Z]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp ne i32 %x, %z
@@ -153,8 +153,8 @@ define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) {
define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_and_icmp_pred_bad_4(
-; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X:%.*]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp eq i32 %x, %z
@@ -166,7 +166,7 @@ define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) {
define i32 @select_and_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_and_icmp_alt_bad_1(
-; CHECK-NEXT: ret i32 [[Z]]
+; CHECK-NEXT: ret i32 [[Z:%.*]]
;
%A = icmp eq i32 %x, %z
%B = icmp ne i32 %y, %z
@@ -177,8 +177,8 @@ define i32 @select_and_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) {
define i32 @select_and_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_and_icmp_alt_bad_2(
-; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X:%.*]], i32 [[Z]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp ne i32 %x, %z
@@ -191,8 +191,8 @@ define i32 @select_and_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) {
define i32 @select_and_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_and_icmp_alt_bad_3(
-; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X:%.*]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp ne i32 %x, %z
@@ -204,8 +204,8 @@ define i32 @select_and_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) {
define i32 @select_and_icmp_alt_bad_4(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_and_icmp_alt_bad_4(
-; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X:%.*]], i32 [[Z]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp eq i32 %x, %z
@@ -322,3 +322,11 @@ define i32 @select_and_icmp_alt_bad_false_val(i32 %x, i32 %y, i32 %z, i32 %k) {
%D = select i1 %C, i32 %x, i32 %k
ret i32 %D
}
+
+!0 = !{!"function_entry_count", i64 1000}
+!1 = !{!"branch_weights", i32 2, i32 3}
+
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+;.
diff --git a/llvm/test/Transforms/InstCombine/select-or-cmp.ll b/llvm/test/Transforms/InstCombine/select-or-cmp.ll
index 72a3747..82b069b 100644
--- a/llvm/test/Transforms/InstCombine/select-or-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/select-or-cmp.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
define i32 @select_or_icmp(i32 %x, i32 %y, i32 %z) {
@@ -114,47 +114,47 @@ define i32 @select_or_icmp_inv(i32 %x, i32 %y, i32 %z) {
; Below used to be negative tests in InstSimplify, but they are no longer negative cases here.
-define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) {
+define i32 @select_and_icmp_pred_bad_1(i32 %x, i32 %y, i32 %z) !prof !0 {
; CHECK-LABEL: @select_and_icmp_pred_bad_1(
-; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X:%.*]], i32 [[Z]], !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp eq i32 %x, %z
%B = icmp ne i32 %y, %z
%C = or i1 %A, %B
- %D = select i1 %C, i32 %z, i32 %x
+ %D = select i1 %C, i32 %z, i32 %x, !prof !1
ret i32 %D
}
-define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) {
+define i32 @select_and_icmp_pred_bad_2(i32 %x, i32 %y, i32 %z) !prof !0 {
; CHECK-LABEL: @select_and_icmp_pred_bad_2(
-; CHECK-NEXT: ret i32 [[Z]]
+; CHECK-NEXT: ret i32 [[Z:%.*]]
;
%A = icmp ne i32 %x, %z
%B = icmp eq i32 %y, %z
%C = or i1 %A, %B
- %D = select i1 %C, i32 %z, i32 %x
+ %D = select i1 %C, i32 %z, i32 %x, !prof !1
ret i32 %D
}
-define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) {
+define i32 @select_and_icmp_pred_bad_3(i32 %x, i32 %y, i32 %z) !prof !0 {
; CHECK-LABEL: @select_and_icmp_pred_bad_3(
-; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[Z]], i32 [[X:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp eq i32 %x, %z
%B = icmp eq i32 %y, %z
%C = or i1 %A, %B
- %D = select i1 %C, i32 %z, i32 %x
+ %D = select i1 %C, i32 %z, i32 %x, !prof !1
ret i32 %D
}
define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_and_icmp_pred_bad_4(
-; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[X:%.*]], i32 [[Z]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp ne i32 %x, %z
@@ -166,8 +166,8 @@ define i32 @select_and_icmp_pred_bad_4(i32 %x, i32 %y, i32 %z) {
define i32 @select_or_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_or_icmp_alt_bad_1(
-; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X:%.*]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp eq i32 %x, %z
@@ -179,7 +179,7 @@ define i32 @select_or_icmp_alt_bad_1(i32 %x, i32 %y, i32 %z) {
define i32 @select_or_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_or_icmp_alt_bad_2(
-; CHECK-NEXT: ret i32 [[X]]
+; CHECK-NEXT: ret i32 [[X:%.*]]
;
%A = icmp ne i32 %x, %z
%B = icmp eq i32 %y, %z
@@ -190,8 +190,8 @@ define i32 @select_or_icmp_alt_bad_2(i32 %x, i32 %y, i32 %z) {
define i32 @select_or_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_or_icmp_alt_bad_3(
-; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X]], i32 [[Z]]
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B]], i32 [[X:%.*]], i32 [[Z]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp eq i32 %x, %z
@@ -203,8 +203,8 @@ define i32 @select_or_icmp_alt_bad_3(i32 %x, i32 %y, i32 %z) {
define i32 @select_or_icmp_alt_bad_4(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @select_or_icmp_alt_bad_4(
-; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z]]
-; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X]]
+; CHECK-NEXT: [[B_NOT:%.*]] = icmp eq i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[D:%.*]] = select i1 [[B_NOT]], i32 [[Z]], i32 [[X:%.*]]
; CHECK-NEXT: ret i32 [[D]]
;
%A = icmp ne i32 %x, %z
@@ -321,3 +321,11 @@ define i32 @select_or_icmp_alt_bad_false_val(i32 %x, i32 %y, i32 %z, i32 %k) {
%D = select i1 %C, i32 %x, i32 %k
ret i32 %D
}
+
+!0 = !{!"function_entry_count", i64 1000}
+!1 = !{!"branch_weights", i32 2, i32 3}
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 2}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 2, i32 3}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
index 8784873..f5329cf 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
@@ -454,6 +454,132 @@ exit:
ret void
}
+declare i1 @cond()
+
+define double @test_load_used_by_other_load_scev(ptr %ptr.a, ptr %ptr.b, ptr %ptr.c) {
+; I64-LABEL: define double @test_load_used_by_other_load_scev(
+; I64-SAME: ptr [[PTR_A:%.*]], ptr [[PTR_B:%.*]], ptr [[PTR_C:%.*]]) {
+; I64-NEXT: [[ENTRY:.*]]:
+; I64-NEXT: br label %[[OUTER_LOOP:.*]]
+; I64: [[OUTER_LOOP_LOOPEXIT:.*]]:
+; I64-NEXT: br label %[[OUTER_LOOP]]
+; I64: [[OUTER_LOOP]]:
+; I64-NEXT: [[ACCUM:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP29:%.*]], %[[OUTER_LOOP_LOOPEXIT]] ]
+; I64-NEXT: [[COND:%.*]] = call i1 @cond()
+; I64-NEXT: br i1 [[COND]], label %[[INNER_LOOP_PREHEADER:.*]], label %[[EXIT:.*]]
+; I64: [[INNER_LOOP_PREHEADER]]:
+; I64-NEXT: br label %[[VECTOR_PH:.*]]
+; I64: [[VECTOR_PH]]:
+; I64-NEXT: br label %[[VECTOR_BODY:.*]]
+; I64: [[VECTOR_BODY]]:
+; I64-NEXT: [[TMP0:%.*]] = add i64 0, 1
+; I64-NEXT: [[TMP1:%.*]] = add i64 1, 1
+; I64-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[TMP0]]
+; I64-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[TMP1]]
+; I64-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[TMP0]]
+; I64-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[TMP1]]
+; I64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
+; I64-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
+; I64-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[TMP6]]
+; I64-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[TMP7]]
+; I64-NEXT: [[TMP10:%.*]] = load double, ptr [[PTR_A]], align 8
+; I64-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i64 0
+; I64-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; I64-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[BROADCAST_SPLAT]], zeroinitializer
+; I64-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP2]], i64 8
+; I64-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8
+; I64-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP12]], align 8
+; I64-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP13]], align 8
+; I64-NEXT: [[TMP16:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i32 0
+; I64-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[TMP15]], i32 1
+; I64-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP11]], zeroinitializer
+; I64-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x double> poison, double [[ACCUM]], i64 0
+; I64-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT1]], <2 x double> poison, <2 x i32> zeroinitializer
+; I64-NEXT: [[TMP19:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLAT2]], <2 x double> [[TMP18]], <2 x i32> <i32 1, i32 2>
+; I64-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[TMP17]], zeroinitializer
+; I64-NEXT: [[TMP21:%.*]] = fadd <2 x double> [[TMP20]], zeroinitializer
+; I64-NEXT: [[TMP22:%.*]] = fadd <2 x double> [[TMP21]], splat (double 1.000000e+00)
+; I64-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP8]], align 8
+; I64-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP9]], align 8
+; I64-NEXT: [[TMP25:%.*]] = insertelement <2 x double> poison, double [[TMP23]], i32 0
+; I64-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[TMP25]], double [[TMP24]], i32 1
+; I64-NEXT: [[TMP27:%.*]] = fdiv <2 x double> [[TMP26]], [[TMP22]]
+; I64-NEXT: [[TMP28:%.*]] = fsub <2 x double> [[TMP19]], [[TMP27]]
+; I64-NEXT: br label %[[MIDDLE_BLOCK:.*]]
+; I64: [[MIDDLE_BLOCK]]:
+; I64-NEXT: [[TMP29]] = extractelement <2 x double> [[TMP28]], i32 1
+; I64-NEXT: br label %[[OUTER_LOOP_LOOPEXIT]]
+; I64: [[EXIT]]:
+; I64-NEXT: ret double [[ACCUM]]
+;
+; I32-LABEL: define double @test_load_used_by_other_load_scev(
+; I32-SAME: ptr [[PTR_A:%.*]], ptr [[PTR_B:%.*]], ptr [[PTR_C:%.*]]) {
+; I32-NEXT: [[ENTRY:.*]]:
+; I32-NEXT: br label %[[OUTER_LOOP:.*]]
+; I32: [[OUTER_LOOP]]:
+; I32-NEXT: [[ACCUM:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[RESULT:%.*]], %[[INNER_LOOP:.*]] ]
+; I32-NEXT: [[COND:%.*]] = call i1 @cond()
+; I32-NEXT: br i1 [[COND]], label %[[INNER_LOOP]], label %[[EXIT:.*]]
+; I32: [[INNER_LOOP]]:
+; I32-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[OUTER_LOOP]] ], [ [[IV_NEXT:%.*]], %[[INNER_LOOP]] ]
+; I32-NEXT: [[ACCUM_INNER:%.*]] = phi double [ [[ACCUM]], %[[OUTER_LOOP]] ], [ [[MUL1:%.*]], %[[INNER_LOOP]] ]
+; I32-NEXT: [[IDX_PLUS1:%.*]] = add i64 [[IV]], 1
+; I32-NEXT: [[GEP_C:%.*]] = getelementptr i8, ptr [[PTR_C]], i64 [[IDX_PLUS1]]
+; I32-NEXT: [[GEP_A_I64:%.*]] = getelementptr i64, ptr [[PTR_A]], i64 [[IDX_PLUS1]]
+; I32-NEXT: [[LOAD_IDX:%.*]] = load i64, ptr [[GEP_A_I64]], align 8
+; I32-NEXT: [[GEP_B:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[LOAD_IDX]]
+; I32-NEXT: [[LOAD_A:%.*]] = load double, ptr [[PTR_A]], align 8
+; I32-NEXT: [[ADD1:%.*]] = fadd double [[LOAD_A]], 0.000000e+00
+; I32-NEXT: [[GEP_C_OFFSET:%.*]] = getelementptr i8, ptr [[GEP_C]], i64 8
+; I32-NEXT: [[LOAD_C:%.*]] = load double, ptr [[GEP_C_OFFSET]], align 8
+; I32-NEXT: [[MUL1]] = fmul double [[ADD1]], 0.000000e+00
+; I32-NEXT: [[MUL2:%.*]] = fmul double [[LOAD_C]], 0.000000e+00
+; I32-NEXT: [[ADD2:%.*]] = fadd double [[MUL2]], 0.000000e+00
+; I32-NEXT: [[ADD3:%.*]] = fadd double [[ADD2]], 1.000000e+00
+; I32-NEXT: [[LOAD_B:%.*]] = load double, ptr [[GEP_B]], align 8
+; I32-NEXT: [[DIV:%.*]] = fdiv double [[LOAD_B]], [[ADD3]]
+; I32-NEXT: [[RESULT]] = fsub double [[ACCUM_INNER]], [[DIV]]
+; I32-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; I32-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1
+; I32-NEXT: br i1 [[EXITCOND]], label %[[OUTER_LOOP]], label %[[INNER_LOOP]]
+; I32: [[EXIT]]:
+; I32-NEXT: ret double [[ACCUM]]
+;
+entry:
+ br label %outer.loop
+
+outer.loop:
+ %accum = phi double [ 0.0, %entry ], [ %result, %inner.loop ]
+ %cond = call i1 @cond()
+ br i1 %cond, label %inner.loop, label %exit
+
+inner.loop:
+ %iv = phi i64 [ 0, %outer.loop ], [ %iv.next, %inner.loop ]
+ %accum.inner = phi double [ %accum, %outer.loop ], [ %mul1, %inner.loop ]
+ %idx.plus1 = add i64 %iv, 1
+ %gep.c = getelementptr i8, ptr %ptr.c, i64 %idx.plus1
+ %gep.a.i64 = getelementptr i64, ptr %ptr.a, i64 %idx.plus1
+ %load.idx = load i64, ptr %gep.a.i64, align 8
+ %gep.b = getelementptr double, ptr %ptr.b, i64 %load.idx
+ %load.a = load double, ptr %ptr.a, align 8
+ %add1 = fadd double %load.a, 0.000000e+00
+ %gep.c.offset = getelementptr i8, ptr %gep.c, i64 8
+ %load.c = load double, ptr %gep.c.offset, align 8
+ %mul1 = fmul double %add1, 0.000000e+00
+ %mul2 = fmul double %load.c, 0.000000e+00
+ %add2 = fadd double %mul2, 0.000000e+00
+ %add3 = fadd double %add2, 1.000000e+00
+ %load.b = load double, ptr %gep.b, align 8
+ %div = fdiv double %load.b, %add3
+ %result = fsub double %accum.inner, %div
+ %iv.next = add i64 %iv, 1
+ %exitcond = icmp eq i64 %iv, 1
+ br i1 %exitcond, label %outer.loop, label %inner.loop
+
+exit:
+ ret double %accum
+}
+
attributes #0 = { "target-cpu"="znver2" }
!0 = distinct !{!0, !1}
diff --git a/llvm/test/Transforms/NewGVN/pr159918.ll b/llvm/test/Transforms/NewGVN/pr159918.ll
new file mode 100644
index 0000000..3fad6e6
--- /dev/null
+++ b/llvm/test/Transforms/NewGVN/pr159918.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=newgvn < %s | FileCheck %s
+
+; Don't use the returned argument in memory-defining intrinsics.
+define void @wombat(ptr %arg) {
+; CHECK-LABEL: define void @wombat(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[ARG]], align 8
+; CHECK-NEXT: [[CALL:%.*]] = call ptr @llvm.objc.retain(ptr [[LOAD]])
+; CHECK-NEXT: store ptr [[CALL]], ptr [[ARG]], align 8
+; CHECK-NEXT: ret void
+;
+ %load = load ptr, ptr %arg, align 8
+ %call = call ptr @llvm.objc.retain(ptr %load)
+ store ptr %call, ptr %arg, align 8
+ ret void
+}
+
+declare ptr @llvm.objc.retain(ptr returned) #0
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
index ed0bd3f..cf62fd5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
@@ -55,6 +55,54 @@ entry:
ret void
}
+define void @test_add_udiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: @test_add_udiv(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2
+; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
+; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
+; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
+; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
+; CHECK-NEXT: [[RES2:%.*]] = udiv i32 [[V2]], [[Y2]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
+; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %gep1.1 = getelementptr i32, ptr %arr1, i32 1
+ %gep1.2 = getelementptr i32, ptr %arr1, i32 2
+ %gep1.3 = getelementptr i32, ptr %arr1, i32 3
+ %gep2.1 = getelementptr i32, ptr %arr2, i32 1
+ %gep2.2 = getelementptr i32, ptr %arr2, i32 2
+ %gep2.3 = getelementptr i32, ptr %arr2, i32 3
+ %v0 = load i32, ptr %arr1
+ %v1 = load i32, ptr %gep1.1
+ %v2 = load i32, ptr %gep1.2
+ %v3 = load i32, ptr %gep1.3
+ %y0 = add nsw i32 %a0, 1146
+ %y1 = add nsw i32 %a1, 146
+ %y2 = add nsw i32 %a2, 42
+ %y3 = add nsw i32 %a3, 0
+ %res0 = add nsw i32 %v0, %y0
+ %res1 = add nsw i32 %v1, %y1
+ %res2 = udiv i32 %v2, %y2
+ %res3 = add nsw i32 %v3, %y3
+ store i32 %res0, ptr %arr2
+ store i32 %res1, ptr %gep2.1
+ store i32 %res2, ptr %gep2.2
+ store i32 %res3, ptr %gep2.3
+ ret void
+}
+
;; Similar test, but now div/rem is main opcode and not the alternate one. Same issue.
define void @test_urem_add(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: @test_urem_add(
@@ -114,3 +162,56 @@ entry:
store i32 %res3, ptr %gep2.3
ret void
}
+
+define void @test_srem_add(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+; CHECK-LABEL: @test_srem_add(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 1
+; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1]], i32 2
+; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
+; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 1
+; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2]], i32 2
+; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3
+; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[ARR1]], align 4
+; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[GEP1_1]], align 4
+; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
+; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
+; CHECK-NEXT: [[Y0:%.*]] = add nsw i32 [[A0:%.*]], 1146
+; CHECK-NEXT: [[Y1:%.*]] = add nsw i32 [[A1:%.*]], 146
+; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
+; CHECK-NEXT: [[Y3:%.*]] = add nsw i32 [[A3:%.*]], 0
+; CHECK-NEXT: [[RES0:%.*]] = srem i32 [[V0]], [[Y0]]
+; CHECK-NEXT: [[RES1:%.*]] = srem i32 [[V1]], [[Y1]]
+; CHECK-NEXT: [[RES2:%.*]] = srem i32 [[V2]], [[Y2]]
+; CHECK-NEXT: [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
+; CHECK-NEXT: store i32 [[RES0]], ptr [[ARR2]], align 4
+; CHECK-NEXT: store i32 [[RES1]], ptr [[GEP2_1]], align 4
+; CHECK-NEXT: store i32 [[RES2]], ptr [[GEP2_2]], align 4
+; CHECK-NEXT: store i32 [[RES3]], ptr [[GEP2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %gep1.1 = getelementptr i32, ptr %arr1, i32 1
+ %gep1.2 = getelementptr i32, ptr %arr1, i32 2
+ %gep1.3 = getelementptr i32, ptr %arr1, i32 3
+ %gep2.1 = getelementptr i32, ptr %arr2, i32 1
+ %gep2.2 = getelementptr i32, ptr %arr2, i32 2
+ %gep2.3 = getelementptr i32, ptr %arr2, i32 3
+ %v0 = load i32, ptr %arr1
+ %v1 = load i32, ptr %gep1.1
+ %v2 = load i32, ptr %gep1.2
+ %v3 = load i32, ptr %gep1.3
+ %y0 = add nsw i32 %a0, 1146
+ %y1 = add nsw i32 %a1, 146
+ %y2 = add nsw i32 %a2, 42
+ %y3 = add nsw i32 %a3, 0
+ %res0 = srem i32 %v0, %y0
+ %res1 = srem i32 %v1, %y1
+ %res2 = srem i32 %v2, %y2
+ %res3 = add nsw i32 %v3, %y3
+ store i32 %res0, ptr %arr2
+ store i32 %res1, ptr %gep2.1
+ store i32 %res2, ptr %gep2.2
+ store i32 %res3, ptr %gep2.3
+ ret void
+}
diff --git a/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp b/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp
index b6e8567..497da8f 100644
--- a/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp
+++ b/llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp
@@ -46,8 +46,8 @@ public:
MAM.registerPass([VocabVector = std::move(VocabVector)]() mutable {
return IR2VecVocabAnalysis(std::move(VocabVector));
});
- IR2VecVocab =
- new ir2vec::Vocabulary(ir2vec::Vocabulary::createDummyVocabForTest(1));
+ IR2VecVocab = std::make_unique<ir2vec::Vocabulary>(
+ ir2vec::Vocabulary::createDummyVocabForTest(1));
MAM.registerPass([&] { return PassInstrumentationAnalysis(); });
FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); });
FAM.registerPass([&] { return DominatorTreeAnalysis(); });
@@ -69,7 +69,7 @@ protected:
std::unique_ptr<LoopInfo> LI;
FunctionAnalysisManager FAM;
ModuleAnalysisManager MAM;
- ir2vec::Vocabulary *IR2VecVocab;
+ std::unique_ptr<ir2vec::Vocabulary> IR2VecVocab;
void TearDown() override {
// Restore original IR2Vec weights
diff --git a/llvm/unittests/Analysis/IR2VecTest.cpp b/llvm/unittests/Analysis/IR2VecTest.cpp
index 743628f..d136cb6 100644
--- a/llvm/unittests/Analysis/IR2VecTest.cpp
+++ b/llvm/unittests/Analysis/IR2VecTest.cpp
@@ -295,7 +295,7 @@ TEST(IR2VecTest, ZeroDimensionEmbedding) {
// Fixture for IR2Vec tests requiring IR setup.
class IR2VecTestFixture : public ::testing::Test {
protected:
- Vocabulary *V;
+ std::unique_ptr<Vocabulary> V;
LLVMContext Ctx;
std::unique_ptr<Module> M;
Function *F = nullptr;
@@ -304,7 +304,7 @@ protected:
Instruction *RetInst = nullptr;
void SetUp() override {
- V = new Vocabulary(Vocabulary::createDummyVocabForTest(2));
+ V = std::make_unique<Vocabulary>(Vocabulary::createDummyVocabForTest(2));
// Setup IR
M = std::make_unique<Module>("TestM", Ctx);
diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt
index d1dfb1d..25efa00 100644
--- a/llvm/unittests/Support/CMakeLists.txt
+++ b/llvm/unittests/Support/CMakeLists.txt
@@ -52,6 +52,7 @@ add_llvm_unittest(SupportTests
IndexedAccessorTest.cpp
InstructionCostTest.cpp
InterleavedRangeTest.cpp
+ JobserverTest.cpp
JSONTest.cpp
KnownBitsTest.cpp
LEB128Test.cpp
diff --git a/llvm/unittests/Support/JobserverTest.cpp b/llvm/unittests/Support/JobserverTest.cpp
new file mode 100644
index 0000000..ddee023
--- /dev/null
+++ b/llvm/unittests/Support/JobserverTest.cpp
@@ -0,0 +1,442 @@
+//===- llvm/unittest/Support/JobserverTest.cpp ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Jobserver.h unit tests.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Jobserver.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Parallel.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include <future>
+#include <random>
+#include <stdlib.h>
+
+#if defined(LLVM_ON_UNIX)
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FileSystem.h"
+#include <atomic>
+#include <condition_variable>
+#include <fcntl.h>
+#include <mutex>
+#include <sys/stat.h>
+#include <thread>
+#include <unistd.h>
+#elif defined(_WIN32)
+#include <windows.h>
+#endif
+
+#define DEBUG_TYPE "jobserver-test"
+
+using namespace llvm;
+
+namespace {
+
+// RAII helper to set an environment variable for the duration of a test.
+class ScopedEnvironment {
+ std::string Name;
+ std::string OldValue;
+ bool HadOldValue;
+
+public:
+ ScopedEnvironment(const char *Name, const char *Value) : Name(Name) {
+#if defined(_WIN32)
+ char *Old = nullptr;
+ size_t OldLen;
+ errno_t err = _dupenv_s(&Old, &OldLen, Name);
+ if (err == 0 && Old != nullptr) {
+ HadOldValue = true;
+ OldValue = Old;
+ free(Old);
+ } else {
+ HadOldValue = false;
+ }
+ _putenv_s(Name, Value);
+#else
+ const char *Old = getenv(Name);
+ if (Old) {
+ HadOldValue = true;
+ OldValue = Old;
+ } else {
+ HadOldValue = false;
+ }
+ setenv(Name, Value, 1);
+#endif
+ }
+
+ ~ScopedEnvironment() {
+#if defined(_WIN32)
+ if (HadOldValue)
+ _putenv_s(Name.c_str(), OldValue.c_str());
+ else
+ // On Windows, setting an environment variable to an empty string
+ // unsets it, making getenv() return NULL.
+ _putenv_s(Name.c_str(), "");
+#else
+ if (HadOldValue)
+ setenv(Name.c_str(), OldValue.c_str(), 1);
+ else
+ unsetenv(Name.c_str());
+#endif
+ }
+};
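+
+// Usage sketch (illustrative only; the FIFO path below is a placeholder):
+//
+//   {
+//     ScopedEnvironment Env("MAKEFLAGS", "--jobserver-auth=fifo:/tmp/js");
+//     // ... code under test reads MAKEFLAGS here ...
+//   } // destructor restores the previous value, or unsets the variable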
+
+TEST(Jobserver, Slot) {
+ // Default constructor creates an invalid slot.
+ JobSlot S1;
+ EXPECT_FALSE(S1.isValid());
+ EXPECT_FALSE(S1.isImplicit());
+
+ // Create an implicit slot.
+ JobSlot S2 = JobSlot::createImplicit();
+ EXPECT_TRUE(S2.isValid());
+ EXPECT_TRUE(S2.isImplicit());
+
+ // Create an explicit slot.
+ JobSlot S3 = JobSlot::createExplicit(42);
+ EXPECT_TRUE(S3.isValid());
+ EXPECT_FALSE(S3.isImplicit());
+
+ // Test move construction.
+ JobSlot S4 = std::move(S2);
+ EXPECT_TRUE(S4.isValid());
+ EXPECT_TRUE(S4.isImplicit());
+ EXPECT_FALSE(S2.isValid()); // S2 is now invalid.
+
+ // Test move assignment.
+ S1 = std::move(S3);
+ EXPECT_TRUE(S1.isValid());
+ EXPECT_FALSE(S1.isImplicit());
+ EXPECT_FALSE(S3.isValid()); // S3 is now invalid.
+}
+
+// Test fixture for parsing tests to ensure the singleton state is
+// reset between each test case.
+class JobserverParsingTest : public ::testing::Test {
+protected:
+ void TearDown() override { JobserverClient::resetForTesting(); }
+};
+
+TEST_F(JobserverParsingTest, NoMakeflags) {
+ // No MAKEFLAGS, should be null.
+ ScopedEnvironment Env("MAKEFLAGS", "");
+ // On Unix, setting an env var to "" makes getenv() return an empty
+ // string, not NULL. We must call unsetenv() to test the case where
+ // the variable is truly not present.
+#if !defined(_WIN32)
+ unsetenv("MAKEFLAGS");
+#endif
+ EXPECT_EQ(JobserverClient::getInstance(), nullptr);
+}
+
+TEST_F(JobserverParsingTest, EmptyMakeflags) {
+ // Empty MAKEFLAGS, should be null.
+ ScopedEnvironment Env("MAKEFLAGS", "");
+ EXPECT_EQ(JobserverClient::getInstance(), nullptr);
+}
+
+TEST_F(JobserverParsingTest, DryRunFlag) {
+ // Dry-run flag 'n', should be null.
+ ScopedEnvironment Env("MAKEFLAGS", "n -j --jobserver-auth=fifo:/tmp/foo");
+ EXPECT_EQ(JobserverClient::getInstance(), nullptr);
+}
+
+// Separate fixture for non-threaded client tests.
+class JobserverClientTest : public JobserverParsingTest {};
+
+#if defined(LLVM_ON_UNIX)
+// RAII helper to create and clean up a temporary FIFO file.
+class ScopedFifo {
+ SmallString<128> Path;
+ bool IsValid = false;
+
+public:
+ ScopedFifo() {
+ // To get a unique, non-colliding name for a FIFO, we use the
+ // createTemporaryFile function to reserve a name in the filesystem.
+ std::error_code EC =
+ sys::fs::createTemporaryFile("jobserver-test", "fifo", Path);
+ if (EC)
+ return;
+ // Then we immediately remove the regular file it created, but keep the
+ // unique path.
+ sys::fs::remove(Path);
+ // Finally, we create the FIFO at that safe, unique path.
+ if (mkfifo(Path.c_str(), 0600) != 0)
+ return;
+ IsValid = true;
+ }
+
+ ~ScopedFifo() {
+ if (IsValid)
+ sys::fs::remove(Path);
+ }
+
+ const char *c_str() const { return Path.data(); }
+ bool isValid() const { return IsValid; }
+};
+
+TEST_F(JobserverClientTest, UnixClientFifo) {
+ // This test covers basic FIFO client creation and behavior with an empty
+ // FIFO. No job tokens are available.
+ ScopedFifo F;
+ ASSERT_TRUE(F.isValid());
+
+ // Tabs (\t) are intentionally inserted in the environment string.
+ std::string Makeflags = " \t -j4\t \t--jobserver-auth=fifo:";
+ Makeflags += F.c_str();
+ ScopedEnvironment Env("MAKEFLAGS", Makeflags.c_str());
+
+ JobserverClient *Client = JobserverClient::getInstance();
+ ASSERT_NE(Client, nullptr);
+
+ // Get the implicit token.
+ JobSlot S1 = Client->tryAcquire();
+ EXPECT_TRUE(S1.isValid());
+ EXPECT_TRUE(S1.isImplicit());
+
+ // FIFO is empty, next acquire fails.
+ JobSlot S2 = Client->tryAcquire();
+ EXPECT_FALSE(S2.isValid());
+
+ // Release does not write to the pipe for the implicit token.
+ Client->release(std::move(S1));
+
+ // Re-acquire the implicit token.
+ S1 = Client->tryAcquire();
+ EXPECT_TRUE(S1.isValid());
+}
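+
+// Note: `--jobserver-auth=fifo:PATH` is the named-FIFO style introduced in
+// GNU make 4.4; older versions pass a pipe via `--jobserver-auth=R,W` file
+// descriptors instead.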
+
+#if LLVM_ENABLE_THREADS
+// Test fixture for tests that use the jobserver strategy. It creates a
+// temporary FIFO, sets MAKEFLAGS, and provides a helper to pre-load the FIFO
+// with job tokens, simulating `make -jN`.
+class JobserverStrategyTest : public JobserverParsingTest {
+protected:
+ std::unique_ptr<ScopedFifo> TheFifo;
+ std::thread MakeThread;
+ std::atomic<bool> StopMakeThread{false};
+ // Save and restore the global parallel strategy to avoid interfering with
+ // other tests in the same process.
+ ThreadPoolStrategy SavedStrategy;
+
+ void SetUp() override {
+ SavedStrategy = parallel::strategy;
+ TheFifo = std::make_unique<ScopedFifo>();
+ ASSERT_TRUE(TheFifo->isValid());
+
+ std::string MakeFlags = "--jobserver-auth=fifo:";
+ MakeFlags += TheFifo->c_str();
+ setenv("MAKEFLAGS", MakeFlags.c_str(), 1);
+ }
+
+ void TearDown() override {
+ if (MakeThread.joinable()) {
+ StopMakeThread = true;
+ MakeThread.join();
+ }
+ unsetenv("MAKEFLAGS");
+ TheFifo.reset();
+ // Restore the original strategy to ensure subsequent tests are unaffected.
+ parallel::strategy = SavedStrategy;
+ }
+
+ // Starts a background thread that emulates `make`. It populates the FIFO
+ // with initial tokens and then recycles tokens released by clients.
+ void startMakeProxy(int NumInitialJobs) {
+ MakeThread = std::thread([this, NumInitialJobs]() {
+ LLVM_DEBUG(dbgs() << "[MakeProxy] Thread started.\n");
+ // Open the FIFO for reading and writing. This call does not block.
+ int RWFd = open(TheFifo->c_str(), O_RDWR);
+ LLVM_DEBUG(dbgs() << "[MakeProxy] Opened FIFO " << TheFifo->c_str()
+ << " with O_RDWR, FD=" << RWFd << "\n");
+ if (RWFd == -1) {
+ LLVM_DEBUG(
+ dbgs()
+ << "[MakeProxy] ERROR: Failed to open FIFO with O_RDWR. Errno: "
+ << errno << "\n");
+ return;
+ }
+
+ // Populate with initial jobs.
+ LLVM_DEBUG(dbgs() << "[MakeProxy] Writing " << NumInitialJobs
+ << " initial tokens.\n");
+ for (int i = 0; i < NumInitialJobs; ++i) {
+ if (write(RWFd, "+", 1) != 1) {
+ LLVM_DEBUG(dbgs()
+ << "[MakeProxy] ERROR: Failed to write initial token " << i
+ << ".\n");
+ close(RWFd);
+ return;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "[MakeProxy] Finished writing initial tokens.\n");
+
+ // Make the read non-blocking so we can periodically check StopMakeThread.
+ int flags = fcntl(RWFd, F_GETFL, 0);
+ fcntl(RWFd, F_SETFL, flags | O_NONBLOCK);
+
+ while (!StopMakeThread) {
+ char Token;
+ ssize_t Ret = read(RWFd, &Token, 1);
+ if (Ret == 1) {
+ LLVM_DEBUG(dbgs() << "[MakeProxy] Read token '" << Token
+ << "' to recycle.\n");
+ // A client released a token, 'make' makes it available again.
+ std::this_thread::sleep_for(std::chrono::microseconds(100));
+ ssize_t WRet;
+ do {
+ WRet = write(RWFd, &Token, 1);
+ } while (WRet < 0 && errno == EINTR);
+ if (WRet <= 0) {
+ LLVM_DEBUG(
+ dbgs()
+ << "[MakeProxy] ERROR: Failed to write recycled token.\n");
+ break; // Error, stop the proxy.
+ }
+ LLVM_DEBUG(dbgs()
+ << "[MakeProxy] Wrote token '" << Token << "' back.\n");
+ } else if (Ret < 0 && errno != EAGAIN && errno != EWOULDBLOCK) {
+ LLVM_DEBUG(dbgs() << "[MakeProxy] ERROR: Read failed with errno "
+ << errno << ".\n");
+ break; // Error, stop the proxy.
+ }
+ // Yield to prevent this thread from busy-waiting.
+ std::this_thread::sleep_for(std::chrono::milliseconds(1));
+ }
+ LLVM_DEBUG(dbgs() << "[MakeProxy] Thread stopping.\n");
+ close(RWFd);
+ });
+
+ // Give the proxy thread a moment to start and populate the FIFO.
+ // This is a simple way to avoid a race condition where the client starts
+ // before the initial tokens are in the pipe.
+ std::this_thread::sleep_for(std::chrono::milliseconds(50));
+ }
+};
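+
+// Protocol recap (GNU make jobserver, summarized for context): `make -jN`
+// pre-loads the FIFO with N-1 one-byte tokens; each client owns one implicit
+// slot, acquires extra slots by reading a byte from the FIFO, and writes each
+// byte back when the corresponding job finishes. startMakeProxy() above plays
+// the `make` side of this exchange.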
+
+TEST_F(JobserverStrategyTest, ThreadPoolConcurrencyIsLimited) {
+ // This test simulates `make -j3`. We will have 1 implicit job slot and
+ // we will add 2 explicit job tokens to the FIFO, for a total of 3.
+ const int NumExplicitJobs = 2;
+ const int ConcurrencyLimit = NumExplicitJobs + 1; // +1 for the implicit slot
+ const int NumTasks = 8; // More tasks than available slots.
+
+ LLVM_DEBUG(dbgs() << "Calling startMakeProxy with " << NumExplicitJobs
+ << " jobs.\n");
+ startMakeProxy(NumExplicitJobs);
+ LLVM_DEBUG(dbgs() << "MakeProxy is running.\n");
+
+ // Create the thread pool. Its constructor will call jobserver_concurrency()
+ // and create a client that reads from our pre-loaded FIFO.
+ StdThreadPool Pool(jobserver_concurrency());
+
+ std::atomic<int> ActiveTasks{0};
+ std::atomic<int> MaxActiveTasks{0};
+ std::atomic<int> CompletedTasks{0};
+ std::mutex M;
+ std::condition_variable CV;
+
+ // Dispatch more tasks than there are job slots. The pool should block
+ // and only run up to `ConcurrencyLimit` tasks at once.
+ for (int i = 0; i < NumTasks; ++i) {
+ Pool.async([&, i] {
+ // Track the number of concurrently running tasks.
+ int CurrentActive = ++ActiveTasks;
+ LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive
+ << "\n");
+ int OldMax = MaxActiveTasks.load();
+ while (CurrentActive > OldMax)
+ MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive);
+
+ std::this_thread::sleep_for(std::chrono::milliseconds(25));
+
+ --ActiveTasks;
+ if (++CompletedTasks == NumTasks) {
+ std::lock_guard<std::mutex> Lock(M);
+ CV.notify_one();
+ }
+ });
+ }
+
+ // Wait for all tasks to complete.
+ std::unique_lock<std::mutex> Lock(M);
+ CV.wait(Lock, [&] { return CompletedTasks == NumTasks; });
+
+ LLVM_DEBUG(dbgs() << "Test finished. Max active tasks was " << MaxActiveTasks
+ << ".\n");
+ // The key assertion: the maximum number of concurrent tasks should
+ // not have exceeded the limit imposed by the jobserver.
+ EXPECT_LE(MaxActiveTasks, ConcurrencyLimit);
+ EXPECT_EQ(CompletedTasks, NumTasks);
+}
+
+TEST_F(JobserverStrategyTest, ParallelForIsLimited) {
+ // This test verifies that llvm::parallelFor respects the jobserver limit.
+ const int NumExplicitJobs = 3;
+ const int ConcurrencyLimit = NumExplicitJobs + 1; // +1 implicit
+ const int NumTasks = 20;
+
+ LLVM_DEBUG(dbgs() << "Calling startMakeProxy with " << NumExplicitJobs
+ << " jobs.\n");
+ startMakeProxy(NumExplicitJobs);
+ LLVM_DEBUG(dbgs() << "MakeProxy is running.\n");
+
+ // Set the global strategy. parallelFor will use this.
+ parallel::strategy = jobserver_concurrency();
+
+ std::atomic<int> ActiveTasks{0};
+ std::atomic<int> MaxActiveTasks{0};
+
+ parallelFor(0, NumTasks, [&](int i) {
+ int CurrentActive = ++ActiveTasks;
+ LLVM_DEBUG(dbgs() << "Task " << i << ": Active tasks: " << CurrentActive
+ << "\n");
+ int OldMax = MaxActiveTasks.load();
+ while (CurrentActive > OldMax)
+ MaxActiveTasks.compare_exchange_weak(OldMax, CurrentActive);
+
+ std::this_thread::sleep_for(std::chrono::milliseconds(20));
+ --ActiveTasks;
+ });
+
+ LLVM_DEBUG(dbgs() << "ParallelFor finished. Max active tasks was "
+ << MaxActiveTasks << ".\n");
+ EXPECT_LE(MaxActiveTasks, ConcurrencyLimit);
+}
+
+TEST_F(JobserverStrategyTest, ParallelSortIsLimited) {
+ // This test serves as an integration test to ensure parallelSort completes
+ // correctly when running under the jobserver strategy. It doesn't directly
+ // measure concurrency but verifies correctness.
+ const int NumExplicitJobs = 3;
+ startMakeProxy(NumExplicitJobs);
+
+ parallel::strategy = jobserver_concurrency();
+
+ std::vector<int> V(1024);
+ // Fill with random data
+ std::mt19937 randEngine;
+ std::uniform_int_distribution<int> dist;
+ for (int &i : V)
+ i = dist(randEngine);
+
+ parallelSort(V.begin(), V.end());
+ ASSERT_TRUE(llvm::is_sorted(V));
+}
+
+#endif // LLVM_ENABLE_THREADS
+
+#endif // defined(LLVM_ON_UNIX)
+
+} // end anonymous namespace
diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
index 75bea77..8076ce2 100644
--- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
@@ -246,16 +246,14 @@ bool TypeSetByHwMode::operator==(const TypeSetByHwMode &VTS) const {
return true;
}
-namespace llvm {
-raw_ostream &operator<<(raw_ostream &OS, const MachineValueTypeSet &T) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineValueTypeSet &T) {
T.writeToStream(OS);
return OS;
}
-raw_ostream &operator<<(raw_ostream &OS, const TypeSetByHwMode &T) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const TypeSetByHwMode &T) {
T.writeToStream(OS);
return OS;
}
-} // namespace llvm
LLVM_DUMP_METHOD
void TypeSetByHwMode::dump() const { dbgs() << *this << '\n'; }
diff --git a/llvm/utils/TableGen/Common/CodeGenRegisters.cpp b/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
index 294f3af..8d0ec9a 100644
--- a/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
+++ b/llvm/utils/TableGen/Common/CodeGenRegisters.cpp
@@ -857,17 +857,6 @@ unsigned CodeGenRegisterClass::getWeight(const CodeGenRegBank &RegBank) const {
return (*Members.begin())->getWeight(RegBank);
}
-namespace llvm {
-
-raw_ostream &operator<<(raw_ostream &OS, const CodeGenRegisterClass::Key &K) {
- OS << "{ " << K.RSI;
- for (const auto R : *K.Members)
- OS << ", " << R->getName();
- return OS << " }";
-}
-
-} // end namespace llvm
-
// This is a simple lexicographical order that can be used to search for sets.
// It is not the same as the topological order provided by TopoOrderRC.
bool CodeGenRegisterClass::Key::operator<(
diff --git a/llvm/utils/TableGen/Common/InfoByHwMode.cpp b/llvm/utils/TableGen/Common/InfoByHwMode.cpp
index a6e2fc4..4c8197d 100644
--- a/llvm/utils/TableGen/Common/InfoByHwMode.cpp
+++ b/llvm/utils/TableGen/Common/InfoByHwMode.cpp
@@ -227,19 +227,17 @@ EncodingInfoByHwMode::EncodingInfoByHwMode(const Record *R,
}
}
-namespace llvm {
-raw_ostream &operator<<(raw_ostream &OS, const ValueTypeByHwMode &T) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const ValueTypeByHwMode &T) {
T.writeToStream(OS);
return OS;
}
-raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfo &T) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const RegSizeInfo &T) {
T.writeToStream(OS);
return OS;
}
-raw_ostream &operator<<(raw_ostream &OS, const RegSizeInfoByHwMode &T) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const RegSizeInfoByHwMode &T) {
T.writeToStream(OS);
return OS;
}
-} // namespace llvm
diff --git a/llvm/utils/TableGen/Common/PredicateExpander.cpp b/llvm/utils/TableGen/Common/PredicateExpander.cpp
index 09d9538..03252ed 100644
--- a/llvm/utils/TableGen/Common/PredicateExpander.cpp
+++ b/llvm/utils/TableGen/Common/PredicateExpander.cpp
@@ -14,7 +14,7 @@
#include "CodeGenSchedule.h" // Definition of STIPredicateFunction.
#include "llvm/TableGen/Record.h"
-namespace llvm {
+using namespace llvm;
void PredicateExpander::expandTrue(raw_ostream &OS) { OS << "true"; }
void PredicateExpander::expandFalse(raw_ostream &OS) { OS << "false"; }
@@ -553,5 +553,3 @@ void STIPredicateExpander::expandSTIPredicate(raw_ostream &OS,
expandEpilogue(OS, Fn);
}
}
-
-} // namespace llvm
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 09ce9f3..9471959 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -37,15 +37,6 @@ struct DXILIntrinsicSelect {
SmallVector<const Record *> ArgSelectRecords;
};
-static StringRef StripIntrinArgSelectTypePrefix(StringRef Type) {
- StringRef Prefix = "IntrinArgSelect_";
- if (!Type.starts_with(Prefix)) {
- PrintFatalError("IntrinArgSelectType definintion must be prefixed with "
- "'IntrinArgSelect_'");
- }
- return Type.substr(Prefix.size());
-}
-
struct DXILOperationDesc {
std::string OpName; // name of DXIL operation
int OpCode; // ID of DXIL operation
@@ -66,6 +57,15 @@ struct DXILOperationDesc {
};
} // end anonymous namespace
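+
+/// Strips the mandatory "IntrinArgSelect_" prefix from a record name; for
+/// example, a record named "IntrinArgSelect_Arg" (name illustrative) yields
+/// "Arg".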
+static StringRef stripIntrinArgSelectTypePrefix(StringRef Type) {
+ StringRef Prefix = "IntrinArgSelect_";
+ if (!Type.starts_with(Prefix)) {
+ PrintFatalError("IntrinArgSelectType definintion must be prefixed with "
+ "'IntrinArgSelect_'");
+ }
+ return Type.substr(Prefix.size());
+}
+
/// In-place sort TableGen records of class with a field
/// Version dxil_version
/// in the ascending version order.
@@ -449,7 +449,7 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
ArgSelect->getValueAsDef("type")->getNameInitAsString();
int Value = ArgSelect->getValueAsInt("value");
OS << "(IntrinArgSelect{"
- << "IntrinArgSelect::Type::" << StripIntrinArgSelectTypePrefix(Type)
+ << "IntrinArgSelect::Type::" << stripIntrinArgSelectTypePrefix(Type)
<< "," << Value << "}), ";
}
OS << ")\n";
@@ -466,7 +466,7 @@ static void emitDXILIntrinsicArgSelectTypes(const RecordKeeper &Records,
OS << "#ifdef DXIL_OP_INTRINSIC_ARG_SELECT_TYPE\n";
for (const Record *Records :
Records.getAllDerivedDefinitions("IntrinArgSelectType")) {
- StringRef StrippedName = StripIntrinArgSelectTypePrefix(Records->getName());
+ StringRef StrippedName = stripIntrinArgSelectTypePrefix(Records->getName());
OS << "DXIL_OP_INTRINSIC_ARG_SELECT_TYPE(" << StrippedName << ")\n";
}
OS << "#undef DXIL_OP_INTRINSIC_ARG_SELECT_TYPE\n";
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 961dc28..5d41b7d 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -194,10 +194,6 @@ private:
void parseInstructionEncodings();
};
-} // end anonymous namespace
-
-namespace {
-
struct EncodingIsland {
unsigned StartBit;
unsigned NumBits;
diff --git a/llvm/utils/TableGen/ExegesisEmitter.cpp b/llvm/utils/TableGen/ExegesisEmitter.cpp
index 1b4b072..bd69919 100644
--- a/llvm/utils/TableGen/ExegesisEmitter.cpp
+++ b/llvm/utils/TableGen/ExegesisEmitter.cpp
@@ -58,6 +58,14 @@ private:
const std::map<llvm::StringRef, unsigned> PfmCounterNameTable;
};
+struct ValidationCounterInfo {
+ int64_t EventNumber;
+ StringRef EventName;
+ unsigned PfmCounterID;
+};
+
+} // namespace
+
static std::map<llvm::StringRef, unsigned>
collectPfmCounters(const RecordKeeper &Records) {
std::map<llvm::StringRef, unsigned> PfmCounterNameTable;
@@ -106,14 +114,8 @@ ExegesisEmitter::ExegesisEmitter(const RecordKeeper &RK)
Target = Targets[0]->getName().str();
}
-struct ValidationCounterInfo {
- int64_t EventNumber;
- StringRef EventName;
- unsigned PfmCounterID;
-};
-
-bool EventNumberLess(const ValidationCounterInfo &LHS,
- const ValidationCounterInfo &RHS) {
+static bool EventNumberLess(const ValidationCounterInfo &LHS,
+ const ValidationCounterInfo &RHS) {
return LHS.EventNumber < RHS.EventNumber;
}
@@ -221,7 +223,7 @@ void ExegesisEmitter::emitPfmCounters(raw_ostream &OS) const {
emitPfmCountersInfo(*Def, IssueCountersTableOffset, OS);
OS << "\n";
-} // namespace
+}
void ExegesisEmitter::emitPfmCountersLookupTable(raw_ostream &OS) const {
std::vector<const Record *> Bindings =
@@ -249,7 +251,5 @@ void ExegesisEmitter::run(raw_ostream &OS) const {
emitPfmCountersLookupTable(OS);
}
-} // end anonymous namespace
-
static TableGen::Emitter::OptClass<ExegesisEmitter>
X("gen-exegesis", "Generate llvm-exegesis tables");
diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp
index 694d89a..dba8bde 100644
--- a/llvm/utils/TableGen/FastISelEmitter.cpp
+++ b/llvm/utils/TableGen/FastISelEmitter.cpp
@@ -52,11 +52,9 @@ struct InstructionMemo {
InstructionMemo(const InstructionMemo &Other) = delete;
InstructionMemo(InstructionMemo &&Other) = default;
};
-} // End anonymous namespace
/// ImmPredicateSet - This uniques predicates (represented as a string) and
/// gives them unique (small) integer ID's that start at 0.
-namespace {
class ImmPredicateSet {
DenseMap<TreePattern *, unsigned> ImmIDs;
std::vector<TreePredicateFn> PredsByName;
@@ -77,12 +75,10 @@ public:
iterator begin() const { return PredsByName.begin(); }
iterator end() const { return PredsByName.end(); }
};
-} // End anonymous namespace
/// OperandsSignature - This class holds a description of a list of operand
/// types. It has utility methods for emitting text based on the operands.
///
-namespace {
struct OperandsSignature {
class OpKind {
enum { OK_Reg, OK_FP, OK_Imm, OK_Invalid = -1 };
@@ -366,9 +362,7 @@ struct OperandsSignature {
Opnd.printManglingSuffix(OS, ImmPredicates, StripImmCodes);
}
};
-} // End anonymous namespace
-namespace {
class FastISelMap {
// A multimap is needed instead of a "plain" map because the key is
// the instruction's complexity (an int) and they are not unique.
diff --git a/llvm/utils/TableGen/X86DisassemblerShared.h b/llvm/utils/TableGen/X86DisassemblerShared.h
index f60fd47..d5f936d 100644
--- a/llvm/utils/TableGen/X86DisassemblerShared.h
+++ b/llvm/utils/TableGen/X86DisassemblerShared.h
@@ -14,6 +14,8 @@
#include "llvm/Support/X86DisassemblerDecoderCommon.h"
+namespace llvm::X86Disassembler {
+
struct InstructionSpecifier {
llvm::X86Disassembler::OperandSpecifier
operands[llvm::X86Disassembler::X86_MAX_OPERANDS];
@@ -52,4 +54,6 @@ struct ContextDecision {
ContextDecision() { memset(opcodeDecisions, 0, sizeof(opcodeDecisions)); }
};
+} // namespace llvm::X86Disassembler
+
#endif
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 1e1e4ab..6f523b5 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -30,22 +30,23 @@ struct ManualMapEntry {
const char *MemInstStr;
uint16_t Strategy;
};
+} // namespace
// List of instructions requiring explicitly aligned memory.
-const char *ExplicitAlign[] = {"MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS",
- "MOVNTPD", "MOVNTDQ", "MOVNTDQA"};
+static constexpr const char *ExplicitAlign[] = {
+ "MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS", "MOVNTPD", "MOVNTDQ", "MOVNTDQA"};
// List of instructions NOT requiring explicit memory alignment.
-const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD",
- "PCMPESTRM", "PCMPESTRI", "PCMPISTRM",
- "PCMPISTRI"};
+static constexpr const char *ExplicitUnalign[] = {
+ "MOVDQU", "MOVUPS", "MOVUPD", "PCMPESTRM",
+ "PCMPESTRI", "PCMPISTRM", "PCMPISTRI"};
-const ManualMapEntry ManualMapSet[] = {
+static const ManualMapEntry ManualMapSet[] = {
#define ENTRY(REG, MEM, FLAGS) {#REG, #MEM, FLAGS},
#include "X86ManualFoldTables.def"
};
-const std::set<StringRef> NoFoldSet = {
+static const std::set<StringRef> NoFoldSet = {
#define NOFOLD(INSN) #INSN,
#include "X86ManualFoldTables.def"
};
@@ -62,6 +63,7 @@ static bool isExplicitUnalign(const CodeGenInstruction *Inst) {
});
}
+namespace {
class X86FoldTablesEmitter {
const RecordKeeper &Records;
const CodeGenTarget Target;
@@ -230,6 +232,7 @@ private:
OS << "};\n\n";
}
};
+} // namespace
// Return true if one of the instruction's operands is a RST register class
static bool hasRSTRegClass(const CodeGenInstruction *Inst) {
@@ -318,6 +321,7 @@ static bool isNOREXRegClass(const Record *Op) {
// Function object - Operator() returns true if the given Reg instruction
// matches the Mem instruction of this object.
+namespace {
class IsMatch {
const CodeGenInstruction *MemInst;
const X86Disassembler::RecognizableInstrBase MemRI;
diff --git a/llvm/utils/TableGen/X86InstrMappingEmitter.cpp b/llvm/utils/TableGen/X86InstrMappingEmitter.cpp
index be5e2a7..2745ba7 100644
--- a/llvm/utils/TableGen/X86InstrMappingEmitter.cpp
+++ b/llvm/utils/TableGen/X86InstrMappingEmitter.cpp
@@ -66,6 +66,7 @@ private:
void printTable(ArrayRef<Entry> Table, StringRef Name, StringRef Macro,
raw_ostream &OS);
};
+} // namespace
void X86InstrMappingEmitter::printClassDef(raw_ostream &OS) {
OS << "struct X86TableEntry {\n"
@@ -106,6 +107,7 @@ void X86InstrMappingEmitter::printTable(ArrayRef<Entry> Table, StringRef Name,
printMacroEnd(Macro, OS);
}
+namespace {
class IsMatch {
const CodeGenInstruction *OldInst;
@@ -146,6 +148,7 @@ public:
return true;
}
};
+} // namespace
static bool isInteresting(const Record *Rec) {
// _REV instruction should not appear before encoding optimization
@@ -368,7 +371,6 @@ void X86InstrMappingEmitter::run(raw_ostream &OS) {
emitND2NonNDTable(Insts, OS);
emitSSE2AVXTable(Insts, OS);
}
-} // namespace
static TableGen::Emitter::OptClass<X86InstrMappingEmitter>
X("gen-x86-instr-mapping", "Generate X86 instruction mapping");
diff --git a/llvm/utils/TableGen/X86MnemonicTables.cpp b/llvm/utils/TableGen/X86MnemonicTables.cpp
index 85bd4df..7851919 100644
--- a/llvm/utils/TableGen/X86MnemonicTables.cpp
+++ b/llvm/utils/TableGen/X86MnemonicTables.cpp
@@ -30,6 +30,7 @@ public:
// Output X86 mnemonic tables.
void run(raw_ostream &OS);
};
+} // namespace
void X86MnemonicTablesEmitter::run(raw_ostream &OS) {
emitSourceFileHeader("X86 Mnemonic tables", OS);
@@ -83,7 +84,5 @@ void X86MnemonicTablesEmitter::run(raw_ostream &OS) {
OS << "} // end namespace X86\n} // end namespace llvm";
}
-} // namespace
-
static TableGen::Emitter::OptClass<X86MnemonicTablesEmitter>
X("gen-x86-mnemonic-tables", "Generate X86 mnemonic tables");
diff --git a/llvm/utils/TableGen/X86ModRMFilters.h b/llvm/utils/TableGen/X86ModRMFilters.h
index b579f22..7bf111f 100644
--- a/llvm/utils/TableGen/X86ModRMFilters.h
+++ b/llvm/utils/TableGen/X86ModRMFilters.h
@@ -19,9 +19,7 @@
#include <cstdint>
-namespace llvm {
-
-namespace X86Disassembler {
+namespace llvm::X86Disassembler {
/// ModRMFilter - Abstract base class for classes that recognize patterns in
/// ModR/M bytes.
@@ -135,8 +133,6 @@ public:
bool accepts(uint8_t modRM) const override { return (ModRM == modRM); }
};
-} // namespace X86Disassembler
-
-} // namespace llvm
+} // namespace llvm::X86Disassembler
#endif
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h
index b74e74d..52f9538 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -22,8 +22,6 @@
#include <string>
#include <vector>
-struct InstructionSpecifier;
-
namespace llvm {
class Record;
#define X86_INSTR_MRM_MAPPING \
@@ -179,6 +177,8 @@ enum { ExplicitREX2 = 1, ExplicitEVEX = 3 };
namespace X86Disassembler {
class DisassemblerTables;
+struct InstructionSpecifier;
+
/// Extract common fields of a single X86 instruction from a CodeGenInstruction
struct RecognizableInstrBase {
/// The OpPrefix field from the record
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni
index 2ab2a0e..5d1fb02 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/sources.gni
@@ -529,7 +529,7 @@ if (current_cpu == "ve") {
if (current_cpu == "wasm") {
builtins_sources += [
"wasm/__c_longjmp.S",
- "wasm/__cpp_exceptions.S",
+ "wasm/__cpp_exception.S",
]
}
diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
index 6ca766ca..38ba466 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
@@ -103,6 +103,7 @@ static_library("Support") {
"IntEqClasses.cpp",
"IntervalMap.cpp",
"JSON.cpp",
+ "Jobserver.cpp",
"KnownBits.cpp",
"KnownFPClass.cpp",
"LEB128.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn
index 42c1a15..a25f058 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn
@@ -56,6 +56,7 @@ unittest("SupportTests") {
"InstructionCostTest.cpp",
"InterleavedRangeTest.cpp",
"JSONTest.cpp",
+ "JobserverTest.cpp",
"KnownBitsTest.cpp",
"LEB128Test.cpp",
"LineIteratorTest.cpp",
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 01ab6df..77e833f 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2383,15 +2383,38 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
let summary = "loop construct";
let description = [{
- The "acc.loop" operation represents the OpenACC loop construct. The lower
- and upper bounds specify a half-open range: the range includes the lower
- bound but does not include the upper bound. If the `inclusive` attribute is
- set then the upper bound is included.
+ The `acc.loop` operation represents the OpenACC loop construct and when
+ bounds are included, the associated source language loop iterators. The
+ lower and upper bounds specify a half-open range: the range includes the
+ lower bound but does not include the upper bound. If the `inclusive`
+ attribute is set then the upper bound is included.
+
+ In cases where the OpenACC loop directive is associated with multiple
+ source language loops, such as with `collapse` or `tile`, multiple
+ induction arguments are used to capture each loop. This representation
+ ensures that no intermediate transformation, such as Loop Invariant
+ Code Motion, breaks the property requested by the clause on the loop
+ construct.
+
+ Each `acc.loop` holds private and reduction operands, which are the
+ SSA values produced by the corresponding `acc.private` or
+ `acc.reduction` operations. Additionally, firstprivate operands are
+ supported to represent cases where privatization requires
+ initialization from an original value. While the OpenACC specification
+ does not explicitly support firstprivate on loop constructs, this
+ extension enables representing privatization scenarios that arise from
+ an optimization and codegen pipeline operating on the acc dialect.
+
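+ For example, a firstprivate clause appears on the loop as follows
+ (illustrative sketch; recipe and variable names are placeholders):
+
+ ```mlir
+ acc.loop firstprivate(@firstprivatization_memref_10xf32 -> %fp : memref<10xf32>)
+ control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
+ acc.yield
+ }
+ ```
+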
+ Through the `combined` attribute, the operation records that it comes
+ from a combined construct (e.g., `parallel loop`, `kernels loop`,
+ `serial loop`), even though the `acc.loop` must be decomposed from the
+ compute operation representing the compute construct.
Example:
```mlir
- acc.loop gang() vector() (%arg3 : index, %arg4 : index, %arg5 : index) =
+ acc.loop gang() vector() (%arg3 : index, %arg4 : index, %arg5 : index) =
(%c0, %c0, %c0 : index, index, index) to
(%c10, %c10, %c10 : index, index, index) step
(%c1, %c1, %c1 : index, index, index) {
@@ -2400,10 +2423,12 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
} attributes { collapse = [3] }
```
- `collapse`, `gang`, `worker`, `vector`, `seq`, `independent`, `auto` and
- `tile` operands are supported with `device_type` information. They should
- only be accessed by the extra provided getters. If modified, the
- corresponding `device_type` attributes must be modified as well.
+ `collapse`, `gang`, `worker`, `vector`, `seq`, `independent`, `auto`,
+ `cache`, and `tile` operands are supported with `device_type`
+ information. These clauses should only be accessed through the provided
+ device-type-aware getter methods. When modifying these operands, the
+ corresponding `device_type` attributes must be updated to maintain
+ consistency between operands and their target device types.
}];
let arguments = (ins
@@ -2433,6 +2458,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
Variadic<OpenACC_AnyPointerOrMappableType>:$cacheOperands,
Variadic<OpenACC_AnyPointerOrMappableType>:$privateOperands,
OptionalAttr<SymbolRefArrayAttr>:$privatizationRecipes,
+ Variadic<OpenACC_AnyPointerOrMappableType>:$firstprivateOperands,
+ OptionalAttr<SymbolRefArrayAttr>:$firstprivatizationRecipes,
Variadic<AnyType>:$reductionOperands,
OptionalAttr<SymbolRefArrayAttr>:$reductionRecipes,
OptionalAttr<OpenACC_CombinedConstructsAttr>:$combined
@@ -2589,6 +2616,10 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
/// Adds a private clause variable to this operation, including its recipe.
void addPrivatization(MLIRContext *, mlir::acc::PrivateOp op,
mlir::acc::PrivateRecipeOp recipe);
+ /// Adds a firstprivate clause variable to this operation, including its
+ /// recipe.
+ void addFirstPrivatization(MLIRContext *, mlir::acc::FirstprivateOp op,
+ mlir::acc::FirstprivateRecipeOp recipe);
/// Adds a reduction clause variable to this operation, including its
/// recipe.
void addReduction(MLIRContext *, mlir::acc::ReductionOp op,
@@ -2609,6 +2640,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
type($vectorOperands), $vectorOperandsDeviceType, $vector)
| `private` `(` custom<SymOperandList>(
$privateOperands, type($privateOperands), $privatizationRecipes) `)`
+ | `firstprivate` `(` custom<SymOperandList>($firstprivateOperands,
+ type($firstprivateOperands), $firstprivatizationRecipes) `)`
| `tile` `(` custom<DeviceTypeOperandsWithSegment>($tileOperands,
type($tileOperands), $tileOperandsDeviceType, $tileOperandsSegments)
`)`
@@ -2665,6 +2698,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
/*cacheOperands=*/{},
/*privateOperands=*/{},
/*privatizationRecipes=*/nullptr,
+ /*firstprivateOperands=*/{},
+ /*firstprivatizationRecipes=*/nullptr,
/*reductionOperands=*/{},
/*reductionRecipes=*/nullptr,
/*combined=*/nullptr);
diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
index 83b128e..564d9c4 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
@@ -27,10 +27,6 @@ def XeGPUSubgroupDistribute : Pass<"xegpu-subgroup-distribute"> {
}];
let dependentDialects = ["memref::MemRefDialect", "xegpu::XeGPUDialect",
"vector::VectorDialect"];
- let options = [Option<
- "enableSGReductions", "enable-sg-reductions", "bool",
- /*default=*/"true",
- "Enable subgroup reductions using subgroup shuffles.">];
}
def XeGPUPropagateLayout : Pass<"xegpu-propagate-layout"> {
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index ee3e402..6598ac1 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -2674,6 +2674,11 @@ LogicalResult acc::LoopOp::verify() {
"privatizations", false)))
return failure();
+ if (failed(checkSymOperandList<mlir::acc::FirstprivateRecipeOp>(
+ *this, getFirstprivatizationRecipes(), getFirstprivateOperands(),
+ "firstprivate", "firstprivatizations", /*checkOperandType=*/false)))
+ return failure();
+
if (failed(checkSymOperandList<mlir::acc::ReductionRecipeOp>(
*this, getReductionRecipes(), getReductionOperands(), "reduction",
"reductions", false)))
@@ -2737,7 +2742,8 @@ LogicalResult acc::LoopOp::verify() {
}
unsigned LoopOp::getNumDataOperands() {
- return getReductionOperands().size() + getPrivateOperands().size();
+ return getReductionOperands().size() + getPrivateOperands().size() +
+ getFirstprivateOperands().size();
}
Value LoopOp::getDataOperand(unsigned i) {
@@ -3117,6 +3123,21 @@ void acc::LoopOp::addPrivatization(MLIRContext *context,
setPrivatizationRecipesAttr(mlir::ArrayAttr::get(context, recipes));
}
+void acc::LoopOp::addFirstPrivatization(
+ MLIRContext *context, mlir::acc::FirstprivateOp op,
+ mlir::acc::FirstprivateRecipeOp recipe) {
+ getFirstprivateOperandsMutable().append(op.getResult());
+
+ llvm::SmallVector<mlir::Attribute> recipes;
+
+ if (getFirstprivatizationRecipesAttr())
+ llvm::copy(getFirstprivatizationRecipesAttr(), std::back_inserter(recipes));
+
+ recipes.push_back(
+ mlir::SymbolRefAttr::get(context, recipe.getSymName().str()));
+ setFirstprivatizationRecipesAttr(mlir::ArrayAttr::get(context, recipes));
+}
+
void acc::LoopOp::addReduction(MLIRContext *context, mlir::acc::ReductionOp op,
mlir::acc::ReductionRecipeOp recipe) {
getReductionOperandsMutable().append(op.getResult());
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
index 3a6684f..255f2bf 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
@@ -796,7 +796,7 @@ struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
currentSourceIndex, remainingElements, 0);
// Generate back mask.
- auto maskValues = SmallVector<bool>(emulatedPerContainerElem, 0);
+ auto maskValues = SmallVector<bool>(emulatedPerContainerElem, false);
std::fill_n(maskValues.begin(), remainingElements, 1);
auto backMask = arith::ConstantOp::create(
rewriter, loc,
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
index 882691f..f1dbc5d 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
@@ -875,14 +875,17 @@ struct StoreDistribution final : public gpu::WarpDistributionPattern {
storeScatterOp,
"Some vector operands have no layouts, using defaults instead.");
}
- VectorType distPayloadTy = distStoreVecByWarpOpOrFailure.value();
- VectorType expectedPayloadTy = VectorType::get(
- {distPayloadTy.getNumElements()}, distPayloadTy.getElementType());
+ // Distributed store payload type according to the lane layout.
+ VectorType distPayloadTyByWarpOp = distStoreVecByWarpOpOrFailure.value();
+ // Expected distributed payload type is always 1D.
+ VectorType expectedPayloadTy =
+ VectorType::get({distPayloadTyByWarpOp.getNumElements()},
+ distPayloadTyByWarpOp.getElementType());
SmallVector<size_t> newRetIndices;
SmallVector<Value> operands = storeScatterOp->getOperands();
SmallVector<Type> operandTypesToYield = {
- expectedPayloadTy, operands[1].getType(),
+ distPayloadTyByWarpOp, operands[1].getType(),
distOffsetsByWarpOpOrFailure.value(),
distMaskByWarpOpOrFailure.value()};
@@ -890,8 +893,11 @@ struct StoreDistribution final : public gpu::WarpDistributionPattern {
rewriter, warpOp, operands, operandTypesToYield, newRetIndices);
SmallVector<Value> newStoreScatterOpOperands = llvm::map_to_vector(
newRetIndices, [&](size_t idx) { return newWarpOp.getResult(idx); });
-
+ // The payload operand may need a type adjustment due to a mismatch between
+ // the warp-distributed type and the expected SIMT type.
rewriter.setInsertionPointAfter(newWarpOp);
+ newStoreScatterOpOperands[0] = resolveDistributedTy(
+ newStoreScatterOpOperands[0], expectedPayloadTy, rewriter);
xegpu::StoreScatterOp newOp = xegpu::StoreScatterOp::create(
rewriter, newWarpOp.getLoc(), TypeRange{}, newStoreScatterOpOperands,
storeScatterOp->getAttrs());
@@ -976,8 +982,11 @@ struct LoadDistribution final : public gpu::WarpDistributionPattern {
distMaskByWarpOpOrFailure.value()};
const unsigned operandIdx = producedByLastLoad->getOperandNumber();
- VectorType loadVecTy =
+ VectorType distResultTy =
cast<VectorType>(warpOp.getResult(operandIdx).getType());
+ // Distributed load op will always be 1D.
+ VectorType loadVecTy = VectorType::get({distResultTy.getNumElements()},
+ distResultTy.getElementType());
gpu::WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndAppendReturns(
rewriter, warpOp, operands, operandTypesToYield, newRetIndices);
@@ -991,13 +1000,16 @@ struct LoadDistribution final : public gpu::WarpDistributionPattern {
loadGatherOp->getAttrs());
xegpu::removeLayoutAttrs(newOp);
Value distributedVal = newWarpOp.getResult(operandIdx);
- rewriter.replaceAllUsesWith(distributedVal, newOp->getResult(0));
+ // Resolve the output type and replace all uses.
+ rewriter.replaceAllUsesWith(
+ distributedVal,
+ resolveDistributedTy(newOp.getResult(), distResultTy, rewriter));
return success();
}
};
/// Helper to rewrite a 2D VectorMultiReductionOp into a sequence of 1D
-/// VectorReductionOps.
+/// VectorReductionOps. We also insert layouts for the newly created ops.
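+/// Conceptually (illustrative), for a 2D source reduced along one dimension:
+///   for each slice i:
+///     %s = shape_cast(extract_strided_slice(%src, i)) : 1D vector
+///     %r = vector.reduction <kind>, %s, extract(%acc, i)
+///     %result = insert(%r, %result, i)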
static Value lowerToVectorReductions(TypedValue<VectorType> src,
TypedValue<VectorType> acc,
vector::CombiningKind kind,
@@ -1014,6 +1026,9 @@ static Value lowerToVectorReductions(TypedValue<VectorType> src,
Value reductionResult = arith::ConstantOp::create(
rewriter, loc, acc.getType(),
DenseElementsAttr::get(acc.getType(), zeroAttr));
+ // Reduction result should have the same layout as the accumulator.
+ xegpu::setDistributeLayoutAttr(cast<OpResult>(reductionResult),
+ xegpu::getDistributeLayoutAttr(acc));
// For each slice of the source, extract the slice vector, do a reduction
// and, insert the reduced value back to the result vector.
for (int i = 0; i < nSlices; ++i) {
@@ -1029,13 +1044,23 @@ static Value lowerToVectorReductions(TypedValue<VectorType> src,
vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets,
sliceSizes, {1, 1});
int64_t nSliceElements = extractOp.getResult().getType().getNumElements();
- Value slice = vector::ShapeCastOp::create(
+ vector::ShapeCastOp slice = vector::ShapeCastOp::create(
rewriter, loc,
VectorType::get({nSliceElements}, sourceType.getElementType()),
extractOp.getResult());
+ // Shape casts are currently handled on the xegpu side, so layouts must be
+ // retained during lowering. The shape cast output has the same layout as
+ // the accumulator, and the shape cast source has the same layout as the
+ // original reduction source.
+ // TODO: other ops generated here may also need layout attributes.
+ xegpu::setDistributeLayoutAttr(slice->getOpOperand(0),
+ xegpu::getDistributeLayoutAttr(src));
+ xegpu::setDistributeLayoutAttr(slice->getOpResult(0),
+ xegpu::getDistributeLayoutAttr(acc));
+ // Extract and reduction produce scalars, so no result layout is needed.
Value accExtract = vector::ExtractOp::create(rewriter, loc, acc, i);
- Value reduction =
- vector::ReductionOp::create(rewriter, loc, kind, slice, accExtract);
+ Value reduction = vector::ReductionOp::create(
+ rewriter, loc, kind, slice.getResult(), accExtract);
reductionResult =
vector::InsertOp::create(rewriter, loc, reduction, reductionResult, i);
}
@@ -1107,7 +1132,7 @@ struct VectorMultiReductionDistribution : public gpu::WarpDistributionPattern {
return failure();
auto reductionOp =
cast<vector::MultiDimReductionOp>(yieldOperand->get().getDefiningOp());
- unsigned operandNumber = yieldOperand->getOperandNumber();
+ unsigned operandIdx = yieldOperand->getOperandNumber();
VectorType sourceType = reductionOp.getSourceVectorType();
// Only 2D vectors are supported.
if (sourceType.getRank() != 2)
@@ -1121,7 +1146,7 @@ struct VectorMultiReductionDistribution : public gpu::WarpDistributionPattern {
warpOp, "Only 1 reduction dimension is supported.");
int64_t reductionDim = reductionDims[0];
VectorType distributedResultType =
- cast<VectorType>(warpOp.getResult(operandNumber).getType());
+ cast<VectorType>(warpOp.getResult(operandIdx).getType());
VectorType resultType = cast<VectorType>(reductionOp.getType());
xegpu::DistributeLayoutAttr sourceLayout =
xegpu::getDistributeLayoutAttr(reductionOp.getSource());
@@ -1184,7 +1209,7 @@ struct VectorMultiReductionDistribution : public gpu::WarpDistributionPattern {
cast<TypedValue<VectorType>>(newWarpOp->getResult(newRetIndices[1])),
reductionOp.getKind(), reductionDim, reductionOp.getLoc(), rewriter);
// Replace the warp op result with the final result.
- rewriter.replaceAllUsesWith(reductionOp.getResult(), result);
+ rewriter.replaceAllUsesWith(newWarpOp.getResult(operandIdx), result);
return success();
}
// For non-lane-local case, we simply rewrite the MultiReductionOp in terms
@@ -1217,7 +1242,7 @@ struct VectorShapeCastDistribution : public gpu::WarpDistributionPattern {
auto resultDistTy =
cast<VectorType>(warpOp.getResult(operandNumber).getType());
xegpu::DistributeLayoutAttr sourceLayout =
- xegpu::getDistributeLayoutAttr(shapeCastOp.getSource());
+ xegpu::getDistributeLayoutAttr(shapeCastOp->getOpOperand(0));
xegpu::DistributeLayoutAttr resultLayout =
xegpu::getDistributeLayoutAttr(shapeCastOp.getResult());
if (!sourceLayout || !resultLayout)
@@ -1403,11 +1428,6 @@ namespace {
struct XeGPUSubgroupDistributePass final
: public xegpu::impl::XeGPUSubgroupDistributeBase<
XeGPUSubgroupDistributePass> {
- XeGPUSubgroupDistributePass() = default;
- XeGPUSubgroupDistributePass(const XeGPUSubgroupDistributePass &other) =
- default;
- XeGPUSubgroupDistributePass(xegpu::XeGPUSubgroupDistributeOptions options)
- : XeGPUSubgroupDistributeBase(options) {}
void runOnOperation() override;
};
} // namespace
@@ -1515,10 +1535,9 @@ void XeGPUSubgroupDistributePass::runOnOperation() {
return laneVal;
};
- if (enableSGReductions)
- vector::populateDistributeReduction(
- patterns, warpReduction,
- /*pattern benefit=*/regularPatternBenefit);
+ vector::populateDistributeReduction(
+ patterns, warpReduction,
+ /*pattern benefit=*/regularPatternBenefit);
vector::populatePropagateWarpVectorDistributionPatterns(
patterns, distributionFn, shuffleFn,
diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir
index cb69058..1484d7e 100644
--- a/mlir/test/Dialect/OpenACC/ops.mlir
+++ b/mlir/test/Dialect/OpenACC/ops.mlir
@@ -358,6 +358,41 @@ func.func @acc_loop_multiple_block() {
// -----
+acc.firstprivate.recipe @firstprivatization_memref_10xf32 : memref<10xf32> init {
+^bb0(%arg0: memref<10xf32>):
+ %0 = memref.alloca() : memref<10xf32>
+ acc.yield %0 : memref<10xf32>
+} copy {
+^bb0(%arg0: memref<10xf32>, %arg1: memref<10xf32>):
+ memref.copy %arg0, %arg1 : memref<10xf32> to memref<10xf32>
+ acc.terminator
+} destroy {
+^bb0(%arg0: memref<10xf32>):
+ acc.terminator
+}
+
+func.func @testloopfirstprivate(%a: memref<10xf32>, %b: memref<10xf32>) -> () {
+ %c0 = arith.constant 0 : index
+ %c10 = arith.constant 10 : index
+ %c1 = arith.constant 1 : index
+ %firstprivate = acc.firstprivate varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32>
+ acc.loop firstprivate(@firstprivatization_memref_10xf32 -> %firstprivate : memref<10xf32>) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
+ "test.openacc_dummy_op"() : () -> ()
+ acc.yield
+ } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+ return
+}
+
+// CHECK-LABEL: func.func @testloopfirstprivate(
+// CHECK-SAME: %[[ARG0:.*]]: memref<10xf32>, %[[ARG1:.*]]: memref<10xf32>)
+// CHECK: %[[FIRSTPRIVATE:.*]] = acc.firstprivate varPtr(%[[ARG0]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32>
+// CHECK: acc.loop firstprivate(@firstprivatization_memref_10xf32 -> %[[FIRSTPRIVATE]] : memref<10xf32>) control(%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
+// CHECK: "test.openacc_dummy_op"() : () -> ()
+// CHECK: acc.yield
+// CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+
+// -----
+
acc.private.recipe @privatization_memref_10_f32 : memref<10xf32> init {
^bb0(%arg0: memref<10xf32>):
%0 = memref.alloc() : memref<10xf32>
@@ -535,6 +570,7 @@ acc.firstprivate.recipe @firstprivatization_memref_10xf32 : memref<10xf32> init
acc.yield %0 : memref<10xf32>
} copy {
^bb0(%arg0: memref<10xf32>, %arg1: memref<10xf32>):
+ memref.copy %arg0, %arg1 : memref<10xf32> to memref<10xf32>
acc.terminator
} destroy {
^bb0(%arg0: memref<10xf32>):
diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir
new file mode 100644
index 0000000..40b66d1
--- /dev/null
+++ b/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir
@@ -0,0 +1,575 @@
+// RUN: mlir-opt --xevm-attach-target='module=xevm_* chip=pvc' -test-xegpu-sg-distribute -allow-unregistered-dialect \
+// RUN: -canonicalize -cse -split-input-file %s | FileCheck %s
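+
+// These cases exercise the subgroup distribution patterns in isolation: each
+// wraps a single op in gpu.warp_execute_on_lane_0 and uses unregistered
+// "some_op"/"some_def" placeholders to stand in for producers and consumers.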
+
+// CHECK-LABEL: gpu.func @store_nd_1d
+// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) {
+// CHECK: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16]
+// CHECK-SAME: -> (vector<1xf32>, !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>>, index) {
+// CHECK: gpu.yield %{{.*}} : vector<16xf32>,
+// CHECK-SAME: !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>>, index
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]]#1 : !xegpu.tensor_desc<16xf32,
+// CHECK-SAME: #xegpu.layout<lane_layout = [16], lane_data = [1]>> to !xegpu.tensor_desc<16xf32> {resolve_simt_type_mismatch}
+// CHECK-NEXT: xegpu.store_nd %[[W]]#0, %[[T1]][%[[W]]#2] : vector<1xf32>, !xegpu.tensor_desc<16xf32>
+gpu.module @xevm_module{
+ gpu.func @store_nd_1d(%laneid: index) {
+ %c0 = arith.constant 0 : index
+ gpu.warp_execute_on_lane_0(%laneid)[16] {
+ %0 = "some_op"() : () -> !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
+ %cst = "some_op"() : () -> vector<16xf32>
+ xegpu.store_nd %cst, %0 [%c0] {layout_operand_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>}
+ : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
+ }
+ gpu.return
+ }
+}
+
+// -----
+// CHECK-LABEL: gpu.func @store_nd_2d
+// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) {
+// CHECK: %[[W:.*]]:4 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16]
+// CHECK-SAME: -> (vector<16x1xf16>, !xegpu.tensor_desc<16x16xf16,
+// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, index, index) {
+// CHECK: gpu.yield %{{.*}} : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16,
+// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, index, index
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[CAST:.*]] = vector.shape_cast %[[W]]#0 : vector<16x1xf16> to vector<16xf16>
+// CHECK-NEXT: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]]#1 : !xegpu.tensor_desc<16x16xf16,
+// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> to !xegpu.tensor_desc<16x16xf16> {resolve_simt_type_mismatch}
+// CHECK-NEXT: xegpu.store_nd %[[CAST]], %[[T1]][%[[W]]#2, %[[W]]#3] : vector<16xf16>, !xegpu.tensor_desc<16x16xf16>
+gpu.module @xevm_module{
+ gpu.func @store_nd_2d(%laneid : index) {
+ %c0 = arith.constant 0 : index
+ gpu.warp_execute_on_lane_0(%laneid)[16] {
+ %0 = "some_op"() : () -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ %cst = "some_op"() : () -> vector<16x16xf16>
+ xegpu.store_nd %cst, %0 [%c0, %c0] {layout_operand_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
+ : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ }
+ gpu.return
+ }
+}
+
+// -----
+// CHECK-LABEL: gpu.func @load_nd_1d
+// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) {
+// CHECK: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] -> (vector<1xf32>,
+// CHECK-SAME: !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>>, index) {
+// CHECK: gpu.yield %{{.*}} : vector<16xf32>, !xegpu.tensor_desc<16xf32,
+// CHECK-SAME: #xegpu.layout<lane_layout = [16], lane_data = [1]>>, index
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]]#1 : !xegpu.tensor_desc<16xf32,
+// CHECK-SAME: #xegpu.layout<lane_layout = [16], lane_data = [1]>> to !xegpu.tensor_desc<16xf32> {resolve_simt_type_mismatch}
+// CHECK-NEXT: xegpu.load_nd %[[T1]][%[[W]]#2] : !xegpu.tensor_desc<16xf32> -> vector<1xf32>
+gpu.module @xevm_module{
+ gpu.func @load_nd_1d(%laneid: index) {
+ %c0 = arith.constant 0 : index
+ %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<1xf32>) {
+ %0 = "some_op"() : () -> !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
+ %1 = xegpu.load_nd %0 [%c0] {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} :
+ !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>> -> vector<16xf32>
+ gpu.yield %1 : vector<16xf32>
+ }
+ "some_user_op"(%r) : (vector<1xf32>) -> ()
+ gpu.return
+ }
+}
+
+// -----
+// CHECK-LABEL: gpu.func @load_nd_2d
+// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) {
+// CHECK: %[[W:.*]]:4 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] -> (vector<16x1xf16>, !xegpu.tensor_desc<16x16xf16,
+// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, index, index) {
+// CHECK: gpu.yield %{{.*}} : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16,
+// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, index, index
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]]#1 : !xegpu.tensor_desc<16x16xf16,
+// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> to !xegpu.tensor_desc<16x16xf16> {resolve_simt_type_mismatch}
+// CHECK-NEXT: %[[T2:.*]] = xegpu.load_nd %[[T1]][%[[W]]#2, %[[W]]#3] : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16>
+// CHECK: vector.shape_cast %[[T2]] : vector<16xf16> to vector<16x1xf16>
+gpu.module @xevm_module{
+ gpu.func @load_nd_2d(%laneid: index) {
+ %c0 = arith.constant 0 : index
+ %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<16x1xf16>) {
+ %0 = "some_op"() : () -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ %1 = xegpu.load_nd %0[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
+ : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<16x16xf16>
+ gpu.yield %1 : vector<16x16xf16>
+ }
+ "some_user_op"(%r) : (vector<16x1xf16>) -> ()
+ gpu.return
+ }
+}
+
+// -----
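+// With array_length = 2, the per-lane load yields a flat vector<32xf16> that
+// is shape_cast back to the distributed shape vector<2x16x1xf16>.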
+// CHECK-LABEL: gpu.func @load_nd_array_length
+// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) {
+// CHECK: %[[W:.*]]:4 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] -> (vector<2x16x1xf16>,
+// CHECK-SAME: !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>,
+// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, index, index) {
+// CHECK: gpu.yield %{{.*}} : vector<2x16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<
+// CHECK-SAME: array_length = 2 : i64>, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, index, index
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]]#1 : !xegpu.tensor_desc<16x16xf16,
+// CHECK-SAME: #xegpu.block_tdesc_attr<array_length = 2 : i64>, #xegpu.layout<lane_layout = [1, 16],
+// CHECK-SAME: lane_data = [1, 1]>> to !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>>
+// CHECK-NEXT: %[[T2:.*]] = xegpu.load_nd %[[T1]][%[[W]]#2, %[[W]]#3] : !xegpu.tensor_desc<16x16xf16,
+// CHECK-SAME: #xegpu.block_tdesc_attr<array_length = 2 : i64>> -> vector<32xf16>
+// CHECK-NEXT: vector.shape_cast %[[T2]] : vector<32xf16> to vector<2x16x1xf16>
+gpu.module @xevm_module{
+ gpu.func @load_nd_array_length(%laneid: index) {
+ %c0 = arith.constant 0 : index
+ %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2x16x1xf16>) {
+ %0 = "some_op"() : () -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>,
+ #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ %1 = xegpu.load_nd %0[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
+ : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>,
+ #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<2x16x16xf16>
+ gpu.yield %1 : vector<2x16x16xf16>
+ }
+ "some_user_op"(%r) : (vector<2x16x1xf16>) -> ()
+ gpu.return
+ }
+}
+
+// -----
+// CHECK-LABEL: gpu.func @dpas
+// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) {
+// CHECK: %[[W:.*]]:4 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] ->
+// CHECK-SAME: (vector<8x1xf32>, vector<8x1xf16>, vector<16x1xf16>, vector<8x1xf32>) {
+// CHECK: gpu.yield %{{.*}} : vector<8x16xf32>, vector<8x16xf16>, vector<16x16xf16>, vector<8x16xf32>
+// CHECK-NEXT: }
+// CHECK-DAG: %[[T1:.*]] = vector.shape_cast %[[W]]#1 : vector<8x1xf16> to vector<8xf16>
+// CHECK-DAG: %[[T2:.*]] = vector.shape_cast %[[W]]#2 : vector<16x1xf16> to vector<16xf16>
+// CHECK-DAG: %[[T3:.*]] = vector.shape_cast %[[W]]#3 : vector<8x1xf32> to vector<8xf32>
+// CHECK-NEXT: %[[T4:.*]] = xegpu.dpas %[[T1]], %[[T2]], %[[T3]] : vector<8xf16>, vector<16xf16>, vector<8xf32> -> vector<8xf32>
+// CHECK-NEXT: vector.shape_cast %[[T4]] : vector<8xf32> to vector<8x1xf32>
+gpu.module @xevm_module{
+ gpu.func @dpas(%laneid: index) {
+ %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<8x1xf32>) {
+ %0 = "some_op"() : () -> vector<8x16xf16>
+ %1 = "some_op"() : () -> vector<16x16xf16>
+ %2 = "some_op"() : () -> vector<8x16xf32>
+ %3 = xegpu.dpas %0, %1, %2
+ {
+ layout_operand_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>,
+ layout_operand_1 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>,
+ layout_operand_2 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>,
+ layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
+ }
+ : vector<8x16xf16>, vector<16x16xf16>, vector<8x16xf32> -> vector<8x16xf32>
+ gpu.yield %3 : vector<8x16xf32>
+ }
+ "some_user_op"(%r) : (vector<8x1xf32>) -> ()
+ gpu.return
+ }
+}
+
+// -----
+// CHECK-LABEL: gpu.func @create_nd_tdesc_non_memref
+// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: ui64, %[[ARG1:[0-9a-zA-Z]+]]: index) {
+// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%[[ARG1]])[16] -> (!xegpu.tensor_desc<16x16xf16,
+// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, ui64) {
+// CHECK: gpu.yield %{{.*}} : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, ui64
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[T1:.*]] = xegpu.create_nd_tdesc %[[W]]#1, shape : [64, 128], strides : [128, 1] : ui64 -> !xegpu.tensor_desc<16x16xf16>
+// CHECK-NEXT: builtin.unrealized_conversion_cast %[[T1]] : !xegpu.tensor_desc<16x16xf16> to !xegpu.tensor_desc<16x16xf16,
+// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> {resolve_simt_type_mismatch}
+gpu.module @xevm_module{
+ gpu.func @create_nd_tdesc_non_memref(%arg0: ui64, %laneid: index) {
+ %c0 = arith.constant 0 : index
+ %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (!xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>) {
+ %0 = xegpu.create_nd_tdesc %arg0, shape:[64, 128], strides:[128, 1] : ui64 ->
+ !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ gpu.yield %0 : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ }
+ "some_user_op"(%r)
+ : (!xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>) -> ()
+ gpu.return
+ }
+}
+
+// -----
+// CHECK-LABEL: gpu.func @prefetch_2d
+// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) {
+// CHECK: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] -> (!xegpu.tensor_desc<16x16xf16,
+// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, index, index) {
+// CHECK: gpu.yield %{{.*}} : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+// CHECK-SAME: , index, index
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]]#0 : !xegpu.tensor_desc<16x16xf16,
+// CHECK-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> to !xegpu.tensor_desc<16x16xf16> {resolve_simt_type_mismatch}
+// CHECK-NEXT: xegpu.prefetch_nd %[[T1]][%[[W]]#1, %[[W]]#2]
+// CHECK-SAME: <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x16xf16>
+gpu.module @xevm_module{
+ gpu.func @prefetch_2d(%laneid: index) {
+ %c0 = arith.constant 0 : index
+ gpu.warp_execute_on_lane_0(%laneid)[16] {
+ %0 = "some_op"() : ()
+ -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ xegpu.prefetch_nd %0[%c0, %c0]
+ <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
+ : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ }
+ gpu.return
+ }
+}
+
+// -----
+// CHECK-LABEL: gpu.func @prefetch_1d
+// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: index) {
+// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] -> (!xegpu.tensor_desc<16xf16,
+// CHECK-SAME: #xegpu.layout<lane_layout = [16], lane_data = [1]>>, index) {
+// CHECK: gpu.yield %{{.*}} : !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>>, index
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[W]]#0 : !xegpu.tensor_desc<16xf16,
+// CHECK-SAME: #xegpu.layout<lane_layout = [16], lane_data = [1]>> to !xegpu.tensor_desc<16xf16> {resolve_simt_type_mismatch}
+// CHECK-NEXT: xegpu.prefetch_nd %[[T1]][%[[W]]#1] <{l1_hint = #xegpu.cache_hint<cached>,
+// CHECK-SAME: l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16xf16>
+gpu.module @xevm_module{
+ gpu.func @prefetch_1d(%laneid: index) {
+ %c0 = arith.constant 0 : index
+ gpu.warp_execute_on_lane_0(%laneid)[16] {
+ %0 = "some_op"() : ()
+ -> !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
+ xegpu.prefetch_nd %0[%c0]
+ <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
+ : !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
+ }
+ gpu.return
+ }
+}
+
+// -----
+// CHECK-LABEL: gpu.func @gpu_barrier({{.*}}) {
+// CHECK: gpu.warp_execute_on_lane_0(%{{.*}})[16] -> ({{.*}}) {
+// CHECK: gpu.yield %{{.*}}
+// CHECK: }
+// CHECK: %{{.*}} = xegpu.load_nd %{{.*}} : !xegpu.tensor_desc<16xf16> -> vector<1xf16>
+// CHECK: gpu.barrier
+gpu.module @xevm_module{
+ gpu.func @gpu_barrier(%laneid: index) {
+ %c0 = arith.constant 0 : index
+ %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<1xf16>) {
+ %0 = "some_op"() : () -> !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
+ %1 = xegpu.load_nd %0[%c0]
+ {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>}
+ : !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>> -> vector<16xf16>
+ gpu.barrier
+ gpu.yield %1 : vector<16xf16>
+ }
+ "some_user_op"(%r) : (vector<1xf16>) -> ()
+ gpu.return
+ }
+}
+
+// -----
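+// Dim 1 is lane-distributed and the reduction runs over dim 0, so each lane
+// reduces its own 16x2 fragment column by column without cross-lane traffic.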
+// CHECK-LABEL: gpu.func @vector_multi_reduction_dim1_distributed_dim0_reduction
+// CHECK: %[[ACC:.*]] = arith.constant {{.*}} dense<0.000000e+00> : vector<32xf32>
+// CHECK: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%{{.*}})[16]
+// CHECK-SAME: -> (vector<2xf32>, vector<16x2xf32>, vector<2xf32>) {
+// CHECK: %[[SRC:.*]] = "some_def"() {{.*}} : () -> vector<16x32xf32>
+// CHECK: gpu.yield %{{.*}}, %[[SRC]], %[[ACC]] : vector<32xf32>, vector<16x32xf32>, vector<32xf32>
+// CHECK-NEXT: }
+// CHECK: %[[T1:.*]] = vector.extract_strided_slice %[[W]]#1
+// CHECK-SAME: {offsets = [0, 0], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32>
+// CHECK: %[[T2:.*]] = vector.shape_cast %[[T1]] : vector<16x1xf32> to vector<16xf32>
+// CHECK: %[[T3:.*]] = vector.extract %[[W]]#2[0] : f32 from vector<2xf32>
+// CHECK: %[[T4:.*]] = vector.reduction <add>, %[[T2]], %[[T3]] : vector<16xf32> into f32
+// CHECK: %[[T5:.*]] = vector.extract_strided_slice %[[W]]#1
+// CHECK-SAME: {offsets = [0, 1], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32>
+// CHECK: %[[T6:.*]] = vector.shape_cast %[[T5]] : vector<16x1xf32> to vector<16xf32>
+// CHECK: %[[T7:.*]] = vector.extract %[[W]]#2[1] : f32 from vector<2xf32>
+// CHECK: %[[T8:.*]] = vector.reduction <add>, %[[T6]], %[[T7]] : vector<16xf32> into f32
+// CHECK: %[[T9:.*]] = vector.from_elements %[[T4]], %[[T8]] : vector<2xf32>
+gpu.module @xevm_module{
+gpu.func @vector_multi_reduction_dim1_distributed_dim0_reduction(%laneid: index) {
+ %c0 = arith.constant 0 : index
+ %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2xf32>) {
+ %src = "some_def"()
+ {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
+ : () -> (vector<16x32xf32>)
+ %acc = arith.constant
+ {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>}
+ dense<0.0> : vector<32xf32>
+ %1 = vector.multi_reduction <add>, %src, %acc
+ {
+ layout_operand_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>,
+ layout_operand_1 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>,
+ layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>
+ } [0]
+ : vector<16x32xf32> to vector<32xf32>
+ gpu.yield %1 : vector<32xf32>
+ }
+ "some_user_op"(%r) : (vector<2xf32>) -> ()
+ gpu.return
+}
+}
+
+// -----
+// CHECK-LABEL: gpu.func @vector_multi_reduction_dim1_distributed_dim1_reduction
+// CHECK: %[[W:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<2xf32>) {
+// CHECK-NEXT: %[[SRC:.*]] = "some_def"() {{.*}} : () -> vector<2x16xf32>
+// CHECK-NEXT: %[[T2:.*]] = vector.extract %[[SRC]][0] : vector<16xf32> from vector<2x16xf32>
+// CHECK-NEXT: %[[T3:.*]] = vector.reduction <add>, %[[T2]], %cst : vector<16xf32> into f32
+// CHECK-NEXT: %[[T4:.*]] = vector.extract %[[SRC]][1] : vector<16xf32> from vector<2x16xf32>
+// CHECK-NEXT: %[[T5:.*]] = vector.reduction <add>, %[[T4]], %cst : vector<16xf32> into f32
+// CHECK-NEXT: %[[T6:.*]] = vector.from_elements %[[T3]], %[[T5]] : vector<2xf32>
+// CHECK-NEXT: gpu.yield %[[T6]] : vector<2xf32>
+// CHECK-NEXT: }
+gpu.module @xevm_module{
+gpu.func @vector_multi_reduction_dim1_distributed_dim1_reduction(%laneid: index) {
+ %c0 = arith.constant 0 : index
+ %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2xf32>) {
+ %src = "some_def"()
+ {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
+ : () -> (vector<2x16xf32>)
+ %acc = arith.constant
+ {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [1]>}
+ dense<0.0> : vector<2xf32>
+ %1 = vector.multi_reduction <add>, %src, %acc
+ {
+ layout_operand_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>,
+ layout_operand_1 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [1]>,
+ layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [1]>
+ }
+ [1] : vector<2x16xf32> to vector<2xf32>
+ gpu.yield %1 : vector<2xf32>
+ }
+ "some_user_op"(%r) : (vector<2xf32>) -> ()
+ gpu.return
+}
+}
+
+// -----
+// CHECK-LABEL: gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction
+// CHECK: %[[ACC:.*]] = arith.constant {{.*}} dense<0.000000e+00> : vector<32xf32>
+// CHECK: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<2xf32>, vector<2x16xf32>, vector<2xf32>) {
+// CHECK: %[[SRC:.*]] = "some_def"() {{.*}} : () -> vector<32x16xf32>
+// CHECK: gpu.yield %{{.*}}, %[[SRC]], %[[ACC]] : vector<32xf32>, vector<32x16xf32>, vector<32xf32>
+// CHECK: }
+// CHECK: %[[T1:.*]] = vector.extract %[[W]]#1[0] : vector<16xf32> from vector<2x16xf32>
+// CHECK: %[[T2:.*]] = vector.extract %[[W]]#2[0] : f32 from vector<2xf32>
+// CHECK: %[[T3:.*]] = vector.reduction <add>, %[[T1]], %[[T2]] : vector<16xf32> into f32
+// CHECK: %[[T4:.*]] = vector.extract %[[W]]#1[1] : vector<16xf32> from vector<2x16xf32>
+// CHECK: %[[T5:.*]] = vector.extract %[[W]]#2[1] : f32 from vector<2xf32>
+// CHECK: %[[T6:.*]] = vector.reduction <add>, %[[T4]], %[[T5]] : vector<16xf32> into f32
+// CHECK: %[[T7:.*]] = vector.from_elements %[[T3]], %[[T6]] : vector<2xf32>
+gpu.module @xevm_module{
+gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction(%laneid: index) {
+ %c0 = arith.constant 0 : index
+ %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2xf32>) {
+ %src = "some_def"()
+ {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}
+ : () -> (vector<32x16xf32>)
+ %acc = arith.constant
+ {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [1]>}
+ dense<0.0> : vector<32xf32>
+ %1 = vector.multi_reduction <add>, %src, %acc
+ {
+ layout_operand_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>,
+ layout_operand_1 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [1]>,
+ layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [1]>
+ }
+ [1] : vector<32x16xf32> to vector<32xf32>
+ gpu.yield %1 : vector<32xf32>
+ }
+ "some_user_op"(%r) : (vector<2xf32>) -> ()
+ gpu.return
+}
+}
+
+// -----
+// CHECK-LABEL: gpu.func @vector_multi_reduction_dim0_distributed_dim0_reduction
+// CHECK: %[[W:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<2xf32>) {
+// CHECK: %[[SRC:.*]] = "some_def"() {{.*}} : () -> vector<16x2xf32>
+// CHECK: %[[T1:.*]] = vector.extract_strided_slice %[[SRC]]
+// CHECK-SAME: {offsets = [0, 0], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32>
+// CHECK: %[[T2:.*]] = vector.shape_cast %[[T1]] {{.*}} : vector<16x1xf32> to vector<16xf32>
+// CHECK: %[[T3:.*]] = vector.reduction <add>, %[[T2]], %{{.*}} : vector<16xf32> into f32
+// CHECK: %[[T4:.*]] = vector.extract_strided_slice %[[SRC]]
+// CHECK-SAME: {offsets = [0, 1], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32>
+// CHECK: %[[T5:.*]] = vector.shape_cast %[[T4]] {{.*}} : vector<16x1xf32> to vector<16xf32>
+// CHECK: %[[T6:.*]] = vector.reduction <add>, %[[T5]], %{{.*}} : vector<16xf32> into f32
+// CHECK: %[[T7:.*]] = vector.from_elements %[[T3]], %[[T6]] : vector<2xf32>
+// CHECK: gpu.yield %[[T7]] : vector<2xf32>
+// CHECK: }
+gpu.module @xevm_module{
+gpu.func @vector_multi_reduction_dim0_distributed_dim0_reduction(%laneid: index) {
+ %c0 = arith.constant 0 : index
+ %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2xf32>) {
+ %src = "some_def"()
+ {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}
+ : () -> (vector<16x2xf32>)
+ %acc = arith.constant
+ {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [0]>}
+ dense<0.0> : vector<2xf32>
+ %1 = vector.multi_reduction <add>, %src, %acc
+ {
+ layout_operand_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>,
+ layout_operand_1 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [0]>,
+ layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [0]>
+ }
+ [0] : vector<16x2xf32> to vector<2xf32>
+ gpu.yield %1 : vector<2xf32>
+ }
+ "some_user_op"(%r) : (vector<2xf32>) -> ()
+ gpu.return
+}
+}
+
+// -----
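+// Scatter load/store with chunk_size = 8: the 16 offsets/masks distribute one
+// per lane while each lane keeps its full 8-element chunk.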
+// CHECK-LABEL: gpu.func @scatter_ops_chunksize({{.*}}) {
+// CHECK: %[[OFFSETS:.*]] = arith.constant {{.*}} dense<12> : vector<16xindex>
+// CHECK: %[[MASKS:.*]] = arith.constant {{.*}} dense<true> : vector<16xi1>
+// CHECK: %[[W:.*]]:4 = gpu.warp_execute_on_lane_0(%{{.*}})[16]
+// CHECK-SAME: -> (vector<1x8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1>) {
+// CHECK: gpu.yield %{{.*}}, %{{.*}}, %[[OFFSETS]], %[[MASKS]] :
+// CHECK-SAME: vector<16x8xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1>
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[T1:.*]] = xegpu.load %[[W]]#1[%[[W]]#2], %[[W]]#3 <{chunk_size = 8 : i64}>
+// CHECK-SAME: : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<8xf16>
+// CHECK-NEXT: xegpu.store %[[T1]], %[[W]]#1[%[[W]]#2], %[[W]]#3 <{chunk_size = 8 : i64}>
+// CHECK-SAME: : vector<8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1>
+gpu.module @xevm_module{
+ gpu.func @scatter_ops_chunksize(%laneid: index, %src: memref<256xf16>) {
+ gpu.warp_execute_on_lane_0(%laneid)[16] {
+ %1 = arith.constant
+ {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>}
+        dense<1> : vector<16xi1>
+ %offset = arith.constant
+ {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>}
+ dense<12> : vector<16xindex>
+ %3 = xegpu.load %src[%offset], %1 <{chunk_size=8}>
+ {
+ layout_operand_1 = #xegpu.layout<lane_layout = [16], lane_data = [1]>,
+ layout_operand_2 = #xegpu.layout<lane_layout = [16], lane_data = [1]>,
+ layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>
+ }
+ : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16x8xf16>
+ xegpu.store %3, %src[%offset], %1 <{chunk_size=8}>
+ {
+ layout_operand_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>,
+ layout_operand_2 = #xegpu.layout<lane_layout = [16], lane_data = [1]>,
+ layout_operand_3 = #xegpu.layout<lane_layout = [16], lane_data = [1]>
+ }
+ : vector<16x8xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1>
+ }
+ gpu.return
+ }
+}
+
+// -----
+// CHECK-LABEL: gpu.func @scatter_ops({{.*}}) {
+// CHECK: %[[OFFSETS:.*]] = arith.constant {{.*}} dense<12> : vector<16xindex>
+// CHECK: %[[MASKS:.*]] = arith.constant {{.*}} dense<true> : vector<16xi1>
+// CHECK: %[[W:.*]]:4 = gpu.warp_execute_on_lane_0(%{{.*}})[16]
+// CHECK-SAME: -> (vector<1xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1>) {
+// CHECK: gpu.yield %{{.*}}, %{{.*}}, %[[OFFSETS]], %[[MASKS]]
+// CHECK-SAME: : vector<16xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1>
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[T1:.*]] = xegpu.load %[[W]]#1[%[[W]]#2], %[[W]]#3
+// CHECK-SAME: : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<1xf16>
+// CHECK-NEXT: xegpu.store %[[T1]], %[[W]]#1[%[[W]]#2], %[[W]]#3
+// CHECK-SAME: : vector<1xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1>
+gpu.module @xevm_module{
+ gpu.func @scatter_ops(%src: memref<256xf16>, %laneid: index) {
+ gpu.warp_execute_on_lane_0(%laneid)[16] {
+ %1 = arith.constant
+ {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>}
+ dense<1> : vector<16xi1>
+ %offset = arith.constant
+ {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>}
+ dense<12> : vector<16xindex>
+ %3 = xegpu.load %src[%offset], %1
+ {
+ layout_operand_1 = #xegpu.layout<lane_layout = [16], lane_data = [1]>,
+ layout_operand_2 = #xegpu.layout<lane_layout = [16], lane_data = [1]>,
+ layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>
+ } : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16xf16>
+ xegpu.store %3, %src[%offset], %1
+ {
+ layout_operand_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>,
+ layout_operand_2 = #xegpu.layout<lane_layout = [16], lane_data = [1]>,
+ layout_operand_3 = #xegpu.layout<lane_layout = [16], lane_data = [1]>
+ }
+ : vector<16xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1>
+ }
+ gpu.return
+ }
+}
+
+// -----
+// CHECK-LABEL: gpu.func @memref_extract_aligned_pointer_as_index(
+// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (index, memref<256x256xf16>) {
+// CHECK: gpu.yield %{{.*}}, %{{.*}} : index, memref<256x256xf16>
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[INTPTR:.*]] = memref.extract_aligned_pointer_as_index %[[W]]#1 : memref<256x256xf16> -> index
+// CHECK-NEXT: arith.index_cast %[[INTPTR]] : index to i64
+gpu.module @xevm_module{
+ gpu.func @memref_extract_aligned_pointer_as_index(%arg0 : memref<256x256xf16>, %laneid: index) {
+ %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (index) {
+ %ptr = memref.extract_aligned_pointer_as_index %arg0 : memref<256x256xf16> -> index
+ gpu.yield %ptr : index
+ }
+ %ptr_i64 = arith.index_cast %r : index to i64
+ "some_user_op"(%ptr_i64) : (i64) -> ()
+ gpu.return
+ }
+}
+
+// -----
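+// The [16, 1] -> [1, 16] layout swap makes the transpose lane-local: each
+// lane transposes its own vector<1x2xf32> fragment into vector<2x1xf32>.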
+// CHECK-LABEL: gpu.func @vector_transpose(
+// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<2x1xf32>, vector<1x2xf32>) {
+// CHECK: %[[SRC:.*]] = "some_op"() {{.*}} : () -> vector<16x2xf32>
+// CHECK: gpu.yield %{{.*}}, %[[SRC]] : vector<2x16xf32>, vector<16x2xf32>
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[T1:.*]] = vector.transpose %[[W]]#1, [1, 0] : vector<1x2xf32> to vector<2x1xf32>
+gpu.module @xevm_module{
+ gpu.func @vector_transpose(%arg0: memref<2x16xf32>, %laneid: index) {
+ %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2x1xf32>) {
+ %cst = "some_op"()
+ {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}
+ : () -> (vector<16x2xf32>)
+ %transpose = vector.transpose %cst, [1, 0]
+ {
+ layout_operand_0 = #xegpu.layout<lane_layout = [16 , 1], lane_data = [1, 1]>,
+ layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
+ }
+ : vector<16x2xf32> to vector<2x16xf32>
+ gpu.yield %transpose : vector<2x16xf32>
+ }
+ "some_user_op"(%r) : (vector<2x1xf32>) -> ()
+ gpu.return
+ }
+}
+
+// -----
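+// Bitcast halves the element count as i8 pairs become single i16 values:
+// the per-lane fragment goes from vector<4x2xi8> to vector<4x1xi16>.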
+// CHECK-LABEL: gpu.func @vector_bitcast(
+// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<4x1xi16>, vector<4x2xi8>) {
+// CHECK: %[[SRC:.*]] = "some_op"() {{.*}} : () -> vector<4x32xi8>
+// CHECK: gpu.yield %{{.*}}, %[[SRC]] : vector<4x16xi16>, vector<4x32xi8>
+// CHECK: }
+// CHECK: vector.bitcast %[[W]]#1 : vector<4x2xi8> to vector<4x1xi16>
+gpu.module @xevm_module{
+ gpu.func @vector_bitcast(%arg0: memref<4x16xi16>, %laneid: index) {
+ %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<4x1xi16>) {
+ %cst = "some_op"()
+ {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 2]>}
+ : () -> (vector<4x32xi8>)
+ %bitcast = vector.bitcast %cst
+ {
+ layout_operand_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 2]>,
+ layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
+ }
+ : vector<4x32xi8> to vector<4x16xi16>
+ gpu.yield %bitcast : vector<4x16xi16>
+ }
+ "some_user_op"(%r) : (vector<4x1xi16>) -> ()
+ gpu.return
+ }
+}
diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
index 59fac26..0e1365a 100644
--- a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
+++ b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
@@ -1,198 +1,76 @@
// RUN: mlir-opt --xevm-attach-target='module=xevm_* chip=pvc' -xegpu-subgroup-distribute \
// RUN: -allow-unregistered-dialect -canonicalize -cse -split-input-file %s | FileCheck %s
-// RUN: mlir-opt --xevm-attach-target='module=xevm_* chip=pvc' \
-// RUN: -xegpu-subgroup-distribute="enable-sg-reductions=false" -allow-unregistered-dialect \
-// RUN: -canonicalize -cse -split-input-file %s | FileCheck %s --check-prefix=CHECK-REDUCTION
-
-// CHECK-LABEL: gpu.func @store_nd_1d
-// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16xf32>) {
-// CHECK-DAG: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<1xf32>
-// CHECK-DAG: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<16xf32> -> !xegpu.tensor_desc<16xf32>
-// CHECK: xegpu.store_nd %[[CST]], %[[T0]][%{{.*}}] : vector<1xf32>, !xegpu.tensor_desc<16xf32>
-// CHECK: gpu.return
-gpu.module @xevm_module{
- gpu.func @store_nd_1d(%arg0: memref<16xf32>) {
- %c0 = arith.constant 0 : index
- %cst = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<1.000000e+00> : vector<16xf32>
- %0 = xegpu.create_nd_tdesc %arg0 : memref<16xf32> -> !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
- xegpu.store_nd %cst, %0 [%c0] : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
- gpu.return
- }
-}
-
-// -----
-// CHECK-LABEL: gpu.func @store_nd_2d
-// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16x16xf16>) {
-// CHECK-DAG: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<16xf16>
-// CHECK-DAG: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
-// CHECK: xegpu.store_nd %[[CST]], %[[T0]][%{{.*}}] : vector<16xf16>, !xegpu.tensor_desc<16x16xf16>
-gpu.module @xevm_module{
- gpu.func @store_nd_2d(%arg0: memref<16x16xf16>) {
- %c0 = arith.constant 0 : index
- %cst = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} dense<1.000000e+00> : vector<16x16xf16>
- %0 = xegpu.create_nd_tdesc %arg0 : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- xegpu.store_nd %cst, %0 [%c0, %c0] : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- gpu.return
- }
-}
-
-
-
-// -----
-// CHECK-LABEL: gpu.func @load_nd_1d
-// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16xf32>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16xf32>) {
-// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<16xf32> -> !xegpu.tensor_desc<16xf32>
-// CHECK-DAG: %[[T1:.*]] = xegpu.load_nd %[[T0]][%{{.*}}] : !xegpu.tensor_desc<16xf32> -> vector<1xf32>
-// CHECK-DAG: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16xf32> -> !xegpu.tensor_desc<16xf32>
-// CHECK: xegpu.store_nd %[[T1]], %[[T2]][%{{.*}}] : vector<1xf32>, !xegpu.tensor_desc<16xf32>
-gpu.module @xevm_module{
- gpu.func @load_nd_1d(%arg0: memref<16xf32>, %arg1: memref<16xf32>) {
- %c0 = arith.constant 0 : index
- %0 = xegpu.create_nd_tdesc %arg0 : memref<16xf32> -> !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
- %1 = xegpu.load_nd %0 [%c0] {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} : !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>> -> vector<16xf32>
- %2 = xegpu.create_nd_tdesc %arg1 : memref<16xf32> -> !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
- xegpu.store_nd %1, %2 [%c0] : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
- gpu.return
- }
-}
-
-// -----
-// CHECK-LABEL: gpu.func @load_nd_2d
-// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x16xf16>) {
-// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
-// CHECK-DAG: %[[T1:.*]] = xegpu.load_nd %[[T0]][%{{.*}}] : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16>
-// CHECK-DAG: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
-// CHECK: xegpu.store_nd %[[T1]], %[[T2]][%{{.*}}] : vector<16xf16>, !xegpu.tensor_desc<16x16xf16>
-gpu.module @xevm_module{
- gpu.func @load_nd_2d(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>) {
- %c0 = arith.constant 0 : index
- %0 = xegpu.create_nd_tdesc %arg0 : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- %1 = xegpu.load_nd %0[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<16x16xf16>
- %2 = xegpu.create_nd_tdesc %arg1: memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- xegpu.store_nd %1, %2[%c0, %c0] : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- gpu.return
- }
-}
-
-// -----
-// CHECK-LABEL: gpu.func @load_nd_array_length
-// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<16x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x16xf16>) {
-// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>>
-// CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]][%{{.*}}] : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>> -> vector<32xf16>
-// CHECK: %[[T2:.*]] = vector.shape_cast %[[T1]] : vector<32xf16> to vector<2x16x1xf16>
-// CHECK: %[[T3:.*]] = vector.extract %[[T2]][0] : vector<16x1xf16> from vector<2x16x1xf16>
-// CHECK-DAG: %[[T4:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
-// CHECK-DAG: %[[T5:.*]] = vector.shape_cast %[[T3]] : vector<16x1xf16> to vector<16xf16>
-// CHECK: xegpu.store_nd %[[T5]], %[[T4]][%{{.*}}] : vector<16xf16>, !xegpu.tensor_desc<16x16xf16>
-gpu.module @xevm_module{
- gpu.func @load_nd_array_length(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>) {
- %c0 = arith.constant 0 : index
- %0 = xegpu.create_nd_tdesc %arg0 : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- %1 = xegpu.load_nd %0[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<16x16xf16, #xegpu.block_tdesc_attr<array_length = 2 : i64>, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<2x16x16xf16>
- %2 = vector.extract %1[%c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<16x16xf16> from vector<2x16x16xf16>
- %3 = xegpu.create_nd_tdesc %arg1 : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- xegpu.store_nd %2, %3[%c0, %c0] : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- gpu.return
- }
-}
-
-// -----
-// CHECK-LABEL: gpu.func @load_dpas_store
-// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<8x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x16xf16>, %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xf32>) {
-// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
-// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T2]][%{{.*}}] : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
-// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
-// CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]][%{{.*}}] <{packed}> : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16>
-// CHECK-DAG: %[[T4:.*]] = xegpu.dpas %[[T3]], %[[T1]] : vector<8xf16>, vector<16xf16> -> vector<8xf32>
-// CHECK-DAG: %[[T5:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32>
-// CHECK: xegpu.store_nd %[[T4]], %[[T5]][%{{.*}}] : vector<8xf32>, !xegpu.tensor_desc<8x16xf32>
-gpu.module @xevm_module{
- gpu.func @load_dpas_store(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<8x16xf32>) {
- %c0 = arith.constant 0 : index
- %0 = xegpu.create_nd_tdesc %arg0 : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- %1 = xegpu.load_nd %0[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<8x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xf16>
- %2 = xegpu.create_nd_tdesc %arg1: memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>>
- %3 = xegpu.load_nd %2[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>} : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>> -> vector<16x16xf16>
- %4 = xegpu.dpas %1, %3 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32>
- %5 = xegpu.create_nd_tdesc %arg2 : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- xegpu.store_nd %4, %5[%c0, %c0] : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- gpu.return
- }
-}
-
-
-// -----
// CHECK-LABEL: gpu.func @load_dpas_postop_store
-// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<8x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x16xf16>, %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xf32>) {
-// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
-// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T2]][%{{.*}}] : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
-// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
-// CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]][%{{.*}}] <{packed}> : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16>
-// CHECK-DAG: %[[T4:.*]] = xegpu.dpas %[[T3]], %[[T1]] : vector<8xf16>, vector<16xf16> -> vector<8xf32>
-// CHECK: %[[T5:.*]] = vector.shape_cast %[[T4]] : vector<8xf32> to vector<8x1xf32>
-// CHECK: %[[T6:.*]] = math.exp %[[T5]] {{{.*}}} : vector<8x1xf32>
-// CHECK-DAG: %[[T8:.*]] = vector.shape_cast %[[T6]] : vector<8x1xf32> to vector<8xf32>
-// CHECK-DAG: %[[T7:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32>
-// CHECK: xegpu.store_nd %[[T8]], %[[T7]][{{.*}}] : vector<8xf32>, !xegpu.tensor_desc<8x16xf32>
+// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<8x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x16xf16>,
+// CHECK-SAME: %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xf32>) {
+// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
+// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T2]][%{{.*}}] : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
+// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16>
+// CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]][%{{.*}}] <{packed}> : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16>
+// CHECK-DAG: %[[T4:.*]] = xegpu.dpas %[[T3]], %[[T1]] : vector<8xf16>, vector<16xf16> -> vector<8xf32>
+// CHECK: %[[T5:.*]] = vector.shape_cast %[[T4]] : vector<8xf32> to vector<8x1xf32>
+// CHECK: %[[T6:.*]] = math.exp %[[T5]] {{{.*}}} : vector<8x1xf32>
+// CHECK-DAG: %[[T8:.*]] = vector.shape_cast %[[T6]] : vector<8x1xf32> to vector<8xf32>
+// CHECK-DAG: %[[T7:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32>
+// CHECK: xegpu.store_nd %[[T8]], %[[T7]][{{.*}}] : vector<8xf32>, !xegpu.tensor_desc<8x16xf32>
gpu.module @xevm_module{
gpu.func @load_dpas_postop_store(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<8x16xf32>) {
%c0 = arith.constant 0 : index
- %0 = xegpu.create_nd_tdesc %arg0 : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- %1 = xegpu.load_nd %0[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<8x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xf16>
- %2 = xegpu.create_nd_tdesc %arg1: memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>>
- %3 = xegpu.load_nd %2[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>} : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>> -> vector<16x16xf16>
- %4 = xegpu.dpas %1, %3 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32>
- %5 = math.exp %4 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<8x16xf32>
- %6 = xegpu.create_nd_tdesc %arg2 : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- xegpu.store_nd %5, %6[%c0, %c0] : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- gpu.return
- }
-}
+ %0 = xegpu.create_nd_tdesc %arg0 : memref<8x16xf16>
+ -> !xegpu.tensor_desc<8x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ %1 = xegpu.load_nd %0[%c0, %c0]
+ {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} :
+ !xegpu.tensor_desc<8x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xf16>
+
+ %2 = xegpu.create_nd_tdesc %arg1: memref<16x16xf16>
+ -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>>
+ %3 = xegpu.load_nd %2[%c0, %c0]
+ {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>}
+ : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>>
+ -> vector<16x16xf16>
+
+ %4 = xegpu.dpas %1, %3
+ {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
+ : vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32>
-// -----
-// CHECK-LABEL: gpu.func @create_nd_tdesc_non_memref
-// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: ui64, %[[ARG1:[0-9a-zA-Z]+]]: ui64, %[[ARG2:[0-9a-zA-Z]+]]: index,
-// CHECK-SAME: %[[ARG3:[0-9a-zA-Z]+]]: index, %[[ARG4:[0-9a-zA-Z]+]]: index,
-// CHECK-SAME: %[[ARG5:[0-9a-zA-Z]+]]: index, %[[ARG6:[0-9a-zA-Z]+]]: index, %[[ARG7:[0-9a-zA-Z]+]]: index) {
-// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]], shape : [%[[ARG2]], %[[ARG3]]], strides : [%[[ARG4]], %[[ARG5]]] : ui64 -> !xegpu.tensor_desc<16x16xf16>
-// CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]][{{.*}}] : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16>
-// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG1]], shape : [%[[ARG2]], %[[ARG3]]], strides : [%[[ARG4]], %[[ARG5]]] : ui64 -> !xegpu.tensor_desc<16x16xf16>
-// CHECK: xegpu.store_nd %[[T1]], %[[T2]][{{.*}}] : vector<16xf16>, !xegpu.tensor_desc<16x16xf16>
-gpu.module @xevm_module{
- gpu.func @create_nd_tdesc_non_memref(%arg0: ui64, %arg1: ui64, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index, %arg7: index) {
- %c0 = arith.constant 0 : index
- %0 = xegpu.create_nd_tdesc %arg0, shape:[%arg2, %arg3], strides:[%arg4, %arg5] : ui64 -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- %1 = xegpu.load_nd %0[%c0, %c0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<16x16xf16>
- %2 = xegpu.create_nd_tdesc %arg1, shape:[%arg2, %arg3], strides:[%arg4, %arg5] : ui64 -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- xegpu.store_nd %1, %2[%c0, %c0] : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ %5 = math.exp %4
+ {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
+ : vector<8x16xf32>
+
+ %6 = xegpu.create_nd_tdesc %arg2 : memref<8x16xf32> ->
+ !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ xegpu.store_nd %5, %6[%c0, %c0] : vector<8x16xf32>,
+ !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
gpu.return
}
}
// -----
-// TODO: gemm does not use update_nd_offset because of an issue in scf-for distribution.
// CHECK-LABEL: gpu.func @gemm
-// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<1024x1024xbf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<1024x1024xbf16>, %[[ARG2:[0-9a-zA-Z]+]]: memref<1024x1024xf32>) {
-// CHECK-DAG: %[[BLOCK_ID_X:.*]] = gpu.block_id x
-// CHECK-DAG: %[[BLOCK_ID_Y:.*]] = gpu.block_id y
-// CHECK-DAG: %[[Y_COORD:.*]] = arith.muli %[[BLOCK_ID_Y]], %c16 : index
-// CHECK-DAG: %[[X_COORD:.*]] = arith.muli %[[BLOCK_ID_X]], %c8 : index
-// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32>
-// CHECK-NEXT: %[[T3:.*]] = xegpu.load_nd %[[T2]][%[[X_COORD]], %[[Y_COORD]]] : !xegpu.tensor_desc<8x16xf32> -> vector<8xf32>
-// CHECK-NEXT: %[[T4:.*]] = vector.shape_cast %[[T3]] : vector<8xf32> to vector<8x1xf32>
-// CHECK: %[[T5:.*]] = scf.for %[[K:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ARG4:.*]] = %[[T4]]) -> (vector<8x1xf32>) {
-// CHECK-DAG: %[[T10:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<1024x1024xbf16> -> !xegpu.tensor_desc<16x16xbf16>
-// CHECK-DAG: %[[T11:.*]] = xegpu.load_nd %[[T10]][%[[K]], %[[Y_COORD]]] <{packed}> : !xegpu.tensor_desc<16x16xbf16> -> vector<16xbf16>
-// CHECK-DAG: %[[T12:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<1024x1024xbf16> -> !xegpu.tensor_desc<8x16xbf16>
-// CHECK-DAG: %[[T13:.*]] = xegpu.load_nd %[[T12]][%[[X_COORD]], %[[K]]] : !xegpu.tensor_desc<8x16xbf16> -> vector<8xbf16>
-// CHECK-DAG: %[[T14:.*]] = vector.shape_cast %[[ARG4]] : vector<8x1xf32> to vector<8xf32>
-// CHECK-NEXT: %[[T15:.*]] = xegpu.dpas %[[T13]], %[[T11]], %[[T14]] : vector<8xbf16>, vector<16xbf16>, vector<8xf32> -> vector<8xf32>
-// CHECK-NEXT: %[[T16:.*]] = vector.shape_cast %[[T15]] : vector<8xf32> to vector<8x1xf32>
-// CHECK-NEXT: scf.yield %[[T16]] : vector<8x1xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: %[[T9:.*]] = vector.shape_cast %[[T5]] : vector<8x1xf32> to vector<8xf32>
-// CHECK-NEXT: xegpu.store_nd %[[T9]], %[[T2]][%[[X_COORD]], %[[Y_COORD]]] : vector<8xf32>, !xegpu.tensor_desc<8x16xf32>
+// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<1024x1024xbf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<1024x1024xbf16>,
+// CHECK-SAME: %[[ARG2:[0-9a-zA-Z]+]]: memref<1024x1024xf32>) {
+// CHECK-DAG: %[[BLOCK_ID_X:.*]] = gpu.block_id x
+// CHECK-DAG: %[[BLOCK_ID_Y:.*]] = gpu.block_id y
+// CHECK-DAG: %[[Y_COORD:.*]] = arith.muli %[[BLOCK_ID_Y]], %c16 : index
+// CHECK-DAG: %[[X_COORD:.*]] = arith.muli %[[BLOCK_ID_X]], %c8 : index
+// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32>
+// CHECK-NEXT: %[[T3:.*]] = xegpu.load_nd %[[T2]][%[[X_COORD]], %[[Y_COORD]]] : !xegpu.tensor_desc<8x16xf32> -> vector<8xf32>
+// CHECK-NEXT: %[[T4:.*]] = vector.shape_cast %[[T3]] : vector<8xf32> to vector<8x1xf32>
+// CHECK: %[[T5:.*]] = scf.for %[[K:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ARG4:.*]] = %[[T4]])
+// CHECK-SAME: -> (vector<8x1xf32>) {
+// CHECK-DAG: %[[T10:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<1024x1024xbf16> -> !xegpu.tensor_desc<16x16xbf16>
+// CHECK-DAG: %[[T11:.*]] = xegpu.load_nd %[[T10]][%[[K]], %[[Y_COORD]]] <{packed}> : !xegpu.tensor_desc<16x16xbf16> -> vector<16xbf16>
+// CHECK-DAG: %[[T12:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<1024x1024xbf16> -> !xegpu.tensor_desc<8x16xbf16>
+// CHECK-DAG: %[[T13:.*]] = xegpu.load_nd %[[T12]][%[[X_COORD]], %[[K]]] : !xegpu.tensor_desc<8x16xbf16> -> vector<8xbf16>
+// CHECK-DAG: %[[T14:.*]] = vector.shape_cast %[[ARG4]] : vector<8x1xf32> to vector<8xf32>
+// CHECK-NEXT: %[[T15:.*]] = xegpu.dpas %[[T13]], %[[T11]], %[[T14]]
+// CHECK-SAME: : vector<8xbf16>, vector<16xbf16>, vector<8xf32> -> vector<8xf32>
+// CHECK-NEXT: %[[T16:.*]] = vector.shape_cast %[[T15]] : vector<8xf32> to vector<8x1xf32>
+// CHECK-NEXT: scf.yield %[[T16]] : vector<8x1xf32>
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[T9:.*]] = vector.shape_cast %[[T5]] : vector<8x1xf32> to vector<8xf32>
+// CHECK-NEXT: xegpu.store_nd %[[T9]], %[[T2]][%[[X_COORD]], %[[Y_COORD]]] : vector<8xf32>, !xegpu.tensor_desc<8x16xf32>
gpu.module @xevm_module{
gpu.func @gemm(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x1024xbf16>, %arg2: memref<1024x1024xf32>){
%c0 = arith.constant 0 : index
@@ -203,213 +81,56 @@ gpu.func @gemm(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x1024xbf16>, %ar
%block_id_y = gpu.block_id y
%0 = arith.muli %block_id_x, %c8 : index
%1 = arith.muli %block_id_y, %c16 : index
- %2 = xegpu.create_nd_tdesc %arg2 : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- %3 = xegpu.load_nd %2[%0, %1] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xf32>
- %4 = scf.for %arg3 = %c0 to %c1024 step %c16 iter_args(%arg4 = %3) -> (vector<8x16xf32>) {
- %5 = xegpu.create_nd_tdesc %arg0: memref<1024x1024xbf16> -> !xegpu.tensor_desc<8x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- %6 = xegpu.create_nd_tdesc %arg1 : memref<1024x1024xbf16> -> !xegpu.tensor_desc<16x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>>
- %7 = xegpu.load_nd %5[%0, %arg3] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<8x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xbf16>
- %8 = xegpu.load_nd %6[%arg3, %1] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>} : !xegpu.tensor_desc<16x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>> -> vector<16x16xbf16>
- %9 = xegpu.dpas %7, %8, %arg4 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<8x16xbf16>, vector<16x16xbf16>, vector<8x16xf32> -> vector<8x16xf32>
- scf.yield %9 : vector<8x16xf32>
- } {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
- xegpu.store_nd %4, %2[%0, %1] : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- gpu.return
-}
-}
-// -----
-// CHECK-LABEL: gpu.func @prefetch_2d
-// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<256x256xf16>) {
-// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<256x256xf16> -> !xegpu.tensor_desc<16x16xf16>
-// CHECK: xegpu.prefetch_nd %[[T0]][%{{.*}}] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x16xf16>
-gpu.module @xevm_module{
- gpu.func @prefetch_2d(%arg0: memref<256x256xf16>) {
- %c0 = arith.constant 0 : index
- %0 = xegpu.create_nd_tdesc %arg0 : memref<256x256xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- xegpu.prefetch_nd %0[%c0, %c0] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- gpu.return
- }
-}
-
-// -----
-// CHECK-LABEL: gpu.func @prefetch_1d
-// CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: memref<256xf16>) {
-// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<256xf16> -> !xegpu.tensor_desc<16xf16>
-// CHECK: xegpu.prefetch_nd %[[T0]][%{{.*}}] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16xf16>
-gpu.module @xevm_module{
- gpu.func @prefetch_1d(%arg0: memref<256xf16>) {
- %c0 = arith.constant 0 : index
- %0 = xegpu.create_nd_tdesc %arg0: memref<256xf16> -> !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
- xegpu.prefetch_nd %0[%c0] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
- gpu.return
- }
-}
-// -----
-// CHECK-LABEL: gpu.func @gpu_barrier({{.*}}) {
-// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<256xf16> -> !xegpu.tensor_desc<16xf16>
-// CHECK-NEXT: %[[T1:.*]] = xegpu.load_nd %[[T0]][{{.*}}] : !xegpu.tensor_desc<16xf16> -> vector<1xf16>
-// CHECK-NEXT: gpu.barrier
-// CHECK-NEXT: %[[T2:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<256xf16> -> !xegpu.tensor_desc<16xf16>
-// CHECK-NEXT: xegpu.store_nd %[[T1]], %[[T2]][{{.*}}] : vector<1xf16>, !xegpu.tensor_desc<16xf16>
-gpu.module @xevm_module{
- gpu.func @gpu_barrier(%arg0: memref<256xf16>, %arg1: memref<256xf16>) {
- %c0 = arith.constant 0 : index
- %0 = xegpu.create_nd_tdesc %arg0 : memref<256xf16> -> !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
- %1 = xegpu.load_nd %0[%c0] {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} : !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>> -> vector<16xf16>
- gpu.barrier
- %2 = xegpu.create_nd_tdesc %arg1 : memref<256xf16> -> !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
- xegpu.store_nd %1, %2[%c0] : vector<16xf16>, !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
- gpu.return
- }
-}
-// -----
-// CHECK-LABEL: gpu.func @vector_multi_reduction_dim1_distributed_dim0_reduction
-// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] ->
-// CHECK-SAME: (!xegpu.tensor_desc<1x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, vector<16x2xf32>) {
-// CHECK: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : () -> vector<16x32xf32>
-// CHECK-NEXT: gpu.yield %{{.*}}, %[[SRC]] : !xegpu.tensor_desc<1x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, vector<16x32xf32>
-// CHECK-NEXT: }
-// CHECK: %[[COL0:.*]] = vector.extract_strided_slice %[[W]]#1 {offsets = [0, 0], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32>
-// CHECK-NEXT: %[[CAST0:.*]] = vector.shape_cast %[[COL0]] : vector<16x1xf32> to vector<16xf32>
-// CHECK-NEXT: %[[RED0:.*]] = vector.reduction <add>, %[[CAST0]], %{{.*}} : vector<16xf32> into f32
-// CHECK: %[[COL1:.*]] = vector.extract_strided_slice %[[W]]#1 {offsets = [0, 1], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32>
-// CHECK-NEXT: %[[CAST1:.*]] = vector.shape_cast %[[COL1]] : vector<16x1xf32> to vector<16xf32>
-// CHECK-NEXT: %[[RED1:.*]] = vector.reduction <add>, %[[CAST1]], %{{.*}} : vector<16xf32> into f32
-// CHECK-NEXT: vector.from_elements %[[RED0]], %[[RED1]] : vector<2xf32>
-gpu.module @xevm_module{
-gpu.func @vector_multi_reduction_dim1_distributed_dim0_reduction() {
- %c0 = arith.constant 0 : index
- %0 = "some_def"() : () -> !xegpu.tensor_desc<1x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- %src = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : () -> (vector<16x32xf32>)
- %acc = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>} dense<0.0> : vector<32xf32>
- %1 = vector.multi_reduction <add>, %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>} [0]
- : vector<16x32xf32> to vector<32xf32>
- %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
- : vector<32xf32> to vector<1x32xf32>
- xegpu.store_nd %3, %0[%c0, %c0] : vector<1x32xf32>, !xegpu.tensor_desc<1x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- gpu.return
-}
-}
-// -----
-// CHECK-REDUCTION-LABEL: gpu.func @vector_multi_reduction_dim1_distributed_dim1_reduction
-// CHECK-REDUCTION: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (!xegpu.tensor_desc<2x16xf32,
-// CHECK-REDUCTION-SAME: #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, f32, f32) {
-// CHECK-REDUCTION: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : () -> vector<2x16xf32>
-// CHECK-REDUCTION-NEXT: %[[ROW0:.*]] = vector.extract %[[SRC]][0] : vector<16xf32> from vector<2x16xf32>
-// CHECK-REDUCTION-NEXT: %[[R0:.*]] = vector.reduction <add>, %[[ROW0]], %{{.*}} : vector<16xf32> into f32
-// CHECK-REDUCTION-NEXT: %[[ROW1:.*]] = vector.extract %[[SRC]][1] : vector<16xf32> from vector<2x16xf32>
-// CHECK-REDUCTION-NEXT: %[[R1:.*]] = vector.reduction <add>, %[[ROW1]], %{{.*}} : vector<16xf32> into f32
-// CHECK-REDUCTION-NEXT: gpu.yield %4, %[[R1]], %[[R0]] : !xegpu.tensor_desc<2x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>, f32, f32
-// CHECK-REDUCTION-NEXT: }
-// CHECK-REDUCTION-NEXT: vector.from_elements %[[W]]#2, %[[W]]#1 : vector<2xf32>
-gpu.module @xevm_module{
-gpu.func @vector_multi_reduction_dim1_distributed_dim1_reduction() {
- %c0 = arith.constant 0 : index
- %0 = "some_def"() : () -> !xegpu.tensor_desc<2x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- %src = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : () -> (vector<2x16xf32>)
- %acc = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [1]>} dense<0.0> : vector<2xf32>
- %1 = vector.multi_reduction <add>, %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [1]>}
- [1] : vector<2x16xf32> to vector<2xf32>
- %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
- : vector<2xf32> to vector<2x1xf32>
- %4 = vector.broadcast %3 {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : vector<2x1xf32> to vector<2x16xf32>
- xegpu.store_nd %4, %0[%c0, %c0] : vector<2x16xf32>, !xegpu.tensor_desc<2x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- gpu.return
-}
-}
-// -----
-// CHECK-LABEL: gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction
-// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%0)[16] ->
-// CHECK-SAME: (!xegpu.tensor_desc<32x1xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>, vector<2x16xf32>) {
-// CHECK: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : () -> vector<32x16xf32>
-// CHECK-NEXT: gpu.yield %{{.*}}, %[[SRC]] : !xegpu.tensor_desc<32x1xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>, vector<32x16xf32>
-// CHECK-NEXT: }
-// CHECK: %[[ROW0:.*]] = vector.extract %[[W]]#1[0] : vector<16xf32> from vector<2x16xf32>
-// CHECK-NEXT: %[[R0:.*]] = vector.reduction <add>, %[[ROW0]], %{{.*}} : vector<16xf32> into f32
-// CHECK: %[[ROW1:.*]] = vector.extract %[[W]]#1[1] : vector<16xf32> from vector<2x16xf32>
-// CHECK-NEXT: %[[R1:.*]] = vector.reduction <add>, %[[ROW1]], %{{.*}} : vector<16xf32> into f32
-// CHECK-NEXT: vector.from_elements %[[R0]], %[[R1]] : vector<2xf32>
-gpu.module @xevm_module{
-gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction() {
- %c0 = arith.constant 0 : index
- %0 = "some_def"() : () -> !xegpu.tensor_desc<32x1xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>
- %src = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : () -> (vector<32x16xf32>)
- %acc = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [1]>} dense<0.0> : vector<32xf32>
- %1 = vector.multi_reduction <add>, %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [1]>} [1]
- : vector<32x16xf32> to vector<32xf32>
- %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}
- : vector<32xf32> to vector<32x1xf32>
- xegpu.store_nd %3, %0[%c0, %c0] : vector<32x1xf32>, !xegpu.tensor_desc<32x1xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>
- gpu.return
-}
-}
-// -----
-// CHECK-REDUCTION-LABEL: gpu.func @vector_multi_reduction_dim0_distributed_dim0_reduction
-// CHECK-REDUCTION: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (!xegpu.tensor_desc<16x2xf32,
-// CHECK-REDUCTION-SAME: #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>, f32, f32) {
-// CHECK-REDUCTION: %[[SRC:.*]] = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : () -> vector<16x2xf32>
-// CHECK-REDUCTION-NEXT: %[[COL0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32>
-// CHECK-REDUCTION-NEXT: %[[CAST0:.*]] = vector.shape_cast %[[COL0]] : vector<16x1xf32> to vector<16xf32>
-// CHECK-REDUCTION-NEXT: %[[R0:.*]] = vector.reduction <add>, %[[CAST0]], %{{.*}} : vector<16xf32> into f32
-// CHECK-REDUCTION-NEXT: %[[COL1:.*]] = vector.extract_strided_slice %5 {offsets = [0, 1], sizes = [16, 1], strides = [1, 1]} : vector<16x2xf32> to vector<16x1xf32>
-// CHECK-REDUCTION-NEXT: %[[CAST1:.*]] = vector.shape_cast %[[COL1]] : vector<16x1xf32> to vector<16xf32>
-// CHECK-REDUCTION-NEXT: %[[R1:.*]] = vector.reduction <add>, %[[CAST1]], %cst : vector<16xf32> into f32
-// CHECK-REDUCTION-NEXT: gpu.yield %4, %[[R1]], %[[R0]] : !xegpu.tensor_desc<16x2xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>, f32, f32
-// CHECK-REDUCTION-NEXT: }
-// CHECK-REDUCTION-NEXT: vector.from_elements %[[W]]#2, %[[W]]#1 : vector<2xf32>
-gpu.module @xevm_module{
-gpu.func @vector_multi_reduction_dim0_distributed_dim0_reduction() {
- %c0 = arith.constant 0 : index
- %0 = "some_def"() : () -> !xegpu.tensor_desc<16x2xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>
- %src = "some_def"() {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : () -> (vector<16x2xf32>)
- %acc = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [0]>} dense<0.0> : vector<2xf32>
- %1 = vector.multi_reduction <add>, %src, %acc {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>, dims = [0]>}
- [0] : vector<16x2xf32> to vector<2xf32>
- %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}
- : vector<2xf32> to vector<1x2xf32>
- %4 = vector.broadcast %3 {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : vector<1x2xf32> to vector<16x2xf32>
- xegpu.store_nd %4, %0[%c0, %c0] : vector<16x2xf32>, !xegpu.tensor_desc<16x2xf32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>
+ %2 = xegpu.create_nd_tdesc %arg2 : memref<1024x1024xf32> ->
+ !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ %3 = xegpu.load_nd %2[%0, %1]
+ {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
+ : !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xf32>
+ %4 = scf.for %arg3 = %c0 to %c1024 step %c16 iter_args(%arg4 = %3) -> (vector<8x16xf32>) {
+ %5 = xegpu.create_nd_tdesc %arg0: memref<1024x1024xbf16>
+ -> !xegpu.tensor_desc<8x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+ %6 = xegpu.create_nd_tdesc %arg1 : memref<1024x1024xbf16>
+ -> !xegpu.tensor_desc<16x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>>
+ %7 = xegpu.load_nd %5[%0, %arg3]
+ {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
+ : !xegpu.tensor_desc<8x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xbf16>
+ %8 = xegpu.load_nd %6[%arg3, %1]
+ {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>}
+ : !xegpu.tensor_desc<16x16xbf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>> -> vector<16x16xbf16>
+ %9 = xegpu.dpas %7, %8, %arg4
+ {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
+ : vector<8x16xbf16>, vector<16x16xbf16>, vector<8x16xf32> -> vector<8x16xf32>
+ scf.yield %9 : vector<8x16xf32>
+ } {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
+ xegpu.store_nd %4, %2[%0, %1] : vector<8x16xf32>,
+ !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
gpu.return
}
}
// -----
-// CHECK-LABEL: gpu.func @scatter_ops_chunksize({{.*}}) {
-// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1>
-// CHECK-NEXT: %[[LANE_OFFSET:.*]] = arith.constant dense<12> : vector<1xindex>
-// CHECK-NEXT: %[[LOADED:.*]] = xegpu.load %arg0[%[[LANE_OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<8xf16>
-// CHECK-NEXT: xegpu.store %[[LOADED]], %arg0[%[[LANE_OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : vector<8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1>
-gpu.module @xevm_module{
- gpu.func @scatter_ops_chunksize(%src: memref<256xf16>) {
- %1 = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<1>: vector<16xi1>
- %offset = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<12> : vector<16xindex>
- %3 = xegpu.load %src[%offset], %1 <{chunk_size=8}> {
- layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>
- } : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16x8xf16>
- xegpu.store %3, %src[%offset], %1 <{chunk_size=8}> : vector<16x8xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1>
- gpu.return
- }
-}
-
-// -----
-// CHECK-LABEL: gpu.func @scatter_ops_scf_yield({{.*}},
-// CHECK-SAME: %[[PREDICATE:.*]]: i1) {
-// CHECK: %[[DEFAULT:.*]] = arith.constant dense<1.200000e+01> : vector<8xf16>
-// CHECK: %[[OFFSET:.*]] = arith.constant dense<12> : vector<1xindex>
-// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1>
-// CHECK: %[[PREDICATED_LOAD:.*]] = scf.if %[[PREDICATE]] -> (vector<8xf16>) {
-// CHECK-NEXT: %[[LOADED:.*]] = xegpu.load %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<8xf16>
-// CHECK-NEXT: scf.yield %[[LOADED]] : vector<8xf16>
-// CHECK-NEXT: } else {
-// CHECK-NEXT: scf.yield %[[DEFAULT]] : vector<8xf16>
-// CHECK-NEXT: }
-// CHECK-NEXT: xegpu.store %[[PREDICATED_LOAD]], %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : vector<8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1>
+// CHECK-LABEL: gpu.func @scatter_ops_scf_yield
+// CHECK: (%{{.*}}: memref<256xf16>, %[[PREDICATE:[a-zA-Z0-9]+]]: i1) {
+// CHECK-DAG: %[[CST:.*]] = arith.constant dense<1.200000e+01> : vector<1x8xf16>
+// CHECK-DAG: %[[OFFSET:.*]] = arith.constant dense<12> : vector<1xindex>
+// CHECK-DAG: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1>
+// CHECK: %[[IF:.*]] = scf.if %[[PREDICATE]] -> (vector<1x8xf16>) {
+// CHECK-NEXT: %[[LD:.*]] = xegpu.load %{{.*}}[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}>
+// CHECK-SAME: : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<8xf16>
+// CHECK-NEXT: %[[LD_CAST:.*]] = vector.shape_cast %[[LD]] : vector<8xf16> to vector<1x8xf16>
+// CHECK-NEXT: scf.yield %[[LD_CAST]] : vector<1x8xf16>
+// CHECK-NEXT: } else {
+// CHECK-NEXT: scf.yield %[[CST]] : vector<1x8xf16>
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[IF_CAST:.*]] = vector.shape_cast %[[IF]] : vector<1x8xf16> to vector<8xf16>
+// CHECK-NEXT: xegpu.store %[[IF_CAST]], %{{.*}}[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}>
+// CHECK-SAME: vector<8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1>
gpu.module @xevm_module{
gpu.func @scatter_ops_scf_yield(%src: memref<256xf16>, %pred : i1) {
%1 = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<1>: vector<16xi1>
@@ -432,13 +153,15 @@ gpu.module @xevm_module{
// -----
// CHECK-LABEL: gpu.func @scatter_ops_scf_non_yield({{.*}}) {
-// CHECK: %[[OFFSET:.*]] = arith.constant dense<12> : vector<1xindex>
-// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1>
-// CHECK: %[[PREDICATE:.*]] = llvm.mlir.poison : i1
-// CHECK: scf.if %[[PREDICATE]] {
-// CHECK-NEXT: %[[LOADED:.*]] = xegpu.load %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<8xf16>
-// CHECK-NEXT: xegpu.store %[[LOADED]], %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}> : vector<8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1>
-// CHECK-NEXT: }
+// CHECK: %[[OFFSET:.*]] = arith.constant dense<12> : vector<1xindex>
+// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1>
+// CHECK: %[[PREDICATE:.*]] = llvm.mlir.poison : i1
+// CHECK: scf.if %[[PREDICATE]] {
+// CHECK-NEXT: %[[LOADED:.*]] = xegpu.load %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}>
+// CHECK-SAME: memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<8xf16>
+// CHECK-NEXT: xegpu.store %[[LOADED]], %arg0[%[[OFFSET]]], %[[MASK]] <{chunk_size = 8 : i64}>
+// CHECK-SAME: vector<8xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1>
+// CHECK-NEXT: }
gpu.module @xevm_module{
gpu.func @scatter_ops_scf_non_yield(%src: memref<256xf16>) {
%pred = llvm.mlir.poison : i1
@@ -455,88 +178,13 @@ gpu.module @xevm_module{
}
// -----
-// CHECK-LABEL: gpu.func @scatter_ops({{.*}}) {
-// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1xi1>
-// CHECK-NEXT: %[[LANE_OFFSET:.*]] = arith.constant dense<12> : vector<1xindex>
-// CHECK-NEXT: %[[LOADED:.*]] = xegpu.load %arg0[%[[LANE_OFFSET]]], %[[MASK]] : memref<256xf16>, vector<1xindex>, vector<1xi1> -> vector<1xf16>
-// CHECK-NEXT: xegpu.store %[[LOADED]], %arg0[%[[LANE_OFFSET]]], %[[MASK]] : vector<1xf16>, memref<256xf16>, vector<1xindex>, vector<1xi1>
-gpu.module @xevm_module{
- gpu.func @scatter_ops(%src: memref<256xf16>) {
- %1 = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<1>: vector<16xi1>
- %offset = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<12> : vector<16xindex>
- %3 = xegpu.load %src[%offset], %1 {
- layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>
- } : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16xf16>
- xegpu.store %3, %src[%offset], %1 : vector<16xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1>
- gpu.return
- }
-}
-
-// -----
-// CHECK-LABEL: gpu.func @memref_extract_aligned_pointer_as_index(
-// CHECK: %{{.*}} = memref.extract_aligned_pointer_as_index %{{.*}} : memref<256x256xf16> -> index
-gpu.module @xevm_module{
- gpu.func @memref_extract_aligned_pointer_as_index(%arg0 : memref<256x256xf16>) {
- %c0 = arith.constant 0 : index
- %cst = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<1.000000e+00> : vector<16xf16>
- %ptr = memref.extract_aligned_pointer_as_index %arg0 : memref<256x256xf16> -> index
- %ptr_i64 = arith.index_cast %ptr : index to i64
- %tdesc = xegpu.create_nd_tdesc %ptr_i64, shape: [16], strides: [16] : i64
- -> !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
- xegpu.store_nd %cst, %tdesc[%c0] : vector<16xf16>, !xegpu.tensor_desc<16xf16, #xegpu.layout<lane_layout = [16], lane_data = [1]>>
- gpu.return
- }
-}
-
-
-// -----
-// CHECK-LABEL: gpu.func @vector_transpose(
-// CHECK: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<2xf32>
-// CHECK: %[[DEST:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<2x16xf32> -> !xegpu.tensor_desc<2x16xf32>
-// CHECK: xegpu.store_nd %[[CST]], %[[DEST]][{{.*}}] : vector<2xf32>, !xegpu.tensor_desc<2x16xf32>
-gpu.module @xevm_module{
- gpu.func @vector_transpose(%arg0: memref<2x16xf32>) {
- %cst = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} dense<1.000000e+00>
- : vector<16x2xf32>
- %c0 = arith.constant 0 : index
- %transpose = vector.transpose %cst, [1, 0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
- : vector<16x2xf32> to vector<2x16xf32>
- %0 = xegpu.create_nd_tdesc %arg0 : memref<2x16xf32>
- -> !xegpu.tensor_desc<2x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- xegpu.store_nd %transpose, %0[%c0, %c0] : vector<2x16xf32>,
- !xegpu.tensor_desc<2x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- gpu.return
- }
-}
-
-// -----
-// CHECK-LABEL: gpu.func @vector_bitcast(
-// CHECK: %[[CAST:.*]] = vector.bitcast %{{.*}} : vector<4x2xi8> to vector<4x1xi16>
-// CHECK-NEXT: %[[DEST:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<4x16xi16> -> !xegpu.tensor_desc<4x16xi16>
-// CHECK-NEXT: %[[T0:.*]] = vector.shape_cast %[[CAST]] : vector<4x1xi16> to vector<4xi16>
-// CHECK-NEXT: xegpu.store_nd %[[T0]], %[[DEST]][{{.*}}] : vector<4xi16>, !xegpu.tensor_desc<4x16xi16>
-gpu.module @xevm_module{
- gpu.func @vector_bitcast(%arg0: memref<4x16xi16>) {
- %cst = "some_op"() {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 2]>}
- : () -> (vector<4x32xi8>)
- %bitcast = vector.bitcast %cst {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
- : vector<4x32xi8> to vector<4x16xi16>
- %c0 = arith.constant 0 : index
- %0 = xegpu.create_nd_tdesc %arg0 : memref<4x16xi16>
- -> !xegpu.tensor_desc<4x16xi16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- xegpu.store_nd %bitcast, %0[%c0, %c0] : vector<4x16xi16>,
- !xegpu.tensor_desc<4x16xi16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- gpu.return
- }
-}
-
-// -----
// CHECK-LABEL: gpu.func @mma_transpose_b(
// CHECK: %[[ARG0:[0-9a-zA-Z]+]]: memref<8x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x8xi32>, %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xf32>) {
// CHECK-DAG: %[[ADESC:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
// CHECK-DAG: %[[BDESC:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16x8xi32> -> !xegpu.tensor_desc<16x8xi32>
// CHECK-DAG: %[[A:.*]] = xegpu.load_nd %[[ADESC]][%{{.*}}] : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
-// CHECK-DAG: %[[B:.*]] = xegpu.load_nd %[[BDESC]][%{{.*}}] <{transpose = array<i64: 1, 0>}> : !xegpu.tensor_desc<16x8xi32> -> vector<8xi32>
+// CHECK-DAG: %[[B:.*]] = xegpu.load_nd %[[BDESC]][%{{.*}}] <{transpose = array<i64: 1, 0>}>
+// CHECK-SAME: !xegpu.tensor_desc<16x8xi32> -> vector<8xi32>
// CHECK-NEXT: %[[BCAST0:.*]] = vector.shape_cast %[[B]] : vector<8xi32> to vector<1x8xi32>
// CHECK-NEXT: %[[BCAST1:.*]] = vector.bitcast %[[BCAST0]] : vector<1x8xi32> to vector<1x16xf16>
// CHECK-NEXT: %[[BCAST2:.*]] = vector.shape_cast %[[BCAST1]] : vector<1x16xf16> to vector<16xf16>
diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
index e51cac4..6ba7a00 100644
--- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
+++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
@@ -218,6 +218,35 @@ class TestStepOpPattern : public OpConversionPattern<vector::StepOp> {
}
};
+struct TestXeGPUSGDistribute
+ : public PassWrapper<TestXeGPUSGDistribute,
+ OperationPass<gpu::GPUModuleOp>> {
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestXeGPUSGDistribute)
+
+ StringRef getArgument() const final { return "test-xegpu-sg-distribute"; }
+
+ StringRef getDescription() const final {
+ return "Test the implementation of XeGPU Subgroup Distribution";
+ }
+
+ void getDependentDialects(::mlir::DialectRegistry &registry) const override {
+ registry.insert<arith::ArithDialect>();
+ registry.insert<memref::MemRefDialect>();
+ registry.insert<xegpu::XeGPUDialect>();
+ registry.insert<vector::VectorDialect>();
+ registry.insert<index::IndexDialect>();
+ }
+
+ TestXeGPUSGDistribute() = default;
+ TestXeGPUSGDistribute(const TestXeGPUSGDistribute &pass) = default;
+
+ void runOnOperation() override {
+ RewritePatternSet patterns(&getContext());
+ xegpu::populateXeGPUSubgroupDistributePatterns(patterns);
+ (void)applyPatternsGreedily(getOperation(), std::move(patterns));
+ }
+};
+
struct TestXeGPULayoutInterface
: public PassWrapper<TestXeGPULayoutInterface,
OperationPass<gpu::GPUModuleOp>> {
@@ -282,6 +311,7 @@ namespace test {
void registerTestXeGPULowerings() {
PassRegistration<TestXeGPUUnrollingPatterns>();
PassRegistration<TestXeGPULayoutInterface>();
+ PassRegistration<TestXeGPUSGDistribute>();
}
} // namespace test
} // namespace mlir
diff --git a/openmp/runtime/test/transform/tile/intfor.f90 b/openmp/runtime/test/transform/tile/intfor.F90
index dac0de6..4ca9f14 100644
--- a/openmp/runtime/test/transform/tile/intfor.f90
+++ b/openmp/runtime/test/transform/tile/intfor.F90
@@ -10,6 +10,7 @@
! RUN: %t-ub18.exe | FileCheck %s --match-full-lines
program tile_intfor_1d
+ implicit none
integer i
print *, 'do'
diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index 5af035d..258d732 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -1445,6 +1445,7 @@ cc_library(
":crosstu",
":driver",
":frontend",
+ ":index",
":lex",
":rewrite",
":static_analyzer_checkers_gen",