[SVE][CodeGen] Legalisation of unpredicated load instructions

Summary: When splitting a load of a scalable type, the new address is calculated in SplitVecRes_LOAD using a vscale and an add instruction. This patch also adds a DAG combiner fold to visitADD for vscale: - Fold (add (add a, vscale(C0)), vscale(C1)) to (add a, vscale(C0 + C1)) Reviewers: sdesmalen, efriedma, david-arm Reviewed By: david-arm Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D82792
author: Kerry McLaughlin <kerry.mclaughlin@arm.com> 2020-07-07 10:35:41 +0100
committer: Kerry McLaughlin <kerry.mclaughlin@arm.com> 2020-07-07 11:05:03 +0100
commit: 5e8084beba20f27ce14536168087e5c6971e292d (patch)
tree: 354e2505e2dfc6f81721f034807f182a82fb0d18 /llvm
parent: 8c2a613976075368a1f6e3ac3c9c8b1927b465ec (diff)
download: llvm-5e8084beba20f27ce14536168087e5c6971e292d.zip
llvm-5e8084beba20f27ce14536168087e5c6971e292d.tar.gz
llvm-5e8084beba20f27ce14536168087e5c6971e292d.tar.bz2
5 files changed, 86 insertions, 6 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index e084c42..f26ab6f 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -931,7 +931,8 @@ public:
   SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm) {
     assert(MulImm.getMinSignedBits() <= VT.getSizeInBits() &&
            "Immediate does not fit VT");
-    return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
+    return getNode(ISD::VSCALE, DL, VT,
+                   getConstant(MulImm.sextOrTrunc(VT.getSizeInBits()), DL, VT));
   }
 
   /// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c94bbeb..4042a81 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2371,6 +2371,16 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
     return DAG.getVScale(DL, VT, C0 + C1);
   }
 
+  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
+  if ((N0.getOpcode() == ISD::ADD) &&
+      (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
+      (N1.getOpcode() == ISD::VSCALE)) {
+    auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
+    auto VS1 = N1->getConstantOperandAPInt(0);
+    auto VS = DAG.getVScale(DL, VT, VS0 + VS1);
+    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6afa7b1..cacc2df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1537,11 +1537,22 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
                    LD->getPointerInfo(), LoMemVT, LD->getOriginalAlign(),
                    MMOFlags, AAInfo);
 
-  unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
-  Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
-  Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
-                   LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT,
-                   LD->getOriginalAlign(), MMOFlags, AAInfo);
+  unsigned IncrementSize = LoMemVT.getSizeInBits().getKnownMinSize() / 8;
+
+  MachinePointerInfo MPI;
+  if (LoVT.isScalableVector()) {
+    SDValue BytesIncrement = DAG.getVScale(
+        dl, Ptr.getValueType(),
+        APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize));
+    MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace());
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, BytesIncrement);
+  } else {
+    MPI = LD->getPointerInfo().getWithOffset(IncrementSize);
+    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+  }
+
+  Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, MPI,
+                   HiMemVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
 
   // Build a factor node to remember that this load is independent of the
   // other one.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 732aea8..d1411c4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4802,6 +4802,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     if (OpOpcode == ISD::FNEG)  // abs(-X) -> abs(X)
       return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));
     break;
+  case ISD::VSCALE:
+    assert(VT == Operand.getValueType() && "Unexpected VT!");
+    break;
   }
 
   SDNode *N;
diff --git a/llvm/test/CodeGen/AArch64/sve-split-load.ll b/llvm/test/CodeGen/AArch64/sve-split-load.ll
new file mode 100644
index 0000000..a76b27e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-split-load.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+; LOAD
+
+define <vscale x 4 x i16> @load_promote_4i8(<vscale x 4 x i16>* %a) {
+; CHECK-LABEL: load_promote_4i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
+  %load = load <vscale x 4 x i16>, <vscale x 4 x i16>* %a
+  ret <vscale x 4 x i16> %load
+}
+
+define <vscale x 16 x i16> @load_split_i16(<vscale x 16 x i16>* %a) {
+; CHECK-LABEL: load_split_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ret
+  %load = load <vscale x 16 x i16>, <vscale x 16 x i16>* %a
+  ret <vscale x 16 x i16> %load
+}
+
+define <vscale x 32 x i16> @load_split_32i16(<vscale x 32 x i16>* %a) {
+; CHECK-LABEL: load_split_32i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ld1h { z2.h }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ld1h { z3.h }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT:    ret
+  %load = load <vscale x 32 x i16>, <vscale x 32 x i16>* %a
+  ret <vscale x 32 x i16> %load
+}
+
+define <vscale x 16 x i64> @load_split_16i64(<vscale x 16 x i64>* %a) {
+; CHECK-LABEL: load_split_16i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ld1d { z3.d }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT:    ld1d { z4.d }, p0/z, [x0, #4, mul vl]
+; CHECK-NEXT:    ld1d { z5.d }, p0/z, [x0, #5, mul vl]
+; CHECK-NEXT:    ld1d { z6.d }, p0/z, [x0, #6, mul vl]
+; CHECK-NEXT:    ld1d { z7.d }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
+  %load = load <vscale x 16 x i64>, <vscale x 16 x i64>* %a
+  ret <vscale x 16 x i64> %load
+}
author	Kerry McLaughlin <kerry.mclaughlin@arm.com>	2020-07-07 10:35:41 +0100
committer	Kerry McLaughlin <kerry.mclaughlin@arm.com>	2020-07-07 11:05:03 +0100
commit	5e8084beba20f27ce14536168087e5c6971e292d (patch)
tree	354e2505e2dfc6f81721f034807f182a82fb0d18 /llvm
parent	8c2a613976075368a1f6e3ac3c9c8b1927b465ec (diff)
download	llvm-5e8084beba20f27ce14536168087e5c6971e292d.zip llvm-5e8084beba20f27ce14536168087e5c6971e292d.tar.gz llvm-5e8084beba20f27ce14536168087e5c6971e292d.tar.bz2