aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2014-08-22 18:49:35 +0000
committerTom Stellard <thomas.stellard@amd.com>2014-08-22 18:49:35 +0000
commitf3fc555e3b3e277d3f47150fd749f82dcca43f6c (patch)
tree6599b6159c38f9eadc6dafa12ceee5051789d25d /llvm/lib
parent85e8b6d5f90399ff1bb57a51393432efcc2863f6 (diff)
downloadllvm-f3fc555e3b3e277d3f47150fd749f82dcca43f6c.zip
llvm-f3fc555e3b3e277d3f47150fd749f82dcca43f6c.tar.gz
llvm-f3fc555e3b3e277d3f47150fd749f82dcca43f6c.tar.bz2
R600/SI: Use READ2/WRITE2 instructions for 64-bit mem ops with 32-bit alignment
llvm-svn: 216279
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp27
-rw-r--r--llvm/lib/Target/R600/AMDGPUInstructions.td11
-rw-r--r--llvm/lib/Target/R600/SIInstrInfo.td1
-rw-r--r--llvm/lib/Target/R600/SIInstructions.td26
4 files changed, 63 insertions, 2 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 7911b6f..b988d33 100644
--- a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -91,6 +91,8 @@ private:
bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
unsigned OffsetBits) const;
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
+ bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1) const;
void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
@@ -782,6 +784,31 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
+ SDValue &Offset0,
+ SDValue &Offset1) const {
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+ unsigned DWordOffset0 = C1->getZExtValue() / 4;
+ unsigned DWordOffset1 = DWordOffset0 + 1;
+ // (add n0, c0)
+ if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
+ Base = N0;
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, MVT::i8);
+ return true;
+ }
+ }
+
+ // default case
+ Base = Addr;
+ Offset0 = CurDAG->getTargetConstant(0, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(1, MVT::i8);
+ return true;
+}
+
static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
Ptr), 0);
diff --git a/llvm/lib/Target/R600/AMDGPUInstructions.td b/llvm/lib/Target/R600/AMDGPUInstructions.td
index 0f2b625..cf3bffa 100644
--- a/llvm/lib/Target/R600/AMDGPUInstructions.td
+++ b/llvm/lib/Target/R600/AMDGPUInstructions.td
@@ -282,6 +282,17 @@ def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
+class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
+ return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
+}]>;
+
+def local_load_aligned8bytes : Aligned8Bytes <
+ (ops node:$ptr), (local_load node:$ptr)
+>;
+
+def local_store_aligned8bytes : Aligned8Bytes <
+ (ops node:$val, node:$ptr), (local_store node:$val, node:$ptr)
+>;
class local_binary_atomic_op<SDNode atomic_op> :
PatFrag<(ops node:$ptr, node:$value),
diff --git a/llvm/lib/Target/R600/SIInstrInfo.td b/llvm/lib/Target/R600/SIInstrInfo.td
index 5357af9..064a67e 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.td
+++ b/llvm/lib/Target/R600/SIInstrInfo.td
@@ -192,6 +192,7 @@ def tfe : Operand <i1> {
//===----------------------------------------------------------------------===//
def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
+def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index 40fca9f..0597892 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -2530,7 +2530,18 @@ def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
def : DSReadPat <DS_READ_B32, i32, local_load>;
-def : DSReadPat <DS_READ_B64, v2i32, local_load>;
+
+let AddedComplexity = 100 in {
+
+def : DSReadPat <DS_READ_B64, v2i32, local_load_aligned8bytes>;
+
+} // End AddedComplexity = 100
+
+def : Pat <
+ (v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+ i8:$offset1))),
+ (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1)
+>;
class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
@@ -2540,7 +2551,18 @@ class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
def : DSWritePat <DS_WRITE_B32, i32, local_store>;
-def : DSWritePat <DS_WRITE_B64, v2i32, local_store>;
+
+let AddedComplexity = 100 in {
+
+def : DSWritePat <DS_WRITE_B64, v2i32, local_store_aligned8bytes>;
+} // End AddedComplexity = 100
+
+def : Pat <
+ (local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+ i8:$offset1)),
+ (DS_WRITE2_B32 (i1 0), $ptr, (EXTRACT_SUBREG $value, sub0),
+ (EXTRACT_SUBREG $value, sub1), $offset0, $offset1)
+>;
multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
def : Pat <