aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
authorHim188 <tguan@nvidia.com>2024-06-13 14:29:31 +0100
committerGitHub <noreply@github.com>2024-06-13 14:29:31 +0100
commita959ad155343095fc8f73aa397a72e3ced1df354 (patch)
treedf0aff87eff46f72eb8f2d5249a11fd57f7114ea /llvm
parent00798354c553d48d27006a2b06a904bd6013e31b (diff)
downloadllvm-a959ad155343095fc8f73aa397a72e3ced1df354.zip
llvm-a959ad155343095fc8f73aa397a72e3ced1df354.tar.gz
llvm-a959ad155343095fc8f73aa397a72e3ced1df354.tar.bz2
[AArch64][GISel] Assign G_LOAD defs into FPR if they feed FP instructions indirectly via PHI (#94618)
If G_LOAD def is used by a PHI, recursively check if the def of PHI is used by any instruction that only uses FP oprand. If so, assign the def of G_LOAD to FPR. In other words, this change helps RBS to assign G_LOAD defs into FPR if they are later on used in an FP instruction **indirectly via PHI**, to avoid unnecessary copies. Before this patch, we only considered direct usages. It helps to fix GISel regression in TSVC kernel s3110. GISel was 50% slower than SDAG before this change. Now GISel and SDAG have the same runtime.
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp21
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h7
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir48
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-fp-loads.mir192
4 files changed, 253 insertions, 15 deletions
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 4aa6999..f63e29e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -496,6 +496,20 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
}
}
+bool AArch64RegisterBankInfo::isPHIWithFPContraints(
+ const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI, const unsigned Depth) const {
+ if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
+ return false;
+
+ return any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](const MachineInstr &UseMI) {
+ if (onlyUsesFP(UseMI, MRI, TRI, Depth + 1))
+ return true;
+ return isPHIWithFPContraints(UseMI, MRI, TRI, Depth + 1);
+ });
+}
+
bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
@@ -851,13 +865,18 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// instead of blind map every scalar to GPR.
if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
[&](const MachineInstr &UseMI) {
- // If we have at least one direct use in a FP instruction,
+ // If we have at least one direct or indirect use
+ // in a FP instruction,
// assume this was a floating point load in the IR. If it was
// not, we would have had a bitcast before reaching that
// instruction.
//
// Int->FP conversion operations are also captured in
// onlyDefinesFP().
+
+ if (isPHIWithFPContraints(UseMI, MRI, TRI))
+ return true;
+
return onlyUsesFP(UseMI, MRI, TRI) ||
onlyDefinesFP(UseMI, MRI, TRI);
}))
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
index b6364c6..9d23f84 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
@@ -120,6 +120,13 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
/// Maximum recursion depth for hasFPConstraints.
const unsigned MaxFPRSearchDepth = 2;
+ /// \returns true if \p MI is a PHI that its def is used by
+ /// any instruction that onlyUsesFP.
+ bool isPHIWithFPContraints(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth = 0) const;
+
/// \returns true if \p MI only uses and defines FPRs.
bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
index eb768f3..4cdefa8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
@@ -150,7 +150,7 @@ body: |
...
---
-name: load_used_by_phi_gpr
+name: load_used_by_phi_gpr_copy_gpr
legalized: true
tracksRegLiveness: true
body: |
@@ -173,6 +173,50 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
+ ; CHECK-NEXT: $w2 = COPY %phi(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w2
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $x0, $s0, $s1, $w0, $w1
+ %cond_wide:_(s32) = COPY $w0
+ %gpr_copy:_(s32) = COPY $w1
+ %ptr:_(p0) = COPY $x0
+ G_BRCOND %cond_wide, %bb.1
+ G_BR %bb.2
+ bb.1:
+ successors: %bb.2
+ %load:_(s32) = G_LOAD %ptr(p0) :: (load (s32))
+ G_BR %bb.2
+ bb.2:
+ %phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
+ $w2 = COPY %phi(s32)
+ RET_ReallyLR implicit $w2
+
+...
+---
+name: load_used_by_phi_gpr_copy_fpr
+legalized: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: load_used_by_phi_gpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $x0, $s0, $s1, $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cond_wide:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: %gpr_copy:gpr(s32) = COPY $w1
+ ; CHECK-NEXT: %ptr:gpr(p0) = COPY $x0
+ ; CHECK-NEXT: G_BRCOND %cond_wide(s32), %bb.1
+ ; CHECK-NEXT: G_BR %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %load:fpr(s32) = G_LOAD %ptr(p0) :: (load (s32))
+ ; CHECK-NEXT: G_BR %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
; CHECK-NEXT: $s0 = COPY %phi(s32)
; CHECK-NEXT: RET_ReallyLR implicit $s0
bb.0:
@@ -189,7 +233,7 @@ body: |
G_BR %bb.2
bb.2:
%phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
- $s0 = COPY %phi(s32)
+ $s0 = COPY %phi(s32) ; G_LOAD should consider this FPR constraint and assign %load FPR
RET_ReallyLR implicit $s0
...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-fp-loads.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-fp-loads.mir
index 6efa6f2..b3261a9 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-fp-loads.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-fp-loads.mir
@@ -8,7 +8,8 @@
@var_int = global i32 0
define float @fp_load_phi() { ret float undef }
- define i32 @int_load_phi() { ret i32 undef }
+ define i32 @int_load_phi_gpr() { ret i32 undef }
+ define i32 @int_load_phi_fpr() { ret i32 undef }
@array_double = dso_local global [32000 x double] zeroinitializer, align 8
@@ -23,9 +24,12 @@
@struct_array_int = dso_local global { [32000 x i32] } zeroinitializer, align 8
@struct_struct_array_int = dso_local global {{ [32000 x i32] }} zeroinitializer, align 8
- define i32 @array_load_int() { ret i32 undef }
- define i32 @struct_array_load_int() { ret i32 undef }
- define i32 @struct_struct_array_load_int() { ret i32 undef }
+ define i32 @array_load_int_gpr() { ret i32 undef }
+ define i32 @array_load_int_fpr() { ret i32 undef }
+ define i32 @struct_array_load_int_gpr() { ret i32 undef }
+ define i32 @struct_array_load_int_fpr() { ret i32 undef }
+ define i32 @struct_struct_array_load_int_gpr() { ret i32 undef }
+ define i32 @struct_struct_array_load_int_fpr() { ret i32 undef }
...
---
@@ -70,12 +74,12 @@ body: |
...
---
-name: int_load_phi
+name: int_load_phi_gpr
legalized: true
regBankSelected: false
tracksRegLiveness: true
body: |
- ; CHECK-LABEL: name: int_load_phi
+ ; CHECK-LABEL: name: int_load_phi_gpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w0
@@ -91,6 +95,47 @@ body: |
; CHECK-NEXT: G_BRCOND [[COPY]](s32), %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: $w1 = COPY [[PHI]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w1
+ bb.0:
+ liveins: $w0
+ successors: %bb.1
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = G_GLOBAL_VALUE @var_fp
+ %fp_load:_(s32) = G_LOAD %1 :: (load 4 from @var_int)
+
+ bb.1:
+ successors: %bb.1, %bb.2
+ %2:_(s32) = PHI %fp_load, %bb.0, %2, %bb.1
+ G_BRCOND %0, %bb.1
+
+ bb.2:
+ $w1 = COPY %2
+ RET_ReallyLR implicit $w1
+...
+
+---
+name: int_load_phi_fpr
+legalized: true
+regBankSelected: false
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: int_load_phi_fpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var_fp
+ ; CHECK-NEXT: %fp_load:fpr(s32) = G_LOAD [[GV]](p0) :: (load (s32) from @var_int)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr(s32) = PHI %fp_load(s32), %bb.0, [[PHI]](s32), %bb.1
+ ; CHECK-NEXT: G_BRCOND [[COPY]](s32), %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
; CHECK-NEXT: $s0 = COPY [[PHI]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $s0
bb.0:
@@ -234,12 +279,12 @@ body: |
...
---
-name: array_load_int
+name: array_load_int_gpr
legalized: true
regBankSelected: false
tracksRegLiveness: true
body: |
- ; CHECK-LABEL: name: array_load_int
+ ; CHECK-LABEL: name: array_load_int_gpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w0
@@ -255,6 +300,47 @@ body: |
; CHECK-NEXT: G_BRCOND [[COPY]](s32), %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: $w1 = COPY [[PHI]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w1
+ bb.0:
+ liveins: $w0
+ successors: %bb.1
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = G_GLOBAL_VALUE @array_int
+ %fp_load:_(s64) = G_LOAD %1(p0) :: (dereferenceable load (s64) from @array_int)
+
+ bb.1:
+ successors: %bb.1, %bb.2
+ %2:_(s32) = PHI %fp_load, %bb.0, %2, %bb.1
+ G_BRCOND %0, %bb.1
+
+ bb.2:
+ $w1 = COPY %2
+ RET_ReallyLR implicit $w1
+...
+
+---
+name: array_load_int_fpr
+legalized: true
+regBankSelected: false
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: array_load_int_fpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @array_int
+ ; CHECK-NEXT: %fp_load:fpr(s64) = G_LOAD [[GV]](p0) :: (dereferenceable load (s64) from @array_int)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr(s32) = PHI %fp_load(s64), %bb.0, [[PHI]](s32), %bb.1
+ ; CHECK-NEXT: G_BRCOND [[COPY]](s32), %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
; CHECK-NEXT: $s0 = COPY [[PHI]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $s0
bb.0:
@@ -275,12 +361,12 @@ body: |
...
---
-name: struct_array_load_int
+name: struct_array_load_int_gpr
legalized: true
regBankSelected: false
tracksRegLiveness: true
body: |
- ; CHECK-LABEL: name: struct_array_load_int
+ ; CHECK-LABEL: name: struct_array_load_int_gpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w0
@@ -296,6 +382,47 @@ body: |
; CHECK-NEXT: G_BRCOND [[COPY]](s32), %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: $w1 = COPY [[PHI]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w1
+ bb.0:
+ liveins: $w0
+ successors: %bb.1
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = G_GLOBAL_VALUE @struct_array_int
+ %fp_load:_(s64) = G_LOAD %1(p0) :: (dereferenceable load (s64) from @struct_array_int)
+
+ bb.1:
+ successors: %bb.1, %bb.2
+ %2:_(s32) = PHI %fp_load, %bb.0, %2, %bb.1
+ G_BRCOND %0, %bb.1
+
+ bb.2:
+ $w1 = COPY %2
+ RET_ReallyLR implicit $w1
+...
+
+---
+name: struct_array_load_int_fpr
+legalized: true
+regBankSelected: false
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: struct_array_load_int_fpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @struct_array_int
+ ; CHECK-NEXT: %fp_load:fpr(s64) = G_LOAD [[GV]](p0) :: (dereferenceable load (s64) from @struct_array_int)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr(s32) = PHI %fp_load(s64), %bb.0, [[PHI]](s32), %bb.1
+ ; CHECK-NEXT: G_BRCOND [[COPY]](s32), %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
; CHECK-NEXT: $s0 = COPY [[PHI]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $s0
bb.0:
@@ -316,12 +443,12 @@ body: |
...
---
-name: struct_struct_array_load_int
+name: struct_struct_array_load_int_gpr
legalized: true
regBankSelected: false
tracksRegLiveness: true
body: |
- ; CHECK-LABEL: name: struct_struct_array_load_int
+ ; CHECK-LABEL: name: struct_struct_array_load_int_gpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w0
@@ -337,6 +464,47 @@ body: |
; CHECK-NEXT: G_BRCOND [[COPY]](s32), %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: $w1 = COPY [[PHI]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w1
+ bb.0:
+ liveins: $w0
+ successors: %bb.1
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = G_GLOBAL_VALUE @struct_struct_array_int
+ %fp_load:_(s64) = G_LOAD %1(p0) :: (dereferenceable load (s64) from @struct_struct_array_int)
+
+ bb.1:
+ successors: %bb.1, %bb.2
+ %2:_(s32) = PHI %fp_load, %bb.0, %2, %bb.1
+ G_BRCOND %0, %bb.1
+
+ bb.2:
+ $w1 = COPY %2
+ RET_ReallyLR implicit $w1
+...
+
+---
+name: struct_struct_array_load_int_fpr
+legalized: true
+regBankSelected: false
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: struct_struct_array_load_int_fpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @struct_struct_array_int
+ ; CHECK-NEXT: %fp_load:fpr(s64) = G_LOAD [[GV]](p0) :: (dereferenceable load (s64) from @struct_struct_array_int)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr(s32) = PHI %fp_load(s64), %bb.0, [[PHI]](s32), %bb.1
+ ; CHECK-NEXT: G_BRCOND [[COPY]](s32), %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
; CHECK-NEXT: $s0 = COPY [[PHI]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $s0
bb.0: