diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2021-03-20 12:53:58 -0400 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2021-03-21 10:07:37 -0400 |
commit | 6314a727308a76b9ef8783d69797ce3bead096ff (patch) | |
tree | 94e134196fac2f5c1125c38f257c7a86b7d9b678 | |
parent | 64c2641c895ab8d1d71c338294af8252969b7803 (diff) | |
download | llvm-6314a727308a76b9ef8783d69797ce3bead096ff.zip llvm-6314a727308a76b9ef8783d69797ce3bead096ff.tar.gz llvm-6314a727308a76b9ef8783d69797ce3bead096ff.tar.bz2 |
AMDGPU/GlobalISel: Enable CSE in pre-legalizer combiner
4 files changed, 60 insertions, 44 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp index 0ca0ea1..c58b15f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp @@ -249,6 +249,9 @@ void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); } + + AU.addRequired<GISelCSEAnalysisWrapperPass>(); + AU.addPreserved<GISelCSEAnalysisWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -270,8 +273,13 @@ bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), F.hasMinSize(), KB, MDT); + // Enable CSE. + GISelCSEAnalysisWrapper &Wrapper = + getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); + auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig()); + Combiner C(PCInfo, TPC); - return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); + return C.combineMachineInstrs(MF, CSEInfo); } char AMDGPUPreLegalizerCombiner::ID = 0; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir index 93b723c..44ef61c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-urem-pow-2.mir @@ -29,10 +29,8 @@ body: | ; GCN-LABEL: name: urem_s32_var_const1 ; GCN: liveins: $vgpr0 - ; GCN: %const:_(s32) = G_CONSTANT i32 1 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GCN: [[ADD:%[0-9]+]]:_(s32) = G_ADD %const, [[C]] - ; GCN: $vgpr0 = COPY [[ADD]](s32) + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN: $vgpr0 = COPY [[C]](s32) %var:_(s32) = COPY $vgpr0 %const:_(s32) = G_CONSTANT i32 1 %rem:_(s32) = G_UREM %var, %const @@ -49,10 +47,8 @@ body: | ; GCN-LABEL: name: urem_s32_var_const2 ; GCN: liveins: $vgpr0 ; GCN: %var:_(s32) = COPY $vgpr0 - ; GCN: %const:_(s32) = G_CONSTANT i32 2 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GCN: [[ADD:%[0-9]+]]:_(s32) = G_ADD %const, [[C]] - ; GCN: %rem:_(s32) = G_AND %var, [[ADD]] + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GCN: %rem:_(s32) = G_AND %var, [[C]] ; GCN: $vgpr0 = COPY %rem(s32) %var:_(s32) = COPY $vgpr0 %const:_(s32) = G_CONSTANT i32 2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll index 6d4ffa6..f3dc84b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll @@ -207,21 +207,28 @@ define i32 @v_urem_i32_pow2k_denom(i32 %num) { ; CHECK-LABEL: v_urem_i32_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_add_i32 s4, 0x1000, -1 -; CHECK-NEXT: v_and_b32_e32 v0, s4, v0 +; CHECK-NEXT: v_and_b32_e32 v0, 0xfff, v0 ; CHECK-NEXT: s_setpc_b64 s[30:31] %result = urem i32 %num, 4096 ret i32 %result } define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) { -; CHECK-LABEL: v_urem_v2i32_pow2k_denom: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_add_i32 s4, 0x1000, -1 -; CHECK-NEXT: v_and_b32_e32 v0, s4, v0 -; CHECK-NEXT: v_and_b32_e32 v1, s4, v1 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; GISEL-LABEL: v_urem_v2i32_pow2k_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_add_i32 s4, 0x1000, -1 +; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 +; GISEL-NEXT: v_and_b32_e32 v1, s4, v1 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_urem_v2i32_pow2k_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_movk_i32 s4, 0xfff +; CGP-NEXT: v_and_b32_e32 v0, s4, v0 +; CGP-NEXT: v_and_b32_e32 v1, s4, v1 +; CGP-NEXT: s_setpc_b64 s[30:31] %result = urem <2 x i32> %num, <i32 4096, i32 4096> ret <2 x i32> %result } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index 6219bc0..7411807 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -949,38 +949,43 @@ define i64 @v_urem_i64_pow2k_denom(i64 %num) { ; CHECK-LABEL: v_urem_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_add_u32 s4, 0x1000, -1 -; CHECK-NEXT: s_cselect_b32 s5, 1, 0 -; CHECK-NEXT: s_and_b32 s5, s5, 1 -; CHECK-NEXT: s_cmp_lg_u32 s5, 0 -; CHECK-NEXT: s_addc_u32 s5, 0, -1 -; CHECK-NEXT: v_and_b32_e32 v0, s4, v0 -; CHECK-NEXT: v_and_b32_e32 v1, s5, v1 +; CHECK-NEXT: v_and_b32_e32 v0, 0xfff, v0 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_setpc_b64 s[30:31] %result = urem i64 %num, 4096 ret i64 %result } define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) { -; CHECK-LABEL: v_urem_v2i64_pow2k_denom: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s4, 0x1000 -; CHECK-NEXT: s_add_u32 s5, s4, -1 -; CHECK-NEXT: s_cselect_b32 s6, 1, 0 -; CHECK-NEXT: s_and_b32 s6, s6, 1 -; CHECK-NEXT: s_cmp_lg_u32 s6, 0 -; CHECK-NEXT: s_addc_u32 s6, 0, -1 -; CHECK-NEXT: s_add_u32 s4, s4, -1 -; CHECK-NEXT: s_cselect_b32 s7, 1, 0 -; CHECK-NEXT: v_and_b32_e32 v0, s5, v0 -; CHECK-NEXT: s_and_b32 s5, s7, 1 -; CHECK-NEXT: v_and_b32_e32 v1, s6, v1 -; CHECK-NEXT: s_cmp_lg_u32 s5, 0 -; CHECK-NEXT: s_addc_u32 s5, 0, -1 -; CHECK-NEXT: v_and_b32_e32 v2, s4, v2 -; CHECK-NEXT: v_and_b32_e32 v3, s5, v3 -; CHECK-NEXT: s_setpc_b64 s[30:31] +; GISEL-LABEL: v_urem_v2i64_pow2k_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s4, 0x1000 +; GISEL-NEXT: s_add_u32 s5, s4, -1 +; GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GISEL-NEXT: s_and_b32 s6, s6, 1 +; GISEL-NEXT: s_cmp_lg_u32 s6, 0 +; GISEL-NEXT: s_addc_u32 s6, 0, -1 +; GISEL-NEXT: s_add_u32 s4, s4, -1 +; GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GISEL-NEXT: v_and_b32_e32 v0, s5, v0 +; GISEL-NEXT: s_and_b32 s5, s7, 1 +; GISEL-NEXT: v_and_b32_e32 v1, s6, v1 +; GISEL-NEXT: s_cmp_lg_u32 s5, 0 +; GISEL-NEXT: s_addc_u32 s5, 0, -1 +; GISEL-NEXT: v_and_b32_e32 v2, s4, v2 +; GISEL-NEXT: v_and_b32_e32 v3, s5, v3 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_urem_v2i64_pow2k_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_movk_i32 s4, 0xfff +; CGP-NEXT: v_and_b32_e32 v0, s4, v0 +; CGP-NEXT: v_and_b32_e32 v2, s4, v2 +; CGP-NEXT: v_mov_b32_e32 v1, 0 +; CGP-NEXT: v_mov_b32_e32 v3, 0 +; CGP-NEXT: s_setpc_b64 s[30:31] %result = urem <2 x i64> %num, <i64 4096, i64 4096> ret <2 x i64> %result } |