author    Luke Lau <luke@igalia.com>      2024-03-29 05:55:38 +0800
committer GitHub <noreply@github.com>     2024-03-29 05:55:38 +0800
commit    a3c2d8c0720424579c2a9b6313664908db7fcb14 (patch)
tree      6e162bb90c5f1c694cddeaf374139ea310490f24 /llvm/test/CodeGen/RISCV
parent    380f0fb682041aca3d682d9f1be9d3021f4b2daa (diff)
[RISCV] Combine ({s,u}{div,rem} (zext, zext)) -> (zext ({s,u}{div,rem} (zext, zext))) (#86779)
This narrows unsigned and signed div and rem nodes via combineBinOpOfZExt.

Unlike other binary ops, there are no widening div or rem instructions, so we will end up with an extra vzext.vf2. However, I'm assuming that div/rem are expensive enough that by reducing their EMUL we will gain back the cost.

Alive2 proof: https://alive2.llvm.org/ce/z/Et_L6y
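To make the transform concrete, here is a minimal IR-level sketch of what the combine achieves. The function names are illustrative, not from the patch, and the real combine rewrites SelectionDAG nodes rather than IR:

; Before: both operands are widened all the way to i32, so the divide
; executes at e32 (EMUL=4 for this type, per the vsetvli in the old
; CHECK lines below).
define <vscale x 8 x i32> @udiv_wide(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
  %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
  %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
  %div = udiv <vscale x 8 x i32> %a.zext, %b.zext
  ret <vscale x 8 x i32> %div
}

; After: divide at the narrower i16 type (e16, EMUL=2), then zero-extend
; the result back to i32. The quotient of two values zero-extended from
; i8 fits in i16, so the two forms are equivalent (see the Alive2 proof
; above). The final zext is the extra vzext.vf2 the message refers to.
define <vscale x 8 x i32> @udiv_narrow(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
  %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
  %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
  %div = udiv <vscale x 8 x i16> %a.zext, %b.zext
  %div.zext = zext <vscale x 8 x i16> %div to <vscale x 8 x i32>
  ret <vscale x 8 x i32> %div.zext
}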
Diffstat (limited to 'llvm/test/CodeGen/RISCV')
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/binop-zext.ll | 40
1 file changed, 24 insertions(+), 16 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/binop-zext.ll b/llvm/test/CodeGen/RISCV/rvv/binop-zext.ll
index e050240..2d5258f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/binop-zext.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/binop-zext.ll
@@ -50,10 +50,12 @@ define <vscale x 8 x i32> @mul(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
define <vscale x 8 x i32> @sdiv(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
; CHECK-LABEL: sdiv:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v8
-; CHECK-NEXT: vzext.vf4 v16, v9
-; CHECK-NEXT: vdivu.vv v8, v12, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v9
+; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: vdivu.vv v12, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v12
; CHECK-NEXT: ret
%a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
%b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
@@ -64,10 +66,12 @@ define <vscale x 8 x i32> @sdiv(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
define <vscale x 8 x i32> @udiv(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
; CHECK-LABEL: udiv:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v8
-; CHECK-NEXT: vzext.vf4 v16, v9
-; CHECK-NEXT: vdivu.vv v8, v12, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v9
+; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: vdivu.vv v12, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v12
; CHECK-NEXT: ret
%a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
%b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
@@ -78,10 +82,12 @@ define <vscale x 8 x i32> @udiv(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
define <vscale x 8 x i32> @srem(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
; CHECK-LABEL: srem:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v8
-; CHECK-NEXT: vzext.vf4 v16, v9
-; CHECK-NEXT: vremu.vv v8, v12, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v9
+; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: vremu.vv v12, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v12
; CHECK-NEXT: ret
%a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
%b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
@@ -92,10 +98,12 @@ define <vscale x 8 x i32> @srem(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
define <vscale x 8 x i32> @urem(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
; CHECK-LABEL: urem:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v8
-; CHECK-NEXT: vzext.vf4 v16, v9
-; CHECK-NEXT: vremu.vv v8, v12, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vzext.vf2 v10, v9
+; CHECK-NEXT: vzext.vf2 v12, v8
+; CHECK-NEXT: vremu.vv v12, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vzext.vf2 v8, v12
; CHECK-NEXT: ret
%a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
%b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
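One detail worth noting in the CHECK lines above: the sdiv and srem tests select vdivu.vv and vremu.vv, the unsigned instructions. Because both operands are zero-extended, their sign bits are known zero, so the signed operations fold to their unsigned counterparts before this combine applies. A minimal illustration (the function name is hypothetical):

; sdiv of two zero-extended (hence non-negative) operands is equivalent
; to udiv, so the backend emits the unsigned divide here, matching the
; vdivu.vv in the sdiv test above.
define <vscale x 8 x i16> @sdiv_of_zext(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
  %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
  %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
  %div = sdiv <vscale x 8 x i16> %a.zext, %b.zext
  ret <vscale x 8 x i16> %div
}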