; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s

declare <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, iXLen)

define <vscale x 4 x i32> @different_imm_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_vl_with_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v12
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
  ret <vscale x 4 x i32> %w
}

define <vscale x 4 x i32> @vlmax_and_imm_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: vlmax_and_imm_vl_with_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v12
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
  ret <vscale x 4 x i32> %w
}

; Not beneficial to propagate VL since the VL is larger on the use side.
define <vscale x 4 x i32> @different_imm_vl_with_ta_larger_vl(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_vl_with_ta_larger_vl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v12
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 5)
  ret <vscale x 4 x i32> %w
}

define <vscale x 4 x i32> @different_imm_reg_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_reg_vl_with_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v12
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl1)
  ret <vscale x 4 x i32> %w
}

; Not beneficial to propagate VL since the VL is already one.
define <vscale x 4 x i32> @different_imm_vl_with_ta_1(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_vl_with_ta_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v12
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl1)
  ret <vscale x 4 x i32> %w
}

; Propagate %vl2 to the last instruction since it may be smaller than %vl1.
; This is still safe even if %vl2 is larger than %vl1, because the rest of the
; vector is undefined.
define <vscale x 4 x i32> @different_vl_with_ta(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_vl_with_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v10, v8, v10
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl2)
  ret <vscale x 4 x i32> %w
}

; We can propagate VL to a tail-undisturbed policy, provided none of its users
; are passthrus (i.e. read past VL).
define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_vl_with_tu:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT:    vmv2r.v v14, v10
; CHECK-NEXT:    vadd.vv v14, v10, v12
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
; CHECK-NEXT:    vadd.vv v8, v14, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl2)
  ret <vscale x 4 x i32> %w
}

; We can propagate VL to a tail-undisturbed policy, provided none of its users
; are passthrus (i.e. read past VL).
define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_vl_with_tu:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
; CHECK-NEXT:    vmv2r.v v14, v10
; CHECK-NEXT:    vadd.vv v14, v10, v12
; CHECK-NEXT:    vadd.vv v8, v14, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
  ret <vscale x 4 x i32> %w
}

; We can't reduce the VL as %v is used as a passthru, i.e. the elements past VL
; are demanded.
define <vscale x 4 x i32> @different_vl_as_passthru(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_vl_as_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
; CHECK-NEXT:    vmv2r.v v12, v8
; CHECK-NEXT:    vadd.vv v12, v8, v10
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
; CHECK-NEXT:    vadd.vv v12, v8, v10
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl2)
  ret <vscale x 4 x i32> %w
}

; We can't reduce the VL as %v is used as a passthru, i.e. the elements past VL
; are demanded.
define <vscale x 4 x i32> @different_imm_vl_as_passthru(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
; CHECK-LABEL: different_imm_vl_as_passthru:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT:    vmv2r.v v12, v8
; CHECK-NEXT:    vadd.vv v12, v8, v10
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
; CHECK-NEXT:    vadd.vv v12, v8, v10
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
  ret <vscale x 4 x i32> %w
}

define <vscale x 4 x i32> @dont_optimize_tied_def(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
; CHECK-LABEL: dont_optimize_tied_def:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, tu, ma
; CHECK-NEXT:    vwmacc.vv v8, v10, v11
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT:    vwmacc.vv v8, v10, v11
; CHECK-NEXT:    ret
  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
  %2 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %1, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl, iXLen 0)
  ret <vscale x 4 x i32> %2
}

define void @optimize_ternary_use(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, ptr %p, iXLen %vl) {
; CHECK-LABEL: optimize_ternary_use:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v14, v8
; CHECK-NEXT:    vmadd.vv v14, v10, v12
; CHECK-NEXT:    vse32.v v14, (a0)
; CHECK-NEXT:    ret
  %1 = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
  %2 = mul <vscale x 4 x i32> %b, %1
  %3 = add <vscale x 4 x i32> %2, %c
  call void @llvm.riscv.vse(<vscale x 4 x i32> %3, ptr %p, iXLen %vl)
  ret void
}

; This function has a copy between two vrm2 virtual registers; make sure we can
; still reduce the VL across it.
define void @fadd_fcmp_select_copy(<vscale x 4 x float> %v, <vscale x 4 x i1> %c, ptr %p, iXLen %vl) {
; CHECK-LABEL: fadd_fcmp_select_copy:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    fmv.w.x fa5, zero
; CHECK-NEXT:    vmflt.vf v10, v8, fa5
; CHECK-NEXT:    vmand.mm v10, v0, v10
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    vsm.v v10, (a0)
; CHECK-NEXT:    ret
  %fadd = fadd <vscale x 4 x float> %v, %v
  %fcmp = fcmp olt <vscale x 4 x float> %fadd, zeroinitializer
  %select = select <vscale x 4 x i1> %c, <vscale x 4 x i1> %fcmp, <vscale x 4 x i1> zeroinitializer
  call void @llvm.riscv.vse(<vscale x 4 x float> %fadd, ptr %p, iXLen %vl)
  call void @llvm.riscv.vsm(<vscale x 4 x i1> %select, ptr %p, iXLen %vl)
  ret void
}

define <vscale x 8 x i32> @vcompress_cmp(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c, iXLen %vl) {
; CHECK-LABEL: vcompress_cmp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vmseq.vv v20, v8, v12
; CHECK-NEXT:    vcompress.vm v8, v16, v20
; CHECK-NEXT:    ret
  %cmp = icmp eq <vscale x 8 x i32> %a, %b
  %compress = call <vscale x 8 x i32> @llvm.riscv.vcompress.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i32> %c, <vscale x 8 x i1> %cmp, iXLen %vl)
  ret <vscale x 8 x i32> %compress
}

define <vscale x 8 x i32> @vcompress_add(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> %c, iXLen %vl) {
; CHECK-LABEL: vcompress_add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vadd.vv v12, v8, v12
; CHECK-NEXT:    vcompress.vm v8, v12, v0
; CHECK-NEXT:    ret
  %add = add <vscale x 8 x i32> %a, %b
  %compress = call <vscale x 8 x i32> @llvm.riscv.vcompress.nxv8i32(<vscale x 8 x i32> poison, <vscale x 8 x i32> %add, <vscale x 8 x i1> %c, iXLen %vl)
  ret <vscale x 8 x i32> %compress
}

; Make sure we peek through INSERT_SUBREG of tuple registers.
define void @segmented_store_insert_subreg(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, ptr %p, iXLen %vl) {
; CHECK-LABEL: segmented_store_insert_subreg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v10, v8, v10
; CHECK-NEXT:    vsseg3e32.v v8, (a0)
; CHECK-NEXT:    ret
  %fadd = fadd <vscale x 4 x float> %v0, %v1
  %t0 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.tuple.insert(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) poison, <vscale x 4 x float> %v0, i32 0)
  %t1 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.tuple.insert(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %t0, <vscale x 4 x float> %fadd, i32 1)
  %t2 = call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.tuple.insert(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %t1, <vscale x 4 x float> %v2, i32 2)
  call void @llvm.riscv.vsseg3(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %t2, ptr %p, iXLen %vl, iXLen 5)
  ret void
}

define void @recurrence(<vscale x 4 x i32> %v, ptr %p, iXLen %n, iXLen %vl) {
; CHECK-LABEL: recurrence:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:  .LBB16_1: # %loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vadd.vv v10, v10, v8
; CHECK-NEXT:    bnez a1, .LBB16_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    vse32.v v10, (a0)
; CHECK-NEXT:    ret
entry:
  br label %loop

loop:
  %iv = phi iXLen [ 0, %entry ], [ %iv.next, %loop ]
  %phi = phi <vscale x 4 x i32> [ zeroinitializer, %entry ], [ %x, %loop ]
  %x = add <vscale x 4 x i32> %phi, %v
  %iv.next = add iXLen %iv, 1
  %done = icmp eq iXLen %iv.next, %n
  br i1 %done, label %exit, label %loop

exit:
  call void @llvm.riscv.vse(<vscale x 4 x i32> %x, ptr %p, iXLen %vl)
  ret void
}

define void @recurrence_vleff(<vscale x 4 x i32> %v, ptr %p, iXLen %n, iXLen %vl) {
; CHECK-LABEL: recurrence_vleff:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    mv a3, a0
; CHECK-NEXT:  .LBB17_1: # %loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT:    vle32ff.v v10, (a3)
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a3, a3, 4
; CHECK-NEXT:    bnez a1, .LBB17_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    ret
entry:
  br label %loop

loop:
  %iv = phi iXLen [ 0, %entry ], [ %iv.next, %loop ]
  %phi = phi <vscale x 4 x i32> [ zeroinitializer, %entry ], [ %y, %loop ]
  %gep = getelementptr i32, ptr %p, iXLen %iv
  %vleff = call { <vscale x 4 x i32>, iXLen } @llvm.riscv.vleff(<vscale x 4 x i32> poison, ptr %gep, iXLen %vl)
  %vleff.x = extractvalue { <vscale x 4 x i32>, iXLen } %vleff, 0
  %vleff.vl = extractvalue { <vscale x 4 x i32>, iXLen } %vleff, 1
  %y = add <vscale x 4 x i32> %phi, %vleff.x
  call void @llvm.riscv.vse(<vscale x 4 x i32> %y, ptr %p, iXLen %vleff.vl)
  %iv.next = add iXLen %iv, 1
  %done = icmp eq iXLen %iv.next, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

define <vscale x 4 x i32> @join(<vscale x 4 x i32> %v, i1 %cond, iXLen %vl) {
; CHECK-LABEL: join:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    andi a0, a0, 1
; CHECK-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vi v8, v8, 1
; CHECK-NEXT:    beqz a0, .LBB18_2
; CHECK-NEXT:  # %bb.1: # %foo
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vadd.vi v8, v8, 1
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB18_2: # %bar
; CHECK-NEXT:    vadd.vi v8, v8, 2
; CHECK-NEXT:    ret
entry:
  %a = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, iXLen 1, iXLen -1)
  br i1 %cond, label %foo, label %bar

foo:
  %b = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 1, iXLen 1)
  ret <vscale x 4 x i32> %b

bar:
  %c = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 2, iXLen 2)
  ret <vscale x 4 x i32> %c
}