diff options
author | Krzysztof Parzyszek <kparzysz@quicinc.com> | 2021-04-22 09:05:05 -0500 |
---|---|---|
committer | Krzysztof Parzyszek <kparzysz@quicinc.com> | 2021-04-22 11:49:29 -0500 |
commit | deda60fcaf0be162e893ff68d8d91355e3ac5542 (patch) | |
tree | 5a459a14bbd0c49bc7a5cc63bce2e11f8e1203cd | |
parent | 2b6f20082e8cf7552d2a4f641b27a8862f3af983 (diff) | |
download | llvm-deda60fcaf0be162e893ff68d8d91355e3ac5542.zip llvm-deda60fcaf0be162e893ff68d8d91355e3ac5542.tar.gz llvm-deda60fcaf0be162e893ff68d8d91355e3ac5542.tar.bz2 |
[Hexagon] Add HVX intrinsics for conditional vector loads/stores
Intrinsics for the following instructions are added. The intrinsic
name is "int_hexagon_<inst>[_128B]", e.g.
int_hexagon_V6_vL32b_pred_ai for the 64-byte version
int_hexagon_V6_vL32b_pred_ai_128B for the 128-byte version
V6_vL32b_pred_ai if (Pv4) Vd32 = vmem(Rt32+#s4)
V6_vL32b_pred_pi if (Pv4) Vd32 = vmem(Rx32++#s3)
V6_vL32b_pred_ppu if (Pv4) Vd32 = vmem(Rx32++Mu2)
V6_vL32b_npred_ai if (!Pv4) Vd32 = vmem(Rt32+#s4)
V6_vL32b_npred_pi if (!Pv4) Vd32 = vmem(Rx32++#s3)
V6_vL32b_npred_ppu if (!Pv4) Vd32 = vmem(Rx32++Mu2)
V6_vL32b_nt_pred_ai if (Pv4) Vd32 = vmem(Rt32+#s4):nt
V6_vL32b_nt_pred_pi if (Pv4) Vd32 = vmem(Rx32++#s3):nt
V6_vL32b_nt_pred_ppu if (Pv4) Vd32 = vmem(Rx32++Mu2):nt
V6_vL32b_nt_npred_ai if (!Pv4) Vd32 = vmem(Rt32+#s4):nt
V6_vL32b_nt_npred_pi if (!Pv4) Vd32 = vmem(Rx32++#s3):nt
V6_vL32b_nt_npred_ppu if (!Pv4) Vd32 = vmem(Rx32++Mu2):nt
V6_vS32b_pred_ai if (Pv4) vmem(Rt32+#s4) = Vs32
V6_vS32b_pred_pi if (Pv4) vmem(Rx32++#s3) = Vs32
V6_vS32b_pred_ppu if (Pv4) vmem(Rx32++Mu2) = Vs32
V6_vS32b_npred_ai if (!Pv4) vmem(Rt32+#s4) = Vs32
V6_vS32b_npred_pi if (!Pv4) vmem(Rx32++#s3) = Vs32
V6_vS32b_npred_ppu if (!Pv4) vmem(Rx32++Mu2) = Vs32
V6_vS32Ub_pred_ai if (Pv4) vmemu(Rt32+#s4) = Vs32
V6_vS32Ub_pred_pi if (Pv4) vmemu(Rx32++#s3) = Vs32
V6_vS32Ub_pred_ppu if (Pv4) vmemu(Rx32++Mu2) = Vs32
V6_vS32Ub_npred_ai if (!Pv4) vmemu(Rt32+#s4) = Vs32
V6_vS32Ub_npred_pi if (!Pv4) vmemu(Rx32++#s3) = Vs32
V6_vS32Ub_npred_ppu if (!Pv4) vmemu(Rx32++Mu2) = Vs32
V6_vS32b_nt_pred_ai if (Pv4) vmem(Rt32+#s4):nt = Vs32
V6_vS32b_nt_pred_pi if (Pv4) vmem(Rx32++#s3):nt = Vs32
V6_vS32b_nt_pred_ppu if (Pv4) vmem(Rx32++Mu2):nt = Vs32
V6_vS32b_nt_npred_ai if (!Pv4) vmem(Rt32+#s4):nt = Vs32
V6_vS32b_nt_npred_pi if (!Pv4) vmem(Rx32++#s3):nt = Vs32
V6_vS32b_nt_npred_ppu if (!Pv4) vmem(Rx32++Mu2):nt = Vs32
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsHexagon.td | 120 | ||||
-rw-r--r-- | llvm/lib/Target/Hexagon/HexagonIntrinsics.td | 70 | ||||
-rw-r--r-- | llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-128b.ll | 679 | ||||
-rw-r--r-- | llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-64b.ll | 679 |
4 files changed, 1548 insertions, 0 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsHexagon.td b/llvm/include/llvm/IR/IntrinsicsHexagon.td index fe16a36..212262c 100644 --- a/llvm/include/llvm/IR/IntrinsicsHexagon.td +++ b/llvm/include/llvm/IR/IntrinsicsHexagon.td @@ -253,6 +253,124 @@ Hexagon_v32i32_v32i32v16i32i64_rtt_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc">; def int_hexagon_V6_vrmpyub_rtt_acc_128B : Hexagon_v64i32_v64i32v32i32i64_rtt_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc_128B">; +// HVX conditional loads/stores + +class Hexagon_pred_vload_imm<LLVMType ValTy> + : Hexagon_NonGCC_Intrinsic< + [ValTy], + [llvm_i1_ty, LLVMPointerType<ValTy>, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>, + ImmArg<ArgIndex<2>>]>; + +class Hexagon_pred_vload_imm_64B: Hexagon_pred_vload_imm<llvm_v16i32_ty>; +class Hexagon_pred_vload_imm_128B: Hexagon_pred_vload_imm<llvm_v32i32_ty>; + +def int_hexagon_V6_vL32b_pred_ai: Hexagon_pred_vload_imm_64B; +def int_hexagon_V6_vL32b_npred_ai: Hexagon_pred_vload_imm_64B; +def int_hexagon_V6_vL32b_nt_pred_ai: Hexagon_pred_vload_imm_64B; +def int_hexagon_V6_vL32b_nt_npred_ai: Hexagon_pred_vload_imm_64B; +def int_hexagon_V6_vL32b_pred_ai_128B: Hexagon_pred_vload_imm_128B; +def int_hexagon_V6_vL32b_npred_ai_128B: Hexagon_pred_vload_imm_128B; +def int_hexagon_V6_vL32b_nt_pred_ai_128B: Hexagon_pred_vload_imm_128B; +def int_hexagon_V6_vL32b_nt_npred_ai_128B: Hexagon_pred_vload_imm_128B; + +class Hexagom_pred_vload_upd<LLVMType ValTy, bit TakesImm> + : Hexagon_NonGCC_Intrinsic< + [ValTy, LLVMPointerType<ValTy>], + [llvm_i1_ty, LLVMPointerType<ValTy>, llvm_i32_ty], + !if(TakesImm, + [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>, + ImmArg<ArgIndex<2>>], + [IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>])>; + +class Hexagom_pred_vload_upd_64B<bit TakesImm> + : Hexagom_pred_vload_upd<llvm_v16i32_ty, TakesImm>; +class Hexagom_pred_vload_upd_128B<bit TakesImm> + : Hexagom_pred_vload_upd<llvm_v32i32_ty, TakesImm>; + +def int_hexagon_V6_vL32b_pred_pi: 
Hexagom_pred_vload_upd_64B<1>; +def int_hexagon_V6_vL32b_npred_pi: Hexagom_pred_vload_upd_64B<1>; +def int_hexagon_V6_vL32b_nt_pred_pi: Hexagom_pred_vload_upd_64B<1>; +def int_hexagon_V6_vL32b_nt_npred_pi: Hexagom_pred_vload_upd_64B<1>; +def int_hexagon_V6_vL32b_pred_pi_128B: Hexagom_pred_vload_upd_128B<1>; +def int_hexagon_V6_vL32b_npred_pi_128B: Hexagom_pred_vload_upd_128B<1>; +def int_hexagon_V6_vL32b_nt_pred_pi_128B: Hexagom_pred_vload_upd_128B<1>; +def int_hexagon_V6_vL32b_nt_npred_pi_128B: Hexagom_pred_vload_upd_128B<1>; + +def int_hexagon_V6_vL32b_pred_ppu: Hexagom_pred_vload_upd_64B<0>; +def int_hexagon_V6_vL32b_npred_ppu: Hexagom_pred_vload_upd_64B<0>; +def int_hexagon_V6_vL32b_nt_pred_ppu: Hexagom_pred_vload_upd_64B<0>; +def int_hexagon_V6_vL32b_nt_npred_ppu: Hexagom_pred_vload_upd_64B<0>; +def int_hexagon_V6_vL32b_pred_ppu_128B: Hexagom_pred_vload_upd_128B<0>; +def int_hexagon_V6_vL32b_npred_ppu_128B: Hexagom_pred_vload_upd_128B<0>; +def int_hexagon_V6_vL32b_nt_pred_ppu_128B: Hexagom_pred_vload_upd_128B<0>; +def int_hexagon_V6_vL32b_nt_npred_ppu_128B: Hexagom_pred_vload_upd_128B<0>; + + +class Hexagon_pred_vstore_imm<LLVMType ValTy> + : Hexagon_NonGCC_Intrinsic< + [], + [llvm_i1_ty, LLVMPointerType<ValTy>, llvm_i32_ty, ValTy], + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>, + ImmArg<ArgIndex<2>>]>; + +class Hexagon_pred_vstore_imm_64B: Hexagon_pred_vstore_imm<llvm_v16i32_ty>; +class Hexagon_pred_vstore_imm_128B: Hexagon_pred_vstore_imm<llvm_v32i32_ty>; + +def int_hexagon_V6_vS32b_pred_ai: Hexagon_pred_vstore_imm_64B; +def int_hexagon_V6_vS32b_npred_ai: Hexagon_pred_vstore_imm_64B; +def int_hexagon_V6_vS32Ub_pred_ai: Hexagon_pred_vstore_imm_64B; +def int_hexagon_V6_vS32Ub_npred_ai: Hexagon_pred_vstore_imm_64B; +def int_hexagon_V6_vS32b_nt_pred_ai: Hexagon_pred_vstore_imm_64B; +def int_hexagon_V6_vS32b_nt_npred_ai: Hexagon_pred_vstore_imm_64B; +def int_hexagon_V6_vS32b_pred_ai_128B: Hexagon_pred_vstore_imm_128B; +def 
int_hexagon_V6_vS32b_npred_ai_128B: Hexagon_pred_vstore_imm_128B; +def int_hexagon_V6_vS32Ub_pred_ai_128B: Hexagon_pred_vstore_imm_128B; +def int_hexagon_V6_vS32Ub_npred_ai_128B: Hexagon_pred_vstore_imm_128B; +def int_hexagon_V6_vS32b_nt_pred_ai_128B: Hexagon_pred_vstore_imm_128B; +def int_hexagon_V6_vS32b_nt_npred_ai_128B: Hexagon_pred_vstore_imm_128B; + +class Hexagon_pred_vstore_upd<LLVMType ValTy, bit TakesImm> + : Hexagon_NonGCC_Intrinsic< + [LLVMPointerType<ValTy>], + [llvm_i1_ty, LLVMPointerType<ValTy>, llvm_i32_ty, ValTy], + !if(TakesImm, + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>, + ImmArg<ArgIndex<2>>], + [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>])>; + +class Hexagon_pred_vstore_upd_64B<bit TakesImm> + : Hexagon_pred_vstore_upd<llvm_v16i32_ty, TakesImm>; +class Hexagon_pred_vstore_upd_128B<bit TakesImm> + : Hexagon_pred_vstore_upd<llvm_v32i32_ty, TakesImm>; + +def int_hexagon_V6_vS32b_pred_pi: Hexagon_pred_vstore_upd_64B<1>; +def int_hexagon_V6_vS32b_npred_pi: Hexagon_pred_vstore_upd_64B<1>; +def int_hexagon_V6_vS32Ub_pred_pi: Hexagon_pred_vstore_upd_64B<1>; +def int_hexagon_V6_vS32Ub_npred_pi: Hexagon_pred_vstore_upd_64B<1>; +def int_hexagon_V6_vS32b_nt_pred_pi: Hexagon_pred_vstore_upd_64B<1>; +def int_hexagon_V6_vS32b_nt_npred_pi: Hexagon_pred_vstore_upd_64B<1>; +def int_hexagon_V6_vS32b_pred_pi_128B: Hexagon_pred_vstore_upd_128B<1>; +def int_hexagon_V6_vS32b_npred_pi_128B: Hexagon_pred_vstore_upd_128B<1>; +def int_hexagon_V6_vS32Ub_pred_pi_128B: Hexagon_pred_vstore_upd_128B<1>; +def int_hexagon_V6_vS32Ub_npred_pi_128B: Hexagon_pred_vstore_upd_128B<1>; +def int_hexagon_V6_vS32b_nt_pred_pi_128B: Hexagon_pred_vstore_upd_128B<1>; +def int_hexagon_V6_vS32b_nt_npred_pi_128B: Hexagon_pred_vstore_upd_128B<1>; + +def int_hexagon_V6_vS32b_pred_ppu: Hexagon_pred_vstore_upd_64B<0>; +def int_hexagon_V6_vS32b_npred_ppu: Hexagon_pred_vstore_upd_64B<0>; +def int_hexagon_V6_vS32Ub_pred_ppu: Hexagon_pred_vstore_upd_64B<0>; +def 
int_hexagon_V6_vS32Ub_npred_ppu: Hexagon_pred_vstore_upd_64B<0>; +def int_hexagon_V6_vS32b_nt_pred_ppu: Hexagon_pred_vstore_upd_64B<0>; +def int_hexagon_V6_vS32b_nt_npred_ppu: Hexagon_pred_vstore_upd_64B<0>; +def int_hexagon_V6_vS32b_pred_ppu_128B: Hexagon_pred_vstore_upd_128B<0>; +def int_hexagon_V6_vS32b_npred_ppu_128B: Hexagon_pred_vstore_upd_128B<0>; +def int_hexagon_V6_vS32Ub_pred_ppu_128B: Hexagon_pred_vstore_upd_128B<0>; +def int_hexagon_V6_vS32Ub_npred_ppu_128B: Hexagon_pred_vstore_upd_128B<0>; +def int_hexagon_V6_vS32b_nt_pred_ppu_128B: Hexagon_pred_vstore_upd_128B<0>; +def int_hexagon_V6_vS32b_nt_npred_ppu_128B: Hexagon_pred_vstore_upd_128B<0>; + + // HVX Vector predicate casts. // These intrinsics do not emit (nor do they correspond to) any instructions, // they are no-ops. @@ -265,6 +383,8 @@ Hexagon_NonGCC_Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; // Masked vector stores // +// These are all deprecated, the intrinsics matching instruction names +// should be used instead, e.g. int_hexagon_V6_vS32b_qpred_ai, etc. 
class Hexagon_custom_vms_Intrinsic : Hexagon_NonGCC_Intrinsic< diff --git a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td index 10d0261..68d3e6f 100644 --- a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td @@ -402,4 +402,74 @@ def: Pat<(int_hexagon_V6_vscattermhw_add_128B IntRegs:$src1, ModRegs:$src2, HvxW def: Pat<(int_hexagon_V6_vscattermhwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5), (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>; +multiclass T_pRI_pat<InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID PredRegs:$P, IntRegs:$R, timm:$s), + (MI PredRegs:$P, IntRegs:$R, imm:$s)>; + def: Pat<(!cast<Intrinsic>(IntID#"_128B") + PredRegs:$P, IntRegs:$R, timm:$s), + (MI PredRegs:$P, IntRegs:$R, imm:$s)>; +} + +multiclass T_pRM_pat<InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID PredRegs:$P, IntRegs:$R, ModRegs:$M), + (MI PredRegs:$P, IntRegs:$R, ModRegs:$M)>; + def: Pat<(!cast<Intrinsic>(IntID#"_128B") + PredRegs:$P, IntRegs:$R, ModRegs:$M), + (MI PredRegs:$P, IntRegs:$R, ModRegs:$M)>; +} + +let Predicates = [HasV62, UseHVX] in { + defm: T_pRI_pat<V6_vL32b_pred_ai, int_hexagon_V6_vL32b_pred_ai>; + defm: T_pRI_pat<V6_vL32b_npred_ai, int_hexagon_V6_vL32b_npred_ai>; + defm: T_pRI_pat<V6_vL32b_pred_pi, int_hexagon_V6_vL32b_pred_pi>; + defm: T_pRI_pat<V6_vL32b_npred_pi, int_hexagon_V6_vL32b_npred_pi>; + defm: T_pRI_pat<V6_vL32b_nt_pred_ai, int_hexagon_V6_vL32b_nt_pred_ai>; + defm: T_pRI_pat<V6_vL32b_nt_npred_ai, int_hexagon_V6_vL32b_nt_npred_ai>; + defm: T_pRI_pat<V6_vL32b_nt_pred_pi, int_hexagon_V6_vL32b_nt_pred_pi>; + defm: T_pRI_pat<V6_vL32b_nt_npred_pi, int_hexagon_V6_vL32b_nt_npred_pi>; + + defm: T_pRM_pat<V6_vL32b_pred_ppu, int_hexagon_V6_vL32b_pred_ppu>; + defm: T_pRM_pat<V6_vL32b_npred_ppu, int_hexagon_V6_vL32b_npred_ppu>; + defm: T_pRM_pat<V6_vL32b_nt_pred_ppu, 
int_hexagon_V6_vL32b_nt_pred_ppu>; + defm: T_pRM_pat<V6_vL32b_nt_npred_ppu, int_hexagon_V6_vL32b_nt_npred_ppu>; +} + +multiclass T_pRIV_pat<InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID PredRegs:$P, IntRegs:$R, timm:$s, HvxVR:$V), + (MI PredRegs:$P, IntRegs:$R, imm:$s, HvxVR:$V)>; + def: Pat<(!cast<Intrinsic>(IntID#"_128B") + PredRegs:$P, IntRegs:$R, timm:$s, HvxVR:$V), + (MI PredRegs:$P, IntRegs:$R, imm:$s, HvxVR:$V)>; +} + +multiclass T_pRMV_pat<InstHexagon MI, Intrinsic IntID> { + def: Pat<(IntID PredRegs:$P, IntRegs:$R, ModRegs:$M, HvxVR:$V), + (MI PredRegs:$P, IntRegs:$R, ModRegs:$M, HvxVR:$V)>; + def: Pat<(!cast<Intrinsic>(IntID#"_128B") + PredRegs:$P, IntRegs:$R, ModRegs:$M, HvxVR:$V), + (MI PredRegs:$P, IntRegs:$R, ModRegs:$M, HvxVR:$V)>; +} + +let Predicates = [HasV60, UseHVX] in { + defm: T_pRIV_pat<V6_vS32b_pred_ai, int_hexagon_V6_vS32b_pred_ai>; + defm: T_pRIV_pat<V6_vS32b_npred_ai, int_hexagon_V6_vS32b_npred_ai>; + defm: T_pRIV_pat<V6_vS32b_pred_pi, int_hexagon_V6_vS32b_pred_pi>; + defm: T_pRIV_pat<V6_vS32b_npred_pi, int_hexagon_V6_vS32b_npred_pi>; + defm: T_pRIV_pat<V6_vS32Ub_pred_ai, int_hexagon_V6_vS32Ub_pred_ai>; + defm: T_pRIV_pat<V6_vS32Ub_npred_ai, int_hexagon_V6_vS32Ub_npred_ai>; + defm: T_pRIV_pat<V6_vS32Ub_pred_pi, int_hexagon_V6_vS32Ub_pred_pi>; + defm: T_pRIV_pat<V6_vS32Ub_npred_pi, int_hexagon_V6_vS32Ub_npred_pi>; + defm: T_pRIV_pat<V6_vS32b_nt_pred_ai, int_hexagon_V6_vS32b_nt_pred_ai>; + defm: T_pRIV_pat<V6_vS32b_nt_npred_ai, int_hexagon_V6_vS32b_nt_npred_ai>; + defm: T_pRIV_pat<V6_vS32b_nt_pred_pi, int_hexagon_V6_vS32b_nt_pred_pi>; + defm: T_pRIV_pat<V6_vS32b_nt_npred_pi, int_hexagon_V6_vS32b_nt_npred_pi>; + + defm: T_pRMV_pat<V6_vS32b_pred_ppu, int_hexagon_V6_vS32b_pred_ppu>; + defm: T_pRMV_pat<V6_vS32b_npred_ppu, int_hexagon_V6_vS32b_npred_ppu>; + defm: T_pRMV_pat<V6_vS32Ub_pred_ppu, int_hexagon_V6_vS32Ub_pred_ppu>; + defm: T_pRMV_pat<V6_vS32Ub_npred_ppu, int_hexagon_V6_vS32Ub_npred_ppu>; + defm: T_pRMV_pat<V6_vS32b_nt_pred_ppu, 
int_hexagon_V6_vS32b_nt_pred_ppu>; + defm: T_pRMV_pat<V6_vS32b_nt_npred_ppu, int_hexagon_V6_vS32b_nt_npred_ppu>; +} + include "HexagonDepMapAsm2Intrin.td" diff --git a/llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-128b.ll b/llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-128b.ll new file mode 100644 index 0000000..c7f052b --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-128b.ll @@ -0,0 +1,679 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +declare <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1, <32 x i32>*, i32) +declare <32 x i32> @llvm.hexagon.V6.vL32b.npred.ai.128B(i1, <32 x i32>*, i32) +declare <32 x i32> @llvm.hexagon.V6.vL32b.nt.pred.ai.128B(i1, <32 x i32>*, i32) +declare <32 x i32> @llvm.hexagon.V6.vL32b.nt.npred.ai.128B(i1, <32 x i32>*, i32) + +declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.pred.pi.128B(i1, <32 x i32>*, i32) +declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.npred.pi.128B(i1, <32 x i32>*, i32) +declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.pi.128B(i1, <32 x i32>*, i32) +declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.pi.128B(i1, <32 x i32>*, i32) + +declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.pred.ppu.128B(i1, <32 x i32>*, i32) +declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.npred.ppu.128B(i1, <32 x i32>*, i32) +declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.ppu.128B(i1, <32 x i32>*, i32) +declare { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.ppu.128B(i1, <32 x i32>*, i32) + +declare void @llvm.hexagon.V6.vS32b.pred.ai.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare void @llvm.hexagon.V6.vS32b.npred.ai.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare void @llvm.hexagon.V6.vS32Ub.pred.ai.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare void @llvm.hexagon.V6.vS32Ub.npred.ai.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare void 
@llvm.hexagon.V6.vS32b.nt.pred.ai.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare void @llvm.hexagon.V6.vS32b.nt.npred.ai.128B(i1, <32 x i32>*, i32, <32 x i32>) + +declare <32 x i32>* @llvm.hexagon.V6.vS32b.pred.pi.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare <32 x i32>* @llvm.hexagon.V6.vS32b.npred.pi.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare <32 x i32>* @llvm.hexagon.V6.vS32Ub.pred.pi.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare <32 x i32>* @llvm.hexagon.V6.vS32Ub.npred.pi.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare <32 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.pi.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare <32 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.pi.128B(i1, <32 x i32>*, i32, <32 x i32>) + +declare <32 x i32>* @llvm.hexagon.V6.vS32b.pred.ppu.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare <32 x i32>* @llvm.hexagon.V6.vS32b.npred.ppu.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare <32 x i32>* @llvm.hexagon.V6.vS32Ub.pred.ppu.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare <32 x i32>* @llvm.hexagon.V6.vS32Ub.npred.ppu.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare <32 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.ppu.128B(i1, <32 x i32>*, i32, <32 x i32>) +declare <32 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.ppu.128B(i1, <32 x i32>*, i32, <32 x i32>) + + +define <32 x i32> @f0(i32 %a0, <32 x i32>* %a1) #0 { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = vmem(r1+#3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 384) + ret <32 x i32> %v1 +} + +define <32 x i32> @f1(i32 %a0, <32 x i32>* %a1) #0 { +; CHECK-LABEL: f1: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) v0 = vmem(r1+#3) +; CHECK-NEXT: } +; 
CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32> @llvm.hexagon.V6.vL32b.npred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 384) + ret <32 x i32> %v1 +} + +define <32 x i32> @f2(i32 %a0, <32 x i32>* %a1) #0 { +; CHECK-LABEL: f2: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = vmem(r1+#3):nt +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32> @llvm.hexagon.V6.vL32b.nt.pred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 384) + ret <32 x i32> %v1 +} + +define <32 x i32> @f3(i32 %a0, <32 x i32>* %a1) #0 { +; CHECK-LABEL: f3: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) v0 = vmem(r1+#3):nt +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32> @llvm.hexagon.V6.vL32b.nt.npred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 384) + ret <32 x i32> %v1 +} + +define <32 x i32>* @f4(i32 %a0, <32 x i32>* %a1) #0 { +; CHECK-LABEL: f4: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = vmem(r0++#3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.pred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 384) + %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1 + ret <32 x i32>* %v2 +} + +define <32 x i32>* @f5(i32 %a0, <32 x i32>* %a1) #0 { +; CHECK-LABEL: f5: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) v0 = 
vmem(r0++#3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.npred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 384) + %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1 + ret <32 x i32>* %v2 +} + +define <32 x i32>* @f6(i32 %a0, <32 x i32>* %a1) #0 { +; CHECK-LABEL: f6: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = vmem(r0++#3):nt +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 384) + %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1 + ret <32 x i32>* %v2 +} + +define <32 x i32>* @f7(i32 %a0, <32 x i32>* %a1) #0 { +; CHECK-LABEL: f7: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) v0 = vmem(r0++#3):nt +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 384) + %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1 + ret <32 x i32>* %v2 +} + +define <32 x i32>* @f8(i32 %a0, <32 x i32>* %a1, i32 %a2) #0 { +; CHECK-LABEL: f8: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = vmem(r0++m0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <32 x i32>, <32 x 
i32>* } @llvm.hexagon.V6.vL32b.pred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2) + %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1 + ret <32 x i32>* %v2 +} + +define <32 x i32>* @f9(i32 %a0, <32 x i32>* %a1, i32 %a2) #0 { +; CHECK-LABEL: f9: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) v0 = vmem(r0++m0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.npred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2) + %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1 + ret <32 x i32>* %v2 +} + +define <32 x i32>* @f10(i32 %a0, <32 x i32>* %a1, i32 %a2) #0 { +; CHECK-LABEL: f10: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = vmem(r0++m0):nt +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <32 x i32>, <32 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2) + %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1 + ret <32 x i32>* %v2 +} + +define <32 x i32>* @f11(i32 %a0, <32 x i32>* %a1, i32 %a2) #0 { +; CHECK-LABEL: f11: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) v0 = vmem(r0++m0):nt +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <32 x i32>, <32 x i32>* } 
@llvm.hexagon.V6.vL32b.nt.npred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2) + %v2 = extractvalue { <32 x i32>, <32 x i32>* } %v1, 1 + ret <32 x i32>* %v2 +} + +define void @f12(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 { +; CHECK-LABEL: f12: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmem(r1+#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + call void @llvm.hexagon.V6.vS32b.pred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2) + ret void +} + +define void @f13(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 { +; CHECK-LABEL: f13: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmem(r1+#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + call void @llvm.hexagon.V6.vS32b.npred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2) + ret void +} + +define void @f14(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 { +; CHECK-LABEL: f14: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmemu(r1+#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + call void @llvm.hexagon.V6.vS32Ub.pred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2) + ret void +} + +define void @f15(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 { +; CHECK-LABEL: f15: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmemu(r1+#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + call void @llvm.hexagon.V6.vS32Ub.npred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2) + 
ret void +} + +define void @f16(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 { +; CHECK-LABEL: f16: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmem(r1+#-3):nt = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + call void @llvm.hexagon.V6.vS32b.nt.pred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2) + ret void +} + +define void @f17(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 { +; CHECK-LABEL: f17: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmem(r1+#-3):nt = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + call void @llvm.hexagon.V6.vS32b.nt.npred.ai.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2) + ret void +} + +define <32 x i32>* @f18(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 { +; CHECK-LABEL: f18: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmem(r0++#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.pred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2) + ret <32 x i32>* %v1 +} + +define <32 x i32>* @f19(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 { +; CHECK-LABEL: f19: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmem(r0++#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.npred.pi.128B(i1 %v0, <32 x i32>* 
%a1, i32 -384, <32 x i32> %a2) + ret <32 x i32>* %v1 +} + +define <32 x i32>* @f20(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 { +; CHECK-LABEL: f20: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmemu(r0++#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32Ub.pred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2) + ret <32 x i32>* %v1 +} + +define <32 x i32>* @f21(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 { +; CHECK-LABEL: f21: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmemu(r0++#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32Ub.npred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2) + ret <32 x i32>* %v1 +} + +define <32 x i32>* @f22(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 { +; CHECK-LABEL: f22: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmem(r0++#-3):nt = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2) + ret <32 x i32>* %v1 +} + +define <32 x i32>* @f23(i32 %a0, <32 x i32>* %a1, <32 x i32> %a2) #0 { +; CHECK-LABEL: f23: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: 
if (!p0) vmem(r0++#-3):nt = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.pi.128B(i1 %v0, <32 x i32>* %a1, i32 -384, <32 x i32> %a2) + ret <32 x i32>* %v1 +} + +define <32 x i32>* @f24(i32 %a0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) #0 { +; CHECK-LABEL: f24: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmem(r0++m0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.pred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) + ret <32 x i32>* %v1 +} + +define <32 x i32>* @f25(i32 %a0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) #0 { +; CHECK-LABEL: f25: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmem(r0++m0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.npred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) + ret <32 x i32>* %v1 +} + +define <32 x i32>* @f26(i32 %a0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) #0 { +; CHECK-LABEL: f26: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmemu(r0++m0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 
= call <32 x i32>* @llvm.hexagon.V6.vS32Ub.pred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) + ret <32 x i32>* %v1 +} + +define <32 x i32>* @f27(i32 %a0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) #0 { +; CHECK-LABEL: f27: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmemu(r0++m0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32Ub.npred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) + ret <32 x i32>* %v1 +} + +define <32 x i32>* @f28(i32 %a0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) #0 { +; CHECK-LABEL: f28: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmem(r0++m0):nt = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) + ret <32 x i32>* %v1 +} + +define <32 x i32>* @f29(i32 %a0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) #0 { +; CHECK-LABEL: f29: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmem(r0++m0):nt = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <32 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.ppu.128B(i1 %v0, <32 x i32>* %a1, i32 %a2, <32 x i32> %a3) + ret <32 x i32>* %v1 +} + 
+attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvxv66,+hvx-length128b,-packets" } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-64b.ll b/llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-64b.ll new file mode 100644 index 0000000..8fd6f26 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/pred-vmem-64b.ll @@ -0,0 +1,679 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +declare <16 x i32> @llvm.hexagon.V6.vL32b.pred.ai(i1, <16 x i32>*, i32) +declare <16 x i32> @llvm.hexagon.V6.vL32b.npred.ai(i1, <16 x i32>*, i32) +declare <16 x i32> @llvm.hexagon.V6.vL32b.nt.pred.ai(i1, <16 x i32>*, i32) +declare <16 x i32> @llvm.hexagon.V6.vL32b.nt.npred.ai(i1, <16 x i32>*, i32) + +declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.pred.pi(i1, <16 x i32>*, i32) +declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.npred.pi(i1, <16 x i32>*, i32) +declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.pi(i1, <16 x i32>*, i32) +declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.pi(i1, <16 x i32>*, i32) + +declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.pred.ppu(i1, <16 x i32>*, i32) +declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.npred.ppu(i1, <16 x i32>*, i32) +declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.ppu(i1, <16 x i32>*, i32) +declare { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.ppu(i1, <16 x i32>*, i32) + +declare void @llvm.hexagon.V6.vS32b.pred.ai(i1, <16 x i32>*, i32, <16 x i32>) +declare void @llvm.hexagon.V6.vS32b.npred.ai(i1, <16 x i32>*, i32, <16 x i32>) +declare void @llvm.hexagon.V6.vS32Ub.pred.ai(i1, <16 x i32>*, i32, <16 x i32>) +declare void @llvm.hexagon.V6.vS32Ub.npred.ai(i1, <16 x i32>*, i32, <16 x i32>) +declare void @llvm.hexagon.V6.vS32b.nt.pred.ai(i1, <16 x i32>*, i32, <16 x i32>) +declare void @llvm.hexagon.V6.vS32b.nt.npred.ai(i1, <16 x 
i32>*, i32, <16 x i32>) + +declare <16 x i32>* @llvm.hexagon.V6.vS32b.pred.pi(i1, <16 x i32>*, i32, <16 x i32>) +declare <16 x i32>* @llvm.hexagon.V6.vS32b.npred.pi(i1, <16 x i32>*, i32, <16 x i32>) +declare <16 x i32>* @llvm.hexagon.V6.vS32Ub.pred.pi(i1, <16 x i32>*, i32, <16 x i32>) +declare <16 x i32>* @llvm.hexagon.V6.vS32Ub.npred.pi(i1, <16 x i32>*, i32, <16 x i32>) +declare <16 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.pi(i1, <16 x i32>*, i32, <16 x i32>) +declare <16 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.pi(i1, <16 x i32>*, i32, <16 x i32>) + +declare <16 x i32>* @llvm.hexagon.V6.vS32b.pred.ppu(i1, <16 x i32>*, i32, <16 x i32>) +declare <16 x i32>* @llvm.hexagon.V6.vS32b.npred.ppu(i1, <16 x i32>*, i32, <16 x i32>) +declare <16 x i32>* @llvm.hexagon.V6.vS32Ub.pred.ppu(i1, <16 x i32>*, i32, <16 x i32>) +declare <16 x i32>* @llvm.hexagon.V6.vS32Ub.npred.ppu(i1, <16 x i32>*, i32, <16 x i32>) +declare <16 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.ppu(i1, <16 x i32>*, i32, <16 x i32>) +declare <16 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.ppu(i1, <16 x i32>*, i32, <16 x i32>) + + +define <16 x i32> @f0(i32 %a0, <16 x i32>* %a1) #0 { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = vmem(r1+#3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32> @llvm.hexagon.V6.vL32b.pred.ai(i1 %v0, <16 x i32>* %a1, i32 192) + ret <16 x i32> %v1 +} + +define <16 x i32> @f1(i32 %a0, <16 x i32>* %a1) #0 { +; CHECK-LABEL: f1: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) v0 = vmem(r1+#3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32> @llvm.hexagon.V6.vL32b.npred.ai(i1 %v0, <16 x i32>* %a1, i32 192) + ret <16 x i32> %v1 +} + +define 
<16 x i32> @f2(i32 %a0, <16 x i32>* %a1) #0 { +; CHECK-LABEL: f2: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = vmem(r1+#3):nt +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32> @llvm.hexagon.V6.vL32b.nt.pred.ai(i1 %v0, <16 x i32>* %a1, i32 192) + ret <16 x i32> %v1 +} + +define <16 x i32> @f3(i32 %a0, <16 x i32>* %a1) #0 { +; CHECK-LABEL: f3: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) v0 = vmem(r1+#3):nt +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32> @llvm.hexagon.V6.vL32b.nt.npred.ai(i1 %v0, <16 x i32>* %a1, i32 192) + ret <16 x i32> %v1 +} + +define <16 x i32>* @f4(i32 %a0, <16 x i32>* %a1) #0 { +; CHECK-LABEL: f4: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = vmem(r0++#3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.pred.pi(i1 %v0, <16 x i32>* %a1, i32 192) + %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1 + ret <16 x i32>* %v2 +} + +define <16 x i32>* @f5(i32 %a0, <16 x i32>* %a1) #0 { +; CHECK-LABEL: f5: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) v0 = vmem(r0++#3) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.npred.pi(i1 %v0, <16 x i32>* %a1, i32 192) + %v2 = 
extractvalue { <16 x i32>, <16 x i32>* } %v1, 1 + ret <16 x i32>* %v2 +} + +define <16 x i32>* @f6(i32 %a0, <16 x i32>* %a1) #0 { +; CHECK-LABEL: f6: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = vmem(r0++#3):nt +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.pi(i1 %v0, <16 x i32>* %a1, i32 192) + %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1 + ret <16 x i32>* %v2 +} + +define <16 x i32>* @f7(i32 %a0, <16 x i32>* %a1) #0 { +; CHECK-LABEL: f7: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) v0 = vmem(r0++#3):nt +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.pi(i1 %v0, <16 x i32>* %a1, i32 192) + %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1 + ret <16 x i32>* %v2 +} + +define <16 x i32>* @f8(i32 %a0, <16 x i32>* %a1, i32 %a2) #0 { +; CHECK-LABEL: f8: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = vmem(r0++m0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.pred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2) + %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1 + ret <16 x i32>* %v2 +} + +define <16 x i32>* @f9(i32 %a0, <16 x i32>* %a1, i32 %a2) #0 { +; CHECK-LABEL: f9: +; CHECK: // 
%bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) v0 = vmem(r0++m0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.npred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2) + %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1 + ret <16 x i32>* %v2 +} + +define <16 x i32>* @f10(i32 %a0, <16 x i32>* %a1, i32 %a2) #0 { +; CHECK-LABEL: f10: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) v0 = vmem(r0++m0):nt +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.pred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2) + %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1 + ret <16 x i32>* %v2 +} + +define <16 x i32>* @f11(i32 %a0, <16 x i32>* %a1, i32 %a2) #0 { +; CHECK-LABEL: f11: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) v0 = vmem(r0++m0):nt +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call { <16 x i32>, <16 x i32>* } @llvm.hexagon.V6.vL32b.nt.npred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2) + %v2 = extractvalue { <16 x i32>, <16 x i32>* } %v1, 1 + ret <16 x i32>* %v2 +} + +define void @f12(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 { +; CHECK-LABEL: f12: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { 
+; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmem(r1+#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + call void @llvm.hexagon.V6.vS32b.pred.ai(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2) + ret void +} + +define void @f13(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 { +; CHECK-LABEL: f13: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmem(r1+#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + call void @llvm.hexagon.V6.vS32b.npred.ai(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2) + ret void +} + +define void @f14(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 { +; CHECK-LABEL: f14: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmemu(r1+#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + call void @llvm.hexagon.V6.vS32Ub.pred.ai(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2) + ret void +} + +define void @f15(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 { +; CHECK-LABEL: f15: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmemu(r1+#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + call void @llvm.hexagon.V6.vS32Ub.npred.ai(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2) + ret void +} + +define void @f16(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 { +; CHECK-LABEL: f16: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmem(r1+#-3):nt = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: 
jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + call void @llvm.hexagon.V6.vS32b.nt.pred.ai(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2) + ret void +} + +define void @f17(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 { +; CHECK-LABEL: f17: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmem(r1+#-3):nt = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + call void @llvm.hexagon.V6.vS32b.nt.npred.ai(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2) + ret void +} + +define <16 x i32>* @f18(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 { +; CHECK-LABEL: f18: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmem(r0++#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.pred.pi(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2) + ret <16 x i32>* %v1 +} + +define <16 x i32>* @f19(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 { +; CHECK-LABEL: f19: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmem(r0++#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.npred.pi(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2) + ret <16 x i32>* %v1 +} + +define <16 x i32>* @f20(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 { +; CHECK-LABEL: f20: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; 
CHECK-NEXT: if (p0) vmemu(r0++#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32Ub.pred.pi(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2) + ret <16 x i32>* %v1 +} + +define <16 x i32>* @f21(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 { +; CHECK-LABEL: f21: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmemu(r0++#-3) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32Ub.npred.pi(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2) + ret <16 x i32>* %v1 +} + +define <16 x i32>* @f22(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 { +; CHECK-LABEL: f22: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmem(r0++#-3):nt = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.pi(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2) + ret <16 x i32>* %v1 +} + +define <16 x i32>* @f23(i32 %a0, <16 x i32>* %a1, <16 x i32> %a2) #0 { +; CHECK-LABEL: f23: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmem(r0++#-3):nt = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.pi(i1 %v0, <16 x i32>* %a1, i32 -192, <16 x i32> %a2) + ret <16 x i32>* %v1 +} + +define <16 x i32>* @f24(i32 %a0, <16 x i32>* %a1, i32 %a2, 
<16 x i32> %a3) #0 { +; CHECK-LABEL: f24: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmem(r0++m0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.pred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) + ret <16 x i32>* %v1 +} + +define <16 x i32>* @f25(i32 %a0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) #0 { +; CHECK-LABEL: f25: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmem(r0++m0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.npred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) + ret <16 x i32>* %v1 +} + +define <16 x i32>* @f26(i32 %a0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) #0 { +; CHECK-LABEL: f26: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmemu(r0++m0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32Ub.pred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) + ret <16 x i32>* %v1 +} + +define <16 x i32>* @f27(i32 %a0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) #0 { +; CHECK-LABEL: f27: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; 
CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmemu(r0++m0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32Ub.npred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) + ret <16 x i32>* %v1 +} + +define <16 x i32>* @f28(i32 %a0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) #0 { +; CHECK-LABEL: f28: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (p0) vmem(r0++m0):nt = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.nt.pred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) + ret <16 x i32>* %v1 +} + +define <16 x i32>* @f29(i32 %a0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) #0 { +; CHECK-LABEL: f29: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: p0 = cmp.eq(r0,#0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r0 = r1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: m0 = r2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: if (!p0) vmem(r0++m0):nt = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } +b0: + %v0 = icmp eq i32 %a0, 0 + %v1 = call <16 x i32>* @llvm.hexagon.V6.vS32b.nt.npred.ppu(i1 %v0, <16 x i32>* %a1, i32 %a2, <16 x i32> %a3) + ret <16 x i32>* %v1 +} + +attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvxv66,+hvx-length64b,-packets" } |