aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Esme-Yi <esme.yi@ibm.com> 2020-10-04 16:24:20 +0000
committer Esme-Yi <esme.yi@ibm.com> 2020-10-04 16:24:20 +0000
commit e3475f5b91c8dc3142b90b2bb4a1884d6e8d8c2c (patch)
tree d42ee1bd23f38e8a86d0158564a8f64b4585d847
parent 2ccbf3dbd5bac9d4fea8b67404b4c6b006d4adbe (diff)
downloadllvm-e3475f5b91c8dc3142b90b2bb4a1884d6e8d8c2c.zip
llvm-e3475f5b91c8dc3142b90b2bb4a1884d6e8d8c2c.tar.gz
llvm-e3475f5b91c8dc3142b90b2bb4a1884d6e8d8c2c.tar.bz2
[PowerPC] Add builtins for xvtdiv(dp|sp) and xvtsqrt(dp|sp).
Summary:
This patch implements the builtins for xvtdivdp, xvtdivsp, xvtsqrtdp, xvtsqrtsp.

The instructions correspond to the following builtins:
  int vec_test_swdiv(vector double v1, vector double v2);
  int vec_test_swdivs(vector float v1, vector float v2);
  int vec_test_swsqrt(vector double v1);
  int vec_test_swsqrts(vector float v1);

This patch depends on D88274, which fixes the bug in copying from CRRC to GPRC/G8RC.

Reviewed By: steven.zhang, amyk

Differential Revision: https://reviews.llvm.org/D88278
-rw-r--r-- clang/include/clang/Basic/BuiltinsPPC.def 5
-rw-r--r-- clang/lib/Headers/altivec.h 26
-rw-r--r-- clang/test/CodeGen/builtins-ppc-vsx.c 18
-rw-r--r-- llvm/include/llvm/IR/IntrinsicsPowerPC.td 10
-rw-r--r-- llvm/lib/Target/PowerPC/PPCInstrVSX.td 10
-rw-r--r-- llvm/test/CodeGen/PowerPC/vsx_builtins.ll 52
6 files changed, 121 insertions, 0 deletions
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 29bce79..015411a 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -558,6 +558,11 @@ BUILTIN(__builtin_vsx_xxeval, "V2ULLiV2ULLiV2ULLiV2ULLiIi", "")
BUILTIN(__builtin_vsx_xvtlsbb, "iV16UcUi", "")
+BUILTIN(__builtin_vsx_xvtdivdp, "iV2dV2d", "")
+BUILTIN(__builtin_vsx_xvtdivsp, "iV4fV4f", "")
+BUILTIN(__builtin_vsx_xvtsqrtdp, "iV2d", "")
+BUILTIN(__builtin_vsx_xvtsqrtsp, "iV4f", "")
+
// P10 Vector Permute Extended built-in.
BUILTIN(__builtin_vsx_xxpermx, "V16UcV16UcV16UcV16UcIi", "")
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 572b886..1d7bc20 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -3504,6 +3504,20 @@ vec_div(vector signed __int128 __a, vector signed __int128 __b) {
}
#endif __POWER10_VECTOR__
+/* vec_test_swdiv, vec_test_swdivs
+ *
+ * Test for software divide. vec_test_swdiv takes two vector double operands
+ * and returns the int result of __builtin_vsx_xvtdivdp; vec_test_swdivs takes
+ * two vector float operands and returns the int result of
+ * __builtin_vsx_xvtdivsp. Both are only declared when __VSX__ is defined.
+ */
+
+#ifdef __VSX__
+static __inline__ int __ATTRS_o_ai vec_test_swdiv(vector double __a,
+ vector double __b) {
+ return __builtin_vsx_xvtdivdp(__a, __b);
+}
+
+static __inline__ int __ATTRS_o_ai vec_test_swdivs(vector float __a,
+ vector float __b) {
+ return __builtin_vsx_xvtdivsp(__a, __b);
+}
+#endif
+
/* vec_dss */
#define vec_dss __builtin_altivec_dss
@@ -8057,6 +8071,18 @@ vec_vrsqrtefp(vector float __a) {
return __builtin_altivec_vrsqrtefp(__a);
}
+/* vec_test_swsqrt, vec_test_swsqrts
+ *
+ * Test for software square root. vec_test_swsqrt takes a vector double
+ * operand and returns the int result of __builtin_vsx_xvtsqrtdp;
+ * vec_test_swsqrts takes a vector float operand and returns the int result of
+ * __builtin_vsx_xvtsqrtsp. Both are only declared when __VSX__ is defined.
+ */
+
+#ifdef __VSX__
+static __inline__ int __ATTRS_o_ai vec_test_swsqrt(vector double __a) {
+ return __builtin_vsx_xvtsqrtdp(__a);
+}
+
+static __inline__ int __ATTRS_o_ai vec_test_swsqrts(vector float __a) {
+ return __builtin_vsx_xvtsqrtsp(__a);
+}
+#endif
+
/* vec_sel */
#define __builtin_altivec_vsel_4si vec_sel
diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c
index 2542b30..d99b0c1 100644
--- a/clang/test/CodeGen/builtins-ppc-vsx.c
+++ b/clang/test/CodeGen/builtins-ppc-vsx.c
@@ -52,6 +52,7 @@ vector unsigned long long res_vull;
vector signed __int128 res_vslll;
double res_d;
+int res_i;
float res_af[4];
double res_ad[2];
signed char res_asc[16];
@@ -878,6 +879,23 @@ void test1() {
// CHECK: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}})
// CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvrsqrtedp(<2 x double> %{{[0-9]+}})
+ res_i = vec_test_swsqrt(vd);
+// CHECK: call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %{{[0-9]+}})
+// CHECK-LE: call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %{{[0-9]+}})
+
+ res_i = vec_test_swsqrts(vf);
+// CHECK: call i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float> %{{[0-9]+}})
+// CHECK-LE: call i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float> %{{[0-9]+}})
+
+ res_i = vec_test_swdiv(vd, vd);
+// CHECK: call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
+// CHECK-LE: call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}})
+
+ res_i = vec_test_swdivs(vf, vf);
+// CHECK: call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
+// CHECK-LE: call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}})
+
+
dummy();
// CHECK: call void @dummy()
// CHECK-LE: call void @dummy()
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 7b11555..7ab4ee3 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1249,6 +1249,16 @@ def int_ppc_vsx_xxinsertw :
def int_ppc_vsx_xvtlsbb :
PowerPC_VSX_Intrinsic<"xvtlsbb", [llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvtdivdp :
+ PowerPC_VSX_Intrinsic<"xvtdivdp", [llvm_i32_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvtdivsp :
+ PowerPC_VSX_Intrinsic<"xvtdivsp", [llvm_i32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvtsqrtdp :
+ PowerPC_VSX_Intrinsic<"xvtsqrtdp", [llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvtsqrtsp :
+ PowerPC_VSX_Intrinsic<"xvtsqrtsp", [llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_ppc_vsx_xxeval :
PowerPC_VSX_Intrinsic<"xxeval", [llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index f4612b9..18ed2cc 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2591,6 +2591,16 @@ def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
(XVDIVDP $A, $B)>;
+// Vector test for software divide and sqrt: each i32-returning intrinsic selects its test instruction, whose result is copied into a GPRC register.
+def : Pat<(i32 (int_ppc_vsx_xvtdivdp v2f64:$A, v2f64:$B)),
+ (COPY_TO_REGCLASS (XVTDIVDP $A, $B), GPRC)>;
+def : Pat<(i32 (int_ppc_vsx_xvtdivsp v4f32:$A, v4f32:$B)),
+ (COPY_TO_REGCLASS (XVTDIVSP $A, $B), GPRC)>;
+def : Pat<(i32 (int_ppc_vsx_xvtsqrtdp v2f64:$A)),
+ (COPY_TO_REGCLASS (XVTSQRTDP $A), GPRC)>;
+def : Pat<(i32 (int_ppc_vsx_xvtsqrtsp v4f32:$A)),
+ (COPY_TO_REGCLASS (XVTSQRTSP $A), GPRC)>;
+
// Reciprocal estimate
def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
(XVRESP $A)>;
diff --git a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
index b386565..2ab7473 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_builtins.ll
@@ -54,3 +54,55 @@ define void @test4(<2 x double> %a, i8* %b) {
}
; Function Attrs: nounwind readnone
declare void @llvm.ppc.vsx.stxvd2x.be(<2 x double>, i8*)
+
+define i32 @test_vec_test_swdiv(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vec_test_swdiv:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvtdivdp cr0, v2, v3
+; CHECK-NEXT: mfocrf r3, 128
+; CHECK-NEXT: srwi r3, r3, 28
+; CHECK-NEXT: blr
+ entry:
+ %0 = tail call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %a, <2 x double> %b)
+ ret i32 %0
+}
+declare i32 @llvm.ppc.vsx.xvtdivdp(<2 x double>, <2 x double>)
+
+define i32 @test_vec_test_swdivs(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_vec_test_swdivs:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvtdivsp cr0, v2, v3
+; CHECK-NEXT: mfocrf r3, 128
+; CHECK-NEXT: srwi r3, r3, 28
+; CHECK-NEXT: blr
+ entry:
+ %0 = tail call i32 @llvm.ppc.vsx.xvtdivsp(<4 x float> %a, <4 x float> %b)
+ ret i32 %0
+}
+declare i32 @llvm.ppc.vsx.xvtdivsp(<4 x float>, <4 x float>)
+
+define i32 @test_vec_test_swsqrt(<2 x double> %a) {
+; CHECK-LABEL: test_vec_test_swsqrt:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvtsqrtdp cr0, v2
+; CHECK-NEXT: mfocrf r3, 128
+; CHECK-NEXT: srwi r3, r3, 28
+; CHECK-NEXT: blr
+ entry:
+ %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %a)
+ ret i32 %0
+}
+declare i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double>)
+
+define i32 @test_vec_test_swsqrts(<4 x float> %a) {
+; CHECK-LABEL: test_vec_test_swsqrts:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvtsqrtsp cr0, v2
+; CHECK-NEXT: mfocrf r3, 128
+; CHECK-NEXT: srwi r3, r3, 28
+; CHECK-NEXT: blr
+ entry:
+ %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float> %a)
+ ret i32 %0
+}
+declare i32 @llvm.ppc.vsx.xvtsqrtsp(<4 x float>)