aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--clang/lib/Headers/altivec.h26
-rw-r--r--clang/test/CodeGen/builtins-ppc-altivec.c4
2 files changed, 30 insertions, 0 deletions
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 65990e0..bda5a0ea 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -8398,10 +8398,26 @@ vec_vsum2sws(vector int __a, vector int __b)
/* vec_sums */
+/* The vsumsws instruction has a big-endian bias, so that the second
+ input vector and the result always reference big-endian element 3
+ (little-endian element 0). For ease of porting the programmer
+ wants element 3 in both cases, so for little endian we must perform
+ some permutes. */
+
static vector signed int __attribute__((__always_inline__))
vec_sums(vector signed int __a, vector signed int __b)
{ /* Returns __builtin_altivec_vsumsws(__a, __b); on little endian __b and the result are permuted. */
+#ifdef __LITTLE_ENDIAN__
+ __b = (vector signed int) /* pre-permute __b: the mask lists bytes 12-15 first, then 0-11 */
+ vec_perm(__b, __b, (vector unsigned char)
+ (12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11));
+ __b = __builtin_altivec_vsumsws(__a, __b); /* __b is reused to hold the builtin's result */
+ return (vector signed int) /* post-permute the result: the mask lists bytes 4-15 first, then 0-3 */
+ vec_perm(__b, __b, (vector unsigned char)
+ (4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3));
+#else
return __builtin_altivec_vsumsws(__a, __b); /* non-little-endian path: builtin called directly */
+#endif
}
/* vec_vsumsws */
@@ -8409,7 +8425,17 @@ vec_sums(vector signed int __a, vector signed int __b)
static vector signed int __attribute__((__always_inline__))
vec_vsumsws(vector signed int __a, vector signed int __b)
{ /* Same body as vec_sums: calls __builtin_altivec_vsumsws, with permutes around it on little endian. */
+#ifdef __LITTLE_ENDIAN__
+ __b = (vector signed int) /* pre-permute __b: the mask lists bytes 12-15 first, then 0-11 */
+ vec_perm(__b, __b, (vector unsigned char)
+ (12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11));
+ __b = __builtin_altivec_vsumsws(__a, __b); /* __b is reused to hold the builtin's result */
+ return (vector signed int) /* post-permute the result: the mask lists bytes 4-15 first, then 0-3 */
+ vec_perm(__b, __b, (vector unsigned char)
+ (4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3));
+#else
return __builtin_altivec_vsumsws(__a, __b); /* non-little-endian path: builtin called directly */
+#endif
}
/* vec_trunc */
diff --git a/clang/test/CodeGen/builtins-ppc-altivec.c b/clang/test/CodeGen/builtins-ppc-altivec.c
index 8277f5d..c94656e 100644
--- a/clang/test/CodeGen/builtins-ppc-altivec.c
+++ b/clang/test/CodeGen/builtins-ppc-altivec.c
@@ -5155,11 +5155,15 @@ void test6() {
/* vec_sums */
res_vi = vec_sums(vi, vi);
// CHECK: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
res_vi = vec_vsumsws(vi, vi);
// CHECK: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
// CHECK-LE: @llvm.ppc.altivec.vsumsws
+// CHECK-LE: @llvm.ppc.altivec.vperm
/* vec_trunc */
res_vf = vec_trunc(vf);