aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJames Greenhalgh <james.greenhalgh@arm.com>2015-09-23 11:39:48 +0000
committerJames Greenhalgh <jgreenhalgh@gcc.gnu.org>2015-09-23 11:39:48 +0000
commitd5d27976c89bdc3d1429c741d30572bd85c0a92c (patch)
tree49f8009dcb3308e28450ee4ea7f5b9c76d2c432f /gcc
parent07dc170b6f2dd893e6b4befe6706dfc0e48771d9 (diff)
downloadgcc-d5d27976c89bdc3d1429c741d30572bd85c0a92c.zip
gcc-d5d27976c89bdc3d1429c741d30572bd85c0a92c.tar.gz
gcc-d5d27976c89bdc3d1429c741d30572bd85c0a92c.tar.bz2
[AArch64] Fix vcvt_high_f64_f32 and vcvt_high_f32_f64 intrinsics.
gcc/ * config/aarch64/aarch64-simd.md (aarch64_float_truncate_hi_v4sf): Rewrite as an expand. (aarch64_float_truncate_hi_v4sf_le): New. (aarch64_float_truncate_hi_v4sf_be): Likewise. gcc/testsuite/ * gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c: New. From-SVN: r228044
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog7
-rw-r--r--gcc/config/aarch64/aarch64-simd.md38
-rw-r--r--gcc/testsuite/ChangeLog4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c98
4 files changed, 145 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c46bd6d..27e527d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2015-09-23 James Greenhalgh <james.greenhalgh@arm.com>
+
+ * config/aarch64/aarch64-simd.md
+ (aarch64_float_truncate_hi_v4sf): Rewrite as an expand.
+ (aarch64_float_truncate_hi_v4sf_le): New.
+ (aarch64_float_truncate_hi_v4sf_be): Likewise.
+
2015-09-23 Richard Biener <rguenther@suse.de>
* tree-ssa-structalias.c (intra_create_variable_infos): Build
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index dbe5259..5ab2f2b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1703,6 +1703,15 @@
[(set_attr "type" "neon_fp_cvt_widen_s")]
)
+;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
+;; is inconsistent with vector ordering elsewhere in the compiler, in that
+;; the meaning of HI and LO changes depending on the target endianness.
+;; While elsewhere (for big-endian) we map the higher numbered elements of
+;; a vector to the lower architectural lanes of the vector, for these
+;; patterns we always want "hi" to refer to the higher architectural lanes.
+;; Consequently, while the patterns below look inconsistent with our
+;; other big-endian patterns, their behaviour is as required.
+
(define_expand "vec_unpacks_lo_<mode>"
[(match_operand:<VWIDE> 0 "register_operand" "")
(match_operand:VQ_HSF 1 "register_operand" "")]
@@ -1757,17 +1766,42 @@
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
-(define_insn "aarch64_float_truncate_hi_<Vdbl>"
+(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
[(set (match_operand:<VDBL> 0 "register_operand" "=w")
(vec_concat:<VDBL>
(match_operand:VDF 1 "register_operand" "0")
(float_truncate:VDF
(match_operand:<VWIDE> 2 "register_operand" "w"))))]
- "TARGET_SIMD"
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN"
"fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
+(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
+ [(set (match_operand:<VDBL> 0 "register_operand" "=w")
+ (vec_concat:<VDBL>
+ (float_truncate:VDF
+ (match_operand:<VWIDE> 2 "register_operand" "w"))
+ (match_operand:VDF 1 "register_operand" "0")))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
+ [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
+)
+
+(define_expand "aarch64_float_truncate_hi_<Vdbl>"
+ [(match_operand:<VDBL> 0 "register_operand" "=w")
+ (match_operand:VDF 1 "register_operand" "0")
+ (match_operand:<VWIDE> 2 "register_operand" "w")]
+ "TARGET_SIMD"
+{
+ rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
+ ? gen_aarch64_float_truncate_hi_<Vdbl>_be
+ : gen_aarch64_float_truncate_hi_<Vdbl>_le;
+ emit_insn (gen (operands[0], operands[1], operands[2]));
+ DONE;
+}
+)
+
(define_expand "vec_pack_trunc_v2df"
[(set (match_operand:V4SF 0 "register_operand")
(vec_concat:V4SF
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 36e30b6..3e722c2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2015-09-23 James Greenhalgh <james.greenhalgh@arm.com>
+
+ * gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c: New.
+
2015-09-23 Richard Biener <rguenther@suse.de>
* g++.dg/tree-ssa/restrict2.C: Un-XFAIL testcase.
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c
new file mode 100644
index 0000000..27c6b57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt_high_1.c
@@ -0,0 +1,98 @@
+/* { dg-skip-if "" { arm*-*-* } } */
+
+#include "arm_neon.h"
+
+void abort (void);
+
+void
+foo (void)
+{
+ /* Test vcvt_high_f32_f64: arg1 fills lanes 0-1, arg2 narrowed to float fills lanes 2-3. */
+ float32x2_t arg1;
+ float64x2_t arg2;
+ float32x4_t result;
+ arg1 = vcreate_f32 (UINT64_C (0x3f0db5793f6e1892));
+ arg2 = vcombine_f64 (vcreate_f64 (UINT64_C (0x3fe8e49d23fb575d)),
+ vcreate_f64 (UINT64_C (0x3fd921291b3df73e)));
+ // Expect: "result" = 3ec909483f4724e93f0db5793f6e1892 (lane 3 first, lane 0 last)
+ result = vcvt_high_f32_f64 (arg1, arg2);
+ float32_t got;
+ float32_t exp;
+
+ /* Lane 0: arg1 lane 0.  Abort only if outside both the 0.1% relative and 1e-4 absolute tolerance. */
+ got = vgetq_lane_f32 (result, 0);
+ exp = ((float32_t) 0.9300624132156372);
+ if (((((exp / got) < ((float32_t) 0.999))
+ || ((exp / got) > ((float32_t) 1.001)))
+ && (((exp - got) < ((float32_t) -1.0e-4))
+ || ((exp - got) > ((float32_t) 1.0e-4)))))
+ abort ();
+
+ /* Lane 1: arg1 lane 1, same tolerance check as lane 0. */
+ got = vgetq_lane_f32 (result, 1);
+ exp = ((float32_t) 0.5535503029823303);
+ if (((((exp / got) < ((float32_t) 0.999))
+ || ((exp / got) > ((float32_t) 1.001)))
+ && (((exp - got) < ((float32_t) -1.0e-4))
+ || ((exp - got) > ((float32_t) 1.0e-4)))))
+ abort ();
+
+ /* Lane 2: arg2 lane 0 narrowed to float, same tolerance check. */
+ got = vgetq_lane_f32 (result, 2);
+ exp = ((float32_t) 0.7779069617051665);
+ if (((((exp / got) < ((float32_t) 0.999))
+ || ((exp / got) > ((float32_t) 1.001)))
+ && (((exp - got) < ((float32_t) -1.0e-4))
+ || ((exp - got) > ((float32_t) 1.0e-4)))))
+ abort ();
+
+ /* Lane 3: arg2 lane 1 narrowed to float, same tolerance check. */
+ got = vgetq_lane_f32 (result, 3);
+ exp = ((float32_t) 0.3926489606891329);
+ if (((((exp / got) < ((float32_t) 0.999))
+ || ((exp / got) > ((float32_t) 1.001)))
+ && (((exp - got) < ((float32_t) -1.0e-4))
+ || ((exp - got) > ((float32_t) 1.0e-4)))))
+ abort ();
+}
+
+void
+bar (void)
+{
+ /* Test vcvt_high_f64_f32: lanes 2-3 of arg1 are widened to double to form the result. */
+ float32x4_t arg1;
+ float64x2_t result;
+ arg1 = vcombine_f32 (vcreate_f32 (UINT64_C (0x3f7c5cf13f261f74)),
+ vcreate_f32 (UINT64_C (0x3e3a7bc03f6ccc1d)));
+ // Expect: "result" = 3fc74f78000000003fed9983a0000000 (lane 1 first, lane 0 last)
+ result = vcvt_high_f64_f32 (arg1);
+
+ float64_t got;
+ float64_t exp;
+
+ /* Lane 0: arg1 lane 2 widened.  Abort only if outside both the 0.1% relative and 1e-4 absolute tolerance. */
+ got = vgetq_lane_f64 (result, 0);
+ exp = 0.9249895215034485;
+ if (((((exp / got) < 0.999)
+ || ((exp / got) > 1.001))
+ && (((exp - got) < -1.0e-4)
+ || ((exp - got) > 1.0e-4))))
+ abort ();
+
+ /* Lane 1: arg1 lane 3 widened, same tolerance check as lane 0. */
+ got = vgetq_lane_f64 (result, 1);
+ exp = 0.1821126937866211;
+ if (((((exp / got) < 0.999)
+ || ((exp / got) > 1.001))
+ && (((exp - got) < -1.0e-4)
+ || ((exp - got) > 1.0e-4))))
+ abort ();
+}
+
+int
+main (int argc, char **argv)
+{
+ foo (); /* Checks vcvt_high_f32_f64; calls abort () on a mismatch. */
+ bar (); /* Checks vcvt_high_f64_f32; calls abort () on a mismatch. */
+ return 0;
+}