From 9242223583dde373111fd39fbcff9cc06abefe6f Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Tue, 26 Jan 2016 15:17:00 +0000 Subject: re PR target/68620 (ICE on gcc.target/arm/attr-neon-fp16.c) 2016-01-26 Christophe Lyon gcc/ PR target/68620 * config/arm/arm.c (neon_valid_immediate): Handle FP16 vectors. * config/arm/arm_neon.h (__ARM_NUM_LANES, __arm_lane, arm_lanq): New helper macros. (vget_lane_f16): Handle big-endian. (vgetq_lane_f16): Likewise. (vset_lane_f16): Likewise. (vsetq_lane_f16): Likewise. * config/arm/iterators.md (VQXMOV): Add V8HF. (VDQ): Add V4HF and V8HF. (V_reg): Handle V4HF and V8HF. (Is_float_mode): Likewise. * config/arm/neon.md (movv4hf, movv8hf, neon_vdup_nv4hf, neon_vdup_nv8hf): New patterns. (vec_set_internal, vec_extract, neon_vld1_dup): Use VD_LANE iterator. (neon_vld1_dup): Use VQ2 iterator. testsuite/ PR target/68620 * gcc.target/arm/pr68620.c: New test. From-SVN: r232832 --- gcc/ChangeLog | 20 ++++++++++ gcc/config/arm/arm.c | 4 ++ gcc/config/arm/arm_neon.h | 72 ++++++++++++++++++++-------------- gcc/config/arm/iterators.md | 6 ++- gcc/config/arm/neon.md | 64 +++++++++++++++++++++++++----- gcc/testsuite/ChangeLog | 5 +++ gcc/testsuite/gcc.target/arm/pr68620.c | 12 ++++++ 7 files changed, 142 insertions(+), 41 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/pr68620.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0a7a417..a8f68e7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2016-01-26 Christophe Lyon + + PR target/68620 + * config/arm/arm.c (neon_valid_immediate): Handle FP16 vectors. + * config/arm/arm_neon.h (__ARM_NUM_LANES, __arm_lane, arm_lanq): + New helper macros. + (vget_lane_f16): Handle big-endian. + (vgetq_lane_f16): Likewise. + (vset_lane_f16): Likewise. + (vsetq_lane_f16): Likewise. + * config/arm/iterators.md (VQXMOV): Add V8HF. + (VDQ): Add V4HF and V8HF. + (V_reg): Handle V4HF and V8HF. + (Is_float_mode): Likewise. + * config/arm/neon.md (movv4hf, movv8hf, neon_vdup_nv4hf, + neon_vdup_nv8hf): New patterns. + (vec_set_internal, vec_extract, neon_vld1_dup): + Use VD_LANE iterator. + (neon_vld1_dup): Use VQ2 iterator. + 2016-01-26 Nathan Sidwell * omp-low.h (oacc_fn_attrib_kernels_p): Declare. diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index f152afa..9b03b05 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -12381,6 +12381,10 @@ neon_valid_immediate (rtx op, machine_mode mode, int inverse, if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0))) return -1; + /* FP16 vectors cannot be represented. */ + if (GET_MODE_INNER (mode) == HFmode) + return -1; + r0 = CONST_DOUBLE_REAL_VALUE (el0); for (i = 1; i < n_elts; i++) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index b311b3a..47816d5 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -5302,14 +5302,26 @@ vget_lane_s32 (int32x2_t __a, const int __b) were marked always-inline so there were no call sites, the declaration would nonetheless raise an error. Hence, we must use a macro instead. */ -#define vget_lane_f16(__v, __idx) \ - __extension__ \ - ({ \ - float16x4_t __vec = (__v); \ - __builtin_arm_lane_check (4, __idx); \ - float16_t __res = __vec[__idx]; \ - __res; \ - }) + /* For big-endian, GCC's vector indices are reversed within each 64 + bits compared to the architectural lane indices used by Neon + intrinsics. */ +#ifdef __ARM_BIG_ENDIAN +#define __ARM_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0])) +#define __arm_lane(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec) - 1)) +#define __arm_laneq(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec)/2 - 1)) +#else +#define __arm_lane(__vec, __idx) __idx +#define __arm_laneq(__vec, __idx) __idx +#endif + +#define vget_lane_f16(__v, __idx) \ + __extension__ \ + ({ \ + float16x4_t __vec = (__v); \ + __builtin_arm_lane_check (4, __idx); \ + float16_t __res = __vec[__arm_lane(__vec, __idx)]; \ + __res; \ + }) #endif __extension__ static __inline float32_t __attribute__ ((__always_inline__)) @@ -5379,14 +5391,14 @@ vgetq_lane_s32 (int32x4_t __a, const int __b) } #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -#define vgetq_lane_f16(__v, __idx) \ - __extension__ \ - ({ \ - float16x8_t __vec = (__v); \ - __builtin_arm_lane_check (8, __idx); \ - float16_t __res = __vec[__idx]; \ - __res; \ - }) +#define vgetq_lane_f16(__v, __idx) \ + __extension__ \ + ({ \ + float16x8_t __vec = (__v); \ + __builtin_arm_lane_check (8, __idx); \ + float16_t __res = __vec[__arm_laneq(__vec, __idx)]; \ + __res; \ + }) #endif __extension__ static __inline float32_t __attribute__ ((__always_inline__)) @@ -5458,13 +5470,13 @@ vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) #define vset_lane_f16(__e, __v, __idx) \ __extension__ \ - ({ \ - float16_t __elem = (__e); \ - float16x4_t __vec = (__v); \ - __builtin_arm_lane_check (4, __idx); \ - __vec[__idx] = __elem; \ - __vec; \ - }) + ({ \ + float16_t __elem = (__e); \ + float16x4_t __vec = (__v); \ + __builtin_arm_lane_check (4, __idx); \ + __vec[__arm_lane (__vec, __idx)] = __elem; \ + __vec; \ + }) #endif __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) @@ -5536,13 +5548,13 @@ vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) #define vsetq_lane_f16(__e, __v, __idx) \ __extension__ \ - ({ \ - float16_t __elem = (__e); \ - float16x8_t __vec = (__v); \ - __builtin_arm_lane_check (8, __idx); \ - __vec[__idx] = __elem; \ - __vec; \ - }) + ({ \ + float16_t __elem = (__e); \ + float16x8_t __vec = (__v); \ + __builtin_arm_lane_check (8, __idx); \ + __vec[__arm_laneq (__vec, __idx)] = __elem; \ + __vec; \ + }) #endif __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 974cf51..aba1023 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -99,7 +99,7 @@ (define_mode_iterator VQI [V16QI V8HI V4SI]) ;; Quad-width vector modes, with TImode added, for moves. -(define_mode_iterator VQXMOV [V16QI V8HI V4SI V4SF V2DI TI]) +(define_mode_iterator VQXMOV [V16QI V8HI V8HF V4SI V4SF V2DI TI]) ;; Opaque structure types wider than TImode. (define_mode_iterator VSTRUCT [EI OI CI XI]) @@ -114,7 +114,7 @@ (define_mode_iterator VN [V8HI V4SI V2DI]) ;; All supported vector modes (except singleton DImode). -(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DI]) +(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V4HF V8HF V2SF V4SF V2DI]) ;; All supported vector modes (except those with 64-bit integer elements). (define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF]) @@ -428,6 +428,7 @@ ;; Register width from element mode (define_mode_attr V_reg [(V8QI "P") (V16QI "q") (V4HI "P") (V8HI "q") + (V4HF "P") (V8HF "q") (V2SI "P") (V4SI "q") (V2SF "P") (V4SF "q") (DI "P") (V2DI "q") @@ -576,6 +577,7 @@ (define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false") (V4HI "false") (V8HI "false") (V2SI "false") (V4SI "false") + (V4HF "true") (V8HF "true") (V2SF "true") (V4SF "true") (DI "false") (V2DI "false")]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index aff5023..55b61eb 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -137,6 +137,36 @@ } }) +(define_expand "movv4hf" + [(set (match_operand:V4HF 0 "s_register_operand") + (match_operand:V4HF 1 "s_register_operand"))] + "TARGET_NEON && TARGET_FP16" +{ + /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS + causing an ICE on big-endian because it cannot extract subregs in + this case. */ + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (V4HFmode, operands[1]); + } +}) + +(define_expand "movv8hf" + [(set (match_operand:V8HF 0 "") + (match_operand:V8HF 1 ""))] + "TARGET_NEON && TARGET_FP16" +{ + /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS + causing an ICE on big-endian because it cannot extract subregs in + this case. */ + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (V8HFmode, operands[1]); + } +}) + (define_insn "*neon_mov" [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w") (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))] @@ -299,11 +329,11 @@ [(set_attr "type" "neon_load1_1reg")]) (define_insn "vec_set_internal" - [(set (match_operand:VD 0 "s_register_operand" "=w,w") - (vec_merge:VD - (vec_duplicate:VD + [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w") + (vec_merge:VD_LANE + (vec_duplicate:VD_LANE (match_operand: 1 "nonimmediate_operand" "Um,r")) - (match_operand:VD 3 "s_register_operand" "0,0") + (match_operand:VD_LANE 3 "s_register_operand" "0,0") (match_operand:SI 2 "immediate_operand" "i,i")))] "TARGET_NEON" { @@ -385,7 +415,7 @@ (define_insn "vec_extract" [(set (match_operand: 0 "nonimmediate_operand" "=Um,r") (vec_select: - (match_operand:VD 1 "s_register_operand" "w,w") + (match_operand:VD_LANE 1 "s_register_operand" "w,w") (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] "TARGET_NEON" { @@ -2829,6 +2859,22 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_from_gp")] ) +(define_insn "neon_vdup_nv4hf" + [(set (match_operand:V4HF 0 "s_register_operand" "=w") + (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))] + "TARGET_NEON" + "vdup.16\t%P0, %1" + [(set_attr "type" "neon_from_gp")] +) + +(define_insn "neon_vdup_nv8hf" + [(set (match_operand:V8HF 0 "s_register_operand" "=w") + (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))] + "TARGET_NEON" + "vdup.16\t%q0, %1" + [(set_attr "type" "neon_from_gp_q")] +) + (define_insn "neon_vdup_n" [(set (match_operand:V32 0 "s_register_operand" "=w,w") (vec_duplicate:V32 (match_operand: 1 "s_register_operand" "r,t")))] @@ -4361,8 +4407,8 @@ if (BYTES_BIG_ENDIAN) ) (define_insn "neon_vld1_dup" - [(set (match_operand:VD 0 "s_register_operand" "=w") - (vec_duplicate:VD (match_operand: 1 "neon_struct_operand" "Um")))] + [(set (match_operand:VD_LANE 0 "s_register_operand" "=w") + (vec_duplicate:VD_LANE (match_operand: 1 "neon_struct_operand" "Um")))] "TARGET_NEON" "vld1.\t{%P0[]}, %A1" [(set_attr "type" "neon_load1_all_lanes")] @@ -4378,8 +4424,8 @@ if (BYTES_BIG_ENDIAN) ) (define_insn "neon_vld1_dup" - [(set (match_operand:VQ 0 "s_register_operand" "=w") - (vec_duplicate:VQ (match_operand: 1 "neon_struct_operand" "Um")))] + [(set (match_operand:VQ2 0 "s_register_operand" "=w") + (vec_duplicate:VQ2 (match_operand: 1 "neon_struct_operand" "Um")))] "TARGET_NEON" { return "vld1.\t{%e0[], %f0[]}, %A1"; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8e7912c..033faa3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-01-26 Christophe Lyon + + PR target/68620 + * gcc.target/arm/pr68620.c: New test. + 2016-01-26 H.J. Lu * gcc.target/i386/pr68986-2.c: Remove -m32. diff --git a/gcc/testsuite/gcc.target/arm/pr68620.c b/gcc/testsuite/gcc.target/arm/pr68620.c new file mode 100644 index 0000000..984992f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pr68620.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_fp_ok } */ +/* { dg-options "-mfp16-format=ieee" } */ +/* { dg-add-options arm_fp } */ + +#include "arm_neon.h" + +float16x4_t __attribute__((target("fpu=neon-fp16"))) +foo (float32x4_t arg) +{ + return vcvt_f16_f32 (arg); +} -- cgit v1.1