diff options
author | Tejas Belagod <tejas.belagod@arm.com> | 2010-08-25 08:23:26 +0100 |
---|---|---|
committer | Ramana Radhakrishnan <ramana@gcc.gnu.org> | 2010-08-25 07:23:26 +0000 |
commit | 46b57af175e889126f65a6ca667c2c4606f93f84 (patch) | |
tree | ece038bdb45a512974b020f1864bb6948370b6b2 | |
parent | 34f41f7c1ada644dca11e1bcb1a5e13e0eb6eecb (diff) | |
download | gcc-46b57af175e889126f65a6ca667c2c4606f93f84.zip gcc-46b57af175e889126f65a6ca667c2c4606f93f84.tar.gz gcc-46b57af175e889126f65a6ca667c2c4606f93f84.tar.bz2 |
vmull / vmovl support for Neon.
For Tejas Belagod
2010-08-25 Tejas Belagod <tejas.belagod@arm.com>
* lib/target-supports.exp (check_effective_target_vect_unpack):
Set vect_unpack supported flag to true for neon.
* config/arm/iterators.md (VU, SE, V_widen_l): New.
(V_unpack, US): New.
* config/arm/neon.md (vec_unpack<US>_hi_<mode>): Expansion for
vmovl.
(vec_unpack<US>_lo_<mode>): Likewise.
(neon_vec_unpack<US>_hi_<mode>): Instruction pattern for vmovl.
(neon_vec_unpack<US>_lo_<mode>): Likewise.
(vec_widen_<US>mult_lo_<mode>): Expansion for vmull.
(vec_widen_<US>mult_hi_<mode>): Likewise.
(neon_vec_<US>mult_lo_<mode>): Instruction pattern for vmull.
(neon_vec_<US>mult_hi_<mode>): Likewise.
(neon_unpack<US>_<mode>): Widening move intermediate step for
vectorizing without -mvectorize-with-neon-quad.
(neon_vec_<US>mult_<mode>): Widening multiply intermediate step
for vectorizing without -mvectorize-with-neon-quad.
* config/arm/predicates.md (vect_par_constant_high): Check for
high-half lanes of a vector.
(vect_par_constant_low): Check for low-half lanes of a vector.
From-SVN: r163538
-rw-r--r-- | gcc/ChangeLog | 21 | ||||
-rw-r--r-- | gcc/config/arm/iterators.md | 14 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 202 | ||||
-rw-r--r-- | gcc/config/arm/predicates.md | 58 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 3 |
6 files changed, 301 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d0d6bb9..29c5aec 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,24 @@ +2010-08-25 Tejas Belagod <tejas.belagod@arm.com> + + * config/arm/iterators.md (VU, SE, V_widen_l): New. + (V_unpack, US): New. + * config/arm/neon.md (vec_unpack<US>_hi_<mode>): Expansion for + vmovl. + (vec_unpack<US>_lo_<mode>): Likewise. + (neon_vec_unpack<US>_hi_<mode>): Instruction pattern for vmovl. + (neon_vec_unpack<US>_lo_<mode>): Likewise. + (vec_widen_<US>mult_lo_<mode>): Expansion for vmull. + (vec_widen_<US>mult_hi_<mode>): Likewise. + (neon_vec_<US>mult_lo_<mode>"): Instruction pattern for vmull. + (neon_vec_<US>mult_hi_<mode>"): Likewise. + (neon_unpack<US>_<mode>): Widening move intermediate step for + vectorizing without -mvectorize-with-neon-quad. + (neon_vec_<US>mult_<mode>): Widening multiply intermediate step + for vectorizing without -mvectorize-with-neon-quad. + * config/arm/predicates.md (vect_par_constant_high): Check for + high-half lanes of a vector. + (vect_par_constant_low): Check for low-half lanes of a vector. + 2010-08-24 Sebastian Pop <sebastian.pop@amd.com> * tree-if-conv.c (struct ifc_dr): New. diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index ee04aab..d9b5621 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -136,7 +136,9 @@ ;; Modes with 32-bit elements only. (define_mode_iterator V32 [V2SI V2SF V4SI V4SF]) - +;; Integer vector modes with 8-bit, 16-bit and 32-bit elements (16, 8 and 4 lanes respectively); the inputs of the unpack and widening-multiply patterns. +(define_mode_iterator VU [V16QI V8HI V4SI]) + ;;---------------------------------------------------------------------------- ;; Code iterators ;;---------------------------------------------------------------------------- @@ -156,6 +158,8 @@ ;; without unsigned variants (for use with *SFmode pattern). 
(define_code_iterator vqhs_ops [plus smin smax]) +;; A list of widening operators (sign and zero extension). +(define_code_iterator SE [sign_extend zero_extend]) ;;---------------------------------------------------------------------------- ;; Mode attributes @@ -360,6 +364,11 @@ (V2SF "2") (V4SF "4") (DI "1") (V2DI "2")]) +;; Same as V_widen, but lower-case; used to name the gen_neon_vget_low/high<V_widen_l> generators. +(define_mode_attr V_widen_l [(V8QI "v8hi") (V4HI "v4si") (V2SI "v2di")]) + +;; Widen. Result is half the number of elements, but widened to double-width. +(define_mode_attr V_unpack [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")]) ;;---------------------------------------------------------------------------- ;; Code attributes @@ -375,3 +384,6 @@ (define_code_attr cnb [(ltu "CC_C") (geu "CC")]) (define_code_attr optab [(ltu "ltu") (geu "geu")]) + +;; Assembler type letter ("s" or "u") giving the signedness of a widening operation. +(define_code_attr US [(sign_extend "s") (zero_extend "u")]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index bdc279a..96241b9 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -4977,3 +4977,205 @@ emit_insn (gen_orn<mode>3_neon (operands[0], operands[1], operands[2])); DONE; }) + +(define_insn "neon_vec_unpack<US>_lo_<mode>" ; Extend the lanes of operand 1 selected by the low-half PARALLEL in operand 2. + [(set (match_operand:<V_unpack> 0 "register_operand" "=w") + (SE:<V_unpack> (vec_select:<V_HALF> + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_low" ""))))] + "TARGET_NEON" + "vmovl.<US><V_sz_elem> %q0, %e1" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_insn "neon_vec_unpack<US>_hi_<mode>" ; Extend the lanes of operand 1 selected by the high-half PARALLEL in operand 2. + [(set (match_operand:<V_unpack> 0 "register_operand" "=w") + (SE:<V_unpack> (vec_select:<V_HALF> + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_high" ""))))] + "TARGET_NEON" + "vmovl.<US><V_sz_elem> %q0, %f1" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_unpack<US>_hi_<mode>" ; Build the high-half lane selector and emit neon_vec_unpack<US>_hi_<mode>. + [(match_operand:<V_unpack> 0 "register_operand" "") + (SE:<V_unpack> (match_operand:VU 1 
"register_operand"))] + "TARGET_NEON" + { + rtvec v = rtvec_alloc (<V_mode_nunits>/2) ; /* holds <V_mode_nunits>/2 lane indices */ + rtx t1; + int i; + for (i = 0; i < (<V_mode_nunits>/2); i++) + RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i); /* nunits/2, nunits/2 + 1, ... */ + + t1 = gen_rtx_PARALLEL (<MODE>mode, v); + emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], + operands[1], + t1)); + DONE; + } +) + +(define_expand "vec_unpack<US>_lo_<mode>" ; Build the low-half lane selector and emit neon_vec_unpack<US>_lo_<mode>. + [(match_operand:<V_unpack> 0 "register_operand" "") + (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))] + "TARGET_NEON" + { + rtvec v = rtvec_alloc (<V_mode_nunits>/2) ; /* holds <V_mode_nunits>/2 lane indices */ + rtx t1; + int i; + for (i = 0; i < (<V_mode_nunits>/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (i); /* 0, 1, ..., nunits/2 - 1 */ + t1 = gen_rtx_PARALLEL (<MODE>mode, v); + emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0], + operands[1], + t1)); + DONE; + } +) + +(define_insn "neon_vec_<US>mult_lo_<mode>" ; Multiply the extended low-half lanes of operands 1 and 3; operand 2 is the shared lane selector. + [(set (match_operand:<V_unpack> 0 "register_operand" "=w") + (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF> + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_low" ""))) + (SE:<V_unpack> (vec_select:<V_HALF> + (match_operand:VU 3 "register_operand" "w") + (match_dup 2)))))] + "TARGET_NEON" + "vmull.<US><V_sz_elem> %q0, %e1, %e3" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_widen_<US>mult_lo_<mode>" ; Build the low-half lane selector and emit neon_vec_<US>mult_lo_<mode>. + [(match_operand:<V_unpack> 0 "register_operand" "") + (SE:<V_unpack> (match_operand:VU 1 "register_operand" "")) + (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))] + "TARGET_NEON" + { + rtvec v = rtvec_alloc (<V_mode_nunits>/2) ; /* holds <V_mode_nunits>/2 lane indices */ + rtx t1; + int i; + for (i = 0; i < (<V_mode_nunits>/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (i); /* 0, 1, ..., nunits/2 - 1 */ + t1 = gen_rtx_PARALLEL (<MODE>mode, v); + + emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0], + operands[1], + t1, + operands[2])); /* the selector is insn operand 2, the second multiplicand is insn operand 3 */ + DONE; + } +) + +(define_insn "neon_vec_<US>mult_hi_<mode>" ; Multiply the extended high-half lanes of operands 1 and 3; operand 2 is the shared lane selector. + [(set (match_operand:<V_unpack> 0 "register_operand" "=w") + (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF> + 
(match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_high" ""))) + (SE:<V_unpack> (vec_select:<V_HALF> + (match_operand:VU 3 "register_operand" "w") + (match_dup 2)))))] + "TARGET_NEON" + "vmull.<US><V_sz_elem> %q0, %f1, %f3" + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_widen_<US>mult_hi_<mode>" ; Build the high-half lane selector and emit neon_vec_<US>mult_hi_<mode>. + [(match_operand:<V_unpack> 0 "register_operand" "") + (SE:<V_unpack> (match_operand:VU 1 "register_operand" "")) + (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))] + "TARGET_NEON" + { + rtvec v = rtvec_alloc (<V_mode_nunits>/2) ; /* holds <V_mode_nunits>/2 lane indices */ + rtx t1; + int i; + for (i = 0; i < (<V_mode_nunits>/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i); /* nunits/2, nunits/2 + 1, ... */ + t1 = gen_rtx_PARALLEL (<MODE>mode, v); + + emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0], + operands[1], + t1, + operands[2])); /* the selector is insn operand 2, the second multiplicand is insn operand 3 */ + DONE; + + } +) + +;; Vectorize for non-neon-quad case: the whole VDI operand is widened into a <V_widen> register and the expanders below then take its low or high half. +(define_insn "neon_unpack<US>_<mode>" + [(set (match_operand:<V_widen> 0 "register_operand" "=w") + (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))] + "TARGET_NEON" + "vmovl.<US><V_sz_elem> %q0, %P1" ; %P prints the VDI source operand as a D register. + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_unpack<US>_lo_<mode>" ; Widen operand 1 into a temporary, then copy the temporary's low half to operand 0. + [(match_operand:<V_double_width> 0 "register_operand" "") + (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))] + "TARGET_NEON" +{ + rtx tmpreg = gen_reg_rtx (<V_widen>mode); + emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1])); + emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg)); + + DONE; +} +) + +(define_expand "vec_unpack<US>_hi_<mode>" ; Widen operand 1 into a temporary, then copy the temporary's high half to operand 0. + [(match_operand:<V_double_width> 0 "register_operand" "") + (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))] + "TARGET_NEON" +{ + rtx tmpreg = gen_reg_rtx (<V_widen>mode); + emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1])); + emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg)); + + DONE; +} +) + +(define_insn "neon_vec_<US>mult_<mode>" ; Widening multiply of two whole VDI operands into a <V_widen> result. + [(set 
(match_operand:<V_widen> 0 "register_operand" "=w") + (mult:<V_widen> (SE:<V_widen> + (match_operand:VDI 1 "register_operand" "w")) + (SE:<V_widen> + (match_operand:VDI 2 "register_operand" "w"))))] + "TARGET_NEON" + "vmull.<US><V_sz_elem> %q0, %P1, %P2" ; %P prints the VDI source operands as D registers. + [(set_attr "neon_type" "neon_shift_1")] +) + +(define_expand "vec_widen_<US>mult_hi_<mode>" ; Multiply into a <V_widen> temporary, then copy the temporary's high half to operand 0. + [(match_operand:<V_double_width> 0 "register_operand" "") + (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) + (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))] + "TARGET_NEON" + { + rtx tmpreg = gen_reg_rtx (<V_widen>mode); + emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2])); + emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg)); + + DONE; + + } +) + +(define_expand "vec_widen_<US>mult_lo_<mode>" ; Multiply into a <V_widen> temporary, then copy the temporary's low half to operand 0. + [(match_operand:<V_double_width> 0 "register_operand" "") + (SE:<V_double_width> (match_operand:VDI 1 "register_operand" "")) + (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))] + "TARGET_NEON" + { + rtx tmpreg = gen_reg_rtx (<V_widen>mode); + emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2])); + emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg)); + + DONE; + + } +) diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md index da3b6dc..032b2ec 100644 --- a/gcc/config/arm/predicates.md +++ b/gcc/config/arm/predicates.md @@ -619,3 +619,61 @@ (and (match_test "TARGET_32BIT") (match_operand 0 "arm_di_operand")))) +;; Predicates for parallel expanders based on mode. 
+(define_special_predicate "vect_par_constant_high" ; True for a PARALLEL of base/2 CONST_INTs whose i-th element equals base/2 + i, where base is GET_MODE_NUNITS (mode). + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); /* number of elements in the PARALLEL */ + int i; + int base = GET_MODE_NUNITS (mode); + + if ((count < 1) + || (count != base/2)) /* must hold exactly half as many entries as base */ + return false; + + if (!VECTOR_MODE_P (mode)) /* NOTE(review): tested only after GET_MODE_NUNITS (mode) was already used above - confirm the order is intended */ + return false; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + int val; + + if (GET_CODE (elt) != CONST_INT) + return false; + + val = INTVAL (elt); + if (val != (base/2) + i) /* element i must be the constant base/2 + i */ + return false; + } + return true; +}) + +(define_special_predicate "vect_par_constant_low" ; True for a PARALLEL of base/2 CONST_INTs whose i-th element equals i, where base is GET_MODE_NUNITS (mode). + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); /* number of elements in the PARALLEL */ + int i; + int base = GET_MODE_NUNITS (mode); + + if ((count < 1) + || (count != base/2)) /* must hold exactly half as many entries as base */ + return false; + + if (!VECTOR_MODE_P (mode)) /* NOTE(review): tested only after GET_MODE_NUNITS (mode) was already used above - confirm the order is intended */ + return false; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + int val; + + if (GET_CODE (elt) != CONST_INT) + return false; + + val = INTVAL (elt); + if (val != i) /* element i must be the constant i */ + return false; + } + return true; +}) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 26e6df7..7a92568 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2010-08-25 Tejas Belagod <tejas.belagod@arm.com> + + * lib/target-supports.exp (check_effective_target_vect_unpack): + Set vect_unpack supported flag to true for neon. + 2010-08-24 Sebastian Pop <sebastian.pop@amd.com> * gcc.dg/tree-ssa/ifc-5.c: New. diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 1682d58..4b95323 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2640,7 +2640,8 @@ proc check_effective_target_vect_unpack { } { if { ([istarget powerpc*-*-*] && ![istarget powerpc-*paired*]) || [istarget i?86-*-*] || [istarget x86_64-*-*] - || [istarget spu-*-*] } { + || [istarget spu-*-*] + || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } { set et_vect_unpack_saved 1 } } |