diff options
author | Daniel Jacobowitz <dan@codesourcery.com> | 2009-11-11 14:23:03 +0000 |
---|---|---|
committer | Daniel Jacobowitz <drow@gcc.gnu.org> | 2009-11-11 14:23:03 +0000 |
commit | 814a4c3b3548e6fae72992026982d3ce8a186568 (patch) | |
tree | 8b82758d87fb4dcc505191cc1b4a840c0e65c2cb /gcc/config/arm | |
parent | 40f73786e9ae1b4a4fafffaff58a54d79f4a5a1a (diff) | |
download | gcc-814a4c3b3548e6fae72992026982d3ce8a186568.zip gcc-814a4c3b3548e6fae72992026982d3ce8a186568.tar.gz gcc-814a4c3b3548e6fae72992026982d3ce8a186568.tar.bz2 |
arm.c (neon_vdup_constant, [...]): New.
gcc/
* config/arm/arm.c (neon_vdup_constant, neon_make_constant): New.
(neon_expand_vector_init): Use them. Also handle non-constant
vectors with identical elements and vectors with only one
non-constant element.
(arm_print_operand): Handle 'y' modifier.
* config/arm/arm-protos.h (neon_make_constant): Declare.
* config/arm/neon.md (neon_vdup_n<mode>): Split into two
patterns. Use VX instead of VDQW for the first one. Allow
a VFP alternative and V32 modes for the second one.
* config/arm/neon.ml (shape_elt): Add Alternatives.
(ops): Use Alternatives for vdup lane instructions.
* config/arm/neon-testgen.ml (analyze_shape): Handle Alternatives.
* config/arm/vec-common.md (mov<mode>): Use neon_make_constant.
gcc/testsuite/
* gcc.target/arm/neon: Regenerate generated tests.
From-SVN: r154094
Diffstat (limited to 'gcc/config/arm')
-rw-r--r-- | gcc/config/arm/arm-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 190 | ||||
-rw-r--r-- | gcc/config/arm/neon-testgen.ml | 1 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 19 | ||||
-rw-r--r-- | gcc/config/arm/neon.ml | 21 | ||||
-rw-r--r-- | gcc/config/arm/vec-common.md | 5 |
6 files changed, 220 insertions, 17 deletions
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index ed70926..4d6d51b 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -68,6 +68,7 @@ extern char *neon_output_logic_immediate (const char *, rtx *, enum machine_mode, int, int); extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode, rtx (*) (rtx, rtx, rtx)); +extern rtx neon_make_constant (rtx); extern void neon_expand_vector_init (rtx, rtx); extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index fc0c559..5fb2d17 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -8085,25 +8085,171 @@ neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode, } } -/* Initialize a vector with non-constant elements. FIXME: We can do better - than the current implementation (building a vector on the stack and then - loading it) in many cases. See rs6000.c. */ +/* If VALS is a vector constant that can be loaded into a register + using VDUP, generate instructions to do so and return an RTX to + assign to the register. Otherwise return NULL_RTX. */ + +static rtx +neon_vdup_constant (rtx vals) +{ + enum machine_mode mode = GET_MODE (vals); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + bool all_same = true; + rtx x; + int i; + + if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4) + return NULL_RTX; + + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + if (!all_same) + /* The elements are not all the same. We could handle repeating + patterns of a mode larger than INNER_MODE here (e.g. int8x8_t + {0, C, 0, C, 0, C, 0, C} which can be loaded using + vdup.i16). */ + return NULL_RTX; + + /* We can load this constant by using VDUP and a constant in a + single ARM register. This will be cheaper than a vector + load. */ + + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); + return gen_rtx_UNSPEC (mode, gen_rtvec (1, x), + UNSPEC_VDUP_N); +} + +/* Generate code to load VALS, which is a PARALLEL containing only + constants (for vec_init) or CONST_VECTOR, efficiently into a + register. Returns an RTX to copy into the register, or NULL_RTX + for a PARALLEL that can not be converted into a CONST_VECTOR. */ + +rtx +neon_make_constant (rtx vals) +{ + enum machine_mode mode = GET_MODE (vals); + rtx target; + rtx const_vec = NULL_RTX; + int n_elts = GET_MODE_NUNITS (mode); + int n_const = 0; + int i; + + if (GET_CODE (vals) == CONST_VECTOR) + const_vec = vals; + else if (GET_CODE (vals) == PARALLEL) + { + /* A CONST_VECTOR must contain only CONST_INTs and + CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF). + Only store valid constants in a CONST_VECTOR. */ + for (i = 0; i < n_elts; ++i) + { + rtx x = XVECEXP (vals, 0, i); + if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) + n_const++; + } + if (n_const == n_elts) + const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); + } + else + gcc_unreachable (); + + if (const_vec != NULL + && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL)) + /* Load using VMOV. On Cortex-A8 this takes one cycle. */ + return const_vec; + else if ((target = neon_vdup_constant (vals)) != NULL_RTX) + /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON + pipeline cycle; creating the constant takes one or two ARM + pipeline cycles. */ + return target; + else if (const_vec != NULL_RTX) + /* Load from constant pool. On Cortex-A8 this takes two cycles + (for either double or quad vectors). We can not take advantage + of single-cycle VLD1 because we need a PC-relative addressing + mode. */ + return const_vec; + else + /* A PARALLEL containing something not valid inside CONST_VECTOR. + We can not construct an initializer. */ + return NULL_RTX; +} + +/* Initialize vector TARGET to VALS. */ void neon_expand_vector_init (rtx target, rtx vals) { enum machine_mode mode = GET_MODE (target); - enum machine_mode inner = GET_MODE_INNER (mode); - unsigned int i, n_elts = GET_MODE_NUNITS (mode); - rtx mem; + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + int n_var = 0, one_var = -1; + bool all_same = true; + rtx x, mem; + int i; - gcc_assert (VECTOR_MODE_P (mode)); + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (!CONSTANT_P (x)) + ++n_var, one_var = i; + + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + if (n_var == 0) + { + rtx constant = neon_make_constant (vals); + if (constant != NULL_RTX) + { + emit_move_insn (target, constant); + return; + } + } + + /* Splat a single non-constant element if we can. */ + if (all_same && GET_MODE_SIZE (inner_mode) <= 4) + { + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); + emit_insn (gen_rtx_SET (VOIDmode, target, + gen_rtx_UNSPEC (mode, gen_rtvec (1, x), + UNSPEC_VDUP_N))); + return; + } + + /* One field is non-constant. Load constant then overwrite varying + field. This is more efficient than using the stack. */ + if (n_var == 1) + { + rtx copy = copy_rtx (vals); + rtvec ops; + + /* Load constant part of vector, substitute neighboring value for + varying element. */ + XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); + neon_expand_vector_init (target, copy); + + /* Insert variable. */ + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); + ops = gen_rtvec (3, x, target, GEN_INT (one_var)); + emit_insn (gen_rtx_SET (VOIDmode, target, + gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE))); + return; + } + + /* Construct the vector in memory one field at a time + and load the whole vector. */ mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0); for (i = 0; i < n_elts; i++) - emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)), - XVECEXP (vals, 0, i)); - + emit_move_insn (adjust_address_nv (mem, inner_mode, + i * GET_MODE_SIZE (inner_mode)), + XVECEXP (vals, 0, i)); emit_move_insn (target, mem); } @@ -15253,6 +15399,30 @@ arm_print_operand (FILE *stream, rtx x, int code) } return; + /* Translate an S register number into a D register number and element index. */ + case 'y': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!VFP_REGNO_OK_FOR_SINGLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = regno - FIRST_VFP_REGNUM; + fprintf (stream, "d%d[%d]", regno / 2, regno % 2); + } + return; + /* Register specifier for vld1.16/vst1.16. Translate the S register number into a D register number and element index. */ case 'z': diff --git a/gcc/config/arm/neon-testgen.ml b/gcc/config/arm/neon-testgen.ml index 00a8e53..f1c431a 100644 --- a/gcc/config/arm/neon-testgen.ml +++ b/gcc/config/arm/neon-testgen.ml @@ -175,6 +175,7 @@ let rec analyze_shape shape = | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]" + | Alternatives (elts) -> "(" ^ (String.concat "|" (List.map analyze_shape_elt elts)) ^ ")" in match shape with All (n, elt) -> commas analyze_shape_elt (n_things n elt) "" diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index ac1e9d4..43b3805 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -2687,9 +2687,9 @@ }) (define_insn "neon_vdup_n<mode>" - [(set (match_operand:VDQW 0 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:<V_elem> 1 "s_register_operand" "r")] - UNSPEC_VDUP_N))] + [(set (match_operand:VX 0 "s_register_operand" "=w") + (unspec:VX [(match_operand:<V_elem> 1 "s_register_operand" "r")] + UNSPEC_VDUP_N))] "TARGET_NEON" "vdup%?.<V_sz_elem>\t%<V_reg>0, %1" ;; Assume this schedules like vmov. @@ -2697,6 +2697,19 @@ (set_attr "neon_type" "neon_bp_simple")] ) +(define_insn "neon_vdup_n<mode>" + [(set (match_operand:V32 0 "s_register_operand" "=w,w") + (unspec:V32 [(match_operand:<V_elem> 1 "s_register_operand" "r,t")] + UNSPEC_VDUP_N))] + "TARGET_NEON" + "@ + vdup%?.<V_sz_elem>\t%<V_reg>0, %1 + vdup%?.<V_sz_elem>\t%<V_reg>0, %y1" + ;; Assume this schedules like vmov. + [(set_attr "predicable" "yes") + (set_attr "neon_type" "neon_bp_simple")] +) + (define_insn "neon_vdup_ndi" [(set (match_operand:DI 0 "s_register_operand" "=w") (unspec:DI [(match_operand:DI 1 "s_register_operand" "r")] diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml index 114097d..466b06b 100644 --- a/gcc/config/arm/neon.ml +++ b/gcc/config/arm/neon.ml @@ -68,6 +68,7 @@ type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt | Element_of_dreg (* Used for "lane" variants. *) | Element_of_qreg (* Likewise. *) | All_elements_of_dreg (* Used for "dup" variants. *) + | Alternatives of shape_elt list (* Used for multiple valid operands *) type shape_form = All of int * shape_elt | Long @@ -1008,7 +1009,10 @@ let ops = pf_su_8_64; (* Set all lanes to the same value. *) - Vdup_n, [], + Vdup_n, + [Disassembles_as [Use_operands [| Dreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], Use_operands [| Dreg; Corereg |], "vdup_n", bits_1, pf_su_8_32; Vdup_n, @@ -1016,7 +1020,10 @@ let ops = Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], Use_operands [| Dreg; Corereg |], "vdup_n", notype_1, [S64; U64]; - Vdup_n, [], + Vdup_n, + [Disassembles_as [Use_operands [| Qreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1, pf_su_8_32; Vdup_n, @@ -1028,7 +1035,10 @@ let ops = (* These are just aliases for the above. *) Vmov_n, - [Builtin_name "vdup_n"], + [Builtin_name "vdup_n"; + Disassembles_as [Use_operands [| Dreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], Use_operands [| Dreg; Corereg |], "vmov_n", bits_1, pf_su_8_32; Vmov_n, @@ -1038,7 +1048,10 @@ let ops = Use_operands [| Dreg; Corereg |], "vmov_n", notype_1, [S64; U64]; Vmov_n, - [Builtin_name "vdupQ_n"], + [Builtin_name "vdupQ_n"; + Disassembles_as [Use_operands [| Qreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], Use_operands [| Qreg; Corereg |], "vmovQ_n", bits_1, pf_su_8_32; Vmov_n, diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index a01efb4..d33fdf9 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -42,6 +42,11 @@ { if (GET_CODE (operands[0]) != REG) operands[1] = force_reg (<MODE>mode, operands[1]); + else if (TARGET_NEON && CONSTANT_P (operands[1])) + { + operands[1] = neon_make_constant (operands[1]); + gcc_assert (operands[1] != NULL_RTX); + } } }) |