about summary refs log tree commit diff
path: root/gcc/config/arm
diff options
context:
space:
mode:
author Daniel Jacobowitz <dan@codesourcery.com> 2009-11-11 14:23:03 +0000
committer Daniel Jacobowitz <drow@gcc.gnu.org> 2009-11-11 14:23:03 +0000
commit 814a4c3b3548e6fae72992026982d3ce8a186568 (patch)
tree 8b82758d87fb4dcc505191cc1b4a840c0e65c2cb /gcc/config/arm
parent 40f73786e9ae1b4a4fafffaff58a54d79f4a5a1a (diff)
download gcc-814a4c3b3548e6fae72992026982d3ce8a186568.zip
gcc-814a4c3b3548e6fae72992026982d3ce8a186568.tar.gz
gcc-814a4c3b3548e6fae72992026982d3ce8a186568.tar.bz2
arm.c (neon_vdup_constant, [...]): New.
gcc/
	* config/arm/arm.c (neon_vdup_constant, neon_make_constant): New.
	(neon_expand_vector_init): Use them. Also handle non-constant
	vectors with identical elements and vectors with only one
	non-constant element.
	(arm_print_operand): Handle 'y' modifier.
	* config/arm/arm-protos.h (neon_make_constant): Declare.
	* config/arm/neon.md (neon_vdup_n<mode>): Split into two patterns.
	Use VX instead of VDQW for the first one. Allow a VFP alternative
	and V32 modes for the second one.
	* config/arm/neon.ml (shape_elt): Add Alternatives.
	(ops): Use Alternatives for vdup lane instructions.
	* config/arm/neon-testgen.ml (analyze_shape): Handle Alternatives.
	* config/arm/vec-common.md (mov<mode>): Use neon_make_constant.

gcc/testsuite/
	* gcc.target/arm/neon: Regenerate generated tests.

From-SVN: r154094
Diffstat (limited to 'gcc/config/arm')
-rw-r--r-- gcc/config/arm/arm-protos.h 1
-rw-r--r-- gcc/config/arm/arm.c 190
-rw-r--r-- gcc/config/arm/neon-testgen.ml 1
-rw-r--r-- gcc/config/arm/neon.md 19
-rw-r--r-- gcc/config/arm/neon.ml 21
-rw-r--r-- gcc/config/arm/vec-common.md 5
6 files changed, 220 insertions, 17 deletions
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index ed70926..4d6d51b 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -68,6 +68,7 @@ extern char *neon_output_logic_immediate (const char *, rtx *,
enum machine_mode, int, int);
extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode,
rtx (*) (rtx, rtx, rtx));
+extern rtx neon_make_constant (rtx);
extern void neon_expand_vector_init (rtx, rtx);
extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index fc0c559..5fb2d17 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -8085,25 +8085,171 @@ neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
}
}
-/* Initialize a vector with non-constant elements. FIXME: We can do better
- than the current implementation (building a vector on the stack and then
- loading it) in many cases. See rs6000.c. */
+/* If VALS is a vector constant that can be loaded into a register
+ using VDUP, generate instructions to do so and return an RTX to
+ assign to the register. Otherwise return NULL_RTX. */
+
+static rtx
+neon_vdup_constant (rtx vals)
+{
+ enum machine_mode mode = GET_MODE (vals);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ bool all_same = true;
+ rtx x;
+ int i;
+
+ if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
+ return NULL_RTX;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ if (!all_same)
+ /* The elements are not all the same. We could handle repeating
+ patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
+ {0, C, 0, C, 0, C, 0, C} which can be loaded using
+ vdup.i16). */
+ return NULL_RTX;
+
+ /* We can load this constant by using VDUP and a constant in a
+ single ARM register. This will be cheaper than a vector
+ load. */
+
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+ return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
+ UNSPEC_VDUP_N);
+}
+
+/* Generate code to load VALS, which is a PARALLEL containing only
+ constants (for vec_init) or CONST_VECTOR, efficiently into a
+ register. Returns an RTX to copy into the register, or NULL_RTX
+ for a PARALLEL that can not be converted into a CONST_VECTOR. */
+
+rtx
+neon_make_constant (rtx vals)
+{
+ enum machine_mode mode = GET_MODE (vals);
+ rtx target;
+ rtx const_vec = NULL_RTX;
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_const = 0;
+ int i;
+
+ if (GET_CODE (vals) == CONST_VECTOR)
+ const_vec = vals;
+ else if (GET_CODE (vals) == PARALLEL)
+ {
+ /* A CONST_VECTOR must contain only CONST_INTs and
+ CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
+ Only store valid constants in a CONST_VECTOR. */
+ for (i = 0; i < n_elts; ++i)
+ {
+ rtx x = XVECEXP (vals, 0, i);
+ if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
+ n_const++;
+ }
+ if (n_const == n_elts)
+ const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
+ }
+ else
+ gcc_unreachable ();
+
+ if (const_vec != NULL
+ && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
+ /* Load using VMOV. On Cortex-A8 this takes one cycle. */
+ return const_vec;
+ else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
+ /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
+ pipeline cycle; creating the constant takes one or two ARM
+ pipeline cycles. */
+ return target;
+ else if (const_vec != NULL_RTX)
+ /* Load from constant pool. On Cortex-A8 this takes two cycles
+ (for either double or quad vectors). We can not take advantage
+ of single-cycle VLD1 because we need a PC-relative addressing
+ mode. */
+ return const_vec;
+ else
+ /* A PARALLEL containing something not valid inside CONST_VECTOR.
+ We can not construct an initializer. */
+ return NULL_RTX;
+}
+
+/* Initialize vector TARGET to VALS. */
void
neon_expand_vector_init (rtx target, rtx vals)
{
enum machine_mode mode = GET_MODE (target);
- enum machine_mode inner = GET_MODE_INNER (mode);
- unsigned int i, n_elts = GET_MODE_NUNITS (mode);
- rtx mem;
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_var = 0, one_var = -1;
+ bool all_same = true;
+ rtx x, mem;
+ int i;
- gcc_assert (VECTOR_MODE_P (mode));
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!CONSTANT_P (x))
+ ++n_var, one_var = i;
+
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+ if (n_var == 0)
+ {
+ rtx constant = neon_make_constant (vals);
+ if (constant != NULL_RTX)
+ {
+ emit_move_insn (target, constant);
+ return;
+ }
+ }
+
+ /* Splat a single non-constant element if we can. */
+ if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
+ {
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
+ UNSPEC_VDUP_N)));
+ return;
+ }
+
+ /* One field is non-constant. Load constant then overwrite varying
+ field. This is more efficient than using the stack. */
+ if (n_var == 1)
+ {
+ rtx copy = copy_rtx (vals);
+ rtvec ops;
+
+ /* Load constant part of vector, substitute neighboring value for
+ varying element. */
+ XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
+ neon_expand_vector_init (target, copy);
+
+ /* Insert variable. */
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
+ ops = gen_rtvec (3, x, target, GEN_INT (one_var));
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
+ return;
+ }
+
+ /* Construct the vector in memory one field at a time
+ and load the whole vector. */
mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
for (i = 0; i < n_elts; i++)
- emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
- XVECEXP (vals, 0, i));
-
+ emit_move_insn (adjust_address_nv (mem, inner_mode,
+ i * GET_MODE_SIZE (inner_mode)),
+ XVECEXP (vals, 0, i));
emit_move_insn (target, mem);
}
@@ -15253,6 +15399,30 @@ arm_print_operand (FILE *stream, rtx x, int code)
}
return;
+ /* Translate an S register number into a D register number and element index. */
+ case 'y':
+ {
+ int mode = GET_MODE (x);
+ int regno;
+
+ if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!VFP_REGNO_OK_FOR_SINGLE (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = regno - FIRST_VFP_REGNUM;
+ fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
+ }
+ return;
+
/* Register specifier for vld1.16/vst1.16. Translate the S register
number into a D register number and element index. */
case 'z':
diff --git a/gcc/config/arm/neon-testgen.ml b/gcc/config/arm/neon-testgen.ml
index 00a8e53..f1c431a 100644
--- a/gcc/config/arm/neon-testgen.ml
+++ b/gcc/config/arm/neon-testgen.ml
@@ -175,6 +175,7 @@ let rec analyze_shape shape =
| Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]"
| Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]"
| All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]"
+ | Alternatives (elts) -> "(" ^ (String.concat "|" (List.map analyze_shape_elt elts)) ^ ")"
in
match shape with
All (n, elt) -> commas analyze_shape_elt (n_things n elt) ""
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index ac1e9d4..43b3805 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2687,9 +2687,9 @@
})
(define_insn "neon_vdup_n<mode>"
- [(set (match_operand:VDQW 0 "s_register_operand" "=w")
- (unspec:VDQW [(match_operand:<V_elem> 1 "s_register_operand" "r")]
- UNSPEC_VDUP_N))]
+ [(set (match_operand:VX 0 "s_register_operand" "=w")
+ (unspec:VX [(match_operand:<V_elem> 1 "s_register_operand" "r")]
+ UNSPEC_VDUP_N))]
"TARGET_NEON"
"vdup%?.<V_sz_elem>\t%<V_reg>0, %1"
;; Assume this schedules like vmov.
@@ -2697,6 +2697,19 @@
(set_attr "neon_type" "neon_bp_simple")]
)
+(define_insn "neon_vdup_n<mode>"
+ [(set (match_operand:V32 0 "s_register_operand" "=w,w")
+ (unspec:V32 [(match_operand:<V_elem> 1 "s_register_operand" "r,t")]
+ UNSPEC_VDUP_N))]
+ "TARGET_NEON"
+ "@
+ vdup%?.<V_sz_elem>\t%<V_reg>0, %1
+ vdup%?.<V_sz_elem>\t%<V_reg>0, %y1"
+ ;; Assume this schedules like vmov.
+ [(set_attr "predicable" "yes")
+ (set_attr "neon_type" "neon_bp_simple")]
+)
+
(define_insn "neon_vdup_ndi"
[(set (match_operand:DI 0 "s_register_operand" "=w")
(unspec:DI [(match_operand:DI 1 "s_register_operand" "r")]
diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml
index 114097d..466b06b 100644
--- a/gcc/config/arm/neon.ml
+++ b/gcc/config/arm/neon.ml
@@ -68,6 +68,7 @@ type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
| Element_of_dreg (* Used for "lane" variants. *)
| Element_of_qreg (* Likewise. *)
| All_elements_of_dreg (* Used for "dup" variants. *)
+ | Alternatives of shape_elt list (* Used for multiple valid operands *)
type shape_form = All of int * shape_elt
| Long
@@ -1008,7 +1009,10 @@ let ops =
pf_su_8_64;
(* Set all lanes to the same value. *)
- Vdup_n, [],
+ Vdup_n,
+ [Disassembles_as [Use_operands [| Dreg;
+ Alternatives [ Corereg;
+ Element_of_dreg ] |]]],
Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
pf_su_8_32;
Vdup_n,
@@ -1016,7 +1020,10 @@ let ops =
Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
[S64; U64];
- Vdup_n, [],
+ Vdup_n,
+ [Disassembles_as [Use_operands [| Qreg;
+ Alternatives [ Corereg;
+ Element_of_dreg ] |]]],
Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
pf_su_8_32;
Vdup_n,
@@ -1028,7 +1035,10 @@ let ops =
(* These are just aliases for the above. *)
Vmov_n,
- [Builtin_name "vdup_n"],
+ [Builtin_name "vdup_n";
+ Disassembles_as [Use_operands [| Dreg;
+ Alternatives [ Corereg;
+ Element_of_dreg ] |]]],
Use_operands [| Dreg; Corereg |],
"vmov_n", bits_1, pf_su_8_32;
Vmov_n,
@@ -1038,7 +1048,10 @@ let ops =
Use_operands [| Dreg; Corereg |],
"vmov_n", notype_1, [S64; U64];
Vmov_n,
- [Builtin_name "vdupQ_n"],
+ [Builtin_name "vdupQ_n";
+ Disassembles_as [Use_operands [| Qreg;
+ Alternatives [ Corereg;
+ Element_of_dreg ] |]]],
Use_operands [| Qreg; Corereg |],
"vmovQ_n", bits_1, pf_su_8_32;
Vmov_n,
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index a01efb4..d33fdf9 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -42,6 +42,11 @@
{
if (GET_CODE (operands[0]) != REG)
operands[1] = force_reg (<MODE>mode, operands[1]);
+ else if (TARGET_NEON && CONSTANT_P (operands[1]))
+ {
+ operands[1] = neon_make_constant (operands[1]);
+ gcc_assert (operands[1] != NULL_RTX);
+ }
}
})