aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJ"orn Rennecke <joern.rennecke@superh.com>2002-07-03 09:49:46 +0000
committerJoern Rennecke <amylaar@gcc.gnu.org>2002-07-03 10:49:46 +0100
commit34a80643d8c74d96786fa19eec8a39fc94ac10b4 (patch)
tree4e697d4885e1475c42eeedf76c38e9b4ccdff584
parent032b2b29901082316175de7c7edb51c927ab06aa (diff)
downloadgcc-34a80643d8c74d96786fa19eec8a39fc94ac10b4.zip
gcc-34a80643d8c74d96786fa19eec8a39fc94ac10b4.tar.gz
gcc-34a80643d8c74d96786fa19eec8a39fc94ac10b4.tar.bz2
optabs.c (expand_vector_binop): Don't store using a SUBREG smaller than UNITS_PER_WORD...
gcc: * optabs.c (expand_vector_binop): Don't store using a SUBREG smaller than UNITS_PER_WORD, unless this is little endian and the first unit in this word. Let extract_bit_field decide how to load an element. Force arguments to matching mode. (expand_vector_unop): Likewise. * simplify-rtx.c (simplify_subreg): Don't assume that all vectors consist of word_mode elements. * c-typeck.c (build_binary_op): Allow vector types for BIT_AND_EXPR, BIT_ANDTC_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR. (build_unary_op): Allow vector types for BIT_NOT_EXPR. * emit-rtl.c (gen_lowpart_common): Use simplify_gen_subreg for CONST_VECTOR. * optabs.c (expand_vector_binop): Try to perform operation in smaller vector modes with same inner size. Add handling of AND, IOR and XOR. Reject expansion to inner-mode sized scalars when using OPTAB_DIRECT. Use simplify_gen_subreg on constants. (expand_vector_unop): Try to perform operation in smaller vector modes with same inner size. Add handling of one's complement. When there is no vector negate operation, try a vector subtract operation. Use simplify_gen_subreg on constants. * simplify-rtx.c (simplify_subreg): Add capability to convert vector constants into smaller vectors with same inner mode, and to integer CONST_DOUBLEs. gcc/testsuite: * gcc.c-torture/execute/simd-1.c (main): Also test &, |, ^, ~. * gcc.c-torture/execute/simd-2.c (main): Likewise. From-SVN: r55209
-rw-r--r--gcc/ChangeLog27
-rw-r--r--gcc/c-typeck.c13
-rw-r--r--gcc/emit-rtl.c2
-rw-r--r--gcc/optabs.c162
-rw-r--r--gcc/simplify-rtx.c56
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gcc.c-torture/execute/simd-1.c19
-rw-r--r--gcc/testsuite/gcc.c-torture/execute/simd-2.c19
8 files changed, 259 insertions, 44 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8aaa224..9218d3f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,30 @@
+Wed Jul 3 10:24:16 2002 J"orn Rennecke <joern.rennecke@superh.com>
+
+ * optabs.c (expand_vector_binop): Don't store using a SUBREG smaller
+ than UNITS_PER_WORD, unless this is little endian and the first unit
+ in this word. Let extract_bit_field decide how to load an element.
+ Force arguments to matching mode.
+ (expand_vector_unop): Likewise.
+
+ * simplify-rtx.c (simplify_subreg): Don't assume that all vectors
+ consist of word_mode elements.
+ * c-typeck.c (build_binary_op): Allow vector types for BIT_AND_EXPR,
+ BIT_ANDTC_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR.
+ (build_unary_op): Allow vector types for BIT_NOT_EXPR.
+ * emit-rtl.c (gen_lowpart_common): Use simplify_gen_subreg for
+ CONST_VECTOR.
+ * optabs.c (expand_vector_binop): Try to perform operation in
+ smaller vector modes with same inner size. Add handling of AND, IOR
+ and XOR. Reject expansion to inner-mode sized scalars when using
+ OPTAB_DIRECT. Use simplify_gen_subreg on constants.
+ (expand_vector_unop): Try to perform operation in smaller vector
+ modes with same inner size. Add handling of one's complement.
+ When there is no vector negate operation, try a vector subtract
+ operation. Use simplify_gen_subreg on constants.
+ * simplify-rtx.c (simplify_subreg): Add capability to convert vector
+ constants into smaller vectors with same inner mode, and to
+ integer CONST_DOUBLEs.
+
2002-07-02 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* c-parse.in (parsing_iso_function_signature): New variable.
diff --git a/gcc/c-typeck.c b/gcc/c-typeck.c
index d26f877..0a70363 100644
--- a/gcc/c-typeck.c
+++ b/gcc/c-typeck.c
@@ -2071,6 +2071,8 @@ build_binary_op (code, orig_op0, orig_op1, convert_p)
case BIT_XOR_EXPR:
if (code0 == INTEGER_TYPE && code1 == INTEGER_TYPE)
shorten = -1;
+ else if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE)
+ common = 1;
break;
case TRUNC_MOD_EXPR:
@@ -2778,7 +2780,12 @@ build_unary_op (code, xarg, flag)
break;
case BIT_NOT_EXPR:
- if (typecode == COMPLEX_TYPE)
+ if (typecode == INTEGER_TYPE || typecode == VECTOR_TYPE)
+ {
+ if (!noconvert)
+ arg = default_conversion (arg);
+ }
+ else if (typecode == COMPLEX_TYPE)
{
code = CONJ_EXPR;
if (pedantic)
@@ -2786,13 +2793,11 @@ build_unary_op (code, xarg, flag)
if (!noconvert)
arg = default_conversion (arg);
}
- else if (typecode != INTEGER_TYPE)
+ else
{
error ("wrong type argument to bit-complement");
return error_mark_node;
}
- else if (!noconvert)
- arg = default_conversion (arg);
break;
case ABS_EXPR:
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index 379de96..0ee4f11 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -991,7 +991,7 @@ gen_lowpart_common (mode, x)
return gen_rtx_fmt_e (GET_CODE (x), mode, XEXP (x, 0));
}
else if (GET_CODE (x) == SUBREG || GET_CODE (x) == REG
- || GET_CODE (x) == CONCAT)
+ || GET_CODE (x) == CONCAT || GET_CODE (x) == CONST_VECTOR)
return simplify_gen_subreg (mode, x, GET_MODE (x), offset);
/* If X is a CONST_INT or a CONST_DOUBLE, extract the appropriate bits
from the low-order part of the constant. */
diff --git a/gcc/optabs.c b/gcc/optabs.c
index d3568b2..c923061 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -1923,40 +1923,86 @@ expand_vector_binop (mode, binoptab, op0, op1, target, unsignedp, methods)
int unsignedp;
enum optab_methods methods;
{
- enum machine_mode submode;
- int elts, subsize, i;
+ enum machine_mode submode, tmode;
+ int size, elts, subsize, subbitsize, i;
rtx t, a, b, res, seq;
enum mode_class class;
class = GET_MODE_CLASS (mode);
+ size = GET_MODE_SIZE (mode);
submode = GET_MODE_INNER (mode);
- subsize = GET_MODE_UNIT_SIZE (mode);
- elts = GET_MODE_NUNITS (mode);
-
- if (!target)
- target = gen_reg_rtx (mode);
-
- start_sequence ();
- /* FIXME: Optimally, we should try to do this in narrower vector
- modes if available. E.g. When trying V8SI, try V4SI, else
- V2SI, else decay into SI. */
+ /* Search for the widest vector mode with the same inner mode that is
+ still narrower than MODE and that allows this operator to be open-coded.
+ Note, if we find such a mode and the handler later decides it can't
+ do the expansion, we'll be called recursively with the narrower mode. */
+ for (tmode = GET_CLASS_NARROWEST_MODE (class);
+ GET_MODE_SIZE (tmode) < GET_MODE_SIZE (mode);
+ tmode = GET_MODE_WIDER_MODE (tmode))
+ {
+ if (GET_MODE_INNER (tmode) == GET_MODE_INNER (mode)
+ && binoptab->handlers[(int) tmode].insn_code != CODE_FOR_nothing)
+ submode = tmode;
+ }
switch (binoptab->code)
{
+ case AND:
+ case IOR:
+ case XOR:
+ tmode = int_mode_for_mode (mode);
+ if (tmode != BLKmode)
+ submode = tmode;
case PLUS:
case MINUS:
case MULT:
case DIV:
+ subsize = GET_MODE_SIZE (submode);
+ subbitsize = GET_MODE_BITSIZE (submode);
+ elts = size / subsize;
+
+ /* If METHODS is OPTAB_DIRECT, we don't insist on the exact mode,
+ but we do insist on operating on more than one element at a time. */
+ if (subsize == GET_MODE_UNIT_SIZE (mode) && methods == OPTAB_DIRECT)
+ return 0;
+
+ start_sequence ();
+
+ /* Errors can leave us with a const0_rtx as operand. */
+ if (GET_MODE (op0) != mode)
+ op0 = copy_to_mode_reg (mode, op0);
+ if (GET_MODE (op1) != mode)
+ op1 = copy_to_mode_reg (mode, op1);
+
+ if (!target)
+ target = gen_reg_rtx (mode);
+
for (i = 0; i < elts; ++i)
{
- t = simplify_gen_subreg (submode, target, mode,
- i * subsize);
- a = simplify_gen_subreg (submode, op0, mode,
- i * subsize);
- b = simplify_gen_subreg (submode, op1, mode,
- i * subsize);
+ /* If this is part of a register, and not the first item in the
+ word, we can't store using a SUBREG - that would clobber
+ previous results.
+ And storing with a SUBREG is only possible for the least
+ significant part, hence we can't do it for big endian
+ (unless we want to permute the evaluation order). */
+ if (GET_CODE (target) == REG
+ && (BYTES_BIG_ENDIAN
+ ? subsize < UNITS_PER_WORD
+ : ((i * subsize) % UNITS_PER_WORD) != 0))
+ t = NULL_RTX;
+ else
+ t = simplify_gen_subreg (submode, target, mode, i * subsize);
+ if (CONSTANT_P (op0))
+ a = simplify_gen_subreg (submode, op0, mode, i * subsize);
+ else
+ a = extract_bit_field (op0, subbitsize, i * subbitsize, unsignedp,
+ NULL_RTX, submode, submode, size);
+ if (CONSTANT_P (op1))
+ b = simplify_gen_subreg (submode, op1, mode, i * subsize);
+ else
+ b = extract_bit_field (op1, subbitsize, i * subbitsize, unsignedp,
+ NULL_RTX, submode, submode, size);
if (binoptab->code == DIV)
{
@@ -1974,7 +2020,11 @@ expand_vector_binop (mode, binoptab, op0, op1, target, unsignedp, methods)
if (res == 0)
break;
- emit_move_insn (t, res);
+ if (t)
+ emit_move_insn (t, res);
+ else
+ store_bit_field (target, subbitsize, i * subbitsize, submode, res,
+ size);
}
break;
@@ -1999,31 +2049,83 @@ expand_vector_unop (mode, unoptab, op0, target, unsignedp)
rtx target;
int unsignedp;
{
- enum machine_mode submode;
- int elts, subsize, i;
+ enum machine_mode submode, tmode;
+ int size, elts, subsize, subbitsize, i;
rtx t, a, res, seq;
+ size = GET_MODE_SIZE (mode);
submode = GET_MODE_INNER (mode);
- subsize = GET_MODE_UNIT_SIZE (mode);
- elts = GET_MODE_NUNITS (mode);
+
+ /* Search for the widest vector mode with the same inner mode that is
+ still narrower than MODE and that allows this operator to be open-coded.
+ Note, if we find such a mode and the handler later decides it can't
+ do the expansion, we'll be called recursively with the narrower mode. */
+ for (tmode = GET_CLASS_NARROWEST_MODE (GET_MODE_CLASS (mode));
+ GET_MODE_SIZE (tmode) < GET_MODE_SIZE (mode);
+ tmode = GET_MODE_WIDER_MODE (tmode))
+ {
+ if (GET_MODE_INNER (tmode) == GET_MODE_INNER (mode)
+ && unoptab->handlers[(int) tmode].insn_code != CODE_FOR_nothing)
+ submode = tmode;
+ }
+ /* If there is no negate operation, try doing a subtract from zero. */
+ if (unoptab == neg_optab && GET_MODE_CLASS (submode) == MODE_INT)
+ {
+ rtx temp;
+ temp = expand_binop (mode, sub_optab, CONST0_RTX (mode), op0,
+ target, unsignedp, OPTAB_DIRECT);
+ if (temp)
+ return temp;
+ }
+
+ if (unoptab == one_cmpl_optab)
+ {
+ tmode = int_mode_for_mode (mode);
+ if (tmode != BLKmode)
+ submode = tmode;
+ }
+
+ subsize = GET_MODE_SIZE (submode);
+ subbitsize = GET_MODE_BITSIZE (submode);
+ elts = size / subsize;
+
+ /* Errors can leave us with a const0_rtx as operand. */
+ if (GET_MODE (op0) != mode)
+ op0 = copy_to_mode_reg (mode, op0);
if (!target)
target = gen_reg_rtx (mode);
start_sequence ();
- /* FIXME: Optimally, we should try to do this in narrower vector
- modes if available. E.g. When trying V8SI, try V4SI, else
- V2SI, else decay into SI. */
-
for (i = 0; i < elts; ++i)
{
- t = simplify_gen_subreg (submode, target, mode, i * subsize);
- a = simplify_gen_subreg (submode, op0, mode, i * subsize);
+ /* If this is part of a register, and not the first item in the
+ word, we can't store using a SUBREG - that would clobber
+ previous results.
+ And storing with a SUBREG is only possible for the least
+ significant part, hence we can't do it for big endian
+ (unless we want to permute the evaluation order). */
+ if (GET_CODE (target) == REG
+ && (BYTES_BIG_ENDIAN
+ ? subsize < UNITS_PER_WORD
+ : ((i * subsize) % UNITS_PER_WORD) != 0))
+ t = NULL_RTX;
+ else
+ t = simplify_gen_subreg (submode, target, mode, i * subsize);
+ if (CONSTANT_P (op0))
+ a = simplify_gen_subreg (submode, op0, mode, i * subsize);
+ else
+ a = extract_bit_field (op0, subbitsize, i * subbitsize, unsignedp,
+ t, submode, submode, size);
res = expand_unop (submode, unoptab, a, t, unsignedp);
- emit_move_insn (t, res);
+ if (t)
+ emit_move_insn (t, res);
+ else
+ store_bit_field (target, subbitsize, i * subbitsize, submode, res,
+ size);
}
seq = get_insns ();
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index cdc6043..63961dd 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -2271,19 +2271,57 @@ simplify_subreg (outermode, op, innermode, byte)
/* Simplify subregs of vector constants. */
if (GET_CODE (op) == CONST_VECTOR)
{
- int offset = byte / UNITS_PER_WORD;
+ int elt_size = GET_MODE_SIZE (GET_MODE_INNER (innermode));
+ int offset = byte / elt_size;
rtx elt;
- /* This shouldn't happen, but let's not do anything stupid. */
- if (GET_MODE_INNER (innermode) != outermode)
- return NULL_RTX;
-
- elt = CONST_VECTOR_ELT (op, offset);
+ if (GET_MODE_INNER (innermode) == outermode)
+ {
+ elt = CONST_VECTOR_ELT (op, offset);
- /* ?? We probably don't need this copy_rtx because constants
- can be shared. ?? */
+ /* ?? We probably don't need this copy_rtx because constants
+ can be shared. ?? */
- return copy_rtx (elt);
+ return copy_rtx (elt);
+ }
+ else if (GET_MODE_INNER (innermode) == GET_MODE_INNER (outermode)
+ && GET_MODE_SIZE (innermode) > GET_MODE_SIZE (outermode))
+ {
+ return (gen_rtx_CONST_VECTOR
+ (outermode,
+ gen_rtvec_v (GET_MODE_NUNITS (outermode),
+ &CONST_VECTOR_ELT (op, offset))));
+ }
+ else if (GET_MODE_CLASS (outermode) == MODE_INT
+ && (GET_MODE_SIZE (outermode) % elt_size == 0))
+ {
+ /* This happens when the target register size is smaller than
+ the vector mode, and we synthesize operations with vectors
+ of elements that are smaller than the register size. */
+ HOST_WIDE_INT sum = 0, high = 0;
+ unsigned n_elts = (GET_MODE_SIZE (outermode) / elt_size);
+ unsigned i = BYTES_BIG_ENDIAN ? offset : offset + n_elts - 1;
+ unsigned step = BYTES_BIG_ENDIAN ? 1 : -1;
+ int shift = BITS_PER_UNIT * elt_size;
+
+ for (; n_elts--; i += step)
+ {
+ elt = CONST_VECTOR_ELT (op, i);
+ if (GET_CODE (elt) != CONST_INT)
+ return NULL_RTX;
+ high = high << shift | sum >> (HOST_BITS_PER_WIDE_INT - shift);
+ sum = (sum << shift) + INTVAL (elt);
+ }
+ if (GET_MODE_BITSIZE (outermode) <= HOST_BITS_PER_WIDE_INT)
+ return GEN_INT (trunc_int_for_mode (sum, outermode));
+ else if (GET_MODE_BITSIZE (outermode) == 2* HOST_BITS_PER_WIDE_INT)
+ return immed_double_const (high, sum, outermode);
+ else
+ return NULL_RTX;
+ }
+ else
+ /* This shouldn't happen, but let's not do anything stupid. */
+ return NULL_RTX;
}
/* Attempt to simplify constant to non-SUBREG expression. */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 06b43e1..9daae1a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+Wed Jul 3 10:25:41 2002 J"orn Rennecke <joern.rennecke@superh.com>
+
+ * gcc.c-torture/execute/simd-1.c (main): Also test &, |, ^, ~.
+ * gcc.c-torture/execute/simd-2.c (main): Likewise.
+
2002-07-02 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* gcc.dg/cpp/tr-warn2.c: Use traditional C style function definitions.
diff --git a/gcc/testsuite/gcc.c-torture/execute/simd-1.c b/gcc/testsuite/gcc.c-torture/execute/simd-1.c
index cb503e4..a93a619 100644
--- a/gcc/testsuite/gcc.c-torture/execute/simd-1.c
+++ b/gcc/testsuite/gcc.c-torture/execute/simd-1.c
@@ -45,10 +45,29 @@ main ()
verify (res.i[0], res.i[1], res.i[2], res.i[3], 15, 7, 7, 6);
+ k = i & j;
+ res.v = k;
+
+ verify (res.i[0], res.i[1], res.i[2], res.i[3], 2, 4, 20, 8);
+
+ k = i | j;
+ res.v = k;
+
+ verify (res.i[0], res.i[1], res.i[2], res.i[3], 158, 109, 150, 222);
+
+ k = i ^ j;
+ res.v = k;
+
+ verify (res.i[0], res.i[1], res.i[2], res.i[3], 156, 105, 130, 214);
+
k = -i;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3],
-150, -100, -150, -200);
+ k = ~i;
+ res.v = k;
+ verify (res.i[0], res.i[1], res.i[2], res.i[3], -151, -101, -151, -201);
+
exit (0);
}
diff --git a/gcc/testsuite/gcc.c-torture/execute/simd-2.c b/gcc/testsuite/gcc.c-torture/execute/simd-2.c
index a49d9da..2d1b922 100644
--- a/gcc/testsuite/gcc.c-torture/execute/simd-2.c
+++ b/gcc/testsuite/gcc.c-torture/execute/simd-2.c
@@ -44,10 +44,29 @@ main ()
verify (res.i[0], res.i[1], res.i[2], res.i[3], 15, 7, 7, 6);
+ k = i & j;
+ res.v = k;
+
+ verify (res.i[0], res.i[1], res.i[2], res.i[3], 2, 4, 20, 8);
+
+ k = i | j;
+ res.v = k;
+
+ verify (res.i[0], res.i[1], res.i[2], res.i[3], 158, 109, 150, 222);
+
+ k = i ^ j;
+ res.v = k;
+
+ verify (res.i[0], res.i[1], res.i[2], res.i[3], 156, 105, 130, 214);
+
k = -i;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3],
-150, -100, -150, -200);
+ k = ~i;
+ res.v = k;
+ verify (res.i[0], res.i[1], res.i[2], res.i[3], -151, -101, -151, -201);
+
exit (0);
}