diff options
author | J"orn Rennecke <joern.rennecke@superh.com> | 2002-07-03 09:49:46 +0000 |
---|---|---|
committer | Joern Rennecke <amylaar@gcc.gnu.org> | 2002-07-03 10:49:46 +0100 |
commit | 34a80643d8c74d96786fa19eec8a39fc94ac10b4 (patch) | |
tree | 4e697d4885e1475c42eeedf76c38e9b4ccdff584 | |
parent | 032b2b29901082316175de7c7edb51c927ab06aa (diff) | |
download | gcc-34a80643d8c74d96786fa19eec8a39fc94ac10b4.zip gcc-34a80643d8c74d96786fa19eec8a39fc94ac10b4.tar.gz gcc-34a80643d8c74d96786fa19eec8a39fc94ac10b4.tar.bz2 |
optabs.c (expand_vector_binop): Don't store using a SUBREG smaller than UNITS_PER_WORD...
gcc:
* optabs.c (expand_vector_binop): Don't store using a SUBREG smaller
than UNITS_PER_WORD, unless this is little endian and the first unit
in this word. Let extract_bit_field decide how to load an element.
Force arguments to matching mode.
(expand_vector_unop): Likewise.
* simplify-rtx.c (simplify_subreg): Don't assume that all vectors
consist of word_mode elements.
* c-typeck.c (build_binary_op): Allow vector types for BIT_AND_EXPR,
BIT_ANDTC_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR.
(build_unary_op): Allow vector types for BIT_NOT_EXPR.
* emit-rtl.c (gen_lowpart_common): Use simplify_gen_subreg for
CONST_VECTOR.
* optabs.c (expand_vector_binop): Try to perform operation in
smaller vector modes with same inner size. Add handling of AND, IOR
and XOR. Reject expansion to inner-mode sized scalars when using
OPTAB_DIRECT. Use simplify_gen_subreg on constants.
(expand_vector_unop): Try to perform operation in smaller vector
modes with same inner size. Add handling of one's complement.
When there is no vector negate operation, try a vector subtract
operation. Use simplify_gen_subreg on constants.
* simplify-rtx.c (simplify_subreg): Add capability to convert vector
constants into smaller vectors with same inner mode, and to
integer CONST_DOUBLEs.
gcc/testsuite:
* gcc.c-torture/execute/simd-1.c (main): Also test &, |, ^, ~.
* gcc.c-torture/execute/simd-2.c (main): Likewise.
From-SVN: r55209
-rw-r--r-- | gcc/ChangeLog | 27 | ||||
-rw-r--r-- | gcc/c-typeck.c | 13 | ||||
-rw-r--r-- | gcc/emit-rtl.c | 2 | ||||
-rw-r--r-- | gcc/optabs.c | 162 | ||||
-rw-r--r-- | gcc/simplify-rtx.c | 56 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.c-torture/execute/simd-1.c | 19 | ||||
-rw-r--r-- | gcc/testsuite/gcc.c-torture/execute/simd-2.c | 19 |
8 files changed, 259 insertions, 44 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8aaa224..9218d3f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,30 @@ +Wed Jul 3 10:24:16 2002 J"orn Rennecke <joern.rennecke@superh.com> + + * optabs.c (expand_vector_binop): Don't store using a SUBREG smaller + than UNITS_PER_WORD, unless this is little endian and the first unit + in this word. Let extract_bit_field decide how to load an element. + Force arguments to matching mode. + (expand_vector_unop): Likewise. + + * simplify-rtx.c (simplify_subreg): Don't assume that all vectors + consist of word_mode elements. + * c-typeck.c (build_binary_op): Allow vector types for BIT_AND_EXPR, + BIT_ANDTC_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR. + (build_unary_op): Allow vector types for BIT_NOT_EPR. + * emit-rtl.c (gen_lowpart_common): Use simplify_gen_subreg for + CONST_VECTOR. + * optabs.c (expand_vector_binop): Try to perform operation in + smaller vector modes with same inner size. Add handling of AND, IOR + and XOR. Reject expansion to inner-mode sized scalars when using + OPTAB_DIRECT. Use simplify_gen_subreg on constants. + (expand_vector_unop): Try to perform operation in smaller vector + modes with same inner size. Add handling of one's complement. + When there is no vector negate operation, try a vector subtract + operation. Use simplify_gen_subreg on constants. + * simplify-rtx.c (simplify_subreg): Add capability to convert vector + constants into smaller vectors with same inner mode, and to + integer CONST_DOUBLEs. + 2002-07-02 Kaveh R. Ghazi <ghazi@caip.rutgers.edu> * c-parse.in (parsing_iso_function_signature): New variable. 
diff --git a/gcc/c-typeck.c b/gcc/c-typeck.c index d26f877..0a70363 100644 --- a/gcc/c-typeck.c +++ b/gcc/c-typeck.c @@ -2071,6 +2071,8 @@ build_binary_op (code, orig_op0, orig_op1, convert_p) case BIT_XOR_EXPR: if (code0 == INTEGER_TYPE && code1 == INTEGER_TYPE) shorten = -1; + else if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE) + common = 1; break; case TRUNC_MOD_EXPR: @@ -2778,7 +2780,12 @@ build_unary_op (code, xarg, flag) break; case BIT_NOT_EXPR: - if (typecode == COMPLEX_TYPE) + if (typecode == INTEGER_TYPE || typecode == VECTOR_TYPE) + { + if (!noconvert) + arg = default_conversion (arg); + } + else if (typecode == COMPLEX_TYPE) { code = CONJ_EXPR; if (pedantic) @@ -2786,13 +2793,11 @@ build_unary_op (code, xarg, flag) if (!noconvert) arg = default_conversion (arg); } - else if (typecode != INTEGER_TYPE) + else { error ("wrong type argument to bit-complement"); return error_mark_node; } - else if (!noconvert) - arg = default_conversion (arg); break; case ABS_EXPR: diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c index 379de96..0ee4f11 100644 --- a/gcc/emit-rtl.c +++ b/gcc/emit-rtl.c @@ -991,7 +991,7 @@ gen_lowpart_common (mode, x) return gen_rtx_fmt_e (GET_CODE (x), mode, XEXP (x, 0)); } else if (GET_CODE (x) == SUBREG || GET_CODE (x) == REG - || GET_CODE (x) == CONCAT) + || GET_CODE (x) == CONCAT || GET_CODE (x) == CONST_VECTOR) return simplify_gen_subreg (mode, x, GET_MODE (x), offset); /* If X is a CONST_INT or a CONST_DOUBLE, extract the appropriate bits from the low-order part of the constant. 
*/ diff --git a/gcc/optabs.c b/gcc/optabs.c index d3568b2..c923061 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -1923,40 +1923,86 @@ expand_vector_binop (mode, binoptab, op0, op1, target, unsignedp, methods) int unsignedp; enum optab_methods methods; { - enum machine_mode submode; - int elts, subsize, i; + enum machine_mode submode, tmode; + int size, elts, subsize, subbitsize, i; rtx t, a, b, res, seq; enum mode_class class; class = GET_MODE_CLASS (mode); + size = GET_MODE_SIZE (mode); submode = GET_MODE_INNER (mode); - subsize = GET_MODE_UNIT_SIZE (mode); - elts = GET_MODE_NUNITS (mode); - - if (!target) - target = gen_reg_rtx (mode); - - start_sequence (); - /* FIXME: Optimally, we should try to do this in narrower vector - modes if available. E.g. When trying V8SI, try V4SI, else - V2SI, else decay into SI. */ + /* Search for the widest vector mode with the same inner mode that is + still narrower than MODE and that allows to open-code this operator. + Note, if we find such a mode and the handler later decides it can't + do the expansion, we'll be called recursively with the narrower mode. */ + for (tmode = GET_CLASS_NARROWEST_MODE (class); + GET_MODE_SIZE (tmode) < GET_MODE_SIZE (mode); + tmode = GET_MODE_WIDER_MODE (tmode)) + { + if (GET_MODE_INNER (tmode) == GET_MODE_INNER (mode) + && binoptab->handlers[(int) tmode].insn_code != CODE_FOR_nothing) + submode = tmode; + } switch (binoptab->code) { + case AND: + case IOR: + case XOR: + tmode = int_mode_for_mode (mode); + if (tmode != BLKmode) + submode = tmode; case PLUS: case MINUS: case MULT: case DIV: + subsize = GET_MODE_SIZE (submode); + subbitsize = GET_MODE_BITSIZE (submode); + elts = size / subsize; + + /* If METHODS is OPTAB_DIRECT, we don't insist on the exact mode, + but that we operate on more than one element at a time. */ + if (subsize == GET_MODE_UNIT_SIZE (mode) && methods == OPTAB_DIRECT) + return 0; + + start_sequence (); + + /* Errors can leave us with a const0_rtx as operand. 
*/ + if (GET_MODE (op0) != mode) + op0 = copy_to_mode_reg (mode, op0); + if (GET_MODE (op1) != mode) + op1 = copy_to_mode_reg (mode, op1); + + if (!target) + target = gen_reg_rtx (mode); + for (i = 0; i < elts; ++i) { - t = simplify_gen_subreg (submode, target, mode, - i * subsize); - a = simplify_gen_subreg (submode, op0, mode, - i * subsize); - b = simplify_gen_subreg (submode, op1, mode, - i * subsize); + /* If this is part of a register, and not the first item in the + word, we can't store using a SUBREG - that would clobber + previous results. + And storing with a SUBREG is only possible for the least + significant part, hence we can't do it for big endian + (unless we want to permute the evaluation order. */ + if (GET_CODE (target) == REG + && (BYTES_BIG_ENDIAN + ? subsize < UNITS_PER_WORD + : ((i * subsize) % UNITS_PER_WORD) != 0)) + t = NULL_RTX; + else + t = simplify_gen_subreg (submode, target, mode, i * subsize); + if (CONSTANT_P (op0)) + a = simplify_gen_subreg (submode, op0, mode, i * subsize); + else + a = extract_bit_field (op0, subbitsize, i * subbitsize, unsignedp, + NULL_RTX, submode, submode, size); + if (CONSTANT_P (op1)) + b = simplify_gen_subreg (submode, op1, mode, i * subsize); + else + b = extract_bit_field (op1, subbitsize, i * subbitsize, unsignedp, + NULL_RTX, submode, submode, size); if (binoptab->code == DIV) { @@ -1974,7 +2020,11 @@ expand_vector_binop (mode, binoptab, op0, op1, target, unsignedp, methods) if (res == 0) break; - emit_move_insn (t, res); + if (t) + emit_move_insn (t, res); + else + store_bit_field (target, subbitsize, i * subbitsize, submode, res, + size); } break; @@ -1999,31 +2049,83 @@ expand_vector_unop (mode, unoptab, op0, target, unsignedp) rtx target; int unsignedp; { - enum machine_mode submode; - int elts, subsize, i; + enum machine_mode submode, tmode; + int size, elts, subsize, subbitsize, i; rtx t, a, res, seq; + size = GET_MODE_SIZE (mode); submode = GET_MODE_INNER (mode); - subsize = GET_MODE_UNIT_SIZE 
(mode); - elts = GET_MODE_NUNITS (mode); + + /* Search for the widest vector mode with the same inner mode that is + still narrower than MODE and that allows to open-code this operator. + Note, if we find such a mode and the handler later decides it can't + do the expansion, we'll be called recursively with the narrower mode. */ + for (tmode = GET_CLASS_NARROWEST_MODE (GET_MODE_CLASS (mode)); + GET_MODE_SIZE (tmode) < GET_MODE_SIZE (mode); + tmode = GET_MODE_WIDER_MODE (tmode)) + { + if (GET_MODE_INNER (tmode) == GET_MODE_INNER (mode) + && unoptab->handlers[(int) tmode].insn_code != CODE_FOR_nothing) + submode = tmode; + } + /* If there is no negate operation, try doing a subtract from zero. */ + if (unoptab == neg_optab && GET_MODE_CLASS (submode) == MODE_INT) + { + rtx temp; + temp = expand_binop (mode, sub_optab, CONST0_RTX (mode), op0, + target, unsignedp, OPTAB_DIRECT); + if (temp) + return temp; + } + + if (unoptab == one_cmpl_optab) + { + tmode = int_mode_for_mode (mode); + if (tmode != BLKmode) + submode = tmode; + } + + subsize = GET_MODE_SIZE (submode); + subbitsize = GET_MODE_BITSIZE (submode); + elts = size / subsize; + + /* Errors can leave us with a const0_rtx as operand. */ + if (GET_MODE (op0) != mode) + op0 = copy_to_mode_reg (mode, op0); if (!target) target = gen_reg_rtx (mode); start_sequence (); - /* FIXME: Optimally, we should try to do this in narrower vector - modes if available. E.g. When trying V8SI, try V4SI, else - V2SI, else decay into SI. */ - for (i = 0; i < elts; ++i) { - t = simplify_gen_subreg (submode, target, mode, i * subsize); - a = simplify_gen_subreg (submode, op0, mode, i * subsize); + /* If this is part of a register, and not the first item in the + word, we can't store using a SUBREG - that would clobber + previous results. + And storing with a SUBREG is only possible for the least + significant part, hence we can't do it for big endian + (unless we want to permute the evaluation order. 
*/ + if (GET_CODE (target) == REG + && (BYTES_BIG_ENDIAN + ? subsize < UNITS_PER_WORD + : ((i * subsize) % UNITS_PER_WORD) != 0)) + t = NULL_RTX; + else + t = simplify_gen_subreg (submode, target, mode, i * subsize); + if (CONSTANT_P (op0)) + a = simplify_gen_subreg (submode, op0, mode, i * subsize); + else + a = extract_bit_field (op0, subbitsize, i * subbitsize, unsignedp, + t, submode, submode, size); res = expand_unop (submode, unoptab, a, t, unsignedp); - emit_move_insn (t, res); + if (t) + emit_move_insn (t, res); + else + store_bit_field (target, subbitsize, i * subbitsize, submode, res, + size); } seq = get_insns (); diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index cdc6043..63961dd 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -2271,19 +2271,57 @@ simplify_subreg (outermode, op, innermode, byte) /* Simplify subregs of vector constants. */ if (GET_CODE (op) == CONST_VECTOR) { - int offset = byte / UNITS_PER_WORD; + int elt_size = GET_MODE_SIZE (GET_MODE_INNER (innermode)); + int offset = byte / elt_size; rtx elt; - /* This shouldn't happen, but let's not do anything stupid. */ - if (GET_MODE_INNER (innermode) != outermode) - return NULL_RTX; - - elt = CONST_VECTOR_ELT (op, offset); + if (GET_MODE_INNER (innermode) == outermode) + { + elt = CONST_VECTOR_ELT (op, offset); - /* ?? We probably don't need this copy_rtx because constants - can be shared. ?? */ + /* ?? We probably don't need this copy_rtx because constants + can be shared. ?? 
*/ - return copy_rtx (elt); + return copy_rtx (elt); + } + else if (GET_MODE_INNER (innermode) == GET_MODE_INNER (outermode) + && GET_MODE_SIZE (innermode) > GET_MODE_SIZE (outermode)) + { + return (gen_rtx_CONST_VECTOR + (outermode, + gen_rtvec_v (GET_MODE_NUNITS (outermode), + &CONST_VECTOR_ELT (op, offset)))); + } + else if (GET_MODE_CLASS (outermode) == MODE_INT + && (GET_MODE_SIZE (outermode) % elt_size == 0)) + { + /* This happens when the target register size is smaller then + the vector mode, and we synthesize operations with vectors + of elements that are smaller than the register size. */ + HOST_WIDE_INT sum = 0, high = 0; + unsigned n_elts = (GET_MODE_SIZE (outermode) / elt_size); + unsigned i = BYTES_BIG_ENDIAN ? offset : offset + n_elts - 1; + unsigned step = BYTES_BIG_ENDIAN ? 1 : -1; + int shift = BITS_PER_UNIT * elt_size; + + for (; n_elts--; i += step) + { + elt = CONST_VECTOR_ELT (op, i); + if (GET_CODE (elt) != CONST_INT) + return NULL_RTX; + high = high << shift | sum >> (HOST_BITS_PER_WIDE_INT - shift); + sum = (sum << shift) + INTVAL (elt); + } + if (GET_MODE_BITSIZE (outermode) <= HOST_BITS_PER_WIDE_INT) + return GEN_INT (trunc_int_for_mode (sum, outermode)); + else if (GET_MODE_BITSIZE (outermode) == 2* HOST_BITS_PER_WIDE_INT) + return immed_double_const (high, sum, outermode); + else + return NULL_RTX; + } + else + /* This shouldn't happen, but let's not do anything stupid. */ + return NULL_RTX; } /* Attempt to simplify constant to non-SUBREG expression. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 06b43e1..9daae1a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +Wed Jul 3 10:25:41 2002 J"orn Rennecke <joern.rennecke@superh.com> + + * gcc.c-torture/execute/simd-1.c (main): Also test &, |, ^, ~. + * gcc.c-torture/execute/simd-2.c (main): Likewise. + 2002-07-02 Kaveh R. Ghazi <ghazi@caip.rutgers.edu> * gcc.dg/cpp/tr-warn2.c: Use traditional C style function definitions. 
diff --git a/gcc/testsuite/gcc.c-torture/execute/simd-1.c b/gcc/testsuite/gcc.c-torture/execute/simd-1.c index cb503e4..a93a619 100644 --- a/gcc/testsuite/gcc.c-torture/execute/simd-1.c +++ b/gcc/testsuite/gcc.c-torture/execute/simd-1.c @@ -45,10 +45,29 @@ main () verify (res.i[0], res.i[1], res.i[2], res.i[3], 15, 7, 7, 6); + k = i & j; + res.v = k; + + verify (res.i[0], res.i[1], res.i[2], res.i[3], 2, 4, 20, 8); + + k = i | j; + res.v = k; + + verify (res.i[0], res.i[1], res.i[2], res.i[3], 158, 109, 150, 222); + + k = i ^ j; + res.v = k; + + verify (res.i[0], res.i[1], res.i[2], res.i[3], 156, 105, 130, 214); + k = -i; res.v = k; verify (res.i[0], res.i[1], res.i[2], res.i[3], -150, -100, -150, -200); + k = ~i; + res.v = k; + verify (res.i[0], res.i[1], res.i[2], res.i[3], -151, -101, -151, -201); + exit (0); } diff --git a/gcc/testsuite/gcc.c-torture/execute/simd-2.c b/gcc/testsuite/gcc.c-torture/execute/simd-2.c index a49d9da..2d1b922 100644 --- a/gcc/testsuite/gcc.c-torture/execute/simd-2.c +++ b/gcc/testsuite/gcc.c-torture/execute/simd-2.c @@ -44,10 +44,29 @@ main () verify (res.i[0], res.i[1], res.i[2], res.i[3], 15, 7, 7, 6); + k = i & j; + res.v = k; + + verify (res.i[0], res.i[1], res.i[2], res.i[3], 2, 4, 20, 8); + + k = i | j; + res.v = k; + + verify (res.i[0], res.i[1], res.i[2], res.i[3], 158, 109, 150, 222); + + k = i ^ j; + res.v = k; + + verify (res.i[0], res.i[1], res.i[2], res.i[3], 156, 105, 130, 214); + k = -i; res.v = k; verify (res.i[0], res.i[1], res.i[2], res.i[3], -150, -100, -150, -200); + k = ~i; + res.v = k; + verify (res.i[0], res.i[1], res.i[2], res.i[3], -151, -101, -151, -201); + exit (0); } |