aboutsummaryrefslogtreecommitdiff
path: root/gcc/expr.cc
diff options
context:
space:
mode:
authorTamar Christina <tamar.christina@arm.com>2022-11-14 15:43:48 +0000
committerTamar Christina <tamar.christina@arm.com>2022-11-14 17:41:32 +0000
commit8beff04a325ba3c3707d8a6dd954ec881193d655 (patch)
tree5500b57540f9a13b22f7dae5a4e9db85349049c0 /gcc/expr.cc
parentb2bb611d90d01f64a2456c29de2a2ca1211ac134 (diff)
downloadgcc-8beff04a325ba3c3707d8a6dd954ec881193d655.zip
gcc-8beff04a325ba3c3707d8a6dd954ec881193d655.tar.gz
gcc-8beff04a325ba3c3707d8a6dd954ec881193d655.tar.bz2
middle-end: Support not decomposing specific divisions during vectorization.
In plenty of image and video processing code it's common to modify pixel values by a widening operation and then scale them back into range by dividing by 255, e.g.: x = y / (2 ^ (bitsize (y) / 2) - 1). This patch adds a new target hook can_special_div_by_const, similar to can_vec_perm, which can be called to check if a target will handle a particular division in a special way in the back-end. The vectorizer will then vectorize the division using the standard tree code and at expansion time the hook is called again to generate the code for the division. A lot of the changes in the patch are to pass down the tree operands in all paths that can lead to the divmod expansion so that the target hook always has the type of the expression you're expanding, since the types can change the expansion. gcc/ChangeLog: * expmed.h (expand_divmod): Pass tree operands down in addition to RTX. * expmed.cc (expand_divmod): Likewise. * explow.cc (round_push, align_dynamic_address): Likewise. * expr.cc (force_operand, expand_expr_divmod): Likewise. * optabs.cc (expand_doubleword_mod, expand_doubleword_divmod): Likewise. * target.h: Include tree-core. * target.def (can_special_div_by_const): New. * targhooks.cc (default_can_special_div_by_const): New. * targhooks.h (default_can_special_div_by_const): New. * tree-vect-generic.cc (expand_vector_operation): Use it. * doc/tm.texi.in: Document it. * doc/tm.texi: Regenerate. * tree-vect-patterns.cc (vect_recog_divmod_pattern): Check for support. * tree-vect-stmts.cc (vectorizable_operation): Likewise. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-div-bitmask-1.c: New test. * gcc.dg/vect/vect-div-bitmask-2.c: New test. * gcc.dg/vect/vect-div-bitmask-3.c: New test. * gcc.dg/vect/vect-div-bitmask.h: New file.
Diffstat (limited to 'gcc/expr.cc')
-rw-r--r--gcc/expr.cc24
1 files changed, 14 insertions, 10 deletions
diff --git a/gcc/expr.cc b/gcc/expr.cc
index c6917fb..d940743 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -8204,16 +8204,17 @@ force_operand (rtx value, rtx target)
return expand_divmod (0,
FLOAT_MODE_P (GET_MODE (value))
? RDIV_EXPR : TRUNC_DIV_EXPR,
- GET_MODE (value), op1, op2, target, 0);
+ GET_MODE (value), NULL, NULL, op1, op2,
+ target, 0);
case MOD:
- return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), op1, op2,
- target, 0);
+ return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), NULL, NULL,
+ op1, op2, target, 0);
case UDIV:
- return expand_divmod (0, TRUNC_DIV_EXPR, GET_MODE (value), op1, op2,
- target, 1);
+ return expand_divmod (0, TRUNC_DIV_EXPR, GET_MODE (value), NULL, NULL,
+ op1, op2, target, 1);
case UMOD:
- return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), op1, op2,
- target, 1);
+ return expand_divmod (1, TRUNC_MOD_EXPR, GET_MODE (value), NULL, NULL,
+ op1, op2, target, 1);
case ASHIFTRT:
return expand_simple_binop (GET_MODE (value), code, op1, op2,
target, 0, OPTAB_LIB_WIDEN);
@@ -9166,11 +9167,13 @@ expand_expr_divmod (tree_code code, machine_mode mode, tree treeop0,
bool speed_p = optimize_insn_for_speed_p ();
do_pending_stack_adjust ();
start_sequence ();
- rtx uns_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 1);
+ rtx uns_ret = expand_divmod (mod_p, code, mode, treeop0, treeop1,
+ op0, op1, target, 1);
rtx_insn *uns_insns = get_insns ();
end_sequence ();
start_sequence ();
- rtx sgn_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 0);
+ rtx sgn_ret = expand_divmod (mod_p, code, mode, treeop0, treeop1,
+ op0, op1, target, 0);
rtx_insn *sgn_insns = get_insns ();
end_sequence ();
unsigned uns_cost = seq_cost (uns_insns, speed_p);
@@ -9192,7 +9195,8 @@ expand_expr_divmod (tree_code code, machine_mode mode, tree treeop0,
emit_insn (sgn_insns);
return sgn_ret;
}
- return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp);
+ return expand_divmod (mod_p, code, mode, treeop0, treeop1,
+ op0, op1, target, unsignedp);
}
rtx