diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2024-06-08 19:47:08 -0600 |
---|---|---|
committer | Jeff Law <jlaw@ventanamicro.com> | 2024-06-08 19:47:08 -0600 |
commit | 2277f987979445f4390a5c6e092d79e04814d641 (patch) | |
tree | ecc7e95e2f37a72fcbfdf86834671bdfc2c11181 /gcc | |
parent | e1a2423934404083f85cbbf932dd263c1bf1bbfb (diff) | |
download | gcc-2277f987979445f4390a5c6e092d79e04814d641.zip gcc-2277f987979445f4390a5c6e092d79e04814d641.tar.gz gcc-2277f987979445f4390a5c6e092d79e04814d641.tar.bz2 |
[middle-end PATCH] Prefer PLUS over IOR in RTL expansion of multi-word shifts/rotates.
This patch tweaks RTL expansion of multi-word shifts and rotates to use
PLUS rather than IOR for disjunctive operations. During expansion of
these operations, the middle-end creates RTL like (X<<C1) | (Y>>C2)
where the constants C1 and C2 guarantee that bits don't overlap.
Hence the IOR can be performed by any any_or_plus operation, such as
IOR, XOR or PLUS; for word-size operations where carry chains aren't
an issue these should all be equally fast (single-cycle) instructions.
The benefit of this change is that targets with shift-and-add insns,
like x86's lea, can make use of the LSHIFT-ADD form.
An example of a backend that benefits is ARC, which is demonstrated
by these two simple functions:
unsigned long long foo(unsigned long long x) { return x<<2; }
which with -O2 is currently compiled to:
foo: lsr r2,r0,30
asl_s r1,r1,2
asl_s r0,r0,2
j_s.d [blink]
or_s r1,r1,r2
with this patch becomes:
foo: lsr r2,r0,30
add2 r1,r2,r1
j_s.d [blink]
asl_s r0,r0,2
unsigned long long bar(unsigned long long x) { return (x<<2)|(x>>62); }
which with -O2 is currently compiled to 6 insns + return:
bar: lsr r12,r0,30
asl_s r3,r1,2
asl_s r0,r0,2
lsr_s r1,r1,30
or_s r0,r0,r1
j_s.d [blink]
or r1,r12,r3
with this patch becomes 4 insns + return:
bar: lsr r3,r1,30
lsr r2,r0,30
add2 r1,r2,r1
j_s.d [blink]
add2 r0,r3,r0
This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures. Ok for mainline?
gcc/ChangeLog
* expmed.cc (expand_shift_1): Use add_optab instead of ior_optab
to generate PLUS instead of IOR when unioning disjoint bitfields.
* optabs.cc (expand_subword_shift): Likewise.
(expand_binop): Likewise for double-word rotate.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/expmed.cc | 12 | ||||
-rw-r--r-- | gcc/optabs.cc | 8 |
2 files changed, 11 insertions, 9 deletions
diff --git a/gcc/expmed.cc b/gcc/expmed.cc index 50d2276..9ba0169 100644 --- a/gcc/expmed.cc +++ b/gcc/expmed.cc @@ -2616,10 +2616,11 @@ expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted, else if (methods == OPTAB_LIB_WIDEN) { /* If we have been unable to open-code this by a rotation, - do it as the IOR of two shifts. I.e., to rotate A - by N bits, compute + do it as the IOR or PLUS of two shifts. I.e., to rotate + A by N bits, compute (A << N) | ((unsigned) A >> ((-N) & (C - 1))) - where C is the bitsize of A. + where C is the bitsize of A. If N cannot be zero, + use PLUS instead of IOR. It is theoretically possible that the target machine might not be able to perform either shift and hence we would @@ -2656,8 +2657,9 @@ expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted, temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR, mode, shifted, other_amount, subtarget, 1); - return expand_binop (mode, ior_optab, temp, temp1, target, - unsignedp, methods); + return expand_binop (mode, + CONST_INT_P (op1) ? add_optab : ior_optab, + temp, temp1, target, unsignedp, methods); } temp = expand_binop (mode, diff --git a/gcc/optabs.cc b/gcc/optabs.cc index e791388..78cd9ef 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -566,8 +566,8 @@ expand_subword_shift (scalar_int_mode op1_mode, optab binoptab, if (tmp == 0) return false; - /* Now OR in the bits carried over from OUTOF_INPUT. */ - if (!force_expand_binop (word_mode, ior_optab, tmp, carries, + /* Now OR/PLUS in the bits carried over from OUTOF_INPUT. 
*/ + if (!force_expand_binop (word_mode, add_optab, tmp, carries, into_target, unsignedp, methods)) return false; } @@ -1937,7 +1937,7 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1, NULL_RTX, unsignedp, next_methods); if (into_temp1 != 0 && into_temp2 != 0) - inter = expand_binop (word_mode, ior_optab, into_temp1, into_temp2, + inter = expand_binop (word_mode, add_optab, into_temp1, into_temp2, into_target, unsignedp, next_methods); else inter = 0; @@ -1953,7 +1953,7 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1, NULL_RTX, unsignedp, next_methods); if (inter != 0 && outof_temp1 != 0 && outof_temp2 != 0) - inter = expand_binop (word_mode, ior_optab, + inter = expand_binop (word_mode, add_optab, outof_temp1, outof_temp2, outof_target, unsignedp, next_methods); |