aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2024-08-25 09:14:34 -0600
committerRoger Sayle <roger@nextmovesoftware.com>2024-08-25 09:14:34 -0600
commit07d62a1711f3e3bbdd2146ab5914d3bc5e246509 (patch)
treef901f84e14c6e5577cc3fd22bd113e049af32c64 /gcc/config
parent70edccf88738ec204036e498a4a50c46e5e4f0c0 (diff)
downloadgcc-07d62a1711f3e3bbdd2146ab5914d3bc5e246509.zip
gcc-07d62a1711f3e3bbdd2146ab5914d3bc5e246509.tar.gz
gcc-07d62a1711f3e3bbdd2146ab5914d3bc5e246509.tar.bz2
i386: Update STV's gains for TImode arithmetic right shifts on AVX2.
This patch tweaks timode_scalar_chain::compute_convert_gain to better
reflect the expansion of V1TImode arithmetic right shifts by the i386
backend.  The comment "see ix86_expand_v1ti_ashiftrt" appears after
"case ASHIFTRT" in compute_convert_gain, and the changes below attempt
to better match the logic used there.

The original motivating example is:

    __int128 m1;
    void foo()
    {
      m1 = (m1 << 8) >> 8;
    }

which with -O2 -mavx2 we fail to convert to vector form due to the
inappropriate cost of the arithmetic right shift.

    Instruction gain -16 for 7: {r103:TI=r101:TI>>0x8;clobber flags:CC;}
    Total gain: -3
    Chain #1 conversion is not profitable

This is reporting that the ASHIFTRT is four instructions worse using
vectors than in scalar form, which is incorrect as the AVX2 expansion
of this shift only requires three instructions (and the scalar form
requires two).

With more accurate costs in timode_scalar_chain::compute_convert_gain
we now see (with -O2 -mavx2):

    Instruction gain -4 for 7: {r103:TI=r101:TI>>0x8;clobber flags:CC;}
    Total gain: 9
    Converting chain #1...

which results in:

    foo:
            vmovdqa   m1(%rip), %xmm0
            vpslldq   $1, %xmm0, %xmm0
            vpsrad    $8, %xmm0, %xmm1
            vpsrldq   $1, %xmm0, %xmm0
            vpblendd  $7, %xmm0, %xmm1, %xmm0
            vmovdqa   %xmm0, m1(%rip)
            ret

2024-08-25  Roger Sayle  <roger@nextmovesoftware.com>
            Uros Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog
        * config/i386/i386-features.cc (compute_convert_gain)
        <case ASHIFTRT>: Update to match ix86_expand_v1ti_ashiftrt.
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/i386/i386-features.cc21
1 files changed, 13 insertions, 8 deletions
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 7e80e7b..ca902ec 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -1650,23 +1650,28 @@ timode_scalar_chain::compute_convert_gain ()
else if (op1val == 64)
vcost = COSTS_N_INSNS (3);
else if (op1val == 96)
- vcost = COSTS_N_INSNS (4);
- else if (op1val >= 111)
vcost = COSTS_N_INSNS (3);
- else if (TARGET_AVX2 && op1val == 32)
+ else if (op1val >= 111)
vcost = COSTS_N_INSNS (3);
else if (TARGET_SSE4_1 && op1val == 32)
- vcost = COSTS_N_INSNS (4);
+ vcost = COSTS_N_INSNS (3);
+ else if (TARGET_SSE4_1
+ && (op1val == 8 || op1val == 16 || op1val == 24))
+ vcost = COSTS_N_INSNS (3);
else if (op1val >= 96)
- vcost = COSTS_N_INSNS (5);
+ vcost = COSTS_N_INSNS (4);
+ else if (TARGET_SSE4_1 && (op1val == 28 || op1val == 80))
+ vcost = COSTS_N_INSNS (4);
else if ((op1val & 7) == 0)
- vcost = COSTS_N_INSNS (6);
+ vcost = COSTS_N_INSNS (5);
else if (TARGET_AVX2 && op1val < 32)
vcost = COSTS_N_INSNS (6);
+ else if (TARGET_SSE4_1 && op1val < 15)
+ vcost = COSTS_N_INSNS (6);
else if (op1val == 1 || op1val >= 64)
- vcost = COSTS_N_INSNS (9);
+ vcost = COSTS_N_INSNS (8);
else
- vcost = COSTS_N_INSNS (10);
+ vcost = COSTS_N_INSNS (9);
}
igain = scost - vcost;
break;