aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2023-11-28 14:46:21 +0800
committerliuhongt <hongtao.liu@intel.com>2023-11-30 15:39:31 +0800
commita1a3939bea5b0d9cbd3465d96e7e4a5222ae6c48 (patch)
tree2d54e8442de5de3192c3576f2faa6de058add5db
parenta0905fd7405cd7a1b60c52242a218a52f392b49d (diff)
downloadgcc-a1a3939bea5b0d9cbd3465d96e7e4a5222ae6c48.zip
gcc-a1a3939bea5b0d9cbd3465d96e7e4a5222ae6c48.tar.gz
gcc-a1a3939bea5b0d9cbd3465d96e7e4a5222ae6c48.tar.bz2
Use vec_extract_lo instead of subreg in reduc_<code>_scal_m.
The loop vectorizer uses vec_perm to select the lower part of a vector, so there can be some redundancy when using subreg in reduc_<code>_scal_m, because RTL CSE can't figure out that a vec_select of the lower part is just a subreg. I'm trying to canonicalize vec_select to subreg as aarch64 did, but there are many regressions: some are easy to fix, while others require middle-end adjustments. So, for simplicity, the patch uses vec_select instead of subreg in reduc_<code>_scal_m. gcc/ChangeLog: * config/i386/sse.md: (reduc_plus_scal_<mode>): Use vec_extract_lo instead of subreg. (reduc_<code>_scal_<mode>): Ditto. (reduc_<code>_scal_<mode>): Ditto. (reduc_<code>_scal_<mode>): Ditto. (reduc_<code>_scal_<mode>): Ditto.
-rw-r--r-- gcc/config/i386/sse.md 47
1 files changed, 26 insertions, 21 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 4f51169..5e0e0e9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3480,11 +3480,12 @@
""
{
rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
- emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
- rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
- emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
- emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
+ rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
+ emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
+ emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1]));
+ emit_insn (gen_add<ssehalfvecmodelower>3 (tmp3, tmp, tmp2));
+ emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp3));
DONE;
})
@@ -3528,11 +3529,12 @@
""
{
rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
- emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
- emit_insn (gen_<code><ssehalfvecmodelower>3
- (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
- emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
+ rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
+ emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
+ emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1]));
+ emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp3, tmp, tmp2));
+ emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp3));
DONE;
})
@@ -3543,11 +3545,12 @@
"TARGET_AVX512F"
{
rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
- emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
- emit_insn (gen_<code><ssehalfvecmodelower>3
- (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
- emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
+ rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
+ emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
+ emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1]));
+ emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp3, tmp, tmp2));
+ emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp3));
DONE;
})
@@ -3558,14 +3561,15 @@
"TARGET_AVX2"
{
rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
- emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
- emit_insn (gen_<code><ssehalfvecmodelower>3
- (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
- ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2);
+ emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
+ emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1]));
+ emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp3, tmp, tmp2));
+ rtx tmp4 = gen_reg_rtx (<ssehalfvecmode>mode);
+ ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp4, tmp3);
emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower>
- (operands[0], tmp3, const0_rtx));
+ (operands[0], tmp4, const0_rtx));
DONE;
})
@@ -3637,11 +3641,12 @@
""
{
rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
- emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
- rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
- emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
- emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
+ rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
+ emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
+ emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1]));
+ emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp3, tmp, tmp2));
+ emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp3));
DONE;
})