diff options
author | liuhongt <hongtao.liu@intel.com> | 2023-11-28 14:46:21 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2023-11-30 15:39:31 +0800 |
commit | a1a3939bea5b0d9cbd3465d96e7e4a5222ae6c48 (patch) | |
tree | 2d54e8442de5de3192c3576f2faa6de058add5db | |
parent | a0905fd7405cd7a1b60c52242a218a52f392b49d (diff) | |
download | gcc-a1a3939bea5b0d9cbd3465d96e7e4a5222ae6c48.zip gcc-a1a3939bea5b0d9cbd3465d96e7e4a5222ae6c48.tar.gz gcc-a1a3939bea5b0d9cbd3465d96e7e4a5222ae6c48.tar.bz2 |
Use vec_extract_lo instead of subreg in reduc_<code>_scal_m.
The loop vectorizer uses vec_perm to select the lower part of a vector;
there can be some redundancy when using subreg in
reduc_<code>_scal_m, because RTL CSE can't figure out that a vec_select
of the lower part is just a subreg.
I'm trying to canonicalize vec_select to subreg like aarch64 did, but
there are many regressions: some are easy to fix, while others require
middle-end adjustments.
So for simplicity, the patch uses vec_select instead of subreg in
reduc_<code>_scal_m.
gcc/ChangeLog:
* config/i386/sse.md (reduc_plus_scal_<mode>): Use
vec_extract_lo instead of subreg.
(reduc_<code>_scal_<mode>): Ditto.
(reduc_<code>_scal_<mode>): Ditto.
(reduc_<code>_scal_<mode>): Ditto.
(reduc_<code>_scal_<mode>): Ditto.
-rw-r--r-- | gcc/config/i386/sse.md | 47 |
1 files changed, 26 insertions, 21 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 4f51169..5e0e0e9 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3480,11 +3480,12 @@ "" { rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode); - rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]); - emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3)); - emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2)); + rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode); + emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1])); + emit_insn (gen_add<ssehalfvecmodelower>3 (tmp3, tmp, tmp2)); + emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp3)); DONE; }) @@ -3528,11 +3529,12 @@ "" { rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_<code><ssehalfvecmodelower>3 - (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1]))); - emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2)); + rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode); + emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1])); + emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp3, tmp, tmp2)); + emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp3)); DONE; }) @@ -3543,11 +3545,12 @@ "TARGET_AVX512F" { rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_<code><ssehalfvecmodelower>3 - (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1]))); - emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2)); + rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode); + emit_insn 
(gen_vec_extract_hi_<mode> (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1])); + emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp3, tmp, tmp2)); + emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp3)); DONE; }) @@ -3558,14 +3561,15 @@ "TARGET_AVX2" { rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_<code><ssehalfvecmodelower>3 - (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1]))); rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode); - ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2); + emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1])); + emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp3, tmp, tmp2)); + rtx tmp4 = gen_reg_rtx (<ssehalfvecmode>mode); + ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp4, tmp3); emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower> - (operands[0], tmp3, const0_rtx)); + (operands[0], tmp4, const0_rtx)); DONE; }) @@ -3637,11 +3641,12 @@ "" { rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode); - emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode); - rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]); - emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp2, tmp, tmp3)); - emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2)); + rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode); + emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); + emit_insn (gen_vec_extract_lo_<mode> (tmp2, operands[1])); + emit_insn (gen_<code><ssehalfvecmodelower>3 (tmp3, tmp, tmp2)); + emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp3)); DONE; }) |