author     Uros Bizjak <ubizjak@gmail.com>  2021-11-29 22:16:12 +0100
committer  Uros Bizjak <ubizjak@gmail.com>  2021-11-29 22:17:20 +0100
commit     ca5667e867252db3c8642ee90f55427149cd92b6 (patch)
tree       63b20ecc038f1f5c3ec9a21db9943143180d3eaf /gcc/config/i386
parent     f81c5a86dc03352682ac8ae3daa6ec507528a265 (diff)
i386: Fix and improve movhi_internal and movhf_internal some more.
An (*v,C) alternative can be added to movhi_internal to directly load
HImode constant 0 to xmm register. Also, V4SFmode moves can be used
for xmm->xmm moves instead of TImode moves when optimizing for size.
Fix invalid %vpinsrw insn template, which needs to duplicate %xmm
register for AVX targets.
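As a rough illustration of the instructions involved (not part of the patch; registers and exact encodings are indicative only):

    pxor    %xmm0, %xmm0             # (*v,C) alternative: HImode constant 0 loaded straight into an xmm register
    movaps  %xmm1, %xmm0             # V4SFmode xmm->xmm move (0F 28), one byte shorter than...
    movdqa  %xmm1, %xmm0             # ...the TImode form (66 0F 6F) it replaces when optimizing for size
    pinsrw  $0, %eax, %xmm0          # SSE template: the destination register also acts as the merge source
    vpinsrw $0, %eax, %xmm0, %xmm0   # AVX template is non-destructive, so the register must appear twice (%d0)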
Optimize GPR moves in movhf_internal in the same way as in movhi_internal.
Fix pinsrw and pextrw templates for AVX targets. Use sselog1
instead of sselog type. Also, handle TARGET_SSE_PARTIAL_REG_DEPENDENCY
and TARGET_SSE_SPLIT_REGS targets.
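A hedged sketch of the GPR-side change (register choice below is arbitrary): rather than a plain 16-bit move, the patterns can emit a zero-extending or full 32-bit move to avoid partial-register stalls:

    movw    %ax, %dx      # plain HImode move; may incur partial-register stalls
    movzwl  %ax, %edx     # movz{wl|x} template: zero-extend into a 32-bit register (TARGET_MOVX)
    movl    %eax, %edx     # mov{l} template: full SImode move when the upper bits do not matter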
2021-11-29 Uroš Bizjak <ubizjak@gmail.com>
gcc/ChangeLog:
PR target/102811
* config/i386/i386.md (*movhi_internal): Introduce (*v,C) alternative.
Do not allocate non-GPR registers. Optimize xmm->xmm moves when
optimizing for size. Fix vpinsrw insn template.
(*movhf_internal): Fix pinsrw and pextrw insn templates for
AVX targets. Use sselog1 type instead of sselog. Optimize GPR moves.
Optimize xmm->xmm moves for TARGET_SSE_PARTIAL_REG_DEPENDENCY
and TARGET_SSE_SPLIT_REGS targets.
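For the last two entries, the reason to use a V4SFmode move for xmm->xmm HFmode copies on those tunings is, roughly, to avoid partial-register effects; a sketch (registers illustrative, actual choice depends on -mtune):

    movss   %xmm1, %xmm0     # scalar move merges into the low dword and keeps a dependency on the old %xmm0
    movaps  %xmm1, %xmm0     # full-width V4SFmode move, chosen on TARGET_SSE_PARTIAL_REG_DEPENDENCY
                             # and TARGET_SSE_SPLIT_REGS tunings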
Diffstat (limited to 'gcc/config/i386')
-rw-r--r--   gcc/config/i386/i386.md  |  211
1 file changed, 128 insertions, 83 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a384dae..c88374c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2495,11 +2495,12 @@
            (symbol_ref "true")))])
 
 (define_insn "*movhi_internal"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k ,*r,*m,*k,?r,?v,*v,*v,*m")
-       (match_operand:HI 1 "general_operand"      "r ,rn,rm,rn,*r,*km,*k,*k,CBC,v, r, v, m, v"))]
+  [(set (match_operand:HI 0 "nonimmediate_operand"
+          "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*v,*v,*v,m")
+       (match_operand:HI 1 "general_operand"
+          "r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r ,C ,*v,m ,*v"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && ix86_hardreg_mov_ok (operands[0], operands[1])"
-
 {
   switch (get_attr_type (insn))
     {
@@ -2526,10 +2527,13 @@
       return ix86_output_ssemov (insn, operands);
 
     case TYPE_SSELOG1:
+      if (satisfies_constraint_C (operands[1]))
+       return standard_sse_constant_opcode (insn, operands);
+
       if (SSE_REG_P (operands[0]))
        return MEM_P (operands[1])
-              ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}"
-              : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+              ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
+              : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}";
       else
        return MEM_P (operands[0])
               ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
@@ -2550,23 +2554,25 @@
     }
 }
   [(set (attr "isa")
-       (cond [(eq_attr "alternative" "9,10,11,12")
+       (cond [(eq_attr "alternative" "9,10,11,12,13")
                 (const_string "sse2")
-              (eq_attr "alternative" "13")
+              (eq_attr "alternative" "14")
                 (const_string "sse4")
              ]
              (const_string "*")))
    (set (attr "type")
-     (cond [(eq_attr "alternative" "9,10,12,13")
+     (cond [(eq_attr "alternative" "4,5,6,7")
+             (const_string "mskmov")
+           (eq_attr "alternative" "8")
+             (const_string "msklog")
+           (eq_attr "alternative" "9,10,13,14")
             (if_then_else (match_test "TARGET_AVX512FP16")
                           (const_string "ssemov")
                           (const_string "sselog1"))
-           (eq_attr "alternative" "4,5,6,7")
-             (const_string "mskmov")
            (eq_attr "alternative" "11")
+             (const_string "sselog1")
+           (eq_attr "alternative" "12")
             (const_string "ssemov")
-           (eq_attr "alternative" "8")
-             (const_string "msklog")
            (match_test "optimize_function_for_size_p (cfun)")
             (const_string "imov")
            (and (eq_attr "alternative" "0")
@@ -2581,33 +2587,54 @@
             (const_string "imovx")
           ]
           (const_string "imov")))
-   (set (attr "prefix")
-     (cond [(eq_attr "alternative" "9,10,11,12,13")
-             (const_string "maybe_evex")
-           (eq_attr "alternative" "4,5,6,7,8")
-             (const_string "vex")
-          ]
-          (const_string "orig")))
-   (set (attr "mode")
-     (cond [(eq_attr "type" "imovx")
-             (const_string "SI")
-           (eq_attr "alternative" "9,10,12,13")
-             (if_then_else (match_test "TARGET_AVX512FP16")
-                           (const_string "HI")
-                           (const_string "TI"))
-           (eq_attr "alternative" "11")
-             (if_then_else (match_test "TARGET_AVX512FP16")
-                           (const_string "HF")
-                           (const_string "SF"))
-           (and (eq_attr "alternative" "1,2")
-                (match_operand:HI 1 "aligned_operand"))
-             (const_string "SI")
-           (and (eq_attr "alternative" "0")
-                (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
-                     (not (match_test "TARGET_HIMODE_MATH"))))
-             (const_string "SI")
+   (set (attr "prefix")
+     (cond [(eq_attr "alternative" "4,5,6,7,8")
+             (const_string "vex")
+           (eq_attr "alternative" "9,10,11,12,13,14")
+             (const_string "maybe_evex")
+          ]
+          (const_string "orig")))
+   (set (attr "mode")
+     (cond [(eq_attr "alternative" "9,10,13,14")
+             (if_then_else (match_test "TARGET_AVX512FP16")
+                           (const_string "HI")
+                           (const_string "TI"))
+           (eq_attr "alternative" "11")
+             (cond [(match_test "TARGET_AVX")
+                      (const_string "TI")
+                    (ior (not (match_test "TARGET_SSE2"))
+                         (match_test "optimize_function_for_size_p (cfun)"))
+                      (const_string "V4SF")
+                   ]
+                   (const_string "TI"))
+           (eq_attr "alternative" "12")
+             (cond [(match_test "TARGET_AVX512FP16")
+                      (const_string "HI")
+                    (match_test "TARGET_AVX")
+                      (const_string "TI")
+                    (ior (not (match_test "TARGET_SSE2"))
+                         (match_test "optimize_function_for_size_p (cfun)"))
+                      (const_string "V4SF")
+                   ]
+                   (const_string "TI"))
+           (eq_attr "type" "imovx")
+             (const_string "SI")
+           (and (eq_attr "alternative" "1,2")
+                (match_operand:HI 1 "aligned_operand"))
+             (const_string "SI")
+           (and (eq_attr "alternative" "0")
+                (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
+                     (not (match_test "TARGET_HIMODE_MATH"))))
+             (const_string "SI")
           ]
-          (const_string "HI")))])
+          (const_string "HI")))
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "9")
+             (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
+           (eq_attr "alternative" "10")
+             (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
+          ]
+          (symbol_ref "true")))])
 
 ;; Situation is quite tricky about when to choose full sized (SImode) move
 ;; over QImode moves.  For Q_REG -> Q_REG move we use full size only for
@@ -3774,92 +3801,110 @@
 (define_insn "*movhf_internal"
   [(set (match_operand:HF 0 "nonimmediate_operand"
-         "=?r,?m,v,v,?r,m,?v,v")
+         "=?r,?r,?r,?m,v,v,?r,m,?v,v")
        (match_operand:HF 1 "general_operand"
-         "rmF,rF,C,v, v,v, r,m"))]
+         "r ,F ,m ,rF,C,v, v,v,r ,m"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (lra_in_progress
       || reload_completed
      || !CONST_DOUBLE_P (operands[1])
-      || (TARGET_SSE && TARGET_SSE_MATH
+      || (TARGET_SSE2
         && standard_sse_constant_p (operands[1], HFmode) == 1)
      || memory_operand (operands[0], HFmode))"
 {
   switch (get_attr_type (insn))
     {
-    case TYPE_IMOV:
-      return "mov{w}\t{%1, %0|%0, %1}";
-
-    case TYPE_SSELOG1:
-      return standard_sse_constant_opcode (insn, operands);
+    case TYPE_IMOVX:
+      /* movzwl is faster than movw on p2 due to partial word stalls,
+        though not as fast as an aligned movl.  */
+      return "movz{wl|x}\t{%1, %k0|%k0, %1}";
 
     case TYPE_SSEMOV:
       return ix86_output_ssemov (insn, operands);
 
-    case TYPE_SSELOG:
+    case TYPE_SSELOG1:
+      if (satisfies_constraint_C (operands[1]))
+       return standard_sse_constant_opcode (insn, operands);
+
      if (SSE_REG_P (operands[0]))
       return MEM_P (operands[1])
-              ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
-              : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+              ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
+              : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}";
      else
       return MEM_P (operands[0])
-              ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
-              : "pextrw\t{$0, %1, %k0|%k0, %1, 0}";
+              ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
+              : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
 
     default:
-      gcc_unreachable ();
+      if (get_attr_mode (insn) == MODE_SI)
+       return "mov{l}\t{%k1, %k0|%k0, %k1}";
+      else
+       return "mov{w}\t{%1, %0|%0, %1}";
     }
 }
   [(set (attr "isa")
-       (cond [(eq_attr "alternative" "2,3,4,6,7")
+       (cond [(eq_attr "alternative" "4,5,6,8,9")
                 (const_string "sse2")
-              (eq_attr "alternative" "5")
+              (eq_attr "alternative" "7")
                 (const_string "sse4")
             ]
             (const_string "*")))
    (set (attr "type")
-     (cond [(eq_attr "alternative" "0,1")
-             (const_string "imov")
-           (eq_attr "alternative" "2")
+     (cond [(eq_attr "alternative" "4")
             (const_string "sselog1")
-           (eq_attr "alternative" "4,5,6,7")
+           (eq_attr "alternative" "5")
+             (const_string "ssemov")
+           (eq_attr "alternative" "6,7,8,9")
             (if_then_else (match_test ("TARGET_AVX512FP16"))
                           (const_string "ssemov")
-                          (const_string "sselog"))
-          ]
-          (const_string "ssemov")))
-   (set (attr "memory")
-     (cond [(eq_attr "alternative" "4,6")
-             (const_string "none")
-           (eq_attr "alternative" "5")
-             (const_string "store")
-           (eq_attr "alternative" "7")
-             (const_string "load")
-          ]
-          (const_string "*")))
+                          (const_string "sselog1"))
+           (match_test "optimize_function_for_size_p (cfun)")
+             (const_string "imov")
+           (and (eq_attr "alternative" "0")
+                (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
+                     (not (match_test "TARGET_HIMODE_MATH"))))
+             (const_string "imov")
+           (and (eq_attr "alternative" "1,2")
+                (match_operand:HI 1 "aligned_operand"))
+             (const_string "imov")
+           (and (match_test "TARGET_MOVX")
+                (eq_attr "alternative" "0,2"))
+             (const_string "imovx")
+          ]
+          (const_string "imov")))
    (set (attr "prefix")
-     (cond [(eq_attr "alternative" "0,1")
-             (const_string "orig")
+     (cond [(eq_attr "alternative" "4,5,6,7,8,9")
+             (const_string "maybe_vex")
          ]
-          (const_string "maybe_vex")))
+          (const_string "orig")))
    (set (attr "mode")
-     (cond [(eq_attr "alternative" "0,1")
-             (const_string "HI")
-           (eq_attr "alternative" "2")
+     (cond [(eq_attr "alternative" "4")
             (const_string "V4SF")
-           (eq_attr "alternative" "4,5,6,7")
+           (eq_attr "alternative" "6,7,8,9")
             (if_then_else (match_test "TARGET_AVX512FP16")
                           (const_string "HI")
                           (const_string "TI"))
-           (eq_attr "alternative" "3")
-             (if_then_else
-               (match_test "TARGET_AVX512FP16")
-               (const_string "HF")
-               (const_string "SF"))
+           (eq_attr "alternative" "5")
+             (cond [(match_test "TARGET_AVX512FP16")
+                      (const_string "HF")
+                    (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+                         (match_test "TARGET_SSE_SPLIT_REGS"))
+                      (const_string "V4SF")
+                   ]
+                   (const_string "SF"))
+           (eq_attr "type" "imovx")
+             (const_string "SI")
+           (and (eq_attr "alternative" "1,2")
+                (match_operand:HI 1 "aligned_operand"))
+             (const_string "SI")
+           (and (eq_attr "alternative" "0")
+                (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
+                     (not (match_test "TARGET_HIMODE_MATH"))))
+             (const_string "SI")
         ]
-          (const_string "*")))])
+          (const_string "HI")))])
 
 (define_split
   [(set (match_operand 0 "any_fp_register_operand")