diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2021-12-23 12:33:07 +0000 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2021-12-23 12:35:22 +0000 |
commit | ef26c151c14a87177d46fd3d725e7f82e040e89f (patch) | |
tree | a937ab620ad5df0e89095457cd1011c14f39219e /gcc/config | |
parent | 61e53698a08dc1d9a54d785218af687a6751c1b3 (diff) | |
download | gcc-ef26c151c14a87177d46fd3d725e7f82e040e89f.zip gcc-ef26c151c14a87177d46fd3d725e7f82e040e89f.tar.gz gcc-ef26c151c14a87177d46fd3d725e7f82e040e89f.tar.bz2 |
x86: PR target/103773: Fix wrong-code with -Oz from pop to memory.
This is a fix to PR target/103773 where -Oz shouldn't use push/pop
on x86 to shrink writing small integer constants to memory.
Instead clang uses "andl $0, mem" for writing zero, and "orl $-1, mem"
when writing -1 to memory when using -Oz. This patch implements this
via peephole2 where we can confirm that it's OK to clobber the flags.
2021-12-23 Roger Sayle <roger@nextmovesoftware.com>
Uroš Bizjak <ubizjak@gmail.com>
gcc/ChangeLog
PR target/103773
* config/i386/i386.md (*mov<mode>_and): New define_insn for
writing a zero to memory using AND.
(*mov<mode>_or): Extend to allow memory destination and HImode.
(*movdi_internal): Remove -Oz push/pop optimization from here.
(*movsi_internal): Likewise.
(peephole2): Perform -Oz push/pop optimization here, only for
register destinations, values other than zero, and in functions
that don't use the red zone.
(peephole2): With -Oz, convert writes of 0 or -1 to memory into
their clobber forms, i.e. *mov<mode>_and and *mov<mode>_or resp.
gcc/testsuite/ChangeLog
PR target/103773
* gcc.target/i386/pr103773-2.c: New test case.
* gcc.target/i386/pr103773.c: New test case.
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/i386.md | 62 |
1 files changed, 44 insertions, 18 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 58b1064..284b950 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2028,9 +2028,19 @@ (set_attr "mode" "SI") (set_attr "length_immediate" "0")]) +(define_insn "*mov<mode>_and" + [(set (match_operand:SWI248 0 "memory_operand" "=m") + (match_operand:SWI248 1 "const0_operand")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "and{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "<MODE>") + (set_attr "length_immediate" "1")]) + (define_insn "*mov<mode>_or" - [(set (match_operand:SWI48 0 "register_operand" "=r") - (match_operand:SWI48 1 "constm1_operand")) + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm") + (match_operand:SWI248 1 "constm1_operand")) (clobber (reg:CC FLAGS_REG))] "reload_completed" "or{<imodesuffix>}\t{%1, %0|%0, %1}" @@ -2218,14 +2228,7 @@ case TYPE_IMOV: gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); if (get_attr_mode (insn) == MODE_SI) - { - if (optimize_size > 1 - && TARGET_64BIT - && CONST_INT_P (operands[1]) - && IN_RANGE (INTVAL (operands[1]), -128, 127)) - return "push{q}\t%1\n\tpop{q}\t%0"; - return "mov{l}\t{%k1, %k0|%k0, %k1}"; - } + return "mov{l}\t{%k1, %k0|%k0, %k1}"; else if (which_alternative == 4) return "movabs{q}\t{%1, %0|%0, %1}"; else if (ix86_use_lea_for_mov (insn, operands)) @@ -2443,14 +2446,6 @@ gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); if (ix86_use_lea_for_mov (insn, operands)) return "lea{l}\t{%E1, %0|%0, %E1}"; - else if (optimize_size > 1 - && CONST_INT_P (operands[1]) - && IN_RANGE (INTVAL (operands[1]), -128, 127)) - { - if (TARGET_64BIT) - return "push{q}\t%1\n\tpop{q}\t%q0"; - return "push{l}\t%1\n\tpop{l}\t%0"; - } else return "mov{l}\t{%1, %0|%0, %1}"; @@ -2514,6 +2509,37 @@ ] (symbol_ref "true")))]) +;; With -Oz, transform mov $imm,reg to the shorter push $imm; pop reg. 
+(define_peephole2 + [(set (match_operand:SWI248 0 "general_reg_operand") + (match_operand:SWI248 1 "const_int_operand"))] + "optimize_insn_for_size_p () && optimize_size > 1 + && operands[1] != const0_rtx + && IN_RANGE (INTVAL (operands[1]), -128, 127) + && !ix86_red_zone_used" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 3))] +{ + if (GET_MODE (operands[0]) != word_mode) + operands[0] = gen_rtx_REG (word_mode, REGNO (operands[0])); + + operands[2] = gen_rtx_MEM (word_mode, + gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx)); + operands[3] = gen_rtx_MEM (word_mode, + gen_rtx_POST_INC (Pmode, stack_pointer_rtx)); +}) + +;; With -Oz, transform mov $0,mem to the shorter and $0,mem. +;; Likewise, transform mov $-1,mem to the shorter or $-1,mem. +(define_peephole2 + [(set (match_operand:SWI248 0 "memory_operand") + (match_operand:SWI248 1 "const_int_operand"))] + "(operands[1] == const0_rtx || operands[1] == constm1_rtx) + && optimize_insn_for_size_p () && optimize_size > 1 + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])]) + (define_insn "*movhi_internal" [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*v,*v,*v,m") |