diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2023-11-08 21:46:26 +0100 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2023-11-08 21:51:31 +0100 |
commit | dced5ae64703507a7159972316a1dde48e5f7470 (patch) | |
tree | b785ec3b0ca319e0497662dec74e51c2b2a14b0a /gcc | |
parent | 39d81b667373b0033f44702a4b532a4618dde9ff (diff) | |
download | gcc-dced5ae64703507a7159972316a1dde48e5f7470.zip gcc-dced5ae64703507a7159972316a1dde48e5f7470.tar.gz gcc-dced5ae64703507a7159972316a1dde48e5f7470.tar.bz2 |
i386: Apply LRA reload workaround to insns with high registers [PR82524]
LRA is not able to reload a zero_extracted in-out operand with a matched
input operand in the same way as a strict_low_part in-out operand. The patch
extends the strict_low_part workaround to the zero_extracted in-out operand
case: we allow LRA to generate an instruction with a non-matched input
operand, which is split post reload into an instruction that inserts the
non-matched input operand into the in-out operand, followed by the
instruction that uses the matched operand.
The generated code from the pr82524.c testcase improves from:
movl %esi, %ecx
movl %edi, %eax
movsbl %ch, %esi
addl %esi, %edx
movb %dl, %ah
to:
movl %edi, %eax
movl %esi, %ecx
movb %ch, %ah
addb %dl, %ah
The compiler is now also able to handle non-commutative operations:
movl %edi, %eax
movl %esi, %ecx
movb %ch, %ah
subb %dl, %ah
and unary operations:
movl %edi, %eax
movl %esi, %edx
movb %dh, %ah
negb %ah
The patch also makes the split conditions of the splitters more robust, to
ensure that only alternatives with unmatched operands are split.
PR target/82524
gcc/ChangeLog:
* config/i386/i386.md (*add<mode>_1_slp):
Split insn only for unmatched operand 0.
(*sub<mode>_1_slp): Ditto.
(*<any_logic:code><mode>_1_slp): Merge pattern from "*and<mode>_1_slp"
and "*<any_or:code><mode>_1_slp" using any_logic code iterator.
Split insn only for unmatched operand 0.
(*neg<mode>1_slp): Split insn only for unmatched operand 0.
(*one_cmpl<mode>_1_slp): Ditto.
(*ashl<mode>3_1_slp): Ditto.
(*<any_shiftrt:insn><mode>_1_slp): Ditto.
(*<any_rotate:insn><mode>_1_slp): Ditto.
(*addqi_ext<mode>_1): Redefine as define_insn_and_split. Add
alternative 1 and split insn after reload for unmatched operand 0.
(*<plusminus:insn>qi_ext<mode>_2): Merge pattern from
"*addqi_ext<mode>_2" and "*subqi_ext<mode>_2" using plusminus code
iterator. Redefine as define_insn_and_split. Add alternative 1
and split insn after reload for unmatched operand 0.
(*subqi_ext<mode>_1): Redefine as define_insn_and_split. Add
alternative 1 and split insn after reload for unmatched operand 0.
(*<any_logic:code>qi_ext<mode>_0): Merge pattern from
"*andqi_ext<mode>_0" and "*<any_or:code>qi_ext<mode>_0" using
any_logic code iterator.
(*<any_logic:code>qi_ext<mode>_1): Merge pattern from
"*andqi_ext<mode>_1" and "*<any_or:code>qi_ext<mode>_1" using
any_logic code iterator. Redefine as define_insn_and_split. Add
alternative 1 and split insn after reload for unmatched operand 0.
(*<any_logic:code>qi_ext<mode>_1_cc): Merge pattern from
"*andqi_ext<mode>_1_cc" and "*xorqi_ext<mode>_1_cc" using any_logic
code iterator. Redefine as define_insn_and_split. Add alternative 1
and split insn after reload for unmatched operand 0.
(*<any_logic:code>qi_ext<mode>_2): Merge pattern from
"*andqi_ext<mode>_2" and "*<any_or:code>qi_ext<mode>_2" using
any_logic code iterator. Redefine as define_insn_and_split. Add
alternative 1 and split insn after reload for unmatched operand 0.
(*<any_logic:code>qi_ext<mode>_3): Redefine as define_insn_and_split.
Add alternative 1 and split insn after reload for unmatched operand 0.
(*negqi_ext<mode>_1): Rename from "*negqi_ext<mode>_2". Add
alternative 1 and split insn after reload for unmatched operand 0.
(*one_cmplqi_ext<mode>_1): Ditto.
(*ashlqi_ext<mode>_1): Ditto.
(*<any_shiftrt:insn>qi_ext<mode>_1): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr78904-1.c (test_sub): New test.
* gcc.target/i386/pr78904-1a.c (test_sub): Ditto.
* gcc.target/i386/pr78904-1b.c (test_sub): Ditto.
* gcc.target/i386/pr78904-2.c (test_sub): Ditto.
* gcc.target/i386/pr78904-2a.c (test_sub): Ditto.
* gcc.target/i386/pr78904-2b.c (test_sub): Ditto.
* gcc.target/i386/pr78952-4.c (test_sub): Ditto.
* gcc.target/i386/pr82524.c: New test.
* gcc.target/i386/pr82524-1.c: New test.
* gcc.target/i386/pr82524-2.c: New test.
* gcc.target/i386/pr82524-3.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386.md | 602 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr78904-1.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr78904-1a.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr78904-1b.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr78904-2.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr78904-2a.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr78904-2b.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr78952-4.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr82524-1.c | 63 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr82524-2.c | 63 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr82524-3.c | 42 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr82524.c | 83 |
12 files changed, 673 insertions, 243 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9902299..ce7102af 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -6596,7 +6596,9 @@ return "add{<imodesuffix>}\t{%2, %0|%0, %2}"; } } - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || rtx_equal_p (operands[0], operands[2]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -7001,38 +7003,58 @@ (match_operand:QI 2 "const_int_operand")) 0)) (clobber (reg:CC FLAGS_REG))])]) -(define_insn "*addqi_ext<mode>_1" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*addqi_ext<mode>_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (plus:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) 0)) + (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0)) (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" + "" { + if (which_alternative) + return "#"; + switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{b}\t%h0"; else - { + { gcc_assert (operands[2] == constm1_rtx); - return "dec{b}\t%h0"; - } + return "dec{b}\t%h0"; + } default: return "add{b}\t{%2, %h0|%h0, %2}"; } } + "reload_completed + && !rtx_equal_p (operands[0], operands[1])" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (plus:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "addr" "gpr8") (set (attr "type") (if_then_else (match_operand:QI 2 "incdec_operand") @@ -7040,28 +7062,49 @@ (const_string "alu"))) (set_attr "mode" "QI")]) -(define_insn "*addqi_ext<mode>_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*<insn>qi_ext<mode>_2" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 - (plus:QI + (plusminus:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "%0") + [(match_operand 1 "int248_register_operand" "<comm>0,!Q") (const_int 8) (const_int 8)]) 0) (subreg:QI (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") + [(match_operand 2 "int248_register_operand" "Q,Q") (const_int 8) (const_int 8)]) 0)) 0)) - (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1]) - || rtx_equal_p (operands[0], operands[2])" - "add{b}\t{%h2, %h0|%h0, %h2}" + (clobber (reg:CC FLAGS_REG))] + "" + "@ + <insn>{b}\t{%h2, %h0|%h0, %h2} + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || (<CODE> == PLUS && rtx_equal_p (operands[0], operands[2])))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (plusminus:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (subreg:QI + (match_op_dup 4 + [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "type" "alu") (set_attr "mode" "QI")]) @@ -7570,7 +7613,8 @@ "@ sub{<imodesuffix>}\t{%2, %0|%0, %2} #" - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -7627,28 +7671,44 @@ (set_attr "type" "alu") (set_attr "mode" "QI")]) -(define_insn "*subqi_ext<mode>_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*subqi_ext<mode>_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (minus:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (subreg:QI - (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0)) 0)) - (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" - "sub{b}\t{%h2, %h0|%h0, %h2}" - [(set_attr "type" "alu") + (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "@ + sub{b}\t{%2, %h0|%h0, %2} + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (minus:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "addr" "gpr8") + (set_attr "type" "alu") (set_attr "mode" "QI")]) ;; Subtract with jump on overflow. @@ -11338,20 +11398,22 @@ (symbol_ref "true")))]) ;; Alternative 1 is needed to work around LRA limitation, see PR82524. -(define_insn_and_split "*and<mode>_1_slp" +(define_insn_and_split "*<code><mode>_1_slp" [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>")) - (and:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>") - (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn"))) + (any_logic:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>") + (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn"))) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "@ - and{<imodesuffix>}\t{%2, %0|%0, %2} + <logic>{<imodesuffix>}\t{%2, %0|%0, %2} #" - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || rtx_equal_p (operands[0], operands[2]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) - (and:SWI12 (match_dup 0) (match_dup 2))) + (any_logic:SWI12 (match_dup 0) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "type" "alu") @@ -11528,9 +11590,9 @@ [(set_attr "type" "alu") 
(set_attr "mode" "<MODE>")]) -(define_insn "*andqi_ext<mode>_0" +(define_insn "*<code>qi_ext<mode>_0" [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn") - (and:QI + (any_logic:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" [(match_operand 2 "int248_register_operand" "Q") @@ -11539,7 +11601,7 @@ (match_operand:QI 1 "nonimmediate_operand" "0"))) (clobber (reg:CC FLAGS_REG))] "" - "and{b}\t{%h2, %0|%0, %h2}" + "<logic>{b}\t{%h2, %0|%0, %h2}" [(set_attr "addr" "gpr8") (set_attr "type" "alu") (set_attr "mode" "QI")]) @@ -11558,86 +11620,180 @@ (match_operand:QI 2 "const_int_operand")) 0)) (clobber (reg:CC FLAGS_REG))])]) -(define_insn "*andqi_ext<mode>_1" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*<code>qi_ext<mode>_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 - (and:QI + (any_logic:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) 0)) + (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0)) (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" - "and{b}\t{%2, %h0|%h0, %2}" + "" + "@ + <logic>{b}\t{%2, %h0|%h0, %2} + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (any_logic:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "addr" "gpr8") (set_attr "type" "alu") (set_attr "mode" "QI")]) -;; Generated by peephole translating test to and. This shows up -;; often in fp comparisons. -(define_insn "*andqi_ext<mode>_1_cc" - [(set (reg FLAGS_REG) - (compare - (and:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") - (const_int 8) - (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) - (const_int 0))) +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*<code>qi_ext<mode>_1_cc" + [(set (match_operand 4 "flags_reg_operand") + (match_operator 5 "compare_operator" + [(any_logic:QI + (subreg:QI + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "0,!Q") + (const_int 8) + (const_int 8)]) 0) + (match_operand:QI 2 "general_operand" "QnBn,QnBn")) + (const_int 0)])) (set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 - (and:QI + (any_logic:QI (subreg:QI (match_op_dup 3 - [(match_dup 1) - (const_int 8) - (const_int 8)]) 0) + [(match_dup 0) (const_int 8) (const_int 8)]) 0) (match_dup 2)) 0))] - "ix86_match_ccmode (insn, CCNOmode) - /* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - && rtx_equal_p (operands[0], operands[1])" - "and{b}\t{%2, %h0|%h0, %2}" + "ix86_match_ccmode (insn, CCNOmode)" + "@ + <logic>{b}\t{%2, %h0|%h0, %2} + #" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (match_dup 4) + (match_op_dup 5 + [(any_logic:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) + (const_int 0)])) + (set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (any_logic:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 1) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0))])] + "" [(set_attr "addr" "gpr8") (set_attr "type" "alu") (set_attr "mode" "QI")]) -(define_insn "*andqi_ext<mode>_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*<code>qi_ext<mode>_2" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 - (and:QI + (any_logic:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "%0") + [(match_operand 1 "int248_register_operand" "%0,!Q") (const_int 8) (const_int 8)]) 0) (subreg:QI (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") + [(match_operand 2 "int248_register_operand" "Q,Q") (const_int 8) (const_int 8)]) 0)) 0)) (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1]) - || rtx_equal_p (operands[0], operands[2])" - "and{b}\t{%h2, %h0|%h0, %h2}" + "" + "@ + <logic>{b}\t{%h2, %h0|%h0, %h2} + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || rtx_equal_p (operands[0], operands[2]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (any_logic:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (subreg:QI + (match_op_dup 4 + [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "type" "alu") (set_attr "mode" "QI")]) -;; *andqi_ext<mode>_3 is defined via *<code>qi_ext<mode>_3 below. +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*<code>qi_ext<mode>_3" + [(set (zero_extract:SWI248 + (match_operand 0 "int248_register_operand" "+Q,&Q") + (const_int 8) + (const_int 8)) + (match_operator:SWI248 3 "extract_operator" + [(any_logic + (match_operand 1 "int248_register_operand" "%0,!Q") + (match_operand 2 "int248_register_operand" "Q,Q")) + (const_int 8) + (const_int 8)])) + (clobber (reg:CC FLAGS_REG))] + "GET_MODE (operands[1]) == GET_MODE (operands[2])" + "@ + <logic>{b}\t{%h2, %h0|%h0, %h2} + #" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]) + || rtx_equal_p (operands[0], operands[2]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (match_op_dup 3 + [(any_logic (match_dup 4) (match_dup 2)) + (const_int 8) (const_int 8)])) + (clobber (reg:CC FLAGS_REG))])] + "operands[4] = gen_lowpart (GET_MODE (operands[1]), operands[0]);" + [(set_attr "type" 
"alu") + (set_attr "mode" "QI")]) ;; Convert wide AND instructions with immediate operand to shorter QImode ;; equivalents when possible. @@ -12166,26 +12322,6 @@ (symbol_ref "!TARGET_PARTIAL_REG_STALL")] (symbol_ref "true")))]) -;; Alternative 1 is needed to work around LRA limitation, see PR82524. -(define_insn_and_split "*<code><mode>_1_slp" - [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>")) - (any_or:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!<r>") - (match_operand:SWI12 2 "general_operand" "<r>mn,<r>mn"))) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" - "@ - <logic>{<imodesuffix>}\t{%2, %0|%0, %2} - #" - "&& reload_completed" - [(set (strict_low_part (match_dup 0)) (match_dup 1)) - (parallel - [(set (strict_low_part (match_dup 0)) - (any_or:SWI12 (match_dup 0) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] - "" - [(set_attr "type" "alu") - (set_attr "mode" "<MODE>")]) - ;; convert (sign_extend:WIDE (any_logic:NARROW (memory, immediate))) ;; to (any_logic:WIDE (sign_extend (memory)), (sign_extend (immediate))). ;; This eliminates sign extension after logic operation. 
@@ -12276,90 +12412,6 @@ [(set_attr "type" "alu") (set_attr "mode" "<MODE>")]) -(define_insn "*<code>qi_ext<mode>_0" - [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn") - (any_or:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0) - (match_operand:QI 1 "nonimmediate_operand" "0"))) - (clobber (reg:CC FLAGS_REG))] - "" - "<logic>{b}\t{%h2, %0|%0, %h2}" - [(set_attr "addr" "gpr8") - (set_attr "type" "alu") - (set_attr "mode" "QI")]) - -(define_insn "*<code>qi_ext<mode>_1" - [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (subreg:SWI248 - (any_or:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") - (const_int 8) - (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) 0)) - (clobber (reg:CC FLAGS_REG))] - "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) - /* FIXME: without this LRA can't reload this pattern, see PR82524. */ - && rtx_equal_p (operands[0], operands[1])" - "<logic>{b}\t{%2, %h0|%h0, %2}" - [(set_attr "addr" "gpr8") - (set_attr "type" "alu") - (set_attr "mode" "QI")]) - -(define_insn "*<code>qi_ext<mode>_2" - [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (subreg:SWI248 - (any_or:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "%0") - (const_int 8) - (const_int 8)]) 0) - (subreg:QI - (match_operator:SWI248 4 "extract_operator" - [(match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)]) 0)) 0)) - (clobber (reg:CC FLAGS_REG))] - "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) - /* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - && (rtx_equal_p (operands[0], operands[1]) - || rtx_equal_p (operands[0], operands[2]))" - "<logic>{b}\t{%h2, %h0|%h0, %h2}" - [(set_attr "type" "alu") - (set_attr "mode" "QI")]) - -(define_insn "*<code>qi_ext<mode>_3" - [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (zero_extract:SWI248 - (any_logic:SWI248 - (match_operand 1 "int248_register_operand" "%0") - (match_operand 2 "int248_register_operand" "Q")) - (const_int 8) - (const_int 8))) - (clobber (reg:CC FLAGS_REG))] - "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) - /* FIXME: without this LRA can't reload this pattern, see PR82524. */ - && (rtx_equal_p (operands[0], operands[1]) - || rtx_equal_p (operands[0], operands[2]))" - "<logic>{b}\t{%h2, %h0|%h0, %h2}" - [(set_attr "type" "alu") - (set_attr "mode" "QI")]) - ;; Convert wide OR instructions with immediate operand to shorter QImode ;; equivalents when possible. ;; Don't do the splitting with memory operands, since it introduces risk @@ -12443,37 +12495,6 @@ (const_int 8)) 0) (match_dup 2)) 0))])]) -(define_insn "*xorqi_ext<mode>_1_cc" - [(set (reg FLAGS_REG) - (compare - (xor:QI - (subreg:QI - (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") - (const_int 8) - (const_int 8)]) 0) - (match_operand:QI 2 "general_operand" "QnBn")) - (const_int 0))) - (set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") - (const_int 8) - (const_int 8)) - (subreg:SWI248 - (xor:QI - (subreg:QI - (match_op_dup 3 - [(match_dup 1) - (const_int 8) - (const_int 8)]) 0) - (match_dup 2)) 0))] - "ix86_match_ccmode (insn, CCNOmode) - /* FIXME: without this LRA can't reload this pattern, see PR82524. */ - && rtx_equal_p (operands[0], operands[1])" - "xor{b}\t{%2, %h0|%h0, %2}" - [(set_attr "addr" "gpr8") - (set_attr "type" "alu") - (set_attr "mode" "QI")]) - ;; Peephole2 rega = 0; rega op= regb into rega = regb. 
(define_peephole2 [(parallel [(set (match_operand:SWI 0 "general_reg_operand") @@ -12813,7 +12834,8 @@ "@ neg{<imodesuffix>}\t%0 #" - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -12881,22 +12903,40 @@ (set (match_operand:SWI48 0 "register_operand") (neg:SWI48 (match_dup 1)))])]) -(define_insn "*negqi_ext<mode>_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*negqi_ext<mode>_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (neg:QI (subreg:QI (match_operator:SWI248 2 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0)) 0)) (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" - "neg{b}\t%h0" + "" + "@ + neg{b}\t%h0 + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (neg:QI + (subreg:QI + (match_op_dup 2 + [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "type" "negnot") (set_attr "mode" "QI")]) @@ -13420,7 +13460,8 @@ "@ not{<imodesuffix>}\t%0 #" - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (set (strict_low_part (match_dup 0)) (not:SWI12 (match_dup 0)))] @@ -13479,6 +13520,40 @@ (const_int 0)])) (set (match_dup 1) (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])]) + +;; Alternative 1 is needed to work around LRA limitation, see PR82524. 
+(define_insn_and_split "*one_cmplqi_ext<mode>_1" + [(set (zero_extract:SWI248 + (match_operand 0 "int248_register_operand" "+Q,&Q") + (const_int 8) + (const_int 8)) + (subreg:SWI248 + (not:QI + (subreg:QI + (match_operator:SWI248 2 "extract_operator" + [(match_operand 1 "int248_register_operand" "0,!Q") + (const_int 8) + (const_int 8)]) 0)) 0))] + "" + "@ + not{b}\t%h0 + #" + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (zero_extract:SWI248 + (match_dup 1) (const_int 8) (const_int 8))) + (set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (not:QI + (subreg:QI + (match_op_dup 2 + [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))] + "" + [(set_attr "type" "negnot") + (set_attr "mode" "QI")]) ;; Shift instructions @@ -14254,7 +14329,8 @@ return "sal{<imodesuffix>}\t{%2, %0|%0, %2}"; } } - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -14458,23 +14534,26 @@ (const_string "*"))) (set_attr "mode" "<MODE>")]) -(define_insn "*ashlqi_ext<mode>_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*ashlqi_ext<mode>_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (ashift:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (match_operand:QI 2 "nonmemory_operand" "cI")) 0)) - (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" + (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0)) + (clobber (reg:CC FLAGS_REG))] + "" { + if (which_alternative) + return "#"; + switch (get_attr_type (insn)) { case TYPE_ALU: @@ -14489,6 +14568,22 @@ return "sal{b}\t{%2, %h0|%h0, %2}"; } } + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (match_dup 1)) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (ashift:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set (attr "type") (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 2 "const1_operand")) @@ -15247,7 +15342,8 @@ else return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}"; } - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) @@ -15361,29 +15457,48 @@ (const_string "*"))) (set_attr "mode" "<MODE>")]) -(define_insn "*<insn>qi_ext<mode>_2" +;; Alternative 1 is needed to work around LRA limitation, see PR82524. +(define_insn_and_split "*<insn>qi_ext<mode>_1" [(set (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "+Q") + (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (any_shiftrt:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" - [(match_operand 1 "int248_register_operand" "0") + [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) - (match_operand:QI 2 "nonmemory_operand" "cI")) 0)) - (clobber (reg:CC FLAGS_REG))] - "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ - rtx_equal_p (operands[0], operands[1])" + (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0)) + (clobber (reg:CC FLAGS_REG))] + "" { + if (which_alternative) + return "#"; + if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) return "<shift>{b}\t%h0"; else return "<shift>{b}\t{%2, %h0|%h0, %2}"; } + "reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (match_dup 1)) + (parallel + [(set (zero_extract:SWI248 + (match_dup 0) (const_int 8) (const_int 8)) + (subreg:SWI248 + (any_shiftrt:QI + (subreg:QI + (match_op_dup 3 + [(match_dup 0) (const_int 8) (const_int 8)]) 0) + (match_dup 2)) 0)) + (clobber (reg:CC FLAGS_REG))])] + "" [(set_attr "type" "ishift") (set (attr "length_immediate") (if_then_else @@ -15875,7 +15990,8 @@ else return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}"; } - "&& reload_completed" + "&& reload_completed + && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1.c b/gcc/testsuite/gcc.target/i386/pr78904-1.c index d27d7fd..ed5403f 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-1.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-1.c @@ -46,3 +46,12 @@ struct S1 test_add (struct S1 a, struct S1 b) } /* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S1 test_sub (struct S1 a, struct S1 b) +{ + a.val -= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1a.c b/gcc/testsuite/gcc.target/i386/pr78904-1a.c index 7746477..aa9273e 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-1a.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-1a.c @@ -45,3 +45,12 @@ struct S1 test_add (struct S1 a, struct S1 b) } /* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S1 test_sub (struct S1 a, struct S1 b) 
+{ + a.val -= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-1b.c b/gcc/testsuite/gcc.target/i386/pr78904-1b.c index 20b6772..0687c95 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-1b.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-1b.c @@ -47,3 +47,12 @@ struct S1 test_add (struct S1 a, struct S1 b) } /* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S1 test_sub (struct S1 a, struct S1 b) +{ + a.val -= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2.c b/gcc/testsuite/gcc.target/i386/pr78904-2.c index 0cc4aaa..3e9389e 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-2.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-2.c @@ -47,3 +47,12 @@ struct S1 test_add (struct S1 a) } /* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */ + +struct S1 test_sub (struct S1 a) +{ + a.val -= t.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2a.c b/gcc/testsuite/gcc.target/i386/pr78904-2a.c index 41eaa25..f0c5979 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-2a.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-2a.c @@ -46,3 +46,12 @@ struct S1 test_add (struct S1 a) } /* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */ + +struct S1 test_sub (struct S1 a) +{ + a.val -= t.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78904-2b.c b/gcc/testsuite/gcc.target/i386/pr78904-2b.c index 23e975a..e6154e6 100644 --- a/gcc/testsuite/gcc.target/i386/pr78904-2b.c +++ b/gcc/testsuite/gcc.target/i386/pr78904-2b.c @@ -48,3 +48,12 @@ struct S1 test_add (struct S1 a) } /* { dg-final { scan-assembler "\[ \t\]addb\[ \t\]+t\[^\n\r]*, %.h" } } */ + +struct S1 test_sub (struct S1 a) +{ + 
a.val -= t.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb\[ \t\]+t\[^\n\r]*, %.h" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr78952-4.c b/gcc/testsuite/gcc.target/i386/pr78952-4.c index c7bd63c..d997967 100644 --- a/gcc/testsuite/gcc.target/i386/pr78952-4.c +++ b/gcc/testsuite/gcc.target/i386/pr78952-4.c @@ -46,3 +46,12 @@ struct S1 test_add (struct S1 a, struct S1 b) } /* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S1 test_sub (struct S1 a, struct S1 b) +{ + a.val -= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82524-1.c b/gcc/testsuite/gcc.target/i386/pr82524-1.c new file mode 100644 index 0000000..6539630 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82524-1.c @@ -0,0 +1,63 @@ +/* PR target/82524 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-additional-options "-mregparm=3" { target ia32 } } */ +/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */ + +struct S +{ + char pad1; + char val; + char pad2; + char pad3; +}; + +struct S +test_plus (struct S a, struct S b, struct S c) +{ + a.val = b.val + c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S +test_minus (struct S a, struct S b, struct S c) +{ + a.val = b.val - c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ + +struct S +test_and (struct S a, struct S b, struct S c) +{ + a.val = b.val & c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb" } } */ + +struct S +test_or (struct S a, struct S b, struct S c) +{ + a.val = b.val | c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb" } } */ + +struct S +test_xor (struct S a, struct S b, struct S c) +{ + a.val = b.val ^ c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82524-2.c b/gcc/testsuite/gcc.target/i386/pr82524-2.c new file 
mode 100644 index 0000000..766dd1a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82524-2.c @@ -0,0 +1,63 @@ +/* PR target/82524 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-additional-options "-mregparm=3" { target ia32 } } */ +/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */ + +struct S +{ + char pad1; + char val; + char pad2; + char pad3; +}; + +struct S +test_plus (struct S a, struct S b, char *c) +{ + a.val = b.val + *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S +test_minus (struct S a, struct S b, char *c) +{ + a.val = b.val - *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ + +struct S +test_and (struct S a, struct S b, char *c) +{ + a.val = b.val & *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb" } } */ + +struct S +test_or (struct S a, struct S b, char *c) +{ + a.val = b.val | *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb" } } */ + +struct S +test_xor (struct S a, struct S b, char *c) +{ + a.val = b.val ^ *c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82524-3.c b/gcc/testsuite/gcc.target/i386/pr82524-3.c new file mode 100644 index 0000000..7a66712 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82524-3.c @@ -0,0 +1,42 @@ +/* PR target/82524 */ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler-not "movzbl" } } */ + +struct S +{ + unsigned char pad1; + unsigned char val; + unsigned short pad2; + unsigned int pad3; +}; + +struct S +test_and (struct S a, struct S b, struct S c) +{ + a.val = b.val & c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb" } } */ + +struct S +test_or (struct S a, struct S b, struct S c) +{ + a.val = b.val | c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb" } } */ + +struct S +test_xor (struct S a, struct S b, struct S c) +{ + a.val = b.val ^ c.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr82524.c b/gcc/testsuite/gcc.target/i386/pr82524.c new file mode 100644 index 0000000..058f0a2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82524.c @@ -0,0 +1,83 @@ +/* PR target/82524 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-additional-options "-mregparm=3" { target ia32 } } */ +/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */ + +struct S +{ + char pad1; + char val; + char pad2; + char pad3; +}; + +struct S +test_plus (struct S a, struct S b, char c) +{ + a.val = b.val + c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]addb" } } */ + +struct S +test_minus (struct S a, struct S b, char c) +{ + a.val = b.val - c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]subb" } } */ + +struct S +test_neg (struct S a, struct S b) +{ + a.val = -b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]negb" } } */ + +struct S +test_and (struct S a, struct S b, char c) +{ + a.val = b.val & c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb" } } */ + +struct S +test_or (struct S a, struct S b, char c) +{ + a.val = b.val | c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb" } } */ + +struct S +test_xor (struct S a, struct S b, char c) +{ + a.val = b.val ^ 
c; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb" } } */ + +struct S +test_not (struct S a, struct S b) +{ + a.val = ~b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]notb" } } */ |