aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLingling Kong <lingling.kong@intel.com>2024-08-14 16:35:29 +0800
committerLingling Kong <lingling.kong@intel.com>2024-08-14 16:56:01 +0800
commita302cd6abbeccc93784b7ecc86f2e9106aa412ca (patch)
tree3f7d16b11edd3c49db68bd4c427e672e46ac147b
parent42aba4786e42ac2317b4f1185a93bffb3de2ce50 (diff)
downloadgcc-a302cd6abbeccc93784b7ecc86f2e9106aa412ca.zip
gcc-a302cd6abbeccc93784b7ecc86f2e9106aa412ca.tar.gz
gcc-a302cd6abbeccc93784b7ecc86f2e9106aa412ca.tar.bz2
i386: Optimization for APX NDD is always zero-uppered for ADD
gcc/ChangeLog: PR target/113729 * config/i386/i386.md (*addqi_1_zext<mode><nf_name>): New define. (*addhi_1_zext<mode><nf_name>): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr113729.c: New test.
-rw-r--r--gcc/config/i386/i386.md80
-rw-r--r--gcc/testsuite/gcc.target/i386/pr113729.c27
2 files changed, 107 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 1a6188f..5f23727 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6572,6 +6572,86 @@
(set_attr "has_nf" "1")
(set_attr "mode" "<MODE>")])
+;; For an APX instruction with an NDD, the destination GPR will get the
+;; instruction's result in bits [OSIZE-1:0] and, if OSIZE < 64b, have
+;; its upper bits [63:OSIZE] zeroed.
+
+;; QImode add whose result is zero-extended into a wider register.
+;; Operand 0 (a register in an SWI248x mode) is set to the zero extension
+;; of the QImode sum of operands 1 and 2.  The pattern is enabled only when
+;; TARGET_APX_NDD and <nf_condition> hold and operands 1 and 2 are not both
+;; memory references.
+(define_insn "*addqi_1_zext<mode><nf_name>"
+ [(set (match_operand:SWI248x 0 "register_operand" "=r,r")
+ (zero_extend:SWI248x
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%rm,r")
+ (match_operand:QI 2 "general_operand" "rn,m"))))]
+ "TARGET_APX_NDD && <nf_condition>
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+{
+ /* Select the mnemonic from the insn's "type" attribute.  */
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ /* Operand 2 is expected to be 1 (INC) or -1 (DEC); the latter is
+    asserted.  These forms take only the source and the destination.  */
+ if (operands[2] == const1_rtx)
+ return "<nf_prefix>inc{b}\t{%1, %b0|%b0, %1}";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "<nf_prefix>dec{b}\t{%1, %b0|%b0, %1}";
+ }
+
+ default:
+ /* Emit SUB when x86_maybe_negate_const_int returns true (presumably
+    after negating the constant in operands[2] in place -- confirm
+    against its definition); otherwise emit ADD.  */
+ if (x86_maybe_negate_const_int (&operands[2], QImode))
+ return "<nf_prefix>sub{b}\t{%2, %1, %b0|%b0, %1, %2}";
+ return "<nf_prefix>add{b}\t{%2, %1, %b0|%b0, %1, %2}";
+ }
+}
+ ;; "type" is "incdec" when operand 2 matches incdec_operand, else "alu".
+ [(set (attr "type")
+ (cond [(match_operand:QI 2 "incdec_operand")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
+ ;; An "alu" insn whose operand 2 matches const128_operand gets an
+ ;; immediate length of 1; every other case keeps the default ("*").
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "has_nf" "1")
+ (set_attr "mode" "QI")])
+
+;; HImode add whose result is zero-extended into a wider register.
+;; Operand 0 (a register in an SWI48x mode) is set to the zero extension
+;; of the HImode sum of operands 1 and 2.  The pattern is enabled only when
+;; TARGET_APX_NDD and <nf_condition> hold and operands 1 and 2 are not both
+;; memory references.
+(define_insn "*addhi_1_zext<mode><nf_name>"
+ [(set (match_operand:SWI48x 0 "register_operand" "=r,r")
+ (zero_extend:SWI48x
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,r")
+ (match_operand:HI 2 "general_operand" "rn,m"))))]
+ "TARGET_APX_NDD && <nf_condition>
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+{
+ /* Select the mnemonic from the insn's "type" attribute.  */
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ /* Operand 2 is expected to be 1 (INC) or -1 (DEC); the latter is
+    asserted.  These forms take only the source and the destination.  */
+ if (operands[2] == const1_rtx)
+ return "<nf_prefix>inc{w}\t{%1, %w0|%w0, %1}";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "<nf_prefix>dec{w}\t{%1, %w0|%w0, %1}";
+ }
+
+ default:
+ /* Emit SUB when x86_maybe_negate_const_int returns true (presumably
+    after negating the constant in operands[2] in place -- confirm
+    against its definition); otherwise emit ADD.  */
+ if (x86_maybe_negate_const_int (&operands[2], HImode))
+ return "<nf_prefix>sub{w}\t{%2, %1, %w0|%w0, %1, %2}";
+ return "<nf_prefix>add{w}\t{%2, %1, %w0|%w0, %1, %2}";
+ }
+}
+ ;; "type" is "incdec" when operand 2 matches incdec_operand, else "alu".
+ ;; The test uses HImode so that it agrees with the mode operand 2 is
+ ;; declared with in the RTL template.
+ [(set (attr "type")
+ (cond [(match_operand:HI 2 "incdec_operand")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
+ ;; An "alu" insn whose operand 2 matches const128_operand gets an
+ ;; immediate length of 1; every other case keeps the default ("*").
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "has_nf" "1")
+ (set_attr "mode" "HI")])
+
;; It may seem that nonimmediate operand is proper one for operand 1.
;; The addsi_1 pattern allows nonimmediate operand at that place and
;; we take care in ix86_binary_operator_ok to not allow two memory
diff --git a/gcc/testsuite/gcc.target/i386/pr113729.c b/gcc/testsuite/gcc.target/i386/pr113729.c
new file mode 100644
index 0000000..34518a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113729.c
@@ -0,0 +1,27 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mapx-features=ndd -march=x86-64 -O2" } */
+/* { dg-final { scan-assembler-not "movz"} } */
+
+#include <stdint.h>
+
+/* F (TYPE1, TYPE2, OP_NAME, OP) defines two noipa functions that return
+   TYPE1:
+     f_<OP_NAME>_<TYPE2>_<TYPE1> (b)      returns (unsigned TYPE2) (200 OP b)
+     f1_<OP_NAME>_<TYPE2>_<TYPE1> (a, b)  returns (unsigned TYPE2) (a OP b)
+   All parameters have type unsigned TYPE2.  The cast truncates the result
+   to unsigned TYPE2 before it is converted to the return type TYPE1.  */
+#define F(TYPE1, TYPE2, OP_NAME, OP) \
+TYPE1 \
+__attribute__ ((noipa)) \
+f_##OP_NAME##_##TYPE2##_##TYPE1 (unsigned TYPE2 b) \
+{ \
+ return (unsigned TYPE2) (200 OP b); \
+} \
+TYPE1 \
+__attribute__ ((noipa)) \
+f1_##OP_NAME##_##TYPE2##_##TYPE1 \
+(unsigned TYPE2 a, unsigned TYPE2 b) \
+{ \
+ return (unsigned TYPE2) (a OP b); \
+}
+
+/* addqi_1_zext<mode>: unsigned char add, widened to short/int/int64_t.  */
+F (short, char, add, +)
+F (int, char, add, +)
+F (int64_t, char, add, +)
+/* addhi_1_zext<mode>: unsigned short add, widened to int/int64_t.  */
+F (int, short, add, +)
+F (int64_t, short, add, +)