about summary refs log tree commit diff
path: root/gcc/config/i386
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- gcc/config/i386/i386-expand.cc 12
-rw-r--r-- gcc/config/i386/i386-options.cc 6
-rw-r--r-- gcc/config/i386/i386.cc 23
-rw-r--r-- gcc/config/i386/i386.md 56
-rw-r--r-- gcc/config/i386/sse.md 96
5 files changed, 170 insertions, 23 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 7bb4d39..9a8d244 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -10234,6 +10234,18 @@ ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
machine_mode mode1 = insn_data[d->icode].operand[1].mode;
enum rtx_code comparison = d->comparison;
+ /* ptest with identical operands always sets the carry flag, so PTESTC is known to be 1. */
+ if (comparison == LTU
+ && (d->code == IX86_BUILTIN_PTESTC
+ || d->code == IX86_BUILTIN_PTESTC256)
+ && rtx_equal_p (op0, op1))
+ {
+ if (!target)
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const1_rtx);
+ return target;
+ }
+
if (VECTOR_MODE_P (mode0))
op0 = safe_vector_operand (op0, mode0);
if (VECTOR_MODE_P (mode1))
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 2cb0bdd..7f593ce 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -1400,7 +1400,11 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
opts->x_ix86_tune_string
= ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
- else if (orig_tune_defaulted)
+ /* If we have an explicit arch string and no tune string specified, set
+ tune_string to NULL; it will later be overridden by arch_string
+ so target clones can get proper optimization. */
+ else if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
+ || orig_tune_defaulted)
opts->x_ix86_tune_string = NULL;
/* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 32851a5..0761965 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -21423,16 +21423,23 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
else if (XINT (x, 1) == UNSPEC_PTEST)
{
*total = cost->sse_op;
- if (XVECLEN (x, 0) == 2
- && GET_CODE (XVECEXP (x, 0, 0)) == AND)
+ rtx test_op0 = XVECEXP (x, 0, 0);
+ if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
+ return false;
+ if (GET_CODE (test_op0) == AND)
{
- rtx andop = XVECEXP (x, 0, 0);
- *total += rtx_cost (XEXP (andop, 0), GET_MODE (andop),
- AND, opno, speed)
- + rtx_cost (XEXP (andop, 1), GET_MODE (andop),
- AND, opno, speed);
- return true;
+ rtx and_op0 = XEXP (test_op0, 0);
+ if (GET_CODE (and_op0) == NOT)
+ and_op0 = XEXP (and_op0, 0);
+ *total += rtx_cost (and_op0, GET_MODE (and_op0),
+ AND, 0, speed)
+ + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
+ AND, 1, speed);
}
+ else
+ *total = rtx_cost (test_op0, GET_MODE (test_op0),
+ UNSPEC, 0, speed);
+ return true;
}
return false;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 95a6653c..15c0310 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11380,6 +11380,8 @@
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
+;; *andqi_ext<mode>_3 is defined via *<code>qi_ext<mode>_3 below.
+
;; Convert wide AND instructions with immediate operand to shorter QImode
;; equivalents when possible.
;; Don't do the splitting with memory operands, since it introduces risk
@@ -12092,6 +12094,26 @@
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
+(define_insn "*<code>qi_ext<mode>_3"
+ [(set (zero_extract:SWI248
+ (match_operand 0 "int248_register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SWI248
+ (any_logic:SWI248
+ (match_operand 1 "int248_register_operand" "%0")
+ (match_operand 2 "int248_register_operand" "Q"))
+ (const_int 8)
+ (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ /* FIXME: without this LRA can't reload this pattern, see PR82524. */
+ && (rtx_equal_p (operands[0], operands[1])
+ || rtx_equal_p (operands[0], operands[2]))"
+ "<logic>{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
;; Convert wide OR instructions with immediate operand to shorter QImode
;; equivalents when possible.
;; Don't do the splitting with memory operands, since it introduces risk
@@ -12206,6 +12228,18 @@
(set_attr "type" "alu")
(set_attr "mode" "QI")])
+;; Peephole2: turn rega = 0; rega op= src (op in any_or_plus) into rega = src.
+(define_peephole2
+ [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
+ (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0)
+ (any_or_plus:SWI (match_dup 0)
+ (match_operand:SWI 1 "<general_operand>")))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(set (match_dup 0) (match_dup 1))])
+
;; Split DST = (HI<<32)|LO early to minimize register usage.
(define_insn_and_split "*concat<mode><dwi>3_1"
[(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro,r")
@@ -13365,6 +13399,28 @@
[(const_int 0)]
"ix86_split_ashl (operands, operands[3], <DWI>mode); DONE;")
+(define_insn_and_split "*ashl<dwi>3_doubleword_highpart"
+ [(set (match_operand:<DWI> 0 "register_operand" "=r")
+ (ashift:<DWI>
+ (any_extend:<DWI> (match_operand:DWIH 1 "nonimmediate_operand" "rm"))
+ (match_operand:QI 2 "const_int_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "INTVAL (operands[2]) >= <MODE_SIZE> * BITS_PER_UNIT
+ && INTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT * 2"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[3]);
+ int bits = INTVAL (operands[2]) - (<MODE_SIZE> * BITS_PER_UNIT);
+ if (!rtx_equal_p (operands[3], operands[1]))
+ emit_move_insn (operands[3], operands[1]);
+ if (bits > 0)
+ emit_insn (gen_ashl<mode>3 (operands[3], operands[3], GEN_INT (bits)));
+ ix86_expand_clear (operands[0]);
+ DONE;
+})
+
(define_insn "x86_64_shld"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (ashift:DI (match_dup 0)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f793258..3b50c71 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1465,12 +1465,12 @@
})
(define_insn "*<avx512>_load<mode>_mask"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
- (vec_merge:VI12_AVX512VL
- (unspec:VI12_AVX512VL
- [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
+ [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VI12HFBF_AVX512VL
+ (unspec:VI12HFBF_AVX512VL
+ [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")]
UNSPEC_MASKLOAD)
- (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
+ (match_operand:VI12HFBF_AVX512VL 2 "nonimm_or_0_operand" "0C")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
"TARGET_AVX512BW"
"vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
@@ -1479,9 +1479,9 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn_and_split "*<avx512>_load<mode>"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
- (unspec:VI12_AVX512VL
- [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
+ [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VI12HFBF_AVX512VL
+ [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")]
UNSPEC_MASKLOAD))]
"TARGET_AVX512BW"
"#"
@@ -23490,6 +23490,70 @@
[(set (reg:CCZ FLAGS_REG)
(unspec:CCZ [(match_dup 0) (match_dup 1)] UNSPEC_PTEST))])
+;; ptest with identical operands always sets the carry flag; use UNSPEC_STC instead.
+(define_split
+ [(set (reg:CCC FLAGS_REG)
+ (unspec:CCC [(match_operand:V_AVX 0 "register_operand")
+ (match_operand:V_AVX 1 "register_operand")]
+ UNSPEC_PTEST))]
+ "TARGET_SSE4_1
+ && rtx_equal_p (operands[0], operands[1])"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec:CCC [(const_int 0)] UNSPEC_STC))])
+
+;; Changing the CCmode of FLAGS_REG requires updating both def and use.
+;; pandn/ptestz/set{n?}e -> ptestc/set{n?}c
+(define_split
+ [(set (match_operand:SWI 0 "register_operand")
+ (match_operator:SWI 3 "bt_comparison_operator"
+ [(unspec:CCZ [
+ (and:V_AVX (not:V_AVX (match_operand:V_AVX 1 "register_operand"))
+ (match_operand:V_AVX 2 "register_operand"))
+ (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))]
+ UNSPEC_PTEST)
+ (const_int 0)]))]
+ "TARGET_SSE4_1"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST))
+ (set (match_dup 0)
+ (match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)]))])
+
+(define_split
+ [(set (strict_low_part (match_operand:QI 0 "register_operand"))
+ (match_operator:QI 3 "bt_comparison_operator"
+ [(unspec:CCZ [
+ (and:V_AVX (not:V_AVX (match_operand:V_AVX 1 "register_operand"))
+ (match_operand:V_AVX 2 "register_operand"))
+ (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))]
+ UNSPEC_PTEST)
+ (const_int 0)]))]
+ "TARGET_SSE4_1"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST))
+ (set (strict_low_part (match_dup 0))
+ (match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)]))])
+
+;; pandn/ptestz/j{n?}e -> ptestc/j{n?}c
+(define_split
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "bt_comparison_operator"
+ [(unspec:CCZ [
+ (and:V_AVX
+ (not:V_AVX (match_operand:V_AVX 1 "register_operand"))
+ (match_operand:V_AVX 2 "register_operand"))
+ (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))]
+ UNSPEC_PTEST)
+ (const_int 0)])
+ (match_operand 0)
+ (pc)))]
+ "TARGET_SSE4_1"
+ [(set (reg:CCC FLAGS_REG)
+ (unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST))
+ (set (pc) (if_then_else (match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)])
+ (match_dup 0)
+ (pc)))])
+
(define_expand "nearbyint<mode>2"
[(set (match_operand:VFH 0 "register_operand")
(unspec:VFH
@@ -26915,17 +26979,21 @@
"TARGET_AVX")
(define_expand "maskload<mode><avx512fmaskmodelower>"
- [(set (match_operand:V48H_AVX512VL 0 "register_operand")
- (vec_merge:V48H_AVX512VL
- (match_operand:V48H_AVX512VL 1 "memory_operand")
+ [(set (match_operand:V48_AVX512VL 0 "register_operand")
+ (vec_merge:V48_AVX512VL
+ (unspec:V48_AVX512VL
+ [(match_operand:V48_AVX512VL 1 "memory_operand")]
+ UNSPEC_MASKLOAD)
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
"TARGET_AVX512F")
(define_expand "maskload<mode><avx512fmaskmodelower>"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand")
- (vec_merge:VI12_AVX512VL
- (match_operand:VI12_AVX512VL 1 "memory_operand")
+ [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand")
+ (vec_merge:VI12HFBF_AVX512VL
+ (unspec:VI12HFBF_AVX512VL
+ [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")]
+ UNSPEC_MASKLOAD)
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
"TARGET_AVX512BW")