diff options
Diffstat (limited to 'gcc/config/i386/i386.cc')
-rw-r--r-- | gcc/config/i386/i386.cc | 82 |
1 files changed, 77 insertions, 5 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 0c808c2..5ad47e1 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -22025,6 +22025,15 @@ vec_fp_conversion_cost (const struct processor_costs *cost, int size) return cost->vcvtps2pd512; } +/* Return true of X is UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP. */ + +static bool +unspec_pcmp_p (rtx x) +{ + return GET_CODE (x) == UNSPEC + && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP); +} + /* Compute a (partial) cost for rtx X. Return true if the complete cost has been computed, and false if subexpressions should be scanned. In either case, *TOTAL contains the cost result. */ @@ -22807,14 +22816,77 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, case VEC_MERGE: mask = XEXP (x, 2); + /* Scalar versions of SSE instructions may be represented as: + + (vec_merge (vec_duplicate (operation ....)) + (register or memory) + (const_int 1)) + + In this case vec_merge and vec_duplicate is for free. + Just recurse into operation and second operand. */ + if (mask == const1_rtx + && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE) + { + *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode, + outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); + return true; + } /* This is masked instruction, assume the same cost, as nonmasked variant. */ - if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) - *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed); + else if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) + { + *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); + return true; + } + /* Combination of the two above: + + (vec_merge (vec_merge (vec_duplicate (operation ...)) + (register or memory) + (reg:QI mask)) + (register or memory) + (const_int 1)) + + i.e. avx512fp16_vcvtss2sh_mask. */ + else if (TARGET_AVX512F + && mask == const1_rtx + && GET_CODE (XEXP (x, 0)) == VEC_MERGE + && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE + && register_operand (XEXP (XEXP (x, 0), 2), + GET_MODE (XEXP (XEXP (x, 0), 2)))) + { + *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), + mode, outer_code, opno, speed) + + rtx_cost (XEXP (XEXP (x, 0), 1), + mode, outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); + return true; + } + /* vcmp. */ + else if (unspec_pcmp_p (mask) + || (GET_CODE (mask) == NOT + && unspec_pcmp_p (XEXP (mask, 0)))) + { + rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask; + rtx unsop0 = XVECEXP (uns, 0, 0); + /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0) + cost the same as register. + This is used by avx_cmp<mode>3_ltint_not. */ + if (GET_CODE (unsop0) == SUBREG) + unsop0 = XEXP (unsop0, 0); + if (GET_CODE (unsop0) == NOT) + unsop0 = XEXP (unsop0, 0); + *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed) + + rtx_cost (unsop0, mode, UNSPEC, opno, speed) + + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed) + + cost->sse_op; + return true; + } else - /* ??? We should still recruse when computing cost. */ *total = cost->sse_op; - return true; + return false; case MEM: /* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast. @@ -22831,7 +22903,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, } /* An insn that accesses memory is slightly more expensive - than one that does not. */ + than one that does not. */ if (speed) { *total += 1; |