diff options
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/i386-expand.cc | 50 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 191 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 4 | ||||
-rw-r--r-- | gcc/config/i386/predicates.md | 14 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 94 |
5 files changed, 292 insertions, 61 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index cdfd94d..a314800 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -4138,6 +4138,10 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, return false; mode = GET_MODE (dest); + if (immediate_operand (if_false, mode)) + if_false = force_reg (mode, if_false); + if (immediate_operand (if_true, mode)) + if_true = force_reg (mode, if_true); /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, but MODE may be a vector mode and thus not appropriate. */ @@ -4687,6 +4691,8 @@ ix86_expand_fp_movcc (rtx operands[]) compare_op = ix86_expand_compare (NE, tmp, const0_rtx); } + operands[2] = force_reg (mode, operands[2]); + operands[3] = force_reg (mode, operands[3]); emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (mode, compare_op, operands[2], operands[3]))); @@ -19256,8 +19262,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) e1 = gen_reg_rtx (mode); x1 = gen_reg_rtx (mode); - /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ - b = force_reg (mode, b); /* x0 = rcp(b) estimate */ @@ -19270,20 +19274,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), UNSPEC_RCP))); - /* e0 = x0 * b */ - emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); + unsigned vector_size = GET_MODE_SIZE (mode); + + /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a + N-R step with 2 fma implementation. 
*/ + if (TARGET_FMA + || (TARGET_AVX512F && vector_size == 64) + || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))) + { + /* e0 = x0 * a */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); + /* e1 = e0 * b - a */ + emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b, + gen_rtx_NEG (mode, a)))); + /* res = - e1 * x0 + e0 */ + emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode, + gen_rtx_NEG (mode, e1), + x0, e0))); + } + else + /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ + { + /* e0 = x0 * b */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); - /* e0 = x0 * e0 */ - emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); + /* e1 = x0 + x0 */ + emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); - /* e1 = x0 + x0 */ - emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); + /* e0 = x0 * e0 */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); - /* x1 = e1 - e0 */ - emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); + /* x1 = e1 - e0 */ + emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); - /* res = a * x1 */ - emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); + /* res = a * x1 */ + emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); + } } /* Output code to perform a Newton-Rhapson approximation of a diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 28603c2..78df3d9 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -25257,32 +25257,6 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar) return new ix86_vector_costs (vinfo, costing_for_scalar); } -/* Return cost of statement doing FP conversion. 
*/ - -static unsigned -fp_conversion_stmt_cost (machine_mode mode, gimple *stmt, bool scalar_p) -{ - int outer_size - = tree_to_uhwi - (TYPE_SIZE - (TREE_TYPE (gimple_assign_lhs (stmt)))); - int inner_size - = tree_to_uhwi - (TYPE_SIZE - (TREE_TYPE (gimple_assign_rhs1 (stmt)))); - int stmt_cost = vec_fp_conversion_cost - (ix86_tune_cost, GET_MODE_BITSIZE (mode)); - /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end - up doing two conversions and packing them. */ - if (!scalar_p && inner_size > outer_size) - { - int n = inner_size / outer_size; - stmt_cost = stmt_cost * n - + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op); - } - return stmt_cost; -} - unsigned ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, stmt_vec_info stmt_info, slp_tree node, @@ -25326,7 +25300,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, else if (X87_FLOAT_MODE_P (mode)) stmt_cost = ix86_cost->fadd; else - stmt_cost = ix86_cost->add; + stmt_cost = ix86_cost->add; } else stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss @@ -25381,7 +25355,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, (subcode == RSHIFT_EXPR && !TYPE_UNSIGNED (TREE_TYPE (op1))) ? ASHIFTRT : LSHIFTRT, mode, - TREE_CODE (op2) == INTEGER_CST, + TREE_CODE (op2) == INTEGER_CST, cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1, false, false, NULL, NULL); @@ -25390,30 +25364,152 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, case NOP_EXPR: /* Only sign-conversions are free. 
*/ if (tree_nop_conversion_p - (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), + (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)), TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) stmt_cost = 0; else if (fp) - stmt_cost = fp_conversion_stmt_cost (mode, stmt_info->stmt, - scalar_p); + stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); + break; + + case COND_EXPR: + { + /* SSE2 conditional move sequence is: + pcmpgtd %xmm5, %xmm0 (accounted separately) + pand %xmm0, %xmm2 + pandn %xmm1, %xmm0 + por %xmm2, %xmm0 + while SSE4 uses cmp + blend + and AVX512 masked moves. + + The condition is accounted separately since we usually have + p = a < b + c = p ? x : y + and we will account first statement as setcc. Exception is when + p is loaded from memory as bool and then we will not account + the compare, but there is no way to check for this. */ + + int ninsns = TARGET_SSE4_1 ? 1 : 3; + + /* If one of parameters is 0 or -1 the sequence will be simplified: + (if_true & mask) | (if_false & ~mask) -> if_true & mask */ + if (ninsns > 1 + && (zerop (gimple_assign_rhs2 (stmt_info->stmt)) + || zerop (gimple_assign_rhs3 (stmt_info->stmt)) + || integer_minus_onep + (gimple_assign_rhs2 (stmt_info->stmt)) + || integer_minus_onep + (gimple_assign_rhs3 (stmt_info->stmt)))) + ninsns = 1; + + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + stmt_cost = ninsns * ix86_cost->sse_op; + else if (X87_FLOAT_MODE_P (mode)) + /* x87 requires conditional branch. We don't have cost for + that. */ + ; + else if (VECTOR_MODE_P (mode)) + stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op); + else + /* compare (accounted separately) + cmov. */ + stmt_cost = ix86_cost->add; + } break; - case BIT_IOR_EXPR: - case ABS_EXPR: - case ABSU_EXPR: case MIN_EXPR: case MAX_EXPR: + if (fp) + { + if (X87_FLOAT_MODE_P (mode)) + /* x87 requires conditional branch. We don't have cost for + that. 
*/ + ; + else + /* minss */ + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + } + else + { + if (VECTOR_MODE_P (mode)) + { + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + /* vpmin was introduced in SSE3. + SSE2 needs pcmpgtd + pand + pandn + pxor. + If one of parameters is 0 or -1 the sequence is simplified + to pcmpgtd + pand. */ + if (!TARGET_SSSE3) + { + if (zerop (gimple_assign_rhs2 (stmt_info->stmt)) + || integer_minus_onep + (gimple_assign_rhs2 (stmt_info->stmt))) + stmt_cost *= 2; + else + stmt_cost *= 4; + } + } + else + /* cmp + cmov. */ + stmt_cost = ix86_cost->add * 2; + } + break; + + case ABS_EXPR: + case ABSU_EXPR: + if (fp) + { + if (X87_FLOAT_MODE_P (mode)) + /* fabs. */ + stmt_cost = ix86_cost->fabs; + else + /* andss of sign bit. */ + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + } + else + { + if (VECTOR_MODE_P (mode)) + { + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + /* vabs was introduced in SSE3. + SSE3 uses psrat + pxor + psub. */ + if (!TARGET_SSSE3) + stmt_cost *= 3; + } + else + /* neg + cmov. */ + stmt_cost = ix86_cost->add * 2; + } + break; + + case BIT_IOR_EXPR: case BIT_XOR_EXPR: case BIT_AND_EXPR: case BIT_NOT_EXPR: - if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) - stmt_cost = ix86_cost->sse_op; - else if (VECTOR_MODE_P (mode)) + gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode) + && !X87_FLOAT_MODE_P (mode)); + if (VECTOR_MODE_P (mode)) stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); else stmt_cost = ix86_cost->add; break; + default: + if (truth_value_p (subcode)) + { + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + /* CMPccS? instructions are cheap, so use sse_op. While they + produce a mask which may need to be turned to 0/1 by and, + expect that this will be optimized away in a common case. */ + stmt_cost = ix86_cost->sse_op; + else if (X87_FLOAT_MODE_P (mode)) + /* fcmp + setcc. 
*/ + stmt_cost = ix86_cost->fadd + ix86_cost->add; + else if (VECTOR_MODE_P (mode)) + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + else + /* setcc. */ + stmt_cost = ix86_cost->add; + break; + } break; } } @@ -25439,7 +25535,26 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, if (kind == vec_promote_demote && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) - stmt_cost = fp_conversion_stmt_cost (mode, stmt_info->stmt, scalar_p); + { + int outer_size + = tree_to_uhwi + (TYPE_SIZE + (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)))); + int inner_size + = tree_to_uhwi + (TYPE_SIZE + (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))); + int stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); + /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end + up doing two conversions and packing them. */ + if (inner_size > outer_size) + { + int n = inner_size / outer_size; + stmt_cost = stmt_cost * n + + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op); + } + } /* If we do elementwise loads into a vector then we are bound by latency and execution resources for the many scalar loads diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d6b2f29..e170da3 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -26592,8 +26592,8 @@ [(set (match_operand:X87MODEF 0 "register_operand") (if_then_else:X87MODEF (match_operand 1 "comparison_operator") - (match_operand:X87MODEF 2 "register_operand") - (match_operand:X87MODEF 3 "register_operand")))] + (match_operand:X87MODEF 2 "nonimm_or_0_or_1s_operand") + (match_operand:X87MODEF 3 "nonimm_or_0_operand")))] "(TARGET_80387 && TARGET_CMOVE) || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 3d3848c..4b23e18 100644 --- a/gcc/config/i386/predicates.md +++ 
b/gcc/config/i386/predicates.md @@ -1267,6 +1267,14 @@ (match_operand 0 "vector_memory_operand") (match_code "const_vector"))) +; Return true when OP is register_operand, vector_memory_operand, +; const_vector zero or const_vector all ones. +(define_predicate "vector_or_0_or_1s_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "vector_memory_operand") + (match_operand 0 "const0_operand") + (match_operand 0 "int_float_vector_all_ones_operand"))) + (define_predicate "bcst_mem_operand" (and (match_code "vec_duplicate") (and (match_test "TARGET_AVX512F") @@ -1333,6 +1341,12 @@ (ior (match_operand 0 "nonimmediate_operand") (match_operand 0 "const0_operand"))) +; Return true when OP is a nonimmediate or zero or all ones. +(define_predicate "nonimm_or_0_or_1s_operand" + (ior (match_operand 0 "nonimmediate_operand") + (match_operand 0 "const0_operand") + (match_operand 0 "int_float_vector_all_ones_operand"))) + ;; Return true for RTX codes that force SImode address. (define_predicate "SImode_address_operand" (match_code "subreg,zero_extend,and")) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b280676..2ed348c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -527,6 +527,16 @@ (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) +(define_mode_iterator V48_AVX512VL_4 + [(V4SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") + (V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")]) + +(define_mode_iterator VI48_AVX512VL_4 + [(V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")]) + +(define_mode_iterator V8_AVX512VL_2 + [(V2DF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) + (define_mode_iterator VFH_AVX10_2 [(V32HF "TARGET_AVX10_2") V16HF V8HF (V16SF "TARGET_AVX10_2") V8SF V4SF @@ -4410,7 +4420,7 @@ (unspec:<V48H_AVX512VL:avx512fmaskmode> [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v") (match_operand:V48H_AVX512VL 2 
"nonimmediate_operand" "vm") - (match_operand:SI 3 "const_0_to_7_operand" "n")] + (match_operand:SI 3 "<cmp_imm_predicate>" "n")] UNSPEC_PCMP)))] "TARGET_AVX512F && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW) @@ -4428,7 +4438,7 @@ (unspec:<V48H_AVX512VL:avx512fmaskmode> [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand") (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") - (match_operand:SI 3 "const_0_to_7_operand")] + (match_operand:SI 3 "<cmp_imm_predicate>")] UNSPEC_PCMP))) (set (match_operand:<V48H_AVX512VL:avx512fmaskmode> 4 "register_operand") (unspec:<V48H_AVX512VL:avx512fmaskmode> @@ -4469,7 +4479,8 @@ (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") (match_operand:SI 3 "<cmp_imm_predicate>" "n")] UNSPEC_PCMP)))] - "TARGET_AVX512F && ix86_pre_reload_split ()" + "TARGET_AVX512F && GET_MODE_NUNITS (<MODE>mode) >= 8 + && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) @@ -4480,6 +4491,70 @@ UNSPEC_PCMP))] "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);") +(define_insn "*<avx512>_cmp<mode>3_and15" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:V48_AVX512VL_4 1 "nonimmediate_operand" "v") + (match_operand:V48_AVX512VL_4 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "<cmp_imm_predicate>" "n")] + UNSPEC_PCMP) + (const_int 15)))] + "TARGET_AVX512F" + "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*<avx512>_ucmp<mode>3_and15" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:VI48_AVX512VL_4 1 "nonimmediate_operand" "v") + (match_operand:VI48_AVX512VL_4 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] + UNSPEC_UNSIGNED_PCMP) + (const_int 15)))] + "TARGET_AVX512F" + "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, 
%3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*<avx512>_cmp<mode>3_and3" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:V8_AVX512VL_2 1 "nonimmediate_operand" "v") + (match_operand:V8_AVX512VL_2 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "<cmp_imm_predicate>" "n")] + UNSPEC_PCMP) + (const_int 3)))] + "TARGET_AVX512F" + "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +(define_insn "*avx512vl_ucmpv2di3_and3" + [(set (match_operand:QI 0 "register_operand" "=k") + (and:QI + (unspec:QI + [(match_operand:V2DI 1 "nonimmediate_operand" "v") + (match_operand:V2DI 2 "nonimmediate_operand" "vm") + (match_operand:SI 3 "const_0_to_7_operand" "n")] + UNSPEC_UNSIGNED_PCMP) + (const_int 3)))] + "TARGET_AVX512F" + "vpcmpuq\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (unspec:<avx512fmaskmode> @@ -4762,7 +4837,8 @@ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand") (match_operand:SI 3 "const_0_to_7_operand")] UNSPEC_UNSIGNED_PCMP)))] - "TARGET_AVX512F && ix86_pre_reload_split ()" + "TARGET_AVX512F && ix86_pre_reload_split () + && GET_MODE_NUNITS (<MODE>mode) >= 8" "#" "&& 1" [(set (match_dup 0) @@ -5142,7 +5218,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VI_256_AVX2 0 "register_operand") (vec_merge:VI_256_AVX2 - (match_operand:VI_256_AVX2 1 "nonimmediate_operand") + (match_operand:VI_256_AVX2 1 "nonimm_or_0_or_1s_operand") (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand") 
(match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_AVX" @@ -5155,7 +5231,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VI_128 0 "register_operand") (vec_merge:VI_128 - (match_operand:VI_128 1 "vector_operand") + (match_operand:VI_128 1 "vector_or_0_or_1s_operand") (match_operand:VI_128 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_SSE2" @@ -5168,7 +5244,7 @@ (define_expand "vcond_mask_v1tiv1ti" [(set (match_operand:V1TI 0 "register_operand") (vec_merge:V1TI - (match_operand:V1TI 1 "vector_operand") + (match_operand:V1TI 1 "vector_or_0_or_1s_operand") (match_operand:V1TI 2 "nonimm_or_0_operand") (match_operand:V1TI 3 "register_operand")))] "TARGET_SSE2" @@ -5181,7 +5257,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VF_256 0 "register_operand") (vec_merge:VF_256 - (match_operand:VF_256 1 "nonimmediate_operand") + (match_operand:VF_256 1 "nonimm_or_0_or_1s_operand") (match_operand:VF_256 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_AVX" @@ -5194,7 +5270,7 @@ (define_expand "vcond_mask_<mode><sseintvecmodelower>" [(set (match_operand:VF_128 0 "register_operand") (vec_merge:VF_128 - (match_operand:VF_128 1 "vector_operand") + (match_operand:VF_128 1 "vector_or_0_or_1s_operand") (match_operand:VF_128 2 "nonimm_or_0_operand") (match_operand:<sseintvecmode> 3 "register_operand")))] "TARGET_SSE" |