;; GCC machine description for MMX and 3dNOW! instructions
;; Copyright (C) 2005-2023 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; The MMX and 3dNOW! patterns are in the same file because they use
;; the same register file, and 3dNOW! adds a number of extensions to
;; the base integer MMX isa.
;; Note! Except for the basic move instructions, *all* of these
;; patterns are outside the normal optabs namespace. This is because
;; use of these registers requires the insertion of emms or femms
;; instructions to return to normal fpu mode. The compiler doesn't
;; know how to do that itself, which means it's up to the user. Which
;; means that we should never use any of these patterns except at the
;; direction of the user via a builtin.
;; UNSPEC codes referenced by the patterns in this file: the movntq
;; store (UNSPEC_MOVNTQ) and the 3dNOW! pfrcp, pfrcpit1, pfrcpit2,
;; pfrsqrt and pfrsqit1 instructions, each of which is modelled as an
;; opaque unspec rather than as generic RTL arithmetic.
(define_c_enum "unspec" [
UNSPEC_MOVNTQ
UNSPEC_PFRCP
UNSPEC_PFRCPIT1
UNSPEC_PFRCPIT2
UNSPEC_PFRSQRT
UNSPEC_PFRSQIT1
])
;; Volatile unspec codes.  The names match the emms and femms
;; instructions described in the note at the top of this file; the
;; patterns that use them are not in this part of the file.
(define_c_enum "unspecv" [
UNSPECV_EMMS
UNSPECV_FEMMS
])
;; 8 byte integral modes handled by MMX (and by extension, SSE)
(define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
;; Same as MMXMODEI, plus V1DI when TARGET_SSE2 is enabled.
(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
;; All 8-byte vector modes handled by MMX
(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF V4HF V4BF])
;; 8-byte vector modes with 1-, 2- or 4-byte elements (integer, plus V2SF).
(define_mode_iterator MMXMODE124 [V8QI V4HI V2SI V2SF])
;; Mix-n-match
;; The digits in each name are the element sizes in bytes that the
;; iterator covers (1 = QI, 2 = HI, 4 = SI, 8 = DI elements).
(define_mode_iterator MMXMODE12 [V8QI V4HI])
(define_mode_iterator MMXMODE14 [V8QI V2SI])
(define_mode_iterator MMXMODE24 [V4HI V2SI])
(define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
;; All 4-byte integer/float16 vector modes
(define_mode_iterator V_32 [V4QI V2HI V1SI V2HF V2BF])
;; 4-byte vector modes with two 16-bit elements (HF, BF and HI).
(define_mode_iterator V2FI_32 [V2HF V2BF V2HI])
;; 8-byte vector modes with four 16-bit elements (HF, BF and HI).
(define_mode_iterator V4FI_64 [V4HF V4BF V4HI])
;; 8-byte vector modes with four 16-bit floating-point elements.
(define_mode_iterator V4F_64 [V4HF V4BF])
;; 4-byte vector modes with two 16-bit floating-point elements.
(define_mode_iterator V2F_32 [V2HF V2BF])
;; 4-byte integer vector modes
(define_mode_iterator VI_32 [V4QI V2HI])
;; 4-byte and 2-byte integer vector modes
(define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
;; 4-byte and 2-byte QImode vector modes
(define_mode_iterator VI1_16_32 [V4QI V2QI])
;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element
;; The 8-byte entries are only enabled for TARGET_64BIT.
(define_mode_iterator V_16_32_64
[V2QI V4QI V2HI V2HF
(V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT")
(V4HF "TARGET_64BIT") (V4BF "TARGET_64BIT")
(V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
;; V2S* modes
(define_mode_iterator V2FI [V2SF V2SI])
;; V2S* modes plus the 8-byte modes with four 16-bit elements (V4HF, V4HI).
(define_mode_iterator V24FI [V2SF V2SI V4HF V4HI])
;; Mapping from integer vector mode to mnemonic suffix
;; (b, w, d or q according to the element size).
(define_mode_attr mmxvecsize
[(V8QI "b") (V4QI "b") (V2QI "b")
(V4HI "w") (V2HI "w") (V2SI "d") (V1DI "q")])
;; Mapping to same size integral mode.
;; 8-byte vectors map to DI, 4-byte vectors to SI and V2QI to HI.
(define_mode_attr mmxinsnmode
[(V8QI "DI") (V4QI "SI") (V2QI "HI")
(V4HI "DI") (V2HI "SI")
(V2SI "DI")
(V4HF "DI") (V2HF "SI")
(V4BF "DI") (V2BF "SI")
(V2SF "DI")])
;; Mapping to the vector mode with the same element count and elements
;; of twice the width.
(define_mode_attr mmxdoublemode
[(V8QI "V8HI") (V4HI "V4SI")])
;; Mapping of vector float modes to an integer mode of the same size
;; (integer vector modes map to themselves).
(define_mode_attr mmxintvecmode
[(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI")
(V4HF "V4HI") (V2HF "V2HI")])
;; Lower-case spelling of mmxintvecmode, for use where a lower-case
;; mode name is needed.
(define_mode_attr mmxintvecmodelower
[(V2SF "v2si") (V2SI "v2si") (V4HI "v4hi") (V8QI "v8qi")
(V4HF "v4hi") (V2HF "v2hi")])
;; Mapping of vector modes to a vector mode of double size
;; (same element mode, twice the element count).
(define_mode_attr mmxdoublevecmode
[(V2SF "V4SF") (V2SI "V4SI") (V4HF "V8HF") (V4HI "V8HI")])
;; Mapping of vector modes back to the scalar modes
(define_mode_attr mmxscalarmode
[(V2SI "SI") (V2SF "SF")])
;; Per-mode choice between the "Yw" and "Yv" register constraints:
;; byte and word element modes use Yw, the others Yv.
(define_mode_attr Yv_Yw
[(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; All of these patterns are enabled for MMX as well as 3dNOW.
;; This is essential for maintaining stable calling conventions.
;; Move expander for every MMXMODE mode.  It is available when either
;; TARGET_MMX or TARGET_MMX_WITH_SSE holds and hands all the work to
;; ix86_expand_vector_move.
;; NOTE(review): the pattern name and the mode argument look like they
;; have lost their mode-iterator placeholder -- confirm against upstream.
(define_expand "mov"
[(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_move (mode, operands);
DONE;
})
;; The move insn for 8-byte (MMXMODE) vectors.  Memory-to-memory moves
;; are rejected by the insn condition, which also consults
;; ix86_hardreg_mov_ok.  The output code dispatches on the "type"
;; attribute, which is derived from the alternative number:
;;   0,1     multi    - output "#" (isa nox64)
;;   2,3,4   imov     - integer mov; alternative 2 uses SImode (isa x64)
;;   5       mmx      - pxor of the destination with itself
;;   6..10   mmxmov   - movq, or movd for assemblers lacking
;;                      HAVE_AS_IX86_INTERUNIT_MOVQ when a general
;;                      register is involved
;;   11      sselog1  - standard_sse_constant_opcode
;;   12..16  ssemov   - ix86_output_ssemov
;;   17,18   ssecvt   - movq2dq / movdq2q between MMX and SSE registers
(define_insn "*mov_internal"
[(set (match_operand:MMXMODE 0 "nonimmediate_operand"
"=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!y,v,v,v,m,r,v,!y,*x")
(match_operand:MMXMODE 1 "nonimm_or_0_operand"
"rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!y,r ,C,v,m,v,v,r,*x,!y"))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))
&& ix86_hardreg_mov_ok (operands[0], operands[1])"
{
switch (get_attr_type (insn))
{
case TYPE_MULTI:
return "#";
case TYPE_IMOV:
if (get_attr_mode (insn) == MODE_SI)
return "mov{l}\t{%1, %k0|%k0, %1}";
else
return "mov{q}\t{%1, %0|%0, %1}";
case TYPE_MMX:
return "pxor\t%0, %0";
case TYPE_MMXMOV:
/* Handle broken assemblers that require movd instead of movq. */
if (!HAVE_AS_IX86_INTERUNIT_MOVQ
&& (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
return "movd\t{%1, %0|%0, %1}";
return "movq\t{%1, %0|%0, %1}";
case TYPE_SSECVT:
if (SSE_REG_P (operands[0]))
return "movq2dq\t{%1, %0|%0, %1}";
else
return "movdq2q\t{%1, %0|%0, %1}";
case TYPE_SSELOG1:
return standard_sse_constant_opcode (insn, operands);
case TYPE_SSEMOV:
return ix86_output_ssemov (insn, operands);
default:
gcc_unreachable ();
}
}
;; ISA requirement of each alternative.
[(set (attr "isa")
(cond [(eq_attr "alternative" "0,1")
(const_string "nox64")
(eq_attr "alternative" "2,3,4,9,10")
(const_string "x64")
(eq_attr "alternative" "15,16")
(const_string "x64_sse2")
(eq_attr "alternative" "17,18")
(const_string "sse2")
]
(const_string "*")))
;; Instruction type; this is what the output switch above dispatches on.
(set (attr "type")
(cond [(eq_attr "alternative" "0,1")
(const_string "multi")
(eq_attr "alternative" "2,3,4")
(const_string "imov")
(eq_attr "alternative" "5")
(const_string "mmx")
(eq_attr "alternative" "6,7,8,9,10")
(const_string "mmxmov")
(eq_attr "alternative" "11")
(const_string "sselog1")
(eq_attr "alternative" "17,18")
(const_string "ssecvt")
]
(const_string "ssemov")))
;; The general-register <-> MMX/SSE register alternatives are marked
;; as carrying a REX prefix.
(set (attr "prefix_rex")
(if_then_else (eq_attr "alternative" "9,10,15,16")
(const_string "1")
(const_string "*")))
(set (attr "prefix")
(if_then_else (eq_attr "type" "sselog1,ssemov")
(const_string "maybe_vex")
(const_string "orig")))
(set (attr "prefix_data16")
(if_then_else
(and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
(const_string "1")
(const_string "*")))
;; Insn mode.  The SSE alternatives (11..14) fall back to the float
;; modes V4SF / V2SF for float vector modes, when SSE2 is unavailable,
;; or (alternatives 11,12) when optimizing for size; everything not
;; listed is DI.
;; NOTE(review): the "mode == ..." tests look like they have lost their
;; mode-iterator prefix -- confirm against upstream.
(set (attr "mode")
(cond [(eq_attr "alternative" "2")
(const_string "SI")
(eq_attr "alternative" "11,12")
(cond [(match_test "mode == V2SFmode
|| mode == V4HFmode
|| mode == V4BFmode")
(const_string "V4SF")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
]
(const_string "TI"))
(and (eq_attr "alternative" "13")
(ior (ior (and (match_test "mode == V2SFmode")
(not (match_test "TARGET_MMX_WITH_SSE")))
(not (match_test "TARGET_SSE2")))
(match_test "mode == V4HFmode
|| mode == V4BFmode")))
(const_string "V2SF")
(and (eq_attr "alternative" "14")
(ior (ior (match_test "mode == V2SFmode")
(not (match_test "TARGET_SSE2")))
(match_test "mode == V4HFmode
|| mode == V4BFmode")))
(const_string "V2SF")
]
(const_string "DI")))
;; Alternatives that cross between vector and general registers are
;; only preferred for speed when the matching inter-unit move tuning
;; flag is set.
(set (attr "preferred_for_speed")
(cond [(eq_attr "alternative" "9,15")
(symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
(eq_attr "alternative" "10,16")
(symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
]
(symbol_ref "true")))])
;; On 32-bit targets, after reload, split an MMXMODE move whose source
;; and destination both satisfy nonimmediate_gr_operand, using
;; ix86_split_long_move.
(define_split
[(set (match_operand:MMXMODE 0 "nonimmediate_gr_operand")
(match_operand:MMXMODE 1 "nonimmediate_gr_operand"))]
"!TARGET_64BIT && reload_completed"
[(const_int 0)]
"ix86_split_long_move (operands); DONE;")
;; On 32-bit targets, after reload, split the store of an all-zero
;; MMXMODE constant into a nonimmediate_gr_operand destination, again
;; through ix86_split_long_move.
(define_split
[(set (match_operand:MMXMODE 0 "nonimmediate_gr_operand")
(match_operand:MMXMODE 1 "const0_operand"))]
"!TARGET_64BIT && reload_completed"
[(const_int 0)]
"ix86_split_long_move (operands); DONE;")
;; Misaligned-move expander for the MMXMODE modes.  It expands exactly
;; like the ordinary move, via ix86_expand_vector_move.
;; NOTE(review): the pattern name and mode argument look like they have
;; lost their mode-iterator placeholder -- confirm against upstream.
(define_expand "movmisalign"
[(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_move (mode, operands);
DONE;
})
;; Move expander for the 4-byte V_32 modes.  It is enabled
;; unconditionally (empty condition) and defers to
;; ix86_expand_vector_move.
;; NOTE(review): the pattern name and mode argument look like they have
;; lost their mode-iterator placeholder -- confirm against upstream.
(define_expand "mov"
[(set (match_operand:V_32 0 "nonimmediate_operand")
(match_operand:V_32 1 "nonimmediate_operand"))]
""
{
ix86_expand_vector_move (mode, operands);
DONE;
})
;; The move insn for 4-byte (V_32) vectors.  Memory-to-memory moves are
;; rejected.  Alternatives by "type":
;;   0,1   imov     - 32-bit integer mov (general register / memory /
;;                    constant "C")
;;   2     sselog1  - constant into an SSE register via
;;                    standard_sse_constant_opcode
;;   3..7  ssemov   - ix86_output_ssemov; alternatives 6 and 7 move
;;                    between SSE and general registers and need SSE2
(define_insn "*mov_internal"
[(set (match_operand:V_32 0 "nonimmediate_operand"
"=r ,m ,v,v,v,m,r,v")
(match_operand:V_32 1 "general_operand"
"rmC,rC,C,v,m,v,v,r"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& ix86_hardreg_mov_ok (operands[0], operands[1])"
{
switch (get_attr_type (insn))
{
case TYPE_IMOV:
return "mov{l}\t{%1, %0|%0, %1}";
case TYPE_SSELOG1:
return standard_sse_constant_opcode (insn, operands);
case TYPE_SSEMOV:
return ix86_output_ssemov (insn, operands);
default:
gcc_unreachable ();
}
}
[(set (attr "isa")
(cond [(eq_attr "alternative" "6,7")
(const_string "sse2")
]
(const_string "*")))
(set (attr "type")
(cond [(eq_attr "alternative" "2")
(const_string "sselog1")
(eq_attr "alternative" "3,4,5,6,7")
(const_string "ssemov")
]
(const_string "imov")))
(set (attr "prefix")
(if_then_else (eq_attr "type" "sselog1,ssemov")
(const_string "maybe_vex")
(const_string "orig")))
(set (attr "prefix_data16")
(if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
(const_string "1")
(const_string "*")))
;; Insn mode.  Register/constant SSE alternatives (2,3) use V4SF for the
;; 16-bit float modes, TI with AVX, V4SF without SSE2 or when optimizing
;; for size, else TI.  SSE load/store alternatives (4,5) use SF for the
;; 16-bit float modes or without SSE2.  Everything else is SI.
;; NOTE(review): the "mode == ..." tests look like they have lost their
;; mode-iterator prefix -- confirm against upstream.
(set (attr "mode")
(cond [(eq_attr "alternative" "2,3")
(cond [(match_test "mode == V2HFmode
|| mode == V2BFmode")
(const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
]
(const_string "TI"))
(and (eq_attr "alternative" "4,5")
(ior (match_test "mode == V2HFmode
|| mode == V2BFmode")
(not (match_test "TARGET_SSE2"))))
(const_string "SF")
]
(const_string "SI")))
;; Inter-unit alternatives are only preferred for speed when the
;; corresponding tuning flag allows them.
(set (attr "preferred_for_speed")
(cond [(eq_attr "alternative" "6")
(symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
(eq_attr "alternative" "7")
(symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
]
(symbol_ref "true")))])
;; 16-bit, 32-bit and 64-bit constant vector stores. After reload,
;; convert them to immediate integer stores.
;; The insn matches a store of an x86_64_const_vector_operand to memory
;; and is always output as "#".  The split converts the constant vector
;; to a HOST_WIDE_INT with ix86_convert_const_vector_to_integer, uses
;; GEN_INT of that value as the new source, and rewrites the destination
;; with lowpart_subreg.
;; NOTE(review): the pattern name and the mode arguments in the split
;; body look like they have lost their iterator placeholders (both
;; lowpart_subreg modes read the same) -- confirm against upstream.
(define_insn_and_split "*mov_imm"
[(set (match_operand:V_16_32_64 0 "memory_operand" "=m")
(match_operand:V_16_32_64 1 "x86_64_const_vector_operand" "i"))]
""
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 1))]
{
HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
mode);
operands[1] = GEN_INT (val);
operands[0] = lowpart_subreg (mode, operands[0], mode);
})
;; For TARGET_64BIT we always round up to 8 bytes.
;; Push of a V_32 value on 64-bit targets.  Alternative 0 (general
;; register or constant "C") is a 64-bit push of the %q form of the
;; operand; alternative 1 (SSE register) is output as "#" with type
;; "multi", i.e. it is expected to be split before final output.
(define_insn "*push2_rex64"
[(set (match_operand:V_32 0 "push_operand" "=X,X")
(match_operand:V_32 1 "nonmemory_no_elim_operand" "rC,*v"))]
"TARGET_64BIT"
"@
push{q}\t%q1
#"
[(set_attr "type" "push,multi")
(set_attr "mode" "DI")])
;; After reload on 64-bit SSE targets, split a push of a V_32 value held
;; in an SSE register into an explicit stack-pointer adjustment by
;; -PUSH_ROUNDING (size of the mode) followed by an ordinary store to
;; the address in the stack pointer.
;; NOTE(review): "mode" in GET_MODE_SIZE looks like it has lost its
;; mode-iterator prefix -- confirm against upstream.
(define_split
[(set (match_operand:V_32 0 "push_operand")
(match_operand:V_32 1 "sse_reg_operand"))]
"TARGET_64BIT && TARGET_SSE && reload_completed"
[(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
(set (match_dup 0) (match_dup 1))]
{
operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (mode)));
/* Preserve memory attributes. */
operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})
;; Misaligned-move expander for the 4-byte V_32 modes.  Enabled
;; unconditionally; expands through ix86_expand_vector_move just like
;; the ordinary move.
;; NOTE(review): the pattern name and mode argument look like they have
;; lost their mode-iterator placeholder -- confirm against upstream.
(define_expand "movmisalign"
[(set (match_operand:V_32 0 "nonimmediate_operand")
(match_operand:V_32 1 "nonimmediate_operand"))]
""
{
ix86_expand_vector_move (mode, operands);
DONE;
})
;; Move expander for the 2-byte V2QI mode.  Enabled unconditionally;
;; all work is done by ix86_expand_vector_move.
(define_expand "movv2qi"
[(set (match_operand:V2QI 0 "nonimmediate_operand")
(match_operand:V2QI 1 "nonimmediate_operand"))]
""
{
ix86_expand_vector_move (V2QImode, operands);
DONE;
})
;; The move insn for V2QI.  Memory-to-memory moves are rejected.
;; Alternatives:
;;   0..3   general register / memory / constant moves, output as a
;;          32-bit mov, a 16-bit mov or a zero-extending movz depending
;;          on the computed "type" and "mode" attributes
;;   4      constant "C" into an SSE register (sselog1)
;;   5      SSE register to SSE register (ssemov)
;;   6,7,8  SSE register <-> memory; ssemov with TARGET_AVX512FP16,
;;          otherwise sselog1 using pinsrw (load) / pextrw (store)
;;   9,10   SSE register <-> general register (ssemov, needs SSE2)
(define_insn "*movv2qi_internal"
[(set (match_operand:V2QI 0 "nonimmediate_operand"
"=r,r,r,m ,v,v,v,jm,m,r,v")
(match_operand:V2QI 1 "general_operand"
"r ,C,m,rC,C,v,m,x,v,v,r"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
{
case TYPE_IMOV:
if (get_attr_mode (insn) == MODE_SI)
return "mov{l}\t{%k1, %k0|%k0, %k1}";
else
return "mov{w}\t{%1, %0|%0, %1}";
case TYPE_IMOVX:
/* movzwl is faster than movw on p2 due to partial word stalls,
though not as fast as an aligned movl. */
return "movz{wl|x}\t{%1, %k0|%k0, %1}";
case TYPE_SSELOG1:
if (satisfies_constraint_C (operands[1]))
return standard_sse_constant_opcode (insn, operands);
if (SSE_REG_P (operands[0]))
return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
else
return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";
case TYPE_SSEMOV:
return ix86_output_ssemov (insn, operands);
default:
gcc_unreachable ();
}
}
;; The store from an SSE register has two forms: alternative 7 for
;; SSE4 without AVX and alternative 8 for AVX.
[(set (attr "isa")
(cond [(eq_attr "alternative" "6,9,10")
(const_string "sse2")
(eq_attr "alternative" "7")
(const_string "sse4_noavx")
(eq_attr "alternative" "8")
(const_string "avx")
]
(const_string "*")))
;; Alternative 7 (the "jm" memory destination) is tagged gpr16.
(set (attr "addr")
(if_then_else (eq_attr "alternative" "7")
(const_string "gpr16")
(const_string "*")))
;; Instruction type.  For the integer alternatives the cond is evaluated
;; in order: imov when optimizing for size; imov for a reg-reg move
;; unless both TARGET_PARTIAL_REG_STALL and TARGET_HIMODE_MATH hold;
;; imov for an aligned source in alternatives 1,2; imovx for
;; alternatives 0,2 with TARGET_MOVX; otherwise imov.
(set (attr "type")
(cond [(eq_attr "alternative" "6,7,8")
(if_then_else (match_test "TARGET_AVX512FP16")
(const_string "ssemov")
(const_string "sselog1"))
(eq_attr "alternative" "4")
(const_string "sselog1")
(eq_attr "alternative" "5,9,10")
(const_string "ssemov")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "imov")
(and (eq_attr "alternative" "0")
(ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
(not (match_test "TARGET_HIMODE_MATH"))))
(const_string "imov")
(and (eq_attr "alternative" "1,2")
(match_operand:V2QI 1 "aligned_operand"))
(const_string "imov")
(and (match_test "TARGET_MOVX")
(eq_attr "alternative" "0,2"))
(const_string "imovx")
]
(const_string "imov")))
(set (attr "prefix")
(cond [(eq_attr "alternative" "4,5,6,7,8,9,10")
(const_string "maybe_evex")
]
(const_string "orig")))
;; Insn mode.  For the integer alternatives, SI selects the 32-bit mov
;; in the output code above and HI (the default) the 16-bit mov.
(set (attr "mode")
(cond [(eq_attr "alternative" "6,7,8")
(if_then_else (match_test "TARGET_AVX512FP16")
(const_string "HI")
(const_string "TI"))
(eq_attr "alternative" "9,10")
(if_then_else (match_test "TARGET_AVX512FP16")
(const_string "HI")
(const_string "SI"))
(eq_attr "alternative" "4")
(cond [(match_test "TARGET_AVX")
(const_string "TI")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
]
(const_string "TI"))
(eq_attr "alternative" "5")
(cond [(match_test "TARGET_AVX512FP16")
(const_string "HF")
(match_test "TARGET_AVX")
(const_string "TI")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
]
(const_string "TI"))
(eq_attr "type" "imovx")
(const_string "SI")
(and (eq_attr "alternative" "1,2")
(match_operand:V2QI 1 "aligned_operand"))
(const_string "SI")
(and (eq_attr "alternative" "0")
(ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
(not (match_test "TARGET_HIMODE_MATH"))))
(const_string "SI")
]
(const_string "HI")))
;; Inter-unit alternatives are only preferred for speed when the
;; corresponding tuning flag allows them.
(set (attr "preferred_for_speed")
(cond [(eq_attr "alternative" "9")
(symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
(eq_attr "alternative" "10")
(symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
]
(symbol_ref "true")))])
;; We always round up to UNITS_PER_WORD bytes.
;; Push of a V2QI value.  Alternative 0 (general register or constant
;; "C") pushes the operand widened to the word size: push{q} of %q1 on
;; 64-bit targets, push{l} of %k1 otherwise.  Alternative 1 (SSE
;; register, isa sse4) is output as "#" with type "multi", i.e. it is
;; expected to be split before final output.
;;
;; The output template uses the "@" multi-alternative form so that the
;; C fragment (the line starting with "*") applies to alternative 0
;; only and "#" is really the template of alternative 1.  Without the
;; leading "@" the whole string would be a single C fragment and "#"
;; would never be returned for the SSE-register alternative.
(define_insn "*pushv2qi2"
[(set (match_operand:V2QI 0 "push_operand" "=X,X")
(match_operand:V2QI 1 "nonmemory_no_elim_operand" "rC,v"))]
""
"@
* return TARGET_64BIT ? \"push{q}\t%q1\" : \"push{l}\t%k1\";
#"
[(set_attr "isa" "*,sse4")
(set_attr "type" "push,multi")
;; Alternative 0 uses the word mode of the target; alternative 1 is HI
;; with TARGET_AVX512FP16 and TI otherwise.
(set (attr "mode")
(cond [(eq_attr "alternative" "0")
(if_then_else (match_test "TARGET_64BIT")
(const_string "DI")
(const_string "SI"))
(eq_attr "alternative" "1")
(if_then_else (match_test "TARGET_AVX512FP16")
(const_string "HI")
(const_string "TI"))
]
(const_string "HI")))])
;; After reload on SSE4.1 targets, split a push of a V2QI value held in
;; an SSE register into an explicit stack-pointer adjustment by
;; -PUSH_ROUNDING (size of V2QImode) followed by an ordinary store to
;; the address in the stack pointer.
(define_split
[(set (match_operand:V2QI 0 "push_operand")
(match_operand:V2QI 1 "sse_reg_operand"))]
"TARGET_SSE4_1 && reload_completed"
[(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
(set (match_dup 0) (match_dup 1))]
{
operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V2QImode)));
/* Preserve memory attributes. */
operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})
;; Misaligned-move expander for V2QI.  Enabled unconditionally; expands
;; through ix86_expand_vector_move exactly like movv2qi.
(define_expand "movmisalignv2qi"
[(set (match_operand:V2QI 0 "nonimmediate_operand")
(match_operand:V2QI 1 "nonimmediate_operand"))]
""
{
ix86_expand_vector_move (V2QImode, operands);
DONE;
})
;; Store of a DImode register to memory, wrapped in UNSPEC_MOVNTQ.
;; Alternative 0 stores from an MMX register with movntq (mmx_isa
;; "native"); alternative 1 stores from a general register with movnti
;; and is restricted to isa x64.  Requires MMX (native or emulated with
;; SSE) together with either SSE or TARGET_3DNOW_A.
(define_insn "sse_movntq"
[(set (match_operand:DI 0 "memory_operand" "=m,m")
(unspec:DI [(match_operand:DI 1 "register_operand" "y,r")]
UNSPEC_MOVNTQ))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)"
"@
movntq\t{%1, %0|%0, %1}
movnti\t{%1, %0|%0, %1}"
[(set_attr "isa" "*,x64")
(set_attr "mmx_isa" "native,*")
(set_attr "type" "mmxmov,ssemov")
(set_attr "mode" "DI")])
;; Widen an 8-byte V24FI vector (operand 1) into the register operand 0
;; by concatenating it with a zero vector: operand 2 is set to
;; CONST0_RTX and the vec_concat template is emitted.
;; For every mode except V2SImode, when -ftrapping-math is off, the
;; zeroing is skipped: operand 1 is forced into a register and operand 0
;; is simply set from a lowpart_subreg of it.
;; NOTE(review): the pattern name, the modes after "match_operand:" and
;; "vec_concat:", and the mode arguments in the C body look like they
;; have lost their iterator placeholders -- confirm against upstream.
(define_expand "movq__to_sse"
[(set (match_operand: 0 "register_operand")
(vec_concat:
(match_operand:V24FI 1 "nonimmediate_operand")
(match_dup 2)))]
"TARGET_SSE2"
{
if (mode != V2SImode
&& !flag_trapping_math)
{
rtx op1 = force_reg (mode, operands[1]);
emit_move_insn (operands[0], lowpart_subreg (mode,
op1, mode));
DONE;
}
operands[2] = CONST0_RTX (mode);
})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V2SF operations covered by the absneg code iterator.
;; Available with TARGET_MMX_WITH_SSE; the expansion is delegated to
;; ix86_expand_fp_absneg_operator.
;; NOTE(review): the pattern name and the first argument of the call
;; look like they have lost their code-iterator placeholder -- confirm
;; against upstream.
(define_expand "v2sf2"
[(set (match_operand:V2SF 0 "register_operand")
(absneg:V2SF
(match_operand:V2SF 1 "register_operand")))]
"TARGET_MMX_WITH_SSE"
"ix86_expand_fp_absneg_operator (, V2SFmode, operands); DONE;")
;; V2SF absneg operation carried together with a (use ...) of operand 2.
;; The insn is output as "#" and, after reload, split into a two-operand
;; V2SF operation on operands 1 and 2.
;; (Operand 2 is presumably the bit mask the operation is applied with;
;; that is set up by the expander helper, not visible here.)
;; Without AVX the destination must match one of the inputs; when it
;; matches operand 2, the two inputs are swapped before the split
;; pattern is generated.
;; NOTE(review): the pattern name and the operator in the split template
;; look like they have lost their code-iterator placeholder -- confirm
;; against upstream.
(define_insn_and_split "*mmx_v2sf2"
[(set (match_operand:V2SF 0 "register_operand" "=x,x,x")
(absneg:V2SF
(match_operand:V2SF 1 "register_operand" "0,x,x")))
(use (match_operand:V2SF 2 "nonimmediate_operand" "x,0,x"))]
"TARGET_MMX_WITH_SSE"
"#"
"&& reload_completed"
[(set (match_dup 0)
(:V2SF (match_dup 1) (match_dup 2)))]
{
if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
std::swap (operands[1], operands[2]);
}
[(set_attr "isa" "noavx,noavx,avx")])
;; Negated absolute value of a V2SF vector, carried together with a
;; (use ...) of operand 2.  Output as "#" and, after reload, split into
;; a V2SF inclusive-or of operands 1 and 2.
;; (Operand 2 is presumably a sign-bit mask; it is provided by whatever
;; creates this pattern, not visible here.)
;; Without AVX, when the destination matches operand 2 the two inputs
;; are swapped before the split pattern is generated.
(define_insn_and_split "*mmx_nabsv2sf2"
[(set (match_operand:V2SF 0 "register_operand" "=x,x,x")
(neg:V2SF
(abs:V2SF
(match_operand:V2SF 1 "register_operand" "0,x,x"))))
(use (match_operand:V2SF 2 "nonimmediate_operand" "x,0,x"))]
"TARGET_MMX_WITH_SSE"
"#"
"&& reload_completed"
[(set (match_dup 0)
(ior:V2SF (match_dup 1) (match_dup 2)))]
{
if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
std::swap (operands[1], operands[2]);
}
[(set_attr "isa" "noavx,noavx,avx")])
;; V2SF add / subtract / multiply (plusminusmult iterator), implemented
;; on the 16-byte SSE unit.  Both inputs are widened into fresh V4SF
;; pseudos with gen_movq_v2sf_to_sse, the V4SF operation is emitted, and
;; the low V2SF part of the result is moved to operand 0.
;; Enabled for TARGET_MMX_WITH_SSE when ix86_partial_vec_fp_math is set.
;; NOTE(review): the pattern name and the "gen_v4sf3" call look like
;; they have lost their iterator placeholder -- confirm against upstream.
(define_expand "v2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(plusminusmult:V2SF
(match_operand:V2SF 1 "nonimmediate_operand")
(match_operand:V2SF 2 "nonimmediate_operand")))]
"TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
rtx op2 = gen_reg_rtx (V4SFmode);
rtx op1 = gen_reg_rtx (V4SFmode);
rtx op0 = gen_reg_rtx (V4SFmode);
emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
emit_insn (gen_v4sf3 (op0, op1, op2));
emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
DONE;
})
;; 3dNOW! V2SF addition expander.  The operands are first adjusted with
;; ix86_fixup_binary_operands_no_copy, then the template is emitted.
(define_expand "mmx_addv2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(plus:V2SF
(match_operand:V2SF 1 "nonimmediate_operand")
(match_operand:V2SF 2 "nonimmediate_operand")))]
"TARGET_3DNOW"
"ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
;; 3dNOW! V2SF addition, output as pfadd.  Operand 1 is tied to the
;; destination MMX register and marked commutative ("%"); operand 2 may
;; be an MMX register or memory.
(define_insn "*mmx_addv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
(match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
"pfadd\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxadd")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; 3dNOW! V2SF subtraction expander: operand 0 = operand 1 - operand 2.
;; There is no preparation code; the template is emitted as is.
(define_expand "mmx_subv2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(minus:V2SF (match_operand:V2SF 1 "register_operand")
(match_operand:V2SF 2 "nonimmediate_operand")))]
"TARGET_3DNOW")
;; 3dNOW! reversed V2SF subtraction expander.  Note the operand numbers
;; in the template: operand 0 = operand 2 - operand 1.
(define_expand "mmx_subrv2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(minus:V2SF (match_operand:V2SF 2 "register_operand")
(match_operand:V2SF 1 "nonimmediate_operand")))]
"TARGET_3DNOW")
;; 3dNOW! V2SF subtraction.  Two alternatives:
;;   0: the minuend (operand 1) is tied to the destination -> pfsub
;;   1: the subtrahend (operand 2) is tied to the destination -> pfsubr
;; The insn condition rejects the case where both sources are memory,
;; since one of them has to live in the destination register.  (Operand
;; 0 is a register_operand, so testing it with MEM_P would be a no-op;
;; the check has to be on operands 1 and 2.)
(define_insn "*mmx_subv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y,y")
(minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
(match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
"TARGET_3DNOW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
pfsub\t{%2, %0|%0, %2}
pfsubr\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxadd")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; 3dNOW! V2SF multiplication expander.  The operands are first adjusted
;; with ix86_fixup_binary_operands_no_copy, then the template is emitted.
(define_expand "mmx_mulv2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand")
(match_operand:V2SF 2 "nonimmediate_operand")))]
"TARGET_3DNOW"
"ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
;; 3dNOW! V2SF multiplication, output as pfmul.  Operand 1 is tied to
;; the destination MMX register and marked commutative ("%"); operand 2
;; may be an MMX register or memory.
(define_insn "*mmx_mulv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
(match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
"pfmul\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxmul")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; V2SF division, implemented with the V4SF divide.  The dividend is
;; widened with gen_movq_v2sf_to_sse.  The divisor is built differently:
;; it is the vec_concat of operand 2 with a V2SF vector of 1.0
;; (CONST1_RTX), so the upper two lanes of the divisor hold ones.
;; (Presumably this keeps the unused upper lanes from dividing by
;; zero -- the code does not say so explicitly.)
;; The low V2SF part of the V4SF quotient is moved to operand 0.
(define_expand "divv2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(div:V2SF (match_operand:V2SF 1 "register_operand")
(match_operand:V2SF 2 "register_operand")))]
"TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
rtx op2 = gen_reg_rtx (V4SFmode);
rtx op1 = gen_reg_rtx (V4SFmode);
rtx op0 = gen_reg_rtx (V4SFmode);
rtx tmp = gen_rtx_VEC_CONCAT (V4SFmode, operands[2],
force_reg (V2SFmode, CONST1_RTX (V2SFmode)));
emit_insn (gen_rtx_SET (op2, tmp));
emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
emit_insn (gen_divv4sf3 (op0, op1, op2));
emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
DONE;
})
;; V2SF signed max / min (smaxmin iterator), implemented on the 16-byte
;; SSE unit.  Both inputs are widened into fresh V4SF pseudos with
;; gen_movq_v2sf_to_sse, the V4SF operation is emitted, and the low V2SF
;; part of the result is moved to operand 0.
;; Enabled for TARGET_MMX_WITH_SSE when ix86_partial_vec_fp_math is set.
;; NOTE(review): the pattern name and the "gen_v4sf3" call look like
;; they have lost their code-iterator placeholder -- confirm against
;; upstream.
(define_expand "v2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(smaxmin:V2SF
(match_operand:V2SF 1 "register_operand")
(match_operand:V2SF 2 "register_operand")))]
"TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
rtx op2 = gen_reg_rtx (V4SFmode);
rtx op1 = gen_reg_rtx (V4SFmode);
rtx op0 = gen_reg_rtx (V4SFmode);
emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
emit_insn (gen_v4sf3 (op0, op1, op2));
emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
DONE;
})
;; 3dNOW! V2SF max / min expander (smaxmin iterator).
;; Unless -ffinite-math-only is on and signed zeros are ignored, the
;; operation is emitted through the "ieee" unspec pattern, with operand
;; 1 forced into a register first.  Otherwise the plain smax/smin
;; template is used after ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the pattern name, the "gen_mmx_ieee_v2sf3" call and the
;; first argument of ix86_fixup_binary_operands_no_copy look like they
;; have lost their code-iterator placeholders -- confirm against
;; upstream.
(define_expand "mmx_v2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(smaxmin:V2SF
(match_operand:V2SF 1 "nonimmediate_operand")
(match_operand:V2SF 2 "nonimmediate_operand")))]
"TARGET_3DNOW"
{
if (!flag_finite_math_only || flag_signed_zeros)
{
operands[1] = force_reg (V2SFmode, operands[1]);
emit_insn (gen_mmx_ieee_v2sf3
(operands[0], operands[1], operands[2]));
DONE;
}
else
ix86_fixup_binary_operands_no_copy (, V2SFmode, operands);
})
;; These versions of the min/max patterns are intentionally ignorant of
;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
;; are undefined in this condition, we're certain this is correct.
;; Operand 1 is tied to the destination MMX register and carries the
;; commutative "%" mark; operand 2 may be an MMX register or memory.
;; NOTE(review): the pattern name, the first argument of
;; ix86_binary_operator_ok and the mnemonic after "pf" look like they
;; have lost their code-iterator placeholders -- confirm against
;; upstream.
(define_insn "*mmx_v2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(smaxmin:V2SF
(match_operand:V2SF 1 "nonimmediate_operand" "%0")
(match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW && ix86_binary_operator_ok (, V2SFmode, operands)"
"pf\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxadd")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; These versions of the min/max patterns implement exactly the operations
;; min = (op1 < op2 ? op1 : op2)
;; max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.
;; The operation is wrapped in an unspec (IEEE_MAXMIN iterator) rather
;; than expressed with smax/smin.  Operand 1 must be a register tied to
;; the destination and has no commutative mark.
;; NOTE(review): the pattern name and the mnemonic after "pf" look like
;; they have lost their iterator placeholders -- confirm against
;; upstream.
(define_insn "mmx_ieee_v2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(unspec:V2SF
[(match_operand:V2SF 1 "register_operand" "0")
(match_operand:V2SF 2 "nonimmediate_operand" "ym")]
IEEE_MAXMIN))]
"TARGET_3DNOW"
"pf\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxadd")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; 3dNOW! pfrcp instruction, modelled as UNSPEC_PFRCP of one V2SF input
;; (MMX register or memory) producing a V2SF result in an MMX register.
(define_insn "mmx_rcpv2sf2"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
UNSPEC_PFRCP))]
"TARGET_3DNOW"
"pfrcp\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; 3dNOW! pfrcpit1 instruction, modelled as UNSPEC_PFRCPIT1 of two V2SF
;; inputs.  Operand 1 is tied to the destination MMX register; operand 2
;; may be an MMX register or memory.
(define_insn "mmx_rcpit1v2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
(match_operand:V2SF 2 "nonimmediate_operand" "ym")]
UNSPEC_PFRCPIT1))]
"TARGET_3DNOW"
"pfrcpit1\t{%2, %0|%0, %2}"
[(set_attr "type" "mmx")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; 3dNOW! pfrcpit2 instruction, modelled as UNSPEC_PFRCPIT2 of two V2SF
;; inputs.  Operand 1 is tied to the destination MMX register; operand 2
;; may be an MMX register or memory.
(define_insn "mmx_rcpit2v2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
(match_operand:V2SF 2 "nonimmediate_operand" "ym")]
UNSPEC_PFRCPIT2))]
"TARGET_3DNOW"
"pfrcpit2\t{%2, %0|%0, %2}"
[(set_attr "type" "mmx")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; V2SF square root, implemented with the V4SF square root.  The input
;; is widened into a fresh V4SF pseudo with gen_movq_v2sf_to_sse,
;; gen_sqrtv4sf2 is emitted, and the low V2SF part of the result is
;; moved to operand 0.
;; Enabled for TARGET_MMX_WITH_SSE when ix86_partial_vec_fp_math is set.
(define_expand "sqrtv2sf2"
[(set (match_operand:V2SF 0 "register_operand")
(sqrt:V2SF (match_operand:V2SF 1 "nonimmediate_operand")))]
"TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
rtx op1 = gen_reg_rtx (V4SFmode);
rtx op0 = gen_reg_rtx (V4SFmode);
emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
emit_insn (gen_sqrtv4sf2 (op0, op1));
emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
DONE;
})
;; 3dNOW! pfrsqrt instruction, modelled as UNSPEC_PFRSQRT of one V2SF
;; input (MMX register or memory) producing a V2SF result in an MMX
;; register.
(define_insn "mmx_rsqrtv2sf2"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
UNSPEC_PFRSQRT))]
"TARGET_3DNOW"
"pfrsqrt\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; 3dNOW! pfrsqit1 instruction, modelled as UNSPEC_PFRSQIT1 of two V2SF
;; inputs.  Operand 1 is tied to the destination MMX register; operand 2
;; may be an MMX register or memory.
(define_insn "mmx_rsqit1v2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
(match_operand:V2SF 2 "nonimmediate_operand" "ym")]
UNSPEC_PFRSQIT1))]
"TARGET_3DNOW"
"pfrsqit1\t{%2, %0|%0, %2}"
[(set_attr "type" "mmx")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; 3dNOW! horizontal add expander.  The result is the concatenation of
;; (element 0 + element 1 of operand 1) and (element 0 + element 1 of
;; operand 2).  No preparation code; the template is emitted as is.
(define_expand "mmx_haddv2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(vec_concat:V2SF
(plus:SF
(vec_select:SF
(match_operand:V2SF 1 "register_operand")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
(plus:SF
(vec_select:SF
(match_operand:V2SF 2 "nonimmediate_operand")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
"TARGET_3DNOW")
;; 3dNOW! horizontal add, output as pfacc.  The element selectors are
;; matched as operands 3..6 (each 0 or 1) instead of fixed constants;
;; the insn condition only requires that the two selectors within each
;; sum differ, so the two elements of a sum may appear in either order.
(define_insn "*mmx_haddv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(vec_concat:V2SF
(plus:SF
(vec_select:SF
(match_operand:V2SF 1 "register_operand" "0")
(parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
(vec_select:SF (match_dup 1)
(parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
(plus:SF
(vec_select:SF
(match_operand:V2SF 2 "nonimmediate_operand" "ym")
(parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
(vec_select:SF (match_dup 2)
(parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
"TARGET_3DNOW
&& INTVAL (operands[3]) != INTVAL (operands[4])
&& INTVAL (operands[5]) != INTVAL (operands[6])"
"pfacc\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxadd")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; Scalar sum of the two elements of a V2SF vector (the selectors,
;; operands 2 and 3, must differ).  The pattern only matches while
;; ix86_pre_reload_split () holds and is split right away ("&& 1"):
;; the input is widened to V4SF with gen_movq_v2sf_to_sse, the SSE3
;; V4SF horizontal add is applied with that value as both inputs, and
;; the low SF part of the result is moved to operand 0.
(define_insn_and_split "*mmx_haddv2sf3_low"
[(set (match_operand:SF 0 "register_operand")
(plus:SF
(vec_select:SF
(match_operand:V2SF 1 "nonimmediate_operand")
(parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
(vec_select:SF
(match_dup 1)
(parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
"TARGET_SSE3 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math
&& INTVAL (operands[2]) != INTVAL (operands[3])
&& ix86_pre_reload_split ()"
"#"
"&& 1"
[(const_int 0)]
{
rtx op1 = gen_reg_rtx (V4SFmode);
rtx op0 = gen_reg_rtx (V4SFmode);
emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
emit_insn (gen_sse3_haddv4sf3 (op0, op1, op1));
emit_move_insn (operands[0], lowpart_subreg (SFmode, op0, V4SFmode));
DONE;
})
;; Horizontal subtract, output as pfnacc (requires TARGET_3DNOW_A).
;; The result is the concatenation of (element 0 - element 1 of operand
;; 1) and (element 0 - element 1 of operand 2).  Operand 1 is tied to
;; the destination MMX register.
(define_insn "mmx_hsubv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(vec_concat:V2SF
(minus:SF
(vec_select:SF
(match_operand:V2SF 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
(minus:SF
(vec_select:SF
(match_operand:V2SF 2 "nonimmediate_operand" "ym")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
"TARGET_3DNOW_A"
"pfnacc\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxadd")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; Scalar difference element 0 - element 1 of a V2SF vector.  The
;; pattern only matches while ix86_pre_reload_split () holds and is
;; split right away ("&& 1"): the input is widened to V4SF with
;; gen_movq_v2sf_to_sse, the SSE3 V4SF horizontal subtract is applied
;; with that value as both inputs, and the low SF part of the result is
;; moved to operand 0.
(define_insn_and_split "*mmx_hsubv2sf3_low"
[(set (match_operand:SF 0 "register_operand")
(minus:SF
(vec_select:SF
(match_operand:V2SF 1 "register_operand")
(parallel [(const_int 0)]))
(vec_select:SF
(match_dup 1)
(parallel [(const_int 1)]))))]
"TARGET_SSE3 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math
&& ix86_pre_reload_split ()"
"#"
"&& 1"
[(const_int 0)]
{
rtx op1 = gen_reg_rtx (V4SFmode);
rtx op0 = gen_reg_rtx (V4SFmode);
emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
emit_insn (gen_sse3_hsubv4sf3 (op0, op1, op1));
emit_move_insn (operands[0], lowpart_subreg (SFmode, op0, V4SFmode));
DONE;
})
;; Mixed horizontal subtract/add expander (requires TARGET_3DNOW_A).
;; The result is the concatenation of (element 0 - element 1 of operand
;; 1) and (element 0 + element 1 of operand 2).  No preparation code;
;; the template is emitted as is.
(define_expand "mmx_haddsubv2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(vec_concat:V2SF
(minus:SF
(vec_select:SF
(match_operand:V2SF 1 "register_operand")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
(plus:SF
(vec_select:SF
(match_operand:V2SF 2 "nonimmediate_operand")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
"TARGET_3DNOW_A")
;; Mixed horizontal subtract/add, output as pfpnacc.  The subtraction
;; half uses fixed element order (0 minus 1).  In the addition half the
;; selectors are matched as operands 3 and 4 and only required to
;; differ, so the two elements may appear in either order.
(define_insn "*mmx_haddsubv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(vec_concat:V2SF
(minus:SF
(vec_select:SF
(match_operand:V2SF 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
(plus:SF
(vec_select:SF
(match_operand:V2SF 2 "nonimmediate_operand" "ym")
(parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
(vec_select:SF
(match_dup 2)
(parallel [(match_operand:SI 4 "const_0_to_1_operand")])))))]
"TARGET_3DNOW_A
&& INTVAL (operands[3]) != INTVAL (operands[4])"
"pfpnacc\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxadd")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; V2SF add/sub: a vec_merge with mask 1 of (operand 1 - operand 2) and
;; (operand 1 + operand 2).  Implemented on the 16-byte SSE unit: both
;; inputs are widened to V4SF with gen_movq_v2sf_to_sse,
;; gen_vec_addsubv4sf3 is emitted, and the low V2SF part of the result
;; is moved to operand 0.
;; Requires SSE3 and TARGET_MMX_WITH_SSE with ix86_partial_vec_fp_math.
(define_expand "vec_addsubv2sf3"
[(set (match_operand:V2SF 0 "register_operand")
(vec_merge:V2SF
(minus:V2SF
(match_operand:V2SF 1 "nonimmediate_operand")
(match_operand:V2SF 2 "nonimmediate_operand"))
(plus:V2SF (match_dup 1) (match_dup 2))
(const_int 1)))]
"TARGET_SSE3 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
rtx op2 = gen_reg_rtx (V4SFmode);
rtx op1 = gen_reg_rtx (V4SFmode);
rtx op0 = gen_reg_rtx (V4SFmode);
emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));
emit_insn (gen_vec_addsubv4sf3 (op0, op1, op2));
emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
DONE;
})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 3dNOW! V2SF equality comparison expander producing a V2SI result.
;; The operands are first adjusted with
;; ix86_fixup_binary_operands_no_copy, then the template is emitted.
(define_expand "mmx_eqv2sf3"
[(set (match_operand:V2SI 0 "register_operand")
(eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand")
(match_operand:V2SF 2 "nonimmediate_operand")))]
"TARGET_3DNOW"
"ix86_fixup_binary_operands_no_copy (EQ, V2SFmode, operands);")
;; 3dNOW! V2SF equality comparison, output as pfcmpeq, with a V2SI
;; result.  Operand 1 is tied to the destination MMX register and marked
;; commutative ("%"); operand 2 may be an MMX register or memory.
(define_insn "*mmx_eqv2sf3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "%0")
(match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW && ix86_binary_operator_ok (EQ, V2SFmode, operands)"
"pfcmpeq\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxcmp")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; 3dNOW! V2SF greater-than comparison, output as pfcmpgt, with a V2SI
;; result.  Operand 1 must be a register tied to the destination (the
;; comparison is not commutative); operand 2 may be an MMX register or
;; memory.
(define_insn "mmx_gtv2sf3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
(match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW"
"pfcmpgt\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxcmp")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; 3dNOW! V2SF greater-or-equal comparison, output as pfcmpge, with a
;; V2SI result.  Operand 1 must be a register tied to the destination
;; (the comparison is not commutative); operand 2 may be an MMX register
;; or memory.
(define_insn "mmx_gev2sf3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(ge:V2SI (match_operand:V2SF 1 "register_operand" "0")
(match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW"
"pfcmpge\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxcmp")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; Vector comparison of two V2SF operands producing a V2SI mask,
;; implemented with the V4SF comparison.  The comparison code of operand
;; 1 is re-created in V4SImode over two fresh V4SF pseudos, both inputs
;; are widened with gen_movq_v2sf_to_sse, ix86_expand_fp_vec_cmp does
;; the actual expansion (it must succeed), and the low V2SI part of the
;; V4SI result is moved to operand 0.
(define_expand "vec_cmpv2sfv2si"
[(set (match_operand:V2SI 0 "register_operand")
(match_operator:V2SI 1 ""
[(match_operand:V2SF 2 "nonimmediate_operand")
(match_operand:V2SF 3 "nonimmediate_operand")]))]
"TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
rtx ops[4];
ops[3] = gen_reg_rtx (V4SFmode);
ops[2] = gen_reg_rtx (V4SFmode);
ops[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), V4SImode, ops[2], ops[3]);
ops[0] = gen_reg_rtx (V4SImode);
emit_insn (gen_movq_v2sf_to_sse (ops[3], operands[3]));
emit_insn (gen_movq_v2sf_to_sse (ops[2], operands[2]));
bool ok = ix86_expand_fp_vec_cmp (ops);
gcc_assert (ok);
emit_move_insn (operands[0], lowpart_subreg (V2SImode, ops[0], V4SImode));
DONE;
})
;; Conditional select of V2FI data controlled by a V2SF comparison.
;; The pattern iterates over V2FI, so the name and the C body must carry
;; the iterator placeholders: <mode> in the name, <MODE>mode for the
;; 64-bit data mode and <mmxdoublevecmode>mode for the 128-bit mode the
;; work is done in.  Without them the body refers to an undeclared
;; identifier "mode" and every iteration gets the same pattern name.
;; NOTE(review): <mmxdoublevecmode> is assumed to be the file's
;; 64-bit -> 128-bit mode attribute defined outside this chunk; confirm.
(define_expand "vcond<mode>v2sf"
  [(set (match_operand:V2FI 0 "register_operand")
        (if_then_else:V2FI
          (match_operator 3 ""
            [(match_operand:V2SF 4 "nonimmediate_operand")
             (match_operand:V2SF 5 "nonimmediate_operand")])
          (match_operand:V2FI 1 "general_operand")
          (match_operand:V2FI 2 "general_operand")))]
  "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx ops[6];

  /* Comparison inputs live in fresh V4SF pseudos.  */
  ops[5] = gen_reg_rtx (V4SFmode);
  ops[4] = gen_reg_rtx (V4SFmode);
  ops[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), VOIDmode, ops[4], ops[5]);

  /* The two data arms are forced into registers and viewed in the
     128-bit mode through a lowpart subreg.  */
  ops[2] = lowpart_subreg (<mmxdoublevecmode>mode,
                           force_reg (<MODE>mode, operands[2]),
                           <MODE>mode);
  ops[1] = lowpart_subreg (<mmxdoublevecmode>mode,
                           force_reg (<MODE>mode, operands[1]),
                           <MODE>mode);
  ops[0] = gen_reg_rtx (<mmxdoublevecmode>mode);

  emit_insn (gen_movq_v2sf_to_sse (ops[5], operands[5]));
  emit_insn (gen_movq_v2sf_to_sse (ops[4], operands[4]));

  bool ok = ix86_expand_fp_vcond (ops);
  gcc_assert (ok);

  /* Only the low 64 bits of the wide result are the answer.  */
  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, ops[0],
                                               <mmxdoublevecmode>mode));
  DONE;
})
;; INSERTPS on the 64-bit V2FI modes.
;; Two fixes relative to the previous text:
;;  * The iterator placeholders are restored (<mode> in the name and the
;;    element mode passed to adjust_address_nv); the pattern iterates
;;    over V2FI and the C body referred to an undeclared "mode".
;;    NOTE(review): <mmxscalarmode> is assumed to be the file's
;;    vector -> element mode attribute defined outside this chunk.
;;  * The "addr" attribute was "*,*,gpr16", the reverse of what the
;;    constraints say: alternatives 0 and 1 (legacy insertps) use the
;;    restricted "jm" memory constraint, while alternative 2 (vinsertps)
;;    uses plain "vm".  The gpr16 marking now sits on alternatives 0/1.
(define_insn "@sse4_1_insertps_<mode>"
  [(set (match_operand:V2FI 0 "register_operand" "=Yr,*x,v")
        (unspec:V2FI
          [(match_operand:V2FI 2 "nonimmediate_operand" "Yrjm,*xjm,vm")
           (match_operand:V2FI 1 "register_operand" "0,0,v")
           (match_operand:SI 3 "const_0_to_255_operand")]
          UNSPEC_INSERTPS))]
  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
{
  if (MEM_P (operands[2]))
    {
      /* For a memory source, bits 7:6 of the immediate (count_s) are
         folded into the address as a 4-byte element offset and then
         cleared from the immediate.  */
      unsigned count_s = INTVAL (operands[3]) >> 6;
      if (count_s)
        operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
      operands[2] = adjust_address_nv (operands[2],
                                       <mmxscalarmode>mode, count_s * 4);
    }
  switch (which_alternative)
    {
    case 0:
    case 1:
      return "insertps\t{%3, %2, %0|%0, %2, %3}";
    case 2:
      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "addr" "gpr16,gpr16,*")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "V4SF")])
;; Immediate-controlled blend of two V2SF registers (vec_merge with a
;; 0..3 constant mask); emits blendps / vblendps.
;; Alternatives 0 and 1 are the two-operand SSE4.1 form with operand 1
;; tied to the destination; alternative 2 is the three-operand AVX form.
(define_insn "*mmx_blendps"
[(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x")
(vec_merge:V2SF
(match_operand:V2SF 2 "register_operand" "Yr,*x,x")
(match_operand:V2SF 1 "register_operand" "0,0,x")
(match_operand:SI 3 "const_0_to_3_operand")))]
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
"@
blendps\t{%3, %2, %0|%0, %2, %3}
blendps\t{%3, %2, %0|%0, %2, %3}
vblendps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssemov")
(set_attr "length_immediate" "1")
(set_attr "prefix_data16" "1,1,*")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "V4SF")])
;; Variable blend of two V2SF registers under a V2SF mask operand
;; (UNSPEC_BLENDV); emits blendvps / vblendvps.
;; In the non-AVX alternatives the mask (operand 3) is restricted by the
;; "Yz" constraint and operand 1 is tied to the destination; the AVX
;; alternative takes all operands in arbitrary "x" registers.
(define_insn "mmx_blendvps"
[(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x")
(unspec:V2SF
[(match_operand:V2SF 1 "register_operand" "0,0,x")
(match_operand:V2SF 2 "register_operand" "Yr,*x,x")
(match_operand:V2SF 3 "register_operand" "Yz,Yz,x")]
UNSPEC_BLENDV))]
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
"@
blendvps\t{%3, %2, %0|%0, %2, %3}
blendvps\t{%3, %2, %0|%0, %2, %3}
vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssemov")
(set_attr "length_immediate" "1")
(set_attr "prefix_data16" "1,1,*")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,orig,vex")
(set_attr "btver2_decode" "vector")
(set_attr "mode" "V4SF")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise (~op1 & op2) on V2SF values held in SSE registers; emits
;; andnps (operand 1 tied to the destination) or the three-operand
;; vandnps.  Both inputs must be registers.
(define_insn "*mmx_andnotv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=x,x")
(and:V2SF
(not:V2SF
(match_operand:V2SF 1 "register_operand" "0,x"))
(match_operand:V2SF 2 "register_operand" "x,x")))]
"TARGET_MMX_WITH_SSE"
"@
andnps\t{%2, %0|%0, %2}
vandnps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V4SF")])
;; Bitwise logic operations on V2SF values held in SSE registers.
;; The pattern iterates over any_logic, so the name needs <code> and the
;; mnemonics need <logic>; without them all iterations share the name
;; "v2sf3" and the templates emit the non-existent mnemonics "ps"/"vps".
;; Operand 1 is commutative ("%"); the non-AVX alternative ties it to the
;; destination.
(define_insn "<code>v2sf3"
  [(set (match_operand:V2SF 0 "register_operand" "=x,x")
        (any_logic:V2SF
          (match_operand:V2SF 1 "register_operand" "%0,x")
          (match_operand:V2SF 2 "register_operand" "x,x")))]
  "TARGET_MMX_WITH_SSE"
  "@
   <logic>ps\t{%2, %0|%0, %2}
   v<logic>ps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
;; copysign for V2SF, expressed as three bitwise steps on a mask M
;; obtained from ix86_build_signbit_mask:
;;   t4 = ~M & op1;   t5 = M & op2;   op0 = t4 | t5
(define_expand "copysignv2sf3"
  [(set (match_dup 4)
        (and:V2SF
          (not:V2SF (match_dup 3))
          (match_operand:V2SF 1 "register_operand")))
   (set (match_dup 5)
        (and:V2SF (match_dup 3)
                  (match_operand:V2SF 2 "register_operand")))
   (set (match_operand:V2SF 0 "register_operand")
        (ior:V2SF (match_dup 4) (match_dup 5)))]
  "TARGET_MMX_WITH_SSE"
{
  /* Operand 3 is the mask; operands 4 and 5 are the two temporaries.  */
  rtx sign_mask = ix86_build_signbit_mask (V2SFmode, true, false);
  operands[3] = sign_mask;

  rtx kept_bits = gen_reg_rtx (V2SFmode);
  operands[4] = kept_bits;

  rtx sign_bits = gen_reg_rtx (V2SFmode);
  operands[5] = sign_bits;
})
;; xorsign for V2SF: with mask M from ix86_build_signbit_mask,
;;   t4 = M & op2;   op0 = t4 ^ op1
(define_expand "xorsignv2sf3"
  [(set (match_dup 4)
        (and:V2SF (match_dup 3)
                  (match_operand:V2SF 2 "register_operand")))
   (set (match_operand:V2SF 0 "register_operand")
        (xor:V2SF (match_dup 4)
                  (match_operand:V2SF 1 "register_operand")))]
  "TARGET_MMX_WITH_SSE"
{
  /* Operand 3 is the mask, operand 4 the temporary for the masked
     bits of operand 2.  */
  rtx sign_mask = ix86_build_signbit_mask (V2SFmode, true, false);
  operands[3] = sign_mask;

  rtx sign_bits = gen_reg_rtx (V2SFmode);
  operands[4] = sign_bits;
})
;; signbit for V2SF: reinterpret the input as V2SI and logically shift
;; each element right by (element bit size - 1).
(define_expand "signbitv2sf2"
  [(set (match_operand:V2SI 0 "register_operand")
        (lshiftrt:V2SI
          (subreg:V2SI
            (match_operand:V2SF 1 "register_operand") 0)
          (match_dup 2)))]
  "TARGET_MMX_WITH_SSE"
{
  /* Operand 2 is the shift count.  */
  operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (V2SFmode)-1);
})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision FMA multiply/accumulate instructions.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2SF fused multiply-add, performed in V4SF: the three inputs are
;; moved into V4SF pseudos, gen_fmav4sf4 does the arithmetic, and the
;; low V2SF part of the result is copied to operand 0.
(define_expand "fmav2sf4"
  [(set (match_operand:V2SF 0 "register_operand")
        (fma:V2SF
          (match_operand:V2SF 1 "nonimmediate_operand")
          (match_operand:V2SF 2 "nonimmediate_operand")
          (match_operand:V2SF 3 "nonimmediate_operand")))]
  "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide3 = gen_reg_rtx (V4SFmode);
  rtx wide2 = gen_reg_rtx (V4SFmode);
  rtx wide1 = gen_reg_rtx (V4SFmode);
  rtx wide0 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_movq_v2sf_to_sse (wide3, operands[3]));
  emit_insn (gen_movq_v2sf_to_sse (wide2, operands[2]));
  emit_insn (gen_movq_v2sf_to_sse (wide1, operands[1]));

  emit_insn (gen_fmav4sf4 (wide0, wide1, wide2, wide3));

  rtx low_half = lowpart_subreg (V2SFmode, wide0, V4SFmode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; V2SF fused multiply-subtract (addend negated), performed in V4SF via
;; gen_fmsv4sf4; the low V2SF part of the result is copied to operand 0.
(define_expand "fmsv2sf4"
  [(set (match_operand:V2SF 0 "register_operand")
        (fma:V2SF
          (match_operand:V2SF 1 "nonimmediate_operand")
          (match_operand:V2SF 2 "nonimmediate_operand")
          (neg:V2SF
            (match_operand:V2SF 3 "nonimmediate_operand"))))]
  "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide3 = gen_reg_rtx (V4SFmode);
  rtx wide2 = gen_reg_rtx (V4SFmode);
  rtx wide1 = gen_reg_rtx (V4SFmode);
  rtx wide0 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_movq_v2sf_to_sse (wide3, operands[3]));
  emit_insn (gen_movq_v2sf_to_sse (wide2, operands[2]));
  emit_insn (gen_movq_v2sf_to_sse (wide1, operands[1]));

  emit_insn (gen_fmsv4sf4 (wide0, wide1, wide2, wide3));

  rtx low_half = lowpart_subreg (V2SFmode, wide0, V4SFmode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; V2SF fused negate-multiply-add (first multiplicand negated),
;; performed in V4SF via gen_fnmav4sf4; the low V2SF part of the result
;; is copied to operand 0.
(define_expand "fnmav2sf4"
  [(set (match_operand:V2SF 0 "register_operand")
        (fma:V2SF
          (neg:V2SF
            (match_operand:V2SF 1 "nonimmediate_operand"))
          (match_operand:V2SF 2 "nonimmediate_operand")
          (match_operand:V2SF 3 "nonimmediate_operand")))]
  "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide3 = gen_reg_rtx (V4SFmode);
  rtx wide2 = gen_reg_rtx (V4SFmode);
  rtx wide1 = gen_reg_rtx (V4SFmode);
  rtx wide0 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_movq_v2sf_to_sse (wide3, operands[3]));
  emit_insn (gen_movq_v2sf_to_sse (wide2, operands[2]));
  emit_insn (gen_movq_v2sf_to_sse (wide1, operands[1]));

  emit_insn (gen_fnmav4sf4 (wide0, wide1, wide2, wide3));

  rtx low_half = lowpart_subreg (V2SFmode, wide0, V4SFmode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; V2SF fused negate-multiply-subtract (first multiplicand and addend
;; both negated), performed in V4SF via gen_fnmsv4sf4; the low V2SF part
;; of the result is copied to operand 0.
;; Operand 0 previously carried a constraint string ("=v,v,x").
;; Constraints have no meaning on a define_expand and the three sibling
;; FMA expanders have none, so it is dropped here.
(define_expand "fnmsv2sf4"
  [(set (match_operand:V2SF 0 "register_operand")
        (fma:V2SF
          (neg:V2SF
            (match_operand:V2SF 1 "nonimmediate_operand"))
          (match_operand:V2SF 2 "nonimmediate_operand")
          (neg:V2SF
            (match_operand:V2SF 3 "nonimmediate_operand"))))]
  "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL)
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx op3 = gen_reg_rtx (V4SFmode);
  rtx op2 = gen_reg_rtx (V4SFmode);
  rtx op1 = gen_reg_rtx (V4SFmode);
  rtx op0 = gen_reg_rtx (V4SFmode);

  /* Move the 64-bit inputs into 128-bit registers.  */
  emit_insn (gen_movq_v2sf_to_sse (op3, operands[3]));
  emit_insn (gen_movq_v2sf_to_sse (op2, operands[2]));
  emit_insn (gen_movq_v2sf_to_sse (op1, operands[1]));

  emit_insn (gen_fnmsv4sf4 (op0, op1, op2, op3));

  /* Only the low 64 bits of the wide result are the answer.  */
  emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
  DONE;
})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Signed V2SF -> V2SI truncating conversion, performed on V4SF/V4SI via
;; gen_fix_truncv4sfv4si2; the low V2SI part is copied to operand 0.
(define_expand "fix_truncv2sfv2si2"
  [(set (match_operand:V2SI 0 "register_operand")
        (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand")))]
  "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide_src = gen_reg_rtx (V4SFmode);
  rtx wide_dst = gen_reg_rtx (V4SImode);

  emit_insn (gen_movq_v2sf_to_sse (wide_src, operands[1]));
  emit_insn (gen_fix_truncv4sfv4si2 (wide_dst, wide_src));

  rtx low_half = lowpart_subreg (V2SImode, wide_dst, V4SImode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; Unsigned V2SF -> V2SI truncating conversion (requires AVX512VL),
;; performed on V4SF/V4SI via gen_fixuns_truncv4sfv4si2; the low V2SI
;; part is copied to operand 0.
(define_expand "fixuns_truncv2sfv2si2"
  [(set (match_operand:V2SI 0 "register_operand")
        (unsigned_fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand")))]
  "TARGET_AVX512VL && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide_src = gen_reg_rtx (V4SFmode);
  rtx wide_dst = gen_reg_rtx (V4SImode);

  emit_insn (gen_movq_v2sf_to_sse (wide_src, operands[1]));
  emit_insn (gen_fixuns_truncv4sfv4si2 (wide_dst, wide_src));

  rtx low_half = lowpart_subreg (V2SImode, wide_dst, V4SImode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; 3DNow! V2SF -> V2SI conversion (RTL "fix"); emits pf2id.
;; Source may be an MMX register or memory, destination is an MMX register.
(define_insn "mmx_fix_truncv2sfv2si2"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW"
"pf2id\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxcvt")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; Signed V2SI -> V2SF conversion, performed on V4SI/V4SF via
;; gen_floatv4siv4sf2; the low V2SF part is copied to operand 0.
(define_expand "floatv2siv2sf2"
  [(set (match_operand:V2SF 0 "register_operand")
        (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand")))]
  "TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide_src = gen_reg_rtx (V4SImode);
  rtx wide_dst = gen_reg_rtx (V4SFmode);

  emit_insn (gen_movq_v2si_to_sse (wide_src, operands[1]));
  emit_insn (gen_floatv4siv4sf2 (wide_dst, wide_src));

  rtx low_half = lowpart_subreg (V2SFmode, wide_dst, V4SFmode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; Unsigned V2SI -> V2SF conversion (requires AVX512VL), performed on
;; V4SI/V4SF via gen_floatunsv4siv4sf2; the low V2SF part is copied to
;; operand 0.
(define_expand "floatunsv2siv2sf2"
  [(set (match_operand:V2SF 0 "register_operand")
        (unsigned_float:V2SF (match_operand:V2SI 1 "nonimmediate_operand")))]
  "TARGET_AVX512VL && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide_src = gen_reg_rtx (V4SImode);
  rtx wide_dst = gen_reg_rtx (V4SFmode);

  emit_insn (gen_movq_v2si_to_sse (wide_src, operands[1]));
  emit_insn (gen_floatunsv4siv4sf2 (wide_dst, wide_src));

  rtx low_half = lowpart_subreg (V2SFmode, wide_dst, V4SFmode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; 3DNow! V2SI -> V2SF conversion (RTL "float"); emits pi2fd.
;; Source may be an MMX register or memory, destination is an MMX register.
(define_insn "mmx_floatv2siv2sf2"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW"
"pi2fd\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxcvt")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; pf2iw (TARGET_3DNOW_A): modelled as a V2SF -> V2SI "fix", a signed
;; saturating truncation to V2HI, and a sign extension back to V2SI.
(define_insn "mmx_pf2iw"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(sign_extend:V2SI
(ss_truncate:V2HI
(fix:V2SI
(match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
"TARGET_3DNOW_A"
"pf2iw\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxcvt")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;; pi2fw (TARGET_3DNOW_A): modelled as truncating each V2SI element to
;; HImode, sign-extending it back to SImode, and converting to V2SF.
(define_insn "mmx_pi2fw"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(float:V2SF
(sign_extend:V2SI
(truncate:V2HI
(match_operand:V2SI 1 "nonimmediate_operand" "ym")))))]
"TARGET_3DNOW_A"
"pi2fw\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxcvt")
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point element swizzling
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Swap the two elements of a V2SF value (vec_select [1 0]).
;; Alternative 0: native MMX pswapd (register or memory source).
;; Alternative 1: SSE shufps with immediate 0xe1, source tied to the
;;                destination.
;; Alternative 2: AVX vshufps with immediate 0xe1, using operand 1 for
;;                both shuffle sources.
(define_insn "mmx_pswapdv2sf2"
[(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
(vec_select:V2SF
(match_operand:V2SF 1 "register_mmxmem_operand" "ym,0,Yv")
(parallel [(const_int 1) (const_int 0)])))]
"TARGET_3DNOW_A || TARGET_MMX_WITH_SSE"
"@
pswapd\t{%1, %0|%0, %1}
shufps\t{$0xe1, %1, %0|%0, %1, 0xe1}
vshufps\t{$0xe1, %1, %1, %0|%0, %1, %1, 0xe1}"
[(set_attr "isa" "*,sse_noavx,avx")
(set_attr "mmx_isa" "native,*,*")
(set_attr "type" "mmxcvt,ssemov,ssemov")
(set_attr "prefix_extra" "1,*,*")
(set_attr "mode" "V2SF,V4SF,V4SF")])
;; Broadcast element 1 of a V2SF register to both elements
;; (vec_select [1 1]).
;; Alternative 0 (SSE3): movshdup / vmovshdup.
;; Alternative 1: in-place shufps with immediate 0xe5, source tied to
;;                the destination.
(define_insn "*mmx_movshdup"
[(set (match_operand:V2SF 0 "register_operand" "=v,x")
(vec_select:V2SF
(match_operand:V2SF 1 "register_operand" "v,0")
(parallel [(const_int 1) (const_int 1)])))]
"TARGET_MMX_WITH_SSE"
"@
%vmovshdup\t{%1, %0|%0, %1}
shufps\t{$0xe5, %0, %0|%0, %0, 0xe5}"
[(set_attr "isa" "sse3,*")
(set_attr "type" "sse,sseshuf1")
(set_attr "length_immediate" "*,1")
(set_attr "prefix_rep" "1,*")
(set_attr "prefix" "maybe_vex,orig")
(set_attr "mode" "V4SF")])
;; Broadcast element 0 of a V2SF register to both elements
;; (vec_select [0 0]).
;; Alternative 0 (SSE3): movsldup / vmovsldup.
;; Alternative 1: in-place shufps with immediate 0xe0, source tied to
;;                the destination.
(define_insn "*mmx_movsldup"
[(set (match_operand:V2SF 0 "register_operand" "=v,x")
(vec_select:V2SF
(match_operand:V2SF 1 "register_operand" "v,0")
(parallel [(const_int 0) (const_int 0)])))]
"TARGET_MMX_WITH_SSE"
"@
%vmovsldup\t{%1, %0|%0, %1}
shufps\t{$0xe0, %0, %0|%0, %0, 0xe0}"
[(set_attr "isa" "sse3,*")
(set_attr "type" "sse,sseshuf1")
(set_attr "length_immediate" "*,1")
(set_attr "prefix_rep" "1,*")
(set_attr "prefix" "maybe_vex,orig")
(set_attr "mode" "V4SF")])
;; Select elements 0 and 2 of the concatenation of two V2SF registers,
;; i.e. the low element of each input.  Never output directly ("#"):
;; after reload it is split by ix86_split_mmx_punpck with its second
;; argument false.
(define_insn_and_split "*vec_interleave_lowv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=x,v")
(vec_select:V2SF
(vec_concat:V4SF
(match_operand:V2SF 1 "register_operand" "0,v")
(match_operand:V2SF 2 "register_operand" "x,v"))
(parallel [(const_int 0) (const_int 2)])))]
"TARGET_MMX_WITH_SSE"
"#"
"&& reload_completed"
[(const_int 0)]
"ix86_split_mmx_punpck (operands, false); DONE;"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "V4SF")])
;; Select elements 1 and 3 of the concatenation of two V2SF registers,
;; i.e. the high element of each input.  Never output directly ("#"):
;; after reload it is split by ix86_split_mmx_punpck with its second
;; argument true.
;; The AVX alternative uses the "v" register constraint, exactly like
;; *vec_interleave_lowv2sf, so its "prefix" attribute is "maybe_evex"
;; here as well (it used to be "vex", inconsistent with the low variant).
(define_insn_and_split "*vec_interleave_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=x,v")
        (vec_select:V2SF
          (vec_concat:V4SF
            (match_operand:V2SF 1 "register_operand" "0,v")
            (match_operand:V2SF 2 "register_operand" "x,v"))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_MMX_WITH_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  "ix86_split_mmx_punpck (operands, true); DONE;"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "V4SF")])
;; Duplicate an SF register into both elements of a V2SF register.
;; Alternative 0: native MMX punpckldq on the destination itself.
;; Alternative 1 (SSE3): movsldup / vmovsldup.
;; Alternative 2 (SSE, non-AVX): in-place shufps with immediate 0xe0.
(define_insn "*vec_dupv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=y,Yv,x")
(vec_duplicate:V2SF
(match_operand:SF 1 "register_operand" "0,Yv,0")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
punpckldq\t%0, %0
%vmovsldup\t{%1, %0|%0, %1}
shufps\t{$0xe0, %0, %0|%0, %0, 0xe0}"
[(set_attr "isa" "*,sse3,sse_noavx")
(set_attr "mmx_isa" "native,*,*")
(set_attr "type" "mmxcvt,sse,sseshuf1")
(set_attr "length_immediate" "*,*,1")
(set_attr "prefix_rep" "*,1,*")
(set_attr "prefix" "*,maybe_vex,orig")
(set_attr "mode" "DI,V4SF,V4SF")])
;; Merge element 0 of operand 2 into operand 1 (vec_merge with mask 1);
;; emits movss / vmovss.
;; The pattern iterates over V2FI, so the name carries <mode> to keep
;; the per-mode pattern names distinct.
(define_insn "*mmx_movss_<mode>"
  [(set (match_operand:V2FI 0 "register_operand"   "=x,v")
        (vec_merge:V2FI
          (match_operand:V2FI 2 "register_operand" " x,v")
          (match_operand:V2FI 1 "register_operand" " 0,v")
          (const_int 1)))]
  "TARGET_MMX_WITH_SSE"
  "@
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "SF")])
;; Build a V2SF in an MMX register from two SF values; only used when
;; MMX is available and SSE is not.
;; Alternative 0: operand 1 already in the destination, operand 2 merged
;;                in with punpckldq.
;; Alternative 1: operand 2 matches the "C" constraint, so a single movd
;;                of operand 1 (general register or memory) is emitted.
(define_insn "*mmx_concatv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=y,y")
(vec_concat:V2SF
(match_operand:SF 1 "nonimmediate_operand" " 0,rm")
(match_operand:SF 2 "nonimm_or_0_operand" "ym,C")))]
"TARGET_MMX && !TARGET_SSE"
"@
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxcvt,mmxmov")
(set_attr "mode" "DI")])
;; Set one element of a V2SF vector (operand 0) to the SF value in
;; operand 1; operand 2 selects the element.  A constant index goes to
;; ix86_expand_vector_set, anything else to ix86_expand_vector_set_var.
(define_expand "vec_setv2sf"
  [(match_operand:V2SF 0 "register_operand")
   (match_operand:SF 1 "register_operand")
   (match_operand 2 "vec_setm_mmx_operand")]
  "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
  rtx index = operands[2];

  if (!CONST_INT_P (index))
    ix86_expand_vector_set_var (operands[0], operands[1], index);
  else
    ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
                            INTVAL (index));
  DONE;
})
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.cc
;; Extract element 0 of a V2SF value.  Never output directly ("#"):
;; after reload it becomes a plain SFmode move from the lowpart of
;; operand 1.  Memory-to-memory is rejected by the insn condition.
(define_insn_and_split "*vec_extractv2sf_0"
[(set (match_operand:SF 0 "nonimmediate_operand" "=x, m,y ,m,f,r")
(vec_select:SF
(match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
(parallel [(const_int 0)])))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 1))]
"operands[1] = gen_lowpart (SFmode, operands[1]);"
[(set_attr "mmx_isa" "*,*,native,native,*,*")])
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.cc
;; Extract element 1 of a V2SF value.
;; Alternative 0: native MMX punpckhdq in place.
;; Alternative 1 (SSE3): movshdup / vmovshdup.
;; Alternative 2 (non-AVX): in-place shufps with immediate 0xe5.
;; Alternatives 3-6: source is an offsettable memory operand ("o"); these
;;                   output "#" and are left to a splitter.
;; Memory-to-memory is rejected by the insn condition.
(define_insn "*vec_extractv2sf_1"
[(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,x,y,x,f,r")
(vec_select:SF
(match_operand:V2SF 1 "nonimmediate_operand" " 0,x,0,o,o,o,o")
(parallel [(const_int 1)])))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
punpckhdq\t%0, %0
%vmovshdup\t{%1, %0|%0, %1}
shufps\t{$0xe5, %0, %0|%0, %0, 0xe5}
#
#
#
#"
[(set_attr "isa" "*,sse3,noavx,*,*,*,*")
(set_attr "mmx_isa" "native,*,*,native,*,*,*")
(set_attr "type" "mmxcvt,sse,sseshuf1,mmxmov,ssemov,fmov,imov")
;; Only the shufps alternative carries an immediate byte.
(set (attr "length_immediate")
(if_then_else (eq_attr "alternative" "2")
(const_string "1")
(const_string "*")))
;; Only the movshdup alternative has prefix_rep set.
(set (attr "prefix_rep")
(if_then_else (eq_attr "alternative" "1")
(const_string "1")
(const_string "*")))
(set_attr "prefix" "orig,maybe_vex,orig,orig,orig,orig,orig")
(set_attr "mode" "DI,V4SF,V4SF,SF,SF,SF,SF")])
;; After reload, extracting element 1 of a V2SF value that lives in
;; memory into a register becomes an SFmode load from byte offset 4 of
;; that memory operand.
(define_split
  [(set (match_operand:SF 0 "register_operand")
        (vec_select:SF
          (match_operand:V2SF 1 "memory_operand")
          (parallel [(const_int 1)])))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = adjust_address (operands[1], SFmode, 4);
})
;; Extract the SF element selected by the constant operand 2 from the
;; V2SF register operand 1 into operand 0; the work is delegated to
;; ix86_expand_vector_extract.
(define_expand "vec_extractv2sfsf"
  [(match_operand:SF 0 "register_operand")
   (match_operand:V2SF 1 "register_operand")
   (match_operand 2 "const_int_operand")]
  "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
  rtx dest = operands[0];
  rtx vec = operands[1];

  ix86_expand_vector_extract (TARGET_MMX_WITH_SSE, dest, vec,
                              INTVAL (operands[2]));
  DONE;
})
;; Initialise the V2SF register operand 0 from operand 1; the work is
;; delegated to ix86_expand_vector_init.  Requires SSE in addition to
;; MMX (native or emulated with SSE).
(define_expand "vec_initv2sfsf"
  [(match_operand:V2SF 0 "register_operand")
   (match_operand 1)]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
{
  rtx dest = operands[0];
  rtx init = operands[1];

  ix86_expand_vector_init (TARGET_MMX_WITH_SSE, dest, init);
  DONE;
})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point rounding operations.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; nearbyint for V2SF, performed in V4SF via gen_nearbyintv4sf2; the low
;; V2SF part of the result is copied to operand 0.
(define_expand "nearbyintv2sf2"
  [(match_operand:V2SF 0 "register_operand")
   (match_operand:V2SF 1 "nonimmediate_operand")]
  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide_src = gen_reg_rtx (V4SFmode);
  rtx wide_dst = gen_reg_rtx (V4SFmode);

  emit_insn (gen_movq_v2sf_to_sse (wide_src, operands[1]));
  emit_insn (gen_nearbyintv4sf2 (wide_dst, wide_src));

  rtx low_half = lowpart_subreg (V2SFmode, wide_dst, V4SFmode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; rint for V2SF, performed in V4SF via gen_rintv4sf2; the low V2SF part
;; of the result is copied to operand 0.
(define_expand "rintv2sf2"
  [(match_operand:V2SF 0 "register_operand")
   (match_operand:V2SF 1 "nonimmediate_operand")]
  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide_src = gen_reg_rtx (V4SFmode);
  rtx wide_dst = gen_reg_rtx (V4SFmode);

  emit_insn (gen_movq_v2sf_to_sse (wide_src, operands[1]));
  emit_insn (gen_rintv4sf2 (wide_dst, wide_src));

  rtx low_half = lowpart_subreg (V2SFmode, wide_dst, V4SFmode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; lrint V2SF -> V2SI, performed on V4SF/V4SI via gen_lrintv4sfv4si2;
;; the low V2SI part of the result is copied to operand 0.  Disabled
;; when flag_trapping_math is set.
(define_expand "lrintv2sfv2si2"
  [(match_operand:V2SI 0 "register_operand")
   (match_operand:V2SF 1 "nonimmediate_operand")]
  "TARGET_SSE4_1 && !flag_trapping_math
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide_src = gen_reg_rtx (V4SFmode);
  rtx wide_dst = gen_reg_rtx (V4SImode);

  emit_insn (gen_movq_v2sf_to_sse (wide_src, operands[1]));
  emit_insn (gen_lrintv4sfv4si2 (wide_dst, wide_src));

  rtx low_half = lowpart_subreg (V2SImode, wide_dst, V4SImode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; ceil for V2SF, performed in V4SF via gen_ceilv4sf2; the low V2SF part
;; of the result is copied to operand 0.  Disabled when
;; flag_trapping_math is set.
(define_expand "ceilv2sf2"
  [(match_operand:V2SF 0 "register_operand")
   (match_operand:V2SF 1 "nonimmediate_operand")]
  "TARGET_SSE4_1 && !flag_trapping_math
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide_src = gen_reg_rtx (V4SFmode);
  rtx wide_dst = gen_reg_rtx (V4SFmode);

  emit_insn (gen_movq_v2sf_to_sse (wide_src, operands[1]));
  emit_insn (gen_ceilv4sf2 (wide_dst, wide_src));

  rtx low_half = lowpart_subreg (V2SFmode, wide_dst, V4SFmode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; lceil V2SF -> V2SI, performed on V4SF/V4SI via gen_lceilv4sfv4si2;
;; the low V2SI part of the result is copied to operand 0.  Disabled
;; when flag_trapping_math is set.
(define_expand "lceilv2sfv2si2"
  [(match_operand:V2SI 0 "register_operand")
   (match_operand:V2SF 1 "nonimmediate_operand")]
  "TARGET_SSE4_1 && !flag_trapping_math
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide_src = gen_reg_rtx (V4SFmode);
  rtx wide_dst = gen_reg_rtx (V4SImode);

  emit_insn (gen_movq_v2sf_to_sse (wide_src, operands[1]));
  emit_insn (gen_lceilv4sfv4si2 (wide_dst, wide_src));

  rtx low_half = lowpart_subreg (V2SImode, wide_dst, V4SImode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; floor for V2SF, performed in V4SF via gen_floorv4sf2; the low V2SF
;; part of the result is copied to operand 0.  Disabled when
;; flag_trapping_math is set.
(define_expand "floorv2sf2"
  [(match_operand:V2SF 0 "register_operand")
   (match_operand:V2SF 1 "nonimmediate_operand")]
  "TARGET_SSE4_1 && !flag_trapping_math
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide_src = gen_reg_rtx (V4SFmode);
  rtx wide_dst = gen_reg_rtx (V4SFmode);

  emit_insn (gen_movq_v2sf_to_sse (wide_src, operands[1]));
  emit_insn (gen_floorv4sf2 (wide_dst, wide_src));

  rtx low_half = lowpart_subreg (V2SFmode, wide_dst, V4SFmode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; lfloor V2SF -> V2SI, performed on V4SF/V4SI via gen_lfloorv4sfv4si2;
;; the low V2SI part of the result is copied to operand 0.  Disabled
;; when flag_trapping_math is set.
(define_expand "lfloorv2sfv2si2"
  [(match_operand:V2SI 0 "register_operand")
   (match_operand:V2SF 1 "nonimmediate_operand")]
  "TARGET_SSE4_1 && !flag_trapping_math
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide_src = gen_reg_rtx (V4SFmode);
  rtx wide_dst = gen_reg_rtx (V4SImode);

  emit_insn (gen_movq_v2sf_to_sse (wide_src, operands[1]));
  emit_insn (gen_lfloorv4sfv4si2 (wide_dst, wide_src));

  rtx low_half = lowpart_subreg (V2SImode, wide_dst, V4SImode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; btrunc for V2SF, performed in V4SF via gen_btruncv4sf2; the low V2SF
;; part of the result is copied to operand 0.  Disabled when
;; flag_trapping_math is set.
(define_expand "btruncv2sf2"
  [(match_operand:V2SF 0 "register_operand")
   (match_operand:V2SF 1 "nonimmediate_operand")]
  "TARGET_SSE4_1 && !flag_trapping_math
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide_src = gen_reg_rtx (V4SFmode);
  rtx wide_dst = gen_reg_rtx (V4SFmode);

  emit_insn (gen_movq_v2sf_to_sse (wide_src, operands[1]));
  emit_insn (gen_btruncv4sf2 (wide_dst, wide_src));

  rtx low_half = lowpart_subreg (V2SFmode, wide_dst, V4SFmode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; round for V2SF, performed in V4SF via gen_roundv4sf2; the low V2SF
;; part of the result is copied to operand 0.  Disabled when
;; flag_trapping_math is set.
(define_expand "roundv2sf2"
  [(match_operand:V2SF 0 "register_operand")
   (match_operand:V2SF 1 "nonimmediate_operand")]
  "TARGET_SSE4_1 && !flag_trapping_math
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide_src = gen_reg_rtx (V4SFmode);
  rtx wide_dst = gen_reg_rtx (V4SFmode);

  emit_insn (gen_movq_v2sf_to_sse (wide_src, operands[1]));
  emit_insn (gen_roundv4sf2 (wide_dst, wide_src));

  rtx low_half = lowpart_subreg (V2SFmode, wide_dst, V4SFmode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; lround V2SF -> V2SI, performed on V4SF/V4SI via gen_lroundv4sfv4si2;
;; the low V2SI part of the result is copied to operand 0.  Disabled
;; when flag_trapping_math is set.
(define_expand "lroundv2sfv2si2"
  [(match_operand:V2SI 0 "register_operand")
   (match_operand:V2SF 1 "nonimmediate_operand")]
  "TARGET_SSE4_1 && !flag_trapping_math
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx wide_src = gen_reg_rtx (V4SFmode);
  rtx wide_dst = gen_reg_rtx (V4SImode);

  emit_insn (gen_movq_v2sf_to_sse (wide_src, operands[1]));
  emit_insn (gen_lroundv4sfv4si2 (wide_dst, wide_src));

  rtx low_half = lowpart_subreg (V2SImode, wide_dst, V4SImode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel half-precision floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Half-precision vector modes handled below: V2HF unconditionally,
;; V4HF only when TARGET_MMX_WITH_SSE holds.
(define_mode_iterator VHF_32_64 [V2HF (V4HF "TARGET_MMX_WITH_SSE")])
;; V4HF division, performed in V8HF via gen_divv8hf3.  The dividend is
;; moved into a V8HF pseudo with gen_movq_v4hf_to_sse; the divisor is
;; built as the concatenation of operand 2 with a V4HF vector of ones
;; (CONST1_RTX).  The low V4HF part of the quotient is copied to
;; operand 0.
(define_expand "divv4hf3"
  [(set (match_operand:V4HF 0 "register_operand")
        (div:V4HF
          (match_operand:V4HF 1 "nonimmediate_operand")
          (match_operand:V4HF 2 "register_operand")))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
{
  rtx wide_divisor = gen_reg_rtx (V8HFmode);
  rtx wide_dividend = gen_reg_rtx (V8HFmode);
  rtx wide_quot = gen_reg_rtx (V8HFmode);

  emit_insn (gen_movq_v4hf_to_sse (wide_dividend, operands[1]));

  /* Upper half of the divisor is a vector of ones.  */
  rtx ones = force_reg (V4HFmode, CONST1_RTX (V4HFmode));
  rtx padded = gen_rtx_VEC_CONCAT (V8HFmode, operands[2], ones);
  emit_insn (gen_rtx_SET (wide_divisor, padded));

  emit_insn (gen_divv8hf3 (wide_quot, wide_dividend, wide_divisor));

  rtx low_half = lowpart_subreg (V4HFmode, wide_quot, V8HFmode);
  emit_move_insn (operands[0], low_half);
  DONE;
})
;; Suffix letter per mode: "d" for the two-element modes, "q" for the
;; four-element modes.
(define_mode_attr mov_to_sse_suffix
[(V2HF "d") (V4HF "q") (V2HI "d") (V4HI "q")])
;; Maps each listed two- or four-element mode to the eight-element
;; vector mode with the same element type (upper-case spelling).
(define_mode_attr mmxxmmmode
[(V2HF "V8HF") (V2HI "V8HI") (V2BF "V8BF")
(V4HF "V8HF") (V4HI "V8HI") (V4BF "V8BF")])
;; Same mapping as mmxxmmmode, in the lower-case spelling.
(define_mode_attr mmxxmmmodelower
[(V2HF "v8hf") (V2HI "v8hi") (V2BF "v8bf")
(V4HF "v8hf") (V4HI "v8hi") (V4BF "v8bf")])
;; Move a 32-bit V2FI_32 vector into a 128-bit register.
;; The iterator placeholders are restored: <mode> in the name,
;; <mmxxmmmode> as the 128-bit mode of operand 0 and of the RTL codes,
;; and <MODE>mode / <mmxxmmmode>mode in the C body (which previously
;; referred to an undeclared "mode" and had empty modes after the ':').
;; When flag_trapping_math is clear the move is a plain lowpart subreg
;; of the source; otherwise the template merges the duplicated source
;; (mask 3) with a zero vector in operand 2.
(define_expand "movd_<mode>_to_sse"
  [(set (match_operand:<mmxxmmmode> 0 "register_operand")
        (vec_merge:<mmxxmmmode>
          (vec_duplicate:<mmxxmmmode>
            (match_operand:V2FI_32 1 "nonimmediate_operand"))
          (match_dup 2)
          (const_int 3)))]
  "TARGET_SSE"
{
  if (!flag_trapping_math)
    {
      rtx op1 = force_reg (<MODE>mode, operands[1]);
      emit_move_insn (operands[0],
                      lowpart_subreg (<mmxxmmmode>mode, op1, <MODE>mode));
      DONE;
    }
  operands[2] = CONST0_RTX (<mmxxmmmode>mode);
})
;; Like movd_<mode>_to_sse, but the remaining elements come from the
;; caller-supplied 128-bit register operand 2 instead of a zero vector.
;; The iterator placeholders are restored: <mode> in the name and
;; <mmxxmmmode> for the 128-bit modes (previously left empty after ':').
(define_expand "movd_<mode>_to_sse_reg"
  [(set (match_operand:<mmxxmmmode> 0 "register_operand")
        (vec_merge:<mmxxmmmode>
          (vec_duplicate:<mmxxmmmode>
            (match_operand:V2FI_32 1 "nonimmediate_operand"))
          (match_operand:<mmxxmmmode> 2 "register_operand")
          (const_int 3)))]
  "TARGET_SSE")
;; Add / subtract / multiply on VHF_32_64 vectors, performed in V8HF.
;; The iterator placeholders are restored: the pattern name was just
;; "3" (colliding with the smaxmin expander), the helper calls had lost
;; their mode parts and the C body referred to an undeclared "mode".
;; NOTE(review): <insn> is assumed to be the code attribute giving the
;; optab name for plusminusmult; confirm against i386.md.
(define_expand "<insn><mode>3"
  [(set (match_operand:VHF_32_64 0 "register_operand")
        (plusminusmult:VHF_32_64
          (match_operand:VHF_32_64 1 "nonimmediate_operand")
          (match_operand:VHF_32_64 2 "nonimmediate_operand")))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
{
  rtx op2 = gen_reg_rtx (V8HFmode);
  rtx op1 = gen_reg_rtx (V8HFmode);
  rtx op0 = gen_reg_rtx (V8HFmode);

  /* Move both inputs into 128-bit registers (movd or movq flavour,
     chosen by mov_to_sse_suffix).  */
  emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op2, operands[2]));
  emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
  emit_insn (gen_<insn>v8hf3 (op0, op1, op2));

  /* Only the low part of the wide result is the answer.  */
  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
  DONE;
})
;; V2HF division, performed in V8HF via gen_divv8hf3.  The divisor is
;; merged into a V8HF vector of ones (CONST1_RTX) with
;; gen_movd_v2hf_to_sse_reg; the dividend is moved with
;; gen_movd_v2hf_to_sse.  The low V2HF part of the quotient is copied to
;; operand 0.
(define_expand "divv2hf3"
  [(set (match_operand:V2HF 0 "register_operand")
        (div:V2HF
          (match_operand:V2HF 1 "nonimmediate_operand")
          (match_operand:V2HF 2 "nonimmediate_operand")))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
{
  rtx wide_divisor = gen_reg_rtx (V8HFmode);
  rtx wide_dividend = gen_reg_rtx (V8HFmode);
  rtx wide_quot = gen_reg_rtx (V8HFmode);

  /* Elements of the divisor not supplied by operand 2 are ones.  */
  rtx ones = force_reg (V8HFmode, CONST1_RTX (V8HFmode));
  emit_insn (gen_movd_v2hf_to_sse_reg (wide_divisor, operands[2], ones));
  emit_insn (gen_movd_v2hf_to_sse (wide_dividend, operands[1]));

  emit_insn (gen_divv8hf3 (wide_quot, wide_dividend, wide_divisor));

  rtx low_part = lowpart_subreg (V2HFmode, wide_quot, V8HFmode);
  emit_move_insn (operands[0], low_part);
  DONE;
})
;; Signed max / min on VHF_32_64 vectors, performed in V8HF.
;; The iterator placeholders are restored: the pattern name was just
;; "3" (colliding with the plusminusmult expander), the helper calls had
;; lost their mode parts and the C body referred to an undeclared "mode".
(define_expand "<code><mode>3"
  [(set (match_operand:VHF_32_64 0 "register_operand")
        (smaxmin:VHF_32_64
          (match_operand:VHF_32_64 1 "nonimmediate_operand")
          (match_operand:VHF_32_64 2 "nonimmediate_operand")))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
{
  rtx op2 = gen_reg_rtx (V8HFmode);
  rtx op1 = gen_reg_rtx (V8HFmode);
  rtx op0 = gen_reg_rtx (V8HFmode);

  /* Move both inputs into 128-bit registers (movd or movq flavour,
     chosen by mov_to_sse_suffix).  */
  emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op2, operands[2]));
  emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
  emit_insn (gen_<code>v8hf3 (op0, op1, op2));

  /* Only the low part of the wide result is the answer.  */
  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
  DONE;
})
;; Square root on VHF_32_64 vectors, performed in V8HF via
;; gen_sqrtv8hf2.  The iterator placeholders are restored: <mode> in the
;; name and helper call, <MODE>mode in the final lowpart extraction
;; (the body previously referred to an undeclared "mode").
(define_expand "sqrt<mode>2"
  [(set (match_operand:VHF_32_64 0 "register_operand")
        (sqrt:VHF_32_64
          (match_operand:VHF_32_64 1 "nonimmediate_operand")))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
{
  rtx op1 = gen_reg_rtx (V8HFmode);
  rtx op0 = gen_reg_rtx (V8HFmode);

  emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
  emit_insn (gen_sqrtv8hf2 (op0, op1));

  /* Only the low part of the wide result is the answer.  */
  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
  DONE;
})
;; abs / neg on VHF_32_64 vectors; expansion is delegated to
;; ix86_expand_fp_absneg_operator.
;; The iterator placeholders are restored: the name was just "2" and the
;; helper call had an empty first argument and an undeclared "mode".
(define_expand "<code><mode>2"
  [(set (match_operand:VHF_32_64 0 "register_operand")
        (absneg:VHF_32_64
          (match_operand:VHF_32_64 1 "register_operand")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; abs / neg on VHF_32_64 vectors with the mask supplied in operand 2
;; (a "use").  Never output directly ("#"): after reload it is split
;; into a single bitwise operation of operand 1 with the mask.
;; The iterator placeholders are restored: the name was "*mmx_" and the
;; split template had lost both its RTL code and mode ("(: ...)").
;; NOTE(review): <absneg_op> is assumed to be the code attribute mapping
;; abs/neg to the bitwise code used with the mask; confirm in i386.md.
(define_insn_and_split "*mmx_<code><mode>"
  [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x,x")
        (absneg:VHF_32_64
          (match_operand:VHF_32_64 1 "register_operand" "0,x,x")))
   (use (match_operand:VHF_32_64 2 "register_operand" "x,0,x"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (<absneg_op>:<MODE> (match_dup 1) (match_dup 2)))]
{
  /* Without AVX the destination must be the first source; if the mask
     was allocated to the destination (alternative 1), swap the two
     sources.  */
  if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
    std::swap (operands[1], operands[2]);
}
  [(set_attr "isa" "noavx,noavx,avx")])
;; Negated absolute value on VHF_32_64 vectors with the mask supplied in
;; operand 2 (a "use").  Never output directly ("#"): after reload it is
;; split into an IOR of operand 1 with the mask.
;; The iterator placeholders are restored: <mode> in the name and <MODE>
;; as the mode of the IOR (previously left empty after ':').
(define_insn_and_split "*mmx_nabs<mode>2"
  [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x,x")
        (neg:VHF_32_64
          (abs:VHF_32_64
            (match_operand:VHF_32_64 1 "register_operand" "0,x,x"))))
   (use (match_operand:VHF_32_64 2 "register_operand" "x,0,x"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (ior:<MODE> (match_dup 1) (match_dup 2)))])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel half-precision floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4HF vector comparison with a QImode result.  Both inputs are moved
;; into V8HF pseudos and the comparison is delegated to
;; gen_vec_cmpv8hfqi, which writes operand 0 directly.
(define_expand "vec_cmpv4hfqi"
  [(set (match_operand:QI 0 "register_operand")
        (match_operator:QI 1 ""
          [(match_operand:V4HF 2 "nonimmediate_operand")
           (match_operand:V4HF 3 "nonimmediate_operand")]))]
  "TARGET_MMX_WITH_SSE && TARGET_AVX512FP16 && TARGET_AVX512VL
   && ix86_partial_vec_fp_math"
{
  rtx wide_rhs = gen_reg_rtx (V8HFmode);
  rtx wide_lhs = gen_reg_rtx (V8HFmode);

  emit_insn (gen_movq_v4hf_to_sse (wide_rhs, operands[3]));
  emit_insn (gen_movq_v4hf_to_sse (wide_lhs, operands[2]));

  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1],
                                wide_lhs, wide_rhs));
  DONE;
})
;; Select between two V4F_64 vectors under a V4HI vector mask; the work
;; is delegated to ix86_expand_sse_movcc.
;; The pattern iterates over V4F_64, so the name carries <mode> to keep
;; the per-mode pattern names distinct.
(define_expand "vcond_mask_<mode>v4hi"
  [(set (match_operand:V4F_64 0 "register_operand")
        (vec_merge:V4F_64
          (match_operand:V4F_64 1 "register_operand")
          (match_operand:V4F_64 2 "register_operand")
          (match_operand:V4HI 3 "register_operand")))]
  "TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
{
  /* Argument order for the helper: destination, mask, then the two
     data operands.  */
  ix86_expand_sse_movcc (operands[0], operands[3],
                         operands[1], operands[2]);
  DONE;
})
;; Select between two V4FI_64 vectors under a QImode mask, performed in
;; the corresponding 128-bit mode.
;; The iterator placeholders are restored.  Without them the expander
;; was named "vcond_mask_qi" (the same as the 32-bit variant further
;; down), called gen_vcond_mask_qi, i.e. itself, and referred to an
;; undeclared "mode".  The 128-bit mode and the name of the 128-bit
;; expander come from mmxxmmmode / mmxxmmmodelower.
(define_expand "vcond_mask_<mode>qi"
  [(set (match_operand:V4FI_64 0 "register_operand")
        (vec_merge:V4FI_64
          (match_operand:V4FI_64 1 "register_operand")
          (match_operand:V4FI_64 2 "register_operand")
          (match_operand:QI 3 "register_operand")))]
  "TARGET_MMX_WITH_SSE && TARGET_AVX512BW && TARGET_AVX512VL"
{
  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);

  /* View both data operands in the 128-bit mode.  */
  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
                                                  operands[2], operands[3]));

  /* Only the low part of the wide result is the answer.  */
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
  DONE;
})
;; V2HF vector comparison with a QImode result.  Both inputs are moved
;; into V8HF pseudos and the comparison is delegated to
;; gen_vec_cmpv8hfqi, which writes operand 0 directly.
(define_expand "vec_cmpv2hfqi"
  [(set (match_operand:QI 0 "register_operand")
        (match_operator:QI 1 ""
          [(match_operand:V2HF 2 "nonimmediate_operand")
           (match_operand:V2HF 3 "nonimmediate_operand")]))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && ix86_partial_vec_fp_math"
{
  rtx wide_rhs = gen_reg_rtx (V8HFmode);
  rtx wide_lhs = gen_reg_rtx (V8HFmode);

  emit_insn (gen_movd_v2hf_to_sse (wide_rhs, operands[3]));
  emit_insn (gen_movd_v2hf_to_sse (wide_lhs, operands[2]));

  emit_insn (gen_vec_cmpv8hfqi (operands[0], operands[1],
                                wide_lhs, wide_rhs));
  DONE;
})
;; Select between two V2F_32 vectors under a V2HI vector mask; the work
;; is delegated to ix86_expand_sse_movcc.
;; The pattern iterates over V2F_32, so the name carries <mode> to keep
;; the per-mode pattern names distinct.
(define_expand "vcond_mask_<mode>v2hi"
  [(set (match_operand:V2F_32 0 "register_operand")
        (vec_merge:V2F_32
          (match_operand:V2F_32 1 "register_operand")
          (match_operand:V2F_32 2 "register_operand")
          (match_operand:V2HI 3 "register_operand")))]
  "TARGET_SSE4_1"
{
  /* Argument order for the helper: destination, mask, then the two
     data operands.  */
  ix86_expand_sse_movcc (operands[0], operands[3],
                         operands[1], operands[2]);
  DONE;
})
;; Select between two V2FI_32 vectors under a QImode mask, performed in
;; the corresponding 128-bit mode.
;; The iterator placeholders are restored.  Without them the expander
;; was named "vcond_mask_qi" (the same as the 64-bit variant further
;; up), called gen_vcond_mask_qi, i.e. itself, and referred to an
;; undeclared "mode".  The 128-bit mode and the name of the 128-bit
;; expander come from mmxxmmmode / mmxxmmmodelower.
(define_expand "vcond_mask_<mode>qi"
  [(set (match_operand:V2FI_32 0 "register_operand")
        (vec_merge:V2FI_32
          (match_operand:V2FI_32 1 "register_operand")
          (match_operand:V2FI_32 2 "register_operand")
          (match_operand:QI 3 "register_operand")))]
  "TARGET_AVX512BW && TARGET_AVX512VL"
{
  rtx op0 = gen_reg_rtx (<mmxxmmmode>mode);

  /* View both data operands in the 128-bit mode.  */
  operands[1] = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
  operands[2] = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
  emit_insn (gen_vcond_mask_<mmxxmmmodelower>qi (op0, operands[1],
                                                  operands[2], operands[3]));

  /* Only the low part of the wide result is the answer.  */
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op0, <mmxxmmmode>mode));
  DONE;
})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel half-precision floating point rounding operations.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; btrunc on VHF_32_64 vectors, performed in V8HF via gen_btruncv8hf2.
;; Disabled when flag_trapping_math is set.
;; The iterator placeholders are restored: <mode> in the name and helper
;; call, <MODE>mode in the final lowpart extraction (the body previously
;; referred to an undeclared "mode").
(define_expand "btrunc<mode>2"
  [(match_operand:VHF_32_64 0 "register_operand")
   (match_operand:VHF_32_64 1 "nonimmediate_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && ix86_partial_vec_fp_math
   && !flag_trapping_math"
{
  rtx op1 = gen_reg_rtx (V8HFmode);
  rtx op0 = gen_reg_rtx (V8HFmode);

  emit_insn (gen_mov<mov_to_sse_suffix>_<mode>_to_sse (op1, operands[1]));
  emit_insn (gen_btruncv8hf2 (op0, op1));

  /* Only the low part of the wide result is the answer.  */
  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8HFmode));
  DONE;
})
;; nearbyint for VHF_32_64: load the input into a V8HF pseudo, run the
;; V8HF nearbyint expander and copy the low part of its result to
;; operand 0.
(define_expand "nearbyint2"
  [(match_operand:VHF_32_64 0 "register_operand")
   (match_operand:VHF_32_64 1 "nonimmediate_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && ix86_partial_vec_fp_math"
{
  rtx xmm_src = gen_reg_rtx (V8HFmode);
  rtx xmm_dst = gen_reg_rtx (V8HFmode);

  emit_insn (gen_mov__to_sse (xmm_src, operands[1]));
  emit_insn (gen_nearbyintv8hf2 (xmm_dst, xmm_src));

  /* Only the low part of the V8HF result belongs to operand 0.  */
  rtx low = lowpart_subreg (mode, xmm_dst, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; rint for VHF_32_64: load the input into a V8HF pseudo, run the V8HF
;; rint expander and copy the low part of its result to operand 0.
(define_expand "rint2"
  [(match_operand:VHF_32_64 0 "register_operand")
   (match_operand:VHF_32_64 1 "nonimmediate_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && ix86_partial_vec_fp_math"
{
  rtx xmm_src = gen_reg_rtx (V8HFmode);
  rtx xmm_dst = gen_reg_rtx (V8HFmode);

  emit_insn (gen_mov__to_sse (xmm_src, operands[1]));
  emit_insn (gen_rintv8hf2 (xmm_dst, xmm_src));

  /* Only the low part of the V8HF result belongs to operand 0.  */
  rtx low = lowpart_subreg (mode, xmm_dst, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; lrint for VHF_32_64 inputs: load the input into a V8HF pseudo, run the
;; V8HF -> V8HI lrint expander and copy the low part of the integer result
;; to operand 0.
(define_expand "lrint2"
  [(match_operand: 0 "register_operand")
   (match_operand:VHF_32_64 1 "nonimmediate_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && ix86_partial_vec_fp_math"
{
  rtx op1 = gen_reg_rtx (V8HFmode);
  /* The v8hfv8hi generator writes an integer vector, so the temporary
     receiving it is V8HI (as in the fix_trunc expander of this file),
     not V8HF.  */
  rtx op0 = gen_reg_rtx (V8HImode);

  emit_insn (gen_mov__to_sse (op1, operands[1]));
  emit_insn (gen_lrintv8hfv8hi2 (op0, op1));
  /* NOTE(review): the outer mode passed here has to be operand 0's
     integer vector mode -- confirm the mode attribute used.  */
  emit_move_insn (operands[0],
                  lowpart_subreg (mode, op0, V8HImode));
  DONE;
})
;; floor for VHF_32_64: load the input into a V8HF pseudo, run the V8HF
;; floor expander and copy the low part of its result to operand 0.
;; Disabled when flag_trapping_math is set.
(define_expand "floor2"
  [(match_operand:VHF_32_64 0 "register_operand")
   (match_operand:VHF_32_64 1 "nonimmediate_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && ix86_partial_vec_fp_math
   && !flag_trapping_math"
{
  rtx xmm_src = gen_reg_rtx (V8HFmode);
  rtx xmm_dst = gen_reg_rtx (V8HFmode);

  emit_insn (gen_mov__to_sse (xmm_src, operands[1]));
  emit_insn (gen_floorv8hf2 (xmm_dst, xmm_src));

  /* Only the low part of the V8HF result belongs to operand 0.  */
  rtx low = lowpart_subreg (mode, xmm_dst, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; lfloor for VHF_32_64 inputs: load the input into a V8HF pseudo, run the
;; V8HF -> V8HI lfloor expander and copy the low part of the integer
;; result to operand 0.  Disabled when flag_trapping_math is set.
(define_expand "lfloor2"
  [(match_operand: 0 "register_operand")
   (match_operand:VHF_32_64 1 "nonimmediate_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && ix86_partial_vec_fp_math
   && !flag_trapping_math"
{
  rtx op1 = gen_reg_rtx (V8HFmode);
  /* The v8hfv8hi generator writes an integer vector, so the temporary
     receiving it is V8HI (as in the fix_trunc expander of this file),
     not V8HF.  */
  rtx op0 = gen_reg_rtx (V8HImode);

  emit_insn (gen_mov__to_sse (op1, operands[1]));
  emit_insn (gen_lfloorv8hfv8hi2 (op0, op1));
  /* NOTE(review): the outer mode passed here has to be operand 0's
     integer vector mode -- confirm the mode attribute used.  */
  emit_move_insn (operands[0],
                  lowpart_subreg (mode, op0, V8HImode));
  DONE;
})
;; ceil for VHF_32_64: load the input into a V8HF pseudo, run the V8HF
;; ceil expander and copy the low part of its result to operand 0.
;; Disabled when flag_trapping_math is set.
(define_expand "ceil2"
  [(match_operand:VHF_32_64 0 "register_operand")
   (match_operand:VHF_32_64 1 "nonimmediate_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && ix86_partial_vec_fp_math
   && !flag_trapping_math"
{
  rtx xmm_src = gen_reg_rtx (V8HFmode);
  rtx xmm_dst = gen_reg_rtx (V8HFmode);

  emit_insn (gen_mov__to_sse (xmm_src, operands[1]));
  emit_insn (gen_ceilv8hf2 (xmm_dst, xmm_src));

  /* Only the low part of the V8HF result belongs to operand 0.  */
  rtx low = lowpart_subreg (mode, xmm_dst, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; lceil for VHF_32_64 inputs: load the input into a V8HF pseudo, run the
;; V8HF -> V8HI lceil expander and copy the low part of the integer
;; result to operand 0.  Disabled when flag_trapping_math is set.
(define_expand "lceil2"
  [(match_operand: 0 "register_operand")
   (match_operand:VHF_32_64 1 "nonimmediate_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && ix86_partial_vec_fp_math
   && !flag_trapping_math"
{
  rtx op1 = gen_reg_rtx (V8HFmode);
  /* The v8hfv8hi generator writes an integer vector, so the temporary
     receiving it is V8HI (as in the fix_trunc expander of this file),
     not V8HF.  */
  rtx op0 = gen_reg_rtx (V8HImode);

  emit_insn (gen_mov__to_sse (op1, operands[1]));
  emit_insn (gen_lceilv8hfv8hi2 (op0, op1));
  /* NOTE(review): the outer mode passed here has to be operand 0's
     integer vector mode -- confirm the mode attribute used.  */
  emit_move_insn (operands[0],
                  lowpart_subreg (mode, op0, V8HImode));
  DONE;
})
;; round for VHF_32_64: load the input into a V8HF pseudo, run the V8HF
;; round expander and copy the low part of its result to operand 0.
;; Disabled when flag_trapping_math is set.
(define_expand "round2"
  [(match_operand:VHF_32_64 0 "register_operand")
   (match_operand:VHF_32_64 1 "nonimmediate_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && ix86_partial_vec_fp_math
   && !flag_trapping_math"
{
  rtx xmm_src = gen_reg_rtx (V8HFmode);
  rtx xmm_dst = gen_reg_rtx (V8HFmode);

  emit_insn (gen_mov__to_sse (xmm_src, operands[1]));
  emit_insn (gen_roundv8hf2 (xmm_dst, xmm_src));

  /* Only the low part of the V8HF result belongs to operand 0.  */
  rtx low = lowpart_subreg (mode, xmm_dst, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; lround for VHF_32_64 inputs: load the input into a V8HF pseudo, run the
;; V8HF -> V8HI lround expander and copy the low part of the integer
;; result to operand 0.  Disabled when flag_trapping_math is set.
(define_expand "lround2"
  [(match_operand: 0 "register_operand")
   (match_operand:VHF_32_64 1 "nonimmediate_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && ix86_partial_vec_fp_math
   && !flag_trapping_math"
{
  rtx op1 = gen_reg_rtx (V8HFmode);
  /* The v8hfv8hi generator writes an integer vector, so the temporary
     receiving it is V8HI (as in the fix_trunc expander of this file),
     not V8HF.  */
  rtx op0 = gen_reg_rtx (V8HImode);

  emit_insn (gen_mov__to_sse (op1, operands[1]));
  emit_insn (gen_lroundv8hfv8hi2 (op0, op1));
  /* NOTE(review): the outer mode passed here has to be operand 0's
     integer vector mode -- confirm the mode attribute used.  */
  emit_move_insn (operands[0],
                  lowpart_subreg (mode, op0, V8HImode));
  DONE;
})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel half-precision floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise and-not on VHF_32_64 values: operand 0 = (~operand 1) & operand 2.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and emits
;; the two-operand andnps; alternative 1 (isa avx) emits the three-operand
;; vandnps.  The insn is attributed as a V4SF-mode sselog operation.
(define_insn "*mmx_andnot3"
[(set (match_operand:VHF_32_64 0 "register_operand" "=x,x")
(and:VHF_32_64
(not:VHF_32_64
(match_operand:VHF_32_64 1 "register_operand" "0,x"))
(match_operand:VHF_32_64 2 "register_operand" "x,x")))]
"TARGET_SSE"
"@
andnps\t{%2, %0|%0, %2}
vandnps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V4SF")])
;; Bitwise logic (any_logic code iterator) on VHF_32_64 values.  Operand 1
;; is marked commutative ("%"); alternative 0 (isa noavx) ties it to the
;; destination for the two-operand form, alternative 1 (isa avx) is the
;; three-operand form.
;; NOTE(review): the pattern name and the mnemonic prefix in front of "ps"
;; appear to have lost their iterator attributes in this text -- confirm
;; against the pristine file.
(define_insn "3"
[(set (match_operand:VHF_32_64 0 "register_operand" "=x,x")
(any_logic:VHF_32_64
(match_operand:VHF_32_64 1 "register_operand" "%0,x")
(match_operand:VHF_32_64 2 "register_operand" " x,x")))]
"TARGET_SSE"
"@
ps\t{%2, %0|%0, %2}
vps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog,sselog")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V4SF")])
;; copysign for VHF_32_64, built from three logic operations with the mask
;; in operand 3:
;;   operand 4 = ~mask & operand 1
;;   operand 5 =  mask & operand 2
;;   operand 0 = operand 4 | operand 5
;; The mask comes from ix86_build_signbit_mask (presumably the per-element
;; sign bit, per the helper's name -- confirm against its definition).
(define_expand "copysign3"
[(set (match_dup 4)
(and:VHF_32_64
(not:VHF_32_64 (match_dup 3))
(match_operand:VHF_32_64 1 "register_operand")))
(set (match_dup 5)
(and:VHF_32_64 (match_dup 3)
(match_operand:VHF_32_64 2 "register_operand")))
(set (match_operand:VHF_32_64 0 "register_operand")
(ior:VHF_32_64 (match_dup 4) (match_dup 5)))]
"TARGET_SSE"
{
/* Operand 3 is the mask; operands 4 and 5 are fresh temporaries for the
   two intermediate results.  */
operands[3] = ix86_build_signbit_mask (mode, true, false);
operands[4] = gen_reg_rtx (mode);
operands[5] = gen_reg_rtx (mode);
})
;; xorsign for VHF_32_64, built from two logic operations with the mask in
;; operand 3:
;;   operand 4 = mask & operand 2
;;   operand 0 = operand 4 ^ operand 1
;; The mask comes from ix86_build_signbit_mask (presumably the per-element
;; sign bit, per the helper's name -- confirm against its definition).
(define_expand "xorsign3"
[(set (match_dup 4)
(and:VHF_32_64 (match_dup 3)
(match_operand:VHF_32_64 2 "register_operand")))
(set (match_operand:VHF_32_64 0 "register_operand")
(xor:VHF_32_64 (match_dup 4)
(match_operand:VHF_32_64 1 "register_operand")))]
"TARGET_SSE"
{
/* Operand 3 is the mask; operand 4 is a fresh temporary for the masked
   copy of operand 2.  */
operands[3] = ix86_build_signbit_mask (mode, true, false);
operands[4] = gen_reg_rtx (mode);
})
;; signbit for VHF_32_64: operand 1 is reinterpreted through a subreg at
;; offset 0 and logically shifted right by operand 2, which the preparation
;; statement sets to the element bit size minus one.
;; NOTE(review): the mode of operand 0, of the shift and of the subreg
;; appears to have been elided in this text -- confirm against the
;; pristine file.
(define_expand "signbit2"
[(set (match_operand: 0 "register_operand")
(lshiftrt:
(subreg:
(match_operand:VHF_32_64 1 "register_operand") 0)
(match_dup 2)))]
"TARGET_SSE2"
"operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (mode)-1);")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel half-precision FMA multiply/accumulate instructions.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; fma for VHF_32_64: each of the three inputs is loaded into its own V8HF
;; pseudo, the V8HF fma expander is run, and the low part of its result is
;; copied to operand 0.
(define_expand "fma4"
  [(set (match_operand:VHF_32_64 0 "register_operand")
        (fma:VHF_32_64
          (match_operand:VHF_32_64 1 "nonimmediate_operand")
          (match_operand:VHF_32_64 2 "nonimmediate_operand")
          (match_operand:VHF_32_64 3 "nonimmediate_operand")))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
{
  /* Pseudos are created and loaded for operand 3 first, operand 1 last.  */
  rtx xmm_c = gen_reg_rtx (V8HFmode);
  rtx xmm_b = gen_reg_rtx (V8HFmode);
  rtx xmm_a = gen_reg_rtx (V8HFmode);
  rtx xmm_res = gen_reg_rtx (V8HFmode);

  emit_insn (gen_mov__to_sse (xmm_c, operands[3]));
  emit_insn (gen_mov__to_sse (xmm_b, operands[2]));
  emit_insn (gen_mov__to_sse (xmm_a, operands[1]));

  emit_insn (gen_fmav8hf4 (xmm_res, xmm_a, xmm_b, xmm_c));

  rtx low = lowpart_subreg (mode, xmm_res, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; fms (operand 3 negated in the template) for VHF_32_64: each of the three
;; inputs is loaded into its own V8HF pseudo, the V8HF fms expander is run,
;; and the low part of its result is copied to operand 0.
(define_expand "fms4"
  [(set (match_operand:VHF_32_64 0 "register_operand")
        (fma:VHF_32_64
          (match_operand:VHF_32_64 1 "nonimmediate_operand")
          (match_operand:VHF_32_64 2 "nonimmediate_operand")
          (neg:VHF_32_64
            (match_operand:VHF_32_64 3 "nonimmediate_operand"))))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
{
  /* Pseudos are created and loaded for operand 3 first, operand 1 last.  */
  rtx xmm_c = gen_reg_rtx (V8HFmode);
  rtx xmm_b = gen_reg_rtx (V8HFmode);
  rtx xmm_a = gen_reg_rtx (V8HFmode);
  rtx xmm_res = gen_reg_rtx (V8HFmode);

  emit_insn (gen_mov__to_sse (xmm_c, operands[3]));
  emit_insn (gen_mov__to_sse (xmm_b, operands[2]));
  emit_insn (gen_mov__to_sse (xmm_a, operands[1]));

  emit_insn (gen_fmsv8hf4 (xmm_res, xmm_a, xmm_b, xmm_c));

  rtx low = lowpart_subreg (mode, xmm_res, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; fnma (operand 1 negated in the template) for VHF_32_64: each of the
;; three inputs is loaded into its own V8HF pseudo, the V8HF fnma expander
;; is run, and the low part of its result is copied to operand 0.
(define_expand "fnma4"
  [(set (match_operand:VHF_32_64 0 "register_operand")
        (fma:VHF_32_64
          (neg:VHF_32_64
            (match_operand:VHF_32_64 1 "nonimmediate_operand"))
          (match_operand:VHF_32_64 2 "nonimmediate_operand")
          (match_operand:VHF_32_64 3 "nonimmediate_operand")))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
{
  /* Pseudos are created and loaded for operand 3 first, operand 1 last.  */
  rtx xmm_c = gen_reg_rtx (V8HFmode);
  rtx xmm_b = gen_reg_rtx (V8HFmode);
  rtx xmm_a = gen_reg_rtx (V8HFmode);
  rtx xmm_res = gen_reg_rtx (V8HFmode);

  emit_insn (gen_mov__to_sse (xmm_c, operands[3]));
  emit_insn (gen_mov__to_sse (xmm_b, operands[2]));
  emit_insn (gen_mov__to_sse (xmm_a, operands[1]));

  emit_insn (gen_fnmav8hf4 (xmm_res, xmm_a, xmm_b, xmm_c));

  rtx low = lowpart_subreg (mode, xmm_res, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; fnms (operands 1 and 3 negated in the template) for VHF_32_64: each of
;; the three inputs is loaded into its own V8HF pseudo, the V8HF fnms
;; expander is run, and the low part of its result is copied to operand 0.
;; Operand 0 carries no constraint string, matching the fma/fms/fnma
;; expanders in this file.
(define_expand "fnms4"
  [(set (match_operand:VHF_32_64 0 "register_operand")
        (fma:VHF_32_64
          (neg:VHF_32_64
            (match_operand:VHF_32_64 1 "nonimmediate_operand"))
          (match_operand:VHF_32_64 2 "nonimmediate_operand")
          (neg:VHF_32_64
            (match_operand:VHF_32_64 3 "nonimmediate_operand"))))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
{
  rtx op3 = gen_reg_rtx (V8HFmode);
  rtx op2 = gen_reg_rtx (V8HFmode);
  rtx op1 = gen_reg_rtx (V8HFmode);
  rtx op0 = gen_reg_rtx (V8HFmode);

  /* Load the inputs, operand 3 first and operand 1 last.  */
  emit_insn (gen_mov__to_sse (op3, operands[3]));
  emit_insn (gen_mov__to_sse (op2, operands[2]));
  emit_insn (gen_mov__to_sse (op1, operands[1]));

  emit_insn (gen_fnmsv8hf4 (op0, op1, op2, op3));

  /* Only the low part of the V8HF result belongs to operand 0.  */
  emit_move_insn (operands[0], lowpart_subreg (mode, op0, V8HFmode));
  DONE;
})
;; vec_fmaddsub for V4HF: each of the three inputs is loaded into a V8HF
;; pseudo with movq_v4hf_to_sse, the V8HF vec_fmaddsub expander is run,
;; and the low V4HF part of its result is copied to operand 0.
(define_expand "vec_fmaddsubv4hf4"
  [(match_operand:V4HF 0 "register_operand")
   (match_operand:V4HF 1 "nonimmediate_operand")
   (match_operand:V4HF 2 "nonimmediate_operand")
   (match_operand:V4HF 3 "nonimmediate_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && TARGET_MMX_WITH_SSE
   && ix86_partial_vec_fp_math"
{
  /* Pseudos are created and loaded for operand 3 first, operand 1 last.  */
  rtx xmm_c = gen_reg_rtx (V8HFmode);
  rtx xmm_b = gen_reg_rtx (V8HFmode);
  rtx xmm_a = gen_reg_rtx (V8HFmode);
  rtx xmm_res = gen_reg_rtx (V8HFmode);

  emit_insn (gen_movq_v4hf_to_sse (xmm_c, operands[3]));
  emit_insn (gen_movq_v4hf_to_sse (xmm_b, operands[2]));
  emit_insn (gen_movq_v4hf_to_sse (xmm_a, operands[1]));

  emit_insn (gen_vec_fmaddsubv8hf4 (xmm_res, xmm_a, xmm_b, xmm_c));

  rtx low = lowpart_subreg (V4HFmode, xmm_res, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; vec_fmsubadd for V4HF: each of the three inputs is loaded into a V8HF
;; pseudo with movq_v4hf_to_sse, the V8HF vec_fmsubadd expander is run,
;; and the low V4HF part of its result is copied to operand 0.
(define_expand "vec_fmsubaddv4hf4"
  [(match_operand:V4HF 0 "register_operand")
   (match_operand:V4HF 1 "nonimmediate_operand")
   (match_operand:V4HF 2 "nonimmediate_operand")
   (match_operand:V4HF 3 "nonimmediate_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && ix86_partial_vec_fp_math
   && TARGET_MMX_WITH_SSE"
{
  /* Pseudos are created and loaded for operand 3 first, operand 1 last.  */
  rtx xmm_c = gen_reg_rtx (V8HFmode);
  rtx xmm_b = gen_reg_rtx (V8HFmode);
  rtx xmm_a = gen_reg_rtx (V8HFmode);
  rtx xmm_res = gen_reg_rtx (V8HFmode);

  emit_insn (gen_movq_v4hf_to_sse (xmm_c, operands[3]));
  emit_insn (gen_movq_v4hf_to_sse (xmm_b, operands[2]));
  emit_insn (gen_movq_v4hf_to_sse (xmm_a, operands[1]));

  emit_insn (gen_vec_fmsubaddv8hf4 (xmm_res, xmm_a, xmm_b, xmm_c));

  rtx low = lowpart_subreg (V4HFmode, xmm_res, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel half-precision floating point complex type operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cmla for V4HF: each of the three inputs is loaded into a V8HF pseudo
;; with movq_v4hf_to_sse, the V8HF cmla expander is run, and the low V4HF
;; part of its result is copied to operand 0.
(define_expand "cmlav4hf4"
  [(match_operand:V4HF 0 "register_operand")
   (match_operand:V4HF 1 "vector_operand")
   (match_operand:V4HF 2 "vector_operand")
   (match_operand:V4HF 3 "vector_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL"
{
  /* Pseudos are created and loaded for operand 3 first, operand 1 last.  */
  rtx xmm_c = gen_reg_rtx (V8HFmode);
  rtx xmm_b = gen_reg_rtx (V8HFmode);
  rtx xmm_a = gen_reg_rtx (V8HFmode);
  rtx xmm_res = gen_reg_rtx (V8HFmode);

  emit_insn (gen_movq_v4hf_to_sse (xmm_c, operands[3]));
  emit_insn (gen_movq_v4hf_to_sse (xmm_b, operands[2]));
  emit_insn (gen_movq_v4hf_to_sse (xmm_a, operands[1]));

  emit_insn (gen_cmlav8hf4 (xmm_res, xmm_a, xmm_b, xmm_c));

  rtx low = lowpart_subreg (V4HFmode, xmm_res, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; cmla_conj for V4HF: each of the three inputs is loaded into a V8HF
;; pseudo with movq_v4hf_to_sse, the V8HF cmla_conj expander is run, and
;; the low V4HF part of its result is copied to operand 0.
(define_expand "cmla_conjv4hf4"
  [(match_operand:V4HF 0 "register_operand")
   (match_operand:V4HF 1 "vector_operand")
   (match_operand:V4HF 2 "vector_operand")
   (match_operand:V4HF 3 "vector_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL"
{
  /* Pseudos are created and loaded for operand 3 first, operand 1 last.  */
  rtx xmm_c = gen_reg_rtx (V8HFmode);
  rtx xmm_b = gen_reg_rtx (V8HFmode);
  rtx xmm_a = gen_reg_rtx (V8HFmode);
  rtx xmm_res = gen_reg_rtx (V8HFmode);

  emit_insn (gen_movq_v4hf_to_sse (xmm_c, operands[3]));
  emit_insn (gen_movq_v4hf_to_sse (xmm_b, operands[2]));
  emit_insn (gen_movq_v4hf_to_sse (xmm_a, operands[1]));

  emit_insn (gen_cmla_conjv8hf4 (xmm_res, xmm_a, xmm_b, xmm_c));

  rtx low = lowpart_subreg (V4HFmode, xmm_res, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; cmul for V4HF: both inputs are loaded into V8HF pseudos with
;; movq_v4hf_to_sse, the V8HF cmul expander is run, and the low V4HF part
;; of its result is copied to operand 0.
(define_expand "cmulv4hf3"
  [(match_operand:V4HF 0 "register_operand")
   (match_operand:V4HF 1 "vector_operand")
   (match_operand:V4HF 2 "vector_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL"
{
  /* The pseudo for operand 2 is created and loaded before operand 1's.  */
  rtx xmm_b = gen_reg_rtx (V8HFmode);
  rtx xmm_a = gen_reg_rtx (V8HFmode);
  rtx xmm_res = gen_reg_rtx (V8HFmode);

  emit_insn (gen_movq_v4hf_to_sse (xmm_b, operands[2]));
  emit_insn (gen_movq_v4hf_to_sse (xmm_a, operands[1]));

  emit_insn (gen_cmulv8hf3 (xmm_res, xmm_a, xmm_b));

  rtx low = lowpart_subreg (V4HFmode, xmm_res, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; cmul_conj for V4HF: both inputs are loaded into V8HF pseudos with
;; movq_v4hf_to_sse, the V8HF cmul_conj expander is run, and the low V4HF
;; part of its result is copied to operand 0.
(define_expand "cmul_conjv4hf3"
  [(match_operand:V4HF 0 "register_operand")
   (match_operand:V4HF 1 "vector_operand")
   (match_operand:V4HF 2 "vector_operand")]
  "TARGET_AVX512FP16 && TARGET_AVX512VL"
{
  /* The pseudo for operand 2 is created and loaded before operand 1's.  */
  rtx xmm_b = gen_reg_rtx (V8HFmode);
  rtx xmm_a = gen_reg_rtx (V8HFmode);
  rtx xmm_res = gen_reg_rtx (V8HFmode);

  emit_insn (gen_movq_v4hf_to_sse (xmm_b, operands[2]));
  emit_insn (gen_movq_v4hf_to_sse (xmm_a, operands[1]));

  emit_insn (gen_cmul_conjv8hf3 (xmm_res, xmm_a, xmm_b));

  rtx low = lowpart_subreg (V4HFmode, xmm_res, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel half-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Float-to-integer conversion (any_fix) for VHF_32_64 inputs: the input is
;; loaded into a V8HF pseudo, the V8HF -> V8HI fix_trunc expander writes a
;; V8HI pseudo, and the low part of that is copied to operand 0.
(define_expand "fix_trunc2"
  [(set (match_operand: 0 "register_operand")
        (any_fix:
          (match_operand:VHF_32_64 1 "nonimmediate_operand")))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
{
  rtx xmm_fp = gen_reg_rtx (V8HFmode);
  rtx xmm_int = gen_reg_rtx (V8HImode);

  emit_insn (gen_mov__to_sse (xmm_fp, operands[1]));
  emit_insn (gen_fix_truncv8hfv8hi2 (xmm_int, xmm_fp));

  /* Only the low part of the V8HI result belongs to operand 0.  */
  rtx low = lowpart_subreg (mode, xmm_int, V8HImode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; V2HF -> V2SI conversion (any_fix): the input is loaded into a V8HF
;; pseudo with movd_v2hf_to_sse, the avx512fp16 fix_trunc generator writes
;; a V4SI pseudo, and its low V2SI part is copied to operand 0.
(define_expand "fix_truncv2hfv2si2"
  [(set (match_operand:V2SI 0 "register_operand")
        (any_fix:V2SI
          (match_operand:V2HF 1 "nonimmediate_operand")))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx xmm_fp = gen_reg_rtx (V8HFmode);
  rtx xmm_int = gen_reg_rtx (V4SImode);

  emit_insn (gen_movd_v2hf_to_sse (xmm_fp, operands[1]));
  emit_insn (gen_avx512fp16_fix_truncv4si2 (xmm_int, xmm_fp));

  rtx low = lowpart_subreg (V2SImode, xmm_int, V4SImode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; Integer-to-float conversion (any_float) producing VHF_32_64: the input
;; is loaded into a V8HI pseudo, the V8HI -> V8HF float expander writes a
;; V8HF pseudo, and the low part of that is copied to operand 0.
(define_expand "float2"
  [(set (match_operand:VHF_32_64 0 "register_operand")
        (any_float:VHF_32_64
          (match_operand: 1 "nonimmediate_operand")))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL && ix86_partial_vec_fp_math"
{
  rtx xmm_int = gen_reg_rtx (V8HImode);
  rtx xmm_fp = gen_reg_rtx (V8HFmode);

  /* The load generator is reached through a local function pointer.  */
  rtx (*load_to_sse) (rtx, rtx)
    = gen_mov__to_sse;

  emit_insn (load_to_sse (xmm_int, operands[1]));
  emit_insn (gen_floatv8hiv8hf2 (xmm_fp, xmm_int));

  /* Only the low part of the V8HF result belongs to operand 0.  */
  rtx low = lowpart_subreg (mode, xmm_fp, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; V2SI -> V2HF conversion (any_float): the input is loaded into a V4SI
;; pseudo with movq_v2si_to_sse, the avx512fp16 V4SI -> V4HF float
;; generator writes a V8HF pseudo, and its low V2HF part is copied to
;; operand 0.
(define_expand "floatv2siv2hf2"
  [(set (match_operand:V2HF 0 "register_operand")
        (any_float:V2HF
          (match_operand:V2SI 1 "nonimmediate_operand")))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx xmm_int = gen_reg_rtx (V4SImode);
  rtx xmm_fp = gen_reg_rtx (V8HFmode);

  emit_insn (gen_movq_v2si_to_sse (xmm_int, operands[1]));
  emit_insn (gen_avx512fp16_floatv4siv4hf2 (xmm_fp, xmm_int));

  rtx low = lowpart_subreg (V2HFmode, xmm_fp, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; V2HF -> V2SF float_extend: the input is loaded into a V8HF pseudo with
;; movd_v2hf_to_sse, the avx512fp16 float_extend generator writes a V4SF
;; pseudo, and its low V2SF part is copied to operand 0.
(define_expand "extendv2hfv2sf2"
  [(set (match_operand:V2SF 0 "register_operand")
        (float_extend:V2SF
          (match_operand:V2HF 1 "nonimmediate_operand")))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx xmm_half = gen_reg_rtx (V8HFmode);
  rtx xmm_single = gen_reg_rtx (V4SFmode);

  emit_insn (gen_movd_v2hf_to_sse (xmm_half, operands[1]));
  emit_insn (gen_avx512fp16_float_extend_phv4sf2 (xmm_single, xmm_half));

  rtx low = lowpart_subreg (V2SFmode, xmm_single, V4SFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;; V2SF -> V2HF float_truncate: the input is loaded into a V4SF pseudo with
;; movq_v2sf_to_sse, the avx512fp16 V4SF -> V4HF trunc generator writes a
;; V8HF pseudo, and its low V2HF part is copied to operand 0.
(define_expand "truncv2sfv2hf2"
  [(set (match_operand:V2HF 0 "register_operand")
        (float_truncate:V2HF
          (match_operand:V2SF 1 "nonimmediate_operand")))]
  "TARGET_AVX512FP16 && TARGET_AVX512VL
   && TARGET_MMX_WITH_SSE && ix86_partial_vec_fp_math"
{
  rtx xmm_single = gen_reg_rtx (V4SFmode);
  rtx xmm_half = gen_reg_rtx (V8HFmode);

  emit_insn (gen_movq_v2sf_to_sse (xmm_single, operands[1]));
  emit_insn (gen_avx512fp16_truncv4sfv4hf2 (xmm_half, xmm_single));

  rtx low = lowpart_subreg (V2HFmode, xmm_half, V8HFmode);
  emit_move_insn (operands[0], low);
  DONE;
})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation for the MMXMODEI integer vector modes, expressed as a
;; subtraction: operand 0 = operand 2 - operand 1, where the preparation
;; statement sets operand 2 to the mode's CONST0_RTX forced into a
;; register.
(define_expand "neg2"
[(set (match_operand:MMXMODEI 0 "register_operand")
(minus:MMXMODEI
(match_dup 2)
(match_operand:MMXMODEI 1 "register_operand")))]
"TARGET_MMX_WITH_SSE"
"operands[2] = force_reg (mode, CONST0_RTX (mode));")
(define_expand "neg2"
[(set (match_operand:VI_32 0 "register_operand")
(minus:VI_32
(match_dup 2)
(match_operand:VI_32 1 "register_operand")))]
"TARGET_SSE2"
"operands[2] = force_reg (