;; GCC machine description for IA-32 and x86-64.
;; Copyright (C) 1988-2024 Free Software Foundation, Inc.
;; Mostly by William Schelter.
;; x86_64 support added by Jan Hubicka
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>. */
;;
;; The original PO technology requires these to be ordered by speed,
;; so that assigner will pick the fastest.
;;
;; See file "rtl.def" for documentation on define_insn, match_*, et al.
;;
;; The special asm out single letter directives following a '%' are:
;; L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
;; C -- print opcode suffix for set/cmov insn.
;; c -- like C, but print reversed condition
;; F,f -- likewise, but for floating-point.
;; O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
;;      otherwise nothing
;; R -- print the prefix for register names.
;; z -- print the opcode suffix for the size of the current operand.
;; Z -- likewise, with special suffixes for x87 instructions.
;; * -- print a star (in certain assembler syntax)
;; A -- print an absolute memory reference.
;; E -- print address with DImode register names if TARGET_64BIT.
;; w -- print the operand as if it's a "word" (HImode) even if it isn't.
;; s -- print a shift double count, followed by the assemblers argument
;;      delimiter.
;; b -- print the QImode name of the register for the indicated operand.
;;      %b0 would print %al if operands[0] is reg 0.
;; w -- likewise, print the HImode name of the register. ;; k -- likewise, print the SImode name of the register. ;; q -- likewise, print the DImode name of the register. ;; x -- likewise, print the V4SFmode name of the register. ;; t -- likewise, print the V8SFmode name of the register. ;; h -- print the QImode name for a "high" register, either ah, bh, ch or dh. ;; y -- print "st(0)" instead of "st" as a register. ;; d -- print duplicated register operand for AVX instruction. ;; D -- print condition for SSE cmp instruction. ;; P -- if PIC, print an @PLT suffix. ;; p -- print raw symbol name. ;; X -- don't print any sort of PIC '@' suffix for a symbol. ;; & -- print some in-use local-dynamic symbol name. ;; H -- print a memory address offset by 8; used for sse high-parts ;; K -- print HLE lock prefix ;; Y -- print condition for XOP pcom* instruction. ;; + -- print a branch hint as 'cs' or 'ds' prefix ;; ; -- print a semicolon (after prefixes due to bug in older gas). ;; ~ -- print "i" if TARGET_AVX2, "f" otherwise. ;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode ;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required. 
;; Enumeration "unspec".  The ";;" headings below group the entries by
;; purpose; the entries are kept in their original order.
(define_c_enum "unspec" [
  ;; Relocation specifiers
  UNSPEC_GOT
  UNSPEC_GOTOFF
  UNSPEC_GOTPCREL
  UNSPEC_GOTTPOFF
  UNSPEC_TPOFF
  UNSPEC_NTPOFF
  UNSPEC_DTPOFF
  UNSPEC_GOTNTPOFF
  UNSPEC_INDNTPOFF
  UNSPEC_PLTOFF
  UNSPEC_MACHOPIC_OFFSET
  UNSPEC_PCREL
  UNSPEC_SIZEOF

  ;; Prologue support
  UNSPEC_STACK_ALLOC
  UNSPEC_SET_GOT
  UNSPEC_SET_RIP
  UNSPEC_SET_GOT_OFFSET
  UNSPEC_MEMORY_BLOCKAGE
  UNSPEC_PROBE_STACK

  ;; TLS support
  UNSPEC_TP
  UNSPEC_TLS_GD
  UNSPEC_TLS_LD_BASE
  UNSPEC_TLSDESC
  UNSPEC_TLS_IE_SUN

  ;; Other random patterns
  UNSPEC_SCAS
  UNSPEC_FNSTSW
  UNSPEC_SAHF
  UNSPEC_NOTRAP
  UNSPEC_PARITY
  UNSPEC_FSTCW
  UNSPEC_REP
  UNSPEC_LD_MPIC ; load_macho_picbase
  UNSPEC_TRUNC_NOOP
  UNSPEC_DIV_ALREADY_SPLIT
  UNSPEC_PAUSE
  UNSPEC_LEA_ADDR
  UNSPEC_XBEGIN_ABORT
  UNSPEC_STOS
  UNSPEC_PEEPSIB
  UNSPEC_INSN_FALSE_DEP
  UNSPEC_SBB
  UNSPEC_CC_NE
  UNSPEC_STC
  UNSPEC_PUSHFL
  UNSPEC_POPFL

  ;; For SSE/MMX support:
  UNSPEC_FIX_NOTRUNC
  UNSPEC_MASKMOV
  UNSPEC_MOVCC_MASK
  UNSPEC_MOVMSK
  UNSPEC_INSERTPS
  UNSPEC_BLENDV
  UNSPEC_PSHUFB
  UNSPEC_XOP_PERMUTE
  UNSPEC_RCP
  UNSPEC_RSQRT
  UNSPEC_PSADBW

  ;; Different from generic us_truncate RTX
  ;; as it does unsigned saturation of signed source.
  UNSPEC_US_TRUNCATE

  ;; For AVX/AVX512F support
  UNSPEC_SCALEF
  UNSPEC_PCMP
  UNSPEC_CVTBFSF

  ;; Generic math support
  UNSPEC_IEEE_MIN ; not commutative
  UNSPEC_IEEE_MAX ; not commutative

  ;; x87 Floating point
  UNSPEC_SIN
  UNSPEC_COS
  UNSPEC_FPATAN
  UNSPEC_FYL2X
  UNSPEC_FYL2XP1
  UNSPEC_FRNDINT
  UNSPEC_FIST
  UNSPEC_F2XM1
  UNSPEC_TAN
  UNSPEC_FXAM

  ;; x87 Rounding
  UNSPEC_FRNDINT_ROUNDEVEN
  UNSPEC_FRNDINT_FLOOR
  UNSPEC_FRNDINT_CEIL
  UNSPEC_FRNDINT_TRUNC
  UNSPEC_FIST_FLOOR
  UNSPEC_FIST_CEIL

  ;; x87 Double output FP
  UNSPEC_SINCOS_COS
  UNSPEC_SINCOS_SIN
  UNSPEC_XTRACT_FRACT
  UNSPEC_XTRACT_EXP
  UNSPEC_FSCALE_FRACT
  UNSPEC_FSCALE_EXP
  UNSPEC_FPREM_F
  UNSPEC_FPREM_U
  UNSPEC_FPREM1_F
  UNSPEC_FPREM1_U
  UNSPEC_C2_FLAG
  UNSPEC_FXAM_MEM

  ;; SSP patterns
  UNSPEC_SP_SET
  UNSPEC_SP_TEST

  ;; For ROUND support
  UNSPEC_ROUND

  ;; For CRC32 support
  UNSPEC_CRC32

  ;; For LZCNT support
  UNSPEC_LZCNT

  ;; For BMI support
  UNSPEC_TZCNT
  UNSPEC_BEXTR

  ;; For BMI2 support
  UNSPEC_PDEP
  UNSPEC_PEXT

  ;; IRET support
  UNSPEC_INTERRUPT_RETURN

  ;; For MOVDIRI and MOVDIR64B support
  UNSPEC_MOVDIRI
  UNSPEC_MOVDIR64B

  ;; For insn_callee_abi:
  UNSPEC_CALLEE_ABI

  ;; For APX PUSH2/POP2 support
  UNSPEC_APXPUSH2
  UNSPEC_APXPOP2_LOW
  UNSPEC_APXPOP2_HIGH

  ;; For APX PPX support
  UNSPEC_APX_PPX
])

;; Enumeration "unspecv" (all entries carry the UNSPECV_ prefix).
(define_c_enum "unspecv" [
  UNSPECV_UD2
  UNSPECV_BLOCKAGE
  UNSPECV_STACK_PROBE
  UNSPECV_PROBE_STACK_RANGE
  UNSPECV_ALIGN
  UNSPECV_PROLOGUE_USE
  UNSPECV_SPLIT_STACK_RETURN
  UNSPECV_CLD
  UNSPECV_NOPS
  UNSPECV_RDTSC
  UNSPECV_RDTSCP
  UNSPECV_RDPMC
  UNSPECV_LLWP_INTRINSIC
  UNSPECV_SLWP_INTRINSIC
  UNSPECV_LWPVAL_INTRINSIC
  UNSPECV_LWPINS_INTRINSIC
  UNSPECV_RDFSBASE
  UNSPECV_RDGSBASE
  UNSPECV_WRFSBASE
  UNSPECV_WRGSBASE
  UNSPECV_FXSAVE
  UNSPECV_FXRSTOR
  UNSPECV_FXSAVE64
  UNSPECV_FXRSTOR64
  UNSPECV_XSAVE
  UNSPECV_XRSTOR
  UNSPECV_XSAVE64
  UNSPECV_XRSTOR64
  UNSPECV_XSAVEOPT
  UNSPECV_XSAVEOPT64
  UNSPECV_XSAVES
  UNSPECV_XRSTORS
  UNSPECV_XSAVES64
  UNSPECV_XRSTORS64
  UNSPECV_XSAVEC
  UNSPECV_XSAVEC64
  UNSPECV_XGETBV
  UNSPECV_XSETBV
  UNSPECV_WBINVD
  UNSPECV_WBNOINVD

  ;; For atomic compound assignments.
  UNSPECV_FNSTENV
  UNSPECV_FLDENV
  UNSPECV_FNSTSW
  UNSPECV_FNCLEX

  ;; For RDRAND support
  UNSPECV_RDRAND

  ;; For RDSEED support
  UNSPECV_RDSEED

  ;; For RTM support
  UNSPECV_XBEGIN
  UNSPECV_XEND
  UNSPECV_XABORT
  UNSPECV_XTEST

  UNSPECV_NLGR

  ;; For CLWB support
  UNSPECV_CLWB

  ;; For CLFLUSHOPT support
  UNSPECV_CLFLUSHOPT

  ;; For MONITORX and MWAITX support
  UNSPECV_MONITORX
  UNSPECV_MWAITX

  ;; For CLZERO support
  UNSPECV_CLZERO

  ;; For RDPKRU and WRPKRU support
  UNSPECV_PKU

  ;; For RDPID support
  UNSPECV_RDPID

  ;; For CET support
  UNSPECV_NOP_ENDBR
  UNSPECV_NOP_RDSSP
  UNSPECV_INCSSP
  UNSPECV_SAVEPREVSSP
  UNSPECV_RSTORSSP
  UNSPECV_WRSS
  UNSPECV_WRUSS
  UNSPECV_SETSSBSY
  UNSPECV_CLRSSBSY

  ;; For TSXLDTRK support
  UNSPECV_XSUSLDTRK
  UNSPECV_XRESLDTRK

  ;; For WAITPKG support
  UNSPECV_UMWAIT
  UNSPECV_UMONITOR
  UNSPECV_TPAUSE

  ;; For UINTR support
  UNSPECV_CLUI
  UNSPECV_STUI
  UNSPECV_TESTUI
  UNSPECV_SENDUIPI

  ;; For CLDEMOTE support
  UNSPECV_CLDEMOTE

  ;; For Speculation Barrier support
  UNSPECV_SPECULATION_BARRIER

  UNSPECV_PTWRITE

  ;; For ENQCMD and ENQCMDS support
  UNSPECV_ENQCMD
  UNSPECV_ENQCMDS

  ;; For SERIALIZE support
  UNSPECV_SERIALIZE

  ;; For patchable area support
  UNSPECV_PATCHABLE_AREA

  ;; For HRESET support
  UNSPECV_HRESET

  ;; For PREFETCHI support
  UNSPECV_PREFETCHI

  ;; For USER_MSR support
  UNSPECV_URDMSR
  UNSPECV_UWRMSR

  ;; For AMX-TILE
  UNSPECV_LDTILECFG
  UNSPECV_STTILECFG
])

;; Constants to represent rounding modes in the ROUND instruction
(define_constants
  [(ROUND_ROUNDEVEN 0x0)
   (ROUND_FLOOR 0x1)
   (ROUND_CEIL 0x2)
   (ROUND_TRUNC 0x3)
   (ROUND_MXCSR 0x4)
   (ROUND_NO_EXC 0x8)
  ])

;; Constants to represent AVX512F embedded rounding
(define_constants
  [(ROUND_NEAREST_INT 0)
   (ROUND_NEG_INF 1)
   (ROUND_POS_INF 2)
   (ROUND_ZERO 3)
   (NO_ROUND 4)
   (ROUND_SAE 8)
  ])

;; Constants to represent pcomtrue/pcomfalse variants
(define_constants
  [(PCOM_FALSE 0)
   (PCOM_TRUE 1)
   (COM_FALSE_S 2)
   (COM_FALSE_P 3)
   (COM_TRUE_S 4)
   (COM_TRUE_P 5)
  ])

;; Constants used in the XOP pperm instruction
;; Note that PPERM_SRC and PPERM_SRC1 are both defined as 0x00.
(define_constants
  [(PPERM_SRC 0x00) /* copy source */
   (PPERM_INVERT 0x20) /* invert source */
   (PPERM_REVERSE 0x40) /* bit reverse source */
   (PPERM_REV_INV 0x60) /* bit reverse & invert src */
   (PPERM_ZERO 0x80) /* all 0's */
   (PPERM_ONES 0xa0) /* all 1's */
   (PPERM_SIGN 0xc0) /* propagate sign bit */
   (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */
   (PPERM_SRC1 0x00) /* use first source byte */
   (PPERM_SRC2 0x10) /* use second source byte */
  ])

;; Registers by name.
;; The numbers are contiguous from 0 (AX_REG) to 91 (R31_REG);
;; FIRST_PSEUDO_REG (92) is one past the last named register.
(define_constants
  [(AX_REG 0)
   (DX_REG 1)
   (CX_REG 2)
   (BX_REG 3)
   (SI_REG 4)
   (DI_REG 5)
   (BP_REG 6)
   (SP_REG 7)
   (ST0_REG 8)
   (ST1_REG 9)
   (ST2_REG 10)
   (ST3_REG 11)
   (ST4_REG 12)
   (ST5_REG 13)
   (ST6_REG 14)
   (ST7_REG 15)
   (ARGP_REG 16)
   (FLAGS_REG 17)
   (FPSR_REG 18)
   (FRAME_REG 19)
   (XMM0_REG 20)
   (XMM1_REG 21)
   (XMM2_REG 22)
   (XMM3_REG 23)
   (XMM4_REG 24)
   (XMM5_REG 25)
   (XMM6_REG 26)
   (XMM7_REG 27)
   (MM0_REG 28)
   (MM1_REG 29)
   (MM2_REG 30)
   (MM3_REG 31)
   (MM4_REG 32)
   (MM5_REG 33)
   (MM6_REG 34)
   (MM7_REG 35)
   (R8_REG 36)
   (R9_REG 37)
   (R10_REG 38)
   (R11_REG 39)
   (R12_REG 40)
   (R13_REG 41)
   (R14_REG 42)
   (R15_REG 43)
   (XMM8_REG 44)
   (XMM9_REG 45)
   (XMM10_REG 46)
   (XMM11_REG 47)
   (XMM12_REG 48)
   (XMM13_REG 49)
   (XMM14_REG 50)
   (XMM15_REG 51)
   (XMM16_REG 52)
   (XMM17_REG 53)
   (XMM18_REG 54)
   (XMM19_REG 55)
   (XMM20_REG 56)
   (XMM21_REG 57)
   (XMM22_REG 58)
   (XMM23_REG 59)
   (XMM24_REG 60)
   (XMM25_REG 61)
   (XMM26_REG 62)
   (XMM27_REG 63)
   (XMM28_REG 64)
   (XMM29_REG 65)
   (XMM30_REG 66)
   (XMM31_REG 67)
   (MASK0_REG 68)
   (MASK1_REG 69)
   (MASK2_REG 70)
   (MASK3_REG 71)
   (MASK4_REG 72)
   (MASK5_REG 73)
   (MASK6_REG 74)
   (MASK7_REG 75)
   (R16_REG 76)
   (R17_REG 77)
   (R18_REG 78)
   (R19_REG 79)
   (R20_REG 80)
   (R21_REG 81)
   (R22_REG 82)
   (R23_REG 83)
   (R24_REG 84)
   (R25_REG 85)
   (R26_REG 86)
   (R27_REG 87)
   (R28_REG 88)
   (R29_REG 89)
   (R30_REG 90)
   (R31_REG 91)
   (FIRST_PSEUDO_REG 92)
  ])

;; Insn callee abi index.
(define_constants
  [(ABI_DEFAULT 0)
   (ABI_VZEROUPPER 1)
   (ABI_UNKNOWN 2)])

;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
;; from i386.cc.

;; In C guard expressions, put expressions which may be compile-time
;; constants first. This allows for better optimization.
;; For example, write "TARGET_64BIT && reload_completed", not
;; "reload_completed && TARGET_64BIT".


;; Processor type.
;; The value is a constant attribute taken from the C symbol ix86_schedule.
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem, atom,slm,glm,haswell,generic,lujiazui,yongfeng,amdfam10,bdver1, bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3,znver4, znver5"
  (const (symbol_ref "ix86_schedule")))

;; A basic instruction type. Refinements due to arguments to be
;; provided in other attributes.
;; Defaults to "other".
(define_attr "type" "other,multi, alu,alu1,negnot,imov,imovx,lea, incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1, imul,imulx,idiv,icmp,test,ibr,setcc,icmov, push,pop,call,callv,leave, str,bitmanip, fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp, fxch,fistp,fisttp,frndint, sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1, ssemul,sseimul,ssediv,sselog,sselog1, sseishft,sseishft1,ssecmp,ssecomi, ssecvt,ssecvt1,sseicvt,sseins, sseshuf,sseshuf1,ssemuladd,sse4arg, lwp,mskmov,msklog, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
  (const_string "other"))

;; Main data type used by the insn
;; Defaults to "unknown".
(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF, V32HF,V16HF,V8HF,V4HF,V2HF,V32BF,V16BF,V8BF,V4BF,V2BF, V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF"
  (const_string "unknown"))

;; The CPU unit operations uses.
;; Derived from "type": the f* types map to "i387", the sse* types and
;; mskmov to "sse", the mmx* types to "mmx", "other" to "unknown", and
;; every remaining type to "integer".
(define_attr "unit" "integer,i387,sse,mmx,unknown"
  (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp, fxch,fistp,fisttp,frndint")
           (const_string "i387")
         (eq_attr "type" "sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1, ssemul,sseimul,ssediv,sselog,sselog1, sseishft,sseishft1,ssecmp,ssecomi, ssecvt,ssecvt1,sseicvt,sseins, sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov")
           (const_string "sse")
         (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
           (const_string "mmx")
         (eq_attr "type" "other")
           (const_string "unknown")]
        (const_string "integer")))

;; Used to control the "enabled" attribute on a per-instruction basis.
(define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx, x64_avx,x64_avx512bw,x64_avx512dq,apx_ndd,apx_ndd_64, sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx, avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512, noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq, noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni, avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert, avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl, vaes_avx512vl"
  (const_string "base"))

;; The (bounding maximum) length of an instruction immediate.
;; The clauses are tested in order.  An insn that matches none of them
;; reaches the default expression, which evaluates gcc_unreachable ().
(define_attr "length_immediate" ""
  (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave, bitmanip,imulx,msklog,mskmov")
           (const_int 0)
         (ior (eq_attr "type" "sse4arg")
              (eq_attr "isa" "fma4"))
           (const_int 1)
         (eq_attr "unit" "i387,sse,mmx")
           (const_int 0)
         (eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1, rotate,rotatex,rotate1,imul,icmp,push,pop")
           (symbol_ref "ix86_attr_length_immediate_default (insn, true)")
         (eq_attr "type" "imov,test")
           (symbol_ref "ix86_attr_length_immediate_default (insn, false)")
         ;; Calls get 4 when the call address operand (operand 0 for
         ;; "call", operand 1 for "callv") matches
         ;; constant_call_address_operand, and 0 otherwise.
         (eq_attr "type" "call")
           (if_then_else (match_operand 0 "constant_call_address_operand")
             (const_int 4)
             (const_int 0))
         (eq_attr "type" "callv")
           (if_then_else (match_operand 1 "constant_call_address_operand")
             (const_int 4)
             (const_int 0))
         ;; We don't know the size before shorten_branches. Expect
         ;; the instruction to fit for better scheduling.
         (eq_attr "type" "ibr")
           (const_int 1)
         ]
         (symbol_ref "/* Update immediate_length and other attributes! */ gcc_unreachable (),1")))

;; The (bounding maximum) length of an instruction address.
;; Zero for the listed types and for calls whose address operand matches
;; constant_call_address_operand; otherwise computed by
;; ix86_attr_length_address_default.
(define_attr "length_address" ""
  (cond [(eq_attr "type" "str,other,multi,fxch")
           (const_int 0)
         (and (eq_attr "type" "call")
              (match_operand 0 "constant_call_address_operand"))
           (const_int 0)
         (and (eq_attr "type" "callv")
              (match_operand 1 "constant_call_address_operand"))
           (const_int 0)
         ]
         (symbol_ref "ix86_attr_length_address_default (insn)")))

;; Set when length prefix is used.
;; Forced to 0 for the first group of types; otherwise 1 for HImode insns
;; and for SSE-unit insns in V2DF or TI mode.
(define_attr "prefix_data16" ""
  (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
           (const_int 0)
         (eq_attr "mode" "HI")
           (const_int 1)
         (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF,TI"))
           (const_int 1)
        ]
        (const_int 0)))

;; Set when string REP prefix is used.
;; Forced to 0 for the first group of types; otherwise 1 for SSE-unit
;; insns in SF or DF mode.
(define_attr "prefix_rep" ""
  (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
           (const_int 0)
         (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF"))
           (const_int 1)
        ]
        (const_int 0)))

;; Set when 0f opcode prefix is used.
(define_attr "prefix_0f" ""
  (if_then_else
    (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov")
         (eq_attr "unit" "sse,mmx"))
    (const_int 1)
    (const_int 0)))

;; Set when REX opcode prefix is used.
;; Always 0 when TARGET_64BIT is false; the remaining clauses are only
;; reached for 64-bit targets.
(define_attr "prefix_rex" ""
  (cond [(not (match_test "TARGET_64BIT"))
           (const_int 0)
         (and (eq_attr "mode" "DI")
              (and (eq_attr "type" "!push,pop,call,callv,leave,ibr")
                   (eq_attr "unit" "!mmx")))
           (const_int 1)
         (and (eq_attr "mode" "QI")
              (match_test "x86_extended_QIreg_mentioned_p (insn)"))
           (const_int 1)
         (match_test "x86_extended_reg_mentioned_p (insn)")
           (const_int 1)
         (and (eq_attr "type" "imovx")
              (match_operand:QI 1 "ext_QIreg_operand"))
           (const_int 1)
        ]
        (const_int 0)))

;; There are also additional prefixes in 3DNOW, SSSE3.
;; 3DNOW has 0f0f prefix, SSSE3 and SSE4_{1,2} 0f38/0f3a.
;; While generally inapplicable to VEX/XOP/EVEX encodings, "length_vex" uses
;; the attribute evaluating to zero to know that VEX2 encoding may be usable.
(define_attr "prefix_extra" ""
  (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1")
           (const_int 1)
        ]
        (const_int 0)))

;; Prefix used: original, VEX or maybe VEX.
;; The declared values also include "evex" and "maybe_evex".  The mode
;; clauses are tested before the type clauses; the default is "orig".
(define_attr "prefix" "orig,vex,maybe_vex,evex,maybe_evex"
  (cond [(eq_attr "mode" "OI,V8SF,V4DF")
           (const_string "vex")
         (eq_attr "mode" "XI,V16SF,V8DF")
           (const_string "evex")
         (eq_attr "type" "ssemuladd")
           (if_then_else (eq_attr "isa" "fma4")
             (const_string "vex")
             (const_string "maybe_evex"))
         (eq_attr "type" "sse4arg")
           (const_string "vex")
        ]
        (const_string "orig")))

;; VEX W bit is used.
(define_attr "prefix_vex_w" "" (const_int 0))

;; The length of VEX prefix
;; Only instructions with 0f prefix can have 2 byte VEX prefix,
;; 0f38/0f3a prefixes can't. In i386.md 0f3[8a] is
;; still prefix_0f 1, with prefix_extra 1.
;; The second argument passed to ix86_attr_length_vex_default is true
;; exactly when prefix_0f is 1 and prefix_extra is 0; the third argument
;; is true exactly when prefix_vex_w is 1.
(define_attr "length_vex" ""
  (if_then_else (and (eq_attr "prefix_0f" "1")
                     (eq_attr "prefix_extra" "0"))
    (if_then_else (eq_attr "prefix_vex_w" "1")
      (symbol_ref "ix86_attr_length_vex_default (insn, true, true)")
      (symbol_ref "ix86_attr_length_vex_default (insn, true, false)"))
    (if_then_else (eq_attr "prefix_vex_w" "1")
      (symbol_ref "ix86_attr_length_vex_default (insn, false, true)")
      (symbol_ref "ix86_attr_length_vex_default (insn, false, false)"))))

;; 4-bytes evex prefix and 1 byte opcode.
(define_attr "length_evex" "" (const_int 5))

;; Set when modrm byte is used.
;; The clauses list the cases that evaluate to 0 (or, for the last clause,
;; to a computed 0/1); every other insn gets the default of 1.
(define_attr "modrm" ""
  (cond [(eq_attr "type" "str,leave")
           (const_int 0)
         (eq_attr "unit" "i387")
           (const_int 0)
         ;; incdec of an SImode or HImode register operand, only when
         ;; TARGET_64BIT is false.
         (and (eq_attr "type" "incdec")
              (and (not (match_test "TARGET_64BIT"))
                   (ior (match_operand:SI 1 "register_operand")
                        (match_operand:HI 1 "register_operand"))))
           (const_int 0)
         (and (eq_attr "type" "push")
              (not (match_operand 1 "memory_operand")))
           (const_int 0)
         (and (eq_attr "type" "pop")
              (not (match_operand 0 "memory_operand")))
           (const_int 0)
         ;; Non-DImode imov that is either register <- immediate, or a
         ;; move between an ax_reg_operand and a
         ;; memory_displacement_only_operand (in either direction).
         (and (eq_attr "type" "imov")
              (and (not (eq_attr "mode" "DI"))
                   (ior (and (match_operand 0 "register_operand")
                             (match_operand 1 "immediate_operand"))
                        (ior (and (match_operand 0 "ax_reg_operand")
                                  (match_operand 1 "memory_displacement_only_operand"))
                             (and (match_operand 0 "memory_displacement_only_operand")
                                  (match_operand 1 "ax_reg_operand"))))))
           (const_int 0)
         (and (eq_attr "type" "call")
              (match_operand 0 "constant_call_address_operand"))
             (const_int 0)
         (and (eq_attr "type" "callv")
              (match_operand 1 "constant_call_address_operand"))
             (const_int 0)
         ;; With operand 0 matching ax_reg_operand, the value is the result
         ;; of the C comparison below: the immediate length against 0 for
         ;; MODE_QI insns and against 1 for all other modes.
         (and (eq_attr "type" "alu,alu1,icmp,test")
              (match_operand 0 "ax_reg_operand"))
             (symbol_ref "(get_attr_length_immediate (insn) <= (get_attr_mode (insn) != MODE_QI))")
         ]
         (const_int 1)))

;; The (bounding maximum) length of an instruction in bytes.
;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences.
;; Later we may want to split them and compute proper length as for
;; other insns.
;; Clauses are tested in order:
;;   1. types other, multi, fistp, frndint: fixed 16;
;;   2. type fcmp: fixed 4;
;;   3. other i387-unit insns: 2 + prefix_data16 + length_address;
;;   4. prefix "evex", or "maybe_evex"/"maybe_vex" with TARGET_AVX512F:
;;      length_evex + length_immediate + modrm + length_address;
;;   5. prefix "vex", or "maybe_vex"/"maybe_evex" with TARGET_AVX:
;;      length_vex + length_immediate + modrm + length_address;
;;   6. default: 1 plus the sum of modrm, prefix_0f, prefix_rex,
;;      prefix_extra, prefix_rep, prefix_data16, length_immediate and
;;      length_address.
(define_attr "length" ""
  (cond [(eq_attr "type" "other,multi,fistp,frndint")
           (const_int 16)
         (eq_attr "type" "fcmp")
           (const_int 4)
         (eq_attr "unit" "i387")
           (plus (const_int 2)
                 (plus (attr "prefix_data16")
                       (attr "length_address")))
         (ior (eq_attr "prefix" "evex")
              (and (ior (eq_attr "prefix" "maybe_evex")
                        (eq_attr "prefix" "maybe_vex"))
                   (match_test "TARGET_AVX512F")))
           (plus (attr "length_evex")
                 (plus (attr "length_immediate")
                       (plus (attr "modrm")
                             (attr "length_address"))))
         (ior (eq_attr "prefix" "vex")
              (and (ior (eq_attr "prefix" "maybe_vex")
                        (eq_attr "prefix" "maybe_evex"))
                   (match_test "TARGET_AVX")))
           (plus (attr "length_vex")
                 (plus (attr "length_immediate")
                       (plus (attr "modrm")
                             (attr "length_address"))))]
         (plus (plus (attr "modrm")
                     (plus (attr "prefix_0f")
                           (plus (attr "prefix_rex")
                                 (plus (attr "prefix_extra")
                                       (const_int 1)))))
               (plus (attr "prefix_rep")
                     (plus (attr "prefix_data16")
                           (plus (attr "length_immediate")
                                 (attr "length_address")))))))

;; The `memory' attribute is `none' if no memory is referenced, `load' or
;; `store' if there is a simple memory reference therein, or `unknown'
;; if the instruction is complex.
;; The attribute can also take the value `both'.  Clauses are tested in
;; order, so the type-specific clauses take precedence over the generic
;; operand 0 / operand 1 memory tests that follow them.
(define_attr "memory" "none,load,store,both,unknown"
  (cond [(eq_attr "type" "other,multi,str,lwp")
           (const_string "unknown")
         (eq_attr "type" "lea,fcmov,fpspc")
           (const_string "none")
         (eq_attr "type" "fistp,leave")
           (const_string "both")
         (eq_attr "type" "frndint")
           (const_string "load")
         (eq_attr "type" "push")
           (if_then_else (match_operand 1 "memory_operand")
             (const_string "both")
             (const_string "store"))
         (eq_attr "type" "pop")
           (if_then_else (match_operand 0 "memory_operand")
             (const_string "both")
             (const_string "load"))
         (eq_attr "type" "setcc")
           (if_then_else (match_operand 0 "memory_operand")
             (const_string "store")
             (const_string "none"))
         (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp")
           (if_then_else (ior (match_operand 0 "memory_operand")
                              (match_operand 1 "memory_operand"))
             (const_string "load")
             (const_string "none"))
         (eq_attr "type" "ibr")
           (if_then_else (match_operand 0 "memory_operand")
             (const_string "load")
             (const_string "none"))
         (eq_attr "type" "call")
           (if_then_else (match_operand 0 "constant_call_address_operand")
             (const_string "none")
             (const_string "load"))
         (eq_attr "type" "callv")
           (if_then_else (match_operand 1 "constant_call_address_operand")
             (const_string "none")
             (const_string "load"))
         (and (eq_attr "type" "alu1,negnot,ishift1,rotate1,sselog1,sseshuf1")
              (match_operand 1 "memory_operand"))
           (const_string "both")
         (and (match_operand 0 "memory_operand")
              (match_operand 1 "memory_operand"))
           (const_string "both")
         (match_operand 0 "memory_operand")
           (const_string "store")
         (match_operand 1 "memory_operand")
           (const_string "load")
         ;; Operand 2 is only examined for types NOT in the list below
         ;; (note the leading "!").
         (and (eq_attr "type" "!alu1,negnot,ishift1,rotate1, imov,imovx,icmp,test,bitmanip, fmov,fcmp,fsgn, sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt, sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1, mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
              (match_operand 2 "memory_operand"))
           (const_string "load")
         (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
              (match_operand 3 "memory_operand"))
           (const_string "load")
        ]
        (const_string "none")))

;; Indicates if an instruction has both an immediate and a displacement.
;; "unknown" for types other and multi.  The immediate is looked for in
;; operand 1 for the first group of types and in operand 2 for the second.
(define_attr "imm_disp" "false,true,unknown"
  (cond [(eq_attr "type" "other,multi")
           (const_string "unknown")
         (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1")
              (and (match_operand 0 "memory_displacement_operand")
                   (match_operand 1 "immediate_operand")))
           (const_string "true")
         (and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,idiv")
              (and (match_operand 0 "memory_displacement_operand")
                   (match_operand 2 "immediate_operand")))
           (const_string "true")
        ]
        (const_string "false")))

;; Indicates if an FP operation has an integer source.
(define_attr "fp_int_src" "false,true"
  (const_string "false"))

;; Defines rounding mode of an FP operation.
(define_attr "i387_cw" "roundeven,floor,ceil,trunc,uninitialized,any"
  (const_string "any"))

;; Define attribute to indicate AVX insns with partial XMM register update.
(define_attr "avx_partial_xmm_update" "false,true"
  (const_string "false"))

;; Define attribute to classify add/sub insns that consumes carry flag (CF)
(define_attr "use_carry" "0,1" (const_string "0"))

;; Define attribute to indicate unaligned ssemov insns
(define_attr "movu" "0,1" (const_string "0"))

;; Define attribute to limit memory address register set.
(define_attr "addr" "gpr8,gpr16,gpr32" (const_string "gpr32"))

;; Define instruction set of MMX instructions
(define_attr "mmx_isa" "base,native,sse,sse_noavx,avx"
  (const_string "base"))

;; Maps each "isa" value, then each "mmx_isa" value, to the C condition
;; under which it is enabled.  The "isa" clauses are tested first; when
;; neither attribute matches a clause (e.g. both are "base") the result
;; is 1.
(define_attr "enabled" ""
  (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
         (eq_attr "isa" "nox64") (symbol_ref "!TARGET_64BIT")
         (eq_attr "isa" "x64_sse2")
           (symbol_ref "TARGET_64BIT && TARGET_SSE2")
         (eq_attr "isa" "x64_sse4")
           (symbol_ref "TARGET_64BIT && TARGET_SSE4_1")
         (eq_attr "isa" "x64_sse4_noavx")
           (symbol_ref "TARGET_64BIT && TARGET_SSE4_1 && !TARGET_AVX")
         (eq_attr "isa" "x64_avx")
           (symbol_ref "TARGET_64BIT && TARGET_AVX")
         (eq_attr "isa" "x64_avx512bw")
           (symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
         (eq_attr "isa" "x64_avx512dq")
           (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
         (eq_attr "isa" "sse_noavx")
           (symbol_ref "TARGET_SSE && !TARGET_AVX")
         (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
         (eq_attr "isa" "sse2_noavx")
           (symbol_ref "TARGET_SSE2 && !TARGET_AVX")
         (eq_attr "isa" "sse3") (symbol_ref "TARGET_SSE3")
         (eq_attr "isa" "sse3_noavx")
           (symbol_ref "TARGET_SSE3 && !TARGET_AVX")
         (eq_attr "isa" "sse4") (symbol_ref "TARGET_SSE4_1")
         (eq_attr "isa" "sse4_noavx")
           (symbol_ref "TARGET_SSE4_1 && !TARGET_AVX")
         (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
         (eq_attr "isa" "avx_noavx512f")
           (symbol_ref "TARGET_AVX && !TARGET_AVX512F")
         (eq_attr "isa" "avx_noavx512vl")
           (symbol_ref "TARGET_AVX && !TARGET_AVX512VL")
         (eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
         (eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2")
         (eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2")
         (eq_attr "isa" "bmi") (symbol_ref "TARGET_BMI")
         (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
         (eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4")
         (eq_attr "isa" "fma") (symbol_ref "TARGET_FMA")
         (eq_attr "isa" "fma_or_avx512vl")
           (symbol_ref "TARGET_FMA || TARGET_AVX512VL")
         (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F")
         (eq_attr "isa" "avx512f_512")
           (symbol_ref "TARGET_AVX512F && TARGET_EVEX512")
         (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
         (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW")
         (eq_attr "isa" "avx512bw_512")
           (symbol_ref "TARGET_AVX512BW && TARGET_EVEX512")
         (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
         (eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ")
         (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
         (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
         (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
         (eq_attr "isa" "avxvnni") (symbol_ref "TARGET_AVXVNNI")
         (eq_attr "isa" "avx512vnnivl")
           (symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL")
         (eq_attr "isa" "avx512fp16")
           (symbol_ref "TARGET_AVX512FP16")
         (eq_attr "isa" "avxifma") (symbol_ref "TARGET_AVXIFMA")
         (eq_attr "isa" "avx512ifmavl")
           (symbol_ref "TARGET_AVX512IFMA && TARGET_AVX512VL")
         (eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT")
         (eq_attr "isa" "avx512bf16vl")
           (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL")
         (eq_attr "isa" "vpclmulqdqvl")
           (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
         (eq_attr "isa" "apx_ndd")
           (symbol_ref "TARGET_APX_NDD")
         (eq_attr "isa" "apx_ndd_64")
           (symbol_ref "TARGET_APX_NDD && Pmode == DImode")
         (eq_attr "isa" "vaes_avx512vl")
           (symbol_ref "TARGET_VAES && TARGET_AVX512VL")

         (eq_attr "mmx_isa" "native")
           (symbol_ref "!TARGET_MMX_WITH_SSE")
         (eq_attr "mmx_isa" "sse")
           (symbol_ref "TARGET_MMX_WITH_SSE")
         (eq_attr "mmx_isa" "sse_noavx")
           (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
         (eq_attr "mmx_isa" "avx")
           (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
        ]
        (const_int 1)))

;; Both attributes below default to 1.
(define_attr "preferred_for_size" "" (const_int 1))
(define_attr "preferred_for_speed" "" (const_int 1))

;; Describe a user's asm statement.
;; A user asm is given length 128 and type "multi".
(define_asm_attributes
  [(set_attr "length" "128")
   (set_attr "type" "multi")])

;; Code iterators over the arithmetic codes named in each list.
(define_code_iterator plusminus [plus minus])
(define_code_iterator plusminusmult [plus minus mult])
(define_code_iterator plusminusmultdiv [plus minus mult div])

(define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])

;; Base name for insn mnemonic.
(define_code_attr plusminus_mnemonic
  [(plus "add") (ss_plus "adds") (us_plus "addus")
   (minus "sub") (ss_minus "subs") (us_minus "subus")])

(define_code_iterator multdiv [mult div])

(define_code_attr multdiv_mnemonic
  [(mult "mul") (div "div")])

;; Mark commutative operators as such in constraints.
;; The plus variants and mult map to "%"; the minus variants and div map
;; to the empty string.
(define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
                        (minus "") (ss_minus "") (us_minus "")
                        (mult "%") (div "")])

;; Mapping of max and min
(define_code_iterator maxmin [smax smin umax umin])

;; Mapping of signed max and min
(define_code_iterator smaxmin [smax smin])

;; Mapping of unsigned max and min
(define_code_iterator umaxmin [umax umin])

;; Base name for integer and FP insn mnemonic
(define_code_attr maxmin_int [(smax "maxs") (smin "mins")
                              (umax "maxu") (umin "minu")])
(define_code_attr maxmin_float [(smax "max") (smin "min")])

(define_int_iterator IEEE_MAXMIN
  [UNSPEC_IEEE_MAX
   UNSPEC_IEEE_MIN])

(define_int_attr ieee_maxmin
  [(UNSPEC_IEEE_MAX "max")
   (UNSPEC_IEEE_MIN "min")])

;; Mapping of logic operators
(define_code_iterator any_logic [and ior xor])
(define_code_iterator any_or [ior xor])
(define_code_iterator fpint_logic [and xor])

;; Base name for insn mnemonic.
(define_code_attr logic [(and "and") (ior "or") (xor "xor")])

;; Mapping of logic-shift operators
(define_code_iterator any_lshift [ashift lshiftrt])

;; Mapping of shift-right operators
(define_code_iterator any_shiftrt [lshiftrt ashiftrt])

;; Mapping of all shift operators
(define_code_iterator any_shift [ashift lshiftrt ashiftrt])

;; Base name for insn mnemonic.
(define_code_attr shift [(ashift "sal") (lshiftrt "shr") (ashiftrt "sar")])
(define_code_attr vshift [(ashift "sll") (lshiftrt "srl") (ashiftrt "sra")])

;; Mapping of rotate operators
(define_code_iterator any_rotate [rotate rotatert])

;; Base name for insn mnemonic.
(define_code_attr rotate [(rotate "rol") (rotatert "ror")])

;; Mapping of abs neg operators
(define_code_iterator absneg [abs neg])

;; Mapping of abs neg operators to logic operation
(define_code_attr absneg_op [(abs "and") (neg "xor")])

;; Base name for x87 insn mnemonic.
(define_code_attr absneg_mnemonic [(abs "fabs") (neg "fchs")])

;; Mapping of extend operators
(define_code_iterator any_extend [sign_extend zero_extend])

;; Mapping of highpart multiply operators
(define_code_iterator any_mul_highpart [smul_highpart umul_highpart])

;; Prefix for insn mnemonic.
;; The signed codes map to "i", the unsigned codes to the empty string.
(define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
                             (smul_highpart "i") (umul_highpart "")
                             (div "i") (udiv "")])
;; Prefix for define_insn
(define_code_attr s [(sign_extend "s") (zero_extend "u")
                     (smul_highpart "s") (umul_highpart "u")])
(define_code_attr u [(sign_extend "") (zero_extend "u")
                     (div "") (udiv "u")])
(define_code_attr u_bool [(sign_extend "false") (zero_extend "true")
                          (div "false") (udiv "true")])

;; Used in signed and unsigned truncations.
(define_code_iterator any_truncate [ss_truncate truncate us_truncate])
;; Instruction suffix for truncations.
(define_code_attr trunsuffix
  [(ss_truncate "s") (truncate "") (us_truncate "us")])

;; Instruction suffix for SSE sign and zero extensions.
(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])

;; Used in signed and unsigned fix.
(define_code_iterator any_fix [fix unsigned_fix])
(define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
(define_code_attr fixunssuffix [(fix "") (unsigned_fix "uns")])
(define_code_attr fixprefix [(fix "s") (unsigned_fix "u")])

;; Used in signed and unsigned float.
(define_code_iterator any_float [float unsigned_float])
(define_code_attr floatsuffix [(float "") (unsigned_float "u")])
(define_code_attr floatunssuffix [(float "") (unsigned_float "uns")])
(define_code_attr floatprefix [(float "s") (unsigned_float "u")])

;; Base name for expression
(define_code_attr insn
  [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
   (minus "sub") (ss_minus "sssub") (us_minus "ussub")
   (sign_extend "extend") (zero_extend "zero_extend")
   (ashift "ashl") (lshiftrt "lshr") (ashiftrt "ashr")
   (rotate "rotl") (rotatert "rotr")
   (mult "mul") (div "div")])

;; All integer modes.
(define_mode_iterator SWI1248x [QI HI SI DI])

;; All integer modes without QImode.
(define_mode_iterator SWI248x [HI SI DI])

;; All integer modes without QImode and HImode.
(define_mode_iterator SWI48x [SI DI])

;; All integer modes without SImode and DImode.
(define_mode_iterator SWI12 [QI HI])

;; All integer modes without DImode.
(define_mode_iterator SWI124 [QI HI SI])

;; All integer modes without QImode and DImode.
(define_mode_iterator SWI24 [HI SI])

;; Single word integer modes.
;; DImode is included only under the condition TARGET_64BIT.
(define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])

;; Single word integer modes without QImode.
(define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])

;; Single word integer modes without QImode and HImode.
(define_mode_iterator SWI48 [SI (DI "TARGET_64BIT")])

;; All math-dependent single and double word integer modes.
(define_mode_iterator SDWIM [(QI "TARGET_QIMODE_MATH")
                             (HI "TARGET_HIMODE_MATH")
                             SI DI (TI "TARGET_64BIT")])

;; Math-dependent single word integer modes.
(define_mode_iterator SWIM [(QI "TARGET_QIMODE_MATH")
                            (HI "TARGET_HIMODE_MATH")
                            SI (DI "TARGET_64BIT")])

;; Math-dependent integer modes without DImode.
(define_mode_iterator SWIM124 [(QI "TARGET_QIMODE_MATH")
                               (HI "TARGET_HIMODE_MATH")
                               SI])

;; Math-dependent integer modes with DImode.
(define_mode_iterator SWIM1248x [(QI "TARGET_QIMODE_MATH")
                                 (HI "TARGET_HIMODE_MATH")
                                 SI DI])

;; Math-dependent single word integer modes without QImode.
(define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH")
                               SI (DI "TARGET_64BIT")])

;; Double word integer modes.
(define_mode_iterator DWI [(DI "!TARGET_64BIT")
                           (TI "TARGET_64BIT")])

;; SWI and DWI together.
(define_mode_iterator SWIDWI [QI HI SI DI (TI "TARGET_64BIT")])

;; SWI48 and DWI together.
(define_mode_iterator SWI48DWI [SI DI (TI "TARGET_64BIT")])

;; GET_MODE_SIZE for selected modes.  As GET_MODE_SIZE is not a
;; compile time constant, it is faster to use <MODE_SIZE> than
;; GET_MODE_SIZE (<MODE>mode).  For XFmode which depends on
;; command line options just use GET_MODE_SIZE macro.
(define_mode_attr MODE_SIZE
  [(QI "1") (HI "2") (SI "4") (DI "8") (TI "16")
   (HF "2") (BF "2") (SF "4") (DF "8")
   (XF "GET_MODE_SIZE (XFmode)")
   (V16QI "16") (V32QI "32") (V64QI "64")
   (V8HI "16") (V16HI "32") (V32HI "64")
   (V4SI "16") (V8SI "32") (V16SI "64")
   (V2DI "16") (V4DI "32") (V8DI "64")
   (V1TI "16") (V2TI "32") (V4TI "64")
   (V2DF "16") (V4DF "32") (V8DF "64")
   (V4SF "16") (V8SF "32") (V16SF "64")
   (V8HF "16") (V16HF "32") (V32HF "64")
   (V4HF "8") (V2HF "4")
   (V8BF "16") (V16BF "32") (V32BF "64")
   (V4BF "8") (V2BF "4")])

;; Double word integer modes as mode attribute.
(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "OI")])
(define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti") (TI "oi")])

;; Half sized integer modes.
(define_mode_attr HALF [(TI "DI") (DI "SI")])
(define_mode_attr half [(TI "di") (DI "si")])

;; LEA mode corresponding to an integer mode
(define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])

;; Half mode for double word integer modes.
(define_mode_iterator DWIH [(SI "!TARGET_64BIT")
                            (DI "TARGET_64BIT")])

;; Instruction suffix for integer modes.
(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])

;; Instruction suffix for masks.
(define_mode_attr mskmodesuffix [(QI "b") (HI "w") (SI "d") (DI "q")])

;; Pointer size prefix for integer modes (Intel asm dialect)
(define_mode_attr iptrsize [(QI "BYTE")
                            (HI "WORD")
                            (SI "DWORD")
                            (DI "QWORD")])

;; Register class for integer modes.
(define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")])

;; Immediate operand constraint for integer modes.
(define_mode_attr i [(QI "n") (HI "n") (SI "e") (DI "e")])

;; General operand constraint for word modes.
(define_mode_attr g [(QI "qmn") (HI "rmn") (SI "rme") (DI "rme")])

;; Memory operand constraint for word modes.
(define_mode_attr m [(QI "m") (HI "m") (SI "BM") (DI "BM")])

;; Immediate operand constraint for double integer modes.
(define_mode_attr di [(SI "nF") (DI "Wd")])

;; Immediate operand constraint for shifts.
(define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
(define_mode_attr KS [(QI "Wb") (HI "Ww") (SI "I") (DI "J")])

;; Print register name in the specified mode.
(define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])

;; General operand predicate for integer modes.
(define_mode_attr general_operand
  [(QI "general_operand")
   (HI "general_operand")
   (SI "x86_64_general_operand")
   (DI "x86_64_general_operand")
   (TI "x86_64_general_operand")])

;; General operand predicate for integer modes, where for TImode
;; we need both words of the operand to be general operands.
(define_mode_attr general_hilo_operand
  [(QI "general_operand")
   (HI "general_operand")
   (SI "x86_64_general_operand")
   (DI "x86_64_general_operand")
   (TI "x86_64_hilo_general_operand")])

;; General sign extend operand predicate for integer modes,
;; which disallows VOIDmode operands and thus it is suitable
;; for use inside sign_extend.
(define_mode_attr general_sext_operand
  [(QI "sext_operand")
   (HI "sext_operand")
   (SI "x86_64_sext_operand")
   (DI "x86_64_sext_operand")])

;; General sign/zero extend operand predicate for integer modes.
(define_mode_attr general_szext_operand
  [(QI "general_operand")
   (HI "general_operand")
   (SI "x86_64_szext_general_operand")
   (DI "x86_64_szext_general_operand")
   (TI "x86_64_hilo_general_operand")])

(define_mode_attr nonmemory_szext_operand
  [(QI "nonmemory_operand")
   (HI "nonmemory_operand")
   (SI "x86_64_szext_nonmemory_operand")
   (DI "x86_64_szext_nonmemory_operand")])

;; Immediate operand predicate for integer modes.
(define_mode_attr immediate_operand
  [(QI "immediate_operand")
   (HI "immediate_operand")
   (SI "x86_64_immediate_operand")
   (DI "x86_64_immediate_operand")])

;; Nonmemory operand predicate for integer modes.
(define_mode_attr nonmemory_operand
  [(QI "nonmemory_operand")
   (HI "nonmemory_operand")
   (SI "x86_64_nonmemory_operand")
   (DI "x86_64_nonmemory_operand")])

;; Operand predicate for shifts.
(define_mode_attr shift_operand
  [(QI "nonimmediate_operand")
   (HI "nonimmediate_operand")
   (SI "nonimmediate_operand")
   (DI "shiftdi_operand")
   (TI "register_operand")])

;; Operand predicate for shift argument.
(define_mode_attr shift_immediate_operand
  [(QI "const_1_to_31_operand")
   (HI "const_1_to_31_operand")
   (SI "const_1_to_31_operand")
   (DI "const_1_to_63_operand")])

;; Input operand predicate for arithmetic left shifts.
(define_mode_attr ashl_input_operand
  [(QI "nonimmediate_operand")
   (HI "nonimmediate_operand")
   (SI "nonimmediate_operand")
   (DI "ashldi_input_operand")
   (TI "reg_or_pm1_operand")])

;; SSE and x87 SFmode and DFmode floating point modes
(define_mode_iterator MODEF [SF DF])

(define_mode_iterator MODEF248 [BF HF SF (DF "TARGET_SSE2")])

;; SSE floating point modes
(define_mode_iterator MODEFH [(HF "TARGET_AVX512FP16") SF DF])

;; All x87 floating point modes
(define_mode_iterator X87MODEF [SF DF XF])

;; All x87 floating point modes plus HFmode
(define_mode_iterator X87MODEFH [HF SF DF XF BF])

;; All SSE floating point modes
(define_mode_iterator SSEMODEF [HF SF DF TF])
(define_mode_attr ssevecmodef [(HF "V8HF") (SF "V4SF") (DF "V2DF") (TF "TF")])

;; SSE instruction suffix for various modes
(define_mode_attr ssemodesuffix
  [(HF "sh") (SF "ss") (DF "sd")
   (V32HF "ph") (V16SF "ps") (V8DF "pd")
   (V16HF "ph") (V16BF "bf") (V8SF "ps") (V4DF "pd")
   (V8HF "ph") (V8BF "bf") (V4SF "ps") (V2DF "pd")
   (V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
   (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
   (V64QI "b") (V32HI "w") (V16SI "d") (V8DI "q")])

;; SSE vector suffix for floating point modes
;; BF HF use same suffix as SF for logic operations.
(define_mode_attr ssevecmodesuffix [(BF "ps") (HF "ps") (SF "ps") (DF "pd")])

;; SSE vector mode corresponding to a scalar mode
(define_mode_attr ssevecmode
  [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI")
   (HF "V8HF") (BF "V8BF") (SF "V4SF") (DF "V2DF")])
(define_mode_attr ssevecmodelower
  [(QI "v16qi") (HI "v8hi") (SI "v4si") (DI "v2di") (SF "v4sf") (DF "v2df")])

;; AVX512F vector mode corresponding to a scalar mode
(define_mode_attr avx512fvecmode
  [(QI "V64QI") (HI "V32HI") (SI "V16SI") (DI "V8DI")
   (HF "V32HF") (BF "V32BF") (SF "V16SF") (DF "V8DF")])

;; Instruction suffix for REX 64bit operators.
(define_mode_attr rex64suffix [(SI "{l}") (DI "{q}")])
(define_mode_attr rex64namesuffix [(SI "") (DI "q")])

;; This mode iterator allows :P to be used for patterns that operate on
;; pointer-sized quantities.  Exactly one of the two alternatives will match.
(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])

;; This mode iterator allows :W to be used for patterns that operate on
;; word_mode sized quantities.
(define_mode_iterator W
  [(SI "word_mode == SImode") (DI "word_mode == DImode")])

;; This mode iterator allows :PTR to be used for patterns that operate on
;; ptr_mode sized quantities.
(define_mode_iterator PTR
  [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")])

;; Scheduling descriptions

(include "pentium.md")
(include "ppro.md")
(include "k6.md")
(include "athlon.md")
(include "bdver1.md")
(include "bdver3.md")
(include "btver2.md")
(include "znver.md")
(include "zn4zn5.md")
(include "geode.md")
(include "atom.md")
(include "slm.md")
(include "glm.md")
(include "core2.md")
(include "haswell.md")
(include "lujiazui.md")
(include "yongfeng.md")

;; Operand and operator predicates and constraints

(include "predicates.md")
(include "constraints.md")

;; Compare and branch/compare and store instructions.
;; Integer compare-and-branch expander.  When both operands are memory,
;; operand 1 is forced into a register before ix86_expand_branch is called.
(define_expand "cbranch<mode>4"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:SWIM1248x 1 "nonimmediate_operand")
                    (match_operand:SWIM1248x 2 "<general_operand>")))
   (set (pc) (if_then_else
               (match_operator 0 "ordered_comparison_operator"
                [(reg:CC FLAGS_REG) (const_int 0)])
               (label_ref (match_operand 3))
               (pc)))]
  ""
{
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    operands[1] = force_reg (<MODE>mode, operands[1]);
  ix86_expand_branch (GET_CODE (operands[0]),
                      operands[1], operands[2], operands[3]);
  DONE;
})

(define_expand "cbranchti4"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:TI 1 "nonimmediate_operand")
                    (match_operand:TI 2 "ix86_timode_comparison_operand")))
   (set (pc) (if_then_else
               (match_operator 0 "ix86_timode_comparison_operator"
                [(reg:CC FLAGS_REG) (const_int 0)])
               (label_ref (match_operand 3))
               (pc)))]
  "TARGET_64BIT || TARGET_SSE4_1"
{
  ix86_expand_branch (GET_CODE (operands[0]),
                      operands[1], operands[2], operands[3]);
  DONE;
})

(define_expand "cbranchoi4"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:OI 1 "nonimmediate_operand")
                    (match_operand:OI 2 "nonimmediate_operand")))
   (set (pc) (if_then_else
               (match_operator 0 "bt_comparison_operator"
                [(reg:CC FLAGS_REG) (const_int 0)])
               (label_ref (match_operand 3))
               (pc)))]
  "TARGET_AVX"
{
  ix86_expand_branch (GET_CODE (operands[0]),
                      operands[1], operands[2], operands[3]);
  DONE;
})

(define_expand "cbranchxi4"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:XI 1 "nonimmediate_operand")
                    (match_operand:XI 2 "nonimmediate_operand")))
   (set (pc) (if_then_else
               (match_operator 0 "bt_comparison_operator"
                [(reg:CC FLAGS_REG) (const_int 0)])
               (label_ref (match_operand 3))
               (pc)))]
  "TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256"
{
  ix86_expand_branch (GET_CODE (operands[0]),
                      operands[1], operands[2], operands[3]);
  DONE;
})

;; Integer compare-and-store expander.  For the double-word mode only
;; EQ and NE are accepted; otherwise a memory/memory pair gets operand 2
;; forced into a register.
(define_expand "cstore<mode>4"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:SDWIM 2 "nonimmediate_operand")
                    (match_operand:SDWIM 3 "<general_operand>")))
   (set (match_operand:QI 0 "register_operand")
        (match_operator 1 "ordered_comparison_operator"
         [(reg:CC FLAGS_REG) (const_int 0)]))]
  ""
{
  if (<MODE>mode == (TARGET_64BIT ? TImode : DImode))
    {
      if (GET_CODE (operands[1]) != EQ
          && GET_CODE (operands[1]) != NE)
        FAIL;
    }
  else if (MEM_P (operands[2]) && MEM_P (operands[3]))
    operands[2] = force_reg (<MODE>mode, operands[2]);
  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
                     operands[2], operands[3]);
  DONE;
})

(define_expand "@cmp<mode>_1"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:SWI48 0 "nonimmediate_operand")
                    (match_operand:SWI48 1 "<general_operand>")))])

(define_mode_iterator SWI1248_AVX512BWDQ_64
  [(QI "TARGET_AVX512DQ") HI
   (SI "TARGET_AVX512BW")
   (DI "TARGET_AVX512BW && TARGET_64BIT")])

(define_insn "*cmp<mode>_ccz_1"
  [(set (reg FLAGS_REG)
        (compare (match_operand:SWI1248_AVX512BWDQ_64 0
                        "nonimmediate_operand" "<r>,?m<r>,$k")
                 (match_operand:SWI1248_AVX512BWDQ_64 1 "const0_operand")))]
  "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)"
  "@
   test{<imodesuffix>}\t%0, %0
   cmp{<imodesuffix>}\t{%1, %0|%0, %1}
   kortest<mskmodesuffix>\t%0, %0"
  [(set_attr "type" "test,icmp,msklog")
   (set_attr "length_immediate" "0,1,*")
   (set_attr "prefix" "*,*,vex")
   (set_attr "mode" "<MODE>")])

(define_insn "*cmp<mode>_ccno_1"
  [(set (reg FLAGS_REG)
        (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>,?m<r>")
                 (match_operand:SWI 1 "const0_operand")))]
  "ix86_match_ccmode (insn, CCNOmode)"
  "@
   test{<imodesuffix>}\t%0, %0
   cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "test,icmp")
   (set_attr "length_immediate" "0,1")
   (set_attr "mode" "<MODE>")])

(define_insn "*cmp<mode>_1"
  [(set (reg FLAGS_REG)
        (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
                 (match_operand:SWI 1 "<general_operand>" "<r><i>,<r><m>")))]
  "ix86_match_ccmode (insn, CCmode)"
  "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "icmp")
   (set_attr "mode" "<MODE>")])

(define_insn "*cmp<mode>_minus_1"
  [(set (reg FLAGS_REG)
        (compare
          (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
                     (match_operand:SWI 1 "<general_operand>" "<r><i>,<r><m>"))
          (const_int 0)))]
  "ix86_match_ccmode (insn, CCGOCmode)"
  "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "icmp")
   (set_attr "mode" "<MODE>")])

;; QImode compares against the 8-bit field at bit offset 8 of a wider
;; register (printed with the %h operand modifier).
(define_insn "*cmpqi_ext<mode>_1"
  [(set (reg FLAGS_REG)
        (compare
          (match_operand:QI 0 "nonimmediate_operand" "QBn")
          (subreg:QI
            (match_operator:SWI248 2 "extract_operator"
              [(match_operand 1 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)))]
  "ix86_match_ccmode (insn, CCmode)"
  "cmp{b}\t{%h1, %0|%0, %h1}"
  [(set_attr "addr" "gpr8")
   (set_attr "type" "icmp")
   (set_attr "mode" "QI")])

(define_insn "*cmpqi_ext<mode>_2"
  [(set (reg FLAGS_REG)
        (compare
          (subreg:QI
            (match_operator:SWI248 2 "extract_operator"
              [(match_operand 0 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)
          (match_operand:QI 1 "const0_operand")))]
  "ix86_match_ccmode (insn, CCNOmode)"
  "test{b}\t%h0, %h0"
  [(set_attr "type" "test")
   (set_attr "length_immediate" "0")
   (set_attr "mode" "QI")])

(define_expand "cmpqi_ext_3"
  [(set (reg:CC FLAGS_REG)
        (compare:CC
          (subreg:QI
            (zero_extract:HI
              (match_operand:HI 0 "register_operand")
              (const_int 8)
              (const_int 8)) 0)
          (match_operand:QI 1 "const_int_operand")))])

(define_insn "*cmpqi_ext<mode>_3"
  [(set (reg FLAGS_REG)
        (compare
          (subreg:QI
            (match_operator:SWI248 2 "extract_operator"
              [(match_operand 0 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)
          (match_operand:QI 1 "general_operand" "QnBn")))]
  "ix86_match_ccmode (insn, CCmode)"
  "cmp{b}\t{%1, %h0|%h0, %1}"
  [(set_attr "addr" "gpr8")
   (set_attr "type" "icmp")
   (set_attr "mode" "QI")])

(define_insn "*cmpqi_ext<mode>_4"
  [(set (reg FLAGS_REG)
        (compare
          (subreg:QI
            (match_operator:SWI248 2 "extract_operator"
              [(match_operand 0 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)
          (subreg:QI
            (match_operator:SWI248 3 "extract_operator"
              [(match_operand 1 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)))]
  "ix86_match_ccmode (insn, CCmode)"
  "cmp{b}\t{%h1, %h0|%h0, %h1}"
  [(set_attr "type" "icmp")
   (set_attr "mode" "QI")])

;; Double-word equality compare, split before reload.  After
;; split_double_mode, operands 0/1 are one pair of halves and operands 2/3
;; the other.  Each pair is reduced to a value that is zero when the halves
;; are equal (move, one's complement or xor, depending on the constants
;; involved), and the two values are IORed while setting the flags.
(define_insn_and_split "*cmp<dwi>_doubleword"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ (match_operand:<DWI> 0 "nonimmediate_operand")
                     (match_operand:<DWI> 1 "general_operand")))]
  "ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel [(set (reg:CCZ FLAGS_REG)
                   (compare:CCZ (ior:DWIH (match_dup 4) (match_dup 5))
                                (const_int 0)))
              (set (match_dup 4) (ior:DWIH (match_dup 4) (match_dup 5)))])]
{
  split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);

  operands[4] = gen_reg_rtx (<MODE>mode);

  /* Special case comparisons against -1.  */
  if (operands[1] == constm1_rtx && operands[3] == constm1_rtx)
    {
      emit_insn (gen_and<mode>3 (operands[4], operands[0], operands[2]));
      emit_insn (gen_cmp_1 (<MODE>mode, operands[4], constm1_rtx));
      DONE;
    }

  if (operands[1] == const0_rtx)
    emit_move_insn (operands[4], operands[0]);
  else if (operands[0] == const0_rtx)
    emit_move_insn (operands[4], operands[1]);
  else if (operands[1] == constm1_rtx)
    emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[0]));
  else if (operands[0] == constm1_rtx)
    emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[1]));
  else
    {
      if (CONST_SCALAR_INT_P (operands[1])
          && !x86_64_immediate_operand (operands[1], <MODE>mode))
        operands[1] = force_reg (<MODE>mode, operands[1]);
      emit_insn (gen_xor<mode>3 (operands[4], operands[0], operands[1]));
    }

  if (operands[3] == const0_rtx)
    operands[5] = operands[2];
  else if (operands[2] == const0_rtx)
    operands[5] = operands[3];
  else
    {
      operands[5] = gen_reg_rtx (<MODE>mode);
      if (operands[3] == constm1_rtx)
        emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[2]));
      else if (operands[2] == constm1_rtx)
        emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[3]));
      else
        {
          if (CONST_SCALAR_INT_P (operands[3])
              && !x86_64_immediate_operand (operands[3], <MODE>mode))
            operands[3] = force_reg (<MODE>mode, operands[3]);
          emit_insn (gen_xor<mode>3 (operands[5], operands[2], operands[3]));
        }
    }
})

;; These implement floating point compares.
;; %%% See if we can get away with VOIDmode operands on the actual insns,
;; which would allow mix and match FP modes on the compares.  Which is what
;; the old patterns did, but with many more of them.
(define_expand "cbranchxf4"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:XF 1 "nonmemory_operand")
                    (match_operand:XF 2 "nonmemory_operand")))
   (set (pc) (if_then_else
              (match_operator 0 "ix86_fp_comparison_operator"
               [(reg:CC FLAGS_REG)
                (const_int 0)])
              (label_ref (match_operand 3))
              (pc)))]
  "TARGET_80387"
{
  ix86_expand_branch (GET_CODE (operands[0]),
                      operands[1], operands[2], operands[3]);
  DONE;
})

(define_expand "cstorexf4"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:XF 2 "nonmemory_operand")
                    (match_operand:XF 3 "nonmemory_operand")))
   (set (match_operand:QI 0 "register_operand")
        (match_operator 1 "ix86_fp_comparison_operator"
         [(reg:CC FLAGS_REG)
          (const_int 0)]))]
  "TARGET_80387"
{
  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
                     operands[2], operands[3]);
  DONE;
})

(define_expand "cbranchhf4"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:HF 1 "cmp_fp_expander_operand")
                    (match_operand:HF 2 "cmp_fp_expander_operand")))
   (set (pc) (if_then_else
              (match_operator 0 "ix86_fp_comparison_operator"
               [(reg:CC FLAGS_REG)
                (const_int 0)])
              (label_ref (match_operand 3))
              (pc)))]
  "TARGET_AVX512FP16"
{
  ix86_expand_branch (GET_CODE (operands[0]),
                      operands[1], operands[2], operands[3]);
  DONE;
})

(define_expand "cbranch<mode>4"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:MODEF 1 "cmp_fp_expander_operand")
                    (match_operand:MODEF 2 "cmp_fp_expander_operand")))
   (set (pc) (if_then_else
              (match_operator 0 "ix86_fp_comparison_operator"
               [(reg:CC FLAGS_REG)
                (const_int 0)])
              (label_ref (match_operand 3))
              (pc)))]
  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
{
  ix86_expand_branch (GET_CODE (operands[0]),
                      operands[1], operands[2], operands[3]);
  DONE;
})

;; BFmode branches convert both operands to SFmode and compare there.
(define_expand "cbranchbf4"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:BF 1 "cmp_fp_expander_operand")
                    (match_operand:BF 2 "cmp_fp_expander_operand")))
   (set (pc) (if_then_else
              (match_operator 0 "comparison_operator"
               [(reg:CC FLAGS_REG)
                (const_int 0)])
              (label_ref (match_operand 3))
              (pc)))]
  "TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
{
  rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[1]);
  rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
  do_compare_rtx_and_jump (op1, op2, GET_CODE (operands[0]), 0,
                           SFmode, NULL_RTX, NULL,
                           as_a <rtx_code_label *> (operands[3]),
                           /* Unfortunately this isn't propagated.  */
                           profile_probability::even ());
  DONE;
})

(define_expand "cstorehf4"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:HF 2 "cmp_fp_expander_operand")
                    (match_operand:HF 3 "cmp_fp_expander_operand")))
   (set (match_operand:QI 0 "register_operand")
        (match_operator 1 "ix86_fp_comparison_operator"
         [(reg:CC FLAGS_REG)
          (const_int 0)]))]
  "TARGET_AVX512FP16"
{
  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
                     operands[2], operands[3]);
  DONE;
})

;; BFmode store-flag converts both operands to SFmode first; the result of
;; emit_store_flag_force is copied to operand 0 when it landed elsewhere.
(define_expand "cstorebf4"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:BF 2 "cmp_fp_expander_operand")
                    (match_operand:BF 3 "cmp_fp_expander_operand")))
   (set (match_operand:QI 0 "register_operand")
        (match_operator 1 "comparison_operator"
         [(reg:CC FLAGS_REG)
          (const_int 0)]))]
  "TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
{
  rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
  rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
  rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]),
                                   op1, op2, SFmode, 0, 1);
  if (!rtx_equal_p (res, operands[0]))
    emit_move_insn (operands[0], res);
  DONE;
})

(define_expand "cstore<mode>4"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:MODEF 2 "cmp_fp_expander_operand")
                    (match_operand:MODEF 3 "cmp_fp_expander_operand")))
   (set (match_operand:QI 0 "register_operand")
        (match_operator 1 "ix86_fp_comparison_operator"
         [(reg:CC FLAGS_REG)
          (const_int 0)]))]
  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
{
  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
                     operands[2], operands[3]);
  DONE;
})

(define_expand "cbranchcc4"
  [(set (pc) (if_then_else
              (match_operator 0 "comparison_operator"
               [(match_operand 1 "flags_reg_operand")
                (match_operand 2 "const0_operand")])
              (label_ref (match_operand 3))
              (pc)))]
  ""
{
  ix86_expand_branch (GET_CODE (operands[0]),
                      operands[1], operands[2], operands[3]);
  DONE;
})

(define_expand "cstorecc4"
  [(set (match_operand:QI 0 "register_operand")
        (match_operator 1 "comparison_operator"
         [(match_operand 2 "flags_reg_operand")
          (match_operand 3 "const0_operand")]))]
  ""
{
  ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
                     operands[2], operands[3]);
  DONE;
})

;; FP compares, step 1:
;; Set the FP condition codes and move fpsr to ax.

;; We may not use "#" to split and emit these
;; due to reg-stack pops killing fpsr.

(define_insn "*cmpxf_i387"
  [(set (match_operand:HI 0 "register_operand" "=a")
        (unspec:HI
          [(compare:CCFP
             (match_operand:XF 1 "register_operand" "f")
             (match_operand:XF 2 "reg_or_0_operand" "fC"))]
          UNSPEC_FNSTSW))]
  "TARGET_80387"
  "* return output_fp_compare (insn, operands, false, false);"
  [(set_attr "type" "multi")
   (set_attr "unit" "i387")
   (set_attr "mode" "XF")])

(define_insn "*cmp<mode>_i387"
  [(set (match_operand:HI 0 "register_operand" "=a")
        (unspec:HI
          [(compare:CCFP
             (match_operand:MODEF 1 "register_operand" "f")
             (match_operand:MODEF 2 "nonimm_or_0_operand" "fmC"))]
          UNSPEC_FNSTSW))]
  "TARGET_80387"
  "* return output_fp_compare (insn, operands, false, false);"
  [(set_attr "type" "multi")
   (set_attr "unit" "i387")
   (set_attr "mode" "<MODE>")])

(define_insn "*cmp<X87MODEF:mode>_<SWI24:mode>_i387"
  [(set (match_operand:HI 0 "register_operand" "=a")
        (unspec:HI
          [(compare:CCFP
             (match_operand:X87MODEF 1 "register_operand" "f")
             (float:X87MODEF
               (match_operand:SWI24 2 "nonimmediate_operand" "m")))]
          UNSPEC_FNSTSW))]
  "TARGET_80387
   && (TARGET_USE_<SWI24:MODE>MODE_FIOP
       || optimize_function_for_size_p (cfun))"
  "* return output_fp_compare (insn, operands, false, false);"
  [(set_attr "type" "multi")
   (set_attr "unit" "i387")
   (set_attr "fp_int_src" "true")
   (set_attr "mode" "<SWI24:MODE>")])

(define_insn "*cmpu<mode>_i387"
  [(set (match_operand:HI 0 "register_operand" "=a")
        (unspec:HI
          [(unspec:CCFP
             [(compare:CCFP
                (match_operand:X87MODEF 1 "register_operand" "f")
                (match_operand:X87MODEF 2 "register_operand" "f"))]
             UNSPEC_NOTRAP)]
          UNSPEC_FNSTSW))]
  "TARGET_80387"
  "* return output_fp_compare (insn, operands, false, true);"
  [(set_attr "type" "multi")
   (set_attr "unit" "i387")
   (set_attr "mode" "<MODE>")])

;; FP compares, step 2:
;; Get ax into flags, general case.

(define_insn "x86_sahf_1"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC [(match_operand:HI 0 "register_operand" "a")]
                   UNSPEC_SAHF))]
  "TARGET_SAHF"
{
  /* The preprocessor directives below must each start their own line.  */
#ifndef HAVE_AS_IX86_SAHF
  if (TARGET_64BIT)
    return ASM_BYTE "0x9e";
  else
#endif
  return "sahf";
}
  [(set_attr "length" "1")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "SI")])

;; Pentium Pro can do both steps in one go.
;; (these instructions set flags directly)

(define_subst_attr "unord" "unord_subst" "" "u")
(define_subst_attr "unordered" "unord_subst" "false" "true")

(define_subst "unord_subst"
  [(set (match_operand:CCFP 0)
        (match_operand:CCFP 1))]
  ""
  [(set (match_dup 0)
        (unspec:CCFP
          [(match_dup 1)]
          UNSPEC_NOTRAP))])

(define_insn "*cmpi<unord>xf_i387"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (match_operand:XF 0 "register_operand" "f")
          (match_operand:XF 1 "register_operand" "f")))]
  "TARGET_80387 && TARGET_CMOVE"
  "* return output_fp_compare (insn, operands, true, <unordered>);"
  [(set_attr "type" "fcmp")
   (set_attr "mode" "XF")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "double")
   (set_attr "znver1_decode" "double")])

(define_insn "*cmpi<unord><MODEF:mode>"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (match_operand:MODEF 0 "register_operand" "f,v")
          (match_operand:MODEF 1 "register_ssemem_operand" "f,vm")))]
  "(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
   || (TARGET_80387 && TARGET_CMOVE)"
  "@
   * return output_fp_compare (insn, operands, true, <unordered>);
   %v<unord>comi<MODEF:ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "fcmp,ssecomi")
   (set_attr "prefix" "orig,maybe_vex")
   (set_attr "mode" "<MODEF:MODE>")
   (set_attr "prefix_rep" "*,0")
   (set (attr "prefix_data16")
        (cond [(eq_attr "alternative" "0")
                 (const_string "*")
               (eq_attr "mode" "DF")
                 (const_string "1")
              ]
              (const_string "0")))
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "double")
   (set_attr "znver1_decode" "double")
   (set (attr "enabled")
     (if_then_else
       (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"))
       (if_then_else
         (eq_attr "alternative" "0")
         (symbol_ref "TARGET_MIX_SSE_I387")
         (symbol_ref "true"))
       (if_then_else
         (eq_attr "alternative" "0")
         (symbol_ref "true")
         (symbol_ref "false"))))])

(define_insn "*cmpi<unord>hf"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (match_operand:HF 0 "register_operand" "v")
          (match_operand:HF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512FP16"
  "v<unord>comish\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix" "evex")
   (set_attr "mode" "HF")])

;; Set carry flag.
(define_insn "x86_stc"
  [(set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))]
  ""
  "stc"
  [(set_attr "length" "1")
   (set_attr "length_immediate" "0")
   (set_attr "modrm" "0")])

;; On Pentium 4, set the carry flag using mov $1,%al;addb $-1,%al.
(define_peephole2
  [(match_scratch:QI 0 "r")
   (set (reg:CCC FLAGS_REG) (unspec:CCC [(const_int 0)] UNSPEC_STC))]
  "TARGET_SLOW_STC && !optimize_insn_for_size_p ()"
  [(set (match_dup 0) (const_int 1))
   (parallel
     [(set (reg:CCC FLAGS_REG)
           (compare:CCC (plus:QI (match_dup 0) (const_int -1))
                        (match_dup 0)))
      (set (match_dup 0) (plus:QI (match_dup 0) (const_int -1)))])])

;; Complement carry flag.
(define_insn "*x86_cmc"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
                     (geu:QI (reg:CCC FLAGS_REG) (const_int 0))))]
  ""
  "cmc"
  [(set_attr "length" "1")
   (set_attr "length_immediate" "0")
   (set_attr "use_carry" "1")
   (set_attr "modrm" "0")])

;; On Pentium 4, cmc is replaced with setnc %al;addb $-1,%al.
(define_peephole2
  [(match_scratch:QI 0 "r")
   (set (reg:CCC FLAGS_REG)
        (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
                     (geu:QI (reg:CCC FLAGS_REG) (const_int 0))))]
  "TARGET_SLOW_STC && !optimize_insn_for_size_p ()"
  [(set (match_dup 0) (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))
   (parallel
     [(set (reg:CCC FLAGS_REG)
           (compare:CCC (plus:QI (match_dup 0) (const_int -1))
                        (match_dup 0)))
      (set (match_dup 0) (plus:QI (match_dup 0) (const_int -1)))])])

;; Push/pop instructions.

;; After reload a V1TI register push is split into an explicit stack
;; pointer adjustment followed by a store through the stack pointer.
(define_insn_and_split "*pushv1ti2"
  [(set (match_operand:V1TI 0 "push_operand" "=<")
        (match_operand:V1TI 1 "register_operand" "v"))]
  "TARGET_64BIT && TARGET_STV"
  "#"
  "&& reload_completed"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
   (set (match_dup 0) (match_dup 1))]
{
  operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V1TImode)));
  /* Preserve memory attributes. */
  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
}
  [(set_attr "type" "multi")
   (set_attr "mode" "TI")])

(define_insn "*push<mode>2"
  [(set (match_operand:DWI 0 "push_operand" "=<,<")
        (match_operand:DWI 1 "general_no_elim_operand" "riF*o,*v"))]
  ""
  "#"
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])

(define_split
  [(set (match_operand:DWI 0 "push_operand")
        (match_operand:DWI 1 "general_gr_operand"))]
  "reload_completed"
  [(const_int 0)]
  "ix86_split_long_move (operands); DONE;")

(define_insn "*pushdi2_rex64"
  [(set (match_operand:DI 0 "push_operand" "=<,<,!<")
        (match_operand:DI 1 "general_no_elim_operand" "re*m,*v,n"))]
  "TARGET_64BIT"
  "@
   push{q}\t%1
   #
   #"
  [(set_attr "type" "push,multi,multi")
   (set_attr "mode" "DI")])

;; Convert impossible pushes of immediate to existing instructions.
;; First try to get scratch register and go through it.  In case this
;; fails, push sign extended lower part first and then overwrite
;; upper part by 32bit move.
(define_peephole2
  [(match_scratch:DI 2 "r")
   (set (match_operand:DI 0 "push_operand")
        (match_operand:DI 1 "immediate_operand"))]
  "TARGET_64BIT
   && !symbolic_operand (operands[1], DImode)
   && !x86_64_immediate_operand (operands[1], DImode)"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (match_dup 2))])

;; Fallback for the peephole above: push the low part, then store the
;; high part into the SImode slot at offset 4 from the stack pointer.
(define_split
  [(set (match_operand:DI 0 "push_operand")
        (match_operand:DI 1 "immediate_operand"))]
  "TARGET_64BIT && epilogue_completed
   && !symbolic_operand (operands[1], DImode)
   && !x86_64_immediate_operand (operands[1], DImode)"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]);

  operands[1] = gen_lowpart (DImode, operands[2]);
  operands[2] = gen_rtx_MEM (SImode,
                             plus_constant (Pmode, stack_pointer_rtx, 4));
})

;; For TARGET_64BIT we always round up to 8 bytes.
(define_insn "*pushsi2_rex64"
  [(set (match_operand:SI 0 "push_operand" "=X,X")
        (match_operand:SI 1 "nonmemory_no_elim_operand" "re,*v"))]
  "TARGET_64BIT"
  "@
   push{q}\t%q1
   #"
  [(set_attr "type" "push,multi")
   (set_attr "mode" "DI")])

(define_insn "*pushsi2"
  [(set (match_operand:SI 0 "push_operand" "=<,<")
        (match_operand:SI 1 "general_no_elim_operand" "ri*m,*v"))]
  "!TARGET_64BIT"
  "@
   push{l}\t%1
   #"
  [(set_attr "type" "push,multi")
   (set_attr "mode" "SI")])

;; Pushes of SSE registers become a stack pointer adjustment plus a store.
;; Two mode iterators (P and SWI48DWI) are live here, so the mode
;; attribute must name its iterator explicitly.
(define_split
  [(set (match_operand:SWI48DWI 0 "push_operand")
        (match_operand:SWI48DWI 1 "sse_reg_operand"))]
  "TARGET_SSE && reload_completed"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
   (set (match_dup 0) (match_dup 1))]
{
  operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (<SWI48DWI:MODE>mode)));
  /* Preserve memory attributes. */
  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})

;; emit_push_insn when it calls move_by_pieces requires an insn to
;; "push a byte/word".  But actually we use push{l,q}, which has
;; the effect of rounding the amount pushed up to a word.
(define_insn "*push<mode>2"
  [(set (match_operand:SWI12 0 "push_operand" "=X")
        (match_operand:SWI12 1 "nonmemory_no_elim_operand" "rn"))]
  ""
  "* return TARGET_64BIT ? \"push{q}\t%q1\" : \"push{l}\t%k1\";"
  [(set_attr "type" "push")
   (set (attr "mode")
        (if_then_else (match_test "TARGET_64BIT")
          (const_string "DI")
          (const_string "SI")))])

;; The _prologue/_epilogue variants carry an extra (mem:BLK (scratch))
;; clobber in addition to the plain push/pop.
(define_insn "*push<mode>2_prologue"
  [(set (match_operand:W 0 "push_operand" "=<")
        (match_operand:W 1 "general_no_elim_operand" "r<i>*m"))
   (clobber (mem:BLK (scratch)))]
  ""
  "push{<imodesuffix>}\t%1"
  [(set_attr "type" "push")
   (set_attr "mode" "<MODE>")])

(define_insn "*pop<mode>1"
  [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
        (match_operand:W 1 "pop_operand" ">"))]
  ""
  "pop{<imodesuffix>}\t%0"
  [(set_attr "type" "pop")
   (set_attr "mode" "<MODE>")])

(define_insn "*pop<mode>1_epilogue"
  [(set (match_operand:W 0 "nonimmediate_operand" "=r*m")
        (match_operand:W 1 "pop_operand" ">"))
   (clobber (mem:BLK (scratch)))]
  ""
  "pop{<imodesuffix>}\t%0"
  [(set_attr "type" "pop")
   (set_attr "mode" "<MODE>")])

(define_insn "@pushfl<mode>2"
  [(set (match_operand:W 0 "push_operand" "=<")
        (unspec:W [(match_operand 1 "flags_reg_operand")]
                  UNSPEC_PUSHFL))]
  "GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_CC"
  "pushf{<imodesuffix>}"
  [(set_attr "type" "push")
   (set_attr "mode" "<MODE>")])

(define_insn "@popfl<mode>1"
  [(set (match_operand:CC 0 "flags_reg_operand")
        (unspec:CC [(match_operand:W 1 "pop_operand" ">")]
                   UNSPEC_POPFL))]
  ""
  "popf{<imodesuffix>}"
  [(set_attr "type" "pop")
   (set_attr "mode" "<MODE>")])

;; Reload patterns to support multi-word load/store
;; with non-offsettable address.
;; Both expanders copy the memory address into scratch operand 2, rewrite
;; the MEM to use that register as its address, and emit a plain SET.
(define_expand "reload_noff_store"
  [(parallel [(match_operand 0 "memory_operand" "=m")
              (match_operand 1 "register_operand" "r")
              (match_operand:DI 2 "register_operand" "=&r")])]
  "TARGET_64BIT"
{
  rtx mem = operands[0];
  rtx addr = XEXP (mem, 0);

  emit_move_insn (operands[2], addr);
  mem = replace_equiv_address_nv (mem, operands[2]);

  emit_insn (gen_rtx_SET (mem, operands[1]));
  DONE;
})

(define_expand "reload_noff_load"
  [(parallel [(match_operand 0 "register_operand" "=r")
              (match_operand 1 "memory_operand" "m")
              (match_operand:DI 2 "register_operand" "=r")])]
  "TARGET_64BIT"
{
  rtx mem = operands[1];
  rtx addr = XEXP (mem, 0);

  emit_move_insn (operands[2], addr);
  mem = replace_equiv_address_nv (mem, operands[2]);

  emit_insn (gen_rtx_SET (operands[0], mem));
  DONE;
})

;; Move instructions.

(define_expand "movxi"
  [(set (match_operand:XI 0 "nonimmediate_operand")
        (match_operand:XI 1 "general_operand"))]
  "TARGET_AVX512F && TARGET_EVEX512"
  "ix86_expand_vector_move (XImode, operands); DONE;")

(define_expand "movoi"
  [(set (match_operand:OI 0 "nonimmediate_operand")
        (match_operand:OI 1 "general_operand"))]
  "TARGET_AVX"
  "ix86_expand_vector_move (OImode, operands); DONE;")

;; TImode moves use the integer move expander on 64-bit targets and the
;; vector move expander otherwise.
(define_expand "movti"
  [(set (match_operand:TI 0 "nonimmediate_operand")
        (match_operand:TI 1 "general_operand"))]
  "TARGET_64BIT || TARGET_SSE"
{
  if (TARGET_64BIT)
    ix86_expand_move (TImode, operands);
  else
    ix86_expand_vector_move (TImode, operands);
  DONE;
})

;; This expands to what emit_move_complex would generate if we didn't
;; have a movti pattern.  Having this avoids problems with reload on
;; 32-bit targets when SSE is present, but doesn't seem to be harmful
;; to have around all the time.
;; Complex DImode move: a push destination goes through
;; emit_move_complex_push, everything else through emit_move_complex_parts.
(define_expand "movcdi"
  [(set (match_operand:CDI 0 "nonimmediate_operand")
        (match_operand:CDI 1 "general_operand"))]
  ""
{
  if (push_operand (operands[0], CDImode))
    emit_move_complex_push (CDImode, operands[0], operands[1]);
  else
    emit_move_complex_parts (operands[0], operands[1]);
  DONE;
})

;; Generic scalar integer move expander; all work is done by
;; ix86_expand_move.
(define_expand "mov<mode>"
  [(set (match_operand:SWI1248x 0 "nonimmediate_operand")
        (match_operand:SWI1248x 1 "general_operand"))]
  ""
  "ix86_expand_move (<MODE>mode, operands); DONE;")

;; Zero a register with xor (always the 32-bit form).  Only valid after
;; reload; clobbers the flags.
(define_insn "*mov<mode>_xor"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (match_operand:SWI48 1 "const0_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  "xor{l}\t%k0, %k0"
  [(set_attr "type" "alu1")
   (set_attr "mode" "SI")
   (set_attr "length_immediate" "0")])

;; Store zero to memory with "and $0".  Only valid after reload;
;; clobbers the flags.
(define_insn "*mov<mode>_and"
  [(set (match_operand:SWI248 0 "memory_operand" "=m")
        (match_operand:SWI248 1 "const0_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  "and{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "1")])

;; Set a register or memory to -1 with "or $-1".  Only valid after
;; reload; clobbers the flags.
(define_insn "*mov<mode>_or"
  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
        (match_operand:SWI248 1 "constm1_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  "or{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "1")])

;; XImode move insn.  At least one operand must be a register.
;; Alternatives 0,1 load an SSE constant; 2,3 are plain moves.
(define_insn "*movxi_internal_avx512f"
  [(set (match_operand:XI 0 "nonimmediate_operand"          "=v,v ,v ,m")
        (match_operand:XI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
  "TARGET_AVX512F && TARGET_EVEX512
   && (register_operand (operands[0], XImode)
       || register_operand (operands[1], XImode))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      return ix86_output_ssemov (insn, operands);

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

;; OImode move insn; same shape as the XImode one above.
(define_insn "*movoi_internal_avx"
  [(set (match_operand:OI 0 "nonimmediate_operand"          "=v,v ,v ,m")
        (match_operand:OI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
  "TARGET_AVX
   && (register_operand (operands[0], OImode)
       || register_operand (operands[1], OImode))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      return ix86_output_ssemov (insn, operands);

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "*,avx2,*,*")
   (set_attr "type" "sselog1,sselog1,ssemov,ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; TImode move insn.  Alternatives 0,1,6,7 have type "multi" and are
;; output as "#" (i.e. they must be split); 2,3 load SSE constants;
;; 4,5 are SSE moves.
(define_insn "*movti_internal"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?jc,?Yd")
        (match_operand:TI 1 "general_operand"      "riFo,re,C,BC,vm,v,Yd,jc"))]
  "(TARGET_64BIT
    && !(MEM_P (operands[0]) && MEM_P (operands[1])))
   || (TARGET_SSE
       && nonimmediate_or_sse_const_operand (operands[1], TImode)
       && (register_operand (operands[0], TImode)
           || register_operand (operands[1], TImode)))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_MULTI:
      return "#";

    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      return ix86_output_ssemov (insn, operands);

    default:
      gcc_unreachable ();
    }
}
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "0,1,6,7")
              (const_string "x64")
            (eq_attr "alternative" "3")
              (const_string "sse2")
           ]
           (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "0,1,6,7")
              (const_string "multi")
            (eq_attr "alternative" "2,3")
              (const_string "sselog1")
           ]
           (const_string "ssemov")))
   (set (attr "prefix")
     (if_then_else (eq_attr "type" "sselog1,ssemov")
       (const_string "maybe_vex")
       (const_string "orig")))
   (set (attr "mode")
        (cond [(eq_attr "alternative" "0,1")
                 (const_string "DI")
               (match_test "TARGET_AVX")
                 (const_string "TI")
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
               (and (eq_attr "alternative" "5")
                    (match_test "TARGET_SSE_TYPELESS_STORES"))
                 (const_string "V4SF")
               ]
               (const_string "TI")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "6")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
            (eq_attr "alternative" "7")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
           ]
           (symbol_ref "true")))])

;; After reload, split a TImode general-reg -> SSE-reg move: the low
;; DImode half is moved first, then the high half is merged into the
;; V2DI view of the destination.
(define_split
  [(set (match_operand:TI 0 "sse_reg_operand")
        (match_operand:TI 1 "general_reg_operand"))]
  "TARGET_64BIT && TARGET_SSE4_1 && reload_completed"
  [(set (match_dup 2)
        (vec_merge:V2DI
          (vec_duplicate:V2DI (match_dup 3))
          (match_dup 2)
          (const_int 2)))]
{
  operands[2] = lowpart_subreg (V2DImode, operands[0], TImode);
  operands[3] = gen_highpart (DImode, operands[1]);

  emit_move_insn (gen_lowpart (DImode, operands[0]),
                  gen_lowpart (DImode, operands[1]));
})

;; DImode move insn.  The output code is selected by the "type"
;; attribute, which in turn is derived from the matched alternative.
(define_insn "*movdi_internal"
  [(set (match_operand:DI 0 "nonimmediate_operand"
    "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r,?*y,?Yv,?v,?v,m ,m,?jc,?*Yd,?r,?v,?*y,?*x,*k,*k ,*r,*m,*k")
        (match_operand:DI 1 "general_operand"
    "riFo,riF,Z,rem,i,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,v,*Yd,jc ,?v,r ,*x ,*y ,*r,*kBk,*k,*k,CBC"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && ix86_hardreg_mov_ok (operands[0], operands[1])"
{
  switch (get_attr_type (insn))
    {
    case TYPE_MSKMOV:
      return "kmovq\t{%1, %0|%0, %1}";

    case TYPE_MSKLOG:
      if (operands[1] == const0_rtx)
        return "kxorq\t%0, %0, %0";
      else if (operands[1] == constm1_rtx)
        return "kxnorq\t%0, %0, %0";
      gcc_unreachable ();

    case TYPE_MULTI:
      return "#";

    case TYPE_MMX:
      return "pxor\t%0, %0";

    case TYPE_MMXMOV:
      /* Handle broken assemblers that require movd instead of movq.  */
      if (!HAVE_AS_IX86_INTERUNIT_MOVQ
          && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
        return "movd\t{%1, %0|%0, %1}";
      return "movq\t{%1, %0|%0, %1}";

    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      return ix86_output_ssemov (insn, operands);

    case TYPE_SSECVT:
      if (SSE_REG_P (operands[0]))
        return "movq2dq\t{%1, %0|%0, %1}";
      else
        return "movdq2q\t{%1, %0|%0, %1}";

    case TYPE_LEA:
      return "lea{q}\t{%E1, %0|%0, %E1}";

    case TYPE_IMOV:
      gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
      if (get_attr_mode (insn) == MODE_SI)
        return "mov{l}\t{%k1, %k0|%k0, %k1}";
      else if (which_alternative == 4)
        return "movabs{q}\t{%1, %0|%0, %1}";
      else if (ix86_use_lea_for_mov (insn, operands))
        return "lea{q}\t{%E1, %0|%0, %E1}";
      else
        return "mov{q}\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "0,1,17,18")
              (const_string "nox64")
            (eq_attr "alternative" "2,3,4,5,10,11,23,25")
              (const_string "x64")
            (eq_attr "alternative" "19,20")
              (const_string "x64_sse2")
            (eq_attr "alternative" "21,22")
              (const_string "sse2")
           ]
           (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "0,1,17,18")
              (const_string "multi")
            (eq_attr "alternative" "6")
              (const_string "mmx")
            (eq_attr "alternative" "7,8,9,10,11")
              (const_string "mmxmov")
            (eq_attr "alternative" "12")
              (const_string "sselog1")
            (eq_attr "alternative" "13,14,15,16,19,20")
              (const_string "ssemov")
            (eq_attr "alternative" "21,22")
              (const_string "ssecvt")
            (eq_attr "alternative" "23,24,25,26")
              (const_string "mskmov")
            (eq_attr "alternative" "27")
              (const_string "msklog")
            (and (match_operand 0 "register_operand")
                 (match_operand 1 "pic_32bit_operand"))
              (const_string "lea")
           ]
           (const_string "imov")))
   (set (attr "modrm")
     (if_then_else
       (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
       (const_string "0")
       (const_string "*")))
   (set (attr "length_immediate")
     (if_then_else
       (and (eq_attr "alternative" "4") (eq_attr "type" "imov"))
       (const_string "8")
       (const_string "*")))
   (set (attr "prefix_rex")
     (if_then_else
       (eq_attr "alternative" "10,11,19,20")
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix")
     (if_then_else (eq_attr "type" "sselog1,ssemov")
       (const_string "maybe_vex")
       (const_string "orig")))
   (set (attr "prefix_data16")
     (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
       (const_string "1")
       (const_string "*")))
   (set (attr "mode")
        (cond [(eq_attr "alternative" "2")
                 (const_string "SI")
               (eq_attr "alternative" "12")
                 (cond [(match_test "TARGET_AVX")
                          (const_string "TI")
                        (ior (not (match_test "TARGET_SSE2"))
                             (match_test "optimize_function_for_size_p (cfun)"))
                          (const_string "V4SF")
                        ]
                        (const_string "TI"))
               (eq_attr "alternative" "13")
                 (cond [(match_test "TARGET_AVX512VL")
                          (const_string "TI")
                        (match_test "TARGET_AVX512F")
                          (const_string "DF")
                        (match_test "TARGET_AVX")
                          (const_string "TI")
                        (ior (not (match_test "TARGET_SSE2"))
                             (match_test "optimize_function_for_size_p (cfun)"))
                          (const_string "V4SF")
                        ]
                        (const_string "TI"))

               (and (eq_attr "alternative" "14,15,16")
                    (not (match_test "TARGET_SSE2")))
                 (const_string "V2SF")
              ]
              (const_string "DI")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "10,17,19")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
            (eq_attr "alternative" "11,18,20")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
           ]
           (symbol_ref "true")))
   (set (attr "enabled")
     (cond [(eq_attr "alternative" "15")
              (if_then_else
                (match_test "TARGET_STV && TARGET_SSE2")
                (symbol_ref "false")
                (const_string "*"))
            (eq_attr "alternative" "16")
              (if_then_else
                (match_test "TARGET_STV && TARGET_SSE2")
                (symbol_ref "true")
                (symbol_ref "false"))
           ]
           (const_string "*")))])

;; After reload, split a double-word SSE-reg -> general-reg move: the
;; low word is moved first, then element 1 of the vector view of the
;; source is extracted into the high word of the destination.
(define_split
  [(set (match_operand:<DWI> 0 "general_reg_operand")
        (match_operand:<DWI> 1 "sse_reg_operand"))]
  "TARGET_SSE4_1
   && reload_completed"
  [(set (match_dup 2)
        (vec_select:DWIH
          (match_dup 3)
          (parallel [(const_int 1)])))]
{
  operands[2] = gen_highpart (<MODE>mode, operands[0]);
  operands[3] = lowpart_subreg (<ssevecmode>mode, operands[1], <DWI>mode);

  emit_move_insn (gen_lowpart (<MODE>mode, operands[0]),
                  gen_lowpart (<MODE>mode, operands[1]));
})

;; After reload, double-word moves between general-register class
;; operands are split by ix86_split_long_move.
(define_split
  [(set (match_operand:DWI 0 "nonimmediate_gr_operand")
        (match_operand:DWI 1 "general_gr_operand"))]
  "reload_completed"
  [(const_int 0)]
  "ix86_split_long_move (operands); DONE;")

;; 32-bit counterpart of the TImode general-reg -> SSE-reg split above,
;; working on SImode halves and a V4SI view of the destination.
(define_split
  [(set (match_operand:DI 0 "sse_reg_operand")
        (match_operand:DI 1 "general_reg_operand"))]
  "!TARGET_64BIT && TARGET_SSE4_1 && reload_completed"
  [(set (match_dup 2)
        (vec_merge:V4SI
          (vec_duplicate:V4SI (match_dup 3))
          (match_dup 2)
          (const_int 2)))]
{
  operands[2] = lowpart_subreg (V4SImode, operands[0], DImode);
  operands[3] = gen_highpart (SImode, operands[1]);

  emit_move_insn (gen_lowpart (SImode, operands[0]),
                  gen_lowpart (SImode, operands[1]));
})

;; movabsq $0x0012345678000000, %rax is longer
;; than movl $0x12345678, %eax; shlq $24, %rax.
;; The replacement is abandoned (FAIL) when the shifted-down immediate
;; satisfies ix86_endbr_immediate_operand.
(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
        (match_operand:DI 1 "const_int_operand"))]
  "TARGET_64BIT
   && optimize_insn_for_size_p ()
   && LEGACY_INT_REG_P (operands[0])
   && !x86_64_immediate_operand (operands[1], DImode)
   && !x86_64_zext_immediate_operand (operands[1], DImode)
   && !((UINTVAL (operands[1]) >> ctz_hwi (UINTVAL (operands[1])))
        & ~HOST_WIDE_INT_C (0xffffffff))
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(set (match_dup 0) (match_dup 1))
   (parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
{
  int shift = ctz_hwi (UINTVAL (operands[1]));
  rtx op1 = gen_int_mode (UINTVAL (operands[1]) >> shift, DImode);
  if (ix86_endbr_immediate_operand (op1, VOIDmode))
    FAIL;
  operands[1] = op1;
  operands[2] = gen_int_mode (shift, QImode);
})

;; SImode move insn.  As for *movdi_internal, the output code is
;; selected by the "type" attribute.
(define_insn "*movsi_internal"
  [(set (match_operand:SI 0 "nonimmediate_operand"
    "=r,m ,*y,*y,?*y,?m,?r,?*y,?Yv,?v,?v,m ,?r,?v,*k,*k ,*rm,*k")
        (match_operand:SI 1 "general_operand"
    "g ,re,C ,*y,Bk ,*y,*y,r ,C ,?v,Bk,?v,?v,r ,*r,*kBk,*k ,CBC"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && ix86_hardreg_mov_ok (operands[0], operands[1])"
{
  switch (get_attr_type (insn))
    {
    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_MSKMOV:
      return "kmovd\t{%1, %0|%0, %1}";

    case TYPE_MSKLOG:
      if (operands[1] == const0_rtx)
        return "kxord\t%0, %0, %0";
      else if (operands[1] == constm1_rtx)
        return "kxnord\t%0, %0, %0";
      gcc_unreachable ();

    case TYPE_SSEMOV:
      return ix86_output_ssemov (insn, operands);

    case TYPE_MMX:
      return "pxor\t%0, %0";

    case TYPE_MMXMOV:
      switch (get_attr_mode (insn))
        {
        case MODE_DI:
          return "movq\t{%1, %0|%0, %1}";
        case MODE_SI:
          return "movd\t{%1, %0|%0, %1}";

        default:
          gcc_unreachable ();
        }

    case TYPE_LEA:
      return "lea{l}\t{%E1, %0|%0, %E1}";

    case TYPE_IMOV:
      gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
      if (ix86_use_lea_for_mov (insn, operands))
        return "lea{l}\t{%E1, %0|%0, %E1}";
      else
        return "mov{l}\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "12,13")
              (const_string "sse2")
           ]
           (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "2")
              (const_string "mmx")
            (eq_attr "alternative" "3,4,5,6,7")
              (const_string "mmxmov")
            (eq_attr "alternative" "8")
              (const_string "sselog1")
            (eq_attr "alternative" "9,10,11,12,13")
              (const_string "ssemov")
            (eq_attr "alternative" "14,15,16")
              (const_string "mskmov")
            (eq_attr "alternative" "17")
              (const_string "msklog")
            (and (match_operand 0 "register_operand")
                 (match_operand 1 "pic_32bit_operand"))
              (const_string "lea")
           ]
           (const_string "imov")))
   (set (attr "prefix")
     (if_then_else (eq_attr "type" "sselog1,ssemov")
       (const_string "maybe_vex")
       (const_string "orig")))
   (set (attr "prefix_data16")
     (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
       (const_string "1")
       (const_string "*")))
   (set (attr "mode")
     (cond [(eq_attr "alternative" "2,3")
              (const_string "DI")
            (eq_attr "alternative" "8")
              (cond [(match_test "TARGET_AVX")
                       (const_string "TI")
                     (ior (not (match_test "TARGET_SSE2"))
                          (match_test "optimize_function_for_size_p (cfun)"))
                       (const_string "V4SF")
                    ]
                    (const_string "TI"))
            (eq_attr "alternative" "9")
              (cond [(match_test "TARGET_AVX512VL")
                       (const_string "TI")
                     (match_test "TARGET_AVX512F")
                       (const_string "SF")
                     (match_test "TARGET_AVX")
                       (const_string "TI")
                     (ior (not (match_test "TARGET_SSE2"))
                          (match_test "optimize_function_for_size_p (cfun)"))
                       (const_string "V4SF")
                    ]
                    (const_string "TI"))

            (and (eq_attr "alternative" "10,11")
                 (not (match_test "TARGET_SSE2")))
              (const_string "SF")
           ]
           (const_string "SI")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "6,12")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
            (eq_attr "alternative" "7,13")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
           ]
           (symbol_ref "true")))])

;; With -Oz, transform mov $imm,reg to the shorter push $imm; pop reg.
;; Applies only to non-zero immediates in [-128, 127], when the red zone
;; is not in use and the destination is not the stack pointer.
(define_peephole2
  [(set (match_operand:SWI248 0 "general_reg_operand")
        (match_operand:SWI248 1 "const_int_operand"))]
  "optimize_insn_for_size_p () && optimize_size > 1
   && operands[1] != const0_rtx
   && IN_RANGE (INTVAL (operands[1]), -128, 127)
   && !ix86_red_zone_used
   && REGNO (operands[0]) != SP_REG"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (match_dup 3))]
{
  /* The push/pop pair operates on word_mode, so widen the register.  */
  if (GET_MODE (operands[0]) != word_mode)
    operands[0] = gen_rtx_REG (word_mode, REGNO (operands[0]));

  operands[2] = gen_rtx_MEM (word_mode,
                             gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
  operands[3] = gen_rtx_MEM (word_mode,
                             gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
})

;; With -Oz, transform mov $0,mem to the shorter and $0,mem.
;; Likewise, transform mov $-1,mem to the shorter or $-1,mem.
;; Rewrite a store of 0 or -1 to memory into the flag-clobbering form
;; (matched by the and/or insns) when the flags are dead.
(define_peephole2
  [(set (match_operand:SWI248 0 "memory_operand")
        (match_operand:SWI248 1 "const_int_operand"))]
  "(operands[1] == const0_rtx || operands[1] == constm1_rtx)
   && optimize_insn_for_size_p () && optimize_size > 1
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0) (match_dup 1))
              (clobber (reg:CC FLAGS_REG))])])

;; HImode move insn.  The output code is selected by the "type"
;; attribute; the default case emits movl or movw depending on the
;; "mode" attribute.
(define_insn "*movhi_internal"
  [(set (match_operand:HI 0 "nonimmediate_operand"
    "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*Yv,*v,*v,jm,m")
        (match_operand:HI 1 "general_operand"
    "r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r ,C ,*v,m ,*x,*v"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && ix86_hardreg_mov_ok (operands[0], operands[1])"
{
  switch (get_attr_type (insn))
    {
    case TYPE_IMOVX:
      /* movzwl is faster than movw on p2 due to partial word stalls,
         though not as fast as an aligned movl.  */
      return "movz{wl|x}\t{%1, %k0|%k0, %1}";

    case TYPE_MSKMOV:
      switch (which_alternative)
        {
        case 4:
          return "kmovw\t{%k1, %0|%0, %k1}";
        case 6:
          return "kmovw\t{%1, %k0|%k0, %1}";
        case 5:
        case 7:
          return "kmovw\t{%1, %0|%0, %1}";
        default:
          gcc_unreachable ();
        }

    case TYPE_SSEMOV:
      return ix86_output_ssemov (insn, operands);

    case TYPE_SSELOG1:
      if (satisfies_constraint_C (operands[1]))
        return standard_sse_constant_opcode (insn, operands);

      if (SSE_REG_P (operands[0]))
        return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
      else
        return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";

    case TYPE_MSKLOG:
      if (operands[1] == const0_rtx)
        return "kxorw\t%0, %0, %0";
      else if (operands[1] == constm1_rtx)
        return "kxnorw\t%0, %0, %0";
      gcc_unreachable ();

    default:
      if (get_attr_mode (insn) == MODE_SI)
        return "mov{l}\t{%k1, %k0|%k0, %k1}";
      else
        return "mov{w}\t{%1, %0|%0, %1}";
    }
}
  [(set (attr "isa")
        (cond [(eq_attr "alternative" "9,10,11,12,13")
                  (const_string "sse2")
               (eq_attr "alternative" "14")
                  (const_string "sse4_noavx")
               (eq_attr "alternative" "15")
                  (const_string "avx")
              ]
              (const_string "*")))
   (set (attr "addr")
        (if_then_else (eq_attr "alternative" "14")
                      (const_string "gpr16")
                      (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "4,5,6,7")
              (const_string "mskmov")
            (eq_attr "alternative" "8")
              (const_string "msklog")
            (eq_attr "alternative" "13,14,15")
              (if_then_else (match_test "TARGET_AVX512FP16")
                (const_string "ssemov")
                (const_string "sselog1"))
            (eq_attr "alternative" "11")
              (const_string "sselog1")
            (eq_attr "alternative" "9,10,12")
              (const_string "ssemov")
            (match_test "optimize_function_for_size_p (cfun)")
              (const_string "imov")
            (and (eq_attr "alternative" "0")
                 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
                      (not (match_test "TARGET_HIMODE_MATH"))))
              (const_string "imov")
            (and (eq_attr "alternative" "1,2")
                 (match_operand:HI 1 "aligned_operand"))
              (const_string "imov")
            (and (match_test "TARGET_MOVX")
                 (eq_attr "alternative" "0,2"))
              (const_string "imovx")
           ]
           (const_string "imov")))
   (set (attr "prefix")
        (cond [(eq_attr "alternative" "4,5,6,7,8")
                 (const_string "vex")
               (eq_attr "alternative" "9,10,11,12,13,14,15")
                 (const_string "maybe_evex")
              ]
              (const_string "orig")))
   (set (attr "mode")
     (cond [(eq_attr "alternative" "9,10")
              (if_then_else (match_test "TARGET_AVX512FP16")
                (const_string "HI")
                (const_string "SI"))
            (eq_attr "alternative" "13,14,15")
              (if_then_else (match_test "TARGET_AVX512FP16")
                (const_string "HI")
                (const_string "TI"))
            (eq_attr "alternative" "11")
              (cond [(match_test "TARGET_AVX")
                       (const_string "TI")
                     (ior (not (match_test "TARGET_SSE2"))
                          (match_test "optimize_function_for_size_p (cfun)"))
                       (const_string "V4SF")
                    ]
                    (const_string "TI"))
            (eq_attr "alternative" "12")
              (cond [(match_test "TARGET_AVX512VL")
                       (const_string "TI")
                     (match_test "TARGET_AVX512FP16")
                       (const_string "HF")
                     (match_test "TARGET_AVX512F")
                       (const_string "SF")
                     (match_test "TARGET_AVX")
                       (const_string "TI")
                     (ior (not (match_test "TARGET_SSE2"))
                          (match_test "optimize_function_for_size_p (cfun)"))
                       (const_string "V4SF")
                    ]
                    (const_string "TI"))
            (eq_attr "type" "imovx")
              (const_string "SI")
            (and (eq_attr "alternative" "1,2")
                 (match_operand:HI 1 "aligned_operand"))
              (const_string "SI")
            (and (eq_attr "alternative" "0")
                 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
                      (not (match_test "TARGET_HIMODE_MATH"))))
              (const_string "SI")
            ]
            (const_string "HI")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "9")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
            (eq_attr "alternative" "10")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
           ]
           (symbol_ref "true")))])

;; Situation is quite tricky about when to choose full sized (SImode) move
;; over QImode moves.  For Q_REG -> Q_REG move we use full size only for
;; partial register dependency machines (such as AMD Athlon), where QImode
;; moves issue extra dependency and for partial register stalls machines
;; that don't use QImode patterns (and QImode move cause stall on the next
;; instruction).
;;
;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial
;; register stall machines with, where we use QImode instructions, since
;; partial register stall can be caused there.  Then we use movzx.

(define_insn "*movqi_internal"
  [(set (match_operand:QI 0 "nonimmediate_operand"
                        "=Q,R,r,q,q,r,r ,?r,m ,*k,*k,*r,*m,*k,*k,*k")
        (match_operand:QI 1 "general_operand"
                        "Q ,R,r,n,m,q,rn, m,qn,*r,*k,*k,*k,*m,C,BC"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && ix86_hardreg_mov_ok (operands[0], operands[1])"
{
  char buf[128];
  const char *ops;
  const char *suffix;

  switch (get_attr_type (insn))
    {
    case TYPE_IMOVX:
      gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
      return "movz{bl|x}\t{%1, %k0|%k0, %1}";

    case TYPE_MSKMOV:
      switch (which_alternative)
        {
        case 9:
          ops = "kmov%s\t{%%k1, %%0|%%0, %%k1}";
          break;
        case 11:
          ops = "kmov%s\t{%%1, %%k0|%%k0, %%1}";
          break;
        case 12:
        case 13:
          gcc_assert (TARGET_AVX512DQ);
          /* FALLTHRU */
        case 10:
          ops = "kmov%s\t{%%1, %%0|%%0, %%1}";
          break;
        default:
          gcc_unreachable ();
        }

      /* The mask-move width follows the "mode" attribute: kmovw for
         HImode, kmovb otherwise.  */
      suffix = (get_attr_mode (insn) == MODE_HI) ? "w" : "b";

      snprintf (buf, sizeof (buf), ops, suffix);
      output_asm_insn (buf, operands);
      return "";

    case TYPE_MSKLOG:
      if (operands[1] == const0_rtx)
        {
          if (get_attr_mode (insn) == MODE_HI)
            return "kxorw\t%0, %0, %0";
          else
            return "kxorb\t%0, %0, %0";
        }
      else if (operands[1] == constm1_rtx)
        {
          gcc_assert (TARGET_AVX512DQ);
          return "kxnorb\t%0, %0, %0";
        }
      gcc_unreachable ();

    default:
      if (get_attr_mode (insn) == MODE_SI)
        return "mov{l}\t{%k1, %k0|%k0, %k1}";
      else
        return "mov{b}\t{%1, %0|%0, %1}";
    }
}
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "1,2")
              (const_string "x64")
            (eq_attr "alternative" "12,13,15")
              (const_string "avx512dq")
           ]
           (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "9,10,11,12,13")
              (const_string "mskmov")
            (eq_attr "alternative" "14,15")
              (const_string "msklog")
            (and (eq_attr "alternative" "7")
                 (not (match_operand:QI 1 "aligned_operand")))
              (const_string "imovx")
            (match_test "optimize_function_for_size_p (cfun)")
              (const_string "imov")
            (and (eq_attr "alternative" "5")
                 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
                      (not (match_test "TARGET_QIMODE_MATH"))))
              (const_string "imov")
            (eq_attr "alternative" "5,7")
              (const_string "imovx")
            (and (match_test "TARGET_MOVX")
                 (eq_attr "alternative" "4"))
              (const_string "imovx")
           ]
           (const_string "imov")))
   (set (attr "prefix")
     (if_then_else (eq_attr "alternative" "9,10,11,12,13,14,15")
       (const_string "vex")
       (const_string "orig")))
   (set (attr "mode")
      (cond [(eq_attr "alternative" "5,6,7")
               (const_string "SI")
             (eq_attr "alternative" "8")
               (const_string "QI")
             (and (eq_attr "alternative" "9,10,11,14")
                  (not (match_test "TARGET_AVX512DQ")))
               (const_string "HI")
             (eq_attr "type" "imovx")
               (const_string "SI")
             ;; For -Os, 8-bit immediates are always shorter than 32-bit
             ;; ones.
             (and (eq_attr "type" "imov")
                  (and (eq_attr "alternative" "3")
                       (match_test "optimize_function_for_size_p (cfun)")))
               (const_string "QI")
             ;; For -Os, movl where one or both operands are NON_Q_REGS
             ;; and both are LEGACY_REGS is shorter than movb.
             ;; Otherwise movb and movl sizes are the same, so decide purely
             ;; based on speed factors.
             (and (eq_attr "type" "imov")
                  (and (eq_attr "alternative" "1")
                       (match_test "optimize_function_for_size_p (cfun)")))
               (const_string "SI")
             (and (eq_attr "type" "imov")
                  (and (eq_attr "alternative" "0,1,2,3")
                       (and (match_test "TARGET_PARTIAL_REG_DEPENDENCY")
                            (not (match_test "TARGET_PARTIAL_REG_STALL")))))
               (const_string "SI")
             ;; Avoid partial register stalls when not using QImode arithmetic
             (and (eq_attr "type" "imov")
                  (and (eq_attr "alternative" "0,1,2,3")
                       (and (match_test "TARGET_PARTIAL_REG_STALL")
                            (not (match_test "TARGET_QIMODE_MATH")))))
               (const_string "SI")
           ]
           (const_string "QI")))])

/* Reload dislikes loading 0/-1 directly into mask registers.
   Try to tidy things up here.  */
(define_peephole2
  [(set (match_operand:SWI 0 "general_reg_operand")
        (match_operand:SWI 1 "immediate_operand"))
   (set (match_operand:SWI 2 "mask_reg_operand")
        (match_dup 0))]
  "peep2_reg_dead_p (2, operands[0])
   && (const0_operand (operands[1], <MODE>mode)
       || (constm1_operand (operands[1], <MODE>mode)
           && (<MODE_SIZE> > 1 || TARGET_AVX512DQ)))"
  [(set (match_dup 2) (match_dup 1))])

;; Stores and loads of ax to arbitrary constant address.
;; We fake a second form of instruction to force reload to load address
;; into register when rax is not available
(define_insn "*movabs<mode>_1"
  [(set (mem:SWI1248x (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
        (match_operand:SWI1248x 1 "nonmemory_operand" "a,r<i>"))]
  "TARGET_LP64 && ix86_check_movabs (insn, 0)"
{
  /* Recover the full memory rtx.  */
  operands[0] = SET_DEST (PATTERN (insn));
  switch (which_alternative)
    {
    case 0:
      return "movabs{<imodesuffix>}\t{%1, %P0|<iptrsize> PTR [%P0], %1}";
    case 1:
      return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "imov")
   (set_attr "modrm" "0,*")
   (set_attr "length_address" "8,0")
   (set_attr "length_immediate" "0,*")
   (set_attr "memory" "store")
   (set_attr "mode" "<MODE>")])

;; Load counterpart of *movabs<mode>_1.
(define_insn "*movabs<mode>_2"
  [(set (match_operand:SWI1248x 0 "register_operand" "=a,r")
        (mem:SWI1248x (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
  "TARGET_LP64 && ix86_check_movabs (insn, 1)"
{
  /* Recover the full memory rtx.  */
  operands[1] = SET_SRC (PATTERN (insn));
  switch (which_alternative)
    {
    case 0:
      return "movabs{<imodesuffix>}\t{%P1, %0|%0, <iptrsize> PTR [%P1]}";
    case 1:
      return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "imov")
   (set_attr "modrm" "0,*")
   (set_attr "length_address" "8,0")
   (set_attr "length_immediate" "0")
   (set_attr "memory" "load")
   (set_attr "mode" "<MODE>")])

;; Exchange two SImode/DImode registers with xchg.
(define_insn "swap<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "+r")
        (match_operand:SWI48 1 "register_operand" "+r"))
   (set (match_dup 1)
        (match_dup 0))]
  ""
  "xchg{<imodesuffix>}\t%1, %0"
  [(set_attr "type" "imov")
   (set_attr "mode" "<MODE>")
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "double")])

;; Exchange two QImode/HImode registers.  Alternative 0 uses the narrow
;; xchg, alternative 1 exchanges the full 32-bit registers.
(define_insn "*swap<mode>"
  [(set (match_operand:SWI12 0 "register_operand" "+<r>,r")
        (match_operand:SWI12 1 "register_operand" "+<r>,r"))
   (set (match_dup 1)
        (match_dup 0))]
  ""
  "@
   xchg{<imodesuffix>}\t%1, %0
   xchg{l}\t%k1, %k0"
  [(set_attr "type" "imov")
   (set_attr "mode" "<MODE>,SI")
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "0")
              (symbol_ref "false")]
           (symbol_ref "true")))
   ;; Potential partial reg stall on alternative 1.
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "1")
              (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
           (symbol_ref "true")))
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "double")])

;; When optimizing for size, replace a three-move swap through a
;; temporary (operand 0, dead afterwards) with a single exchange.
(define_peephole2
  [(set (match_operand:SWI 0 "general_reg_operand")
        (match_operand:SWI 1 "general_reg_operand"))
   (set (match_dup 1)
        (match_operand:SWI 2 "general_reg_operand"))
   (set (match_dup 2) (match_dup 0))]
  "peep2_reg_dead_p (3, operands[0])
   && optimize_insn_for_size_p ()"
  [(parallel [(set (match_dup 1) (match_dup 2))
              (set (match_dup 2) (match_dup 1))])])

;; Convert xchg with a REG_UNUSED note to a mov (variant #1).
(define_peephole2
  [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
                   (match_operand:SWI 1 "general_reg_operand"))
              (set (match_dup 1) (match_dup 0))])]
  "((REGNO (operands[0]) != AX_REG
     && REGNO (operands[1]) != AX_REG)
    || optimize_size < 2
    || !optimize_insn_for_size_p ())
   && peep2_reg_dead_p (1, operands[0])"
  [(set (match_dup 1) (match_dup 0))])

;; Convert xchg with a REG_UNUSED note to a mov (variant #2).
(define_peephole2
  [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
                   (match_operand:SWI 1 "general_reg_operand"))
              (set (match_dup 1) (match_dup 0))])]
  "((REGNO (operands[0]) != AX_REG
     && REGNO (operands[1]) != AX_REG)
    || optimize_size < 2
    || !optimize_insn_for_size_p ())
   && peep2_reg_dead_p (1, operands[1])"
  [(set (match_dup 0) (match_dup 1))])

;; Convert moves to/from AX_REG into xchg with -Oz.
;; Applies when exactly one of the two registers is AX_REG and the
;; source register is dead after the move.
(define_peephole2
  [(set (match_operand:SWI48 0 "general_reg_operand")
        (match_operand:SWI48 1 "general_reg_operand"))]
 "optimize_size > 1
  && ((REGNO (operands[0]) == AX_REG)
      != (REGNO (operands[1]) == AX_REG))
  && optimize_insn_for_size_p ()
  && peep2_reg_dead_p (1, operands[1])"
  [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 1) (match_dup 0))])])

;; Move into the low part of a register, leaving the rest unchanged.
;; Operand 0 must be a SUBREG.  The expansion FAILs when tuning for speed
;; on a partial-register-stall target, or when the inner register's mode
;; is not a valid integer mode.
(define_expand "movstrict<mode>"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand"))
        (match_operand:SWI12 1 "general_operand"))]
  ""
{
  gcc_assert (SUBREG_P (operands[0]));
  if ((TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
      || !VALID_INT_MODE_P (GET_MODE (SUBREG_REG (operands[0]))))
    FAIL;
})

(define_insn "*movstrict<mode>_1"
  [(set (strict_low_part
          (match_operand:SWI12 0 "register_operand" "+<r>"))
        (match_operand:SWI12 1 "general_operand" "<r>mn"))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "mov{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "imov")
   (set_attr "mode" "<MODE>")])

;; Zero the low part of a register with a narrow xor.  Only valid after
;; reload; clobbers the flags.
(define_insn "*movstrict<mode>_xor"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
        (match_operand:SWI12 1 "const0_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  "xor{<imodesuffix>}\t%0, %0"
  [(set_attr "type" "alu1")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "0")])

;; Copy bits 8..15 of operand 1 (printed with %h) into the low byte of
;; operand 0.
(define_insn "*movstrictqi_ext_1"
  [(set (strict_low_part
          (match_operand:QI 0 "register_operand" "+Q"))
          (subreg:QI
            (match_operator:SWI248 2 "extract_operator"
              [(match_operand 1 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "mov{b}\t{%h1, %0|%0, %h1}"
  [(set_attr "type" "imov")
   (set_attr "mode" "QI")])

;; Signed bit-field extraction.  Only an 8-bit field at bit position 8
;; is handled; any other size/position FAILs.
(define_expand "extv<mode>"
  [(set (match_operand:SWI24 0 "register_operand")
        (sign_extract:SWI24 (match_operand:SWI24 1 "register_operand")
                            (match_operand:QI 2 "const_int_operand")
                            (match_operand:QI 3 "const_int_operand")))]
  ""
{
  /* Handle extractions from %ah et al.  */
  if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
    FAIL;

  unsigned int regno = reg_or_subregno (operands[1]);

  /* Be careful to expand only with registers having upper parts.  */
  if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
    operands[1] = copy_to_reg (operands[1]);
})

(define_insn "*extv<mode>"
  [(set (match_operand:SWI24 0 "register_operand" "=R")
        (sign_extract:SWI24 (match_operand 1 "int248_register_operand" "Q")
                            (const_int 8)
                            (const_int 8)))]
  ""
  "movs{bl|x}\t{%h1, %k0|%k0, %h1}"
  [(set_attr "type" "imovx")
   (set_attr "mode" "SI")])

;; Split sign-extension of single least significant bit as and x,$1;neg x
(define_insn_and_split "*extv<mode>_1_0"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (sign_extract:SWI48 (match_operand:SWI48 1 "register_operand" "0")
                            (const_int 1)
                            (const_int 0)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  ""
  [(parallel [(set (match_dup 0) (and:SWI48 (match_dup 1) (const_int 1)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0) (neg:SWI48 (match_dup 0)))
              (clobber (reg:CC FLAGS_REG))])])

;; Unsigned bit-field extraction.  ix86_expand_pextr is tried first;
;; otherwise only an 8-bit field at bit position 8 is handled.
(define_expand "extzv<mode>"
  [(set (match_operand:SWI248 0 "register_operand")
        (zero_extract:SWI248 (match_operand:SWI248 1 "register_operand")
                             (match_operand:QI 2 "const_int_operand")
                             (match_operand:QI 3 "const_int_operand")))]
  ""
{
  if (ix86_expand_pextr (operands))
    DONE;

  /* Handle extractions from %ah et al.  */
  if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
    FAIL;

  unsigned int regno = reg_or_subregno (operands[1]);

  /* Be careful to expand only with registers having upper parts.  */
  if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
    operands[1] = copy_to_reg (operands[1]);
})

(define_insn "*extzv<mode>"
  [(set (match_operand:SWI248 0 "register_operand" "=R")
        (zero_extract:SWI248 (match_operand 1 "int248_register_operand" "Q")
                             (const_int 8)
                             (const_int 8)))]
  ""
  "movz{bl|x}\t{%h1, %k0|%k0, %h1}"
  [(set_attr "type" "imovx")
   (set_attr "mode" "SI")])

;; QImode result of extracting bits 8..15.  Emitted as movzbl when the
;; "type" attribute is imovx, as a plain movb otherwise.
(define_insn "*extzvqi"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn,?R")
        (subreg:QI
          (match_operator:SWI248 2 "extract_operator"
            [(match_operand 1 "int248_register_operand" "Q,Q")
             (const_int 8)
             (const_int 8)]) 0))]
  ""
{
  switch (get_attr_type (insn))
    {
    case TYPE_IMOVX:
      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
    default:
      return "mov{b}\t{%h1, %0|%0, %h1}";
    }
}
  [(set_attr "addr" "gpr8,*")
   (set (attr "type")
     (if_then_else (and (match_operand:QI 0 "register_operand")
                        (ior (not (match_operand:QI 0 "QIreg_operand"))
                             (match_test "TARGET_MOVX")))
        (const_string "imovx")
        (const_string "imov")))
   (set (attr "mode")
     (if_then_else (eq_attr "type" "imovx")
        (const_string "SI")
        (const_string "QI")))])

;; Bit-field insertion.  ix86_expand_pinsr is tried first; otherwise
;; only an 8-bit field at bit position 8 is handled.
(define_expand "insv<mode>"
  [(set (zero_extract:SWI248 (match_operand:SWI248 0 "register_operand")
                             (match_operand:QI 1 "const_int_operand")
                             (match_operand:QI 2 "const_int_operand"))
        (match_operand:SWI248 3 "register_operand"))]
  ""
{
  rtx dst;

  if (ix86_expand_pinsr (operands))
    DONE;

  /* Handle insertions to %ah et al.  */
  if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
    FAIL;

  unsigned int regno = reg_or_subregno (operands[0]);

  /* Be careful to expand only with registers having upper parts.  */
  if (regno <= LAST_VIRTUAL_REGISTER && !QI_REGNO_P (regno))
    dst = copy_to_reg (operands[0]);
  else
    dst = operands[0];

  emit_insn (gen_insv_1 (<MODE>mode, dst, operands[3]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})

;; Store the low byte of operand 1 into bits 8..15 of operand 0.  A
;; constant source is first narrowed to QImode.
(define_insn "@insv<mode>_1"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q")
          (const_int 8)
          (const_int 8))
        (match_operand:SWI248 1 "general_operand" "QnBn"))]
  ""
{
  if (CONST_INT_P (operands[1]))
    operands[1] = gen_int_mode (INTVAL (operands[1]), QImode);
  return "mov{b}\t{%b1, %h0|%h0, %b1}";
}
  [(set_attr "addr" "gpr8")
   (set_attr "type" "imov")
   (set_attr "mode" "QI")])

;; As @insv<mode>_1, with the source given as a subreg of a QImode
;; operand.
(define_insn "*insvqi_1"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (match_operand:QI 1 "general_operand" "QnBn") 0))]
  ""
  "mov{b}\t{%1, %h0|%h0, %1}"
  [(set_attr "addr" "gpr8")
   (set_attr "type" "imov")
   (set_attr "mode" "QI")])

;; Eliminate redundant insv, e.g. xorl %eax,%eax; movb $0, %ah
(define_peephole2
  [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
                   (const_int 0))
              (clobber (reg:CC FLAGS_REG))])
   (set (zero_extract:SWI248 (match_operand 1 "int248_register_operand")
                             (const_int 8)
                             (const_int 8))
        (const_int 0))]
  "REGNO (operands[0]) == REGNO (operands[1])"
  [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
                   (const_int 0))
              (clobber (reg:CC FLAGS_REG))])])

;; Combine movl followed by movb.
(define_peephole2
  [(set (match_operand:SWI48 0 "general_reg_operand")
        (match_operand:SWI48 1 "const_int_operand"))
   (set (zero_extract:SWI248 (match_operand 2 "int248_register_operand")
                             (const_int 8)
                             (const_int 8))
        (match_operand:SWI248 3 "const_int_operand"))]
  "REGNO (operands[0]) == REGNO (operands[2])"
  [(set (match_operand:SWI48 0 "general_reg_operand")
        (match_dup 4))]
{
  /* Replace bits 8..15 of the first constant with the low byte of the
     inserted constant.  Two mode iterators appear in this pattern, so
     the mode attribute is qualified with the SWI48 iterator.  */
  HOST_WIDE_INT tmp = INTVAL (operands[1]) & ~HOST_WIDE_INT_C (0xff00);
  tmp |= (INTVAL (operands[3]) & 0xff) << 8;
  operands[4] = gen_int_mode (tmp, <SWI48:MODE>mode);
})

;; Copy bits 8..15 of operand 1 into bits 8..15 of operand 0.
(define_insn "*insvqi_2"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q")
          (const_int 8)
          (const_int 8))
        (match_operator:SWI248 2 "extract_operator"
          [(match_operand 1 "int248_register_operand" "Q")
           (const_int 8)
           (const_int 8)]))]
  ""
  "mov{b}\t{%h1, %h0|%h0, %h1}"
  [(set_attr "type" "imov")
   (set_attr "mode" "QI")])

;; Same move when the source is written as a right shift by 8.
(define_insn "*insvqi_3"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q")
          (const_int 8)
          (const_int 8))
        (any_shiftrt:SWI248
          (match_operand:SWI248 1 "register_operand" "Q")
          (const_int 8)))]
  ""
  "mov{b}\t{%h1, %h0|%h0, %h1}"
  [(set_attr "type" "imov")
   (set_attr "mode" "QI")])

(define_code_iterator any_or_plus [plus ior xor])

;; TImode: operand 1 masked by a two-element wide constant whose element 0
;; is -1 and element 1 is 0, combined with (zero_extend operand 2) << 64.
;; After reload this is split via split_double_concat using the DImode
;; lowpart of operand 1 and operand 2.
(define_insn_and_split "*insvti_highpart_1"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=ro,r,r,&r")
        (any_or_plus:TI
          (and:TI
            (match_operand:TI 1 "nonimmediate_operand" "r,m,r,m")
            (match_operand:TI 3 "const_scalar_int_operand" "n,n,n,n"))
          (ashift:TI
            (zero_extend:TI
              (match_operand:DI 2 "nonimmediate_operand" "r,r,m,m"))
            (const_int 64))))]
  "TARGET_64BIT
   && CONST_WIDE_INT_P (operands[3])
   && CONST_WIDE_INT_NUNITS (operands[3]) == 2
   && CONST_WIDE_INT_ELT (operands[3], 0) == -1
   && CONST_WIDE_INT_ELT (operands[3], 1) == 0"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  operands[4] = gen_lowpart (DImode, operands[1]);
  split_double_concat (TImode, operands[0], operands[4], operands[2]);
  DONE;
})

;; TImode: the mirrored case, where the mask has element 0 == 0 and
;; element 1 == -1 and operand 2 is zero-extended without a shift.
(define_insn_and_split "*insvti_lowpart_1"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=ro,r,r,&r")
        (any_or_plus:TI
          (and:TI
            (match_operand:TI 1 "nonimmediate_operand" "r,m,r,m")
            (match_operand:TI 3 "const_scalar_int_operand" "n,n,n,n"))
          (zero_extend:TI
            (match_operand:DI 2 "nonimmediate_operand" "r,r,m,m"))))]
  "TARGET_64BIT
   && CONST_WIDE_INT_P (operands[3])
   && CONST_WIDE_INT_NUNITS (operands[3]) == 2
   && CONST_WIDE_INT_ELT (operands[3], 0) == 0
   && CONST_WIDE_INT_ELT (operands[3], 1) == -1"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  operands[4] = gen_highpart (DImode, operands[1]);
  split_double_concat (TImode, operands[0], operands[2], operands[4]);
  DONE;
})

;; DImode variant for !TARGET_64BIT with mask 0xffffffff00000000.
(define_insn_and_split "*insvdi_lowpart_1"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=ro,r,r,&r")
        (any_or_plus:DI
          (and:DI
            (match_operand:DI 1 "nonimmediate_operand" "r,m,r,m")
            (match_operand:DI 3 "const_int_operand" "n,n,n,n"))
          (zero_extend:DI
            (match_operand:SI 2 "nonimmediate_operand" "r,r,m,m"))))]
  "!TARGET_64BIT
   && CONST_INT_P (operands[3])
   && UINTVAL (operands[3]) == 0xffffffff00000000ll"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  operands[4] = gen_highpart (SImode, operands[1]);
  split_double_concat (DImode, operands[0], operands[2], operands[4]);
  DONE;
})

;; Floating point push instructions.

(define_insn "*pushtf"
  [(set (match_operand:TF 0 "push_operand" "=<,<")
        (match_operand:TF 1 "general_no_elim_operand" "v,*roC"))]
  "TARGET_64BIT || TARGET_SSE"
{
  /* This insn should be already split before reg-stack.  */
  return "#";
}
  [(set_attr "isa" "*,x64")
   (set_attr "type" "multi")
   (set_attr "unit" "sse,*")
   (set_attr "mode" "TF,DI")])

;; %%% Kill this when call knows how to work this out.
(define_split
  [(set (match_operand:TF 0 "push_operand")
        (match_operand:TF 1 "sse_reg_operand"))]
  "TARGET_SSE && reload_completed"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
   (set (match_dup 0) (match_dup 1))]
{
  /* Preserve memory attributes. */
  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})

(define_insn "*pushxf"
  [(set (match_operand:XF 0 "push_operand" "=<,<,<,<,<")
        (match_operand:XF 1 "general_no_elim_operand" "f,r,*r,oF,oC"))]
  ""
{
  /* This insn should be already split before reg-stack.  */
  return "#";
}
  [(set_attr "isa" "*,*,*,nox64,x64")
   (set_attr "type" "multi")
   (set_attr "unit" "i387,*,*,*,*")
   (set (attr "mode")
        (cond [(eq_attr "alternative" "1,2,3,4")
                 (if_then_else (match_test "TARGET_64BIT")
                   (const_string "DI")
                   (const_string "SI"))
              ]
              (const_string "XF")))
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "1")
              (symbol_ref "false")]
           (symbol_ref "true")))])

;; %%% Kill this when call knows how to work this out.
(define_split
  [(set (match_operand:XF 0 "push_operand")
        (match_operand:XF 1 "fp_register_operand"))]
  "reload_completed"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
   (set (match_dup 0) (match_dup 1))]
{
  operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (XFmode)));
  /* Preserve memory attributes. */
  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})

(define_insn "*pushdf"
  [(set (match_operand:DF 0 "push_operand" "=<,<,<,<,<,<")
        (match_operand:DF 1 "general_no_elim_operand" "f,r,*r,oF,rmC,v"))]
  ""
{
  /* This insn should be already split before reg-stack.  */
  return "#";
}
  [(set_attr "isa" "*,nox64,nox64,nox64,x64,sse2")
   (set_attr "type" "multi")
   (set_attr "unit" "i387,*,*,*,*,sse")
   (set_attr "mode" "DF,SI,SI,SI,DI,DF")
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "1")
              (symbol_ref "false")]
           (symbol_ref "true")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "1")
              (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")]
           (symbol_ref "true")))])

;; %%% Kill this when call knows how to work this out.
(define_split
  [(set (match_operand:DF 0 "push_operand")
        (match_operand:DF 1 "any_fp_register_operand"))]
  "reload_completed"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
   (set (match_dup 0) (match_dup 1))]
{
  /* Preserve memory attributes. */
  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})

(define_mode_iterator HFBF [HF BF])

;; 64-bit push of an HF/BF value.  Only alternative 0 may reach output;
;; the asserted-against alternative 1 must have been split earlier.
(define_insn "*push<mode>_rex64"
  [(set (match_operand:HFBF 0 "push_operand" "=X,X")
        (match_operand:HFBF 1 "nonmemory_no_elim_operand" "r,x"))]
  "TARGET_64BIT"
{
  /* Anything else should be already split before reg-stack.  */
  gcc_assert (which_alternative == 0);
  return "push{q}\t%q1";
}
  [(set_attr "isa" "*,sse4")
   (set_attr "type" "push,multi")
   (set_attr "mode" "DI,TI")])

;; 32-bit counterpart of the pattern above.
(define_insn "*push<mode>"
  [(set (match_operand:HFBF 0 "push_operand" "=X,X")
        (match_operand:HFBF 1 "general_no_elim_operand" "rmF,x"))]
  "!TARGET_64BIT"
{
  /* Anything else should be already split before reg-stack.  */
  gcc_assert (which_alternative == 0);
  return "push{l}\t%k1";
}
  [(set_attr "isa" "*,sse4")
   (set_attr "type" "push,multi")
   (set_attr "mode" "SI,TI")])

;; APX push2: push two DImode registers as one TImode push.
(define_insn "push2_di"
  [(set (match_operand:TI 0 "push_operand" "=<")
        (unspec:TI [(match_operand:DI 1 "register_operand" "r")
                    (match_operand:DI 2 "register_operand" "r")]
                   UNSPEC_APXPUSH2))]
  "TARGET_APX_PUSH2POP2"
  "push2\t{%2, %1|%1, %2}"
  [(set_attr "mode" "TI")
   (set_attr "type" "multi")
   (set_attr "prefix" "evex")])

;; APX pop2: pop one TImode stack slot into two DImode registers.
(define_insn "pop2_di"
  [(parallel [(set (match_operand:DI 0 "register_operand" "=r")
                   (unspec:DI [(match_operand:TI 1 "pop_operand" ">")]
                              UNSPEC_APXPOP2_LOW))
              (set (match_operand:DI 2 "register_operand" "=r")
                   (unspec:DI [(const_int 0)] UNSPEC_APXPOP2_HIGH))])]
  "TARGET_APX_PUSH2POP2"
  "pop2\t{%2, %0|%0, %2}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")])

;; pushp is marked with UNSPEC_APX_PPX, so it is gated on TARGET_APX_PPX
;; like popp_di, push2p_di and pop2p_di below rather than on plain
;; TARGET_64BIT.
(define_insn "pushp_di"
  [(set (match_operand:DI 0 "push_operand" "=<")
        (match_operand:DI 1 "register_operand" "r"))
   (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)]
  "TARGET_APX_PPX"
  "pushp\t%1"
  [(set_attr "mode" "DI")])

(define_insn "popp_di"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (match_operand:DI 1 "pop_operand" ">"))
   (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)]
  "TARGET_APX_PPX"
  "popp\t%0"
  [(set_attr "mode" "DI")])

;; push2 / pop2 variants carrying the UNSPEC_APX_PPX marker.
(define_insn "push2p_di"
  [(set (match_operand:TI 0 "push_operand" "=<")
        (unspec:TI [(match_operand:DI 1 "register_operand" "r")
                    (match_operand:DI 2 "register_operand" "r")]
                   UNSPEC_APXPUSH2))
   (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)]
  "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX"
  "push2p\t{%2, %1|%1, %2}"
  [(set_attr "mode" "TI")
   (set_attr "type" "multi")
   (set_attr "prefix" "evex")])

(define_insn "pop2p_di"
  [(parallel [(set (match_operand:DI 0 "register_operand" "=r")
                   (unspec:DI [(match_operand:TI 1 "pop_operand" ">")]
                              UNSPEC_APXPOP2_LOW))
              (set (match_operand:DI 2 "register_operand" "=r")
                   (unspec:DI [(const_int 0)] UNSPEC_APXPOP2_HIGH))
              (unspec:DI [(const_int 0)] UNSPEC_APX_PPX)])]
  "TARGET_APX_PUSH2POP2 && TARGET_APX_PPX"
  "pop2p\t{%2, %0|%0, %2}"
  [(set_attr "mode" "TI")
   (set_attr "prefix" "evex")])

;; SFmode push on 64-bit targets.  Only alternative 1 is emitted
;; directly; the other alternatives output "#".
(define_insn "*pushsf_rex64"
  [(set (match_operand:SF 0 "push_operand" "=X,X,X")
        (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,v"))]
  "TARGET_64BIT"
{
  /* Anything else should be already split before reg-stack.  */
  if (which_alternative != 1)
    return "#";
  return "push{q}\t%q1";
}
  [(set_attr "type" "multi,push,multi")
   (set_attr "unit" "i387,*,*")
   (set_attr "mode" "SF,DI,SF")])

;; SFmode push on 32-bit targets.
(define_insn "*pushsf"
  [(set (match_operand:SF 0 "push_operand" "=<,<,<")
        (match_operand:SF 1 "general_no_elim_operand" "f,rmF,v"))]
  "!TARGET_64BIT"
{
  /* Anything else should be already split before reg-stack.  */
  if (which_alternative != 1)
    return "#";
  return "push{l}\t%1";
}
  [(set_attr "type" "multi,push,multi")
   (set_attr "unit" "i387,*,*")
   (set_attr "mode" "SF,SI,SF")])

(define_mode_iterator MODESH [SF HF BF])

;; %%% Kill this when call knows how to work this out.
(define_split
  [(set (match_operand:MODESH 0 "push_operand")
        (match_operand:MODESH 1 "any_fp_register_operand"))]
  "reload_completed"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
   (set (match_dup 0) (match_dup 1))]
{
  /* Work out the stack adjustment from the push address: -4 for a
     PRE_DEC address (asserted to be !TARGET_64BIT only), otherwise the
     constant found at XEXP (XEXP (addr, 1), 1).  */
  rtx addr = XEXP (operands[0], 0);
  rtx adjust;

  if (GET_CODE (addr) != PRE_DEC)
    {
      adjust = XEXP (XEXP (addr, 1), 1);
      gcc_assert (CONST_INT_P (adjust));
    }
  else
    {
      gcc_assert (!TARGET_64BIT);
      adjust = GEN_INT (-4);
    }

  operands[2] = adjust;

  /* Preserve memory attributes. */
  operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})

;; Push of an SFmode memory operand for which find_constant_src finds a
;; constant: push that constant instead.
(define_split
  [(set (match_operand:SF 0 "push_operand")
        (match_operand:SF 1 "memory_operand"))]
  "reload_completed
   && find_constant_src (insn)"
  [(set (match_dup 0) (match_dup 2))]
{
  operands[2] = find_constant_src (curr_insn);
})

;; TF/XF/DF pushes from general_gr_operand sources are handed to
;; ix86_split_long_move.
(define_split
  [(set (match_operand 0 "push_operand")
        (match_operand 1 "general_gr_operand"))]
  "reload_completed
   && (GET_MODE (operands[0]) == TFmode
       || GET_MODE (operands[0]) == XFmode
       || GET_MODE (operands[0]) == DFmode)"
  [(const_int 0)]
{
  ix86_split_long_move (operands);
  DONE;
})

;; Floating point move instructions.
(define_expand "movtf"
  [(set (match_operand:TF 0 "nonimmediate_operand")
        (match_operand:TF 1 "nonimmediate_operand"))]
  "TARGET_64BIT || TARGET_SSE"
  "ix86_expand_move (TFmode, operands); DONE;")

;; Move expander for every mode in X87MODEFH.  The pattern name and the
;; mode passed to ix86_expand_move are both substituted per mode.
(define_expand "mov<mode>"
  [(set (match_operand:X87MODEFH 0 "nonimmediate_operand")
        (match_operand:X87MODEFH 1 "general_operand"))]
  ""
  "ix86_expand_move (<MODE>mode, operands); DONE;")

;; TFmode move.  Alternatives 0-2 use SSE registers; alternatives 3 and 4
;; (isa x64) are of type multi and are output as "#".
(define_insn "*movtf_internal"
  [(set (match_operand:TF 0 "nonimmediate_operand" "=v,v ,m,?*r ,!o")
        (match_operand:TF 1 "general_operand" "C ,vm,v,*roF,*rC"))]
  "(TARGET_64BIT || TARGET_SSE)
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
   && (lra_in_progress || reload_completed
       || !CONST_DOUBLE_P (operands[1])
       || (standard_sse_constant_p (operands[1], TFmode) == 1
           && !memory_operand (operands[0], TFmode))
       || (!TARGET_MEMORY_MISMATCH_STALL
           && memory_operand (operands[0], TFmode)))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      return ix86_output_ssemov (insn, operands);

    case TYPE_MULTI:
      return "#";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "*,*,*,x64,x64")
   (set_attr "type" "sselog1,ssemov,ssemov,multi,multi")
   (set (attr "prefix")
     (if_then_else (eq_attr "type" "sselog1,ssemov")
       (const_string "maybe_vex")
       (const_string "orig")))
   (set (attr "mode")
        (cond [(eq_attr "alternative" "3,4")
                 (const_string "DI")
               (match_test "TARGET_AVX")
                 (const_string "TI")
               (ior (not (match_test "TARGET_SSE2"))
                    (match_test "optimize_function_for_size_p (cfun)"))
                 (const_string "V4SF")
               (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                 (const_string "V4SF")
               (and (eq_attr "alternative" "2")
                    (match_test "TARGET_SSE_TYPELESS_STORES"))
                 (const_string "V4SF")
               ]
               (const_string "TI")))])

;; After reload, TFmode moves between general-register-class operands go
;; through ix86_split_long_move.
(define_split
  [(set (match_operand:TF 0 "nonimmediate_gr_operand")
        (match_operand:TF 1 "general_gr_operand"))]
  "reload_completed"
  [(const_int 0)]
  "ix86_split_long_move (operands); DONE;")

;; Possible store forwarding (partial memory) stall
;; in alternatives 4, 6, 7 and 8.
(define_insn "*movxf_internal"
  [(set (match_operand:XF 0 "nonimmediate_operand"
         "=f,m,f,?r ,!o,?*r ,!o,!o,!o,r ,o ,o")
        (match_operand:XF 1 "general_operand"
         "fm,f,G,roF,r ,*roF,*r,F ,C ,roF,rF,rC"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && (lra_in_progress || reload_completed
       || !CONST_DOUBLE_P (operands[1])
       || ((optimize_function_for_size_p (cfun)
            || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
           && standard_80387_constant_p (operands[1]) > 0
           && !memory_operand (operands[0], XFmode))
       || (!TARGET_MEMORY_MISMATCH_STALL
           && memory_operand (operands[0], XFmode))
       || !TARGET_HARD_XF_REGS)"
{
  /* Alternatives of type "multi" are output as "#".  */
  if (get_attr_type (insn) == TYPE_MULTI)
    return "#";

  /* The only other type this pattern assigns is "fmov".  */
  if (get_attr_type (insn) != TYPE_FMOV)
    gcc_unreachable ();

  /* Alternative 2 loads a standard x87 constant (constraint G);
     everything else is a plain x87 register move.  */
  return (which_alternative == 2
          ? standard_80387_constant_opcode (operands[1])
          : output_387_reg_move (insn, operands));
}
  [(set (attr "isa")
        (cond [(eq_attr "alternative" "7,10")
                 (const_string "nox64")
               (eq_attr "alternative" "8,11")
                 (const_string "x64")
              ]
              (const_string "*")))
   (set (attr "type")
        (cond [(eq_attr "alternative" "3,4,5,6,7,8,9,10,11")
                 (const_string "multi")
              ]
              (const_string "fmov")))
   (set (attr "mode")
        (cond [(eq_attr "alternative" "3,4,5,6,7,8,9,10,11")
                 (if_then_else (match_test "TARGET_64BIT")
                   (const_string "DI")
                   (const_string "SI"))
              ]
              (const_string "XF")))
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "3,4")
              (symbol_ref "false")]
           (symbol_ref "true")))
   ;; Alternatives 9-11 are enabled only without TARGET_HARD_XF_REGS;
   ;; all other alternatives are disabled in that case.
   (set (attr "enabled")
     (cond [(eq_attr "alternative" "9,10,11")
              (if_then_else
                (match_test "TARGET_HARD_XF_REGS")
                (symbol_ref "false")
                (const_string "*"))
            (not (match_test "TARGET_HARD_XF_REGS"))
              (symbol_ref "false")
           ]
           (const_string "*")))])

;; After reload, XFmode moves between general-register-class operands go
;; through ix86_split_long_move.
(define_split
  [(set (match_operand:XF 0 "nonimmediate_gr_operand")
        (match_operand:XF 1 "general_gr_operand"))]
  "reload_completed"
  [(const_int 0)]
{
  ix86_split_long_move (operands);
  DONE;
})

;; Possible store forwarding (partial memory) stall in alternatives 4, 6 and 7.
(define_insn "*movdf_internal"
  [(set (match_operand:DF 0 "nonimmediate_operand"
    "=Yf*f,m ,Yf*f,?r ,!o,?*r ,!o,!o,?r,?m,?r,?r,Yv,v,v,m,*x,*x,*x,m ,?r,?v,r ,o ,r ,m")
        (match_operand:DF 1 "general_operand"
    "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C ,v,m,v,C ,*x,m ,*x, v, r,roF,rF,rmF,rC"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && (lra_in_progress || reload_completed
       || !CONST_DOUBLE_P (operands[1])
       || ((optimize_function_for_size_p (cfun)
            || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
           && IS_STACK_MODE (DFmode)
           && standard_80387_constant_p (operands[1]) > 0
           && !memory_operand (operands[0], DFmode))
       || (TARGET_SSE2 && TARGET_SSE_MATH
           && standard_sse_constant_p (operands[1], DFmode) == 1
           && !memory_operand (operands[0], DFmode))
       || ((TARGET_64BIT || !TARGET_MEMORY_MISMATCH_STALL)
           && memory_operand (operands[0], DFmode))
       || !TARGET_HARD_DF_REGS)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_FMOV:
      if (which_alternative == 2)
        return standard_80387_constant_opcode (operands[1]);
      return output_387_reg_move (insn, operands);

    case TYPE_MULTI:
      return "#";

    case TYPE_IMOV:
      if (get_attr_mode (insn) == MODE_SI)
        return "mov{l}\t{%1, %k0|%k0, %1}";
      else if (which_alternative == 11)
        return "movabs{q}\t{%1, %0|%0, %1}";
      else
        return "mov{q}\t{%1, %0|%0, %1}";

    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      return ix86_output_ssemov (insn, operands);

    default:
      gcc_unreachable ();
    }
}
  [(set (attr "isa")
        (cond [(eq_attr "alternative" "3,4,5,6,7,22,23")
                 (const_string "nox64")
               (eq_attr "alternative" "8,9,10,11,24,25")
                 (const_string "x64")
               (eq_attr "alternative" "12,13,14,15")
                 (const_string "sse2")
               (eq_attr "alternative" "20,21")
                 (const_string "x64_sse2")
              ]
              (const_string "*")))
   (set (attr "type")
        (cond [(eq_attr "alternative" "0,1,2")
                 (const_string "fmov")
               (eq_attr "alternative" "3,4,5,6,7,22,23")
                 (const_string "multi")
               (eq_attr "alternative" "8,9,10,11,24,25")
                 (const_string "imov")
               (eq_attr "alternative" "12,16")
                 (const_string "sselog1")
              ]
              (const_string "ssemov")))
   (set (attr "modrm")
     (if_then_else (eq_attr "alternative" "11")
       (const_string "0")
       (const_string "*")))
   (set (attr "length_immediate")
     (if_then_else (eq_attr "alternative" "11")
       (const_string "8")
       (const_string "*")))
   (set (attr "prefix")
     (if_then_else (eq_attr "type" "sselog1,ssemov")
       (const_string "maybe_vex")
       (const_string "orig")))
   (set (attr "prefix_data16")
     (if_then_else
       (ior (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI"))
            (eq_attr "mode" "V1DF"))
       (const_string "1")
       (const_string "*")))
   (set (attr "mode")
        (cond [(eq_attr "alternative" "3,4,5,6,7,10,22,23")
                 (const_string "SI")
               (eq_attr "alternative" "8,9,11,20,21,24,25")
                 (const_string "DI")

               /* xorps is one byte shorter for non-AVX targets.  */
               (eq_attr "alternative" "12,16")
                 (cond [(match_test "TARGET_AVX")
                          (const_string "V2DF")
                        (ior (not (match_test "TARGET_SSE2"))
                             (match_test "optimize_function_for_size_p (cfun)"))
                          (const_string "V4SF")
                        (match_test "TARGET_SSE_LOAD0_BY_PXOR")
                          (const_string "TI")
                       ]
                       (const_string "V2DF"))

               /* For architectures resolving dependencies on
                  whole SSE registers use movapd to break dependency
                  chains, otherwise use short move to avoid extra work.  */

               /* movaps is one byte shorter for non-AVX targets.  */
               (eq_attr "alternative" "13,17")
                 (cond [(match_test "TARGET_AVX512VL")
                          (const_string "V2DF")
                        (match_test "TARGET_AVX512F")
                          (const_string "DF")
                        (match_test "TARGET_AVX")
                          (const_string "V2DF")
                        (ior (not (match_test "TARGET_SSE2"))
                             (match_test "optimize_function_for_size_p (cfun)"))
                          (const_string "V4SF")
                        (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                          (const_string "V4SF")
                        (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
                          (const_string "V2DF")
                       ]
                       (const_string "DF"))

               /* For architectures resolving dependencies on register
                  parts we may avoid extra work to zero out upper part
                  of register.  */
               (eq_attr "alternative" "14,18")
                 (cond [(not (match_test "TARGET_SSE2"))
                          (const_string "V2SF")
                        (match_test "TARGET_AVX")
                          (const_string "DF")
                        (match_test "TARGET_SSE_SPLIT_REGS")
                          (const_string "V1DF")
                       ]
                       (const_string "DF"))

               (and (eq_attr "alternative" "15,19")
                    (not (match_test "TARGET_SSE2")))
                 (const_string "V2SF")
              ]
              (const_string "DF")))
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "3,4")
              (symbol_ref "false")]
           (symbol_ref "true")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "3,4")
              (symbol_ref "TARGET_INTEGER_DFMODE_MOVES")
            (eq_attr "alternative" "20")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
            (eq_attr "alternative" "21")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
           ]
           (symbol_ref "true")))
   (set (attr "enabled")
     (cond [(eq_attr "alternative" "22,23,24,25")
              (if_then_else
                (match_test "TARGET_HARD_DF_REGS")
                (symbol_ref "false")
                (const_string "*"))
            (not (match_test "TARGET_HARD_DF_REGS"))
              (symbol_ref "false")
           ]
           (const_string "*")))])

;; On 32-bit targets, DFmode moves between general-register-class
;; operands go through ix86_split_long_move after reload.
(define_split
  [(set (match_operand:DF 0 "nonimmediate_gr_operand")
        (match_operand:DF 1 "general_gr_operand"))]
  "!TARGET_64BIT && reload_completed"
  [(const_int 0)]
  "ix86_split_long_move (operands); DONE;")

(define_insn "*movsf_internal"
  [(set (match_operand:SF 0 "nonimmediate_operand"
          "=Yf*f,m ,Yf*f,?r ,?m,Yv,v,v,m,?r,?v,!*y,!*y,!m,!r,!*y,r ,m")
        (match_operand:SF 1 "general_operand"
          "Yf*fm,Yf*f,G ,rmF,rF,C ,v,m,v,v ,r ,*y ,m ,*y,*y,r ,rmF,rF"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && (lra_in_progress || reload_completed
       || !CONST_DOUBLE_P (operands[1])
       || ((optimize_function_for_size_p (cfun)
            || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC))
           && IS_STACK_MODE (SFmode)
           && standard_80387_constant_p (operands[1]) > 0)
       || (TARGET_SSE && TARGET_SSE_MATH
           && standard_sse_constant_p (operands[1], SFmode) == 1)
       || memory_operand (operands[0], SFmode)
       || !TARGET_HARD_SF_REGS)"
{
  switch (get_attr_type (insn))
    {
    case TYPE_FMOV:
      if (which_alternative == 2)
        return standard_80387_constant_opcode (operands[1]);
      return output_387_reg_move (insn, operands);

    case TYPE_IMOV:
      return "mov{l}\t{%1, %0|%0, %1}";

    case TYPE_SSELOG1:
      return standard_sse_constant_opcode (insn, operands);

    case TYPE_SSEMOV:
      return ix86_output_ssemov (insn, operands);

    case TYPE_MMXMOV:
      switch (get_attr_mode (insn))
        {
        case MODE_DI:
          return "movq\t{%1, %0|%0, %1}";
        case MODE_SI:
          return "movd\t{%1, %0|%0, %1}";

        default:
          gcc_unreachable ();
        }

    default:
      gcc_unreachable ();
    }
}
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "9,10")
              (const_string "sse2")
           ]
           (const_string "*")))
   (set (attr "type")
        (cond [(eq_attr "alternative" "0,1,2")
                 (const_string "fmov")
               (eq_attr "alternative" "3,4,16,17")
                 (const_string "imov")
               (eq_attr "alternative" "5")
                 (const_string "sselog1")
               (eq_attr "alternative" "11,12,13,14,15")
                 (const_string "mmxmov")
              ]
              (const_string "ssemov")))
   (set (attr "prefix")
     (if_then_else (eq_attr "type" "sselog1,ssemov")
       (const_string "maybe_vex")
       (const_string "orig")))
   (set (attr "prefix_data16")
     (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI"))
       (const_string "1")
       (const_string "*")))
   (set (attr "mode")
        (cond [(eq_attr "alternative" "3,4,9,10,12,13,14,15,16,17")
                 (const_string "SI")
               (eq_attr "alternative" "11")
                 (const_string "DI")
               (eq_attr "alternative" "5")
                 (cond [(and (match_test "TARGET_AVX512F && TARGET_EVEX512")
                             (not (match_test "TARGET_PREFER_AVX256")))
                          (const_string "V16SF")
                        (match_test "TARGET_AVX")
                          (const_string "V4SF")
                        (ior (not (match_test "TARGET_SSE2"))
                             (match_test "optimize_function_for_size_p (cfun)"))
                          (const_string "V4SF")
                        (match_test "TARGET_SSE_LOAD0_BY_PXOR")
                          (const_string "TI")
                       ]
                       (const_string "V4SF"))

               /* For architectures resolving dependencies on
                  whole SSE registers use APS move to break dependency
                  chains, otherwise use short move to avoid extra work.

                  Do the same for architectures resolving dependencies on
                  the parts.  While in DF mode it is better to always handle
                  just register parts, the SF mode is different due to lack
                  of instructions to load just part of the register.  It is
                  better to maintain the whole registers in single format
                  to avoid problems on using packed logical operations.  */
               (eq_attr "alternative" "6")
                 (cond [(match_test "TARGET_AVX512VL")
                          (const_string "V4SF")
                        (match_test "TARGET_AVX512F")
                          (const_string "SF")
                        (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
                             (match_test "TARGET_SSE_SPLIT_REGS"))
                          (const_string "V4SF")
                       ]
                       (const_string "SF"))
              ]
              (const_string "SF")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "9,14")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
            (eq_attr "alternative" "10,15")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
           ]
           (symbol_ref "true")))
   (set (attr "enabled")
     (cond [(eq_attr "alternative" "16,17")
              (if_then_else
                (match_test "TARGET_HARD_SF_REGS")
                (symbol_ref "false")
                (const_string "*"))
            (not (match_test "TARGET_HARD_SF_REGS"))
              (symbol_ref "false")
           ]
           (const_string "*")))])

;; Extra source constraint letter for alternative 3 of the HF/BF move:
;; "F" for HF, nothing for BF.
(define_mode_attr hfbfconstf
 [(HF "F") (BF "")])

;; HF/BF move.  The pattern name, the constraint of alternative 3 and the
;; modes used in the condition are substituted per iterator mode.
(define_insn "*mov<mode>_internal"
 [(set (match_operand:HFBF 0 "nonimmediate_operand"
         "=?r,?r,?r,?m ,Yv,v,?r,jm,m,?v,v")
       (match_operand:HFBF 1 "general_operand"
         "r ,F ,m ,r<hfbfconstf>,C ,v, v,v ,v,r ,m"))]
 "!(MEM_P (operands[0]) && MEM_P (operands[1]))
  && (lra_in_progress
      || reload_completed
      || !CONST_DOUBLE_P (operands[1])
      || (TARGET_SSE2
          && standard_sse_constant_p (operands[1], <MODE>mode) == 1)
      || memory_operand (operands[0], <MODE>mode))"
{
  switch (get_attr_type (insn))
    {
    case TYPE_IMOVX:
      /* movzwl is faster than movw on p2 due to partial word stalls,
         though not as fast as an aligned movl.  */
      return "movz{wl|x}\t{%1, %k0|%k0, %1}";

    case TYPE_SSEMOV:
      return ix86_output_ssemov (insn, operands);

    case TYPE_SSELOG1:
      if (satisfies_constraint_C (operands[1]))
        return standard_sse_constant_opcode (insn, operands);

      if (SSE_REG_P (operands[0]))
        return "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}";
      else
        return "%vpextrw\t{$0, %1, %0|%0, %1, 0}";

    default:
      if (get_attr_mode (insn) == MODE_SI)
        return "mov{l}\t{%k1, %k0|%k0, %k1}";
      else
        return "mov{w}\t{%1, %0|%0, %1}";
    }
}
  [(set (attr "isa")
        (cond [(eq_attr "alternative" "4,5,6,9,10")
                 (const_string "sse2")
               (eq_attr "alternative" "7")
                 (const_string "sse4_noavx")
               (eq_attr "alternative" "8")
                 (const_string "avx")
              ]
              (const_string "*")))
   (set (attr "addr")
        (if_then_else (eq_attr "alternative" "7")
                      (const_string "gpr16")
                      (const_string "*")))
   (set (attr "type")
        (cond [(eq_attr "alternative" "4")
                 (const_string "sselog1")
               (eq_attr "alternative" "5,6,9")
                 (const_string "ssemov")
               (eq_attr "alternative" "7,8,10")
                 (if_then_else
                   (match_test ("TARGET_AVX512FP16"))
                   (const_string "ssemov")
                   (const_string "sselog1"))
               (match_test "optimize_function_for_size_p (cfun)")
                 (const_string "imov")
               (and (eq_attr "alternative" "0")
                    (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
                         (not (match_test "TARGET_HIMODE_MATH"))))
                 (const_string "imov")
               (and (eq_attr "alternative" "1,2")
                    (match_operand:HI 1 "aligned_operand"))
                 (const_string "imov")
               (and (match_test "TARGET_MOVX")
                    (eq_attr "alternative" "0,2"))
                 (const_string "imovx")
              ]
              (const_string "imov")))
   (set (attr "prefix")
        (cond [(eq_attr "alternative" "4,5,6,7,8,9,10")
                 (const_string "maybe_vex")
              ]
              (const_string "orig")))
   (set (attr "mode")
        (cond [(eq_attr "alternative" "4")
                 (const_string "V4SF")
               (eq_attr "alternative" "6,9")
                 (if_then_else
                   (match_test "TARGET_AVX512FP16")
                   (const_string "HI")
                   (const_string "SI"))
               (eq_attr "alternative" "7,8,10")
                 (if_then_else
                   (match_test "TARGET_AVX512FP16")
                   (const_string "HI")
                   (const_string "TI"))
               (eq_attr "alternative" "5")
                 (cond [(match_test "TARGET_AVX512VL")
                          (const_string "V4SF")
                        (match_test "TARGET_AVX512FP16")
                          (const_string "HF")
                        (match_test "TARGET_AVX512F")
                          (const_string "SF")
                        (match_test "TARGET_AVX")
                          (const_string "V4SF")
                        (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
                             (match_test "TARGET_SSE_SPLIT_REGS"))
                          (const_string "V4SF")
                       ]
                       (const_string "SF"))
               (eq_attr "type" "imovx")
                 (const_string "SI")
               (and (eq_attr "alternative" "1,2")
                    (match_operand:HI 1 "aligned_operand"))
                 (const_string "SI")
               (and (eq_attr "alternative" "0")
                    (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
                         (not (match_test "TARGET_HIMODE_MATH"))))
                 (const_string "SI")
              ]
              (const_string "HI")))
   ;; Alternative 1 is disabled for BFmode.
   (set (attr "enabled")
        (cond [(and (match_test "<MODE>mode == BFmode")
                    (eq_attr "alternative" "1"))
                 (symbol_ref "false")
              ]
              (const_string "*")))])

;; Replace a load from memory by the constant returned by
;; find_constant_src when ix86_standard_x87sse_constant_load_p accepts it.
(define_split
  [(set (match_operand 0 "any_fp_register_operand")
        (match_operand 1 "memory_operand"))]
  "reload_completed
   && (GET_MODE (operands[0]) == TFmode
       || GET_MODE (operands[0]) == XFmode
       || GET_MODE (operands[0]) == DFmode
       || GET_MODE (operands[0]) == SFmode)
   && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
  [(set (match_dup 0) (match_dup 2))]
  "operands[2] = find_constant_src (curr_insn);")

;; Likewise for a float_extend of a memory operand.
(define_split
  [(set (match_operand 0 "any_fp_register_operand")
        (float_extend (match_operand 1 "memory_operand")))]
  "reload_completed
   && (GET_MODE (operands[0]) == TFmode
       || GET_MODE (operands[0]) == XFmode
       || GET_MODE (operands[0]) == DFmode)
   && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
  [(set (match_dup 0) (match_dup 2))]
  "operands[2] = find_constant_src (curr_insn);")

;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence
(define_split
  [(set (match_operand:X87MODEF 0 "fp_register_operand")
        (match_operand:X87MODEF 1 "immediate_operand"))]
  "reload_completed
   && (standard_80387_constant_p (operands[1]) == 8
       || standard_80387_constant_p (operands[1]) == 9)"
  [(set (match_dup 0)(match_dup 1))
   (set (match_dup 0)
        (neg:X87MODEF (match_dup 0)))]
{
  if (real_isnegzero (CONST_DOUBLE_REAL_VALUE (operands[1])))
    operands[1] = CONST0_RTX (<MODE>mode);
  else
    operands[1] = CONST1_RTX (<MODE>mode);
})

;; Exchange two XFmode registers with fxch; the operand printed is
;; whichever one is not the stack top.
(define_insn "*swapxf"
  [(set (match_operand:XF 0 "register_operand" "+f")
        (match_operand:XF 1 "register_operand" "+f"))
   (set (match_dup 1)
        (match_dup 0))]
  "TARGET_80387"
{
  if (STACK_TOP_P (operands[0]))
    return "fxch\t%1";
  else
    return "fxch\t%0";
}
  [(set_attr "type" "fxch")
   (set_attr "mode" "XF")])

;; Zero extension instructions

(define_insn_and_split "zero_extendditi2"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
        (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "rm,r")))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 3) (match_dup 1))
   (set (match_dup 4) (const_int 0))]
  "split_double_mode (TImode, &operands[0], 1, &operands[3], &operands[4]);")

(define_expand "zero_extendsidi2"
  [(set (match_operand:DI 0 "nonimmediate_operand")
        (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))])

(define_insn "*zero_extendsidi2"
  [(set (match_operand:DI 0 "nonimmediate_operand"
                "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,?r,?k")
        (zero_extend:DI
         (match_operand:SI 1 "x86_64_zext_operand"
                "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,?k,?km")))]
  ""
{
  switch (get_attr_type (insn))
    {
    case TYPE_IMOVX:
      if (ix86_use_lea_for_mov (insn, operands))
        return "lea{l}\t{%E1, %k0|%k0, %E1}";
      else
        return "mov{l}\t{%1, %k0|%k0, %1}";

    case TYPE_MULTI:
      return "#";

    case TYPE_MMXMOV:
      return "movd\t{%1, %0|%0, %1}";

    case TYPE_SSEMOV:
      if (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1]))
        {
          if (EXT_REX_SSE_REG_P (operands[0])
              || EXT_REX_SSE_REG_P (operands[1]))
            return "vpmovzxdq\t{%t1, %g0|%g0, %t1}";
          else
            return "%vpmovzxdq\t{%1, %0|%0, %1}";
        }

      if (GENERAL_REG_P (operands[0]))
        return "%vmovd\t{%1, %k0|%k0, %1}";

      return "%vmovd\t{%1, %0|%0, %1}";

    case TYPE_MSKMOV:
      return "kmovd\t{%1, %k0|%k0, %1}";

    default:
      gcc_unreachable ();
    }
}
  [(set (attr "isa")
     (cond [(eq_attr "alternative" "0,1,2")
              (const_string "nox64")
            (eq_attr "alternative" "3")
              (const_string "x64")
            (eq_attr "alternative" "7,8,9")
              (const_string "sse2")
            (eq_attr "alternative" "10")
              (const_string "sse4")
            (eq_attr "alternative" "11")
              (const_string "avx512f")
            (eq_attr "alternative" "12")
              (const_string "x64_avx512bw")
            (eq_attr "alternative" "13")
              (const_string "avx512bw")
           ]
           (const_string "*")))
   (set (attr "mmx_isa")
     (if_then_else (eq_attr "alternative" "5,6")
                   (const_string "native")
                   (const_string "*")))
   (set (attr "type")
     (cond [(eq_attr "alternative" "0,1,2,4")
              (const_string "multi")
            (eq_attr "alternative" "5,6")
              (const_string "mmxmov")
            (eq_attr "alternative" "7")
              (if_then_else (match_test "TARGET_64BIT")
                (const_string "ssemov")
                (const_string "multi"))
            (eq_attr "alternative" "8,9,10,11")
              (const_string "ssemov")
            (eq_attr "alternative" "12,13")
              (const_string "mskmov")
           ]
           (const_string "imovx")))
   (set (attr "prefix_extra")
     (if_then_else (eq_attr "alternative" "10,11")
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix")
     (if_then_else (eq_attr "type" "ssemov")
       (const_string "maybe_vex")
       (const_string "orig")))
   (set (attr "prefix_0f")
     (if_then_else (eq_attr "type" "imovx")
       (const_string "0")
       (const_string "*")))
   (set (attr "mode")
     (cond [(eq_attr "alternative" "5,6")
              (const_string "DI")
            (and (eq_attr "alternative" "7")
                 (match_test "TARGET_64BIT"))
              (const_string "TI")
            (eq_attr "alternative" "8,10,11")
              (const_string "TI")
           ]
           (const_string "SI")))
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "7")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
            (eq_attr "alternative" "5,8")
              (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
           ]
           (symbol_ref "true")))])

;; Memory destination with memory source: only the second half produced
;; by split_double_mode is cleared.
(define_split
  [(set (match_operand:DI 0 "memory_operand")
        (zero_extend:DI (match_operand:SI 1 "memory_operand")))]
  "reload_completed"
  [(set (match_dup 4) (const_int 0))]
  "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")

;; Same register as source and destination: likewise only clear the
;; second half.
(define_split
  [(set (match_operand:DI 0 "general_reg_operand")
        (zero_extend:DI (match_operand:SI 1 "general_reg_operand")))]
  "!TARGET_64BIT && reload_completed
   && REGNO (operands[0]) == REGNO (operands[1])"
  [(set (match_dup 4) (const_int 0))]
  "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")

;; General 32-bit case: move the source into the first half and clear
;; the second half.
(define_split
  [(set (match_operand:DI 0 "nonimmediate_gr_operand")
        (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
  "!TARGET_64BIT && reload_completed
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  [(set (match_dup 3) (match_dup 1))
   (set (match_dup 4) (const_int 0))]
  "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")

(define_mode_attr kmov_isa
  [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])

;; Zero-extend QI/HI to DI on 64-bit targets.  The name, register
;; constraint, opcode suffixes, isa and mode attributes are all
;; substituted per iterator mode.
(define_insn "zero_extend<mode>di2"
  [(set (match_operand:DI 0 "register_operand" "=r,?r,?k")
        (zero_extend:DI
         (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,?k,?km")))]
  "TARGET_64BIT"
  "@
   movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
   kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}
   kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}"
  [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
   (set_attr "type" "imovx,mskmov,mskmov")
   (set_attr "mode" "SI,<MODE>,<MODE>")])

;; Zero-extend QI/HI to SI.  When TARGET_ZERO_EXTEND_WITH_AND applies and
;; the function is optimized for speed, the "_and" variant is emitted.
(define_expand "zero_extend<mode>si2"
  [(set (match_operand:SI 0 "register_operand")
        (zero_extend:SI (match_operand:SWI12 1 "nonimmediate_operand")))]
  ""
{
  if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
    {
      operands[1] = force_reg (<MODE>mode, operands[1]);
      emit_insn (gen_zero_extend<mode>si2_and (operands[0], operands[1]));
      DONE;
    }
})

(define_insn_and_split "zero_extend<mode>si2_and"
  [(set (match_operand:SI 0 "register_operand" "=r,?&<r>")
        (zero_extend:SI
          (match_operand:SWI12 1 "nonimmediate_operand" "0,<r>m")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
{
  /* If source and destination differ, clear the destination and store
     the source into its low part instead of masking.  */
  if (!REG_P (operands[1])
      || REGNO (operands[0]) != REGNO (operands[1]))
    {
      ix86_expand_clear (operands[0]);

      gcc_assert (!TARGET_PARTIAL_REG_STALL);
      emit_insn (gen_rtx_SET
                 (gen_rtx_STRICT_LOW_PART
                  (VOIDmode, gen_lowpart (<MODE>mode, operands[0])),
                  operands[1]));
      DONE;
    }

  operands[2] = GEN_INT (GET_MODE_MASK (<MODE>mode));
}
  [(set_attr "type" "alu1")
   (set_attr "mode" "SI")])

(define_insn "*zero_extend<mode>si2"
  [(set (match_operand:SI 0 "register_operand" "=r,?r,?k")
        (zero_extend:SI
          (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,?k,?km")))]
  "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
  "@
   movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
   kmov<mskmodesuffix>\t{%1, %0|%0, %1}
   kmov<mskmodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "isa" "*,<kmov_isa>,<kmov_isa>")
   (set_attr "type" "imovx,mskmov,mskmov")
   (set_attr "mode" "SI,<MODE>,<MODE>")])

(define_expand "zero_extendqihi2"
  [(set (match_operand:HI 0 "register_operand")
        (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand")))]
  ""
{
  if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
    {
      operands[1] = force_reg (QImode, operands[1]);
      emit_insn (gen_zero_extendqihi2_and (operands[0], operands[1]));
      DONE;
    }
})

(define_insn_and_split "zero_extendqihi2_and"
  [(set (match_operand:HI 0 "register_operand" "=r,?&q")
        (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
              (clobber (reg:CC FLAGS_REG))])]
{
  if (!REG_P (operands[1])
      || REGNO (operands[0]) != REGNO (operands[1]))
    {
      ix86_expand_clear (operands[0]);

      gcc_assert (!TARGET_PARTIAL_REG_STALL);
      emit_insn (gen_rtx_SET
                 (gen_rtx_STRICT_LOW_PART
                  (VOIDmode, gen_lowpart (QImode, operands[0])),
                  operands[1]));
      DONE;
    }

  operands[0] = gen_lowpart (SImode, operands[0]);
}
  [(set_attr "type" "alu1")
   (set_attr "mode" "SI")])

; zero extend to SImode to avoid partial register stalls
(define_insn "*zero_extendqihi2"
  [(set (match_operand:HI 0 "register_operand" "=r,?r,?k")
        (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,?k,?km")))]
  "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
  "@
   movz{bl|x}\t{%1, %k0|%k0, %1}
   kmovb\t{%1, %k0|%k0, %1}
   kmovb\t{%1, %0|%0, %1}"
  [(set_attr "isa" "*,avx512dq,avx512dq")
   (set_attr
"type" "imovx,mskmov,mskmov") (set_attr "mode" "SI,QI,QI")]) ;; Transform xorl; mov[bw] (set strict_low_part) into movz[bw]l. (define_peephole2 [(parallel [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0)) (clobber (reg:CC FLAGS_REG))]) (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand")) (match_operand:SWI12 2 "nonimmediate_operand"))] "REGNO (operands[0]) == REGNO (operands[1]) && (mode != SImode || !TARGET_ZERO_EXTEND_WITH_AND || !optimize_function_for_speed_p (cfun))" [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))]) ;; Likewise, but preserving FLAGS_REG. (define_peephole2 [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0)) (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand")) (match_operand:SWI12 2 "nonimmediate_operand"))] "REGNO (operands[0]) == REGNO (operands[1]) && (mode != SImode || !TARGET_ZERO_EXTEND_WITH_AND || !optimize_function_for_speed_p (cfun))" [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))]) ;; Sign extension instructions (define_expand "extendsidi2" [(set (match_operand:DI 0 "register_operand") (sign_extend:DI (match_operand:SI 1 "register_operand")))] "" { if (!TARGET_64BIT) { emit_insn (gen_extendsidi2_1 (operands[0], operands[1])); DONE; } }) (define_insn "*extendsidi2_rex64" [(set (match_operand:DI 0 "register_operand" "=*a,r") (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))] "TARGET_64BIT" "@ {cltq|cdqe} movs{lq|x}\t{%1, %0|%0, %1}" [(set_attr "type" "imovx") (set_attr "mode" "DI") (set_attr "prefix_0f" "0") (set_attr "modrm" "0,1")]) (define_insn "extendsidi2_1" [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o") (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r"))) (clobber (reg:CC FLAGS_REG)) (clobber (match_scratch:SI 2 "=X,X,X,&r"))] "!TARGET_64BIT" "#") (define_insn "extendditi2" [(set (match_operand:TI 0 "nonimmediate_operand" "=*A,r,?r,?*o") (sign_extend:TI (match_operand:DI 1 "register_operand" 
"0,0,r,r"))) (clobber (reg:CC FLAGS_REG)) (clobber (match_scratch:DI 2 "=X,X,X,&r"))] "TARGET_64BIT" "#") ;; Split the memory case. If the source register doesn't die, it will stay ;; this way, if it does die, following peephole2s take care of it. (define_split [(set (match_operand: 0 "memory_operand") (sign_extend: (match_operand:DWIH 1 "register_operand"))) (clobber (reg:CC FLAGS_REG)) (clobber (match_operand:DWIH 2 "register_operand"))] "reload_completed" [(const_int 0)] { rtx bits = GEN_INT ( * BITS_PER_UNIT - 1); split_double_mode (mode, &operands[0], 1, &operands[3], &operands[4]); emit_move_insn (operands[3], operands[1]); /* Generate a cltd if possible and doing so it profitable. */ if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) && REGNO (operands[1]) == AX_REG && REGNO (operands[2]) == DX_REG) { emit_insn (gen_ashr3_cvt (operands[2], operands[1], bits)); } else { emit_move_insn (operands[2], operands[1]); emit_insn (gen_ashr3_cvt (operands[2], operands[2], bits)); } emit_move_insn (operands[4], operands[2]); DONE; }) ;; Peepholes for the case where the source register does die, after ;; being split with the above splitter. 
;; First form: the stored register was copied into a second register, the
;; copy was arithmetically shifted right by the word's sign-bit position
;; (<MODE_SIZE> * BITS_PER_UNIT - 1) and then stored.  When both registers
;; pass the peep2_reg_dead_p checks below and the copy is not mentioned in
;; the second memory address, shift the source register in place and drop
;; the copy.
(define_peephole2
  [(set (match_operand:DWIH 0 "memory_operand")
        (match_operand:DWIH 1 "general_reg_operand"))
   (set (match_operand:DWIH 2 "general_reg_operand") (match_dup 1))
   (parallel [(set (match_dup 2)
                   (ashiftrt:DWIH (match_dup 2)
                                  (match_operand 4 "const_int_operand")))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_operand:DWIH 3 "memory_operand") (match_dup 2))]
  "REGNO (operands[1]) != REGNO (operands[2])
   && INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1)
   && peep2_reg_dead_p (2, operands[1])
   && peep2_reg_dead_p (4, operands[2])
   && !reg_mentioned_p (operands[2], operands[3])"
  [(set (match_dup 0) (match_dup 1))
   (parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4)))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 3) (match_dup 1))])

;; Second form: the shift wrote its result straight from AX into DX (the
;; shape produced when a cltd-style conversion was chosen).  Only applied
;; when not optimizing for size, since the condition's own note says cltd
;; is the shorter encoding.
(define_peephole2
  [(set (match_operand:DWIH 0 "memory_operand")
        (match_operand:DWIH 1 "general_reg_operand"))
   (parallel [(set (match_operand:DWIH 2 "general_reg_operand")
                   (ashiftrt:DWIH (match_dup 1)
                                  (match_operand 4 "const_int_operand")))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_operand:DWIH 3 "memory_operand") (match_dup 2))]
  "/* cltd is shorter than sarl $31, %eax */
   !optimize_function_for_size_p (cfun)
   && REGNO (operands[1]) == AX_REG
   && REGNO (operands[2]) == DX_REG
   && INTVAL (operands[4]) == (<MODE_SIZE> * BITS_PER_UNIT - 1)
   && peep2_reg_dead_p (2, operands[1])
   && peep2_reg_dead_p (3, operands[2])
   && !reg_mentioned_p (operands[2], operands[3])"
  [(set (match_dup 0) (match_dup 1))
   (parallel [(set (match_dup 1) (ashiftrt:DWIH (match_dup 1) (match_dup 4)))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 3) (match_dup 1))])

;; Extend to register case.  Optimize case where source and destination
;; registers match and cases where we can use cltd.
;; Post-reload split of a word -> double-word sign extension into a register
;; pair.  operands[3]/operands[4] are the low/high halves produced by
;; split_double_mode; the high half is the source shifted right by the
;; sign-bit position.
(define_split
  [(set (match_operand:<DWI> 0 "register_operand")
        (sign_extend:<DWI> (match_operand:DWIH 1 "register_operand")))
   (clobber (reg:CC FLAGS_REG))
   (clobber (match_scratch:DWIH 2))]
  "reload_completed"
  [(const_int 0)]
{
  rtx bits = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);

  split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);

  /* Low half: plain copy unless it already lives in the source register.  */
  if (REGNO (operands[3]) != REGNO (operands[1]))
    emit_move_insn (operands[3], operands[1]);

  /* Prefer the AX copy as shift source so the cltd form can match.  */
  rtx src = operands[1];
  if (REGNO (operands[3]) == AX_REG)
    src = operands[3];

  /* Generate a cltd if possible and doing so is profitable.  */
  if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
      && REGNO (src) == AX_REG
      && REGNO (operands[4]) == DX_REG)
    {
      emit_insn (gen_ashr<mode>3_cvt (operands[4], src, bits));
      DONE;
    }

  /* Otherwise copy the source into the high half and shift it in place.  */
  if (REGNO (operands[4]) != REGNO (operands[1]))
    emit_move_insn (operands[4], operands[1]);

  emit_insn (gen_ashr<mode>3_cvt (operands[4], operands[4], bits));
  DONE;
})

;; QImode/HImode -> DImode sign extension (64-bit only).
(define_insn "extend<mode>di2"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (sign_extend:DI
         (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
  "TARGET_64BIT"
  "movs{<imodesuffix>q|x}\t{%1, %0|%0, %1}"
  [(set_attr "type" "imovx")
   (set_attr "mode" "DI")])

;; HImode -> SImode sign extension.  Alternative 0 (AX in place) emits
;; cwtl/cwde when the prefix_0f attribute evaluates to 0, otherwise movs.
(define_insn "extendhisi2"
  [(set (match_operand:SI 0 "register_operand" "=*a,r")
        (sign_extend:SI
         (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))]
  ""
{
  switch (get_attr_prefix_0f (insn))
    {
    case 0:
      return "{cwtl|cwde}";
    default:
      return "movs{wl|x}\t{%1, %0|%0, %1}";
    }
}
  [(set_attr "type" "imovx")
   (set_attr "mode" "SI")
   (set (attr "prefix_0f")
     ;; movsx is short decodable while cwtl is vector decoded.
     (if_then_else (and (eq_attr "cpu" "!k6")
                        (eq_attr "alternative" "0"))
        (const_string "0")
        (const_string "1")))
   (set (attr "znver1_decode")
     (if_then_else (eq_attr "prefix_0f" "0")
        (const_string "double")
        (const_string "direct")))
   (set (attr "modrm")
     (if_then_else (eq_attr "prefix_0f" "0")
        (const_string "0")
        (const_string "1")))])

;; Same as extendhisi2, with the SImode result zero-extended to DImode.
(define_insn "*extendhisi2_zext"
  [(set (match_operand:DI 0 "register_operand" "=*a,r")
        (zero_extend:DI
         (sign_extend:SI
          (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))]
  "TARGET_64BIT"
{
  switch (get_attr_prefix_0f (insn))
    {
    case 0:
      return "{cwtl|cwde}";
    default:
      return "movs{wl|x}\t{%1, %k0|%k0, %1}";
    }
}
  [(set_attr "type" "imovx")
   (set_attr "mode" "SI")
   (set (attr "prefix_0f")
     ;; movsx is short decodable while cwtl is vector decoded.
     (if_then_else (and (eq_attr "cpu" "!k6")
                        (eq_attr "alternative" "0"))
        (const_string "0")
        (const_string "1")))
   (set (attr "modrm")
     (if_then_else (eq_attr "prefix_0f" "0")
        (const_string "0")
        (const_string "1")))])

;; QImode -> SImode sign extension.
(define_insn "extendqisi2"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (sign_extend:SI
         (match_operand:QI 1 "nonimmediate_operand" "qm")))]
  ""
  "movs{bl|x}\t{%1, %0|%0, %1}"
   [(set_attr "type" "imovx")
    (set_attr "mode" "SI")])

;; Same as extendqisi2, with the SImode result zero-extended to DImode.
(define_insn "*extendqisi2_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (sign_extend:SI
           (match_operand:QI 1 "nonimmediate_operand" "qm"))))]
  "TARGET_64BIT"
  "movs{bl|x}\t{%1, %k0|%k0, %1}"
   [(set_attr "type" "imovx")
    (set_attr "mode" "SI")])

;; QImode -> HImode sign extension; cbtw/cbw for the AX-in-place alternative
;; when prefix_0f evaluates to 0, otherwise movs.
(define_insn "extendqihi2"
  [(set (match_operand:HI 0 "register_operand" "=*a,r")
        (sign_extend:HI
         (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))]
  ""
{
  switch (get_attr_prefix_0f (insn))
    {
    case 0:
      return "{cbtw|cbw}";
    default:
      return "movs{bw|x}\t{%1, %0|%0, %1}";
    }
}
  [(set_attr "type" "imovx")
   (set_attr "mode" "HI")
   (set (attr "prefix_0f")
     ;; movsx is short decodable while cwtl is vector decoded.
     (if_then_else (and (eq_attr "cpu" "!k6")
                        (eq_attr "alternative" "0"))
        (const_string "0")
        (const_string "1")))
   (set (attr "modrm")
     (if_then_else (eq_attr "prefix_0f" "0")
        (const_string "0")
        (const_string "1")))])

;; Sign-extend the byte extracted at bit offset 8, width 8 (printed with %h,
;; i.e. the "high" byte register) into an HImode/SImode register.
(define_insn "*extendqi<SWI24:mode>_ext_1"
  [(set (match_operand:SWI24 0 "register_operand" "=R")
        (sign_extend:SWI24
          (subreg:QI
            (match_operator:SWI248 2 "extract_operator"
              [(match_operand 1 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)))]
  ""
  "movs{b<SWI24:imodesuffix>|x}\t{%h1, %0|%0, %h1}"
   [(set_attr "type" "imovx")
    (set_attr "mode" "<SWI24:MODE>")])

;; Conversions between float and double.

;; These are all no-ops in the model used for the 80387.
;; So just emit moves.

;; %%% Kill these when call knows how to work out a DFmode push earlier.
(define_split
  [(set (match_operand:DF 0 "push_operand")
        (float_extend:DF (match_operand:SF 1 "fp_register_operand")))]
  "reload_completed"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
   (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])

;; Likewise for a push of an XFmode extension; the stack adjustment is
;; -GET_MODE_SIZE (XFmode).
(define_split
  [(set (match_operand:XF 0 "push_operand")
        (float_extend:XF (match_operand:MODEF 1 "fp_register_operand")))]
  "reload_completed"
  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
   (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
  "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")

;; SFmode -> DFmode extension.  Constant sources are either folded to a
;; DFmode move (standard x87 constants, when x87 is usable) or forced into
;; the constant pool.
(define_expand "extendsfdf2"
  [(set (match_operand:DF 0 "nonimm_ssenomem_operand")
        (float_extend:DF (match_operand:SF 1 "general_operand")))]
  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{
  /* ??? Needed for compress_float_constant since all fp constants
     are TARGET_LEGITIMATE_CONSTANT_P.  */
  if (CONST_DOUBLE_P (operands[1]))
    {
      if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
          && standard_80387_constant_p (operands[1]) > 0)
        {
          operands[1] = simplify_const_unary_operation
            (FLOAT_EXTEND, DFmode, operands[1], SFmode);
          emit_move_insn_1 (operands[0], operands[1]);
          DONE;
        }
      operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
    }
})

;; Alternatives 0,1 are x87 moves, 2,3 are SSE cvtss2sd.  The "enabled"
;; attribute selects which set is available for the current math mode.
(define_insn "*extendsfdf2"
  [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v,v")
        (float_extend:DF
          (match_operand:SF 1 "nonimmediate_operand" "fm,f,v,m")))]
  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{
  switch (which_alternative)
    {
    case 0:
    case 1:
      return output_387_reg_move (insn, operands);

    case 2:
      return "%vcvtss2sd\t{%d1, %0|%0, %d1}";
    case 3:
      return "%vcvtss2sd\t{%1, %d0|%d0, %1}";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
   (set_attr "avx_partial_xmm_update" "false,false,false,true")
   (set_attr "prefix" "orig,orig,maybe_vex,maybe_vex")
   (set_attr "mode" "SF,XF,DF,DF")
   (set (attr "enabled")
     (if_then_else
       (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
       (if_then_else
         (eq_attr "alternative" "0,1")
         (symbol_ref "TARGET_MIX_SSE_I387")
         (symbol_ref "true"))
       (if_then_else
         (eq_attr "alternative" "0,1")
         (symbol_ref "true")
         (symbol_ref "false"))))])

/* For converting SF(xmm2) to DF(xmm1), use the following code instead of
   cvtss2sd:
      unpcklps xmm2,xmm2   ; packed conversion might crash on signaling NaNs
      cvtps2pd xmm2,xmm1
   We do the conversion post reload to avoid producing of 128bit spills
   that might lead to ICE on 32bit target.  The sequence unlikely combine
   anyway.  */
(define_split
  [(set (match_operand:DF 0 "sse_reg_operand")
        (float_extend:DF
          (match_operand:SF 1 "nonimmediate_operand")))]
  "TARGET_USE_VECTOR_FP_CONVERTS
   && optimize_insn_for_speed_p ()
   && reload_completed
   && (!EXT_REX_SSE_REG_P (operands[0])
       || TARGET_AVX512VL)"
   [(set (match_dup 2)
         (float_extend:V2DF
           (vec_select:V2SF
             (match_dup 3)
             (parallel [(const_int 0) (const_int 1)]))))]
{
  operands[2] = lowpart_subreg (V2DFmode, operands[0], DFmode);
  operands[3] = lowpart_subreg (V4SFmode, operands[0], DFmode);
  /* Use movss for loading from memory, unpcklps reg, reg for registers.
     Try to avoid move when unpacking can be done in source.  */
  if (REG_P (operands[1]))
    {
      /* If it is unsafe to overwrite upper half of source, we need
         to move to destination and unpack there.  */
      if (REGNO (operands[0]) != REGNO (operands[1])
          || (EXT_REX_SSE_REG_P (operands[1])
              && !TARGET_AVX512VL))
        {
          rtx tmp = lowpart_subreg (SFmode, operands[0], DFmode);
          emit_move_insn (tmp, operands[1]);
        }
      else
        operands[3] = lowpart_subreg (V4SFmode, operands[1], SFmode);
      /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
         =v, v, then vbroadcastss will be only needed for AVX512F without
         AVX512VL.  */
      if (!EXT_REX_SSE_REGNO_P (REGNO (operands[3])))
        emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
                                               operands[3]));
      else
        {
          rtx tmp = lowpart_subreg (V16SFmode, operands[3], V4SFmode);
          emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
        }
    }
  else
    emit_insn (gen_vec_setv4sf_0 (operands[3],
                                  CONST0_RTX (V4SFmode), operands[1]));
})

;; It's more profitable to split and then extend in the same register.
(define_peephole2
  [(set (match_operand:DF 0 "sse_reg_operand")
        (float_extend:DF
          (match_operand:SF 1 "memory_operand")))]
  "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (float_extend:DF (match_dup 2)))]
  "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")

;; Break partial SSE register dependency stall.  This splitter should split
;; late in the pass sequence (after register rename pass), so allocated
;; registers won't change anymore
(define_split
  [(set (match_operand:DF 0 "sse_reg_operand")
        (float_extend:DF
          (match_operand:SF 1 "nonimmediate_operand")))]
  "!TARGET_AVX
   && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
   && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && (!REG_P (operands[1])
       || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
   && (!EXT_REX_SSE_REG_P (operands[0])
       || TARGET_AVX512VL)"
  [(set (match_dup 0)
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float_extend:DF
              (match_dup 1)))
          (match_dup 0)
          (const_int 1)))]
{
  /* Zero the whole destination vector first so the merge does not depend
     on its previous contents.  */
  operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
  emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
})

;; HFmode -> SFmode extension.  Without AVX512FP16 the value is placed in
;; element 0 of a zeroed V8HF vector and converted with vcvtph2ps.
(define_expand "extendhfsf2"
  [(set (match_operand:SF 0 "register_operand")
        (float_extend:SF
          (match_operand:HF 1 "nonimmediate_operand")))]
  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
{
  if (!TARGET_AVX512FP16)
    {
      rtx res = gen_reg_rtx (V4SFmode);
      rtx tmp = gen_reg_rtx (V8HFmode);
      rtx zero = force_reg (V8HFmode, CONST0_RTX (V8HFmode));

      emit_insn (gen_vec_setv8hf_0 (tmp, zero, operands[1]));
      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
      DONE;
    }
})

;; HFmode -> DFmode extension (AVX512FP16 only).
(define_expand "extendhfdf2"
  [(set (match_operand:DF 0 "register_operand")
        (float_extend:DF
          (match_operand:HF 1 "nonimmediate_operand")))]
  "TARGET_AVX512FP16")

;; AVX512FP16 scalar HFmode -> SFmode/DFmode conversion instruction.
(define_insn "*extendhf<mode>2"
  [(set (match_operand:MODEF 0 "register_operand" "=v")
        (float_extend:MODEF
          (match_operand:HF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512FP16"
  "vcvtsh2<ssemodesuffix>\t{%1, %0, %0|%0, %0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

;; BFmode -> SFmode conversion, expressed as an unspec rather than
;; float_extend (see the note on extendbfsf2_1).
(define_expand "extendbfsf2"
  [(set (match_operand:SF 0 "register_operand")
        (unspec:SF
          [(match_operand:BF 1 "register_operand")]
         UNSPEC_CVTBFSF))]
 "TARGET_SSE2 && !HONOR_NANS (BFmode)")

;; Don't use float_extend since psrlld doesn't raise
;; exceptions and turn a sNaN into a qNaN.
(define_insn "extendbfsf2_1"
  [(set (match_operand:SF 0 "register_operand"   "=x,Yv,v")
        (unspec:SF
          [(match_operand:BF 1 "register_operand" " 0,Yv,v")]
          UNSPEC_CVTBFSF))]
 "TARGET_SSE2"
 "@
  pslld\t{$16, %0|%0, 16}
  vpslld\t{$16, %1, %0|%0, %1, 16}
  vpslld\t{$16, %g1, %g0|%g0, %g1, 16}"
  [(set_attr "isa" "noavx,avx,*")
   (set_attr "type" "sseishft1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix" "orig,maybe_evex,evex")
   (set_attr "mode" "TI,TI,XI")
   (set_attr "memory" "none")
   (set (attr "enabled")
     (if_then_else (eq_attr "alternative" "2")
       (symbol_ref "TARGET_AVX512F && TARGET_EVEX512
                    && !TARGET_AVX512VL && !TARGET_PREFER_AVX256")
       (const_string "*")))])

;; SFmode/DFmode -> XFmode extension (x87).  Constant sources are folded
;; when they are standard x87 constants, otherwise forced to memory.
(define_expand "extend<mode>xf2"
  [(set (match_operand:XF 0 "nonimmediate_operand")
        (float_extend:XF (match_operand:MODEF 1 "general_operand")))]
  "TARGET_80387"
{
  /* ??? Needed for compress_float_constant since all fp constants
     are TARGET_LEGITIMATE_CONSTANT_P.  */
  if (CONST_DOUBLE_P (operands[1]))
    {
      if (standard_80387_constant_p (operands[1]) > 0)
        {
          operands[1] = simplify_const_unary_operation
            (FLOAT_EXTEND, XFmode, operands[1], <MODE>mode);
          emit_move_insn_1 (operands[0], operands[1]);
          DONE;
        }
      operands[1] = validize_mem (force_const_mem (<MODE>mode, operands[1]));
    }
})

;; x87 register/memory move implementing the extension to XFmode.
(define_insn "*extend<mode>xf2_i387"
  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
        (float_extend:XF
          (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))]
  "TARGET_80387"
  "* return output_387_reg_move (insn, operands);"
  [(set_attr "type" "fmov")
   (set_attr "mode" "<MODE>,XF")])

;; %%% This seems like bad news.
;; This cannot output into an f-reg because there is no way to be sure
;; of truncating in that case.  Otherwise this is just like a simple move
;; insn.  So we pretend we can output to a reg in order to get better
;; register preferencing, but we really use a stack slot.

;; Conversion from DFmode to SFmode.
;; DFmode -> SFmode truncation.  Alternatives 0,1 are x87 moves, 2,3 are SSE
;; cvtsd2ss.  The "enabled" attribute gates the x87 register-destination
;; alternative on flag_unsafe_math_optimizations.
(define_insn "truncdfsf2"
  [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v,v")
        (float_truncate:SF
          (match_operand:DF 1 "register_ssemem_operand" "f,f,v,m")))]
  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{
  switch (which_alternative)
    {
    case 0:
    case 1:
      return output_387_reg_move (insn, operands);

    case 2:
      return "%vcvtsd2ss\t{%d1, %0|%0, %d1}";
    case 3:
      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
   (set_attr "avx_partial_xmm_update" "false,false,false,true")
   (set_attr "mode" "SF")
   (set (attr "enabled")
     (if_then_else
       (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
       (cond [(eq_attr "alternative" "0")
                (symbol_ref "TARGET_MIX_SSE_I387")
              (eq_attr "alternative" "1")
                (symbol_ref "TARGET_MIX_SSE_I387 && flag_unsafe_math_optimizations")
           ]
           (symbol_ref "true"))
       (cond [(eq_attr "alternative" "0")
                (symbol_ref "true")
              (eq_attr "alternative" "1")
                (symbol_ref "flag_unsafe_math_optimizations")
           ]
           (symbol_ref "false"))))])

/* For converting DF(xmm2) to SF(xmm1), use the following code instead of
   cvtsd2ss:
      unpcklpd xmm2,xmm2   ; packed conversion might crash on signaling NaNs
      cvtpd2ps xmm2,xmm1
   We do the conversion post reload to avoid producing of 128bit spills
   that might lead to ICE on 32bit target.  The sequence unlikely combine
   anyway.  */
(define_split
  [(set (match_operand:SF 0 "sse_reg_operand")
        (float_truncate:SF
          (match_operand:DF 1 "nonimmediate_operand")))]
  "TARGET_USE_VECTOR_FP_CONVERTS
   && optimize_insn_for_speed_p ()
   && reload_completed
   && (!EXT_REX_SSE_REG_P (operands[0])
       || TARGET_AVX512VL)"
   [(set (match_dup 2)
         (vec_concat:V4SF
           (float_truncate:V2SF
             (match_dup 4))
           (match_dup 3)))]
{
  operands[2] = lowpart_subreg (V4SFmode, operands[0], SFmode);
  operands[3] = CONST0_RTX (V2SFmode);
  operands[4] = lowpart_subreg (V2DFmode, operands[0], SFmode);
  /* Use movsd for loading from memory, unpcklpd for registers.
     Try to avoid move when unpacking can be done in source, or SSE3
     movddup is available.  */
  if (REG_P (operands[1]))
    {
      if ((!TARGET_SSE3 && REGNO (operands[0]) != REGNO (operands[1]))
          || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL))
        {
          rtx tmp = lowpart_subreg (DFmode, operands[0], SFmode);
          emit_move_insn (tmp, operands[1]);
          operands[1] = tmp;
        }
      else if (!TARGET_SSE3)
        operands[4] = lowpart_subreg (V2DFmode, operands[1], DFmode);
      emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
    }
  else
    emit_insn (gen_vec_concatv2df (operands[4], operands[1],
                                   CONST0_RTX (DFmode)));
})

;; It's more profitable to split and then truncate in the same register.
(define_peephole2
  [(set (match_operand:SF 0 "sse_reg_operand")
        (float_truncate:SF
          (match_operand:DF 1 "memory_operand")))]
  "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
  "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")

;; Break partial SSE register dependency stall.  This splitter should split
;; late in the pass sequence (after register rename pass), so allocated
;; registers won't change anymore
(define_split
  [(set (match_operand:SF 0 "sse_reg_operand")
        (float_truncate:SF
          (match_operand:DF 1 "nonimmediate_operand")))]
  "!TARGET_AVX
   && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
   && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && (!REG_P (operands[1])
       || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
   && (!EXT_REX_SSE_REG_P (operands[0])
       || TARGET_AVX512VL)"
  [(set (match_dup 0)
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float_truncate:SF
              (match_dup 1)))
          (match_dup 0)
          (const_int 1)))]
{
  /* Zero the whole destination vector first so the merge does not depend
     on its previous contents.  */
  operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
  emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
})

;; Conversion from XFmode to {SF,DF}mode

;; The register-destination alternative is enabled only under
;; flag_unsafe_math_optimizations.
(define_insn "truncxf<mode>2"
  [(set (match_operand:MODEF 0 "nonimmediate_operand" "=m,f")
        (float_truncate:MODEF
          (match_operand:XF 1 "register_operand" "f,f")))]
  "TARGET_80387"
  "* return output_387_reg_move (insn, operands);"
  [(set_attr "type" "fmov")
   (set_attr "mode" "<MODE>")
   (set (attr "enabled")
     (cond [(eq_attr "alternative" "1")
              (symbol_ref "flag_unsafe_math_optimizations")
           ]
           (symbol_ref "true")))])

;; Conversion from {SF,DF}mode to HFmode.

;; Without AVX512FP16 the SFmode value is placed in element 0 of a zeroed
;; V4SF vector and converted with vcvtps2ph (immediate 4).
(define_expand "truncsfhf2"
  [(set (match_operand:HF 0 "register_operand")
        (float_truncate:HF
          (match_operand:SF 1 "nonimmediate_operand")))]
  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
  {
    if (!TARGET_AVX512FP16)
      {
        rtx res = gen_reg_rtx (V8HFmode);
        rtx tmp = gen_reg_rtx (V4SFmode);
        rtx zero = force_reg (V4SFmode, CONST0_RTX (V4SFmode));

        emit_insn (gen_vec_setv4sf_0 (tmp, zero, operands[1]));
        emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp,
                                  GEN_INT (4)));
        emit_move_insn (operands[0], gen_lowpart (HFmode, res));
        DONE;
      }
  })

;; DFmode -> HFmode truncation (AVX512FP16 only).
(define_expand "truncdfhf2"
  [(set (match_operand:HF 0 "register_operand")
        (float_truncate:HF
          (match_operand:DF 1 "nonimmediate_operand")))]
  "TARGET_AVX512FP16")

;; AVX512FP16 scalar SFmode/DFmode -> HFmode conversion instruction.
(define_insn "*trunc<mode>hf2"
  [(set (match_operand:HF 0 "register_operand" "=v")
        (float_truncate:HF
          (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512FP16"
  "vcvt<ssemodesuffix>2sh\t{%1, %d0|%d0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "HF")])

;; SFmode -> BFmode truncation; only when NaNs need not be honored and
;; unsafe math optimizations are enabled.
(define_insn "truncsfbf2"
  [(set (match_operand:BF 0 "register_operand" "=x, v")
        (float_truncate:BF
          (match_operand:SF 1 "register_operand" "x,v")))]
  "((TARGET_AVX512BF16 && TARGET_AVX512VL) || TARGET_AVXNECONVERT)
   && !HONOR_NANS (BFmode) && flag_unsafe_math_optimizations"
  "@
  %{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}
  vcvtneps2bf16\t{%1, %0|%0, %1}"
  [(set_attr "isa" "avxneconvert,avx512bf16vl")
   (set_attr "prefix" "vex,evex")])

;; Signed conversion to DImode.
;; XFmode -> DImode.  With TARGET_FISTTP the fisttp pattern is emitted
;; directly; otherwise the parallel (with the FLAGS_REG clobber) is used.
(define_expand "fix_truncxfdi2"
  [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
                   (fix:DI (match_operand:XF 1 "register_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_80387"
{
  if (TARGET_FISTTP)
   {
     emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
     DONE;
   }
})

;; SFmode/DFmode -> DImode.  Order of preference: fisttp (unless the 64-bit
;; SSE path is the selected math unit), then the SSE conversion on 64-bit
;; targets, then the generic parallel.
(define_expand "fix_trunc<mode>di2"
  [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
                   (fix:DI (match_operand:MODEF 1 "register_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
{
  if (TARGET_FISTTP
      && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
   {
     emit_insn (gen_fix_truncdi_i387_fisttp (operands[0], operands[1]));
     DONE;
   }
  if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
   {
     /* The SSE insn only writes registers; go through a temporary when the
        destination is not one.  */
     rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
     emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1]));
     if (out != operands[0])
        emit_move_insn (operands[0], out);
     DONE;
   }
})

;; AVX512FP16 HFmode -> SImode/DImode, signed and unsigned (any_fix).
(define_insn "fix<fixunssuffix>_trunchf<mode>2"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (any_fix:SWI48
          (match_operand:HF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512FP16"
  "vcvttsh2<fixsuffix>si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

;; Signed conversion to SImode.

(define_expand "fix_truncxfsi2"
  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
                   (fix:SI (match_operand:XF 1 "register_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_80387"
{
  if (TARGET_FISTTP)
   {
     emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
     DONE;
   }
})

;; SFmode/DFmode -> SImode; same selection logic as the DImode expander but
;; the SSE path does not require a 64-bit target.
(define_expand "fix_trunc<mode>si2"
  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
                   (fix:SI (match_operand:MODEF 1 "register_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
{
  if (TARGET_FISTTP
      && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
   {
     emit_insn (gen_fix_truncsi_i387_fisttp (operands[0], operands[1]));
     DONE;
   }
  if (SSE_FLOAT_MODE_P (<MODE>mode))
   {
     rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
     emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1]));
     if (out != operands[0])
        emit_move_insn (operands[0], out);
     DONE;
   }
})

;; Signed conversion to HImode.

(define_expand "fix_trunc<mode>hi2"
  [(parallel [(set (match_operand:HI 0 "nonimmediate_operand")
                   (fix:HI (match_operand:X87MODEF 1 "register_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_80387
   && !(SSE_FLOAT_MODE_P (<MODE>mode)
        && (!TARGET_FISTTP || TARGET_SSE_MATH))"
{
  if (TARGET_FISTTP)
   {
     emit_insn (gen_fix_trunchi_i387_fisttp (operands[0], operands[1]));
     DONE;
   }
})

;; Unsigned conversion to DImode

(define_insn "fixuns_trunc<mode>di2"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (unsigned_fix:DI
          (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
  "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
  "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "DI")])

;; Unsigned conversion to SImode.

;; With AVX512F a single instruction is emitted.  Otherwise the expander
;; FAILs when optimizing for size, and else supplies operand 2: a vector
;; constant built from 2**31 (real_ldexp of dconst1 by 31) in the vector
;; mode associated with the source mode.
(define_expand "fixuns_trunc<mode>si2"
  [(parallel
    [(set (match_operand:SI 0 "register_operand")
          (unsigned_fix:SI
            (match_operand:MODEF 1 "nonimmediate_operand")))
     (use (match_dup 2))
     (clobber (scratch:<ssevecmode>))
     (clobber (scratch:<ssevecmode>))])]
  "(!TARGET_64BIT || TARGET_AVX512F) && TARGET_SSE2 && TARGET_SSE_MATH"
{
  machine_mode mode = <MODE>mode;
  machine_mode vecmode = <ssevecmode>mode;
  REAL_VALUE_TYPE TWO31r;
  rtx two31;

  if (TARGET_AVX512F)
    {
      emit_insn (gen_fixuns_trunc<mode>si2_avx512f
                 (operands[0], operands[1]));
      DONE;
    }

  if (optimize_insn_for_size_p ())
    FAIL;

  real_ldexp (&TWO31r, &dconst1, 31);
  two31 = const_double_from_real_value (TWO31r, mode);
  two31 = ix86_build_const_vector (vecmode, true, two31);
  operands[2] = force_reg (vecmode, two31);
})

(define_insn "fixuns_trunc<mode>si2_avx512f"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unsigned_fix:SI
          (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F && TARGET_SSE_MATH"
  "vcvtt<ssemodesuffix>2usi\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SI")])

;; HFmode -> unsigned SImode with the result zero-extended to DImode.
(define_insn "*fixuns_trunchfsi2zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (unsigned_fix:SI
            (match_operand:HF 1 "nonimmediate_operand" "vm"))))]
  "TARGET_64BIT && TARGET_AVX512FP16"
  "vcvttsh2usi\t{%1, %k0|%k0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SI")])

;; SFmode/DFmode -> unsigned SImode with the result zero-extended to DImode.
(define_insn "*fixuns_trunc<mode>si2_avx512f_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (unsigned_fix:SI
            (match_operand:MODEF 1 "nonimmediate_operand" "vm"))))]
  "TARGET_64BIT && TARGET_AVX512F && TARGET_SSE_MATH"
  "vcvtt<ssemodesuffix>2usi\t{%1, %k0|%k0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SI")])

;; Non-AVX512F 32-bit form: kept as "#" until after reload, then expanded by
;; ix86_split_convert_uns_si_sse using the constant in operand 4 and the
;; two vector scratches.
(define_insn_and_split "*fixuns_trunc<mode>_1"
  [(set (match_operand:SI 0 "register_operand" "=&x,&x")
        (unsigned_fix:SI
          (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm")))
   (use (match_operand:<ssevecmode> 4  "nonimmediate_operand" "m,x"))
   (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
   (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
   && optimize_function_for_speed_p (cfun)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  ix86_split_convert_uns_si_sse (operands);
  DONE;
})

;; Unsigned conversion to HImode.
;; Without these patterns, we'll try the unsigned SI conversion which
;; is complex for SSE, rather than the signed SI conversion, which isn't.

(define_expand "fixuns_trunchfhi2"
  [(set (match_dup 2)
        (fix:SI (match_operand:HF 1 "nonimmediate_operand")))
   (set (match_operand:HI 0 "nonimmediate_operand")
        (subreg:HI (match_dup 2) 0))]
  "TARGET_AVX512FP16"
  "operands[2] = gen_reg_rtx (SImode);")

(define_expand "fixuns_trunc<mode>hi2"
  [(set (match_dup 2)
        (fix:SI (match_operand:MODEF 1 "nonimmediate_operand")))
   (set (match_operand:HI 0 "nonimmediate_operand")
        (subreg:HI (match_dup 2) 0))]
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
  "operands[2] = gen_reg_rtx (SImode);")

;; When SSE is available, it is always faster to use it!
;; Truncating float -> integer conversion of a MODEF value held in an SSE
;; register or memory into an SWI48 general register.  The attributes give
;; a separate decode class per alternative (register vs. memory source).
(define_insn "fix_trunc_sse"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
        (fix:SWI48 (match_operand:MODEF 1 "nonimmediate_operand" "v,m")))]
  "SSE_FLOAT_MODE_P (mode) && (!TARGET_FISTTP || TARGET_SSE_MATH)"
  "%vcvtt2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "maybe_vex")
   (set (attr "prefix_rex")
        (if_then_else (match_test "mode == DImode")
                      (const_string "1")
                      (const_string "*")))
   (set_attr "mode" "")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")])

;; Avoid vector decoded forms of the instruction.
;; When a scratch SSE register is available, the memory source is first
;; loaded into it and the conversion is then done from the register.
(define_peephole2
  [(match_scratch:MODEF 2 "x")
   (set (match_operand:SWI48 0 "register_operand")
        (fix:SWI48 (match_operand:MODEF 1 "memory_operand")))]
  "TARGET_AVOID_VECTOR_DECODE && SSE_FLOAT_MODE_P (mode) && optimize_insn_for_speed_p ()"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (fix:SWI48 (match_dup 2)))])

;; x87 truncating conversion to a memory destination when FISTTP is
;; available.  The assembly is produced by output_fix_trunc with its last
;; argument true; an XF x87 scratch register is clobbered.
(define_insn "fix_trunc_i387_fisttp"
  [(set (match_operand:SWI248x 0 "nonimmediate_operand" "=m")
        (fix:SWI248x (match_operand 1 "register_operand" "f")))
   (clobber (match_scratch:XF 2 "=&f"))]
  "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && TARGET_FISTTP && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && (TARGET_64BIT || mode != DImode)) && TARGET_SSE_MATH)"
  "* return output_fix_trunc (insn, operands, true);"
  [(set_attr "type" "fisttp")
   (set_attr "mode" "")])

;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description
;; of the machinery.  Please note the clobber of FLAGS_REG.  In i387 control
;; word calculation (inserted by LCM in mode switching pass), FLAGS_REG
;; clobbering insns can be used.  Look at emit_i387_cw_initialization ()
;; function in i386.cc.
;; Pre-reload placeholder for the x87 truncating conversion without FISTTP.
;; It is always split ("&& 1"): the split body flags I387_TRUNC for mode
;; switching, allocates the two HImode control-word stack slots and emits
;; the real fix_trunc*_i387 insn that takes those slots as operands 2 and 3.
(define_insn_and_split "*fix_trunc_i387_1"
  [(set (match_operand:SWI248x 0 "nonimmediate_operand")
        (fix:SWI248x (match_operand 1 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && !TARGET_FISTTP && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1])) && (TARGET_64BIT || mode != DImode)) && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  ix86_optimize_mode_switching[I387_TRUNC] = 1;

  /* Stack slots for the stored control word and the truncating one.  */
  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
  operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);

  emit_insn (gen_fix_trunc_i387 (operands[0], operands[1], operands[2], operands[3]));
  DONE;
}
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "trunc")
   (set_attr "mode" "")])

;; DImode x87 truncating store.  Operands 2 and 3 are HImode memory
;; control-word slots; an XF x87 scratch register is clobbered.
(define_insn "fix_truncdi_i387"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
        (fix:DI (match_operand 1 "register_operand" "f")))
   (use (match_operand:HI 2 "memory_operand" "m"))
   (use (match_operand:HI 3 "memory_operand" "m"))
   (clobber (match_scratch:XF 4 "=&f"))]
  "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && !TARGET_FISTTP && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
  "* return output_fix_trunc (insn, operands, false);"
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "trunc")
   (set_attr "mode" "DI")])

;; SWI24 x87 truncating store; same operand layout as the DImode variant
;; but without the x87 scratch register.
(define_insn "fix_trunc_i387"
  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
        (fix:SWI24 (match_operand 1 "register_operand" "f")))
   (use (match_operand:HI 2 "memory_operand" "m"))
   (use (match_operand:HI 3 "memory_operand" "m"))]
  "X87_FLOAT_MODE_P (GET_MODE (operands[1])) && !TARGET_FISTTP && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
  "* return output_fix_trunc (insn, operands, false);"
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "trunc")
   (set_attr "mode" "")])

;; Store the x87 control word into an HImode memory operand (fnstcw).
;; The length is the address length plus two bytes.
(define_insn "x86_fnstcw_1"
  [(set (match_operand:HI 0 "memory_operand" "=m")
        (unspec:HI [(const_int 0)] UNSPEC_FSTCW))]
  "TARGET_80387"
  "fnstcw\t%0"
  [(set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
   (set_attr "mode" "HI")
   (set_attr "unit" "i387")
   (set_attr "bdver1_decode" "vector")])

;; Conversion between fixed point and floating point.

;; Even though we only accept memory inputs, the backend _really_
;; wants to be able to do this between registers.  Thankfully, LRA
;; will fix this up for us during register allocation.

;; HImode integer in memory -> x87 float via fild.
(define_insn "floathi2"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
        (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m")))]
  "TARGET_80387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "")
   (set_attr "znver1_decode" "double")
   (set_attr "fp_int_src" "true")])

;; SWI48x integer in memory -> XFmode via fild.
(define_insn "floatxf2"
  [(set (match_operand:XF 0 "register_operand" "=f")
        (float:XF (match_operand:SWI48x 1 "nonimmediate_operand" "m")))]
  "TARGET_80387"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "XF")
   (set_attr "znver1_decode" "double")
   (set_attr "fp_int_src" "true")])

;; Expander for SWI48x -> MODEF conversion.  It has no preparation code;
;; the condition admits either the x87 path or the SSE-math path (the
;; latter only for non-DImode sources unless TARGET_64BIT).
(define_expand "float2"
  [(set (match_operand:MODEF 0 "register_operand")
        (float:MODEF (match_operand:SWI48x 1 "nonimmediate_operand")))]
  "(TARGET_80387 && X87_ENABLE_FLOAT (mode, mode)) || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && ((mode != DImode) || TARGET_64BIT))")

;; SWI48 -> MODEF conversion with three alternatives:
;;   0: x87 fild from memory,
;;   1: SSE convert from a general register,
;;   2: SSE convert from memory.
;; The "enabled" attribute selects between the x87 and SSE alternatives
;; depending on whether SSE math is used for the float mode; alternative 1
;; is preferred for speed only with TARGET_INTER_UNIT_CONVERSIONS.
(define_insn "*float2"
  [(set (match_operand:MODEF 0 "register_operand" "=f,v,v")
        (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand" "m,r,m")))]
  "(TARGET_80387 && X87_ENABLE_FLOAT (mode, mode)) || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)"
  "@ fild%Z1\t%1 %vcvtsi2\t{%1, %d0|%d0, %1} %vcvtsi2\t{%1, %d0|%d0, %1}"
  [(set_attr "type" "fmov,sseicvt,sseicvt")
   (set_attr "avx_partial_xmm_update" "false,true,true")
   (set_attr "prefix" "orig,maybe_vex,maybe_vex")
   (set_attr "mode" "")
   (set (attr "prefix_rex")
        (if_then_else
          (and (eq_attr "prefix" "maybe_vex")
               (match_test "mode == DImode"))
          (const_string "1")
          (const_string "*")))
   (set_attr "unit" "i387,*,*")
   (set_attr "athlon_decode" "*,double,direct")
   (set_attr "amdfam10_decode" "*,vector,double")
   (set_attr "bdver1_decode" "*,double,direct")
   (set_attr "znver1_decode" "double,*,*")
   (set_attr "fp_int_src" "true")
   (set (attr "enabled")
        (if_then_else
          (match_test ("SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH"))
          (if_then_else
            (eq_attr "alternative" "0")
            (symbol_ref "TARGET_MIX_SSE_I387 && X87_ENABLE_FLOAT (mode, mode)")
            (symbol_ref "true"))
          (if_then_else
            (eq_attr "alternative" "0")
            (symbol_ref "true")
            (symbol_ref "false"))))
   (set (attr "preferred_for_speed")
        (cond [(eq_attr "alternative" "1")
                 (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS")]
              (symbol_ref "true")))])

;; SWI48 -> HFmode conversion (any_float code iterator), AVX512FP16 only.
(define_insn "floathf2"
  [(set (match_operand:HF 0 "register_operand" "=v")
        (any_float:HF (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
  "TARGET_AVX512FP16"
  "vcvtsi2sh\t{%1, %d0|%d0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "HF")])

;; DImode in memory -> MODEF through x87 fild on 32-bit targets.
(define_insn "*floatdi2_i387"
  [(set (match_operand:MODEF 0 "register_operand" "=f")
        (float:MODEF (match_operand:DI 1 "nonimmediate_operand" "m")))]
  "!TARGET_64BIT && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode)"
  "fild%Z1\t%1"
  [(set_attr "type" "fmov")
   (set_attr "mode" "")
   (set_attr "znver1_decode" "double")
   (set_attr "fp_int_src" "true")])

;; Try TARGET_USE_VECTOR_CONVERTS, but not so hard as to require extra memory
;; slots when !TARGET_INTER_UNIT_MOVES_TO_VEC disables the general_regs
;; alternative in sse2_loadld.
;; After reload, convert an SImode value to MODEF by loading it into the
;; low element of the destination viewed as V4SI and then emitting either
;; floatv4siv4sf2 or sse2_cvtdq2pd, depending on the vector mode.
(define_split
  [(set (match_operand:MODEF 0 "sse_reg_operand")
        (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))]
  "TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) && reload_completed && (MEM_P (operands[1]) || TARGET_INTER_UNIT_MOVES_TO_VEC) && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)"
  [(const_int 0)]
{
  /* Two vector-mode views of the destination register.  */
  operands[3] = lowpart_subreg (mode, operands[0], mode);
  operands[4] = lowpart_subreg (V4SImode, operands[0], mode);

  emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode), operands[1]));

  if (mode == V4SFmode)
    emit_insn (gen_floatv4siv4sf2 (operands[3], operands[4]));
  else
    emit_insn (gen_sse2_cvtdq2pd (operands[3], operands[4]));
  DONE;
})

;; Avoid store forwarding (partial memory) stall penalty
;; by passing DImode value through XMM registers.
;; This split allocates the DImode stack slot and hands off to the
;; floatdi*_i387_with_xmm pattern.
(define_split
  [(set (match_operand:X87MODEF 0 "register_operand")
        (float:X87MODEF (match_operand:DI 1 "register_operand")))]
  "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) && TARGET_SSE2 && optimize_function_for_speed_p (cfun) && can_create_pseudo_p ()"
  [(const_int 0)]
{
  rtx s = assign_386_stack_local (DImode, SLOT_FLOATxFDI_387);
  emit_insn (gen_floatdi2_i387_with_xmm (operands[0], operands[1], s));
  DONE;
})

;; DImode register -> x87 float on 32-bit targets.  After reload the value
;; is assembled in XMM scratch operand 3, stored to memory operand 2 and
;; converted from there.  Alternative 0 (isa "sse4") leaves scratch
;; operand 4 unconstrained ("X"); alternative 1 needs it as an SSE register.
(define_insn_and_split "floatdi2_i387_with_xmm"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
        (float:X87MODEF (match_operand:DI 1 "register_operand" "r,r")))
   (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
   (clobber (match_scratch:V4SI 3 "=x,x"))
   (clobber (match_scratch:V4SI 4 "=X,x"))]
  "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES_TO_VEC && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) && TARGET_SSE2 && optimize_function_for_speed_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (match_dup 2) (match_dup 3))
   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
{
  /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax). Assemble the 64-bit DImode value in an xmm register. */
  emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode), gen_lowpart (SImode, operands[1])));

  if (TARGET_SSE4_1)
    emit_insn (gen_sse4_1_pinsrd (operands[3], operands[3], gen_highpart (SImode, operands[1]), GEN_INT (2)));
  else
    {
      /* Without SSE4.1, load the high half separately and interleave.  */
      emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode), gen_highpart (SImode, operands[1])));
      emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3], operands[4]));
    }

  /* The split template stores operand 3 as a DImode value.  */
  operands[3] = gen_lowpart (DImode, operands[3]);
}
  [(set_attr "isa" "sse4,*")
   (set_attr "type" "multi")
   (set_attr "mode" "")
   (set_attr "unit" "i387")
   (set_attr "fp_int_src" "true")])

;; Break partial SSE register dependency stall.  This splitter should split
;; late in the pass sequence (after register rename pass), so allocated
;; registers won't change anymore.
;; The preparation code zeroes the destination (in its vector mode) before
;; the vec_merge form of the conversion is emitted.
(define_split
  [(set (match_operand:MODEF 0 "sse_reg_operand")
        (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
  "!TARGET_AVX && TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY && epilogue_completed && optimize_function_for_speed_p (cfun) && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)"
  [(set (match_dup 0)
        (vec_merge:
          (vec_duplicate:
            (float:MODEF
              (match_dup 1)))
          (match_dup 0)
          (const_int 1)))]
{
  const machine_mode vmode = mode;

  operands[0] = lowpart_subreg (vmode, operands[0], mode);
  emit_move_insn (operands[0], CONST0_RTX (vmode));
})

;; Unsigned SWI12 -> MODEF on 32-bit SSE math: widen the source to SImode
;; as unsigned (third argument 1) and reuse the signed SImode conversion.
(define_expand "floatuns2"
  [(set (match_operand:MODEF 0 "register_operand")
        (unsigned_float:MODEF (match_operand:SWI12 1 "nonimmediate_operand")))]
  "!TARGET_64BIT && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH"
{
  operands[1] = convert_to_mode (SImode, operands[1], 1);
  emit_insn (gen_floatsi2 (operands[0], operands[1]));
  DONE;
})

;; Unsigned SWI48 -> MODEF using the AVX512F vcvtusi2* instruction.
(define_insn "*floatuns2_avx512"
  [(set (match_operand:MODEF 0 "register_operand" "=v")
        (unsigned_float:MODEF (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
  "TARGET_AVX512F && TARGET_SSE_MATH"
  "vcvtusi2\t{%1, %0, %0|%0, %0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "avx_partial_xmm_update" "true")
   (set_attr "prefix" "evex")
   (set_attr "mode" "")])

;; Avoid store forwarding (partial memory) stall penalty by extending
;; SImode value to DImode through XMM register instead of pushing two
;; SImode values to stack.  Also note that fild loads from memory only.
(define_insn_and_split "floatunssi2_i387_with_xmm"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
        (unsigned_float:X87MODEF (match_operand:SI 1 "nonimmediate_operand" "rm")))
   (clobber (match_operand:DI 2 "memory_operand" "=m"))
   (clobber (match_scratch:DI 3 "=x"))]
  "!TARGET_64BIT && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
  "#"
  "&& reload_completed"
  [(set (match_dup 3) (zero_extend:DI (match_dup 1)))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
  ""
  [(set_attr "type" "multi")
   (set_attr "mode" "")])

;; Unsigned SImode -> X87MODEF expander.  Without SSE math for the mode it
;; uses the x87-with-XMM pattern and a SLOT_TEMP stack slot; with SSE math
;; but no AVX512F it calls the ix86_expand_convert_uns_si*_sse helper;
;; otherwise the expander falls through to its RTL template.
(define_expand "floatunssi2"
  [(set (match_operand:X87MODEF 0 "register_operand")
        (unsigned_float:X87MODEF (match_operand:SI 1 "nonimmediate_operand")))]
  "(!TARGET_64BIT && TARGET_80387 && X87_ENABLE_FLOAT (mode, DImode) && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC) || ((!TARGET_64BIT || TARGET_AVX512F) && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)"
{
  if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
    {
      emit_insn (gen_floatunssi2_i387_with_xmm (operands[0], operands[1], assign_386_stack_local (DImode, SLOT_TEMP)));
      DONE;
    }
  if (!TARGET_AVX512F)
    {
      ix86_expand_convert_uns_si_sse (operands[0], operands[1]);
      DONE;
    }
})

;; Unsigned DImode -> SFmode on 64-bit targets; without AVX512F the
;; conversion is emitted by x86_emit_floatuns.
(define_expand "floatunsdisf2"
  [(set (match_operand:SF 0 "register_operand")
        (unsigned_float:SF (match_operand:DI 1 "nonimmediate_operand")))]
  "TARGET_64BIT && TARGET_SSE && TARGET_SSE_MATH"
{
  if (!TARGET_AVX512F)
    {
      x86_emit_floatuns (operands);
      DONE;
    }
})

;; Unsigned DImode -> DFmode.  32-bit targets use
;; ix86_expand_convert_uns_didf_sse; 64-bit targets without AVX512F use
;; x86_emit_floatuns.
(define_expand "floatunsdidf2"
  [(set (match_operand:DF 0 "register_operand")
        (unsigned_float:DF (match_operand:DI 1 "nonimmediate_operand")))]
  "((TARGET_64BIT && TARGET_AVX512F) || TARGET_KEEPS_VECTOR_ALIGNED_STACK) && TARGET_SSE2 && TARGET_SSE_MATH"
{
  if (!TARGET_64BIT)
    {
      ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
      DONE;
    }
  if (!TARGET_AVX512F)
    {
      x86_emit_floatuns (operands);
      DONE;
    }
})

;; Load effective address instructions

;; lea into an SWI48 register.  An address matching SImode_address_operand
;; is emitted as a 32-bit lea (asserted to occur only on 64-bit targets)
;; and gives the insn mode "SI".
(define_insn "*lea"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
  "ix86_hardreg_mov_ok (operands[0], operands[1])"
{
  if (SImode_address_operand (operands[1], VOIDmode))
    {
      gcc_assert (TARGET_64BIT);
      return "lea{l}\t{%E1, %k0|%k0, %E1}";
    }
  else
    return "lea{}\t{%E1, %0|%0, %E1}";
}
  [(set_attr "type" "lea")
   (set (attr "mode")
        (if_then_else
          (match_operand 1 "SImode_address_operand")
          (const_string "SI")
          (const_string "")))])

;; Replace a lea by the sequence from ix86_split_lea_for_addr when
;; FLAGS_REG is dead and ix86_avoid_lea_for_addr asks for it.
(define_peephole2
  [(set (match_operand:SWI48 0 "register_operand")
        (match_operand:SWI48 1 "address_no_seg_operand"))]
  "ix86_hardreg_mov_ok (operands[0], operands[1]) && peep2_regno_dead_p (0, FLAGS_REG) && ix86_avoid_lea_for_addr (peep2_next_insn (0), operands)"
  [(const_int 0)]
{
  machine_mode mode = mode;

  /* Emit all operations in SImode for zero-extended addresses. */
  if (SImode_address_operand (operands[1], VOIDmode))
    mode = SImode;

  ix86_split_lea_for_addr (peep2_next_insn (0), operands, mode);

  /* Zero-extend return register to DImode for zero-extended addresses. */
  if (mode != mode)
    emit_insn (gen_zero_extendsidi2 (operands[0], gen_lowpart (mode, operands[0])));

  DONE;
})

;; ix86_split_lea_for_addr emits the shifts as MULT to avoid it from being
;; peephole2 optimized back into a lea.  Split that into the shift during
;; the following split pass.
;; After reload, rewrite a multiply of a register by a const1248_operand
;; as a left shift by exact_log2 of that constant.
(define_split
  [(set (match_operand:SWI48 0 "general_reg_operand")
        (mult:SWI48 (match_dup 0)
                    (match_operand:SWI48 1 "const1248_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed"
  [(parallel [(set (match_dup 0)
                   (ashift:SWI48 (match_dup 0) (match_dup 1)))
              (clobber (reg:CC FLAGS_REG))])]
  "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")

;; Add instructions

;; Generic add expander; all work is delegated to
;; ix86_expand_binary_operator.
(define_expand "add3"
  [(set (match_operand:SDWIM 0 "nonimmediate_operand")
        (plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
                    (match_operand:SDWIM 2 "")))]
  ""
{
  ix86_expand_binary_operator (PLUS, mode, operands, TARGET_APX_NDD);
  DONE;
})

;; Double-word add.  After reload it is split into an add of the low
;; halves that sets the carry (CCC) and an add-with-carry of the high
;; halves.  split_double_mode fills operands 0-2 with the low parts and
;; operands 3-5 with the high parts.
(define_insn_and_split "*add3_doubleword"
  [(set (match_operand: 0 "nonimmediate_operand" "=ro,r,&r,&r,&r,&r,&r")
        (plus:
          (match_operand: 1 "nonimmediate_operand" "%0,0,ro,r,ro,jO,r")
          (match_operand: 2 "x86_64_hilo_general_operand" "r,o,r,,K,,r")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)"
  "#"
  "&& reload_completed"
  [(parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:DWIH (match_dup 1) (match_dup 2))
                     (match_dup 1)))
              (set (match_dup 0)
                   (plus:DWIH (match_dup 1) (match_dup 2)))])
   (parallel [(set (match_dup 3)
                   (plus:DWIH
                     (plus:DWIH
                       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                       (match_dup 4))
                     (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])]
{
  split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]);

  /* A zero low addend cannot carry, so handle the halves directly
     instead of emitting the add/adc pair.  */
  if (operands[2] == const0_rtx)
    {
      /* Under NDD op0 and op1 may not equal, do not delete insn then. */
      bool emit_insn_deleted_note_p = true;
      if (!rtx_equal_p (operands[0], operands[1]))
        {
          emit_move_insn (operands[0], operands[1]);
          emit_insn_deleted_note_p = false;
        }
      if (operands[5] != const0_rtx)
        ix86_expand_binary_operator (PLUS, mode, &operands[3], TARGET_APX_NDD);
      else if (!rtx_equal_p (operands[3], operands[4]))
        emit_move_insn (operands[3], operands[4]);
      else if (emit_insn_deleted_note_p)
        emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])

;; Double-word add of a zero-extended single-word operand 2.  The high
;; half only receives the carry (the template adds const_int 0).
(define_insn_and_split "*add3_doubleword_zext"
  [(set (match_operand: 0 "nonimmediate_operand" "=r,o,&r,&r")
        (plus:
          (zero_extend:
            (match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r"))
          (match_operand: 1 "nonimmediate_operand" "0,0,r,m")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (UNKNOWN, mode, operands, TARGET_APX_NDD)"
  "#"
  "&& reload_completed"
  [(parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:DWIH (match_dup 1) (match_dup 2))
                     (match_dup 1)))
              (set (match_dup 0)
                   (plus:DWIH (match_dup 1) (match_dup 2)))])
   (parallel [(set (match_dup 3)
                   (plus:DWIH
                     (plus:DWIH
                       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                       (match_dup 4))
                     (const_int 0)))
              (clobber (reg:CC FLAGS_REG))])]
  "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[3]);"
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])

;; Double-word add where one addend is a concatenation of two single
;; words: operand 2 shifted left by the word size combined (any_or_plus)
;; with zero-extended operand 4.  Operand 4 is added to the low half and
;; operand 2, plus carry, to the high half.
(define_insn_and_split "*add3_doubleword_concat"
  [(set (match_operand: 0 "register_operand" "=&r")
        (plus:
          (any_or_plus:
            (ashift:
              (zero_extend:
                (match_operand:DWIH 2 "nonimmediate_operand" "rm"))
              (match_operand:QI 3 "const_int_operand"))
            (zero_extend:
              (match_operand:DWIH 4 "nonimmediate_operand" "rm")))
          (match_operand: 1 "register_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[3]) == * BITS_PER_UNIT"
  "#"
  "&& reload_completed"
  [(parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:DWIH (match_dup 1) (match_dup 4))
                     (match_dup 1)))
              (set (match_dup 0)
                   (plus:DWIH (match_dup 1) (match_dup 4)))])
   (parallel [(set (match_dup 5)
                   (plus:DWIH
                     (plus:DWIH
                       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                       (match_dup 6))
                     (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
  "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[5]);")

;; As above, but the other addend (operand 1) is a zero-extended single
;; word.  The split copies operand 4 into the low half and operand 2 into
;; the high half, then adds operand 1 to the low half and the carry to the
;; high half.
(define_insn_and_split "*add3_doubleword_concat_zext"
  [(set (match_operand: 0 "register_operand" "=&r")
        (plus:
          (any_or_plus:
            (ashift:
              (zero_extend:
                (match_operand:DWIH 2 "nonimmediate_operand" "rm"))
              (match_operand:QI 3 "const_int_operand"))
            (zero_extend:
              (match_operand:DWIH 4 "nonimmediate_operand" "rm")))
          (zero_extend:
            (match_operand:DWIH 1 "nonimmediate_operand" "rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[3]) == * BITS_PER_UNIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))
   (parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:DWIH (match_dup 0) (match_dup 1))
                     (match_dup 0)))
              (set (match_dup 0)
                   (plus:DWIH (match_dup 0) (match_dup 1)))])
   (set (match_dup 5) (match_dup 2))
   (parallel [(set (match_dup 5)
                   (plus:DWIH
                     (plus:DWIH
                       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                       (match_dup 5))
                     (const_int 0)))
              (clobber (reg:CC FLAGS_REG))])]
  "split_double_mode (mode, &operands[0], 1, &operands[0], &operands[5]);")

;; SWI48 add.  Alternative 3 has type "lea" and is output as "#";
;; alternative 2 has operand 2 tied to the destination, so the operands
;; are swapped before printing; alternatives 4-6 are the APX NDD forms.
;; Constants accepted by x86_maybe_negate_const_int are emitted as sub.
(define_insn "*add_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r")
        (plus:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r")
          (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,BM")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
      return "#";

    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return use_ndd ? "inc{}\t{%1, %0|%0, %1}" : "inc{}\t%0";
      else
        {
          gcc_assert (operands[2] == constm1_rtx);
          return use_ndd ? "dec{}\t{%1, %0|%0, %1}" : "dec{}\t%0";
        }

    default:
      /* For most processors, ADD is faster than LEA. This alternative was added to use ADD as much as possible. */
      if (which_alternative == 2)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], mode))
        return use_ndd ? "sub{}\t{%2, %1, %0|%0, %1, %2}" : "sub{}\t{%2, %0|%0, %2}";

      return use_ndd ? "add{}\t{%2, %1, %0|%0, %1, %2}" : "add{}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd")
   (set (attr "type")
        (cond [(eq_attr "alternative" "3")
                 (const_string "lea")
               (match_operand:SWI48 2 "incdec_operand")
                 (const_string "incdec")
              ]
              (const_string "alu")))
   (set (attr "length_immediate")
        (if_then_else
          (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
          (const_string "1")
          (const_string "*")))
   (set_attr "mode" "")])

;; It may seem that nonimmediate operand is proper one for operand 1.
;; The addsi_1 pattern allows nonimmediate operand at that place and
;; we take care in ix86_binary_operator_ok to not allow two memory
;; operands so proper swapping will be done in reload.  This allows
;; patterns constructed from addsi_1 to match.
;; Here alternative 2 is the lea form and alternative 1 is the one whose
;; operands are swapped before output.
(define_insn "addsi_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r")
        (zero_extend:DI
          (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,r,rm,rjM")
                   (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,le,rBMe,r,e"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
      return "#";

    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}" : "inc{l}\t%k0";
      else
        {
          gcc_assert (operands[2] == constm1_rtx);
          return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}" : "dec{l}\t%k0";
        }

    default:
      /* For most processors, ADD is faster than LEA. This alternative was added to use ADD as much as possible. */
      if (which_alternative == 1)
        std::swap (operands[1], operands[2]);

      /* NOTE(review): the NDD AT&T templates below are spelled "%2 ,%1",
         unlike the "%2, %1" used by the sibling add patterns; presumably
         cosmetic only -- confirm before normalizing.  */
      if (x86_maybe_negate_const_int (&operands[2], SImode))
        return use_ndd ? "sub{l}\t{%2 ,%1, %k0|%k0, %1, %2}" : "sub{l}\t{%2, %k0|%k0, %2}";

      return use_ndd ? "add{l}\t{%2 ,%1, %k0|%k0, %1, %2}" : "add{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
   (set (attr "type")
        (cond [(eq_attr "alternative" "2")
                 (const_string "lea")
               (match_operand:SI 2 "incdec_operand")
                 (const_string "incdec")
              ]
              (const_string "alu")))
   (set (attr "length_immediate")
        (if_then_else
          (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
          (const_string "1")
          (const_string "*")))
   (set_attr "mode" "SI")])

;; HImode add.  Alternative 3 is the lea form (its insn mode is SI);
;; alternative 2 has its operands swapped before output; alternatives 4
;; and 5 are the APX NDD forms.
(define_insn "*addhi_1"
  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r")
        (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r")
                 (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (PLUS, HImode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
      return "#";

    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0";
      else
        {
          gcc_assert (operands[2] == constm1_rtx);
          return use_ndd ? "dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0";
        }

    default:
      /* For most processors, ADD is faster than LEA. This alternative was added to use ADD as much as possible. */
      if (which_alternative == 2)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], HImode))
        return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}" : "sub{w}\t{%2, %0|%0, %2}";

      return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}" : "add{w}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
   (set (attr "type")
        (cond [(eq_attr "alternative" "3")
                 (const_string "lea")
               (match_operand:HI 2 "incdec_operand")
                 (const_string "incdec")
              ]
              (const_string "alu")))
   (set (attr "length_immediate")
        (if_then_else
          (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
          (const_string "1")
          (const_string "*")))
   (set_attr "mode" "HI,HI,HI,SI,HI,HI")])

;; QImode add.  Alternatives 3-5 have insn mode SI; when the insn mode is
;; not QI ("widen") the non-NDD forms are printed as 32-bit operations.
;; Alternative 5 is the lea form, alternatives 2 and 4 have their operands
;; swapped before output, and alternatives 6 and 7 are the APX NDD forms.
(define_insn "*addqi_1"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r")
        (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r")
                 (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)"
{
  bool widen = (get_attr_mode (insn) != MODE_QI);
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
      return "#";

    case TYPE_INCDEC:
      /* The first "else" pairs with "if (use_ndd)", the second with the
         outer const1_rtx test.  */
      if (operands[2] == const1_rtx)
        if (use_ndd)
          return "inc{b}\t{%1, %0|%0, %1}";
        else
          return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
      else
        {
          gcc_assert (operands[2] == constm1_rtx);
          if (use_ndd)
            return "dec{b}\t{%1, %0|%0, %1}";
          else
            return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
        }

    default:
      /* For most processors, ADD is faster than LEA. These alternatives were added to use ADD as much as possible. */
      if (which_alternative == 2 || which_alternative == 4)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], QImode))
        {
          if (use_ndd)
            return "sub{b}\t{%2, %1, %0|%0, %1, %2}";
          else
            return widen ? "sub{l}\t{%2, %k0|%k0, %2}" : "sub{b}\t{%2, %0|%0, %2}";
        }
      if (use_ndd)
        return "add{b}\t{%2, %1, %0|%0, %1, %2}";
      else
        return widen ? "add{l}\t{%k2, %k0|%k0, %k2}" : "add{b}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd")
   (set (attr "type")
        (cond [(eq_attr "alternative" "5")
                 (const_string "lea")
               (match_operand:QI 2 "incdec_operand")
                 (const_string "incdec")
              ]
              (const_string "alu")))
   (set (attr "length_immediate")
        (if_then_else
          (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
          (const_string "1")
          (const_string "*")))
   (set_attr "mode" "QI,QI,QI,SI,SI,SI,QI,QI")
   ;; Potential partial reg stall on alternatives 3 and 4.
   (set (attr "preferred_for_speed")
        (cond [(eq_attr "alternative" "3,4")
                 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
              (symbol_ref "true")))])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Add into the strict low part of a register.  Alternative 1 is output
;; as "#" and split after reload, when the destination matches neither
;; input, into a strict_low_part move followed by a two-operand add.
(define_insn_and_split "*add_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+,&"))
        (plus:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "%0,!")
                    (match_operand:SWI12 2 "general_operand" "mn,mn")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
{
  if (which_alternative)
    return "#";

  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return "inc{}\t%0";
      else
        {
          gcc_assert (operands[2] == constm1_rtx);
          return "dec{}\t%0";
        }

    default:
      /* NOTE(review): QImode is passed here although the pattern
         iterates over SWI12 -- confirm this is intended.  */
      if (x86_maybe_negate_const_int (&operands[2], QImode))
        return "sub{}\t{%2, %0|%0, %2}";

      return "add{}\t{%2, %0|%0, %2}";
    }
}
  "&& reload_completed && !(rtx_equal_p (operands[0], operands[1]) || rtx_equal_p (operands[0], operands[2]))"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (plus:SWI12 (match_dup 0) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set (attr "type")
        (if_then_else (match_operand:QI 2 "incdec_operand")
                      (const_string "incdec")
                      (const_string "alu")))
   (set_attr "mode" "")])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Add the byte extracted at bit offset 8, width 8, from operand 2 (printed
;; with %h, the "high" register name) into the strict low part of a QImode
;; register.  Alternative 0 is a single add; alternative 1 is output as "#"
;; and split after reload, when operands 0 and 1 differ, into a
;; strict_low_part move followed by the add.
(define_insn_and_split "*addqi_ext_1_slp"
  [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
        (plus:QI
          (subreg:QI
            (match_operator:SWI248 3 "extract_operator"
              [(match_operand 2 "int248_register_operand" "Q,Q")
               (const_int 8)
               (const_int 8)]) 0)
          (match_operand:QI 1 "nonimmediate_operand" "0,!qm")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "@ add{b}\t{%h2, %0|%0, %h2} #"
  "&& reload_completed && !rtx_equal_p (operands[0], operands[1])"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (plus:QI
             (subreg:QI
               (match_op_dup 3
                 [(match_dup 2) (const_int 8) (const_int 8)]) 0)
             (match_dup 0)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; Same, but both addends are bytes extracted at bit offset 8, width 8.
;; Always split after reload: the byte extracted from operand 2 is first
;; moved into the strict low part of the destination, then the byte
;; extracted from operand 1 is added to it.
(define_insn_and_split "*addqi_ext_2_slp"
  [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
        (plus:QI
          (subreg:QI
            (match_operator:SWI248 3 "extract_operator"
              [(match_operand 1 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)
          (subreg:QI
            (match_operator:SWI248 4 "extract_operator"
              [(match_operand 2 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (strict_low_part (match_dup 0))
        (subreg:QI
          (match_op_dup 4
            [(match_dup 2) (const_int 8) (const_int 8)]) 0))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (plus:QI
             (subreg:QI
               (match_op_dup 3
                 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
             (match_dup 0)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; Split non destructive adds if we cannot use lea.
;; Three-operand register add that ix86_avoid_lea_for_add rejects as a
;; lea: copy operand 1 into the destination, then add operand 2 in place.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (plus:SWI48 (match_operand:SWI48 1 "register_operand")
                    (match_operand:SWI48 2 "x86_64_nonmemory_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed && ix86_avoid_lea_for_add (insn, operands)"
  [(set (match_dup 0) (match_dup 1))
   (parallel [(set (match_dup 0)
                   (plus:SWI48 (match_dup 0) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])])

;; Split non destructive adds if we cannot use lea.
;; Zero-extending SImode variant: operand 3 is the SImode low part of the
;; DImode destination and receives the copy of operand 1.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (plus:SI (match_operand:SI 1 "register_operand")
                   (match_operand:SI 2 "x86_64_nonmemory_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && reload_completed && ix86_avoid_lea_for_add (insn, operands)"
  [(set (match_dup 3) (match_dup 1))
   (parallel [(set (match_dup 0)
                   (zero_extend:DI (plus:SI (match_dup 3) (match_dup 2))))
              (clobber (reg:CC FLAGS_REG))])]
  "operands[3] = gen_lowpart (SImode, operands[0]);")

;; Convert add to the lea pattern to avoid flags dependency.
;; The replacement is the same plus without the FLAGS_REG clobber; when the
;; two modes compared below differ, all three operands are first narrowed
;; or widened with gen_lowpart.
(define_split
  [(set (match_operand:SWI 0 "register_operand")
        (plus:SWI (match_operand:SWI 1 "register_operand")
                  (match_operand:SWI 2 "")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed && ix86_lea_for_add_ok (insn, operands)"
  [(set (match_dup 0)
        (plus: (match_dup 1) (match_dup 2)))]
{
  if (mode != mode)
    {
      operands[0] = gen_lowpart (mode, operands[0]);
      operands[1] = gen_lowpart (mode, operands[1]);
      operands[2] = gen_lowpart (mode, operands[2]);
    }
})

;; Convert add to the lea pattern to avoid flags dependency.
;; Zero-extending SImode variant: the replacement is the same
;; zero_extend/plus expression without the FLAGS_REG clobber.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (plus:SI (match_operand:SI 1 "register_operand")
                   (match_operand:SI 2 "x86_64_nonmemory_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && reload_completed && ix86_lea_for_add_ok (insn, operands)"
  [(set (match_dup 0)
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))])

;; Add that also sets the flags from comparing the sum with zero
;; (CCGOCmode).  Alternative 2 has operand 2 tied to the destination, so
;; the operands are swapped before output; alternatives 3 and 4 are the
;; APX NDD forms.
(define_insn "*add_2"
  [(set (reg FLAGS_REG)
        (compare
          (plus:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "%0,0,,rm,r")
            (match_operand:SWI 2 "" ",,0,r,"))
          (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,,r,r")
        (plus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return use_ndd ? "inc{}\t{%1, %0|%0, %1}" : "inc{}\t%0";
      else
        {
          gcc_assert (operands[2] == constm1_rtx);
          return use_ndd ? "dec{}\t{%1, %0|%0, %1}" : "dec{}\t%0";
        }

    default:
      if (which_alternative == 2)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], mode))
        return use_ndd ? "sub{}\t{%2, %1, %0|%0, %1, %2}" : "sub{}\t{%2, %0|%0, %2}";

      return use_ndd ? "add{}\t{%2, %1, %0|%0, %1, %2}" : "add{}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
   (set (attr "type")
        (if_then_else (match_operand:SWI 2 "incdec_operand")
                      (const_string "incdec")
                      (const_string "alu")))
   (set (attr "length_immediate")
        (if_then_else
          (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
          (const_string "1")
          (const_string "*")))
   (set_attr "mode" "")])

;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; Flag-setting SImode add whose result is zero-extended to DImode.
;; Alternative 1 has its operands swapped before output.
(define_insn "*addsi_2_zext"
  [(set (reg FLAGS_REG)
        (compare
          (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,rm")
                   (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,rBMe,re"))
          (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}" : "inc{l}\t%k0";
      else
        {
          gcc_assert (operands[2] == constm1_rtx);
          return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}" : "dec{l}\t%k0";
        }

    default:
      if (which_alternative == 1)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], SImode))
        return use_ndd ? "sub{l}\t{%2, %1, %k0|%k0, %1, %2}" : "sub{l}\t{%2, %k0|%k0, %2}";

      return use_ndd ? "add{l}\t{%2, %1, %k0|%k0, %1, %2}" : "add{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set (attr "type")
        (if_then_else (match_operand:SI 2 "incdec_operand")
                      (const_string "incdec")
                      (const_string "alu")))
   (set (attr "length_immediate")
        (if_then_else
          (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
          (const_string "1")
          (const_string "*")))
   (set_attr "mode" "SI")])

;; Comparison of the negation of operand 2 with operand 1 (CCZmode),
;; emitted as an add whose result goes to a clobbered scratch register.
;; Two memory operands are rejected.  Alternative 1 has its operands
;; swapped before output.
(define_insn "*add_3"
  [(set (reg FLAGS_REG)
        (compare
          (neg:SWI (match_operand:SWI 2 "" ",0,,re"))
          (match_operand:SWI 1 "nonimmediate_operand" "%0,,r,rm")))
   (clobber (match_scratch:SWI 0 "=,,r,r"))]
  "ix86_match_ccmode (insn, CCZmode) && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return use_ndd ? "inc{}\t{%1, %0|%0, %1}" : "inc{}\t%0";
      else
        {
          gcc_assert (operands[2] == constm1_rtx);
          return use_ndd ? "dec{}\t{%1, %0|%0, %1}" : "dec{}\t%0";
        }

    default:
      if (which_alternative == 1)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], mode))
        return use_ndd ? "sub{}\t{%2, %1, %0|%0, %1, %2}" : "sub{}\t{%2, %0|%0, %2}";

      return use_ndd ? "add{}\t{%2, %1, %0|%0, %1, %2}" : "add{}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set (attr "type")
        (if_then_else (match_operand:SWI 2 "incdec_operand")
                      (const_string "incdec")
                      (const_string "alu")))
   (set (attr "length_immediate")
        (if_then_else
          (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
          (const_string "1")
          (const_string "*")))
   (set_attr "mode" "")])

;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; As *add_3 for SImode, but the sum is kept and zero-extended to DImode.
(define_insn "*addsi_3_zext"
  [(set (reg FLAGS_REG)
        (compare
          (neg:SI (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,rBMe,re"))
          (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,rm")))
   (set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode) && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      if (operands[2] == const1_rtx)
        return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}" : "inc{l}\t%k0";
      else
        {
          gcc_assert (operands[2] == constm1_rtx);
          return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}" : "dec{l}\t%k0";
        }

    default:
      if (which_alternative == 1)
        std::swap (operands[1], operands[2]);

      if (x86_maybe_negate_const_int (&operands[2], SImode))
        return use_ndd ? "sub{l}\t{%2, %1, %k0|%k0, %1, %2}" : "sub{l}\t{%2, %k0|%k0, %2}";

      return use_ndd ? "add{l}\t{%2, %1, %k0|%k0, %1, %2}" : "add{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set (attr "type")
        (if_then_else (match_operand:SI 2 "incdec_operand")
                      (const_string "incdec")
                      (const_string "alu")))
   (set (attr "length_immediate")
        (if_then_else
          (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
          (const_string "1")
          (const_string "*")))
   (set_attr "mode" "SI")])

; For comparisons against 1, -1 and 128, we may generate better code
; by converting cmp to add, inc or dec as done by peephole2.  This pattern
; is matched then.
; We can't accept general immediate, because for
; case of overflows, the result is messed up.
; Also carry flag is reversed compared to cmp, so this conversion is valid
; only for comparisons not depending on it.
;
; DImode compare of operand 1 against immediate operand 2, printed as an
; arithmetic instruction rather than cmp.  Operand 0 is only a scratch
; register; the printed instruction writes it.  The first alternative ties
; operand 1 to operand 0, the second one is tagged apx_ndd and prints
; operand 1 as a separate source.
(define_insn "*adddi_4"
  [(set (reg FLAGS_REG)
        (compare
          (match_operand:DI 1 "nonimmediate_operand" "0,rm")
          (match_operand:DI 2 "x86_64_immediate_operand" "e,e")))
   (clobber (match_scratch:DI 0 "=r,r"))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCGCmode)"
{
  /* True when the selected alternative carries the apx_ndd isa attribute;
     those alternatives use the three-operand spelling.  */
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      /* The printed operation is the inverse of the compared constant:
         a compare against -1 prints inc, a compare against 1 prints dec.  */
      if (operands[2] == constm1_rtx)
        return use_ndd ? "inc{q}\t{%1, %0|%0, %1}" : "inc{q}\t%0";
      else
        {
          gcc_assert (operands[2] == const1_rtx);
          return use_ndd ? "dec{q}\t{%1, %0|%0, %1}" : "dec{q}\t%0";
        }

    default:
      /* add is printed when x86_maybe_negate_const_int returns true
         (presumably after negating operands[2] in place -- TODO confirm
         against its definition); otherwise sub of the constant as given.  */
      if (x86_maybe_negate_const_int (&operands[2], DImode))
        return use_ndd ? "add{q}\t{%2, %1, %0|%0, %1, %2}"
                       : "add{q}\t{%2, %0|%0, %2}";
      return use_ndd ? "sub{q}\t{%2, %1, %0|%0, %1, %2}"
                     : "sub{q}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,apx_ndd")
   ;; "incdec" when operand 2 satisfies incdec_operand, "alu" otherwise.
   (set (attr "type")
     (if_then_else (match_operand:DI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   ;; "1" only for alu insns whose operand 2 satisfies const128_operand;
   ;; "*" in every other case.
   (set (attr "length_immediate")
      (if_then_else
        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
        (const_string "1")
        (const_string "*")))
   (set_attr "mode" "DI")])

; For comparisons against 1, -1 and 128, we may generate better code
; by converting cmp to add, inc or dec as done by peephole2.  This pattern
; is matched then.  We can't accept general immediate, because for
; case of overflows, the result is messed up.
; Also carry flag is reversed compared to cmp, so this conversion is valid
; only for comparisons not depending on it.
(define_insn "*add_4" [(set (reg FLAGS_REG) (compare (match_operand:SWI124 1 "nonimmediate_operand" "0,rm") (match_operand:SWI124 2 "const_int_operand"))) (clobber (match_scratch:SWI124 0 "=,r"))] "ix86_match_ccmode (insn, CCGCmode)" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == constm1_rtx) return use_ndd ? "inc{}\t{%1, %0|%0, %1}" : "inc{}\t%0"; else { gcc_assert (operands[2] == const1_rtx); return use_ndd ? "dec{}\t{%1, %0|%0, %1}" : "dec{}\t%0"; } default: if (x86_maybe_negate_const_int (&operands[2], mode)) return use_ndd ? "add{}\t{%2, %1, %0|%0, %1, %2}" : "add{}\t{%2, %0|%0, %2}"; return use_ndd ? "sub{}\t{%2, %1, %0|%0, %1, %2}" : "sub{}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,apx_ndd") (set (attr "type") (if_then_else (match_operand: 2 "incdec_operand") (const_string "incdec") (const_string "alu"))) (set (attr "length_immediate") (if_then_else (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) (const_string "1") (const_string "*"))) (set_attr "mode" "")]) (define_insn "*add_5" [(set (reg FLAGS_REG) (compare (plus:SWI (match_operand:SWI 1 "nonimmediate_operand" "%0,,r,rm") (match_operand:SWI 2 "" ",0,,re")) (const_int 0))) (clobber (match_scratch:SWI 0 "=,,r,r"))] "ix86_match_ccmode (insn, CCGOCmode) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == const1_rtx) return use_ndd ? "inc{}\t{%1, %0|%0, %1}" : "inc{}\t%0"; else { gcc_assert (operands[2] == constm1_rtx); return use_ndd ? "dec{}\t{%1, %0|%0, %1}" : "dec{}\t%0"; } default: if (which_alternative == 1) std::swap (operands[1], operands[2]); if (x86_maybe_negate_const_int (&operands[2], mode)) return use_ndd ? "sub{}\t{%2, %1, %0|%0, %1, %2}" : "sub{}\t{%2, %0|%0, %2}"; return use_ndd ? 
"add{}\t{%2, %1, %0|%0, %1, %2}" : "add{}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,*,apx_ndd,apx_ndd") (set (attr "type") (if_then_else (match_operand:SWI 2 "incdec_operand") (const_string "incdec") (const_string "alu"))) (set (attr "length_immediate") (if_then_else (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) (const_string "1") (const_string "*"))) (set_attr "mode" "")]) (define_insn "*addqi_ext_0" [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn") (plus:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" [(match_operand 2 "int248_register_operand" "Q") (const_int 8) (const_int 8)]) 0) (match_operand:QI 1 "nonimmediate_operand" "0"))) (clobber (reg:CC FLAGS_REG))] "" "add{b}\t{%h2, %0|%0, %h2}" [(set_attr "addr" "gpr8") (set_attr "type" "alu") (set_attr "mode" "QI")]) (define_insn_and_split "*addqi_ext2_0" [(set (match_operand:QI 0 "register_operand" "=&Q") (plus:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" [(match_operand 1 "int248_register_operand" "Q") (const_int 8) (const_int 8)]) 0) (subreg:QI (match_operator:SWI248 4 "extract_operator" [(match_operand 2 "int248_register_operand" "Q") (const_int 8) (const_int 8)]) 0))) (clobber (reg:CC FLAGS_REG))] "" "#" "&& reload_completed" [(set (match_dup 0) (subreg:QI (match_op_dup 4 [(match_dup 2) (const_int 8) (const_int 8)]) 0)) (parallel [(set (match_dup 0) (plus:QI (subreg:QI (match_op_dup 3 [(match_dup 1) (const_int 8) (const_int 8)]) 0) (match_dup 0))) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "type" "alu") (set_attr "mode" "QI")]) (define_expand "addqi_ext_1" [(parallel [(set (zero_extract:HI (match_operand:HI 0 "register_operand") (const_int 8) (const_int 8)) (subreg:HI (plus:QI (subreg:QI (zero_extract:HI (match_operand:HI 1 "register_operand") (const_int 8) (const_int 8)) 0) (match_operand:QI 2 "const_int_operand")) 0)) (clobber (reg:CC FLAGS_REG))])]) ;; Alternative 1 is needed to work around LRA limitation, see PR82524. 
;; Add QImode operand 2 to the field of operand 1 selected by operator 3
;; (an extract_operator with arguments 8, 8) and store the sum into the
;; 8-bit field at bit position 8 of operand 0.
;; Alternative 0 ties operand 1 to operand 0 and prints inc/dec/add on the
;; "high" QImode register name (%h0).  Alternative 1 prints "#" and is
;; split after reload, when operands 0 and 1 differ, into a copy of the
;; field from operand 1 into operand 0 followed by the same addition with
;; operand 0 as the source.
(define_insn_and_split "*addqi_ext_1"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (plus:QI
            (subreg:QI
              (match_operator:SWI248 3 "extract_operator"
                [(match_operand 1 "int248_register_operand" "0,!Q")
                 (const_int 8)
                 (const_int 8)]) 0)
            (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  /* Any alternative other than the first is never printed directly; it is
     handled by the split below.  */
  if (which_alternative)
    return "#";

  switch (get_attr_type (insn))
    {
    case TYPE_INCDEC:
      /* The "type" attribute below is incdec only when operand 2 satisfies
         incdec_operand, so the constant is expected to be 1 or -1.  */
      if (operands[2] == const1_rtx)
        return "inc{b}\t%h0";
      else
        {
          gcc_assert (operands[2] == constm1_rtx);
          return "dec{b}\t%h0";
        }

    default:
      return "add{b}\t{%2, %h0|%h0, %2}";
    }
}
  "reload_completed && !rtx_equal_p (operands[0], operands[1])"
  ;; First copy the field of operand 1 into operand 0 ...
  [(set (zero_extract:SWI248 (match_dup 0) (const_int 8) (const_int 8))
        (zero_extract:SWI248 (match_dup 1) (const_int 8) (const_int 8)))
   ;; ... then redo the addition with operand 0 as both source and
   ;; destination.
   (parallel
     [(set (zero_extract:SWI248 (match_dup 0) (const_int 8) (const_int 8))
           (subreg:SWI248
             (plus:QI
               (subreg:QI
                 (match_op_dup 3
                   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
               (match_dup 2)) 0))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "addr" "gpr8")
   (set (attr "type")
     (if_then_else (match_operand:QI 2 "incdec_operand")
        (const_string "incdec")
        (const_string "alu")))
   (set_attr "mode" "QI")])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; plusminus of two extracted fields: operator 3 applied to operand 1 and
;; operator 4 applied to operand 2 (both extract_operator with arguments
;; 8, 8), with the result stored into the 8-bit field at bit position 8
;; of operand 0.  Alternative 0 ties operand 1 to operand 0; alternative 1
;; prints "#" and is split after reload.  The split is not applied when
;; operand 0 equals operand 1, nor, for PLUS, when operand 0 equals
;; operand 2.
;; NOTE(review): the output template has no mnemonic before "{b}" and the
;; split condition has nothing on the left of "== PLUS"; both look like
;; iterator substitutions that were lost from this copy -- TODO confirm
;; against the upstream file before relying on this text.
(define_insn_and_split "*qi_ext_2"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (plusminus:QI
            (subreg:QI
              (match_operator:SWI248 3 "extract_operator"
                [(match_operand 1 "int248_register_operand" "0,!Q")
                 (const_int 8)
                 (const_int 8)]) 0)
            (subreg:QI
              (match_operator:SWI248 4 "extract_operator"
                [(match_operand 2 "int248_register_operand" "Q,Q")
                 (const_int 8)
                 (const_int 8)]) 0)) 0))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "@ {b}\t{%h2, %h0|%h0, %h2} #"
  "reload_completed && !(rtx_equal_p (operands[0], operands[1]) || ( == PLUS && rtx_equal_p (operands[0], operands[2])))"
  ;; Copy the field of operand 1 into operand 0 first ...
  [(set (zero_extract:SWI248 (match_dup 0) (const_int 8) (const_int 8))
        (zero_extract:SWI248 (match_dup 1) (const_int 8) (const_int 8)))
   ;; ... then repeat the operation with operand 0 in place of operand 1.
   (parallel
     [(set (zero_extract:SWI248 (match_dup 0) (const_int 8) (const_int 8))
           (subreg:SWI248
             (plusminus:QI
               (subreg:QI
                 (match_op_dup 3
                   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
               (subreg:QI
                 (match_op_dup 4
                   [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; Like DWI, but use POImode instead of OImode.
;; Maps QI, HI, SI, DI and TI to HI, SI, DI, TI and POI respectively.
(define_mode_attr DPWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI") (TI "POI")])

;; Add with jump on overflow.
(define_expand "addv4" [(parallel [(set (reg:CCO FLAGS_REG) (eq:CCO (plus: (sign_extend: (match_operand:SWIDWI 1 "nonimmediate_operand")) (match_dup 4)) (sign_extend: (plus:SWIDWI (match_dup 1) (match_operand:SWIDWI 2 ""))))) (set (match_operand:SWIDWI 0 "register_operand") (plus:SWIDWI (match_dup 1) (match_dup 2)))]) (set (pc) (if_then_else (eq (reg:CCO FLAGS_REG) (const_int 0)) (label_ref (match_operand 3)) (pc)))] "" { ix86_fixup_binary_operands_no_copy (PLUS, mode, operands); if (CONST_SCALAR_INT_P (operands[2])) operands[4] = operands[2]; else operands[4] = gen_rtx_SIGN_EXTEND (mode, operands[2]); }) (define_insn "*addv4" [(set (reg:CCO FLAGS_REG) (eq:CCO (plus: (sign_extend: (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")) (sign_extend: (match_operand:SWI 2 "" "We,m,rWe,m"))) (sign_extend: (plus:SWI (match_dup 1) (match_dup 2))))) (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") (plus:SWI (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" "@ add{}\t{%2, %0|%0, %2} add{}\t{%2, %0|%0, %2} add{}\t{%2, %1, %0|%0, %1, %2} add{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,*,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "addv4_1" [(set (reg:CCO FLAGS_REG) (eq:CCO (plus: (sign_extend: (match_operand:SWI 1 "nonimmediate_operand" "0,rm")) (match_operand: 3 "const_int_operand")) (sign_extend: (plus:SWI (match_dup 1) (match_operand:SWI 2 "x86_64_immediate_operand" ","))))) (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r") (plus:SWI (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD) && CONST_INT_P (operands[2]) && INTVAL (operands[2]) == INTVAL (operands[3])" "@ add{}\t{%2, %0|%0, %2} add{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,apx_ndd") (set_attr "type" "alu") (set_attr "mode" "") (set (attr "length_immediate") (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") (const_string "1") (match_test " == 8") 
(const_string "4")] (const_string "")))]) ;; Quad word integer modes as mode attribute. (define_mode_attr QPWI [(SI "TI") (DI "POI")]) (define_insn_and_split "*addv4_doubleword" [(set (reg:CCO FLAGS_REG) (eq:CCO (plus: (sign_extend: (match_operand: 1 "nonimmediate_operand" "%0,0,ro,r")) (sign_extend: (match_operand: 2 "nonimmediate_operand" "r,o,r,o"))) (sign_extend: (plus: (match_dup 1) (match_dup 2))))) (set (match_operand: 0 "nonimmediate_operand" "=ro,r,&r,&r") (plus: (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" "#" "&& reload_completed" [(parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (plus:DWIH (match_dup 1) (match_dup 2)) (match_dup 1))) (set (match_dup 0) (plus:DWIH (match_dup 1) (match_dup 2)))]) (parallel [(set (reg:CCO FLAGS_REG) (eq:CCO (plus: (plus: (ltu: (reg:CC FLAGS_REG) (const_int 0)) (sign_extend: (match_dup 4))) (sign_extend: (match_dup 5))) (sign_extend: (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) (match_dup 4)) (match_dup 5))))) (set (match_dup 3) (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) (match_dup 4)) (match_dup 5)))])] { split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); } [(set_attr "isa" "*,*,apx_ndd,apx_ndd")]) (define_insn_and_split "*addv4_doubleword_1" [(set (reg:CCO FLAGS_REG) (eq:CCO (plus: (sign_extend: (match_operand: 1 "nonimmediate_operand" "%0,rjM")) (match_operand: 3 "const_scalar_int_operand" "n,n")) (sign_extend: (plus: (match_dup 1) (match_operand: 2 "x86_64_hilo_general_operand" ","))))) (set (match_operand: 0 "nonimmediate_operand" "=ro,&r") (plus: (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD) && CONST_SCALAR_INT_P (operands[2]) && rtx_equal_p (operands[2], operands[3])" "#" "&& reload_completed" [(parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (plus:DWIH (match_dup 1) (match_dup 2)) (match_dup 1))) (set (match_dup 0) (plus:DWIH (match_dup 1) (match_dup 
2)))]) (parallel [(set (reg:CCO FLAGS_REG) (eq:CCO (plus: (plus: (ltu: (reg:CC FLAGS_REG) (const_int 0)) (sign_extend: (match_dup 4))) (match_dup 5)) (sign_extend: (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) (match_dup 4)) (match_dup 5))))) (set (match_dup 3) (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) (match_dup 4)) (match_dup 5)))])] { split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); if (operands[2] == const0_rtx) { if (!rtx_equal_p (operands[0], operands[1])) emit_move_insn (operands[0], operands[1]); emit_insn (gen_addv4_1 (operands[3], operands[4], operands[5], operands[5])); DONE; } } [(set_attr "isa" "*,apx_ndd")]) (define_insn "*addv4_overflow_1" [(set (reg:CCO FLAGS_REG) (eq:CCO (plus: (plus: (match_operator: 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (sign_extend: (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r"))) (sign_extend: (match_operand:SWI 2 "" "rWe,m,rWe,m"))) (sign_extend: (plus:SWI (plus:SWI (match_operator:SWI 5 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 2))))) (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r,r,r") (plus:SWI (plus:SWI (match_op_dup 5 [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" "@ adc{}\t{%2, %0|%0, %2} adc{}\t{%2, %0|%0, %2} adc{}\t{%2, %1, %0|%0, %1, %2} adc{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,*,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "*addv4_overflow_2" [(set (reg:CCO FLAGS_REG) (eq:CCO (plus: (plus: (match_operator: 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (sign_extend: (match_operand:SWI 1 "nonimmediate_operand" "%0,rm"))) (match_operand: 6 "const_int_operand" "n,n")) (sign_extend: (plus:SWI (plus:SWI (match_operator:SWI 5 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)]) (match_dup 
1)) (match_operand:SWI 2 "x86_64_immediate_operand" "e,e"))))) (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r") (plus:SWI (plus:SWI (match_op_dup 5 [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD) && CONST_INT_P (operands[2]) && INTVAL (operands[2]) == INTVAL (operands[6])" "@ adc{}\t{%2, %0|%0, %2} adc{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,apx_ndd") (set_attr "type" "alu") (set_attr "mode" "") (set (attr "length_immediate") (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") (const_string "1") (const_string "4")))]) (define_expand "uaddv4" [(parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (plus:SWIDWI (match_operand:SWIDWI 1 "nonimmediate_operand") (match_operand:SWIDWI 2 "")) (match_dup 1))) (set (match_operand:SWIDWI 0 "register_operand") (plus:SWIDWI (match_dup 1) (match_dup 2)))]) (set (pc) (if_then_else (ltu (reg:CCC FLAGS_REG) (const_int 0)) (label_ref (match_operand 3)) (pc)))] "" "ix86_fixup_binary_operands_no_copy (PLUS, mode, operands);") ;; The lea patterns for modes less than 32 bits need to be matched by ;; several insns converted to real lea by splitters. 
;; SWI12 sum of register operand 1, register operand 2 and immediate
;; operand 3.  Enabled unless TARGET_PARTIAL_REG_STALL is set, or when
;; optimizing the function for size.  Never printed directly ("#"): after
;; reload it is replaced by the same sum in SImode, with all four operands
;; re-expressed through gen_lowpart (SImode, ...).
(define_insn_and_split "*lea_general_1"
  [(set (match_operand:SWI12 0 "register_operand" "=r")
        (plus:SWI12
          (plus:SWI12
            (match_operand:SWI12 1 "register_no_SP_operand" "l")
            (match_operand:SWI12 2 "register_operand" "r"))
          (match_operand:SWI12 3 "immediate_operand" "i")))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (plus:SI
          (plus:SI (match_dup 1) (match_dup 2))
          (match_dup 3)))]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[2] = gen_lowpart (SImode, operands[2]);
  operands[3] = gen_lowpart (SImode, operands[3]);
}
  [(set_attr "type" "lea")
   (set_attr "mode" "SI")])

;; SWI12 form of operand 1 * operand 2 + operand 3, where operand 2 must
;; satisfy const248_operand.  Same enabling condition and post-reload
;; SImode rewrite as above; operand 2 is used unchanged in the new
;; pattern, only operands 0, 1 and 3 go through gen_lowpart.
(define_insn_and_split "*lea_general_2"
  [(set (match_operand:SWI12 0 "register_operand" "=r")
        (plus:SWI12
          (mult:SWI12
            (match_operand:SWI12 1 "register_no_SP_operand" "l")
            (match_operand 2 "const248_operand" "n"))
          (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (plus:SI
          (mult:SI (match_dup 1) (match_dup 2))
          (match_dup 3)))]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[3] = gen_lowpart (SImode, operands[3]);
}
  [(set_attr "type" "lea")
   (set_attr "mode" "SI")])

;; Shift variant of the pattern above: (operand 1 << operand 2) + operand 3,
;; where operand 2 must satisfy const123_operand.  The shift is kept as an
;; ashift in the SImode replacement; operand 2 is again used unchanged.
(define_insn_and_split "*lea_general_2b"
  [(set (match_operand:SWI12 0 "register_operand" "=r")
        (plus:SWI12
          (ashift:SWI12
            (match_operand:SWI12 1 "register_no_SP_operand" "l")
            (match_operand 2 "const123_operand" "n"))
          (match_operand:SWI12 3 "nonmemory_operand" "ri")))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (plus:SI
          (ashift:SI (match_dup 1) (match_dup 2))
          (match_dup 3)))]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[3] = gen_lowpart (SImode, operands[3]);
}
  [(set_attr "type" "lea")
   (set_attr "mode" "SI")])
(define_insn_and_split "*lea_general_3" [(set (match_operand:SWI12 0 "register_operand" "=r") (plus:SWI12 (plus:SWI12 (mult:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l") (match_operand 2 "const248_operand" "n")) (match_operand:SWI12 3 "register_operand" "r")) (match_operand:SWI12 4 "immediate_operand" "i")))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "#" "&& reload_completed" [(set (match_dup 0) (plus:SI (plus:SI (mult:SI (match_dup 1) (match_dup 2)) (match_dup 3)) (match_dup 4)))] { operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]); operands[3] = gen_lowpart (SImode, operands[3]); operands[4] = gen_lowpart (SImode, operands[4]); } [(set_attr "type" "lea") (set_attr "mode" "SI")]) (define_insn_and_split "*lea_general_3b" [(set (match_operand:SWI12 0 "register_operand" "=r") (plus:SWI12 (plus:SWI12 (ashift:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l") (match_operand 2 "const123_operand" "n")) (match_operand:SWI12 3 "register_operand" "r")) (match_operand:SWI12 4 "immediate_operand" "i")))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "#" "&& reload_completed" [(set (match_dup 0) (plus:SI (plus:SI (ashift:SI (match_dup 1) (match_dup 2)) (match_dup 3)) (match_dup 4)))] { operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]); operands[3] = gen_lowpart (SImode, operands[3]); operands[4] = gen_lowpart (SImode, operands[4]); } [(set_attr "type" "lea") (set_attr "mode" "SI")]) (define_insn_and_split "*lea_general_4" [(set (match_operand:SWI12 0 "register_operand" "=r") (any_or:SWI12 (ashift:SWI12 (match_operand:SWI12 1 "register_no_SP_operand" "l") (match_operand 2 "const_0_to_3_operand")) (match_operand 3 "const_int_operand")))] "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && ((unsigned HOST_WIDE_INT) INTVAL (operands[3]) < (HOST_WIDE_INT_1U << INTVAL (operands[2])))" "#" "&& 
reload_completed" [(set (match_dup 0) (plus:SI (mult:SI (match_dup 1) (match_dup 2)) (match_dup 3)))] { operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]); operands[2] = GEN_INT (1 << INTVAL (operands[2])); } [(set_attr "type" "lea") (set_attr "mode" "SI")]) (define_insn_and_split "*lea_general_4" [(set (match_operand:SWI48 0 "register_operand" "=r") (any_or:SWI48 (ashift:SWI48 (match_operand:SWI48 1 "register_no_SP_operand" "l") (match_operand 2 "const_0_to_3_operand")) (match_operand 3 "const_int_operand")))] "(unsigned HOST_WIDE_INT) INTVAL (operands[3]) < (HOST_WIDE_INT_1U << INTVAL (operands[2]))" "#" "&& reload_completed" [(set (match_dup 0) (plus:SWI48 (mult:SWI48 (match_dup 1) (match_dup 2)) (match_dup 3)))] "operands[2] = GEN_INT (1 << INTVAL (operands[2]));" [(set_attr "type" "lea") (set_attr "mode" "")]) ;; Subtract instructions (define_expand "sub3" [(set (match_operand:SDWIM 0 "nonimmediate_operand") (minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand") (match_operand:SDWIM 2 "")))] "" { ix86_expand_binary_operator (MINUS, mode, operands, TARGET_APX_NDD); DONE; }) (define_insn_and_split "*sub3_doubleword" [(set (match_operand: 0 "nonimmediate_operand" "=ro,r,&r,&r") (minus: (match_operand: 1 "nonimmediate_operand" "0,0,ro,r") (match_operand: 2 "x86_64_hilo_general_operand" "r,o,r,o"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)" "#" "&& reload_completed" [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2))) (set (match_dup 0) (minus:DWIH (match_dup 1) (match_dup 2)))]) (parallel [(set (match_dup 3) (minus:DWIH (minus:DWIH (match_dup 4) (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) (match_dup 5))) (clobber (reg:CC FLAGS_REG))])] { split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); if (operands[2] == const0_rtx) { if (!rtx_equal_p (operands[0], operands[1])) emit_move_insn (operands[0], operands[1]); 
ix86_expand_binary_operator (MINUS, mode, &operands[3], TARGET_APX_NDD); DONE; } } [(set_attr "isa" "*,*,apx_ndd,apx_ndd")]) (define_insn_and_split "*sub3_doubleword_zext" [(set (match_operand: 0 "nonimmediate_operand" "=r,o,&r,&r") (minus: (match_operand: 1 "nonimmediate_operand" "0,0,r,o") (zero_extend: (match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r")))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (UNKNOWN, mode, operands, TARGET_APX_NDD)" "#" "&& reload_completed" [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2))) (set (match_dup 0) (minus:DWIH (match_dup 1) (match_dup 2)))]) (parallel [(set (match_dup 3) (minus:DWIH (minus:DWIH (match_dup 4) (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) (const_int 0))) (clobber (reg:CC FLAGS_REG))])] "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[3]);" [(set_attr "isa" "*,*,apx_ndd,apx_ndd")]) (define_insn "*sub_1" [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r,r") (minus:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,rjM,r") (match_operand:SWI 2 "" ",,r,,"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)" "@ sub{}\t{%2, %0|%0, %2} sub{}\t{%2, %0|%0, %2} sub{}\t{%2, %1, %0|%0, %1, %2} sub{}\t{%2, %1, %0|%0, %1, %2} sub{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "*subsi_1_zext" [(set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm") (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)" "@ sub{l}\t{%2, %k0|%k0, %2} sub{l}\t{%2, %1, %k0|%k0, %1, %2} sub{l}\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "isa" "*,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "mode" "SI")]) ;; Alternative 1 is needed to work 
around LRA limitation, see PR82524. (define_insn_and_split "*sub_1_slp" [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+,&")) (minus:SWI12 (match_operand:SWI12 1 "register_operand" "0,!") (match_operand:SWI12 2 "general_operand" "mn,mn"))) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "@ sub{}\t{%2, %0|%0, %2} #" "&& reload_completed && !(rtx_equal_p (operands[0], operands[1]))" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) (minus:SWI12 (match_dup 0) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "type" "alu") (set_attr "mode" "")]) ;; Alternative 1 is needed to work around LRA limitation, see PR82524. (define_insn_and_split "*subqi_ext_1_slp" [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q")) (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,!qm") (subreg:QI (match_operator:SWI248 3 "extract_operator" [(match_operand 2 "int248_register_operand" "Q,Q") (const_int 8) (const_int 8)]) 0))) (clobber (reg:CC FLAGS_REG))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "@ sub{b}\t{%h2, %0|%0, %h2} #" "&& reload_completed && !rtx_equal_p (operands[0], operands[1])" [(set (strict_low_part (match_dup 0)) (match_dup 1)) (parallel [(set (strict_low_part (match_dup 0)) (minus:QI (match_dup 0) (subreg:QI (match_op_dup 3 [(match_dup 2) (const_int 8) (const_int 8)]) 0))) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "type" "alu") (set_attr "mode" "QI")]) (define_insn_and_split "*subqi_ext_2_slp" [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q")) (minus:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" [(match_operand 1 "int248_register_operand" "Q") (const_int 8) (const_int 8)]) 0) (subreg:QI (match_operator:SWI248 4 "extract_operator" [(match_operand 2 "int248_register_operand" "Q") (const_int 8) (const_int 8)]) 0))) (clobber (reg:CC FLAGS_REG))] 
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "#" "&& reload_completed" [(set (strict_low_part (match_dup 0)) (subreg:QI (match_op_dup 3 [(match_dup 1) (const_int 8) (const_int 8)]) 0)) (parallel [(set (strict_low_part (match_dup 0)) (minus:QI (match_dup 0) (subreg:QI (match_op_dup 4 [(match_dup 2) (const_int 8) (const_int 8)]) 0))) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "type" "alu") (set_attr "mode" "QI")]) (define_insn "*sub_2" [(set (reg FLAGS_REG) (compare (minus:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r") (match_operand:SWI 2 "" ",,r,")) (const_int 0))) (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") (minus:SWI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)" "@ sub{}\t{%2, %0|%0, %2} sub{}\t{%2, %0|%0, %2} sub{}\t{%2, %1, %0|%0, %1, %2} sub{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,*,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "*subsi_2_zext" [(set (reg FLAGS_REG) (compare (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm") (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (minus:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)" "@ sub{l}\t{%2, %k0|%k0, %2} sub{l}\t{%2, %1, %k0|%k0, %1, %2} sub{l}\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "isa" "*,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "mode" "SI")]) (define_insn "*subqi_ext_0" [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn") (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0") (subreg:QI (match_operator:SWI248 3 "extract_operator" [(match_operand 2 "int248_register_operand" "Q") (const_int 8) (const_int 8)]) 0))) (clobber (reg:CC FLAGS_REG))] "" "sub{b}\t{%h2, %0|%0, %h2}" [(set_attr "addr" 
"gpr8") (set_attr "type" "alu") (set_attr "mode" "QI")]) (define_insn_and_split "*subqi_ext2_0" [(set (match_operand:QI 0 "register_operand" "=&Q") (minus:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" [(match_operand 1 "int248_register_operand" "Q") (const_int 8) (const_int 8)]) 0) (subreg:QI (match_operator:SWI248 4 "extract_operator" [(match_operand 2 "int248_register_operand" "Q") (const_int 8) (const_int 8)]) 0))) (clobber (reg:CC FLAGS_REG))] "" "#" "&& reload_completed" [(set (match_dup 0) (subreg:QI (match_op_dup 3 [(match_dup 1) (const_int 8) (const_int 8)]) 0)) (parallel [(set (match_dup 0) (minus:QI (match_dup 0) (subreg:QI (match_op_dup 4 [(match_dup 2) (const_int 8) (const_int 8)]) 0))) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "type" "alu") (set_attr "mode" "QI")]) ;; Alternative 1 is needed to work around LRA limitation, see PR82524. (define_insn_and_split "*subqi_ext_1" [(set (zero_extract:SWI248 (match_operand 0 "int248_register_operand" "+Q,&Q") (const_int 8) (const_int 8)) (subreg:SWI248 (minus:QI (subreg:QI (match_operator:SWI248 3 "extract_operator" [(match_operand 1 "int248_register_operand" "0,!Q") (const_int 8) (const_int 8)]) 0) (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0)) (clobber (reg:CC FLAGS_REG))] "" "@ sub{b}\t{%2, %h0|%h0, %2} #" "reload_completed && !(rtx_equal_p (operands[0], operands[1]))" [(set (zero_extract:SWI248 (match_dup 0) (const_int 8) (const_int 8)) (zero_extract:SWI248 (match_dup 1) (const_int 8) (const_int 8))) (parallel [(set (zero_extract:SWI248 (match_dup 0) (const_int 8) (const_int 8)) (subreg:SWI248 (minus:QI (subreg:QI (match_op_dup 3 [(match_dup 0) (const_int 8) (const_int 8)]) 0) (match_dup 2)) 0)) (clobber (reg:CC FLAGS_REG))])] "" [(set_attr "addr" "gpr8") (set_attr "type" "alu") (set_attr "mode" "QI")]) ;; Subtract with jump on overflow. 
(define_expand "subv4" [(parallel [(set (reg:CCO FLAGS_REG) (eq:CCO (minus: (sign_extend: (match_operand:SWIDWI 1 "nonimmediate_operand")) (match_dup 4)) (sign_extend: (minus:SWIDWI (match_dup 1) (match_operand:SWIDWI 2 ""))))) (set (match_operand:SWIDWI 0 "register_operand") (minus:SWIDWI (match_dup 1) (match_dup 2)))]) (set (pc) (if_then_else (eq (reg:CCO FLAGS_REG) (const_int 0)) (label_ref (match_operand 3)) (pc)))] "" { ix86_fixup_binary_operands_no_copy (MINUS, mode, operands, TARGET_APX_NDD); if (CONST_SCALAR_INT_P (operands[2])) operands[4] = operands[2]; else operands[4] = gen_rtx_SIGN_EXTEND (mode, operands[2]); }) (define_insn "*subv4" [(set (reg:CCO FLAGS_REG) (eq:CCO (minus: (sign_extend: (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")) (sign_extend: (match_operand:SWI 2 "" "We,m,rWe,m"))) (sign_extend: (minus:SWI (match_dup 1) (match_dup 2))))) (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") (minus:SWI (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)" "@ sub{}\t{%2, %0|%0, %2} sub{}\t{%2, %0|%0, %2} sub{}\t{%2, %1, %0|%0, %1, %2} sub{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,*,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "subv4_1" [(set (reg:CCO FLAGS_REG) (eq:CCO (minus: (sign_extend: (match_operand:SWI 1 "nonimmediate_operand" "0,rm")) (match_operand: 3 "const_int_operand")) (sign_extend: (minus:SWI (match_dup 1) (match_operand:SWI 2 "x86_64_immediate_operand" ","))))) (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r") (minus:SWI (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD) && CONST_INT_P (operands[2]) && INTVAL (operands[2]) == INTVAL (operands[3])" "@ sub{}\t{%2, %0|%0, %2} sub{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,apx_ndd") (set_attr "type" "alu") (set_attr "mode" "") (set (attr "length_immediate") (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") (const_string 
"1") (match_test " == 8") (const_string "4")] (const_string "")))]) (define_insn_and_split "*subv4_doubleword" [(set (reg:CCO FLAGS_REG) (eq:CCO (minus: (sign_extend: (match_operand: 1 "nonimmediate_operand" "0,0,ro,r")) (sign_extend: (match_operand: 2 "nonimmediate_operand" "r,o,r,o"))) (sign_extend: (minus: (match_dup 1) (match_dup 2))))) (set (match_operand: 0 "nonimmediate_operand" "=ro,r,&r,&r") (minus: (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)" "#" "&& reload_completed" [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2))) (set (match_dup 0) (minus:DWIH (match_dup 1) (match_dup 2)))]) (parallel [(set (reg:CCO FLAGS_REG) (eq:CCO (minus: (minus: (sign_extend: (match_dup 4)) (ltu: (reg:CC FLAGS_REG) (const_int 0))) (sign_extend: (match_dup 5))) (sign_extend: (minus:DWIH (minus:DWIH (match_dup 4) (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) (match_dup 5))))) (set (match_dup 3) (minus:DWIH (minus:DWIH (match_dup 4) (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) (match_dup 5)))])] { split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); } [(set_attr "isa" "*,*,apx_ndd,apx_ndd")]) (define_insn_and_split "*subv4_doubleword_1" [(set (reg:CCO FLAGS_REG) (eq:CCO (minus: (sign_extend: (match_operand: 1 "nonimmediate_operand" "0,ro")) (match_operand: 3 "const_scalar_int_operand")) (sign_extend: (minus: (match_dup 1) (match_operand: 2 "x86_64_hilo_general_operand" ","))))) (set (match_operand: 0 "nonimmediate_operand" "=ro,&r") (minus: (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD) && CONST_SCALAR_INT_P (operands[2]) && rtx_equal_p (operands[2], operands[3])" "#" "&& reload_completed" [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2))) (set (match_dup 0) (minus:DWIH (match_dup 1) (match_dup 2)))]) (parallel [(set (reg:CCO FLAGS_REG) (eq:CCO (minus: (minus: (sign_extend: (match_dup 4)) (ltu: (reg:CC FLAGS_REG) 
(const_int 0))) (match_dup 5)) (sign_extend: (minus:DWIH (minus:DWIH (match_dup 4) (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) (match_dup 5))))) (set (match_dup 3) (minus:DWIH (minus:DWIH (match_dup 4) (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) (match_dup 5)))])] { split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); if (operands[2] == const0_rtx) { if (!rtx_equal_p (operands[0], operands[1])) emit_move_insn (operands[0], operands[1]); emit_insn (gen_subv4_1 (operands[3], operands[4], operands[5], operands[5])); DONE; } } [(set_attr "isa" "*,apx_ndd")]) (define_insn "*subv4_overflow_1" [(set (reg:CCO FLAGS_REG) (eq:CCO (minus: (minus: (sign_extend: (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")) (match_operator: 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)])) (sign_extend: (match_operand:SWI 2 "" "rWe,m,rWe,m"))) (sign_extend: (minus:SWI (minus:SWI (match_dup 1) (match_operator:SWI 5 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)])) (match_dup 2))))) (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r,r,r") (minus:SWI (minus:SWI (match_dup 1) (match_op_dup 5 [(match_dup 3) (const_int 0)])) (match_dup 2)))] "ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)" "@ sbb{}\t{%2, %0|%0, %2} sbb{}\t{%2, %0|%0, %2} sbb{}\t{%2, %1, %0|%0, %1, %2} sbb{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,*,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "*subv4_overflow_2" [(set (reg:CCO FLAGS_REG) (eq:CCO (minus: (minus: (sign_extend: (match_operand:SWI 1 "nonimmediate_operand" "%0,rm")) (match_operator: 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)])) (match_operand: 6 "const_int_operand" "n,n")) (sign_extend: (minus:SWI (minus:SWI (match_dup 1) (match_operator:SWI 5 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)])) (match_operand:SWI 2 "x86_64_immediate_operand" "e,e"))))) (set (match_operand:SWI 0 
"nonimmediate_operand" "=rm,r")
	(minus:SWI
	  (minus:SWI
	    (match_dup 1)
	    (match_op_dup 5 [(match_dup 3) (const_int 0)]))
	  (match_dup 2)))]
  "ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD) && CONST_INT_P (operands[2]) && INTVAL (operands[2]) == INTVAL (operands[6])"
  "@ sbb{}\t{%2, %0|%0, %2} sbb{}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,apx_ndd")
   (set_attr "type" "alu")
   (set_attr "mode" "")
   (set (attr "length_immediate")
	(if_then_else
	  (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)")
	  (const_string "1")
	  (const_string "4")))])

;; Unsigned subtract with jump on borrow: operand 0 = operand 1 - operand 2,
;; FLAGS_REG receives the CCmode compare of the two inputs, and control
;; transfers to the label in operand 3 when that compare is LTU.
(define_expand "usubv4"
  [(parallel
     [(set (reg:CC FLAGS_REG)
	   (compare:CC
	     (match_operand:SWI 1 "nonimmediate_operand")
	     (match_operand:SWI 2 "")))
      (set (match_operand:SWI 0 "register_operand")
	   (minus:SWI (match_dup 1) (match_dup 2)))])
   (set (pc)
	(if_then_else
	  (ltu (reg:CC FLAGS_REG) (const_int 0))
	  (label_ref (match_operand 3))
	  (pc)))]
  ""
  "ix86_fixup_binary_operands_no_copy (MINUS, mode, operands, TARGET_APX_NDD);")

;; Subtraction that also sets FLAGS_REG to the comparison of operand 1
;; against operand 2 (accepted only when ix86_match_ccmode allows CCmode).
;; Operand 0 is written by the insn; its four alternatives line up
;; one-to-one with operand 1's "0,0,rm,r", so every one of them must be a
;; register or memory destination -- an immediate can never be an output.
;; The alternative layout is the same as in the other four-alternative
;; sub/sbb/adc patterns of this file.
(define_insn "*sub_3"
  [(set (reg FLAGS_REG)
	(compare
	  (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")
	  (match_operand:SWI 2 "" ",,r,")))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r")
	(minus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_match_ccmode (insn, CCmode) && ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)"
  "@ sub{}\t{%2, %0|%0, %2} sub{}\t{%2, %0|%0, %2} sub{}\t{%2, %1, %0|%0, %1, %2} sub{}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set_attr "type" "alu")
   (set_attr "mode" "")])

;; If the difference written to operand 0 carries a REG_UNUSED note, only
;; the flags result is needed: keep the compare and drop the subtraction.
(define_peephole2
  [(parallel
     [(set (reg:CC FLAGS_REG)
	   (compare:CC
	     (match_operand:SWI 0 "general_reg_operand")
	     (match_operand:SWI 1 "general_gr_operand")))
      (set (match_dup 0)
	   (minus:SWI (match_dup 0) (match_dup 1)))])]
  "find_regno_note (peep2_next_insn (0), REG_UNUSED, REGNO (operands[0])) != 0"
  [(set (reg:CC FLAGS_REG)
	(compare:CC (match_dup 0) (match_dup 1)))])

(define_peephole2
  [(set (match_operand:SWI 0 "general_reg_operand")
	(match_operand:SWI 1 "memory_operand"))
   (parallel
[(set (reg:CC FLAGS_REG) (compare:CC (match_dup 0) (match_operand:SWI 2 "memory_operand"))) (set (match_dup 0) (minus:SWI (match_dup 0) (match_dup 2)))]) (set (match_dup 1) (match_dup 0))] "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (3, operands[0]) && !reg_overlap_mentioned_p (operands[0], operands[1]) && !reg_overlap_mentioned_p (operands[0], operands[2])" [(set (match_dup 0) (match_dup 2)) (parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 0))) (set (match_dup 1) (minus:SWI (match_dup 1) (match_dup 0)))])]) ;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into ;; subl $1, %eax; jnc .Lxx; (define_peephole2 [(parallel [(set (match_operand:SWI 0 "general_reg_operand") (plus:SWI (match_dup 0) (const_int -1))) (clobber (reg FLAGS_REG))]) (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 0) (const_int -1))) (set (pc) (if_then_else (match_operator 1 "bt_comparison_operator" [(reg:CCZ FLAGS_REG) (const_int 0)]) (match_operand 2) (pc)))] "peep2_regno_dead_p (3, FLAGS_REG)" [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 0) (const_int 1))) (set (match_dup 0) (minus:SWI (match_dup 0) (const_int 1)))]) (set (pc) (if_then_else (match_dup 3) (match_dup 2) (pc)))] { rtx cc = gen_rtx_REG (CCmode, FLAGS_REG); operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE ? GEU : LTU, VOIDmode, cc, const0_rtx); }) ;; Help combine use borrow flag to test for -1 after dec (add $-1). 
(define_insn_and_split "*dec_cmov" [(set (match_operand:SWI248 0 "register_operand" "=r") (if_then_else:SWI248 (match_operator 1 "bt_comparison_operator" [(match_operand:SWI248 2 "register_operand" "0") (const_int 0)]) (plus:SWI248 (match_dup 2) (const_int -1)) (match_operand:SWI248 3 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] "TARGET_CMOVE" "#" "&& reload_completed" [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 2) (const_int 1))) (set (match_dup 0) (minus:SWI248 (match_dup 2) (const_int 1)))]) (set (match_dup 0) (if_then_else:SWI248 (match_dup 4) (match_dup 0) (match_dup 3)))] { rtx cc = gen_rtx_REG (CCCmode, FLAGS_REG); operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[1]) == NE ? GEU : LTU, VOIDmode, cc, const0_rtx); }) (define_insn "*subsi_3_zext" [(set (reg FLAGS_REG) (compare (match_operand:SI 1 "nonimmediate_operand" "0,r,rm") (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))) (set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (minus:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)" "@ sub{l}\t{%2, %1|%1, %2} sub{l}\t{%2, %1, %k0|%k0, %1, %2} sub{l}\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "isa" "*,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "mode" "SI")]) ;; Add with carry and subtract with borrow (define_insn "@add3_carry" [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") (plus:SWI (plus:SWI (match_operator:SWI 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")) (match_operand:SWI 2 "" ",,r,"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" "@ adc{}\t{%2, %0|%0, %2} adc{}\t{%2, %0|%0, %2} adc{}\t{%2, %1, %0|%0, %1, %2} adc{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,*,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "use_carry" "1") 
(set_attr "pent_pair" "pu") (set_attr "mode" "")]) (define_peephole2 [(set (match_operand:SWI 0 "general_reg_operand") (match_operand:SWI 1 "memory_operand")) (parallel [(set (match_dup 0) (plus:SWI (plus:SWI (match_operator:SWI 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (match_dup 0)) (match_operand:SWI 2 "memory_operand"))) (clobber (reg:CC FLAGS_REG))]) (set (match_dup 1) (match_dup 0))] "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (3, operands[0]) && !reg_overlap_mentioned_p (operands[0], operands[1]) && !reg_overlap_mentioned_p (operands[0], operands[2])" [(set (match_dup 0) (match_dup 2)) (parallel [(set (match_dup 1) (plus:SWI (plus:SWI (match_op_dup 4 [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 0))) (clobber (reg:CC FLAGS_REG))])]) (define_peephole2 [(set (match_operand:SWI 0 "general_reg_operand") (match_operand:SWI 1 "memory_operand")) (parallel [(set (match_dup 0) (plus:SWI (plus:SWI (match_operator:SWI 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (match_dup 0)) (match_operand:SWI 2 "memory_operand"))) (clobber (reg:CC FLAGS_REG))]) (set (match_operand:SWI 5 "general_reg_operand") (match_dup 0)) (set (match_dup 1) (match_dup 5))] "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (4, operands[5]) && !reg_overlap_mentioned_p (operands[0], operands[1]) && !reg_overlap_mentioned_p (operands[0], operands[2]) && !reg_overlap_mentioned_p (operands[5], operands[1])" [(set (match_dup 0) (match_dup 2)) (parallel [(set (match_dup 1) (plus:SWI (plus:SWI (match_op_dup 4 [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 0))) (clobber (reg:CC FLAGS_REG))])]) (define_insn "*add3_carry_0" [(set (match_operand:SWI 0 "nonimmediate_operand" "=m") (plus:SWI (match_operator:SWI 2 "ix86_carry_flag_operator" [(reg FLAGS_REG) (const_int 0)]) (match_operand:SWI 1 
"nonimmediate_operand" "0"))) (clobber (reg:CC FLAGS_REG))] "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])" "adc{}\t{$0, %0|%0, 0}" [(set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "")]) (define_insn "*add3_carry_0r" [(set (match_operand:SWI 0 "nonimmediate_operand" "=m") (plus:SWI (match_operator:SWI 2 "ix86_carry_flag_unset_operator" [(reg FLAGS_REG) (const_int 0)]) (match_operand:SWI 1 "nonimmediate_operand" "0"))) (clobber (reg:CC FLAGS_REG))] "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])" "sbb{}\t{$-1, %0|%0, -1}" [(set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "")]) (define_insn "*addsi3_carry_zext" [(set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (plus:SI (plus:SI (match_operator:SI 3 "ix86_carry_flag_operator" [(reg FLAGS_REG) (const_int 0)]) (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm")) (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)" "@ adc{l}\t{%2, %k0|%k0, %2} adc{l}\t{%2, %1, %k0|%k0, %1, %2} adc{l}\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "isa" "*,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) (define_insn "*addsi3_carry_zext_0" [(set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (plus:SI (match_operator:SI 2 "ix86_carry_flag_operator" [(reg FLAGS_REG) (const_int 0)]) (match_operand:SI 1 "nonimmediate_operand" "0,rm")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" "@ adc{l}\t{$0, %k0|%k0, 0} adc{l}\t{$0, %1, %k0|%k0, %1, 0}" [(set_attr "isa" "*,apx_ndd") (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) (define_insn "*addsi3_carry_zext_0r" [(set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (plus:SI 
(match_operator:SI 2 "ix86_carry_flag_unset_operator" [(reg FLAGS_REG) (const_int 0)]) (match_operand:SI 1 "nonimmediate_operand" "0,rm")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" "@ sbb{l}\t{$-1, %k0|%k0, -1} sbb{l}\t{$-1, %1, %k0|%k0, %1, -1}" [(set_attr "isa" "*,apx_ndd") (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) ;; There is no point to generate ADCX instruction. ADC is shorter and faster. (define_insn "addcarry" [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (plus:SWI48 (plus:SWI48 (match_operator:SWI48 5 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,rm,r")) (match_operand:SWI48 2 "nonimmediate_operand" "r,rm,r,m"))) (plus: (zero_extend: (match_dup 2)) (match_operator: 4 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)])))) (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r") (plus:SWI48 (plus:SWI48 (match_op_dup 5 [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" "@ adc{}\t{%2, %0|%0, %2} adc{}\t{%2, %0|%0, %2} adc{}\t{%2, %1, %0|%0, %1, %2} adc{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,*,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "")]) (define_peephole2 [(parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (plus:SWI48 (plus:SWI48 (match_operator:SWI48 4 "ix86_carry_flag_operator" [(match_operand 2 "flags_reg_operand") (const_int 0)]) (match_operand:SWI48 0 "general_reg_operand")) (match_operand:SWI48 1 "memory_operand"))) (plus: (zero_extend: (match_dup 1)) (match_operator: 3 "ix86_carry_flag_operator" [(match_dup 2) (const_int 0)])))) (set (match_dup 0) (plus:SWI48 (plus:SWI48 (match_op_dup 4 [(match_dup 2) (const_int 0)]) (match_dup 0)) (match_dup 1)))]) (set (match_dup 1) (match_dup 0))] "(TARGET_READ_MODIFY_WRITE || 
optimize_insn_for_size_p ()) && peep2_reg_dead_p (2, operands[0]) && !reg_overlap_mentioned_p (operands[0], operands[1])" [(parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (plus:SWI48 (plus:SWI48 (match_op_dup 4 [(match_dup 2) (const_int 0)]) (match_dup 1)) (match_dup 0))) (plus: (zero_extend: (match_dup 0)) (match_op_dup 3 [(match_dup 2) (const_int 0)])))) (set (match_dup 1) (plus:SWI48 (plus:SWI48 (match_op_dup 4 [(match_dup 2) (const_int 0)]) (match_dup 1)) (match_dup 0)))])]) (define_peephole2 [(set (match_operand:SWI48 0 "general_reg_operand") (match_operand:SWI48 1 "memory_operand")) (parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (plus:SWI48 (plus:SWI48 (match_operator:SWI48 5 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (match_dup 0)) (match_operand:SWI48 2 "memory_operand"))) (plus: (zero_extend: (match_dup 2)) (match_operator: 4 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)])))) (set (match_dup 0) (plus:SWI48 (plus:SWI48 (match_op_dup 5 [(match_dup 3) (const_int 0)]) (match_dup 0)) (match_dup 2)))]) (set (match_dup 1) (match_dup 0))] "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (3, operands[0]) && !reg_overlap_mentioned_p (operands[0], operands[1]) && !reg_overlap_mentioned_p (operands[0], operands[2])" [(set (match_dup 0) (match_dup 2)) (parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (plus:SWI48 (plus:SWI48 (match_op_dup 5 [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 0))) (plus: (zero_extend: (match_dup 0)) (match_op_dup 4 [(match_dup 3) (const_int 0)])))) (set (match_dup 1) (plus:SWI48 (plus:SWI48 (match_op_dup 5 [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 0)))])]) (define_peephole2 [(parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (plus:SWI48 (plus:SWI48 (match_operator:SWI48 4 "ix86_carry_flag_operator" [(match_operand 2 "flags_reg_operand") (const_int 0)]) (match_operand:SWI48 0 
"general_reg_operand")) (match_operand:SWI48 1 "memory_operand"))) (plus: (zero_extend: (match_dup 1)) (match_operator: 3 "ix86_carry_flag_operator" [(match_dup 2) (const_int 0)])))) (set (match_dup 0) (plus:SWI48 (plus:SWI48 (match_op_dup 4 [(match_dup 2) (const_int 0)]) (match_dup 0)) (match_dup 1)))]) (set (match_operand:QI 5 "general_reg_operand") (ltu:QI (reg:CCC FLAGS_REG) (const_int 0))) (set (match_operand:SWI48 6 "general_reg_operand") (zero_extend:SWI48 (match_dup 5))) (set (match_dup 1) (match_dup 0))] "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (4, operands[0]) && !reg_overlap_mentioned_p (operands[0], operands[1]) && !reg_overlap_mentioned_p (operands[0], operands[5]) && !reg_overlap_mentioned_p (operands[5], operands[1]) && !reg_overlap_mentioned_p (operands[0], operands[6]) && !reg_overlap_mentioned_p (operands[6], operands[1])" [(parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (plus:SWI48 (plus:SWI48 (match_op_dup 4 [(match_dup 2) (const_int 0)]) (match_dup 1)) (match_dup 0))) (plus: (zero_extend: (match_dup 0)) (match_op_dup 3 [(match_dup 2) (const_int 0)])))) (set (match_dup 1) (plus:SWI48 (plus:SWI48 (match_op_dup 4 [(match_dup 2) (const_int 0)]) (match_dup 1)) (match_dup 0)))]) (set (match_dup 5) (ltu:QI (reg:CCC FLAGS_REG) (const_int 0))) (set (match_dup 6) (zero_extend:SWI48 (match_dup 5)))]) (define_expand "addcarry_0" [(parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (plus:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") (match_operand:SWI48 2 "x86_64_general_operand")) (match_dup 1))) (set (match_operand:SWI48 0 "nonimmediate_operand") (plus:SWI48 (match_dup 1) (match_dup 2)))])] "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)") (define_insn "*addcarry_1" [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (plus:SWI48 (plus:SWI48 (match_operator:SWI48 5 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (match_operand:SWI48 1 
"nonimmediate_operand" "%0,rm")) (match_operand:SWI48 2 "x86_64_immediate_operand" "e,e"))) (plus: (match_operand: 6 "const_scalar_int_operand") (match_operator: 4 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)])))) (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") (plus:SWI48 (plus:SWI48 (match_op_dup 5 [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD) && CONST_INT_P (operands[2]) /* Check that operands[6] is operands[2] zero extended from mode to mode. */ && ((mode == SImode || INTVAL (operands[2]) >= 0) ? (CONST_INT_P (operands[6]) && UINTVAL (operands[6]) == (UINTVAL (operands[2]) & GET_MODE_MASK (mode))) : (CONST_WIDE_INT_P (operands[6]) && CONST_WIDE_INT_NUNITS (operands[6]) == 2 && ((unsigned HOST_WIDE_INT) CONST_WIDE_INT_ELT (operands[6], 0) == UINTVAL (operands[2])) && CONST_WIDE_INT_ELT (operands[6], 1) == 0))" "@ adc{}\t{%2, %0|%0, %2} adc{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,apx_ndd") (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "") (set (attr "length_immediate") (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") (const_string "1") (const_string "4")))]) (define_insn "@sub3_carry" [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") (minus:SWI (minus:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r") (match_operator:SWI 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)])) (match_operand:SWI 2 "" ",,r,"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)" "@ sbb{}\t{%2, %0|%0, %2} sbb{}\t{%2, %0|%0, %2} sbb{}\t{%2, %1, %0|%0, %1, %2} sbb{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,*,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "")]) (define_peephole2 [(set (match_operand:SWI 0 "general_reg_operand") 
(match_operand:SWI 1 "memory_operand")) (parallel [(set (match_dup 0) (minus:SWI (minus:SWI (match_dup 0) (match_operator:SWI 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)])) (match_operand:SWI 2 "memory_operand"))) (clobber (reg:CC FLAGS_REG))]) (set (match_dup 1) (match_dup 0))] "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (3, operands[0]) && !reg_overlap_mentioned_p (operands[0], operands[1]) && !reg_overlap_mentioned_p (operands[0], operands[2])" [(set (match_dup 0) (match_dup 2)) (parallel [(set (match_dup 1) (minus:SWI (minus:SWI (match_dup 1) (match_op_dup 4 [(match_dup 3) (const_int 0)])) (match_dup 0))) (clobber (reg:CC FLAGS_REG))])]) (define_peephole2 [(set (match_operand:SWI 0 "general_reg_operand") (match_operand:SWI 1 "memory_operand")) (parallel [(set (match_dup 0) (minus:SWI (minus:SWI (match_dup 0) (match_operator:SWI 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)])) (match_operand:SWI 2 "memory_operand"))) (clobber (reg:CC FLAGS_REG))]) (set (match_operand:SWI 5 "general_reg_operand") (match_dup 0)) (set (match_dup 1) (match_dup 5))] "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (4, operands[5]) && !reg_overlap_mentioned_p (operands[0], operands[1]) && !reg_overlap_mentioned_p (operands[0], operands[2]) && !reg_overlap_mentioned_p (operands[5], operands[1])" [(set (match_dup 0) (match_dup 2)) (parallel [(set (match_dup 1) (minus:SWI (minus:SWI (match_dup 1) (match_op_dup 4 [(match_dup 3) (const_int 0)])) (match_dup 0))) (clobber (reg:CC FLAGS_REG))])]) (define_insn "*sub3_carry_0" [(set (match_operand:SWI 0 "nonimmediate_operand" "=m") (minus:SWI (match_operand:SWI 1 "nonimmediate_operand" "0") (match_operator:SWI 2 "ix86_carry_flag_operator" [(reg FLAGS_REG) (const_int 0)]))) (clobber (reg:CC FLAGS_REG))] "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])" 
"sbb{}\t{$0, %0|%0, 0}" [(set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "")]) (define_insn "*sub3_carry_0r" [(set (match_operand:SWI 0 "nonimmediate_operand" "=m") (minus:SWI (match_operand:SWI 1 "nonimmediate_operand" "0") (match_operator:SWI 2 "ix86_carry_flag_unset_operator" [(reg FLAGS_REG) (const_int 0)]))) (clobber (reg:CC FLAGS_REG))] "!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1])" "adc{}\t{$-1, %0|%0, -1}" [(set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "")]) (define_insn "*subsi3_carry_zext" [(set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (minus:SI (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm") (match_operator:SI 3 "ix86_carry_flag_operator" [(reg FLAGS_REG) (const_int 0)])) (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands, TARGET_APX_NDD)" "@ sbb{l}\t{%2, %k0|%k0, %2} sbb{l}\t{%2, %1, %k0|%k0, %1, %2} sbb{l}\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "isa" "*,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) (define_insn "*subsi3_carry_zext_0" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (minus:SI (match_operand:SI 1 "register_operand" "0") (match_operator:SI 2 "ix86_carry_flag_operator" [(reg FLAGS_REG) (const_int 0)])))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" "sbb{l}\t{$0, %k0|%k0, 0}" [(set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) (define_insn "*subsi3_carry_zext_0r" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (minus:SI (match_operand:SI 1 "register_operand" "0") (match_operator:SI 2 "ix86_carry_flag_unset_operator" [(reg FLAGS_REG) (const_int 0)])))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" "adc{l}\t{$-1, %k0|%k0, 
-1}" [(set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) (define_insn "@sub3_carry_ccc" [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (match_operand:DWIH 1 "register_operand" "0")) (plus: (ltu: (reg:CC FLAGS_REG) (const_int 0)) (zero_extend: (match_operand:DWIH 2 "x86_64_sext_operand" "rmWe"))))) (clobber (match_scratch:DWIH 0 "=r"))] "" "sbb{}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "*sub3_carry_ccc_1" [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (match_operand:DWIH 1 "register_operand" "0")) (plus: (ltu: (reg:CC FLAGS_REG) (const_int 0)) (match_operand: 2 "x86_64_dwzext_immediate_operand" "Wf")))) (clobber (match_scratch:DWIH 0 "=r"))] "" { operands[3] = simplify_subreg (mode, operands[2], mode, 0); return "sbb{}\t{%3, %0|%0, %3}"; } [(set_attr "type" "alu") (set_attr "mode" "")]) ;; The sign flag is set from the ;; (compare (match_dup 1) (plus:DWIH (ltu:DWIH ...) (match_dup 2))) ;; result, the overflow flag likewise, but the overflow flag is also ;; set if the (plus:DWIH (ltu:DWIH ...) (match_dup 2)) overflows. 
(define_insn "@sub3_carry_ccgz" [(set (reg:CCGZ FLAGS_REG) (unspec:CCGZ [(match_operand:DWIH 1 "register_operand" "0") (match_operand:DWIH 2 "x86_64_general_operand" "rBMe") (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))] UNSPEC_SBB)) (clobber (match_scratch:DWIH 0 "=r"))] "" "sbb{}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "subborrow" [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (match_operand:SWI48 1 "nonimmediate_operand" "0,0,r,rm")) (plus: (match_operator: 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (zero_extend: (match_operand:SWI48 2 "nonimmediate_operand" "r,rm,rm,r"))))) (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r") (minus:SWI48 (minus:SWI48 (match_dup 1) (match_operator:SWI48 5 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)])) (match_dup 2)))] "ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)" "@ sbb{}\t{%2, %0|%0, %2} sbb{}\t{%2, %0|%0, %2} sbb{}\t{%2, %1, %0|%0, %1, %2} sbb{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "*,*,apx_ndd,apx_ndd") (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "")]) (define_peephole2 [(set (match_operand:SWI48 0 "general_reg_operand") (match_operand:SWI48 1 "memory_operand")) (parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (match_dup 0)) (plus: (match_operator: 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (zero_extend: (match_operand:SWI48 2 "memory_operand"))))) (set (match_dup 0) (minus:SWI48 (minus:SWI48 (match_dup 0) (match_operator:SWI48 5 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)])) (match_dup 2)))]) (set (match_dup 1) (match_dup 0))] "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (3, operands[0]) && !reg_overlap_mentioned_p (operands[0], operands[1]) && !reg_overlap_mentioned_p (operands[0], operands[2])" [(set (match_dup 0) (match_dup 2)) (parallel 
[(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (match_dup 1)) (plus: (match_op_dup 4 [(match_dup 3) (const_int 0)]) (zero_extend: (match_dup 0))))) (set (match_dup 1) (minus:SWI48 (minus:SWI48 (match_dup 1) (match_op_dup 5 [(match_dup 3) (const_int 0)])) (match_dup 0)))])]) (define_peephole2 [(set (match_operand:SWI48 6 "general_reg_operand") (match_operand:SWI48 7 "memory_operand")) (set (match_operand:SWI48 8 "general_reg_operand") (match_operand:SWI48 9 "memory_operand")) (parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (match_operand:SWI48 0 "general_reg_operand")) (plus: (match_operator: 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (zero_extend: (match_operand:SWI48 2 "general_reg_operand"))))) (set (match_dup 0) (minus:SWI48 (minus:SWI48 (match_dup 0) (match_operator:SWI48 5 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)])) (match_dup 2)))]) (set (match_operand:SWI48 1 "memory_operand") (match_dup 0))] "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (4, operands[0]) && peep2_reg_dead_p (3, operands[2]) && !reg_overlap_mentioned_p (operands[0], operands[1]) && !reg_overlap_mentioned_p (operands[2], operands[1]) && !reg_overlap_mentioned_p (operands[6], operands[9]) && (rtx_equal_p (operands[6], operands[0]) ? 
(rtx_equal_p (operands[7], operands[1]) && rtx_equal_p (operands[8], operands[2])) : (rtx_equal_p (operands[8], operands[0]) && rtx_equal_p (operands[9], operands[1]) && rtx_equal_p (operands[6], operands[2])))" [(set (match_dup 0) (match_dup 9)) (parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (match_dup 1)) (plus: (match_op_dup 4 [(match_dup 3) (const_int 0)]) (zero_extend: (match_dup 0))))) (set (match_dup 1) (minus:SWI48 (minus:SWI48 (match_dup 1) (match_op_dup 5 [(match_dup 3) (const_int 0)])) (match_dup 0)))])] { if (!rtx_equal_p (operands[6], operands[0])) operands[9] = operands[7]; }) (define_peephole2 [(set (match_operand:SWI48 6 "general_reg_operand") (match_operand:SWI48 7 "memory_operand")) (set (match_operand:SWI48 8 "general_reg_operand") (match_operand:SWI48 9 "memory_operand")) (parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (match_operand:SWI48 0 "general_reg_operand")) (plus: (match_operator: 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (zero_extend: (match_operand:SWI48 2 "general_reg_operand"))))) (set (match_dup 0) (minus:SWI48 (minus:SWI48 (match_dup 0) (match_operator:SWI48 5 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)])) (match_dup 2)))]) (set (match_operand:QI 10 "general_reg_operand") (ltu:QI (reg:CCC FLAGS_REG) (const_int 0))) (set (match_operand:SWI48 11 "general_reg_operand") (zero_extend:SWI48 (match_dup 10))) (set (match_operand:SWI48 1 "memory_operand") (match_dup 0))] "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (6, operands[0]) && peep2_reg_dead_p (3, operands[2]) && !reg_overlap_mentioned_p (operands[0], operands[1]) && !reg_overlap_mentioned_p (operands[2], operands[1]) && !reg_overlap_mentioned_p (operands[6], operands[9]) && !reg_overlap_mentioned_p (operands[0], operands[10]) && !reg_overlap_mentioned_p (operands[10], operands[1]) && !reg_overlap_mentioned_p (operands[0], operands[11]) && !reg_overlap_mentioned_p 
(operands[11], operands[1]) && (rtx_equal_p (operands[6], operands[0]) ? (rtx_equal_p (operands[7], operands[1]) && rtx_equal_p (operands[8], operands[2])) : (rtx_equal_p (operands[8], operands[0]) && rtx_equal_p (operands[9], operands[1]) && rtx_equal_p (operands[6], operands[2])))" [(set (match_dup 0) (match_dup 9)) (parallel [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: (match_dup 1)) (plus: (match_op_dup 4 [(match_dup 3) (const_int 0)]) (zero_extend: (match_dup 0))))) (set (match_dup 1) (minus:SWI48 (minus:SWI48 (match_dup 1) (match_op_dup 5 [(match_dup 3) (const_int 0)])) (match_dup 0)))]) (set (match_dup 10) (ltu:QI (reg:CCC FLAGS_REG) (const_int 0))) (set (match_dup 11) (zero_extend:SWI48 (match_dup 10)))] { if (!rtx_equal_p (operands[6], operands[0])) operands[9] = operands[7]; }) (define_expand "subborrow_0" [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_operand:SWI48 1 "nonimmediate_operand") (match_operand:SWI48 2 ""))) (set (match_operand:SWI48 0 "register_operand") (minus:SWI48 (match_dup 1) (match_dup 2)))])] "ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)") (define_expand "uaddc5" [(match_operand:SWI48 0 "register_operand") (match_operand:SWI48 1 "register_operand") (match_operand:SWI48 2 "register_operand") (match_operand:SWI48 3 "register_operand") (match_operand:SWI48 4 "nonmemory_operand")] "" { rtx cf = gen_rtx_REG (CCCmode, FLAGS_REG), pat, pat2; if (operands[4] == const0_rtx) emit_insn (gen_addcarry_0 (operands[0], operands[2], operands[3])); else { ix86_expand_carry (operands[4]); pat = gen_rtx_LTU (mode, cf, const0_rtx); pat2 = gen_rtx_LTU (mode, cf, const0_rtx); emit_insn (gen_addcarry (operands[0], operands[2], operands[3], cf, pat, pat2)); } rtx cc = gen_reg_rtx (QImode); pat = gen_rtx_LTU (QImode, cf, const0_rtx); emit_insn (gen_rtx_SET (cc, pat)); emit_insn (gen_zero_extendqi2 (operands[1], cc)); DONE; }) (define_expand "usubc5" [(match_operand:SWI48 0 "register_operand") (match_operand:SWI48 1 
"register_operand") (match_operand:SWI48 2 "register_operand") (match_operand:SWI48 3 "register_operand") (match_operand:SWI48 4 "nonmemory_operand")] "" { rtx cf, pat, pat2; if (operands[4] == const0_rtx) { cf = gen_rtx_REG (CCmode, FLAGS_REG); emit_insn (gen_subborrow_0 (operands[0], operands[2], operands[3])); } else { cf = gen_rtx_REG (CCCmode, FLAGS_REG); ix86_expand_carry (operands[4]); pat = gen_rtx_LTU (mode, cf, const0_rtx); pat2 = gen_rtx_LTU (mode, cf, const0_rtx); emit_insn (gen_subborrow (operands[0], operands[2], operands[3], cf, pat, pat2)); } rtx cc = gen_reg_rtx (QImode); pat = gen_rtx_LTU (QImode, cf, const0_rtx); emit_insn (gen_rtx_SET (cc, pat)); emit_insn (gen_zero_extendqi2 (operands[1], cc)); DONE; }) (define_mode_iterator CC_CCC [CC CCC]) ;; Pre-reload splitter to optimize ;; *setcc_qi followed by *addqi3_cconly_overflow_1 with the same QI ;; operand and no intervening flags modifications into nothing. (define_insn_and_split "*setcc_qi_addqi3_cconly_overflow_1_" [(set (reg:CCC FLAGS_REG) (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))))] "ix86_pre_reload_split ()" "#" "&& 1" [(const_int 0)] "emit_note (NOTE_INSN_DELETED); DONE;") ;; Set the carry flag from the carry flag. (define_insn_and_split "*setccc" [(set (reg:CCC FLAGS_REG) (reg:CCC FLAGS_REG))] "ix86_pre_reload_split ()" "#" "&& 1" [(const_int 0)] "emit_note (NOTE_INSN_DELETED); DONE;") ;; Set the carry flag from the carry flag. (define_insn_and_split "*setcc_qi_negqi_ccc_1_" [(set (reg:CCC FLAGS_REG) (ltu:CCC (reg:CC_CCC FLAGS_REG) (const_int 0)))] "ix86_pre_reload_split ()" "#" "&& 1" [(const_int 0)] "emit_note (NOTE_INSN_DELETED); DONE;") ;; Set the carry flag from the carry flag. 
;; NOTE(review): several names, modes and predicates in this section look
;; truncated (empty predicate/constraint strings, bare "mode", ':' with no
;; mode after it) -- presumably iterator substitutions were lost in this
;; copy.  Confirm against upstream before relying on the exact spelling.

;; CCC is set from an UNSPEC_CC_NE of (carry-set test, 0).  Recognized only
;; while ix86_pre_reload_split () holds and then split into a deleted-insn
;; note, i.e. the insn produces no code.
(define_insn_and_split "*setcc_qi_negqi_ccc_2_"
  [(set (reg:CCC FLAGS_REG)
        (unspec:CCC [(ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
                     (const_int 0)] UNSPEC_CC_NE))]
  "ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
  "emit_note (NOTE_INSN_DELETED); DONE;")

;; Overflow setting add instructions

;; QImode add whose only result is CCC: compares (op0 + op1) with op0 and
;; clobbers a QImode scratch.  Rejected when both operands are memory.
(define_expand "addqi3_cconly_overflow"
  [(parallel
     [(set (reg:CCC FLAGS_REG)
           (compare:CCC
             (plus:QI
               (match_operand:QI 0 "nonimmediate_operand")
               (match_operand:QI 1 "general_operand"))
             (match_dup 0)))
      (clobber (scratch:QI))])]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))")

;; Flags-only add: CCC compares (op1 + op2) with op1; the sum goes to the
;; scratch operand 0.  Alternatives 1 and 2 are the three-operand
;; (isa apx_ndd) forms.
(define_insn "*add3_cconly_overflow_1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (plus:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "%0,r,rm")
            (match_operand:SWI 2 "" ",,re"))
          (match_dup 1)))
   (clobber (match_scratch:SWI 0 "=,r,r"))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   add{}\t{%2, %0|%0, %2}
   add{}\t{%2, %1, %0|%0, %1, %2}
   add{}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,apx_ndd,apx_ndd")
   (set_attr "type" "alu")
   (set_attr "mode" "")])

;; Add that sets CCC (sum compared with op1) and also stores the sum in
;; operand 0.  The last three alternatives are the apx_ndd forms.
(define_insn "@add3_cc_overflow_1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (plus:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,rjM,r")
            (match_operand:SWI 2 "" ",,r,,"))
          (match_dup 1)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r,r")
        (plus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)"
  "@
   add{}\t{%2, %0|%0, %2}
   add{}\t{%2, %0|%0, %2}
   add{}\t{%2, %1, %0|%0, %1, %2}
   add{}\t{%2, %1, %0|%0, %1, %2}
   add{}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd")
   (set_attr "type" "alu")
   (set_attr "mode" "")])

;; reg += mem (setting CCC); mem = reg  ->  mem += reg (setting CCC).
;; Only when the register is dead after the store and does not appear in
;; the memory address.
(define_peephole2
  [(parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:SWI (match_operand:SWI 0 "general_reg_operand")
                               (match_operand:SWI 1 "memory_operand"))
                     (match_dup 0)))
              (set (match_dup 0) (plus:SWI (match_dup 0) (match_dup 1)))])
   (set (match_dup 1) (match_dup 0))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && peep2_reg_dead_p (2, operands[0])
   && !reg_overlap_mentioned_p (operands[0], operands[1])"
  [(parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:SWI (match_dup 1) (match_dup 0))
                     (match_dup 1)))
              (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])

;; reg = mem1; reg += mem2 (setting CCC); mem1 = reg
;;   ->  reg = mem2; mem1 += reg (setting CCC).
;; Only when the register is dead after the store and is mentioned in
;; neither memory operand.
(define_peephole2
  [(set (match_operand:SWI 0 "general_reg_operand")
        (match_operand:SWI 1 "memory_operand"))
   (parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:SWI (match_dup 0)
                               (match_operand:SWI 2 "memory_operand"))
                     (match_dup 0)))
              (set (match_dup 0) (plus:SWI (match_dup 0) (match_dup 2)))])
   (set (match_dup 1) (match_dup 0))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && peep2_reg_dead_p (3, operands[0])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])"
  [(set (match_dup 0) (match_dup 2))
   (parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:SWI (match_dup 1) (match_dup 0))
                     (match_dup 1)))
              (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0)))])])

;; 64-bit only: SImode add setting CCC (sum compared with op1) whose result
;; is stored zero-extended into a DImode register.
(define_insn "*addsi3_zext_cc_overflow_1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (plus:SI
            (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm")
            (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
          (match_dup 1)))
   (set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT
   && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
  "@
   add{l}\t{%2, %k0|%k0, %2}
   add{l}\t{%2, %1, %k0|%k0, %1, %2}
   add{l}\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "isa" "*,apx_ndd,apx_ndd")
   (set_attr "type" "alu")
   (set_attr "mode" "SI")])

;; Same as *add3_cconly_overflow_1, except the sum is compared with
;; operand 2 instead of operand 1.
(define_insn "*add3_cconly_overflow_2"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (plus:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "%0,r,rm")
            (match_operand:SWI 2 "" ",,re"))
          (match_dup 2)))
   (clobber (match_scratch:SWI 0 "=,r,r"))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   add{}\t{%2, %0|%0, %2}
   add{}\t{%2, %1, %0|%0, %1, %2}
   add{}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,apx_ndd,apx_ndd")
   (set_attr "type" "alu")
   (set_attr "mode" "")])

;; Add that sets CCC (sum compared with operand 2) and stores the sum in
;; operand 0.
(define_insn "*add3_cc_overflow_2"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (plus:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
            (match_operand:SWI 2 "" ",,r,"))
          (match_dup 2)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r")
        (plus:SWI (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)"
  "@
   add{}\t{%2, %0|%0, %2}
   add{}\t{%2, %0|%0, %2}
   add{}\t{%2, %1, %0|%0, %1, %2}
   add{}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set_attr "type" "alu")
   (set_attr "mode" "")])

;; 64-bit only: zero-extending SImode add setting CCC, with the sum compared
;; with operand 2.
(define_insn "*addsi3_zext_cc_overflow_2"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (plus:SI
            (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm")
            (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))
          (match_dup 2)))
   (set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT
   && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)"
  "@
   add{l}\t{%2, %k0|%k0, %2}
   add{l}\t{%2, %1, %k0|%k0, %1, %2}
   add{l}\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "isa" "*,apx_ndd,apx_ndd")
   (set_attr "type" "alu")
   (set_attr "mode" "SI")])

;; Double-word add setting CCC.  After reload it is split into an add of
;; one pair of halves followed by an add-with-carry of the other pair
;; (operands 3, 4, 5 come from split_double_mode).  When operand 2 is
;; const0_rtx the first add is replaced by a plain move (if needed) plus
;; gen_addcarry_0.  operands[6] is the zero-extended form of operands[5].
(define_insn_and_split "*add3_doubleword_cc_overflow_1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (plus:
            (match_operand: 1 "nonimmediate_operand" "%0,0,ro,r,ro,jO,r")
            (match_operand: 2 "x86_64_hilo_general_operand" "r,o,r,,K,,o"))
          (match_dup 1)))
   (set (match_operand: 0 "nonimmediate_operand" "=ro,r,&r,&r,&r,&r,&r")
        (plus: (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)"
  "#"
  "&& reload_completed"
  [(parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (plus:DWIH (match_dup 1) (match_dup 2))
                     (match_dup 1)))
              (set (match_dup 0)
                   (plus:DWIH (match_dup 1) (match_dup 2)))])
   (parallel [(set (reg:CCC FLAGS_REG)
                   (compare:CCC
                     (zero_extend:
                       (plus:DWIH
                         (plus:DWIH
                           (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                           (match_dup 4))
                         (match_dup 5)))
                     (plus:
                       (match_dup 6)
                       (ltu: (reg:CC FLAGS_REG) (const_int 0)))))
              (set (match_dup 3)
                   (plus:DWIH
                     (plus:DWIH
                       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                       (match_dup 4))
                     (match_dup 5)))])]
{
  split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]);
  if (operands[2] == const0_rtx)
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);
      emit_insn (gen_addcarry_0 (operands[3], operands[4], operands[5]));
      DONE;
    }
  if (CONST_INT_P (operands[5]))
    operands[6] = simplify_unary_operation (ZERO_EXTEND, mode,
                                            operands[5], mode);
  else
    operands[6] = gen_rtx_ZERO_EXTEND (mode, operands[5]);
}
[(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])

;; x == 0 with zero flag test can be done also as x < 1U with carry flag
;; test, where the latter is preferable if we have some carry consuming
;; instruction.
;; For x != 0, we need to use x < 1U with negation of carry, i.e.
;; + (1 - CF).

;; (x == 0) + op1 + op2  ->  compare x with 1, then CF + op1 + op2.
(define_insn_and_split "*add3_eq"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
        (plus:SWI
          (plus:SWI
            (eq:SWI (match_operand 3 "int_nonimmediate_operand")
                    (const_int 0))
            (match_operand:SWI 1 "nonimmediate_operand"))
          (match_operand:SWI 2 "")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_dup 3) (const_int 1)))
   (parallel [(set (match_dup 0)
                   (plus:SWI
                     (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
                               (match_dup 1))
                     (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])])

;; (x != 0) + op1 + C  ->  compare x with 1, then op1 - CF - ~C.
;; Uses (x != 0) == 1 - CF and ~C == -C - 1.  Constant operand 2 only.
;; NOTE(review): the DImode exclusion of -0x80000000 is presumably about
;; keeping the rewritten constant encodable as an immediate -- confirm.
(define_insn_and_split "*add3_ne"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
        (plus:SWI
          (plus:SWI
            (ne:SWI (match_operand 3 "int_nonimmediate_operand")
                    (const_int 0))
            (match_operand:SWI 1 "nonimmediate_operand"))
          (match_operand:SWI 2 "")))
   (clobber (reg:CC FLAGS_REG))]
  "CONST_INT_P (operands[2])
   && (mode != DImode
       || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
   && ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_dup 3) (const_int 1)))
   (parallel [(set (match_dup 0)
                   (minus:SWI
                     (minus:SWI (match_dup 1)
                                (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
                     (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[2] = gen_int_mode (~INTVAL (operands[2]),
                              mode == DImode ? SImode : mode);
})

;; (x == 0) + op1  ->  compare x with 1, then CF + op1.  Operand 1 is
;; forced into a register when it is not already a nonimmediate operand.
(define_insn_and_split "*add3_eq_0"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
        (plus:SWI
          (eq:SWI (match_operand 2 "int_nonimmediate_operand")
                  (const_int 0))
          (match_operand:SWI 1 "")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_unary_operator_ok (PLUS, mode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_dup 2) (const_int 1)))
   (parallel [(set (match_dup 0)
                   (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
                             (match_dup 1)))
              (clobber (reg:CC FLAGS_REG))])]
{
  if (!nonimmediate_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);
})

;; (x != 0) + op1  ->  compare x with 1, then op1 - CF - (-1).
(define_insn_and_split "*add3_ne_0"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
        (plus:SWI
          (ne:SWI (match_operand 2 "int_nonimmediate_operand")
                  (const_int 0))
          (match_operand:SWI 1 "")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_unary_operator_ok (PLUS, mode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_dup 2) (const_int 1)))
   (parallel [(set (match_dup 0)
                   (minus:SWI
                     (minus:SWI (match_dup 1)
                                (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
                     (const_int -1)))
              (clobber (reg:CC FLAGS_REG))])]
{
  if (!nonimmediate_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);
})

;; op1 - (x == 0) - op2  ->  compare x with 1, then op1 - CF - op2.
(define_insn_and_split "*sub3_eq"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
        (minus:SWI
          (minus:SWI
            (match_operand:SWI 1 "nonimmediate_operand")
            (eq:SWI (match_operand 3 "int_nonimmediate_operand")
                    (const_int 0)))
          (match_operand:SWI 2 "")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_dup 3) (const_int 1)))
   (parallel [(set (match_dup 0)
                   (minus:SWI
                     (minus:SWI (match_dup 1)
                                (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
                     (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])])

;; op1 - (x != 0) + C  ->  compare x with 1, then CF + op1 + (C - 1).
;; Constant operand 2 only.
(define_insn_and_split "*sub3_ne"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
        (plus:SWI
          (minus:SWI
            (match_operand:SWI 1 "nonimmediate_operand")
            (ne:SWI (match_operand 3 "int_nonimmediate_operand")
                    (const_int 0)))
          (match_operand:SWI 2 "")))
   (clobber (reg:CC FLAGS_REG))]
  "CONST_INT_P (operands[2])
   && (mode != DImode
       || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
   && ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_dup 3) (const_int 1)))
   (parallel [(set (match_dup 0)
                   (plus:SWI
                     (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
                               (match_dup 1))
                     (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[2] = gen_int_mode (INTVAL (operands[2]) - 1,
                              mode == DImode ? SImode : mode);
})

;; op1 - (x == 0) + C  ->  compare x with 1, then op1 - CF - (-C).
;; Constant operand 2 only.
(define_insn_and_split "*sub3_eq_1"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
        (plus:SWI
          (minus:SWI
            (match_operand:SWI 1 "nonimmediate_operand")
            (eq:SWI (match_operand 3 "int_nonimmediate_operand")
                    (const_int 0)))
          (match_operand:SWI 2 "")))
   (clobber (reg:CC FLAGS_REG))]
  "CONST_INT_P (operands[2])
   && (mode != DImode
       || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000))
   && ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_dup 3) (const_int 1)))
   (parallel [(set (match_dup 0)
                   (minus:SWI
                     (minus:SWI (match_dup 1)
                                (ltu:SWI (reg:CC FLAGS_REG) (const_int 0)))
                     (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[2] = gen_int_mode (-INTVAL (operands[2]),
                              mode == DImode ? SImode : mode);
})

;; op1 - (x == 0)  ->  compare x with 1, then op1 - CF.
(define_insn_and_split "*sub3_eq_0"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
        (minus:SWI
          (match_operand:SWI 1 "")
          (eq:SWI (match_operand 2 "int_nonimmediate_operand")
                  (const_int 0))))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_unary_operator_ok (MINUS, mode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_dup 2) (const_int 1)))
   (parallel [(set (match_dup 0)
                   (minus:SWI (match_dup 1)
                              (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))))
              (clobber (reg:CC FLAGS_REG))])]
{
  if (!nonimmediate_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);
})

;; op1 - (x != 0)  ->  compare x with 1, then CF + op1 + (-1).
(define_insn_and_split "*sub3_ne_0"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
        (minus:SWI
          (match_operand:SWI 1 "")
          (ne:SWI (match_operand 2 "int_nonimmediate_operand")
                  (const_int 0))))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_unary_operator_ok (MINUS, mode, operands)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (match_dup 2) (const_int 1)))
   (parallel [(set (match_dup 0)
                   (plus:SWI
                     (plus:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))
                               (match_dup 1))
                     (const_int -1)))
              (clobber (reg:CC FLAGS_REG))])]
{
  if (!nonimmediate_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);
})

;; The patterns that match these are at the end of this file.
;; NOTE(review): several names, modes and predicates in this section look
;; truncated (e.g. a pattern named "3", empty predicate strings, bare
;; "mode", ':' with no mode) -- presumably iterator substitutions were lost
;; in this copy.  Confirm against upstream before relying on the spelling.

;; XFmode plus/minus expander; all operands are registers.  Needs the x87.
(define_expand "xf3"
  [(set (match_operand:XF 0 "register_operand")
        (plusminus:XF
          (match_operand:XF 1 "register_operand")
          (match_operand:XF 2 "register_operand")))]
  "TARGET_80387")

;; HFmode plus/minus expander; needs AVX512FP16.
(define_expand "hf3"
  [(set (match_operand:HF 0 "register_operand")
        (plusminus:HF
          (match_operand:HF 1 "register_operand")
          (match_operand:HF 2 "nonimmediate_operand")))]
  "TARGET_AVX512FP16")

;; MODEF plus/minus expander, enabled for x87 arithmetic or SSE math.
(define_expand "3"
  [(set (match_operand:MODEF 0 "register_operand")
        (plusminus:MODEF
          (match_operand:MODEF 1 "register_operand")
          (match_operand:MODEF 2 "nonimmediate_operand")))]
  "(TARGET_80387 && X87_ENABLE_ARITH (mode))
    || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)")

;; Multiply instructions

;; Integer multiply expander for the SWIM248 modes; clobbers the flags.
(define_expand "mul3"
  [(parallel [(set (match_operand:SWIM248 0 "register_operand")
                   (mult:SWIM248
                     (match_operand:SWIM248 1 "register_operand")
                     (match_operand:SWIM248 2 "")))
              (clobber (reg:CC FLAGS_REG))])])

;; QImode multiply expander, only with TARGET_QIMODE_MATH.
(define_expand "mulqi3"
  [(parallel [(set (match_operand:QI 0 "register_operand")
                   (mult:QI
                     (match_operand:QI 1 "register_operand")
                     (match_operand:QI 2 "nonimmediate_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_QIMODE_MATH")

;; On AMDFAM10
;; IMUL reg32/64, reg32/64, imm8 Direct
;; IMUL reg32/64, mem32/64, imm8 VectorPath
;; IMUL reg32/64, reg32/64, imm32 Direct
;; IMUL reg32/64, mem32/64, imm32 VectorPath
;; IMUL reg32/64, reg32/64 Direct
;; IMUL reg32/64, mem32/64 Direct
;;
;; On BDVER1, all above IMULs use DirectPath
;;
;; On AMDFAM10
;; IMUL reg16, reg16, imm8 VectorPath
;; IMUL reg16, mem16, imm8 VectorPath
;; IMUL reg16, reg16, imm16 VectorPath
;; IMUL reg16, mem16, imm16 VectorPath
;; IMUL reg16, reg16 Direct
;; IMUL reg16, mem16 Direct
;;
;; On BDVER1, all HI MULs use DoublePath

;; imul: alternatives 0 and 1 are the three-operand forms (operand 2 is a
;; constant per the "K" constraint in alternative 0), alternative 2 is the
;; two-operand form.
(define_insn "*mul3_1"
  [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
        (mult:SWIM248
          (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
          (match_operand:SWIM248 2 "" "K,,r")))
   (clobber (reg:CC FLAGS_REG))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   imul{}\t{%2, %1, %0|%0, %1, %2}
   imul{}\t{%2, %1, %0|%0, %1, %2}
   imul{}\t{%2, %0|%0, %2}"
  [(set_attr "type" "imul")
   (set_attr "prefix_0f" "0,0,1")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                  (const_string "vector")
               (eq_attr "alternative" "1")
                  (const_string "vector")
               (and (eq_attr "alternative" "2")
                    (ior (match_test "mode == HImode")
                         (match_operand 1 "memory_operand")))
                  (const_string "vector")]
              (const_string "direct")))
   (set (attr "amdfam10_decode")
        (cond [(and (eq_attr "alternative" "0,1")
                    (ior (match_test "mode == HImode")
                         (match_operand 1 "memory_operand")))
                  (const_string "vector")]
              (const_string "direct")))
   (set (attr "bdver1_decode")
        (if_then_else
          (match_test "mode == HImode")
            (const_string "double")
            (const_string "direct")))
   (set_attr "mode" "")])

;; 64-bit only: SImode imul whose result is zero-extended into a DImode
;; register.
(define_insn "*mulsi3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI
          (mult:SI
            (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
            (match_operand:SI 2 "x86_64_general_operand" "K,e,BMr"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   imul{l}\t{%2, %1, %k0|%k0, %1, %2}
   imul{l}\t{%2, %1, %k0|%k0, %1, %2}
   imul{l}\t{%2, %k0|%k0, %2}"
  [(set_attr "type" "imul")
   (set_attr "prefix_0f" "0,0,1")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                  (const_string "vector")
               (eq_attr "alternative" "1")
                  (const_string "vector")
               (and (eq_attr "alternative" "2")
                    (match_operand 1 "memory_operand"))
                  (const_string "vector")]
              (const_string "direct")))
   (set (attr "amdfam10_decode")
        (cond [(and (eq_attr "alternative" "0,1")
                    (match_operand 1 "memory_operand"))
                  (const_string "vector")]
              (const_string "direct")))
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "SI")])

;;On AMDFAM10 and BDVER1
;; MUL reg8 Direct
;; MUL mem8 Direct

;; QImode multiply via one-operand mul{b}; operand 1 is tied to the
;; destination, which uses the "a" register constraint.
(define_insn "*mulqi3_1"
  [(set (match_operand:QI 0 "register_operand" "=a")
        (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
                 (match_operand:QI 2 "nonimmediate_operand" "qm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_QIMODE_MATH
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "mul{b}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
     (if_then_else (eq_attr "cpu" "athlon")
        (const_string "vector")
        (const_string "direct")))
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "QI")])

;; Multiply with jump on overflow.

;; Signed multiply that sets CCO by comparing the wide product of the
;; sign-extended inputs with the sign-extended narrow product, then jumps to
;; label operand 3 when the CCO test against 0 is "eq".  operands[4] is the
;; wide form of operand 2: the constant itself, or a SIGN_EXTEND of it.
(define_expand "mulv4"
  [(parallel [(set (reg:CCO FLAGS_REG)
                   (eq:CCO (mult:
                              (sign_extend:
                                 (match_operand:SWI248 1 "register_operand"))
                              (match_dup 4))
                           (sign_extend:
                              (mult:SWI248 (match_dup 1)
                                           (match_operand:SWI248 2
                                              "")))))
              (set (match_operand:SWI248 0 "register_operand")
                   (mult:SWI248 (match_dup 1) (match_dup 2)))])
   (set (pc) (if_then_else
               (eq (reg:CCO FLAGS_REG) (const_int 0))
               (label_ref (match_operand 3))
               (pc)))]
  ""
{
  if (CONST_INT_P (operands[2]))
    operands[4] = operands[2];
  else
    operands[4] = gen_rtx_SIGN_EXTEND (mode, operands[2]);
})

;; SWI48 imul that also sets CCO; non-constant form where both inputs appear
;; sign-extended.
(define_insn "*mulv4"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO (mult:
                   (sign_extend:
                      (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0"))
                   (sign_extend:
                      (match_operand:SWI48 2 "x86_64_sext_operand" "We,mr")))
                (sign_extend:
                   (mult:SWI48 (match_dup 1) (match_dup 2)))))
   (set (match_operand:SWI48 0 "register_operand" "=r,r")
        (mult:SWI48 (match_dup 1) (match_dup 2)))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   imul{}\t{%2, %1, %0|%0, %1, %2}
   imul{}\t{%2, %0|%0, %2}"
  [(set_attr "type" "imul")
   (set_attr "prefix_0f" "0,1")
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                  (const_string "vector")
               (eq_attr "alternative" "0")
                  (const_string "vector")
               (and (eq_attr "alternative" "1")
                    (match_operand 1 "memory_operand"))
                  (const_string "vector")]
              (const_string "direct")))
   (set (attr "amdfam10_decode")
        (cond [(and (eq_attr "alternative" "1")
                    (match_operand 1 "memory_operand"))
                  (const_string "vector")]
              (const_string "direct")))
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "")])

;; HImode imul that also sets CCO (two-operand form only).
(define_insn "*mulvhi4"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO (mult:SI
                   (sign_extend:SI
                      (match_operand:HI 1 "nonimmediate_operand" "%0"))
                   (sign_extend:SI
                      (match_operand:HI 2 "nonimmediate_operand" "mr")))
                (sign_extend:SI
                   (mult:HI (match_dup 1) (match_dup 2)))))
   (set (match_operand:HI 0 "register_operand" "=r")
        (mult:HI (match_dup 1) (match_dup 2)))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "imul{w}\t{%2, %0|%0, %2}"
  [(set_attr "type" "imul")
   (set_attr "prefix_0f" "1")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "double")
   (set_attr "mode" "HI")])

;; imul by a constant that also sets CCO.  Operand 3 is the constant as it
;; appears in the wide multiply; the condition requires operand 2 to be a
;; CONST_INT with the same value.
(define_insn "*mulv4_1"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO (mult:
                   (sign_extend:
                      (match_operand:SWI248 1 "nonimmediate_operand" "rm,rm"))
                   (match_operand: 3 "const_int_operand" "K,i"))
                (sign_extend:
                   (mult:SWI248 (match_dup 1)
                                (match_operand:SWI248 2
                                   "" "K,")))))
   (set (match_operand:SWI248 0 "register_operand" "=r,r")
        (mult:SWI248 (match_dup 1) (match_dup 2)))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))
   && CONST_INT_P (operands[2])
   && INTVAL (operands[2]) == INTVAL (operands[3])"
  "imul{}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "imul")
   (set (attr "prefix_0f")
        (if_then_else
          (match_test "mode == HImode")
            (const_string "0")
            (const_string "*")))
   (set (attr "athlon_decode")
        (cond [(eq_attr "cpu" "athlon")
                  (const_string "vector")
               (eq_attr "alternative" "1")
                  (const_string "vector")]
              (const_string "direct")))
   (set (attr "amdfam10_decode")
        (cond [(ior (match_test "mode == HImode")
                    (match_operand 1 "memory_operand"))
                  (const_string "vector")]
              (const_string "direct")))
   (set (attr "bdver1_decode")
        (if_then_else
          (match_test "mode == HImode")
            (const_string "double")
            (const_string "direct")))
   (set_attr "mode" "")
   (set (attr "length_immediate")
        (cond [(eq_attr "alternative" "0")
                  (const_string "1")
               (match_test " == 8")
                  (const_string "4")]
              (const_string "")))])

;; Unsigned multiply with jump on overflow: same shape as the signed
;; expander but with zero_extend and an extra scratch clobber.  If both
;; inputs are memory, operand 1 is forced into a register.
(define_expand "umulv4"
  [(parallel [(set (reg:CCO FLAGS_REG)
                   (eq:CCO (mult:
                              (zero_extend:
                                 (match_operand:SWI248 1
                                                      "nonimmediate_operand"))
                              (zero_extend:
                                 (match_operand:SWI248 2
                                                      "nonimmediate_operand")))
                           (zero_extend:
                              (mult:SWI248 (match_dup 1) (match_dup 2)))))
              (set (match_operand:SWI248 0 "register_operand")
                   (mult:SWI248 (match_dup 1) (match_dup 2)))
              (clobber (scratch:SWI248))])
   (set (pc) (if_then_else
               (eq (reg:CCO FLAGS_REG) (const_int 0))
               (label_ref (match_operand 3))
               (pc)))]
  ""
{
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    operands[1] = force_reg (mode, operands[1]);
})

;; One-operand mul that sets CCO; result uses the "a" constraint and the
;; scratch uses "d".
(define_insn "*umulv4"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO (mult:
                   (zero_extend:
                      (match_operand:SWI248 1 "nonimmediate_operand" "%0"))
                   (zero_extend:
                      (match_operand:SWI248 2 "nonimmediate_operand" "rm")))
                (zero_extend:
                   (mult:SWI248 (match_dup 1) (match_dup 2)))))
   (set (match_operand:SWI248 0 "register_operand" "=a")
        (mult:SWI248 (match_dup 1) (match_dup 2)))
   (clobber (match_scratch:SWI248 3 "=d"))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "mul{}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
     (if_then_else (eq_attr "cpu" "athlon")
       (const_string "vector")
       (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "")])

;; QImode multiply with jump on overflow (either extension, via any_extend).
;; If both inputs are memory, operand 1 is forced into a register.
(define_expand "mulvqi4"
  [(parallel [(set (reg:CCO FLAGS_REG)
                   (eq:CCO (mult:HI
                              (any_extend:HI
                                 (match_operand:QI 1 "nonimmediate_operand"))
                              (any_extend:HI
                                 (match_operand:QI 2 "nonimmediate_operand")))
                           (any_extend:HI
                              (mult:QI (match_dup 1) (match_dup 2)))))
              (set (match_operand:QI 0 "register_operand")
                   (mult:QI (match_dup 1) (match_dup 2)))])
   (set (pc) (if_then_else
               (eq (reg:CCO FLAGS_REG) (const_int 0))
               (label_ref (match_operand 3))
               (pc)))]
  "TARGET_QIMODE_MATH"
{
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    operands[1] = force_reg (QImode, operands[1]);
})

;; QImode one-operand multiply that also sets CCO.
(define_insn "*mulvqi4"
  [(set (reg:CCO FLAGS_REG)
        (eq:CCO (mult:HI
                   (any_extend:HI
                      (match_operand:QI 1 "nonimmediate_operand" "%0"))
                   (any_extend:HI
                      (match_operand:QI 2 "nonimmediate_operand" "qm")))
                (any_extend:HI
                   (mult:QI (match_dup 1) (match_dup 2)))))
   (set (match_operand:QI 0 "register_operand" "=a")
        (mult:QI (match_dup 1) (match_dup 2)))]
  "TARGET_QIMODE_MATH
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "mul{b}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
     (if_then_else (eq_attr "cpu" "athlon")
        (const_string "vector")
        (const_string "direct")))
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "QI")])

;; Widening multiply expander: both DWIH inputs are extended (any_extend)
;; before the multiply.
(define_expand "mul3"
  [(parallel [(set (match_operand: 0 "register_operand")
                   (mult:
                     (any_extend:
                       (match_operand:DWIH 1 "register_operand"))
                     (any_extend:
                       (match_operand:DWIH 2 "nonimmediate_operand"))))
              (clobber (reg:CC FLAGS_REG))])])

;; QI x QI -> HI widening multiply expander, only with TARGET_QIMODE_MATH.
(define_expand "mulqihi3"
  [(parallel [(set (match_operand:HI 0 "register_operand")
                   (mult:HI
                     (any_extend:HI
                       (match_operand:QI 1 "register_operand"))
                     (any_extend:HI
                       (match_operand:QI 2 "nonimmediate_operand"))))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_QIMODE_MATH")

;; BMI2 mulx: operand 0 receives the product, operand 1 the unsigned high
;; part; operand 2 uses the "d" constraint.  No flags clobber in the
;; pattern.
(define_insn "*bmi2_umul3_1"
  [(set (match_operand:DWIH 0 "register_operand" "=r")
        (mult:DWIH
          (match_operand:DWIH 2 "register_operand" "%d")
          (match_operand:DWIH 3 "nonimmediate_operand" "rm")))
   (set (match_operand:DWIH 1 "register_operand" "=r")
        (umul_highpart:DWIH (match_dup 2) (match_dup 3)))]
  "TARGET_BMI2"
  "mulx\t{%3, %0, %1|%1, %0, %3}"
  [(set_attr "type" "imulx")
   (set_attr "prefix" "vex")
   (set_attr "mode" "")])

;; Tweak *bmi2_umul3_1 to eliminate following mov.
;; A mult/umul_highpart parallel followed by a register copy out of one of
;; its two results: when the copied-from register is dead after the copy and
;; the other result is not the copy's destination, make the multiply write
;; that result straight into the copy's destination and drop the copy.
(define_peephole2
  [(parallel [(set (match_operand:DWIH 0 "general_reg_operand")
                   (mult:DWIH (match_operand:DWIH 2 "register_operand")
                              (match_operand:DWIH 3 "nonimmediate_operand")))
              (set (match_operand:DWIH 1 "general_reg_operand")
                   (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])
   (set (match_operand:DWIH 4 "general_reg_operand")
        (match_operand:DWIH 5 "general_reg_operand"))]
  "TARGET_BMI2
   && ((REGNO (operands[5]) == REGNO (operands[0])
        && REGNO (operands[1]) != REGNO (operands[4]))
       || (REGNO (operands[5]) == REGNO (operands[1])
           && REGNO (operands[0]) != REGNO (operands[4])))
   && peep2_reg_dead_p (2, operands[5])"
  [(parallel [(set (match_dup 0)
                   (mult:DWIH (match_dup 2) (match_dup 3)))
              (set (match_dup 1)
                   (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])]
{
  if (REGNO (operands[5]) == REGNO (operands[0]))
    operands[0] = operands[4];
  else
    operands[1] = operands[4];
})

;; Unsigned widening multiply.  Alternative 0 (isa bmi2, input under the "d"
;; constraint) emits "#", i.e. it is meant to be split; alternative 1 is the
;; one-operand mul with the "A"/"a" constraints.
;; NOTE(review): the pattern name and several modes look truncated in this
;; copy (':' with no mode, empty "mode" attribute) -- confirm upstream.
(define_insn "*umul3_1"
  [(set (match_operand: 0 "register_operand" "=r,A")
        (mult:
          (zero_extend:
            (match_operand:DWIH 1 "register_operand" "%d,a"))
          (zero_extend:
            (match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   #
   mul{}\t%2"
  [(set_attr "isa" "bmi2,*")
   (set_attr "type" "imulx,imul")
   (set_attr "length_immediate" "*,0")
   (set (attr "athlon_decode")
        (cond [(eq_attr "alternative" "1")
                 (if_then_else (eq_attr "cpu" "athlon")
                   (const_string "vector")
                   (const_string "double"))]
              (const_string "*")))
   (set_attr "amdfam10_decode" "*,double")
   (set_attr "bdver1_decode" "*,direct")
   (set_attr "prefix" "vex,orig")
   (set_attr "mode" "")])

;; Convert mul to the mulx pattern to avoid flags dependency.
;; After reload, with BMI2 and operand 1 in DX_REG, rewrite the
;; flags-clobbering unsigned widening multiply as a mult/umul_highpart
;; parallel without a flags clobber.  operands[3] and operands[4] are the
;; two halves of the double-word destination produced by split_double_mode;
;; they receive the product and the unsigned high part respectively.  The
;; replacement pattern refers only to operands 1-4, so no other operand is
;; set up here.
;; NOTE(review): the wide modes in this pattern look truncated in this copy
;; (':' with no mode, bare "mode") -- confirm against upstream.
(define_split
  [(set (match_operand: 0 "register_operand")
        (mult:
          (zero_extend:
            (match_operand:DWIH 1 "register_operand"))
          (zero_extend:
            (match_operand:DWIH 2 "nonimmediate_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2 && reload_completed
   && REGNO (operands[1]) == DX_REG"
  [(parallel [(set (match_dup 3)
                   (mult:DWIH (match_dup 1) (match_dup 2)))
              (set (match_dup 4)
                   (umul_highpart:DWIH (match_dup 1) (match_dup 2)))])]
{
  split_double_mode (mode, &operands[0], 1, &operands[3], &operands[4]);
})

;; Signed widening multiply via the one-operand imul form; the destination
;; uses the "A" constraint and operand 1 the "a" constraint.
(define_insn "*mul3_1"
  [(set (match_operand: 0 "register_operand" "=A")
        (mult:
          (sign_extend:
            (match_operand:DWIH 1 "register_operand" "%a"))
          (sign_extend:
            (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "imul{}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
     (if_then_else (eq_attr "cpu" "athlon")
        (const_string "vector")
        (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "")])

;; QI x QI -> HI widening multiply (either extension, via any_extend) using
;; the one-operand byte multiply; only with TARGET_QIMODE_MATH.
(define_insn "*mulqihi3_1"
  [(set (match_operand:HI 0 "register_operand" "=a")
        (mult:HI
          (any_extend:HI
            (match_operand:QI 1 "register_operand" "%0"))
          (any_extend:HI
            (match_operand:QI 2 "nonimmediate_operand" "qm"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_QIMODE_MATH
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "mul{b}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
     (if_then_else (eq_attr "cpu" "athlon")
        (const_string "vector")
        (const_string "direct")))
   (set_attr "amdfam10_decode" "direct")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "QI")])

;; Widening multiplication peephole2s to tweak register allocation.
;; mov imm,%rdx; mov %rdi,%rax; mulq %rdx  ->  mov imm,%rax; mulq %rdi
;; Load the immediate directly into operand 2 and multiply by operand 3,
;; dropping the register copy.  Requires that operand 0 is overwritten by
;; the result, is DX_REG, or is dead after the three insns.
(define_peephole2
  [(set (match_operand:DWIH 0 "general_reg_operand")
        (match_operand:DWIH 1 "immediate_operand"))
   (set (match_operand:DWIH 2 "general_reg_operand")
        (match_operand:DWIH 3 "general_reg_operand"))
   (parallel [(set (match_operand:<DWI> 4 "general_reg_operand")
                   (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
                               (zero_extend:<DWI> (match_dup 0))))
              (clobber (reg:CC FLAGS_REG))])]
  "REGNO (operands[3]) != AX_REG
   && REGNO (operands[0]) != REGNO (operands[2])
   && REGNO (operands[0]) != REGNO (operands[3])
   && (REGNO (operands[0]) == REGNO (operands[4])
       || REGNO (operands[0]) == DX_REG
       || peep2_reg_dead_p (3, operands[0]))"
  [(set (match_dup 2) (match_dup 1))
   (parallel [(set (match_dup 4)
                   (mult:<DWI> (zero_extend:<DWI> (match_dup 2))
                               (zero_extend:<DWI> (match_dup 3))))
              (clobber (reg:CC FLAGS_REG))])])

;; mov imm,%rax; mov %rdi,%rdx; mulx %rax  ->  mov imm,%rdx; mulx %rdi
;; Same idea for the two-result (low part / unsigned high part) form.
;; Both operand 0 and operand 2 must be overwritten by one of the results
;; or be dead afterwards.
(define_peephole2
  [(set (match_operand:DWIH 0 "general_reg_operand")
        (match_operand:DWIH 1 "immediate_operand"))
   (set (match_operand:DWIH 2 "general_reg_operand")
        (match_operand:DWIH 3 "general_reg_operand"))
   (parallel [(set (match_operand:DWIH 4 "general_reg_operand")
                   (mult:DWIH (match_dup 2) (match_dup 0)))
              (set (match_operand:DWIH 5 "general_reg_operand")
                   (umul_highpart:DWIH (match_dup 2) (match_dup 0)))])]
  "REGNO (operands[3]) != DX_REG
   && REGNO (operands[0]) != REGNO (operands[2])
   && REGNO (operands[0]) != REGNO (operands[3])
   && (REGNO (operands[0]) == REGNO (operands[4])
       || REGNO (operands[0]) == REGNO (operands[5])
       || peep2_reg_dead_p (3, operands[0]))
   && (REGNO (operands[2]) == REGNO (operands[4])
       || REGNO (operands[2]) == REGNO (operands[5])
       || peep2_reg_dead_p (3, operands[2]))"
  [(set (match_dup 2) (match_dup 1))
   (parallel [(set (match_dup 4)
                   (mult:DWIH (match_dup 2) (match_dup 3)))
              (set (match_dup 5)
                   (umul_highpart:DWIH (match_dup 2) (match_dup 3)))])])

;; Highpart multiplication patterns

;; Signed/unsigned high-part multiply.  The scratch tied to operand 1
;; ("=1") records that the input register is clobbered.
(define_insn "<s>mul<mode>3_highpart"
  [(set (match_operand:DWIH 0 "register_operand" "=d")
        (any_mul_highpart:DWIH
          (match_operand:DWIH 1 "register_operand" "%a")
          (match_operand:DWIH 2 "nonimmediate_operand" "rm")))
   (clobber (match_scratch:DWIH 3 "=1"))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "<sgnprefix>mul{<imodesuffix>}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (if_then_else (eq_attr "cpu" "athlon")
          (const_string "vector")
          (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "<MODE>")])

;; SImode high-part multiply whose result is zero-extended to DImode.
(define_insn "*<s>mulsi3_highpart_zext"
  [(set (match_operand:DI 0 "register_operand" "=d")
        (zero_extend:DI
          (any_mul_highpart:SI
            (match_operand:SI 1 "register_operand" "%a")
            (match_operand:SI 2 "nonimmediate_operand" "rm"))))
   (clobber (match_scratch:SI 3 "=1"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "<sgnprefix>mul{l}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (if_then_else (eq_attr "cpu" "athlon")
          (const_string "vector")
          (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "SI")])

;; The same operations in truncate (lshiftrt (mult (extend) (extend)))
;; form.
(define_insn "*<s>muldi3_highpart_1"
  [(set (match_operand:DI 0 "register_operand" "=d")
        (truncate:DI
          (lshiftrt:TI
            (mult:TI
              (any_extend:TI
                (match_operand:DI 1 "nonimmediate_operand" "%a"))
              (any_extend:TI
                (match_operand:DI 2 "nonimmediate_operand" "rm")))
            (const_int 64))))
   (clobber (match_scratch:DI 3 "=1"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "<sgnprefix>mul{q}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (if_then_else (eq_attr "cpu" "athlon")
          (const_string "vector")
          (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "DI")])

(define_insn "*<s>mulsi3_highpart_zext"
  [(set (match_operand:DI 0 "register_operand" "=d")
        (zero_extend:DI
          (truncate:SI
            (lshiftrt:DI
              (mult:DI
                (any_extend:DI
                  (match_operand:SI 1 "nonimmediate_operand" "%a"))
                (any_extend:DI
                  (match_operand:SI 2 "nonimmediate_operand" "rm")))
              (const_int 32)))))
   (clobber (match_scratch:SI 3 "=1"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "<sgnprefix>mul{l}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (if_then_else (eq_attr "cpu" "athlon")
          (const_string "vector")
          (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "SI")])

(define_insn "*<s>mulsi3_highpart_1"
  [(set (match_operand:SI 0 "register_operand" "=d")
        (truncate:SI
          (lshiftrt:DI
            (mult:DI
              (any_extend:DI
                (match_operand:SI 1 "nonimmediate_operand" "%a"))
              (any_extend:DI
                (match_operand:SI 2 "nonimmediate_operand" "rm")))
            (const_int 32))))
   (clobber (match_scratch:SI 3 "=1"))
   (clobber (reg:CC FLAGS_REG))]
  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "<sgnprefix>mul{l}\t%2"
  [(set_attr "type" "imul")
   (set_attr "length_immediate" "0")
   (set (attr "athlon_decode")
        (if_then_else (eq_attr "cpu" "athlon")
          (const_string "vector")
          (const_string "double")))
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "direct")
   (set_attr "mode" "SI")])

;; Highpart multiplication peephole2s to tweak register allocation.
;; mov imm,%rdx; mov %rdi,%rax; imulq %rdx  ->  mov imm,%rax; imulq %rdi
;; Matches: operand 0 <- immediate, operand 2 <- operand 3, then a
;; high-part multiply of operand 2 by operand 0.  The replacement loads the
;; immediate straight into operand 2 and multiplies by operand 3 instead.
;; Only valid when operand 0 is overwritten by the result or is dead after
;; the third insn, and operand 3 is not AX_REG.
(define_peephole2
  [(set (match_operand:SWI48 0 "general_reg_operand")
        (match_operand:SWI48 1 "immediate_operand"))
   (set (match_operand:SWI48 2 "general_reg_operand")
        (match_operand:SWI48 3 "general_reg_operand"))
   (parallel [(set (match_operand:SWI48 4 "general_reg_operand")
                   (any_mul_highpart:SWI48 (match_dup 2) (match_dup 0)))
              (clobber (match_dup 2))
              (clobber (reg:CC FLAGS_REG))])]
  "REGNO (operands[3]) != AX_REG && REGNO (operands[0]) != REGNO (operands[2]) && REGNO (operands[0]) != REGNO (operands[3]) && (REGNO (operands[0]) == REGNO (operands[4]) || peep2_reg_dead_p (3, operands[0]))"
  [(set (match_dup 2) (match_dup 1))
   (parallel [(set (match_dup 4)
                   (any_mul_highpart:SWI48 (match_dup 2) (match_dup 3)))
              (clobber (match_dup 2))
              (clobber (reg:CC FLAGS_REG))])])

;; Likewise for the SImode high-part multiply whose result is zero-extended
;; into a DImode register (TARGET_64BIT only).  This variant additionally
;; requires operand 2 and operand 3 to be different registers.
(define_peephole2
  [(set (match_operand:SI 0 "general_reg_operand")
        (match_operand:SI 1 "immediate_operand"))
   (set (match_operand:SI 2 "general_reg_operand")
        (match_operand:SI 3 "general_reg_operand"))
   (parallel [(set (match_operand:DI 4 "general_reg_operand")
                   (zero_extend:DI
                     (any_mul_highpart:SI (match_dup 2) (match_dup 0))))
              (clobber (match_dup 2))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_64BIT && REGNO (operands[3]) != AX_REG && REGNO (operands[0]) != REGNO (operands[2]) && REGNO (operands[2]) != REGNO (operands[3]) && REGNO (operands[0]) != REGNO (operands[3]) && (REGNO (operands[0]) == REGNO (operands[4]) || peep2_reg_dead_p (3, operands[0]))"
  [(set (match_dup 2) (match_dup 1))
   (parallel [(set (match_dup 4)
                   (zero_extend:DI
                     (any_mul_highpart:SI (match_dup 2) (match_dup 3))))
              (clobber (match_dup 2))
              (clobber (reg:CC FLAGS_REG))])])

;; The patterns that match these are at the end of this file.
;; Floating-point multiply expanders.  They have no preparation code; they
;; only gate the standard names on the target condition.
(define_expand "mulxf3"
  [(set (match_operand:XF 0 "register_operand")
        (mult:XF (match_operand:XF 1 "register_operand")
                 (match_operand:XF 2 "register_operand")))]
  "TARGET_80387")

(define_expand "mulhf3"
  [(set (match_operand:HF 0 "register_operand")
        (mult:HF (match_operand:HF 1 "register_operand")
                 (match_operand:HF 2 "nonimmediate_operand")))]
  "TARGET_AVX512FP16")

(define_expand "mul<mode>3"
  [(set (match_operand:MODEF 0 "register_operand")
        (mult:MODEF (match_operand:MODEF 1 "register_operand")
                    (match_operand:MODEF 2 "nonimmediate_operand")))]
  "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)")

;; Divide instructions

;; The patterns that match these are at the end of this file.

(define_expand "divxf3"
  [(set (match_operand:XF 0 "register_operand")
        (div:XF (match_operand:XF 1 "register_operand")
                (match_operand:XF 2 "register_operand")))]
  "TARGET_80387")

/* There is no more precision loss than Newton-Raphson approximation
   when using HFmode rcp/rsqrt, so do the transformation directly under
   TARGET_RECIP_DIV and fast-math.  */
(define_expand "divhf3"
  [(set (match_operand:HF 0 "register_operand")
        (div:HF (match_operand:HF 1 "register_operand")
                (match_operand:HF 2 "nonimmediate_operand")))]
  "TARGET_AVX512FP16"
{
  /* Replace a / b by a * rcp (b) when optimizing for speed and the
     math flags below permit it.  */
  if (TARGET_RECIP_DIV
      && optimize_insn_for_speed_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      rtx op = gen_reg_rtx (HFmode);
      operands[2] = force_reg (HFmode, operands[2]);
      emit_insn (gen_rcphf2 (op, operands[2]));
      emit_insn (gen_mulhf3 (operands[0], operands[1], op));
      DONE;
    }
})

(define_expand "div<mode>3"
  [(set (match_operand:MODEF 0 "register_operand")
        (div:MODEF (match_operand:MODEF 1 "register_operand")
                   (match_operand:MODEF 2 "nonimmediate_operand")))]
  "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
{
  /* For SFmode with SSE math, hand the division to ix86_emit_swdivsf
     under the same speed and math-flag conditions as divhf3.  */
  if (<MODE>mode == SFmode
      && TARGET_SSE && TARGET_SSE_MATH
      && TARGET_RECIP_DIV
      && optimize_insn_for_speed_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swdivsf (operands[0], operands[1],
                         operands[2], SFmode);
      DONE;
    }
})

;; Divmod instructions.
;; any_div iterates over signed and unsigned division; paired_mod gives
;; the matching remainder code for each.
(define_code_iterator any_div [div udiv])
(define_code_attr paired_mod [(div "mod") (udiv "umod")])

;; Combined quotient (operand 0) / remainder (operand 3) expander.
(define_expand "<u>divmod<mode>4"
  [(parallel [(set (match_operand:SWIM248 0 "register_operand")
                   (any_div:SWIM248
                     (match_operand:SWIM248 1 "register_operand")
                     (match_operand:SWIM248 2 "nonimmediate_operand")))
              (set (match_operand:SWIM248 3 "register_operand")
                   (<paired_mod>:SWIM248 (match_dup 1) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])])

;; Split with 8bit unsigned divide:
;;   if (dividend and divisor are in [0-255])
;;     use 8bit unsigned integer divide
;;   else
;;     use original integer divide
;; The three splitters below all defer to ix86_split_idivmod; the last
;; argument tells it whether the division is unsigned.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (any_div:SWI48
          (match_operand:SWI48 2 "register_operand")
          (match_operand:SWI48 3 "nonimmediate_operand")))
   (set (match_operand:SWI48 1 "register_operand")
        (<paired_mod>:SWI48 (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_8BIT_IDIV
   && TARGET_QIMODE_MATH
   && can_create_pseudo_p ()
   && !optimize_insn_for_size_p ()"
  [(const_int 0)]
  "ix86_split_idivmod (<MODE>mode, operands, <u_bool>); DONE;")

(define_split
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (any_div:SI
            (match_operand:SI 2 "register_operand")
            (match_operand:SI 3 "nonimmediate_operand"))))
   (set (match_operand:SI 1 "register_operand")
        (<paired_mod>:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && TARGET_USE_8BIT_IDIV
   && TARGET_QIMODE_MATH
   && can_create_pseudo_p ()
   && !optimize_insn_for_size_p ()"
  [(const_int 0)]
  "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")

(define_split
  [(set (match_operand:DI 1 "register_operand")
        (zero_extend:DI
          (<paired_mod>:SI
            (match_operand:SI 2 "register_operand")
            (match_operand:SI 3 "nonimmediate_operand"))))
   (set (match_operand:SI 0 "register_operand")
        (any_div:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && TARGET_USE_8BIT_IDIV
   && TARGET_QIMODE_MATH
   && can_create_pseudo_p ()
   && !optimize_insn_for_size_p ()"
  [(const_int 0)]
  "ix86_split_idivmod (SImode, operands, <u_bool>); DONE;")

;; The "_1" and "_zext_1/_2" patterns below carry UNSPEC_DIV_ALREADY_SPLIT,
;; so the unspec-free 8-bit splitters above cannot match them again.
;; NOTE(review): presumably these are what ix86_split_idivmod emits for the
;; full-width fallback path -- confirm against i386-expand.
;; After reload each one first sets up operand 1 (sign bits of the dividend
;; for the signed forms, zero for the unsigned forms) and then emits the
;; real divide with a (use ...) of that register.
(define_insn_and_split "divmod<mode>4_1"
  [(set (match_operand:SWI48 0 "register_operand" "=a")
        (div:SWI48 (match_operand:SWI48 2 "register_operand" "0")
                   (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
   (set (match_operand:SWI48 1 "register_operand" "=&d")
        (mod:SWI48 (match_dup 2) (match_dup 3)))
   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  "reload_completed"
  [(parallel [(set (match_dup 1)
                   (ashiftrt:SWI48 (match_dup 4) (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
                   (div:SWI48 (match_dup 2) (match_dup 3)))
              (set (match_dup 1)
                   (mod:SWI48 (match_dup 2) (match_dup 3)))
              (use (match_dup 1))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);

  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
    operands[4] = operands[2];
  else
    {
      /* Avoid use of cltd in favor of a mov+shift.  */
      emit_move_insn (operands[1], operands[2]);
      operands[4] = operands[1];
    }
}
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])

(define_insn_and_split "udivmod<mode>4_1"
  [(set (match_operand:SWI48 0 "register_operand" "=a")
        (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
                    (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
   (set (match_operand:SWI48 1 "register_operand" "=&d")
        (umod:SWI48 (match_dup 2) (match_dup 3)))
   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 1) (const_int 0))
   (parallel [(set (match_dup 0)
                   (udiv:SWI48 (match_dup 2) (match_dup 3)))
              (set (match_dup 1)
                   (umod:SWI48 (match_dup 2) (match_dup 3)))
              (use (match_dup 1))
              (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])

(define_insn_and_split "divmodsi4_zext_1"
  [(set (match_operand:DI 0 "register_operand" "=a")
        (zero_extend:DI
          (div:SI (match_operand:SI 2 "register_operand" "0")
                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 1 "register_operand" "=&d")
        (mod:SI (match_dup 2) (match_dup 3)))
   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 1)
                   (ashiftrt:SI (match_dup 4) (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
                   (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
              (set (match_dup 1)
                   (mod:SI (match_dup 2) (match_dup 3)))
              (use (match_dup 1))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);

  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
    operands[4] = operands[2];
  else
    {
      /* Avoid use of cltd in favor of a mov+shift.  */
      emit_move_insn (operands[1], operands[2]);
      operands[4] = operands[1];
    }
}
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "udivmodsi4_zext_1"
  [(set (match_operand:DI 0 "register_operand" "=a")
        (zero_extend:DI
          (udiv:SI (match_operand:SI 2 "register_operand" "0")
                   (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 1 "register_operand" "=&d")
        (umod:SI (match_dup 2) (match_dup 3)))
   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 1) (const_int 0))
   (parallel [(set (match_dup 0)
                   (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
              (set (match_dup 1)
                   (umod:SI (match_dup 2) (match_dup 3)))
              (use (match_dup 1))
              (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

;; In the "_zext_2" forms the remainder is the zero-extended DImode
;; result; operand 6 (resp. 4) is its SImode low part, used for the
;; pre-divide setup.
(define_insn_and_split "divmodsi4_zext_2"
  [(set (match_operand:DI 1 "register_operand" "=&d")
        (zero_extend:DI
          (mod:SI (match_operand:SI 2 "register_operand" "0")
                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 0 "register_operand" "=a")
        (div:SI (match_dup 2) (match_dup 3)))
   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 6)
                   (ashiftrt:SI (match_dup 4) (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 1)
                   (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
              (set (match_dup 0)
                   (div:SI (match_dup 2) (match_dup 3)))
              (use (match_dup 6))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
  operands[6] = gen_lowpart (SImode, operands[1]);

  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
    operands[4] = operands[2];
  else
    {
      /* Avoid use of cltd in favor of a mov+shift.  */
      emit_move_insn (operands[6], operands[2]);
      operands[4] = operands[6];
    }
}
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "udivmodsi4_zext_2"
  [(set (match_operand:DI 1 "register_operand" "=&d")
        (zero_extend:DI
          (umod:SI (match_operand:SI 2 "register_operand" "0")
                   (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 0 "register_operand" "=a")
        (udiv:SI (match_dup 2) (match_dup 3)))
   (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 4) (const_int 0))
   (parallel [(set (match_dup 1)
                   (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
              (set (match_dup 0)
                   (udiv:SI (match_dup 2) (match_dup 3)))
              (use (match_dup 4))
              (clobber (reg:CC FLAGS_REG))])]
  "operands[4] = gen_lowpart (SImode, operands[1]);"
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

;; Plain signed divmod without the unspec marker.  For HImode the
;; mov+shift sequence is always used, regardless of TARGET_USE_CLTD.
(define_insn_and_split "*divmod<mode>4"
  [(set (match_operand:SWIM248 0 "register_operand" "=a")
        (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
                     (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
   (set (match_operand:SWIM248 1 "register_operand" "=&d")
        (mod:SWIM248 (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  "reload_completed"
  [(parallel [(set (match_dup 1)
                   (ashiftrt:SWIM248 (match_dup 4) (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
                   (div:SWIM248 (match_dup 2) (match_dup 3)))
              (set (match_dup 1)
                   (mod:SWIM248 (match_dup 2) (match_dup 3)))
              (use (match_dup 1))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);

  if (<MODE>mode != HImode
      && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD))
    operands[4] = operands[2];
  else
    {
      /* Avoid use of cltd in favor of a mov+shift.  */
      emit_move_insn (operands[1], operands[2]);
      operands[4] = operands[1];
    }
}
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])

(define_insn_and_split "*udivmod<mode>4"
  [(set (match_operand:SWIM248 0 "register_operand" "=a")
        (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
                      (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
   (set (match_operand:SWIM248 1 "register_operand" "=&d")
        (umod:SWIM248 (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 1) (const_int 0))
   (parallel [(set (match_dup 0)
                   (udiv:SWIM248 (match_dup 2) (match_dup 3)))
              (set (match_dup 1)
                   (umod:SWIM248 (match_dup 2) (match_dup 3)))
              (use (match_dup 1))
              (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])

;; Optimize division or modulo by constant power of 2, if the constant
;; materializes only after expansion.
;; Unsigned divmod by 2**v with 1 <= v <= 31: after reload, copy the
;; dividend to the remainder register, shift right by v for the quotient
;; and mask with 2**v - 1 for the remainder.
(define_insn_and_split "*udivmod<mode>4_pow2"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0")
                    (match_operand:SWI48 3 "const_int_operand")))
   (set (match_operand:SWI48 1 "register_operand" "=r")
        (umod:SWI48 (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
  "#"
  "&& reload_completed"
  [(set (match_dup 1) (match_dup 2))
   (parallel [(set (match_dup 0) (lshiftrt:<MODE> (match_dup 2) (match_dup 4)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 1) (and:<MODE> (match_dup 1) (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])]
{
  int v = exact_log2 (UINTVAL (operands[3]));
  operands[4] = GEN_INT (v);
  operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
}
  [(set_attr "type" "multi")
   (set_attr "mode" "<MODE>")])

;; SImode divmod with the quotient zero-extended to DImode.
(define_insn_and_split "*divmodsi4_zext_1"
  [(set (match_operand:DI 0 "register_operand" "=a")
        (zero_extend:DI
          (div:SI (match_operand:SI 2 "register_operand" "0")
                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 1 "register_operand" "=&d")
        (mod:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 1)
                   (ashiftrt:SI (match_dup 4) (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
                   (zero_extend:DI (div:SI (match_dup 2) (match_dup 3))))
              (set (match_dup 1)
                   (mod:SI (match_dup 2) (match_dup 3)))
              (use (match_dup 1))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);

  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
    operands[4] = operands[2];
  else
    {
      /* Avoid use of cltd in favor of a mov+shift.  */
      emit_move_insn (operands[1], operands[2]);
      operands[4] = operands[1];
    }
}
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "*udivmodsi4_zext_1"
  [(set (match_operand:DI 0 "register_operand" "=a")
        (zero_extend:DI
          (udiv:SI (match_operand:SI 2 "register_operand" "0")
                   (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 1 "register_operand" "=&d")
        (umod:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 1) (const_int 0))
   (parallel [(set (match_dup 0)
                   (zero_extend:DI (udiv:SI (match_dup 2) (match_dup 3))))
              (set (match_dup 1)
                   (umod:SI (match_dup 2) (match_dup 3)))
              (use (match_dup 1))
              (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "*udivmodsi4_pow2_zext_1"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (udiv:SI (match_operand:SI 2 "register_operand" "0")
                   (match_operand:SI 3 "const_int_operand"))))
   (set (match_operand:SI 1 "register_operand" "=r")
        (umod:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
  "#"
  "&& reload_completed"
  [(set (match_dup 1) (match_dup 2))
   (parallel [(set (match_dup 0)
                   (zero_extend:DI (lshiftrt:SI (match_dup 2) (match_dup 4))))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 1) (and:SI (match_dup 1) (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])]
{
  int v = exact_log2 (UINTVAL (operands[3]));
  operands[4] = GEN_INT (v);
  operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
}
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

;; SImode divmod with the remainder zero-extended to DImode.
(define_insn_and_split "*divmodsi4_zext_2"
  [(set (match_operand:DI 1 "register_operand" "=&d")
        (zero_extend:DI
          (mod:SI (match_operand:SI 2 "register_operand" "0")
                  (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 0 "register_operand" "=a")
        (div:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 6)
                   (ashiftrt:SI (match_dup 4) (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 1)
                   (zero_extend:DI (mod:SI (match_dup 2) (match_dup 3))))
              (set (match_dup 0)
                   (div:SI (match_dup 2) (match_dup 3)))
              (use (match_dup 6))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[5] = GEN_INT (GET_MODE_BITSIZE (SImode)-1);
  operands[6] = gen_lowpart (SImode, operands[1]);

  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
    operands[4] = operands[2];
  else
    {
      /* Avoid use of cltd in favor of a mov+shift.  */
      emit_move_insn (operands[6], operands[2]);
      operands[4] = operands[6];
    }
}
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "*udivmodsi4_zext_2"
  [(set (match_operand:DI 1 "register_operand" "=&d")
        (zero_extend:DI
          (umod:SI (match_operand:SI 2 "register_operand" "0")
                   (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 0 "register_operand" "=a")
        (udiv:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 4) (const_int 0))
   (parallel [(set (match_dup 1)
                   (zero_extend:DI (umod:SI (match_dup 2) (match_dup 3))))
              (set (match_dup 0)
                   (udiv:SI (match_dup 2) (match_dup 3)))
              (use (match_dup 4))
              (clobber (reg:CC FLAGS_REG))])]
  "operands[4] = gen_lowpart (SImode, operands[1]);"
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

(define_insn_and_split "*udivmodsi4_pow2_zext_2"
  [(set (match_operand:DI 1 "register_operand" "=r")
        (zero_extend:DI
          (umod:SI (match_operand:SI 2 "register_operand" "0")
                   (match_operand:SI 3 "const_int_operand"))))
   (set (match_operand:SI 0 "register_operand" "=r")
        (udiv:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && IN_RANGE (exact_log2 (UINTVAL (operands[3])), 1, 31)"
  "#"
  "&& reload_completed"
  [(set (match_dup 1) (match_dup 2))
   (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 2) (match_dup 4)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 1)
                   (zero_extend:DI (and:SI (match_dup 1) (match_dup 5))))
              (clobber (reg:CC FLAGS_REG))])]
{
  int v = exact_log2 (UINTVAL (operands[3]));
  operands[4] = GEN_INT (v);
  operands[5] = GEN_INT ((HOST_WIDE_INT_1U << v) - 1);
}
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

;; The actual divide instructions.  Operand 4, tied to operand 1, is the
;; register prepared by the splitters above and consumed via (use ...).
(define_insn "*<u>divmod<mode>4_noext"
  [(set (match_operand:SWIM248 0 "register_operand" "=a")
        (any_div:SWIM248
          (match_operand:SWIM248 2 "register_operand" "0")
          (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
   (set (match_operand:SWIM248 1 "register_operand" "=d")
        (<paired_mod>:SWIM248 (match_dup 2) (match_dup 3)))
   (use (match_operand:SWIM248 4 "register_operand" "1"))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "<sgnprefix>div{<imodesuffix>}\t%3"
  [(set_attr "type" "idiv")
   (set_attr "mode" "<MODE>")])

(define_insn "*<u>divmodsi4_noext_zext_1"
  [(set (match_operand:DI 0 "register_operand" "=a")
        (zero_extend:DI
          (any_div:SI (match_operand:SI 2 "register_operand" "0")
                      (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 1 "register_operand" "=d")
        (<paired_mod>:SI (match_dup 2) (match_dup 3)))
   (use (match_operand:SI 4 "register_operand" "1"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "<sgnprefix>div{l}\t%3"
  [(set_attr "type" "idiv")
   (set_attr "mode" "SI")])

(define_insn "*<u>divmodsi4_noext_zext_2"
  [(set (match_operand:DI 1 "register_operand" "=d")
        (zero_extend:DI
          (<paired_mod>:SI (match_operand:SI 2 "register_operand" "0")
                           (match_operand:SI 3 "nonimmediate_operand" "rm"))))
   (set (match_operand:SI 0 "register_operand" "=a")
        (any_div:SI (match_dup 2) (match_dup 3)))
   (use (match_operand:SI 4 "register_operand" "1"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "<sgnprefix>div{l}\t%3"
  [(set_attr "type" "idiv")
   (set_attr "mode" "SI")])

;; Avoid sign-extension (using cdq) for constant numerators.
;; Signed SImode divmod whose dividend is a compile-time constant.  After
;; reload the constant is loaded into operand 0 and operand 1 is set to
;; -1 or 0 according to the constant's sign, then the divide is emitted
;; with a (use ...) of operand 1.  Not used when optimizing for size.
(define_insn_and_split "*divmodsi4_const"
  [(set (match_operand:SI 0 "register_operand" "=&a")
        (div:SI (match_operand:SI 2 "const_int_operand")
                (match_operand:SI 3 "nonimmediate_operand" "rm")))
   (set (match_operand:SI 1 "register_operand" "=&d")
        (mod:SI (match_dup 2) (match_dup 3)))
   (clobber (reg:CC FLAGS_REG))]
  "!optimize_function_for_size_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 2))
   (set (match_dup 1) (match_dup 4))
   (parallel [(set (match_dup 0)
                   (div:SI (match_dup 0) (match_dup 3)))
              (set (match_dup 1)
                   (mod:SI (match_dup 0) (match_dup 3)))
              (use (match_dup 1))
              (clobber (reg:CC FLAGS_REG))])]
{
  /* Operand 4 is the value for operand 1: all ones for a negative
     dividend, zero otherwise.  */
  operands[4] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx;
}
  [(set_attr "type" "multi")
   (set_attr "mode" "SI")])

;; Signed QImode divmod.  Operand 0 receives the quotient and operand 3
;; the remainder.  The dividend is sign-extended to HImode, divided via
;; divmodhiqi3, and both results are extracted from the HImode result.
(define_expand "divmodqi4"
  [(parallel [(set (match_operand:QI 0 "register_operand")
                   (div:QI
                     (match_operand:QI 1 "register_operand")
                     (match_operand:QI 2 "nonimmediate_operand")))
              (set (match_operand:QI 3 "register_operand")
                   (mod:QI (match_dup 1) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_QIMODE_MATH"
{
  rtx div, mod;
  rtx tmp0, tmp1;

  tmp0 = gen_reg_rtx (HImode);
  tmp1 = gen_reg_rtx (HImode);

  /* Extend operands[1] to HImode.  Generate 8bit divide.  Result is in AX.  */
  emit_insn (gen_extendqihi2 (tmp1, operands[1]));
  emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2]));

  /* Extract remainder from AH (bits 8..15 of the HImode result).  */
  tmp1 = gen_rtx_ZERO_EXTRACT (HImode, tmp0, GEN_INT (8), GEN_INT (8));
  tmp1 = lowpart_subreg (QImode, tmp1, HImode);
  rtx_insn *insn = emit_move_insn (operands[3], tmp1);

  /* Attach a REG_EQUAL note describing the value as a QImode MOD.  */
  mod = gen_rtx_MOD (QImode, operands[1], operands[2]);
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Extract quotient from AL (low part of the HImode result).  */
  insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));

  /* Attach a REG_EQUAL note describing the value as a QImode DIV.  */
  div = gen_rtx_DIV (QImode, operands[1], operands[2]);
  set_unique_reg_note (insn, REG_EQUAL, div);

  DONE;
})

;; Unsigned counterpart of divmodqi4: zero-extends the dividend, uses
;; udivmodhiqi3, and attaches UMOD/UDIV notes.
(define_expand "udivmodqi4"
  [(parallel [(set (match_operand:QI 0 "register_operand")
                   (udiv:QI
                     (match_operand:QI 1 "register_operand")
                     (match_operand:QI 2 "nonimmediate_operand")))
              (set (match_operand:QI 3 "register_operand")
                   (umod:QI (match_dup 1) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_QIMODE_MATH"
{
  rtx div, mod;
  rtx tmp0, tmp1;

  tmp0 = gen_reg_rtx (HImode);
  tmp1 = gen_reg_rtx (HImode);

  /* Extend operands[1] to HImode.  Generate 8bit divide.  Result is in AX.  */
  emit_insn (gen_zero_extendqihi2 (tmp1, operands[1]));
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, operands[2]));

  /* Extract remainder from AH (bits 8..15 of the HImode result).  */
  tmp1 = gen_rtx_ZERO_EXTRACT (HImode, tmp0, GEN_INT (8), GEN_INT (8));
  tmp1 = lowpart_subreg (QImode, tmp1, HImode);
  rtx_insn *insn = emit_move_insn (operands[3], tmp1);

  /* Attach a REG_EQUAL note describing the value as a QImode UMOD.  */
  mod = gen_rtx_UMOD (QImode, operands[1], operands[2]);
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Extract quotient from AL (low part of the HImode result).  */
  insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0));

  /* Attach a REG_EQUAL note describing the value as a QImode UDIV.  */
  div = gen_rtx_UDIV (QImode, operands[1], operands[2]);
  set_unique_reg_note (insn, REG_EQUAL, div);

  DONE;
})

;; Divide AX by r/m8, with result stored in
;; AL <- Quotient
;; AH <- Remainder
;; Change div/mod to HImode and extend the second argument to HImode
;; so that mode of div/mod matches with mode of arguments.  Otherwise
;; combine may fail.
;; 8-bit divide producing a packed HImode result: the truncated remainder
;; in bits 8..15 and the truncated quotient in bits 0..7.  The <u> prefix
;; yields both divmodhiqi3 and udivmodhiqi3 from the any_extend iterator;
;; these are the generators the divmodqi4 / udivmodqi4 expanders call.
(define_insn "<u>divmodhiqi3"
  [(set (match_operand:HI 0 "register_operand" "=a")
        (ior:HI
          (ashift:HI
            (zero_extend:HI
              (truncate:QI
                (mod:HI (match_operand:HI 1 "register_operand" "0")
                        (any_extend:HI
                          (match_operand:QI 2 "nonimmediate_operand" "qm")))))
            (const_int 8))
          (zero_extend:HI
            (truncate:QI
              (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_QIMODE_MATH"
  "<sgnprefix>div{b}\t%2"
  [(set_attr "type" "idiv")
   (set_attr "mode" "QI")])

;; We cannot use div/idiv for double division, because it causes
;; "division by zero" on the overflow and that's not what we expect
;; from truncate.  Because true (non truncating) double division is
;; never generated, we can't create this insn anyway.
;
;(define_insn ""
;  [(set (match_operand:SI 0 "register_operand" "=a")
;	(truncate:SI
;	  (udiv:DI (match_operand:DI 1 "register_operand" "A")
;		   (zero_extend:DI
;		     (match_operand:SI 2 "nonimmediate_operand" "rm")))))
;   (set (match_operand:SI 3 "register_operand" "=d")
;	(truncate:SI
;	  (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2)))))
;   (clobber (reg:CC FLAGS_REG))]
;  ""
;  "div{l}\t{%2, %0|%0, %2}"
;  [(set_attr "type" "idiv")])

;;- Logical AND instructions

;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al.
;; Note that this excludes ah.
;; Expanders for "and, compare with zero" in CCNO and CCZ modes.
(define_expand "@test<mode>_ccno_1"
  [(set (reg:CCNO FLAGS_REG)
        (compare:CCNO
          (and:SWI48
            (match_operand:SWI48 0 "nonimmediate_operand")
            (match_operand:SWI48 1 "<nonmemory_szext_operand>"))
          (const_int 0)))])

(define_expand "testqi_ccz_1"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ
          (and:QI
            (match_operand:QI 0 "nonimmediate_operand")
            (match_operand:QI 1 "nonmemory_operand"))
          (const_int 0)))])

;; DImode test.  Alternative 0 (constraint "Z") is emitted as testl,
;; alternative 1 as testq.
(define_insn "*testdi_1"
  [(set (reg FLAGS_REG)
        (compare
          (and:DI
            (match_operand:DI 0 "nonimmediate_operand" "%r,rm")
            (match_operand:DI 1 "x86_64_szext_nonmemory_operand" "Z,re"))
          (const_int 0)))]
  "TARGET_64BIT
   && ix86_match_ccmode
        (insn,
         /* If we are going to emit testl instead of testq, and the operands[1]
            constant might have the SImode sign bit set, make sure the sign
            flag isn't tested, because the instruction will set the sign flag
            based on bit 31 rather than bit 63.  If it isn't CONST_INT,
            conservatively assume it might have bit 31 set.  */
         (satisfies_constraint_Z (operands[1])
          && (!CONST_INT_P (operands[1])
              || val_signbit_known_set_p (SImode, INTVAL (operands[1]))))
         ? CCZmode : CCNOmode)"
  "@
   test{l}\t{%k1, %k0|%k0, %k1}
   test{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "test")
   (set_attr "mode" "SI,DI")])

;; QImode test that may be emitted as testl when the "mode" attribute
;; evaluates to SI; a negative constant is then masked to its low 8 bits.
(define_insn "*testqi_1_maybe_si"
  [(set (reg FLAGS_REG)
        (compare
          (and:QI
            (match_operand:QI 0 "nonimmediate_operand" "%qm,qm,r")
            (match_operand:QI 1 "nonmemory_operand" "q,n,n"))
          (const_int 0)))]
  "ix86_match_ccmode (insn,
                      CONST_INT_P (operands[1])
                      && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)"
{
  if (get_attr_mode (insn) == MODE_SI)
    {
      if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0)
        operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
      return "test{l}\t{%1, %k0|%k0, %1}";
    }
  return "test{b}\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "test")
   (set (attr "mode")
     (cond [(eq_attr "alternative" "2")
              (const_string "SI")
            (and (match_test "optimize_insn_for_size_p ()")
                 (and (match_operand 0 "ext_QIreg_operand")
                      (match_operand 1 "const_0_to_127_operand")))
              (const_string "SI")
           ]
           (const_string "QI")))
   (set_attr "pent_pair" "uv,np,np")])

(define_insn "*test<mode>_1"
  [(set (reg FLAGS_REG)
        (compare
          (and:SWI124
            (match_operand:SWI124 0 "nonimmediate_operand" "%<r>m,*a,<r>m")
            (match_operand:SWI124 1 "<nonmemory_szext_operand>" "<r>,<i>,<i>"))
          (const_int 0)))]
  "ix86_match_ccmode (insn, CCNOmode)"
  "test{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "test")
   (set_attr "mode" "<MODE>")
   (set_attr "pent_pair" "uv,uv,np")])

;; Tests on the 8-bit field at bit position 8 of a register (printed with
;; the %h operand modifier).
(define_expand "testqi_ext_1_ccno"
  [(set (reg:CCNO FLAGS_REG)
        (compare:CCNO
          (and:QI
            (subreg:QI
              (zero_extract:HI
                (match_operand:HI 0 "register_operand")
                (const_int 8)
                (const_int 8)) 0)
            (match_operand:QI 1 "const_int_operand"))
          (const_int 0)))])

(define_insn "*testqi_ext_1"
  [(set (reg FLAGS_REG)
        (compare
          (and:QI
            (subreg:QI
              (match_operator:SWI248 2 "extract_operator"
                [(match_operand 0 "int248_register_operand" "Q")
                 (const_int 8)
                 (const_int 8)]) 0)
            (match_operand:QI 1 "general_operand" "QnBn"))
          (const_int 0)))]
  "ix86_match_ccmode (insn, CCNOmode)"
  "test{b}\t{%1, %h0|%h0, %1}"
  [(set_attr "addr" "gpr8")
   (set_attr "type" "test")
   (set_attr "mode" "QI")])

(define_insn "*testqi_ext_2"
  [(set (reg FLAGS_REG)
        (compare
          (and:QI
            (subreg:QI
              (match_operator:SWI248 2 "extract_operator"
                [(match_operand 0 "int248_register_operand" "Q")
                 (const_int 8)
                 (const_int 8)]) 0)
            (subreg:QI
              (match_operator:SWI248 3 "extract_operator"
                [(match_operand 1 "int248_register_operand" "Q")
                 (const_int 8)
                 (const_int 8)]) 0))
          (const_int 0)))]
  "ix86_match_ccmode (insn, CCNOmode)"
  "test{b}\t{%h1, %h0|%h0, %h1}"
  [(set_attr "type" "test")
   (set_attr "mode" "QI")])

;; Provide a *testti instruction that STV can implement using ptest.
;; This pattern splits into *andti3_doubleword and *cmpti_doubleword.
(define_insn_and_split "*testti_doubleword"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ
          (and:TI (match_operand:TI 0 "register_operand")
                  (match_operand:TI 1 "general_operand"))
          (const_int 0)))]
  "TARGET_64BIT
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 2) (and:TI (match_dup 0) (match_dup 1)))
              (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))]
{
  operands[2] = gen_reg_rtx (TImode);
  if (!x86_64_hilo_general_operand (operands[1], TImode))
    operands[1] = force_reg (TImode, operands[1]);
})

;; Combine likes to form bit extractions for some tests.  Humor it.
;; Operand 3 is the extraction length and operand 4 the bit position; the
;; split rewrites the zero_extract as an AND with the corresponding mask,
;; narrowing or widening the operand mode where the checks below allow.
(define_insn_and_split "*testqi_ext_3"
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 1 "compare_operator"
          [(zero_extract:SWI248
             (match_operand 2 "int_nonimmediate_operand" "rm")
             (match_operand:QI 3 "const_int_operand")
             (match_operand:QI 4 "const_int_operand"))
           (const_int 0)]))]
  "/* Ensure that resulting mask is zero or sign extended operand.  */
   INTVAL (operands[4]) >= 0
   && ((INTVAL (operands[3]) > 0
        && INTVAL (operands[3]) + INTVAL (operands[4]) <= 32)
       || (<MODE>mode == DImode
           && INTVAL (operands[3]) > 32
           && INTVAL (operands[3]) + INTVAL (operands[4]) == 64))
   && ix86_match_ccmode (insn,
                         /* If zero_extract mode precision is the same
                            as len, the SF of the zero_extract
                            comparison will be the most significant
                            extracted bit, but this could be matched
                            after splitting only for pos 0 len all bits
                            trivial extractions.  Require CCZmode.  */
                         (GET_MODE_PRECISION (<MODE>mode)
                          == INTVAL (operands[3]))
                         /* Otherwise, require CCZmode if we'd use a mask
                            with the most significant bit set and can't
                            widen it to wider mode.  *testdi_1 also
                            requires CCZmode if the mask has bit
                            31 set and all bits above it clear.  */
                         || (INTVAL (operands[3]) + INTVAL (operands[4])
                             >= 32)
                         /* We can't widen also if val is not a REG.  */
                         || (INTVAL (operands[3]) + INTVAL (operands[4])
                             == GET_MODE_PRECISION (GET_MODE (operands[2]))
                             && !register_operand (operands[2],
                                                   GET_MODE (operands[2])))
                         /* And we shouldn't widen if
                            TARGET_PARTIAL_REG_STALL.  */
                         || (TARGET_PARTIAL_REG_STALL
                             && (INTVAL (operands[3]) + INTVAL (operands[4])
                                 >= (paradoxical_subreg_p (operands[2])
                                     && (GET_MODE_CLASS
                                          (GET_MODE (SUBREG_REG (operands[2])))
                                         == MODE_INT)
                                     ? GET_MODE_PRECISION
                                         (GET_MODE (SUBREG_REG (operands[2])))
                                     : GET_MODE_PRECISION
                                         (GET_MODE (operands[2])))))
                         ? CCZmode : CCNOmode)"
  "#"
  "&& 1"
  [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))]
{
  rtx val = operands[2];
  HOST_WIDE_INT len = INTVAL (operands[3]);
  HOST_WIDE_INT pos = INTVAL (operands[4]);
  machine_mode mode = GET_MODE (val);

  if (SUBREG_P (val))
    {
      machine_mode submode = GET_MODE (SUBREG_REG (val));

      /* Narrow paradoxical subregs to prevent partial register stalls.  */
      if (GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)
          && GET_MODE_CLASS (submode) == MODE_INT
          && (GET_MODE (operands[0]) == CCZmode
              || pos + len < GET_MODE_PRECISION (submode)
              || REG_P (SUBREG_REG (val))))
        {
          val = SUBREG_REG (val);
          mode = submode;
        }
    }

  /* Small HImode tests can be converted to QImode.  */
  if (pos + len <= 8
      && register_operand (val, HImode))
    {
      rtx nval = gen_lowpart (QImode, val);
      if (!MEM_P (nval)
          || GET_MODE (operands[0]) == CCZmode
          || pos + len < 8)
        {
          val = nval;
          mode = QImode;
        }
    }

  gcc_assert (pos + len <= GET_MODE_PRECISION (mode));

  /* If the mask is going to have the sign bit set in the mode
     we want to do the comparison in and user isn't interested just
     in the zero flag, then we must widen the target mode.  */
  if (pos + len == GET_MODE_PRECISION (mode)
      && GET_MODE (operands[0]) != CCZmode)
    {
      gcc_assert (pos + len < 32 && !MEM_P (val));
      mode = SImode;
      val = gen_lowpart (mode, val);
    }

  wide_int mask
    = wi::shifted_mask (pos, len, false, GET_MODE_PRECISION (mode));

  operands[2] = gen_rtx_AND (mode, val, immed_wide_int_const (mask, mode));
})

;; Split and;cmp (as optimized by combine) into not;test
;; Except when TARGET_BMI provides andn (*andn_<mode>_ccno).
(define_insn_and_split "*test<mode>_not"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ
          (and:SWI
            (not:SWI (match_operand:SWI 0 "register_operand"))
            (match_operand:SWI 1 "<nonmemory_szext_operand>"))
          (const_int 0)))]
  "ix86_pre_reload_split ()
   && (!TARGET_BMI || !REG_P (operands[1]))"
  "#"
  "&& 1"
  [(set (match_dup 2) (not:SWI (match_dup 0)))
   (set (reg:CCZ FLAGS_REG)
        (compare:CCZ (and:SWI (match_dup 2) (match_dup 1))
                     (const_int 0)))]
  "operands[2] = gen_reg_rtx (<MODE>mode);")

;; Split and;cmp (as optimized by combine) into andn;cmp $0
(define_insn_and_split "*test<mode>_not_doubleword"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ
          (and:DWI
            (not:DWI (match_operand:DWI 0 "nonimmediate_operand"))
            (match_operand:DWI 1 "nonimmediate_operand"))
          (const_int 0)))]
  "ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
      [(set (match_dup 2)
            (and:DWI (not:DWI (match_dup 0)) (match_dup 1)))
       (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))]
{
  operands[0] = force_reg (<MODE>mode, operands[0]);
  operands[2] = gen_reg_rtx (<MODE>mode);
})

;; Convert HImode/SImode test instructions with immediate to QImode ones.
;; i386 does not allow to encode test with 8bit sign extended immediate, so
;; this is relatively important trick.
;; Do the conversion only post-reload to avoid limiting of the register class
;; to QI regs.
;; Post-reload narrowing of a register test whose mask lies entirely in
;; bits 8..15: the AND becomes a QImode test of the 8-bit field at bit 8
;; (a zero_extract of the HImode lowpart) against the mask shifted right
;; by 8.  With CCZmode any of those eight bits may be set; with CCNOmode
;; the mask must stay within 127 << 8.
(define_split
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 1 "compare_operator"
          [(and (match_operand 2 "QIreg_operand")
                (match_operand 3 "const_int_operand"))
           (const_int 0)]))]
   "reload_completed
    && GET_MODE (operands[2]) != QImode
    && ((ix86_match_ccmode (insn, CCZmode)
         && !(INTVAL (operands[3]) & ~(255 << 8)))
        || (ix86_match_ccmode (insn, CCNOmode)
            && !(INTVAL (operands[3]) & ~(127 << 8))))"
  [(set (match_dup 0)
        (match_op_dup 1
          [(and:QI
             (subreg:QI
               (zero_extract:HI (match_dup 2)
                                (const_int 8)
                                (const_int 8)) 0)
             (match_dup 3))
           (const_int 0)]))]
{
  operands[2] = gen_lowpart (HImode, operands[2]);
  operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, QImode);
})

;; Post-reload narrowing of a test whose mask fits in the low byte: the
;; operand (anything that is not a register, or a register accepted by
;; ANY_QI_REG_P) is accessed through its QImode lowpart.  CCZmode allows
;; masks within 255, CCNOmode only masks within 127.
(define_split
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 1 "compare_operator"
          [(and (match_operand 2 "nonimmediate_operand")
                (match_operand 3 "const_int_operand"))
           (const_int 0)]))]
   "reload_completed
    && GET_MODE (operands[2]) != QImode
    && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2]))
    && ((ix86_match_ccmode (insn, CCZmode)
         && !(INTVAL (operands[3]) & ~255))
        || (ix86_match_ccmode (insn, CCNOmode)
            && !(INTVAL (operands[3]) & ~127)))"
  [(set (match_dup 0)
        (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
                         (const_int 0)]))]
{
  operands[2] = gen_lowpart (QImode, operands[2]);
  operands[3] = gen_int_mode (INTVAL (operands[3]), QImode);
})

;; Narrow test instructions with immediate operands that test
;; memory locations for zero.  E.g. testl $0x00aa0000, mem can be
;; converted to testb $0xaa, mem+2.  Reject volatile locations and
;; targets where reading (possibly unaligned) part of memory
;; location after a large write to the same address causes
;; store-to-load forwarding stall.
;; Peephole: narrow a CCZ test of a memory operand against a constant mask
;; to the smallest QI/HI/SI access that still covers every set mask bit,
;; addressing the memory at the byte offset of the lowest set mask bit.
;; Not applied to volatile memory or when
;; TARGET_PARTIAL_MEMORY_READ_STALL is set.
;; NOTE(review): `mode' in the body is not declared inside this block;
;; presumably it stands for the SWI248 iterator mode of operand 0 -- confirm.
(define_peephole2
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ (and:SWI248 (match_operand:SWI248 0 "memory_operand")
                                 (match_operand 1 "const_int_operand"))
                     (const_int 0)))]
   "!TARGET_PARTIAL_MEMORY_READ_STALL && !MEM_VOLATILE_P (operands[0])"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ (match_dup 2)
                     (const_int 0)))]
{
  unsigned HOST_WIDE_INT ival = UINTVAL (operands[1]);
  int first_nonzero_byte, bitsize;
  rtx new_addr, new_const;
  machine_mode new_mode;

  /* Clear bits outside mode width.  */
  ival &= GET_MODE_MASK (mode);

  /* Give up on an all-zero mask.  The check is made after truncation so
     that a constant with no bits inside the mode can never reach the
     bit-scan and shift below with a zero value.  */
  if (ival == 0)
    FAIL;

  /* Byte offset of the lowest set mask bit; drop the zero bytes below.  */
  first_nonzero_byte = ctz_hwi (ival) / BITS_PER_UNIT;

  ival >>= first_nonzero_byte * BITS_PER_UNIT;

  /* Number of bits needed to hold the remaining mask.  */
  bitsize = sizeof (ival) * BITS_PER_UNIT - clz_hwi (ival);

  if (bitsize <= GET_MODE_BITSIZE (QImode))
    new_mode = QImode;
  else if (bitsize <= GET_MODE_BITSIZE (HImode))
    new_mode = HImode;
  else if (bitsize <= GET_MODE_BITSIZE (SImode))
    new_mode = SImode;
  else
    new_mode = DImode;

  /* Only worthwhile when the access actually gets narrower.  */
  if (GET_MODE_SIZE (new_mode) >= GET_MODE_SIZE (mode))
    FAIL;

  new_addr = adjust_address (operands[0], new_mode, first_nonzero_byte);
  new_const = gen_int_mode (ival, new_mode);

  operands[2] = gen_rtx_AND (new_mode, new_addr, new_const);
})

;; %%% This used to optimize known byte-wide and operations to memory,
;; and sometimes to QImode registers.  If this is considered useful,
;; it should be done with splitters.
;; AND expander.  Operand 2 is forced into a register for modes wider
;; than a word when it is not an x86_64_hilo_general_operand.  For
;; word-or-narrower modes with a register destination and a constant mask
;; equal to the SImode/HImode/QImode mode mask, a narrower `mode' is
;; selected (unless TARGET_ZERO_EXTEND_WITH_AND when optimizing for speed).
;; NOTE(review): as visible here `mode' is initialised from itself and the
;; final test reads `mode != mode', which can never be true, so the
;; gen_extend_insn arm is unreachable as written.  Presumably both places
;; originally named the iterator mode of the pattern (and the pattern name
;; and operand 2's predicate are likewise truncated) -- confirm.
(define_expand "and3"
  [(set (match_operand:SDWIM 0 "nonimmediate_operand")
        (and:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
                   (match_operand:SDWIM 2 "")))]
  ""
{
  machine_mode mode = mode;

  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && !x86_64_hilo_general_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);

  if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
      && const_int_operand (operands[2], mode)
      && register_operand (operands[0], mode)
      && !(TARGET_ZERO_EXTEND_WITH_AND
           && optimize_function_for_speed_p (cfun)))
    {
      unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);

      if (ival == GET_MODE_MASK (SImode))
        mode = SImode;
      else if (ival == GET_MODE_MASK (HImode))
        mode = HImode;
      else if (ival == GET_MODE_MASK (QImode))
        mode = QImode;
    }

  if (mode != mode)
    emit_insn (gen_extend_insn
               (operands[0], gen_lowpart (mode, operands[1]),
                mode, mode, 1));
  else
    ix86_expand_binary_operator (AND, mode, operands,
                                 TARGET_APX_NDD);

  DONE;
})

;; Double-word AND, split after reload into two half-word operations.
;; After split_double_mode, operands 0..2 are one half and operands 3..5
;; the other.  For each half: a zero mask becomes a move of zero, an
;; all-ones mask becomes a plain move (or nothing when source and
;; destination already coincide), anything else a real AND.  When both
;; halves turn out to need no instruction, a NOTE_INSN_DELETED is emitted.
;; NOTE(review): the mode suffixes after `match_operand:' / `and:', two
;; constraint entries of operand 2 and the pattern name look truncated in
;; this view -- confirm.
(define_insn_and_split "*and3_doubleword"
  [(set (match_operand: 0 "nonimmediate_operand" "=ro,r,&r,&r,&r,&r,&r")
        (and:
         (match_operand: 1 "nonimmediate_operand" "%0,0,ro,r,ro,jO,r")
         (match_operand: 2 "x86_64_hilo_general_operand" "r,o,r,,K,,o")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (AND, mode, operands, TARGET_APX_NDD)"
  "#"
  "&& reload_completed"
  [(const_int:DWIH 0)]
{
  bool emit_insn_deleted_note_p = false;

  split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]);

  if (operands[2] == const0_rtx)
    emit_move_insn (operands[0], const0_rtx);
  else if (operands[2] == constm1_rtx)
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);
      else
        emit_insn_deleted_note_p = true;
    }
  else
    ix86_expand_binary_operator (AND, mode, &operands[0],
                                 TARGET_APX_NDD);

  if (operands[5] == const0_rtx)
    emit_move_insn (operands[3], const0_rtx);
  else if (operands[5] == constm1_rtx)
    {
      if (!rtx_equal_p (operands[3], operands[4]))
        emit_move_insn (operands[3], operands[4]);
      else if (emit_insn_deleted_note_p)
        emit_note (NOTE_INSN_DELETED);
    }
  else
    ix86_expand_binary_operator (AND, mode, &operands[3],
                                 TARGET_APX_NDD);

  DONE;
}
[(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])

;; 64-bit AND.  Alternatives 0-1 use the 32-bit form for "Z" constants,
;; 2-6 the 64-bit form (4-6 with a separate destination, enabled for
;; apx_ndd), alternative 7 is type imovx and alternative 8 (mask
;; registers) is type msklog; the last two output "#".
(define_insn "*anddi_1"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r,r,r,r,?k")
        (and:DI
         (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,rm,rjM,r,qm,k")
         (match_operand:DI 2 "x86_64_szext_general_operand" "Z,Z,re,m,r,e,m,L,k")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)"
  "@
   and{l}\t{%k2, %k0|%k0, %k2}
   and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
   and{q}\t{%2, %0|%0, %2}
   and{q}\t{%2, %0|%0, %2}
   and{q}\t{%2, %1, %0|%0, %1, %2}
   and{q}\t{%2, %1, %0|%0, %1, %2}
   and{q}\t{%2, %1, %0|%0, %1, %2}
   #
   #"
  [(set_attr "isa" "x64,apx_ndd,x64,x64,apx_ndd,apx_ndd,apx_ndd,x64,avx512bw")
   (set_attr "type" "alu,alu,alu,alu,alu,alu,alu,imovx,msklog")
   (set_attr "length_immediate" "*,*,*,*,*,*,*,0,*")
   (set (attr "prefix_rex")
     (if_then_else
       (and (eq_attr "type" "imovx")
            (and (match_test "INTVAL (operands[2]) == 0xff")
                 (match_operand 1 "ext_QIreg_operand")))
       (const_string "1")
       (const_string "*")))
   (set_attr "mode" "SI,SI,DI,DI,DI,DI,DI,SI,DI")])

;; 64-bit AND with a constant whose complement is a single bit in
;; positions 31..63: split after reload into a one-bit zero_extract store
;; of zero at that bit position (operand 3).
(define_insn_and_split "*anddi_1_btr"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (and:DI
         (match_operand:DI 1 "nonimmediate_operand" "%0")
         (match_operand:DI 2 "const_int_operand" "n")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_USE_BT
   && ix86_binary_operator_ok (AND, DImode, operands)
   && IN_RANGE (exact_log2 (~INTVAL (operands[2])), 31, 63)"
  "#"
  "&& reload_completed"
  [(parallel [(set (zero_extract:DI (match_dup 0)
                                    (const_int 1)
                                    (match_dup 3))
                   (const_int 0))
              (clobber (reg:CC FLAGS_REG))])]
  "operands[3] = GEN_INT (exact_log2 (~INTVAL (operands[2])));"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "DI")])

;; Turn *anddi_1 into *andsi_1_zext if possible.
;; A DImode AND of a paradoxical (subreg:DI of an SImode register) with a
;; zero-extended immediate becomes a zero_extend of the SImode AND.
;; Operand 2 is re-moded to SImode: symbol/label refs are shallow-copied,
;; a CONST wrapper is copied and its inner PLUS and first operand are
;; re-moded as well, anything else goes through gen_lowpart.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (and:DI (subreg:DI (match_operand:SI 1 "register_operand") 0)
                (match_operand:DI 2 "x86_64_zext_immediate_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  [(parallel [(set (match_dup 0)
                   (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))
              (clobber (reg:CC FLAGS_REG))])]
{
  if (GET_CODE (operands[2]) == SYMBOL_REF
      || GET_CODE (operands[2]) == LABEL_REF)
    {
      operands[2] = shallow_copy_rtx (operands[2]);
      PUT_MODE (operands[2], SImode);
    }
  else if (GET_CODE (operands[2]) == CONST)
    {
      /* (const:DI (plus:DI (symbol_ref:DI ("...")) (const_int N))) */
      operands[2] = copy_rtx (operands[2]);
      PUT_MODE (operands[2], SImode);
      PUT_MODE (XEXP (operands[2], 0), SImode);
      PUT_MODE (XEXP (XEXP (operands[2], 0), 0), SImode);
    }
  else
    operands[2] = gen_lowpart (SImode, operands[2]);
})

;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; SImode AND whose result is zero-extended into a DImode register;
;; alternatives 1-3 are the three-operand forms enabled for apx_ndd.
(define_insn "*andsi_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI
          (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,rjM,r")
                  (match_operand:SI 2 "x86_64_general_operand" "rBMe,r,e,BM"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT
   && ix86_binary_operator_ok (AND, SImode, operands, TARGET_APX_NDD)"
  "@
   and{l}\t{%2, %k0|%k0, %2}
   and{l}\t{%2, %1, %k0|%k0, %1, %2}
   and{l}\t{%2, %1, %k0|%k0, %1, %2}
   and{l}\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "alu")
   (set_attr "isa" "*,apx_ndd,apx_ndd,apx_ndd")
   (set_attr "mode" "SI")])

;; HImode/SImode AND (SWI24).  Alternatives 2-4 are enabled for apx_ndd,
;; alternative 5 is type imovx and alternative 6 (mask registers) is type
;; msklog; the last two output "#".
;; NOTE(review): the pattern name, mnemonic suffixes inside "{}", operand
;; 2's predicate, some constraint entries and most "mode" attribute entries
;; look truncated in this view -- confirm.
(define_insn "*and_1"
  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,r,r,r,Ya,?k")
        (and:SWI24
          (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,rm,rjM,r,qm,k")
          (match_operand:SWI24 2 "" "r,,r,,,L,k")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (AND, mode, operands, TARGET_APX_NDD)"
  "@
   and{}\t{%2, %0|%0, %2}
   and{}\t{%2, %0|%0, %2}
   and{}\t{%2, %1, %0|%0, %1, %2}
   and{}\t{%2, %1, %0|%0, %1, %2}
   and{}\t{%2, %1, %0|%0, %1, %2}
   #
   #"
  [(set (attr "isa")
        (cond [(eq_attr "alternative" "2,3,4")
                 (const_string "apx_ndd")
               (eq_attr "alternative" "6")
                 (if_then_else (eq_attr "mode" "SI")
                   (const_string "avx512bw")
                   (const_string "avx512f"))
              ]
              (const_string "*")))
   (set_attr "type" "alu,alu,alu,alu,alu,imovx,msklog")
   (set_attr "length_immediate" "*,*,*,*,*,0,*")
   (set (attr "prefix_rex")
     (if_then_else
       (and (eq_attr "type" "imovx")
            (and (match_test "INTVAL (operands[2]) == 0xff")
                 (match_operand 1 "ext_QIreg_operand")))
       (const_string "1")
       (const_string "*")))
   (set_attr "mode" ",,,,,SI,")])

;; QImode AND.  Alternative 2 performs the operation as a 32-bit and on
;; the containing registers (%k operands); alternatives 3-4 are the
;; three-operand forms enabled for apx_ndd; alternative 5 (mask registers)
;; outputs "#" and gets mode HI when !TARGET_AVX512DQ.
(define_insn "*andqi_1"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
        (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
                (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)"
  "@
   and{b}\t{%2, %0|%0, %2}
   and{b}\t{%2, %0|%0, %2}
   and{l}\t{%k2, %k0|%k0, %k2}
   and{b}\t{%2, %1, %0|%0, %1, %2}
   and{b}\t{%2, %1, %0|%0, %1, %2}
   #"
  [(set_attr "type" "alu,alu,alu,alu,alu,msklog")
   (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,*")
   (set (attr "mode")
        (cond [(eq_attr "alternative" "2")
                 (const_string "SI")
                (and (eq_attr "alternative" "5")
                     (match_test "!TARGET_AVX512DQ"))
                 (const_string "HI")
               ]
               (const_string "QI")))
   ;; Potential partial reg stall on alternative 2.
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "2")
              (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
           (symbol_ref "true")))])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Logic operation (any_logic) on the strict low part of a QI/HI register.
;; Alternative 1 outputs "#"; after reload, when operand 0 differs from
;; both inputs, it is split into a strict_low_part copy of operand 1
;; followed by the two-address form.
;; NOTE(review): the pattern name, the mnemonic and suffix in the template,
;; part of the constraints and the "mode" attribute look truncated in this
;; view -- confirm.
(define_insn_and_split "*_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+,&"))
        (any_logic:SWI12
          (match_operand:SWI12 1 "nonimmediate_operand" "%0,!")
          (match_operand:SWI12 2 "general_operand" "mn,mn")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "@
   {}\t{%2, %0|%0, %2}
   #"
  "&& reload_completed
   && !(rtx_equal_p (operands[0], operands[1])
        || rtx_equal_p (operands[0], operands[2]))"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (any_logic:SWI12 (match_dup 0) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "alu")
   (set_attr "mode" "")])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Strict-low-part QImode logic operation whose first input is the 8-bit
;; field at bit 8 of operand 2 (printed with %h).  After reload, when
;; operand 0 differs from operand 1, operand 1 is first copied into the
;; low part of operand 0.
;; NOTE(review): the mnemonic before "{b}" and part of the pattern name
;; look truncated in this view -- confirm.
(define_insn_and_split "*qi_ext_1_slp"
  [(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
        (any_logic:QI
          (subreg:QI
            (match_operator:SWI248 3 "extract_operator"
              [(match_operand 2 "int248_register_operand" "Q,Q")
               (const_int 8)
               (const_int 8)]) 0)
          (match_operand:QI 1 "nonimmediate_operand" "0,!qm")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "@
   {b}\t{%h2, %0|%0, %h2}
   #"
  "&& reload_completed
   && !rtx_equal_p (operands[0], operands[1])"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (any_logic:QI
             (subreg:QI
               (match_op_dup 3
                 [(match_dup 2) (const_int 8) (const_int 8)]) 0)
             (match_dup 0)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; Strict-low-part QImode logic operation where both inputs are 8-bit
;; fields at bit 8.  Always split after reload: operand 2's field is
;; copied into the low part of operand 0, then combined with operand 1's
;; field.
(define_insn_and_split "*qi_ext_2_slp"
  [(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
        (any_logic:QI
          (subreg:QI
            (match_operator:SWI248 3 "extract_operator"
              [(match_operand 1 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)
          (subreg:QI
            (match_operator:SWI248 4 "extract_operator"
              [(match_operand 2 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (strict_low_part (match_dup 0))
        (subreg:QI
          (match_op_dup 4
            [(match_dup 2) (const_int 8) (const_int 8)]) 0))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (any_logic:QI
             (subreg:QI
               (match_op_dup 3
                 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
             (match_dup 0)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; After reload, an AND of a non-identical source with a constant equal to
;; the SImode/HImode/QImode mode mask is emitted as an extension from that
;; narrower mode via gen_extend_insn.
;; NOTE(review): the left-hand side of the `< GET_MODE_SIZE (SImode)'
;; comparison is missing in this view (presumably the size of the
;; pattern's mode) -- confirm.
(define_split
  [(set (match_operand:SWI248 0 "register_operand")
        (and:SWI248 (match_operand:SWI248 1 "nonimmediate_operand")
                    (match_operand:SWI248 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed
   && (!REG_P (operands[1])
       || REGNO (operands[0]) != REGNO (operands[1]))
   && (UINTVAL (operands[2]) == GET_MODE_MASK (SImode)
       || UINTVAL (operands[2]) == GET_MODE_MASK (HImode)
       || UINTVAL (operands[2]) == GET_MODE_MASK (QImode))"
  [(const_int 0)]
{
  unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]);
  machine_mode mode;

  if (ival == GET_MODE_MASK (SImode))
    mode = SImode;
  else if (ival == GET_MODE_MASK (HImode))
    mode = HImode;
  else if (ival == GET_MODE_MASK (QImode))
    mode = QImode;
  else
    gcc_unreachable ();

  /* Zero extend to SImode to avoid partial register stalls. */
  if ( < GET_MODE_SIZE (SImode))
    operands[0] = gen_lowpart (SImode, operands[0]);

  emit_insn (gen_extend_insn
             (operands[0], gen_lowpart (mode, operands[1]),
              GET_MODE (operands[0]), mode, 1));
  DONE;
})

;; In-place AND with -65536 becomes a store of zero to the HImode low
;; part of the register.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (and:SWI48 (match_dup 0)
                   (const_int -65536)))
   (clobber (reg:CC FLAGS_REG))]
  "(TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)
    || optimize_function_for_size_p (cfun)"
  [(set (strict_low_part (match_dup 1)) (const_int 0))]
  "operands[1] = gen_lowpart (HImode, operands[0]);")

;; In-place AND with -256 becomes, after reload, a store of zero to the
;; QImode low part of the register.
(define_split
  [(set (match_operand:SWI248 0 "any_QIreg_operand")
        (and:SWI248 (match_dup 0)
                    (const_int -256)))
   (clobber (reg:CC FLAGS_REG))]
  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   && reload_completed"
  [(set (strict_low_part (match_dup 1)) (const_int 0))]
  "operands[1] = gen_lowpart (QImode, operands[0]);")

;; In-place AND with -65281 becomes, after reload, an XOR of the 8-bit
;; field at bit 8 with itself, stored back into that field.
(define_split
  [(set (match_operand:SWI248 0 "QIreg_operand")
        (and:SWI248 (match_dup 0)
                    (const_int -65281)))
   (clobber (reg:CC FLAGS_REG))]
  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   && reload_completed"
  [(parallel
     [(set (zero_extract:HI (match_dup 0)
                            (const_int 8)
                            (const_int 8))
           (subreg:HI
             (xor:QI
               (subreg:QI
                 (zero_extract:HI (match_dup 0)
                                  (const_int 8)
                                  (const_int 8)) 0)
               (subreg:QI
                 (zero_extract:HI (match_dup 0)
                                  (const_int 8)
                                  (const_int 8)) 0)) 0))
      (clobber (reg:CC FLAGS_REG))])]
  "operands[0] = gen_lowpart (HImode, operands[0]);")

;; 64-bit AND that also sets the flags from the result.  Alternatives 0
;; and 3 use the 32-bit form; the condition string below explains when
;; that restricts the flags user to CCZmode.
(define_insn "*anddi_2"
  [(set (reg FLAGS_REG)
        (compare
         (and:DI
          (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,r,rm,r")
          (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,Z,re,m"))
         (const_int 0)))
   (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,r,r")
        (and:DI (match_dup 1) (match_dup 2)))]
  "TARGET_64BIT
   && ix86_match_ccmode
        (insn,
         /* If we are going to emit andl instead of andq, and the operands[2]
            constant might have the SImode sign bit set, make sure the sign
            flag isn't tested, because the instruction will set the sign flag
            based on bit 31 rather than bit 63. If it isn't CONST_INT,
            conservatively assume it might have bit 31 set. */
         (satisfies_constraint_Z (operands[2])
          && (!CONST_INT_P (operands[2])
              || val_signbit_known_set_p (SImode, INTVAL (operands[2]))))
         ? CCZmode : CCNOmode)
   && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)"
  "@
   and{l}\t{%k2, %k0|%k0, %k2}
   and{q}\t{%2, %0|%0, %2}
   and{q}\t{%2, %0|%0, %2}
   and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
   and{q}\t{%2, %1, %0|%0, %1, %2}
   and{q}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "alu")
   (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
   (set_attr "mode" "SI,DI,DI,SI,DI,DI")])

;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; Flag-setting SImode AND whose result is zero-extended into a DImode
;; register.
(define_insn "*andsi_2_zext"
  [(set (reg FLAGS_REG)
        (compare
          (and:SI
           (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
           (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
          (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
   && ix86_binary_operator_ok (AND, SImode, operands, TARGET_APX_NDD)"
  "@
   and{l}\t{%2, %k0|%k0, %2}
   and{l}\t{%2, %1, %k0|%k0, %1, %2}
   and{l}\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "alu")
   (set_attr "isa" "*,apx_ndd,apx_ndd")
   (set_attr "mode" "SI")])

;; Flag-setting QImode AND that may be emitted as a 32-bit and when the
;; insn's "mode" attribute is SI; in that case a negative constant is
;; first reduced to its low 8 bits.  A non-negative constant operand
;; permits CCNOmode, anything else requires CCZmode.
(define_insn "*andqi_2_maybe_si"
  [(set (reg FLAGS_REG)
        (compare (and:QI
                  (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r")
                  (match_operand:QI 2 "general_operand" "qn,m,n,rn,m"))
                 (const_int 0)))
   (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r")
        (and:QI (match_dup 1) (match_dup 2)))]
  "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)
   && ix86_match_ccmode (insn,
                         CONST_INT_P (operands[2])
                         && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
{
  if (get_attr_mode (insn) == MODE_SI)
    {
      if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
        operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
      return "and{l}\t{%2, %k0|%k0, %2}";
    }
  if (which_alternative > 2)
    return "and{b}\t{%2, %1, %0|%0, %1, %2}";
  return "and{b}\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "alu")
   (set_attr "isa" "*,*,*,apx_ndd,apx_ndd")
   (set (attr "mode")
     (cond [(eq_attr "alternative" "3,4")
              (const_string "QI")
            (eq_attr "alternative" "2")
              (const_string "SI")
            (and (match_test "optimize_insn_for_size_p ()")
                 (and (match_operand 0 "ext_QIreg_operand")
                      (match_operand 2 "const_0_to_127_operand")))
              (const_string "SI")
           ]
           (const_string "QI")))
   ;; Potential partial reg stall on alternative 2.
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "2")
              (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
           (symbol_ref "true")))])

;; Flag-setting AND for QI/HI/SI (SWI124) under CCNOmode; alternatives
;; 2-3 are the three-operand forms enabled for apx_ndd.
;; NOTE(review): the pattern name, mnemonic suffixes inside "{}", operand
;; 2's predicate, some constraint entries and the "mode" attribute look
;; truncated in this view -- confirm.
(define_insn "*and_2"
  [(set (reg FLAGS_REG)
        (compare (and:SWI124
                  (match_operand:SWI124 1 "nonimmediate_operand" "%0,0,rm,r")
                  (match_operand:SWI124 2 "" ",,r,"))
                 (const_int 0)))
   (set (match_operand:SWI124 0 "nonimmediate_operand" "=m,,r,r")
        (and:SWI124 (match_dup 1) (match_dup 2)))]
  "ix86_match_ccmode (insn, CCNOmode)
   && ix86_binary_operator_ok (AND, mode, operands, TARGET_APX_NDD)"
  "@
   and{}\t{%2, %0|%0, %2}
   and{}\t{%2, %0|%0, %2}
   and{}\t{%2, %1, %0|%0, %1, %2}
   and{}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "alu")
   (set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set_attr "mode" "")])

;; QImode logic operation between the 8-bit field at bit 8 of operand 2
;; (printed with %h) and operand 1, which is tied to the destination.
;; NOTE(review): the mnemonic before "{b}" and part of the pattern name
;; look truncated in this view -- confirm.
(define_insn "*qi_ext_0"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
        (any_logic:QI
          (subreg:QI
            (match_operator:SWI248 3 "extract_operator"
              [(match_operand 2 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)
          (match_operand:QI 1 "nonimmediate_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "{b}\t{%h2, %0|%0, %h2}"
  [(set_attr "addr" "gpr8")
   (set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; QImode logic operation between two 8-bit fields at bit 8.  Always split
;; after reload: operand 2's field is moved into operand 0, which is then
;; combined with operand 1's field.
(define_insn_and_split "*qi_ext2_0"
  [(set (match_operand:QI 0 "register_operand" "=&Q")
        (any_logic:QI
          (subreg:QI
            (match_operator:SWI248 3 "extract_operator"
              [(match_operand 1 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)
          (subreg:QI
            (match_operator:SWI248 4 "extract_operator"
              [(match_operand 2 "int248_register_operand" "Q")
               (const_int 8)
               (const_int 8)]) 0)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (subreg:QI
          (match_op_dup 4
            [(match_dup 2) (const_int 8) (const_int 8)]) 0))
   (parallel
     [(set (match_dup 0)
           (any_logic:QI
             (subreg:QI
               (match_op_dup 3
                 [(match_dup 1) (const_int 8) (const_int 8)]) 0)
           (match_dup 0)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; Named expander: AND of the 8-bit field at bit 8 of HImode operand 1
;; with a constant, stored into the same field of HImode operand 0.
(define_expand "andqi_ext_1"
  [(parallel
     [(set (zero_extract:HI (match_operand:HI 0 "register_operand")
                            (const_int 8)
                            (const_int 8))
           (subreg:HI
             (and:QI
               (subreg:QI
                 (zero_extract:HI (match_operand:HI 1 "register_operand")
                                  (const_int 8)
                                  (const_int 8)) 0)
               (match_operand:QI 2 "const_int_operand")) 0))
      (clobber (reg:CC FLAGS_REG))])])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Logic operation between the 8-bit field at bit 8 of operand 1 and
;; operand 2, stored into the same field of operand 0 (printed with %h).
;; Alternative 1 outputs "#"; after reload, when operands 0 and 1 differ,
;; the field of operand 1 is first copied into operand 0 and the
;; operation is then done in place on operand 0.
;; NOTE(review): the mnemonic before "{b}" and part of the pattern name
;; look truncated in this view -- confirm.
(define_insn_and_split "*qi_ext_1"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (any_logic:QI
            (subreg:QI
              (match_operator:SWI248 3 "extract_operator"
                [(match_operand 1 "int248_register_operand" "0,!Q")
                 (const_int 8)
                 (const_int 8)]) 0)
            (match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "@
   {b}\t{%2, %h0|%h0, %2}
   #"
  "reload_completed
   && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
        (zero_extract:SWI248
          (match_dup 1) (const_int 8) (const_int 8)))
   (parallel
     [(set (zero_extract:SWI248
             (match_dup 0) (const_int 8) (const_int 8))
           (subreg:SWI248
             (any_logic:QI
               (subreg:QI
                 (match_op_dup 3
                   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
               (match_dup 2)) 0))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "addr" "gpr8")
   (set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Flag-setting variant: the flags (operand 4, via compare operator 5)
;; are set from the logic operation between the 8-bit field at bit 8 of
;; operand 1 and operand 2, and the same result is stored into that field
;; of operand 0 (printed with %h).  Alternative 1 outputs "#"; after
;; reload, when operands 0 and 1 differ, the field of operand 1 is first
;; copied into operand 0 and the flag-setting operation is then done in
;; place.  Both sets of the emitted parallel read operand 0, which is
;; what this pattern's own template requires of its second set, so the
;; result can be recognised again.
;; NOTE(review): the mnemonic before "{b}" and part of the pattern name
;; look truncated in this view -- confirm.
(define_insn_and_split "*qi_ext_1_cc"
  [(set (match_operand 4 "flags_reg_operand")
        (match_operator 5 "compare_operator"
          [(any_logic:QI
             (subreg:QI
               (match_operator:SWI248 3 "extract_operator"
                 [(match_operand 1 "int248_register_operand" "0,!Q")
                  (const_int 8)
                  (const_int 8)]) 0)
             (match_operand:QI 2 "general_operand" "QnBn,QnBn"))
           (const_int 0)]))
   (set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (any_logic:QI
            (subreg:QI
              (match_op_dup 3
                [(match_dup 0)
                 (const_int 8)
                 (const_int 8)]) 0)
            (match_dup 2)) 0))]
  "ix86_match_ccmode (insn, CCNOmode)"
  "@
   {b}\t{%2, %h0|%h0, %2}
   #"
  "&& reload_completed
   && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
        (zero_extract:SWI248
          (match_dup 1) (const_int 8) (const_int 8)))
   (parallel
     [(set (match_dup 4)
           (match_op_dup 5
             [(any_logic:QI
                (subreg:QI
                  (match_op_dup 3
                    [(match_dup 0) (const_int 8) (const_int 8)]) 0)
                (match_dup 2))
              (const_int 0)]))
      (set (zero_extract:SWI248
             (match_dup 0) (const_int 8) (const_int 8))
           (subreg:SWI248
             (any_logic:QI
               (subreg:QI
                 (match_op_dup 3
                   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
               (match_dup 2)) 0))])]
  ""
  [(set_attr "addr" "gpr8")
   (set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Logic operation between the 8-bit fields at bit 8 of operands 1 and 2,
;; stored into the same field of operand 0 (all printed with %h).
;; Alternative 1 outputs "#"; after reload, when operand 0 equals neither
;; input, the field of operand 1 is first copied into operand 0 and the
;; operation is then done in place with operand 2.
;; NOTE(review): the mnemonic before "{b}" and part of the pattern name
;; look truncated in this view -- confirm.
(define_insn_and_split "*qi_ext_2"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (any_logic:QI
            (subreg:QI
              (match_operator:SWI248 3 "extract_operator"
                [(match_operand 1 "int248_register_operand" "%0,!Q")
                 (const_int 8)
                 (const_int 8)]) 0)
            (subreg:QI
              (match_operator:SWI248 4 "extract_operator"
                [(match_operand 2 "int248_register_operand" "Q,Q")
                 (const_int 8)
                 (const_int 8)]) 0)) 0))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "@
   {b}\t{%h2, %h0|%h0, %h2}
   #"
  "reload_completed
   && !(rtx_equal_p (operands[0], operands[1])
        || rtx_equal_p (operands[0], operands[2]))"
  [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
        (zero_extract:SWI248
          (match_dup 1) (const_int 8) (const_int 8)))
   (parallel
     [(set (zero_extract:SWI248
             (match_dup 0) (const_int 8) (const_int 8))
           (subreg:SWI248
             (any_logic:QI
               (subreg:QI
                 (match_op_dup 3
                   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
               (subreg:QI
                 (match_op_dup 4
                   [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Variant where the logic operation is applied to the full registers
;; (operands 1 and 2, which must have the same mode) and the 8-bit field
;; at bit 8 of the result is stored into that field of operand 0.
;; Alternative 1 outputs "#"; after reload, when operand 0 equals neither
;; input, the field of operand 1 is first copied into operand 0, and
;; operand 4 (operand 0 viewed in operand 1's mode) replaces operand 1.
;; NOTE(review): the mnemonic before "{b}" and part of the pattern name
;; look truncated in this view -- confirm.
(define_insn_and_split "*qi_ext_3"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (match_operator:SWI248 3 "extract_operator"
          [(any_logic
             (match_operand 1 "int248_register_operand" "%0,!Q")
             (match_operand 2 "int248_register_operand" "Q,Q"))
           (const_int 8)
           (const_int 8)]))
   (clobber (reg:CC FLAGS_REG))]
  "GET_MODE (operands[1]) == GET_MODE (operands[2])"
  "@
   {b}\t{%h2, %h0|%h0, %h2}
   #"
  "&& reload_completed
   && !(rtx_equal_p (operands[0], operands[1])
        || rtx_equal_p (operands[0], operands[2]))"
  [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
        (zero_extract:SWI248
          (match_dup 1) (const_int 8) (const_int 8)))
   (parallel
     [(set (zero_extract:SWI248
             (match_dup 0) (const_int 8) (const_int 8))
           (match_op_dup 3
             [(any_logic (match_dup 4) (match_dup 2))
              (const_int 8) (const_int 8)]))
      (clobber (reg:CC FLAGS_REG))])]
  "operands[4] = gen_lowpart (GET_MODE (operands[1]), operands[0]);"
  [(set_attr "type" "alu")
   (set_attr "mode" "QI")])

;; Convert wide AND instructions with immediate operand to shorter QImode
;; equivalents when possible.
;; Don't do the splitting with memory operands, since it introduces risk
;; of memory mismatch stalls.  We may want to do the splitting for optimizing
;; for size, but that can (should?) be handled by generic code instead.
;; Don't do the splitting for APX NDD as NDD does not support *h registers.
;; After reload, an AND whose constant has every bit outside bits 8..15
;; set is done as a QImode AND on the 8-bit field at bit 8, with the
;; constant shifted right by 8.  With TARGET_APX_NDD this is only done
;; when source and destination are the same register.
(define_split
  [(set (match_operand:SWI248 0 "QIreg_operand")
        (and:SWI248 (match_operand:SWI248 1 "register_operand")
                    (match_operand:SWI248 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
   "reload_completed
    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(~INTVAL (operands[2]) & ~(255 << 8))
    && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
  [(parallel
     [(set (zero_extract:HI (match_dup 0)
                            (const_int 8)
                            (const_int 8))
           (subreg:HI
             (and:QI
               (subreg:QI
                 (zero_extract:HI (match_dup 1)
                                  (const_int 8)
                                  (const_int 8)) 0)
               (match_dup 2)) 0))
      (clobber (reg:CC FLAGS_REG))])]
{
  operands[0] = gen_lowpart (HImode, operands[0]);
  operands[1] = gen_lowpart (HImode, operands[1]);
  operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
})

;; Since AND can be encoded with sign extended immediate, this is only
;; profitable when 7th bit is not set.
;; The constant must have every bit above the low byte set and bit 7
;; clear; the AND is then done on the strict low QImode part.
(define_split
  [(set (match_operand:SWI248 0 "any_QIreg_operand")
        (and:SWI248 (match_operand:SWI248 1 "general_operand")
                    (match_operand:SWI248 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
   "reload_completed
    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(~INTVAL (operands[2]) & ~255)
    && !(INTVAL (operands[2]) & 128)
    && !(TARGET_APX_NDD
         && !rtx_equal_p (operands[0], operands[1]))"
  [(parallel [(set (strict_low_part (match_dup 0))
                   (and:QI (match_dup 1)
                           (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[0] = gen_lowpart (QImode, operands[0]);
  operands[1] = gen_lowpart (QImode, operands[1]);
  operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
})

;; Double-word and-not for TARGET_BMI, split after reload into two
;; half-word and-not operations (operands 0..2 and 3..5 after
;; split_double_mode).
;; NOTE(review): the mode suffixes after `match_operand:' / `and:' /
;; `not:' and the mode argument of split_double_mode look truncated in
;; this view -- confirm.
(define_insn_and_split "*andn3_doubleword_bmi"
  [(set (match_operand: 0 "register_operand" "=&r,&r,r,r")
        (and:
          (not: (match_operand: 1 "register_operand" "r,r,0,r"))
          (match_operand: 2 "nonimmediate_operand" "r,o,ro,0")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 0)
                   (and:DWIH (not:DWIH (match_dup 1)) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 3)
                   (and:DWIH (not:DWIH (match_dup 4)) (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])]
  "split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]);"
  [(set_attr "isa" "x64,*,*,*")])

;; Double-word and-not without BMI: split before reload into a NOT into a
;; fresh pseudo (operand 3) followed by a double-word AND.
(define_insn_and_split "*andn3_doubleword"
  [(set (match_operand:DWI 0 "register_operand")
        (and:DWI
          (not:DWI (match_operand:DWI 1 "register_operand"))
          (match_operand:DWI 2 "nonimmediate_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_BMI
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 3) (not:DWI (match_dup 1)))
   (parallel [(set (match_dup 0)
                   (and:DWI (match_dup 3) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
  "operands[3] = gen_reg_rtx (mode);")

;; SImode/DImode and-not: the andn instruction for general registers
;; (isa bmi), or "#" for the mask-register alternative (isa avx512bw).
(define_insn "*andn_1"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r,?k")
        (and:SWI48
          (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r,k"))
          (match_operand:SWI48 2 "nonimmediate_operand" "r,m,k")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI || TARGET_AVX512BW"
  "@
   andn\t{%2, %1, %0|%0, %1, %2}
   andn\t{%2, %1, %0|%0, %1, %2}
   #"
  [(set_attr "isa" "bmi,bmi,avx512bw")
   (set_attr "type" "bitmanip,bitmanip,msklog")
   (set_attr "btver2_decode" "direct, double,*")
   (set_attr "mode" "")])

;; QImode/HImode and-not: for general registers the andn is emitted on
;; the 32-bit register names (%k operands, mode SI); the mask-register
;; alternative outputs "#" and gets mode HI when !TARGET_AVX512DQ.
;; NOTE(review): this definition has the same visible name as the
;; preceding one and an empty default "mode" string; presumably a mode
;; attribute was lost from both -- confirm.
(define_insn "*andn_1"
  [(set (match_operand:SWI12 0 "register_operand" "=r,?k")
        (and:SWI12
          (not:SWI12 (match_operand:SWI12 1 "register_operand" "r,k"))
          (match_operand:SWI12 2 "register_operand" "r,k")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI || TARGET_AVX512BW"
  "@
   andn\t{%k2, %k1, %k0|%k0, %k1, %k2}
   #"
  [(set_attr "isa" "bmi,avx512f")
   (set_attr "type" "bitmanip,msklog")
   (set_attr "btver2_decode" "direct,*")
   (set (attr "mode")
        (cond [(eq_attr "alternative" "0")
                 (const_string "SI")
               (and (eq_attr "alternative" "1")
                    (match_test "!TARGET_AVX512DQ"))
                 (const_string "HI")
              ]
              (const_string "")))])

;; Flag-setting and-not for TARGET_BMI: only the flags are used, the
;; andn result goes to a scratch register (operand 0).
(define_insn "*andn__ccno"
  [(set (reg FLAGS_REG)
        (compare
          (and:SWI48
            (not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r"))
            (match_operand:SWI48 2 "nonimmediate_operand" "r,m"))
          (const_int 0)))
   (clobber (match_scratch:SWI48 0 "=r,r"))]
  "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
  "andn\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "direct, double")
   (set_attr "mode" "")])

;; Split *andnsi_1 after reload with -Oz when not;and is shorter.
;; Requires the destination to be the negated register, a legacy integer
;; register, and operand 2 neither a REX register nor overlapping the
;; destination.
(define_split
  [(set (match_operand:SI 0 "register_operand")
        (and:SI (not:SI (match_operand:SI 1 "register_operand"))
                (match_operand:SI 2 "nonimmediate_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed
   && optimize_insn_for_size_p () && optimize_size > 1
   && REGNO (operands[0]) == REGNO (operands[1])
   && LEGACY_INT_REG_P (operands[0])
   && !REX_INT_REG_P (operands[2])
   && !reg_overlap_mentioned_p (operands[0], operands[2])"
  [(set (match_dup 0) (not:SI (match_dup 1)))
   (parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])])

;; Split *andn_si_ccno with -Oz when not;test is shorter.
;; The negated register (operand 2) is the one the original pattern
;; clobbers, so it is complemented in place before the test.
(define_split
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 1 "compare_operator"
          [(and:SI (not:SI (match_operand:SI 2 "general_reg_operand"))
                   (match_operand:SI 3 "nonimmediate_operand"))
           (const_int 0)]))
   (clobber (match_dup 2))]
  "reload_completed
   && optimize_insn_for_size_p () && optimize_size > 1
   && LEGACY_INT_REG_P (operands[2])
   && !REX_INT_REG_P (operands[3])
   && !reg_overlap_mentioned_p (operands[2], operands[3])"
  [(set (match_dup 2) (not:SI (match_dup 2)))
   (set (match_dup 0) (match_op_dup 1
                        [(and:SI (match_dup 3) (match_dup 2))
                         (const_int 0)]))])

;; Variant 1 of 4: Split ((A | B) ^ A) ^ C as (B & ~A) ^ C.
;; Matches ((op1 | op2) ^ op1) ^ op3 (plus a FLAGS_REG clobber) and, under
;; TARGET_BMI, replaces it with two insns: op4 = ~op1 & op2, then
;; op0 = op4 ^ op3.  operands[4] is a fresh pseudo from gen_reg_rtx.
;; NOTE(review): the bare "mode" passed to gen_reg_rtx in this and the two
;; following splitters looks like a mode-iterator substitution that was
;; lost from this copy of the file -- confirm against the upstream text.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (xor:SWI48
          (xor:SWI48
            (ior:SWI48 (match_operand:SWI48 1 "register_operand")
                       (match_operand:SWI48 2 "nonimmediate_operand"))
            (match_dup 1))
          (match_operand:SWI48 3 "nonimmediate_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  [(parallel
     [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 1)) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])]
  "operands[4] = gen_reg_rtx (mode);")

;; Variant 2 of 4: Split ((A | B) ^ B) ^ C as (A & ~B) ^ C.
;; Here the inner XOR repeats operand 2, so operand 2 is the one that is
;; complemented; both IOR inputs must be registers.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (xor:SWI48
          (xor:SWI48
            (ior:SWI48 (match_operand:SWI48 1 "register_operand")
                       (match_operand:SWI48 2 "register_operand"))
            (match_dup 2))
          (match_operand:SWI48 3 "nonimmediate_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  [(parallel
     [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 2)) (match_dup 1)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])]
  "operands[4] = gen_reg_rtx (mode);")

;; Variant 3 of 4: Split ((A | B) ^ C) ^ A as (B & ~A) ^ C.
;; Same replacement as variant 1; only the position of the repeated
;; operand 1 in the XOR chain differs (outer instead of inner XOR).
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (xor:SWI48
          (xor:SWI48
            (ior:SWI48 (match_operand:SWI48 1 "register_operand")
                       (match_operand:SWI48 2 "nonimmediate_operand"))
            (match_operand:SWI48 3 "nonimmediate_operand"))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  [(parallel
     [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 1)) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])]
  "operands[4] = gen_reg_rtx (mode);")

;; Variant 4 of 4: Split ((A | B) ^ C) ^ B as (A & ~B) ^ C.
;; Matches ((op1 | op2) ^ op3) ^ op2 and, under TARGET_BMI, replaces it
;; with op4 = ~op2 & op1 followed by op0 = op4 ^ op3, where operands[4]
;; is a fresh pseudo.
;; NOTE(review): the bare "mode" argument of gen_reg_rtx looks like a lost
;; mode-iterator substitution -- confirm against the upstream text.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (xor:SWI48
          (xor:SWI48
            (ior:SWI48 (match_operand:SWI48 1 "register_operand")
                       (match_operand:SWI48 2 "register_operand"))
            (match_operand:SWI48 3 "nonimmediate_operand"))
          (match_dup 2)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  [(parallel
     [(set (match_dup 4) (and:SWI48 (not:SWI48 (match_dup 2)) (match_dup 1)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 0) (xor:SWI48 (match_dup 4) (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])]
  "operands[4] = gen_reg_rtx (mode);")

;; Logical inclusive and exclusive OR instructions

;; %%% This used to optimize known byte-wide and operations to memory.
;; If this is considered useful, it should be done with splitters.

;; Expander for the any_or operations over the SDWIM modes.  When the mode
;; is wider than a word and operand 2 fails x86_64_hilo_general_operand,
;; operand 2 is forced into a register first; emission itself is delegated
;; to ix86_expand_binary_operator.
;; NOTE(review): the pattern name "3", the empty predicate of operand 2 and
;; the empty first argument of ix86_expand_binary_operator look like
;; iterator placeholders stripped from this copy -- confirm.
(define_expand "3"
  [(set (match_operand:SDWIM 0 "nonimmediate_operand")
        (any_or:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
                      (match_operand:SDWIM 2 "")))]
  ""
{
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && !x86_64_hilo_general_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);

  ix86_expand_binary_operator (, mode, operands, TARGET_APX_NDD);
  DONE;
})

;; Double-word any_or.  Emitted as "#" and split after reload: the three
;; operands are split by split_double_mode into operands[0..2] and
;; operands[3..5], and each half is handled on its own, with shortcuts
;; when that half of the constant operand is 0 or -1.
;; NOTE(review): presumably operands[0..2] are the low parts and
;; operands[3..5] the high parts -- confirm against split_double_mode.
(define_insn_and_split "*3_doubleword"
  [(set (match_operand: 0 "nonimmediate_operand" "=ro,r,&r,&r,&r,&r,&r")
        (any_or:
          (match_operand: 1 "nonimmediate_operand" "%0,0,ro,r,ro,jO,r")
          (match_operand: 2 "x86_64_hilo_general_operand" "r,o,r,,K,,o")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (, mode, operands, TARGET_APX_NDD)"
  "#"
  "&& reload_completed"
  [(const_int:DWIH 0)]
{
  /* This insn may disappear completely when operands[2] == const0_rtx
     and operands[0] == operands[1], which requires a NOTE_INSN_DELETED.  */
  bool emit_insn_deleted_note_p = false;

  split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]);

  /* First half: operands[0] = operands[1] <op> operands[2].  */
  if (operands[2] == const0_rtx)
    {
      /* OR/XOR with 0 is a plain copy, or nothing when source and
         destination already coincide.  */
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);
      else
        emit_insn_deleted_note_p = true;
    }
  else if (operands[2] == constm1_rtx)
    {
      /* With an all-ones constant the IOR case loads -1; the other case
         is emitted as a NOT.  */
      if ( == IOR)
        emit_move_insn (operands[0], constm1_rtx);
      else
        ix86_expand_unary_operator (NOT, mode, &operands[0], TARGET_APX_NDD);
    }
  else
    ix86_expand_binary_operator (, mode, &operands[0], TARGET_APX_NDD);

  /* Second half: operands[3] = operands[4] <op> operands[5].  */
  if (operands[5] == const0_rtx)
    {
      if (!rtx_equal_p (operands[3], operands[4]))
        emit_move_insn (operands[3], operands[4]);
      /* Nothing was emitted for either half: leave a deleted-insn note.  */
      else if (emit_insn_deleted_note_p)
        emit_note (NOTE_INSN_DELETED);
    }
  else if (operands[5] == constm1_rtx)
    {
      if ( == IOR)
        emit_move_insn (operands[3], constm1_rtx);
      else
        ix86_expand_unary_operator (NOT, mode, &operands[3], TARGET_APX_NDD);
    }
  else
    ix86_expand_binary_operator (, mode, &operands[3], TARGET_APX_NDD);

  DONE;
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])

;; any_or in the SWI248 modes with a FLAGS_REG clobber.  Alternatives 0-1
;; print the two-operand form, alternatives 2-4 (isa apx_ndd) the
;; three-operand form, and the last alternative (constraint "k", type
;; msklog) is emitted as "#".
;; NOTE(review): the opcode mnemonic, the suffix inside "{}" and several
;; constraint/attribute fields are empty in this copy, and the "@"
;; template has lost its per-alternative line breaks -- confirm upstream.
(define_insn "*_1"
  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,r,?k")
        (any_or:SWI248
          (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,rjM,r,k")
          (match_operand:SWI248 2 "" "r,,r,,,k")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (, mode, operands, TARGET_APX_NDD)"
  "@ {}\t{%2, %0|%0, %2} {}\t{%2, %0|%0, %2} {}\t{%2, %1, %0|%0, %1, %2} {}\t{%2, %1, %0|%0, %1, %2} {}\t{%2, %1, %0|%0, %1, %2} #"
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,")
   (set_attr "type" "alu, alu, alu, alu, alu, msklog")
   (set_attr "mode" "")])

;; ~(op1 ^ op2) in the SWI248 modes.  Split after reload into an XOR
;; followed by a NOT of the result; when operand 0 is a mask register the
;; preparation code emits the insn from gen_kxnor instead and is DONE.
(define_insn_and_split "*notxor_1"
  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,?k")
        (not:SWI248
          (xor:SWI248
            (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,r,k")
            (match_operand:SWI248 2 "" "r,,r,,k"))))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (XOR, mode, operands, TARGET_APX_NDD)"
  "#"
  "&& reload_completed"
  [(parallel
     [(set (match_dup 0)
           (xor:SWI248 (match_dup 1) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 0)
        (not:SWI248 (match_dup 0)))]
{
  if (MASK_REG_P (operands[0]))
    {
      emit_insn (gen_kxnor (operands[0], operands[1], operands[2]));
      DONE;
    }
}
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd,")
   (set_attr "type" "alu, alu, alu, alu, msklog")
   (set_attr "mode" "")])

;; DImode IOR with a constant that has exactly one bit set, at a bit
;; index in the range 31..63 (IN_RANGE on exact_log2).  After reload it
;; is rewritten as a one-bit zero_extract store of 1 at that index, which
;; is kept in operands[3].
(define_insn_and_split "*iordi_1_bts"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (ior:DI
          (match_operand:DI 1 "nonimmediate_operand" "%0")
          (match_operand:DI 2 "const_int_operand" "n")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_USE_BT && ix86_binary_operator_ok (IOR, DImode, operands) && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
  "#"
  "&& reload_completed"
  [(parallel
     [(set (zero_extract:DI (match_dup 0)
                            (const_int 1)
                            (match_dup 3))
           (const_int 1))
      (clobber (reg:CC FLAGS_REG))])]
  "operands[3] = GEN_INT (exact_log2 (INTVAL (operands[2])));"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "DI")])

;; XOR counterpart of the pattern above: the selected one-bit field is
;; replaced by its own complement.
(define_insn_and_split "*xordi_1_btc"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (xor:DI
          (match_operand:DI 1 "nonimmediate_operand" "%0")
          (match_operand:DI 2 "const_int_operand" "n")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_USE_BT && ix86_binary_operator_ok (XOR, DImode, operands) && IN_RANGE (exact_log2 (INTVAL (operands[2])), 31, 63)"
  "#"
  "&& reload_completed"
  [(parallel
     [(set (zero_extract:DI (match_dup 0)
                            (const_int 1)
                            (match_dup 3))
           (not:DI (zero_extract:DI (match_dup 0)
                                    (const_int 1)
                                    (match_dup 3))))
      (clobber (reg:CC FLAGS_REG))])]
  "operands[3] = GEN_INT (exact_log2 (INTVAL (operands[2])));"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "DI")])

;; Optimize a ^ ((a ^ b) & mask) to (~mask & a) | (b & mask)
;; Here a = operand 1, b = operand 2 and mask = operand 3.  The split is
;; unconditional ("&& 1") once the insn condition holds; it forces a and
;; mask into registers and uses two fresh pseudos (operands 4 and 5) for
;; the two AND results.
(define_insn_and_split "*xor2andn"
  [(set (match_operand:SWI248 0 "register_operand")
        (xor:SWI248
          (and:SWI248
            (xor:SWI248
              (match_operand:SWI248 1 "nonimmediate_operand")
              (match_operand:SWI248 2 "nonimmediate_operand"))
            (match_operand:SWI248 3 "nonimmediate_operand"))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 4)
           (and:SWI248
             (not:SWI248 (match_dup 3))
             (match_dup 1)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 5)
           (and:SWI248 (match_dup 3) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 0)
           (ior:SWI248 (match_dup 4) (match_dup 5)))
      (clobber (reg:CC FLAGS_REG))])]
{
  operands[1] = force_reg (mode, operands[1]);
  operands[3] = force_reg (mode, operands[3]);
  operands[4] = gen_reg_rtx (mode);
  operands[5] = gen_reg_rtx (mode);
})

;; See the comment on addsi_1_zext for why nonimmediate_operand is used.
;; SImode any_or whose result is zero-extended into a DImode register;
;; 64-bit targets only.  Alternatives 1-3 are the apx_ndd three-operand
;; forms.
(define_insn "*si_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI
          (any_or:SI
            (match_operand:SI 1 "nonimmediate_operand" "%0,rm,rjM,r")
            (match_operand:SI 2 "x86_64_general_operand" "rBMe,r,e,BM"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands, TARGET_APX_NDD)"
  "@ {l}\t{%2, %k0|%k0, %2} {l}\t{%2, %1, %k0|%k0, %1, %2} {l}\t{%2, %1, %k0|%k0, %1, %2} {l}\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "alu")
   (set_attr "isa" "*,apx_ndd,apx_ndd,apx_ndd")
   (set_attr "mode" "SI")])

;; Same operation written as a DImode any_or of a zero-extended SImode
;; operand with an immediate accepted by x86_64_zext_immediate_operand.
(define_insn "*si_1_zext_imm"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (any_or:DI
          (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0,rm"))
          (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z,Z")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands, TARGET_APX_NDD)"
  "@ {l}\t{%2, %k0|%k0, %2} {l}\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "alu")
   (set_attr "isa" "*,apx_ndd")
   (set_attr "mode" "SI")])

;; QImode any_or.  Alternative 2 prints the operands with %k and the {l}
;; suffix and is given mode SI; alternative 5 (constraint "k") is emitted
;; as "#" and gets mode HI when TARGET_AVX512DQ is off; everything else is
;; mode QI.
(define_insn "*qi_1"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
        (any_or:QI
          (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
          (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (, QImode, operands, TARGET_APX_NDD)"
  "@ {b}\t{%2, %0|%0, %2} {b}\t{%2, %0|%0, %2} {l}\t{%k2, %k0|%k0, %k2} {b}\t{%2, %1, %0|%0, %1, %2} {b}\t{%2, %1, %0|%0, %1, %2} #"
  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
   (set_attr "type" "alu,alu,alu,alu,alu,msklog")
   (set (attr "mode")
        (cond [(eq_attr "alternative" "2")
                 (const_string "SI")
               (and (eq_attr "alternative" "5")
                    (match_test "!TARGET_AVX512DQ"))
                 (const_string "HI")
              ]
              (const_string "QI")))
   ;; Potential partial reg stall on alternative 2.
   (set (attr "preferred_for_speed")
        (cond [(eq_attr "alternative" "2")
                 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
              (symbol_ref "true")))])

;; QImode ~(op1 ^ op2).  Split after reload into XOR followed by NOT,
;; except that a mask-register destination is handled by gen_kxnorqi in
;; the preparation code.  The attributes mirror the QImode any_or pattern
;; directly above.
(define_insn_and_split "*notxorqi_1"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k")
        (not:QI
          (xor:QI
            (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k")
            (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k"))))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (XOR, QImode, operands, TARGET_APX_NDD)"
  "#"
  "&& reload_completed"
  [(parallel
     [(set (match_dup 0)
           (xor:QI (match_dup 1) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 0)
        (not:QI (match_dup 0)))]
{
  if (mask_reg_operand (operands[0], QImode))
    {
      emit_insn (gen_kxnorqi (operands[0], operands[1], operands[2]));
      DONE;
    }
}
  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f")
   (set_attr "type" "alu,alu,alu,alu,alu,msklog")
   (set (attr "mode")
        (cond [(eq_attr "alternative" "2")
                 (const_string "SI")
               (and (eq_attr "alternative" "5")
                    (match_test "!TARGET_AVX512DQ"))
                 (const_string "HI")
              ]
              (const_string "QI")))
   ;; Potential partial reg stall on alternative 2.
   (set (attr "preferred_for_speed")
        (cond [(eq_attr "alternative" "2")
                 (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
              (symbol_ref "true")))])

;; Convert (sign_extend:WIDE (any_logic:NARROW (memory, immediate)))
;; to (any_logic:WIDE (sign_extend (memory)), (sign_extend (immediate))).
;; This eliminates sign extension after logic operation.
;; QImode memory operand, result in any SWI248 mode: sign-extend the
;; memory operand into a fresh pseudo (operands[3]) first, then apply the
;; logic operation with the constant in the wide mode.
;; NOTE(review): the bare "mode" passed to gen_reg_rtx looks like a lost
;; mode-iterator substitution -- confirm against the upstream text.
(define_split
  [(set (match_operand:SWI248 0 "register_operand")
        (sign_extend:SWI248
          (any_logic:QI (match_operand:QI 1 "memory_operand")
                        (match_operand:QI 2 "const_int_operand"))))]
  ""
  [(set (match_dup 3) (sign_extend:SWI248 (match_dup 1)))
   (set (match_dup 0) (any_logic:SWI248 (match_dup 3) (match_dup 2)))]
  "operands[3] = gen_reg_rtx (mode);")

;; Same transformation for an HImode memory operand widened to SWI48.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (sign_extend:SWI48
          (any_logic:HI (match_operand:HI 1 "memory_operand")
                        (match_operand:HI 2 "const_int_operand"))))]
  ""
  [(set (match_dup 3) (sign_extend:SWI48 (match_dup 1)))
   (set (match_dup 0) (any_logic:SWI48 (match_dup 3) (match_dup 2)))]
  "operands[3] = gen_reg_rtx (mode);")

;; Same transformation for SImode widened to DImode; 64-bit targets only.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (sign_extend:DI
          (any_logic:SI (match_operand:SI 1 "memory_operand")
                        (match_operand:SI 2 "const_int_operand"))))]
  "TARGET_64BIT"
  [(set (match_dup 3) (sign_extend:DI (match_dup 1)))
   (set (match_dup 0) (any_logic:DI (match_dup 3) (match_dup 2)))]
  "operands[3] = gen_reg_rtx (DImode);")

;; any_or that both sets FLAGS_REG from a comparison of the result with
;; zero and stores the result in operand 0.  Valid only when
;; ix86_match_ccmode accepts CCNOmode for the insn.  Alternatives 2-3 are
;; the apx_ndd three-operand forms.
;; NOTE(review): the opcode mnemonic, the "{}" suffix and several
;; constraint strings are empty in this copy -- confirm upstream.
(define_insn "*_2"
  [(set (reg FLAGS_REG)
        (compare
          (any_or:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")
            (match_operand:SWI 2 "" ",,r,"))
          (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r")
        (any_or:SWI (match_dup 1) (match_dup 2)))]
  "ix86_match_ccmode (insn, CCNOmode) && ix86_binary_operator_ok (, mode, operands, TARGET_APX_NDD)"
  "@ {}\t{%2, %0|%0, %2} {}\t{%2, %0|%0, %2} {}\t{%2, %1, %0|%0, %1, %2} {}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "alu")
   (set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set_attr "mode" "")])

;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; ??? Special case for immediate operand is missing - it is tricky.
;; SImode any_or that sets FLAGS_REG from a comparison of the SImode
;; result with zero and writes the zero-extended result to a DImode
;; register.  64-bit targets only, and only when ix86_match_ccmode
;; accepts CCNOmode.
(define_insn "*si_2_zext"
  [(set (reg FLAGS_REG)
        (compare
          (any_or:SI
            (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r")
            (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM"))
          (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) && ix86_binary_operator_ok (, SImode, operands, TARGET_APX_NDD)"
  "@ {l}\t{%2, %k0|%k0, %2} {l}\t{%2, %1, %k0|%k0, %1, %2} {l}\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "alu")
   (set_attr "isa" "*,apx_ndd,apx_ndd")
   (set_attr "mode" "SI")])

;; Variant with an immediate accepted by x86_64_zext_immediate_operand;
;; the stored value is written as a DImode any_or of the zero-extended
;; operand 1 with that immediate.
(define_insn "*si_2_zext_imm"
  [(set (reg FLAGS_REG)
        (compare
          (any_or:SI
            (match_operand:SI 1 "nonimmediate_operand" "%0,rm")
            (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z,Z"))
          (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r,r")
        (any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) && ix86_binary_operator_ok (, SImode, operands, TARGET_APX_NDD)"
  "@ {l}\t{%2, %k0|%k0, %2} {l}\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "alu")
   (set_attr "isa" "*,apx_ndd")
   (set_attr "mode" "SI")])

;; any_or executed only for its effect on FLAGS_REG: the result goes to a
;; scratch (operand 0).  Rejected when both inputs are memory.
;; NOTE(review): the mnemonic, the "{}" suffix, the operand 2
;; predicate/constraint and the scratch constraint are partly empty in
;; this copy -- confirm upstream.
(define_insn "*_3"
  [(set (reg FLAGS_REG)
        (compare
          (any_or:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "%0")
            (match_operand:SWI 2 "" ""))
          (const_int 0)))
   (clobber (match_scratch:SWI 0 "="))]
  "ix86_match_ccmode (insn, CCNOmode) && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "{}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "")])

;; Convert wide OR instructions with immediate operand to shorter QImode
;; equivalents when possible.
;; Don't do the splitting with memory operands, since it introduces risk
;; of memory mismatch stalls.  We may want to do the splitting for optimizing
;; for size, but that can (should?) be handled by generic code instead.
;; Don't do the splitting for APX NDD as NDD does not support *h registers.
;; Applies after reload when the immediate has no bits set outside bits
;; 8..15 (the mask ~(255 << 8) must be clear).  The operation is then
;; rewritten as a QImode any_or on the 8-bit field at bit offset 8 of the
;; register, with the immediate shifted right by 8.  Skipped when partial
;; register stalls matter and the function is not optimized for size, and
;; under TARGET_APX_NDD when source and destination registers differ.
(define_split
  [(set (match_operand:SWI248 0 "QIreg_operand")
        (any_or:SWI248
          (match_operand:SWI248 1 "register_operand")
          (match_operand:SWI248 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && !(INTVAL (operands[2]) & ~(255 << 8)) && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))"
  [(parallel
     [(set (zero_extract:HI (match_dup 0)
                            (const_int 8)
                            (const_int 8))
           (subreg:HI
             (any_or:QI
               (subreg:QI
                 (zero_extract:HI (match_dup 1)
                                  (const_int 8)
                                  (const_int 8)) 0)
               (match_dup 2)) 0))
      (clobber (reg:CC FLAGS_REG))])]
{
  /* Handle the case where INTVAL (operands[2]) == 0.  */
  if (operands[2] == const0_rtx)
    {
      /* The operation degenerates to a copy, or to nothing at all.  */
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);
      else
        emit_note (NOTE_INSN_DELETED);
      DONE;
    }
  /* The replacement template works on HImode views of both registers.  */
  operands[0] = gen_lowpart (HImode, operands[0]);
  operands[1] = gen_lowpart (HImode, operands[1]);
  operands[2] = gen_int_mode (INTVAL (operands[2]) >> 8, QImode);
})

;; Since OR can be encoded with sign extended immediate, this is only
;; profitable when 7th bit is set.
;; Applies after reload when the immediate fits in the low 8 bits and has
;; bit 7 (value 128) set.  The operation is rewritten as a QImode any_or
;; on the strict_low_part of the destination.  Skipped when partial
;; register stalls matter and the function is not optimized for size, and
;; under TARGET_APX_NDD when operand 0 and operand 1 differ.
(define_split
  [(set (match_operand:SWI248 0 "any_QIreg_operand")
        (any_or:SWI248
          (match_operand:SWI248 1 "general_operand")
          (match_operand:SWI248 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && !(INTVAL (operands[2]) & ~255) && (INTVAL (operands[2]) & 128) && !(TARGET_APX_NDD && !rtx_equal_p (operands[0], operands[1]))"
  [(parallel
     [(set (strict_low_part (match_dup 0))
           (any_or:QI (match_dup 1) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  operands[0] = gen_lowpart (QImode, operands[0]);
  operands[1] = gen_lowpart (QImode, operands[1]);
  operands[2] = gen_int_mode (INTVAL (operands[2]), QImode);
})

;; Named expander that builds a single parallel: XOR of the 8-bit field
;; at bit offset 8 of an HImode register with a QImode constant, setting
;; FLAGS_REG in CCNOmode from the result and storing the result back into
;; the same field of operand 0.
(define_expand "xorqi_ext_1_cc"
  [(parallel
     [(set (reg:CCNO FLAGS_REG)
           (compare:CCNO
             (xor:QI
               (subreg:QI
                 (zero_extract:HI (match_operand:HI 1 "register_operand")
                                  (const_int 8)
                                  (const_int 8)) 0)
               (match_operand:QI 2 "const_int_operand"))
             (const_int 0)))
      (set (zero_extract:HI (match_operand:HI 0 "register_operand")
                            (const_int 8)
                            (const_int 8))
           (subreg:HI
             (xor:QI
               (subreg:QI
                 (zero_extract:HI (match_dup 1)
                                  (const_int 8)
                                  (const_int 8)) 0)
               (match_dup 2)) 0))])])

;; Peephole2 rega = 0; rega op= regb into rega = regb.
;; Only applies when operand 1 does not mention operand 0.
(define_peephole2
  [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
                   (const_int 0))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
                   (any_or_plus:SWI (match_dup 0)
                                    (match_operand:SWI 1 "")))
              (clobber (reg:CC FLAGS_REG))])]
  "!reg_mentioned_p (operands[0], operands[1])"
  [(set (match_dup 0) (match_dup 1))])

;; Peephole2 dead instruction in rega = 0; rega op= rega.
;; Only the zeroing insn is kept.
(define_peephole2
  [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
                   (const_int 0))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0)
                   (any_or_plus:SWI (match_dup 0) (match_dup 0)))
              (clobber (reg:CC FLAGS_REG))])]
  ""
  [(parallel [(set (match_dup 0) (const_int 0))
              (clobber (reg:CC FLAGS_REG))])])

;; Split DST = (HI<<32)|LO early to minimize register usage.
;; The seven concat patterns below recognise a double-word value built
;; with any_or_plus from a part shifted left by a constant and an
;; unshifted part.  Each is emitted as "#" and, after reload, handed to
;; split_double_concat (destination, then the unshifted part, then the
;; shifted part).
;; NOTE(review): the pattern names, the modes after "match_operand:" /
;; "ashift:" / "zero_extend:" and the left factor of "* BITS_PER_UNIT"
;; are empty in this copy -- presumably stripped iterator placeholders;
;; confirm against the upstream text.

;; Shifted part: double-word register (operand 1, narrowed with
;; gen_lowpart).  Unshifted part: zero-extended operand 3.
(define_insn_and_split "*concat3_1"
  [(set (match_operand: 0 "nonimmediate_operand" "=ro,r")
        (any_or_plus:
          (ashift: (match_operand: 1 "register_operand" "r,r")
                   (match_operand:QI 2 "const_int_operand"))
          (zero_extend:
            (match_operand:DWIH 3 "nonimmediate_operand" "r,m"))))]
  "INTVAL (operands[2]) == * BITS_PER_UNIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  split_double_concat (mode, operands[0], operands[3],
                       gen_lowpart (mode, operands[1]));
  DONE;
})

;; Same as the previous pattern with the two any_or_plus arms swapped.
(define_insn_and_split "*concat3_2"
  [(set (match_operand: 0 "nonimmediate_operand" "=ro,r")
        (any_or_plus:
          (zero_extend:
            (match_operand:DWIH 1 "nonimmediate_operand" "r,m"))
          (ashift: (match_operand: 2 "register_operand" "r,r")
                   (match_operand:QI 3 "const_int_operand"))))]
  "INTVAL (operands[3]) == * BITS_PER_UNIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  split_double_concat (mode, operands[0], operands[1],
                       gen_lowpart (mode, operands[2]));
  DONE;
})

;; Both parts are zero-extended DWIH operands.  The last alternative puts
;; the result in an SSE register; that case is emitted through
;; gen_vec_concatv2di on a V2DImode view of the destination register.
(define_insn_and_split "*concat3_3"
  [(set (match_operand: 0 "nonimmediate_operand" "=ro,r,r,&r,x")
        (any_or_plus:
          (ashift:
            (zero_extend:
              (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m,x"))
            (match_operand:QI 2 "const_int_operand"))
          (zero_extend:
            (match_operand:DWIH 3 "nonimmediate_operand" "r,r,m,m,0"))))]
  "INTVAL (operands[2]) == * BITS_PER_UNIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (SSE_REG_P (operands[0]))
    {
      rtx tmp = gen_rtx_REG (V2DImode, REGNO (operands[0]));
      emit_insn (gen_vec_concatv2di (tmp, operands[3], operands[1]));
    }
  else
    split_double_concat (mode, operands[0], operands[3], operands[1]);
  DONE;
}
  [(set_attr "isa" "*,*,*,x64,x64")])

;; Same as the previous pattern with the arms swapped and without the SSE
;; alternative.
(define_insn_and_split "*concat3_4"
  [(set (match_operand: 0 "nonimmediate_operand" "=ro,r,r,&r")
        (any_or_plus:
          (zero_extend:
            (match_operand:DWIH 1 "nonimmediate_operand" "r,m,r,m"))
          (ashift:
            (zero_extend:
              (match_operand:DWIH 2 "nonimmediate_operand" "r,r,m,m"))
            (match_operand:QI 3 "const_int_operand"))))]
  "INTVAL (operands[3]) == * BITS_PER_UNIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  split_double_concat (mode, operands[0], operands[1], operands[2]);
  DONE;
}
  [(set_attr "isa" "*,*,*,x64")])

;; Shifted double-word register combined with a constant.  The insn
;; condition requires the constant's upper part to be zero (tested per
;; mode) and rejects constants for which ix86_endbr_immediate_operand
;; returns true.  The constant is narrowed with simplify_subreg at byte
;; offset 0 before the concat.
(define_insn_and_split "*concat3_5"
  [(set (match_operand:DWI 0 "nonimmediate_operand" "=r,o,o")
        (any_or_plus:DWI
          (ashift:DWI (match_operand:DWI 1 "register_operand" "r,r,r")
                      (match_operand:QI 2 "const_int_operand"))
          (match_operand:DWI 3 "const_scalar_int_operand" "n,n,Wd")))]
  "INTVAL (operands[2]) == * BITS_PER_UNIT / 2 && (mode == DImode ? CONST_INT_P (operands[3]) && (UINTVAL (operands[3]) & ~GET_MODE_MASK (SImode)) == 0 : CONST_INT_P (operands[3]) ? INTVAL (operands[3]) >= 0 : CONST_WIDE_INT_NUNITS (operands[3]) == 2 && CONST_WIDE_INT_ELT (operands[3], 1) == 0) && !(CONST_INT_P (operands[3]) ? ix86_endbr_immediate_operand (operands[3], VOIDmode) : ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[3], 0)), VOIDmode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op3 = simplify_subreg (mode, operands[3], mode, 0);
  split_double_concat (mode, operands[0], op3,
                       gen_lowpart (mode, operands[1]));
  DONE;
}
  [(set_attr "isa" "*,nox64,x64")])

;; As the previous pattern, but the shifted part is a zero-extended DWIH
;; operand, so no gen_lowpart is needed.
(define_insn_and_split "*concat3_6"
  [(set (match_operand: 0 "nonimmediate_operand" "=r,o,o,r")
        (any_or_plus:
          (ashift:
            (zero_extend:
              (match_operand:DWIH 1 "nonimmediate_operand" "r,r,r,m"))
            (match_operand:QI 2 "const_int_operand"))
          (match_operand: 3 "const_scalar_int_operand" "n,n,Wd,n")))]
  "INTVAL (operands[2]) == * BITS_PER_UNIT && (mode == DImode ? CONST_INT_P (operands[3]) && (UINTVAL (operands[3]) & ~GET_MODE_MASK (SImode)) == 0 : CONST_INT_P (operands[3]) ? INTVAL (operands[3]) >= 0 : CONST_WIDE_INT_NUNITS (operands[3]) == 2 && CONST_WIDE_INT_ELT (operands[3], 1) == 0) && !(CONST_INT_P (operands[3]) ? 
ix86_endbr_immediate_operand (operands[3], VOIDmode) : ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[3], 0)), VOIDmode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op3 = simplify_subreg (mode, operands[3], mode, 0);
  split_double_concat (mode, operands[0], op3, operands[1]);
  DONE;
}
  [(set_attr "isa" "*,nox64,x64,*")])

;; Zero-extended operand combined with a constant whose lower part is
;; zero (tested per mode in the insn condition).  The constant's upper
;; part -- INTVAL >> 32 in the DImode case, element 1 of the
;; CONST_WIDE_INT otherwise -- is passed as the last argument of
;; split_double_concat.
(define_insn_and_split "*concat3_7"
  [(set (match_operand: 0 "nonimmediate_operand" "=r,o,o,r")
        (any_or_plus:
          (zero_extend:
            (match_operand:DWIH 1 "nonimmediate_operand" "r,r,r,m"))
          (match_operand: 2 "const_scalar_int_operand" "n,n,Wd,n")))]
  "mode == DImode ? CONST_INT_P (operands[2]) && (UINTVAL (operands[2]) & GET_MODE_MASK (SImode)) == 0 && !ix86_endbr_immediate_operand (operands[2], VOIDmode) : CONST_WIDE_INT_P (operands[2]) && CONST_WIDE_INT_NUNITS (operands[2]) == 2 && CONST_WIDE_INT_ELT (operands[2], 0) == 0 && !ix86_endbr_immediate_operand (GEN_INT (CONST_WIDE_INT_ELT (operands[2], 1)), VOIDmode)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op2;
  if (mode == DImode)
    op2 = gen_int_mode (INTVAL (operands[2]) >> 32, mode);
  else
    op2 = gen_int_mode (CONST_WIDE_INT_ELT (operands[2], 1), mode);
  split_double_concat (mode, operands[0], operands[1], op2);
  DONE;
}
  [(set_attr "isa" "*,nox64,x64,*")])

;; Negation instructions

;; Expander for negation over the SDWIM modes; all work is delegated to
;; ix86_expand_unary_operator.
(define_expand "neg2"
  [(set (match_operand:SDWIM 0 "nonimmediate_operand")
        (neg:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
  ""
{
  ix86_expand_unary_operator (NEG, mode, operands, TARGET_APX_NDD);
  DONE;
})

;; Double-word negation, split after reload into three insns: negate the
;; first half while setting FLAGS_REG in CCC mode, add the carry to the
;; second half, then negate the second half.  split_double_mode fills
;; operands[0..1] and operands[2..3] with the halves.
;; NOTE(review): presumably operands[0..1] are the low halves -- confirm
;; against split_double_mode.
(define_insn_and_split "*neg2_doubleword"
  [(set (match_operand: 0 "nonimmediate_operand" "=ro,&r")
        (neg: (match_operand: 1 "nonimmediate_operand" "0,ro")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_unary_operator_ok (NEG, mode, operands, TARGET_APX_NDD)"
  "#"
  "&& reload_completed"
  [(parallel
     [(set (reg:CCC FLAGS_REG)
           (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
      (set (match_dup 0) (neg:DWIH (match_dup 1)))])
   (parallel
     [(set (match_dup 2)
           (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                                 (match_dup 3))
                      (const_int 0)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 2)
           (neg:DWIH (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[2]);"
  [(set_attr "isa" "*,apx_ndd")])

;; Convert:
;;   mov %esi, %edx
;;   negl %eax
;;   adcl $0, %edx
;;   negl %edx
;; to:
;;   xorl %edx, %edx
;;   negl %eax
;;   sbbl %esi, %edx
;; The preparation statement emits the clearing of operand 0 through
;; ix86_expand_clear.  The peephole requires operands 0 and 2 to be
;; different registers, neither of them mentioned in operand 1.

(define_peephole2
  [(set (match_operand:SWI48 0 "general_reg_operand")
        (match_operand:SWI48 1 "nonimmediate_gr_operand"))
   (parallel
     [(set (reg:CCC FLAGS_REG)
           (unspec:CCC [(match_operand:SWI48 2 "general_reg_operand")
                        (const_int 0)] UNSPEC_CC_NE))
      (set (match_dup 2) (neg:SWI48 (match_dup 2)))])
   (parallel
     [(set (match_dup 0)
           (plus:SWI48 (plus:SWI48
                         (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
                         (match_dup 0))
                       (const_int 0)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 0)
           (neg:SWI48 (match_dup 0)))
      (clobber (reg:CC FLAGS_REG))])]
  "REGNO (operands[0]) != REGNO (operands[2]) && !reg_mentioned_p (operands[0], operands[1]) && !reg_mentioned_p (operands[2], operands[1])"
  [(parallel
     [(set (reg:CCC FLAGS_REG)
           (unspec:CCC [(match_dup 2) (const_int 0)] UNSPEC_CC_NE))
      (set (match_dup 2) (neg:SWI48 (match_dup 2)))])
   (parallel
     [(set (match_dup 0)
           (minus:SWI48 (minus:SWI48
                          (match_dup 0)
                          (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0)))
                        (match_dup 1)))
      (clobber (reg:CC FLAGS_REG))])]
  "ix86_expand_clear (operands[0]);")

;; Convert:
;;   xorl %edx, %edx
;;   negl %eax
;;   adcl $0, %edx
;;   negl %edx
;; to:
;;   negl %eax
;;   sbbl %edx, %edx	// *x86_movcc_0_m1
;; Requires operands 0 and 1 to be different registers.

(define_peephole2
  [(parallel
     [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (reg:CCC FLAGS_REG)
           (unspec:CCC [(match_operand:SWI48 1 "general_reg_operand")
                        (const_int 0)] UNSPEC_CC_NE))
      (set (match_dup 1) (neg:SWI48 (match_dup 1)))])
   (parallel
     [(set (match_dup 0)
           (plus:SWI48 (plus:SWI48
                         (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
                         (match_dup 0))
                       (const_int 0)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 0)
           (neg:SWI48 (match_dup 0)))
      (clobber (reg:CC FLAGS_REG))])]
  "REGNO (operands[0]) != REGNO (operands[1])"
  [(parallel
     [(set (reg:CCC FLAGS_REG)
           (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
      (set (match_dup 1) (neg:SWI48 (match_dup 1)))])
   (parallel
     [(set (match_dup 0)
           (if_then_else:SWI48
             (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
             (const_int -1)
             (const_int 0)))
      (clobber (reg:CC FLAGS_REG))])])

;; Plain negation with a FLAGS_REG clobber.  Alternative 0 negates in
;; place; alternative 1 (isa apx_ndd) takes a separate source operand.
;; NOTE(review): the "{}" suffix and the "mode" attribute value are empty
;; in this copy -- confirm upstream.
(define_insn "*neg_1"
  [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,r")
        (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_unary_operator_ok (NEG, mode, operands, TARGET_APX_NDD)"
  "@ neg{}\t%0 neg{}\t{%1, %0|%0, %1}"
  [(set_attr "type" "negnot")
   (set_attr "isa" "*,apx_ndd")
   (set_attr "mode" "")])

;; SImode negation whose result is zero-extended into a DImode register;
;; 64-bit targets only.
(define_insn "*negsi_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (zero_extend:DI
          (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands, TARGET_APX_NDD)"
  "@ neg{l}\t%k0 neg{l}\t{%k1, %k0|%k0, %k1}"
  [(set_attr "type" "negnot")
   (set_attr "isa" "*,apx_ndd")
   (set_attr "mode" "SI")])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Negation that writes only the strict_low_part of operand 0 (SWI12
;; modes).  Alternative 0 negates in place.  Alternative 1 is emitted as
;; "#" and, after reload, when source and destination differ, is split
;; into a strict_low_part copy followed by the in-place negation.
;; NOTE(review): the constraint letters after "+," / "0,!" and the "{}"
;; suffix are empty in this copy -- confirm upstream.
(define_insn_and_split "*neg_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+,&"))
        (neg:SWI12 (match_operand:SWI12 1 "register_operand" "0,!")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "@ neg{}\t%0 #"
  "&& reload_completed && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (neg:SWI12 (match_dup 0)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "negnot")
   (set_attr "mode" "")])

;; Negation that also sets FLAGS_REG from a comparison of the negated
;; value with zero.  Valid only when ix86_match_ccmode accepts CCGOCmode.
(define_insn "*neg_2"
  [(set (reg FLAGS_REG)
        (compare
          (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
          (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r")
        (neg:SWI (match_dup 1)))]
  "ix86_match_ccmode (insn, CCGOCmode) && ix86_unary_operator_ok (NEG, mode, operands, TARGET_APX_NDD)"
  "@ neg{}\t%0 neg{}\t{%1, %0|%0, %1}"
  [(set_attr "type" "negnot")
   (set_attr "isa" "*,apx_ndd")
   (set_attr "mode" "")])

;; SImode variant of the pattern above whose stored result is
;; zero-extended to DImode; 64-bit targets only.
(define_insn "*negsi_2_zext"
  [(set (reg FLAGS_REG)
        (compare
          (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))
          (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r,r")
        (zero_extend:DI
          (neg:SI (match_dup 1))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) && ix86_unary_operator_ok (NEG, SImode, operands, TARGET_APX_NDD)"
  "@ neg{l}\t%k0 neg{l}\t{%1, %k0|%k0, %1}"
  [(set_attr "type" "negnot")
   (set_attr "isa" "*,apx_ndd")
   (set_attr "mode" "SI")])

;; Negation that sets FLAGS_REG in CCC mode through the UNSPEC_CC_NE
;; unspec on (operand 1, 0) and stores the negated value in operand 0.
(define_insn "*neg_ccc_1"
  [(set (reg:CCC FLAGS_REG)
        (unspec:CCC
          [(match_operand:SWI 1 "nonimmediate_operand" "0,rm")
           (const_int 0)] UNSPEC_CC_NE))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r")
        (neg:SWI (match_dup 1)))]
  ""
  "@ neg{}\t%0 neg{}\t{%1, %0|%0, %1}"
  [(set_attr "type" "negnot")
   (set_attr "isa" "*,apx_ndd")
   (set_attr "mode" "")])

;; Same flag-setting negation where the negated value itself is not
;; needed: operand 0 is only a clobbered scratch.
(define_insn "*neg_ccc_2"
  [(set (reg:CCC FLAGS_REG)
        (unspec:CCC
          [(match_operand:SWI 1 "nonimmediate_operand" "0,rm")
           (const_int 0)] UNSPEC_CC_NE))
   (clobber (match_scratch:SWI 0 "=,r"))]
  ""
  "@ neg{}\t%0 neg{}\t{%1, %0|%0, %1}"
  [(set_attr "type" "negnot")
   (set_attr "isa" "*,apx_ndd")
   (set_attr "mode" "")])

;; Named expander that builds the parallel of the CCC-setting unspec and
;; the negation, for register operands in the SWI48 modes.
(define_expand "x86_neg_ccc"
  [(parallel
     [(set (reg:CCC FLAGS_REG)
           (unspec:CCC [(match_operand:SWI48 1 "register_operand")
                        (const_int 0)] UNSPEC_CC_NE))
      (set (match_operand:SWI48 0 "register_operand")
           (neg:SWI48 (match_dup 1)))])])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Negation of the 8-bit field at bit offset 8 of a register.
;; Alternative 0 prints operand 0 with the %h modifier and negates in
;; place.  Alternative 1 is emitted as "#" and, after reload, when the
;; two registers differ, is split into a copy of the field followed by
;; the in-place negation.
(define_insn_and_split "*negqi_ext_1"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (neg:QI
            (subreg:QI
              (match_operator:SWI248 2 "extract_operator"
                [(match_operand 1 "int248_register_operand" "0,!Q")
                 (const_int 8)
                 (const_int 8)]) 0)) 0))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "@ neg{b}\t%h0 #"
  "reload_completed && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
        (zero_extract:SWI248
          (match_dup 1) (const_int 8) (const_int 8)))
   (parallel
     [(set (zero_extract:SWI248
             (match_dup 0) (const_int 8) (const_int 8))
           (subreg:SWI248
             (neg:QI
               (subreg:QI
                 (match_op_dup 2
                   [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "negnot")
   (set_attr "mode" "QI")])

;; Negate with jump on overflow.
;; Expander "negv3" (SWI modes).  Negates operand 1 into operand 0 in a
;; parallel that also sets FLAGS_REG in CCO mode, then emits a conditional
;; jump to the label in operand 2, taken when
;; (eq (reg:CCO FLAGS_REG) (const_int 0)) holds.
;; NOTE(review): throughout this region pattern names, output templates and
;; mode references such as "negv3", "neg{}", "(match_operand: 0" and bare
;; "mode" look like they lost their "<...>" iterator substitutions --
;; confirm against the upstream file.
(define_expand "negv3"
  [(parallel [(set (reg:CCO FLAGS_REG)
                   (unspec:CCO
                     [(match_operand:SWI 1 "register_operand")
                      (match_dup 3)] UNSPEC_CC_NE))
              (set (match_operand:SWI 0 "register_operand")
                   (neg:SWI (match_dup 1)))])
   (set (pc) (if_then_else
               (eq (reg:CCO FLAGS_REG) (const_int 0))
               (label_ref (match_operand 2))
               (pc)))]
  ""
{
  /* Operand 3 is the constant that has only the top bit of the mode set.  */
  operands[3]
    = gen_int_mode (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1),
                    mode);
})

;; Insn for the flag-setting negate.  Besides ix86_unary_operator_ok, the
;; condition requires operand 2 to satisfy mode_signbit_p.
(define_insn "*negv3"
  [(set (reg:CCO FLAGS_REG)
        (unspec:CCO [(match_operand:SWI 1 "nonimmediate_operand" "0")
                     (match_operand:SWI 2 "const_int_operand")]
                    UNSPEC_CC_NE))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=m")
        (neg:SWI (match_dup 1)))]
  "ix86_unary_operator_ok (NEG, mode, operands) && mode_signbit_p (mode, operands[2])"
  "neg{}\t%0"
  [(set_attr "type" "negnot")
   (set_attr "mode" "")])

;; Optimize *negsi_1 followed by *cmpsi_ccno_1 (PR target/91384)
;; Matches: copy operand 1 to operand 0, negate operand 0 (clobbering
;; flags), then compare operand 1 with zero in CCZ mode.  The replacement
;; keeps the copy and uses one parallel that negates operand 0 and sets
;; CCZ from the negated value, so the separate compare disappears.
(define_peephole2
  [(set (match_operand:SWI 0 "general_reg_operand")
        (match_operand:SWI 1 "general_reg_operand"))
   (parallel [(set (match_dup 0) (neg:SWI (match_dup 0)))
              (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0)))]
  ""
  [(set (match_dup 0) (match_dup 1))
   (parallel [(set (reg:CCZ FLAGS_REG)
                   (compare:CCZ (neg:SWI (match_dup 0)) (const_int 0)))
              (set (match_dup 0) (neg:SWI (match_dup 0)))])])

;; Special expand pattern to handle integer mode abs
;; When TARGET_EXPAND_ABS is set the preparation code emits the
;; shift/xor/minus sequence itself and finishes with DONE; otherwise it
;; falls through and the abs-with-clobber template above is emitted.
(define_expand "abs2"
  [(parallel
     [(set (match_operand:SDWIM 0 "register_operand")
           (abs:SDWIM
             (match_operand:SDWIM 1 "general_operand")))
      (clobber (reg:CC FLAGS_REG))])]
  "TARGET_CMOVE && (mode != QImode || !TARGET_PARTIAL_REG_STALL)"
{
  if (TARGET_EXPAND_ABS)
    {
      machine_mode mode = mode;
      operands[1] = force_reg (mode, operands[1]);

      /* Generate rtx abs using:
         abs (x) = (((signed) x >> (W-1)) ^ x) - ((signed) x >> (W-1)) */

      /* The shift count is precision - 1, i.e. W-1 in the formula.  */
      rtx shift_amount = gen_int_mode (GET_MODE_PRECISION (mode) - 1, QImode);
      rtx shift_dst = expand_simple_binop (mode, ASHIFTRT, operands[1],
                                           shift_amount, NULL_RTX,
                                           0, OPTAB_DIRECT);
      rtx xor_dst = expand_simple_binop (mode, XOR, shift_dst, operands[1],
                                         operands[0], 0, OPTAB_DIRECT);
      rtx minus_dst = expand_simple_binop (mode, MINUS, xor_dst, shift_dst,
                                           operands[0], 0, OPTAB_DIRECT);
      /* expand_simple_binop was given operands[0] as target; copy only if
         the result ended up elsewhere.  */
      if (!rtx_equal_p (minus_dst, operands[0]))
        emit_move_insn (operands[0], minus_dst);
      DONE;
    }
})

;; Double-word abs.  Valid only while ix86_pre_reload_split () holds and
;; always split ("&& 1").  The replacement sequence is:
;;   1. negate operand 1 into operand 2, setting CCC;
;;   2. operand 5 = (ltu flags) + operand 4 + 0;
;;   3. negate operand 5 in place, setting CCGOC;
;;   4. two if_then_else sets keyed on "ge": operand 0 picks operand 2 or 1,
;;      operand 3 picks operand 5 or 4.
;; The preparation code forces operand 1 into a register, allocates
;; operand 2 and calls split_double_mode on three operands (presumably
;; leaving low halves in operands[0..2] and high halves in operands[3..5]
;; -- confirm against split_double_mode).
(define_insn_and_split "*abs2_doubleword"
  [(set (match_operand: 0 "register_operand")
        (abs: (match_operand: 1 "general_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_CMOVE && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (reg:CCC FLAGS_REG)
           (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
      (set (match_dup 2) (neg:DWIH (match_dup 1)))])
   (parallel
     [(set (match_dup 5)
           (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                                 (match_dup 4))
                      (const_int 0)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (reg:CCGOC FLAGS_REG)
           (compare:CCGOC
             (neg:DWIH (match_dup 5))
             (const_int 0)))
      (set (match_dup 5)
           (neg:DWIH (match_dup 5)))])
   (set (match_dup 0)
        (if_then_else:DWIH
          (ge (reg:CCGOC FLAGS_REG) (const_int 0))
          (match_dup 2)
          (match_dup 1)))
   (set (match_dup 3)
        (if_then_else:DWIH
          (ge (reg:CCGOC FLAGS_REG) (const_int 0))
          (match_dup 5)
          (match_dup 4)))]
{
  operands[1] = force_reg (mode, operands[1]);
  operands[2] = gen_reg_rtx (mode);

  split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]);
})

;; Double-word negated abs: same sequence as "*abs2_doubleword", but the
;; matched source is (neg (abs ...)) and the final selections test "lt"
;; instead of "ge".
(define_insn_and_split "*nabs2_doubleword"
  [(set (match_operand: 0 "register_operand")
        (neg: (abs: (match_operand: 1 "general_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_CMOVE && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (reg:CCC FLAGS_REG)
           (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
      (set (match_dup 2) (neg:DWIH (match_dup 1)))])
   (parallel
     [(set (match_dup 5)
           (plus:DWIH (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
                                 (match_dup 4))
                      (const_int 0)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (reg:CCGOC FLAGS_REG)
           (compare:CCGOC
             (neg:DWIH (match_dup 5))
             (const_int 0)))
      (set (match_dup 5)
           (neg:DWIH (match_dup 5)))])
   (set (match_dup 0)
        (if_then_else:DWIH
          (lt (reg:CCGOC FLAGS_REG) (const_int 0))
          (match_dup 2)
          (match_dup 1)))
   (set (match_dup 3)
        (if_then_else:DWIH
          (lt (reg:CCGOC FLAGS_REG) (const_int 0))
          (match_dup 5)
          (match_dup 4)))]
{
  operands[1] = force_reg (mode, operands[1]);
  operands[2] = gen_reg_rtx (mode);

  split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]);
})

;; Single-word abs (SWI modes), split before reload into a flag-setting
;; negate into a fresh register (operand 2) followed by an if_then_else
;; on "ge" that selects operand 2 or operand 1.
(define_insn_and_split "*abs2_1"
  [(set (match_operand:SWI 0 "register_operand")
        (abs:SWI
          (match_operand:SWI 1 "general_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_CMOVE && (mode != QImode || !TARGET_PARTIAL_REG_STALL) && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (reg:CCGOC FLAGS_REG)
           (compare:CCGOC
             (neg:SWI (match_dup 1))
             (const_int 0)))
      (set (match_dup 2)
           (neg:SWI (match_dup 1)))])
   (set (match_dup 0)
        (if_then_else:SWI
          (ge (reg:CCGOC FLAGS_REG) (const_int 0))
          (match_dup 2)
          (match_dup 1)))]
{
  operands[1] = force_reg (mode, operands[1]);
  operands[2] = gen_reg_rtx (mode);
})

;; Single-word negated abs: as "*abs2_1" but matching (neg (abs ...)) and
;; selecting on "lt".
(define_insn_and_split "*nabs2_1"
  [(set (match_operand:SWI 0 "register_operand")
        (neg:SWI
          (abs:SWI
            (match_operand:SWI 1 "general_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_CMOVE && (mode != QImode || !TARGET_PARTIAL_REG_STALL) && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (reg:CCGOC FLAGS_REG)
           (compare:CCGOC
             (neg:SWI (match_dup 1))
             (const_int 0)))
      (set (match_dup 2)
           (neg:SWI (match_dup 1)))])
   (set (match_dup 0)
        (if_then_else:SWI
          (lt (reg:CCGOC FLAGS_REG) (const_int 0))
          (match_dup 2)
          (match_dup 1)))]
{
  operands[1] = force_reg (mode, operands[1]);
  operands[2] = gen_reg_rtx (mode);
})

;; TFmode abs/neg expander (absneg code iterator); all work is delegated to
;; ix86_expand_fp_absneg_operator.
(define_expand "tf2"
  [(set (match_operand:TF 0 "register_operand")
        (absneg:TF (match_operand:TF 1 "register_operand")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (, TFmode, operands); DONE;")

;; TFmode abs/neg carrying an extra (use ...) of vector operand 2.  After
;; reload it becomes a single two-input set over operands 1 and 2.  The
;; preparation code swaps operands 1 and 2 when, with AVX, operand 1 is a
;; MEM, or, without AVX, operand 2 matches the destination.
(define_insn_and_split "*tf2_1"
  [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
        (absneg:TF
          (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m")))
   (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (:TF (match_dup 1) (match_dup 2)))]
{
  if (TARGET_AVX)
    {
      if (MEM_P (operands[1]))
        std::swap (operands[1], operands[2]);
    }
  else
    {
      if (operands_match_p (operands[0], operands[2]))
        std::swap (operands[1], operands[2]);
    }
}
  [(set_attr "isa" "noavx,noavx,avx,avx")])

;; TFmode negated abs: same operand handling as "*tf2_1", split after
;; reload into an ior of operands 1 and 2.
(define_insn_and_split "*nabstf2_1"
  [(set (match_operand:TF 0 "register_operand" "=x,x,Yv,Yv")
        (neg:TF
          (abs:TF
            (match_operand:TF 1 "vector_operand" "0,xBm,Yv,m"))))
   (use (match_operand:TF 2 "vector_operand" "xBm,0,Yvm,Yv"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (ior:TF (match_dup 1) (match_dup 2)))]
{
  if (TARGET_AVX)
    {
      if (MEM_P (operands[1]))
        std::swap (operands[1], operands[2]);
    }
  else
    {
      if (operands_match_p (operands[0], operands[2]))
        std::swap (operands[1], operands[2]);
    }
}
  [(set_attr "isa" "noavx,noavx,avx,avx")])

;; HFmode abs/neg expander, enabled for TARGET_AVX512FP16.
(define_expand "hf2"
  [(set (match_operand:HF 0 "register_operand")
        (absneg:HF (match_operand:HF 1 "register_operand")))]
  "TARGET_AVX512FP16"
  "ix86_expand_fp_absneg_operator (, HFmode, operands); DONE;")

;; X87MODEF abs/neg expander, enabled for x87 or for SSE math in an SSE
;; float mode.
(define_expand "2"
  [(set (match_operand:X87MODEF 0 "register_operand")
        (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand")))]
  "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)"
  "ix86_expand_fp_absneg_operator (, mode, operands); DONE;")

;; Changing of sign for FP values is doable using integer unit too.
;; x87-only abs/neg (used when the mode is not handled by SSE math).
;; Alternative 0 uses an x87 register ("f"), alternative 1 a general
;; register ("!r"); the insn is always split ("#") by the two define_splits
;; that follow.
;; NOTE(review): names and mode references in this region ("*2_i387_1",
;; "(match_operand: 2", "(: (match_dup 1) ...", bare "mode") look like they
;; lost their "<...>" iterator substitutions -- confirm against upstream.
(define_insn "*2_i387_1"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r")
        (absneg:X87MODEF
          (match_operand:X87MODEF 1 "register_operand" "0,0")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_80387 && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)"
  "#")

;; After reload, x87-register operands: drop the flags clobber and keep the
;; plain abs/neg set.
(define_split
  [(set (match_operand:X87MODEF 0 "fp_register_operand")
        (absneg:X87MODEF (match_operand:X87MODEF 1 "fp_register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_80387 && reload_completed"
  [(set (match_dup 0) (absneg:X87MODEF (match_dup 1)))])

;; After reload, general-register operands: the replacement is generated
;; entirely by ix86_split_fp_absneg_operator.
(define_split
  [(set (match_operand:X87MODEF 0 "general_reg_operand")
        (absneg:X87MODEF (match_operand:X87MODEF 1 "general_reg_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_80387 && reload_completed"
  [(const_int 0)]
  "ix86_split_fp_absneg_operator (, mode, operands); DONE;")

;; HFmode abs/neg with a V8HF (use ...) operand.  After reload operands 0
;; and 1 are rewritten as V8HF lowpart subregs and the insn becomes a
;; single V8HF two-input set.
(define_insn_and_split "*hf2_1"
  [(set (match_operand:HF 0 "register_operand" "=Yv")
        (absneg:HF
          (match_operand:HF 1 "register_operand" "Yv")))
   (use (match_operand:V8HF 2 "vector_operand" "Yvm"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_AVX512FP16"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (:V8HF (match_dup 1) (match_dup 2)))]
{
  operands[0] = lowpart_subreg (V8HFmode, operands[0], HFmode);
  operands[1] = lowpart_subreg (V8HFmode, operands[1], HFmode);
})

;; MODEF abs/neg with five alternatives: 0-2 use SSE registers (isa
;; noavx,noavx,avx), 3 uses an x87 register and 4 a general register.
;; The "enabled" attribute is computed as follows: when the mode is an SSE
;; float mode under SSE math, alternatives 3 and 4 are enabled only with
;; TARGET_MIX_SSE_I387 and the rest keep the default; otherwise only
;; alternatives 3 and 4 are enabled.
(define_insn "*2_1"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv,f,!r")
        (absneg:MODEF
          (match_operand:MODEF 1 "register_operand" "0,x,Yv,0,0")))
   (use (match_operand: 2 "vector_operand" "xBm,0,Yvm,X,X"))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)"
  "#"
  [(set_attr "isa" "noavx,noavx,avx,*,*")
   (set (attr "enabled")
     (if_then_else
       (match_test ("SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH"))
       (if_then_else
         (eq_attr "alternative" "3,4")
         (symbol_ref "TARGET_MIX_SSE_I387")
         (const_string "*"))
       (if_then_else
         (eq_attr "alternative" "3,4")
         (symbol_ref "true")
         (symbol_ref "false"))))])

;; After reload, SSE-register operands: rewrite operands 0 and 1 as
;; lowpart subregs in the vector mode and emit one two-input vector set.
;; Without AVX, operands 1 and 2 are swapped when operand 2 matches the
;; destination.
(define_split
  [(set (match_operand:MODEF 0 "sse_reg_operand")
        (absneg:MODEF
          (match_operand:MODEF 1 "sse_reg_operand")))
   (use (match_operand: 2 "vector_operand"))
   (clobber (reg:CC FLAGS_REG))]
  "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && reload_completed"
  [(set (match_dup 0)
        (: (match_dup 1) (match_dup 2)))]
{
  machine_mode mode = mode;
  machine_mode vmode = mode;

  operands[0] = lowpart_subreg (vmode, operands[0], mode);
  operands[1] = lowpart_subreg (vmode, operands[1], mode);

  if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
    std::swap (operands[1], operands[2]);
})

;; After reload, x87-register operands: drop both the (use ...) and the
;; flags clobber, keeping the plain abs/neg set.
(define_split
  [(set (match_operand:MODEF 0 "fp_register_operand")
        (absneg:MODEF (match_operand:MODEF 1 "fp_register_operand")))
   (use (match_operand 2))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_80387 && reload_completed"
  [(set (match_dup 0) (absneg:MODEF (match_dup 1)))])

;; After reload, general-register operands: replacement generated by
;; ix86_split_fp_absneg_operator.
(define_split
  [(set (match_operand:MODEF 0 "general_reg_operand")
        (absneg:MODEF (match_operand:MODEF 1 "general_reg_operand")))
   (use (match_operand 2))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_80387 && reload_completed"
  [(const_int 0)]
  "ix86_split_fp_absneg_operator (, mode, operands); DONE;")

;; MODEF negated abs for SSE math: split after reload into an ior of the
;; vector-mode lowpart of operand 1 with operand 2, with the same non-AVX
;; operand swap as the SSE split above.
(define_insn_and_split "*nabs2_1"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x,Yv")
        (neg:MODEF
          (abs:MODEF
            (match_operand:MODEF 1 "register_operand" "0,x,Yv"))))
   (use (match_operand: 2 "vector_operand" "xBm,0,Yvm"))]
  "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (ior: (match_dup 1) (match_dup 2)))]
{
  machine_mode mode = mode;
  machine_mode vmode = mode;

  operands[0] = lowpart_subreg (vmode, operands[0], mode);
  operands[1] = lowpart_subreg (vmode, operands[1], mode);

  if (!TARGET_AVX && operands_match_p (operands[0], operands[2]))
    std::swap (operands[1], operands[2]);
}
  [(set_attr "isa" "noavx,noavx,avx")])

;; Conditionalize these after reload.  If they match before reload, we
;; lose the clobber and ability to use integer instructions.
;; Plain x87 abs/neg without clobber; only valid once reload has completed.
;; NOTE(review): the output template here is an empty string and several
;; names/attributes in this region ("*2_i387", "copysign3", "not{}",
;; "(match_operand: 0", the trailing empty "isa" alternative, bare "mode")
;; look like they lost their "<...>" iterator substitutions -- confirm
;; against upstream.
(define_insn "*2_i387"
  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
        (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))]
  "TARGET_80387 && reload_completed"
  ""
  [(set_attr "type" "fsgn")
   (set_attr "mode" "")])

;; Copysign instructions

;; copysign expander (SSEMODEF); all work is done by ix86_expand_copysign.
(define_expand "copysign3"
  [(match_operand:SSEMODEF 0 "register_operand")
   (match_operand:SSEMODEF 1 "nonmemory_operand")
   (match_operand:SSEMODEF 2 "register_operand")]
  "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || (TARGET_SSE && (mode == TFmode)) || (TARGET_AVX512FP16 && (mode ==HFmode))"
  "ix86_expand_copysign (operands); DONE;")

;; xorsign expander (MODEFH).  When both inputs are the same rtx it emits
;; an abs of operand 1 instead of calling ix86_expand_xorsign.
(define_expand "xorsign3"
  [(match_operand:MODEFH 0 "register_operand")
   (match_operand:MODEFH 1 "register_operand")
   (match_operand:MODEFH 2 "register_operand")]
  "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || mode == HFmode"
{
  if (rtx_equal_p (operands[1], operands[2]))
    emit_insn (gen_abs2 (operands[0], operands[1]));
  else
    ix86_expand_xorsign (operands);
  DONE;
})

;; One complement instructions

;; Bitwise-not expander (SDWIM); delegates to ix86_expand_unary_operator,
;; passing TARGET_APX_NDD as the last argument.
(define_expand "one_cmpl2"
  [(set (match_operand:SDWIM 0 "nonimmediate_operand")
        (not:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))]
  ""
{
  ix86_expand_unary_operator (NOT, mode, operands, TARGET_APX_NDD);
  DONE;
})

;; Double-word not: after reload, split_double_mode splits operands 0 and 1
;; and the insn becomes two DWIH nots (operand 0 from 1, operand 2 from 3).
(define_insn_and_split "*one_cmpl2_doubleword"
  [(set (match_operand: 0 "nonimmediate_operand" "=ro,&r")
        (not: (match_operand: 1 "nonimmediate_operand" "0,ro")))]
  "ix86_unary_operator_ok (NOT, mode, operands, TARGET_APX_NDD)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (not:DWIH (match_dup 1)))
   (set (match_dup 2)
        (not:DWIH (match_dup 3)))]
  "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[2]);"
  [(set_attr "isa" "*,apx_ndd,")])

;; SWI248 not.  Alternative 0 is the in-place form, alternative 1 the
;; two-operand form (isa apx_ndd), alternative 2 operates on "k" registers
;; and is emitted as "#".
(define_insn "*one_cmpl2_1"
  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
        (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,rm,k")))]
  "ix86_unary_operator_ok (NOT, mode, operands, TARGET_APX_NDD)"
  "@ not{}\t%0 not{}\t{%1, %0|%0, %1} #"
  [(set_attr "isa" "*,apx_ndd,")
   (set_attr "type" "negnot,negnot,msklog")
   (set_attr "mode" "")])

;; SImode not whose result is zero-extended to DImode; 64-bit only.
(define_insn "*one_cmplsi2_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r,?k")
        (zero_extend:DI
          (not:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,k"))))]
  "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands, TARGET_APX_NDD)"
  "@ not{l}\t%k0 not{l}\t{%1, %k0|%k0, %1} #"
  [(set_attr "isa" "x64,apx_ndd,avx512bw")
   (set_attr "type" "negnot,negnot,msklog")
   (set_attr "mode" "SI,SI,SI")])

;; QImode not.  Alternative 1 emits not{l} on the SImode name of the
;; register (%k0); its "mode" attribute is SI.  Alternative 3 ("k"
;; registers) has mode HI when !TARGET_AVX512DQ and QI otherwise.
(define_insn "*one_cmplqi2_1"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r,?k")
        (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,rm,k")))]
  "ix86_unary_operator_ok (NOT, QImode, operands, TARGET_APX_NDD)"
  "@ not{b}\t%0 not{l}\t%k0 not{b}\t{%1, %0|%0, %1} #"
  [(set_attr "isa" "*,*,apx_ndd,avx512f")
   (set_attr "type" "negnot,negnot,negnot,msklog")
   (set (attr "mode")
     (cond [(eq_attr "alternative" "1")
              (const_string "SI")
            (and (eq_attr "alternative" "3")
                 (match_test "!TARGET_AVX512DQ"))
              (const_string "HI")
           ]
           (const_string "QI")))
   ;; Potential partial reg stall on alternative 1.
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "1")
              (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
           (symbol_ref "true")))])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Bitwise not into a strict_low_part destination (SWI12).  Alternative 0
;; operates in place.  Alternative 1 (source in a different register) is
;; split after reload, when operands 0 and 1 differ, into a low-part copy
;; followed by an in-place not.
;; NOTE(review): the constraint strings "+,&" and "0,!", the "not{}"
;; template and the empty "mode" attribute look like they lost "<...>"
;; substitutions -- confirm against upstream.
(define_insn_and_split "*one_cmpl_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+,&"))
        (not:SWI12 (match_operand:SWI12 1 "register_operand" "0,!")))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
  "@ not{}\t%0 #"
  "&& reload_completed && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (set (strict_low_part (match_dup 0))
        (not:SWI12 (match_dup 0)))]
  ""
  [(set_attr "type" "negnot")
   (set_attr "mode" "")])

;; Not that also sets the flags from a compare of the result with zero.
;; Always emitted as "#"; the define_split below rewrites it.
(define_insn "*one_cmpl2_2"
  [(set (reg FLAGS_REG)
        (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))
                 (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r")
        (not:SWI (match_dup 1)))]
  "ix86_match_ccmode (insn, CCNOmode) && ix86_unary_operator_ok (NOT, mode, operands, TARGET_APX_NDD)"
  "#"
  [(set_attr "type" "alu1")
   (set_attr "isa" "*,apx_ndd")
   (set_attr "mode" "")])

;; Rewrite the flag-setting not as an xor with -1 in both the compare and
;; the result set.
(define_split
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 2 "compare_operator"
          [(not:SWI (match_operand:SWI 3 "nonimmediate_operand"))
           (const_int 0)]))
   (set (match_operand:SWI 1 "nonimmediate_operand")
        (not:SWI (match_dup 3)))]
  "ix86_match_ccmode (insn, CCNOmode)"
  [(parallel [(set (match_dup 0)
                   (match_op_dup 2 [(xor:SWI (match_dup 3) (const_int -1))
                                    (const_int 0)]))
              (set (match_dup 1)
                   (xor:SWI (match_dup 3) (const_int -1)))])])

;; SImode flag-setting not with the result zero-extended to DImode;
;; 64-bit only.  Always emitted as "#".
(define_insn "*one_cmplsi2_2_zext"
  [(set (reg FLAGS_REG)
        (compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm"))
                 (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r,r")
        (zero_extend:DI (not:SI (match_dup 1))))]
  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) && ix86_unary_operator_ok (NOT, SImode, operands, TARGET_APX_NDD)"
  "#"
  [(set_attr "type" "alu1")
   (set_attr "isa" "*,apx_ndd")
   (set_attr "mode" "SI")])

;; Zero-extending counterpart of the xor-with-minus-one rewrite.
(define_split
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 2 "compare_operator"
          [(not:SI (match_operand:SI 3 "nonimmediate_operand"))
           (const_int 0)]))
   (set (match_operand:DI 1 "register_operand")
        (zero_extend:DI (not:SI (match_dup 3))))]
  "ix86_match_ccmode (insn, CCNOmode)"
  [(parallel [(set (match_dup 0)
                   (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
                                    (const_int 0)]))
              (set (match_dup 1)
                   (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Not of the 8-bit field at bit offset 8 (zero_extract ... 8 8), emitted
;; on the "high" byte register name (%h0).  When source and destination
;; registers differ, the split first copies the field from operand 1 to
;; operand 0 and then negates it in place.
(define_insn_and_split "*one_cmplqi_ext_1"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (not:QI
            (subreg:QI
              (match_operator:SWI248 2 "extract_operator"
                [(match_operand 1 "int248_register_operand" "0,!Q")
                 (const_int 8)
                 (const_int 8)]) 0)) 0))]
  ""
  "@ not{b}\t%h0 #"
  "reload_completed && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
        (zero_extract:SWI248
          (match_dup 1) (const_int 8) (const_int 8)))
   (set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
        (subreg:SWI248
          (not:QI
            (subreg:QI
              (match_op_dup 2
                [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))]
  ""
  [(set_attr "type" "negnot")
   (set_attr "mode" "QI")])

;; Shift instructions

;; DImode shifts are implemented using the i386 "shift double" opcode,
;; which is written as "sh[lr]d[lw] imm,reg,reg/mem".  If the shift count
;; is variable, then the count is in %cl and the "imm" operand is dropped
;; from the assembler input.
;;
;; This instruction shifts the target reg/mem as usual, but instead of
;; shifting in zeros, bits are shifted in from reg operand.  If the insn
;; is a left shift double, bits are taken from the high order bits of
;; reg, else if the insn is a shift right double, bits are taken from the
;; low order bits of reg.  So if %eax is "1234" and %edx is "5678",
;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345".
;;
;; Since sh[lr]d does not change the `reg' operand, that is done
;; separately, making all shifts emit pairs of shift double and normal
;; shift.
;; Since sh[lr]d does not shift more than 31 bits, and we wish to
;; support a 63 bit shift, each shift where the count is in a reg expands
;; to a pair of shifts, a branch, a shift by 32 and a label.
;;
;; If the shift count is a constant, we need never emit more than one
;; shift pair, instead using moves and sign extension for counts greater
;; than 31.

;; NOTE(review): throughout this region names, predicates and size
;; expressions such as "ashl3", the empty predicate strings,
;; "(match_operand: 0", "( * BITS_PER_UNIT)" and bare "mode" look like they
;; lost their "<...>" iterator substitutions -- confirm against upstream.

;; Left-shift expander (SDWIM); delegates to ix86_expand_binary_operator.
(define_expand "ashl3"
  [(set (match_operand:SDWIM 0 "")
        (ashift:SDWIM (match_operand:SDWIM 1 "")
                      (match_operand:QI 2 "nonmemory_operand")))]
  ""
{
  ix86_expand_binary_operator (ASHIFT, mode, operands, TARGET_APX_NDD);
  DONE;
})

;; Double-word left shift whose count is (subreg:QI (and reg const)).
;; Split before reload.  If the mask constant still has the word-size bit
;; set, the preparation code just emits the generic doubleword shift on the
;; QImode lowpart of the count.  Otherwise it splits the operands into
;; halves and emits a shift-double style parallel for the high half
;; followed by a plain shift for the low half, re-masking the count with an
;; explicit AND when the constant does not cover all low count bits.
(define_insn_and_split "*ashl3_doubleword_mask"
  [(set (match_operand: 0 "register_operand")
        (ashift: (match_operand: 1 "register_operand")
          (subreg:QI
            (and
              (match_operand 2 "int248_register_operand" "c")
              (match_operand 3 "const_int_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "((INTVAL (operands[3]) & ( * BITS_PER_UNIT)) == 0 || ((INTVAL (operands[3]) & (2 * * BITS_PER_UNIT - 1)) == (2 * * BITS_PER_UNIT - 1))) && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 6)
           (ior:DWIH (ashift:DWIH (match_dup 6)
                       (and:QI (match_dup 2) (match_dup 8)))
                     (subreg:DWIH
                       (lshiftrt: (zero_extend: (match_dup 5))
                         (minus:QI (match_dup 9)
                                   (and:QI (match_dup 2) (match_dup 8)))) 0)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 4)
           (ashift:DWIH (match_dup 5) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  if ((INTVAL (operands[3]) & ( * BITS_PER_UNIT)) != 0)
    {
      operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
      operands[2] = gen_lowpart (QImode, operands[2]);
      emit_insn (gen_ashl3_doubleword (operands[0], operands[1],
                                       operands[2]));
      DONE;
    }
  split_double_mode (mode, &operands[0], 2, &operands[4], &operands[6]);
  /* Operands 8 and 9 are the count mask and the bit width used by the
     shift-double pattern above.  */
  operands[8] = GEN_INT ( * BITS_PER_UNIT - 1);
  operands[9] = GEN_INT ( * BITS_PER_UNIT);
  if ((INTVAL (operands[3]) & (( * BITS_PER_UNIT) - 1))
      != (( * BITS_PER_UNIT) - 1))
    {
      rtx xops[3];
      xops[0] = gen_reg_rtx (GET_MODE (operands[2]));
      xops[1] = operands[2];
      xops[2] = GEN_INT (INTVAL (operands[3])
                         & (( * BITS_PER_UNIT) - 1));
      ix86_expand_binary_operator (AND, GET_MODE (operands[2]), xops);
      operands[2] = xops[0];
    }

  operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
  operands[2] = gen_lowpart (QImode, operands[2]);

  if (!rtx_equal_p (operands[6], operands[7]))
    emit_move_insn (operands[6], operands[7]);
})

;; Same as "*ashl3_doubleword_mask" for a count that is already a QImode
;; (and:QI reg const); the re-masking uses gen_andqi3 into a fresh QImode
;; register.
(define_insn_and_split "*ashl3_doubleword_mask_1"
  [(set (match_operand: 0 "register_operand")
        (ashift: (match_operand: 1 "register_operand")
          (and:QI
            (match_operand:QI 2 "register_operand" "c")
            (match_operand:QI 3 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "((INTVAL (operands[3]) & ( * BITS_PER_UNIT)) == 0 || ((INTVAL (operands[3]) & (2 * * BITS_PER_UNIT - 1)) == (2 * * BITS_PER_UNIT - 1))) && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 6)
           (ior:DWIH (ashift:DWIH (match_dup 6)
                       (and:QI (match_dup 2) (match_dup 8)))
                     (subreg:DWIH
                       (lshiftrt: (zero_extend: (match_dup 5))
                         (minus:QI (match_dup 9)
                                   (and:QI (match_dup 2) (match_dup 8)))) 0)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 4)
           (ashift:DWIH (match_dup 5) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  if ((INTVAL (operands[3]) & ( * BITS_PER_UNIT)) != 0)
    {
      emit_insn (gen_ashl3_doubleword (operands[0], operands[1],
                                       operands[2]));
      DONE;
    }
  split_double_mode (mode, &operands[0], 2, &operands[4], &operands[6]);
  operands[8] = GEN_INT ( * BITS_PER_UNIT - 1);
  operands[9] = GEN_INT ( * BITS_PER_UNIT);
  if ((INTVAL (operands[3]) & (( * BITS_PER_UNIT) - 1))
      != (( * BITS_PER_UNIT) - 1))
    {
      rtx tem = gen_reg_rtx (QImode);
      emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
      operands[2] = tem;
    }

  if (!rtx_equal_p (operands[6], operands[7]))
    emit_move_insn (operands[6], operands[7]);
})

;; Generic double-word left shift (DWI); always "#", split by the
;; define_split and peephole2 that follow.
(define_insn "ashl3_doubleword"
  [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
        (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n,r")
                    (match_operand:QI 2 "nonmemory_operand" "c,c")))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  [(set_attr "type" "multi")
   (set_attr "isa" "*,apx_ndd")])

;; Once epilogue_completed, split via ix86_split_ashl_ndd (APX NDD with a
;; register source different from the destination) or ix86_split_ashl,
;; passing no scratch register.
(define_split
  [(set (match_operand:DWI 0 "register_operand")
        (ashift:DWI (match_operand:DWI 1 "nonmemory_operand")
                    (match_operand:QI 2 "nonmemory_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "epilogue_completed"
  [(const_int 0)]
{
  if (TARGET_APX_NDD
      && !rtx_equal_p (operands[0], operands[1])
      && REG_P (operands[1]))
    ix86_split_ashl_ndd (operands, NULL_RTX);
  else
    ix86_split_ashl (operands, NULL_RTX, mode);
  DONE;
})

;; By default we don't ask for a scratch register, because when DWImode
;; values are manipulated, registers are already at a premium.  But if
;; we have one handy, we won't turn it away.

(define_peephole2
  [(match_scratch:DWIH 3 "r")
   (parallel [(set (match_operand: 0 "register_operand")
                   (ashift:
                     (match_operand: 1 "nonmemory_operand")
                     (match_operand:QI 2 "nonmemory_operand")))
              (clobber (reg:CC FLAGS_REG))])
   (match_dup 3)]
  "TARGET_CMOVE"
  [(const_int 0)]
{
  if (TARGET_APX_NDD
      && !rtx_equal_p (operands[0], operands[1])
      && (REG_P (operands[1])))
    ix86_split_ashl_ndd (operands, operands[3]);
  else
    ix86_split_ashl (operands, operands[3], mode);
  DONE;
})

;; Double-word left shift of an extended single-word value by a constant
;; that the insn condition bounds between one and two times
;; "* BITS_PER_UNIT".  After reload the destination is split into
;; operands[0] and operands[3]; operand 1 is moved (and, for a non-zero
;; remaining count "bits", shifted) into operands[3], and operands[0] is
;; cleared.
(define_insn_and_split "*ashl3_doubleword_highpart"
  [(set (match_operand: 0 "register_operand" "=r")
        (ashift:
          (any_extend: (match_operand:DWIH 1 "nonimmediate_operand" "rm"))
          (match_operand:QI 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[2]) >= * BITS_PER_UNIT && INTVAL (operands[2]) < * BITS_PER_UNIT * 2"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  split_double_mode (mode, &operands[0], 1, &operands[0], &operands[3]);
  int bits = INTVAL (operands[2]) - ( * BITS_PER_UNIT);
  bool op_equal_p = rtx_equal_p (operands[3], operands[1]);
  if (bits == 0)
    {
      if (!op_equal_p)
        emit_move_insn (operands[3], operands[1]);
    }
  else
    {
      /* With APX NDD the shift reads operand 1 directly, so no copy is
         emitted first.  */
      if (!op_equal_p && !TARGET_APX_NDD)
        emit_move_insn (operands[3], operands[1]);
      rtx op_tmp = TARGET_APX_NDD ? operands[1] : operands[3];
      emit_insn (gen_ashl3 (operands[3], op_tmp, GEN_INT (bits)));
    }
  ix86_expand_clear (operands[0]);
  DONE;
})

;; 64-bit shld with the count masked by 63: operand 0 is read-write,
;; operand 1 supplies the bits shifted in.
(define_insn "x86_64_shld"
  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
        (ior:DI (ashift:DI (match_dup 0)
                  (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
                          (const_int 63)))
                (subreg:DI
                  (lshiftrt:TI
                    (zero_extend:TI
                      (match_operand:DI 1 "register_operand" "r"))
                    (minus:QI (const_int 64)
                              (and:QI (match_dup 2) (const_int 63)))) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "shld{q}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "mode" "DI")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])

;; APX NDD form of the 64-bit shld: separate destination (operand 0) and
;; shifted source (operand 1).
(define_insn "x86_64_shld_ndd"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
                  (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
                          (const_int 63)))
                (subreg:DI
                  (lshiftrt:TI
                    (zero_extend:TI
                      (match_operand:DI 2 "register_operand" "r"))
                    (minus:QI (const_int 64)
                              (and:QI (match_dup 3) (const_int 63)))) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD"
  "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ishift")
   (set_attr "mode" "DI")])

;; 64-bit shld with constant counts; the condition requires operand 3 to
;; equal 64 minus operand 2.
(define_insn "x86_64_shld_1"
  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
        (ior:DI (ashift:DI (match_dup 0)
                           (match_operand:QI 2 "const_0_to_63_operand"))
                (subreg:DI
                  (lshiftrt:TI
                    (zero_extend:TI
                      (match_operand:DI 1 "register_operand" "r"))
                    (match_operand:QI 3 "const_0_to_255_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
  "shld{q}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "mode" "DI")
   (set_attr "length_immediate" "1")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])

;; APX NDD form of the constant-count 64-bit shld.
(define_insn "x86_64_shld_ndd_1"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
                           (match_operand:QI 3 "const_0_to_63_operand"))
                (subreg:DI
                  (lshiftrt:TI
                    (zero_extend:TI
                      (match_operand:DI 2 "register_operand" "r"))
                    (match_operand:QI 4 "const_0_to_255_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
  "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ishift")
   (set_attr "mode" "DI")
   (set_attr "length_immediate" "1")])

;; (ior (ashift A n) (lshiftrt B 64-n)) on plain DImode operands.  Split
;; before reload into shld/shrd: in place when the destination equals one
;; of the inputs, via the NDD patterns with a temporary when
;; TARGET_APX_NDD, otherwise by copying operand 4 to a temporary and
;; shifting that.
(define_insn_and_split "*x86_64_shld_shrd_1_nozext"
  [(set (match_operand:DI 0 "nonimmediate_operand")
        (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand")
                           (match_operand:QI 2 "const_0_to_63_operand"))
                (lshiftrt:DI
                  (match_operand:DI 1 "nonimmediate_operand")
                  (match_operand:QI 3 "const_0_to_63_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  if (rtx_equal_p (operands[4], operands[0]))
    {
      operands[1] = force_reg (DImode, operands[1]);
      emit_insn (gen_x86_64_shld_1 (operands[0], operands[1],
                                    operands[2], operands[3]));
    }
  else if (rtx_equal_p (operands[1], operands[0]))
    {
      operands[4] = force_reg (DImode, operands[4]);
      emit_insn (gen_x86_64_shrd_1 (operands[0], operands[4],
                                    operands[3], operands[2]));
    }
  else if (TARGET_APX_NDD)
    {
      rtx tmp = gen_reg_rtx (DImode);
      if (MEM_P (operands[4]))
        {
          operands[1] = force_reg (DImode, operands[1]);
          emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1],
                                            operands[2], operands[3]));
        }
      else if (MEM_P (operands[1]))
        emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[1], operands[4],
                                          operands[3], operands[2]));
      else
        emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1],
                                          operands[2], operands[3]));
      emit_move_insn (operands[0], tmp);
    }
  else
    {
      operands[1] = force_reg (DImode, operands[1]);
      rtx tmp = gen_reg_rtx (DImode);
      emit_move_insn (tmp, operands[4]);
      emit_insn (gen_x86_64_shld_1 (tmp, operands[1],
                                    operands[2], operands[3]));
      emit_move_insn (operands[0], tmp);
    }
  DONE;
})

;; Variable-count form without explicit masking, rewritten before reload
;; into the masked shape used by "x86_64_shld".
(define_insn_and_split "*x86_64_shld_2"
  [(set (match_operand:DI 0 "nonimmediate_operand")
        (ior:DI (ashift:DI (match_dup 0)
                           (match_operand:QI 2 "nonmemory_operand"))
                (lshiftrt:DI (match_operand:DI 1 "register_operand")
                             (minus:QI (const_int 64) (match_dup 2)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 0)
                   (ior:DI (ashift:DI (match_dup 0)
                                      (and:QI (match_dup 2) (const_int 63)))
                           (subreg:DI
                             (lshiftrt:TI
                               (zero_extend:TI (match_dup 1))
                                 (minus:QI (const_int 64)
                                           (and:QI (match_dup 2)
                                                   (const_int 63)))) 0)))
              (clobber (reg:CC FLAGS_REG))])])

;; APX NDD counterpart of "*x86_64_shld_2", using a fresh register
;; (operand 4) as the shift destination.
;; NOTE(review): the replacement parallel carries
;; (set (match_dup 0) (match_dup 4)) as a third element after the clobber,
;; and the preparation code also copies operand 0 into operand 4 first.
;; Whether a three-element parallel is intended here is not evident from
;; this chunk -- confirm.
(define_insn_and_split "*x86_64_shld_ndd_2"
  [(set (match_operand:DI 0 "nonimmediate_operand")
        (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand")
                           (match_operand:QI 3 "nonmemory_operand"))
                (lshiftrt:DI (match_operand:DI 2 "register_operand")
                             (minus:QI (const_int 64) (match_dup 3)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 4)
                   (ior:DI (ashift:DI (match_dup 1)
                                      (and:QI (match_dup 3) (const_int 63)))
                           (subreg:DI
                             (lshiftrt:TI
                               (zero_extend:TI (match_dup 2))
                                 (minus:QI (const_int 64)
                                           (and:QI (match_dup 3)
                                                   (const_int 63)))) 0)))
              (clobber (reg:CC FLAGS_REG))
              (set (match_dup 0) (match_dup 4))])]
{
  operands[4] = gen_reg_rtx (DImode);
  emit_move_insn (operands[4], operands[0]);
})

;; 32-bit shld with the count masked by 31.
(define_insn "x86_shld"
  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
        (ior:SI (ashift:SI (match_dup 0)
                  (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
                          (const_int 31)))
                (subreg:SI
                  (lshiftrt:DI
                    (zero_extend:DI
                      (match_operand:SI 1 "register_operand" "r"))
                    (minus:QI (const_int 32)
                              (and:QI (match_dup 2) (const_int 31)))) 0)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "shld{l}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "mode" "SI")
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])

;; APX NDD form of the 32-bit shld.
;; NOTE(review): operand 0 uses predicate "nonimmediate_operand" with
;; constraint "=r", whereas "x86_64_shld_ndd" and "x86_shld_ndd_1" use
;; "register_operand" -- possibly an inconsistency; confirm.
(define_insn "x86_shld_ndd"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
        (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                  (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
                          (const_int 31)))
                (subreg:SI
                  (lshiftrt:DI
                    (zero_extend:DI
                      (match_operand:SI 2 "register_operand" "r"))
                    (minus:QI (const_int 32)
                              (and:QI (match_dup 3) (const_int 31)))) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD"
  "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ishift")
   (set_attr "mode" "SI")])

;; 32-bit shld with constant counts; operand 3 must equal 32 minus
;; operand 2.
(define_insn "x86_shld_1"
  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
        (ior:SI (ashift:SI (match_dup 0)
                           (match_operand:QI 2 "const_0_to_31_operand"))
                (subreg:SI
                  (lshiftrt:DI
                    (zero_extend:DI
                      (match_operand:SI 1 "register_operand" "r"))
                    (match_operand:QI 3 "const_0_to_63_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
  "shld{l}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "SI")
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])

;; APX NDD form of the constant-count 32-bit shld.
(define_insn "x86_shld_ndd_1"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                           (match_operand:QI 3 "const_0_to_31_operand"))
                (subreg:SI
                  (lshiftrt:DI
                    (zero_extend:DI
                      (match_operand:SI 2 "register_operand" "r"))
                    (match_operand:QI 4 "const_0_to_63_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD && INTVAL (operands[4]) == 32 - INTVAL (operands[3])"
  "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ishift")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "SI")])

;; SImode counterpart of "*x86_64_shld_shrd_1_nozext" with the same four
;; cases.
(define_insn_and_split "*x86_shld_shrd_1_nozext"
  [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand")
                           (match_operand:QI 2 "const_0_to_31_operand"))
                (lshiftrt:SI
                  (match_operand:SI 1 "nonimmediate_operand")
                  (match_operand:QI 3 "const_0_to_31_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[3]) == 32 - INTVAL (operands[2]) && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  if (rtx_equal_p (operands[4], operands[0]))
    {
      operands[1] = force_reg (SImode, operands[1]);
      emit_insn (gen_x86_shld_1 (operands[0], operands[1],
                                 operands[2], operands[3]));
    }
  else if (rtx_equal_p (operands[1], operands[0]))
    {
      operands[4] = force_reg (SImode, operands[4]);
      emit_insn (gen_x86_shrd_1 (operands[0], operands[4],
                                 operands[3], operands[2]));
    }
  else if (TARGET_APX_NDD)
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (MEM_P (operands[4]))
        {
          operands[1] = force_reg (SImode, operands[1]);
          emit_insn (gen_x86_shld_ndd_1 (tmp, operands[4], operands[1],
                                         operands[2], operands[3]));
        }
      else if (MEM_P (operands[1]))
        emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[1], operands[4],
                                       operands[3], operands[2]));
      else
        emit_insn (gen_x86_shld_ndd_1 (tmp, operands[4], operands[1],
                                       operands[2], operands[3]));
      emit_move_insn (operands[0], tmp);
    }
  else
    {
      operands[1] = force_reg (SImode, operands[1]);
      rtx tmp = gen_reg_rtx (SImode);
      emit_move_insn (tmp, operands[4]);
      emit_insn (gen_x86_shld_1 (tmp, operands[1],
                                 operands[2], operands[3]));
      emit_move_insn (operands[0], tmp);
    }
  DONE;
})

;; SImode variable-count form without explicit masking, rewritten into the
;; masked shape used by "x86_shld".
;; NOTE(review): the condition requires TARGET_64BIT although the pattern
;; is SImode and "x86_shld" itself is unconditional -- confirm this is
;; intended.
(define_insn_and_split "*x86_shld_2"
  [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (ashift:SI (match_dup 0)
                           (match_operand:QI 2 "nonmemory_operand"))
                (lshiftrt:SI (match_operand:SI 1 "register_operand")
                             (minus:QI (const_int 32) (match_dup 2)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 0)
                   (ior:SI (ashift:SI (match_dup 0)
                                      (and:QI (match_dup 2) (const_int 31)))
                           (subreg:SI
                             (lshiftrt:DI
                               (zero_extend:DI (match_dup 1))
                                 (minus:QI (const_int 32)
                                           (and:QI (match_dup 2)
                                                   (const_int 31)))) 0)))
              (clobber (reg:CC FLAGS_REG))])])

;; APX NDD counterpart of "*x86_shld_2"; same structure (and the same
;; three-element replacement parallel, see the review note on
;; "*x86_64_shld_ndd_2") in SImode.
(define_insn_and_split "*x86_shld_ndd_2"
  [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
                           (match_operand:QI 3 "nonmemory_operand"))
                (lshiftrt:SI (match_operand:SI 2 "register_operand")
                             (minus:QI (const_int 32) (match_dup 3)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel [(set (match_dup 4)
                   (ior:SI (ashift:SI (match_dup 1)
                                      (and:QI (match_dup 3) (const_int 31)))
                           (subreg:SI
                             (lshiftrt:DI
                               (zero_extend:DI (match_dup 2))
                                 (minus:QI (const_int 32)
                                           (and:QI (match_dup 3)
                                                   (const_int 31)))) 0)))
              (clobber (reg:CC FLAGS_REG))
              (set (match_dup 0) (match_dup 4))])]
{
  operands[4] = gen_reg_rtx (SImode);
  emit_move_insn (operands[4], operands[0]);
})

;; Branch-free shift adjustment (needs TARGET_CMOVE).  Tests the count in
;; operand 2 against the mode's bit size (operand 4) and, when the tested
;; bit is set, conditionally moves operand 1 into operand 0 and operand 3
;; into operand 1.
(define_expand "@x86_shift_adj_1"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ (and:QI (match_operand:QI 2 "register_operand")
                             (match_dup 4))
                     (const_int 0)))
   (set (match_operand:SWI48 0 "register_operand")
        (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
                            (match_operand:SWI48 1 "register_operand")
                            (match_dup 0)))
   (set (match_dup 1)
        (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
                            (match_operand:SWI48 3 "register_operand")
                            (match_dup 1)))]
  "TARGET_CMOVE"
  "operands[4] = GEN_INT (GET_MODE_BITSIZE (mode));")

;; Branching shift adjustment.  Tests the count in operand 2 against the
;; mode's bit size and jumps over the fix-up when the result is zero;
;; otherwise it moves operand 1 into operand 0 and clears operand 1.
(define_expand "@x86_shift_adj_2"
  [(use (match_operand:SWI48 0 "register_operand"))
   (use (match_operand:SWI48 1 "register_operand"))
   (use (match_operand:QI 2 "register_operand"))]
  ""
{
  rtx_code_label *label = gen_label_rtx ();
  rtx tmp;

  emit_insn (gen_testqi_ccz_1 (operands[2],
                               GEN_INT (GET_MODE_BITSIZE (mode))));

  /* Build (set (pc) (if_then_else (eq flags 0) (label_ref label) (pc))).  */
  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label),
                              pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  emit_move_insn (operands[0], operands[1]);
  ix86_expand_clear (operands[1]);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  DONE;
})

;; Avoid useless masking of count operand.
;; Left shift whose count is (subreg:QI (and X C) 0).  The insn condition
;; only accepts a constant C that has every bit of GET_MODE_BITSIZE-1 set,
;; and the pre-reload split re-emits the shift with the QImode lowpart of
;; operand 2 as the count, i.e. without the AND.
;; NOTE(review): the bare "mode" in the C condition and the gap in the
;; pattern name look like a lost iterator attribute -- confirm.
(define_insn_and_split "*ashl3_mask"
  [(set (match_operand:SWI48 0 "nonimmediate_operand")
        (ashift:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand")
          (subreg:QI
            (and
              (match_operand 2 "int248_register_operand" "c,r")
              (match_operand 3 "const_int_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFT, mode, operands) && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (mode)-1)) == GET_MODE_BITSIZE (mode)-1 && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (ashift:SWI48 (match_dup 1)
                         (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
  operands[2] = gen_lowpart (QImode, operands[2]);
}
  [(set_attr "isa" "*,bmi2")])

;; Same as above for a count that is already a QImode AND of a QImode
;; register; operand 2 is reused unchanged, so the preparation statement
;; is empty.
(define_insn_and_split "*ashl3_mask_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand")
        (ashift:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand")
          (and:QI
            (match_operand:QI 2 "register_operand" "c,r")
            (match_operand:QI 3 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFT, mode, operands) && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (mode)-1)) == GET_MODE_BITSIZE (mode)-1 && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (ashift:SWI48 (match_dup 1)
                         (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "isa" "*,bmi2")])

;; BMI2 shlx: register destination, register-or-memory source, and a count
;; register of the same mode as the data.  The pattern carries no FLAGS_REG
;; clobber.
(define_insn "*bmi2_ashl3_1"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
                      (match_operand:SWI48 2 "register_operand" "r")))]
  "TARGET_BMI2"
  "shlx\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishiftx")
   (set_attr "mode" "")])

;; General left shift with a FLAGS_REG clobber.  Per the "isa" and "type"
;; attributes below: alternative 1 is typed lea, 2 ishiftx (bmi2),
;; 3 msklog (avx512bw, "k" registers) and 4 is the apx_ndd three-operand
;; form.  lea/ishiftx/msklog are output as "#".  Type alu (count 1 into a
;; register when TARGET_DOUBLE_WITH_ADD) is output as an add of the operand
;; to itself.  Everything else is sal; the count is left out of the
;; template for a shift by 1 under TARGET_SHIFT1 or when optimizing for
;; size, except in the NDD form.
(define_insn "*ashl3_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
        (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm")
                      (match_operand:QI 2 "nonmemory_operand" "c,M,r,,c")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFT, mode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
    case TYPE_ISHIFTX:
    case TYPE_MSKLOG:
      return "#";

    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      return "add{}\t%0, %0";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          /* For NDD form instructions related to TARGET_SHIFT1, the $1
             immediate do not need to be omitted as assembler will map it
             to use shorter encoding. */
          && !use_ndd)
        return "sal{}\t%0";
      else
        return use_ndd ? "sal{}\t{%2, %1, %0|%0, %1, %2}"
                       : "sal{}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,*,bmi2,avx512bw,apx_ndd")
   (set (attr "type")
     (cond [(eq_attr "alternative" "1")
              (const_string "lea")
            (eq_attr "alternative" "2")
              (const_string "ishiftx")
            (eq_attr "alternative" "4")
              (const_string "ishift")
            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
            (eq_attr "alternative" "3")
              (const_string "msklog")
           ]
           (const_string "ishift")))
   ;; No immediate byte is counted for the add form, nor for a shift by 1
   ;; under TARGET_SHIFT1 / optimize-for-size.
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "")])

;; Convert shift to the shiftx pattern to avoid flags dependency.
;; After reload, with BMI2, a register-count left shift into a register is
;; rewritten without the FLAGS_REG clobber; the QImode count is replaced by
;; its lowpart in the data mode.
;; NOTE(review): the bare "mode" passed to gen_lowpart looks like a lost
;; iterator attribute -- confirm.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
                      (match_operand:QI 2 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2 && reload_completed"
  [(set (match_dup 0)
        (ashift:SWI48 (match_dup 1) (match_dup 2)))]
  "operands[2] = gen_lowpart (mode, operands[2]);")

;; shlx on SImode operands with the result zero-extended to DImode; the
;; destination is printed with its SImode name (%k0).
(define_insn "*bmi2_ashlsi3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                     (match_operand:SI 2 "register_operand" "r"))))]
  "TARGET_64BIT && TARGET_BMI2"
  "shlx\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "ishiftx")
   (set_attr "mode" "SI")])

;; SImode left shift zero-extended into a DImode register.  Alternative 1
;; is typed lea, 2 ishiftx (bmi2), 3 is the apx_ndd form; lea and ishiftx
;; are output as "#".  Type alu (count 1 with TARGET_DOUBLE_WITH_ADD) is
;; output as add{l}.
(define_insn "*ashlsi3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (zero_extend:DI
          (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm,rm")
                     (match_operand:QI 2 "nonmemory_operand" "cI,M,r,cI"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
    case TYPE_ISHIFTX:
      return "#";

    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      return "add{l}\t%k0, %k0";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          && !use_ndd)
        return "sal{l}\t%k0";
      else
        return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
                       : "sal{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set_attr "isa" "*,*,bmi2,apx_ndd")
   (set (attr "type")
     (cond [(eq_attr "alternative" "1")
              (const_string "lea")
            (eq_attr "alternative" "2")
              (const_string "ishiftx")
            (eq_attr "alternative" "3")
              (const_string "ishift")
            (and (match_test "TARGET_DOUBLE_WITH_ADD")
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "SI")])

;; Convert shift to the shiftx pattern to avoid flags dependency.
;; Zero-extended variant: the count becomes its SImode lowpart.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
                     (match_operand:QI 2 "register_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
  [(set (match_dup 0)
        (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
  "operands[2] = gen_lowpart (SImode, operands[2]);")

;; HImode left shift.  Alternative 1 is typed lea (mode attribute SI),
;; 2 msklog (avx512f, "k" registers), 3 is the apx_ndd form; lea and msklog
;; are output as "#".
(define_insn "*ashlhi3_1"
  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
        (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
                   (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
    case TYPE_MSKLOG:
      return "#";

    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      return "add{w}\t%0, %0";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          && !use_ndd)
        return "sal{w}\t%0";
      else
        return use_ndd ? "sal{w}\t{%2, %1, %0|%0, %1, %2}"
                       : "sal{w}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,*,avx512f,apx_ndd")
   (set (attr "type")
     (cond [(eq_attr "alternative" "1")
              (const_string "lea")
            (eq_attr "alternative" "2")
              (const_string "msklog")
            (eq_attr "alternative" "3")
              (const_string "ishift")
            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "HI,SI,HI,HI")])

;; QImode left shift.  Alternative 2 is typed lea, 3 msklog (avx512dq),
;; 4 is the apx_ndd form.  When the insn's mode attribute is SI the
;; SImode register name (%k0) and the "l" suffix are used; the alu case
;; does the same when operand 1 is a register for which ANY_QI_REGNO_P is
;; false.
(define_insn "*ashlqi3_1"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
        (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm")
                   (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_LEA:
    case TYPE_MSKLOG:
      return "#";

    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      if (REG_P (operands[1]) && !ANY_QI_REGNO_P (REGNO (operands[1])))
        return "add{l}\t%k0, %k0";
      else
        return "add{b}\t%0, %0";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          && !use_ndd)
        {
          if (get_attr_mode (insn) == MODE_SI)
            return "sal{l}\t%k0";
          else
            return "sal{b}\t%0";
        }
      else
        {
          if (get_attr_mode (insn) == MODE_SI)
            return "sal{l}\t{%2, %k0|%k0, %2}";
          else
            return use_ndd ? "sal{b}\t{%2, %1, %0|%0, %1, %2}"
                           : "sal{b}\t{%2, %0|%0, %2}";
        }
    }
}
  [(set_attr "isa" "*,*,*,avx512dq,apx_ndd")
   (set (attr "type")
     (cond [(eq_attr "alternative" "2")
              (const_string "lea")
            (eq_attr "alternative" "3")
              (const_string "msklog")
            (eq_attr "alternative" "4")
              (const_string "ishift")
            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "QI,SI,SI,QI,QI")
   ;; Potential partial reg stall on alternative 1.
   (set (attr "preferred_for_speed")
     (cond [(eq_attr "alternative" "1,4")
              (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
           (symbol_ref "true")))])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Left shift into the strict_low_part of a register (SWI12 modes).
;; Alternative 0 ties the source to the destination and is output
;; directly; alternative 1 is output as "#" and, after reload, split into
;; a strict_low_part copy of operand 1 into operand 0 followed by the
;; in-place shift.
;; NOTE(review): the constraint strings "+,&" / "0,!" and the empty
;; add{}/sal{} suffixes look like lost iterator attributes -- confirm.
(define_insn_and_split "*ashl3_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+,&"))
        (ashift:SWI12 (match_operand:SWI12 1 "register_operand" "0,!")
                      (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
{
  if (which_alternative)
    return "#";

  switch (get_attr_type (insn))
    {
    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      return "add{}\t%0, %0";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{}\t%0";
      else
        return "sal{}\t{%2, %0|%0, %2}";
    }
}
  "&& reload_completed && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (ashift:SWI12 (match_dup 0) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set (attr "type")
     (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "")])

;; Convert ashift to the lea pattern to avoid flags dependency.
;; Applies after reload to a constant count of 0..3 when destination and
;; source are different registers; the shift becomes a mult by
;; 1 << count without the FLAGS_REG clobber.
;; NOTE(review): "(mult: ...)" has an empty mode and "mode != mode" is
;; trivially false as written; both look like lost iterator attributes --
;; confirm.
(define_split
  [(set (match_operand:SWI 0 "general_reg_operand")
        (ashift:SWI (match_operand:SWI 1 "index_reg_operand")
                    (match_operand 2 "const_0_to_3_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed && REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_dup 0)
        (mult: (match_dup 1) (match_dup 2)))]
{
  if (mode != mode)
    {
      operands[0] = gen_lowpart (mode, operands[0]);
      operands[1] = gen_lowpart (mode, operands[1]);
    }
  operands[2] = GEN_INT (1 << INTVAL (operands[2]));
})

;; Convert ashift to the lea pattern to avoid flags dependency.
;; Zero-extended variant: an SImode shift by 0..3 whose result is
;; zero-extended to DImode becomes a zero-extended SImode mult by
;; 1 << count.
(define_split
  [(set (match_operand:DI 0 "general_reg_operand")
        (zero_extend:DI
          (ashift:SI (match_operand:SI 1 "index_reg_operand")
                     (match_operand 2 "const_0_to_3_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && reload_completed && REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_dup 0)
        (zero_extend:DI (mult:SI (match_dup 1) (match_dup 2))))]
{
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[2] = GEN_INT (1 << INTVAL (operands[2]));
})

;; This pattern can't accept a variable shift count, since shifts by
;; zero don't affect the flags.  We assume that shifts by constant
;; zero are optimized away.
;; Sets FLAGS_REG from comparing the shifted value with zero and also
;; stores the shifted value.  Alternative 1 is the apx_ndd form.
;; NOTE(review): the empty predicate/constraint on operand 2 and the bare
;; "mode" look like lost iterator attributes -- confirm.
(define_insn "*ashl3_cmp"
  [(set (reg FLAGS_REG)
        (compare
          (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
                      (match_operand:QI 2 "" ","))
          (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r")
        (ashift:SWI (match_dup 1) (match_dup 2)))]
  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && (TARGET_SHIFT1 || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFT, mode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      return "add{}\t%0, %0";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          && !use_ndd)
        return "sal{}\t%0";
      else
        return use_ndd ? "sal{}\t{%2, %1, %0|%0, %1, %2}"
                       : "sal{}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,apx_ndd")
   (set (attr "type")
     (cond [(eq_attr "alternative" "1")
              (const_string "ishift")
            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "")])

;; As above for an SImode shift by a constant 1..31 whose stored result is
;; zero-extended into a DImode register.
(define_insn "*ashlsi3_cmp_zext"
  [(set (reg FLAGS_REG)
        (compare
          (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
                     (match_operand:QI 2 "const_1_to_31_operand"))
          (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r,r")
        (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && (TARGET_SHIFT1 || TARGET_DOUBLE_WITH_ADD))) && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (ASHIFT, SImode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      return "add{l}\t%k0, %k0";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          && !use_ndd)
        return "sal{l}\t%k0";
      else
        return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
                       : "sal{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set_attr "isa" "*,apx_ndd")
   (set (attr "type")
     (cond [(eq_attr "alternative" "1")
              (const_string "ishift")
            (and (match_test "TARGET_DOUBLE_WITH_ADD")
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "SI")])

;; Flags-only variant: the shifted value is compared with zero and the
;; destination is just a clobbered scratch.
(define_insn "*ashl3_cconly"
  [(set (reg FLAGS_REG)
        (compare
          (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
                      (match_operand:QI 2 "" ","))
          (const_int 0)))
   (clobber (match_scratch:SWI 0 "=,r"))]
  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && (TARGET_SHIFT1 || TARGET_DOUBLE_WITH_ADD))) && ix86_match_ccmode (insn, CCGOCmode)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      return "add{}\t%0, %0";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          && !use_ndd)
        return "sal{}\t%0";
      else
        return use_ndd ? "sal{}\t{%2, %1, %0|%0, %1, %2}"
                       : "sal{}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,apx_ndd")
   (set (attr "type")
     (cond [(eq_attr "alternative" "1")
              (const_string "ishift")
            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
                      (match_operand 0 "register_operand"))
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "")])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; QImode left shift of the 8-bit field at bit offset 8 (printed with %h).
;; Alternative 1 is output as "#" and, after reload, split into a copy of
;; that field from operand 1 to operand 0 followed by the in-place shift.
(define_insn_and_split "*ashlqi_ext_1"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (ashift:QI
            (subreg:QI
              (match_operator:SWI248 3 "extract_operator"
                [(match_operand 1 "int248_register_operand" "0,!Q")
                 (const_int 8)
                 (const_int 8)]) 0)
            (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (which_alternative)
    return "#";

  switch (get_attr_type (insn))
    {
    case TYPE_ALU:
      gcc_assert (operands[2] == const1_rtx);
      return "add{b}\t%h0, %h0";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{b}\t%h0";
      else
        return "sal{b}\t{%2, %h0|%h0, %2}";
    }
}
  "reload_completed && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
        (zero_extract:SWI248
          (match_dup 1) (const_int 8) (const_int 8)))
   (parallel
     [(set (zero_extract:SWI248
             (match_dup 0) (const_int 8) (const_int 8))
           (subreg:SWI248
             (ashift:QI
               (subreg:QI
                 (match_op_dup 3
                   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
               (match_dup 2)) 0))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set (attr "type")
     (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
                 (match_operand 2 "const1_operand"))
              (const_string "alu")
           ]
           (const_string "ishift")))
   (set (attr "length_immediate")
     (if_then_else
       (ior (eq_attr "type" "alu")
            (and (eq_attr "type" "ishift")
                 (and (match_operand 2 "const1_operand")
                      (ior (match_test "TARGET_SHIFT1")
                           (match_test "optimize_function_for_size_p (cfun)")))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "QI")])

;; See comment above `ashl3' about how this works.
;; Expander for the right shifts (any_shiftrt); all the work is delegated
;; to ix86_expand_binary_operator.
;; NOTE(review): the pattern name "3" and the empty first argument of the
;; call look like lost iterator attributes -- confirm.
(define_expand "3"
  [(set (match_operand:SDWIM 0 "")
        (any_shiftrt:SDWIM (match_operand:SDWIM 1 "")
                           (match_operand:QI 2 "nonmemory_operand")))]
  ""
{
  ix86_expand_binary_operator (, mode, operands, TARGET_APX_NDD);
  DONE;
})

;; Avoid useless masking of count operand.
;; Right-shift counterpart of the masked-count left shift: the AND on the
;; count is dropped when the constant covers all of GET_MODE_BITSIZE-1.
(define_insn_and_split "*3_mask"
  [(set (match_operand:SWI48 0 "nonimmediate_operand")
        (any_shiftrt:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand")
          (subreg:QI
            (and
              (match_operand 2 "int248_register_operand" "c,r")
              (match_operand 3 "const_int_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (, mode, operands) && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (mode)-1)) == GET_MODE_BITSIZE (mode)-1 && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (any_shiftrt:SWI48 (match_dup 1)
                              (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
  operands[2] = gen_lowpart (QImode, operands[2]);
}
  [(set_attr "isa" "*,bmi2")])

;; Same, for a count that is already a QImode AND.
(define_insn_and_split "*3_mask_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand")
        (any_shiftrt:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand")
          (and:QI
            (match_operand:QI 2 "register_operand" "c,r")
            (match_operand:QI 3 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (, mode, operands) && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (mode)-1)) == GET_MODE_BITSIZE (mode)-1 && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (any_shiftrt:SWI48 (match_dup 1)
                              (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "isa" "*,bmi2")])

;; Double-word right shift with a masked count.  If the mask has the
;; word-size bit set, the count is narrowed to QImode and the generic
;; doubleword shift is emitted instead.  Otherwise the value is split into
;; halves (operands 4..7) and the two-insn replacement below is used; when
;; the mask does not cover all bits below the word size, an explicit AND
;; of the count is emitted first.
;; NOTE(review): the empty modes after ':' and the bare "* BITS_PER_UNIT"
;; factors look like lost iterator attributes -- confirm.
(define_insn_and_split "*3_doubleword_mask"
  [(set (match_operand: 0 "register_operand")
        (any_shiftrt:
          (match_operand: 1 "register_operand")
          (subreg:QI
            (and
              (match_operand 2 "int248_register_operand" "c")
              (match_operand 3 "const_int_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "((INTVAL (operands[3]) & ( * BITS_PER_UNIT)) == 0 || ((INTVAL (operands[3]) & (2 * * BITS_PER_UNIT - 1)) == (2 * * BITS_PER_UNIT - 1))) && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 4)
           (ior:DWIH (lshiftrt:DWIH (match_dup 4)
                       (and:QI (match_dup 2) (match_dup 8)))
                     (subreg:DWIH
                       (ashift: (zero_extend: (match_dup 7))
                         (minus:QI (match_dup 9)
                                   (and:QI (match_dup 2) (match_dup 8)))) 0)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 6)
           (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  if ((INTVAL (operands[3]) & ( * BITS_PER_UNIT)) != 0)
    {
      operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
      operands[2] = gen_lowpart (QImode, operands[2]);
      emit_insn (gen_3_doubleword (operands[0], operands[1],
                                   operands[2]));
      DONE;
    }

  split_double_mode (mode, &operands[0], 2, &operands[4], &operands[6]);

  operands[8] = GEN_INT ( * BITS_PER_UNIT - 1);
  operands[9] = GEN_INT ( * BITS_PER_UNIT);

  if ((INTVAL (operands[3]) & (( * BITS_PER_UNIT) - 1))
      != (( * BITS_PER_UNIT) - 1))
    {
      rtx xops[3];
      xops[0] = gen_reg_rtx (GET_MODE (operands[2]));
      xops[1] = operands[2];
      xops[2] = GEN_INT (INTVAL (operands[3])
                         & (( * BITS_PER_UNIT) - 1));
      ix86_expand_binary_operator (AND, GET_MODE (operands[2]), xops);
      operands[2] = xops[0];
    }

  operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
  operands[2] = gen_lowpart (QImode, operands[2]);

  if (!rtx_equal_p (operands[4], operands[5]))
    emit_move_insn (operands[4], operands[5]);
})

;; Same as above for a count that is already a QImode AND; the explicit
;; masking, when needed, is done with gen_andqi3 into a fresh QImode
;; register.
(define_insn_and_split "*3_doubleword_mask_1"
  [(set (match_operand: 0 "register_operand")
        (any_shiftrt:
          (match_operand: 1 "register_operand")
          (and:QI
            (match_operand:QI 2 "register_operand" "c")
            (match_operand:QI 3 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "((INTVAL (operands[3]) & ( * BITS_PER_UNIT)) == 0 || ((INTVAL (operands[3]) & (2 * * BITS_PER_UNIT - 1)) == (2 * * BITS_PER_UNIT - 1))) && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 4)
           (ior:DWIH (lshiftrt:DWIH (match_dup 4)
                       (and:QI (match_dup 2) (match_dup 8)))
                     (subreg:DWIH
                       (ashift: (zero_extend: (match_dup 7))
                         (minus:QI (match_dup 9)
                                   (and:QI (match_dup 2) (match_dup 8)))) 0)))
      (clobber (reg:CC FLAGS_REG))])
   (parallel
     [(set (match_dup 6)
           (any_shiftrt:DWIH (match_dup 7) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  if ((INTVAL (operands[3]) & ( * BITS_PER_UNIT)) != 0)
    {
      emit_insn (gen_3_doubleword (operands[0], operands[1],
                                   operands[2]));
      DONE;
    }

  split_double_mode (mode, &operands[0], 2, &operands[4], &operands[6]);

  operands[8] = GEN_INT ( * BITS_PER_UNIT - 1);
  operands[9] = GEN_INT ( * BITS_PER_UNIT);

  if ((INTVAL (operands[3]) & (( * BITS_PER_UNIT) - 1))
      != (( * BITS_PER_UNIT) - 1))
    {
      rtx tem = gen_reg_rtx (QImode);
      emit_insn (gen_andqi3 (tem, operands[2], operands[3]));
      operands[2] = tem;
    }

  if (!rtx_equal_p (operands[4], operands[5]))
    emit_move_insn (operands[4], operands[5]);
})

;; Generic double-word right shift.  Always output as "#" and split once
;; epilogue_completed holds: the NDD splitter is used when TARGET_APX_NDD
;; and source and destination differ, the regular splitter otherwise.  No
;; scratch register is passed (NULL_RTX).
(define_insn_and_split "3_doubleword"
  [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
        (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0,r")
                         (match_operand:QI 2 "nonmemory_operand" "c,c")))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  "epilogue_completed"
  [(const_int 0)]
{
  if (TARGET_APX_NDD
      && !rtx_equal_p (operands[0], operands[1]))
    ix86_split_rshift_ndd (, operands, NULL_RTX);
  else
    ix86_split_ (operands, NULL_RTX, mode);
  DONE;
}
  [(set_attr "type" "multi")
   (set_attr "isa" "*,apx_ndd")])

;; By default we don't ask for a scratch register, because when DWImode
;; values are manipulated, registers are already at a premium.  But if
;; we have one handy, we won't turn it away.
;; Double-word right shift for which a word-sized scratch register
;; (operand 3) is available: same splitting as the generic pattern, but
;; the scratch is handed to the splitter.
;; NOTE(review): the empty modes after ':' and the empty first argument of
;; ix86_split_rshift_ndd look like lost iterator attributes -- confirm.
(define_peephole2
  [(match_scratch:DWIH 3 "r")
   (parallel
     [(set (match_operand: 0 "register_operand")
           (any_shiftrt:
             (match_operand: 1 "register_operand")
             (match_operand:QI 2 "nonmemory_operand")))
      (clobber (reg:CC FLAGS_REG))])
   (match_dup 3)]
  "TARGET_CMOVE"
  [(const_int 0)]
{
  if (TARGET_APX_NDD
      && !rtx_equal_p (operands[0], operands[1]))
    ix86_split_rshift_ndd (, operands, operands[3]);
  else
    ix86_split_ (operands, operands[3], mode);
  DONE;
})

;; Split truncations of double word right shifts into x86_shrd_1.
;; Only constant counts below the word size are accepted.  After reload
;; the source is split into its low half (operand 1) and high half
;; (operand 3), the low half is copied into the destination if needed, and
;; operand 4 is set to word size minus the count.
(define_insn_and_split "3_doubleword_lowpart"
  [(set (match_operand:DWIH 0 "register_operand" "=&r")
        (subreg:DWIH
          (any_shiftrt: (match_operand: 1 "register_operand" "r")
                        (match_operand:QI 2 "const_int_operand")) 0))
   (clobber (reg:CC FLAGS_REG))]
  "UINTVAL (operands[2]) < * BITS_PER_UNIT"
  "#"
  "&& reload_completed"
  [(parallel
     [(set (match_dup 0)
           (ior:DWIH (lshiftrt:DWIH (match_dup 0) (match_dup 2))
                     (subreg:DWIH
                       (ashift: (zero_extend: (match_dup 3))
                                (match_dup 4)) 0)))
      (clobber (reg:CC FLAGS_REG))])]
{
  split_double_mode (mode, &operands[1], 1, &operands[1], &operands[3]);
  operands[4] = GEN_INT (( * BITS_PER_UNIT) - INTVAL (operands[2]));
  if (!rtx_equal_p (operands[0], operands[1]))
    emit_move_insn (operands[0], operands[1]);
})

;; 64-bit shrd, in-place form.  Operand 0 is logically shifted right by
;; (count & 63) and ORed with the low DImode part of operand 1 shifted
;; left by 64 - (count & 63).
(define_insn "x86_64_shrd"
  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
        (ior:DI (lshiftrt:DI (match_dup 0)
                  (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
                          (const_int 63)))
                (subreg:DI
                  (ashift:TI
                    (zero_extend:TI
                      (match_operand:DI 1 "register_operand" "r"))
                    (minus:QI (const_int 64)
                              (and:QI (match_dup 2) (const_int 63)))) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "mode" "DI")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])

;; APX NDD form of the above: separate register destination (operand 0),
;; shifted source in operand 1, fill source in operand 2, count in
;; operand 3.
(define_insn "x86_64_shrd_ndd"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
                  (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
                          (const_int 63)))
                (subreg:DI
                  (ashift:TI
                    (zero_extend:TI
                      (match_operand:DI 2 "register_operand" "r"))
                    (minus:QI (const_int 64)
                              (and:QI (match_dup 3) (const_int 63)))) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD"
  "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ishift")
   (set_attr "mode" "DI")])

;; Constant-count 64-bit shrd.  The two counts are separate constants; the
;; insn condition requires operand 3 == 64 - operand 2.
(define_insn "x86_64_shrd_1"
  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
        (ior:DI (lshiftrt:DI (match_dup 0)
                             (match_operand:QI 2 "const_0_to_63_operand"))
                (subreg:DI
                  (ashift:TI
                    (zero_extend:TI
                      (match_operand:DI 1 "register_operand" "r"))
                    (match_operand:QI 3 "const_0_to_255_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
  "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "DI")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])

;; APX NDD form of the constant-count 64-bit shrd; the condition requires
;; operand 4 == 64 - operand 3.
(define_insn "x86_64_shrd_ndd_1"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
                             (match_operand:QI 3 "const_0_to_63_operand"))
                (subreg:DI
                  (ashift:TI
                    (zero_extend:TI
                      (match_operand:DI 2 "register_operand" "r"))
                    (match_operand:QI 4 "const_0_to_255_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
  "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ishift")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "DI")])

;; Plain DImode (lshiftrt | ashift) with complementary constant counts
;; (operand 3 == 64 - operand 2), without the zero_extend/subreg wrapping.
;; Split before reload:
;;  - destination equals the right-shifted input (operand 4): shrd_1;
;;  - destination equals the left-shifted input (operand 1): shld_1 with
;;    the two counts swapped;
;;  - otherwise, with TARGET_APX_NDD, an NDD insn into a fresh register,
;;    choosing shrd or shld so that a memory input lands in the operand
;;    that accepts memory;
;;  - otherwise copy operand 4 into a fresh register and shrd_1 in place.
(define_insn_and_split "*x86_64_shrd_shld_1_nozext"
  [(set (match_operand:DI 0 "nonimmediate_operand")
        (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand")
                             (match_operand:QI 2 "const_0_to_63_operand"))
                (ashift:DI (match_operand:DI 1 "nonimmediate_operand")
                           (match_operand:QI 3 "const_0_to_63_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && INTVAL (operands[3]) == 64 - INTVAL (operands[2]) && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  if (rtx_equal_p (operands[4], operands[0]))
    {
      operands[1] = force_reg (DImode, operands[1]);
      emit_insn (gen_x86_64_shrd_1 (operands[0], operands[1],
                                    operands[2], operands[3]));
    }
  else if (rtx_equal_p (operands[1], operands[0]))
    {
      operands[4] = force_reg (DImode, operands[4]);
      emit_insn (gen_x86_64_shld_1 (operands[0], operands[4],
                                    operands[3], operands[2]));
    }
  else if (TARGET_APX_NDD)
    {
      rtx tmp = gen_reg_rtx (DImode);
      if (MEM_P (operands[4]))
        {
          operands[1] = force_reg (DImode, operands[1]);
          emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[4], operands[1],
                                            operands[2], operands[3]));
        }
      else if (MEM_P (operands[1]))
        emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[1], operands[4],
                                          operands[3], operands[2]));
      else
        emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[4], operands[1],
                                          operands[2], operands[3]));
      emit_move_insn (operands[0], tmp);
    }
  else
    {
      operands[1] = force_reg (DImode, operands[1]);
      rtx tmp = gen_reg_rtx (DImode);
      emit_move_insn (tmp, operands[4]);
      emit_insn (gen_x86_64_shrd_1 (tmp, operands[1],
                                    operands[2], operands[3]));
      emit_move_insn (operands[0], tmp);
    }
  DONE;
})

;; Variable-count form written as (op0 >> n) | (op1 << (64 - n)) with an
;; unmasked count; split before reload into the masked shape (count & 63
;; on both sides, zero_extend/subreg around the left shift).
(define_insn_and_split "*x86_64_shrd_2"
  [(set (match_operand:DI 0 "nonimmediate_operand")
        (ior:DI (lshiftrt:DI (match_dup 0)
                             (match_operand:QI 2 "nonmemory_operand"))
                (ashift:DI (match_operand:DI 1 "register_operand")
                           (minus:QI (const_int 64) (match_dup 2)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (ior:DI (lshiftrt:DI (match_dup 0)
                                (and:QI (match_dup 2) (const_int 63)))
                   (subreg:DI
                     (ashift:TI
                       (zero_extend:TI (match_dup 1))
                       (minus:QI (const_int 64)
                                 (and:QI (match_dup 2)
                                         (const_int 63)))) 0)))
      (clobber (reg:CC FLAGS_REG))])])

;; NDD counterpart of the variable-count form: operand 1 is the value
;; shifted right, operand 2 the DImode register shifted in, operand 3 the
;; QImode count.  The left-shift amount is 64 minus that same count, so it
;; duplicates operand 3, exactly as the split output below and the 32-bit
;; pattern do.  The result is computed into a fresh register (operand 4)
;; which is then copied to operand 0.
(define_insn_and_split "*x86_64_shrd_ndd_2"
  [(set (match_operand:DI 0 "nonimmediate_operand")
        (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand")
                             (match_operand:QI 3 "nonmemory_operand"))
                (ashift:DI (match_operand:DI 2 "register_operand")
                           (minus:QI (const_int 64) (match_dup 3)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 4)
           (ior:DI (lshiftrt:DI (match_dup 1)
                                (and:QI (match_dup 3) (const_int 63)))
                   (subreg:DI
                     (ashift:TI
                       (zero_extend:TI (match_dup 2))
                       (minus:QI (const_int 64)
                                 (and:QI (match_dup 3)
                                         (const_int 63)))) 0)))
      (clobber (reg:CC FLAGS_REG))
      (set (match_dup 0) (match_dup 4))])]
{
  operands[4] = gen_reg_rtx (DImode);
  emit_move_insn (operands[4], operands[0]);
})

;; 32-bit shrd, in-place form; same shape as the 64-bit pattern with a
;; count mask of 31 and a complement base of 32.  No target condition.
(define_insn "x86_shrd"
  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
        (ior:SI (lshiftrt:SI (match_dup 0)
                  (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
                          (const_int 31)))
                (subreg:SI
                  (ashift:DI
                    (zero_extend:DI
                      (match_operand:SI 1 "register_operand" "r"))
                    (minus:QI (const_int 32)
                              (and:QI (match_dup 2) (const_int 31)))) 0)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "mode" "SI")
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])

;; APX NDD form of the 32-bit shrd.
(define_insn "x86_shrd_ndd"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                  (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic")
                          (const_int 31)))
                (subreg:SI
                  (ashift:DI
                    (zero_extend:DI
                      (match_operand:SI 2 "register_operand" "r"))
                    (minus:QI (const_int 32)
                              (and:QI (match_dup 3) (const_int 31)))) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD"
  "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ishift")
   (set_attr "mode" "SI")])

;; Constant-count 32-bit shrd; the condition requires
;; operand 3 == 32 - operand 2.
(define_insn "x86_shrd_1"
  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
        (ior:SI (lshiftrt:SI (match_dup 0)
                             (match_operand:QI 2 "const_0_to_31_operand"))
                (subreg:SI
                  (ashift:DI
                    (zero_extend:DI
                      (match_operand:SI 1 "register_operand" "r"))
                    (match_operand:QI 3 "const_0_to_63_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
  "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishift")
   (set_attr "prefix_0f" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "SI")
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "vector")
   (set_attr "bdver1_decode" "vector")])

;; APX NDD form of the constant-count 32-bit shrd; the condition requires
;; operand 4 == 32 - operand 3.
(define_insn "x86_shrd_ndd_1"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                             (match_operand:QI 3 "const_0_to_31_operand"))
                (subreg:SI
                  (ashift:DI
                    (zero_extend:DI
                      (match_operand:SI 2 "register_operand" "r"))
                    (match_operand:QI 4 "const_0_to_63_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))"
  "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ishift")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "SI")])

;; 32-bit counterpart of the plain (lshiftrt | ashift) pattern with
;; complementary constant counts (operand 3 == 32 - operand 2); the
;; pre-reload split makes the same four-way choice between shrd_1, shld_1
;; with swapped counts, an NDD insn into a fresh register, or a copy
;; followed by an in-place shrd_1.
(define_insn_and_split "*x86_shrd_shld_1_nozext"
  [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand")
                             (match_operand:QI 2 "const_0_to_31_operand"))
                (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
                           (match_operand:QI 3 "const_0_to_31_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[3]) == 32 - INTVAL (operands[2]) && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  if (rtx_equal_p (operands[4], operands[0]))
    {
      operands[1] = force_reg (SImode, operands[1]);
      emit_insn (gen_x86_shrd_1 (operands[0], operands[1],
                                 operands[2], operands[3]));
    }
  else if (rtx_equal_p (operands[1], operands[0]))
    {
      operands[4] = force_reg (SImode, operands[4]);
      emit_insn (gen_x86_shld_1 (operands[0], operands[4],
                                 operands[3], operands[2]));
    }
  else if (TARGET_APX_NDD)
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (MEM_P (operands[4]))
        {
          operands[1] = force_reg (SImode, operands[1]);
          emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[4], operands[1],
                                         operands[2], operands[3]));
        }
      else if (MEM_P (operands[1]))
        emit_insn (gen_x86_shld_ndd_1 (tmp, operands[1], operands[4],
                                       operands[3], operands[2]));
      else
        emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[4], operands[1],
                                       operands[2], operands[3]));
      emit_move_insn (operands[0], tmp);
    }
  else
    {
      operands[1] = force_reg (SImode, operands[1]);
      rtx tmp = gen_reg_rtx (SImode);
      emit_move_insn (tmp, operands[4]);
      emit_insn (gen_x86_shrd_1 (tmp, operands[1],
                                 operands[2], operands[3]));
      emit_move_insn (operands[0], tmp);
    }
  DONE;
})

;; 32-bit variable-count form with an unmasked count; split before reload
;; into the masked shape (count & 31).
(define_insn_and_split "*x86_shrd_2"
  [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (lshiftrt:SI (match_dup 0)
                             (match_operand:QI 2 "nonmemory_operand"))
                (ashift:SI (match_operand:SI 1 "register_operand")
                           (minus:QI (const_int 32) (match_dup 2)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (ior:SI (lshiftrt:SI (match_dup 0)
                                (and:QI (match_dup 2) (const_int 31)))
                   (subreg:SI
                     (ashift:DI
                       (zero_extend:DI (match_dup 1))
                       (minus:QI (const_int 32)
                                 (and:QI (match_dup 2)
                                         (const_int 31)))) 0)))
      (clobber (reg:CC FLAGS_REG))])])

;; 32-bit NDD counterpart: operand 1 is shifted right, operand 2 is the
;; register shifted in, operand 3 is the QImode count used on both sides.
;; The result goes through a fresh register (operand 4).
(define_insn_and_split "*x86_shrd_ndd_2"
  [(set (match_operand:SI 0 "nonimmediate_operand")
        (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
                             (match_operand:QI 3 "nonmemory_operand"))
                (ashift:SI (match_operand:SI 2 "register_operand")
                           (minus:QI (const_int 32) (match_dup 3)))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_APX_NDD && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 4)
           (ior:SI (lshiftrt:SI (match_dup 1)
                                (and:QI (match_dup 3) (const_int 31)))
                   (subreg:SI
                     (ashift:DI
                       (zero_extend:DI (match_dup 2))
                       (minus:QI (const_int 32)
                                 (and:QI (match_dup 3)
                                         (const_int 31)))) 0)))
      (clobber (reg:CC FLAGS_REG))
      (set (match_dup 0) (match_dup 4))])]
{
  operands[4] = gen_reg_rtx (SImode);
  emit_move_insn (operands[4], operands[0]);
})

;; Base name for insn mnemonic.
;; Per-mode conversion mnemonic, each entry giving both assembler-dialect
;; spellings.
(define_mode_attr cvt_mnemonic
  [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])

;; Arithmetic right shift by (mode bitsize - 1).  Only available when
;; TARGET_USE_CLTD is set or the function is optimized for size.
;; Alternative 0 pairs the "a" source with the "d" destination and is typed
;; imovx; alternatives 1 and 2 are sar, alternative 2 being limited to the
;; apx_ndd isa.
(define_insn "ashr3_cvt"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r")
        (ashiftrt:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "*a,0,rm")
          (match_operand:QI 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[2]) == GET_MODE_BITSIZE (mode)-1 && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun)) && ix86_binary_operator_ok (ASHIFTRT, mode, operands, TARGET_APX_NDD)"
  "@ sar{}\t{%2, %0|%0, %2} sar{}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "*,*,apx_ndd")
   (set_attr "type" "imovx,ishift,ishift")
   (set_attr "prefix_0f" "0,*,*")
   (set_attr "length_immediate" "0,*,*")
   (set_attr "modrm" "0,1,1")
   (set_attr "mode" "")])

;; SImode shift by 31 whose result is zero-extended to DImode; 64-bit only.
(define_insn "*ashrsi3_cvt_zext"
  [(set (match_operand:DI 0 "register_operand" "=*d,r,r")
        (zero_extend:DI
          (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0,rm")
                       (match_operand:QI 2 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && INTVAL (operands[2]) == 31 && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun)) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands, TARGET_APX_NDD)"
  "@ {cltd|cdq} sar{l}\t{%2, %k0|%k0, %2} sar{l}\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "isa" "*,*,apx_ndd")
   (set_attr "type" "imovx,ishift,ishift")
   (set_attr "prefix_0f" "0,*,*")
   (set_attr "length_immediate" "0,*,*")
   (set_attr "modrm" "0,1,1")
   (set_attr "mode" "SI")])

;; Conditional fix-up after a shift.  Operand 2 is tested against the
;; mode-bitsize constant; when the flags compare equal to zero the
;; adjustment is skipped.  Otherwise operand 1 is copied into operand 0 and
;; operand 1 is then shifted right arithmetically by bitsize - 1.
(define_expand "@x86_shift_adj_3"
  [(use (match_operand:SWI48 0 "register_operand"))
   (use (match_operand:SWI48 1 "register_operand"))
   (use (match_operand:QI 2 "register_operand"))]
  ""
{
  rtx_code_label *label = gen_label_rtx ();
  rtx tmp;

  emit_insn (gen_testqi_ccz_1 (operands[2],
                               GEN_INT (GET_MODE_BITSIZE (mode))));

  /* Build and emit "if (flags == 0) goto label".  */
  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label),
                              pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  emit_move_insn (operands[0], operands[1]);
  emit_insn (gen_ashr3_cvt (operands[1], operands[1],
                            GEN_INT (GET_MODE_BITSIZE (mode)-1)));

  emit_label (label);
  /* The jump emitted above is the only reference to the label.  */
  LABEL_NUSES (label) = 1;

  DONE;
})

;; Right shift by a register count with no flags clobber; requires
;; TARGET_BMI2.
(define_insn "*bmi2_3_1"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
                           (match_operand:SWI48 2 "register_operand" "r")))]
  "TARGET_BMI2"
  "x\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ishiftx")
   (set_attr "mode" "")])

;; General arithmetic right shift.  Alternative 1 (bmi2, type ishiftx)
;; outputs "#"; alternative 2 is the three-operand apx_ndd form.
(define_insn "*ashr3_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
        (ashiftrt:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
          (match_operand:QI 2 "nonmemory_operand" "c,r,c")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFTRT, mode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_ISHIFTX:
      return "#";

    default:
      /* One-operand shift-by-1 form, never used for the NDD
         alternative.  */
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          && !use_ndd)
        return "sar{}\t%0";
      else
        return use_ndd ? "sar{}\t{%2, %1, %0|%0, %1, %2}"
                       : "sar{}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,bmi2,apx_ndd")
   (set_attr "type" "ishift,ishiftx,ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "")])

;; Specialization of *lshr3_1 below, extracting the SImode
;; highpart of a DI to be extracted, but allowing it to be clobbered.
;; DImode logical right shift by 32 whose destination is written as the
;; DImode subreg of an SImode register.  After reload it is rewritten on
;; the hard register: a shufps on the V4SImode view for SSE registers,
;; otherwise a plain DImode shift.
(define_insn_and_split "*highpartdisi2"
  [(set (subreg:DI (match_operand:SI 0 "register_operand" "=r,x,?k,r") 0)
        (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0,k,rm")
                     (const_int 32)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "#"
  "&& reload_completed"
  [(parallel
    [(set (match_dup 0)
          (lshiftrt:DI (match_dup 1) (const_int 32)))
     (clobber (reg:CC FLAGS_REG))])]
{
  if (SSE_REG_P (operands[0]))
    {
      rtx tmp = gen_rtx_REG (V4SImode, REGNO (operands[0]));
      emit_insn (gen_sse_shufps_v4si (tmp, tmp, tmp,
                                      const1_rtx, const1_rtx,
                                      GEN_INT (5), GEN_INT (5)));
      DONE;
    }
  /* Re-express the destination as the DImode hard register.  */
  operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
}
  [(set_attr "isa" "*,*,*,apx_ndd")])

;; General logical right shift.  The bmi2 (ishiftx) and avx512bw (msklog)
;; alternatives output "#"; alternative 3 is the three-operand apx_ndd
;; form.
(define_insn "*lshr3_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r")
        (lshiftrt:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm")
          (match_operand:QI 2 "nonmemory_operand" "c,r,,c")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (LSHIFTRT, mode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_ISHIFTX:
    case TYPE_MSKLOG:
      return "#";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          && !use_ndd)
        return "shr{}\t%0";
      else
        return use_ndd ? "shr{}\t{%2, %1, %0|%0, %1, %2}"
                       : "shr{}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,bmi2,avx512bw,apx_ndd")
   (set_attr "type" "ishift,ishiftx,msklog,ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (and (match_operand 2 "const1_operand")
                 (eq_attr "alternative" "0"))
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "")])

;; Convert shift to the shiftx pattern to avoid flags dependency.
;; After reload with BMI2, drop the flags clobber from a register-count
;; right shift; the count register is re-expressed in the mode of the
;; shifted value.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
                           (match_operand:QI 2 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2 && reload_completed"
  [(set (match_dup 0)
        (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
  "operands[2] = gen_lowpart (mode, operands[2]);")

;; Flag-free SImode right shift by a register count, zero-extended to
;; DImode; needs TARGET_64BIT and TARGET_BMI2.
(define_insn "*bmi2_si3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                          (match_operand:SI 2 "register_operand" "r"))))]
  "TARGET_64BIT && TARGET_BMI2"
  "x\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "ishiftx")
   (set_attr "mode" "SI")])

;; SImode right shift zero-extended to DImode.  Alternative 1 (bmi2)
;; outputs "#"; alternative 2 is the three-operand apx_ndd form.
(define_insn "*si3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI
          (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm")
                          (match_operand:QI 2 "nonmemory_operand" "cI,r,cI"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_ISHIFTX:
      return "#";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          && !use_ndd)
        return "{l}\t%k0";
      else
        return use_ndd ? "{l}\t{%2, %1, %k0|%k0, %1, %2}"
                       : "{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set_attr "isa" "*,bmi2,apx_ndd")
   (set_attr "type" "ishift,ishiftx,ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "SI")])

;; Convert shift to the shiftx pattern to avoid flags dependency.
;; Zero-extending variant of the flag-free conversion: after reload with
;; BMI2 on a 64-bit target, drop the flags clobber and view the count
;; register in SImode.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
                          (match_operand:QI 2 "register_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
  [(set (match_dup 0)
        (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
  "operands[2] = gen_lowpart (SImode, operands[2]);")

;; Arithmetic right shift for the SWI12 modes; alternative 1 is the
;; three-operand apx_ndd form.
(define_insn "*ashr3_1"
  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=m, r")
        (ashiftrt:SWI12
          (match_operand:SWI12 1 "nonimmediate_operand" "0, rm")
          (match_operand:QI 2 "nonmemory_operand" "c, c")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (ASHIFTRT, mode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
      && !use_ndd)
    return "sar{}\t%0";
  else
    return use_ndd ? "sar{}\t{%2, %1, %0|%0, %1, %2}"
                   : "sar{}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*, apx_ndd")
   (set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "")])

;; QImode logical right shift.  Alternative 1 (avx512dq, type msklog) uses
;; "k" operands and outputs "#"; alternative 2 is the apx_ndd form.
(define_insn "*lshrqi3_1"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k,r")
        (lshiftrt:QI
          (match_operand:QI 1 "nonimmediate_operand" "0, k, rm")
          (match_operand:QI 2 "nonmemory_operand" "cI,Wb,cI")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (LSHIFTRT, QImode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_ISHIFT:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          && !use_ndd)
        return "shr{b}\t%0";
      else
        return use_ndd ? "shr{b}\t{%2, %1, %0|%0, %1, %2}"
                       : "shr{b}\t{%2, %0|%0, %2}";
    case TYPE_MSKLOG:
      return "#";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "*,avx512dq,apx_ndd")
   (set_attr "type" "ishift,msklog,ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (and (match_operand 2 "const1_operand")
                 (eq_attr "alternative" "0"))
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "QI")])

;; HImode logical right shift; same structure as *lshrqi3_1, with the
;; msklog alternative gated on avx512f.
(define_insn "*lshrhi3_1"
  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k, r")
        (lshiftrt:HI
          (match_operand:HI 1 "nonimmediate_operand" "0, k, rm")
          (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_ISHIFT:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          && !use_ndd)
        return "shr{w}\t%0";
      else
        return use_ndd ? "shr{w}\t{%2, %1, %0|%0, %1, %2}"
                       : "shr{w}\t{%2, %0|%0, %2}";
    case TYPE_MSKLOG:
      return "#";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "*, avx512f, apx_ndd")
   (set_attr "type" "ishift,msklog,ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (and (match_operand 2 "const1_operand")
                 (eq_attr "alternative" "0"))
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "HI")])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Right shift writing only the low part of operand 0 (strict_low_part).
;; Alternative 0 shifts in place.  Alternative 1 outputs "#" and is split
;; after reload, when operands 0 and 1 differ, into a low-part copy
;; followed by the in-place shift.
(define_insn_and_split "*3_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+,&"))
        (any_shiftrt:SWI12
          (match_operand:SWI12 1 "register_operand" "0,!")
          (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
{
  if (which_alternative)
    return "#";

  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "{}\t%0";
  else
    return "{}\t{%2, %0|%0, %2}";
}
  "&& reload_completed && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (any_shiftrt:SWI12 (match_dup 0) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "")])

;; This pattern can't accept a variable shift count, since shifts by
;; zero don't affect the flags.  We assume that shifts by constant
;; zero are optimized away.
(define_insn "*3_cmp"
  [(set (reg FLAGS_REG)
        (compare
          (any_shiftrt:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
            (match_operand:QI 2 "" ","))
          (const_int 0)))
   (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r")
        (any_shiftrt:SWI (match_dup 1) (match_dup 2)))]
  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && TARGET_SHIFT1)) && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (, mode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
      && !use_ndd)
    return "{}\t%0";
  else
    return use_ndd ? "{}\t{%2, %1, %0|%0, %1, %2}"
                   : "{}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,apx_ndd")
   (set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "")])

;; As above for an SImode shift by a constant in 1..31 whose result is
;; zero-extended into a DImode register; 64-bit only.
(define_insn "*si3_cmp_zext"
  [(set (reg FLAGS_REG)
        (compare
          (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
                          (match_operand:QI 2 "const_1_to_31_operand"))
          (const_int 0)))
   (set (match_operand:DI 0 "register_operand" "=r,r")
        (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
  "TARGET_64BIT && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && TARGET_SHIFT1)) && ix86_match_ccmode (insn, CCGOCmode) && ix86_binary_operator_ok (, SImode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
      && !use_ndd)
    return "{l}\t%k0";
  else
    return use_ndd ? "{l}\t{%2, %1, %k0|%k0, %1, %2}"
                   : "{l}\t{%2, %k0|%k0, %2}";
}
  [(set_attr "isa" "*,apx_ndd")
   (set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "SI")])

;; Flags-only variant: the shifted value lands in a scratch operand and
;; only the comparison against zero is kept.
(define_insn "*3_cconly"
  [(set (reg FLAGS_REG)
        (compare
          (any_shiftrt:SWI
            (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
            (match_operand:QI 2 "" ","))
          (const_int 0)))
   (clobber (match_scratch:SWI 0 "=,r"))]
  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && TARGET_SHIFT1)) && ix86_match_ccmode (insn, CCGOCmode)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
      && !use_ndd)
    return "{}\t%0";
  else
    return use_ndd ? "{}\t{%2, %1, %0|%0, %1, %2}"
                   : "{}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,apx_ndd")
   (set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "")])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Right shift of the 8-bit field at bit offset 8 of operand 1 (printed
;; with the %h modifier), stored back to the same field of operand 0.
;; Alternative 1 outputs "#" and is split after reload, when the two
;; registers differ, into a field copy followed by the in-place shift.
(define_insn_and_split "*qi_ext_1"
  [(set (zero_extract:SWI248
          (match_operand 0 "int248_register_operand" "+Q,&Q")
          (const_int 8)
          (const_int 8))
        (subreg:SWI248
          (any_shiftrt:QI
            (subreg:QI
              (match_operator:SWI248 3 "extract_operator"
                [(match_operand 1 "int248_register_operand" "0,!Q")
                 (const_int 8)
                 (const_int 8)]) 0)
            (match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  if (which_alternative)
    return "#";

  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "{b}\t%h0";
  else
    return "{b}\t{%2, %h0|%h0, %2}";
}
  "reload_completed && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (zero_extract:SWI248
          (match_dup 0) (const_int 8) (const_int 8))
        (zero_extract:SWI248
          (match_dup 1) (const_int 8) (const_int 8)))
   (parallel
     [(set (zero_extract:SWI248
             (match_dup 0) (const_int 8) (const_int 8))
           (subreg:SWI248
             (any_shiftrt:QI
               (subreg:QI
                 (match_op_dup 3
                   [(match_dup 0) (const_int 8) (const_int 8)]) 0)
               (match_dup 2)) 0))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "ishift")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "QI")])

;; Left shift followed by an arithmetic right shift by the same constant
;; on a doubleword value.  After reload, split_double_mode splits operand 0
;; in place, writing its second half to operands[4]; both shifts are then
;; applied to operands[4] only.
(define_insn_and_split "*extend2_doubleword_highpart"
  [(set (match_operand: 0 "register_operand" "=r")
        (ashiftrt:
          (ashift: (match_operand: 1 "nonimmediate_operand" "0")
                   (match_operand:QI 2 "const_int_operand"))
          (match_operand:QI 3 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "INTVAL (operands[2]) == INTVAL (operands[3]) && UINTVAL (operands[2]) < * BITS_PER_UNIT"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 4)
                   (ashift:DWIH (match_dup 4) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 4)
                   (ashiftrt:DWIH (match_dup 4) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
  "split_double_mode (mode, &operands[0], 1, &operands[0], &operands[4]);")

;; V2DImode counterpart for 32-bit targets with TARGET_STV and
;; TARGET_AVX512VL: equal shift counts below 32, split after reload into a
;; vector left shift followed by a vector arithmetic right shift.
(define_insn_and_split "*extendv2di2_highpart_stv"
  [(set (match_operand:V2DI 0 "register_operand" "=v")
        (ashiftrt:V2DI
          (ashift:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "vm")
                       (match_operand:QI 2 "const_int_operand"))
          (match_operand:QI 3 "const_int_operand")))]
  "!TARGET_64BIT && TARGET_STV && TARGET_AVX512VL && INTVAL (operands[2]) == INTVAL (operands[3]) && UINTVAL (operands[2]) < 32"
  "#"
  "&& reload_completed"
  [(set (match_dup 0)
        (ashift:V2DI (match_dup 1) (match_dup 2)))
   (set (match_dup 0)
        (ashiftrt:V2DI (match_dup 0) (match_dup 2)))])

;; Rotate instructions

;; TImode rotate, 64-bit only.  Constant counts 1..63 and exactly 64 go to
;; dedicated doubleword patterns.  Any other count is expanded as two
;; double-precision shifts on copies of the two halves, followed by
;; gen_x86_shiftdi_adj_1.
(define_expand "ti3"
  [(set (match_operand:TI 0 "register_operand")
        (any_rotate:TI (match_operand:TI 1 "register_operand")
                       (match_operand:QI 2 "nonmemory_operand")))]
  "TARGET_64BIT"
{
  if (const_1_to_63_operand (operands[2], VOIDmode))
    emit_insn (gen_ix86_ti3_doubleword
                (operands[0], operands[1], operands[2]));
  else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 64)
    {
      operands[1] = force_reg (TImode, operands[1]);
      emit_insn (gen_64ti2_doubleword (operands[0], operands[1]));
    }
  else
    {
      rtx amount = force_reg (QImode, operands[2]);
      rtx src_lo = gen_lowpart (DImode, operands[1]);
      rtx src_hi = gen_highpart (DImode, operands[1]);
      rtx tmp_lo = gen_reg_rtx (DImode);
      rtx tmp_hi = gen_reg_rtx (DImode);
      emit_move_insn (tmp_lo, src_lo);
      emit_move_insn (tmp_hi, src_hi);
      rtx (*shiftd) (rtx, rtx, rtx)
        = ( == ROTATE) ? gen_x86_64_shld : gen_x86_64_shrd;
      /* Each half is shifted with bits supplied by the opposite
         original half.  */
      emit_insn (shiftd (tmp_lo, src_hi, amount));
      emit_insn (shiftd (tmp_hi, src_lo, amount));
      rtx dst_lo = gen_lowpart (DImode, operands[0]);
      rtx dst_hi = gen_highpart (DImode, operands[0]);
      emit_move_insn (dst_lo, tmp_lo);
      emit_move_insn (dst_hi, tmp_hi);
      emit_insn (gen_x86_shiftdi_adj_1 (dst_lo, dst_hi, amount, tmp_lo));
    }
  DONE;
})

;; DImode rotate.  On 64-bit targets this is an ordinary binary operator;
;; otherwise only constant counts 1..31 and exactly 32 are handled, and
;; anything else FAILs.
(define_expand "di3"
  [(set (match_operand:DI 0 "shiftdi_operand")
        (any_rotate:DI (match_operand:DI 1 "shiftdi_operand")
                       (match_operand:QI 2 "nonmemory_operand")))]
  ""
{
  if (TARGET_64BIT)
    ix86_expand_binary_operator (, DImode, operands, TARGET_APX_NDD);
  else if (const_1_to_31_operand (operands[2], VOIDmode))
    emit_insn (gen_ix86_di3_doubleword
                (operands[0], operands[1], operands[2]));
  else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 32)
    {
      operands[1] = force_reg (DImode, operands[1]);
      emit_insn (gen_32di2_doubleword (operands[0], operands[1]));
    }
  else
    FAIL;

  DONE;
})

;; Rotate for the SWIM124 modes, expanded as an ordinary binary operator.
(define_expand "3"
  [(set (match_operand:SWIM124 0 "nonimmediate_operand")
        (any_rotate:SWIM124 (match_operand:SWIM124 1 "nonimmediate_operand")
                            (match_operand:QI 2 "nonmemory_operand")))]
  ""
{
  ix86_expand_binary_operator (, mode, operands, TARGET_APX_NDD);
  DONE;
})

;; Avoid useless masking of count operand.
;; Rotate whose count is the QImode subreg of (and reg const), where the
;; constant keeps every bit of bitsize - 1.  The masking is dropped before
;; reload and the count register is used directly via its QImode lowpart.
(define_insn_and_split "*3_mask"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
        (any_rotate:SWI
          (match_operand:SWI 1 "nonimmediate_operand")
          (subreg:QI
            (and
              (match_operand 2 "int248_register_operand" "c")
              (match_operand 3 "const_int_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (, mode, operands) && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (mode)-1)) == GET_MODE_BITSIZE (mode)-1 && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (any_rotate:SWI (match_dup 1)
                           (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
  operands[2] = gen_lowpart (QImode, operands[2]);
})

;; Same simplification when the rotated value is a constant: the constant
;; is first loaded into a fresh register (operand 4).
(define_split
  [(set (match_operand:SWI 0 "register_operand")
        (any_rotate:SWI
          (match_operand:SWI 1 "const_int_operand")
          (subreg:QI
            (and
              (match_operand 2 "int248_register_operand")
              (match_operand 3 "const_int_operand")) 0)))]
  "(INTVAL (operands[3]) & (GET_MODE_BITSIZE (mode) - 1)) == GET_MODE_BITSIZE (mode) - 1"
  [(set (match_dup 4) (match_dup 1))
   (set (match_dup 0)
        (any_rotate:SWI (match_dup 4)
                        (subreg:QI (match_dup 2) 0)))]
  "operands[4] = gen_reg_rtx (mode);")

;; As *3_mask, but the masking AND is already in QImode.
(define_insn_and_split "*3_mask_1"
  [(set (match_operand:SWI 0 "nonimmediate_operand")
        (any_rotate:SWI
          (match_operand:SWI 1 "nonimmediate_operand")
          (and:QI
            (match_operand:QI 2 "register_operand" "c")
            (match_operand:QI 3 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (, mode, operands) && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (mode)-1)) == GET_MODE_BITSIZE (mode)-1 && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (any_rotate:SWI (match_dup 1)
                           (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])])

;; Constant-source counterpart of *3_mask_1.
(define_split
  [(set (match_operand:SWI 0 "register_operand")
        (any_rotate:SWI
          (match_operand:SWI 1 "const_int_operand")
          (and:QI
            (match_operand:QI 2 "register_operand")
            (match_operand:QI 3 "const_int_operand"))))]
  "(INTVAL (operands[3]) & (GET_MODE_BITSIZE (mode) - 1)) == GET_MODE_BITSIZE (mode) - 1"
  [(set (match_dup 4) (match_dup 1))
   (set (match_dup 0)
        (any_rotate:SWI (match_dup 4)
                        (match_dup 2)))]
  "operands[4] = gen_reg_rtx (mode);")

;; Implement rotation using two double-precision
;; shift instructions and a scratch register.
;; After reload, operands 4 and 5 are the two halves of operand 0, the
;; scratch (operand 3) saves the original operand 4, and operands 6 and 7
;; hold the constants bitsize - 1 and bitsize.
(define_insn_and_split "ix86_rotl3_doubleword"
 [(set (match_operand: 0 "register_operand" "=r")
       (rotate: (match_operand: 1 "register_operand" "0")
                (match_operand:QI 2 "" "")))
  (clobber (reg:CC FLAGS_REG))
  (clobber (match_scratch:DWIH 3 "=&r"))]
 ""
 "#"
 "reload_completed"
 [(set (match_dup 3) (match_dup 4))
  (parallel
   [(set (match_dup 4)
         (ior:DWIH (ashift:DWIH (match_dup 4)
                                (and:QI (match_dup 2) (match_dup 6)))
                   (subreg:DWIH
                     (lshiftrt: (zero_extend: (match_dup 5))
                                (minus:QI (match_dup 7)
                                          (and:QI (match_dup 2)
                                                  (match_dup 6)))) 0)))
    (clobber (reg:CC FLAGS_REG))])
  (parallel
   [(set (match_dup 5)
         (ior:DWIH (ashift:DWIH (match_dup 5)
                                (and:QI (match_dup 2) (match_dup 6)))
                   (subreg:DWIH
                     (lshiftrt: (zero_extend: (match_dup 3))
                                (minus:QI (match_dup 7)
                                          (and:QI (match_dup 2)
                                                  (match_dup 6)))) 0)))
    (clobber (reg:CC FLAGS_REG))])]
{
  operands[6] = GEN_INT (GET_MODE_BITSIZE (mode) - 1);
  operands[7] = GEN_INT (GET_MODE_BITSIZE (mode));

  split_double_mode (mode, &operands[0], 1, &operands[4], &operands[5]);
})

;; Rotate-right counterpart: the same structure with the two shift
;; directions exchanged.
(define_insn_and_split "ix86_rotr3_doubleword"
 [(set (match_operand: 0 "register_operand" "=r")
       (rotatert: (match_operand: 1 "register_operand" "0")
                  (match_operand:QI 2 "" "")))
  (clobber (reg:CC FLAGS_REG))
  (clobber (match_scratch:DWIH 3 "=&r"))]
 ""
 "#"
 "reload_completed"
 [(set (match_dup 3) (match_dup 4))
  (parallel
   [(set (match_dup 4)
         (ior:DWIH (lshiftrt:DWIH (match_dup 4)
                                  (and:QI (match_dup 2) (match_dup 6)))
                   (subreg:DWIH
                     (ashift: (zero_extend: (match_dup 5))
                              (minus:QI (match_dup 7)
                                        (and:QI (match_dup 2)
                                                (match_dup 6)))) 0)))
    (clobber (reg:CC FLAGS_REG))])
  (parallel
   [(set (match_dup 5)
         (ior:DWIH (lshiftrt:DWIH (match_dup 5)
                                  (and:QI (match_dup 2) (match_dup 6)))
                   (subreg:DWIH
                     (ashift: (zero_extend: (match_dup 3))
                              (minus:QI (match_dup 7)
                                        (and:QI (match_dup 2)
                                                (match_dup 6)))) 0)))
    (clobber (reg:CC FLAGS_REG))])]
{
  operands[6] = GEN_INT (GET_MODE_BITSIZE (mode) - 1);
  operands[7] = GEN_INT (GET_MODE_BITSIZE (mode));

  split_double_mode (mode, &operands[0], 1, &operands[4], &operands[5]);
})

;; DImode rotate by exactly 32 on 32-bit targets: after reload the two
;; SImode halves are exchanged, using a swap insn when source and
;; destination are the same register pair and two moves otherwise.
(define_insn_and_split "32di2_doubleword"
 [(set (match_operand:DI 0 "register_operand" "=r,r")
       (any_rotate:DI (match_operand:DI 1 "register_operand" "0,r")
                      (const_int 32)))]
 "!TARGET_64BIT"
 "#"
 "&& reload_completed"
 [(set (match_dup 0) (match_dup 3))
  (set (match_dup 2) (match_dup 1))]
{
  split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);
  if (rtx_equal_p (operands[0], operands[1]))
    {
      emit_insn (gen_swapsi (operands[0], operands[2]));
      DONE;
    }
})

;; TImode rotate by exactly 64 on 64-bit targets; same scheme on DImode
;; halves.
(define_insn_and_split "64ti2_doubleword"
 [(set (match_operand:TI 0 "register_operand" "=r,r")
       (any_rotate:TI (match_operand:TI 1 "register_operand" "0,r")
                      (const_int 64)))]
 "TARGET_64BIT"
 "#"
 "&& reload_completed"
 [(set (match_dup 0) (match_dup 3))
  (set (match_dup 2) (match_dup 1))]
{
  split_double_mode (TImode, &operands[0], 2, &operands[0], &operands[2]);
  if (rtx_equal_p (operands[0], operands[1]))
    {
      emit_insn (gen_swapdi (operands[0], operands[2]));
      DONE;
    }
})

;; Immediate predicate accepted by rorx for each mode.
(define_mode_attr rorx_immediate_operand
        [(SI "const_0_to_31_operand")
         (DI "const_0_to_63_operand")])

;; Flag-free rotate right by an immediate; BMI2, and not used when
;; optimizing for size.
(define_insn "*bmi2_rorx3_1"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (rotatert:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")
          (match_operand:QI 2 "" "")))]
  "TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
  "rorx\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "rotatex")
   (set_attr "mode" "")])

;; General rotate.  Alternative 1 (bmi2, type rotatex) outputs "#";
;; alternative 2 is the three-operand apx_ndd form.  Only alternative 0 is
;; preferred when optimizing for size.
(define_insn "*3_1"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
        (any_rotate:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
          (match_operand:QI 2 "nonmemory_operand" "c,,c")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (, mode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_ROTATEX:
      return "#";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          && !use_ndd)
        return "{}\t%0";
      else
        return use_ndd ? "{}\t{%2, %1, %0|%0, %1, %2}"
                       : "{}\t{%2, %0|%0, %2}";
    }
}
  [(set_attr "isa" "*,bmi2,apx_ndd")
   (set_attr "type" "rotate,rotatex,rotate")
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "0")
              (symbol_ref "true")]
           (symbol_ref "false")))
   (set (attr "length_immediate")
     (if_then_else
       (and (eq_attr "type" "rotate")
            (and (match_operand 2 "const1_operand")
                 (ior (match_test "TARGET_SHIFT1")
                      (match_test "optimize_function_for_size_p (cfun)"))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "")])

;; Convert rotate to the rotatex pattern to avoid flags dependency.
;; A left rotate by N becomes a right rotate by (bitsize - N) % bitsize.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
                      (match_operand:QI 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
  [(set (match_dup 0)
        (rotatert:SWI48 (match_dup 1) (match_dup 2)))]
{
  int bitsize = GET_MODE_BITSIZE (mode);

  operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
})

;; A right rotate only needs its flags clobber removed.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
                        (match_operand:QI 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)"
  [(set (match_dup 0)
        (rotatert:SWI48 (match_dup 1) (match_dup 2)))])

;; Flag-free SImode rotate right by an immediate, zero-extended to DImode.
(define_insn "*bmi2_rorxsi3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
                       (match_operand:QI 2 "const_0_to_31_operand"))))]
  "TARGET_64BIT && TARGET_BMI2 && !optimize_function_for_size_p (cfun)"
  "rorx\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "rotatex")
   (set_attr "mode" "SI")])

;; SImode rotate zero-extended to DImode.  TARGET_APX_NDD is passed to
;; ix86_binary_operator_ok, as the other patterns with an apx_ndd
;; alternative do, so that the three-operand alternative can be matched
;; with a memory source that differs from the destination.
(define_insn "*si3_1_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
        (zero_extend:DI
          (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm")
                         (match_operand:QI 2 "nonmemory_operand" "cI,I,cI"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  switch (get_attr_type (insn))
    {
    case TYPE_ROTATEX:
      return "#";

    default:
      if (operands[2] == const1_rtx
          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
          && !use_ndd)
        return "{l}\t%k0";
      else
        return use_ndd ? "{l}\t{%2, %1, %k0|%k0, %1, %2}"
                       : "{l}\t{%2, %k0|%k0, %2}";
    }
}
  [(set_attr "isa" "*,bmi2,apx_ndd")
   (set_attr "type" "rotate,rotatex,rotate")
   (set (attr "preferred_for_size")
     (cond [(eq_attr "alternative" "0")
              (symbol_ref "true")]
           (symbol_ref "false")))
   (set (attr "length_immediate")
     (if_then_else
       (and (eq_attr "type" "rotate")
            (and (match_operand 2 "const1_operand")
                 (ior (match_test "TARGET_SHIFT1")
                      (match_test "optimize_function_for_size_p (cfun)"))))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "SI")])

;; Convert rotate to the rotatex pattern to avoid flags dependency.
;; Post-reload split, BMI2 only, not when optimizing for size: rewrite a
;; zero-extended SImode rotate-left by a constant as the equivalent
;; rotate-right, dropping the FLAGS_REG clobber.  The new count is
;; (32 - count) % 32.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (rotate:SI (match_operand:SI 1 "nonimmediate_operand")
                     (match_operand:QI 2 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_BMI2 && reload_completed
   && !optimize_function_for_size_p (cfun)"
  [(set (match_dup 0)
        (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
{
  int bitsize = GET_MODE_BITSIZE (SImode);

  operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
})

;; Same conditions for a zero-extended rotate-right: only the FLAGS_REG
;; clobber is removed, the count is kept.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (rotatert:SI (match_operand:SI 1 "nonimmediate_operand")
                       (match_operand:QI 2 "const_int_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_BMI2 && reload_completed
   && !optimize_function_for_size_p (cfun)"
  [(set (match_dup 0)
        (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])

;; QImode/HImode rotates.  Alternative 1 is the APX NDD (three-operand)
;; form.  The short one-operand encoding is used for a count of 1 when
;; TARGET_SHIFT1 or optimizing for size, but never for the NDD form.
;; NOTE(review): the <...> iterator substitutions (pattern name, <CODE>,
;; <MODE>mode, <rotate>{<imodesuffix>}, <r>, <S>, mode attribute) had been
;; stripped from this pattern, leaving an invalid condition and empty
;; mnemonics; they are restored here following the file's conventions --
;; confirm against the upstream pattern.
(define_insn "*<insn><mode>3_1"
  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m,r")
        (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm")
                          (match_operand:QI 2 "nonmemory_operand" "c<S>,c<S>")))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands, TARGET_APX_NDD)"
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
      && !use_ndd)
    return "<rotate>{<imodesuffix>}\t%0";
  else
    return use_ndd ? "<rotate>{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
                   : "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  [(set_attr "isa" "*,apx_ndd")
   (set_attr "type" "rotate")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

;; Alternative 1 is needed to work around LRA limitation, see PR82524.
;; Rotate of the low QImode/HImode part of a register (strict_low_part).
;; Alternative 0 ties operand 1 to operand 0 and emits the rotate
;; directly.  Alternative 1 emits "#" and is split after reload into a
;; strict_low_part copy of operand 1 followed by the in-place rotate.
;; NOTE(review): the stripped <...> substitutions (name, <r> register
;; constraints, <rotate>{<imodesuffix>}, mode attribute) are restored
;; here -- confirm against the upstream pattern.
(define_insn_and_split "*<insn><mode>3_1_slp"
  [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
        (any_rotate:SWI12 (match_operand:SWI12 1 "register_operand" "0,!<r>")
                          (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
{
  if (which_alternative)
    return "#";

  if (operands[2] == const1_rtx
      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
    return "<rotate>{<imodesuffix>}\t%0";
  else
    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
}
  "&& reload_completed
   && !(rtx_equal_p (operands[0], operands[1]))"
  [(set (strict_low_part (match_dup 0)) (match_dup 1))
   (parallel
     [(set (strict_low_part (match_dup 0))
           (any_rotate:SWI12 (match_dup 0) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  [(set_attr "type" "rotate")
   (set (attr "length_immediate")
     (if_then_else
       (and (match_operand 2 "const1_operand")
            (ior (match_test "TARGET_SHIFT1")
                 (match_test "optimize_function_for_size_p (cfun)")))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

;; After reload, an in-place HImode rotate by 8 of a QIreg_operand
;; becomes a bswap:HI when TARGET_USE_XCHGB or optimizing for size.
(define_split
  [(set (match_operand:HI 0 "QIreg_operand")
        (any_rotate:HI (match_dup 0) (const_int 8)))
   (clobber (reg:CC FLAGS_REG))]
  "reload_completed
   && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
  [(set (match_dup 0) (bswap:HI (match_dup 0)))])

;; Rotations through carry flag

;; SImode rcr by one: (op1 >> 1) plus the CCC flags test shifted into
;; bit 31.  Alternative 1 is the APX NDD form with a separate source.
;; The "@" template needs one alternative per line.
(define_insn "rcrsi2"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (plus:SI
          (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
                       (const_int 1))
          (ashift:SI (ltu:SI (reg:CCC FLAGS_REG) (const_int 0))
                     (const_int 31))))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "@
   rcr{l}\t%0
   rcr{l}\t{%1, %0|%0, %1}"
  [(set_attr "isa" "*,apx_ndd")
   (set_attr "type" "ishift1")
   (set_attr "memory" "none")
   (set_attr "length_immediate" "0")
   (set_attr "mode" "SI")])

;; DImode counterpart (64-bit only); the flags test lands in bit 63.
(define_insn "rcrdi2"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (plus:DI
          (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,rm")
                       (const_int 1))
          (ashift:DI (ltu:DI (reg:CCC FLAGS_REG) (const_int 0))
                     (const_int 63))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT"
  "@
   rcr{q}\t%0
   rcr{q}\t{%1, %0|%0, %1}"
  [(set_attr "isa" "*,apx_ndd")
   (set_attr "type" "ishift1")
   (set_attr "length_immediate" "0")
   (set_attr "mode" "DI")])

;; Versions of sar and shr that set the carry flag.
;; The flags result is described as an UNSPEC_CC_NE test of bit 0 of the
;; source.  The one-operand short form is used under TARGET_SHIFT1 or
;; when optimizing for size, except for the APX NDD alternative.
;; NOTE(review): name, <shift>{<imodesuffix>} and mode attribute restored
;; from stripped <...> substitutions -- confirm against upstream.
(define_insn "<insn><mode>3_carry"
  [(set (reg:CCC FLAGS_REG)
        (unspec:CCC [(and:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
                                (const_int 1))
                     (const_int 0)] UNSPEC_CC_NE))
   (set (match_operand:SWI48 0 "register_operand" "=r,r")
        (any_shiftrt:SWI48 (match_dup 1) (const_int 1)))]
  ""
{
  bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
  if ((TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
      && !use_ndd)
    return "<shift>{<imodesuffix>}\t%0";
  return use_ndd ? "<shift>{<imodesuffix>}\t{$1, %1, %0|%0, %1, 1}"
                 : "<shift>{<imodesuffix>}\t{$1, %0|%0, 1}";
}
  [(set_attr "isa" "*, apx_ndd")
   (set_attr "type" "ishift1")
   (set (attr "length_immediate")
     (if_then_else
       (ior (match_test "TARGET_SHIFT1")
            (match_test "optimize_function_for_size_p (cfun)"))
       (const_string "0")
       (const_string "*")))
   (set_attr "mode" "<MODE>")])

;; Bit set / bit test instructions

;; %%% bts, btr, btc

;; These instructions are *slow* when applied to memory.

;; Mnemonic selected by the any_or code: ior -> bts, xor -> btc.
(define_code_attr btsc [(ior "bts") (xor "btc")])

;; Set (ior) or complement (xor) the bit of operand 1 selected by the
;; register count in operand 2, expressed as op1 <any_or> (1 << op2).
;; NOTE(review): name, <btsc>{<imodesuffix>} and mode attribute restored
;; from stripped <...> substitutions -- confirm against upstream.
(define_insn "*<btsc><mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (any_or:SWI48
          (ashift:SWI48 (const_int 1)
                        (match_operand:QI 2 "register_operand" "r"))
          (match_operand:SWI48 1 "register_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT"
  "<btsc>{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "<MODE>")])

;; Avoid useless masking of count operand.
;; bts/btc whose count is (subreg:QI (and X MASK) 0).  When MASK keeps
;; all low bits up to the operand width minus one, the AND is dropped
;; and the low QImode part of X is used as the count directly.
;; Pre-reload only.
;; NOTE(review): name and <MODE>mode restored from stripped <...>
;; substitutions (a bare "mode" is not a valid identifier in the
;; condition) -- confirm against upstream.
(define_insn_and_split "*<btsc><mode>_mask"
  [(set (match_operand:SWI48 0 "register_operand")
        (any_or:SWI48
          (ashift:SWI48
            (const_int 1)
            (subreg:QI
              (and
                (match_operand 1 "int248_register_operand")
                (match_operand 2 "const_int_operand")) 0))
          (match_operand:SWI48 3 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT
   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (any_or:SWI48
             (ashift:SWI48 (const_int 1)
                           (match_dup 1))
             (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])]
{
  operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
  operands[1] = gen_lowpart (QImode, operands[1]);
})

;; Same simplification when the masking AND is already in QImode, so
;; operand 1 can be reused as the count without conversion.
(define_insn_and_split "*<btsc><mode>_mask_1"
  [(set (match_operand:SWI48 0 "register_operand")
        (any_or:SWI48
          (ashift:SWI48
            (const_int 1)
            (and:QI
              (match_operand:QI 1 "register_operand")
              (match_operand:QI 2 "const_int_operand")))
          (match_operand:SWI48 3 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT
   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (any_or:SWI48
             (ashift:SWI48 (const_int 1)
                           (match_dup 1))
             (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])])

;; Clear one bit: operand 1 ANDed with the constant -2 rotated left by
;; the register count in operand 2.
;; NOTE(review): name, {<imodesuffix>} and mode attribute restored from
;; stripped <...> substitutions -- confirm against upstream.
(define_insn "*btr<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (rotate:SWI48 (const_int -2)
                        (match_operand:QI 2 "register_operand" "r"))
          (match_operand:SWI48 1 "register_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT"
  "btr{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "<MODE>")])

;; Avoid useless masking of count operand.
;; btr whose count is (subreg:QI (and X MASK) 0) with a MASK that keeps
;; every bit below the operand width: drop the AND and use the low
;; QImode part of X.  Pre-reload only.
;; NOTE(review): in this group the pattern names and <MODE>mode were
;; restored from stripped <...> substitutions -- confirm against upstream.
(define_insn_and_split "*btr<mode>_mask"
  [(set (match_operand:SWI48 0 "register_operand")
        (and:SWI48
          (rotate:SWI48
            (const_int -2)
            (subreg:QI
              (and
                (match_operand 1 "int248_register_operand")
                (match_operand 2 "const_int_operand")) 0))
          (match_operand:SWI48 3 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT
   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (and:SWI48
             (rotate:SWI48 (const_int -2)
                           (match_dup 1))
             (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])]
{
  operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
  operands[1] = gen_lowpart (QImode, operands[1]);
})

;; Variant where the masking AND is already a QImode operation.
(define_insn_and_split "*btr<mode>_mask_1"
  [(set (match_operand:SWI48 0 "register_operand")
        (and:SWI48
          (rotate:SWI48
            (const_int -2)
            (and:QI
              (match_operand:QI 1 "register_operand")
              (match_operand:QI 2 "const_int_operand")))
          (match_operand:SWI48 3 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT
   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (and:SWI48
             (rotate:SWI48 (const_int -2)
                           (match_dup 1))
             (match_dup 3)))
      (clobber (reg:CC FLAGS_REG))])])

;; QImode/HImode bit clear written as an AND with the low part of an
;; SImode rotate of -2.  Split into the SImode form, operating on
;; SImode lowpart subregs of the destination and of operand 1 (which is
;; forced into a register first).
(define_insn_and_split "*btr<mode>_1"
  [(set (match_operand:SWI12 0 "register_operand")
        (and:SWI12
          (subreg:SWI12
            (rotate:SI (const_int -2)
                       (match_operand:QI 2 "register_operand")) 0)
          (match_operand:SWI12 1 "nonimmediate_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (and:SI (rotate:SI (const_int -2) (match_dup 2))
                   (match_dup 1)))
      (clobber (reg:CC FLAGS_REG))])]
{
  operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
  operands[1] = force_reg (<MODE>mode, operands[1]);
  operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
})

;; Clearing a single-bit zero_extract of a QImode/HImode operand.  The
;; split only fires for a memory operand: load it into a fresh pseudo,
;; clear the bit with the SImode form into a second pseudo, store back.
(define_insn_and_split "*btr<mode>_2"
  [(set (zero_extract:HI
          (match_operand:SWI12 0 "nonimmediate_operand")
          (const_int 1)
          (match_operand:QI 1 "register_operand"))
        (const_int 0))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT && ix86_pre_reload_split ()"
  "#"
  "&& MEM_P (operands[0])"
  [(set (match_dup 2) (match_dup 0))
   (parallel
     [(set (match_dup 3)
           (and:SI (rotate:SI (const_int -2) (match_dup 1))
                   (match_dup 4)))
      (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 0) (match_dup 5))]
{
  operands[2] = gen_reg_rtx (<MODE>mode);
  operands[5] = gen_reg_rtx (<MODE>mode);
  operands[3] = lowpart_subreg (SImode, operands[5], <MODE>mode);
  operands[4] = lowpart_subreg (SImode, operands[2], <MODE>mode);
})

;; Register case of the above: operate in place on the SImode lowpart
;; of the register.
(define_split
  [(set (zero_extract:HI
          (match_operand:SWI12 0 "register_operand")
          (const_int 1)
          (match_operand:QI 1 "register_operand"))
        (const_int 0))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT && ix86_pre_reload_split ()"
  [(parallel
     [(set (match_dup 0)
           (and:SI (rotate:SI (const_int -2) (match_dup 1))
                   (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  operands[2] = lowpart_subreg (SImode, operands[0], <MODE>mode);
  operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
})

;; These instructions are never faster than the corresponding
;; and/ior/xor operations when using immediate operand, so with
;; 32-bit there's no point.  But in 64-bit, we can't hold the
;; relevant immediates within the instruction itself, so operating
;; on bits in the high 32-bits of a register becomes easier.
;;
;; These are slow on Nocona, but fast on Athlon64.  We do require the use
;; of btrq and btcq for corner cases of post-reload expansion of absdf and
;; negdf respectively, so they can never be disabled entirely.
;; 64-bit immediate-index bit operations on a DImode register or memory
;; operand.  Each is available when TARGET_USE_BT, and unconditionally
;; once reload has completed.

;; Set bit %1 of operand 0.
(define_insn "*btsq_imm"
  [(set (zero_extract:DI
          (match_operand:DI 0 "nonimmediate_operand" "+rm")
          (const_int 1)
          (match_operand:QI 1 "const_0_to_63_operand"))
        (const_int 1))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
  "bts{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "DI")])

;; Clear bit %1 of operand 0.
(define_insn "*btrq_imm"
  [(set (zero_extract:DI
          (match_operand:DI 0 "nonimmediate_operand" "+rm")
          (const_int 1)
          (match_operand:QI 1 "const_0_to_63_operand"))
        (const_int 0))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
  "btr{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "DI")])

;; Complement bit %1 of operand 0 (the bit is set to its own negation).
(define_insn "*btcq_imm"
  [(set (zero_extract:DI
          (match_operand:DI 0 "nonimmediate_operand" "+rm")
          (const_int 1)
          (match_operand:QI 1 "const_0_to_63_operand"))
        (not:DI
          (zero_extract:DI
            (match_dup 0)
            (const_int 1)
            (match_dup 1))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
  "btc{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "znver1_decode" "double")
   (set_attr "mode" "DI")])

;; Allow Nocona to avoid these instructions if a register is available.
;; With a scratch register available and !TARGET_USE_BT, replace the
;; immediate bit-set by an IOR with the constant 1 << bit.  If that
;; constant is not an x86_64_immediate_operand it is first loaded into
;; the scratch.
(define_peephole2
  [(match_scratch:DI 2 "r")
   (parallel [(set (zero_extract:DI
                     (match_operand:DI 0 "nonimmediate_operand")
                     (const_int 1)
                     (match_operand:QI 1 "const_0_to_63_operand"))
                   (const_int 1))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_64BIT && !TARGET_USE_BT"
  [(parallel [(set (match_dup 0)
                   (ior:DI (match_dup 0) (match_dup 3)))
              (clobber (reg:CC FLAGS_REG))])]
{
  int i = INTVAL (operands[1]);

  operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);

  if (!x86_64_immediate_operand (operands[3], DImode))
    {
      emit_move_insn (operands[2], operands[3]);
      operands[3] = operands[2];
    }
})

;; Likewise for bit-clear: AND with ~(1 << bit).
(define_peephole2
  [(match_scratch:DI 2 "r")
   (parallel [(set (zero_extract:DI
                     (match_operand:DI 0 "nonimmediate_operand")
                     (const_int 1)
                     (match_operand:QI 1 "const_0_to_63_operand"))
                   (const_int 0))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_64BIT && !TARGET_USE_BT"
  [(parallel [(set (match_dup 0)
                   (and:DI (match_dup 0) (match_dup 3)))
              (clobber (reg:CC FLAGS_REG))])]
{
  int i = INTVAL (operands[1]);

  operands[3] = gen_int_mode (~(HOST_WIDE_INT_1U << i), DImode);

  if (!x86_64_immediate_operand (operands[3], DImode))
    {
      emit_move_insn (operands[2], operands[3]);
      operands[3] = operands[2];
    }
})

;; Likewise for bit-complement: XOR with 1 << bit.
(define_peephole2
  [(match_scratch:DI 2 "r")
   (parallel [(set (zero_extract:DI
                     (match_operand:DI 0 "nonimmediate_operand")
                     (const_int 1)
                     (match_operand:QI 1 "const_0_to_63_operand"))
              (not:DI (zero_extract:DI
                        (match_dup 0)
                        (const_int 1)
                        (match_dup 1))))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_64BIT && !TARGET_USE_BT"
  [(parallel [(set (match_dup 0)
                   (xor:DI (match_dup 0) (match_dup 3)))
              (clobber (reg:CC FLAGS_REG))])]
{
  int i = INTVAL (operands[1]);

  operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);

  if (!x86_64_immediate_operand (operands[3], DImode))
    {
      emit_move_insn (operands[2], operands[3]);
      operands[3] = operands[2];
    }
})

;; %%% bt

;; Bit test: compare a one-bit zero_extract against zero in CCCmode.
;; The insn "mode" attribute is SI for a constant bit index below 32 and
;; the iterator mode otherwise; the output code picks btl or btq from it.
;; NOTE(review): name, the <S> immediate constraints and the "<MODE>"
;; attribute value were restored from stripped <...> substitutions --
;; confirm against upstream.
(define_insn "*bt<mode>"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (zero_extract:SWI48
            (match_operand:SWI48 0 "nonimmediate_operand" "r,m")
            (const_int 1)
            (match_operand:QI 1 "nonmemory_operand" "q<S>,<S>"))
          (const_int 0)))]
  ""
{
  switch (get_attr_mode (insn))
    {
    case MODE_SI:
      return "bt{l}\t{%k1, %k0|%k0, %k1}";

    case MODE_DI:
      return "bt{q}\t{%q1, %0|%0, %q1}";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set (attr "mode")
        (if_then_else
          (and (match_test "CONST_INT_P (operands[1])")
               (match_test "INTVAL (operands[1]) < 32"))
          (const_string "SI")
          (const_string "<MODE>")))])

;; Bit test whose index is (subreg:QI (and X MASK) 0) with a MASK that
;; keeps every bit below the tested operand's width: drop the AND.
;; Two iterators are in use here, so the bit size is taken explicitly
;; from the SWI48 iterator.
;; NOTE(review): name and <SWI48:MODE>mode restored -- confirm upstream.
(define_insn_and_split "*bt<SWI48:mode>_mask"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (zero_extract:SWI48
            (match_operand:SWI48 0 "nonimmediate_operand" "r,m")
            (const_int 1)
            (subreg:QI
              (and:SWI248
                (match_operand:SWI248 1 "register_operand")
                (match_operand 2 "const_int_operand")) 0))
          (const_int 0)))]
  "TARGET_USE_BT
   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<SWI48:MODE>mode)-1))
      == GET_MODE_BITSIZE (<SWI48:MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (zero_extract:SWI48 (match_dup 0) (const_int 1) (match_dup 1))
          (const_int 0)))]
  "operands[1] = gen_lowpart (QImode, operands[1]);")

;; Conditional jump on a single extracted bit.  Split into a bt compare
;; plus a jump on the flags; the comparison code is reversed on a copy
;; of the operator.  For a constant index the pattern only applies when
;; the index is below the mode size and at least 32 (8 when optimizing
;; for size); for a variable index the tested operand must not be memory.
;; NOTE(review): name and <MODE>mode restored -- confirm upstream.
(define_insn_and_split "*jcc_bt<mode>"
  [(set (pc)
        (if_then_else (match_operator 0 "bt_comparison_operator"
                        [(zero_extract:SWI48
                           (match_operand:SWI48 1 "nonimmediate_operand")
                           (const_int 1)
                           (match_operand:QI 2 "nonmemory_operand"))
                         (const_int 0)])
                      (label_ref (match_operand 3))
                      (pc)))
   (clobber (reg:CC FLAGS_REG))]
  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
   && (CONST_INT_P (operands[2])
       ? (INTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)
          && INTVAL (operands[2])
               >= (optimize_function_for_size_p (cfun) ? 8 : 32))
       : !memory_operand (operands[1], <MODE>mode))
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (zero_extract:SWI48
            (match_dup 1)
            (const_int 1)
            (match_dup 2))
          (const_int 0)))
   (set (pc)
        (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
                      (label_ref (match_dup 3))
                      (pc)))]
{
  operands[0] = shallow_copy_rtx (operands[0]);
  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
})

;; Avoid useless masking of bit offset operand.
;; Here the index is a QImode AND of a register with a constant.
(define_insn_and_split "*jcc_bt<mode>_mask"
  [(set (pc)
        (if_then_else (match_operator 0 "bt_comparison_operator"
                        [(zero_extract:SWI48
                           (match_operand:SWI48 1 "register_operand")
                           (const_int 1)
                           (and:QI
                             (match_operand:QI 2 "register_operand")
                             (match_operand 3 "const_int_operand")))
                         (const_int 0)])
                      (label_ref (match_operand 4))
                      (pc)))
   (clobber (reg:CC FLAGS_REG))]
  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (zero_extract:SWI48
            (match_dup 1)
            (const_int 1)
            (match_dup 2))
          (const_int 0)))
   (set (pc)
        (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
                      (label_ref (match_dup 4))
                      (pc)))]
{
  operands[0] = shallow_copy_rtx (operands[0]);
  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
})

;; Avoid useless masking of bit offset operand.
;; Jump on a tested bit whose index is (subreg:QI (and X MASK) 0); the
;; AND is dropped and the low QImode part of X is used as the index.
;; NOTE(review): throughout this group the pattern names and the
;; <MODE>mode / <SWI48:MODE>mode expressions were restored from stripped
;; <...> substitutions -- confirm against upstream.
(define_insn_and_split "*jcc_bt<SWI48:mode>_mask_1"
  [(set (pc)
        (if_then_else (match_operator 0 "bt_comparison_operator"
                        [(zero_extract:SWI48
                           (match_operand:SWI48 1 "register_operand")
                           (const_int 1)
                           (subreg:QI
                             (and:SWI248
                               (match_operand:SWI248 2 "register_operand")
                               (match_operand 3 "const_int_operand")) 0))
                         (const_int 0)])
                      (label_ref (match_operand 4))
                      (pc)))
   (clobber (reg:CC FLAGS_REG))]
  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<SWI48:MODE>mode)-1))
      == GET_MODE_BITSIZE (<SWI48:MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (zero_extract:SWI48
            (match_dup 1)
            (const_int 1)
            (match_dup 2))
          (const_int 0)))
   (set (pc)
        (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
                      (label_ref (match_dup 4))
                      (pc)))]
{
  operands[0] = shallow_copy_rtx (operands[0]);
  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
  operands[2] = gen_lowpart (QImode, operands[2]);
})

;; Help combine recognize bt followed by cmov
;; The emitted conditional move always uses EQ on the flags, so the two
;; arms are swapped when the original comparison was EQ.
(define_split
  [(set (match_operand:SWI248 0 "register_operand")
        (if_then_else:SWI248
          (match_operator 5 "bt_comparison_operator"
            [(zero_extract:SWI48
               (match_operand:SWI48 1 "register_operand")
               (const_int 1)
               (match_operand:QI 2 "register_operand"))
             (const_int 0)])
          (match_operand:SWI248 3 "nonimmediate_operand")
          (match_operand:SWI248 4 "nonimmediate_operand")))]
  "TARGET_USE_BT && TARGET_CMOVE
   && !(MEM_P (operands[3]) && MEM_P (operands[4]))
   && ix86_pre_reload_split ()"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
          (const_int 0)))
   (set (match_dup 0)
        (if_then_else:SWI248 (eq (reg:CCC FLAGS_REG) (const_int 0))
                             (match_dup 3)
                             (match_dup 4)))]
{
  if (GET_CODE (operands[5]) == EQ)
    std::swap (operands[3], operands[4]);
})

;; Help combine recognize bt followed by setc
(define_insn_and_split "*bt<mode>_setcqi"
  [(set (subreg:SWI48 (match_operand:QI 0 "register_operand") 0)
        (zero_extract:SWI48
          (match_operand:SWI48 1 "register_operand")
          (const_int 1)
          (match_operand:QI 2 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
          (const_int 0)))
   (set (match_dup 0)
        (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))])

;; Help combine recognize bt followed by setnc
(define_insn_and_split "*bt<mode>_setncqi"
  [(set (match_operand:QI 0 "register_operand")
        (and:QI
          (not:QI
            (subreg:QI
              (lshiftrt:SWI48 (match_operand:SWI48 1 "register_operand")
                              (match_operand:QI 2 "register_operand")) 0))
          (const_int 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
          (const_int 0)))
   (set (match_dup 0)
        (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))])

;; Full-width variant: the QImode flag result goes to a fresh pseudo and
;; is then zero-extended into the destination.
(define_insn_and_split "*bt<mode>_setnc<mode>"
  [(set (match_operand:SWI48 0 "register_operand")
        (and:SWI48
          (not:SWI48
            (lshiftrt:SWI48 (match_operand:SWI48 1 "register_operand")
                            (match_operand:QI 2 "register_operand")))
          (const_int 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
          (const_int 0)))
   (set (match_dup 3)
        (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))
   (set (match_dup 0) (zero_extend:SWI48 (match_dup 3)))]
  "operands[3] = gen_reg_rtx (QImode);")

;; Help combine recognize bt followed by setnc (PR target/110588)
(define_insn_and_split "*bt<mode>_setncqi_2"
  [(set (match_operand:QI 0 "register_operand")
        (eq:QI
          (zero_extract:SWI48
            (match_operand:SWI48 1 "register_operand")
            (const_int 1)
            (match_operand:QI 2 "register_operand"))
          (const_int 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
          (const_int 0)))
   (set (match_dup 0)
        (ne:QI (reg:CCC FLAGS_REG) (const_int 0)))])

;; Help combine recognize bt followed by setc
;; The bit index is (subreg:QI (and X MASK) 0); the AND is dropped.
(define_insn_and_split "*bt<mode>_setc<mode>_mask"
  [(set (match_operand:SWI48 0 "register_operand")
        (zero_extract:SWI48
          (match_operand:SWI48 1 "register_operand")
          (const_int 1)
          (subreg:QI
            (and:SWI48
              (match_operand:SWI48 2 "register_operand")
              (match_operand 3 "const_int_operand")) 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_BT
   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
      == GET_MODE_BITSIZE (<MODE>mode)-1
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (zero_extract:SWI48 (match_dup 1) (const_int 1) (match_dup 2))
          (const_int 0)))
   (set (match_dup 3)
        (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))
   (set (match_dup 0) (zero_extend:SWI48 (match_dup 3)))]
{
  operands[2] = gen_lowpart (QImode, operands[2]);
  operands[3] = gen_reg_rtx (QImode);
})

;; Store-flag instructions.

;; A comparison of ~op2 against op3 is rewritten as a CCCmode compare of
;; (op2 + op3) against op2, keeping the original comparison operator.
(define_split
  [(set (match_operand:QI 0 "nonimmediate_operand")
        (match_operator:QI 1 "add_comparison_operator"
          [(not:SWI (match_operand:SWI 2 "register_operand"))
           (match_operand:SWI 3 "nonimmediate_operand")]))]
  ""
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC
          (plus:SWI (match_dup 2) (match_dup 3))
          (match_dup 2)))
   (set (match_dup 0)
        (match_op_dup 1 [(reg:CCC FLAGS_REG) (const_int 0)]))])

;; Unsigned compare of a DImode register against a constant C where
;; C + 1 is a power of two 2^k with k in [32, 63]: test (op2 >> k)
;; against zero instead.  GTU becomes NE and LEU becomes EQ.
(define_split
  [(set (match_operand:QI 0 "nonimmediate_operand")
        (match_operator:QI 1 "shr_comparison_operator"
          [(match_operand:DI 2 "register_operand")
           (match_operand 3 "const_int_operand")]))]
  "TARGET_64BIT
   && IN_RANGE (exact_log2 (UINTVAL (operands[3]) + 1), 32, 63)"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ
          (lshiftrt:DI (match_dup 2) (match_dup 4))
          (const_int 0)))
   (set (match_dup 0)
        (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)]))]
{
  enum rtx_code new_code;

  operands[1] = shallow_copy_rtx (operands[1]);
  switch (GET_CODE (operands[1]))
    {
    case GTU: new_code = NE; break;
    case LEU: new_code = EQ; break;
    default: gcc_unreachable ();
    }
  PUT_CODE (operands[1], new_code);

  operands[4] = GEN_INT (exact_log2 (UINTVAL (operands[3]) + 1));
})

;; For all sCOND expanders, also expand the compare or test insn that
;; generates cc0.  Generate an equality comparison if `seq' or `sne'.

;; DImode setcc: after reload, set the low QImode part of the
;; destination from the flags, then zero-extend it to DImode.
(define_insn_and_split "*setcc_di_1"
  [(set (match_operand:DI 0 "register_operand" "=q")
        (match_operator:DI 1 "ix86_comparison_operator"
          [(reg FLAGS_REG) (const_int 0)]))]
  "TARGET_64BIT && !TARGET_PARTIAL_REG_STALL"
  "#"
  "&& reload_completed"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (zero_extend:DI (match_dup 2)))]
{
  operands[1] = shallow_copy_rtx (operands[1]);
  PUT_MODE (operands[1], QImode);
  operands[2] = gen_lowpart (QImode, operands[0]);
})

;; HImode/SImode setcc when TARGET_ZERO_EXTEND_WITH_AND and optimizing
;; for speed; the zero-extension half of the split carries a flags
;; clobber.
(define_insn_and_split "*setcc_<mode>_1_and"
  [(set (match_operand:SWI24 0 "register_operand" "=q")
        (match_operator:SWI24 1 "ix86_comparison_operator"
          [(reg FLAGS_REG) (const_int 0)]))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_PARTIAL_REG_STALL
   && TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
  "#"
  "&& reload_completed"
  [(set (match_dup 2) (match_dup 1))
   (parallel [(set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[1] = shallow_copy_rtx (operands[1]);
  PUT_MODE (operands[1], QImode);
  operands[2] = gen_lowpart (QImode, operands[0]);
})

;; HImode/SImode setcc otherwise; the zero-extension has no clobber.
(define_insn_and_split "*setcc_<mode>_1_movzbl"
  [(set (match_operand:SWI24 0 "register_operand" "=q")
        (match_operator:SWI24 1 "ix86_comparison_operator"
          [(reg FLAGS_REG) (const_int 0)]))]
  "!TARGET_PARTIAL_REG_STALL
   && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
  "#"
  "&& reload_completed"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (zero_extend:SWI24 (match_dup 2)))]
{
  operands[1] = shallow_copy_rtx (operands[1]);
  PUT_MODE (operands[1], QImode);
  operands[2] = gen_lowpart (QImode, operands[0]);
})

;; The basic QImode setcc to a byte register or memory.
(define_insn "*setcc_qi"
  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (match_operator:QI 1 "ix86_comparison_operator"
          [(reg FLAGS_REG) (const_int 0)]))]
  ""
  "set%C1\t%0"
  [(set_attr "type" "setcc")
   (set_attr "mode" "QI")])

;; setcc into the low byte of a register, preserving the upper bits.
(define_insn "*setcc_qi_slp"
  [(set (strict_low_part (match_operand:QI 0 "register_operand" "+q"))
        (match_operator:QI 1 "ix86_comparison_operator"
          [(reg FLAGS_REG) (const_int 0)]))]
  ""
  "set%C1\t%0"
  [(set_attr "type" "setcc")
   (set_attr "mode" "QI")])

;; In general it is not safe to assume too much about CCmode registers,
;; so simplify-rtx stops when it sees a second one.  Under certain
;; conditions this is safe on x86, so help combine not create
;;
;;	seta	%al
;;	testb	%al, %al
;;	sete	%al

;; (ne (flags-compare) 0) is the flags comparison itself, in QImode.
(define_split
  [(set (match_operand:QI 0 "nonimmediate_operand")
        (ne:QI (match_operator 1 "ix86_comparison_operator"
                 [(reg FLAGS_REG) (const_int 0)])
               (const_int 0)))]
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = shallow_copy_rtx (operands[1]);
  PUT_MODE (operands[1], QImode);
})

(define_split
  [(set (strict_low_part (match_operand:QI 0 "register_operand"))
        (ne:QI (match_operator 1 "ix86_comparison_operator"
                 [(reg FLAGS_REG) (const_int 0)])
               (const_int 0)))]
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = shallow_copy_rtx (operands[1]);
  PUT_MODE (operands[1], QImode);
})

;; (eq (flags-compare) 0) is the reversed flags comparison; the split
;; FAILs when the reversed operator is not a valid ix86 comparison.
(define_split
  [(set (match_operand:QI 0 "nonimmediate_operand")
        (eq:QI (match_operator 1 "ix86_comparison_operator"
                 [(reg FLAGS_REG) (const_int 0)])
               (const_int 0)))]
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = shallow_copy_rtx (operands[1]);
  PUT_MODE (operands[1], QImode);
  PUT_CODE (operands[1],
            ix86_reverse_condition (GET_CODE (operands[1]),
                                    GET_MODE (XEXP (operands[1], 0))));

  /* Make sure that (a) the CCmode we have for the flags is strong
     enough for the reversed compare or (b) we have a valid FP compare.  */
  if (! ix86_comparison_operator (operands[1], VOIDmode))
    FAIL;
})

(define_split
  [(set (strict_low_part (match_operand:QI 0 "register_operand"))
        (eq:QI (match_operator 1 "ix86_comparison_operator"
                 [(reg FLAGS_REG) (const_int 0)])
               (const_int 0)))]
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = shallow_copy_rtx (operands[1]);
  PUT_MODE (operands[1], QImode);
  PUT_CODE (operands[1],
            ix86_reverse_condition (GET_CODE (operands[1]),
                                    GET_MODE (XEXP (operands[1], 0))));

  /* Make sure that (a) the CCmode we have for the flags is strong
     enough for the reversed compare or (b) we have a valid FP compare.  */
  if (! ix86_comparison_operator (operands[1], VOIDmode))
    FAIL;
})

;; Eliminate redundant compare between set{z,nz} and j{z,nz}:
;; setz %al; test %al,%al; jz <...> -> setz %al; jnz <...> and
;; setnz %al, test %al,%al; jz <...> -> setnz %al; jz <...>.
;; Requires the flags to be dead after the jump.
(define_peephole2
  [(set (match_operand:QI 0 "nonimmediate_operand")
        (match_operator:QI 1 "bt_comparison_operator"
          [(reg:CCZ FLAGS_REG) (const_int 0)]))
   (set (reg:CCZ FLAGS_REG)
        (compare:CCZ (match_dup 0) (const_int 0)))
   (set (pc)
        (if_then_else (match_operator 2 "bt_comparison_operator"
                        [(reg:CCZ FLAGS_REG) (const_int 0)])
                      (match_operand 3)
                      (pc)))]
  "peep2_regno_dead_p (3, FLAGS_REG)"
  [(set (match_dup 0)
        (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)]))
   (set (pc)
        (if_then_else (match_dup 2)
                      (match_dup 3)
                      (pc)))]
{
  if (GET_CODE (operands[1]) == EQ)
    {
      operands[2] = shallow_copy_rtx (operands[2]);
      PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
    }
})

;; The SSE store flag instructions save 0 or 0xffffffff to the result.
;; Subsequent logical operations are used to imitate conditional moves.
;; 0xffffffff is NaN, but not in normalized form, so we can't represent
;; it directly.
(define_insn "setcc__sse" [(set (match_operand:MODEF 0 "register_operand" "=x,x") (match_operator:MODEF 3 "sse_comparison_operator" [(match_operand:MODEF 1 "register_operand" "0,x") (match_operand:MODEF 2 "nonimmediate_operand" "xm,xjm")]))] "SSE_FLOAT_MODE_P (mode)" "@ cmp%D3\t{%2, %0|%0, %2} vcmp%D3\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "addr" "*,gpr16") (set_attr "type" "ssecmp") (set_attr "length_immediate" "1") (set_attr "prefix" "orig,vex") (set_attr "mode" "")]) (define_insn "setcc_hf_mask" [(set (match_operand:QI 0 "register_operand" "=k") (unspec:QI [(match_operand:HF 1 "register_operand" "v") (match_operand:HF 2 "nonimmediate_operand" "vm") (match_operand:SI 3 "const_0_to_31_operand")] UNSPEC_PCMP))] "TARGET_AVX512FP16" "vcmpsh\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssecmp") (set_attr "prefix" "evex") (set_attr "mode" "HF")]) ;; Basic conditional jump instructions. (define_split [(set (pc) (if_then_else (match_operator 1 "add_comparison_operator" [(not:SWI (match_operand:SWI 2 "register_operand")) (match_operand:SWI 3 "nonimmediate_operand")]) (label_ref (match_operand 0)) (pc)))] "" [(set (reg:CCC FLAGS_REG) (compare:CCC (plus:SWI (match_dup 2) (match_dup 3)) (match_dup 2))) (set (pc) (if_then_else (match_op_dup 1 [(reg:CCC FLAGS_REG) (const_int 0)]) (label_ref (match_operand 0)) (pc)))]) (define_split [(set (pc) (if_then_else (match_operator 1 "shr_comparison_operator" [(match_operand:DI 2 "register_operand") (match_operand 3 "const_int_operand")]) (label_ref (match_operand 0)) (pc)))] "TARGET_64BIT && IN_RANGE (exact_log2 (UINTVAL (operands[3]) + 1), 32, 63)" [(set (reg:CCZ FLAGS_REG) (compare:CCZ (lshiftrt:DI (match_dup 2) (match_dup 4)) (const_int 0))) (set (pc) (if_then_else (match_op_dup 1 [(reg:CCZ FLAGS_REG) (const_int 0)]) (label_ref (match_operand 0)) (pc)))] { enum rtx_code new_code; operands[1] = shallow_copy_rtx (operands[1]); switch (GET_CODE (operands[1])) { case GTU: new_code = NE; break; case 
LEU: new_code = EQ; break; default: gcc_unreachable (); } PUT_CODE (operands[1], new_code); operands[4] = GEN_INT (exact_log2 (UINTVAL (operands[3]) + 1)); }) ;; We ignore the overflow flag for signed branch instructions. (define_insn "*jcc" [(set (pc) (if_then_else (match_operator 1 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) (label_ref (match_operand 0)) (pc)))] "" "%!%+j%C1\t%l0" [(set_attr "type" "ibr") (set_attr "modrm" "0") (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -126)) (lt (minus (match_dup 0) (pc)) (const_int 128))) (const_int 2) (const_int 6)))]) ;; In general it is not safe to assume too much about CCmode registers, ;; so simplify-rtx stops when it sees a second one. Under certain ;; conditions this is safe on x86, so help combine not create ;; ;; seta %al ;; testb %al, %al ;; je Lfoo (define_split [(set (pc) (if_then_else (ne (match_operator 0 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) (const_int 0)) (label_ref (match_operand 1)) (pc)))] "" [(set (pc) (if_then_else (match_dup 0) (label_ref (match_dup 1)) (pc)))] { operands[0] = shallow_copy_rtx (operands[0]); PUT_MODE (operands[0], VOIDmode); }) (define_split [(set (pc) (if_then_else (eq (match_operator 0 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) (const_int 0)) (label_ref (match_operand 1)) (pc)))] "" [(set (pc) (if_then_else (match_dup 0) (label_ref (match_dup 1)) (pc)))] { operands[0] = shallow_copy_rtx (operands[0]); PUT_MODE (operands[0], VOIDmode); PUT_CODE (operands[0], ix86_reverse_condition (GET_CODE (operands[0]), GET_MODE (XEXP (operands[0], 0)))); /* Make sure that (a) the CCmode we have for the flags is strong enough for the reversed compare or (b) we have a valid FP compare. */ if (! 
ix86_comparison_operator (operands[0], VOIDmode)) FAIL; }) ;; Unconditional and other jump instructions (define_insn "jump" [(set (pc) (label_ref (match_operand 0)))] "" "%!jmp\t%l0" [(set_attr "type" "ibr") (set_attr "modrm" "0") (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -126)) (lt (minus (match_dup 0) (pc)) (const_int 128))) (const_int 2) (const_int 5)))]) (define_expand "indirect_jump" [(set (pc) (match_operand 0 "indirect_branch_operand"))] "" { if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER) operands[0] = convert_memory_address (word_mode, operands[0]); cfun->machine->has_local_indirect_jump = true; }) (define_insn "*indirect_jump" [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw"))] "" "* return ix86_output_indirect_jmp (operands[0]);" [(set (attr "type") (if_then_else (match_test "(cfun->machine->indirect_branch_type != indirect_branch_keep)") (const_string "multi") (const_string "ibr"))) (set_attr "length_immediate" "0")]) (define_expand "tablejump" [(parallel [(set (pc) (match_operand 0 "indirect_branch_operand")) (use (label_ref (match_operand 1)))])] "" { /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit) relative. Convert the relative address to an absolute address. */ if (flag_pic) { rtx op0, op1; enum rtx_code code; /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. 
*/ if (TARGET_64BIT || TARGET_VXWORKS_RTP) { code = PLUS; op0 = operands[0]; op1 = gen_rtx_LABEL_REF (Pmode, operands[1]); } else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA) { code = PLUS; op0 = operands[0]; op1 = pic_offset_table_rtx; } else { code = MINUS; op0 = pic_offset_table_rtx; op1 = operands[0]; } operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0, OPTAB_DIRECT); } if (TARGET_X32 || TARGET_INDIRECT_BRANCH_REGISTER) operands[0] = convert_memory_address (word_mode, operands[0]); cfun->machine->has_local_indirect_jump = true; }) (define_insn "*tablejump_1" [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rBw")) (use (label_ref (match_operand 1)))] "" "* return ix86_output_indirect_jmp (operands[0]);" [(set (attr "type") (if_then_else (match_test "(cfun->machine->indirect_branch_type != indirect_branch_keep)") (const_string "multi") (const_string "ibr"))) (set_attr "length_immediate" "0")]) ;; Convert setcc + movzbl to xor + setcc if operands don't overlap. (define_peephole2 [(set (match_operand 4 "flags_reg_operand") (match_operand 0)) (set (match_operand:QI 1 "register_operand") (match_operator:QI 2 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)])) (set (match_operand 3 "any_QIreg_operand") (zero_extend (match_dup 1)))] "(peep2_reg_dead_p (3, operands[1]) || operands_match_p (operands[1], operands[3])) && ! 
reg_overlap_mentioned_p (operands[3], operands[0]) && peep2_regno_dead_p (0, FLAGS_REG)" [(set (match_dup 4) (match_dup 0)) (set (strict_low_part (match_dup 5)) (match_dup 2))] { operands[5] = gen_lowpart (QImode, operands[3]); ix86_expand_clear (operands[3]); }) (define_peephole2 [(parallel [(set (match_operand 5 "flags_reg_operand") (match_operand 0)) (match_operand 4)]) (set (match_operand:QI 1 "register_operand") (match_operator:QI 2 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)])) (set (match_operand 3 "any_QIreg_operand") (zero_extend (match_dup 1)))] "(peep2_reg_dead_p (3, operands[1]) || operands_match_p (operands[1], operands[3])) && ! reg_overlap_mentioned_p (operands[3], operands[0]) && ! reg_overlap_mentioned_p (operands[3], operands[4]) && ! reg_set_p (operands[3], operands[4]) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 5) (match_dup 0)) (match_dup 4)]) (set (strict_low_part (match_dup 6)) (match_dup 2))] { operands[6] = gen_lowpart (QImode, operands[3]); ix86_expand_clear (operands[3]); }) (define_peephole2 [(set (match_operand 6 "flags_reg_operand") (match_operand 0)) (parallel [(set (match_operand 7 "flags_reg_operand") (match_operand 1)) (match_operand 5)]) (set (match_operand:QI 2 "register_operand") (match_operator:QI 3 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)])) (set (match_operand 4 "any_QIreg_operand") (zero_extend (match_dup 2)))] "(peep2_reg_dead_p (4, operands[2]) || operands_match_p (operands[2], operands[4])) && ! reg_overlap_mentioned_p (operands[4], operands[0]) && ! reg_overlap_mentioned_p (operands[4], operands[1]) && ! reg_overlap_mentioned_p (operands[4], operands[5]) && ! 
reg_set_p (operands[4], operands[5]) && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL) && peep2_regno_dead_p (0, FLAGS_REG)" [(set (match_dup 6) (match_dup 0)) (parallel [(set (match_dup 7) (match_dup 1)) (match_dup 5)]) (set (strict_low_part (match_dup 8)) (match_dup 3))] { operands[8] = gen_lowpart (QImode, operands[4]); ix86_expand_clear (operands[4]); }) ;; Similar, but match zero extend with andsi3. (define_peephole2 [(set (match_operand 4 "flags_reg_operand") (match_operand 0)) (set (match_operand:QI 1 "register_operand") (match_operator:QI 2 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)])) (parallel [(set (match_operand:SI 3 "any_QIreg_operand") (and:SI (match_dup 3) (const_int 255))) (clobber (reg:CC FLAGS_REG))])] "REGNO (operands[1]) == REGNO (operands[3]) && ! reg_overlap_mentioned_p (operands[3], operands[0]) && peep2_regno_dead_p (0, FLAGS_REG)" [(set (match_dup 4) (match_dup 0)) (set (strict_low_part (match_dup 5)) (match_dup 2))] { operands[5] = gen_lowpart (QImode, operands[3]); ix86_expand_clear (operands[3]); }) (define_peephole2 [(parallel [(set (match_operand 5 "flags_reg_operand") (match_operand 0)) (match_operand 4)]) (set (match_operand:QI 1 "register_operand") (match_operator:QI 2 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)])) (parallel [(set (match_operand 3 "any_QIreg_operand") (zero_extend (match_dup 1))) (clobber (reg:CC FLAGS_REG))])] "(peep2_reg_dead_p (3, operands[1]) || operands_match_p (operands[1], operands[3])) && ! reg_overlap_mentioned_p (operands[3], operands[0]) && ! reg_overlap_mentioned_p (operands[3], operands[4]) && ! 
reg_set_p (operands[3], operands[4]) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 5) (match_dup 0)) (match_dup 4)]) (set (strict_low_part (match_dup 6)) (match_dup 2))] { operands[6] = gen_lowpart (QImode, operands[3]); ix86_expand_clear (operands[3]); }) (define_peephole2 [(set (match_operand 6 "flags_reg_operand") (match_operand 0)) (parallel [(set (match_operand 7 "flags_reg_operand") (match_operand 1)) (match_operand 5)]) (set (match_operand:QI 2 "register_operand") (match_operator:QI 3 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)])) (parallel [(set (match_operand 4 "any_QIreg_operand") (zero_extend (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] "(peep2_reg_dead_p (4, operands[2]) || operands_match_p (operands[2], operands[4])) && ! reg_overlap_mentioned_p (operands[4], operands[0]) && ! reg_overlap_mentioned_p (operands[4], operands[1]) && ! reg_overlap_mentioned_p (operands[4], operands[5]) && ! reg_set_p (operands[4], operands[5]) && refers_to_regno_p (FLAGS_REG, operands[1], (rtx *)NULL) && peep2_regno_dead_p (0, FLAGS_REG)" [(set (match_dup 6) (match_dup 0)) (parallel [(set (match_dup 7) (match_dup 1)) (match_dup 5)]) (set (strict_low_part (match_dup 8)) (match_dup 3))] { operands[8] = gen_lowpart (QImode, operands[4]); ix86_expand_clear (operands[4]); }) ;; Call instructions. ;; The predicates normally associated with named expanders are not properly ;; checked for calls. This is a bug in the generic code, but it isn't that ;; easy to fix. Ignore it for now and be prepared to fix things up. ;; P6 processors will jump to the address after the decrement when %esp ;; is used as a call operand, so they will execute return address as a code. ;; See Pentium Pro errata 70, Pentium 2 errata A33 and Pentium 3 errata E17. ;; Register constraint for call instruction. (define_mode_attr c [(SI "l") (DI "r")]) ;; Call subroutine returning no value. 
(define_expand "call"
  [(call (match_operand:QI 0)
         (match_operand 1))
   (use (match_operand 2))]
  ""
{
  /* The RTL template above is never emitted: all operands are handed to
     ix86_expand_call and the expander finishes with DONE.  The first
     argument is NULL (no return value); the last one is false here and
     true in "sibcall" below.  */
  ix86_expand_call (NULL, operands[0], operands[1],
                    operands[2], NULL, false);
  DONE;
})

;; Identical to "call" except that the last argument passed to
;; ix86_expand_call is true.
(define_expand "sibcall"
  [(call (match_operand:QI 0)
         (match_operand 1))
   (use (match_operand 2))]
  ""
{
  ix86_expand_call (NULL, operands[0], operands[1],
                    operands[2], NULL, true);
  DONE;
})

;; Non-sibling call through operand 0 (insn condition: !SIBLING_CALL_P).
;; The constraint string starts with the mode attribute <c>, which is
;; "l" for SImode and "r" for DImode, so the W-mode pattern gets the
;; register alternative that the SImode-only *call_pop spells out
;; literally as "lBwBz".
(define_insn "*call"
  [(call (mem:QI (match_operand:W 0 "call_insn_operand" "<c>BwBz"))
         (match_operand 1))]
  "!SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[0]);"
  [(set_attr "type" "call")])

;; This covers both call and sibcall since only GOT slot is allowed.
(define_insn "*call_got_x32"
  [(call (mem:QI
           (zero_extend:DI
             (match_operand:SI 0 "GOT_memory_operand" "Bg")))
         (match_operand 1))]
  "TARGET_X32"
{
  /* Rebuild the call target as a DImode constant MEM at the address of
     the SImode GOT slot before handing it to the output routine.  */
  rtx fnaddr = gen_const_mem (DImode, XEXP (operands[0], 0));
  return ix86_output_call_insn (insn, fnaddr);
}
  [(set_attr "type" "call")])

;; Since sibcall never returns, we can only use call-clobbered register
;; as GOT base.
(define_insn "*sibcall_GOT_32"
  [(call (mem:QI
           (mem:SI (plus:SI
                     (match_operand:SI 0 "register_no_elim_operand" "U")
                     (match_operand:SI 1 "GOT32_symbol_operand"))))
         (match_operand 2))]
  "!TARGET_MACHO && !TARGET_64BIT && !TARGET_INDIRECT_BRANCH_REGISTER && SIBLING_CALL_P (insn)"
{
  /* Rebuild the (base + symbol) GOT slot as an SImode constant MEM and
     let the common output routine print the call.  */
  rtx fnaddr = gen_rtx_PLUS (SImode, operands[0], operands[1]);
  fnaddr = gen_const_mem (SImode, fnaddr);
  return ix86_output_call_insn (insn, fnaddr);
}
  [(set_attr "type" "call")])

;; Sibling call through operand 0 (insn condition: SIBLING_CALL_P).
(define_insn "*sibcall"
  [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "UBsBz"))
         (match_operand 1))]
  "SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[0]);"
  [(set_attr "type" "call")])

;; Call whose target is a memory operand.  The extra UNSPEC_PEEPSIB
;; element is what the two peephole2 patterns below add when they
;; generate this form.
(define_insn "*sibcall_memory"
  [(call (mem:QI (match_operand:W 0 "memory_operand" "m"))
         (match_operand 1))
   (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
  "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
  "* return ix86_output_call_insn (insn, operands[0]);"
  [(set_attr "type" "call")])

;; Replace "load register from memory; sibling call through that
;; register" by a single call through the memory operand.  Only done
;; when the call is a sibling call and the register is not mentioned in
;; the call's CALL_INSN_FUNCTION_USAGE.
(define_peephole2
  [(set (match_operand:W 0 "register_operand")
        (match_operand:W 1 "memory_operand"))
   (call (mem:QI (match_dup 0))
         (match_operand 3))]
  "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER && SIBLING_CALL_P (peep2_next_insn (1)) && !reg_mentioned_p (operands[0], CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
  [(parallel [(call (mem:QI (match_dup 1))
                    (match_dup 3))
              (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

;; Same transformation with a blockage insn between the load and the
;; call; the blockage is kept in front of the combined call.
(define_peephole2
  [(set (match_operand:W 0 "register_operand")
        (match_operand:W 1 "memory_operand"))
   (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (call (mem:QI (match_dup 0))
         (match_operand 3))]
  "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER && SIBLING_CALL_P (peep2_next_insn (2)) && !reg_mentioned_p (operands[0], CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (parallel [(call (mem:QI (match_dup 1))
                    (match_dup 3))
              (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

;; 32-bit only: call that also adjusts the stack pointer by operand 3.
;; As with "call", everything is delegated to ix86_expand_call.
(define_expand "call_pop"
  [(parallel [(call (match_operand:QI 0)
                    (match_operand:SI 1))
              (set (reg:SI SP_REG)
                   (plus:SI (reg:SI SP_REG)
                            (match_operand:SI 3)))])]
  "!TARGET_64BIT"
{
  ix86_expand_call (NULL, operands[0], operands[1],
                    operands[2], operands[3], false);
  DONE;
})

(define_insn "*call_pop"
  [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lBwBz"))
         (match_operand 1))
   (set (reg:SI SP_REG)
        (plus:SI (reg:SI SP_REG)
                 (match_operand:SI 2 "immediate_operand" "i")))]
  "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[0]);"
  [(set_attr "type" "call")])

(define_insn "*sibcall_pop"
  [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "UBsBz"))
         (match_operand 1))
   (set (reg:SI SP_REG)
        (plus:SI (reg:SI SP_REG)
                 (match_operand:SI 2 "immediate_operand" "i")))]
  "!TARGET_64BIT && SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[0]);"
  [(set_attr "type" "call")])

;; Memory-operand form of the popping call; generated by the two
;; peephole2 patterns that follow.
(define_insn "*sibcall_pop_memory"
  [(call (mem:QI (match_operand:SI 0 "memory_operand" "Bs"))
         (match_operand 1))
   (set (reg:SI SP_REG)
        (plus:SI (reg:SI SP_REG)
                 (match_operand:SI 2 "immediate_operand" "i")))
   (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
  "!TARGET_64BIT"
  "* return ix86_output_call_insn (insn, operands[0]);"
  [(set_attr "type" "call")])

;; Fold a preceding register load into a popping sibling call.
(define_peephole2
  [(set (match_operand:SI 0 "register_operand")
        (match_operand:SI 1 "memory_operand"))
   (parallel [(call (mem:QI (match_dup 0))
                    (match_operand 3))
              (set (reg:SI SP_REG)
                   (plus:SI (reg:SI SP_REG)
                            (match_operand:SI 4 "immediate_operand")))])]
  "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (1)) && !reg_mentioned_p (operands[0], CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
  [(parallel [(call (mem:QI (match_dup 1))
                    (match_dup 3))
              (set (reg:SI SP_REG)
                   (plus:SI (reg:SI SP_REG)
                            (match_dup 4)))
              (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

;; Same, with a blockage insn between the load and the call.
(define_peephole2
  [(set (match_operand:SI 0 "register_operand")
        (match_operand:SI 1 "memory_operand"))
   (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (parallel [(call (mem:QI (match_dup 0))
                    (match_operand 3))
              (set (reg:SI SP_REG)
                   (plus:SI (reg:SI SP_REG)
                            (match_operand:SI 4 "immediate_operand")))])]
  "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (2)) && !reg_mentioned_p (operands[0], CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (parallel [(call (mem:QI (match_dup 1))
                    (match_dup 3))
              (set (reg:SI SP_REG)
                   (plus:SI (reg:SI SP_REG)
                            (match_dup 4)))
              (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

;; Combining simple memory jump instruction
;; (load into a register followed by a jump through that register
;; becomes a jump through memory, provided the register is dead after
;; the jump).
(define_peephole2
  [(set (match_operand:W 0 "register_operand")
        (match_operand:W 1 "memory_operand"))
   (set (pc) (match_dup 0))]
  "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER && peep2_reg_dead_p (2, operands[0])"
  [(set (pc) (match_dup 1))])

;; Call subroutine, returning value in operand 0
(define_expand "call_value"
  [(set (match_operand 0)
        (call (match_operand:QI 1)
              (match_operand 2)))
   (use (match_operand 3))]
  ""
{
  ix86_expand_call (operands[0], operands[1], operands[2],
                    operands[3], NULL, false);
  DONE;
})

;; Identical to "call_value" except that the last argument passed to
;; ix86_expand_call is true.
(define_expand "sibcall_value"
  [(set (match_operand 0)
        (call (match_operand:QI 1)
              (match_operand 2)))
   (use (match_operand 3))]
  ""
{
  ix86_expand_call (operands[0], operands[1], operands[2],
                    operands[3], NULL, true);
  DONE;
})

;; Value-returning counterpart of *call.  The constraint string starts
;; with the mode attribute <c> ("l" for SImode, "r" for DImode), giving
;; the W-mode pattern the register alternative that *call_pop spells
;; out literally as "lBwBz".
(define_insn "*call_value"
  [(set (match_operand 0)
        (call (mem:QI (match_operand:W 1 "call_insn_operand" "<c>BwBz"))
              (match_operand 2)))]
  "!SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[1]);"
  [(set_attr "type" "callv")])

;; This covers both call and sibcall since only GOT slot is allowed.
(define_insn "*call_value_got_x32"
  [(set (match_operand 0)
        (call (mem:QI
                (zero_extend:DI
                  (match_operand:SI 1 "GOT_memory_operand" "Bg")))
              (match_operand 2)))]
  "TARGET_X32"
{
  rtx fnaddr = gen_const_mem (DImode, XEXP (operands[1], 0));
  return ix86_output_call_insn (insn, fnaddr);
}
  [(set_attr "type" "callv")])

;; Since sibcall never returns, we can only use call-clobbered register
;; as GOT base.
(define_insn "*sibcall_value_GOT_32"
  [(set (match_operand 0)
        (call (mem:QI
                (mem:SI (plus:SI
                          (match_operand:SI 1 "register_no_elim_operand" "U")
                          (match_operand:SI 2 "GOT32_symbol_operand"))))
              (match_operand 3)))]
  "!TARGET_MACHO && !TARGET_64BIT && !TARGET_INDIRECT_BRANCH_REGISTER && SIBLING_CALL_P (insn)"
{
  /* Rebuild the (base + symbol) GOT slot as an SImode constant MEM and
     let the common output routine print the call.  */
  rtx fnaddr = gen_rtx_PLUS (SImode, operands[1], operands[2]);
  fnaddr = gen_const_mem (SImode, fnaddr);
  return ix86_output_call_insn (insn, fnaddr);
}
  [(set_attr "type" "callv")])

;; Value-returning sibling call through operand 1.
(define_insn "*sibcall_value"
  [(set (match_operand 0)
        (call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "UBsBz"))
              (match_operand 2)))]
  "SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[1]);"
  [(set_attr "type" "callv")])

;; Memory-operand form carrying UNSPEC_PEEPSIB; this is the shape the
;; two peephole2 patterns below generate.
(define_insn "*sibcall_value_memory"
  [(set (match_operand 0)
        (call (mem:QI (match_operand:W 1 "memory_operand" "m"))
              (match_operand 2)))
   (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
  "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER"
  "* return ix86_output_call_insn (insn, operands[1]);"
  [(set_attr "type" "callv")])

;; Replace "load register from memory; value-returning sibling call
;; through that register" by a call through the memory operand.  Only
;; done when the register is not mentioned in the call's
;; CALL_INSN_FUNCTION_USAGE.
(define_peephole2
  [(set (match_operand:W 0 "register_operand")
        (match_operand:W 1 "memory_operand"))
   (set (match_operand 2)
        (call (mem:QI (match_dup 0))
              (match_operand 3)))]
  "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER && SIBLING_CALL_P (peep2_next_insn (1)) && !reg_mentioned_p (operands[0], CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
  [(parallel [(set (match_dup 2)
                   (call (mem:QI (match_dup 1))
                         (match_dup 3)))
              (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

;; Same, with a blockage insn between the load and the call; the
;; blockage is kept in front of the combined call.
(define_peephole2
  [(set (match_operand:W 0 "register_operand")
        (match_operand:W 1 "memory_operand"))
   (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (set (match_operand 2)
        (call (mem:QI (match_dup 0))
              (match_operand 3)))]
  "!TARGET_X32 && !TARGET_INDIRECT_BRANCH_REGISTER && SIBLING_CALL_P (peep2_next_insn (2)) && !reg_mentioned_p (operands[0], CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (parallel [(set (match_dup 2)
                   (call (mem:QI (match_dup 1))
                         (match_dup 3)))
              (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

;; 32-bit only: value-returning call that also adjusts the stack
;; pointer by operand 4.  Everything is delegated to ix86_expand_call.
(define_expand "call_value_pop"
  [(parallel [(set (match_operand 0)
                   (call (match_operand:QI 1)
                         (match_operand:SI 2)))
              (set (reg:SI SP_REG)
                   (plus:SI (reg:SI SP_REG)
                            (match_operand:SI 4)))])]
  "!TARGET_64BIT"
{
  ix86_expand_call (operands[0], operands[1], operands[2],
                    operands[3], operands[4], false);
  DONE;
})

(define_insn "*call_value_pop"
  [(set (match_operand 0)
        (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lBwBz"))
              (match_operand 2)))
   (set (reg:SI SP_REG)
        (plus:SI (reg:SI SP_REG)
                 (match_operand:SI 3 "immediate_operand" "i")))]
  "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[1]);"
  [(set_attr "type" "callv")])

(define_insn "*sibcall_value_pop"
  [(set (match_operand 0)
        (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "UBsBz"))
              (match_operand 2)))
   (set (reg:SI SP_REG)
        (plus:SI (reg:SI SP_REG)
                 (match_operand:SI 3 "immediate_operand" "i")))]
  "!TARGET_64BIT && SIBLING_CALL_P (insn)"
  "* return ix86_output_call_insn (insn, operands[1]);"
  [(set_attr "type" "callv")])

;; Memory-operand form of the popping value call; generated by the two
;; peephole2 patterns that follow.
(define_insn "*sibcall_value_pop_memory"
  [(set (match_operand 0)
        (call (mem:QI (match_operand:SI 1 "memory_operand" "m"))
              (match_operand 2)))
   (set (reg:SI SP_REG)
        (plus:SI (reg:SI SP_REG)
                 (match_operand:SI 3 "immediate_operand" "i")))
   (unspec [(const_int 0)] UNSPEC_PEEPSIB)]
  "!TARGET_64BIT"
  "* return ix86_output_call_insn (insn, operands[1]);"
  [(set_attr "type" "callv")])

;; Fold a preceding register load into a popping, value-returning
;; sibling call.
(define_peephole2
  [(set (match_operand:SI 0 "register_operand")
        (match_operand:SI 1 "memory_operand"))
   (parallel [(set (match_operand 2)
                   (call (mem:QI (match_dup 0))
                         (match_operand 3)))
              (set (reg:SI SP_REG)
                   (plus:SI (reg:SI SP_REG)
                            (match_operand:SI 4 "immediate_operand")))])]
  "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (1)) && !reg_mentioned_p (operands[0], CALL_INSN_FUNCTION_USAGE (peep2_next_insn (1)))"
  [(parallel [(set (match_dup 2)
                   (call (mem:QI (match_dup 1))
                         (match_dup 3)))
              (set (reg:SI SP_REG)
                   (plus:SI (reg:SI SP_REG)
                            (match_dup 4)))
              (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

;; Same, with a blockage insn between the load and the call.
(define_peephole2
  [(set (match_operand:SI 0 "register_operand")
        (match_operand:SI 1 "memory_operand"))
   (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (parallel [(set (match_operand 2)
                   (call (mem:QI (match_dup 0))
                         (match_operand 3)))
              (set (reg:SI SP_REG)
                   (plus:SI (reg:SI SP_REG)
                            (match_operand:SI 4 "immediate_operand")))])]
  "!TARGET_64BIT && SIBLING_CALL_P (peep2_next_insn (2)) && !reg_mentioned_p (operands[0], CALL_INSN_FUNCTION_USAGE (peep2_next_insn (2)))"
  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)
   (parallel [(set (match_dup 2)
                   (call (mem:QI (match_dup 1))
                         (match_dup 3)))
              (set (reg:SI SP_REG)
                   (plus:SI (reg:SI SP_REG)
                            (match_dup 4)))
              (unspec [(const_int 0)] UNSPEC_PEEPSIB)])])

;; Call subroutine returning any type.
;; Operand 0 is the callee; operand 2 is a vector of SETs that is walked
;; after the call, emitting one move per element.

(define_expand "untyped_call"
  [(parallel [(call (match_operand 0)
                    (const_int 0))
              (match_operand 1)
              (match_operand 2)])]
  ""
{
  int i;

  /* In order to give reg-stack an easier job in validating two
     coprocessor registers as containing a possible return value,
     simply pretend the untyped call returns a complex long double
     value.

     We can't use SSE_REGPARM_MAX here since callee is unprototyped
     and should have the default ABI.  */

  ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387
                     ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
                    operands[0], const0_rtx,
                    GEN_INT ((TARGET_64BIT
                              ? (ix86_abi == SYSV_ABI
                                 ? X86_64_SSE_REGPARM_MAX
                                 : X86_64_MS_SSE_REGPARM_MAX)
                              : X86_32_SSE_REGPARM_MAX)
                             - 1),
                    NULL, false);

  /* Emit a move for every SET in the operand 2 vector.  */
  for (i = 0; i < XVECLEN (operands[2], 0); i++)
    {
      rtx set = XVECEXP (operands[2], 0, i);
      emit_move_insn (SET_DEST (set), SET_SRC (set));
    }

  /* The optimizer does not know that the call sets the function value
     registers we stored in the result block.  We avoid problems by
     claiming that all hard registers are used and clobbered at this
     point.  */
  emit_insn (gen_blockage ());

  DONE;
})

;; Prologue and epilogue instructions

;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
;; all of memory.  This blocks insns from being moved across this point.
;; The insn emits no assembly (empty template, length 0).

(define_insn "blockage"
  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
  ""
  ""
  [(set_attr "length" "0")])

;; Do not schedule instructions accessing memory across this point.
;; Operand 0 is created here as a volatile BLKmode MEM whose address is
;; a Pmode SCRATCH.

(define_expand "memory_blockage"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
  ""
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

;; Matcher for the insn built by "memory_blockage"; emits no assembly.
(define_insn "*memory_blockage"
  [(set (match_operand:BLK 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
  ""
  ""
  [(set_attr "length" "0")])

;; As USE insns aren't meaningful after reload, this is used instead
;; to prevent deleting instructions setting registers for PIC code
(define_insn "prologue_use"
  [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)]
  ""
  ""
  [(set_attr "length" "0")])

;; Insn emitted into the body of a function to return from a function.
;; This is only done if the function's epilogue is known to be simple.
;; See comments for ix86_can_use_return_insn_p in i386.cc.

(define_expand "return"
  [(simple_return)]
  "ix86_can_use_return_insn_p ()"
{
  /* When the function pops its own arguments, emit the popping return
     form instead of the plain (simple_return) template.  */
  if (crtl->args.pops_args)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);
      emit_jump_insn (gen_simple_return_pop_internal (popc));
      DONE;
    }
})

;; We need to disable this for TARGET_SEH, as otherwise
;; shrink-wrapped prologue gets enabled too.  This might exceed
;; the maximum size of prologue in unwind information.
;; Also disallow shrink-wrapping if using stack slot to pass the
;; static chain pointer - the first instruction has to be pushl %esi
;; and it can't be moved around, as we use alternate entry points
;; in that case.
;; Also disallow for ms_hook_prologue functions which have frame
;; pointer set up in function label which is correctly handled in
;; ix86_expand_{prologue|epilogue}() only.
(define_expand "simple_return"
  [(simple_return)]
  "!TARGET_SEH && !ix86_static_chain_on_stack && !ix86_function_ms_hook_prologue (cfun->decl)"
{
  /* When the function pops its own arguments, emit the popping return
     form instead of the plain (simple_return) template.  */
  if (crtl->args.pops_args)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);
      emit_jump_insn (gen_simple_return_pop_internal (popc));
      DONE;
    }
})

;; Plain return; the text comes from ix86_output_function_return (false).
(define_insn "simple_return_internal"
  [(simple_return)]
  "reload_completed"
  "* return ix86_output_function_return (false);"
  [(set_attr "length" "1")
   (set_attr "atom_unit" "jeu")
   (set_attr "length_immediate" "0")
   (set_attr "modrm" "0")])

;; Return from an interrupt handler: "iret" for 32-bit, otherwise
;; "uiret" when TARGET_UINTR and "iretq" when not.
(define_insn "interrupt_return"
  [(simple_return)
   (unspec [(const_int 0)] UNSPEC_INTERRUPT_RETURN)]
  "reload_completed"
{
  return TARGET_64BIT ? (TARGET_UINTR ? "uiret" : "iretq") : "iret";
})

;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET
;; instruction Athlon and K8 have.

(define_insn "simple_return_internal_long"
  [(simple_return)
   (unspec [(const_int 0)] UNSPEC_REP)]
  "reload_completed"
  "* return ix86_output_function_return (true);"
  [(set_attr "length" "2")
   (set_attr "atom_unit" "jeu")
   (set_attr "length_immediate" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "modrm" "0")])

;; Return that pops operand 0 bytes.  It is split through
;; ix86_split_simple_return_pop_internal when the function return type
;; is not indirect_branch_keep.
(define_insn_and_split "simple_return_pop_internal"
  [(simple_return)
   (use (match_operand:SI 0 "const_int_operand"))]
  "reload_completed"
  "ret\t%0"
  "&& cfun->machine->function_return_type != indirect_branch_keep"
  [(const_int 0)]
  "ix86_split_simple_return_pop_internal (operands[0]); DONE;"
  [(set_attr "length" "3")
   (set_attr "atom_unit" "jeu")
   (set_attr "length_immediate" "2")
   (set_attr "modrm" "0")])

;; Return that records a register operand via USE.
(define_expand "simple_return_indirect_internal"
  [(parallel
     [(simple_return)
      (use (match_operand 0 "register_operand"))])])

(define_insn "*simple_return_indirect_internal"
  [(simple_return)
   (use (match_operand:W 0 "register_operand" "r"))]
  "reload_completed"
  "* return ix86_output_indirect_function_return (operands[0]);"
  [(set (attr "type")
     (if_then_else (match_test "(cfun->machine->indirect_branch_type != indirect_branch_keep)")
        (const_string "multi")
        (const_string "ibr")))
   (set_attr "length_immediate" "0")])

(define_insn "nop"
  [(const_int 0)]
  ""
  "nop"
  [(set_attr "length" "1")
   (set_attr "length_immediate" "0")
   (set_attr "modrm" "0")])

;; Generate nops.  Operand 0 is the number of nops, up to 8.
(define_insn "nops"
  [(unspec_volatile [(match_operand 0 "const_int_operand")]
                    UNSPECV_NOPS)]
  "reload_completed"
{
  int num = INTVAL (operands[0]);

  gcc_assert (IN_RANGE (num, 1, 8));

  /* The nops are written straight to the assembly file, so the
     template returned to the caller is empty.  */
  while (num--)
    fputs ("\tnop\n", asm_out_file);

  return "";
}
  [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
   (set_attr "length_immediate" "0")
   (set_attr "modrm" "0")])

;; Pad to 1 << op0 byte boundary, max skip in op1.  Used to avoid
;; branch prediction penalty for the third jump in a 16-byte
;; block on K8.
;; It is also used to align tight loops which can fit into 1 cacheline.
;; It can help code prefetch and reduce DSB miss.

(define_insn "max_skip_align"
  [(unspec_volatile [(match_operand 0) (match_operand 1)] UNSPECV_ALIGN)]
  ""
{
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
  ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, (int)INTVAL (operands[0]),
                             (int)INTVAL (operands[1]));
#else
  /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do
     that.  The align insn is used to avoid 3 jump instructions in a row
     to improve branch prediction and the benefits hardly outweigh the
     cost of extra 8 nops on the average inserted by full alignment
     pseudo operation.  */
#endif
  return "";
}
  [(set_attr "length" "16")])

(define_expand "prologue"
  [(const_int 0)]
  ""
  "ix86_expand_prologue (); DONE;")

;; 32-bit only.  Expanding this pattern sets ix86_pc_thunk_call_expanded
;; when compiling PIC code for anything but VxWorks RTP.
(define_expand "set_got"
  [(parallel
     [(set (match_operand:SI 0 "register_operand")
           (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
      (clobber (reg:CC FLAGS_REG))])]
  "!TARGET_64BIT"
{
  if (flag_pic && !TARGET_VXWORKS_RTP)
    ix86_pc_thunk_call_expanded = true;
})

(define_insn "*set_got"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT"
  "* return output_set_got (operands[0], NULL_RTX);"
  [(set_attr "type" "multi")
   (set_attr "length" "12")])

;; Variant of "set_got" that carries a label (operand 1), which is
;; passed on to output_set_got.
(define_expand "set_got_labelled"
  [(parallel
     [(set (match_operand:SI 0 "register_operand")
           (unspec:SI [(label_ref (match_operand 1))]
                      UNSPEC_SET_GOT))
      (clobber (reg:CC FLAGS_REG))])]
  "!TARGET_64BIT"
{
  if (flag_pic && !TARGET_VXWORKS_RTP)
    ix86_pc_thunk_call_expanded = true;
})

(define_insn "*set_got_labelled"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(label_ref (match_operand 1))]
                   UNSPEC_SET_GOT))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT"
  "* return output_set_got (operands[0], operands[1]);"
  [(set_attr "type" "multi")
   (set_attr "length" "12")])

(define_insn "set_got_rex64"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))]
  "TARGET_64BIT"
  "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}"
  [(set_attr "type" "lea")
   (set_attr "length_address" "4")
   (set_attr "mode" "DI")])

(define_insn "set_rip_rex64"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec:DI [(label_ref (match_operand 1))] UNSPEC_SET_RIP))]
  "TARGET_64BIT"
  "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}"
  [(set_attr "type" "lea")
   (set_attr "length_address" "4")
   (set_attr "mode" "DI")])

(define_insn "set_got_offset_rex64"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec:DI
          [(label_ref (match_operand 1))]
          UNSPEC_SET_GOT_OFFSET))]
  "TARGET_LP64"
  "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}"
  [(set_attr "type" "imov")
   (set_attr "length_immediate" "0")
   (set_attr "length_address" "8")
   (set_attr "mode" "DI")])

;; The argument to ix86_expand_epilogue differs per caller in this file:
;; 1 for "epilogue", 0 for "sibcall_epilogue", 2 for "eh_return_internal".
(define_expand "epilogue"
  [(const_int 0)]
  ""
  "ix86_expand_epilogue (1); DONE;")

(define_expand "sibcall_epilogue"
  [(const_int 0)]
  ""
  "ix86_expand_epilogue (0); DONE;")

(define_expand "eh_return"
  [(use (match_operand 0 "register_operand"))]
  ""
{
  rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0];

  /* Tricky bit: we write the address of the handler to which we will
     be returning into someone else's stack frame, one word below the
     stack address we wish to restore.  */
  tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa);
  tmp = plus_constant (Pmode, tmp, -UNITS_PER_WORD);
  /* Return address is always in word_mode.  */
  tmp = gen_rtx_MEM (word_mode, tmp);
  if (GET_MODE (ra) != word_mode)
    ra = convert_to_mode (word_mode, ra, 1);
  emit_move_insn (tmp, ra);

  emit_jump_insn (gen_eh_return_internal ());
  emit_barrier ();
  DONE;
})

;; Placeholder ("#") that is split into the real epilogue once
;; epilogue_completed is set.
(define_insn_and_split "eh_return_internal"
  [(eh_return)]
  ""
  "#"
  "epilogue_completed"
  [(const_int 0)]
  "ix86_expand_epilogue (2); DONE;")

;; Parameterized expander for the leave instruction, instantiated for
;; each mode of the W iterator.  The pattern name therefore carries the
;; mode suffix, and operand 0 is the size in bytes of that mode, which
;; is what *leave (const_int 4, SImode) and *leave_rex64 (const_int 8,
;; DImode) below match.
(define_expand "@leave_<mode>"
  [(parallel
    [(set (reg:W SP_REG) (plus:W (reg:W BP_REG) (match_dup 0)))
     (set (reg:W BP_REG) (mem:W (reg:W BP_REG)))
     (clobber (mem:BLK (scratch)))])]
  ""
  "operands[0] = GEN_INT (<MODE_SIZE>);")

(define_insn "*leave"
  [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4)))
   (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG)))
   (clobber (mem:BLK (scratch)))]
  "!TARGET_64BIT"
  "leave"
  [(set_attr "type" "leave")])

(define_insn "*leave_rex64"
  [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
   (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
   (clobber (mem:BLK (scratch)))]
  "TARGET_64BIT"
  "leave"
  [(set_attr "type" "leave")])

;; Handle -fsplit-stack.
(define_expand "split_stack_prologue"
  [(const_int 0)]
  ""
{
  ix86_expand_split_stack_prologue ();
  DONE;
})

;; In order to support the call/return predictor, we use a return
;; instruction which the middle-end doesn't see.
;; Operand 0 is printed as the "ret" immediate; a zero operand gives a
;; plain "ret", and the length attributes (1 vs. 3 bytes, 0 vs. 2
;; immediate bytes) follow the same test.
(define_insn "split_stack_return"
  [(unspec_volatile [(match_operand:SI 0 "const_int_operand")]
                    UNSPECV_SPLIT_STACK_RETURN)]
  ""
{
  if (operands[0] == const0_rtx)
    return "ret";
  else
    return "ret\t%0";
}
  [(set_attr "atom_unit" "jeu")
   (set_attr "modrm" "0")
   (set (attr "length")
        (if_then_else (match_operand:SI 0 "const0_operand")
                      (const_int 1)
                      (const_int 3)))
   (set (attr "length_immediate")
        (if_then_else (match_operand:SI 0 "const0_operand")
                      (const_int 0)
                      (const_int 2)))])

;; If there are operand 0 bytes available on the stack, jump to
;; operand 1.
;; The expander computes reg = SP - operand 0 and branches to operand 1
;; when reg >= the split-stack guard (unsigned compare, GEU).

(define_expand "split_stack_space_check"
  [(set (pc) (if_then_else
              (ltu (minus (reg SP_REG)
                          (match_operand 0 "register_operand"))
                   (match_dup 2))
              (label_ref (match_operand 1))
              (pc)))]
  ""
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_sub3_insn (reg, stack_pointer_rtx, operands[0]));

  operands[2] = ix86_split_stack_guard ();
  ix86_expand_branch (GEU, reg, operands[2], operands[1]);

  DONE;
})

;; Bit manipulation instructions.

;; ffs for each SWI48 mode, built from ctz:
;;   tmp = -1
;;   flags, dst = compare (src, 0), ctz (src)
;;   dst = (flags == 0) ? tmp : dst
;;   dst = dst + 1
;; The flags mode is CCCmode with TARGET_BMI and CCZmode otherwise.
;; SImode without TARGET_CMOVE is routed to ffssi2_no_cmove instead.
(define_expand "ffs<mode>2"
  [(set (match_dup 2) (const_int -1))
   (parallel [(set (match_dup 3) (match_dup 4))
              (set (match_operand:SWI48 0 "register_operand")
                   (ctz:SWI48
                     (match_operand:SWI48 1 "nonimmediate_operand")))])
   (set (match_dup 0) (if_then_else:SWI48
                        (eq (match_dup 3) (const_int 0))
                        (match_dup 2)
                        (match_dup 0)))
   (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (const_int 1)))
              (clobber (reg:CC FLAGS_REG))])]
  ""
{
  machine_mode flags_mode;

  if (<MODE>mode == SImode && !TARGET_CMOVE)
    {
      emit_insn (gen_ffssi2_no_cmove (operands[0], operands [1]));
      DONE;
    }

  flags_mode = TARGET_BMI ? CCCmode : CCZmode;

  operands[2] = gen_reg_rtx (<MODE>mode);
  operands[3] = gen_rtx_REG (flags_mode, FLAGS_REG);
  operands[4] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);
})

;; SImode ffs for targets without cmov.  After reload it is split into:
;; ctz that also sets the flags; a setcc of (flags == 0) into the low
;; byte of the scratch; negate the scratch; OR it into the result; add 1.
;; ix86_expand_clear (operands[2]) is called from the preparation code;
;; presumably it zeroes the scratch ahead of the setcc -- confirm against
;; its definition.
(define_insn_and_split "ffssi2_no_cmove"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
   (clobber (match_scratch:SI 2 "=&q"))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_CMOVE"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 4) (match_dup 5))
              (set (match_dup 0) (ctz:SI (match_dup 1)))])
   (set (strict_low_part (match_dup 3))
        (eq:QI (match_dup 4) (const_int 0)))
   (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
              (clobber (reg:CC FLAGS_REG))])]
{
  machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode;

  operands[3] = gen_lowpart (QImode, operands[2]);
  operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG);
  operands[5] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx);

  ix86_expand_clear (operands[2]);
})

;; tzcnt that also sets the flags in CCCmode, one insn per SWI48 mode.
;; The pattern name, the opcode suffix and the "mode" attribute are all
;; derived from the iterator.  When the split condition holds, the insn
;; is rewritten with an added UNSPEC_INSN_FALSE_DEP use of the
;; destination, after ix86_expand_clear (operands[0]).
(define_insn_and_split "*tzcnt<mode>_1"
  [(set (reg:CCC FLAGS_REG)
        (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm")
                     (const_int 0)))
   (set (match_operand:SWI48 0 "register_operand" "=r")
        (ctz:SWI48 (match_dup 1)))]
  "TARGET_BMI"
  "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed && optimize_function_for_speed_p (cfun) && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
    [(set (reg:CCC FLAGS_REG)
          (compare:CCC (match_dup 1) (const_int 0)))
     (set (match_dup 0)
          (ctz:SWI48 (match_dup 1)))
     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
  "ix86_expand_clear (operands[0]);"
  [(set_attr "type" "alu1")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt.
There is no false dependency when destination is ; also used in source. (define_insn "*tzcnt_1_falsedep" [(set (reg:CCC FLAGS_REG) (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm") (const_int 0))) (set (match_operand:SWI48 0 "register_operand" "=r") (ctz:SWI48 (match_dup 1))) (unspec [(match_operand:SWI48 2 "register_operand" "0")] UNSPEC_INSN_FALSE_DEP)] "TARGET_BMI" "tzcnt{}\t{%1, %0|%0, %1}"; [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") (set_attr "prefix_rep" "1") (set_attr "btver2_decode" "double") (set_attr "mode" "")]) (define_insn "*bsf_1" [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm") (const_int 0))) (set (match_operand:SWI48 0 "register_operand" "=r") (ctz:SWI48 (match_dup 1)))] "" "bsf{}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") (set_attr "btver2_decode" "double") (set_attr "znver1_decode" "vector") (set_attr "mode" "")]) (define_insn_and_split "ctz2" [(set (match_operand:SWI48 0 "register_operand" "=r") (ctz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] "" { if (TARGET_BMI) return "tzcnt{}\t{%1, %0|%0, %1}"; else if (optimize_function_for_size_p (cfun)) ; else if (TARGET_CPU_P (GENERIC)) /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. 
*/ return "rep%; bsf{}\t{%1, %0|%0, %1}"; return "bsf{}\t{%1, %0|%0, %1}"; } "(TARGET_BMI || TARGET_CPU_P (GENERIC)) && TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed && optimize_function_for_speed_p (cfun) && !reg_mentioned_p (operands[0], operands[1])" [(parallel [(set (match_dup 0) (ctz:SWI48 (match_dup 1))) (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) (clobber (reg:CC FLAGS_REG))])] "ix86_expand_clear (operands[0]);" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") (set (attr "prefix_rep") (if_then_else (ior (match_test "TARGET_BMI") (and (not (match_test "optimize_function_for_size_p (cfun)")) (match_test "TARGET_CPU_P (GENERIC)"))) (const_string "1") (const_string "0"))) (set_attr "mode" "")]) ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. (define_insn "*ctz2_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (ctz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) (unspec [(match_operand:SWI48 2 "register_operand" "0")] UNSPEC_INSN_FALSE_DEP) (clobber (reg:CC FLAGS_REG))] "" { if (TARGET_BMI) return "tzcnt{}\t{%1, %0|%0, %1}"; else if (TARGET_CPU_P (GENERIC)) /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. 
*/ return "rep%; bsf{}\t{%1, %0|%0, %1}"; else gcc_unreachable (); } [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") (set_attr "prefix_rep" "1") (set_attr "mode" "")]) (define_insn_and_split "*ctzsi2_zext" [(set (match_operand:DI 0 "register_operand" "=r") (and:DI (subreg:DI (ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")) 0) (const_int 63))) (clobber (reg:CC FLAGS_REG))] "TARGET_BMI && TARGET_64BIT" "tzcnt{l}\t{%1, %k0|%k0, %1}" "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed && optimize_function_for_speed_p (cfun) && !reg_mentioned_p (operands[0], operands[1])" [(parallel [(set (match_dup 0) (and:DI (subreg:DI (ctz:SI (match_dup 1)) 0) (const_int 63))) (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) (clobber (reg:CC FLAGS_REG))])] "ix86_expand_clear (operands[0]);" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") (set_attr "prefix_rep" "1") (set_attr "mode" "SI")]) ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. (define_insn "*ctzsi2_zext_falsedep" [(set (match_operand:DI 0 "register_operand" "=r") (and:DI (subreg:DI (ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")) 0) (const_int 63))) (unspec [(match_operand:DI 2 "register_operand" "0")] UNSPEC_INSN_FALSE_DEP) (clobber (reg:CC FLAGS_REG))] "TARGET_BMI && TARGET_64BIT" "tzcnt{l}\t{%1, %k0|%k0, %1}" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") (set_attr "prefix_rep" "1") (set_attr "mode" "SI")]) (define_insn_and_split "*ctzsidi2_ext" [(set (match_operand:DI 0 "register_operand" "=r") (any_extend:DI (ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" { if (TARGET_BMI) return "tzcnt{l}\t{%1, %k0|%k0, %1}"; else if (TARGET_CPU_P (GENERIC) && !optimize_function_for_size_p (cfun)) /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. 
*/ return "rep%; bsf{l}\t{%1, %k0|%k0, %1}"; return "bsf{l}\t{%1, %k0|%k0, %1}"; } "(TARGET_BMI || TARGET_CPU_P (GENERIC)) && TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed && optimize_function_for_speed_p (cfun) && !reg_mentioned_p (operands[0], operands[1])" [(parallel [(set (match_dup 0) (any_extend:DI (ctz:SI (match_dup 1)))) (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) (clobber (reg:CC FLAGS_REG))])] "ix86_expand_clear (operands[0]);" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") (set (attr "prefix_rep") (if_then_else (ior (match_test "TARGET_BMI") (and (not (match_test "optimize_function_for_size_p (cfun)")) (match_test "TARGET_CPU_P (GENERIC)"))) (const_string "1") (const_string "0"))) (set_attr "mode" "SI")]) (define_insn "*ctzsidi2_ext_falsedep" [(set (match_operand:DI 0 "register_operand" "=r") (any_extend:DI (ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))) (unspec [(match_operand:DI 2 "register_operand" "0")] UNSPEC_INSN_FALSE_DEP) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" { if (TARGET_BMI) return "tzcnt{l}\t{%1, %k0|%k0, %1}"; else if (TARGET_CPU_P (GENERIC)) /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. 
*/ return "rep%; bsf{l}\t{%1, %k0|%k0, %1}"; else gcc_unreachable (); } [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") (set_attr "prefix_rep" "1") (set_attr "mode" "SI")]) (define_insn "bsr_rex64" [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "rm") (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (minus:DI (const_int 63) (clz:DI (match_dup 1))))] "TARGET_64BIT" "bsr{q}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") (set_attr "znver1_decode" "vector") (set_attr "mode" "DI")]) (define_insn "bsr_rex64_1" [(set (match_operand:DI 0 "register_operand" "=r") (minus:DI (const_int 63) (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))) (clobber (reg:CC FLAGS_REG))] "!TARGET_LZCNT && TARGET_64BIT" "bsr{q}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") (set_attr "znver1_decode" "vector") (set_attr "mode" "DI")]) (define_insn "bsr_rex64_1_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (minus:SI (const_int 63) (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")) 0)))) (clobber (reg:CC FLAGS_REG))] "!TARGET_LZCNT && TARGET_64BIT" "bsr{q}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") (set_attr "znver1_decode" "vector") (set_attr "mode" "DI")]) (define_insn "bsr" [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm") (const_int 0))) (set (match_operand:SI 0 "register_operand" "=r") (minus:SI (const_int 31) (clz:SI (match_dup 1))))] "" "bsr{l}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") (set_attr "znver1_decode" "vector") (set_attr "mode" "SI")]) (define_insn "bsr_1" [(set (match_operand:SI 0 "register_operand" "=r") (minus:SI (const_int 31) (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))) (clobber (reg:CC FLAGS_REG))] "!TARGET_LZCNT" "bsr{l}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") (set_attr 
"znver1_decode" "vector") (set_attr "mode" "SI")]) (define_insn "bsr_zext_1" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (minus:SI (const_int 31) (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))) (clobber (reg:CC FLAGS_REG))] "!TARGET_LZCNT && TARGET_64BIT" "bsr{l}\t{%1, %k0|%k0, %1}" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") (set_attr "znver1_decode" "vector") (set_attr "mode" "SI")]) ; As bsr is undefined behavior on zero and for other input ; values it is in range 0 to 63, we can optimize away sign-extends. (define_insn_and_split "*bsr_rex64_2" [(set (match_operand:DI 0 "register_operand") (xor:DI (sign_extend:DI (minus:SI (const_int 63) (subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand")) 0))) (const_int 63))) (clobber (reg:CC FLAGS_REG))] "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()" "#" "&& 1" [(parallel [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0))) (set (match_dup 2) (minus:DI (const_int 63) (clz:DI (match_dup 1))))]) (parallel [(set (match_dup 0) (zero_extend:DI (xor:SI (match_dup 3) (const_int 63)))) (clobber (reg:CC FLAGS_REG))])] { operands[2] = gen_reg_rtx (DImode); operands[3] = lowpart_subreg (SImode, operands[2], DImode); }) (define_insn_and_split "*bsr_2" [(set (match_operand:DI 0 "register_operand") (sign_extend:DI (xor:SI (minus:SI (const_int 31) (clz:SI (match_operand:SI 1 "nonimmediate_operand"))) (const_int 31)))) (clobber (reg:CC FLAGS_REG))] "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()" "#" "&& 1" [(parallel [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0))) (set (match_dup 2) (minus:SI (const_int 31) (clz:SI (match_dup 1))))]) (parallel [(set (match_dup 0) (zero_extend:DI (xor:SI (match_dup 2) (const_int 31)))) (clobber (reg:CC FLAGS_REG))])] "operands[2] = gen_reg_rtx (SImode);") ; Splitters to optimize 64 - __builtin_clzl (x) or 32 - __builtin_clz (x). 
; Again, as for !TARGET_LZCNT CLZ is UB at zero, CLZ is guaranteed to be
; in [0, 63] or [0, 31] range.

;; SImode result computed from a DImode input.  The bsr value goes into a
;; fresh DImode pseudo (operand 3), operand 5 is its SImode low part and
;; operand 4 is the original constant minus 63.  When the constant is
;; exactly 63 only bsr_rex64_1_zext plus a move is emitted.
(define_split
  [(set (match_operand:SI 0 "register_operand")
        (minus:SI
          (match_operand:SI 2 "const_int_operand")
          (xor:SI
            (minus:SI (const_int 63)
                      (subreg:SI
                        (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
                        0))
            (const_int 63))))]
  "!TARGET_LZCNT && TARGET_64BIT && ix86_pre_reload_split ()"
  [(set (match_dup 3)
        (minus:DI (const_int 63) (clz:DI (match_dup 1))))
   (set (match_dup 0)
        (plus:SI (match_dup 5) (match_dup 4)))]
{
  operands[3] = gen_reg_rtx (DImode);
  operands[5] = lowpart_subreg (SImode, operands[3], DImode);
  if (INTVAL (operands[2]) == 63)
    {
      emit_insn (gen_bsr_rex64_1_zext (operands[3], operands[1]));
      emit_move_insn (operands[0], operands[5]);
      DONE;
    }
  operands[4] = gen_int_mode (UINTVAL (operands[2]) - 63, SImode);
})

;; SImode input and result.  Operand 4 is the original constant minus 31;
;; when the constant is exactly 31 a single bsr_1 is emitted instead.
(define_split
  [(set (match_operand:SI 0 "register_operand")
        (minus:SI
          (match_operand:SI 2 "const_int_operand")
          (xor:SI
            (minus:SI (const_int 31)
                      (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
            (const_int 31))))]
  "!TARGET_LZCNT && ix86_pre_reload_split ()"
  [(set (match_dup 3)
        (minus:SI (const_int 31) (clz:SI (match_dup 1))))
   (set (match_dup 0)
        (plus:SI (match_dup 3) (match_dup 4)))]
{
  if (INTVAL (operands[2]) == 31)
    {
      emit_insn (gen_bsr_1 (operands[0], operands[1]));
      DONE;
    }
  operands[3] = gen_reg_rtx (SImode);
  operands[4] = gen_int_mode (UINTVAL (operands[2]) - 31, SImode);
})

;; DImode result, DImode input, with the sign_extend inside the xor.
;; The last condition only accepts constants for which (constant - 63)
;; is unchanged by truncation to SImode.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (minus:DI
          (match_operand:DI 2 "const_int_operand")
          (xor:DI
            (sign_extend:DI
              (minus:SI (const_int 63)
                        (subreg:SI
                          (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
                          0)))
            (const_int 63))))]
  "!TARGET_LZCNT
   && TARGET_64BIT
   && ix86_pre_reload_split ()
   && ((unsigned HOST_WIDE_INT)
       trunc_int_for_mode (UINTVAL (operands[2]) - 63, SImode)
       == UINTVAL (operands[2]) - 63)"
  [(set (match_dup 3)
        (minus:DI (const_int 63) (clz:DI (match_dup 1))))
   (set (match_dup 0)
        (plus:DI (match_dup 3) (match_dup 4)))]
{
  if (INTVAL (operands[2]) == 63)
    {
      emit_insn (gen_bsr_rex64_1 (operands[0], operands[1]));
      DONE;
    }
  operands[3] = gen_reg_rtx (DImode);
  operands[4] = GEN_INT (UINTVAL (operands[2]) - 63);
})

;; DImode result from an SImode input.  The bsr value is zero-extended
;; into a DImode pseudo before the adjusted constant is added; for a
;; constant of exactly 31 a single bsr_zext_1 is emitted.
(define_split
  [(set (match_operand:DI 0 "register_operand")
        (minus:DI
          (match_operand:DI 2 "const_int_operand")
          (sign_extend:DI
            (xor:SI
              (minus:SI (const_int 31)
                        (clz:SI (match_operand:SI 1 "nonimmediate_operand")))
              (const_int 31)))))]
  "!TARGET_LZCNT
   && TARGET_64BIT
   && ix86_pre_reload_split ()
   && ((unsigned HOST_WIDE_INT)
       trunc_int_for_mode (UINTVAL (operands[2]) - 31, SImode)
       == UINTVAL (operands[2]) - 31)"
  [(set (match_dup 3)
        (zero_extend:DI
          (minus:SI (const_int 31) (clz:SI (match_dup 1)))))
   (set (match_dup 0)
        (plus:DI (match_dup 3) (match_dup 4)))]
{
  if (INTVAL (operands[2]) == 31)
    {
      emit_insn (gen_bsr_zext_1 (operands[0], operands[1]));
      DONE;
    }
  operands[3] = gen_reg_rtx (DImode);
  operands[4] = GEN_INT (UINTVAL (operands[2]) - 31);
})

;; Count leading zeros.  With TARGET_LZCNT the lzcnt pattern is emitted
;; directly.  Otherwise operand 2 is (bitsize - 1) and operand 3 a fresh
;; pseudo: the first parallel computes operand 2 minus clz together with
;; a CCZ compare of the input, the second XORs that value with operand 2.
(define_expand "clz<mode>2"
  [(parallel
     [(set (reg:CCZ FLAGS_REG)
           (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
                        (const_int 0)))
      (set (match_dup 3) (minus:SWI48
                           (match_dup 2)
                           (clz:SWI48 (match_dup 1))))])
   (parallel
     [(set (match_operand:SWI48 0 "register_operand")
           (xor:SWI48 (match_dup 3) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
  ""
{
  if (TARGET_LZCNT)
    {
      emit_insn (gen_clz<mode>2_lzcnt (operands[0], operands[1]));
      DONE;
    }
  operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
  operands[3] = gen_reg_rtx (<MODE>mode);
})

;; lzcnt.  After epilogue_completed, when tuning asks for it and the
;; destination is not mentioned in the source, the insn is split into
;; a clear of the destination (ix86_expand_clear) followed by the
;; *_falsedep form.
(define_insn_and_split "clz<mode>2_lzcnt"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (clz:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_LZCNT"
  "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
    [(set (match_dup 0)
          (clz:SWI48 (match_dup 1)))
     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
     (clobber (reg:CC FLAGS_REG))])]
  "ix86_expand_clear (operands[0]);"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when destination is
; also used in source.
(define_insn "*clz<mode>2_lzcnt_falsedep"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (clz:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_LZCNT"
  "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

;; SImode lzcnt whose result is used as a DImode value masked with 63.
;; Split the same way as clz<mode>2_lzcnt.
(define_insn_and_split "*clzsi2_lzcnt_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (and:DI
          (subreg:DI
            (clz:SI
              (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
          (const_int 63)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_LZCNT && TARGET_64BIT"
  "lzcnt{l}\t{%1, %k0|%k0, %1}"
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
    [(set (match_dup 0)
          (and:DI (subreg:DI (clz:SI (match_dup 1)) 0) (const_int 63)))
     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
     (clobber (reg:CC FLAGS_REG))])]
  "ix86_expand_clear (operands[0]);"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "SI")])

; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when destination is
; also used in source.
;; Operand 1 is SImode and the condition requires TARGET_64BIT, so this
;; pattern matches exactly what *clzsi2_lzcnt_zext splits into and agrees
;; with *ctzsi2_zext_falsedep and *popcountsi2_zext_falsedep.
(define_insn "*clzsi2_lzcnt_zext_falsedep"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (and:DI
          (subreg:DI
            (clz:SI
              (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
          (const_int 63)))
   (unspec [(match_operand:DI 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_LZCNT && TARGET_64BIT"
  "lzcnt{l}\t{%1, %k0|%k0, %1}"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "SI")])

;; SImode lzcnt zero-extended to DImode.  After epilogue_completed, when
;; tuning asks for it and the destination is not mentioned in the source,
;; the insn is split into a clear of the destination followed by the
;; *_falsedep form.
(define_insn_and_split "*clzsi2_lzcnt_zext_2"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_LZCNT && TARGET_64BIT"
  "lzcnt{l}\t{%1, %k0|%k0, %1}"
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
    [(set (match_dup 0)
          (zero_extend:DI (clz:SI (match_dup 1))))
     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
     (clobber (reg:CC FLAGS_REG))])]
  "ix86_expand_clear (operands[0]);"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "SI")])

; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when destination is
; also used in source.
;; As above, operand 1 is SImode and TARGET_64BIT is required, matching
;; the insn this pattern is split from.
(define_insn "*clzsi2_lzcnt_zext_2_falsedep"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
   (unspec [(match_operand:DI 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_LZCNT && TARGET_64BIT"
  "lzcnt{l}\t{%1, %k0|%k0, %1}"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "SI")])

;; Iterator over the two count-zero unspecs; each entry carries the
;; target condition under which its patterns are enabled.
(define_int_iterator LT_ZCNT
        [(UNSPEC_TZCNT "TARGET_BMI")
         (UNSPEC_LZCNT "TARGET_LZCNT")])

;; Mnemonic for each unspec; also used to build the pattern names.
(define_int_attr lt_zcnt
        [(UNSPEC_TZCNT "tzcnt")
         (UNSPEC_LZCNT "lzcnt")])

;; Value of the "type" attribute for each unspec.
(define_int_attr lt_zcnt_type
        [(UNSPEC_TZCNT "alu1")
         (UNSPEC_LZCNT "bitmanip")])

;; Version of lzcnt/tzcnt that is expanded from intrinsics.  This version
;; provides operand size as output when source operand is zero.

(define_insn_and_split "<lt_zcnt>_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (unspec:SWI48
          [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
    [(set (match_dup 0)
          (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
     (clobber (reg:CC FLAGS_REG))])]
  "ix86_expand_clear (operands[0]);"
  [(set_attr "type" "<lt_zcnt_type>")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "<MODE>")])

; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when destination is
; also used in source.
(define_insn "*<lt_zcnt>_<mode>_falsedep"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (unspec:SWI48
          [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  ""
  "<lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "<lt_zcnt_type>")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "<MODE>")])

;; HImode variant of the intrinsic form; it has no false-dependency split.
(define_insn "<lt_zcnt>_hi"
  [(set (match_operand:HI 0 "register_operand" "=r")
        (unspec:HI
          [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "<lt_zcnt>{w}\t{%1, %0|%0, %1}"
  [(set_attr "type" "<lt_zcnt_type>")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "HI")])

;; BMI instructions.
;; bextr with a register second operand, kept as an unspec.  The two
;; alternatives differ only in whether operand 1 is a register or memory.
(define_insn "bmi_bextr_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
        (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
                       (match_operand:SWI48 2 "register_operand" "r,r")]
                      UNSPEC_BEXTR))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  "bextr\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "direct, double")
   (set_attr "mode" "<MODE>")])

;; Same instruction when only the CCZ compare of the result against zero
;; is wanted; the value itself lands in a scratch register.
(define_insn "*bmi_bextr_<mode>_ccz"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ
          (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
                         (match_operand:SWI48 2 "register_operand" "r,r")]
                        UNSPEC_BEXTR)
          (const_int 0)))
   (clobber (match_scratch:SWI48 0 "=r,r"))]
  "TARGET_BMI"
  "bextr\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "direct, double")
   (set_attr "mode" "<MODE>")])

;; blsi: (-x) & x.
(define_insn "*bmi_blsi_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (neg:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  "blsi\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

;; blsi producing both the value and a compare of it against zero;
;; accepted only when ix86_match_ccmode allows CCNOmode.
(define_insn "*bmi_blsi_<mode>_cmp"
  [(set (reg FLAGS_REG)
        (compare
          (and:SWI48
            (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
            (match_dup 1))
          (const_int 0)))
   (set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48 (neg:SWI48 (match_dup 1)) (match_dup 1)))]
  "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
  "blsi\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

;; blsi used only for its flags; the value goes to a scratch register.
(define_insn "*bmi_blsi_<mode>_ccno"
  [(set (reg FLAGS_REG)
        (compare
          (and:SWI48
            (neg:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm"))
            (match_dup 1))
          (const_int 0)))
   (clobber (match_scratch:SWI48 0 "=r"))]
  "TARGET_BMI && ix86_match_ccmode (insn, CCNOmode)"
  "blsi\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

;; blsmsk: (x - 1) ^ x.
(define_insn "*bmi_blsmsk_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (xor:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int -1))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  "blsmsk\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

;; blsr: (x - 1) & x.
(define_insn "*bmi_blsr_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int -1))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI"
  "blsr\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

;; blsr producing both the value and a CCZ compare of it against zero.
(define_insn "*bmi_blsr_<mode>_cmp"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ
          (and:SWI48
            (plus:SWI48
              (match_operand:SWI48 1 "nonimmediate_operand" "rm")
              (const_int -1))
            (match_dup 1))
          (const_int 0)))
   (set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (plus:SWI48
            (match_dup 1)
            (const_int -1))
          (match_dup 1)))]
  "TARGET_BMI"
  "blsr\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

;; blsr used only for its CCZ flags; the value goes to a scratch register.
(define_insn "*bmi_blsr_<mode>_ccz"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ
          (and:SWI48
            (plus:SWI48
              (match_operand:SWI48 1 "nonimmediate_operand" "rm")
              (const_int -1))
            (match_dup 1))
          (const_int 0)))
   (clobber (match_scratch:SWI48 0 "=r"))]
  "TARGET_BMI"
  "blsr\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "<MODE>")])

;; BMI2 instructions.
;; bzhi expander.  Operand 2 is narrowed to its QImode low part and
;; operand 3 is set to the bit width of the mode, so the RTL reads:
;; if operand 2 is nonzero, extract umin (operand 2, width) low bits of
;; operand 1, else zero.
(define_expand "bmi2_bzhi_<mode>3"
  [(parallel
    [(set (match_operand:SWI48 0 "register_operand")
          (if_then_else:SWI48
            (ne:QI (match_operand:QI 2 "register_operand")
                   (const_int 0))
            (zero_extract:SWI48
              (match_operand:SWI48 1 "nonimmediate_operand")
              (umin:QI (match_dup 2) (match_dup 3))
              (const_int 0))
            (const_int 0)))
     (clobber (reg:CC FLAGS_REG))])]
  "TARGET_BMI2"
{
  operands[2] = gen_lowpart (QImode, operands[2]);
  operands[3] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
})

;; Matching insn for the expander; operand 3 must equal the mode's bit
;; width.  Operand 2 is a QImode register, so it is printed with the
;; <k> modifier to get a register name of the operation's size.
(define_insn "*bmi2_bzhi_<mode>3"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (if_then_else:SWI48
          (ne:QI (match_operand:QI 2 "register_operand" "q")
                 (const_int 0))
          (zero_extract:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (umin:QI (match_dup 2)
                     (match_operand:QI 3 "const_int_operand"))
            (const_int 0))
          (const_int 0)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
  "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; Same operation used only for a CCZ compare of the result against
;; zero; the value goes to a scratch register.
(define_insn "*bmi2_bzhi_<mode>3_1_ccz"
  [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ
          (if_then_else:SWI48
            (ne:QI (match_operand:QI 2 "register_operand" "r")
                   (const_int 0))
            (zero_extract:SWI48
              (match_operand:SWI48 1 "nonimmediate_operand" "rm")
              (umin:QI (match_dup 2)
                       (match_operand:QI 3 "const_int_operand"))
              (const_int 0))
            (const_int 0))
          (const_int 0)))
   (clobber (match_scratch:SWI48 0 "=r"))]
  "TARGET_BMI2 && INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
  "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; bzhi recognized from ((1 << n) - 1) & x.
(define_insn "*bmi2_bzhi_<mode>3_2"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (plus:SWI48
            (ashift:SWI48 (const_int 1)
                          (match_operand:QI 2 "register_operand" "r"))
            (const_int -1))
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2"
  "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; bzhi recognized from ~(-1 << n) & x.
(define_insn "*bmi2_bzhi_<mode>3_3"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (not:SWI48
            (ashift:SWI48 (const_int -1)
                          (match_operand:QI 2 "register_operand" "r")))
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_BMI2"
  "bzhi\t{%<k>2, %1, %0|%0, %1, %<k>2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; SImode ((1 << n) - 1) & x zero-extended to DImode; emitted as a
;; 64-bit bzhi with all operands printed under their DImode names.
(define_insn "*bmi2_bzhi_zero_extendsidi_4"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (and:SI
            (plus:SI
              (ashift:SI (const_int 1)
                         (match_operand:QI 2 "register_operand" "r"))
              (const_int -1))
            (match_operand:SI 1 "nonimmediate_operand" "rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_BMI2"
  "bzhi\t{%q2, %q1, %q0|%q0, %q1, %q2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "DI")])

;; Zero-extended SImode mask ((1 << n) - 1) ANDed with a DImode value.
(define_insn "*bmi2_bzhi_zero_extendsidi_5"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (and:DI
          (zero_extend:DI
            (plus:SI
              (ashift:SI (const_int 1)
                         (match_operand:QI 2 "register_operand" "r"))
              (const_int -1)))
          (match_operand:DI 1 "nonimmediate_operand" "rm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_BMI2"
  "bzhi\t{%q2, %q1, %q0|%q0, %q1, %q2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "DI")])

;; pdep, kept as an unspec.  Unlike the patterns above it has no
;; FLAGS_REG clobber.
(define_insn "bmi2_pdep_<mode>3"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
                       (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
                       UNSPEC_PDEP))]
  "TARGET_BMI2"
  "pdep\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; pext, kept as an unspec; likewise without a FLAGS_REG clobber.
(define_insn "bmi2_pext_<mode>3"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
                       (match_operand:SWI48 2 "nonimmediate_operand" "rm")]
                       UNSPEC_PEXT))]
  "TARGET_BMI2"
  "pext\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "bitmanip")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; TBM instructions.
;; bextr with an immediate.  The output code folds the two zero_extract
;; constants into one immediate: operand 2 shifted left by 8, ORed with
;; operand 3.
(define_insn "@tbm_bextri_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (zero_extract:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")
          (match_operand:QI 2 "const_0_to_255_operand")
          (match_operand:QI 3 "const_0_to_255_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_TBM"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) << 8 | INTVAL (operands[3]));
  return "bextr\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

;; blcfill: (x + 1) & x.
(define_insn "*tbm_blcfill_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int 1))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_TBM"
  "blcfill\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

;; blci: ~(x + 1) | x.
(define_insn "*tbm_blci_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (ior:SWI48
          (not:SWI48
            (plus:SWI48
              (match_operand:SWI48 1 "nonimmediate_operand" "rm")
              (const_int 1)))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_TBM"
  "blci\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

;; blcic: (x + 1) & ~x.
(define_insn "*tbm_blcic_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int 1))
          (not:SWI48
            (match_dup 1))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_TBM"
  "blcic\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

;; blcmsk: (x + 1) ^ x.
(define_insn "*tbm_blcmsk_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (xor:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int 1))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_TBM"
  "blcmsk\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

;; blcs: (x + 1) | x.
(define_insn "*tbm_blcs_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (ior:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int 1))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_TBM"
  "blcs\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

;; blsfill: (x - 1) | x.
(define_insn "*tbm_blsfill_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (ior:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int -1))
          (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_TBM"
  "blsfill\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

;; blsic: (x - 1) | ~x.
(define_insn "*tbm_blsic_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (ior:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int -1))
          (not:SWI48
            (match_dup 1))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_TBM"
  "blsic\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

;; t1mskc: (x + 1) | ~x.
(define_insn "*tbm_t1mskc_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (ior:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int 1))
          (not:SWI48
            (match_dup 1))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_TBM"
  "t1mskc\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

;; tzmsk: (x - 1) & ~x.
(define_insn "*tbm_tzmsk_<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (and:SWI48
          (plus:SWI48
            (match_operand:SWI48 1 "nonimmediate_operand" "rm")
            (const_int -1))
          (not:SWI48
            (match_dup 1))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_TBM"
  "tzmsk\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

;; popcnt.  Under TARGET_MACHO the mnemonic is printed without a size
;; suffix.  After epilogue_completed, when tuning asks for it and the
;; destination is not mentioned in the source, the insn is split into a
;; clear of the destination followed by the *_falsedep form.
(define_insn_and_split "popcount<mode>2"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (popcount:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT"
{
#if TARGET_MACHO
  return "popcnt\t{%1, %0|%0, %1}";
#else
  return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
#endif
}
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
    [(set (match_dup 0)
          (popcount:SWI48 (match_dup 1)))
     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
     (clobber (reg:CC FLAGS_REG))])]
  "ix86_expand_clear (operands[0]);"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when destination is
; also used in source.
(define_insn "*popcount<mode>2_falsedep"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (popcount:SWI48
          (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT"
{
#if TARGET_MACHO
  return "popcnt\t{%1, %0|%0, %1}";
#else
  return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
#endif
}
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "<MODE>")])

;; SImode popcnt whose result is used as a DImode value masked with 63.
;; Split the same way as popcount<mode>2.
(define_insn_and_split "*popcountsi2_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (and:DI
          (subreg:DI
            (popcount:SI
              (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
          (const_int 63)))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT && TARGET_64BIT"
{
#if TARGET_MACHO
  return "popcnt\t{%1, %k0|%k0, %1}";
#else
  return "popcnt{l}\t{%1, %k0|%k0, %1}";
#endif
}
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
    [(set (match_dup 0)
          (and:DI (subreg:DI (popcount:SI (match_dup 1)) 0) (const_int 63)))
     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
     (clobber (reg:CC FLAGS_REG))])]
  "ix86_expand_clear (operands[0]);"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "SI")])

; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when destination is
; also used in source.
;; Form of *popcountsi2_zext after the false-dependency split: operand 2
;; is tied to the destination through the "0" constraint.
(define_insn "*popcountsi2_zext_falsedep"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (and:DI
          (subreg:DI
            (popcount:SI
              (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
          (const_int 63)))
   (unspec [(match_operand:DI 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT && TARGET_64BIT"
{
#if TARGET_MACHO
  return "popcnt\t{%1, %k0|%k0, %1}";
#else
  return "popcnt{l}\t{%1, %k0|%k0, %1}";
#endif
}
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "SI")])

;; SImode popcnt zero-extended to DImode.  Under TARGET_MACHO the
;; mnemonic is printed without a size suffix.  After epilogue_completed,
;; when tuning asks for it and the destination is not mentioned in the
;; source, the insn is split into a clear of the destination followed by
;; the *_falsedep form.
(define_insn_and_split "*popcountsi2_zext_2"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT && TARGET_64BIT"
{
#if TARGET_MACHO
  return "popcnt\t{%1, %k0|%k0, %1}";
#else
  return "popcnt{l}\t{%1, %k0|%k0, %1}";
#endif
}
  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
   && optimize_function_for_speed_p (cfun)
   && !reg_mentioned_p (operands[0], operands[1])"
  [(parallel
    [(set (match_dup 0)
          (zero_extend:DI (popcount:SI (match_dup 1))))
     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
     (clobber (reg:CC FLAGS_REG))])]
  "ix86_expand_clear (operands[0]);"
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "SI")])

; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt.  There is no false dependency when destination is
; also used in source.
;; False-dependency form of *popcountsi2_zext_2.  Operand 2 is tied to
;; operand 0 and wrapped in UNSPEC_INSN_FALSE_DEP.
(define_insn "*popcountsi2_zext_2_falsedep"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
   (unspec [(match_operand:DI 2 "register_operand" "0")]
           UNSPEC_INSN_FALSE_DEP)
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT && TARGET_64BIT"
{
#if TARGET_MACHO
  return "popcnt\t{%1, %k0|%k0, %1}";
#else
  return "popcnt{l}\t{%1, %k0|%k0, %1}";
#endif
}
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "SI")])

;; SImode popcount of a zero-extended HImode value.  Before reload this is
;; split into an HImode popcount followed by a zero extension to SImode.
(define_insn_and_split "*popcounthi2_1"
  [(set (match_operand:SI 0 "register_operand")
        (popcount:SI
          (zero_extend:SI
            (match_operand:HI 1 "nonimmediate_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx tmp = gen_reg_rtx (HImode);

  emit_insn (gen_popcounthi2 (tmp, operands[1]));
  emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
  DONE;
})

;; Zero extension of an HImode popcount; split the same way as
;; *popcounthi2_1.
(define_insn_and_split "*popcounthi2_2"
  [(set (match_operand:SI 0 "register_operand")
        (zero_extend:SI
          (popcount:HI (match_operand:HI 1 "nonimmediate_operand"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx tmp = gen_reg_rtx (HImode);

  emit_insn (gen_popcounthi2 (tmp, operands[1]));
  emit_insn (gen_zero_extendhisi2 (operands[0], tmp));
  DONE;
})

;; 16-bit popcnt.
(define_insn "popcounthi2"
  [(set (match_operand:HI 0 "register_operand" "=r")
        (popcount:HI
          (match_operand:HI 1 "nonimmediate_operand" "rm")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_POPCNT"
{
#if TARGET_MACHO
  return "popcnt\t{%1, %0|%0, %1}";
#else
  return "popcnt{w}\t{%1, %0|%0, %1}";
#endif
}
  [(set_attr "prefix_rep" "1")
   (set_attr "type" "bitmanip")
   (set_attr "mode" "HI")])

;; DImode byte swap.  Without MOVBE the source is forced into a register.
(define_expand "bswapdi2"
  [(set (match_operand:DI 0 "register_operand")
        (bswap:DI (match_operand:DI 1 "nonimmediate_operand")))]
  "TARGET_64BIT"
{
  if (!TARGET_MOVBE)
    operands[1] = force_reg (DImode, operands[1]);
})

;; SImode byte swap.  With MOVBE the operands are used as they are; with
;; BSWAP the source is forced into a register; otherwise the swap is
;; expanded as lowpart byte swap, rotate by 16, lowpart byte swap.
(define_expand "bswapsi2"
  [(set (match_operand:SI 0 "register_operand")
        (bswap:SI (match_operand:SI 1 "nonimmediate_operand")))]
  ""
{
  if (TARGET_MOVBE)
    ;
  else if (TARGET_BSWAP)
    operands[1] = force_reg (SImode, operands[1]);
  else
    {
      rtx x = gen_reg_rtx (SImode);

      emit_insn (gen_bswaphisi2_lowpart (x, operands[1]));
      emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
      emit_insn (gen_bswaphisi2_lowpart (operands[0], x));
      DONE;
    }
})

;; Byte swap when MOVBE is available: in-place bswap, load with swap, or
;; store with swap.  Memory-to-memory is rejected by the condition.
(define_insn "*bswap<mode>2_movbe"
  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,m")
        (bswap:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,m,r")))]
  "TARGET_MOVBE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   bswap\t%0
   movbe{<imodesuffix>}\t{%1, %0|%0, %1}
   movbe{<imodesuffix>}\t{%1, %0|%0, %1}"
  [(set_attr "type" "bitmanip,imov,imov")
   (set_attr "modrm" "0,1,1")
   (set_attr "prefix_0f" "*,1,1")
   (set_attr "prefix_extra" "*,1,1")
   (set_attr "mode" "<MODE>")])

;; In-place register byte swap.
(define_insn "*bswap<mode>2"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (bswap:SWI48 (match_operand:SWI48 1 "register_operand" "0")))]
  "TARGET_BSWAP"
  "bswap\t%0"
  [(set_attr "type" "bitmanip")
   (set_attr "modrm" "0")
   (set_attr "mode" "<MODE>")])

;; HImode byte swap.  Without MOVBE the source is forced into a register.
(define_expand "bswaphi2"
  [(set (match_operand:HI 0 "register_operand")
        (bswap:HI (match_operand:HI 1 "nonimmediate_operand")))]
  ""
{
  if (!TARGET_MOVBE)
    operands[1] = force_reg (HImode, operands[1]);
})

;; HImode byte swap with MOVBE: exchange the high and low byte of a
;; "Q" register in place, or use movbe for a load or a store.
(define_insn "*bswaphi2_movbe"
  [(set (match_operand:HI 0 "nonimmediate_operand" "=Q,r,m")
        (bswap:HI (match_operand:HI 1 "nonimmediate_operand" "0,m,r")))]
  "TARGET_MOVBE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   xchg{b}\t{%h0, %b0|%b0, %h0}
   movbe{w}\t{%1, %0|%0, %1}
   movbe{w}\t{%1, %0|%0, %1}"
  [(set_attr "type" "imov")
   (set_attr "modrm" "*,1,1")
   (set_attr "prefix_0f" "*,1,1")
   (set_attr "prefix_extra" "*,1,1")
   (set_attr "pent_pair" "np,*,*")
   (set_attr "athlon_decode" "vector,*,*")
   (set_attr "amdfam10_decode" "double,*,*")
   (set_attr "bdver1_decode" "double,*,*")
   (set_attr "mode" "QI,HI,HI")])

;; HImode byte swap without MOVBE: exchange the high and low byte.
(define_insn "*bswaphi2"
  [(set (match_operand:HI 0 "register_operand" "=Q")
        (bswap:HI (match_operand:HI 1 "register_operand" "0")))]
  "!TARGET_MOVBE"
  "xchg{b}\t{%h0, %b0|%b0, %h0}"
  [(set_attr "type" "imov")
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "double")
   (set_attr "mode" "QI")])

;; Replace an in-place HImode byte swap with a rotate by 8 when the flags
;; register is dead, unless xchgb is preferred, partial register stalls
;; matter, or the function is optimized for size.
(define_peephole2
  [(set (match_operand:HI 0 "general_reg_operand")
        (bswap:HI (match_dup 0)))]
  "!(TARGET_USE_XCHGB ||
     TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0) (rotate:HI (match_dup 0) (const_int 8)))
              (clobber (reg:CC FLAGS_REG))])])

;; Swap the two low bytes of an SImode register while keeping the upper
;; 16 bits (mask -65536) unchanged.
(define_insn "bswaphisi2_lowpart"
  [(set (match_operand:SI 0 "register_operand" "=Q")
        (ior:SI (and:SI (match_operand:SI 1 "register_operand" "0")
                        (const_int -65536))
                (lshiftrt:SI (bswap:SI (match_dup 1))
                             (const_int 16))))]
  ""
  "xchg{b}\t{%h0, %b0|%b0, %h0}"
  [(set_attr "type" "imov")
   (set_attr "pent_pair" "np")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "double")
   (set_attr "mode" "QI")])

;; Same replacement as the HImode peephole above, for the lowpart swap:
;; rotate the HImode low part by 8 through strict_low_part.
(define_peephole2
  [(set (match_operand:SI 0 "general_reg_operand")
        (ior:SI (and:SI (match_dup 0)
                        (const_int -65536))
                (lshiftrt:SI (bswap:SI (match_dup 0))
                             (const_int 16))))]
  "!(TARGET_USE_XCHGB ||
     TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (strict_low_part (match_dup 0))
                   (rotate:HI (match_dup 0) (const_int 8)))
              (clobber (reg:CC FLAGS_REG))])]
  "operands[0] = gen_lowpart (HImode, operands[0]);")

;; DImode parity without POPCNT: xor the two SImode halves, xor the two
;; HImode halves of that, run parityhi2_cmp on the result, read the flag
;; with an ORDERED setcc into a QImode scratch and zero-extend it.
(define_expand "paritydi2"
  [(set (match_operand:DI 0 "register_operand")
        (parity:DI (match_operand:DI 1 "register_operand")))]
  "! TARGET_POPCNT"
{
  rtx scratch = gen_reg_rtx (QImode);
  rtx hipart1 = gen_reg_rtx (SImode);
  rtx lopart1 = gen_reg_rtx (SImode);
  rtx xor1 = gen_reg_rtx (SImode);
  rtx shift2 = gen_reg_rtx (SImode);
  rtx hipart2 = gen_reg_rtx (HImode);
  rtx lopart2 = gen_reg_rtx (HImode);
  rtx xor2 = gen_reg_rtx (HImode);

  if (TARGET_64BIT)
    {
      rtx shift1 = gen_reg_rtx (DImode);
      emit_insn (gen_lshrdi3 (shift1, operands[1], GEN_INT (32)));
      emit_move_insn (hipart1, gen_lowpart (SImode, shift1));
    }
  else
    emit_move_insn (hipart1, gen_highpart (SImode, operands[1]));

  emit_move_insn (lopart1, gen_lowpart (SImode, operands[1]));
  emit_insn (gen_xorsi3 (xor1, hipart1, lopart1));

  emit_insn (gen_lshrsi3 (shift2, xor1, GEN_INT (16)));
  emit_move_insn (hipart2, gen_lowpart (HImode, shift2));
  emit_move_insn (lopart2, gen_lowpart (HImode, xor1));
  emit_insn (gen_xorhi3 (xor2, hipart2, lopart2));

  emit_insn (gen_parityhi2_cmp (xor2));

  ix86_expand_setcc (scratch, ORDERED,
                     gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);

  if (TARGET_64BIT)
    emit_insn (gen_zero_extendqidi2 (operands[0], scratch));
  else
    {
      rtx tmp = gen_reg_rtx (SImode);

      emit_insn (gen_zero_extendqisi2 (tmp, scratch));
      emit_insn (gen_zero_extendsidi2 (operands[0], tmp));
    }
  DONE;
})

;; SImode parity without POPCNT: xor the two HImode halves, then proceed
;; as in paritydi2.
(define_expand "paritysi2"
  [(set (match_operand:SI 0 "register_operand")
        (parity:SI (match_operand:SI 1 "register_operand")))]
  "! TARGET_POPCNT"
{
  rtx scratch = gen_reg_rtx (QImode);
  rtx shift = gen_reg_rtx (SImode);
  rtx hipart = gen_reg_rtx (HImode);
  rtx lopart = gen_reg_rtx (HImode);
  rtx tmp = gen_reg_rtx (HImode);

  emit_insn (gen_lshrsi3 (shift, operands[1], GEN_INT (16)));
  emit_move_insn (hipart, gen_lowpart (HImode, shift));
  emit_move_insn (lopart, gen_lowpart (HImode, operands[1]));
  emit_insn (gen_xorhi3 (tmp, hipart, lopart));

  emit_insn (gen_parityhi2_cmp (tmp));

  ix86_expand_setcc (scratch, ORDERED,
                     gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
  emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
  DONE;
})

;; HImode parity without POPCNT.  The input is copied to a temporary
;; first because parityhi2_cmp clobbers its operand.
(define_expand "parityhi2"
  [(set (match_operand:HI 0 "register_operand")
        (parity:HI (match_operand:HI 1 "register_operand")))]
  "! TARGET_POPCNT"
{
  rtx scratch = gen_reg_rtx (QImode);
  rtx tmp = gen_reg_rtx (HImode);

  emit_move_insn (tmp, operands[1]);
  emit_insn (gen_parityhi2_cmp (tmp));

  ix86_expand_setcc (scratch, ORDERED,
                     gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
  emit_insn (gen_zero_extendqihi2 (operands[0], scratch));
  DONE;
})

;; QImode parity without POPCNT.
(define_expand "parityqi2"
  [(set (match_operand:QI 0 "register_operand")
        (parity:QI (match_operand:QI 1 "register_operand")))]
  "! TARGET_POPCNT"
{
  emit_insn (gen_parityqi2_cmp (operands[1]));

  ix86_expand_setcc (operands[0], ORDERED,
                     gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
  DONE;
})

;; Set the flags from the parity of an HImode "Q" register by xoring its
;; high byte into its low byte.  The register is clobbered.
(define_insn "parityhi2_cmp"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC [(match_operand:HI 0 "register_operand" "+Q")]
                   UNSPEC_PARITY))
   (clobber (match_dup 0))]
  ""
  "xor{b}\t{%h0, %b0|%b0, %h0}"
  [(set_attr "length" "2")
   (set_attr "mode" "QI")])

;; Set the flags from the parity of a QImode register with test.
(define_insn "parityqi2_cmp"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC [(match_operand:QI 0 "register_operand" "q")]
                   UNSPEC_PARITY))]
  ""
  "test{b}\t%0, %0"
  [(set_attr "mode" "QI")])

;; Replace zero_extend:HI followed by parityhi2_cmp with parityqi2_cmp
(define_peephole2
  [(set (match_operand:HI 0 "register_operand")
        (zero_extend:HI (match_operand:QI 1 "general_reg_operand")))
   (parallel [(set (reg:CC FLAGS_REG)
                   (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
              (clobber (match_dup 0))])]
  ""
  [(set (reg:CC FLAGS_REG)
        (unspec:CC [(match_dup 1)] UNSPEC_PARITY))])

;; Eliminate QImode popcount&1 using parity flag
;; The branch comparison is rewritten: EQ becomes UNORDERED, anything else
;; becomes ORDERED.
(define_peephole2
  [(set (match_operand:SI 0 "register_operand")
        (zero_extend:SI (match_operand:QI 1 "general_reg_operand")))
   (parallel [(set (match_operand:SI 2 "register_operand")
                   (popcount:SI (match_dup 0)))
              (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
        (compare:CCZ (and:QI (match_operand:QI 3 "register_operand")
                             (const_int 1))
                     (const_int 0)))
   (set (pc) (if_then_else (match_operator 4 "bt_comparison_operator"
                            [(reg:CCZ FLAGS_REG)
                             (const_int 0)])
                           (label_ref (match_operand 5))
                           (pc)))]
  "REGNO (operands[2]) == REGNO (operands[3])
   && peep2_reg_dead_p (3, operands[0])
   && peep2_reg_dead_p (3, operands[2])
   && peep2_regno_dead_p (4, FLAGS_REG)"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC [(match_dup 1)] UNSPEC_PARITY))
   (set (pc) (if_then_else (match_op_dup 4 [(reg:CC FLAGS_REG)
                                            (const_int 0)])
                           (label_ref (match_dup 5))
                           (pc)))]
{
  operands[4] = shallow_copy_rtx (operands[4]);
  PUT_CODE (operands[4], GET_CODE (operands[4]) == EQ ? UNORDERED : ORDERED);
})

;; Eliminate HImode popcount&1 using parity flag
;; The popcount source is copied into an HImode "Q" scratch, which
;; parityhi2_cmp then consumes.
(define_peephole2
  [(match_scratch:HI 0 "Q")
   (parallel [(set (match_operand:HI 1 "register_operand")
                   (popcount:HI
                    (match_operand:HI 2 "nonimmediate_operand")))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_operand 3 "register_operand")
        (zero_extend (match_dup 1)))
   (set (reg:CCZ FLAGS_REG)
        (compare:CCZ (and:QI (match_operand:QI 4 "register_operand")
                             (const_int 1))
                     (const_int 0)))
   (set (pc) (if_then_else (match_operator 5 "bt_comparison_operator"
                            [(reg:CCZ FLAGS_REG)
                             (const_int 0)])
                           (label_ref (match_operand 6))
                           (pc)))]
  "REGNO (operands[3]) == REGNO (operands[4])
   && peep2_reg_dead_p (3, operands[1])
   && peep2_reg_dead_p (3, operands[3])
   && peep2_regno_dead_p (4, FLAGS_REG)"
  [(set (match_dup 0) (match_dup 2))
   (parallel [(set (reg:CC FLAGS_REG)
                   (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
              (clobber (match_dup 0))])
   (set (pc) (if_then_else (match_op_dup 5 [(reg:CC FLAGS_REG)
                                            (const_int 0)])
                           (label_ref (match_dup 6))
                           (pc)))]
{
  operands[5] = shallow_copy_rtx (operands[5]);
  PUT_CODE (operands[5], GET_CODE (operands[5]) == EQ ? UNORDERED : ORDERED);
})

;; Eliminate HImode popcount&1 using parity flag (variant 2)
;; Same as above but without the intermediate zero extension.
(define_peephole2
  [(match_scratch:HI 0 "Q")
   (parallel [(set (match_operand:HI 1 "register_operand")
                   (popcount:HI
                    (match_operand:HI 2 "nonimmediate_operand")))
              (clobber (reg:CC FLAGS_REG))])
   (set (reg:CCZ FLAGS_REG)
        (compare:CCZ (and:QI (match_operand:QI 3 "register_operand")
                             (const_int 1))
                     (const_int 0)))
   (set (pc) (if_then_else (match_operator 4 "bt_comparison_operator"
                            [(reg:CCZ FLAGS_REG)
                             (const_int 0)])
                           (label_ref (match_operand 5))
                           (pc)))]
  "REGNO (operands[1]) == REGNO (operands[3])
   && peep2_reg_dead_p (2, operands[1])
   && peep2_reg_dead_p (2, operands[3])
   && peep2_regno_dead_p (3, FLAGS_REG)"
  [(set (match_dup 0) (match_dup 2))
   (parallel [(set (reg:CC FLAGS_REG)
                   (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
              (clobber (match_dup 0))])
   (set (pc) (if_then_else (match_op_dup 4 [(reg:CC FLAGS_REG)
                                            (const_int 0)])
                           (label_ref (match_dup 5))
                           (pc)))]
{
  operands[4] = shallow_copy_rtx (operands[4]);
  PUT_CODE (operands[4], GET_CODE (operands[4]) == EQ ? UNORDERED : ORDERED);
})


;; Thread-local storage patterns for ELF.
;;
;; Note that these code sequences must appear exactly as shown
;; in order to allow linker relaxation.
;; 32-bit global-dynamic TLS access: lea of the @tlsgd slot followed by a
;; call.  The call form depends on TARGET_SUN_TLS, flag_plt and
;; HAVE_AS_IX86_TLS_GET_ADDR_GOT.  Result in %eax ("a"); scratch
;; registers "d" and "c" and the flags are clobbered.
(define_insn "*tls_global_dynamic_32_gnu"
  [(set (match_operand:SI 0 "register_operand" "=a")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "Yb")
          (match_operand 2 "tls_symbolic_operand")
          (match_operand 3 "constant_call_address_operand" "Bz")
          (reg:SI SP_REG)]
         UNSPEC_TLS_GD))
   (clobber (match_scratch:SI 4 "=d"))
   (clobber (match_scratch:SI 5 "=c"))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT && TARGET_GNU_TLS"
{
  if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
    output_asm_insn
      ("lea{l}\t{%E2@tlsgd(,%1,1), %0|%0, %E2@tlsgd[%1*1]}", operands);
  else
    output_asm_insn
      ("lea{l}\t{%E2@tlsgd(%1), %0|%0, %E2@tlsgd[%1]}", operands);
  if (TARGET_SUN_TLS)
#ifdef HAVE_AS_IX86_TLSGDPLT
    return "call\t%a2@tlsgdplt";
#else
    return "call\t%p3@plt";
#endif
  if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
    return "call\t%P3";
  return "call\t{*%p3@GOT(%1)|[DWORD PTR %p3@GOT[%1]]}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "12")])

;; Expander for the 32-bit global-dynamic sequence.  Note that operand 1 is
;; the TLS symbol and operand 2 the register here.  Records that a TLS
;; descriptor call was expanded in the current function.
(define_expand "tls_global_dynamic_32"
  [(parallel
    [(set (match_operand:SI 0 "register_operand")
          (unspec:SI [(match_operand:SI 2 "register_operand")
                      (match_operand 1 "tls_symbolic_operand")
                      (match_operand 3 "constant_call_address_operand")
                      (reg:SI SP_REG)]
                     UNSPEC_TLS_GD))
     (clobber (scratch:SI))
     (clobber (scratch:SI))
     (clobber (reg:CC FLAGS_REG))])]
  ""
  "ix86_tls_descriptor_calls_expanded_in_cfun = true;")

;; 64-bit global-dynamic TLS access: padded lea of the @tlsgd slot into
;; %rdi followed by a call; result in %rax ("a").
(define_insn "*tls_global_dynamic_64_<mode>"
  [(set (match_operand:P 0 "register_operand" "=a")
        (call:P
         (mem:QI (match_operand 2 "constant_call_address_operand" "Bz"))
         (match_operand 3)))
   (unspec:P [(match_operand 1 "tls_symbolic_operand")
              (reg:P SP_REG)]
             UNSPEC_TLS_GD)]
  "TARGET_64BIT"
{
  if (!TARGET_X32)
    /* The .loc directive has effect for 'the immediately following assembly
       instruction'.  So for a sequence:
         .loc f l
         .byte x
         insn1
       the 'immediately following assembly instruction' is insn1.
       We want to emit an insn prefix here, but if we use .byte (as shown in
       'ELF Handling For Thread-Local Storage'), a preceding .loc will point
       inside the insn sequence, rather than to the start.  After relaxation
       of the sequence by the linker, the .loc might point inside an insn.
       Use data16 prefix instead, which doesn't have this problem.  */
    fputs ("\tdata16", asm_out_file);
  output_asm_insn
    ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
  if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
    fputs (ASM_SHORT "0x6666\n", asm_out_file);
  else
    fputs (ASM_BYTE "0x66\n", asm_out_file);
  fputs ("\trex64\n", asm_out_file);
  if (TARGET_SUN_TLS)
    return "call\t%p2@plt";
  if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
    return "call\t%P2";
  return "call\t{*%p2@GOTPCREL(%%rip)|[QWORD PTR %p2@GOTPCREL[rip]]}";
}
  [(set_attr "type" "multi")
   (set (attr "length")
        (symbol_ref "TARGET_X32 ? 15 : 16"))])

;; Large-PIC variant: the call target is register "b" plus an
;; UNSPEC_PLTOFF constant, materialised in %rax and called indirectly.
(define_insn "*tls_global_dynamic_64_largepic"
  [(set (match_operand:DI 0 "register_operand" "=a")
        (call:DI
         (mem:QI (plus:DI (match_operand:DI 2 "register_operand" "b")
                          (match_operand:DI 3 "immediate_operand" "i")))
         (match_operand 4)))
   (unspec:DI [(match_operand 1 "tls_symbolic_operand")
               (reg:DI SP_REG)]
              UNSPEC_TLS_GD)]
  "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
   && GET_CODE (operands[3]) == CONST
   && GET_CODE (XEXP (operands[3], 0)) == UNSPEC
   && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF"
{
  output_asm_insn
    ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
  output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands);
  output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands);
  return "call\t{*%%rax|rax}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "22")])

;; Expander for the 64-bit global-dynamic sequence.
(define_expand "@tls_global_dynamic_64_<mode>"
  [(parallel
    [(set (match_operand:P 0 "register_operand")
          (call:P
           (mem:QI (match_operand 2))
           (const_int 0)))
     (unspec:P [(match_operand 1 "tls_symbolic_operand")
                (reg:P SP_REG)]
               UNSPEC_TLS_GD)])]
  "TARGET_64BIT"
  "ix86_tls_descriptor_calls_expanded_in_cfun = true;")

;; 32-bit local-dynamic base: lea of the @tlsldm slot followed by a call.
(define_insn "*tls_local_dynamic_base_32_gnu"
  [(set (match_operand:SI 0 "register_operand" "=a")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "Yb")
          (match_operand 2 "constant_call_address_operand" "Bz")
          (reg:SI SP_REG)]
         UNSPEC_TLS_LD_BASE))
   (clobber (match_scratch:SI 3 "=d"))
   (clobber (match_scratch:SI 4 "=c"))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT && TARGET_GNU_TLS"
{
  output_asm_insn
    ("lea{l}\t{%&@tlsldm(%1), %0|%0, %&@tlsldm[%1]}", operands);
  if (TARGET_SUN_TLS)
    {
      if (HAVE_AS_IX86_TLSLDMPLT)
        return "call\t%&@tlsldmplt";
      else
        return "call\t%p2@plt";
    }
  if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
    return "call\t%P2";
  return "call\t{*%p2@GOT(%1)|[DWORD PTR %p2@GOT[%1]]}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "11")])

;; Expander for the 32-bit local-dynamic base sequence.
(define_expand "tls_local_dynamic_base_32"
  [(parallel
     [(set (match_operand:SI 0 "register_operand")
           (unspec:SI
            [(match_operand:SI 1 "register_operand")
             (match_operand 2 "constant_call_address_operand")
             (reg:SI SP_REG)]
            UNSPEC_TLS_LD_BASE))
      (clobber (scratch:SI))
      (clobber (scratch:SI))
      (clobber (reg:CC FLAGS_REG))])]
  ""
  "ix86_tls_descriptor_calls_expanded_in_cfun = true;")

;; 64-bit local-dynamic base: lea of the @tlsld slot into %rdi followed by
;; a call.
(define_insn "*tls_local_dynamic_base_64_<mode>"
  [(set (match_operand:P 0 "register_operand" "=a")
        (call:P
         (mem:QI (match_operand 1 "constant_call_address_operand" "Bz"))
         (match_operand 2)))
   (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)]
  "TARGET_64BIT"
{
  output_asm_insn
    ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
  if (TARGET_SUN_TLS)
    return "call\t%p1@plt";
  if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
    return "call\t%P1";
  return "call\t{*%p1@GOTPCREL(%%rip)|[QWORD PTR %p1@GOTPCREL[rip]]}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "12")])

;; Large-PIC variant of the 64-bit local-dynamic base sequence.
(define_insn "*tls_local_dynamic_base_64_largepic"
  [(set (match_operand:DI 0 "register_operand" "=a")
        (call:DI
         (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b")
                          (match_operand:DI 2 "immediate_operand" "i")))
         (match_operand 3)))
   (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)]
  "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
   && GET_CODE (operands[2]) == CONST
   && GET_CODE (XEXP (operands[2], 0)) == UNSPEC
   && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF"
{
  output_asm_insn
    ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
  output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands);
  output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands);
  return "call\t{*%%rax|rax}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "22")])

;; Expander for the 64-bit local-dynamic base sequence.
(define_expand "@tls_local_dynamic_base_64_<mode>"
  [(parallel
     [(set (match_operand:P 0 "register_operand")
           (call:P
            (mem:QI (match_operand 1))
            (const_int 0)))
      (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])]
  "TARGET_64BIT"
  "ix86_tls_descriptor_calls_expanded_in_cfun = true;")

;; Local dynamic of a single variable is a lose.  Show combine how
;; to convert that back to global dynamic.

(define_insn_and_split "*tls_local_dynamic_32_once"
  [(set (match_operand:SI 0 "register_operand" "=a")
        (plus:SI
         (unspec:SI [(match_operand:SI 1 "register_operand" "b")
                     (match_operand 2 "constant_call_address_operand" "Bz")
                     (reg:SI SP_REG)]
                    UNSPEC_TLS_LD_BASE)
         (const:SI (unspec:SI
                    [(match_operand 3 "tls_symbolic_operand")]
                    UNSPEC_DTPOFF))))
   (clobber (match_scratch:SI 4 "=d"))
   (clobber (match_scratch:SI 5 "=c"))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  ""
  [(parallel
     [(set (match_dup 0)
           (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)
                       (reg:SI SP_REG)]
                      UNSPEC_TLS_GD))
      (clobber (match_dup 4))
      (clobber (match_dup 5))
      (clobber (reg:CC FLAGS_REG))])])

;; Load and add the thread base pointer from %<tp_seg>:0.
(define_expand "get_thread_pointer<mode>"
  [(set (match_operand:PTR 0 "register_operand")
        (unspec:PTR [(const_int 0)] UNSPEC_TP))]
  ""
{
  /* targetm is not visible in the scope of the condition.  */
  if (!targetm.have_tls)
    error ("%<__builtin_thread_pointer%> is not supported on this target");
})

;; Load the thread pointer; split into a load from constant address 0 in
;; the DEFAULT_TLS_SEG_REG address space.
(define_insn_and_split "*load_tp_<mode>"
  [(set (match_operand:PTR 0 "register_operand" "=r")
        (unspec:PTR [(const_int 0)] UNSPEC_TP))]
  ""
  "#"
  ""
  [(set (match_dup 0)
        (match_dup 1))]
{
  addr_space_t as = DEFAULT_TLS_SEG_REG;

  operands[1] = gen_const_mem (<MODE>mode, const0_rtx);
  set_mem_addr_space (operands[1], as);
})

;; x32: load the SImode thread pointer zero-extended to DImode.
(define_insn_and_split "*load_tp_x32_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (unspec:SI [(const_int 0)] UNSPEC_TP)))]
  "TARGET_X32"
  "#"
  "&& 1"
  [(set (match_dup 0)
        (zero_extend:DI (match_dup 1)))]
{
  addr_space_t as = DEFAULT_TLS_SEG_REG;

  operands[1] = gen_const_mem (SImode, const0_rtx);
  set_mem_addr_space (operands[1], as);
})

;; Add the thread pointer to a register; split into an add whose second
;; source is the segment-relative memory at address 0.
(define_insn_and_split "*add_tp_<mode>"
  [(set (match_operand:PTR 0 "register_operand" "=r")
        (plus:PTR
          (unspec:PTR [(const_int 0)] UNSPEC_TP)
          (match_operand:PTR 1 "register_operand" "0")))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "#"
  ""
  [(parallel
     [(set (match_dup 0)
           (plus:PTR (match_dup 1) (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
{
  addr_space_t as = DEFAULT_TLS_SEG_REG;

  operands[2] = gen_const_mem (<MODE>mode, const0_rtx);
  set_mem_addr_space (operands[2], as);
})

;; x32: SImode thread-pointer add, zero-extended to DImode.
(define_insn_and_split "*add_tp_x32_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
                   (match_operand:SI 1 "register_operand" "0"))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_X32"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (zero_extend:DI
             (plus:SI (match_dup 1) (match_dup 2))))
      (clobber (reg:CC FLAGS_REG))])]
{
  addr_space_t as = DEFAULT_TLS_SEG_REG;

  operands[2] = gen_const_mem (SImode, const0_rtx);
  set_mem_addr_space (operands[2], as);
})

;; The Sun linker took the AMD64 TLS spec literally and can only handle
;; %rax as destination of the initial executable code sequence.
;; Initial-exec sequence for Sun TLS: load %fs:0 into the destination
;; ("a") and add the symbol's @gottpoff slot.
(define_insn "tls_initial_exec_64_sun"
  [(set (match_operand:DI 0 "register_operand" "=a")
        (unspec:DI
         [(match_operand 1 "tls_symbolic_operand")]
         UNSPEC_TLS_IE_SUN))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_SUN_TLS"
{
  output_asm_insn
    ("mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}", operands);
  return "add{q}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}";
}
  [(set_attr "type" "multi")])

;; GNU2 TLS patterns can be split.

;; 32-bit TLS descriptor access: compute the descriptor address into
;; operand 3 (a new pseudo when one can be created, else operand 0), then
;; emit the descriptor call.
(define_expand "tls_dynamic_gnu2_32"
  [(set (match_dup 3)
        (plus:SI (match_operand:SI 2 "register_operand")
                 (const:SI
                  (unspec:SI [(match_operand 1 "tls_symbolic_operand")]
                             UNSPEC_TLSDESC))))
   (parallel
    [(set (match_operand:SI 0 "register_operand")
          (unspec:SI [(match_dup 1) (match_dup 3)
                      (match_dup 2) (reg:SI SP_REG)]
                      UNSPEC_TLSDESC))
     (clobber (reg:CC FLAGS_REG))])]
  "!TARGET_64BIT && TARGET_GNU2_TLS"
{
  operands[3] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
  ix86_tls_descriptor_calls_expanded_in_cfun = true;
})

;; lea of the symbol's @TLSDESC slot relative to register "b".
(define_insn "*tls_dynamic_gnu2_lea_32"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (plus:SI (match_operand:SI 1 "register_operand" "b")
                 (const:SI
                  (unspec:SI [(match_operand 2 "tls_symbolic_operand")]
                              UNSPEC_TLSDESC))))]
  "!TARGET_64BIT && TARGET_GNU2_TLS"
  "lea{l}\t{%E2@TLSDESC(%1), %0|%0, %E2@TLSDESC[%1]}"
  [(set_attr "type" "lea")
   (set_attr "mode" "SI")
   (set_attr "length" "6")
   (set_attr "length_address" "4")])

;; Indirect call through the descriptor held in operand 2, which is tied
;; to the result register "a".
(define_insn "*tls_dynamic_gnu2_call_32"
  [(set (match_operand:SI 0 "register_operand" "=a")
        (unspec:SI [(match_operand 1 "tls_symbolic_operand")
                    (match_operand:SI 2 "register_operand" "0")
                    ;; we have to make sure %ebx still points to the GOT
                    (match_operand:SI 3 "register_operand" "b")
                    (reg:SI SP_REG)]
                   UNSPEC_TLSDESC))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT && TARGET_GNU2_TLS"
  "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
  [(set_attr "type" "call")
   (set_attr "length" "2")
   (set_attr "length_address" "0")])

;; Module-base descriptor result plus a DTPOFF constant: always split back
;; into a direct descriptor access for operand 1 via tls_dynamic_gnu2_32.
(define_insn_and_split "*tls_dynamic_gnu2_combine_32"
  [(set (match_operand:SI 0 "register_operand" "=&a")
        (plus:SI
         (unspec:SI [(match_operand 3 "tls_modbase_operand")
                     (match_operand:SI 4)
                     (match_operand:SI 2 "register_operand" "b")
                     (reg:SI SP_REG)]
                    UNSPEC_TLSDESC)
         (const:SI (unspec:SI
                    [(match_operand 1 "tls_symbolic_operand")]
                    UNSPEC_DTPOFF))))
   (clobber (reg:CC FLAGS_REG))]
  "!TARGET_64BIT && TARGET_GNU2_TLS"
  "#"
  "&& 1"
  [(set (match_dup 0) (match_dup 5))]
{
  operands[5] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0];
  emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2]));
})

;; 64-bit TLS descriptor access, parameterised by pointer mode.
(define_expand "@tls_dynamic_gnu2_64_<mode>"
  [(set (match_dup 2)
        (unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
                    UNSPEC_TLSDESC))
   (parallel
    [(set (match_operand:PTR 0 "register_operand")
          (unspec:PTR [(match_dup 1) (match_dup 2) (reg:PTR SP_REG)]
                      UNSPEC_TLSDESC))
     (clobber (reg:CC FLAGS_REG))])]
  "TARGET_64BIT && TARGET_GNU2_TLS"
{
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
  ix86_tls_descriptor_calls_expanded_in_cfun = true;
})

;; RIP-relative lea of the symbol's @TLSDESC slot.
(define_insn "*tls_dynamic_gnu2_lea_64_<mode>"
  [(set (match_operand:PTR 0 "register_operand" "=r")
        (unspec:PTR [(match_operand 1 "tls_symbolic_operand")]
                    UNSPEC_TLSDESC))]
  "TARGET_64BIT && TARGET_GNU2_TLS"
  "lea%z0\t{%E1@TLSDESC(%%rip), %0|%0, %E1@TLSDESC[rip]}"
  [(set_attr "type" "lea")
   (set_attr "mode" "<MODE>")
   (set_attr "length" "7")
   (set_attr "length_address" "4")])

;; Indirect call through the descriptor held in operand 2, which is tied
;; to the result register "a".
(define_insn "*tls_dynamic_gnu2_call_64_<mode>"
  [(set (match_operand:PTR 0 "register_operand" "=a")
        (unspec:PTR [(match_operand 1 "tls_symbolic_operand")
                   (match_operand:PTR 2 "register_operand" "0")
                   (reg:PTR SP_REG)]
                  UNSPEC_TLSDESC))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_GNU2_TLS"
  "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
  [(set_attr "type" "call")
   (set_attr "length" "2")
   (set_attr "length_address" "0")])

;; 64-bit counterpart of *tls_dynamic_gnu2_combine_32.
(define_insn_and_split "*tls_dynamic_gnu2_combine_64_<mode>"
  [(set (match_operand:PTR 0 "register_operand" "=&a")
        (plus:PTR
         (unspec:PTR [(match_operand 2 "tls_modbase_operand")
                      (match_operand:PTR 3)
                      (reg:PTR SP_REG)]
                     UNSPEC_TLSDESC)
         (const:PTR (unspec:PTR
                     [(match_operand 1 "tls_symbolic_operand")]
                     UNSPEC_DTPOFF))))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_64BIT && TARGET_GNU2_TLS"
  "#"
  "&& 1"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
  emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, operands[4], operands[1]));
})

;; Rewrite any insn matching tls_address_pattern through
;; ix86_rewrite_tls_address when direct segment references are enabled.
(define_split
  [(match_operand 0 "tls_address_pattern")]
  "TARGET_TLS_DIRECT_SEG_REFS"
  [(match_dup 0)]
  "operands[0] = ix86_rewrite_tls_address (operands[0]);")


;; These patterns match the binary 387 instructions for addM3, subM3,
;; mulM3 and divM3.  There are three patterns for each of DFmode and
;; SFmode.  The first is the normal insn, the second the same insn but
;; with one operand a conversion, and the third the same insn but with
;; the other operand a conversion.  The conversion may be SFmode or
;; SImode if the target mode DFmode, but only SImode if the target mode
;; is SFmode.

;; Gcc is slightly more smart about handling normal two address instructions
;; so use special patterns for add and mull.
(define_insn "*fop_xf_comm_i387" [(set (match_operand:XF 0 "register_operand" "=f") (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "%0") (match_operand:XF 2 "register_operand" "f")]))] "TARGET_80387 && COMMUTATIVE_ARITH_P (operands[3])" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (if_then_else (match_operand:XF 3 "mult_operator") (const_string "fmul") (const_string "fop"))) (set_attr "mode" "XF")]) (define_insn "*fop__comm" [(set (match_operand:MODEF 0 "register_operand" "=f,x,v") (match_operator:MODEF 3 "binary_fp_operator" [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0,v") (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm,vm")]))] "((SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || (TARGET_80387 && X87_ENABLE_ARITH (mode))) && COMMUTATIVE_ARITH_P (operands[3]) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (if_then_else (eq_attr "alternative" "1,2") (if_then_else (match_operand:MODEF 3 "mult_operator") (const_string "ssemul") (const_string "sseadd")) (if_then_else (match_operand:MODEF 3 "mult_operator") (const_string "fmul") (const_string "fop")))) (set_attr "isa" "*,noavx,avx") (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "") (set (attr "enabled") (if_then_else (match_test ("SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH")) (if_then_else (eq_attr "alternative" "0") (symbol_ref "TARGET_MIX_SSE_I387 && X87_ENABLE_ARITH (mode)") (const_string "*")) (if_then_else (eq_attr "alternative" "0") (symbol_ref "true") (symbol_ref "false"))))]) (define_insn "*hf" [(set (match_operand:HF 0 "register_operand" "=v") (plusminusmultdiv:HF (match_operand:HF 1 "nonimmediate_operand" "v") (match_operand:HF 2 "nonimmediate_operand" "vm")))] "TARGET_AVX512FP16 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "vsh\t{%2, %1, %0|%0, %1, %2}" [(set_attr "prefix" "evex") (set_attr "mode" "HF")]) (define_insn "*rcpsf2_sse" [(set (match_operand:SF 
0 "register_operand" "=x,x,x,x") (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m,ja")] UNSPEC_RCP))] "TARGET_SSE && TARGET_SSE_MATH" "@ %vrcpss\t{%d1, %0|%0, %d1} %vrcpss\t{%d1, %0|%0, %d1} rcpss\t{%1, %d0|%d0, %1} vrcpss\t{%1, %d0|%d0, %1}" [(set_attr "isa" "*,*,noavx,avx") (set_attr "addr" "*,*,*,gpr16") (set_attr "type" "sse") (set_attr "atom_sse_attr" "rcp") (set_attr "btver2_sse_attr" "rcp") (set_attr "prefix" "maybe_vex") (set_attr "mode" "SF") (set_attr "avx_partial_xmm_update" "false,false,true,true") (set (attr "preferred_for_speed") (cond [(match_test "TARGET_AVX") (symbol_ref "true") (eq_attr "alternative" "1,2,3") (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY") ] (symbol_ref "true")))]) (define_insn "rcphf2" [(set (match_operand:HF 0 "register_operand" "=v,v") (unspec:HF [(match_operand:HF 1 "nonimmediate_operand" "v,m")] UNSPEC_RCP))] "TARGET_AVX512FP16" "@ vrcpsh\t{%d1, %0|%0, %d1} vrcpsh\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "HF") (set_attr "avx_partial_xmm_update" "false,true")]) (define_insn "*fop_xf_1_i387" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "0,f") (match_operand:XF 2 "register_operand" "f,0")]))] "TARGET_80387 && !COMMUTATIVE_ARITH_P (operands[3])" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (if_then_else (match_operand:XF 3 "div_operator") (const_string "fdiv") (const_string "fop"))) (set_attr "mode" "XF")]) (define_insn "*fop__1" [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,v") (match_operator:MODEF 3 "binary_fp_operator" [(match_operand:MODEF 1 "x87nonimm_ssenomem_operand" "0,fm,0,v") (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm,vm")]))] "((SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || (TARGET_80387 && X87_ENABLE_ARITH (mode))) && !COMMUTATIVE_ARITH_P (operands[3]) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "* return 
output_387_binary_op (insn, operands);" [(set (attr "type") (if_then_else (eq_attr "alternative" "2,3") (if_then_else (match_operand:MODEF 3 "div_operator") (const_string "ssediv") (const_string "sseadd")) (if_then_else (match_operand:MODEF 3 "div_operator") (const_string "fdiv") (const_string "fop")))) (set_attr "isa" "*,*,noavx,avx") (set_attr "prefix" "orig,orig,orig,vex") (set_attr "mode" "") (set (attr "enabled") (if_then_else (match_test ("SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH")) (if_then_else (eq_attr "alternative" "0,1") (symbol_ref "TARGET_MIX_SSE_I387 && X87_ENABLE_ARITH (mode)") (const_string "*")) (if_then_else (eq_attr "alternative" "0,1") (symbol_ref "true") (symbol_ref "false"))))]) (define_insn "*fop__2_i387" [(set (match_operand:X87MODEF 0 "register_operand" "=f") (match_operator:X87MODEF 3 "binary_fp_operator" [(float:X87MODEF (match_operand:SWI24 1 "nonimmediate_operand" "m")) (match_operand:X87MODEF 2 "register_operand" "0")]))] "TARGET_80387 && X87_ENABLE_FLOAT (mode, mode) && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun))" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:X87MODEF 3 "mult_operator") (const_string "fmul") (match_operand:X87MODEF 3 "div_operator") (const_string "fdiv") ] (const_string "fop"))) (set_attr "fp_int_src" "true") (set_attr "mode" "")]) (define_insn "*fop__3_i387" [(set (match_operand:X87MODEF 0 "register_operand" "=f") (match_operator:X87MODEF 3 "binary_fp_operator" [(match_operand:X87MODEF 1 "register_operand" "0") (float:X87MODEF (match_operand:SWI24 2 "nonimmediate_operand" "m"))]))] "TARGET_80387 && X87_ENABLE_FLOAT (mode, mode) && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun))" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:X87MODEF 3 "mult_operator") (const_string "fmul") (match_operand:X87MODEF 
3 "div_operator") (const_string "fdiv") ] (const_string "fop"))) (set_attr "fp_int_src" "true") (set_attr "mode" "")])

;; XFmode x87 binary operation whose first source is a narrower (MODEF)
;; value widened with float_extend; two alternatives cover either source
;; being tied to the destination.  Requires only TARGET_80387.
(define_insn "*fop_xf_4_i387" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" [(float_extend:XF (match_operand:MODEF 1 "nonimmediate_operand" "fm,0")) (match_operand:XF 2 "register_operand" "0,f")]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator") (const_string "fmul") (match_operand:XF 3 "div_operator") (const_string "fdiv") ] (const_string "fop"))) (set_attr "mode" "")])

;; DFmode variant of the pattern above with an SFmode first source.
;; Disabled when DFmode arithmetic is done with SSE math.
(define_insn "*fop_df_4_i387" [(set (match_operand:DF 0 "register_operand" "=f,f") (match_operator:DF 3 "binary_fp_operator" [(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,0")) (match_operand:DF 2 "register_operand" "0,f")]))] "TARGET_80387 && X87_ENABLE_ARITH (DFmode) && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:DF 3 "mult_operator") (const_string "fmul") (match_operand:DF 3 "div_operator") (const_string "fdiv") ] (const_string "fop"))) (set_attr "mode" "SF")])

;; XFmode x87 binary operation whose second source is a widened MODEF
;; value (the extended operand is operand 2 here instead of operand 1).
(define_insn "*fop_xf_5_i387" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "0,f") (float_extend:XF (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator") (const_string "fmul") (match_operand:XF 3 "div_operator") (const_string "fdiv") ] (const_string "fop"))) (set_attr "mode" "")])

;; DFmode variant with an SFmode second source.
(define_insn "*fop_df_5_i387" [(set (match_operand:DF 0 "register_operand" "=f,f") (match_operator:DF 3 "binary_fp_operator" [(match_operand:DF 1 "register_operand" "0,f") (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] "TARGET_80387 && X87_ENABLE_ARITH
(DFmode) && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:DF 3 "mult_operator") (const_string "fmul") (match_operand:DF 3 "div_operator") (const_string "fdiv") ] (const_string "fop"))) (set_attr "mode" "SF")])

;; XFmode x87 binary operation where both sources are widened MODEF
;; values; operand 1 must be a register, operand 2 may be memory.
(define_insn "*fop_xf_6_i387" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" [(float_extend:XF (match_operand:MODEF 1 "register_operand" "0,f")) (float_extend:XF (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator") (const_string "fmul") (match_operand:XF 3 "div_operator") (const_string "fdiv") ] (const_string "fop"))) (set_attr "mode" "")])

;; DFmode variant with both sources widened from SFmode.  Disabled when
;; DFmode arithmetic is done with SSE math.
(define_insn "*fop_df_6_i387" [(set (match_operand:DF 0 "register_operand" "=f,f") (match_operator:DF 3 "binary_fp_operator" [(float_extend:DF (match_operand:SF 1 "register_operand" "0,f")) (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] "TARGET_80387 && X87_ENABLE_ARITH (DFmode) && !(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:DF 3 "mult_operator") (const_string "fmul") (match_operand:DF 3 "div_operator") (const_string "fdiv") ] (const_string "fop"))) (set_attr "mode" "SF")])

;; FPU special functions.

;; This pattern implements a no-op XFmode truncation for
;; all fancy i386 XFmode math functions.
;; Move an XFmode x87 register to a MODEF register or memory destination,
;; wrapped in UNSPEC_TRUNC_NOOP; the assembly is produced by
;; output_387_reg_move.
(define_insn "truncxf2_i387_noop_unspec" [(set (match_operand:MODEF 0 "nonimmediate_operand" "=mf") (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")] UNSPEC_TRUNC_NOOP))] "TARGET_USE_FANCY_MATH_387" "* return output_387_reg_move (insn, operands);" [(set_attr "type" "fmov") (set_attr "mode" "")])

;; XFmode square root; emits fsqrt with the source tied to the destination.
(define_insn "sqrtxf2" [(set (match_operand:XF 0 "register_operand" "=f") (sqrt:XF (match_operand:XF 1 "register_operand" "0")))] "TARGET_USE_FANCY_MATH_387" "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "XF") (set_attr "athlon_decode" "direct") (set_attr "amdfam10_decode" "direct") (set_attr "bdver1_decode" "direct")])

;; SFmode UNSPEC_RSQRT on SSE registers (rsqrtss).  Four alternatives; the
;; two memory-source alternatives are split between noavx and avx by the
;; "isa" attribute.  Alternatives 1-3 are only preferred for speed when
;; TARGET_AVX is set or TARGET_SSE_PARTIAL_REG_DEPENDENCY is clear.
(define_insn "*rsqrtsf2_sse" [(set (match_operand:SF 0 "register_operand" "=x,x,x,x") (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m,ja")] UNSPEC_RSQRT))] "TARGET_SSE && TARGET_SSE_MATH" "@ %vrsqrtss\t{%d1, %0|%0, %d1} %vrsqrtss\t{%d1, %0|%0, %d1} rsqrtss\t{%1, %d0|%d0, %1} vrsqrtss\t{%1, %d0|%d0, %1}" [(set_attr "isa" "*,*,noavx,avx") (set_attr "addr" "*,*,*,gpr16") (set_attr "type" "sse") (set_attr "atom_sse_attr" "rcp") (set_attr "btver2_sse_attr" "rcp") (set_attr "prefix" "maybe_vex") (set_attr "mode" "SF") (set_attr "avx_partial_xmm_update" "false,false,true,true") (set (attr "preferred_for_speed") (cond [(match_test "TARGET_AVX") (symbol_ref "true") (eq_attr "alternative" "1,2,3") (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY") ] (symbol_ref "true")))])

;; Expander for rsqrtsf2: always hands off to ix86_emit_swsqrtsf with a
;; final argument of 1 and emits nothing from the template itself.
(define_expand "rsqrtsf2" [(set (match_operand:SF 0 "register_operand") (unspec:SF [(match_operand:SF 1 "nonimmediate_operand")] UNSPEC_RSQRT))] "TARGET_SSE && TARGET_SSE_MATH" { ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1); DONE; })

;; HFmode UNSPEC_RSQRT (vrsqrtsh), register or memory source, available
;; under TARGET_AVX512FP16.
(define_insn "rsqrthf2" [(set (match_operand:HF 0 "register_operand" "=v,v") (unspec:HF [(match_operand:HF 1 "nonimmediate_operand" "v,m")] UNSPEC_RSQRT))] "TARGET_AVX512FP16" "@ vrsqrtsh\t{%d1, %0|%0, %d1} vrsqrtsh\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "avx_partial_xmm_update" "false,true") (set_attr "mode" "HF")])

;; HFmode square root (vsqrtsh), available under TARGET_AVX512FP16.
(define_insn "sqrthf2" [(set (match_operand:HF 0 "register_operand" "=v,v") (sqrt:HF (match_operand:HF 1 "nonimmediate_operand" "v,m")))] "TARGET_AVX512FP16" "@ vsqrtsh\t{%d1, %0|%0, %d1} vsqrtsh\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "avx_partial_xmm_update" "false,true") (set_attr "mode" "HF")])

;; MODEF square root on SSE registers; three alternatives (source tied to
;; destination, other register, memory).
(define_insn "*sqrt2_sse" [(set (match_operand:MODEF 0 "register_operand" "=v,v,v") (sqrt:MODEF (match_operand:MODEF 1 "nonimmediate_operand" "0,v,m")))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" "@ %vsqrt\t{%d1, %0|%0, %d1} %vsqrt\t{%d1, %0|%0, %d1} %vsqrt\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") (set_attr "atom_sse_attr" "sqrt") (set_attr "btver2_sse_attr" "sqrt") (set_attr "prefix" "maybe_vex") (set_attr "avx_partial_xmm_update" "false,false,true") (set_attr "mode" "") (set (attr "preferred_for_speed") (cond [(match_test "TARGET_AVX") (symbol_ref "true") (eq_attr "alternative" "1,2") (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY") ] (symbol_ref "true")))])

;; MODEF square root expander.  Three outcomes:
;;  - SFmode with TARGET_RECIP_SQRT and the listed fast-math flags, not
;;    optimizing for size: hand off to ix86_emit_swsqrtsf (last arg 0);
;;  - mode not handled by SSE math: extend to XFmode, use sqrtxf2 and the
;;    no-op truncation pattern;
;;  - otherwise: fall through and emit the template as written.
(define_expand "sqrt2" [(set (match_operand:MODEF 0 "register_operand") (sqrt:MODEF (match_operand:MODEF 1 "nonimmediate_operand")))] "(TARGET_USE_FANCY_MATH_387 && X87_ENABLE_ARITH (mode)) || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" { if (mode == SFmode && TARGET_SSE && TARGET_SSE_MATH && TARGET_RECIP_SQRT && !optimize_function_for_size_p (cfun) && flag_finite_math_only && !flag_trapping_math && flag_unsafe_math_optimizations) { ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0); DONE; } if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_sqrtxf2 (op0, op1)); emit_insn (gen_truncxf2_i387_noop_unspec (operands[0], op0)); DONE; } })

;; hypot expander: widens both inputs to XFmode, squares each, adds the
;; squares, takes the square root and truncates back to the MODEF result.
(define_expand "hypot3" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand")) (use (match_operand:MODEF 2 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_finite_math_only && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); rtx op2 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op2, operands[2])); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_mulxf3 (op1, op1, op1)); emit_insn (gen_mulxf3 (op2, op2, op2)); emit_insn (gen_addxf3 (op0, op2, op1)); emit_insn (gen_sqrtxf2 (op0, op0)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; Store the x87 status word (read from FPSR_REG) into an HImode register
;; constrained to "a"; emits fnstsw.
(define_insn "x86_fnstsw_1" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))] "TARGET_80387" "fnstsw\t%0" [(set_attr "length" "2") (set_attr "mode" "SI") (set_attr "unit" "i387")])

;; fprem: two XFmode outputs, each tied to one of the two inputs, plus a
;; set of FPSR_REG modelled as UNSPEC_C2_FLAG.
(define_insn "fpremxf4_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 2 "register_operand" "0") (match_operand:XF 3 "register_operand" "1")] UNSPEC_FPREM_F)) (set (match_operand:XF 1 "register_operand" "=f") (unspec:XF [(match_dup 2) (match_dup 3)] UNSPEC_FPREM_U)) (set (reg:CCFP FPSR_REG) (unspec:CCFP [(match_dup 2) (match_dup 3)] UNSPEC_C2_FLAG))] "TARGET_USE_FANCY_MATH_387" "fprem" [(set_attr "type" "fpspc") (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")])

;; XFmode fmod expander: copies the inputs to fresh registers, then emits
;; a label, fprem, and a conditional jump back to the label produced by
;; ix86_emit_fp_unordered_jump, so fprem is repeated until that jump falls
;; through.  The result is then copied out of op1.
(define_expand "fmodxf3" [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "general_operand")) (use (match_operand:XF 2 "general_operand"))] "TARGET_USE_FANCY_MATH_387" { rtx_code_label *label = gen_label_rtx (); rtx op1 = gen_reg_rtx (XFmode); rtx op2 = gen_reg_rtx (XFmode); emit_move_insn (op2, operands[2]); emit_move_insn (op1, operands[1]); emit_label (label); emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2)); ix86_emit_fp_unordered_jump (label); LABEL_NUSES (label) = 1; emit_move_insn (operands[0], op1); DONE; })

;; MODEF fmod expander: same fprem loop as fmodxf3, with the inputs
;; widened to XFmode first and the result truncated at the end.
(define_expand "fmod3" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))
(use (match_operand:MODEF 2 "general_operand"))] "TARGET_USE_FANCY_MATH_387" { rtx (*gen_truncxf) (rtx, rtx); rtx_code_label *label = gen_label_rtx (); rtx op1 = gen_reg_rtx (XFmode); rtx op2 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op2, operands[2])); emit_insn (gen_extendxf2 (op1, operands[1])); emit_label (label); emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2)); ix86_emit_fp_unordered_jump (label); LABEL_NUSES (label) = 1; /* Truncate the result properly for strict SSE math. */ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387) gen_truncxf = gen_truncxf2; else gen_truncxf = gen_truncxf2_i387_noop_unspec; emit_insn (gen_truncxf (operands[0], op1)); DONE; })

;; fprem1: same operand layout as fpremxf4_i387 but with the
;; UNSPEC_FPREM1_F / UNSPEC_FPREM1_U unspecs and the fprem1 mnemonic.
(define_insn "fprem1xf4_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 2 "register_operand" "0") (match_operand:XF 3 "register_operand" "1")] UNSPEC_FPREM1_F)) (set (match_operand:XF 1 "register_operand" "=f") (unspec:XF [(match_dup 2) (match_dup 3)] UNSPEC_FPREM1_U)) (set (reg:CCFP FPSR_REG) (unspec:CCFP [(match_dup 2) (match_dup 3)] UNSPEC_C2_FLAG))] "TARGET_USE_FANCY_MATH_387" "fprem1" [(set_attr "type" "fpspc") (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")])

;; XFmode remainder expander: label / fprem1 / jump-back loop built the
;; same way as fmodxf3, then the result is copied out of op1.
(define_expand "remainderxf3" [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "general_operand")) (use (match_operand:XF 2 "general_operand"))] "TARGET_USE_FANCY_MATH_387" { rtx_code_label *label = gen_label_rtx (); rtx op1 = gen_reg_rtx (XFmode); rtx op2 = gen_reg_rtx (XFmode); emit_move_insn (op2, operands[2]); emit_move_insn (op1, operands[1]); emit_label (label); emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2)); ix86_emit_fp_unordered_jump (label); LABEL_NUSES (label) = 1; emit_move_insn (operands[0], op1); DONE; })

;; MODEF remainder expander: widens the inputs to XFmode, runs the fprem1
;; loop, then truncates with either the real truncation (strict SSE math)
;; or the no-op truncation pattern.
(define_expand "remainder3" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand")) (use (match_operand:MODEF 2 "general_operand"))] "TARGET_USE_FANCY_MATH_387" { rtx (*gen_truncxf)
(rtx, rtx); rtx_code_label *label = gen_label_rtx (); rtx op1 = gen_reg_rtx (XFmode); rtx op2 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op2, operands[2])); emit_insn (gen_extendxf2 (op1, operands[1])); emit_label (label); emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2)); ix86_emit_fp_unordered_jump (label); LABEL_NUSES (label) = 1; /* Truncate the result properly for strict SSE math. */ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387) gen_truncxf = gen_truncxf2; else gen_truncxf = gen_truncxf2_i387_noop_unspec; emit_insn (gen_truncxf (operands[0], op1)); DONE; })

;; Iterator over the two unspecs UNSPEC_SIN and UNSPEC_COS, and the
;; attribute mapping each of them to the string "sin" or "cos".
(define_int_iterator SINCOS [UNSPEC_SIN UNSPEC_COS])
(define_int_attr sincos [(UNSPEC_SIN "sin") (UNSPEC_COS "cos")])

;; XFmode sine/cosine instruction generated from the SINCOS iterator;
;; source tied to the destination.
;; NOTE(review): the pattern name and mnemonic look like they lost an
;; iterator substitution in this copy of the file -- confirm against the
;; pristine source.
(define_insn "xf2" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0")] SINCOS))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "f" [(set_attr "type" "fpspc") (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")])

;; MODEF sine/cosine expander: extend to XFmode, apply the XFmode
;; pattern, truncate back.
(define_expand "2" [(set (match_operand:MODEF 0 "register_operand") (unspec:MODEF [(match_operand:MODEF 1 "general_operand")] SINCOS))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_xf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; fsincos: operand 0 receives UNSPEC_SINCOS_COS and operand 1 receives
;; UNSPEC_SINCOS_SIN of the single input (operand 2, tied to operand 0).
(define_insn "sincosxf3" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 2 "register_operand" "0")] UNSPEC_SINCOS_COS)) (set (match_operand:XF 1 "register_operand" "=f") (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "fsincos" [(set_attr "type" "fpspc") (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")])

;; MODEF sincos expander: extends operand 2, runs sincosxf3 and truncates
;; both XFmode results into operands 0 and 1.
(define_expand "sincos3" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "register_operand")) (use (match_operand:MODEF 2 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); rtx op2 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op2, operands[2])); emit_insn (gen_sincosxf3 (op0, op1, op2)); emit_insn (gen_truncxf2 (operands[0], op0)); emit_insn (gen_truncxf2 (operands[1], op1)); DONE; })

;; fptan: operand 1 receives UNSPEC_TAN of operand 2, and operand 0 (an
;; SFmode register) is set to operand 3, which must satisfy
;; const1_operand.
(define_insn "fptanxf4_i387" [(set (match_operand:SF 0 "register_operand" "=f") (match_operand:SF 3 "const1_operand")) (set (match_operand:XF 1 "register_operand" "=f") (unspec:XF [(match_operand:XF 2 "register_operand" "0")] UNSPEC_TAN))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "fptan" [(set_attr "type" "fpspc") (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")])

;; XFmode tan expander: emits fptanxf4_i387 with a fresh SFmode register
;; for the constant-one output, which is otherwise unused here.
(define_expand "tanxf2" [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { rtx one = gen_reg_rtx (SFmode); emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1], CONST1_RTX (SFmode))); DONE; })

;; MODEF tan expander: extend, tanxf2, truncate.
(define_expand "tan2" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_tanxf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; fpatan: UNSPEC_FPATAN of operands 2 and 1.  Operand 2 is tied to the
;; destination, and the scratch (operand 3) is tied to operand 1, so that
;; register is clobbered.
(define_insn "atan2xf3" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 2 "register_operand" "0") (match_operand:XF 1 "register_operand" "f")] UNSPEC_FPATAN)) (clobber (match_scratch:XF 3 "=1"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "fpatan" [(set_attr "type" "fpspc") (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")])

;; MODEF atan2 expander: extend both inputs, atan2xf3, truncate.
(define_expand "atan23" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand")) (use (match_operand:MODEF 2 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); rtx op2 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op2, operands[2])); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_atan2xf3 (op0, op1, op2)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; XFmode atan expander: UNSPEC_FPATAN with operand 2 forced to a
;; register holding the constant 1.
(define_expand "atanxf2" [(parallel [(set (match_operand:XF 0 "register_operand") (unspec:XF [(match_dup 2) (match_operand:XF 1 "register_operand")] UNSPEC_FPATAN)) (clobber (scratch:XF))])] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));")

;; MODEF atan expander: extend, atanxf2, truncate.
(define_expand "atan2" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_atanxf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; XFmode asin expander.  The template computes, in order:
;;   op2 = x * x;  op4 = op3 - op2 (op3 is loaded with 1);
;;   op5 = sqrt (op4);  result = UNSPEC_FPATAN [op5, x].
(define_expand "asinxf2" [(set (match_dup 2) (mult:XF (match_operand:XF 1 "register_operand") (match_dup 1))) (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2))) (set (match_dup 5) (sqrt:XF (match_dup 4))) (parallel [(set (match_operand:XF 0 "register_operand") (unspec:XF [(match_dup 5) (match_dup 1)] UNSPEC_FPATAN)) (clobber (scratch:XF))])] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { int i; for (i = 2; i < 6; i++) operands[i] = gen_reg_rtx (XFmode); emit_move_insn (operands[3], CONST1_RTX (XFmode)); })

;; MODEF asin expander: extend, asinxf2, truncate.
(define_expand "asin2" [(use
(match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_asinxf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; XFmode acos expander.  Same sequence as asinxf2 (x*x, 1 - x*x, sqrt)
;; but the two UNSPEC_FPATAN inputs are swapped: [x, op5].
(define_expand "acosxf2" [(set (match_dup 2) (mult:XF (match_operand:XF 1 "register_operand") (match_dup 1))) (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2))) (set (match_dup 5) (sqrt:XF (match_dup 4))) (parallel [(set (match_operand:XF 0 "register_operand") (unspec:XF [(match_dup 1) (match_dup 5)] UNSPEC_FPATAN)) (clobber (scratch:XF))])] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { int i; for (i = 2; i < 6; i++) operands[i] = gen_reg_rtx (XFmode); emit_move_insn (operands[3], CONST1_RTX (XFmode)); })

;; MODEF acos expander: extend, acosxf2, truncate.
(define_expand "acos2" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_acosxf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; XFmode sinh expander: delegates to ix86_emit_i387_sinh.  Also requires
;; flag_finite_math_only.
(define_expand "sinhxf2" [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] "TARGET_USE_FANCY_MATH_387 && flag_finite_math_only && flag_unsafe_math_optimizations" { ix86_emit_i387_sinh (operands[0], operands[1]); DONE; })

;; MODEF sinh expander: extend, sinhxf2, truncate.
(define_expand "sinh2" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_finite_math_only && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx
(XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_sinhxf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; XFmode cosh expander: delegates to ix86_emit_i387_cosh.
(define_expand "coshxf2" [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { ix86_emit_i387_cosh (operands[0], operands[1]); DONE; })

;; MODEF cosh expander: extend, coshxf2, truncate.
(define_expand "cosh2" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_coshxf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; XFmode tanh expander: delegates to ix86_emit_i387_tanh.
(define_expand "tanhxf2" [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { ix86_emit_i387_tanh (operands[0], operands[1]); DONE; })

;; MODEF tanh expander: extend, tanhxf2, truncate.
(define_expand "tanh2" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_tanhxf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; XFmode asinh expander: delegates to ix86_emit_i387_asinh.  Also
;; requires flag_finite_math_only.
(define_expand "asinhxf2" [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] "TARGET_USE_FANCY_MATH_387 && flag_finite_math_only && flag_unsafe_math_optimizations" { ix86_emit_i387_asinh (operands[0], operands[1]); DONE; })

;; MODEF asinh expander: extend, asinhxf2, truncate.
(define_expand "asinh2" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 &&
(!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_finite_math_only && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_asinhxf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; XFmode acosh expander: delegates to ix86_emit_i387_acosh.
(define_expand "acoshxf2" [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { ix86_emit_i387_acosh (operands[0], operands[1]); DONE; })

;; MODEF acosh expander: extend, acoshxf2, truncate.
(define_expand "acosh2" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_acoshxf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; XFmode atanh expander: delegates to ix86_emit_i387_atanh.
(define_expand "atanhxf2" [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { ix86_emit_i387_atanh (operands[0], operands[1]); DONE; })

;; MODEF atanh expander: extend, atanhxf2, truncate.
(define_expand "atanh2" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_atanhxf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; fyl2x: UNSPEC_FYL2X of operands 1 and 2.  Operand 1 is tied to the
;; destination; the scratch (operand 3) is tied to operand 2, so that
;; register is clobbered.
(define_insn "fyl2xxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0") (match_operand:XF 2 "register_operand" "f")] UNSPEC_FYL2X)) (clobber (match_scratch:XF 3 "=2"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "fyl2x" [(set_attr "type" "fpspc") (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")])

;; XFmode natural log expander: UNSPEC_FYL2X with operand 2 forced to a
;; register holding x87 standard constant 4 (fldln2).
(define_expand "logxf2" [(parallel [(set (match_operand:XF 0 "register_operand") (unspec:XF [(match_operand:XF 1 "register_operand") (match_dup 2)] UNSPEC_FYL2X)) (clobber (scratch:XF))])] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { operands[2] = force_reg (XFmode, standard_80387_constant_rtx (4)); /* fldln2 */ })

;; MODEF log expander: extend, logxf2, truncate.
(define_expand "log2" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_logxf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; XFmode log10 expander: UNSPEC_FYL2X with operand 2 forced to a
;; register holding x87 standard constant 3 (fldlg2).
(define_expand "log10xf2" [(parallel [(set (match_operand:XF 0 "register_operand") (unspec:XF [(match_operand:XF 1 "register_operand") (match_dup 2)] UNSPEC_FYL2X)) (clobber (scratch:XF))])] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { operands[2] = force_reg (XFmode, standard_80387_constant_rtx (3)); /* fldlg2 */ })

;; MODEF log10 expander: extend, log10xf2, truncate.
(define_expand "log102" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_log10xf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; XFmode log2 expander: UNSPEC_FYL2X with operand 2 forced to a register
;; holding the constant 1.
(define_expand "log2xf2" [(parallel [(set (match_operand:XF 0 "register_operand") (unspec:XF [(match_operand:XF 1 "register_operand") (match_dup 2)] UNSPEC_FYL2X)) (clobber (scratch:XF))])] "TARGET_USE_FANCY_MATH_387 &&
flag_unsafe_math_optimizations" "operands[2] = force_reg (XFmode, CONST1_RTX (XFmode));")

;; MODEF log2 expander: extend, log2xf2, truncate.
(define_expand "log22" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_log2xf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; fyl2xp1: same operand layout as fyl2xxf3_i387 but with UNSPEC_FYL2XP1.
(define_insn "fyl2xp1xf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0") (match_operand:XF 2 "register_operand" "f")] UNSPEC_FYL2XP1)) (clobber (match_scratch:XF 3 "=2"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "fyl2xp1" [(set_attr "type" "fpspc") (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")])

;; XFmode log1p expander: delegates to ix86_emit_i387_log1p.
(define_expand "log1pxf2" [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { ix86_emit_i387_log1p (operands[0], operands[1]); DONE; })

;; MODEF log1p expander: extend, log1pxf2, truncate.
(define_expand "log1p2" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_log1pxf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; fxtract: operand 0 receives UNSPEC_XTRACT_FRACT and operand 1 receives
;; UNSPEC_XTRACT_EXP of the single input (operand 2, tied to operand 0).
(define_insn "fxtractxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 2 "register_operand" "0")] UNSPEC_XTRACT_FRACT)) (set (match_operand:XF 1 "register_operand" "=f") (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "fxtract" [(set_attr "type" "fpspc") (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")])

;; XFmode logb expander: the UNSPEC_XTRACT_EXP half of the parallel is
;; written to operand 0; the UNSPEC_XTRACT_FRACT half goes to a fresh
;; scratch register (operand 2).  Operand 1 is only read.
(define_expand "logbxf2" [(parallel [(set (match_dup 2) (unspec:XF [(match_operand:XF 1 "register_operand")] UNSPEC_XTRACT_FRACT)) (set (match_operand:XF 0 "register_operand") (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "operands[2] = gen_reg_rtx (XFmode);")

;; MODEF logb expander: extend the input to XFmode, apply logbxf2, and
;; truncate the result back to the MODEF destination.
(define_expand "logb2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extendxf2 (op1, operands[1]));
  emit_insn (gen_logbxf2 (op0, op1));
  /* logbxf2 leaves the extracted exponent in its operand 0 (op0); op1
     still holds the extended input, so op0 is what must be truncated.  */
  emit_insn (gen_truncxf2 (operands[0], op0));
  DONE;
})

;; XFmode ilogb expander: FAILs when optimizing for size; otherwise runs
;; fxtract and converts the exponent output (op1) to SImode with
;; fix_truncxfsi2.
(define_expand "ilogbxf2" [(use (match_operand:SI 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { rtx op0, op1; if (optimize_insn_for_size_p ()) FAIL; op0 = gen_reg_rtx (XFmode); op1 = gen_reg_rtx (XFmode); emit_insn (gen_fxtractxf3_i387 (op0, op1, operands[1])); emit_insn (gen_fix_truncxfsi2 (operands[0], op1)); DONE; })

;; MODEF ilogb expander: as ilogbxf2, with the input widened to XFmode
;; first.
(define_expand "ilogb2" [(use (match_operand:SI 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0, op1, op2; if (optimize_insn_for_size_p ()) FAIL; op0 = gen_reg_rtx (XFmode); op1 = gen_reg_rtx (XFmode); op2 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op2, operands[1])); emit_insn (gen_fxtractxf3_i387 (op0, op1, op2)); emit_insn (gen_fix_truncxfsi2 (operands[0], op1)); DONE; })

;; f2xm1: UNSPEC_F2XM1 with the source tied to the destination.
(define_insn "*f2xm1xf2_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_F2XM1))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "f2xm1" [(set_attr "type" "fpspc") (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")])

;; fscale: two XFmode outputs (UNSPEC_FSCALE_FRACT in operand 0,
;; UNSPEC_FSCALE_EXP in operand 1), each tied to one of the two inputs.
(define_insn "fscalexf4_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 2 "register_operand" "0") (match_operand:XF 3 "register_operand" "1")] UNSPEC_FSCALE_FRACT)) (set (match_operand:XF 1 "register_operand" "=f") (unspec:XF [(match_dup 2) (match_dup 3)] UNSPEC_FSCALE_EXP))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" "fscale" [(set_attr "type" "fpspc") (set_attr "znver1_decode" "vector") (set_attr "mode" "XF")])

;; Shared core of the exp family.  The template computes, in order:
;;   op3 = op1 * op2;  op4 = UNSPEC_FRNDINT (op3);  op5 = op3 - op4;
;;   op6 = UNSPEC_F2XM1 (op5);  op8 = op6 + op7 (op7 is loaded with 1);
;;   result = UNSPEC_FSCALE_FRACT [op8, op4], with op9 receiving the
;;   UNSPEC_FSCALE_EXP half.
(define_expand "expNcorexf3" [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand") (match_operand:XF 2 "register_operand"))) (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT)) (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4))) (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1)) (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7))) (parallel [(set (match_operand:XF 0 "register_operand") (unspec:XF [(match_dup 8) (match_dup 4)] UNSPEC_FSCALE_FRACT)) (set (match_dup 9) (unspec:XF [(match_dup 8) (match_dup 4)] UNSPEC_FSCALE_EXP))])] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { int i; for (i = 3; i < 10; i++) operands[i] = gen_reg_rtx (XFmode); emit_move_insn (operands[7], CONST1_RTX (XFmode)); })

;; XFmode exp expander: expNcorexf3 with the multiplier set to x87
;; standard constant 5 (fldl2e).
(define_expand "expxf2" [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { rtx op2 = force_reg (XFmode, standard_80387_constant_rtx (5)); /* fldl2e */ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); DONE; })

;; MODEF exp expander: extend, expxf2, truncate.
(define_expand "exp2" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 =
gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_expxf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; XFmode exp10 expander: expNcorexf3 with the multiplier set to x87
;; standard constant 6 (fldl2t).
(define_expand "exp10xf2" [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { rtx op2 = force_reg (XFmode, standard_80387_constant_rtx (6)); /* fldl2t */ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); DONE; })

;; MODEF exp10 expander: extend, exp10xf2, truncate.
(define_expand "exp102" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_exp10xf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; XFmode exp2 expander: expNcorexf3 with the multiplier set to the
;; constant 1.
(define_expand "exp2xf2" [(use (match_operand:XF 0 "register_operand")) (use (match_operand:XF 1 "register_operand"))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { rtx op2 = force_reg (XFmode, CONST1_RTX (XFmode)); emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); DONE; })

;; MODEF exp2 expander: extend, exp2xf2, truncate.
(define_expand "exp22" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_exp2xf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; })

;; XFmode expm1 expander.  The template computes, in order:
;;   op3 = x * op2 (op2 is loaded with fldl2e);  op4 = UNSPEC_FRNDINT (op3);
;;   op5 = op3 - op4;  op6 = UNSPEC_F2XM1 (op5);
;;   {op7, op8} = fscale [op6, op4];  {op10, op11} = fscale [op9, op8]
;;   (op9 is loaded with 1);  op12 = op10 - op9;  result = op12 + op7.
(define_expand "expm1xf2" [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand") (match_dup 2))) (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT)) (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4))) (set (match_dup 6)
(unspec:XF [(match_dup 5)] UNSPEC_F2XM1)) (parallel [(set (match_dup 7) (unspec:XF [(match_dup 6) (match_dup 4)] UNSPEC_FSCALE_FRACT)) (set (match_dup 8) (unspec:XF [(match_dup 6) (match_dup 4)] UNSPEC_FSCALE_EXP))]) (parallel [(set (match_dup 10) (unspec:XF [(match_dup 9) (match_dup 8)] UNSPEC_FSCALE_FRACT)) (set (match_dup 11) (unspec:XF [(match_dup 9) (match_dup 8)] UNSPEC_FSCALE_EXP))]) (set (match_dup 12) (minus:XF (match_dup 10) (match_dup 9))) (set (match_operand:XF 0 "register_operand") (plus:XF (match_dup 12) (match_dup 7)))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { int i; for (i = 2; i < 13; i++) operands[i] = gen_reg_rtx (XFmode); emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */ emit_move_insn (operands[9], CONST1_RTX (XFmode)); }) (define_expand "expm12" [(use (match_operand:MODEF 0 "register_operand")) (use (match_operand:MODEF 1 "general_operand"))] "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations" { rtx op0 = gen_reg_rtx (XFmode); rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_extendxf2 (op1, operands[1])); emit_insn (gen_expm1xf2 (op0, op1)); emit_insn (gen_truncxf2 (operands[0], op0)); DONE; }) (define_insn "avx512f_scalef2" [(set (match_operand:MODEF 0 "register_operand" "=v") (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "v") (match_operand:MODEF 2 "nonimmediate_operand" "vm")] UNSPEC_SCALEF))] "TARGET_AVX512F" "vscalef\t{%2, %1, %0|%0, %1, %2}" [(set_attr "prefix" "evex") (set_attr "mode" "")]) (define_expand "ldexpxf3" [(match_operand:XF 0 "register_operand") (match_operand:XF 1 "register_operand") (match_operand:SI 2 "register_operand")] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { rtx tmp1 = gen_reg_rtx (XFmode); rtx tmp2 = gen_reg_rtx (XFmode); emit_insn (gen_floatsixf2 (tmp1, operands[2])); emit_insn (gen_fscalexf4_i387 (operands[0], tmp2, operands[1], tmp1)); DONE; }) 
;; MODEF ldexp expander.  When TARGET_AVX512F && TARGET_SSE_MATH, the
;; SImode exponent is converted with gen_floatsi2 and the result is
;; produced by avx512f_scalef2.  Otherwise the operation is done in
;; XFmode through ldexpxf3 (extend, expand, truncate).
(define_expand "ldexp3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:SI 2 "register_operand"))]
  "((TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)) || (TARGET_AVX512F && TARGET_SSE_MATH)) && flag_unsafe_math_optimizations"
{
  /* Prefer avx512f version. */
  if (TARGET_AVX512F && TARGET_SSE_MATH)
    {
      rtx op2 = gen_reg_rtx (mode);
      operands[1] = force_reg (mode, operands[1]);
      emit_insn (gen_floatsi2 (op2, operands[2]));
      emit_insn (gen_avx512f_scalef2 (operands[0], operands[1], op2));
    }
  else
    {
      rtx op0 = gen_reg_rtx (XFmode);
      rtx op1 = gen_reg_rtx (XFmode);

      emit_insn (gen_extendxf2 (op1, operands[1]));
      emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
      emit_insn (gen_truncxf2 (operands[0], op0));
    }
  DONE;
})

;; XFmode scalb expander: the two-output FSCALE parallel on operands 1
;; and 2.  The UNSPEC_FSCALE_EXP output goes to a fresh XFmode pseudo
;; (operand 3).
(define_expand "scalbxf3"
  [(parallel [(set (match_operand:XF 0 "register_operand")
                   (unspec:XF [(match_operand:XF 1 "register_operand")
                               (match_operand:XF 2 "register_operand")]
                              UNSPEC_FSCALE_FRACT))
              (set (match_dup 3)
                   (unspec:XF [(match_dup 1) (match_dup 2)]
                              UNSPEC_FSCALE_EXP))])]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
  "operands[3] = gen_reg_rtx (XFmode);")

;; MODEF wrapper around scalbxf3: both inputs are extended to XFmode,
;; the result is truncated back into operand 0.
(define_expand "scalb3"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))
   (use (match_operand:MODEF 2 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);
  rtx op2 = gen_reg_rtx (XFmode);

  emit_insn (gen_extendxf2 (op1, operands[1]));
  emit_insn (gen_extendxf2 (op2, operands[2]));
  emit_insn (gen_scalbxf3 (op0, op1, op2));
  emit_insn (gen_truncxf2 (operands[0], op0));
  DONE;
})

;; XFmode significand expander: the two-output XTRACT parallel on
;; operand 1.  Operand 0 receives UNSPEC_XTRACT_FRACT; the
;; UNSPEC_XTRACT_EXP output goes to a fresh XFmode pseudo (operand 2).
(define_expand "significandxf2"
  [(parallel [(set (match_operand:XF 0 "register_operand")
                   (unspec:XF [(match_operand:XF 1 "register_operand")]
                              UNSPEC_XTRACT_FRACT))
              (set (match_dup 2)
                   (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
  "operands[2] = gen_reg_rtx (XFmode);")

;; MODEF wrapper around significandxf2 (extend, expand, truncate).
(define_expand "significand2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "general_operand"))]
  "TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations"
{
  rtx op0 = gen_reg_rtx (XFmode);
  rtx op1 = gen_reg_rtx (XFmode);

  emit_insn (gen_extendxf2 (op1, operands[1]));
  emit_insn (gen_significandxf2 (op0, op1));
  emit_insn (gen_truncxf2 (operands[0], op0));
  DONE;
})

;; Scalar rounding insn with an immediate control operand (operand 2,
;; const_0_to_15_operand).  Five alternatives: the first three use the
;; %vround form and are limited to "noavx512f" by the isa attribute,
;; the last two use vrndscale and require "avx512f".  When TARGET_AVX is
;; not set, alternatives 1 and 2 are preferred for speed only if
;; !TARGET_SSE_PARTIAL_REG_DEPENDENCY.
(define_insn "sse4_1_round2"
  [(set (match_operand:MODEFH 0 "register_operand" "=x,x,x,v,v")
        (unspec:MODEFH
          [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,jm,v,m")
           (match_operand:SI 2 "const_0_to_15_operand")]
          UNSPEC_ROUND))]
  "TARGET_SSE4_1"
  "@
   %vround\t{%2, %d1, %0|%0, %d1, %2}
   %vround\t{%2, %d1, %0|%0, %d1, %2}
   %vround\t{%2, %1, %d0|%d0, %1, %2}
   vrndscale\t{%2, %d1, %0|%0, %d1, %2}
   vrndscale\t{%2, %1, %d0|%d0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_extra" "1,1,1,*,*")
   (set_attr "length_immediate" "1")
   (set_attr "addr" "*,*,gpr16,*,*")
   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex,evex")
   (set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f,avx512f")
   (set_attr "avx_partial_xmm_update" "false,false,true,false,true")
   (set_attr "mode" "")
   (set (attr "preferred_for_speed")
      (cond [(match_test "TARGET_AVX")
               (symbol_ref "true")
             (eq_attr "alternative" "1,2")
               (symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
            ]
            (symbol_ref "true")))])

;; XFmode rint: a plain `frndint' on the tied input/output register.
(define_insn "rintxf2"
  [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
                   UNSPEC_FRNDINT))]
  "TARGET_USE_FANCY_MATH_387"
  "frndint"
  [(set_attr "type" "fpspc")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "XF")])

;; HFmode rint: gen_sse4_1_roundhf2 with control ROUND_MXCSR.
(define_expand "rinthf2"
  [(match_operand:HF 0 "register_operand")
   (match_operand:HF 1 "nonimmediate_operand")]
  "TARGET_AVX512FP16"
{
  emit_insn (gen_sse4_1_roundhf2 (operands[0], operands[1], GEN_INT (ROUND_MXCSR)));
  DONE;
})

;; MODEF rint.  With SSE math: the round insn with ROUND_MXCSR when
;; TARGET_SSE4_1, else ix86_expand_rint.  Without SSE math: go through
;; rintxf2 in XFmode, truncating with gen_truncxf2_i387_noop_unspec.
(define_expand "rint2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "nonimmediate_operand"))]
  "TARGET_USE_FANCY_MATH_387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)"
{
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    {
      if (TARGET_SSE4_1)
        emit_insn (gen_sse4_1_round2 (operands[0], operands[1], GEN_INT (ROUND_MXCSR)));
      else
        ix86_expand_rint (operands[0], operands[1]);
    }
  else
    {
      rtx op0 = gen_reg_rtx (XFmode);
      rtx op1 = gen_reg_rtx (XFmode);

      emit_insn (gen_extendxf2 (op1, operands[1]));
      emit_insn (gen_rintxf2 (op0, op1));
      emit_insn (gen_truncxf2_i387_noop_unspec (operands[0], op0));
    }
  DONE;
})

;; XFmode nearbyint: same UNSPEC_FRNDINT pattern as rintxf2, but only
;; offered when !flag_trapping_math.
(define_expand "nearbyintxf2"
  [(set (match_operand:XF 0 "register_operand")
        (unspec:XF [(match_operand:XF 1 "register_operand")]
                   UNSPEC_FRNDINT))]
  "TARGET_USE_FANCY_MATH_387 && !flag_trapping_math")

;; HFmode nearbyint: gen_sse4_1_roundhf2 with control
;; ROUND_MXCSR | ROUND_NO_EXC.
(define_expand "nearbyinthf2"
  [(match_operand:HF 0 "register_operand")
   (match_operand:HF 1 "nonimmediate_operand")]
  "TARGET_AVX512FP16"
{
  emit_insn (gen_sse4_1_roundhf2 (operands[0], operands[1], GEN_INT (ROUND_MXCSR | ROUND_NO_EXC)));
  DONE;
})

;; MODEF nearbyint.  With TARGET_SSE4_1 && TARGET_SSE_MATH: the round
;; insn with ROUND_MXCSR | ROUND_NO_EXC.  Otherwise: go through
;; nearbyintxf2 in XFmode.
(define_expand "nearbyint2"
  [(use (match_operand:MODEF 0 "register_operand"))
   (use (match_operand:MODEF 1 "nonimmediate_operand"))]
  "(TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && !flag_trapping_math) || (TARGET_SSE4_1 && TARGET_SSE_MATH)"
{
  if (TARGET_SSE4_1 && TARGET_SSE_MATH)
    emit_insn (gen_sse4_1_round2 (operands[0], operands[1], GEN_INT (ROUND_MXCSR | ROUND_NO_EXC)));
  else
    {
      rtx op0 = gen_reg_rtx (XFmode);
      rtx op1 = gen_reg_rtx (XFmode);

      emit_insn (gen_extendxf2 (op1, operands[1]));
      emit_insn (gen_nearbyintxf2 (op0, op1));
      emit_insn (gen_truncxf2_i387_noop_unspec (operands[0], op0));
    }
  DONE;
})

;; HFmode round: delegates to ix86_expand_round_sse4.
(define_expand "roundhf2"
  [(match_operand:HF 0 "register_operand")
   (match_operand:HF 1 "register_operand")]
  "TARGET_AVX512FP16 && !flag_trapping_math && !flag_rounding_math"
{
  ix86_expand_round_sse4 (operands[0], operands[1]);
  DONE;
})

;; X87MODEF round.  The SSE path (no trapping math, no rounding math)
;; picks ix86_expand_round_sse4 with TARGET_SSE4_1, ix86_expand_round
;; when TARGET_64BIT or the mode is not DFmode, and
;; ix86_expand_rounddf_32 otherwise.  Every other case goes to
;; ix86_emit_i387_round.
(define_expand "round2"
  [(match_operand:X87MODEF 0 "register_operand")
   (match_operand:X87MODEF 1 "nonimmediate_operand")]
  "(TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations && (flag_fp_int_builtin_inexact || !flag_trapping_math)) || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && !flag_trapping_math && !flag_rounding_math)"
{
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
      && !flag_trapping_math && !flag_rounding_math)
    {
      if (TARGET_SSE4_1)
        {
          operands[1] = force_reg (mode, operands[1]);
          ix86_expand_round_sse4 (operands[0], operands[1]);
        }
      else if (TARGET_64BIT || (mode != DFmode))
        ix86_expand_round (operands[0], operands[1]);
      else
        ix86_expand_rounddf_32 (operands[0], operands[1]);
    }
  else
    {
      operands[1] = force_reg (mode, operands[1]);
      ix86_emit_i387_round (operands[0], operands[1]);
    }
  DONE;
})

;; XFmode -> DImode lrint, stored to memory.  Needs an earlyclobber
;; XFmode scratch (operand 2); assembly comes from output_fix_trunc.
(define_insn "lrintxfdi2"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
        (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
                   UNSPEC_FIST))
   (clobber (match_scratch:XF 2 "=&f"))]
  "TARGET_USE_FANCY_MATH_387"
  "* return output_fix_trunc (insn, operands, false);"
  [(set_attr "type" "fpspc")
   (set_attr "mode" "DI")])

;; XFmode -> SWI24 lrint, stored to memory; no scratch needed.
(define_insn "lrintxf2"
  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
        (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
                      UNSPEC_FIST))]
  "TARGET_USE_FANCY_MATH_387"
  "* return output_fix_trunc (insn, operands, false);"
  [(set_attr "type" "fpspc")
   (set_attr "mode" "")])

;; HFmode lround: delegates to ix86_expand_lround.
(define_expand "lroundhf2"
  [(set (match_operand:SWI248 0 "register_operand")
        (unspec:SWI248 [(match_operand:HF 1 "nonimmediate_operand")]
                       UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX512FP16 && !flag_trapping_math && !flag_rounding_math"
{
  ix86_expand_lround (operands[0], operands[1]);
  DONE;
})

;; HFmode lrint: emits the UNSPEC_FIX_NOTRUNC pattern as written.
(define_expand "lrinthf2"
  [(set (match_operand:SWI48 0 "register_operand")
        (unspec:SWI48 [(match_operand:HF 1 "nonimmediate_operand")]
                      UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX512FP16")

;; MODEF lrint with SSE math: emits the UNSPEC_FIX_NOTRUNC pattern as
;; written.
(define_expand "lrint2"
  [(set (match_operand:SWI48 0 "register_operand")
        (unspec:SWI48 [(match_operand:MODEF 1 "nonimmediate_operand")]
                      UNSPEC_FIX_NOTRUNC))]
  "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH")

;; X87MODEF -> SWI248x lround.  The SSE path (guarded by the same test
;; as in the insn condition) uses ix86_expand_lround; everything else
;; goes to ix86_emit_i387_round.
(define_expand "lround2"
  [(match_operand:SWI248x 0 "nonimmediate_operand")
   (match_operand:X87MODEF 1 "register_operand")]
  "(TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations) || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && mode != HImode && ((mode != DImode) || TARGET_64BIT) && !flag_trapping_math && !flag_rounding_math)"
{
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
      && mode != HImode
      && ((mode != DImode) || TARGET_64BIT)
      && !flag_trapping_math && !flag_rounding_math)
    ix86_expand_lround (operands[0], operands[1]);
  else
    ix86_emit_i387_round (operands[0], operands[1]);
  DONE;
})

;; Rounding flavours handled by the frndint-based patterns.
(define_int_iterator FRNDINT_ROUNDING
        [UNSPEC_FRNDINT_ROUNDEVEN
         UNSPEC_FRNDINT_FLOOR
         UNSPEC_FRNDINT_CEIL
         UNSPEC_FRNDINT_TRUNC])

;; Rounding flavours handled by the fist-based patterns.
(define_int_iterator FIST_ROUNDING
        [UNSPEC_FIST_FLOOR
         UNSPEC_FIST_CEIL])

;; Base name for define_insn
(define_int_attr rounding_insn
        [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
         (UNSPEC_FRNDINT_FLOOR "floor")
         (UNSPEC_FRNDINT_CEIL "ceil")
         (UNSPEC_FRNDINT_TRUNC "btrunc")
         (UNSPEC_FIST_FLOOR "floor")
         (UNSPEC_FIST_CEIL "ceil")])

;; Lower-case rounding name; differs from rounding_insn only for
;; UNSPEC_FRNDINT_TRUNC ("trunc" here, "btrunc" there).
(define_int_attr rounding
        [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
         (UNSPEC_FRNDINT_FLOOR "floor")
         (UNSPEC_FRNDINT_CEIL "ceil")
         (UNSPEC_FRNDINT_TRUNC "trunc")
         (UNSPEC_FIST_FLOOR "floor")
         (UNSPEC_FIST_CEIL "ceil")])

;; Upper-case rounding name.
(define_int_attr ROUNDING
        [(UNSPEC_FRNDINT_ROUNDEVEN "ROUNDEVEN")
         (UNSPEC_FRNDINT_FLOOR "FLOOR")
         (UNSPEC_FRNDINT_CEIL "CEIL")
         (UNSPEC_FRNDINT_TRUNC "TRUNC")
         (UNSPEC_FIST_FLOOR "FLOOR")
         (UNSPEC_FIST_CEIL "CEIL")])

;; Rounding mode control word calculation could clobber FLAGS_REG.
;; Pre-reload placeholder for the FRNDINT_ROUNDING family: it only
;; matches while ix86_pre_reload_split () holds, has the "#" template
;; and is always split ("&& 1").  The split marks the corresponding
;; ix86_optimize_mode_switching entry, obtains two HImode stack slots
;; from assign_386_stack_local and emits the _i387 insn below with those
;; slots as operands 2 and 3.
(define_insn_and_split "frndintxf2_"
  [(set (match_operand:XF 0 "register_operand")
        (unspec:XF [(match_operand:XF 1 "register_operand")]
                   FRNDINT_ROUNDING))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_FANCY_MATH_387 && (flag_fp_int_builtin_inexact || !flag_trapping_math) && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  ix86_optimize_mode_switching[I387_] = 1;

  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
  operands[3] = assign_386_stack_local (HImode, SLOT_CW_);

  emit_insn (gen_frndintxf2__i387 (operands[0], operands[1], operands[2], operands[3]));
  DONE;
}
  [(set_attr "type" "frndint")
   (set_attr "i387_cw" "")
   (set_attr "mode" "XF")])

;; The real instruction sequence: load the control word from operand 3,
;; run frndint on the tied register, then load the control word from
;; operand 2.  Both control-word operands are HImode memory.
(define_insn "frndintxf2__i387"
  [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
                   FRNDINT_ROUNDING))
   (use (match_operand:HI 2 "memory_operand" "m"))
   (use (match_operand:HI 3 "memory_operand" "m"))]
  "TARGET_USE_FANCY_MATH_387 && (flag_fp_int_builtin_inexact || !flag_trapping_math)"
  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
  [(set_attr "type" "frndint")
   (set_attr "i387_cw" "")
   (set_attr "mode" "XF")])

;; XFmode expander: emits the FRNDINT_ROUNDING set together with the
;; FLAGS_REG clobber, exactly as written (no preparation code).
(define_expand "xf2"
  [(parallel [(set (match_operand:XF 0 "register_operand")
                   (unspec:XF [(match_operand:XF 1 "register_operand")]
                              FRNDINT_ROUNDING))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_USE_FANCY_MATH_387 && (flag_fp_int_builtin_inexact || !flag_trapping_math)")

;; HFmode expander: gen_sse4_1_roundhf2 with the rounding control ORed
;; with ROUND_NO_EXC.
(define_expand "hf2"
  [(parallel [(set (match_operand:HF 0 "register_operand")
                   (unspec:HF [(match_operand:HF 1 "register_operand")]
                              FRNDINT_ROUNDING))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_AVX512FP16"
{
  emit_insn (gen_sse4_1_roundhf2 (operands[0], operands[1], GEN_INT (ROUND_ | ROUND_NO_EXC)));
  DONE;
})

;; MODEF expander.  The SSE path uses:
;;   - the round insn with ROUND_NO_EXC when TARGET_SSE4_1;
;;   - ix86_expand_floorceil / ix86_expand_trunc when TARGET_64BIT or
;;     the mode is not DFmode;
;;   - the *df_32 helpers otherwise.
;; Without SSE4.1 the roundeven flavour is excluded by the insn
;; condition, so the final `else' arms are gcc_unreachable ().
;; The non-SSE path goes through the XFmode frndint placeholder.
(define_expand "2"
  [(parallel [(set (match_operand:MODEF 0 "register_operand")
                   (unspec:MODEF [(match_operand:MODEF 1 "register_operand")]
                                 FRNDINT_ROUNDING))
              (clobber (reg:CC FLAGS_REG))])]
  "(TARGET_USE_FANCY_MATH_387 && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) && (flag_fp_int_builtin_inexact || !flag_trapping_math)) || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && (TARGET_SSE4_1 || (ROUND_ != ROUND_ROUNDEVEN && (flag_fp_int_builtin_inexact || !flag_trapping_math))))"
{
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH
      && (TARGET_SSE4_1
          || (ROUND_ != ROUND_ROUNDEVEN
              && (flag_fp_int_builtin_inexact
                  || !flag_trapping_math))))
    {
      if (TARGET_SSE4_1)
        emit_insn (gen_sse4_1_round2 (operands[0], operands[1], GEN_INT (ROUND_ | ROUND_NO_EXC)));
      else if (TARGET_64BIT || (mode != DFmode))
        {
          if (ROUND_ == ROUND_FLOOR)
            ix86_expand_floorceil (operands[0], operands[1], true);
          else if (ROUND_ == ROUND_CEIL)
            ix86_expand_floorceil (operands[0], operands[1], false);
          else if (ROUND_ == ROUND_TRUNC)
            ix86_expand_trunc (operands[0], operands[1]);
          else
            gcc_unreachable ();
        }
      else
        {
          if (ROUND_ == ROUND_FLOOR)
            ix86_expand_floorceildf_32 (operands[0], operands[1], true);
          else if (ROUND_ == ROUND_CEIL)
            ix86_expand_floorceildf_32 (operands[0], operands[1], false);
          else if (ROUND_ == ROUND_TRUNC)
            ix86_expand_truncdf_32 (operands[0], operands[1]);
          else
            gcc_unreachable ();
        }
    }
  else
    {
      rtx op0 = gen_reg_rtx (XFmode);
      rtx op1 = gen_reg_rtx (XFmode);

      emit_insn (gen_extendxf2 (op1, operands[1]));
      emit_insn (gen_frndintxf2_ (op0, op1));
      emit_insn (gen_truncxf2_i387_noop_unspec (operands[0], op0));
    }
  DONE;
})

;; Rounding mode control word calculation could clobber FLAGS_REG.
;; Pre-reload placeholder for the FIST_ROUNDING family: it only matches
;; while ix86_pre_reload_split () holds, has the "#" template and is
;; always split ("&& 1").  The split marks the corresponding
;; ix86_optimize_mode_switching entry, obtains two HImode stack slots
;; from assign_386_stack_local and emits the fist insn with those slots
;; as operands 2 and 3.
(define_insn_and_split "*fist2__1"
  [(set (match_operand:SWI248x 0 "nonimmediate_operand")
        (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
                        FIST_ROUNDING))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  ix86_optimize_mode_switching[I387_] = 1;

  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
  operands[3] = assign_386_stack_local (HImode, SLOT_CW_);

  emit_insn (gen_fist2_ (operands[0], operands[1], operands[2], operands[3]));
  DONE;
}
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "")
   (set_attr "mode" "")])

;; DImode store form.  Operands 2 and 3 are HImode memory uses; an
;; earlyclobber XFmode scratch (operand 4) is required.  Assembly comes
;; from output_fix_trunc.
(define_insn "fistdi2_"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m")
        (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
                   FIST_ROUNDING))
   (use (match_operand:HI 2 "memory_operand" "m"))
   (use (match_operand:HI 3 "memory_operand" "m"))
   (clobber (match_scratch:XF 4 "=&f"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
  "* return output_fix_trunc (insn, operands, false);"
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "")
   (set_attr "mode" "DI")])

;; SWI24 store form; same as above but without the scratch register.
(define_insn "fist2_"
  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m")
        (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
                      FIST_ROUNDING))
   (use (match_operand:HI 2 "memory_operand" "m"))
   (use (match_operand:HI 3 "memory_operand" "m"))]
  "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations"
  "* return output_fix_trunc (insn, operands, false);"
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "")
   (set_attr "mode" "")])

;; XFmode -> SWI248x expander: emits the FIST_ROUNDING set with the
;; FLAGS_REG clobber as written (no preparation code).
(define_expand "lxf2"
  [(parallel [(set (match_operand:SWI248x 0 "nonimmediate_operand")
                   (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
                                   FIST_ROUNDING))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_USE_FANCY_MATH_387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) && flag_unsafe_math_optimizations")

;; HFmode -> SWI48 expander: round into an HFmode temporary with
;; gen_sse4_1_roundhf2 (rounding control | ROUND_NO_EXC), then convert
;; with gen_fix_trunchf2.
(define_expand "lhf2"
  [(set (match_operand:SWI48 0 "nonimmediate_operand")
        (unspec:SWI48 [(match_operand:HF 1 "register_operand")]
                      FIST_ROUNDING))]
  "TARGET_AVX512FP16"
{
  rtx tmp = gen_reg_rtx (HFmode);
  emit_insn (gen_sse4_1_roundhf2 (tmp, operands[1], GEN_INT (ROUND_ | ROUND_NO_EXC)));
  emit_insn (gen_fix_trunchf2 (operands[0], tmp));
  DONE;
})

;; MODEF -> SWI48 expander for SSE math.  With TARGET_SSE4_1: round into
;; a temporary, then gen_fix_trunc2.  Otherwise ix86_expand_lfloorceil,
;; with `true' for floor and `false' for ceil.
(define_expand "l2"
  [(parallel [(set (match_operand:SWI48 0 "nonimmediate_operand")
                   (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")]
                                 FIST_ROUNDING))
              (clobber (reg:CC FLAGS_REG))])]
  "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH && (TARGET_SSE4_1 || !flag_trapping_math)"
{
  if (TARGET_SSE4_1)
    {
      rtx tmp = gen_reg_rtx (mode);

      emit_insn (gen_sse4_1_round2 (tmp, operands[1], GEN_INT (ROUND_ | ROUND_NO_EXC)));
      emit_insn (gen_fix_trunc2 (operands[0], tmp));
    }
  else if (ROUND_ == ROUND_FLOOR)
    ix86_expand_lfloorceil (operands[0], operands[1], true);
  else if (ROUND_ == ROUND_CEIL)
    ix86_expand_lfloorceil (operands[0], operands[1], false);
  else
    gcc_unreachable ();

  DONE;
})

;; `fxam' followed by `fnstsw' into operand 0, an HImode register
;; constrained to "a".
(define_insn "fxam2_i387"
  [(set (match_operand:HI 0 "register_operand" "=a")
        (unspec:HI
          [(match_operand:X87MODEF 1 "register_operand" "f")]
          UNSPEC_FXAM))]
  "TARGET_USE_FANCY_MATH_387"
  "fxam\n\tfnstsw\t%0"
  [(set_attr "type" "multi")
   (set_attr "length" "4")
   (set_attr "unit" "i387")
   (set_attr "mode" "")])

;; TFmode signbit.  With TARGET_SSE4_1: ptest against the mask from
;; ix86_build_signbit_mask, setcc NE into a QImode scratch and
;; zero-extend into operand 0.  Otherwise: movmskps on the V4SFmode
;; lowpart, masked with 0x8.
(define_expand "signbittf2"
  [(use (match_operand:SI 0 "register_operand"))
   (use (match_operand:TF 1 "register_operand"))]
  "TARGET_SSE"
{
  if (TARGET_SSE4_1)
    {
      rtx mask = ix86_build_signbit_mask (TFmode, 0, 0);
      rtx scratch = gen_reg_rtx (QImode);

      emit_insn (gen_ptesttf2 (operands[1], mask));
      ix86_expand_setcc (scratch, NE, gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);

      emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
    }
  else
    {
      emit_insn (gen_sse_movmskps (operands[0], gen_lowpart (V4SFmode, operands[1])));
      emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (0x8)));
    }
  DONE;
})

;; XFmode signbit: fxam status word into an HImode scratch, then AND its
;; SImode lowpart with 0x200.
(define_expand "signbitxf2"
  [(use (match_operand:SI 0 "register_operand"))
   (use (match_operand:XF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387"
{
  rtx scratch = gen_reg_rtx (HImode);

  emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
  emit_insn (gen_andsi3 (operands[0], gen_lowpart (SImode, scratch), GEN_INT (0x200)));
  DONE;
})

;; movmskpd of a DFmode register into an SImode register.  The two
;; alternatives differ only in the destination constraint ("r" / "jr")
;; and the isa attribute ("noavx" / "avx").
(define_insn "movmsk_df"
  [(set (match_operand:SI 0 "register_operand" "=r,jr")
        (unspec:SI
          [(match_operand:DF 1 "register_operand" "x,x")]
          UNSPEC_MOVMSK))]
  "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"
  "%vmovmskpd\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "DF")])

;; Use movmskpd in SSE mode to avoid store forwarding stall
;; for 32bit targets and movq+shrq sequence for 64bit targets.
;; The x87 fallback uses the fxam status word masked with 0x200.
(define_expand "signbitdf2"
  [(use (match_operand:SI 0 "register_operand"))
   (use (match_operand:DF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
{
  if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)
    {
      emit_insn (gen_movmsk_df (operands[0], operands[1]));
      emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
    }
  else
    {
      rtx scratch = gen_reg_rtx (HImode);

      emit_insn (gen_fxamdf2_i387 (scratch, operands[1]));
      emit_insn (gen_andsi3 (operands[0], gen_lowpart (SImode, scratch), GEN_INT (0x200)));
    }
  DONE;
})

;; SFmode signbit, x87 only (disabled when SFmode uses SSE math): fxam
;; status word masked with 0x200.
(define_expand "signbitsf2"
  [(use (match_operand:SI 0 "register_operand"))
   (use (match_operand:SF 1 "register_operand"))]
  "TARGET_USE_FANCY_MATH_387 && !(SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
{
  rtx scratch = gen_reg_rtx (HImode);

  emit_insn (gen_fxamsf2_i387 (scratch, operands[1]));
  emit_insn (gen_andsi3 (operands[0], gen_lowpart (SImode, scratch), GEN_INT (0x200)));
  DONE;
})

;; Block operation instructions

;; Emit `cld'; modelled as an unspec_volatile (UNSPECV_CLD).
(define_insn "cld"
  [(unspec_volatile [(const_int 0)] UNSPECV_CLD)]
  ""
  "cld"
  [(set_attr "length" "1")
   (set_attr "length_immediate" "0")
   (set_attr "modrm" "0")])

;; Block copy expander.  All the work is done by
;; ix86_expand_set_or_cpymem, called with NULL as its fourth argument
;; and `false' as its last; the expander FAILs when that helper returns
;; false.
(define_expand "cpymem"
  [(use (match_operand:BLK 0 "memory_operand"))
   (use (match_operand:BLK 1 "memory_operand"))
   (use (match_operand:SWI48 2 "nonmemory_operand"))
   (use (match_operand:SWI48 3 "const_int_operand"))
   (use (match_operand:SI 4 "const_int_operand"))
   (use (match_operand:SI 5 "const_int_operand"))
   (use (match_operand:SI 6 ""))
   (use (match_operand:SI 7 ""))
   (use (match_operand:SI 8 ""))]
  ""
{
 if (ix86_expand_set_or_cpymem (operands[0], operands[1],
                                operands[2], NULL, operands[3],
                                operands[4], operands[5],
                                operands[6], operands[7],
                                operands[8], false))
   DONE;
 else
   FAIL;
})

;; Most CPUs don't like single string operations
;; Handle this case here to simplify previous expander.
;; The default expansion is a load into a temporary (operand 4), a
;; store, and two pointer increments by the piece size.  The single
;; string insn is emitted instead when TARGET_SINGLE_STRINGOP or
;; optimizing for size, provided neither SI_REG nor DI_REG is fixed.
(define_expand "strmov"
  [(set (match_dup 4) (match_operand 3 "memory_operand"))
   (set (match_operand 1 "memory_operand") (match_dup 4))
   (parallel [(set (match_operand 0 "register_operand") (match_dup 5))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_operand 2 "register_operand") (match_dup 6))
              (clobber (reg:CC FLAGS_REG))])]
  ""
{
  /* Can't use this for non-default address spaces.  */
  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3])))
    FAIL;

  int piece_size = GET_MODE_SIZE (GET_MODE (operands[1]));

  /* If .md ever supports :P for Pmode, these can be directly
     in the pattern above.  */
  operands[5] = plus_constant (Pmode, operands[0], piece_size);
  operands[6] = plus_constant (Pmode, operands[2], piece_size);

  /* Can't use this if the user has appropriated esi or edi.  */
  if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
      && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
    {
      emit_insn (gen_strmov_singleop (operands[0], operands[1],
                                      operands[2], operands[3],
                                      operands[5], operands[6]));
      DONE;
    }

  operands[4] = gen_reg_rtx (GET_MODE (operands[1]));
})

;; Memory-to-memory move plus both pointer updates as one parallel.
;; Requests X86_DIRFLAG mode switching when TARGET_CLD.
(define_expand "strmov_singleop"
  [(parallel [(set (match_operand 1 "memory_operand")
                   (match_operand 3 "memory_operand"))
              (set (match_operand 0 "register_operand")
                   (match_operand 4))
              (set (match_operand 2 "register_operand")
                   (match_operand 5))])]
  ""
{
  if (TARGET_CLD)
    ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
})

;; The four insns below share one shape: the input pointers (operands 2
;; and 3) are tied to the outputs in "D" (operand 0) and "S"
;; (operand 1), which advance by the element size.  None of them match
;; when SI_REG or DI_REG is fixed.

;; 8-byte element; 64-bit targets only.
(define_insn "*strmovdi_rex_1"
  [(set (mem:DI (match_operand:P 2 "register_operand" "0"))
        (mem:DI (match_operand:P 3 "register_operand" "1")))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (match_dup 2)
                (const_int 8)))
   (set (match_operand:P 1 "register_operand" "=S")
        (plus:P (match_dup 3)
                (const_int 8)))]
  "TARGET_64BIT && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]) && ix86_check_no_addr_space (insn)"
  "%^movsq"
  [(set_attr "type" "str")
   (set_attr "memory" "both")
   (set_attr "mode" "DI")])

;; 4-byte element.
(define_insn "*strmovsi_1"
  [(set (mem:SI (match_operand:P 2 "register_operand" "0"))
        (mem:SI (match_operand:P 3 "register_operand" "1")))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (match_dup 2)
                (const_int 4)))
   (set (match_operand:P 1 "register_operand" "=S")
        (plus:P (match_dup 3)
                (const_int 4)))]
  "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) && ix86_check_no_addr_space (insn)"
  "%^movs{l|d}"
  [(set_attr "type" "str")
   (set_attr "memory" "both")
   (set_attr "mode" "SI")])

;; 2-byte element.
(define_insn "*strmovhi_1"
  [(set (mem:HI (match_operand:P 2 "register_operand" "0"))
        (mem:HI (match_operand:P 3 "register_operand" "1")))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (match_dup 2)
                (const_int 2)))
   (set (match_operand:P 1 "register_operand" "=S")
        (plus:P (match_dup 3)
                (const_int 2)))]
  "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) && ix86_check_no_addr_space (insn)"
  "%^movsw"
  [(set_attr "type" "str")
   (set_attr "memory" "both")
   (set_attr "mode" "HI")])

;; 1-byte element; prefix_rex is forced to "0" when the tested mode is
;; DImode.
(define_insn "*strmovqi_1"
  [(set (mem:QI (match_operand:P 2 "register_operand" "0"))
        (mem:QI (match_operand:P 3 "register_operand" "1")))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (match_dup 2)
                (const_int 1)))
   (set (match_operand:P 1 "register_operand" "=S")
        (plus:P (match_dup 3)
                (const_int 1)))]
  "!(fixed_regs[SI_REG] || fixed_regs[DI_REG]) && ix86_check_no_addr_space (insn)"
  "%^movsb"
  [(set_attr "type" "str")
   (set_attr "memory" "both")
   (set (attr "prefix_rex")
        (if_then_else
          (match_test "mode == DImode")
          (const_string "0")
          (const_string "*")))
   (set_attr "mode" "QI")])

;; Generic `rep movs' parallel: the count register (operand 4) becomes
;; zero, both pointers get the final values supplied in operands 5 and
;; 6, and the block is copied.  Requests X86_DIRFLAG mode switching
;; when TARGET_CLD.
(define_expand "rep_mov"
  [(parallel [(set (match_operand 4 "register_operand") (const_int 0))
              (set (match_operand 0 "register_operand")
                   (match_operand 5))
              (set (match_operand 2 "register_operand")
                   (match_operand 6))
              (set (match_operand 1 "memory_operand")
                   (match_operand 3 "memory_operand"))
              (use (match_dup 4))])]
  ""
{
  if (TARGET_CLD)
    ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
})

;; `rep movsq': final pointers are start + (count << 3).  The count
;; lives in "c", the pointers in "D" and "S".  64-bit targets only.
(define_insn "*rep_movdi_rex64"
  [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
                          (const_int 3))
                (match_operand:P 3 "register_operand" "0")))
   (set (match_operand:P 1 "register_operand" "=S")
        (plus:P (ashift:P (match_dup 5) (const_int 3))
                (match_operand:P 4 "register_operand" "1")))
   (set (mem:BLK (match_dup 3))
        (mem:BLK (match_dup 4)))
   (use (match_dup 5))]
  "TARGET_64BIT && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) && ix86_check_no_addr_space (insn)"
  "%^rep{%;} movsq"
  [(set_attr "type" "str")
   (set_attr "prefix_rep" "1")
   (set_attr "memory" "both")
   (set_attr "mode" "DI")])

;; `rep movs{l|d}': final pointers are start + (count << 2).
(define_insn "*rep_movsi"
  [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
                          (const_int 2))
                (match_operand:P 3 "register_operand" "0")))
   (set (match_operand:P 1 "register_operand" "=S")
        (plus:P (ashift:P (match_dup 5) (const_int 2))
                (match_operand:P 4 "register_operand" "1")))
   (set (mem:BLK (match_dup 3))
        (mem:BLK (match_dup 4)))
   (use (match_dup 5))]
  "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) && ix86_check_no_addr_space (insn)"
  "%^rep{%;} movs{l|d}"
  [(set_attr "type" "str")
   (set_attr "prefix_rep" "1")
   (set_attr "memory" "both")
   (set_attr "mode" "SI")])

;; `rep movsb': final pointers are start + count.
(define_insn "*rep_movqi"
  [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (match_operand:P 3 "register_operand" "0")
                (match_operand:P 5 "register_operand" "2")))
   (set (match_operand:P 1 "register_operand" "=S")
        (plus:P (match_operand:P 4 "register_operand" "1") (match_dup 5)))
   (set (mem:BLK (match_dup 3))
        (mem:BLK (match_dup 4)))
   (use (match_dup 5))]
  "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG]) && ix86_check_no_addr_space (insn)"
  "%^rep{%;} movsb"
  [(set_attr "type" "str")
   (set_attr "prefix_rep" "1")
   (set_attr "memory" "both")
   (set_attr "mode" "QI")])

;; Block set expander.  All the work is done by
;; ix86_expand_set_or_cpymem, called with NULL as its second argument
;; and `true' as its last; the expander FAILs when that helper returns
;; false.
(define_expand "setmem"
   [(use (match_operand:BLK 0 "memory_operand"))
    (use (match_operand:SWI48 1 "nonmemory_operand"))
    (use (match_operand:QI 2 "nonmemory_operand"))
    (use (match_operand 3 "const_int_operand"))
    (use (match_operand:SI 4 "const_int_operand"))
    (use (match_operand:SI 5 "const_int_operand"))
    (use (match_operand:SI 6 ""))
    (use (match_operand:SI 7 ""))
    (use (match_operand:SI 8 ""))]
  ""
{
 if (ix86_expand_set_or_cpymem (operands[0], NULL,
                                operands[1], operands[2],
                                operands[3], operands[4],
                                operands[5], operands[6],
                                operands[7], operands[8], true))
   DONE;
 else
   FAIL;
})

;; Most CPUs don't like single string operations
;; Handle this case here to simplify previous expander.
;; Store of one element plus the pointer increment.  The default
;; expansion is a plain store followed by an add of the element size.
;; The single string insn is emitted instead when
;; TARGET_SINGLE_STRINGOP or optimizing for size, provided neither
;; AX_REG nor DI_REG is fixed.
(define_expand "strset"
  [(set (match_operand 1 "memory_operand")
        (match_operand 2 "register_operand"))
   (parallel [(set (match_operand 0 "register_operand")
                   (match_dup 3))
              (clobber (reg:CC FLAGS_REG))])]
  ""
{
  /* Can't use this for non-default address spaces.  */
  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1])))
    FAIL;

  /* Make the destination MEM use the mode of the stored register.  */
  if (GET_MODE (operands[1]) != GET_MODE (operands[2]))
    operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0);

  /* If .md ever supports :P for Pmode, this can be directly
     in the pattern above.  */
  operands[3] = plus_constant (Pmode, operands[0],
                               GET_MODE_SIZE (GET_MODE (operands[2])));

  /* Can't use this if the user has appropriated eax or edi.  */
  if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
      && !(fixed_regs[AX_REG] || fixed_regs[DI_REG]))
    {
      emit_insn (gen_strset_singleop (operands[0], operands[1],
                                      operands[2], operands[3]));
      DONE;
    }
})

;; Store plus pointer update as one parallel, tagged with UNSPEC_STOS.
;; Requests X86_DIRFLAG mode switching when TARGET_CLD.
(define_expand "strset_singleop"
  [(parallel [(set (match_operand 1 "memory_operand")
                   (match_operand 2 "register_operand"))
              (set (match_operand 0 "register_operand")
                   (match_operand 3))
              (unspec [(const_int 0)] UNSPEC_STOS)])]
  ""
{
  if (TARGET_CLD)
    ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
})

;; The four insns below share one shape: the value comes from "a"
;; (operand 2), the pointer (operand 1) is tied to the output in "D"
;; (operand 0), which advances by the element size.  None of them match
;; when AX_REG or DI_REG is fixed.

;; 8-byte element; 64-bit targets only.
(define_insn "*strsetdi_rex_1"
  [(set (mem:DI (match_operand:P 1 "register_operand" "0"))
        (match_operand:DI 2 "register_operand" "a"))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (match_dup 1)
                (const_int 8)))
   (unspec [(const_int 0)] UNSPEC_STOS)]
  "TARGET_64BIT && !(fixed_regs[AX_REG] || fixed_regs[DI_REG]) && ix86_check_no_addr_space (insn)"
  "%^stosq"
  [(set_attr "type" "str")
   (set_attr "memory" "store")
   (set_attr "mode" "DI")])

;; 4-byte element.
(define_insn "*strsetsi_1"
  [(set (mem:SI (match_operand:P 1 "register_operand" "0"))
        (match_operand:SI 2 "register_operand" "a"))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (match_dup 1)
                (const_int 4)))
   (unspec [(const_int 0)] UNSPEC_STOS)]
  "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) && ix86_check_no_addr_space (insn)"
  "%^stos{l|d}"
  [(set_attr "type" "str")
   (set_attr "memory" "store")
   (set_attr "mode" "SI")])

;; 2-byte element.
(define_insn "*strsethi_1"
  [(set (mem:HI (match_operand:P 1 "register_operand" "0"))
        (match_operand:HI 2 "register_operand" "a"))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (match_dup 1)
                (const_int 2)))
   (unspec [(const_int 0)] UNSPEC_STOS)]
  "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) && ix86_check_no_addr_space (insn)"
  "%^stosw"
  [(set_attr "type" "str")
   (set_attr "memory" "store")
   (set_attr "mode" "HI")])

;; 1-byte element; prefix_rex is forced to "0" when the tested mode is
;; DImode.
(define_insn "*strsetqi_1"
  [(set (mem:QI (match_operand:P 1 "register_operand" "0"))
        (match_operand:QI 2 "register_operand" "a"))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (match_dup 1)
                (const_int 1)))
   (unspec [(const_int 0)] UNSPEC_STOS)]
  "!(fixed_regs[AX_REG] || fixed_regs[DI_REG]) && ix86_check_no_addr_space (insn)"
  "%^stosb"
  [(set_attr "type" "str")
   (set_attr "memory" "store")
   (set (attr "prefix_rex")
        (if_then_else
          (match_test "mode == DImode")
          (const_string "0")
          (const_string "*")))
   (set_attr "mode" "QI")])

;; Generic `rep stos' parallel: the count register (operand 1) becomes
;; zero, the pointer gets the final value supplied in operand 4, and
;; the destination block is written.  Operand 3 is the register holding
;; the stored value.  Requests X86_DIRFLAG mode switching when
;; TARGET_CLD.
(define_expand "rep_stos"
  [(parallel [(set (match_operand 1 "register_operand") (const_int 0))
              (set (match_operand 0 "register_operand")
                   (match_operand 4))
              (set (match_operand 2 "memory_operand") (const_int 0))
              (use (match_operand 3 "register_operand"))
              (use (match_dup 1))])]
  ""
{
  if (TARGET_CLD)
    ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
})

;; `rep stosq': final pointer is start + (count << 3).  The count lives
;; in "c", the pointer in "D", the value in "a".  64-bit targets only.
(define_insn "*rep_stosdi_rex64"
  [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
                          (const_int 3))
                 (match_operand:P 3 "register_operand" "0")))
   (set (mem:BLK (match_dup 3))
        (const_int 0))
   (use (match_operand:DI 2 "register_operand" "a"))
   (use (match_dup 4))]
  "TARGET_64BIT && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) && ix86_check_no_addr_space (insn)"
  "%^rep{%;} stosq"
  [(set_attr "type" "str")
   (set_attr "prefix_rep" "1")
   (set_attr "memory" "store")
   (set_attr "mode" "DI")])
;; rep stos{l|d}: as *rep_stosdi_rex64 but with 4-byte elements
;; (final pointer = initial pointer + count << 2).
(define_insn "*rep_stossi"
  [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
                          (const_int 2))
                (match_operand:P 3 "register_operand" "0")))
   (set (mem:BLK (match_dup 3))
        (const_int 0))
   (use (match_operand:SI 2 "register_operand" "a"))
   (use (match_dup 4))]
  "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^rep{%;} stos{l|d}"
  [(set_attr "type" "str")
   (set_attr "prefix_rep" "1")
   (set_attr "memory" "store")
   (set_attr "mode" "SI")])

;; rep stosb: byte elements, so the final pointer is pointer + count.
(define_insn "*rep_stosqi"
  [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
   (set (match_operand:P 0 "register_operand" "=D")
        (plus:P (match_operand:P 3 "register_operand" "0")
                (match_operand:P 4 "register_operand" "1")))
   (set (mem:BLK (match_dup 3))
        (const_int 0))
   (use (match_operand:QI 2 "register_operand" "a"))
   (use (match_dup 4))]
  "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^rep{%;} stosb"
  [(set_attr "type" "str")
   (set_attr "prefix_rep" "1")
   (set_attr "memory" "store")
   (set (attr "prefix_rex")
        (if_then_else
          (match_test "<P:MODE>mode == DImode")
          (const_string "0")
          (const_string "*")))
   (set_attr "mode" "QI")])

;; Block compare expanders.  Both defer to ix86_expand_cmpstrn_or_cmpmem;
;; the last argument is false for cmpmemsi and true for cmpstrnsi.
(define_expand "cmpmemsi"
  [(set (match_operand:SI 0 "register_operand" "")
        (compare:SI (match_operand:BLK 1 "memory_operand" "")
                    (match_operand:BLK 2 "memory_operand" "") ) )
   (use (match_operand 3 "general_operand"))
   (use (match_operand 4 "immediate_operand"))]
  ""
{
  if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
                                     operands[2], operands[3],
                                     operands[4], false))
    DONE;
  else
    FAIL;
})

(define_expand "cmpstrnsi"
  [(set (match_operand:SI 0 "register_operand")
        (compare:SI (match_operand:BLK 1 "general_operand")
                    (match_operand:BLK 2 "general_operand")))
   (use (match_operand 3 "general_operand"))
   (use (match_operand 4 "immediate_operand"))]
  ""
{
  if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
                                     operands[2], operands[3],
                                     operands[4], true))
    DONE;
  else
    FAIL;
})

;; Produce a tri-state integer (-1, 0, 1) from condition codes.

(define_expand "cmpintqi"
  [(set (match_dup 1)
        (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
   (set (match_dup 2)
        (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
   (parallel [(set (match_operand:QI 0 "register_operand")
                   (minus:QI (match_dup 1)
                             (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
  ""
{
  /* Fresh QImode pseudos hold the "above" and "below" flag results.  */
  operands[1] = gen_reg_rtx (QImode);
  operands[2] = gen_reg_rtx (QImode);
})

;; memcmp recognizers.  The `cmpsb' opcode does nothing if the count is
;; zero.  Emit extra code to make sure that a zero-length compare is EQ.

(define_expand "cmpstrnqi_nz_1"
  [(parallel [(set (reg:CC FLAGS_REG)
                   (compare:CC (match_operand 4 "memory_operand")
                               (match_operand 5 "memory_operand")))
              (use (match_operand 2 "register_operand"))
              (use (match_operand:SI 3 "immediate_operand"))
              (clobber (match_operand 0 "register_operand"))
              (clobber (match_operand 1 "register_operand"))
              (clobber (match_dup 2))])]
  ""
{
  if (TARGET_CLD)
    ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
})

;; repz cmpsb with pointers in "S" and "D" and the count in "c"; all three
;; registers are clobbered.
(define_insn "*cmpstrnqi_nz_1"
  [(set (reg:CC FLAGS_REG)
        (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
                    (mem:BLK (match_operand:P 5 "register_operand" "1"))))
   (use (match_operand:P 6 "register_operand" "2"))
   (use (match_operand:SI 3 "immediate_operand" "i"))
   (clobber (match_operand:P 0 "register_operand" "=S"))
   (clobber (match_operand:P 1 "register_operand" "=D"))
   (clobber (match_operand:P 2 "register_operand" "=c"))]
  "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^repz{%;} cmpsb"
  [(set_attr "type" "str")
   (set_attr "mode" "QI")
   (set (attr "prefix_rex")
        (if_then_else
          (match_test "<P:MODE>mode == DImode")
          (const_string "0")
          (const_string "*")))
   (set_attr "prefix_rep" "1")])

;; The same, but the count is not known to not be zero.

;; Expander for the conditional form: flags keep their previous value when
;; count operand 2 is zero.  Marks X86_DIRFLAG for mode switching when
;; TARGET_CLD.
(define_expand "cmpstrnqi_1"
  [(parallel [(set (reg:CC FLAGS_REG)
                   (if_then_else:CC (ne (match_operand 2 "register_operand")
                                        (const_int 0))
                     (compare:CC (match_operand 4 "memory_operand")
                                 (match_operand 5 "memory_operand"))
                     (reg:CC FLAGS_REG)))
              (use (match_operand:SI 3 "immediate_operand"))
              (clobber (match_operand 0 "register_operand"))
              (clobber (match_operand 1 "register_operand"))
              (clobber (match_dup 2))])]
  ""
{
  if (TARGET_CLD)
    ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
})

(define_insn "*cmpstrnqi_1"
  [(set (reg:CC FLAGS_REG)
        (if_then_else:CC (ne (match_operand:P 6 "register_operand" "2")
                             (const_int 0))
          (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
                      (mem:BLK (match_operand:P 5 "register_operand" "1")))
          (reg:CC FLAGS_REG)))
   (use (match_operand:SI 3 "immediate_operand" "i"))
   (clobber (match_operand:P 0 "register_operand" "=S"))
   (clobber (match_operand:P 1 "register_operand" "=D"))
   (clobber (match_operand:P 2 "register_operand" "=c"))]
  "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^repz{%;} cmpsb"
  [(set_attr "type" "str")
   (set_attr "mode" "QI")
   (set (attr "prefix_rex")
        (if_then_else
          (match_test "<P:MODE>mode == DImode")
          (const_string "0")
          (const_string "*")))
   (set_attr "prefix_rep" "1")])

;; strlen expander; all the work is done by ix86_expand_strlen, and the
;; expansion FAILs when that helper declines.
(define_expand "strlen<mode>"
  [(set (match_operand:P 0 "register_operand")
        (unspec:P [(match_operand:BLK 1 "general_operand")
                   (match_operand:QI 2 "immediate_operand")
                   (match_operand 3 "immediate_operand")]
                  UNSPEC_SCAS))]
  ""
{
  if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
    DONE;
  else
    FAIL;
})

;; Expander for the repnz scasb insn below.  Marks X86_DIRFLAG for mode
;; switching when TARGET_CLD.
(define_expand "strlenqi_1"
  [(parallel [(set (match_operand 0 "register_operand")
                   (match_operand 2))
              (clobber (match_operand 1 "register_operand"))
              (clobber (reg:CC FLAGS_REG))])]
  ""
{
  if (TARGET_CLD)
    ix86_optimize_mode_switching[X86_DIRFLAG] = 1;
})

;; repnz scasb: count in "c" (earlyclobber result), byte to look for in
;; "a", pointer in "D" (clobbered).
(define_insn "*strlenqi_1"
  [(set (match_operand:P 0 "register_operand" "=&c")
        (unspec:P [(mem:BLK (match_operand:P 5 "register_operand" "1"))
                   (match_operand:QI 2 "register_operand" "a")
                   (match_operand:P 3 "immediate_operand" "i")
                   (match_operand:P 4 "register_operand" "0")] UNSPEC_SCAS))
   (clobber (match_operand:P 1 "register_operand" "=D"))
   (clobber (reg:CC FLAGS_REG))]
  "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
   && ix86_check_no_addr_space (insn)"
  "%^repnz{%;} scasb"
  [(set_attr "type" "str")
   (set_attr "mode" "QI")
   (set (attr "prefix_rex")
        (if_then_else
          (match_test "<P:MODE>mode == DImode")
          (const_string "0")
          (const_string "*")))
   (set_attr "prefix_rep" "1")])

;; Peephole optimizations to clean up after cmpstrn*.  This should be
;; handled in combine, but it is not currently up to the task.
;; When used for their truth value, the cmpstrn* expanders generate
;; code like this:
;;
;;   repz cmpsb
;;   seta %al
;;   setb %dl
;;   cmpb %al, %dl
;;   jcc  label
;;
;; The intermediate three instructions are unnecessary.

;; This one handles cmpstrn*_nz_1...
(define_peephole2
  [(parallel[
     (set (reg:CC FLAGS_REG)
          (compare:CC (mem:BLK (match_operand 4 "register_operand"))
                      (mem:BLK (match_operand 5 "register_operand"))))
     (use (match_operand 6 "register_operand"))
     (use (match_operand:SI 3 "immediate_operand"))
     (clobber (match_operand 0 "register_operand"))
     (clobber (match_operand 1 "register_operand"))
     (clobber (match_operand 2 "register_operand"))])
   (set (match_operand:QI 7 "register_operand")
        (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
   (set (match_operand:QI 8 "register_operand")
        (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
   (set (reg FLAGS_REG)
        (compare (match_dup 7) (match_dup 8)))
  ]
  "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
  [(parallel[
     (set (reg:CC FLAGS_REG)
          (compare:CC (mem:BLK (match_dup 4))
                      (mem:BLK (match_dup 5))))
     (use (match_dup 6))
     (use (match_dup 3))
     (clobber (match_dup 0))
     (clobber (match_dup 1))
     (clobber (match_dup 2))])])

;; ...and this one handles cmpstrn*_1.
;; Drop the seta/setb/cmp triple after a conditional repz cmpsb when both
;; QImode temporaries are dead afterwards; only the string compare remains.
(define_peephole2
  [(parallel[
     (set (reg:CC FLAGS_REG)
          (if_then_else:CC (ne (match_operand 6 "register_operand")
                               (const_int 0))
            (compare:CC (mem:BLK (match_operand 4 "register_operand"))
                        (mem:BLK (match_operand 5 "register_operand")))
            (reg:CC FLAGS_REG)))
     (use (match_operand:SI 3 "immediate_operand"))
     (clobber (match_operand 0 "register_operand"))
     (clobber (match_operand 1 "register_operand"))
     (clobber (match_operand 2 "register_operand"))])
   (set (match_operand:QI 7 "register_operand")
        (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
   (set (match_operand:QI 8 "register_operand")
        (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
   (set (reg FLAGS_REG)
        (compare (match_dup 7) (match_dup 8)))
  ]
  "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
  [(parallel[
     (set (reg:CC FLAGS_REG)
          (if_then_else:CC (ne (match_dup 6)
                               (const_int 0))
            (compare:CC (mem:BLK (match_dup 4))
                        (mem:BLK (match_dup 5)))
            (reg:CC FLAGS_REG)))
     (use (match_dup 3))
     (clobber (match_dup 0))
     (clobber (match_dup 1))
     (clobber (match_dup 2))])])

;; Conditional move instructions.

;; Integer conditional move expander; ix86_expand_int_movcc does the work
;; and the expansion FAILs when it declines.
(define_expand "mov<mode>cc"
  [(set (match_operand:SWIM 0 "register_operand")
        (if_then_else:SWIM (match_operand 1 "comparison_operator")
                           (match_operand:SWIM 2 "<general_operand>")
                           (match_operand:SWIM 3 "<general_operand>")))]
  ""
  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")

;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
;; the register first winds up with `sbbl $0,reg', which is also weird.
;; So just document what we're doing explicitly.

;; Set operand 0 to -1 or 0 according to a carry-flag test, via sbb reg,reg.
(define_expand "x86_mov<mode>cc_0_m1"
  [(parallel
    [(set (match_operand:SWI48 0 "register_operand")
          (if_then_else:SWI48
            (match_operator:SWI48 2 "ix86_carry_flag_operator"
             [(match_operand 1 "flags_reg_operand")
              (const_int 0)])
            (const_int -1)
            (const_int 0)))
     (clobber (reg:CC FLAGS_REG))])])

(define_insn "*x86_mov<mode>cc_0_m1"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (if_then_else:SWI48 (match_operator 1 "ix86_carry_flag_operator"
                             [(reg FLAGS_REG) (const_int 0)])
          (const_int -1)
          (const_int 0)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "sbb{<imodesuffix>}\t%0, %0"
  [(set_attr "type" "alu1")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "0")])

;; Same insn, matched when written as a one-bit sign_extract of the test.
(define_insn "*x86_mov<mode>cc_0_m1_se"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (sign_extract:SWI48 (match_operator 1 "ix86_carry_flag_operator"
                             [(reg FLAGS_REG) (const_int 0)])
                            (const_int 1)
                            (const_int 0)))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "sbb{<imodesuffix>}\t%0, %0"
  [(set_attr "type" "alu1")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "0")])

;; Same insn, matched when written as the negation of the test.
(define_insn "*x86_mov<mode>cc_0_m1_neg"
  [(set (match_operand:SWI 0 "register_operand" "=<r>")
        (neg:SWI (match_operator 1 "ix86_carry_flag_operator"
                  [(reg FLAGS_REG) (const_int 0)])))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "sbb{<imodesuffix>}\t%0, %0"
  [(set_attr "type" "alu1")
   (set_attr "use_carry" "1")
   (set_attr "pent_pair" "pu")
   (set_attr "mode" "<MODE>")
   (set_attr "length_immediate" "0")])

(define_expand "x86_mov<mode>cc_0_m1_neg"
  [(parallel
    [(set (match_operand:SWI48 0 "register_operand")
          (neg:SWI48 (ltu:SWI48 (reg:CCC FLAGS_REG) (const_int 0))))
     (clobber (reg:CC FLAGS_REG))])])

;; -(x <=u C) becomes compare x with C+1 followed by -(carry).  C == -1 and
;; C == 2147483647 are excluded by the condition, before C is incremented.
(define_split
  [(set (match_operand:SWI48 0 "register_operand")
        (neg:SWI48
          (leu:SWI48
            (match_operand 1 "int_nonimmediate_operand")
            (match_operand 2 "const_int_operand"))))]
  "x86_64_immediate_operand (operands[2], VOIDmode)
   && INTVAL (operands[2]) != -1
   && INTVAL (operands[2]) != 2147483647"
  [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
   (set (match_dup 0)
        (neg:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))))]
  "operands[2] = GEN_INT (INTVAL (operands[2]) + 1);")

;; -(x == 0) becomes compare x with 1 followed by -(carry).
(define_split
  [(set (match_operand:SWI 0 "register_operand")
        (neg:SWI
          (eq:SWI
            (match_operand 1 "int_nonimmediate_operand")
            (const_int 0))))]
  ""
  [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (const_int 1)))
   (set (match_dup 0)
        (neg:SWI (ltu:SWI (reg:CC FLAGS_REG) (const_int 0))))])

;; -(x != 0) becomes an UNSPEC_CC_NE carry computation followed by -(carry).
(define_split
  [(set (match_operand:SWI 0 "register_operand")
        (neg:SWI
          (ne:SWI
            (match_operand 1 "int_nonimmediate_operand")
            (const_int 0))))]
  ""
  [(set (reg:CCC FLAGS_REG)
        (unspec:CCC [(match_dup 1) (const_int 0)] UNSPEC_CC_NE))
   (set (match_dup 0)
        (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))])

;; cmov on flags.  Alternatives 0 and 1 overwrite the tied input; 2 and 3
;; are the three-operand apx_ndd forms.  At most one input may be memory.
(define_insn "*mov<mode>cc_noc"
  [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r")
        (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
                               [(reg FLAGS_REG) (const_int 0)])
          (match_operand:SWI248 2 "nonimmediate_operand" "rm,0,rm,r")
          (match_operand:SWI248 3 "nonimmediate_operand" "0,rm,r,rm")))]
  "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "@
   cmov%O2%C1\t{%2, %0|%0, %2}
   cmov%O2%c1\t{%3, %0|%0, %3}
   cmov%O2%C1\t{%2, %3, %0|%0, %3, %2}
   cmov%O2%c1\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set_attr "type" "icmov")
   (set_attr "mode" "<MODE>")])

;; SImode cmov whose inputs are each zero-extended to DImode.
(define_insn "*movsicc_noc_zext"
  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
        (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
                           [(reg FLAGS_REG) (const_int 0)])
          (zero_extend:DI
            (match_operand:SI 2 "nonimmediate_operand" "rm,0,rm,r"))
          (zero_extend:DI
            (match_operand:SI 3 "nonimmediate_operand" "0,rm,r,rm"))))]
  "TARGET_64BIT
   && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "@
   cmov%O2%C1\t{%2, %k0|%k0, %2}
   cmov%O2%c1\t{%3, %k0|%k0, %3}
   cmov%O2%C1\t{%2, %3, %k0|%k0, %3, %2}
   cmov%O2%c1\t{%3, %2, %k0|%k0, %2, %3}"
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set_attr "type" "icmov")
   (set_attr "mode" "SI")])

;; SImode cmov whose result is zero-extended to DImode.
(define_insn "*movsicc_noc_zext_1"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r")
        (zero_extend:DI
          (if_then_else:SI (match_operator 1 "ix86_comparison_operator"
                             [(reg FLAGS_REG) (const_int 0)])
             (match_operand:SI 2 "nonimmediate_operand" "rm,0,rm,r")
             (match_operand:SI 3 "nonimmediate_operand" "0,rm,r,rm"))))]
  "TARGET_64BIT
   && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "@
   cmov%O2%C1\t{%2, %k0|%k0, %2}
   cmov%O2%c1\t{%3, %k0|%k0, %3}
   cmov%O2%C1\t{%2, %3, %k0|%k0, %3, %2}
   cmov%O2%c1\t{%3, %2, %k0|%k0, %2, %3}"
  [(set_attr "isa" "*,*,apx_ndd,apx_ndd")
   (set_attr "type" "icmov")
   (set_attr "mode" "SI")])

;; Don't do conditional moves with memory inputs.  This splitter helps
;; register starved x86_32 by forcing inputs into registers before reload.
(define_split
  [(set (match_operand:SWI248 0 "register_operand")
        (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
                               [(reg FLAGS_REG) (const_int 0)])
          (match_operand:SWI248 2 "nonimmediate_operand")
          (match_operand:SWI248 3 "nonimmediate_operand")))]
  "!TARGET_64BIT && TARGET_CMOVE
   && TARGET_AVOID_MEM_OPND_FOR_CMOVE
   && (MEM_P (operands[2]) || MEM_P (operands[3]))
   && can_create_pseudo_p ()
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 0)
        (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
{
  operands[2] = force_reg (<MODE>mode, operands[2]);
  operands[3] = force_reg (<MODE>mode, operands[3]);
})

;; QImode cmov placeholder; always split ("#") by the SWI12 splitter below.
(define_insn "*movqicc_noc"
  [(set (match_operand:QI 0 "register_operand" "=r,r,r")
        (if_then_else:QI (match_operator 1 "ix86_comparison_operator"
                           [(reg FLAGS_REG) (const_int 0)])
                      (match_operand:QI 2 "register_operand" "r,0,r")
                      (match_operand:QI 3 "register_operand" "0,r,r")))]
  "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
  "#"
  [(set_attr "isa" "*,*,apx_ndd")
   (set_attr "type" "icmov")
   (set_attr "mode" "QI")])

;; After reload, perform QImode/HImode conditional moves on the SImode
;; low parts of the same registers.
(define_split
  [(set (match_operand:SWI12 0 "register_operand")
        (if_then_else:SWI12 (match_operator 1 "ix86_comparison_operator"
                              [(reg FLAGS_REG) (const_int 0)])
                      (match_operand:SWI12 2 "register_operand")
                      (match_operand:SWI12 3 "register_operand")))]
  "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL
   && reload_completed"
  [(set (match_dup 0)
        (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[2] = gen_lowpart (SImode, operands[2]);
  operands[3] = gen_lowpart (SImode, operands[3]);
})

;; Don't do conditional moves with memory inputs
(define_peephole2
  [(match_scratch:SWI248 4 "r")
   (set (match_operand:SWI248 0 "register_operand")
        (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
                               [(reg FLAGS_REG) (const_int 0)])
          (match_operand:SWI248 2 "nonimmediate_operand")
          (match_operand:SWI248 3 "nonimmediate_operand")))]
  "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
   && (MEM_P (operands[2]) || MEM_P (operands[3]))
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 4) (match_dup 5))
   (set (match_dup 0)
        (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
{
  /* Load the memory input into the scratch and use the scratch instead.  */
  if (MEM_P (operands[2]))
    {
      operands[5] = operands[2];
      operands[2] = operands[4];
    }
  else if (MEM_P (operands[3]))
    {
      operands[5] = operands[3];
      operands[3] = operands[4];
    }
  else
    gcc_unreachable ();
})

;; Likewise for the zero-extending SImode -> DImode form.
(define_peephole2
  [(match_scratch:SI 4 "r")
   (set (match_operand:DI 0 "register_operand")
        (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
                           [(reg FLAGS_REG) (const_int 0)])
          (zero_extend:DI
            (match_operand:SI 2 "nonimmediate_operand"))
          (zero_extend:DI
            (match_operand:SI 3 "nonimmediate_operand"))))]
  "TARGET_64BIT
   && TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
   && (MEM_P (operands[2]) || MEM_P (operands[3]))
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 4) (match_dup 5))
   (set (match_dup 0)
        (if_then_else:DI (match_dup 1)
          (zero_extend:DI (match_dup 2))
          (zero_extend:DI (match_dup 3))))]
{
  if (MEM_P (operands[2]))
    {
      operands[5] = operands[2];
      operands[2] = operands[4];
    }
  else if (MEM_P (operands[3]))
    {
      operands[5] = operands[3];
      operands[3] = operands[4];
    }
  else
    gcc_unreachable ();
})

;; Eliminate a reg-reg mov by inverting the condition of a cmov (#1).
;; mov r0,r1; dec r0; mov r2,r3; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
(define_peephole2
 [(set (match_operand:SWI248 0 "general_reg_operand")
       (match_operand:SWI248 1 "general_reg_operand"))
  (parallel [(set (reg FLAGS_REG) (match_operand 5))
             (set (match_dup 0) (match_operand:SWI248 6))])
  (set (match_operand:SWI248 2 "general_reg_operand")
       (match_operand:SWI248 3 "general_gr_operand"))
  (set (match_dup 0)
       (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
                             [(reg FLAGS_REG) (const_int 0)])
        (match_dup 0)
        (match_dup 2)))]
 "TARGET_CMOVE
  && REGNO (operands[2]) != REGNO (operands[0])
  && REGNO (operands[2]) != REGNO (operands[1])
  && peep2_reg_dead_p (1, operands[1])
  && peep2_reg_dead_p (4, operands[2])
  && !reg_overlap_mentioned_p (operands[0], operands[3])"
 [(parallel [(set (match_dup 7) (match_dup 8))
             (set (match_dup 1) (match_dup 9))])
  (set (match_dup 0) (match_dup 3))
  (set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
                                          (match_dup 1)
                                          (match_dup 0)))]
{
  /* Reuse the flags destination of the arithmetic insn (second insn of the
     window) and rewrite both of its sources to use r1 instead of r0.  */
  operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
  operands[8]
    = ix86_replace_reg_with_reg (operands[5], operands[0], operands[1]);
  operands[9]
    = ix86_replace_reg_with_reg (operands[6], operands[0], operands[1]);
})

;; Eliminate a reg-reg mov by inverting the condition of a cmov (#2).
;; mov r2,r3; mov r0,r1; dec r0; cmov r0,r2 -> dec r1; mov r0,r3; cmov r0, r1
(define_peephole2
 [(set (match_operand:SWI248 2 "general_reg_operand")
       (match_operand:SWI248 3 "general_gr_operand"))
  (set (match_operand:SWI248 0 "general_reg_operand")
       (match_operand:SWI248 1 "general_reg_operand"))
  (parallel [(set (reg FLAGS_REG) (match_operand 5))
             (set (match_dup 0) (match_operand:SWI248 6))])
  (set (match_dup 0)
       (if_then_else:SWI248 (match_operator 4 "ix86_comparison_operator"
                             [(reg FLAGS_REG) (const_int 0)])
        (match_dup 0)
        (match_dup 2)))]
 "TARGET_CMOVE
  && REGNO (operands[2]) != REGNO (operands[0])
  && REGNO (operands[2]) != REGNO (operands[1])
  && peep2_reg_dead_p (2, operands[1])
  && peep2_reg_dead_p (4, operands[2])
  && !reg_overlap_mentioned_p (operands[0], operands[3])
  && !reg_mentioned_p (operands[2], operands[6])"
 [(parallel [(set (match_dup 7) (match_dup 8))
             (set (match_dup 1) (match_dup 9))])
  (set (match_dup 0) (match_dup 3))
  (set (match_dup 0) (if_then_else:SWI248 (match_dup 4)
                                          (match_dup 1)
                                          (match_dup 0)))]
{
  /* Here the arithmetic insn is the third insn of the window.  */
  operands[7] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (2)), 0, 0));
  operands[8]
    = ix86_replace_reg_with_reg (operands[5], operands[0], operands[1]);
  operands[9]
    = ix86_replace_reg_with_reg (operands[6], operands[0], operands[1]);
})

;; Masked HFmode move (vmovsh) under mask register operand 3.
(define_insn "movhf_mask"
  [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v")
        (unspec:HF
          [(match_operand:HF 1 "nonimmediate_operand" "m,v,v")
           (match_operand:HF 2 "nonimm_or_0_operand" "0C,0C,0C")
           (match_operand:QI 3 "register_operand" "Yk,Yk,Yk")]
          UNSPEC_MOVCC_MASK))]
  "TARGET_AVX512FP16"
  "@
   vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
   vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
   vmovsh\t{%d1, %0%{%3%}%N2|%0%{%3%}%N2, %d1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "HF")])

;; Floating-point conditional move expanders; ix86_expand_fp_movcc does
;; the work and the expansion FAILs when it declines.
(define_expand "movhfcc"
  [(set (match_operand:HF 0 "register_operand")
        (if_then_else:HF
          (match_operand 1 "comparison_operator")
          (match_operand:HF 2 "register_operand")
          (match_operand:HF 3 "register_operand")))]
  "TARGET_AVX512FP16"
  "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")

(define_expand "mov<mode>cc"
  [(set (match_operand:X87MODEF 0 "register_operand")
        (if_then_else:X87MODEF
          (match_operand 1 "comparison_operator")
          (match_operand:X87MODEF 2 "register_operand")
          (match_operand:X87MODEF 3 "register_operand")))]
  "(TARGET_80387 && TARGET_CMOVE)
   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
  "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")

;; x87 fcmov on XFmode.
(define_insn "*movxfcc_1"
  [(set (match_operand:XF 0 "register_operand" "=f,f")
        (if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
                                [(reg FLAGS_REG) (const_int 0)])
                      (match_operand:XF 2 "register_operand" "f,0")
                      (match_operand:XF 3 "register_operand" "0,f")))]
  "TARGET_80387 && TARGET_CMOVE"
  "@
   fcmov%F1\t{%2, %0|%0, %2}
   fcmov%f1\t{%3, %0|%0, %3}"
  [(set_attr "type" "fcmov")
   (set_attr "mode" "XF")])

;; DFmode conditional move: fcmov on x87 registers, integer cmov on general
;; registers for x64, and "#" (split below) for general registers on nox64.
(define_insn "*movdfcc_1"
  [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r,r ,r")
        (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
                                [(reg FLAGS_REG) (const_int 0)])
                      (match_operand:DF 2 "nonimmediate_operand" "f ,0,rm,0 ,rm,0")
                      (match_operand:DF 3 "nonimmediate_operand" "0 ,f,0 ,rm,0, rm")))]
  "TARGET_80387 && TARGET_CMOVE
   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "@
   fcmov%F1\t{%2, %0|%0, %2}
   fcmov%f1\t{%3, %0|%0, %3}
   #
   #
   cmov%O2%C1\t{%2, %0|%0, %2}
   cmov%O2%c1\t{%3, %0|%0, %3}"
  [(set_attr "isa" "*,*,nox64,nox64,x64,x64")
   (set_attr "type" "fcmov,fcmov,multi,multi,icmov,icmov")
   (set_attr "mode" "DF,DF,DI,DI,DI,DI")])

;; On 32-bit targets, split a DFmode cmov in general registers into two
;; SImode cmovs, one per half.
(define_split
  [(set (match_operand:DF 0 "general_reg_operand")
        (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
                                [(reg FLAGS_REG) (const_int 0)])
                      (match_operand:DF 2 "nonimmediate_operand")
                      (match_operand:DF 3 "nonimmediate_operand")))]
  "!TARGET_64BIT && reload_completed"
  [(set (match_dup 2)
        (if_then_else:SI (match_dup 1) (match_dup 4) (match_dup 5)))
   (set (match_dup 3)
        (if_then_else:SI (match_dup 1) (match_dup 6) (match_dup 7)))]
{
  /* Split the two inputs first (into operands 4..7), then reuse operand
     slots 2 and 3 for the halves of the destination.  */
  split_double_mode (DImode, &operands[2], 2, &operands[4], &operands[6]);
  split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
})

;; SFmode conditional move: fcmov on x87 registers or integer cmov on
;; general registers.
(define_insn "*movsfcc_1_387"
  [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
        (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
                                [(reg FLAGS_REG) (const_int 0)])
                      (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
                      (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
  "TARGET_80387 && TARGET_CMOVE
   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
  "@
   fcmov%F1\t{%2, %0|%0, %2}
   fcmov%f1\t{%3, %0|%0, %3}
   cmov%O2%C1\t{%2, %0|%0, %2}
   cmov%O2%c1\t{%3, %0|%0, %3}"
  [(set_attr "type" "fcmov,fcmov,icmov,icmov")
   (set_attr "mode" "SF,SF,SI,SI")])

;; Don't do conditional moves with memory inputs.  This splitter helps
;; register starved x86_32 by forcing inputs into registers before reload.
(define_split
  [(set (match_operand:MODEF 0 "register_operand")
        (if_then_else:MODEF (match_operator 1 "ix86_comparison_operator"
                              [(reg FLAGS_REG) (const_int 0)])
          (match_operand:MODEF 2 "nonimmediate_operand")
          (match_operand:MODEF 3 "nonimmediate_operand")))]
  "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
   && TARGET_AVOID_MEM_OPND_FOR_CMOVE
   && (MEM_P (operands[2]) || MEM_P (operands[3]))
   && can_create_pseudo_p ()
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 0)
        (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
{
  operands[2] = force_reg (<MODE>mode, operands[2]);
  operands[3] = force_reg (<MODE>mode, operands[3]);
})

;; Don't do conditional moves with memory inputs
(define_peephole2
  [(match_scratch:MODEF 4 "r")
   (set (match_operand:MODEF 0 "general_reg_operand")
        (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
                              [(reg FLAGS_REG) (const_int 0)])
          (match_operand:MODEF 2 "nonimmediate_operand")
          (match_operand:MODEF 3 "nonimmediate_operand")))]
  "(<MODE>mode != DFmode || TARGET_64BIT)
   && TARGET_80387 && TARGET_CMOVE
   && TARGET_AVOID_MEM_OPND_FOR_CMOVE
   && (MEM_P (operands[2]) || MEM_P (operands[3]))
   && optimize_insn_for_speed_p ()"
  [(set (match_dup 4) (match_dup 5))
   (set (match_dup 0)
        (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
{
  if (MEM_P (operands[2]))
    {
      operands[5] = operands[2];
      operands[2] = operands[4];
    }
  else if (MEM_P (operands[3]))
    {
      operands[5] = operands[3];
      operands[3] = operands[4];
    }
  else
    gcc_unreachable ();
})

;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
;; the scalar versions to have only XMM registers as operands.

;; XOP conditional move
(define_insn "*xop_pcmov_<mode>"
  [(set (match_operand:MODEF 0 "register_operand" "=x")
        (if_then_else:MODEF
          (match_operand:MODEF 1 "register_operand" "x")
          (match_operand:MODEF 2 "register_operand" "x")
          (match_operand:MODEF 3 "register_operand" "x")))]
  "TARGET_XOP"
  "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
  [(set_attr "type" "sse4arg")
   (set_attr "mode" "TI")])

;; These versions of the min/max patterns are intentionally ignorant of
;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
;; are undefined in this condition, we're certain this is correct.

(define_insn "<code><mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,v")
        (smaxmin:MODEF
          (match_operand:MODEF 1 "nonimmediate_operand" "%0,v")
          (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")))]
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
  "@
   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
   v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,vex")
   (set_attr "type" "sseadd")
   (set_attr "mode" "<MODE>")])

(define_insn "<code>hf3"
  [(set (match_operand:HF 0 "register_operand" "=v")
        (smaxmin:HF
          (match_operand:HF 1 "nonimmediate_operand" "%v")
          (match_operand:HF 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512FP16"
  "v<maxmin_float>sh\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "type" "sseadd")
   (set_attr "mode" "HF")])

;; These versions of the min/max patterns implement exactly the operations
;;   min = (op1 < op2 ? op1 : op2)
;;   max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.

(define_insn "*ieee_s<ieee_maxmin>hf3"
  [(set (match_operand:HF 0 "register_operand" "=v")
        (unspec:HF
          [(match_operand:HF 1 "register_operand" "v")
           (match_operand:HF 2 "nonimmediate_operand" "vm")]
          IEEE_MAXMIN))]
  "TARGET_AVX512FP16"
  "v<ieee_maxmin>sh\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "type" "sseadd")
   (set_attr "mode" "HF")])

(define_insn "*ieee_s<ieee_maxmin><mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,v")
        (unspec:MODEF
          [(match_operand:MODEF 1 "register_operand" "0,v")
           (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")]
          IEEE_MAXMIN))]
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
  "@
   <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
   v<ieee_maxmin><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "type" "sseadd")
   (set_attr "mode" "<MODE>")])

;; Operands order in min/max instruction matters for signed zero and NANs.
(define_insn_and_split "*ieee_max<mode>3_1"
  [(set (match_operand:MODEF 0 "register_operand")
        (unspec:MODEF
          [(match_operand:MODEF 1 "register_operand")
           (match_operand:MODEF 2 "register_operand")
           (lt:MODEF
             (match_operand:MODEF 3 "register_operand")
             (match_operand:MODEF 4 "register_operand"))]
          UNSPEC_BLENDV))]
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
   && (rtx_equal_p (operands[1], operands[3])
       && rtx_equal_p (operands[2], operands[4]))
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
        (unspec:MODEF
          [(match_dup 2)
           (match_dup 1)]
          UNSPEC_IEEE_MAX))])

(define_insn_and_split "*ieee_min<mode>3_1"
  [(set (match_operand:MODEF 0 "register_operand")
        (unspec:MODEF
          [(match_operand:MODEF 1 "register_operand")
           (match_operand:MODEF 2 "register_operand")
           (lt:MODEF
             (match_operand:MODEF 3 "register_operand")
             (match_operand:MODEF 4 "register_operand"))]
          UNSPEC_BLENDV))]
  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
   && (rtx_equal_p (operands[1], operands[4])
       && rtx_equal_p (operands[2], operands[3]))
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
        (unspec:MODEF
          [(match_dup 2)
           (match_dup 1)]
          UNSPEC_IEEE_MIN))])

;; Make two stack loads independent:
;;   fld aa              fld aa
;;   fld %st(0)     ->   fld bb
;;   fmul bb             fmul %st(1), %st
;;
;; Actually we only match the last two instructions for simplicity.

(define_peephole2
  [(set (match_operand 0 "fp_register_operand")
        (match_operand 1 "fp_register_operand"))
   (set (match_dup 0)
        (match_operator 2 "binary_fp_operator"
           [(match_dup 0)
            (match_operand 3 "memory_operand")]))]
  "REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_dup 0) (match_dup 3))
   (set (match_dup 0)
        (match_op_dup 2
          [(match_dup 5) (match_dup 4)]))]
{
  operands[4] = operands[0];
  operands[5] = operands[1];

  /* The % modifier is not operational anymore in peephole2's, so we have to
     swap the operands manually in the case of addition and multiplication.  */
  if (COMMUTATIVE_ARITH_P (operands[2]))
    std::swap (operands[4], operands[5]);
})

(define_peephole2
  [(set (match_operand 0 "fp_register_operand")
        (match_operand 1 "fp_register_operand"))
   (set (match_dup 0)
        (match_operator 2 "binary_fp_operator"
           [(match_operand 3 "memory_operand")
            (match_dup 0)]))]
  "REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_dup 0) (match_dup 3))
   (set (match_dup 0)
        (match_op_dup 2
          [(match_dup 4) (match_dup 5)]))]
{
  operands[4] = operands[0];
  operands[5] = operands[1];

  /* The % modifier is not operational anymore in peephole2's, so we have to
     swap the operands manually in the case of addition and multiplication.  */
  if (COMMUTATIVE_ARITH_P (operands[2]))
    std::swap (operands[4], operands[5]);
})

;; Conditional addition patterns
(define_expand "add<mode>cc"
  [(match_operand:SWI 0 "register_operand")
   (match_operand 1 "ordered_comparison_operator")
   (match_operand:SWI 2 "register_operand")
   (match_operand:SWI 3 "const_int_operand")]
  ""
  "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")

;; min/max patterns

(define_code_attr maxmin_rel
  [(smax "GE") (smin "LE") (umax "GEU") (umin "LEU")])

(define_expand "<code><mode>3"
  [(parallel
    [(set (match_operand:SDWIM 0 "register_operand")
          (maxmin:SDWIM
            (match_operand:SDWIM 1 "register_operand")
            (match_operand:SDWIM 2 "general_operand")))
     (clobber (reg:CC FLAGS_REG))])]
  "TARGET_CMOVE
   && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)")

;; Double-word min/max: compare the low halves, subtract-with-borrow the
;; high halves into a scratch, then conditionally move each half.
(define_insn_and_split "*<code><dwi>3_doubleword"
  [(set (match_operand:<DWI> 0 "register_operand")
        (maxmin:<DWI>
          (match_operand:<DWI> 1 "register_operand")
          (match_operand:<DWI> 2 "general_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_CMOVE
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
        (if_then_else:DWIH (match_dup 6)
          (match_dup 1)
          (match_dup 2)))
   (set (match_dup 3)
        (if_then_else:DWIH (match_dup 6)
          (match_dup 4)
          (match_dup 5)))]
{
  operands[2] = force_reg (<DWI>mode, operands[2]);

  split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);

  rtx cmplo[2] = { operands[1], operands[2] };
  rtx cmphi[2] = { operands[4], operands[5] };

  enum rtx_code code = <maxmin_rel>;

  switch (code)
    {
    case LE: case LEU:
      /* Turn LE/LEU into GE/GEU by swapping the comparison operands.  */
      std::swap (cmplo[0], cmplo[1]);
      std::swap (cmphi[0], cmphi[1]);
      code = swap_condition (code);
      /* FALLTHRU */

    case GE: case GEU:
      {
        bool uns = (code == GEU);
        rtx (*sbb_insn) (machine_mode, rtx, rtx, rtx)
          = uns ? gen_sub3_carry_ccc : gen_sub3_carry_ccgz;

        emit_insn (gen_cmp_1 (<MODE>mode, cmplo[0], cmplo[1]));

        rtx tmp = gen_rtx_SCRATCH (<MODE>mode);
        emit_insn (sbb_insn (<MODE>mode, tmp, cmphi[0], cmphi[1]));

        rtx flags = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
        operands[6] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);

        break;
      }

    default:
      gcc_unreachable ();
    }
})

;; Single-word min/max: emit a compare and turn the operation into a
;; conditional move, special-casing the constants 1, -1 and 0.
(define_insn_and_split "*<code><mode>3_1"
  [(set (match_operand:SWI 0 "register_operand")
        (maxmin:SWI
          (match_operand:SWI 1 "register_operand")
          (match_operand:SWI 2 "general_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_CMOVE
   && (<MODE>mode != QImode || !TARGET_PARTIAL_REG_STALL)
   && ix86_pre_reload_split ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
        (if_then_else:SWI (match_dup 3)
          (match_dup 1)
          (match_dup 2)))]
{
  machine_mode mode = <MODE>mode;
  rtx cmp_op = operands[2];

  operands[2] = force_reg (mode, cmp_op);

  enum rtx_code code = <maxmin_rel>;

  if (cmp_op == const1_rtx)
    {
      /* Convert smax (x, 1) into (x > 0 ? x : 1).
         Convert umax (x, 1) into (x != 0 ? x : 1).
         Convert ?min (x, 1) into (x <= 0 ? x : 1).  */
      cmp_op = const0_rtx;
      if (code == GE)
        code = GT;
      else if (code == GEU)
        code = NE;
    }
  /* Convert smin (x, -1) into (x < 0 ? x : -1).  */
  else if (cmp_op == constm1_rtx && code == LE)
    {
      cmp_op = const0_rtx;
      code = LT;
    }
  /* Convert smax (x, -1) into (x >= 0 ? x : -1).  */
  else if (cmp_op == constm1_rtx && code == GE)
    cmp_op = const0_rtx;
  else if (cmp_op != const0_rtx)
    cmp_op = operands[2];

  machine_mode cmpmode = SELECT_CC_MODE (code, operands[1], cmp_op);
  rtx flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  rtx tmp = gen_rtx_COMPARE (cmpmode, operands[1], cmp_op);
  emit_insn (gen_rtx_SET (flags, tmp));

  operands[3] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
})

;; Avoid clearing a register between a flags setting comparison and its use,
;; i.e. prefer "xorl %eax,%eax; test/cmp" over "test/cmp; movl $0, %eax".
/* Flags-setting insn followed by a store of zero to general register
   operand 1.  Requires peep2_regno_dead_p (0, FLAGS_REG) and that operand 1
   is not mentioned in the flags source.  The preparation statements call
   ix86_expand_clear on operand 1 and build operand 2 as FLAGS_REG in the
   mode of operand 0; the replacement sets operand 2 from operand 0.  */
(define_peephole2
  [(set (reg FLAGS_REG) (match_operand 0))
   (set (match_operand:SWI 1 "general_reg_operand") (const_int 0))]
  "peep2_regno_dead_p (0, FLAGS_REG) && !reg_overlap_mentioned_p (operands[1], operands[0])"
  [(set (match_dup 2) (match_dup 0))]
{
  operands[2] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
  ix86_expand_clear (operands[1]);
})

;; When optimizing for size, zeroing memory should use a register.

/* Four consecutive zero stores with a scratch register available: clear
   the scratch once and store it to each of the four locations.  The UID
   of the last emitted store is saved in ix86_last_zero_store_uid, which
   the register-store peepholes further down compare against.  */
(define_peephole2
  [(match_scratch:SWI48 0 "r")
   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
   (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
   (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
  "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
  [(const_int 0)]
{
  ix86_expand_clear (operands[0]);
  emit_move_insn (operands[1], operands[0]);
  emit_move_insn (operands[2], operands[0]);
  emit_move_insn (operands[3], operands[0]);
  /* Remember the final store so a following peephole can extend the run.  */
  ix86_last_zero_store_uid
    = INSN_UID (emit_move_insn (operands[4], operands[0]));
  DONE;
})

/* Same as above for two consecutive zero stores.  */
(define_peephole2
  [(match_scratch:SWI48 0 "r")
   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
  "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
  [(const_int 0)]
{
  ix86_expand_clear (operands[0]);
  emit_move_insn (operands[1], operands[0]);
  ix86_last_zero_store_uid
    = INSN_UID (emit_move_insn (operands[2], operands[0]));
  DONE;
})

/* Same as above for a single zero store.  */
(define_peephole2
  [(match_scratch:SWI48 0 "r")
   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
  "optimize_insn_for_size_p () && peep2_regno_dead_p (0, FLAGS_REG)"
  [(const_int 0)]
{
  ix86_expand_clear (operands[0]);
  ix86_last_zero_store_uid
    = INSN_UID (emit_move_insn (operands[1], operands[0]));
  DONE;
})

/* Continuation forms.  The first matched insn is a register-to-memory
   store whose UID equals ix86_last_zero_store_uid, i.e. the final store
   emitted by one of the zero-store peepholes in this group.  The zero
   stores that follow it are rewritten to store the same register
   (operand 0), and the UID of the new final store is recorded again.
   This variant handles four following zero stores.  */
(define_peephole2
  [(set (match_operand:SWI48 5 "memory_operand")
        (match_operand:SWI48 0 "general_reg_operand"))
   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))
   (set (match_operand:SWI48 3 "memory_operand") (const_int 0))
   (set (match_operand:SWI48 4 "memory_operand") (const_int 0))]
  "optimize_insn_for_size_p () && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
  [(const_int 0)]
{
  emit_move_insn (operands[5], operands[0]);
  emit_move_insn (operands[1], operands[0]);
  emit_move_insn (operands[2], operands[0]);
  emit_move_insn (operands[3], operands[0]);
  ix86_last_zero_store_uid
    = INSN_UID (emit_move_insn (operands[4], operands[0]));
  DONE;
})

/* Continuation form with two following zero stores.  */
(define_peephole2
  [(set (match_operand:SWI48 3 "memory_operand")
        (match_operand:SWI48 0 "general_reg_operand"))
   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))
   (set (match_operand:SWI48 2 "memory_operand") (const_int 0))]
  "optimize_insn_for_size_p () && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
  [(const_int 0)]
{
  emit_move_insn (operands[3], operands[0]);
  emit_move_insn (operands[1], operands[0]);
  ix86_last_zero_store_uid
    = INSN_UID (emit_move_insn (operands[2], operands[0]));
  DONE;
})

/* Continuation form with one following zero store.  */
(define_peephole2
  [(set (match_operand:SWI48 2 "memory_operand")
        (match_operand:SWI48 0 "general_reg_operand"))
   (set (match_operand:SWI48 1 "memory_operand") (const_int 0))]
  "optimize_insn_for_size_p () && INSN_UID (peep2_next_insn (0)) == ix86_last_zero_store_uid"
  [(const_int 0)]
{
  emit_move_insn (operands[2], operands[0]);
  ix86_last_zero_store_uid
    = INSN_UID (emit_move_insn (operands[1], operands[0]));
  DONE;
})

;; Reload dislikes loading constants directly into class_likely_spilled
;; hard registers. Try to tidy things up here.

/* A load into general register operand 0 that is immediately copied to
   general register operand 2, where peep2_reg_dead_p (2, operands[0])
   holds: set operand 2 directly from the original source.  */
(define_peephole2
  [(set (match_operand:SWI 0 "general_reg_operand")
        (match_operand:SWI 1 "x86_64_general_operand"))
   (set (match_operand:SWI 2 "general_reg_operand")
        (match_dup 0))]
  "peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])

;; Misc patterns (?)

;; This pattern exists to put a dependency on all ebp-based memory accesses.
;; Otherwise there will be nothing to keep
;;
;; [(set (reg ebp) (reg esp))]
;; [(set (reg esp) (plus (reg esp) (const_int -160000)))
;; (clobber (eflags)]
;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
;;
;; in proper program order.

/* NOTE(review): in this copy several opcode-suffix braces are empty, some
   mode attribute strings are empty, one match_operand has an empty mode
   suffix and several pattern names end in an underscore.  That looks like
   angle-bracket iterator substitutions lost in extraction -- confirm
   against the upstream file before relying on these patterns.  */

/* Stack-pointer style addition with a memory barrier, expressed as the
   (clobber (mem:BLK (scratch))).  The output code picks mov, add/sub or
   lea from the insn type attribute, which is computed below from the
   alternative, TARGET_OPT_AGU and whether operand 2 is zero.  */
(define_insn "@pro_epilogue_adjust_stack_add_"
  [(set (match_operand:P 0 "register_operand" "=r,r")
        (plus:P (match_operand:P 1 "register_operand" "0,r")
                (match_operand:P 2 "" "r,l")))
   (clobber (reg:CC FLAGS_REG))
   (clobber (mem:BLK (scratch)))]
  ""
{
  switch (get_attr_type (insn))
    {
    case TYPE_IMOV:
      return "mov{}\t{%1, %0|%0, %1}";

    case TYPE_ALU:
      gcc_assert (rtx_equal_p (operands[0], operands[1]));
      /* Emit sub when the helper reports it negated the constant.  */
      if (x86_maybe_negate_const_int (&operands[2], mode))
        return "sub{}\t{%2, %0|%0, %2}";

      return "add{}\t{%2, %0|%0, %2}";

    default:
      /* For lea, print the whole source of the first set as an address.  */
      operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
      return "lea{}\t{%E2, %0|%0, %E2}";
    }
}
  [(set (attr "type")
        (cond [(and (eq_attr "alternative" "0")
                    (not (match_test "TARGET_OPT_AGU")))
                 (const_string "alu")
               (match_operand: 2 "const0_operand")
                 (const_string "imov")
              ]
              (const_string "lea")))
   (set (attr "length_immediate")
        (cond [(eq_attr "type" "imov")
                 (const_string "0")
               (and (eq_attr "type" "alu")
                    (match_operand 2 "const128_operand"))
                 (const_string "1")
              ]
              (const_string "*")))
   (set_attr "mode" "")])

/* Register subtraction counterpart of the pattern above; always a sub
   with operand 1 tied to operand 0.  */
(define_insn "@pro_epilogue_adjust_stack_sub_"
  [(set (match_operand:P 0 "register_operand" "=r")
        (minus:P (match_operand:P 1 "register_operand" "0")
                 (match_operand:P 2 "register_operand" "r")))
   (clobber (reg:CC FLAGS_REG))
   (clobber (mem:BLK (scratch)))]
  ""
  "sub{}\t{%2, %0|%0, %2}"
  [(set_attr "type" "alu")
   (set_attr "mode" "")])

/* Stack probe call to ___chkstk_ms.  Operand 0 uses the a constraint and
   operand 1 is tied to it; the insn is enabled only when
   ix86_target_stack_probe () is true.  */
(define_insn "@allocate_stack_worker_probe_"
  [(set (match_operand:P 0 "register_operand" "=a")
        (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
                            UNSPECV_STACK_PROBE))
   (clobber (reg:CC FLAGS_REG))]
  "ix86_target_stack_probe ()"
  "call\t___chkstk_ms"
  [(set_attr "type" "multi")
   (set_attr "length" "5")])

/* Dynamic stack allocation expander.  Operand 1 is the size and operand 0
   receives virtual_stack_dynamic_rtx after the stack pointer has been
   decremented.  Constant sizes below CHECK_STACK_LIMIT skip the probe.  */
(define_expand "allocate_stack"
  [(match_operand 0 "register_operand")
   (match_operand 1 "general_operand")]
  "ix86_target_stack_probe ()"
{
  rtx x;

/* With the default of 0 the constant-size shortcut below never applies.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT 0
#endif

  if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1])
      && INTVAL (operands[1]) < CHECK_STACK_LIMIT)
    x = operands[1];
  else
    {
      x = copy_to_mode_reg (Pmode, operands[1]);

      emit_insn (gen_allocate_stack_worker_probe (Pmode, x, x));
    }

  x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, x,
                           stack_pointer_rtx, 0, OPTAB_DIRECT);

  /* expand_simple_binop may return a different rtx than the requested
     target; copy it into the stack pointer in that case.  */
  if (x != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, x);

  emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
  DONE;
})

/* Probe one stack location: emits probe_stack_1 in word_mode on the
   memory operand with a zero second operand.  */
(define_expand "probe_stack"
  [(match_operand 0 "memory_operand")]
  ""
{
  emit_insn (gen_probe_stack_1 (word_mode, operands[0], const0_rtx));
  DONE;
})

;; Use OR for stack probes, this is shorter.
(define_insn "@probe_stack_1_"
  [(set (match_operand:W 0 "memory_operand" "=m")
        (unspec:W [(match_operand:W 1 "const0_operand")]
                  UNSPEC_PROBE_STACK))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "or{}\t{%1, %0|%0, %1}"
  [(set_attr "type" "alu1")
   (set_attr "mode" "")
   (set_attr "length_immediate" "1")])

/* Combined stack adjustment and probing; the assembly is produced by
   output_adjust_stack_and_probe.  The pattern also records the stack
   pointer decrement by constant operand 2.  */
(define_insn "@adjust_stack_and_probe_"
  [(set (match_operand:P 0 "register_operand" "=r")
        (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
          UNSPECV_PROBE_STACK_RANGE))
   (set (reg:P SP_REG)
        (minus:P (reg:P SP_REG) (match_operand:P 2 "const_int_operand")))
   (clobber (reg:CC FLAGS_REG))
   (clobber (mem:BLK (scratch)))]
  ""
  "* return output_adjust_stack_and_probe (operands[0]);"
  [(set_attr "type" "multi")])

/* Probe a range of stack; the assembly is produced by
   output_probe_stack_range from operand 0 and constant operand 2.  */
(define_insn "@probe_stack_range_"
  [(set (match_operand:P 0 "register_operand" "=r")
        (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
                            (match_operand:P 2 "const_int_operand")]
          UNSPECV_PROBE_STACK_RANGE))
   (clobber (reg:CC FLAGS_REG))]
  ""
  "* return output_probe_stack_range (operands[0], operands[2]);"
  [(set_attr "type" "multi")])

/* For 32-bit PIC code, reload pic_offset_table_rtx at a builtin setjmp
   receiver.  Mach-O targets use a labelled GOT set followed by a
   subtraction of a machopic offset; other targets emit set_got.  */
(define_expand "builtin_setjmp_receiver"
  [(label_ref (match_operand 0))]
  "!TARGET_64BIT && flag_pic"
{
#if TARGET_MACHO
  if (TARGET_MACHO)
    {
      rtx xops[3];
      rtx_code_label *label_rtx = gen_label_rtx ();
      emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
      xops[0] = xops[1] = pic_offset_table_rtx;
      xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx));
      ix86_expand_binary_operator (MINUS, SImode, xops);
    }
  else
#endif
    emit_insn (gen_set_got (pic_offset_table_rtx));
  DONE;
})

/* Save the stack pointer (operand 1) into the nonlocal save area
   (operand 0).  When flag_cf_protection has CF_RETURN set, the save area
   holds the shadow stack pointer in its first word and the stack pointer
   one word further on.  */
(define_expand "save_stack_nonlocal"
  [(set (match_operand 0 "memory_operand")
        (match_operand 1 "register_operand"))]
  ""
{
  rtx stack_slot;

  if (flag_cf_protection & CF_RETURN)
    {
      /* Copy shadow stack pointer to the first slot
         and stack pointer to the second slot.  */
      rtx ssp_slot = adjust_address (operands[0], word_mode, 0);
      stack_slot = adjust_address (operands[0], Pmode, UNITS_PER_WORD);

      /* The register is zeroed before rdssp is emitted on it.  */
      rtx reg_ssp = force_reg (word_mode, const0_rtx);
      emit_insn (gen_rdssp (word_mode, reg_ssp, reg_ssp));
      emit_move_insn (ssp_slot, reg_ssp);
    }
  else
    stack_slot = adjust_address (operands[0], Pmode, 0);

  emit_move_insn (stack_slot, operands[1]);
  DONE;
})

/* Restore the stack pointer (operand 0) from the nonlocal save area
   (operand 1).  With CF_RETURN protection the shadow stack pointer is
   adjusted first: the difference between the current and the saved value
   is converted to a word count and applied with incssp, in steps of 255
   while the remaining count is above 255.  */
(define_expand "restore_stack_nonlocal"
  [(set (match_operand 0 "register_operand" "")
        (match_operand 1 "memory_operand" ""))]
  ""
{
  rtx stack_slot;

  if (flag_cf_protection & CF_RETURN)
    {
      /* Restore shadow stack pointer from the first slot
         and stack pointer from the second slot.  */
      rtx ssp_slot = adjust_address (operands[1], word_mode, 0);
      stack_slot = adjust_address (operands[1], Pmode, UNITS_PER_WORD);

      /* Get the current shadow stack pointer.  The code below will check if
         SHSTK feature is enabled.  If it is not enabled the RDSSP instruction
         is a NOP.  */
      rtx reg_ssp = force_reg (word_mode, const0_rtx);
      emit_insn (gen_rdssp (word_mode, reg_ssp, reg_ssp));

      /* Compare through subtraction the saved and the current ssp
         to decide if ssp has to be adjusted.  */
      reg_ssp = expand_simple_binop (word_mode, MINUS,
                                     reg_ssp, ssp_slot,
                                     reg_ssp, 1, OPTAB_DIRECT);

      /* Compare and jump over adjustment code.  */
      rtx noadj_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (reg_ssp, const0_rtx, EQ, NULL_RTX,
                               word_mode, 1, noadj_label);

      /* Compute the number of frames to adjust.  */
      rtx reg_adj = gen_lowpart (ptr_mode, reg_ssp);
      rtx reg_adj_neg = expand_simple_unop (ptr_mode, NEG, reg_adj,
                                            NULL_RTX, 1);

      reg_adj = expand_simple_binop (ptr_mode, LSHIFTRT, reg_adj_neg,
                                     GEN_INT (exact_log2 (UNITS_PER_WORD)),
                                     reg_adj, 1, OPTAB_DIRECT);

      /* Check if number of frames <= 255 so no loop is needed.  */
      rtx inc_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (reg_adj, GEN_INT (255), LEU, NULL_RTX,
                               ptr_mode, 1, inc_label);

      /* Adjust the ssp in a loop.  */
      rtx loop_label = gen_label_rtx ();
      emit_label (loop_label);
      LABEL_NUSES (loop_label) = 1;

      rtx reg_255 = force_reg (word_mode, GEN_INT (255));
      emit_insn (gen_incssp (word_mode, reg_255));

      reg_adj = expand_simple_binop (ptr_mode, MINUS,
                                     reg_adj, GEN_INT (255),
                                     reg_adj, 1, OPTAB_DIRECT);

      /* Compare and jump to the loop label.  */
      emit_cmp_and_jump_insns (reg_adj, GEN_INT (255), GTU, NULL_RTX,
                               ptr_mode, 1, loop_label);

      emit_label (inc_label);
      LABEL_NUSES (inc_label) = 1;

      /* Apply the remaining count.  reg_adj was created as the ptr_mode
         low part of reg_ssp and is the target of the updates above.  */
      emit_insn (gen_incssp (word_mode, reg_ssp));

      emit_label (noadj_label);
      LABEL_NUSES (noadj_label) = 1;
    }
  else
    stack_slot = adjust_address (operands[1], Pmode, 0);

  emit_move_insn (operands[0], stack_slot);
  DONE;
})

/* Stack protector set-up: copy the guard (operand 1) into the canary slot
   (operand 0) through a fresh word_mode scratch register.  */
(define_expand "stack_protect_set"
  [(match_operand 0 "memory_operand")
   (match_operand 1 "memory_operand")]
  ""
{
  rtx scratch = gen_reg_rtx (word_mode);

  emit_insn (gen_stack_protect_set_1
             (ptr_mode, word_mode, operands[0], operands[1], scratch));
  DONE;
})

/* Load the guard into register operand 2, store it to operand 0, then
   zero the register, with xor or with mov $0 depending on TARGET_USE_MOV0
   and size optimization.  The three instructions are one insn.  */
(define_insn "@stack_protect_set_1__"
  [(set (match_operand:PTR 0 "memory_operand" "=m")
        (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
                    UNSPEC_SP_SET))
   (set (match_operand:W 2 "register_operand" "=&r") (const_int 0))
   (clobber (reg:CC FLAGS_REG))]
  ""
{
  output_asm_insn ("mov{}\t{%1, %2|%2, %1}", operands);
  output_asm_insn ("mov{}\t{%2, %0|%0, %2}", operands);
  if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
    return "xor{l}\t%k2, %k2";
  else
    return "mov{l}\t{$0, %k2|%k2, 0}";
}
  [(set_attr "type" "multi")])

;; Patterns and peephole2s to optimize stack_protect_set_1_
;; immediately followed by *mov{s,d}i_internal, where we can avoid
;; the xor{l} above. We don't split this, so that scheduling or
;; anything else doesn't separate the *stack_protect_set* pattern from
;; the set of the register that overwrites the register with a new value.

/* Canary set followed by zeroing another general register (operand 3):
   use operand 3, widened to word_mode, as the register that the canary
   set clears, in place of operand 2.  */
(define_peephole2
  [(parallel [(set (match_operand:PTR 0 "memory_operand")
                   (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
                               UNSPEC_SP_SET))
              (set (match_operand 2 "general_reg_operand") (const_int 0))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_operand 3 "general_reg_operand")
        (match_operand 4 "const0_operand"))]
  "GET_MODE (operands[2]) == word_mode && GET_MODE_SIZE (GET_MODE (operands[3])) <= UNITS_PER_WORD && peep2_reg_dead_p (0, operands[3]) && peep2_reg_dead_p (1, operands[2])"
  [(parallel [(set (match_dup 0)
                   (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
              (set (match_dup 3) (const_int 0))
              (clobber (reg:CC FLAGS_REG))])]
  "operands[3] = gen_lowpart (word_mode, operands[3]);")

/* Canary set through register operand 1, which is then loaded with SImode
   operand 2 by lea or mov.  */
(define_insn "*stack_protect_set_2__si"
  [(set (match_operand:PTR 0 "memory_operand" "=m")
        (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
                    UNSPEC_SP_SET))
   (set (match_operand:SI 1 "register_operand" "=&r")
        (match_operand:SI 2 "general_operand" "g"))]
  "reload_completed"
{
  output_asm_insn ("mov{}\t{%3, %1|%1, %3}", operands);
  output_asm_insn ("mov{}\t{%1, %0|%0, %1}", operands);
  if (pic_32bit_operand (operands[2], SImode)
      || ix86_use_lea_for_mov (insn, operands + 1))
    return "lea{l}\t{%E2, %1|%1, %E2}";
  else
    return "mov{l}\t{%2, %1|%1, %2}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "24")])

/* DImode variant of the pattern above.  The final instruction depends on
   the matched alternative: a 32-bit mov for alternative 0, movabs for
   alternative 2, otherwise lea or a 64-bit mov.  */
(define_insn "*stack_protect_set_2__di"
  [(set (match_operand:PTR 0 "memory_operand" "=m,m,m")
        (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m,m,m")]
                    UNSPEC_SP_SET))
   (set (match_operand:DI 1 "register_operand" "=&r,&r,&r")
        (match_operand:DI 2 "general_operand" "Z,rem,i"))]
  "TARGET_64BIT && reload_completed"
{
  output_asm_insn ("mov{}\t{%3, %1|%1, %3}", operands);
  output_asm_insn ("mov{}\t{%1, %0|%0, %1}", operands);
  if (pic_32bit_operand (operands[2], DImode))
    return "lea{q}\t{%E2, %1|%1, %E2}";
  else if (which_alternative == 0)
    return "mov{l}\t{%k2, %k1|%k1, %k2}";
  else if (which_alternative == 2)
    return "movabs{q}\t{%2, %1|%1, %2}";
  else if (ix86_use_lea_for_mov (insn, operands + 1))
    return "lea{q}\t{%E2, %1|%1, %E2}";
  else
    return "mov{q}\t{%2, %1|%1, %2}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "24")])

/* Canary set followed by a general move into register operand 3: merge
   the move into the canary set so operand 3 serves as its register.  */
(define_peephole2
  [(parallel [(set (match_operand:PTR 0 "memory_operand")
                   (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
                               UNSPEC_SP_SET))
              (set (match_operand 2 "general_reg_operand") (const_int 0))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_operand:SWI48 3 "general_reg_operand")
        (match_operand:SWI48 4 "general_gr_operand"))]
  "GET_MODE (operands[2]) == word_mode && peep2_reg_dead_p (0, operands[3]) && peep2_reg_dead_p (1, operands[2])"
  [(parallel [(set (match_dup 0)
                   (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
              (set (match_dup 3) (match_dup 4))])])

/* Same merge when the move comes before the canary set.  Operand 3 must
   additionally not be mentioned in either memory operand.  */
(define_peephole2
  [(set (match_operand:SWI48 3 "general_reg_operand")
        (match_operand:SWI48 4 "general_gr_operand"))
   (parallel [(set (match_operand:PTR 0 "memory_operand")
                   (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
                               UNSPEC_SP_SET))
              (set (match_operand 2 "general_reg_operand") (const_int 0))
              (clobber (reg:CC FLAGS_REG))])]
  "GET_MODE (operands[2]) == word_mode && peep2_reg_dead_p (0, operands[3]) && peep2_reg_dead_p (2, operands[2]) && !reg_mentioned_p (operands[3], operands[0]) && !reg_mentioned_p (operands[3], operands[1])"
  [(parallel [(set (match_dup 0)
                   (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
              (set (match_dup 3) (match_dup 4))])])

/* Canary set through register operand 1, which is then loaded with an
   address (operand 2) by lea.  A 32-bit lea is used when the address is
   an SImode_address_operand, which is asserted to occur only for
   TARGET_64BIT.  */
(define_insn "*stack_protect_set_3__"
  [(set (match_operand:PTR 0 "memory_operand" "=m")
        (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
                    UNSPEC_SP_SET))
   (set (match_operand:SWI48 1 "register_operand" "=&r")
        (match_operand:SWI48 2 "address_no_seg_operand" "Ts"))]
  ""
{
  output_asm_insn ("mov{}\t{%3, %1|%1, %3}", operands);
  output_asm_insn ("mov{}\t{%1, %0|%0, %1}", operands);
  if (SImode_address_operand (operands[2], VOIDmode))
    {
      gcc_assert (TARGET_64BIT);
      return "lea{l}\t{%E2, %k1|%k1, %E2}";
    }
  else
    return "lea{}\t{%E2, %1|%1, %E2}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "24")])

/* Canary set followed by an address load into register operand 3: merge
   the two into one parallel, as for the general move case.  */
(define_peephole2
  [(parallel [(set (match_operand:PTR 0 "memory_operand")
                   (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
                               UNSPEC_SP_SET))
              (set (match_operand 2 "general_reg_operand") (const_int 0))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_operand:SWI48 3 "general_reg_operand")
        (match_operand:SWI48 4 "address_no_seg_operand"))]
  "GET_MODE (operands[2]) == word_mode && peep2_reg_dead_p (0, operands[3]) && peep2_reg_dead_p (1, operands[2])"
  [(parallel [(set (match_dup 0)
                   (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
              (set (match_dup 3) (match_dup 4))])])

/* Canary set through register operand 1, which is then loaded with the
   zero extension of SImode operand 2, using a 32-bit lea or mov.  */
(define_insn "*stack_protect_set_4z__di"
  [(set (match_operand:PTR 0 "memory_operand" "=m")
        (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
                    UNSPEC_SP_SET))
   (set (match_operand:DI 1 "register_operand" "=&r")
        (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))]
  "TARGET_64BIT && reload_completed"
{
  output_asm_insn ("mov{}\t{%3, %1|%1, %3}", operands);
  output_asm_insn ("mov{}\t{%1, %0|%0, %1}", operands);
  if (ix86_use_lea_for_mov (insn, operands + 1))
    return "lea{l}\t{%E2, %k1|%k1, %E2}";
  else
    return "mov{l}\t{%2, %k1|%k1, %2}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "24")])

/* Sign-extending counterpart of the pattern above.  */
(define_insn "*stack_protect_set_4s__di"
  [(set (match_operand:PTR 0 "memory_operand" "=m")
        (unspec:PTR [(match_operand:PTR 3 "memory_operand" "m")]
                    UNSPEC_SP_SET))
   (set (match_operand:DI 1 "register_operand" "=&r")
        (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))]
  "TARGET_64BIT && reload_completed"
{
  output_asm_insn ("mov{}\t{%3, %1|%1, %3}", operands);
  output_asm_insn ("mov{}\t{%1, %0|%0, %1}", operands);
  return "movs{lq|x}\t{%2, %1|%1, %2}";
}
  [(set_attr "type" "multi")
   (set_attr "length" "24")])

/* Canary set followed by an SImode to DImode extension into register
   operand 3: merge the two into one parallel.  */
(define_peephole2
  [(parallel [(set (match_operand:PTR 0 "memory_operand")
                   (unspec:PTR [(match_operand:PTR 1 "memory_operand")]
                               UNSPEC_SP_SET))
              (set (match_operand 2 "general_reg_operand") (const_int 0))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_operand:DI 3 "general_reg_operand")
        (any_extend:DI
          (match_operand:SI 4 "nonimmediate_gr_operand")))]
  "TARGET_64BIT && GET_MODE (operands[2]) == word_mode && peep2_reg_dead_p (0, operands[3]) && peep2_reg_dead_p (1, operands[2])"
  [(parallel [(set (match_dup 0)
                   (unspec:PTR [(match_dup 1)] UNSPEC_SP_SET))
              (set (match_dup 3)
                   (any_extend:DI (match_dup 4)))])])

/* Stack protector check: compare the canary slot (operand 0) with the
   guard (operand 1) into CCZmode flags and branch to the label in
   operand 2 on equality.  */
(define_expand "stack_protect_test"
  [(match_operand 0 "memory_operand")
   (match_operand 1 "memory_operand")
   (match_operand 2)]
  ""
{
  rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);

  emit_insn (gen_stack_protect_test_1
             (ptr_mode, flags, operands[0], operands[1]));

  emit_jump_insn (gen_cbranchcc4 (gen_rtx_EQ (VOIDmode, flags, const0_rtx),
                                  flags, const0_rtx, operands[2]));
  DONE;
})

/* Load operand 1 into scratch operand 3 and subtract operand 2 from it,
   leaving the comparison result in the flags.  */
(define_insn "@stack_protect_test_1_"
  [(set (match_operand:CCZ 0 "flags_reg_operand")
        (unspec:CCZ [(match_operand:PTR 1 "memory_operand" "m")
                     (match_operand:PTR 2 "memory_operand" "m")]
                    UNSPEC_SP_TEST))
   (clobber (match_scratch:PTR 3 "=&r"))]
  ""
{
  output_asm_insn ("mov{}\t{%1, %3|%3, %1}", operands);
  return "sub{}\t{%2, %3|%3, %2}";
}
  [(set_attr "type" "multi")])

;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
;; Do not split instructions with mask registers.

/* After reload, rewrite a promotable HImode or QImode binary operation on
   general registers as the same operation in SImode.  Operand 2 is left
   in its original mode when the operator is ASHIFT.  */
(define_split
  [(set (match_operand 0 "general_reg_operand")
        (match_operator 3 "promotable_binary_operator"
           [(match_operand 1 "general_reg_operand")
            (match_operand 2 "aligned_operand")]))
   (clobber (reg:CC FLAGS_REG))]
  "! TARGET_PARTIAL_REG_STALL && reload_completed && ((GET_MODE (operands[0]) == HImode && ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX) /* ??? next two lines just !satisfies_constraint_K (...)
*/ || !CONST_INT_P (operands[2]) || satisfies_constraint_K (operands[2]))) || (GET_MODE (operands[0]) == QImode && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))"
  [(parallel [(set (match_dup 0)
                   (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
  if (GET_CODE (operands[3]) != ASHIFT)
    operands[2] = gen_lowpart (SImode, operands[2]);
  /* Change the mode on a copy so the matched operator rtx is untouched.  */
  operands[3] = shallow_copy_rtx (operands[3]);
  PUT_MODE (operands[3], SImode);
})

; Promote the QImode tests, as i386 has encoding of the AND
; instruction with 32-bit sign-extended immediate and thus the
; instruction size is unchanged, except in the %eax case for
; which it is increased by one byte, hence the ! optimize_size.

/* AND that sets both the flags and a register: redo it in SImode.  The
   constant is first masked to the original mode of operand 1.  */
(define_split
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 2 "compare_operator"
          [(and (match_operand 3 "aligned_operand")
                (match_operand 4 "const_int_operand"))
           (const_int 0)]))
   (set (match_operand 1 "register_operand")
        (and (match_dup 3) (match_dup 4)))]
  "! TARGET_PARTIAL_REG_STALL && reload_completed && optimize_insn_for_speed_p () && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX) || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode)) /* Ensure that the operand will remain sign-extended immediate. */ && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ?
CCNOmode : CCZmode)"
  [(parallel [(set (match_dup 0)
                   (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4))
                                    (const_int 0)]))
              (set (match_dup 1)
                   (and:SI (match_dup 3) (match_dup 4)))])]
{
  operands[4]
    = gen_int_mode (INTVAL (operands[4])
                    & GET_MODE_MASK (GET_MODE (operands[1])), SImode);
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[3] = gen_lowpart (SImode, operands[3]);
})

; Don't promote the QImode tests, as i386 doesn't have encoding of
; the TEST instruction with 32-bit sign-extended immediate and thus
; the instruction size would at least double, which is not what we
; want even with ! optimize_size.

/* Flags-only HImode AND test: redo it in SImode with the constant masked
   to the mode of operand 2.  */
(define_split
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 1 "compare_operator"
          [(and (match_operand:HI 2 "aligned_operand")
                (match_operand:HI 3 "const_int_operand"))
           (const_int 0)]))]
  "! TARGET_PARTIAL_REG_STALL && reload_completed && ! TARGET_FAST_PREFIX && optimize_insn_for_speed_p () /* Ensure that the operand will remain sign-extended immediate. */ && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)"
  [(set (match_dup 0)
        (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
                         (const_int 0)]))]
{
  operands[3]
    = gen_int_mode (INTVAL (operands[3])
                    & GET_MODE_MASK (GET_MODE (operands[2])), SImode);
  operands[2] = gen_lowpart (SImode, operands[2]);
})

/* After reload, perform HImode negation, and QImode negation when
   promotion is enabled or optimizing for size, in SImode.  */
(define_split
  [(set (match_operand 0 "register_operand")
        (neg (match_operand 1 "register_operand")))
   (clobber (reg:CC FLAGS_REG))]
  "! TARGET_PARTIAL_REG_STALL && reload_completed && (GET_MODE (operands[0]) == HImode || (GET_MODE (operands[0]) == QImode && (TARGET_PROMOTE_QImode || optimize_insn_for_size_p ())))"
  [(parallel [(set (match_dup 0)
                   (neg:SI (match_dup 1)))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
})

;; Do not split instructions with mask regs.

/* Same promotion for one-complement (not) on general registers.  */
(define_split
  [(set (match_operand 0 "general_reg_operand")
        (not (match_operand 1 "general_reg_operand")))]
  "!
TARGET_PARTIAL_REG_STALL && reload_completed && (GET_MODE (operands[0]) == HImode || (GET_MODE (operands[0]) == QImode && (TARGET_PROMOTE_QImode || optimize_insn_for_size_p ())))"
  [(set (match_dup 0)
        (not:SI (match_dup 1)))]
{
  operands[0] = gen_lowpart (SImode, operands[0]);
  operands[1] = gen_lowpart (SImode, operands[1]);
})

;; RTL Peephole optimizations, run before sched2. These primarily look to
;; transform a complex memory operation into two memory to register operations.

;; Don't push memory operands

/* Push of a memory operand with a scratch register available: load into
   the scratch, then push the scratch.  Skipped for frame-related insns,
   when TARGET_PUSH_MEMORY is set and when optimizing for size.  */
(define_peephole2
  [(set (match_operand:SWI 0 "push_operand")
        (match_operand:SWI 1 "memory_operand"))
   (match_scratch:SWI 2 "")]
  "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ()) && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (match_dup 2))])

;; We need to handle SFmode only, because DFmode and XFmode are split to
;; SImode pushes.
(define_peephole2
  [(set (match_operand:SF 0 "push_operand")
        (match_operand:SF 1 "memory_operand"))
   (match_scratch:SF 2 "r")]
  "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ()) && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (match_dup 2))])

;; Don't move an immediate directly to memory when the instruction
;; gets too big, or if LCP stalls are a problem for 16-bit moves.
/* Store of zero to memory with a scratch register available: clear the
   scratch (as an SImode set with a flags clobber) and store the scratch.
   Requires peep2_regno_dead_p (0, FLAGS_REG).
   NOTE(review): the bare identifier mode in the condition and the empty
   scratch constraint look like iterator substitutions lost in extraction
   -- confirm against the upstream file.  */
(define_peephole2
  [(match_scratch:SWI124 1 "")
   (set (match_operand:SWI124 0 "memory_operand")
        (const_int 0))]
  "optimize_insn_for_speed_p () && ((mode == HImode && TARGET_LCP_STALL) || (!TARGET_USE_MOV0 && TARGET_SPLIT_LONG_MOVES && get_attr_length (insn) >= ix86_cur_cost ()->large_insn)) && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 2) (const_int 0))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 0) (match_dup 1))]
  "operands[2] = gen_lowpart (SImode, operands[1]);")

/* Store of an arbitrary immediate to memory with a scratch register
   available: load the immediate into the scratch and store the scratch.
   No flags requirement here since the replacement has no flags clobber.  */
(define_peephole2
  [(match_scratch:SWI124 2 "")
   (set (match_operand:SWI124 0 "memory_operand")
        (match_operand:SWI124 1 "immediate_operand"))]
  "optimize_insn_for_speed_p () && ((mode == HImode && TARGET_LCP_STALL) || (TARGET_SPLIT_LONG_MOVES && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (match_dup 2))])

;; Don't compare memory with zero, load and use a test instead.

/* SImode comparison of memory against zero with a scratch register
   available: load the memory into the scratch and compare the scratch
   against zero with the same comparison operator.  */
(define_peephole2
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 1 "compare_operator"
          [(match_operand:SI 2 "memory_operand")
           (const_int 0)]))
   (match_scratch:SI 3 "r")]
  "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)"
  [(set (match_dup 3) (match_dup 2))
   (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))])

;; NOT is not pairable on Pentium, while XOR is, but one byte longer.
;; Don't split NOTs with a displacement operand, because resulting XOR
;; will not be pairable anyway.
;;
;; On AMD K6, NOT is vector decoded with memory operand that cannot be
;; represented using a modRM byte. The XOR replacement is long decoded,
;; so this split helps here as well.
;;
;; Note: Can't do this as a regular split because we can't get proper
;; lifetime information then.
/* Replace NOT by XOR with -1, which clobbers the flags, so FLAGS_REG
   must pass peep2_regno_dead_p (0, ...).  Applies when optimizing for
   speed and one of the two target conditions in the string below holds.
   NOTE(review): the bare identifier mode in the condition looks like an
   iterator substitution lost in extraction -- confirm against the
   upstream file.  */
(define_peephole2
  [(set (match_operand:SWI124 0 "nonimmediate_gr_operand")
        (not:SWI124 (match_operand:SWI124 1 "nonimmediate_gr_operand")))]
  "optimize_insn_for_speed_p () && ((TARGET_NOT_UNPAIRABLE && (!MEM_P (operands[0]) || !memory_displacement_operand (operands[0], mode))) || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], mode))) && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0)
                   (xor:SWI124 (match_dup 1) (const_int -1)))
              (clobber (reg:CC FLAGS_REG))])])

;; Non pairable "test imm, reg" instructions can be translated to
;; "and imm, reg" if reg dies. The "and" form is also shorter (one
;; byte opcode instead of two, have a short form for byte operands),
;; so do it for other CPUs as well. Given that the value was dead,
;; this should not create any new dependencies. Pass on the sub-word
;; versions if we're concerned about partial register stalls.

/* SImode form.  The register must pass peep2_reg_dead_p (1, ...), and
   when it is AX_REG the immediate must satisfy constraint K.  The
   replacement performs the AND into the register and sets the flags from
   the same AND.  */
(define_peephole2
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 1 "compare_operator"
          [(and:SI (match_operand:SI 2 "register_operand")
                   (match_operand:SI 3 "immediate_operand"))
           (const_int 0)]))]
  "ix86_match_ccmode (insn, CCNOmode) && (REGNO (operands[2]) != AX_REG || satisfies_constraint_K (operands[3])) && peep2_reg_dead_p (1, operands[2])"
  [(parallel
     [(set (match_dup 0)
           (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
                            (const_int 0)]))
      (set (match_dup 2)
           (and:SI (match_dup 2) (match_dup 3)))])])

;; We don't need to handle HImode case, because it will be promoted to SImode
;; on ! TARGET_PARTIAL_REG_STALL

/* QImode form; never applied to AX_REG.  */
(define_peephole2
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 1 "compare_operator"
          [(and:QI (match_operand:QI 2 "register_operand")
                   (match_operand:QI 3 "immediate_operand"))
           (const_int 0)]))]
  "!
TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) && REGNO (operands[2]) != AX_REG && peep2_reg_dead_p (1, operands[2])"
  [(parallel
     [(set (match_dup 0)
           (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
                            (const_int 0)]))
      (set (match_dup 2)
           (and:QI (match_dup 2) (match_dup 3)))])])

/* Form for a test of the 8-bit field at bit position 8 of a register
   (extract with size 8 and position 8).  The replacement writes the AND
   result back through a zero_extract of the same field.  */
(define_peephole2
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 1 "compare_operator"
          [(and:QI
             (subreg:QI
               (match_operator:SWI248 4 "extract_operator"
                 [(match_operand 2 "int248_register_operand")
                  (const_int 8)
                  (const_int 8)]) 0)
             (match_operand 3 "const_int_operand"))
           (const_int 0)]))]
  "! TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) && REGNO (operands[2]) != AX_REG && peep2_reg_dead_p (1, operands[2])"
  [(parallel
     [(set (match_dup 0)
           (match_op_dup 1
             [(and:QI
                (subreg:QI
                  (match_op_dup 4 [(match_dup 2)
                                   (const_int 8)
                                   (const_int 8)]) 0)
                (match_dup 3))
              (const_int 0)]))
      (set (zero_extract:SWI248 (match_dup 2)
                                (const_int 8)
                                (const_int 8))
           (subreg:SWI248
             (and:QI
               (subreg:QI
                 (match_op_dup 4 [(match_dup 2)
                                  (const_int 8)
                                  (const_int 8)]) 0)
               (match_dup 3)) 0))])])

;; Don't do logical operations with memory inputs.
/* Register-destination arithmetic or logical operation whose second
   input is memory, with a scratch register available: load the memory
   into the scratch first and operate on the two registers.  Disabled
   when TARGET_READ_MODIFY is set or when optimizing for size.
   NOTE(review): the empty scratch constraint looks like an iterator
   substitution lost in extraction -- confirm against the upstream file.  */
(define_peephole2
  [(match_scratch:SWI 2 "")
   (parallel [(set (match_operand:SWI 0 "register_operand")
                   (match_operator:SWI 3 "arith_or_logical_operator"
                     [(match_dup 0)
                      (match_operand:SWI 1 "memory_operand")]))
              (clobber (reg:CC FLAGS_REG))])]
  "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
  [(set (match_dup 2) (match_dup 1))
   (parallel [(set (match_dup 0)
                   (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
              (clobber (reg:CC FLAGS_REG))])])

/* Same transformation with the memory operand as the first input; the
   operand order of the operator is preserved in the replacement.  */
(define_peephole2
  [(match_scratch:SWI 2 "")
   (parallel [(set (match_operand:SWI 0 "register_operand")
                   (match_operator:SWI 3 "arith_or_logical_operator"
                     [(match_operand:SWI 1 "memory_operand")
                      (match_dup 0)]))
              (clobber (reg:CC FLAGS_REG))])]
  "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
  [(set (match_dup 2) (match_dup 1))
   (parallel [(set (match_dup 0)
                   (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
              (clobber (reg:CC FLAGS_REG))])])

;; Prefer Load+RegOp to Mov+MemOp. Watch out for cases when
;; the memory address refers to the destination of the load!
/* General-register form: a register copy followed by a commutative
   operation with memory is turned into a load of the memory followed by
   the operation with the copied register.  The load source (operand 4)
   is the memory operand passed through ix86_replace_reg_with_reg with
   operand 0 and operand 1; this presumably rewrites uses of the
   destination register inside the address, per the warning in the
   comment preceding this group -- confirm against that helper.
   NOTE(review): the bare identifier mode in the condition looks like an
   iterator substitution lost in extraction -- confirm against the
   upstream file.  */
(define_peephole2
  [(set (match_operand:SWI 0 "general_reg_operand")
        (match_operand:SWI 1 "general_reg_operand"))
   (parallel [(set (match_dup 0)
                   (match_operator:SWI 3 "commutative_operator"
                     [(match_dup 0)
                      (match_operand:SWI 2 "memory_operand")]))
              (clobber (reg:CC FLAGS_REG))])]
  "REGNO (operands[0]) != REGNO (operands[1]) && (mode != QImode || any_QIreg_operand (operands[1], QImode))"
  [(set (match_dup 0) (match_dup 4))
   (parallel [(set (match_dup 0)
                   (match_op_dup 3 [(match_dup 0) (match_dup 1)]))
              (clobber (reg:CC FLAGS_REG))])]
{
  operands[4]
    = ix86_replace_reg_with_reg (operands[2], operands[0], operands[1]);
})

/* MMX-register form: the memory operand is loaded directly, without any
   address rewriting and without a flags clobber.  */
(define_peephole2
  [(set (match_operand 0 "mmx_reg_operand")
        (match_operand 1 "mmx_reg_operand"))
   (set (match_dup 0)
        (match_operator 3 "commutative_operator"
          [(match_dup 0)
           (match_operand 2 "memory_operand")]))]
  "REGNO (operands[0]) != REGNO (operands[1])"
  [(set (match_dup 0) (match_dup 2))
   (set (match_dup 0)
        (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])

/* SSE-register form.  The extra condition is explained by the comment
   embedded in the condition string.  */
(define_peephole2
  [(set (match_operand 0 "sse_reg_operand")
        (match_operand 1 "sse_reg_operand"))
   (set (match_dup 0)
        (match_operator 3 "commutative_operator"
          [(match_dup 0)
           (match_operand 2 "memory_operand")]))]
  "REGNO (operands[0]) != REGNO (operands[1]) /* Punt if operands[1] is %[xy]mm16+ and AVX512BW is not enabled, as EVEX encoded vpadd[bw], vpmullw, vpmin[su][bw] and vpmax[su][bw] instructions require AVX512BW and AVX512VL, but with the original instructions it might require just AVX512VL. AVX512VL is implied from TARGET_HARD_REGNO_MODE_OK. */ && (!EXT_REX_SSE_REGNO_P (REGNO (operands[1])) || TARGET_AVX512BW || GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (operands[0]))) > 2 || logic_operator (operands[3], VOIDmode))"
  [(set (match_dup 0) (match_dup 2))
   (set (match_dup 0)
        (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])

; Don't do logical operations with memory outputs
;
; These two don't make sense for PPro/PII -- we're expanding a 4-uop
; instruction into two 1-uop insns plus a 2-uop insn.
That last has ; the same decoder scheduling characteristics as the original. (define_peephole2 [(match_scratch:SWI 2 "") (parallel [(set (match_operand:SWI 0 "memory_operand") (match_operator:SWI 3 "arith_or_logical_operator" [(match_dup 0) (match_operand:SWI 1 "")])) (clobber (reg:CC FLAGS_REG))])] "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())" [(set (match_dup 2) (match_dup 0)) (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 2) (match_dup 1)])) (clobber (reg:CC FLAGS_REG))]) (set (match_dup 0) (match_dup 2))]) (define_peephole2 [(match_scratch:SWI 2 "") (parallel [(set (match_operand:SWI 0 "memory_operand") (match_operator:SWI 3 "arith_or_logical_operator" [(match_operand:SWI 1 "") (match_dup 0)])) (clobber (reg:CC FLAGS_REG))])] "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())" [(set (match_dup 2) (match_dup 0)) (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 1) (match_dup 2)])) (clobber (reg:CC FLAGS_REG))]) (set (match_dup 0) (match_dup 2))]) ;; Attempt to use arith or logical operations with memory outputs with ;; setting of flags. (define_peephole2 [(set (match_operand:SWI 0 "register_operand") (match_operand:SWI 1 "memory_operand")) (parallel [(set (match_dup 0) (match_operator:SWI 3 "plusminuslogic_operator" [(match_dup 0) (match_operand:SWI 2 "")])) (clobber (reg:CC FLAGS_REG))]) (set (match_dup 1) (match_dup 0)) (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))] "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (4, operands[0]) && !reg_overlap_mentioned_p (operands[0], operands[1]) && !reg_overlap_mentioned_p (operands[0], operands[2]) && (mode != QImode || immediate_operand (operands[2], QImode) || any_QIreg_operand (operands[2], QImode)) && ix86_match_ccmode (peep2_next_insn (3), (GET_CODE (operands[3]) == PLUS || GET_CODE (operands[3]) == MINUS) ? 
CCGOCmode : CCNOmode)" [(parallel [(set (match_dup 4) (match_dup 6)) (set (match_dup 1) (match_dup 5))])] { operands[4] = SET_DEST (PATTERN (peep2_next_insn (3))); operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]), copy_rtx (operands[1]), operands[2]); operands[6] = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]), const0_rtx); }) ;; Likewise for cmpelim optimized pattern. (define_peephole2 [(set (match_operand:SWI 0 "register_operand") (match_operand:SWI 1 "memory_operand")) (parallel [(set (reg FLAGS_REG) (compare (match_operator:SWI 3 "plusminuslogic_operator" [(match_dup 0) (match_operand:SWI 2 "")]) (const_int 0))) (set (match_dup 0) (match_dup 3))]) (set (match_dup 1) (match_dup 0))] "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ()) && peep2_reg_dead_p (3, operands[0]) && !reg_overlap_mentioned_p (operands[0], operands[1]) && !reg_overlap_mentioned_p (operands[0], operands[2]) && ix86_match_ccmode (peep2_next_insn (1), (GET_CODE (operands[3]) == PLUS || GET_CODE (operands[3]) == MINUS) ? CCGOCmode : CCNOmode)" [(parallel [(set (match_dup 4) (match_dup 6)) (set (match_dup 1) (match_dup 5))])] { operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0)); operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]), copy_rtx (operands[1]), operands[2]); operands[6] = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]), const0_rtx); }) ;; Likewise for instances where we have a lea pattern. 
;; The addition was emitted as a lea in <LEAMODE> (no flags clobber):
;; "r0 = mem; r3 = r4 + x; mem = r5; flags = cmp (r5, 0)", where the
;; condition requires r4 to be r0 and r5 to be r3.  The replacement is a
;; single add to memory that also sets the flags (CCGOCmode).
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
        (match_operand:SWI 1 "memory_operand"))
   (set (match_operand:<LEAMODE> 3 "register_operand")
        (plus:<LEAMODE>
          (match_operand:<LEAMODE> 4 "register_operand")
          (match_operand:<LEAMODE> 2 "<nonmemory_operand>")))
   (set (match_dup 1) (match_operand:SWI 5 "register_operand"))
   (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && REGNO (operands[4]) == REGNO (operands[0])
   && REGNO (operands[5]) == REGNO (operands[3])
   && peep2_reg_dead_p (4, operands[3])
   && ((REGNO (operands[0]) == REGNO (operands[3]))
       || peep2_reg_dead_p (2, operands[0]))
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[3], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && (<MODE>mode != QImode
       || immediate_operand (operands[2], QImode)
       || any_QIreg_operand (operands[2], QImode))
   && ix86_match_ccmode (peep2_next_insn (3), CCGOCmode)"
  [(parallel [(set (match_dup 6) (match_dup 8))
              (set (match_dup 1) (match_dup 7))])]
{
  operands[6] = SET_DEST (PATTERN (peep2_next_insn (3)));
  /* The addend was matched in <LEAMODE>; narrow it to the memory mode.  */
  operands[7] = gen_rtx_PLUS (<MODE>mode, copy_rtx (operands[1]),
                              gen_lowpart (<MODE>mode, operands[2]));
  operands[8]
    = gen_rtx_COMPARE (GET_MODE (operands[6]), copy_rtx (operands[7]),
                       const0_rtx);
})

;; "r = r OP mem; mem = r; flags = cmp (r, 0)" with r dead afterwards and
;; OP commutative becomes a single "mem = mem OP r" that sets the flags.
(define_peephole2
  [(parallel [(set (match_operand:SWI 0 "register_operand")
                   (match_operator:SWI 2 "plusminuslogic_operator"
                     [(match_dup 0)
                      (match_operand:SWI 1 "memory_operand")]))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 1) (match_dup 0))
   (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && COMMUTATIVE_ARITH_P (operands[2])
   && peep2_reg_dead_p (3, operands[0])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && ix86_match_ccmode (peep2_next_insn (2),
                         GET_CODE (operands[2]) == PLUS
                         ? CCGOCmode : CCNOmode)"
  [(parallel [(set (match_dup 3) (match_dup 5))
              (set (match_dup 1) (match_dup 4))])]
{
  operands[3] = SET_DEST (PATTERN (peep2_next_insn (2)));
  operands[4]
    = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
                      copy_rtx (operands[1]), operands[0]);
  operands[5]
    = gen_rtx_COMPARE (GET_MODE (operands[3]), copy_rtx (operands[4]),
                       const0_rtx);
})

;; Likewise for cmpelim optimized pattern.
(define_peephole2
  [(parallel [(set (reg FLAGS_REG)
                   (compare (match_operator:SWI 2 "plusminuslogic_operator"
                              [(match_operand:SWI 0 "register_operand")
                               (match_operand:SWI 1 "memory_operand")])
                            (const_int 0)))
              (set (match_dup 0) (match_dup 2))])
   (set (match_dup 1) (match_dup 0))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && COMMUTATIVE_ARITH_P (operands[2])
   && peep2_reg_dead_p (2, operands[0])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && ix86_match_ccmode (peep2_next_insn (0),
                         GET_CODE (operands[2]) == PLUS
                         ? CCGOCmode : CCNOmode)"
  [(parallel [(set (match_dup 3) (match_dup 5))
              (set (match_dup 1) (match_dup 4))])]
{
  operands[3] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0));
  operands[4]
    = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
                      copy_rtx (operands[1]), operands[0]);
  operands[5]
    = gen_rtx_COMPARE (GET_MODE (operands[3]), copy_rtx (operands[4]),
                       const0_rtx);
})

;; QImode/HImode load and store where the arithmetic itself was done in
;; SImode on the same hard register (operands[4]).  The replacement
;; performs the operation in the narrow mode directly on memory.
(define_peephole2
  [(set (match_operand:SWI12 0 "register_operand")
        (match_operand:SWI12 1 "memory_operand"))
   (parallel [(set (match_operand:SI 4 "register_operand")
                   (match_operator:SI 3 "plusminuslogic_operator"
                     [(match_dup 4)
                      (match_operand:SI 2 "nonmemory_operand")]))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 1) (match_dup 0))
   (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && REGNO (operands[0]) == REGNO (operands[4])
   && peep2_reg_dead_p (4, operands[0])
   && (<MODE>mode != QImode
       || immediate_operand (operands[2], SImode)
       || any_QIreg_operand (operands[2], SImode))
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && ix86_match_ccmode (peep2_next_insn (3),
                         (GET_CODE (operands[3]) == PLUS
                          || GET_CODE (operands[3]) == MINUS)
                         ? CCGOCmode : CCNOmode)"
  [(parallel [(set (match_dup 5) (match_dup 7))
              (set (match_dup 1) (match_dup 6))])]
{
  operands[5] = SET_DEST (PATTERN (peep2_next_insn (3)));
  operands[6]
    = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
                      copy_rtx (operands[1]),
                      gen_lowpart (<MODE>mode, operands[2]));
  operands[7]
    = gen_rtx_COMPARE (GET_MODE (operands[5]), copy_rtx (operands[6]),
                       const0_rtx);
})

;; peephole2 comes before regcprop, so deal also with a case that
;; would be cleaned up by regcprop.
;; The result is first copied to another register (operands[4]) and that
;; copy is what gets stored and compared.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
        (match_operand:SWI 1 "memory_operand"))
   (parallel [(set (match_dup 0)
                   (match_operator:SWI 3 "plusminuslogic_operator"
                     [(match_dup 0)
                      (match_operand:SWI 2 "<nonmemory_operand>")]))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_operand:SWI 4 "register_operand") (match_dup 0))
   (set (match_dup 1) (match_dup 4))
   (set (reg FLAGS_REG) (compare (match_dup 4) (const_int 0)))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && peep2_reg_dead_p (3, operands[0])
   && peep2_reg_dead_p (5, operands[4])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && !reg_overlap_mentioned_p (operands[4], operands[1])
   && (<MODE>mode != QImode
       || immediate_operand (operands[2], QImode)
       || any_QIreg_operand (operands[2], QImode))
   && ix86_match_ccmode (peep2_next_insn (4),
                         (GET_CODE (operands[3]) == PLUS
                          || GET_CODE (operands[3]) == MINUS)
                         ? CCGOCmode : CCNOmode)"
  [(parallel [(set (match_dup 5) (match_dup 7))
              (set (match_dup 1) (match_dup 6))])]
{
  operands[5] = SET_DEST (PATTERN (peep2_next_insn (4)));
  operands[6]
    = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
                      copy_rtx (operands[1]), operands[2]);
  operands[7]
    = gen_rtx_COMPARE (GET_MODE (operands[5]), copy_rtx (operands[6]),
                       const0_rtx);
})

;; The QImode/HImode variant of the copy case, with the arithmetic done
;; in SImode on the same hard register as operands[0].
(define_peephole2
  [(set (match_operand:SWI12 0 "register_operand")
        (match_operand:SWI12 1 "memory_operand"))
   (parallel [(set (match_operand:SI 4 "register_operand")
                   (match_operator:SI 3 "plusminuslogic_operator"
                     [(match_dup 4)
                      (match_operand:SI 2 "nonmemory_operand")]))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_operand:SWI12 5 "register_operand") (match_dup 0))
   (set (match_dup 1) (match_dup 5))
   (set (reg FLAGS_REG) (compare (match_dup 5) (const_int 0)))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && REGNO (operands[0]) == REGNO (operands[4])
   && peep2_reg_dead_p (3, operands[0])
   && peep2_reg_dead_p (5, operands[5])
   && (<MODE>mode != QImode
       || immediate_operand (operands[2], SImode)
       || any_QIreg_operand (operands[2], SImode))
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && !reg_overlap_mentioned_p (operands[5], operands[1])
   && ix86_match_ccmode (peep2_next_insn (4),
                         (GET_CODE (operands[3]) == PLUS
                          || GET_CODE (operands[3]) == MINUS)
                         ? CCGOCmode : CCNOmode)"
  [(parallel [(set (match_dup 6) (match_dup 8))
              (set (match_dup 1) (match_dup 7))])]
{
  operands[6] = SET_DEST (PATTERN (peep2_next_insn (4)));
  operands[7]
    = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
                      copy_rtx (operands[1]),
                      gen_lowpart (<MODE>mode, operands[2]));
  operands[8]
    = gen_rtx_COMPARE (GET_MODE (operands[6]), copy_rtx (operands[7]),
                       const0_rtx);
})

;; Likewise for cmpelim optimized pattern.
;; The copy-to-another-register case where the compare has already been
;; merged into the arithmetic insn.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
        (match_operand:SWI 1 "memory_operand"))
   (parallel [(set (reg FLAGS_REG)
                   (compare (match_operator:SWI 3 "plusminuslogic_operator"
                              [(match_dup 0)
                               (match_operand:SWI 2 "<nonmemory_operand>")])
                            (const_int 0)))
              (set (match_dup 0) (match_dup 3))])
   (set (match_operand:SWI 4 "register_operand") (match_dup 0))
   (set (match_dup 1) (match_dup 4))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && peep2_reg_dead_p (3, operands[0])
   && peep2_reg_dead_p (4, operands[4])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && !reg_overlap_mentioned_p (operands[4], operands[1])
   && ix86_match_ccmode (peep2_next_insn (1),
                         (GET_CODE (operands[3]) == PLUS
                          || GET_CODE (operands[3]) == MINUS)
                         ? CCGOCmode : CCNOmode)"
  [(parallel [(set (match_dup 5) (match_dup 7))
              (set (match_dup 1) (match_dup 6))])]
{
  operands[5] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (1)), 0, 0));
  operands[6]
    = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
                      copy_rtx (operands[1]), operands[2]);
  operands[7]
    = gen_rtx_COMPARE (GET_MODE (operands[5]), copy_rtx (operands[6]),
                       const0_rtx);
})

;; Special cases for xor, where (x ^= y) != 0 is (misoptimized)
;; into x = z; x ^= y; x != z
;; The xor destination (operands[4]) must be either the loaded register
;; or its copy; the CCZ compare must be between the other of those two
;; registers and the xor operand, in either order.
(define_peephole2
  [(set (match_operand:SWI 0 "register_operand")
        (match_operand:SWI 1 "memory_operand"))
   (set (match_operand:SWI 3 "register_operand")
        (match_dup 0))
   (parallel [(set (match_operand:SWI 4 "register_operand")
                   (xor:SWI (match_dup 4)
                            (match_operand:SWI 2 "<nonmemory_operand>")))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 1) (match_dup 4))
   (set (reg:CCZ FLAGS_REG)
        (compare:CCZ (match_operand:SWI 5 "register_operand")
                     (match_operand:SWI 6 "<nonmemory_operand>")))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && (REGNO (operands[4]) == REGNO (operands[0])
       || REGNO (operands[4]) == REGNO (operands[3]))
   && (rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
                             ? 3 : 0], operands[5])
       ? rtx_equal_p (operands[2], operands[6])
       : rtx_equal_p (operands[2], operands[5])
         && rtx_equal_p (operands[REGNO (operands[4]) == REGNO (operands[0])
                                  ? 3 : 0], operands[6]))
   && peep2_reg_dead_p (4, operands[4])
   && peep2_reg_dead_p (5, operands[REGNO (operands[4]) == REGNO (operands[0])
                                    ? 3 : 0])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && !reg_overlap_mentioned_p (operands[3], operands[0])
   && !reg_overlap_mentioned_p (operands[3], operands[1])
   && !reg_overlap_mentioned_p (operands[3], operands[2])
   && (<MODE>mode != QImode
       || immediate_operand (operands[2], QImode)
       || any_QIreg_operand (operands[2], QImode))"
  [(parallel [(set (match_dup 7) (match_dup 9))
              (set (match_dup 1) (match_dup 8))])]
{
  operands[7] = SET_DEST (PATTERN (peep2_next_insn (4)));
  operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
                             operands[2]);
  operands[9]
    = gen_rtx_COMPARE (GET_MODE (operands[7]), copy_rtx (operands[8]),
                       const0_rtx);
})

;; QImode/HImode variant of the xor special case, with the xor itself
;; done in SImode on the same hard register as the stored value.
(define_peephole2
  [(set (match_operand:SWI12 0 "register_operand")
        (match_operand:SWI12 1 "memory_operand"))
   (set (match_operand:SWI12 3 "register_operand")
        (match_dup 0))
   (parallel [(set (match_operand:SI 4 "register_operand")
                   (xor:SI (match_dup 4)
                           (match_operand:SI 2 "<nonmemory_operand>")))
              (clobber (reg:CC FLAGS_REG))])
   (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))
   (set (reg:CCZ FLAGS_REG)
        (compare:CCZ (match_operand:SWI12 6 "register_operand")
                     (match_operand:SWI12 7 "<nonmemory_operand>")))]
  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
   && (REGNO (operands[5]) == REGNO (operands[0])
       || REGNO (operands[5]) == REGNO (operands[3]))
   && REGNO (operands[5]) == REGNO (operands[4])
   && (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
                             ? 3 : 0], operands[6])
       ? (REG_P (operands[2])
          ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
          : rtx_equal_p (operands[2], operands[7]))
       : (rtx_equal_p (operands[REGNO (operands[5]) == REGNO (operands[0])
                                ? 3 : 0], operands[7])
          && REG_P (operands[2])
          && REGNO (operands[2]) == REGNO (operands[6])))
   && peep2_reg_dead_p (4, operands[5])
   && peep2_reg_dead_p (5, operands[REGNO (operands[5]) == REGNO (operands[0])
                                    ? 3 : 0])
   && !reg_overlap_mentioned_p (operands[0], operands[1])
   && !reg_overlap_mentioned_p (operands[0], operands[2])
   && !reg_overlap_mentioned_p (operands[3], operands[0])
   && !reg_overlap_mentioned_p (operands[3], operands[1])
   && !reg_overlap_mentioned_p (operands[3], operands[2])
   && (<MODE>mode != QImode
       || immediate_operand (operands[2], SImode)
       || any_QIreg_operand (operands[2], SImode))"
  [(parallel [(set (match_dup 8) (match_dup 10))
              (set (match_dup 1) (match_dup 9))])]
{
  operands[8] = SET_DEST (PATTERN (peep2_next_insn (4)));
  operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
                             gen_lowpart (<MODE>mode, operands[2]));
  operands[10]
    = gen_rtx_COMPARE (GET_MODE (operands[8]), copy_rtx (operands[9]),
                       const0_rtx);
})

;; Attempt to optimize away memory stores of values the memory already
;; has.  See PR79593.
;; "reg = mem; mem = reg" keeps only the load when both references are
;; non-volatile, equal, and the address does not use reg.
(define_peephole2
  [(set (match_operand 0 "register_operand")
        (match_operand 1 "memory_operand"))
   (set (match_operand 2 "memory_operand")
        (match_dup 0))]
  "!MEM_VOLATILE_P (operands[1])
   && !MEM_VOLATILE_P (operands[2])
   && rtx_equal_p (operands[1], operands[2])
   && !reg_overlap_mentioned_p (operands[0], operands[2])"
  [(set (match_dup 0) (match_dup 1))])

;; Attempt to always use XOR for zeroing registers (including FP modes).
;; Requires the flags register to be dead; the destination is rewritten
;; to word_mode.
(define_peephole2
  [(set (match_operand 0 "general_reg_operand")
        (match_operand 1 "const0_operand"))]
  "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
   && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0) (const_int 0))
              (clobber (reg:CC FLAGS_REG))])]
  "operands[0] = gen_lowpart (word_mode, operands[0]);")

;; The strict_low_part form keeps its mode and only gains the flags
;; clobber.
(define_peephole2
  [(set (strict_low_part (match_operand:SWI12 0 "general_reg_operand"))
        (const_int 0))]
  "(! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0))
              (clobber (reg:CC FLAGS_REG))])])

;; For HI, SI and DI modes, or $-1,reg is smaller than mov $-1,reg.
(define_peephole2
  [(set (match_operand:SWI248 0 "general_reg_operand")
        (const_int -1))]
  "(TARGET_MOVE_M1_VIA_OR || optimize_insn_for_size_p ())
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0) (const_int -1))
              (clobber (reg:CC FLAGS_REG))])]
{
  /* Modes narrower than SImode are widened to SImode.  */
  if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
    operands[0] = gen_lowpart (SImode, operands[0]);
})

;; Attempt to convert simple lea to add/shift.
;; These can be created by move expanders.
;; Disable PLUS peepholes on TARGET_OPT_AGU, since all
;; relevant lea instructions were already split.

;; "r = r + x" without a flags clobber gains one when flags are dead.
(define_peephole2
  [(set (match_operand:SWI48 0 "register_operand")
        (plus:SWI48 (match_dup 0)
                    (match_operand:SWI48 1 "<nonmemory_operand>")))]
  "!TARGET_OPT_AGU
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
              (clobber (reg:CC FLAGS_REG))])])

;; "r = x + r": same, with the operands swapped into canonical order.
(define_peephole2
  [(set (match_operand:SWI48 0 "register_operand")
        (plus:SWI48 (match_operand:SWI48 1 "<nonmemory_operand>")
                    (match_dup 0)))]
  "!TARGET_OPT_AGU
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
              (clobber (reg:CC FLAGS_REG))])])

;; Zero-extended SImode addition into a DImode register (64-bit only),
;; destination equal to the first addend.
(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (plus:SI (match_operand:SI 1 "register_operand")
                   (match_operand:SI 2 "nonmemory_operand"))))]
  "TARGET_64BIT && !TARGET_OPT_AGU
   && REGNO (operands[0]) == REGNO (operands[1])
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0)
                   (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))
              (clobber (reg:CC FLAGS_REG))])])

;; Likewise with the destination equal to the second addend; the
;; operands are swapped in the replacement.
(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (plus:SI (match_operand:SI 1 "nonmemory_operand")
                   (match_operand:SI 2 "register_operand"))))]
  "TARGET_64BIT && !TARGET_OPT_AGU
   && REGNO (operands[0]) == REGNO (operands[2])
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0)
                   (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
              (clobber (reg:CC FLAGS_REG))])])

;; Multiplication of a register by a power of two becomes a left shift
;; by exact_log2 of the multiplier.
(define_peephole2
  [(set (match_operand:SWI48 0 "register_operand")
        (mult:SWI48 (match_dup 0)
                    (match_operand:SWI48 1 "const_int_operand")))]
  "pow2p_hwi (INTVAL (operands[1]))
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
              (clobber (reg:CC FLAGS_REG))])]
  "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")

;; Zero-extended SImode variant of the above (64-bit only).
(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
        (zero_extend:DI
          (mult:SI (match_operand:SI 1 "register_operand")
                   (match_operand:SI 2 "const_int_operand"))))]
  "TARGET_64BIT
   && pow2p_hwi (INTVAL (operands[2]))
   && REGNO (operands[0]) == REGNO (operands[1])
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0)
                   (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
              (clobber (reg:CC FLAGS_REG))])]
  "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")

;; The ESP adjustments can be done by the push and pop instructions.  Resulting
;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes.
;; On many CPUs it is also faster, since special hardware to avoid esp
;; dependencies is present.

;; While some of these conversions may be done using splitters, we use
;; peepholes in order to allow combine_stack_adjustments pass to see
;; nonobfuscated RTL.

;; Convert prologue esp subtractions to push.
;; We need register to push.  In order to keep verify_flow_info happy we have
;; two choices
;; - use scratch and clobber it in order to avoid dependencies
;; - use already live register
;; We can't use the second way right now, since there is no reliable way how to
;; verify that given register is live.  First choice will also most likely in
;; fewer dependencies.  On the place of esp adjustments it is very likely that
;; call clobbered registers are dead.  We may want to use base pointer as an
;; alternative when no register is available later.

;; One word: a single push of a clobbered scratch register.
(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand")))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
  "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
   && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)
   && !ix86_red_zone_used"
  [(clobber (match_dup 1))
   (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
              (clobber (mem:BLK (scratch)))])])

;; Two words: two pushes of the same scratch register.
(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand")))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
  "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
   && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)
   && !ix86_red_zone_used"
  [(clobber (match_dup 1))
   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
   (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
              (clobber (mem:BLK (scratch)))])])

;; Convert esp subtractions to push.
(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
   && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)
   && !ix86_red_zone_used"
  [(clobber (match_dup 1))
   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])

(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
   && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)
   && !ix86_red_zone_used"
  [(clobber (match_dup 1))
   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])

;; Convert epilogue deallocator to pop.
;; Stack adjustment by exactly one word (with a memory-clobbering
;; barrier): replaced by a pop into a scratch register.
(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand")))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
  "(TARGET_SINGLE_POP || optimize_insn_for_size_p ())
   && INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
  [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
              (clobber (mem:BLK (scratch)))])])

;; Two pops case is tricky, since pop causes dependency
;; on destination register.  We use two registers if available.
(define_peephole2
  [(match_scratch:W 1 "r")
   (match_scratch:W 2 "r")
   (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand")))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
  "(TARGET_DOUBLE_POP || optimize_insn_for_size_p ())
   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
  [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
              (clobber (mem:BLK (scratch)))])
   (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])

;; Variant needing only one scratch register: both pops target the same
;; register.  Used only when optimizing for size.
(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand")))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
  "optimize_insn_for_size_p ()
   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
  [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
              (clobber (mem:BLK (scratch)))])
   (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])

;; Convert esp additions to pop.
;; This form has no memory clobber and its condition tests only the
;; adjustment size.
(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
  [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])

;; Two pops case is tricky, since pop causes dependency
;; on destination register.  We use two registers if available.
;; Stack adjustment by two words (no memory clobber): two pops into two
;; different scratch registers.
(define_peephole2
  [(match_scratch:W 1 "r")
   (match_scratch:W 2 "r")
   (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
  [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
   (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])

;; Single-scratch variant: both pops target the same register.  Used
;; only when optimizing for size.
(define_peephole2
  [(match_scratch:W 1 "r")
   (parallel [(set (reg:P SP_REG)
                   (plus:P (reg:P SP_REG)
                           (match_operand:P 0 "const_int_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "optimize_insn_for_size_p ()
   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
  [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
   (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])

;; Convert compares with 1 to shorter inc/dec operations when CF is not
;; required and register dies.  Similarly for 128 to -128.
;; The replacement keeps the compare but adds a clobber of the compared
;; register, which must be dead after the insn.
(define_peephole2
  [(set (match_operand 0 "flags_reg_operand")
        (match_operator 1 "compare_operator"
          [(match_operand 2 "register_operand")
           (match_operand 3 "const_int_operand")]))]
  "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_insn_for_size_p ())
     && incdec_operand (operands[3], GET_MODE (operands[3])))
    || (!TARGET_FUSE_CMP_AND_BRANCH
        && INTVAL (operands[3]) == 128))
   && ix86_match_ccmode (insn, CCGCmode)
   && peep2_reg_dead_p (1, operands[2])"
  [(parallel [(set (match_dup 0)
                   (match_op_dup 1 [(match_dup 2) (match_dup 3)]))
              (clobber (match_dup 2))])])

;; Convert imul by three, five and nine into lea
;; Register source: r0 = r1 * (N-1) + r1, with no flags clobber.
(define_peephole2
  [(parallel
    [(set (match_operand:SWI48 0 "register_operand")
          (mult:SWI48 (match_operand:SWI48 1 "register_operand")
                      (match_operand:SWI48 2 "const359_operand")))
     (clobber (reg:CC FLAGS_REG))])]
  "!TARGET_PARTIAL_REG_STALL
   || <MODE>mode == SImode
   || optimize_function_for_size_p (cfun)"
  [(set (match_dup 0)
        (plus:SWI48 (mult:SWI48 (match_dup 1) (match_dup 2))
                    (match_dup 1)))]
  "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")

;; Register-or-memory source, only when optimizing for speed: copy the
;; source into the destination first, then r0 = r0 * (N-1) + r0.
(define_peephole2
  [(parallel
    [(set (match_operand:SWI48 0 "register_operand")
          (mult:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
                      (match_operand:SWI48 2 "const359_operand")))
     (clobber (reg:CC FLAGS_REG))])]
  "optimize_insn_for_speed_p ()
   && (!TARGET_PARTIAL_REG_STALL || <MODE>mode == SImode)"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 0)
        (plus:SWI48 (mult:SWI48 (match_dup 0) (match_dup 2))
                    (match_dup 0)))]
  "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")

;; imul $32bit_imm, mem, reg is vector decoded, while
;; imul $32bit_imm, reg, reg is direct decoded.
;; Load the memory operand into a scratch register first.  Immediates
;; that satisfy constraint K are left to the next group of peepholes.
(define_peephole2
  [(match_scratch:SWI48 3 "r")
   (parallel [(set (match_operand:SWI48 0 "register_operand")
                   (mult:SWI48 (match_operand:SWI48 1 "memory_operand")
                               (match_operand:SWI48 2 "immediate_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
   && !satisfies_constraint_K (operands[2])"
  [(set (match_dup 3) (match_dup 1))
   (parallel [(set (match_dup 0) (mult:SWI48 (match_dup 3) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])])

;; Zero-extended SImode variant of the above (64-bit only).
(define_peephole2
  [(match_scratch:SI 3 "r")
   (parallel [(set (match_operand:DI 0 "register_operand")
                   (zero_extend:DI
                     (mult:SI (match_operand:SI 1 "memory_operand")
                              (match_operand:SI 2 "immediate_operand"))))
              (clobber (reg:CC FLAGS_REG))])]
  "TARGET_64BIT
   && TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
   && !satisfies_constraint_K (operands[2])"
  [(set (match_dup 3) (match_dup 1))
   (parallel [(set (match_dup 0)
                   (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
              (clobber (reg:CC FLAGS_REG))])])

;; imul $8/16bit_imm, regmem, reg is vector decoded.
;; Convert it into imul reg, reg
;; It would be better to force assembler to encode instruction using long
;; immediate, but there is apparently no way to do so.
;; Load the small immediate into a scratch register and multiply
;; register by register.  The source is first moved into the destination
;; if they differ.
(define_peephole2
  [(parallel [(set (match_operand:SWI248 0 "register_operand")
                   (mult:SWI248 (match_operand:SWI248 1 "nonimmediate_operand")
                                (match_operand:SWI248 2 "const_int_operand")))
              (clobber (reg:CC FLAGS_REG))])
   (match_scratch:SWI248 3 "r")]
  "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
   && satisfies_constraint_K (operands[2])"
  [(set (match_dup 3) (match_dup 2))
   (parallel [(set (match_dup 0) (mult:SWI248 (match_dup 0) (match_dup 3)))
              (clobber (reg:CC FLAGS_REG))])]
{
  if (!rtx_equal_p (operands[0], operands[1]))
    emit_move_insn (operands[0], operands[1]);
})

;; After splitting up read-modify operations, array accesses with memory
;; operands might end up in form:
;;  sall    $2, %eax
;;  movl    4(%esp), %edx
;;  addl    %edx, %eax
;; instead of pre-splitting:
;;  sall    $2, %eax
;;  addl    4(%esp), %eax
;; Turn it into:
;;  movl    4(%esp), %edx
;;  leal    (%edx,%eax,4), %eax

(define_peephole2
  [(match_scratch:W 5 "r")
   (parallel [(set (match_operand 0 "register_operand")
                   (ashift (match_operand 1 "register_operand")
                           (match_operand 2 "const_int_operand")))
              (clobber (reg:CC FLAGS_REG))])
   (parallel [(set (match_operand 3 "register_operand")
                   (plus (match_dup 0)
                         (match_operand 4 "x86_64_general_operand")))
              (clobber (reg:CC FLAGS_REG))])]
  "IN_RANGE (INTVAL (operands[2]), 1, 3)
   /* Validate MODE for lea.  */
   && ((!TARGET_PARTIAL_REG_STALL
        && (GET_MODE (operands[0]) == QImode
            || GET_MODE (operands[0]) == HImode))
       || GET_MODE (operands[0]) == SImode
       || (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
   && (rtx_equal_p (operands[0], operands[3])
       || peep2_reg_dead_p (2, operands[0]))
   /* We reorder load and the shift.  */
   && !reg_overlap_mentioned_p (operands[0], operands[4])"
  [(set (match_dup 5) (match_dup 4))
   (set (match_dup 0) (match_dup 1))]
{
  machine_mode op1mode = GET_MODE (operands[1]);
  /* The lea is emitted in DImode for DImode input, SImode otherwise.  */
  machine_mode mode = op1mode == DImode ? DImode : SImode;
  int scale = 1 << INTVAL (operands[2]);
  rtx index = gen_lowpart (word_mode, operands[1]);
  rtx base = gen_lowpart (word_mode, operands[5]);
  rtx dest = gen_lowpart (mode, operands[3]);

  /* Address expression base + index * scale, built in word_mode.  */
  operands[1] = gen_rtx_PLUS (word_mode, base,
                              gen_rtx_MULT (word_mode, index, GEN_INT (scale)));
  if (mode != word_mode)
    operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);

  /* The load into the scratch register uses the original operand mode.  */
  operands[5] = base;
  if (op1mode != word_mode)
    operands[5] = gen_lowpart (op1mode, operands[5]);

  operands[0] = dest;
})

;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
;; That, however, is usually mapped by the OS to SIGSEGV, which is often
;; caught for use by garbage collectors and the like.  Using an insn that
;; maps to SIGILL makes it more likely the program will rightfully die.
;; Keeping with tradition, "6" is in honor of #UD.
(define_insn "trap"
  [(trap_if (const_int 1) (const_int 6))]
  ""
{
  /* Emit the raw opcode bytes when the assembler lacks the mnemonic.  */
#ifdef HAVE_AS_IX86_UD2
  return "ud2";
#else
  return ASM_SHORT "0x0b0f";
#endif
}
  [(set_attr "length" "2")])

;; The same instruction exposed as an unspec_volatile.
(define_insn "ud2"
  [(unspec_volatile [(const_int 0)] UNSPECV_UD2)]
  ""
{
#ifdef HAVE_AS_IX86_UD2
  return "ud2";
#else
  return ASM_SHORT "0x0b0f";
#endif
}
  [(set_attr "length" "2")])

;; Prefetch expander.  Operand 1 is the write flag and operand 2 the
;; locality (0..3).  The operands are adjusted so that exactly one of
;; *prefetch_sse (write flag 0) or *prefetch_3dnow (locality 3) matches.
(define_expand "prefetch"
  [(prefetch (match_operand 0 "address_operand")
             (match_operand:SI 1 "const_int_operand")
             (match_operand:SI 2 "const_int_operand"))]
  "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW"
{
  bool write = operands[1] != const0_rtx;
  int locality = INTVAL (operands[2]);

  gcc_assert (IN_RANGE (locality, 0, 3));

  /* Use 3dNOW prefetch in case we are asking for write prefetch not
     supported by SSE counterpart (non-SSE2 athlon machines) or the
     SSE prefetch is not available (K6 machines).  Otherwise use SSE
     prefetch as it allows specifying of locality.  */

  if (write)
    {
      if (TARGET_PRFCHW)
        operands[2] = GEN_INT (3);
      else if (TARGET_3DNOW && !TARGET_SSE2)
        operands[2] = GEN_INT (3);
      else if (TARGET_PREFETCH_SSE)
        /* No write variant is available here: demote to a read prefetch.  */
        operands[1] = const0_rtx;
      else
        {
          gcc_assert (TARGET_3DNOW);
          operands[2] = GEN_INT (3);
        }
    }
  else
    {
      if (TARGET_PREFETCH_SSE)
        ;
      else
        {
          gcc_assert (TARGET_3DNOW);
          operands[2] = GEN_INT (3);
        }
    }
})

;; SSE prefetch; the mnemonic is selected by the locality operand.
(define_insn "*prefetch_sse"
  [(prefetch (match_operand 0 "address_operand" "p")
             (const_int 0)
             (match_operand:SI 1 "const_int_operand"))]
  "TARGET_PREFETCH_SSE"
{
  static const char * const patterns[4] = {
   "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
  };

  int locality = INTVAL (operands[1]);
  gcc_assert (IN_RANGE (locality, 0, 3));

  return patterns[locality];
}
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "prefetch")
   (set (attr "length_address")
        (symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])

;; 3dNOW / PRFCHW prefetch; operand 1 selects prefetch vs. prefetchw.
(define_insn "*prefetch_3dnow"
  [(prefetch (match_operand 0 "address_operand" "p")
             (match_operand:SI 1 "const_int_operand")
             (const_int 3))]
  "TARGET_3DNOW || TARGET_PRFCHW"
{
  if (operands[1] == const0_rtx)
    return "prefetch\t%a0";
  else
    return "prefetchw\t%a0";
}
  [(set_attr "type" "mmx")
   (set (attr "length_address")
        (symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])

;; Prefetch on a local function symbol (64-bit only); locality must be
;; 2 or 3 and selects prefetchit1 / prefetchit0 respectively.
(define_insn "prefetchi"
  [(unspec_volatile [(match_operand 0 "local_func_symbolic_operand" "p")
                     (match_operand:SI 1 "const_int_operand")]
                    UNSPECV_PREFETCHI)]
  "TARGET_PREFETCHI && TARGET_64BIT"
{
  static const char * const patterns[2] = {
    "prefetchit1\t%0", "prefetchit0\t%0"
  };

  int locality = INTVAL (operands[1]);
  gcc_assert (IN_RANGE (locality, 2, 3));

  return patterns[locality - 2];
}
  [(set_attr "type" "sse")
   (set (attr "length_address")
        (symbol_ref "memory_address_length (operands[0], false)"))
   (set_attr "memory" "none")])

;; CRC32 accumulate with a QI/HI/SI source operand.  One insn is
;; generated per mode of the SWI124 iterator, so the name, the register
;; constraint and the opcode suffix are all mode-dependent.
(define_insn "sse4_2_crc32<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
          [(match_operand:SI 1 "register_operand" "0")
           (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")]
          UNSPEC_CRC32))]
  "TARGET_CRC32"
  "crc32{<imodesuffix>}\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_data16")
     (if_then_else (match_operand:HI 2)
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix_rex")
     (if_then_else (match_operand:QI 2 "ext_QIreg_operand")
       (const_string "1")
       (const_string "*")))
   (set_attr "mode" "SI")])

;; CRC32 accumulate with a DImode source (64-bit only); the SImode
;; result is zero-extended into the DImode destination.
(define_insn "sse4_2_crc32di"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (unspec:SI
            [(match_operand:SI 1 "register_operand" "0")
             (match_operand:DI 2 "nonimmediate_operand" "rm")]
            UNSPEC_CRC32)))]
  "TARGET_64BIT && TARGET_CRC32"
  "crc32{q}\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "mode" "DI")])

;; rdpmc, 32-bit form: DImode result in constraint "A", counter index
;; in constraint "c".
(define_insn "rdpmc"
  [(set (match_operand:DI 0 "register_operand" "=A")
        (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
                            UNSPECV_RDPMC))]
  "!TARGET_64BIT"
  "rdpmc"
  [(set_attr "type" "other")
   (set_attr "length" "2")])

;; rdpmc, 64-bit form: the result is modelled as two DImode outputs
;; (constraints "a" and "d").
(define_insn "rdpmc_rex64"
  [(set (match_operand:DI 0 "register_operand" "=a")
        (unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
                            UNSPECV_RDPMC))
   (set (match_operand:DI 1 "register_operand" "=d")
        (unspec_volatile:DI [(match_dup 2)] UNSPECV_RDPMC))]
  "TARGET_64BIT"
  "rdpmc"
  [(set_attr "type" "other")
   (set_attr "length" "2")])

;; rdtsc, 32-bit form.
(define_insn "rdtsc"
  [(set (match_operand:DI 0 "register_operand" "=A")
        (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
  "!TARGET_64BIT"
  "rdtsc"
  [(set_attr "type" "other")
   (set_attr "length" "2")])

;; rdtsc, 64-bit form with two DImode outputs.
(define_insn "rdtsc_rex64"
  [(set (match_operand:DI 0 "register_operand" "=a")
        (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))
   (set (match_operand:DI 1 "register_operand" "=d")
        (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
  "TARGET_64BIT"
  "rdtsc"
  [(set_attr "type" "other")
   (set_attr "length" "2")])

;; rdtscp, 32-bit form: additionally sets an SImode output with
;; constraint "c".
(define_insn "rdtscp"
  [(set (match_operand:DI 0 "register_operand" "=A")
        (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
   (set (match_operand:SI 1 "register_operand" "=c")
        (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
  "!TARGET_64BIT"
  "rdtscp"
  [(set_attr "type" "other")
   (set_attr "length" "3")])

;; rdtscp, 64-bit form.
(define_insn "rdtscp_rex64"
  [(set (match_operand:DI 0 "register_operand" "=a")
        (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
   (set (match_operand:DI 1 "register_operand" "=d")
        (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
   (set (match_operand:SI 2 "register_operand" "=c")
        (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
  "TARGET_64BIT"
  "rdtscp"
  [(set_attr "type" "other")
   (set_attr "length" "3")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; FXSR, XSAVE and XSAVEOPT instructions
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; fxsave to a BLKmode memory operand; the length attribute is the
;; address length plus 3.
(define_insn "fxsave"
  [(set (match_operand:BLK 0 "memory_operand" "=m")
        (unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE))]
  "TARGET_FXSR"
  "fxsave\t%0"
  [(set_attr "type" "other")
   (set_attr "memory" "store")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])

;; 64-bit fxsave64; address length plus 4.
(define_insn "fxsave64"
  [(set (match_operand:BLK 0 "memory_operand" "=jm")
        (unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE64))]
  "TARGET_64BIT && TARGET_FXSR"
  "fxsave64\t%0"
  [(set_attr "type" "other")
   (set_attr "addr" "gpr16")
   (set_attr "memory" "store")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])

;; fxrstor from a BLKmode memory operand.
(define_insn "fxrstor"
  [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
                    UNSPECV_FXRSTOR)]
  "TARGET_FXSR"
  "fxrstor\t%0"
  [(set_attr "type" "other")
   (set_attr "memory" "load")
   (set (attr "length")
        (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])

(define_insn "fxrstor64"
  [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "jm")]
                    UNSPECV_FXRSTOR64)]
  "TARGET_64BIT && TARGET_FXSR"
  "fxrstor64\t%0"
  [(set_attr "type" "other")
   (set_attr "addr" "gpr16")
   (set_attr "memory" "load")
   (set (attr "length")
(symbol_ref "ix86_attr_length_address_default (insn) + 4"))]) (define_int_iterator ANY_XSAVE [UNSPECV_XSAVE (UNSPECV_XSAVEOPT "TARGET_XSAVEOPT") (UNSPECV_XSAVEC "TARGET_XSAVEC") (UNSPECV_XSAVES "TARGET_XSAVES")]) (define_int_iterator ANY_XSAVE64 [UNSPECV_XSAVE64 (UNSPECV_XSAVEOPT64 "TARGET_XSAVEOPT") (UNSPECV_XSAVEC64 "TARGET_XSAVEC") (UNSPECV_XSAVES64 "TARGET_XSAVES")]) (define_int_attr xsave [(UNSPECV_XSAVE "xsave") (UNSPECV_XSAVE64 "xsave64") (UNSPECV_XSAVEOPT "xsaveopt") (UNSPECV_XSAVEOPT64 "xsaveopt64") (UNSPECV_XSAVEC "xsavec") (UNSPECV_XSAVEC64 "xsavec64") (UNSPECV_XSAVES "xsaves") (UNSPECV_XSAVES64 "xsaves64")]) (define_int_iterator ANY_XRSTOR [UNSPECV_XRSTOR (UNSPECV_XRSTORS "TARGET_XSAVES")]) (define_int_iterator ANY_XRSTOR64 [UNSPECV_XRSTOR64 (UNSPECV_XRSTORS64 "TARGET_XSAVES")]) (define_int_attr xrstor [(UNSPECV_XRSTOR "xrstor") (UNSPECV_XRSTOR64 "xrstor") (UNSPECV_XRSTORS "xrstors") (UNSPECV_XRSTORS64 "xrstors")]) (define_insn "" [(set (match_operand:BLK 0 "memory_operand" "=m") (unspec_volatile:BLK [(match_operand:DI 1 "register_operand" "A")] ANY_XSAVE))] "!TARGET_64BIT && TARGET_XSAVE" "\t%0" [(set_attr "type" "other") (set_attr "memory" "store") (set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 3"))]) (define_insn "_rex64" [(set (match_operand:BLK 0 "memory_operand" "=jm") (unspec_volatile:BLK [(match_operand:SI 1 "register_operand" "a") (match_operand:SI 2 "register_operand" "d")] ANY_XSAVE))] "TARGET_64BIT && TARGET_XSAVE" "\t%0" [(set_attr "type" "other") (set_attr "memory" "store") (set_attr "addr" "gpr16") (set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 3"))]) (define_insn "" [(set (match_operand:BLK 0 "memory_operand" "=jm") (unspec_volatile:BLK [(match_operand:SI 1 "register_operand" "a") (match_operand:SI 2 "register_operand" "d")] ANY_XSAVE64))] "TARGET_64BIT && TARGET_XSAVE" "\t%0" [(set_attr "type" "other") (set_attr "memory" "store") (set_attr "addr" "gpr16") (set (attr "length") 
(symbol_ref "ix86_attr_length_address_default (insn) + 4"))]) (define_insn "" [(unspec_volatile:BLK [(match_operand:BLK 0 "memory_operand" "m") (match_operand:DI 1 "register_operand" "A")] ANY_XRSTOR)] "!TARGET_64BIT && TARGET_XSAVE" "\t%0" [(set_attr "type" "other") (set_attr "memory" "load") (set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 3"))]) (define_insn "_rex64" [(unspec_volatile:BLK [(match_operand:BLK 0 "memory_operand" "jm") (match_operand:SI 1 "register_operand" "a") (match_operand:SI 2 "register_operand" "d")] ANY_XRSTOR)] "TARGET_64BIT && TARGET_XSAVE" "\t%0" [(set_attr "type" "other") (set_attr "memory" "load") (set_attr "addr" "gpr16") (set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 3"))]) (define_insn "64" [(unspec_volatile:BLK [(match_operand:BLK 0 "memory_operand" "jm") (match_operand:SI 1 "register_operand" "a") (match_operand:SI 2 "register_operand" "d")] ANY_XRSTOR64)] "TARGET_64BIT && TARGET_XSAVE" "64\t%0" [(set_attr "type" "other") (set_attr "memory" "load") (set_attr "addr" "gpr16") (set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 4"))]) (define_insn "xsetbv" [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "c") (match_operand:DI 1 "register_operand" "A")] UNSPECV_XSETBV)] "!TARGET_64BIT && TARGET_XSAVE" "xsetbv" [(set_attr "type" "other")]) (define_insn "xsetbv_rex64" [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "c") (match_operand:SI 1 "register_operand" "a") (match_operand:SI 2 "register_operand" "d")] UNSPECV_XSETBV)] "TARGET_64BIT && TARGET_XSAVE" "xsetbv" [(set_attr "type" "other")]) (define_insn "xgetbv" [(set (match_operand:DI 0 "register_operand" "=A") (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")] UNSPECV_XGETBV))] "!TARGET_64BIT && TARGET_XSAVE" "xgetbv" [(set_attr "type" "other")]) (define_insn "xgetbv_rex64" [(set (match_operand:DI 0 "register_operand" "=a") (unspec_volatile:DI [(match_operand:SI 2 
"register_operand" "c")] UNSPECV_XGETBV)) (set (match_operand:DI 1 "register_operand" "=d") (unspec_volatile:DI [(match_dup 2)] UNSPECV_XGETBV))] "TARGET_64BIT && TARGET_XSAVE" "xgetbv" [(set_attr "type" "other")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Floating-point instructions for atomic compound assignments ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Clobber all floating-point registers on environment save and restore ; to ensure that the TOS value saved at fnstenv is valid after fldenv. (define_insn "fnstenv" [(set (match_operand:BLK 0 "memory_operand" "=m") (unspec_volatile:BLK [(const_int 0)] UNSPECV_FNSTENV)) (clobber (reg:XF ST0_REG)) (clobber (reg:XF ST1_REG)) (clobber (reg:XF ST2_REG)) (clobber (reg:XF ST3_REG)) (clobber (reg:XF ST4_REG)) (clobber (reg:XF ST5_REG)) (clobber (reg:XF ST6_REG)) (clobber (reg:XF ST7_REG))] "TARGET_80387" "fnstenv\t%0" [(set_attr "type" "other") (set_attr "memory" "store") (set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 2"))]) (define_insn "fldenv" [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")] UNSPECV_FLDENV) (clobber (reg:XF ST0_REG)) (clobber (reg:XF ST1_REG)) (clobber (reg:XF ST2_REG)) (clobber (reg:XF ST3_REG)) (clobber (reg:XF ST4_REG)) (clobber (reg:XF ST5_REG)) (clobber (reg:XF ST6_REG)) (clobber (reg:XF ST7_REG))] "TARGET_80387" "fldenv\t%0" [(set_attr "type" "other") (set_attr "memory" "load") (set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 2"))]) (define_insn "fnstsw" [(set (match_operand:HI 0 "nonimmediate_operand" "=a,m") (unspec_volatile:HI [(const_int 0)] UNSPECV_FNSTSW))] "TARGET_80387" "fnstsw\t%0" [(set_attr "type" "other,other") (set_attr "memory" "none,store") (set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 2"))]) (define_insn "fnclex" [(unspec_volatile [(const_int 0)] UNSPECV_FNCLEX)] "TARGET_80387" "fnclex" [(set_attr "type" "other") 
(set_attr "memory" "none") (set_attr "length" "2")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; LWP instructions ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "@lwp_llwpcb" [(unspec_volatile [(match_operand:P 0 "register_operand" "r")] UNSPECV_LLWP_INTRINSIC)] "TARGET_LWP" "llwpcb\t%0" [(set_attr "type" "lwp") (set_attr "mode" "") (set_attr "length" "5")]) (define_insn "@lwp_slwpcb" [(set (match_operand:P 0 "register_operand" "=r") (unspec_volatile:P [(const_int 0)] UNSPECV_SLWP_INTRINSIC))] "TARGET_LWP" "slwpcb\t%0" [(set_attr "type" "lwp") (set_attr "mode" "") (set_attr "length" "5")]) (define_insn "@lwp_lwpval" [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r") (match_operand:SI 1 "nonimmediate_operand" "rm") (match_operand:SI 2 "const_int_operand")] UNSPECV_LWPVAL_INTRINSIC)] "TARGET_LWP" "lwpval\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "lwp") (set_attr "mode" "") (set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 9"))]) (define_insn "@lwp_lwpins" [(set (reg:CCC FLAGS_REG) (unspec_volatile:CCC [(match_operand:SWI48 0 "register_operand" "r") (match_operand:SI 1 "nonimmediate_operand" "rm") (match_operand:SI 2 "const_int_operand")] UNSPECV_LWPINS_INTRINSIC))] "TARGET_LWP" "lwpins\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "lwp") (set_attr "mode" "") (set (attr "length") (symbol_ref "ix86_attr_length_address_default (insn) + 9"))]) (define_int_iterator RDFSGSBASE [UNSPECV_RDFSBASE UNSPECV_RDGSBASE]) (define_int_iterator WRFSGSBASE [UNSPECV_WRFSBASE UNSPECV_WRGSBASE]) (define_int_attr fsgs [(UNSPECV_RDFSBASE "fs") (UNSPECV_RDGSBASE "gs") (UNSPECV_WRFSBASE "fs") (UNSPECV_WRGSBASE "gs")]) (define_insn "rdbase" [(set (match_operand:SWI48 0 "register_operand" "=r") (unspec_volatile:SWI48 [(const_int 0)] RDFSGSBASE))] "TARGET_64BIT && TARGET_FSGSBASE" "rdbase\t%0" [(set_attr "type" "other") (set_attr "prefix_0f" "1") (set_attr "prefix_rep" "1")]) 
;; One pattern per (WRFSGSBASE unspec, SWI48 mode) pair; the segment
;; name comes from the fsgs int attribute and the mode keeps the
;; generated pattern names distinct.
(define_insn "wr<fsgs>base<mode>"
  [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
                    WRFSGSBASE)]
  "TARGET_64BIT && TARGET_FSGSBASE"
  "wr<fsgs>base\t%0"
  [(set_attr "type" "other")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")])

(define_insn "ptwrite<mode>"
  [(unspec_volatile [(match_operand:SWI48 0 "nonimmediate_operand" "rm")]
                    UNSPECV_PTWRITE)]
  "TARGET_PTWRITE"
  "ptwrite\t%0"
  [(set_attr "type" "other")
   (set_attr "prefix_0f" "1")
   (set_attr "prefix_rep" "1")])

;; rdrand / rdseed set both the destination register and the CCC flags.
;; The patterns are parameterized ("@") on the mode of operand 0.
(define_insn "@rdrand<mode>"
  [(set (match_operand:SWI248 0 "register_operand" "=r")
        (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))
   (set (reg:CCC FLAGS_REG)
        (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDRAND))]
  "TARGET_RDRND"
  "rdrand\t%0"
  [(set_attr "type" "other")
   (set_attr "prefix_0f" "1")])

(define_insn "@rdseed<mode>"
  [(set (match_operand:SWI248 0 "register_operand" "=r")
        (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDSEED))
   (set (reg:CCC FLAGS_REG)
        (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDSEED))]
  "TARGET_RDSEED"
  "rdseed\t%0"
  [(set_attr "type" "other")
   (set_attr "prefix_0f" "1")])

;; The expander builds a volatile BLKmode MEM on a scratch address and
;; uses it as both source and destination of the UNSPEC_PAUSE set.
(define_expand "pause"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
  ""
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

;; Use "rep; nop", instead of "pause", to support older assemblers.
;; They have the same encoding.
(define_insn "*pause"
  [(set (match_operand:BLK 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
  ""
  "rep%; nop"
  [(set_attr "length" "2")
   (set_attr "memory" "unknown")])

;; CET instructions
;; The "@" patterns below are parameterized on the SWI48 mode of their
;; operands; the mode is part of the pattern name and selects the
;; mnemonic suffix.
(define_insn "@rdssp<mode>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
        (unspec_volatile:SWI48 [(match_operand:SWI48 1 "register_operand" "0")]
                               UNSPECV_NOP_RDSSP))]
  "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
  "rdssp<mskmodesuffix>\t%0"
  [(set_attr "length" "6")
   (set_attr "type" "other")])

(define_insn "@incssp<mode>"
  [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
                    UNSPECV_INCSSP)]
  "TARGET_SHSTK || (flag_cf_protection & CF_RETURN)"
  "incssp<mskmodesuffix>\t%0"
  [(set_attr "length" "4")
   (set_attr "type" "other")])

(define_insn "saveprevssp"
  [(unspec_volatile [(const_int 0)] UNSPECV_SAVEPREVSSP)]
  "TARGET_SHSTK"
  "saveprevssp"
  [(set_attr "length" "5")
   (set_attr "type" "other")])

(define_insn "rstorssp"
  [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")]
                    UNSPECV_RSTORSSP)]
  "TARGET_SHSTK"
  "rstorssp\t%0"
  [(set_attr "length" "5")
   (set_attr "type" "other")])

(define_insn "@wrss<mode>"
  [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
                     (match_operand:SWI48 1 "memory_operand" "m")]
                    UNSPECV_WRSS)]
  "TARGET_SHSTK"
  "wrss<mskmodesuffix>\t%0, %1"
  [(set_attr "length" "3")
   (set_attr "type" "other")])

(define_insn "@wruss<mode>"
  [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
                     (match_operand:SWI48 1 "memory_operand" "m")]
                    UNSPECV_WRUSS)]
  "TARGET_SHSTK"
  "wruss<mskmodesuffix>\t%0, %1"
  [(set_attr "length" "4")
   (set_attr "type" "other")])

(define_insn "setssbsy"
  [(unspec_volatile [(const_int 0)] UNSPECV_SETSSBSY)]
  "TARGET_SHSTK"
  "setssbsy"
  [(set_attr "length" "4")
   (set_attr "type" "other")])

(define_insn "clrssbsy"
  [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")]
                    UNSPECV_CLRSSBSY)]
  "TARGET_SHSTK"
  "clrssbsy\t%0"
  [(set_attr "length" "4")
   (set_attr "type" "other")])

;; Emits "endbr64" or "endbr32" depending on TARGET_64BIT.
(define_insn "nop_endbr"
  [(unspec_volatile [(const_int 0)] UNSPECV_NOP_ENDBR)]
  "(flag_cf_protection & CF_BRANCH)"
{
  return TARGET_64BIT ? "endbr64" : "endbr32";
}
  [(set_attr "length" "4")
   (set_attr "length_immediate" "0")
   (set_attr "modrm" "0")])

;; For RTM support
(define_expand "xbegin"
  [(set (match_operand:SI 0 "register_operand")
        (unspec_volatile:SI [(const_int 0)] UNSPECV_XBEGIN))]
  "TARGET_RTM"
{
  rtx_code_label *label = gen_label_rtx ();

  /* xbegin is emitted as jump_insn, so reload won't be able
     to reload its operand.  Force the value into AX hard register.  */
  rtx ax_reg = gen_rtx_REG (SImode, AX_REG);
  emit_move_insn (ax_reg, constm1_rtx);

  emit_jump_insn (gen_xbegin_1 (ax_reg, label));

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operands[0], ax_reg);

  DONE;
})

;; Operand 0 is read and written ("+a"); operand 1 is the label used
;; by the jump part of the pattern.
(define_insn "xbegin_1"
  [(set (pc)
        (if_then_else (ne (unspec [(const_int 0)] UNSPEC_XBEGIN_ABORT)
                          (const_int 0))
                      (label_ref (match_operand 1))
                      (pc)))
   (set (match_operand:SI 0 "register_operand" "+a")
        (unspec_volatile:SI [(match_dup 0)] UNSPECV_XBEGIN))]
  "TARGET_RTM"
  "xbegin\t%l1"
  [(set_attr "type" "other")
   (set_attr "length" "6")])

(define_insn "xend"
  [(unspec_volatile [(const_int 0)] UNSPECV_XEND)]
  "TARGET_RTM"
  "xend"
  [(set_attr "type" "other")
   (set_attr "length" "3")])

(define_insn "xabort"
  [(unspec_volatile [(match_operand:SI 0 "const_0_to_255_operand")]
                    UNSPECV_XABORT)]
  "TARGET_RTM"
  "xabort\t%0"
  [(set_attr "type" "other")
   (set_attr "length" "3")])

;; Emits xtest_1 and then materializes the NE condition on the CCZ
;; flags into operand 0.
(define_expand "xtest"
  [(set (match_operand:QI 0 "register_operand")
        (unspec_volatile:QI [(const_int 0)] UNSPECV_XTEST))]
  "TARGET_RTM"
{
  emit_insn (gen_xtest_1 ());

  ix86_expand_setcc (operands[0], NE,
                     gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
  DONE;
})

(define_insn "xtest_1"
  [(set (reg:CCZ FLAGS_REG)
        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_XTEST))]
  "TARGET_RTM"
  "xtest"
  [(set_attr "type" "other")
   (set_attr "length" "3")])

(define_insn "clwb"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
                    UNSPECV_CLWB)]
  "TARGET_CLWB"
  "clwb\t%a0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

(define_insn "clflushopt"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
                    UNSPECV_CLFLUSHOPT)]
  "TARGET_CLFLUSHOPT"
  "clflushopt\t%a0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])

;; MONITORX and MWAITX
(define_insn "mwaitx"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
                     (match_operand:SI 1 "register_operand" "a")
                     (match_operand:SI 2 "register_operand" "b")]
                    UNSPECV_MWAITX)]
  "TARGET_MWAITX"
;; 64bit version is "mwaitx %rax,%rcx,%rbx". But only lower 32bits are used.
;; Since 32bit register operands are implicitly zero extended to 64bit,
;; we only need to set up 32bit registers.
  "mwaitx"
  [(set_attr "length" "3")])

(define_insn "@monitorx_<mode>"
  [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")
                     (match_operand:SI 2 "register_operand" "d")]
                    UNSPECV_MONITORX)]
  "TARGET_MWAITX"
;; 64bit version is "monitorx %rax,%rcx,%rdx". But only lower 32bits in
;; RCX and RDX are used.  Since 32bit register operands are implicitly
;; zero extended to 64bit, we only need to set up 32bit registers.
  "%^monitorx"
  [(set (attr "length")
        (symbol_ref ("(Pmode != word_mode) + 3")))])

;; CLZERO
(define_insn "@clzero_<mode>"
  [(unspec_volatile [(match_operand:P 0 "register_operand" "a")]
                    UNSPECV_CLZERO)]
  "TARGET_CLZERO"
  "clzero"
  [(set_attr "length" "3")
   (set_attr "memory" "unknown")])

;; RDPKRU and WRPKRU

;; The expander supplies a zeroed SImode input register (operand 1) and
;; a fresh SImode register (operand 2) that the insn sets to zero.
(define_expand "rdpkru"
  [(parallel
     [(set (match_operand:SI 0 "register_operand")
           (unspec_volatile:SI [(match_dup 1)] UNSPECV_PKU))
      (set (match_dup 2) (const_int 0))])]
  "TARGET_PKU"
{
  operands[1] = force_reg (SImode, const0_rtx);
  operands[2] = gen_reg_rtx (SImode);
})

(define_insn "*rdpkru"
  [(set (match_operand:SI 0 "register_operand" "=a")
        (unspec_volatile:SI [(match_operand:SI 2 "register_operand" "c")]
                            UNSPECV_PKU))
   (set (match_operand:SI 1 "register_operand" "=d")
        (const_int 0))]
  "TARGET_PKU"
  "rdpkru"
  [(set_attr "type" "other")])

;; The expander supplies two zeroed SImode registers as operands 1 and 2.
(define_expand "wrpkru"
  [(unspec_volatile:SI
     [(match_operand:SI 0 "register_operand")
      (match_dup 1) (match_dup 2)] UNSPECV_PKU)]
  "TARGET_PKU"
{
  operands[1] = force_reg (SImode, const0_rtx);
  operands[2] = force_reg (SImode, const0_rtx);
})

(define_insn "*wrpkru"
  [(unspec_volatile:SI
     [(match_operand:SI 0 "register_operand" "a")
      (match_operand:SI 1 "register_operand" "d")
      (match_operand:SI 2 "register_operand" "c")] UNSPECV_PKU)]
  "TARGET_PKU"
  "wrpkru"
  [(set_attr "type" "other")])

(define_insn "rdpid"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec_volatile:SI [(const_int 0)] UNSPECV_RDPID))]
  "!TARGET_64BIT && TARGET_RDPID"
  "rdpid\t%0"
  [(set_attr "type" "other")])

(define_insn "rdpid_rex64"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec_volatile:DI [(const_int 0)] UNSPECV_RDPID))]
  "TARGET_64BIT && TARGET_RDPID"
  "rdpid\t%0"
  [(set_attr "type" "other")])

;; Intrinsics for > i486

(define_insn "wbinvd"
  [(unspec_volatile [(const_int 0)] UNSPECV_WBINVD)]
  ""
  "wbinvd"
  [(set_attr "type" "other")])

(define_insn "wbnoinvd"
  [(unspec_volatile [(const_int 0)] UNSPECV_WBNOINVD)]
  "TARGET_WBNOINVD"
  "wbnoinvd"
  [(set_attr "type" "other")])

;; MOVDIRI and MOVDIR64B

(define_insn "movdiri<mode>"
  [(set (match_operand:SWI48 0 "memory_operand" "=m")
        (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
                      UNSPEC_MOVDIRI))]
  "TARGET_MOVDIRI"
  "movdiri\t{%1, %0|%0, %1}"
  [(set_attr "type" "other")])

;; The destination is an XImode MEM whose address is register operand 0.
(define_insn "@movdir64b_<mode>"
  [(set (mem:XI (match_operand:P 0 "register_operand" "r"))
        (unspec:XI [(match_operand:XI 1 "memory_operand" "m")]
                   UNSPEC_MOVDIR64B))]
  "TARGET_MOVDIR64B"
  "movdir64b\t{%1, %0|%0, %1}"
  [(set_attr "type" "other")])

;; TSXLDTRK
(define_int_iterator TSXLDTRK [UNSPECV_XSUSLDTRK UNSPECV_XRESLDTRK])
(define_int_attr tsxldtrk [(UNSPECV_XSUSLDTRK "xsusldtrk")
                           (UNSPECV_XRESLDTRK "xresldtrk")])
(define_insn "<tsxldtrk>"
  [(unspec_volatile [(const_int 0)] TSXLDTRK)]
  "TARGET_TSXLDTRK"
  "<tsxldtrk>"
  [(set_attr "type" "other")
   (set_attr "length" "4")])

;; ENQCMD and ENQCMDS

(define_int_iterator ENQCMD [UNSPECV_ENQCMD UNSPECV_ENQCMDS])
(define_int_attr enqcmd_sfx [(UNSPECV_ENQCMD "") (UNSPECV_ENQCMDS "s")])

(define_insn "@enqcmd<enqcmd_sfx>_<mode>"
  [(set (reg:CCZ FLAGS_REG)
        (unspec_volatile:CCZ [(match_operand:P 0 "register_operand" "r")
                              (match_operand:XI 1 "memory_operand" "m")]
                             ENQCMD))]
  "TARGET_ENQCMD"
  "enqcmd<enqcmd_sfx>\t{%1, %0|%0, %1}"
  [(set_attr "type" "other")])

;; UINTR
(define_int_iterator UINTR [UNSPECV_CLUI UNSPECV_STUI])
(define_int_attr uintr [(UNSPECV_CLUI "clui")
                        (UNSPECV_STUI "stui")])

(define_insn "<uintr>"
  [(unspec_volatile [(const_int 0)] UINTR)]
  "TARGET_UINTR && TARGET_64BIT"
  "<uintr>"
  [(set_attr "type" "other")
   (set_attr "length" "4")])

(define_insn "testui"
  [(set (reg:CCC FLAGS_REG)
        (unspec_volatile:CCC [(const_int 0)] UNSPECV_TESTUI))]
  "TARGET_UINTR && TARGET_64BIT"
  "testui"
  [(set_attr "type" "other")
   (set_attr "length" "4")])

(define_insn "senduipi"
  [(unspec_volatile
    [(match_operand:DI 0 "register_operand" "r")]
    UNSPECV_SENDUIPI)]
  "TARGET_UINTR && TARGET_64BIT"
  "senduipi\t%0"
  [(set_attr "type" "other")
   (set_attr "length" "4")])

;; WAITPKG
;; umwait / tpause: the !TARGET_64BIT forms take one DImode operand with
;; constraint "A"; the _rex64 forms take two SImode operands with
;; constraints "a" and "d".

(define_insn "umwait"
  [(set (reg:CCC FLAGS_REG)
        (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
                              (match_operand:DI 1 "register_operand" "A")]
                             UNSPECV_UMWAIT))]
  "!TARGET_64BIT && TARGET_WAITPKG"
  "umwait\t%0"
  [(set_attr "length" "3")])

(define_insn "umwait_rex64"
  [(set (reg:CCC FLAGS_REG)
        (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
                              (match_operand:SI 1 "register_operand" "a")
                              (match_operand:SI 2 "register_operand" "d")]
                             UNSPECV_UMWAIT))]
  "TARGET_64BIT && TARGET_WAITPKG"
  "umwait\t%0"
  [(set_attr "length" "3")])

(define_insn "@umonitor_<mode>"
  [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
                    UNSPECV_UMONITOR)]
  "TARGET_WAITPKG"
  "umonitor\t%0"
  [(set (attr "length")
        (symbol_ref ("(Pmode != word_mode) + 3")))])

(define_insn "tpause"
  [(set (reg:CCC FLAGS_REG)
        (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
                              (match_operand:DI 1 "register_operand" "A")]
                             UNSPECV_TPAUSE))]
  "!TARGET_64BIT && TARGET_WAITPKG"
  "tpause\t%0"
  [(set_attr "length" "3")])

(define_insn "tpause_rex64"
  [(set (reg:CCC FLAGS_REG)
        (unspec_volatile:CCC [(match_operand:SI 0 "register_operand" "r")
                              (match_operand:SI 1 "register_operand" "a")
                              (match_operand:SI 2 "register_operand" "d")]
                             UNSPECV_TPAUSE))]
  "TARGET_64BIT && TARGET_WAITPKG"
  "tpause\t%0"
  [(set_attr "length" "3")])

(define_insn "cldemote"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
                    UNSPECV_CLDEMOTE)]
  "TARGET_CLDEMOTE"
  "cldemote\t%a0"
  [(set_attr "type" "other")
   (set_attr "memory" "unknown")])

(define_insn "speculation_barrier"
  [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)]
  ""
  "lfence"
  [(set_attr "type" "other")
   (set_attr "length" "3")])

(define_insn "serialize"
  [(unspec_volatile [(const_int 0)] UNSPECV_SERIALIZE)]
  "TARGET_SERIALIZE"
  "serialize"
  [(set_attr "type" "other")
   (set_attr "length" "3")])

;; All output is produced by ix86_output_patchable_area, so the template
;; itself is empty; operand 0 also serves as the insn length.
(define_insn "patchable_area"
  [(unspec_volatile [(match_operand 0 "const_int_operand")
                     (match_operand 1 "const_int_operand")]
                    UNSPECV_PATCHABLE_AREA)]
  ""
{
  ix86_output_patchable_area (INTVAL (operands[0]),
                              INTVAL (operands[1]) != 0);
  return "";
}
  [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
   (set_attr "length_immediate" "0")
   (set_attr "modrm" "0")])

(define_insn "hreset"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")]
                    UNSPECV_HRESET)]
  "TARGET_HRESET"
  "hreset\t{$0|0}"
  [(set_attr "type" "other")
   (set_attr "length" "4")])

;; Spaceship optimization
(define_expand "spaceship<mode>3"
  [(match_operand:SI 0 "register_operand")
   (match_operand:MODEF 1 "cmp_fp_expander_operand")
   (match_operand:MODEF 2 "cmp_fp_expander_operand")]
  "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
   && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
{
  ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
  DONE;
})

(define_expand "spaceshipxf3"
  [(match_operand:SI 0 "register_operand")
   (match_operand:XF 1 "nonmemory_operand")
   (match_operand:XF 2 "nonmemory_operand")]
  "TARGET_80387 && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
{
  ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
  DONE;
})

;; Defined because the generic expand_builtin_issignaling for XFmode
;; only tests for sNaNs, but i387 treats also pseudo numbers as always
;; signaling.
(define_expand "issignalingxf2"
  [(match_operand:SI 0 "register_operand")
   (match_operand:XF 1 "general_operand")]
  ""
{
  rtx temp = operands[1];
  /* The test below reads the value piecewise from memory, so spill a
     non-memory operand to a stack temporary first.  */
  if (!MEM_P (temp))
    {
      rtx mem = assign_stack_temp (XFmode, GET_MODE_SIZE (XFmode));
      emit_move_insn (mem, temp);
      temp = mem;
    }
  rtx ex = adjust_address (temp, HImode, 8);
  rtx hi = adjust_address (temp, SImode, 4);
  rtx lo = adjust_address (temp, SImode, 0);
  rtx val = GEN_INT (HOST_WIDE_INT_M1U << 30);
  rtx mask = GEN_INT (0x7fff);
  rtx bit = GEN_INT (HOST_WIDE_INT_1U << 30);
  /* Expand to:
     ((ex & mask) && (int) hi >= 0)
     || ((ex & mask) == mask && ((hi ^ bit) | ((lo | -lo) >> 31)) > val).  */
  rtx nlo = expand_unop (SImode, neg_optab, lo, NULL_RTX, 0);
  lo = expand_binop (SImode, ior_optab, lo, nlo,
                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
  lo = expand_shift (RSHIFT_EXPR, SImode, lo, 31, NULL_RTX, 1);
  temp = expand_binop (SImode, xor_optab, hi, bit,
                       NULL_RTX, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (SImode, ior_optab, temp, lo,
                       NULL_RTX, 1, OPTAB_LIB_WIDEN);
  temp = emit_store_flag_force (gen_reg_rtx (SImode), GTU, temp, val,
                                SImode, 1, 1);
  ex = expand_binop (HImode, and_optab, ex, mask,
                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
  rtx temp2 = emit_store_flag_force (gen_reg_rtx (SImode), NE,
                                     ex, const0_rtx, SImode, 1, 1);
  ex = emit_store_flag_force (gen_reg_rtx (SImode), EQ,
                              ex, mask, HImode, 1, 1);
  temp = expand_binop (SImode, and_optab, temp, ex,
                       NULL_RTX, 1, OPTAB_LIB_WIDEN);
  rtx temp3 = emit_store_flag_force (gen_reg_rtx (SImode), GE,
                                     hi, const0_rtx, SImode, 0, 1);
  temp2 = expand_binop (SImode, and_optab, temp2, temp3,
                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (SImode, ior_optab, temp, temp2,
                       NULL_RTX, 1, OPTAB_LIB_WIDEN);
  emit_move_insn (operands[0], temp);
  DONE;
})

(define_insn "urdmsr"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec_volatile:DI
          [(match_operand:DI 1 "x86_64_szext_nonmemory_operand" "reZ")]
          UNSPECV_URDMSR))]
  "TARGET_USER_MSR && TARGET_64BIT"
  "urdmsr\t{%1, %0|%0, %1}"
  [(set_attr "prefix" "vex")
   (set_attr "type" "other")])

(define_insn "uwrmsr"
  [(unspec_volatile
     [(match_operand:DI 0 "x86_64_szext_nonmemory_operand" "reZ")
      (match_operand:DI 1 "register_operand" "r")]
     UNSPECV_UWRMSR)]
  "TARGET_USER_MSR && TARGET_64BIT"
  "uwrmsr\t{%1, %0|%0, %1}"
  [(set_attr "prefix" "vex")
   (set_attr "type" "other")])

(define_insn "ldtilecfg"
  [(unspec_volatile [(match_operand:XI 0 "memory_operand" "m")]
                    UNSPECV_LDTILECFG)]
  "TARGET_AMX_TILE"
  "ldtilecfg\t%0"
  [(set_attr "type" "other")
   (set_attr "prefix" "maybe_evex")
   (set_attr "memory" "load")
   (set_attr "mode" "XI")])

(define_insn "sttilecfg"
  [(set (match_operand:XI 0 "memory_operand" "=m")
        (unspec_volatile:XI [(const_int 0)] UNSPECV_STTILECFG))]
  "TARGET_AMX_TILE"
  "sttilecfg\t%0"
  [(set_attr "type" "other")
   (set_attr "prefix" "maybe_evex")
   (set_attr "memory" "store")
   (set_attr "mode" "XI")])

(include "mmx.md")
(include "sse.md")
(include "sync.md")