;; Machine description for AArch64 architecture.
;; Copyright (C) 2009-2023 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; Register numbers
(define_constants
[
(R0_REGNUM 0)
(R1_REGNUM 1)
(R2_REGNUM 2)
(R3_REGNUM 3)
(R4_REGNUM 4)
(R5_REGNUM 5)
(R6_REGNUM 6)
(R7_REGNUM 7)
(R8_REGNUM 8)
(R9_REGNUM 9)
(R10_REGNUM 10)
(R11_REGNUM 11)
(R12_REGNUM 12)
(R13_REGNUM 13)
(R14_REGNUM 14)
(R15_REGNUM 15)
(R16_REGNUM 16)
(R17_REGNUM 17)
(R18_REGNUM 18)
(R19_REGNUM 19)
(R20_REGNUM 20)
(R21_REGNUM 21)
(R22_REGNUM 22)
(R23_REGNUM 23)
(R24_REGNUM 24)
(R25_REGNUM 25)
(R26_REGNUM 26)
(R27_REGNUM 27)
(R28_REGNUM 28)
(R29_REGNUM 29)
(R30_REGNUM 30)
(SP_REGNUM 31)
(V0_REGNUM 32)
(V1_REGNUM 33)
(V2_REGNUM 34)
(V3_REGNUM 35)
(V4_REGNUM 36)
(V5_REGNUM 37)
(V6_REGNUM 38)
(V7_REGNUM 39)
(V8_REGNUM 40)
(V9_REGNUM 41)
(V10_REGNUM 42)
(V11_REGNUM 43)
(V12_REGNUM 44)
(V13_REGNUM 45)
(V14_REGNUM 46)
(V15_REGNUM 47)
(V16_REGNUM 48)
(V17_REGNUM 49)
(V18_REGNUM 50)
(V19_REGNUM 51)
(V20_REGNUM 52)
(V21_REGNUM 53)
(V22_REGNUM 54)
(V23_REGNUM 55)
(V24_REGNUM 56)
(V25_REGNUM 57)
(V26_REGNUM 58)
(V27_REGNUM 59)
(V28_REGNUM 60)
(V29_REGNUM 61)
(V30_REGNUM 62)
(V31_REGNUM 63)
(SFP_REGNUM 64)
(AP_REGNUM 65)
(CC_REGNUM 66)
;; Defined only to make the DWARF description simpler.
(VG_REGNUM 67)
(P0_REGNUM 68)
(P1_REGNUM 69)
(P2_REGNUM 70)
(P3_REGNUM 71)
(P4_REGNUM 72)
(P5_REGNUM 73)
(P6_REGNUM 74)
(P7_REGNUM 75)
(P8_REGNUM 76)
(P9_REGNUM 77)
(P10_REGNUM 78)
(P11_REGNUM 79)
(P12_REGNUM 80)
(P13_REGNUM 81)
(P14_REGNUM 82)
(P15_REGNUM 83)
(LAST_SAVED_REGNUM 83)
(FFR_REGNUM 84)
;; "FFR token": a fake register used for representing the scheduling
;; restrictions on FFR-related operations.
(FFRT_REGNUM 85)
;; The pair of scratch registers used for stack probing with -fstack-check.
;; Leave R9 alone as a possible choice for the static chain.
;; Note that the use of these registers is mutually exclusive with the use
;; of STACK_CLASH_SVE_CFA_REGNUM, which is for -fstack-clash-protection
;; rather than -fstack-check.
(PROBE_STACK_FIRST_REGNUM 10)
(PROBE_STACK_SECOND_REGNUM 11)
;; Scratch register used by stack clash protection to calculate
;; SVE CFA offsets during probing.
(STACK_CLASH_SVE_CFA_REGNUM 11)
;; Scratch registers for prologue/epilogue use.
(EP0_REGNUM 12)
(EP1_REGNUM 13)
;; A couple of call-clobbered registers that we need to reserve when
;; tracking speculation; this is not ABI, so is subject to change.
(SPECULATION_SCRATCH_REGNUM 14)
(SPECULATION_TRACKER_REGNUM 15)
;; Scratch registers used in frame layout.
(IP0_REGNUM 16)
(IP1_REGNUM 17)
(FP_REGNUM 29)
(LR_REGNUM 30)
]
)
(define_c_enum "unspec" [
UNSPEC_AUTIA1716
UNSPEC_AUTIB1716
UNSPEC_AUTIASP
UNSPEC_AUTIBSP
UNSPEC_CALLEE_ABI
UNSPEC_CASESI
UNSPEC_CPYMEM
UNSPEC_CRC32B
UNSPEC_CRC32CB
UNSPEC_CRC32CH
UNSPEC_CRC32CW
UNSPEC_CRC32CX
UNSPEC_CRC32H
UNSPEC_CRC32W
UNSPEC_CRC32X
UNSPEC_FCVTZS
UNSPEC_FCVTZU
UNSPEC_FJCVTZS
UNSPEC_FRINT32Z
UNSPEC_FRINT32X
UNSPEC_FRINT64Z
UNSPEC_FRINT64X
UNSPEC_URECPE
UNSPEC_FRECPE
UNSPEC_FRECPS
UNSPEC_FRECPX
UNSPEC_FRINTA
UNSPEC_FRINTI
UNSPEC_FRINTM
UNSPEC_FRINTN
UNSPEC_FRINTP
UNSPEC_FRINTX
UNSPEC_FRINTZ
UNSPEC_GOTSMALLPIC
UNSPEC_GOTSMALLPIC28K
UNSPEC_GOTSMALLTLS
UNSPEC_GOTTINYPIC
UNSPEC_GOTTINYTLS
UNSPEC_LD1
UNSPEC_LD2
UNSPEC_LD2_DREG
UNSPEC_LD2_DUP
UNSPEC_LD3
UNSPEC_LD3_DREG
UNSPEC_LD3_DUP
UNSPEC_LD4
UNSPEC_LD4_DREG
UNSPEC_LD4_DUP
UNSPEC_LD2_LANE
UNSPEC_LD3_LANE
UNSPEC_LD4_LANE
UNSPEC_LD64B
UNSPEC_ST64B
UNSPEC_ST64BV
UNSPEC_ST64BV_RET
UNSPEC_ST64BV0
UNSPEC_ST64BV0_RET
UNSPEC_MB
UNSPEC_MOVMEM
UNSPEC_NOP
UNSPEC_PACIA1716
UNSPEC_PACIB1716
UNSPEC_PACIASP
UNSPEC_PACIBSP
UNSPEC_PRLG_STK
UNSPEC_REV
UNSPEC_RBIT
UNSPEC_SABAL
UNSPEC_SABAL2
UNSPEC_SABDL
UNSPEC_SABDL2
UNSPEC_SADALP
UNSPEC_SCVTF
UNSPEC_SETMEM
UNSPEC_SISD_NEG
UNSPEC_SISD_SSHL
UNSPEC_SISD_USHL
UNSPEC_SSHL_2S
UNSPEC_ST1
UNSPEC_ST2
UNSPEC_ST3
UNSPEC_ST4
UNSPEC_ST2_LANE
UNSPEC_ST3_LANE
UNSPEC_ST4_LANE
UNSPEC_TLS
UNSPEC_TLSDESC
UNSPEC_TLSLE12
UNSPEC_TLSLE24
UNSPEC_TLSLE32
UNSPEC_TLSLE48
UNSPEC_UABAL
UNSPEC_UABAL2
UNSPEC_UABDL
UNSPEC_UABDL2
UNSPEC_UADALP
UNSPEC_UCVTF
UNSPEC_USHL_2S
UNSPEC_VSTRUCTDUMMY
UNSPEC_SSP_SYSREG
UNSPEC_SP_SET
UNSPEC_SP_TEST
UNSPEC_RSHRN
UNSPEC_RSQRT
UNSPEC_RSQRTE
UNSPEC_RSQRTS
UNSPEC_NZCV
UNSPEC_XPACLRI
UNSPEC_LD1_SVE
UNSPEC_ST1_SVE
UNSPEC_LDNT1_SVE
UNSPEC_STNT1_SVE
UNSPEC_LD1RQ
UNSPEC_LD1_GATHER
UNSPEC_LDFF1_GATHER
UNSPEC_LDNT1_GATHER
UNSPEC_ST1_SCATTER
UNSPEC_STNT1_SCATTER
UNSPEC_PRED_X
UNSPEC_PRED_Z
UNSPEC_PTEST
UNSPEC_PTRUE
UNSPEC_UNPACKSHI
UNSPEC_UNPACKUHI
UNSPEC_UNPACKSLO
UNSPEC_UNPACKULO
UNSPEC_PACK
UNSPEC_WHILEGE
UNSPEC_WHILEGT
UNSPEC_WHILEHI
UNSPEC_WHILEHS
UNSPEC_WHILELE
UNSPEC_WHILELO
UNSPEC_WHILELS
UNSPEC_WHILELT
UNSPEC_WHILERW
UNSPEC_WHILEWR
UNSPEC_LDN
UNSPEC_STN
UNSPEC_INSR
UNSPEC_CLASTA
UNSPEC_CLASTB
UNSPEC_FADDA
UNSPEC_REV_SUBREG
UNSPEC_REINTERPRET
UNSPEC_SPECULATION_TRACKER
UNSPEC_SPECULATION_TRACKER_REV
UNSPEC_COPYSIGN
UNSPEC_TTEST ; Represent transaction test.
UNSPEC_UPDATE_FFR
UNSPEC_UPDATE_FFRT
UNSPEC_RDFFR
UNSPEC_WRFFR
;; Represents an SVE-style lane index, in which the indexing applies
;; within the containing 128-bit block.
UNSPEC_SVE_LANE_SELECT
UNSPEC_SVE_CNT_PAT
UNSPEC_SVE_PREFETCH
UNSPEC_SVE_PREFETCH_GATHER
UNSPEC_SVE_COMPACT
UNSPEC_SVE_SPLICE
UNSPEC_GEN_TAG ; Generate a 4-bit MTE tag.
UNSPEC_GEN_TAG_RND ; Generate a random 4-bit MTE tag.
UNSPEC_TAG_SPACE ; Translate address to MTE tag address space.
UNSPEC_LD1RO
UNSPEC_SALT_ADDR
UNSPECV_PATCHABLE_AREA
])
(define_c_enum "unspecv" [
UNSPECV_EH_RETURN ; Represent EH_RETURN
UNSPECV_GET_FPCR ; Represent fetch of FPCR content.
UNSPECV_SET_FPCR ; Represent assign of FPCR content.
UNSPECV_GET_FPSR ; Represent fetch of FPSR content.
UNSPECV_SET_FPSR ; Represent assign of FPSR content.
UNSPECV_BLOCKAGE ; Represent a blockage
UNSPECV_PROBE_STACK_RANGE ; Represent stack range probing.
UNSPECV_SPECULATION_BARRIER ; Represent speculation barrier.
UNSPECV_BTI_NOARG ; Represent BTI.
UNSPECV_BTI_C ; Represent BTI c.
UNSPECV_BTI_J ; Represent BTI j.
UNSPECV_BTI_JC ; Represent BTI jc.
UNSPECV_TSTART ; Represent transaction start.
UNSPECV_TCOMMIT ; Represent transaction commit.
UNSPECV_TCANCEL ; Represent transaction cancel.
UNSPEC_RNDR ; Represent RNDR
UNSPEC_RNDRRS ; Represent RNDRRS
]
)
;; These constants are used as a const_int in various SVE unspecs
;; to indicate whether the governing predicate is known to be a PTRUE.
(define_constants
[; Indicates that the predicate might not be a PTRUE.
(SVE_MAYBE_NOT_PTRUE 0)
; Indicates that the predicate is known to be a PTRUE.
(SVE_KNOWN_PTRUE 1)])
;; These constants are used as a const_int in predicated SVE FP arithmetic
;; to indicate whether the operation is allowed to make additional lanes
;; active without worrying about the effect on faulting behavior.
(define_constants
[; Indicates either that all lanes are active or that the instruction may
; operate on inactive inputs even if doing so could induce a fault.
(SVE_RELAXED_GP 0)
; Indicates that some lanes might be inactive and that the instruction
; must not operate on inactive inputs if doing so could induce a fault.
(SVE_STRICT_GP 1)])
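;; As an illustration (a sketch modelled on the predicated FP patterns in
;; aarch64-sve.md, not a pattern defined in this file), such a constant is
;; passed as an extra unspec operand alongside the governing predicate:
;;
;;   (unspec:VNx4SF
;;     [(match_operand:VNx4BI 1 "register_operand" "Upl")
;;      (const_int SVE_RELAXED_GP)
;;      (match_operand:VNx4SF 2 "register_operand" "w")
;;      (match_operand:VNx4SF 3 "register_operand" "w")]
;;     UNSPEC_COND_FADD)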
(include "constraints.md")
(include "predicates.md")
(include "iterators.md")
;; -------------------------------------------------------------------
;; Instruction types and attributes
;; -------------------------------------------------------------------
; The "type" attribute is included here from AArch32 backend to be able
; to share pipeline descriptions.
(include "../arm/types.md")
;; It is important to set the fp or simd attributes to yes when a pattern
;; alternative uses the FP or SIMD register files, usually signified by use of
;; the 'w' constraint. This will ensure that the alternative will be
;; disabled when compiling with -mgeneral-regs-only or with the +nofp/+nosimd
;; architecture extensions. If all the alternatives in a pattern use the
;; FP or SIMD registers then the pattern predicate should include TARGET_FLOAT
;; or TARGET_SIMD.
;; Attributes of the architecture required to support the instruction (or
;; alternative). This attribute is used to compute attribute "enabled";
;; use type "any" to enable an alternative in all cases.
;;
;; As a convenience, "fp_q" means "fp" + the ability to move between
;; Q registers and is equivalent to "simd".
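;; For example, a pattern whose first alternative works everywhere and
;; whose second alternative needs Advanced SIMD would mark its
;; alternatives with:
;;
;;   (set_attr "arch" "*,simd")
;;
;; The second alternative is then enabled only when TARGET_SIMD holds
;; (see "arch_enabled" below).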
(define_enum "arches" [ any rcpc8_4 fp fp_q simd sve fp16])
(define_enum_attr "arch" "arches" (const_string "any"))
;; [For compatibility with Arm in pipeline models]
;; Attribute that specifies whether or not the instruction touches fp
;; registers.
;; Note that this attribute is not used anywhere in either the arm or aarch64
;; backends except in the scheduling description for xgene1. In that
;; scheduling description this attribute is used to subclass the load_4 and
;; load_8 types.
(define_attr "fp" "no,yes"
(if_then_else
(eq_attr "arch" "fp")
(const_string "yes")
(const_string "no")))
(define_attr "arch_enabled" "no,yes"
(if_then_else
(ior
(eq_attr "arch" "any")
(and (eq_attr "arch" "rcpc8_4")
(match_test "AARCH64_ISA_RCPC8_4"))
(and (eq_attr "arch" "fp")
(match_test "TARGET_FLOAT"))
(and (eq_attr "arch" "fp_q, simd")
(match_test "TARGET_SIMD"))
(and (eq_attr "arch" "fp16")
(match_test "TARGET_FP_F16INST"))
(and (eq_attr "arch" "sve")
(match_test "TARGET_SVE")))
(const_string "yes")
(const_string "no")))
;; Attribute that controls whether an alternative is enabled or not.
;; Currently it is only used to disable alternatives which touch fp or simd
;; registers when -mgeneral-regs-only is specified or to require a special
;; architecture support.
(define_attr "enabled" "no,yes" (attr "arch_enabled"))
;; Attribute that specifies whether we are dealing with a branch to a
;; label that is far away, i.e. further away than the maximum/minimum
;; representable in a signed 21-bit number.
;; 0 :=: no
;; 1 :=: yes
(define_attr "far_branch" "" (const_int 0))
;; Attribute that specifies whether the alternative uses MOVPRFX.
(define_attr "movprfx" "no,yes" (const_string "no"))
;; Attribute to specify that an alternative has the length of a single
;; instruction plus a speculation barrier.
(define_attr "sls_length" "none,retbr,casesi" (const_string "none"))
(define_attr "length" ""
(cond [(eq_attr "movprfx" "yes")
(const_int 8)
(eq_attr "sls_length" "retbr")
(cond [(match_test "!aarch64_harden_sls_retbr_p ()") (const_int 4)
(match_test "TARGET_SB") (const_int 8)]
(const_int 12))
(eq_attr "sls_length" "casesi")
(cond [(match_test "!aarch64_harden_sls_retbr_p ()") (const_int 16)
(match_test "TARGET_SB") (const_int 20)]
(const_int 24))
]
(const_int 4)))
;; Strictly for compatibility with AArch32 in pipeline models, since AArch64 has
;; no predicated insns.
(define_attr "predicated" "yes,no" (const_string "no"))
;; Set to true on an insn that requires the speculation tracking state to be
;; in the tracking register before the insn issues. Otherwise the compiler
;; may choose to hold the tracking state encoded in SP.
(define_attr "speculation_barrier" "true,false" (const_string "false"))
;; -------------------------------------------------------------------
;; Pipeline descriptions and scheduling
;; -------------------------------------------------------------------
;; Processor types.
(include "aarch64-tune.md")
;; Scheduling
(include "../arm/cortex-a53.md")
(include "../arm/cortex-a57.md")
(include "../arm/exynos-m1.md")
(include "falkor.md")
(include "saphira.md")
(include "thunderx.md")
(include "../arm/xgene1.md")
(include "thunderx2t99.md")
(include "tsv110.md")
(include "thunderx3t110.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
;; -------------------------------------------------------------------
(define_insn "indirect_jump"
[(set (pc) (match_operand:DI 0 "register_operand" "r"))]
""
{
output_asm_insn ("br\\t%0", operands);
return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
}
[(set_attr "type" "branch")
(set_attr "sls_length" "retbr")]
)
(define_insn "jump"
[(set (pc) (label_ref (match_operand 0 "" "")))]
""
"b\\t%l0"
[(set_attr "type" "branch")]
)
(define_expand "cbranch4"
[(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
[(match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "aarch64_plus_operand")])
(label_ref (match_operand 3 "" ""))
(pc)))]
""
"
operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
operands[2]);
operands[2] = const0_rtx;
"
)
(define_expand "cbranch4"
[(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
[(match_operand:GPF 1 "register_operand")
(match_operand:GPF 2 "aarch64_fp_compare_operand")])
(label_ref (match_operand 3 "" ""))
(pc)))]
""
"
operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
operands[2]);
operands[2] = const0_rtx;
"
)
(define_expand "cbranchcc4"
[(set (pc) (if_then_else
(match_operator 0 "aarch64_comparison_operator"
[(match_operand 1 "cc_register")
(match_operand 2 "const0_operand")])
(label_ref (match_operand 3 "" ""))
(pc)))]
""
"")
(define_insn "@ccmp"
[(set (match_operand:CC_ONLY 1 "cc_register" "")
(if_then_else:CC_ONLY
(match_operator 4 "aarch64_comparison_operator"
[(match_operand 0 "cc_register" "")
(const_int 0)])
(compare:CC_ONLY
(match_operand:GPI 2 "register_operand" "r,r,r")
(match_operand:GPI 3 "aarch64_ccmp_operand" "r,Uss,Usn"))
(unspec:CC_ONLY
[(match_operand 5 "immediate_operand")]
UNSPEC_NZCV)))]
""
"@
ccmp\\t%<w>2, %<w>3, %k5, %m4
ccmp\\t%<w>2, %<w>3, %k5, %m4
ccmn\\t%<w>2, #%n3, %k5, %m4"
[(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
)
(define_insn "@ccmp"
[(set (match_operand:CCFP_CCFPE 1 "cc_register" "")
(if_then_else:CCFP_CCFPE
(match_operator 4 "aarch64_comparison_operator"
[(match_operand 0 "cc_register" "")
(const_int 0)])
(compare:CCFP_CCFPE
(match_operand:GPF 2 "register_operand" "w")
(match_operand:GPF 3 "register_operand" "w"))
(unspec:CCFP_CCFPE
[(match_operand 5 "immediate_operand")]
UNSPEC_NZCV)))]
"TARGET_FLOAT"
"fccmp\\t%2, %3, %k5, %m4"
[(set_attr "type" "fccmp")]
)
(define_insn "@ccmp_rev"
[(set (match_operand:CC_ONLY 1 "cc_register" "")
(if_then_else:CC_ONLY
(match_operator 4 "aarch64_comparison_operator"
[(match_operand 0 "cc_register" "")
(const_int 0)])
(unspec:CC_ONLY
[(match_operand 5 "immediate_operand")]
UNSPEC_NZCV)
(compare:CC_ONLY
(match_operand:GPI 2 "register_operand" "r,r,r")
(match_operand:GPI 3 "aarch64_ccmp_operand" "r,Uss,Usn"))))]
""
"@
ccmp\\t%<w>2, %<w>3, %k5, %M4
ccmp\\t%<w>2, %<w>3, %k5, %M4
ccmn\\t%<w>2, #%n3, %k5, %M4"
[(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
)
(define_insn "@ccmp_rev"
[(set (match_operand:CCFP_CCFPE 1 "cc_register" "")
(if_then_else:CCFP_CCFPE
(match_operator 4 "aarch64_comparison_operator"
[(match_operand 0 "cc_register" "")
(const_int 0)])
(unspec:CCFP_CCFPE
[(match_operand 5 "immediate_operand")]
UNSPEC_NZCV)
(compare:CCFP_CCFPE
(match_operand:GPF 2 "register_operand" "w")
(match_operand:GPF 3 "register_operand" "w"))))]
"TARGET_FLOAT"
"fccmp\\t%2, %3, %k5, %M4"
[(set_attr "type" "fccmp")]
)
;; Expansion of signed mod by a power of 2 using CSNEG.
;; For x0 % n where n is a power of 2 produce:
;; negs x1, x0
;; and x0, x0, #(n - 1)
;; and x1, x1, #(n - 1)
;; csneg x0, x0, x1, mi
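;; For example (a sketch; the exact register choice is up to the
;; allocator):
;;   int f (int x) { return x % 16; }
;; becomes the sequence above with #(n - 1) == #15.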
(define_expand "mod3"
[(match_operand:GPI 0 "register_operand")
(match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "const_int_operand")]
""
{
HOST_WIDE_INT val = INTVAL (operands[2]);
if (val <= 0
|| exact_log2 (val) <= 0
|| !aarch64_bitmask_imm (val - 1, <MODE>mode))
FAIL;
rtx mask = GEN_INT (val - 1);
/* In the special case of x0 % 2 we can do the even shorter:
cmp x0, xzr
and x0, x0, 1
cneg x0, x0, lt. */
if (val == 2)
{
rtx masked = gen_reg_rtx (<MODE>mode);
rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx);
emit_insn (gen_and<mode>3 (masked, operands[1], mask));
rtx x = gen_rtx_LT (VOIDmode, ccreg, const0_rtx);
emit_insn (gen_csneg3<mode>_insn (operands[0], x, masked, masked));
DONE;
}
rtx neg_op = gen_reg_rtx (<MODE>mode);
rtx_insn *insn = emit_insn (gen_neg<mode>2_compare0 (neg_op, operands[1]));
/* Extract the condition register and mode. */
rtx cmp = XVECEXP (PATTERN (insn), 0, 0);
rtx cc_reg = SET_DEST (cmp);
rtx cond = gen_rtx_GE (VOIDmode, cc_reg, const0_rtx);
rtx masked_pos = gen_reg_rtx (<MODE>mode);
emit_insn (gen_and<mode>3 (masked_pos, operands[1], mask));
rtx masked_neg = gen_reg_rtx (<MODE>mode);
emit_insn (gen_and<mode>3 (masked_neg, neg_op, mask));
emit_insn (gen_csneg3<mode>_insn (operands[0], cond,
masked_neg, masked_pos));
DONE;
}
)
(define_insn "condjump"
[(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
[(match_operand 1 "cc_register" "") (const_int 0)])
(label_ref (match_operand 2 "" ""))
(pc)))]
""
{
/* GCC's traditional style has been to use "beq" instead of "b.eq", etc.,
but the "." is required for SVE conditions. */
bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode;
if (get_attr_length (insn) == 8)
return aarch64_gen_far_branch (operands, 2, "Lbcond",
use_dot_p ? "b.%M0\\t" : "b%M0\\t");
else
return use_dot_p ? "b.%m0\\t%l2" : "b%m0\\t%l2";
}
[(set_attr "type" "branch")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
(lt (minus (match_dup 2) (pc)) (const_int 1048572)))
(const_int 4)
(const_int 8)))
(set (attr "far_branch")
(if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
(lt (minus (match_dup 2) (pc)) (const_int 1048572)))
(const_int 0)
(const_int 1)))]
)
;; For a 24-bit immediate CST we can optimize the compare for equality
;; and branch sequence from:
;; mov x0, #imm1
;; movk x0, #imm2, lsl 16 /* x0 contains CST. */
;; cmp x1, x0
;; b<ne,eq> .Label
;; into the shorter:
;; sub x0, x1, #(CST & 0xfff000)
;; subs x0, x0, #(CST & 0x000fff)
;; b<ne,eq> .Label
(define_insn_and_split "*compare_condjump"
[(set (pc) (if_then_else (EQL
(match_operand:GPI 0 "register_operand" "r")
(match_operand:GPI 1 "aarch64_imm24" "n"))
(label_ref:P (match_operand 2 "" ""))
(pc)))]
"!aarch64_move_imm (INTVAL (operands[1]), mode)
&& !aarch64_plus_operand (operands[1], mode)
&& !reload_completed"
"#"
"&& true"
[(const_int 0)]
{
HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff;
HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000;
rtx tmp = gen_reg_rtx (<GPI:MODE>mode);
emit_insn (gen_add<GPI:mode>3 (tmp, operands[0], GEN_INT (-hi_imm)));
emit_insn (gen_add<GPI:mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm)));
rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <GPI:MODE>mode,
cc_reg, const0_rtx);
emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[2]));
DONE;
}
)
(define_expand "casesi"
[(match_operand:SI 0 "register_operand") ; Index
(match_operand:SI 1 "const_int_operand") ; Lower bound
(match_operand:SI 2 "const_int_operand") ; Total range
(match_operand:DI 3 "" "") ; Table label
(match_operand:DI 4 "" "")] ; Out of range label
""
{
if (operands[1] != const0_rtx)
{
rtx reg = gen_reg_rtx (SImode);
/* Canonical RTL says that if you have:
(minus (X) (CONST))
then this should be emitted as:
(plus (X) (-CONST))
The use of trunc_int_for_mode ensures that the resulting
constant can be represented in SImode, this is important
for the corner case where operand[1] is INT_MIN. */
operands[1]
= GEN_INT (trunc_int_for_mode (-UINTVAL (operands[1]), SImode));
if (!(*insn_data[CODE_FOR_addsi3].operand[2].predicate)
(operands[1], SImode))
operands[1] = force_reg (SImode, operands[1]);
emit_insn (gen_addsi3 (reg, operands[0], operands[1]));
operands[0] = reg;
}
if (!aarch64_plus_operand (operands[2], SImode))
operands[2] = force_reg (SImode, operands[2]);
emit_jump_insn (gen_cbranchsi4 (gen_rtx_GTU (SImode, const0_rtx,
const0_rtx),
operands[0], operands[2], operands[4]));
operands[2] = force_reg (DImode, gen_rtx_LABEL_REF (DImode, operands[3]));
operands[2]
= gen_rtx_UNSPEC (Pmode, gen_rtvec (2, operands[2], operands[0]),
UNSPEC_CASESI);
operands[2] = gen_rtx_MEM (DImode, operands[2]);
MEM_READONLY_P (operands[2]) = 1;
MEM_NOTRAP_P (operands[2]) = 1;
emit_jump_insn (gen_casesi_dispatch (operands[2], operands[3]));
DONE;
}
)
(define_expand "casesi_dispatch"
[(parallel
[(set (pc) (match_operand:DI 0 ""))
(clobber (reg:CC CC_REGNUM))
(clobber (match_scratch:DI 2))
(clobber (match_scratch:DI 3))
(use (label_ref:DI (match_operand 1 "")))])]
"")
(define_insn "*casesi_dispatch"
[(parallel
[(set (pc)
(mem:DI (unspec [(match_operand:DI 0 "register_operand" "r")
(match_operand:SI 1 "register_operand" "r")]
UNSPEC_CASESI)))
(clobber (reg:CC CC_REGNUM))
(clobber (match_scratch:DI 3 "=r"))
(clobber (match_scratch:DI 4 "=r"))
(use (label_ref:DI (match_operand 2 "" "")))])]
""
"*
return aarch64_output_casesi (operands);
"
[(set_attr "sls_length" "casesi")
(set_attr "type" "branch")]
)
(define_insn "nop"
[(unspec [(const_int 0)] UNSPEC_NOP)]
""
"nop"
[(set_attr "type" "no_insn")]
)
(define_insn "prefetch"
[(prefetch (match_operand:DI 0 "aarch64_prefetch_operand" "Dp")
(match_operand:QI 1 "const_int_operand" "")
(match_operand:QI 2 "const_int_operand" ""))]
""
{
const char * pftype[2][4] =
{
{"prfm\\tPLDL1STRM, %0",
"prfm\\tPLDL3KEEP, %0",
"prfm\\tPLDL2KEEP, %0",
"prfm\\tPLDL1KEEP, %0"},
{"prfm\\tPSTL1STRM, %0",
"prfm\\tPSTL3KEEP, %0",
"prfm\\tPSTL2KEEP, %0",
"prfm\\tPSTL1KEEP, %0"},
};
int locality = INTVAL (operands[2]);
gcc_assert (IN_RANGE (locality, 0, 3));
/* PRFM accepts the same addresses as a 64-bit LDR so wrap
the address into a DImode MEM so that aarch64_print_operand knows
how to print it. */
operands[0] = gen_rtx_MEM (DImode, operands[0]);
return pftype[INTVAL (operands[1])][locality];
}
[(set_attr "type" "load_4")]
)
(define_insn "trap"
[(trap_if (const_int 1) (const_int 8))]
""
"brk #1000"
[(set_attr "type" "trap")])
(define_expand "prologue"
[(clobber (const_int 0))]
""
"
aarch64_expand_prologue ();
DONE;
"
)
(define_expand "epilogue"
[(clobber (const_int 0))]
""
"
aarch64_expand_epilogue (false);
DONE;
"
)
(define_expand "sibcall_epilogue"
[(clobber (const_int 0))]
""
"
aarch64_expand_epilogue (true);
DONE;
"
)
(define_insn "*do_return"
[(return)]
""
{
const char *ret = NULL;
if (aarch64_return_address_signing_enabled ()
&& (TARGET_PAUTH))
{
if (aarch_ra_sign_key == AARCH_KEY_B)
ret = "retab";
else
ret = "retaa";
}
else
ret = "ret";
output_asm_insn (ret, operands);
return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
}
[(set_attr "type" "branch")
(set_attr "sls_length" "retbr")]
)
(define_expand "return"
[(simple_return)]
"aarch64_use_return_insn_p ()"
""
)
(define_insn "simple_return"
[(simple_return)]
""
{
output_asm_insn ("ret", operands);
return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
}
[(set_attr "type" "branch")
(set_attr "sls_length" "retbr")]
)
(define_insn "*cb1"
[(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
(const_int 0))
(label_ref (match_operand 1 "" ""))
(pc)))]
"!aarch64_track_speculation"
{
if (get_attr_length (insn) == 8)
return aarch64_gen_far_branch (operands, 1, "Lcb", "\\t%0, ");
else
return "\\t%0, %l1";
}
[(set_attr "type" "branch")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
(lt (minus (match_dup 1) (pc)) (const_int 1048572)))
(const_int 4)
(const_int 8)))
(set (attr "far_branch")
(if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
(lt (minus (match_dup 1) (pc)) (const_int 1048572)))
(const_int 0)
(const_int 1)))]
)
(define_expand "tbranch_3"
[(set (pc) (if_then_else
(EQL (match_operand:ALLI 0 "register_operand")
(match_operand 1 "aarch64_simd_shift_imm_"))
(label_ref (match_operand 2 ""))
(pc)))]
""
{
rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
rtx reg = gen_lowpart (<ZEROM>mode, operands[0]);
rtx val = gen_int_mode (HOST_WIDE_INT_1U << UINTVAL (operands[1]), <MODE>mode);
emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
operands[1] = const0_rtx;
operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
operands[1]);
})
(define_insn "*tb1"
[(set (pc) (if_then_else
(EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
(const_int 1)
(match_operand 1
"aarch64_simd_shift_imm_" "n"))
(const_int 0))
(label_ref (match_operand 2 "" ""))
(pc)))
(clobber (reg:CC CC_REGNUM))]
"!aarch64_track_speculation"
{
if (get_attr_length (insn) == 8)
{
if (get_attr_far_branch (insn) == 1)
return aarch64_gen_far_branch (operands, 2, "Ltb",
"\\t%0, %1, ");
else
{
operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
return "tst\t%0, %1\;\t%l2";
}
}
else
return "\t%0, %1, %l2";
}
[(set_attr "type" "branch")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768))
(lt (minus (match_dup 2) (pc)) (const_int 32764)))
(const_int 4)
(const_int 8)))
(set (attr "far_branch")
(if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
(lt (minus (match_dup 2) (pc)) (const_int 1048572)))
(const_int 0)
(const_int 1)))]
)
(define_insn "*cb1"
[(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r")
(const_int 0))
(label_ref (match_operand 1 "" ""))
(pc)))
(clobber (reg:CC CC_REGNUM))]
"!aarch64_track_speculation"
{
if (get_attr_length (insn) == 8)
{
if (get_attr_far_branch (insn) == 1)
return aarch64_gen_far_branch (operands, 1, "Ltb",
"\\t%0, , ");
else
{
char buf[64];
uint64_t val = ((uint64_t) 1)
<< (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
sprintf (buf, "tst\t%%<w>0, %" PRId64, val);
output_asm_insn (buf, operands);
return "\t%l1";
}
}
else
return "\t%0, , %l1";
}
[(set_attr "type" "branch")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768))
(lt (minus (match_dup 1) (pc)) (const_int 32764)))
(const_int 4)
(const_int 8)))
(set (attr "far_branch")
(if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
(lt (minus (match_dup 1) (pc)) (const_int 1048572)))
(const_int 0)
(const_int 1)))]
)
;; -------------------------------------------------------------------
;; Subroutine calls and sibcalls
;; -------------------------------------------------------------------
(define_expand "call"
[(parallel
[(call (match_operand 0 "memory_operand")
(match_operand 1 "general_operand"))
(unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI)
(clobber (reg:DI LR_REGNUM))])]
""
"
{
aarch64_expand_call (NULL_RTX, operands[0], operands[2], false);
DONE;
}"
)
(define_insn "*call_insn"
[(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucr, Usf"))
(match_operand 1 "" ""))
(unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI)
(clobber (reg:DI LR_REGNUM))]
""
"@
* return aarch64_indirect_call_asm (operands[0]);
bl\\t%c0"
[(set_attr "type" "call, call")])
(define_expand "call_value"
[(parallel
[(set (match_operand 0 "")
(call (match_operand 1 "memory_operand")
(match_operand 2 "general_operand")))
(unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI)
(clobber (reg:DI LR_REGNUM))])]
""
"
{
aarch64_expand_call (operands[0], operands[1], operands[3], false);
DONE;
}"
)
(define_insn "*call_value_insn"
[(set (match_operand 0 "" "")
(call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "Ucr, Usf"))
(match_operand 2 "" "")))
(unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI)
(clobber (reg:DI LR_REGNUM))]
""
"@
* return aarch64_indirect_call_asm (operands[1]);
bl\\t%c1"
[(set_attr "type" "call, call")]
)
(define_expand "sibcall"
[(parallel
[(call (match_operand 0 "memory_operand")
(match_operand 1 "general_operand"))
(unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI)
(return)])]
""
{
aarch64_expand_call (NULL_RTX, operands[0], operands[2], true);
DONE;
}
)
(define_expand "sibcall_value"
[(parallel
[(set (match_operand 0 "")
(call (match_operand 1 "memory_operand")
(match_operand 2 "general_operand")))
(unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI)
(return)])]
""
{
aarch64_expand_call (operands[0], operands[1], operands[3], true);
DONE;
}
)
(define_insn "*sibcall_insn"
[(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf"))
(match_operand 1 ""))
(unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI)
(return)]
"SIBLING_CALL_P (insn)"
{
if (which_alternative == 0)
{
output_asm_insn ("br\\t%0", operands);
return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
}
return "b\\t%c0";
}
[(set_attr "type" "branch, branch")
(set_attr "sls_length" "retbr,none")]
)
(define_insn "*sibcall_value_insn"
[(set (match_operand 0 "")
(call (mem:DI
(match_operand:DI 1 "aarch64_call_insn_operand" "Ucs, Usf"))
(match_operand 2 "")))
(unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI)
(return)]
"SIBLING_CALL_P (insn)"
{
if (which_alternative == 0)
{
output_asm_insn ("br\\t%1", operands);
return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ());
}
return "b\\t%c1";
}
[(set_attr "type" "branch, branch")
(set_attr "sls_length" "retbr,none")]
)
;; Call subroutine returning any type.
(define_expand "untyped_call"
[(parallel [(call (match_operand 0 "")
(const_int 0))
(match_operand 1 "")
(match_operand 2 "")])]
""
{
int i;
/* Untyped calls always use the default ABI. It's only possible to use
ABI variants if we know the type of the target function. */
emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
rtx set = XVECEXP (operands[2], 0, i);
emit_move_insn (SET_DEST (set), SET_SRC (set));
}
/* The optimizer does not know that the call sets the function value
registers we stored in the result block. We avoid problems by
claiming that all hard registers are used and clobbered at this
point. */
emit_insn (gen_blockage ());
DONE;
})
;; -------------------------------------------------------------------
;; Moves
;; -------------------------------------------------------------------
(define_expand "mov"
[(set (match_operand:SHORT 0 "nonimmediate_operand")
(match_operand:SHORT 1 "general_operand"))]
""
"
if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx)
operands[1] = force_reg (<MODE>mode, operands[1]);
if (GET_CODE (operands[1]) == CONST_POLY_INT)
{
aarch64_expand_mov_immediate (operands[0], operands[1]);
DONE;
}
"
)
(define_insn "*mov_aarch64"
[(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w")
(match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w"))]
"(register_operand (operands[0], mode)
|| aarch64_reg_or_zero (operands[1], mode))"
{
switch (which_alternative)
{
case 0:
return "mov\t%w0, %w1";
case 1:
return "mov\t%w0, %1";
case 2:
return aarch64_output_scalar_simd_mov_immediate (operands[1],
<MODE>mode);
case 3:
return aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
case 4:
return "ldr<size>\t%w0, %1";
case 5:
return "ldr\t%<size>0, %1";
case 6:
return "str<size>\t%w1, %0";
case 7:
return "str\t%<size>1, %0";
case 8:
return TARGET_SIMD ? "umov\t%w0, %1.<v>[0]" : "fmov\t%w0, %s1";
case 9:
return TARGET_SIMD ? "dup\t%0.<Vallxd>, %w1" : "fmov\t%s0, %w1";
case 10:
return TARGET_SIMD ? "dup\t%<Vetype>0, %1.<v>[0]" : "fmov\t%s0, %s1";
default:
gcc_unreachable ();
}
}
;; The "mov_imm" type for CNT is just a placeholder.
[(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4,
store_4,neon_to_gp,neon_from_gp,neon_dup")
(set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*")]
)
(define_expand "mov"
[(set (match_operand:GPI 0 "nonimmediate_operand")
(match_operand:GPI 1 "general_operand"))]
""
"
if (MEM_P (operands[0]) && !MEM_VOLATILE_P (operands[0])
&& CONST_INT_P (operands[1]) && <MODE>mode == DImode
&& aarch64_split_dimode_const_store (operands[0], operands[1]))
DONE;
if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx)
operands[1] = force_reg (<MODE>mode, operands[1]);
/* Lower moves of symbolic constants into individual instructions.
Doing this now is sometimes necessary for correctness, since some
sequences require temporary pseudo registers. Lowering now is also
often better for optimization, since more RTL passes get the
chance to optimize the individual instructions.
When called after RA, also split multi-instruction moves into
smaller pieces now, since we can't be sure that there
will be a following split pass. */
if (CONST_INT_P (operands[1])
? (reload_completed
&& !aarch64_mov_imm_operand (operands[1], <MODE>mode))
: CONSTANT_P (operands[1]))
{
aarch64_expand_mov_immediate (operands[0], operands[1]);
DONE;
}
"
)
(define_insn_and_split "*movsi_aarch64"
[(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, r, w,r,w, w")
(match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
"(register_operand (operands[0], SImode)
|| aarch64_reg_or_zero (operands[1], SImode))"
"@
mov\\t%w0, %w1
mov\\t%w0, %w1
mov\\t%w0, %w1
mov\\t%w0, %1
#
* return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
ldr\\t%w0, %1
ldr\\t%s0, %1
str\\t%w1, %0
str\\t%s1, %0
adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]
adr\\t%x0, %c1
adrp\\t%x0, %A1
fmov\\t%s0, %w1
fmov\\t%w0, %s1
fmov\\t%s0, %s1
* return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
"CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
[(const_int 0)]
"{
aarch64_expand_mov_immediate (operands[0], operands[1]);
DONE;
}"
;; The "mov_imm" type for CNT is just a placeholder.
[(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
(set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
(set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")
]
)
(define_insn_and_split "*movdi_aarch64"
[(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
(match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
"(register_operand (operands[0], DImode)
|| aarch64_reg_or_zero (operands[1], DImode))"
"@
mov\\t%x0, %x1
mov\\t%0, %x1
mov\\t%x0, %1
* return aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? \"mov\\t%x0, %1\" : \"mov\\t%w0, %1\";
#
* return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
ldr\\t%x0, %1
ldr\\t%d0, %1
str\\t%x1, %0
str\\t%d1, %0
* return TARGET_ILP32 ? \"adrp\\t%0, %A1\;ldr\\t%w0, [%0, %L1]\" : \"adrp\\t%0, %A1\;ldr\\t%0, [%0, %L1]\";
adr\\t%x0, %c1
adrp\\t%x0, %A1
fmov\\t%d0, %x1
fmov\\t%x0, %d1
fmov\\t%d0, %d1
* return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
"CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)
&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
[(const_int 0)]
"{
aarch64_expand_mov_immediate (operands[0], operands[1]);
DONE;
}"
;; The "mov_imm" type for CNTD is just a placeholder.
[(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,
load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
fmov,neon_move")
(set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
(set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
)
(define_insn "insv_imm"
[(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
(const_int 16)
(match_operand:GPI 1 "const_int_operand" "n"))
(match_operand:GPI 2 "const_int_operand" "n"))]
"UINTVAL (operands[1]) < GET_MODE_BITSIZE (mode)
&& UINTVAL (operands[1]) % 16 == 0"
"movk\\t%0, %X2, lsl %1"
[(set_attr "type" "mov_imm")]
)
;; Match MOVK as a normal AND and IOR operation.
(define_insn "aarch64_movk"
[(set (match_operand:GPI 0 "register_operand" "=r")
(ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
(match_operand:GPI 2 "const_int_operand"))
(match_operand:GPI 3 "const_int_operand")))]
"aarch64_movk_shift (rtx_mode_t (operands[2], mode),
rtx_mode_t (operands[3], mode)) >= 0"
{
int shift = aarch64_movk_shift (rtx_mode_t (operands[2], <MODE>mode),
rtx_mode_t (operands[3], <MODE>mode));
operands[2] = gen_int_mode (UINTVAL (operands[3]) >> shift, SImode);
operands[3] = gen_int_mode (shift, SImode);
return "movk\\t%0, #%X2, lsl %3";
}
[(set_attr "type" "mov_imm")]
)
(define_expand "movti"
[(set (match_operand:TI 0 "nonimmediate_operand")
(match_operand:TI 1 "general_operand"))]
""
"
if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx)
operands[1] = force_reg (TImode, operands[1]);
if (GET_CODE (operands[1]) == CONST_POLY_INT)
{
emit_move_insn (gen_lowpart (DImode, operands[0]),
gen_lowpart (DImode, operands[1]));
emit_move_insn (gen_highpart (DImode, operands[0]), const0_rtx);
DONE;
}
"
)
(define_insn "*movti_aarch64"
[(set (match_operand:TI 0
"nonimmediate_operand" "= r,w,w,w, r,w,r,m,m,w,m")
(match_operand:TI 1
"aarch64_movti_operand" " rUti,Z,Z,r, w,w,m,r,Z,m,w"))]
"(register_operand (operands[0], TImode)
|| aarch64_reg_or_zero (operands[1], TImode))"
"@
#
movi\\t%0.2d, #0
fmov\t%d0, xzr
#
#
mov\\t%0.16b, %1.16b
ldp\\t%0, %H0, %1
stp\\t%1, %H1, %0
stp\\txzr, xzr, %0
ldr\\t%q0, %1
str\\t%q1, %0"
[(set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q, \
load_16,store_16,store_16,\
load_16,store_16")
(set_attr "length" "8,4,4,8,8,4,4,4,4,4,4")
(set_attr "arch" "*,simd,*,*,*,simd,*,*,*,fp,fp")]
)
;; Split a TImode register-register or register-immediate move into
;; its component DImode pieces, taking care to handle overlapping
;; source and dest registers.
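;; Roughly, after reload a copy such as
;;   (set (reg:TI x0) (reg:TI x2))
;; is split into the two DImode moves
;;   (set (reg:DI x0) (reg:DI x2))
;;   (set (reg:DI x1) (reg:DI x3))
;; with aarch64_split_128bit_move choosing an order that does not clobber
;; a source register which overlaps the destination.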
(define_split
[(set (match_operand:TI 0 "register_operand" "")
(match_operand:TI 1 "aarch64_reg_or_imm" ""))]
"reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])"
[(const_int 0)]
{
aarch64_split_128bit_move (operands[0], operands[1]);
DONE;
})
(define_expand "mov"
[(set (match_operand:GPF_TF_F16_MOV 0 "nonimmediate_operand")
(match_operand:GPF_TF_F16_MOV 1 "general_operand"))]
""
{
if (!TARGET_FLOAT)
{
aarch64_err_no_fpadvsimd (<MODE>mode);
machine_mode intmode
= int_mode_for_size (GET_MODE_BITSIZE (<MODE>mode), 0).require ();
emit_move_insn (gen_lowpart (intmode, operands[0]),
gen_lowpart (intmode, operands[1]));
DONE;
}
if (GET_CODE (operands[0]) == MEM
&& ! (GET_CODE (operands[1]) == CONST_DOUBLE
&& aarch64_float_const_zero_rtx_p (operands[1])))
operands[1] = force_reg (<MODE>mode, operands[1]);
}
)
(define_insn "*mov_aarch64"
[(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w ,w ,w ,?r,?r,w,w,w ,w ,w,m,r,m ,r")
(match_operand:HFBF 1 "general_operand" "Y ,?rY,?r,?rY, w, w,w,w,Ufc,Uvi,m,w,m,rY,r"))]
"TARGET_FLOAT && (register_operand (operands[0], mode)
|| aarch64_reg_or_fp_zero (operands[1], mode))"
"@
movi\\t%0.4h, #0
fmov\\t%h0, %w1
dup\\t%w0.4h, %w1
fmov\\t%s0, %w1
umov\\t%w0, %1.h[0]
fmov\\t%w0, %s1
mov\\t%0.h[0], %1.h[0]
fmov\\t%s0, %s1
fmov\\t%h0, %1
* return aarch64_output_scalar_simd_mov_immediate (operands[1], HImode);
ldr\\t%h0, %1
str\\t%h1, %0
ldrh\\t%w0, %1
strh\\t%w1, %0
mov\\t%w0, %w1"
[(set_attr "type" "neon_move,f_mcr,neon_move,f_mcr,neon_to_gp,f_mrc,
neon_move,fmov,fconsts,neon_move,f_loads,f_stores,
load_4,store_4,mov_reg")
(set_attr "arch" "simd,fp16,simd,*,simd,*,simd,*,fp16,simd,*,*,*,*,*")]
)
(define_insn "*mov_aarch64"
[(set (match_operand:SFD 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
(match_operand:SFD 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
"TARGET_FLOAT && (register_operand (operands[0], mode)
|| aarch64_reg_or_fp_zero (operands[1], mode))"
"@
movi\\t%0.2s, #0
fmov\\t%s0, %w1
fmov\\t%w0, %s1
fmov\\t%s0, %s1
fmov\\t%s0, %1
* return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
ldr\\t%s0, %1
str\\t%s1, %0
ldr\\t%w0, %1
str\\t%w1, %0
mov\\t%w0, %w1
mov\\t%w0, %1"
[(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\
f_loads,f_stores,load_4,store_4,mov_reg,\
fconsts")
(set_attr "arch" "simd,*,*,*,*,simd,*,*,*,*,*,*")]
)
(define_insn "*mov_aarch64"
[(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
(match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,O"))]
"TARGET_FLOAT && (register_operand (operands[0], mode)
|| aarch64_reg_or_fp_zero (operands[1], mode))"
"@
movi\\t%d0, #0
fmov\\t%d0, %x1
fmov\\t%x0, %d1
fmov\\t%d0, %d1
fmov\\t%d0, %1
* return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
ldr\\t%d0, %1
str\\t%d1, %0
ldr\\t%x0, %1
str\\t%x1, %0
mov\\t%x0, %x1
* return aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? \"mov\\t%x0, %1\" : \"mov\\t%w0, %1\";"
[(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
f_loadd,f_stored,load_8,store_8,mov_reg,\
fconstd")
(set_attr "arch" "simd,*,*,*,*,simd,*,*,*,*,*,*")]
)
(define_split
[(set (match_operand:GPF_HF 0 "nonimmediate_operand")
(match_operand:GPF_HF 1 "const_double_operand"))]
"can_create_pseudo_p ()
&& !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
&& !aarch64_float_const_representable_p (operands[1])
&& !aarch64_float_const_zero_rtx_p (operands[1])
&& aarch64_float_const_rtx_p (operands[1])"
[(const_int 0)]
{
unsigned HOST_WIDE_INT ival;
if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
FAIL;
rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode);
emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode));
emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
DONE;
}
)
(define_insn "*mov_aarch64"
[(set (match_operand:TFD 0
"nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
(match_operand:TFD 1
"general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))]
"TARGET_FLOAT && (register_operand (operands[0], mode)
|| aarch64_reg_or_fp_zero (operands[1], mode))"
"@
mov\\t%0.16b, %1.16b
#
#
#
movi\\t%0.2d, #0
fmov\\t%s0, wzr
ldr\\t%q0, %1
str\\t%q1, %0
ldp\\t%0, %H0, %1
stp\\t%1, %H1, %0
stp\\txzr, xzr, %0"
[(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
f_loadd,f_stored,load_16,store_16,store_16")
(set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
(set_attr "arch" "simd,*,*,*,simd,*,*,*,*,*,*")]
)
(define_split
[(set (match_operand:TFD 0 "register_operand" "")
(match_operand:TFD 1 "nonmemory_operand" ""))]
"reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])"
[(const_int 0)]
{
aarch64_split_128bit_move (operands[0], operands[1]);
DONE;
}
)
(define_expand "aarch64_cpymemdi"
[(parallel
[(set (match_operand 2) (const_int 0))
(clobber (match_dup 3))
(clobber (match_dup 4))
(clobber (reg:CC CC_REGNUM))
(set (match_operand 0)
(unspec:BLK [(match_operand 1) (match_dup 2)] UNSPEC_CPYMEM))])]
"TARGET_MOPS"
{
operands[3] = XEXP (operands[0], 0);
operands[4] = XEXP (operands[1], 0);
}
)
(define_insn "*aarch64_cpymemdi"
[(set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
(clobber (match_operand:DI 0 "register_operand" "+&r"))
(clobber (match_operand:DI 1 "register_operand" "+&r"))
(clobber (reg:CC CC_REGNUM))
(set (mem:BLK (match_dup 0))
(unspec:BLK [(mem:BLK (match_dup 1)) (match_dup 2)] UNSPEC_CPYMEM))]
"TARGET_MOPS"
"cpyfp\t[%x0]!, [%x1]!, %x2!\;cpyfm\t[%x0]!, [%x1]!, %x2!\;cpyfe\t[%x0]!, [%x1]!, %x2!"
[(set_attr "length" "12")]
)
;; 0 is dst
;; 1 is src
;; 2 is size of copy in bytes
;; 3 is alignment
(define_expand "cpymemdi"
[(match_operand:BLK 0 "memory_operand")
(match_operand:BLK 1 "memory_operand")
(match_operand:DI 2 "general_operand")
(match_operand:DI 3 "immediate_operand")]
"!STRICT_ALIGNMENT || TARGET_MOPS"
{
if (aarch64_expand_cpymem (operands))
DONE;
FAIL;
}
)
(define_insn "aarch64_movmemdi"
[(parallel [
(set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
(clobber (match_operand:DI 0 "register_operand" "+&r"))
(clobber (match_operand:DI 1 "register_operand" "+&r"))
(clobber (reg:CC CC_REGNUM))
(set (mem:BLK (match_dup 0))
(unspec:BLK [(mem:BLK (match_dup 1)) (match_dup 2)] UNSPEC_MOVMEM))])]
"TARGET_MOPS"
"cpyp\t[%x0]!, [%x1]!, %x2!\;cpym\t[%x0]!, [%x1]!, %x2!\;cpye\t[%x0]!, [%x1]!, %x2!"
[(set_attr "length" "12")]
)
;; 0 is dst
;; 1 is src
;; 2 is size of copy in bytes
;; 3 is alignment
(define_expand "movmemdi"
[(match_operand:BLK 0 "memory_operand")
(match_operand:BLK 1 "memory_operand")
(match_operand:DI 2 "general_operand")
(match_operand:DI 3 "immediate_operand")]
"TARGET_MOPS"
{
rtx sz_reg = operands[2];
/* For constant-sized memmoves check the threshold.
FIXME: We should add a non-MOPS memmove expansion for smaller,
constant-sized memmove to avoid going to a libcall. */
if (CONST_INT_P (sz_reg)
&& INTVAL (sz_reg) < aarch64_mops_memmove_size_threshold)
FAIL;
rtx addr_dst = XEXP (operands[0], 0);
rtx addr_src = XEXP (operands[1], 0);
if (!REG_P (sz_reg))
sz_reg = force_reg (DImode, sz_reg);
if (!REG_P (addr_dst))
addr_dst = force_reg (DImode, addr_dst);
if (!REG_P (addr_src))
addr_src = force_reg (DImode, addr_src);
emit_insn (gen_aarch64_movmemdi (addr_dst, addr_src, sz_reg));
DONE;
}
)
(define_expand "aarch64_setmemdi"
[(parallel
[(set (match_operand 2) (const_int 0))
(clobber (match_dup 3))
(clobber (reg:CC CC_REGNUM))
(set (match_operand 0)
(unspec:BLK [(match_operand 1)
(match_dup 2)] UNSPEC_SETMEM))])]
"TARGET_MOPS"
{
operands[3] = XEXP (operands[0], 0);
}
)
(define_insn "*aarch64_setmemdi"
[(set (match_operand:DI 2 "register_operand" "+&r") (const_int 0))
(clobber (match_operand:DI 0 "register_operand" "+&r"))
(clobber (reg:CC CC_REGNUM))
(set (mem:BLK (match_dup 0))
(unspec:BLK [(match_operand:QI 1 "aarch64_reg_or_zero" "rZ")
(match_dup 2)] UNSPEC_SETMEM))]
"TARGET_MOPS"
"setp\t[%x0]!, %x2!, %x1\;setm\t[%x0]!, %x2!, %x1\;sete\t[%x0]!, %x2!, %x1"
[(set_attr "length" "12")]
)
;; 0 is dst
;; 1 is val
;; 2 is size of copy in bytes
;; 3 is alignment
(define_expand "setmemdi"
[(set (match_operand:BLK 0 "memory_operand") ;; Dest
(match_operand:QI 2 "nonmemory_operand")) ;; Value
(use (match_operand:DI 1 "general_operand")) ;; Length
(match_operand 3 "immediate_operand")] ;; Align
"TARGET_SIMD || TARGET_MOPS"
{
if (aarch64_expand_setmem (operands))
DONE;
FAIL;
})
;; Operands 1 and 3 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation.
(define_insn "load_pair_sw_"
[(set (match_operand:SX 0 "register_operand" "=r,w")
(match_operand:SX 1 "aarch64_mem_pair_operand" "Ump,Ump"))
(set (match_operand:SX2 2 "register_operand" "=r,w")
(match_operand:SX2 3 "memory_operand" "m,m"))]
"rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (mode)))"
"@
ldp\\t%w0, %w2, %z1
ldp\\t%s0, %s2, %z1"
[(set_attr "type" "load_8,neon_load1_2reg")
(set_attr "arch" "*,fp")]
)
;; Loading different modes that can still be merged
(define_insn "load_pair_dw_"
[(set (match_operand:DX 0 "register_operand" "=r,w")
(match_operand:DX 1 "aarch64_mem_pair_operand" "Ump,Ump"))
(set (match_operand:DX2 2 "register_operand" "=r,w")
(match_operand:DX2 3 "memory_operand" "m,m"))]
"rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (mode)))"
"@
ldp\\t%x0, %x2, %z1
ldp\\t%d0, %d2, %z1"
[(set_attr "type" "load_16,neon_load1_2reg")
(set_attr "arch" "*,fp")]
)
(define_insn "load_pair_dw_tftf"
[(set (match_operand:TF 0 "register_operand" "=w")
(match_operand:TF 1 "aarch64_mem_pair_operand" "Ump"))
(set (match_operand:TF 2 "register_operand" "=w")
(match_operand:TF 3 "memory_operand" "m"))]
"TARGET_SIMD
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (TFmode)))"
"ldp\\t%q0, %q2, %z1"
[(set_attr "type" "neon_ldp_q")
(set_attr "fp" "yes")]
)
;; Operands 0 and 2 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation.
(define_insn "store_pair_sw_"
[(set (match_operand:SX 0 "aarch64_mem_pair_operand" "=Ump,Ump")
(match_operand:SX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))
(set (match_operand:SX2 2 "memory_operand" "=m,m")
(match_operand:SX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))]
"rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
GET_MODE_SIZE (mode)))"
"@
stp\\t%w1, %w3, %z0
stp\\t%s1, %s3, %z0"
[(set_attr "type" "store_8,neon_store1_2reg")
(set_attr "arch" "*,fp")]
)
;; Storing different modes that can still be merged
(define_insn "store_pair_dw_"
[(set (match_operand:DX 0 "aarch64_mem_pair_operand" "=Ump,Ump")
(match_operand:DX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))
(set (match_operand:DX2 2 "memory_operand" "=m,m")
(match_operand:DX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))]
"rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
GET_MODE_SIZE (mode)))"
"@
stp\\t%x1, %x3, %z0
stp\\t%d1, %d3, %z0"
[(set_attr "type" "store_16,neon_store1_2reg")
(set_attr "arch" "*,fp")]
)
(define_insn "store_pair_dw_tftf"
[(set (match_operand:TF 0 "aarch64_mem_pair_operand" "=Ump")
(match_operand:TF 1 "register_operand" "w"))
(set (match_operand:TF 2 "memory_operand" "=m")
(match_operand:TF 3 "register_operand" "w"))]
"TARGET_SIMD &&
rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
GET_MODE_SIZE (TFmode)))"
"stp\\t%q1, %q3, %z0"
[(set_attr "type" "neon_stp_q")
(set_attr "fp" "yes")]
)
;; Load pair with post-index writeback. This is primarily used in function
;; epilogues.
(define_insn "loadwb_pair_"
[(parallel
[(set (match_operand:P 0 "register_operand" "=k")
(plus:P (match_operand:P 1 "register_operand" "0")
(match_operand:P 4 "aarch64_mem_pair_offset" "n")))
(set (match_operand:GPI 2 "register_operand" "=r")
(mem:GPI (match_dup 1)))
(set (match_operand:GPI 3 "register_operand" "=r")
(mem:GPI (plus:P (match_dup 1)
(match_operand:P 5 "const_int_operand" "n"))))])]
"INTVAL (operands[5]) == GET_MODE_SIZE (mode)"
"ldp\\t%2, %3, [%1], %4"
[(set_attr "type" "load_")]
)
(define_insn "loadwb_pair_"
[(parallel
[(set (match_operand:P 0 "register_operand" "=k")
(plus:P (match_operand:P 1 "register_operand" "0")
(match_operand:P 4 "aarch64_mem_pair_offset" "n")))
(set (match_operand:GPF 2 "register_operand" "=w")
(mem:GPF (match_dup 1)))
(set (match_operand:GPF 3 "register_operand" "=w")
(mem:GPF (plus:P (match_dup 1)
(match_operand:P 5 "const_int_operand" "n"))))])]
"INTVAL (operands[5]) == GET_MODE_SIZE (mode)"
"ldp\\t%2, %3, [%1], %4"
[(set_attr "type" "neon_load1_2reg")]
)
(define_insn "loadwb_pair_"
[(parallel
[(set (match_operand:P 0 "register_operand" "=k")
(plus:P (match_operand:P 1 "register_operand" "0")
(match_operand:P 4 "aarch64_mem_pair_offset" "n")))
(set (match_operand:TX 2 "register_operand" "=w")
(mem:TX (match_dup 1)))
(set (match_operand:TX 3 "register_operand" "=w")
(mem:TX (plus:P (match_dup 1)
(match_operand:P 5 "const_int_operand" "n"))))])]
"TARGET_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (mode)"
"ldp\\t%q2, %q3, [%1], %4"
[(set_attr "type" "neon_ldp_q")]
)
;; Store pair with pre-index writeback. This is primarily used in function
;; prologues.
(define_insn "storewb_pair_"
[(parallel
[(set (match_operand:P 0 "register_operand" "=&k")
(plus:P (match_operand:P 1 "register_operand" "0")
(match_operand:P 4 "aarch64_mem_pair_offset" "n")))
(set (mem:GPI (plus:P (match_dup 0)
(match_dup 4)))
(match_operand:GPI 2 "register_operand" "r"))
(set (mem:GPI (plus:P (match_dup 0)
(match_operand:P 5 "const_int_operand" "n")))
(match_operand:GPI 3 "register_operand" "r"))])]
"INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)"
"stp\\t%2, %3, [%0, %4]!"
[(set_attr "type" "store_")]
)
(define_insn "storewb_pair_"
[(parallel
[(set (match_operand:P 0 "register_operand" "=&k")
(plus:P (match_operand:P 1 "register_operand" "0")
(match_operand:P 4 "aarch64_mem_pair_offset" "n")))
(set (mem:GPF (plus:P (match_dup 0)
(match_dup 4)))
(match_operand:GPF 2 "register_operand" "w"))
(set (mem:GPF (plus:P (match_dup 0)
(match_operand:P 5 "const_int_operand" "n")))
(match_operand:GPF 3 "register_operand" "w"))])]
"INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)"
"stp\\t%2, %3, [%0, %4]!"
[(set_attr "type" "neon_store1_2reg")]
)
(define_insn "storewb_pair_"
[(parallel
[(set (match_operand:P 0 "register_operand" "=&k")
(plus:P (match_operand:P 1 "register_operand" "0")
(match_operand:P 4 "aarch64_mem_pair_offset" "n")))
(set (mem:TX (plus:P (match_dup 0)
(match_dup 4)))
(match_operand:TX 2 "register_operand" "w"))
(set (mem:TX (plus:P (match_dup 0)
(match_operand:P 5 "const_int_operand" "n")))
(match_operand:TX 3 "register_operand" "w"))])]
"TARGET_SIMD
&& INTVAL (operands[5])
== INTVAL (operands[4]) + GET_MODE_SIZE (<TX:MODE>mode)"
"stp\\t%q2, %q3, [%0, %4]!"
[(set_attr "type" "neon_stp_q")]
)
;; -------------------------------------------------------------------
;; Sign/Zero extension
;; -------------------------------------------------------------------
(define_expand "sidi2"
[(set (match_operand:DI 0 "register_operand")
(ANY_EXTEND:DI (match_operand:SI 1 "nonimmediate_operand")))]
""
)
(define_insn "*extendsidi2_aarch64"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
""
"@
sxtw\t%0, %w1
ldrsw\t%0, %1"
[(set_attr "type" "extend,load_4")]
)
(define_insn "*load_pair_extendsidi2_aarch64"
[(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump")))
(set (match_operand:DI 2 "register_operand" "=r")
(sign_extend:DI (match_operand:SI 3 "memory_operand" "m")))]
"rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (SImode)))"
"ldpsw\\t%0, %2, %z1"
[(set_attr "type" "load_8")]
)
(define_insn "*zero_extendsidi2_aarch64"
[(set (match_operand:DI 0 "register_operand" "=r,r,w,w,r,w")
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
""
"@
uxtw\t%0, %w1
ldr\t%w0, %1
fmov\t%s0, %w1
ldr\t%s0, %1
fmov\t%w0, %s1
fmov\t%s0, %s1"
[(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov")
(set_attr "arch" "*,*,fp,fp,fp,fp")]
)
(define_insn "*load_pair_zero_extendsidi2_aarch64"
[(set (match_operand:DI 0 "register_operand" "=r,w")
(zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump")))
(set (match_operand:DI 2 "register_operand" "=r,w")
(zero_extend:DI (match_operand:SI 3 "memory_operand" "m,m")))]
"rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (SImode)))"
"@
ldp\t%w0, %w2, %z1
ldp\t%s0, %s2, %z1"
[(set_attr "type" "load_8,neon_load1_2reg")
(set_attr "arch" "*,fp")]
)
(define_expand "2"
[(set (match_operand:GPI 0 "register_operand")
(ANY_EXTEND:GPI (match_operand:SHORT 1 "nonimmediate_operand")))]
""
)
(define_insn "*extend2_aarch64"
[(set (match_operand:GPI 0 "register_operand" "=r,r,r")
(sign_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,w")))]
""
"@
sxt<SHORT:size>\t%<GPI:w>0, %w1
ldrs<SHORT:size>\t%<GPI:w>0, %1
smov\t%<GPI:w>0, %1.<SHORT:size>[0]"
[(set_attr "type" "extend,load_4,neon_to_gp")
(set_attr "arch" "*,*,fp")]
)
(define_insn "*zero_extend2_aarch64"
[(set (match_operand:GPI 0 "register_operand" "=r,r,w,r")
(zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m,w")))]
""
"@
and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask>
ldr<SHORT:size>\t%w0, %1
ldr\t%<SHORT:size>0, %1
umov\t%w0, %1.<SHORT:size>[0]"
[(set_attr "type" "logic_imm,load_4,f_loads,neon_to_gp")
(set_attr "arch" "*,*,fp,fp")]
)
(define_expand "qihi2"
[(set (match_operand:HI 0 "register_operand")
(ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand")))]
""
)
(define_insn "*extendqihi2_aarch64"
[(set (match_operand:HI 0 "register_operand" "=r,r")
(sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
""
"@
sxtb\t%w0, %w1
ldrsb\t%w0, %1"
[(set_attr "type" "extend,load_4")]
)
(define_insn "*zero_extendqihi2_aarch64"
[(set (match_operand:HI 0 "register_operand" "=r,r")
(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))]
""
"@
and\t%w0, %w1, 255
ldrb\t%w0, %1"
[(set_attr "type" "logic_imm,load_4")]
)
;; -------------------------------------------------------------------
;; Simple arithmetic
;; -------------------------------------------------------------------
(define_expand "add3"
[(set
(match_operand:GPI 0 "register_operand")
(plus:GPI (match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "aarch64_pluslong_or_poly_operand")))]
""
{
/* If operands[1] is a subreg, extract the inner RTX. */
rtx op1 = REG_P (operands[1]) ? operands[1] : SUBREG_REG (operands[1]);
/* If the constant is too large for a single instruction and isn't frame
based, split off the immediate so it is available for CSE. */
if (!aarch64_plus_immediate (operands[2], <MODE>mode)
&& !(TARGET_SVE && aarch64_sve_plus_immediate (operands[2], <MODE>mode))
&& can_create_pseudo_p ()
&& (!REG_P (op1)
|| !REGNO_PTR_FRAME_P (REGNO (op1))))
operands[2] = force_reg (<MODE>mode, operands[2]);
/* Some tunings prefer to avoid VL-based operations.
Split off the poly immediate here. The rtx costs hook will reject attempts
to combine them back. */
else if (GET_CODE (operands[2]) == CONST_POLY_INT
&& can_create_pseudo_p ()
&& (aarch64_tune_params.extra_tuning_flags
& AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS))
operands[2] = force_reg (<MODE>mode, operands[2]);
/* Expand polynomial additions now if the destination is the stack
pointer, since we don't want to use that as a temporary. */
else if (operands[0] == stack_pointer_rtx
&& aarch64_split_add_offset_immediate (operands[2], <MODE>mode))
{
aarch64_split_add_offset (<MODE>mode, operands[0], operands[1],
operands[2], NULL_RTX, NULL_RTX);
DONE;
}
})
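;; Hedged example of the CSE split above (hypothetical constant): for an
;; addend such as 0x123456, which no single ADD immediate can encode, the
;; expander forces the constant into a register so that repeated additions
;; can share one materialisation:
;;   mov  x3, 0x3456
;;   movk x3, 0x12, lsl 16
;;   add  x0, x1, x3
;;   add  x2, x4, x3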
(define_insn "*add3_aarch64"
[(set
(match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk")
(plus:GPI
(match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk")
(match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav")))]
""
"@
add\\t%<w>0, %<w>1, %2
add\\t%<w>0, %<w>1, %<w>2
add\\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>
sub\\t%<w>0, %<w>1, #%n2
#
* return aarch64_output_sve_scalar_inc_dec (operands[2]);
* return aarch64_output_sve_addvl_addpl (operands[2]);"
;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders.
[(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,alu_imm")
(set_attr "arch" "*,*,simd,*,*,sve,sve")]
)
;; zero_extend version of above
(define_insn "*addsi3_aarch64_uxtw"
[(set
(match_operand:DI 0 "register_operand" "=rk,rk,rk,r")
(zero_extend:DI
(plus:SI (match_operand:SI 1 "register_operand" "%rk,rk,rk,rk")
(match_operand:SI 2 "aarch64_pluslong_operand" "I,r,J,Uaa"))))]
""
"@
add\\t%w0, %w1, %2
add\\t%w0, %w1, %w2
sub\\t%w0, %w1, #%n2
#"
[(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple")]
)
;; If there's a free register, and we can load the constant with a
;; single instruction, do so. This has a chance to improve scheduling.
(define_peephole2
[(match_scratch:GPI 3 "r")
(set (match_operand:GPI 0 "register_operand")
(plus:GPI
(match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "aarch64_pluslong_strict_immedate")))]
"aarch64_move_imm (INTVAL (operands[2]), mode)"
[(set (match_dup 3) (match_dup 2))
(set (match_dup 0) (plus:GPI (match_dup 1) (match_dup 3)))]
)
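;; Hedged illustration (hypothetical values): for x0 = x1 + 0x3456, where
;; 0x3456 fits a single MOVZ but not a single ADD immediate, the peephole
;; above produces
;;   mov x3, 0x3456
;;   add x0, x1, x3
;; so the scheduler can hoist the MOV away from the dependent ADD.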
(define_peephole2
[(match_scratch:SI 3 "r")
(set (match_operand:DI 0 "register_operand")
(zero_extend:DI
(plus:SI
(match_operand:SI 1 "register_operand")
(match_operand:SI 2 "aarch64_pluslong_strict_immedate"))))]
"aarch64_move_imm (INTVAL (operands[2]), SImode)"
[(set (match_dup 3) (match_dup 2))
(set (match_dup 0) (zero_extend:DI (plus:SI (match_dup 1) (match_dup 3))))]
)
;; After peephole2 has had a chance to run, split any remaining long
;; additions into two add immediates.
(define_split
[(set (match_operand:GPI 0 "register_operand")
(plus:GPI
(match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "aarch64_pluslong_strict_immedate")))]
"epilogue_completed"
[(set (match_dup 0) (plus:GPI (match_dup 1) (match_dup 3)))
(set (match_dup 0) (plus:GPI (match_dup 0) (match_dup 4)))]
{
HOST_WIDE_INT i = INTVAL (operands[2]);
HOST_WIDE_INT s = (i >= 0 ? i & 0xfff : -(-i & 0xfff));
operands[3] = GEN_INT (i - s);
operands[4] = GEN_INT (s);
}
)
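;; Worked example of the split above (illustrative): for i = 0x123456,
;; s = 0x456 and i - s = 0x123000, both valid ADD immediates, giving
;;   add x0, x1, 0x123000
;;   add x0, x0, 0x456
;; For negative i the decomposition yields two negative parts, each of which
;; the add pattern prints as a SUB of the corresponding positive value.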
;; Match addition of polynomial offsets that require one temporary, for which
;; we can use the early-clobbered destination register. This is a separate
;; pattern so that the early clobber doesn't affect register allocation
;; for other forms of addition. However, we still need to provide an
;; all-register alternative, in case the offset goes out of range after
;; elimination. For completeness we might as well provide all GPR-based
;; alternatives from the main pattern.
;;
;; We don't have a pattern for additions requiring two temporaries since at
;; present LRA doesn't allow new scratches to be added during elimination.
;; Such offsets should be rare anyway.
;;
;; ??? But if we added LRA support for new scratches, much of the ugliness
;; here would go away. We could just handle all polynomial constants in
;; this pattern.
(define_insn_and_split "*add3_poly_1"
[(set
(match_operand:GPI 0 "register_operand" "=r,r,r,r,r,r,&r")
(plus:GPI
(match_operand:GPI 1 "register_operand" "%rk,rk,rk,rk,0,rk,rk")
(match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "I,r,J,Uaa,Uai,Uav,Uat")))]
"TARGET_SVE && operands[0] != stack_pointer_rtx"
"@
add\\t%<w>0, %<w>1, %2
add\\t%<w>0, %<w>1, %<w>2
sub\\t%<w>0, %<w>1, #%n2
#
* return aarch64_output_sve_scalar_inc_dec (operands[2]);
* return aarch64_output_sve_addvl_addpl (operands[2]);
#"
"&& epilogue_completed
&& !reg_overlap_mentioned_p (operands[0], operands[1])
&& aarch64_split_add_offset_immediate (operands[2], mode)"
[(const_int 0)]
{
aarch64_split_add_offset (<MODE>mode, operands[0], operands[1],
operands[2], operands[0], NULL_RTX);
DONE;
}
;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders.
[(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple,alu_imm,alu_imm,multiple")]
)
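;; Background note (hedged summary): ADDVL adds a multiple of the SVE vector
;; length in bytes and ADDPL a multiple of the predicate length (VL/8); e.g.
;;   addvl x0, x1, #2
;; adds twice the vector length to x1.  Polynomial offsets needing more than
;; one such instruction are split above, reusing the early-clobbered
;; destination register as the single temporary.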
(define_split
[(set (match_operand:DI 0 "register_operand")
(zero_extend:DI
(plus:SI
(match_operand:SI 1 "register_operand")
(match_operand:SI 2 "aarch64_pluslong_strict_immedate"))))]
"epilogue_completed"
[(set (match_dup 5) (plus:SI (match_dup 1) (match_dup 3)))
(set (match_dup 0) (zero_extend:DI (plus:SI (match_dup 5) (match_dup 4))))]
{
HOST_WIDE_INT i = INTVAL (operands[2]);
HOST_WIDE_INT s = (i >= 0 ? i & 0xfff : -(-i & 0xfff));
operands[3] = GEN_INT (i - s);
operands[4] = GEN_INT (s);
operands[5] = gen_lowpart (SImode, operands[0]);
}
)
(define_expand "addv4"
[(match_operand:GPI 0 "register_operand")
(match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "aarch64_plus_operand")
(label_ref (match_operand 3 "" ""))]
""
{
if (CONST_INT_P (operands[2]))
emit_insn (gen_add<mode>3_compareV_imm (operands[0], operands[1],
operands[2]));
else
emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2]));
aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
DONE;
})
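;; Hedged sketch of the expansion (assuming the usual CC_V mapping, where
;; NE selects the "vs" condition): a signed add-with-overflow-check becomes
;; roughly
;;   adds x0, x1, x2
;;   b.vs <label>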
(define_expand "uaddv4"
[(match_operand:GPI 0 "register_operand")
(match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "register_operand")
(label_ref (match_operand 3 "" ""))]
""
{
emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2]));
aarch64_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]);
DONE;
})
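;; Similarly for the unsigned case (hedged; LTU on CC_C is believed to map
;; to the carry-set condition):
;;   adds x0, x1, x2
;;   b.cs <label>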
(define_expand "addti3"
[(set (match_operand:TI 0 "register_operand")
(plus:TI (match_operand:TI 1 "register_operand")
(match_operand:TI 2 "aarch64_reg_or_imm")))]
""
{
rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high;
aarch64_addti_scratch_regs (operands[1], operands[2],
&low_dest, &op1_low, &op2_low,
&high_dest, &op1_high, &op2_high);
if (op2_low == const0_rtx)
{
low_dest = op1_low;
if (!aarch64_pluslong_operand (op2_high, DImode))
op2_high = force_reg (DImode, op2_high);
emit_insn (gen_adddi3 (high_dest, op1_high, op2_high));
}
else
{
emit_insn (gen_adddi3_compareC (low_dest, op1_low,
force_reg (DImode, op2_low)));
emit_insn (gen_adddi3_carryin (high_dest, op1_high,
force_reg (DImode, op2_high)));
}
emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest);
emit_move_insn (gen_highpart (DImode, operands[0]), high_dest);
DONE;
})
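;; Sketch of the generated code (illustrative register allocation): when the
;; low half of operand 2 is non-zero, the 128-bit addition becomes
;;   adds x0, x2, x4   (low 64 bits, sets the carry)
;;   adc  x1, x3, x5   (high 64 bits plus carry-in)
;; while a zero low half degenerates to a single 64-bit ADD of the high parts.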
(define_expand "addvti4"
[(match_operand:TI 0 "register_operand")
(match_operand:TI 1 "register_operand")
(match_operand:TI 2 "aarch64_reg_or_imm")
(label_ref (match_operand 3 "" ""))]
""
{
rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high;
aarch64_addti_scratch_regs (operands[1], operands[2],
&low_dest, &op1_low, &op2_low,
&high_dest, &op1_high, &op2_high);
if (op2_low == const0_rtx)
{
low_dest = op1_low;
emit_insn (gen_adddi3_compareV (high_dest, op1_high,
force_reg (DImode, op2_high)));
}
else
{
emit_insn (gen_adddi3_compareC (low_dest, op1_low,
force_reg (DImode, op2_low)));
emit_insn (gen_adddi3_carryinV (high_dest, op1_high,
force_reg (DImode, op2_high)));
}
emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest);
emit_move_insn (gen_highpart (DImode, operands[0]), high_dest);
aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
DONE;
})
(define_expand "uaddvti4"
[(match_operand:TI 0 "register_operand")
(match_operand:TI 1 "register_operand")
(match_operand:TI 2 "aarch64_reg_or_imm")
(label_ref (match_operand 3 "" ""))]
""
{
rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high;
aarch64_addti_scratch_regs (operands[1], operands[2],
&low_dest, &op1_low, &op2_low,
&high_dest, &op1_high, &op2_high);
if (op2_low == const0_rtx)
{
low_dest = op1_low;
emit_insn (gen_adddi3_compareC (high_dest, op1_high,
force_reg (DImode, op2_high)));
}
else
{
emit_insn (gen_adddi3_compareC (low_dest, op1_low,
force_reg (DImode, op2_low)));
emit_insn (gen_adddi3_carryinC (high_dest, op1_high,
force_reg (DImode, op2_high)));
}
emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest);
emit_move_insn (gen_highpart (DImode, operands[0]), high_dest);
aarch64_gen_unlikely_cbranch (GEU, CC_ADCmode, operands[3]);
DONE;
})
(define_insn "add3_compare0"
[(set (reg:CC_NZ CC_REGNUM)
(compare:CC_NZ
(plus:GPI (match_operand:GPI 1 "register_operand" "%rk,rk,rk")
(match_operand:GPI 2 "aarch64_plus_operand" "r,I,J"))
(const_int 0)))
(set (match_operand:GPI 0 "register_operand" "=r,r,r")
(plus:GPI (match_dup 1) (match_dup 2)))]
""
"@
adds\\t%<w>0, %<w>1, %<w>2
adds\\t%<w>0, %<w>1, %2
subs\\t%<w>0, %<w>1, #%n2"
[(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
)
;; zero_extend version of above
(define_insn "*addsi3_compare0_uxtw"
[(set (reg:CC_NZ CC_REGNUM)
(compare:CC_NZ
(plus:SI (match_operand:SI 1 "register_operand" "%rk,rk,rk")
(match_operand:SI 2 "aarch64_plus_operand" "r,I,J"))
(const_int 0)))
(set (match_operand:DI 0 "register_operand" "=r,r,r")
(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
""
"@
adds\\t%w0, %w1, %w2
adds\\t%w0, %w1, %2
subs\\t%w0, %w1, #%n2"
[(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
)
(define_insn "*add3_compareC_cconly"
[(set (reg:CC_C CC_REGNUM)
(compare:CC_C
(plus:GPI
(match_operand:GPI 0 "register_operand" "r,r,r")
(match_operand:GPI 1 "aarch64_plus_operand" "r,I,J"))
(match_dup 0)))]
""
"@
cmn\\t%<w>0, %<w>1
cmn\\t%<w>0, %1
cmp\\t%<w>0, #%n1"
[(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
)
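;; Note (hedged): CMN is an alias of ADDS with XZR as the destination, so
;;   cmn x0, x1
;; computes x0 + x1 purely for its flags; comparing the sum against operand 0
;; in CC_C mode models the carry output of that addition.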
(define_insn "add3_compareC"
[(set (reg:CC_C CC_REGNUM)
(compare:CC_C
(plus:GPI
(match_operand:GPI 1 "register_operand" "rk,rk,rk")
(match_operand:GPI 2 "aarch64_plus_operand" "r,I,J"))
(match_dup 1)))
(set (match_operand:GPI 0 "register_operand" "=r,r,r")
(plus:GPI (match_dup 1) (match_dup 2)))]
""
"@
adds\\t%<w>0, %<w>1, %<w>2
adds\\t%<w>0, %<w>1, %2
subs\\t%<w>0, %<w>1, #%n2"
[(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
)
(define_insn "*add3_compareV_cconly_imm"
[(set (reg:CC_V CC_REGNUM)
(compare:CC_V
(plus:<DWI>