diff options
Diffstat (limited to 'gcc/config/aarch64/aarch64.md')
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 805 |
1 file changed, 497 insertions, 308 deletions
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 031e621..dc2be81 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -136,6 +136,14 @@ ;; The actual value can sometimes vary, because it does not track ;; changes to PSTATE.ZA that happen during a lazy save and restore. ;; Those effects are instead tracked by ZA_SAVED_REGNUM. + ;; + ;; Sequences also write to this register if they synchronize the + ;; actual contents of ZA and PSTATE.ZA with the current function's + ;; ZA_REGNUM and SME_STATE_REGNUM. Conceptually, these extra writes + ;; do not change the value of SME_STATE_REGNUM. They simply act as + ;; sequencing points. They means that all direct accesses to ZA can + ;; depend only on ZA_REGNUM and SME_STATE_REGNUM, rather than also + ;; depending on ZA_SAVED_REGNUM etc. (SME_STATE_REGNUM 89) ;; Instructions write to this register if they set TPIDR2_EL0 to a @@ -272,6 +280,7 @@ UNSPEC_PACIBSP UNSPEC_PRLG_STK UNSPEC_REV + UNSPEC_REV_PRED UNSPEC_SADALP UNSPEC_SCVTF UNSPEC_SET_LANE @@ -474,6 +483,8 @@ ;; clobber for SVE predicates. (define_attr "pred_clobber" "any,no,yes" (const_string "any")) +(define_attr "enable_ldapur" "any,no,yes" (const_string "any")) + ;; [For compatibility with Arm in pipeline models] ;; Attribute that specifies whether or not the instruction touches fp ;; registers. @@ -498,7 +509,14 @@ (eq_attr "pred_clobber" "yes") (match_test "TARGET_SVE_PRED_CLOBBER")) (eq_attr "pred_clobber" "any")) - + (ior + (and + (eq_attr "enable_ldapur" "yes") + (match_test "TARGET_ENABLE_LDAPUR")) + (and + (eq_attr "enable_ldapur" "no") + (match_test "!TARGET_ENABLE_LDAPUR")) + (eq_attr "enable_ldapur" "any")) (ior (eq_attr "arch" "any") @@ -561,9 +579,7 @@ ;; Attribute that specifies whether we are dealing with a branch to a ;; label that is far away, i.e. further away than the maximum/minimum ;; representable in a signed 21-bits number. 
-;; 0 :=: no -;; 1 :=: yes -(define_attr "far_branch" "" (const_int 0)) +(define_attr "far_branch" "no,yes" (const_string "no")) ;; Attribute that specifies whether the alternative uses MOVPRFX. (define_attr "movprfx" "no,yes" (const_string "no")) @@ -674,6 +690,10 @@ "msrr\t%x0, %x1, %H1" ) +;; ------------------------------------------------------------------- +;; Unconditional jumps +;; ------------------------------------------------------------------- + (define_insn "indirect_jump" [(set (pc) (match_operand:DI 0 "register_operand" "r"))] "" @@ -692,43 +712,372 @@ [(set_attr "type" "branch")] ) -(define_expand "cbranch<mode>4" +;; Maximum PC-relative positive/negative displacements for various branching +;; instructions. +(define_constants + [ + ;; +/- 1MiB. Used by B.<cond>, CBZ, CBNZ. + (BRANCH_LEN_P_1MiB 1048572) + (BRANCH_LEN_N_1MiB -1048576) + + ;; +/- 32KiB. Used by TBZ, TBNZ. + (BRANCH_LEN_P_32KiB 32764) + (BRANCH_LEN_N_32KiB -32768) + + ;; +/- 1KiB. Used by CBB<cond>, CBH<cond>, CB<cond>. + (BRANCH_LEN_P_1Kib 1020) + (BRANCH_LEN_N_1Kib -1024) + ] +) + +;; ------------------------------------------------------------------- +;; Conditional jumps +;; ------------------------------------------------------------------- + +;; The order of the rules below is important. +;; Higher priority rules are preferred because they can express larger +;; displacements. +;; 1) EQ/NE comparisons against zero are handled by CBZ/CBNZ. +;; 2) LT/GE comparisons against zero are handled by TBZ/TBNZ. +;; 3) When the CMPBR extension is enabled: +;; a) Comparisons between two registers are handled by +;; CBB<cond>/CBH<cond>/CB<cond>. +;; b) Comparisons between a GP register and an in range immediate are +;; handled by CB<cond> (immediate). +;; 4) Otherwise, emit a CMP+B<cond> sequence. 
+;; ------------------------------------------------------------------- + +(define_expand "cbranch<GPI:mode>4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" [(match_operand:GPI 1 "register_operand") (match_operand:GPI 2 "aarch64_plus_operand")]) - (label_ref (match_operand 3 "" "")) + (label_ref (match_operand 3)) (pc)))] "" - " - operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1], - operands[2]); - operands[2] = const0_rtx; - " + { + if (TARGET_CMPBR && aarch64_cb_rhs (GET_CODE (operands[0]), operands[2])) + { + /* The branch is supported natively. */ + } + else + { + operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), + operands[1], operands[2]); + operands[2] = const0_rtx; + } + } ) -(define_expand "cbranch<mode>4" +(define_expand "cbranch<SHORT:mode>4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" - [(match_operand:GPF 1 "register_operand") - (match_operand:GPF 2 "aarch64_fp_compare_operand")]) - (label_ref (match_operand 3 "" "")) + [(match_operand:SHORT 1 "register_operand") + (match_operand:SHORT 2 "aarch64_reg_or_zero")]) + (label_ref (match_operand 3)) (pc)))] + "TARGET_CMPBR" "" - " - operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1], - operands[2]); - operands[2] = const0_rtx; - " ) -(define_expand "cbranchcc4" +(define_expand "cbranch<mode>4" [(set (pc) (if_then_else - (match_operator 0 "aarch64_comparison_operator" - [(match_operand 1 "cc_register") - (match_operand 2 "const0_operand")]) - (label_ref (match_operand 3 "" "")) - (pc)))] + (match_operator 0 "aarch64_comparison_operator" + [(match_operand:GPF_F16 1 "register_operand") + (match_operand:GPF_F16 2 "aarch64_fp_compare_operand")]) + (label_ref (match_operand 3)) + (pc)))] "" - "") + { + operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1], + operands[2]); + operands[2] = const0_rtx; + } +) + +(define_expand "cbranchcc4" + [(set (pc) (if_then_else 
(match_operator 0 "aarch64_comparison_operator" + [(match_operand 1 "cc_register") + (match_operand 2 "const0_operand")]) + (label_ref (match_operand 3)) + (pc)))] + "" + "" +) + +;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ` +(define_insn "aarch64_cbz<optab><mode>1" + [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") + (const_int 0)) + (label_ref (match_operand 1)) + (pc)))] + "!aarch64_track_speculation" + { + if (get_attr_length (insn) == 8) + return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, "); + else + return "<cbz>\\t%<w>0, %l1"; + } + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_N_1MiB)) + (lt (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_P_1MiB))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1MiB)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1MiB))) + (const_string "no") + (const_string "yes")))] +) + +;; For an LT/GE comparison against zero, emit `TBZ`/`TBNZ` +(define_insn "*aarch64_tbz<optab><mode>1" + [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r") + (const_int 0)) + (label_ref (match_operand 1)) + (pc))) + (clobber (reg:CC CC_REGNUM))] + "!aarch64_track_speculation" + { + if (get_attr_length (insn) == 8) + { + if (get_attr_far_branch (insn) == FAR_BRANCH_YES) + return aarch64_gen_far_branch (operands, 1, "Ltb", + "<inv_tb>\\t%<w>0, <sizem1>, "); + else + { + char buf[64]; + uint64_t val = ((uint64_t) 1) + << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1); + sprintf (buf, "tst\t%%<w>0, %" PRId64, val); + output_asm_insn (buf, operands); + return "<bcond>\t%l1"; + } + } + else + return "<tbz>\t%<w>0, <sizem1>, %l1"; + } + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_N_32KiB)) + (lt (minus 
(match_dup 1) (pc)) + (const_int BRANCH_LEN_P_32KiB))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_N_1MiB)) + (lt (minus (match_dup 1) (pc)) + (const_int BRANCH_LEN_P_1MiB))) + (const_string "no") + (const_string "yes")))] +) + +;; Emit a `CB<cond> (register)` or `CB<cond> (immediate)` instruction. +;; The immediate range depends on the comparison code. +;; Comparisons against immediates outside this range fall back to +;; CMP + B<cond>. +(define_insn "aarch64_cb<INT_CMP:code><GPI:mode>" + [(set (pc) (if_then_else (INT_CMP + (match_operand:GPI 0 "register_operand" "r") + (match_operand:GPI 1 "nonmemory_operand" + "r<INT_CMP:cmpbr_imm_constraint>")) + (label_ref (match_operand 2)) + (pc)))] + "TARGET_CMPBR && aarch64_cb_rhs (<INT_CMP:CODE>, operands[1])" + { + return (get_attr_far_branch (insn) == FAR_BRANCH_NO) + ? "cb<INT_CMP:cmp_op>\\t%<w>0, %<w>1, %l2" + : aarch64_gen_far_branch (operands, 2, "L", + "cb<INT_CMP:inv_cmp_op>\\t%<w>0, %<w>1, "); + } + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1Kib)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1Kib))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1Kib)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1Kib))) + (const_string "no") + (const_string "yes")))] +) + +;; Emit a `CBB<cond> (register)` or `CBH<cond> (register)` instruction. +(define_insn "aarch64_cb<INT_CMP:code><SHORT:mode>" + [(set (pc) (if_then_else (INT_CMP + (match_operand:SHORT 0 "register_operand" "r") + (match_operand:SHORT 1 "aarch64_reg_or_zero" "rZ")) + (label_ref (match_operand 2)) + (pc)))] + "TARGET_CMPBR" + { + return (get_attr_far_branch (insn) == FAR_BRANCH_NO) + ? 
"cb<SHORT:cmpbr_suffix><INT_CMP:cmp_op>\\t%<w>0, %<w>1, %l2" + : aarch64_gen_far_branch (operands, 2, "L", + "cb<SHORT:cmpbr_suffix><INT_CMP:inv_cmp_op>\\t%<w>0, %<w>1, "); + } + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1Kib)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1Kib))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1Kib)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1Kib))) + (const_string "no") + (const_string "yes")))] +) + +;; Emit `B<cond>`, assuming that the condition is already in the CC register. +(define_insn "aarch64_bcond" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand 1 "cc_register") + (const_int 0)]) + (label_ref (match_operand 2)) + (pc)))] + "" + { + /* GCC's traditional style has been to use "beq" instead of "b.eq", etc., + but the "." is required for SVE conditions. */ + bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode; + if (get_attr_length (insn) == 8) + return aarch64_gen_far_branch (operands, 2, "Lbcond", + use_dot_p ? "b.%M0\\t" : "b%M0\\t"); + else + return use_dot_p ? "b.%m0\\t%l2" : "b%m0\\t%l2"; + } + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1MiB)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1MiB))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1MiB)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1MiB))) + (const_string "no") + (const_string "yes")))] +) + +;; For a 24-bit immediate CST we can optimize the compare for equality +;; and branch sequence from: +;; mov x0, #imm1 +;; movk x0, #imm2, lsl 16 /* x0 contains CST. 
*/ +;; cmp x1, x0 +;; b<ne,eq> .Label +;; into the shorter: +;; sub x0, x1, #(CST & 0xfff000) +;; subs x0, x0, #(CST & 0x000fff) +;; b<ne,eq> .Label +(define_insn_and_split "*aarch64_bcond_wide_imm<GPI:mode>" + [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") + (match_operand:GPI 1 "aarch64_imm24" "n")) + (label_ref:P (match_operand 2)) + (pc)))] + "!aarch64_move_imm (INTVAL (operands[1]), <GPI:MODE>mode) + && !aarch64_plus_operand (operands[1], <GPI:MODE>mode) + && !reload_completed" + "#" + "&& true" + [(const_int 0)] + { + HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff; + HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000; + rtx tmp = gen_reg_rtx (<GPI:MODE>mode); + emit_insn (gen_add<GPI:mode>3 (tmp, operands[0], GEN_INT (-hi_imm))); + emit_insn (gen_add<GPI:mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm))); + rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); + rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <GPI:MODE>mode, + cc_reg, const0_rtx); + emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[2])); + DONE; + } +) + +;; ------------------------------------------------------------------- +;; Test bit and branch +;; ------------------------------------------------------------------- + +(define_expand "tbranch_<code><mode>3" + [(set (pc) (if_then_else (EQL + (match_operand:SHORT 0 "register_operand") + (match_operand 1 "const0_operand")) + (label_ref (match_operand 2 "")) + (pc)))] + "" +{ + rtx bitvalue = gen_reg_rtx (<ZEROM>mode); + rtx reg = gen_lowpart (<ZEROM>mode, operands[0]); + rtx val = gen_int_mode (HOST_WIDE_INT_1U << UINTVAL (operands[1]), + <MODE>mode); + emit_insn (gen_and<zerom>3 (bitvalue, reg, val)); + operands[1] = const0_rtx; + operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue, + operands[1]); +}) + +(define_insn "@aarch64_tbz<optab><ALLI:mode><GPI:mode>" + [(set (pc) (if_then_else (EQL + (zero_extract:GPI + (match_operand:ALLI 0 "register_operand" "r") + (const_int 1) + (match_operand 1 
"aarch64_simd_shift_imm_<ALLI:mode>" "n")) + (const_int 0)) + (label_ref (match_operand 2)) + (pc))) + (clobber (reg:CC CC_REGNUM))] + "!aarch64_track_speculation" + { + if (get_attr_length (insn) == 8) + { + if (get_attr_far_branch (insn) == 1) + return aarch64_gen_far_branch (operands, 2, "Ltb", + "<inv_tb>\\t%<ALLI:w>0, %1, "); + else + { + operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1])); + return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2"; + } + } + else + return "<tbz>\t%<ALLI:w>0, %1, %l2"; + } + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_32KiB)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_32KiB))) + (const_int 4) + (const_int 8))) + (set (attr "far_branch") + (if_then_else (and (ge (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_N_1MiB)) + (lt (minus (match_dup 2) (pc)) + (const_int BRANCH_LEN_P_1MiB))) + (const_string "no") + (const_string "yes")))] + +) (define_insn "@ccmp<CC_ONLY:mode><GPI:mode>" [(set (match_operand:CC_ONLY 1 "cc_register") @@ -860,71 +1209,6 @@ } ) -(define_insn "condjump" - [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" - [(match_operand 1 "cc_register" "") (const_int 0)]) - (label_ref (match_operand 2 "" "")) - (pc)))] - "" - { - /* GCC's traditional style has been to use "beq" instead of "b.eq", etc., - but the "." is required for SVE conditions. */ - bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode; - if (get_attr_length (insn) == 8) - return aarch64_gen_far_branch (operands, 2, "Lbcond", - use_dot_p ? "b.%M0\\t" : "b%M0\\t"); - else - return use_dot_p ? 
"b.%m0\\t%l2" : "b%m0\\t%l2"; - } - [(set_attr "type" "branch") - (set (attr "length") - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) - (lt (minus (match_dup 2) (pc)) (const_int 1048572))) - (const_int 4) - (const_int 8))) - (set (attr "far_branch") - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) - (lt (minus (match_dup 2) (pc)) (const_int 1048572))) - (const_int 0) - (const_int 1)))] -) - -;; For a 24-bit immediate CST we can optimize the compare for equality -;; and branch sequence from: -;; mov x0, #imm1 -;; movk x0, #imm2, lsl 16 /* x0 contains CST. */ -;; cmp x1, x0 -;; b<ne,eq> .Label -;; into the shorter: -;; sub x0, x1, #(CST & 0xfff000) -;; subs x0, x0, #(CST & 0x000fff) -;; b<ne,eq> .Label -(define_insn_and_split "*compare_condjump<GPI:mode>" - [(set (pc) (if_then_else (EQL - (match_operand:GPI 0 "register_operand" "r") - (match_operand:GPI 1 "aarch64_imm24" "n")) - (label_ref:P (match_operand 2 "" "")) - (pc)))] - "!aarch64_move_imm (INTVAL (operands[1]), <GPI:MODE>mode) - && !aarch64_plus_operand (operands[1], <GPI:MODE>mode) - && !reload_completed" - "#" - "&& true" - [(const_int 0)] - { - HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff; - HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000; - rtx tmp = gen_reg_rtx (<GPI:MODE>mode); - emit_insn (gen_add<GPI:mode>3 (tmp, operands[0], GEN_INT (-hi_imm))); - emit_insn (gen_add<GPI:mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm))); - rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); - rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <GPI:MODE>mode, - cc_reg, const0_rtx); - emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[2])); - DONE; - } -) - (define_expand "casesi" [(match_operand:SI 0 "register_operand") ; Index (match_operand:SI 1 "const_int_operand") ; Lower bound @@ -1116,127 +1400,6 @@ (set_attr "sls_length" "retbr")] ) -(define_insn "aarch64_cb<optab><mode>1" - [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") - 
(const_int 0)) - (label_ref (match_operand 1 "" "")) - (pc)))] - "!aarch64_track_speculation" - { - if (get_attr_length (insn) == 8) - return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, "); - else - return "<cbz>\\t%<w>0, %l1"; - } - [(set_attr "type" "branch") - (set (attr "length") - (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576)) - (lt (minus (match_dup 1) (pc)) (const_int 1048572))) - (const_int 4) - (const_int 8))) - (set (attr "far_branch") - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) - (lt (minus (match_dup 2) (pc)) (const_int 1048572))) - (const_int 0) - (const_int 1)))] -) - -(define_expand "tbranch_<code><mode>3" - [(set (pc) (if_then_else - (EQL (match_operand:SHORT 0 "register_operand") - (match_operand 1 "const0_operand")) - (label_ref (match_operand 2 "")) - (pc)))] - "" -{ - rtx bitvalue = gen_reg_rtx (<ZEROM>mode); - rtx reg = gen_lowpart (<ZEROM>mode, operands[0]); - rtx val = gen_int_mode (HOST_WIDE_INT_1U << UINTVAL (operands[1]), <MODE>mode); - emit_insn (gen_and<zerom>3 (bitvalue, reg, val)); - operands[1] = const0_rtx; - operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue, - operands[1]); -}) - -(define_insn "@aarch64_tb<optab><ALLI:mode><GPI:mode>" - [(set (pc) (if_then_else - (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r") - (const_int 1) - (match_operand 1 - "aarch64_simd_shift_imm_<ALLI:mode>" "n")) - (const_int 0)) - (label_ref (match_operand 2 "" "")) - (pc))) - (clobber (reg:CC CC_REGNUM))] - "!aarch64_track_speculation" - { - if (get_attr_length (insn) == 8) - { - if (get_attr_far_branch (insn) == 1) - return aarch64_gen_far_branch (operands, 2, "Ltb", - "<inv_tb>\\t%<ALLI:w>0, %1, "); - else - { - operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1])); - return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2"; - } - } - else - return "<tbz>\t%<ALLI:w>0, %1, %l2"; - } - [(set_attr "type" "branch") - (set (attr "length") - (if_then_else (and 
(ge (minus (match_dup 2) (pc)) (const_int -32768)) - (lt (minus (match_dup 2) (pc)) (const_int 32764))) - (const_int 4) - (const_int 8))) - (set (attr "far_branch") - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576)) - (lt (minus (match_dup 2) (pc)) (const_int 1048572))) - (const_int 0) - (const_int 1)))] - -) - -(define_insn "*cb<optab><mode>1" - [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r") - (const_int 0)) - (label_ref (match_operand 1 "" "")) - (pc))) - (clobber (reg:CC CC_REGNUM))] - "!aarch64_track_speculation" - { - if (get_attr_length (insn) == 8) - { - if (get_attr_far_branch (insn) == 1) - return aarch64_gen_far_branch (operands, 1, "Ltb", - "<inv_tb>\\t%<w>0, <sizem1>, "); - else - { - char buf[64]; - uint64_t val = ((uint64_t) 1) - << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1); - sprintf (buf, "tst\t%%<w>0, %" PRId64, val); - output_asm_insn (buf, operands); - return "<bcond>\t%l1"; - } - } - else - return "<tbz>\t%<w>0, <sizem1>, %l1"; - } - [(set_attr "type" "branch") - (set (attr "length") - (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768)) - (lt (minus (match_dup 1) (pc)) (const_int 32764))) - (const_int 4) - (const_int 8))) - (set (attr "far_branch") - (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576)) - (lt (minus (match_dup 1) (pc)) (const_int 1048572))) - (const_int 0) - (const_int 1)))] -) - (define_expand "save_stack_nonlocal" [(set (match_operand 0 "memory_operand") (match_operand 1 "register_operand"))] @@ -1307,13 +1470,13 @@ emit_insn (gen_subdi3_compare1 (gcs_now, gcs_old, gcs_now)); rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); rtx cmp_rtx = gen_rtx_fmt_ee (EQ, DImode, cc_reg, const0_rtx); - emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, done_label)); + emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, done_label)); emit_label (loop_label); emit_insn (gen_aarch64_gcspopm_xzr ()); emit_insn (gen_adddi3_compare0 (gcs_now, gcs_now, 
GEN_INT (-8))); cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); cmp_rtx = gen_rtx_fmt_ee (NE, DImode, cc_reg, const0_rtx); - emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, loop_label)); + emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, loop_label)); emit_label (done_label); } DONE; @@ -2181,9 +2344,9 @@ "aarch64_mem_pair_offset (operands[4], <MODE>mode) && known_eq (INTVAL (operands[5]), INTVAL (operands[4]) + GET_MODE_SIZE (<MODE>mode))" - {@ [cons: =&0, 1, =2, =3; attrs: type ] - [ rk, 0, r, r; load_<ldpstp_sz>] ldp\t%<w>2, %<w>3, [%0, %4]! - [ rk, 0, w, w; neon_load1_2reg ] ldp\t%<v>2, %<v>3, [%0, %4]! + {@ [cons: =0, 1, =2, =3; attrs: type ] + [ &rk, 0, r, r; load_<ldpstp_sz>] ldp\t%<w>2, %<w>3, [%0, %4]! + [ rk, 0, w, w; neon_load1_2reg ] ldp\t%<v>2, %<v>3, [%0, %4]! } ) @@ -2237,9 +2400,9 @@ INTVAL (operands[4]) + GET_MODE_SIZE (<MODE>mode)) && !reg_overlap_mentioned_p (operands[0], operands[2]) && !reg_overlap_mentioned_p (operands[0], operands[3])" - {@ [cons: =&0, 1, 2, 3; attrs: type ] - [ rk, 0, rYZ, rYZ; store_<ldpstp_sz>] stp\t%<w>2, %<w>3, [%0, %4]! - [ rk, 0, w, w; neon_store1_2reg ] stp\t%<v>2, %<v>3, [%0, %4]! + {@ [cons: =0, 1, 2, 3; attrs: type ] + [ &rk, 0, rYZ, rYZ; store_<ldpstp_sz>] stp\t%<w>2, %<w>3, [%0, %4]! + [ rk, 0, w, w; neon_store1_2reg ] stp\t%<v>2, %<v>3, [%0, %4]! 
} ) @@ -2485,15 +2648,15 @@ (match_operand:GPI 1 "register_operand") (match_operand:GPI 2 "aarch64_pluslong_operand")))] "" - {@ [ cons: =0 , 1 , 2 ; attrs: type , arch ] - [ rk , %rk , I ; alu_imm , * ] add\t%<w>0, %<w>1, %2 - [ rk , rk , r ; alu_sreg , * ] add\t%<w>0, %<w>1, %<w>2 - [ w , w , w ; neon_add , simd ] add\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas> - [ rk , rk , J ; alu_imm , * ] sub\t%<w>0, %<w>1, #%n2 - [ r , rk , Uaa ; multiple , * ] # - [ r , 0 , Uai ; alu_imm , sve ] << aarch64_output_sve_scalar_inc_dec (operands[2]); - [ rk , rk , Uav ; alu_imm , sve ] << aarch64_output_sve_addvl_addpl (operands[2]); - [ rk , rk , UaV ; alu_imm , sme ] << aarch64_output_addsvl_addspl (operands[2]); + {@ [ cons: =0 , %1 , 2 ; attrs: type , arch ] + [ rk , rk , I ; alu_imm , * ] add\t%<w>0, %<w>1, %2 + [ rk , rk , r ; alu_sreg , * ] add\t%<w>0, %<w>1, %<w>2 + [ w , w , w ; neon_add , simd ] add\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas> + [ rk , rk , J ; alu_imm , * ] sub\t%<w>0, %<w>1, #%n2 + [ r , rk , Uaa ; multiple , * ] # + [ r , 0 , Uai ; alu_imm , sve ] << aarch64_output_sve_scalar_inc_dec (operands[2]); + [ rk , rk , Uav ; alu_imm , sve ] << aarch64_output_sve_addvl_addpl (operands[2]); + [ rk , rk , UaV ; alu_imm , sme ] << aarch64_output_addsvl_addspl (operands[2]); } ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders. 
) @@ -2506,11 +2669,11 @@ (plus:SI (match_operand:SI 1 "register_operand") (match_operand:SI 2 "aarch64_pluslong_operand"))))] "" - {@ [ cons: =0 , 1 , 2 ; attrs: type ] - [ rk , %rk , I ; alu_imm ] add\t%w0, %w1, %2 - [ rk , rk , r ; alu_sreg ] add\t%w0, %w1, %w2 - [ rk , rk , J ; alu_imm ] sub\t%w0, %w1, #%n2 - [ r , rk , Uaa ; multiple ] # + {@ [ cons: =0 , %1 , 2 ; attrs: type ] + [ rk , rk , I ; alu_imm ] add\t%w0, %w1, %2 + [ rk , rk , r ; alu_sreg ] add\t%w0, %w1, %w2 + [ rk , rk , J ; alu_imm ] sub\t%w0, %w1, #%n2 + [ r , rk , Uaa ; multiple ] # } ) @@ -2579,14 +2742,14 @@ (match_operand:GPI 1 "register_operand") (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand")))] "TARGET_SVE && operands[0] != stack_pointer_rtx" - {@ [ cons: =0 , 1 , 2 ; attrs: type ] - [ r , %rk , I ; alu_imm ] add\t%<w>0, %<w>1, %2 - [ r , rk , r ; alu_sreg ] add\t%<w>0, %<w>1, %<w>2 - [ r , rk , J ; alu_imm ] sub\t%<w>0, %<w>1, #%n2 - [ r , rk , Uaa ; multiple ] # - [ r , 0 , Uai ; alu_imm ] << aarch64_output_sve_scalar_inc_dec (operands[2]); - [ r , rk , Uav ; alu_imm ] << aarch64_output_sve_addvl_addpl (operands[2]); - [ &r , rk , Uat ; multiple ] # + {@ [ cons: =0 , %1 , 2 ; attrs: type ] + [ r , rk , I ; alu_imm ] add\t%<w>0, %<w>1, %2 + [ r , rk , r ; alu_sreg ] add\t%<w>0, %<w>1, %<w>2 + [ r , rk , J ; alu_imm ] sub\t%<w>0, %<w>1, #%n2 + [ r , rk , Uaa ; multiple ] # + [ r , 0 , Uai ; alu_imm ] << aarch64_output_sve_scalar_inc_dec (operands[2]); + [ r , rk , Uav ; alu_imm ] << aarch64_output_sve_addvl_addpl (operands[2]); + [ &r , rk , Uat ; multiple ] # } "&& epilogue_completed && !reg_overlap_mentioned_p (operands[0], operands[1]) @@ -2758,10 +2921,10 @@ (set (match_operand:GPI 0 "register_operand") (plus:GPI (match_dup 1) (match_dup 2)))] "" - {@ [ cons: =0 , 1 , 2 ; attrs: type ] - [ r , %rk , r ; alus_sreg ] adds\t%<w>0, %<w>1, %<w>2 - [ r , rk , I ; alus_imm ] adds\t%<w>0, %<w>1, %2 - [ r , rk , J ; alus_imm ] subs\t%<w>0, %<w>1, #%n2 + {@ [ cons: =0 , %1 , 2 ; attrs: 
type ] + [ r , rk , r ; alus_sreg ] adds\t%<w>0, %<w>1, %<w>2 + [ r , rk , I ; alus_imm ] adds\t%<w>0, %<w>1, %2 + [ r , rk , J ; alus_imm ] subs\t%<w>0, %<w>1, #%n2 } ) @@ -2775,10 +2938,10 @@ (set (match_operand:DI 0 "register_operand") (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] "" - {@ [ cons: =0 , 1 , 2 ; attrs: type ] - [ r , %rk , r ; alus_sreg ] adds\t%w0, %w1, %w2 - [ r , rk , I ; alus_imm ] adds\t%w0, %w1, %2 - [ r , rk , J ; alus_imm ] subs\t%w0, %w1, #%n2 + {@ [ cons: =0 , %1 , 2 ; attrs: type ] + [ r , rk , r ; alus_sreg ] adds\t%w0, %w1, %w2 + [ r , rk , I ; alus_imm ] adds\t%w0, %w1, %2 + [ r , rk , J ; alus_imm ] subs\t%w0, %w1, #%n2 } ) @@ -2979,10 +3142,10 @@ (match_operand:GPI 1 "aarch64_plus_operand")) (const_int 0)))] "" - {@ [ cons: 0 , 1 ; attrs: type ] - [ %r , r ; alus_sreg ] cmn\t%<w>0, %<w>1 - [ r , I ; alus_imm ] cmn\t%<w>0, %1 - [ r , J ; alus_imm ] cmp\t%<w>0, #%n1 + {@ [ cons: %0 , 1 ; attrs: type ] + [ r , r ; alus_sreg ] cmn\t%<w>0, %<w>1 + [ r , I ; alus_imm ] cmn\t%<w>0, %1 + [ r , J ; alus_imm ] cmp\t%<w>0, #%n1 } ) @@ -4337,26 +4500,28 @@ (define_insn "fcmp<mode>" [(set (reg:CCFP CC_REGNUM) - (compare:CCFP (match_operand:GPF 0 "register_operand") - (match_operand:GPF 1 "aarch64_fp_compare_operand")))] + (compare:CCFP + (match_operand:GPF_F16 0 "register_operand") + (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))] "TARGET_FLOAT" {@ [ cons: 0 , 1 ] [ w , Y ] fcmp\t%<s>0, #0.0 [ w , w ] fcmp\t%<s>0, %<s>1 } - [(set_attr "type" "fcmp<s>")] + [(set_attr "type" "fcmp<stype>")] ) (define_insn "fcmpe<mode>" [(set (reg:CCFPE CC_REGNUM) - (compare:CCFPE (match_operand:GPF 0 "register_operand") - (match_operand:GPF 1 "aarch64_fp_compare_operand")))] + (compare:CCFPE + (match_operand:GPF_F16 0 "register_operand") + (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))] "TARGET_FLOAT" {@ [ cons: 0 , 1 ] [ w , Y ] fcmpe\t%<s>0, #0.0 [ w , w ] fcmpe\t%<s>0, %<s>1 } - [(set_attr "type" "fcmp<s>")] + [(set_attr "type" 
"fcmp<stype>")] ) (define_insn "*cmp_swp_<shift>_reg<mode>" @@ -4392,6 +4557,49 @@ [(set_attr "type" "alus_ext")] ) +;; <=> operator pattern (integer) +;; (a == b) ? 0 : (a < b) ? -1 : 1. +(define_expand "spaceship<mode>4" + [(match_operand:SI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + "" + { + // 1 indicates unsigned comparison, -1 indicates signed. + gcc_assert (operands[3] == constm1_rtx || operands[3] == const1_rtx); + + rtx cc_reg = aarch64_gen_compare_reg (EQ, operands[1], operands[2]); + RTX_CODE code_gt = operands[3] == const1_rtx ? GTU : GT; + RTX_CODE code_lt = operands[3] == const1_rtx ? LTU : LT; + + rtx cc_gt = gen_rtx_fmt_ee (code_gt, VOIDmode, cc_reg, const0_rtx); + rtx cc_lt = gen_rtx_fmt_ee (code_lt, VOIDmode, cc_reg, const0_rtx); + + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (temp, gen_rtx_IF_THEN_ELSE (SImode, cc_gt, + const1_rtx, const0_rtx))); + emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (SImode, cc_lt, + constm1_rtx, temp))); + DONE; + } +) + +;; <=> operator pattern (floating-point) +;; (a == b) ? 0 : (a < b) ? -1 : (a > b) ? 1 : UNORDERED. 
+(define_expand "spaceship<mode>4" + [(match_operand:SI 0 "register_operand") + (match_operand:GPF 1 "register_operand") + (match_operand:GPF 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + "TARGET_FLOAT" + { + aarch64_expand_fp_spaceship (operands[0], operands[1], operands[2], + operands[3]); + DONE; + } +) + ;; ------------------------------------------------------------------- ;; Store-flag and conditional select insns ;; ------------------------------------------------------------------- @@ -4424,8 +4632,8 @@ (define_expand "cstore<mode>4" [(set (match_operand:SI 0 "register_operand") (match_operator:SI 1 "aarch64_comparison_operator_mode" - [(match_operand:GPF 2 "register_operand") - (match_operand:GPF 3 "aarch64_fp_compare_operand")]))] + [(match_operand:GPF_F16 2 "register_operand") + (match_operand:GPF_F16 3 "aarch64_fp_compare_operand")]))] "" " operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], @@ -4509,38 +4717,6 @@ [(set_attr "type" "csel")] ) -(define_expand "cmov<mode>6" - [(set (match_operand:GPI 0 "register_operand") - (if_then_else:GPI - (match_operator 1 "aarch64_comparison_operator" - [(match_operand:GPI 2 "register_operand") - (match_operand:GPI 3 "aarch64_plus_operand")]) - (match_operand:GPI 4 "register_operand") - (match_operand:GPI 5 "register_operand")))] - "" - " - operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], - operands[3]); - operands[3] = const0_rtx; - " -) - -(define_expand "cmov<mode>6" - [(set (match_operand:GPF 0 "register_operand") - (if_then_else:GPF - (match_operator 1 "aarch64_comparison_operator" - [(match_operand:GPF 2 "register_operand") - (match_operand:GPF 3 "aarch64_fp_compare_operand")]) - (match_operand:GPF 4 "register_operand") - (match_operand:GPF 5 "register_operand")))] - "" - " - operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], - operands[3]); - operands[3] = const0_rtx; - " -) - (define_insn "*cmov<mode>_insn" 
[(set (match_operand:ALLI 0 "register_operand") (if_then_else:ALLI @@ -5045,8 +5221,8 @@ (LOGICAL:GPI (match_operand:GPI 1 "register_operand") (match_operand:GPI 2 "aarch64_logical_operand")))] "" - {@ [ cons: =0 , 1 , 2 ; attrs: type , arch ] - [ r , %r , r ; logic_reg , * ] <logical>\t%<w>0, %<w>1, %<w>2 + {@ [ cons: =0 , %1 , 2 ; attrs: type , arch ] + [ r , r , r ; logic_reg , * ] <logical>\t%<w>0, %<w>1, %<w>2 [ rk , r , <lconst> ; logic_imm , * ] <logical>\t%<w>0, %<w>1, %2 [ w , 0 , <lconst> ; * , sve ] <logical>\t%Z0.<s>, %Z0.<s>, #%2 [ w , w , w ; neon_logic , simd ] <logical>\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype> @@ -5060,8 +5236,8 @@ (LOGICAL:SI (match_operand:SI 1 "register_operand") (match_operand:SI 2 "aarch64_logical_operand"))))] "" - {@ [ cons: =0 , 1 , 2 ; attrs: type ] - [ r , %r , r ; logic_reg ] <logical>\t%w0, %w1, %w2 + {@ [ cons: =0 , %1 , 2 ; attrs: type ] + [ r , r , r ; logic_reg ] <logical>\t%w0, %w1, %w2 [ rk , r , K ; logic_imm ] <logical>\t%w0, %w1, %2 } ) @@ -5075,8 +5251,8 @@ (set (match_operand:GPI 0 "register_operand") (and:GPI (match_dup 1) (match_dup 2)))] "" - {@ [ cons: =0 , 1 , 2 ; attrs: type ] - [ r , %r , r ; logics_reg ] ands\t%<w>0, %<w>1, %<w>2 + {@ [ cons: =0 , %1 , 2 ; attrs: type ] + [ r , r , r ; logics_reg ] ands\t%<w>0, %<w>1, %<w>2 [ r , r , <lconst> ; logics_imm ] ands\t%<w>0, %<w>1, %2 } ) @@ -5091,8 +5267,8 @@ (set (match_operand:DI 0 "register_operand") (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))] "" - {@ [ cons: =0 , 1 , 2 ; attrs: type ] - [ r , %r , r ; logics_reg ] ands\t%w0, %w1, %w2 + {@ [ cons: =0 , %1 , 2 ; attrs: type ] + [ r , r , r ; logics_reg ] ands\t%w0, %w1, %w2 [ r , r , K ; logics_imm ] ands\t%w0, %w1, %2 } ) @@ -5605,6 +5781,19 @@ (match_operand:TI 1 "register_operand")] "TARGET_SIMD && !TARGET_CSSC" { + /* For SVE we can do popcount on DImode chunks of the TImode argument + and then use a cheap ADDP reduction. 
The SVE CNT instruction requires + materializing a PTRUE so don't do this if optimizing for size. */ + if (TARGET_SVE && !optimize_function_for_size_p (cfun)) + { + rtx v = gen_reg_rtx (V2DImode); + rtx v1 = gen_reg_rtx (V2DImode); + emit_move_insn (v, gen_lowpart (V2DImode, operands[1])); + rtx p = aarch64_ptrue_reg (VNx2BImode, 16); + emit_insn (gen_aarch64_pred_popcountv2di (v1, p, v)); + emit_insn (gen_reduc_plus_scal_v2di (operands[0], v1)); + DONE; + } rtx v = gen_reg_rtx (V16QImode); rtx v1 = gen_reg_rtx (V16QImode); emit_move_insn (v, gen_lowpart (V16QImode, operands[1])); @@ -5676,9 +5865,9 @@ (match_operand:GPI 1 "aarch64_logical_operand")) (const_int 0)))] "" - {@ [ cons: 0 , 1 ; attrs: type ] - [ %r , r ; logics_reg ] tst\t%<w>0, %<w>1 - [ r , <lconst> ; logics_imm ] tst\t%<w>0, %1 + {@ [ cons: %0 , 1 ; attrs: type ] + [ r , r ; logics_reg ] tst\t%<w>0, %<w>1 + [ r , <lconst> ; logics_imm ] tst\t%<w>0, %1 } ) @@ -8068,8 +8257,8 @@ : gen_stack_protect_test_si) (operands[0], operands[1])); rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); - emit_jump_insn (gen_condjump (gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx), - cc_reg, operands[2])); + emit_jump_insn (gen_aarch64_bcond (gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx), + cc_reg, operands[2])); DONE; }) |