;; ARMv8-A crypto patterns.
;; Copyright (C) 2013-2024 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;; This file is part of GCC.
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.
;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
;; License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
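;; AESMC/AESIMC patterns.  The CRYPTO_AESMC iterator (defined with the
;; other crypto iterators in iterators.md) covers the unary AES
;; MixColumns operations, which read and write a full 128-bit register.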
(define_insn "crypto_"
[(set (match_operand: 0 "register_operand" "=w")
(unspec:
[(match_operand: 1 "register_operand" "w")]
CRYPTO_AESMC))]
"TARGET_CRYPTO"
".\\t%q0, %q1"
[(set_attr "type" "")]
)
(define_expand "crypto_"
[(set (match_operand: 0 "register_operand" "=w")
(unspec:
[(xor:
(match_operand: 1 "register_operand" "%0")
(match_operand: 2 "register_operand" "w"))]
CRYPTO_AES))]
"TARGET_CRYPTO"
{
if (fix_aes_erratum_1742098)
{
rtx op1_protect = gen_reg_rtx (V16QImode);
emit_insn (gen_aes_op_protect (op1_protect, operands[1]));
operands[1] = op1_protect;
rtx op2_protect = gen_reg_rtx (V16QImode);
emit_insn (gen_aes_op_protect (op2_protect, operands[2]));
operands[2] = op2_protect;
}
/* Fall through to default expansion. */
})
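;; When fix_aes_erratum_1742098 is set, the expander above copies both
;; inputs through UNSPEC_AES_PROTECT moves before the AES operation, so
;; (as a rough sketch, using the ACLE intrinsic name) something like
;;
;;   vaeseq_u8 (data, key)
;;
;; becomes "protect data; protect key; aese.8".  The protect copies can
;; be eliminated again when the producer is known to be a full 128-bit
;; write (see aes_op_protect and the *_protected patterns below).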
(define_insn "*crypto__insn"
[(set (match_operand: 0 "register_operand" "=w")
(unspec:
[(xor:
(match_operand: 1 "register_operand" "%0")
(match_operand: 2 "register_operand" "w"))]
CRYPTO_AES))]
"TARGET_CRYPTO"
".\\t%q0, %q2"
[(set_attr "type" "")]
)
;; Mitigate against the AES erratum on Cortex-A57 and Cortex-A72 by
;; performing a 128-bit operation on an operand producer.  This can be
;; eliminated only if we know that the operand was produced by a
;; full-width operation.  V16QImode matches <crypto_mode> for the AES
;; instructions.  Handle some very common cases where the source is
;; known to be safe (transfers from core registers and memory).
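;; The protect operation itself is just a full-width copy; in the
;; register alternative below it is a plain "vmov %q0, %q1".  The
;; patterns that follow let that copy be absorbed whenever the producer
;; is already known to write all 128 bits.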
(define_insn "aes_op_protect"
[(set (match_operand:V16QI 0 "register_operand" "=w,w,w")
(unspec:V16QI [(match_operand:V16QI 1 "general_operand" "w,r,Uni")]
UNSPEC_AES_PROTECT))]
"TARGET_CRYPTO && fix_aes_erratum_1742098"
{
switch (which_alternative)
{
case 0: return "vmov\t%q0, %q1";
case 1: return "vmov\t%e0, %Q1, %R1 @ V16QI\;vmov\t%f0, %J1, %K1";
case 2: return output_move_neon (operands);
default: gcc_unreachable ();
}
}
[(set_attr "type" "neon_move_q,neon_from_gp_q,neon_load1_4reg")
(set_attr "length" "4,8,8")
(set_attr "arm_pool_range" "*,*,1020")
(set_attr "thumb2_pool_range" "*,*,1018")
(set_attr "neg_pool_range" "*,*,996")]
)
;; Another safe case is when a movmisalign load is used as the source.
(define_insn "*aes_op_protect_misalign_load"
[(set (match_operand:V16QI 0 "s_register_operand" "=w")
(unspec:V16QI
[(unspec:V16QI
[(match_operand:V16QI 1 "neon_permissive_struct_operand" "Um")]
UNSPEC_MISALIGNED_ACCESS)]
UNSPEC_AES_PROTECT))]
"TARGET_CRYPTO && fix_aes_erratum_1742098"
"vld1.8\t%{q0}, %A1"
[(set_attr "type" "neon_load1_1reg_q")]
)
;; Similarly for the vld1 intrinsic
(define_insn "aes_op_protect_neon_vld1v16qi"
[(set (match_operand:V16QI 0 "s_register_operand" "=w")
(unspec:V16QI
[(unspec:V16QI [(match_operand:V16QI 1 "neon_struct_operand" "Um")]
UNSPEC_VLD1)]
UNSPEC_AES_PROTECT))]
"TARGET_NEON"
"vld1.8\t%h0, %A1"
[(set_attr "type" "neon_load1_1reg_q")]
)
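;; A vld1 of all sixteen bytes necessarily writes the full 128-bit
;; register, so no separate protection copy is needed when the AES input
;; comes straight from such a load.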
;; An AESMC operation can feed directly into a subsequent AES
;; operation without needing mitigation.
(define_insn "*crypto__protected"
[(set (match_operand: 0 "register_operand" "=w")
(unspec:
[(unspec:
[(match_operand: 1 "register_operand" "w")]
CRYPTO_AESMC)]
UNSPEC_AES_PROTECT))]
"TARGET_CRYPTO && fix_aes_erratum_1742098"
".\\t%q0, %q1"
[(set_attr "type" "")]
)
;; When AESE/AESMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;; Mash the two together during combine.
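;; For example, an AES round written with the ACLE intrinsics as
;; vaesmcq_u8 (vaeseq_u8 (state, key)) should match the fused pattern
;; below and be emitted as an adjacent aese.8/aesmc.8 pair.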
(define_insn "*aarch32_crypto_aese_fused"
[(set (match_operand:V16QI 0 "register_operand" "=w")
(unspec:V16QI
[(unspec:V16QI [(xor:V16QI
(match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "register_operand" "w"))]
UNSPEC_AESE)]
UNSPEC_AESMC))]
"TARGET_CRYPTO
&& arm_fusion_enabled_p (tune_params::FUSE_AES_AESMC)"
"aese.8\\t%q0, %q2\;aesmc.8\\t%q0, %q0"
[(set_attr "type" "crypto_aese")
(set_attr "length" "8")]
)
;; And similarly when mitigation is enabled, but not needed in this
;; case.
(define_insn "*aarch32_crypto_aese_fused_protected"
[(set (match_operand:V16QI 0 "register_operand" "=w")
(unspec:V16QI
[(unspec:V16QI
[(unspec:V16QI [(xor:V16QI
(match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "register_operand" "w"))]
UNSPEC_AESE)]
UNSPEC_AESMC)]
UNSPEC_AES_PROTECT))]
"TARGET_CRYPTO && fix_aes_erratum_1742098
&& arm_fusion_enabled_p (tune_params::FUSE_AES_AESMC)"
"aese.8\\t%q0, %q2\;aesmc.8\\t%q0, %q0"
[(set_attr "type" "crypto_aese")
(set_attr "length" "8")]
)
;; When AESD/AESIMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;; Mash the two together during combine.
(define_insn "*aarch32_crypto_aesd_fused"
[(set (match_operand:V16QI 0 "register_operand" "=w")
(unspec:V16QI
[(unspec:V16QI [(xor:V16QI
(match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "register_operand" "w"))]
UNSPEC_AESD)]
UNSPEC_AESIMC))]
"TARGET_CRYPTO
&& arm_fusion_enabled_p (tune_params::FUSE_AES_AESMC)"
"aesd.8\\t%q0, %q2\;aesimc.8\\t%q0, %q0"
[(set_attr "type" "crypto_aese")
(set_attr "length" "8")]
)
(define_insn "*aarch32_crypto_aesd_fused_protected"
[(set (match_operand:V16QI 0 "register_operand" "=w")
(unspec:V16QI
[(unspec:V16QI
[(unspec:V16QI [(xor:V16QI
(match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "register_operand" "w"))]
UNSPEC_AESD)]
UNSPEC_AESIMC)]
UNSPEC_AES_PROTECT))]
"TARGET_CRYPTO && fix_aes_erratum_1742098
&& arm_fusion_enabled_p (tune_params::FUSE_AES_AESMC)"
"aesd.8\\t%q0, %q2\;aesimc.8\\t%q0, %q0"
[(set_attr "type" "crypto_aese")
(set_attr "length" "8")]
)
(define_insn "crypto_"
[(set (match_operand: 0 "register_operand" "=w")
(unspec:
[(match_operand: 1 "register_operand" "0")
(match_operand: 2 "register_operand" "w")]
CRYPTO_BINARY))]
"TARGET_CRYPTO"
".\\t%q0, %q2"
[(set_attr "type" "")]
)
(define_insn "crypto_"
[(set (match_operand: 0 "register_operand" "=w")
(unspec:
[(match_operand: 1 "register_operand" "0")
(match_operand: 2 "register_operand" "w")
(match_operand: 3 "register_operand" "w")]
CRYPTO_TERNARY))]
"TARGET_CRYPTO"
".\\t%q0, %q2, %q3"
[(set_attr "type" "")]
)
;; The vec_select operation always selects index 0 from the lower V2SI
;; subreg of the V4SI, adjusted for endianness.  Required because
;; neon_vget_lane and neon_set_lane change the element ordering in
;; memory and so need this adjustment for big-endian.
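;; NEON_ENDIAN_LANE_N (see arm.h) converts an architectural lane number
;; into GCC's in-register lane numbering; assuming the usual definition,
;; NEON_ENDIAN_LANE_N (V2SImode, 0) evaluates to 0 for little-endian and
;; to 1 for big-endian.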
(define_expand "crypto_sha1h"
[(set (match_operand:V4SI 0 "register_operand")
(match_operand:V4SI 1 "register_operand"))]
"TARGET_CRYPTO"
{
rtx op2 = GEN_INT (NEON_ENDIAN_LANE_N (V2SImode, 0));
emit_insn (gen_crypto_sha1h_lb (operands[0], operands[1], op2));
DONE;
})
(define_insn "crypto_sha1h_lb"
[(set (match_operand:V4SI 0 "register_operand" "=w")
(unspec:V4SI
[(vec_select:SI
(match_operand:V4SI 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]))]
UNSPEC_SHA1H))]
"TARGET_CRYPTO && INTVAL (operands[2]) == NEON_ENDIAN_LANE_N (V2SImode, 0)"
"sha1h.32\\t%q0, %q1"
[(set_attr "type" "crypto_sha1_fast")]
)
(define_insn "crypto_vmullp64"
[(set (match_operand:TI 0 "register_operand" "=w")
(unspec:TI [(match_operand:DI 1 "register_operand" "w")
(match_operand:DI 2 "register_operand" "w")]
UNSPEC_VMULLP64))]
"TARGET_CRYPTO"
"vmull.p64\\t%q0, %P1, %P2"
[(set_attr "type" "crypto_pmull")]
)
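;; This is the 64x64->128-bit polynomial (carry-less) multiply exposed
;; by the vmull_p64 intrinsic, used e.g. for GHASH-style computations.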
/* The vec_select operation always selects index 0 from the lower V2SI subreg
   of the V4SI, adjusted for endianness.  Required because neon_vget_lane and
   neon_set_lane change the element ordering in memory and so need this
   adjustment for big-endian.  */
(define_expand "crypto_"
[(set (match_operand:V4SI 0 "register_operand")
(unspec:
[(match_operand: 1 "register_operand")
(match_operand: 2 "register_operand")
(match_operand: 3 "register_operand")]
CRYPTO_SELECTING))]
"TARGET_CRYPTO"
{
rtx op4 = GEN_INT (NEON_ENDIAN_LANE_N (V2SImode, 0));
emit_insn (gen_crypto__lb
(operands[0], operands[1], operands[2], operands[3], op4));
DONE;
})
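;; The _lb variant takes the endian-adjusted lane number as an explicit
;; operand so that its insn condition can check that the vec_select
;; still picks the architectural low word.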
(define_insn "crypto__lb"
[(set (match_operand:V4SI 0 "register_operand" "=w")
(unspec:
[(match_operand: 1 "register_operand" "0")
(vec_select:SI
(match_operand: 2 "register_operand" "w")
(parallel [(match_operand:SI 4 "immediate_operand" "i")]))
(match_operand: 3 "register_operand" "w")]
CRYPTO_SELECTING))]
"TARGET_CRYPTO && INTVAL (operands[4]) == NEON_ENDIAN_LANE_N (V2SImode, 0)"
".\\t%q0, %q2, %q3"
[(set_attr "type" "")]
)