aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AArch64/AArch64InstrInfo.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.td')
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td125
1 files changed, 85 insertions, 40 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 6c46b18..ac31236 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -430,26 +430,32 @@ def UseWzrToVecMove : Predicate<"Subtarget->useWzrToVecMove()">;
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
- SDTCisInt<0>, SDTCisVT<1, i32>]>;
+ SDTCisInt<0>,
+ SDTCisVT<1, FlagsVT>]>;
// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisInt<0>,
- SDTCisVT<3, i32>]>;
+ SDTCisVT<3, FlagsVT>]>;
// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>,
- SDTCisVT<1, i32>,
- SDTCisVT<4, i32>]>;
+ SDTCisVT<1, FlagsVT>,
+ SDTCisVT<4, FlagsVT>]>;
+
+// Value type used for condition codes.
+// Should be kept in sync with its C++ counterpart.
+defvar CondCodeVT = i32;
def SDT_AArch64Brcond : SDTypeProfile<0, 3,
- [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
- SDTCisVT<2, i32>]>;
+ [SDTCisVT<0, OtherVT>,
+ SDTCisVT<1, CondCodeVT>,
+ SDTCisVT<2, FlagsVT>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisVT<2, OtherVT>]>;
@@ -457,23 +463,23 @@ def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
def SDT_AArch64CSel : SDTypeProfile<1, 4,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
- SDTCisInt<3>,
- SDTCisVT<4, i32>]>;
+ SDTCisVT<3, CondCodeVT>,
+ SDTCisVT<4, FlagsVT>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
- [SDTCisVT<0, i32>,
+ [SDTCisVT<0, FlagsVT>,
SDTCisInt<1>,
SDTCisSameAs<1, 2>,
SDTCisInt<3>,
- SDTCisInt<4>,
- SDTCisVT<5, i32>]>;
+ SDTCisVT<4, CondCodeVT>,
+ SDTCisVT<5, FlagsVT>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
- [SDTCisVT<0, i32>,
+ [SDTCisVT<0, FlagsVT>,
SDTCisFP<1>,
SDTCisSameAs<1, 2>,
SDTCisInt<3>,
- SDTCisInt<4>,
- SDTCisVT<5, i32>]>;
-def SDT_AArch64FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<4, CondCodeVT>,
+ SDTCisVT<5, FlagsVT>]>;
+def SDT_AArch64FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, FlagsVT>,
SDTCisFP<1>,
SDTCisSameAs<2, 1>]>;
def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>;
@@ -518,10 +524,10 @@ def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
-def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v2i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
-def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
// Generates the general dynamic sequences, i.e.
// adrp x0, :tlsdesc:var
@@ -545,7 +551,8 @@ def SDT_AArch64TBL : SDTypeProfile<1, 2, [
]>;
def SDT_AArch64cb : SDTypeProfile<0, 4,
- [SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisInt<2>,
+ [SDTCisVT<0, CondCodeVT>,
+ SDTCisInt<1>, SDTCisInt<2>,
SDTCisVT<3, OtherVT>]>;
// non-extending masked load fragment.
@@ -1053,13 +1060,6 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
def AArch64uaddlv : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>;
def AArch64saddlv : SDNode<"AArch64ISD::SADDLV", SDT_AArch64uaddlp>;
-def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs),
- [(abdu node:$lhs, node:$rhs),
- (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
-def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs),
- [(abds node:$lhs, node:$rhs),
- (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;
-
// Add Pairwise of two vectors
def AArch64addp_n : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
// Add Long Pairwise
@@ -1131,10 +1131,10 @@ def AArch64probedalloca
SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPMayStore]>;
-// MRS, also sets the flags via a glue.
+// MRS, also sets the flags.
def AArch64mrs : SDNode<"AArch64ISD::MRS",
SDTypeProfile<2, 1, [SDTCisVT<0, i64>,
- SDTCisVT<1, i32>,
+ SDTCisVT<1, FlagsVT>,
SDTCisVT<2, i32>]>,
[SDNPHasChain]>;
@@ -2039,7 +2039,7 @@ let Predicates = [HasPAuth] in {
def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>;
}
- defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
+ defm PAC : SignAuth<0b000, 0b010, "pac", null_frag>;
defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;
def XPACI : ClearAuth<0, "xpaci">;
@@ -2159,6 +2159,26 @@ let Predicates = [HasPAuth] in {
let Uses = [];
}
+ // PAC pseudo instruction. In AsmPrinter, it is expanded into an actual PAC*
+ // instruction immediately preceded by the discriminator computation.
+ // This enforces the expected immediate modifier is used for signing, even
+ // if an attacker is able to substitute AddrDisc.
+ def PAC : Pseudo<(outs GPR64:$SignedVal),
+ (ins GPR64:$Val, i32imm:$Key, i64imm:$Disc, GPR64noip:$AddrDisc),
+ [], "$SignedVal = $Val">, Sched<[WriteI, ReadI]> {
+ let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
+ let mayStore = 0;
+ let mayLoad = 0;
+ let Size = 12;
+ let Defs = [X16, X17];
+ let usesCustomInserter = 1;
+ }
+
+ // A standalone pattern is used, so that literal 0 can be passed as $Disc.
+ def : Pat<(int_ptrauth_sign GPR64:$Val, timm:$Key, GPR64noip:$AddrDisc),
+ (PAC GPR64:$Val, $Key, 0, GPR64noip:$AddrDisc)>;
+
// AUT and re-PAC a value, using different keys/data.
// This directly manipulates x16/x17, which are the only registers that
// certain OSs guarantee are safe to use for sensitive operations.
@@ -3941,6 +3961,26 @@ defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
(SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
+// load zero-extended i32, bitcast to f64
+def : Pat <(f64 (bitconvert (i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
+
+// load zero-extended i16, bitcast to f64
+def : Pat <(f64 (bitconvert (i64 (zextloadi16 (am_indexed32 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+
+// load zero-extended i8, bitcast to f64
+def : Pat <(f64 (bitconvert (i64 (zextloadi8 (am_indexed32 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (SUBREG_TO_REG (i64 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+
+// load zero-extended i16, bitcast to f32
+def : Pat <(f32 (bitconvert (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (SUBREG_TO_REG (i32 0), (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+
+// load zero-extended i8, bitcast to f32
+def : Pat <(f32 (bitconvert (i32 (zextloadi8 (am_indexed16 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (SUBREG_TO_REG (i32 0), (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+
// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
[(AArch64Prefetch timm:$Rt,
@@ -5667,8 +5707,7 @@ let Predicates = [HasFullFP16] in {
// Advanced SIMD two vector instructions.
//===----------------------------------------------------------------------===//
-defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
- AArch64uabd>;
+defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", abdu>;
// Match UABDL in log2-shuffle patterns.
def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
(zext (v8i8 V64:$opB))))),
@@ -6018,8 +6057,8 @@ defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
- TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
-defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
+ TriOpFrag<(add node:$LHS, (abds node:$MHS, node:$RHS))> >;
+defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", abds>;
defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>;
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
@@ -6037,8 +6076,8 @@ defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
- TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
-defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
+ TriOpFrag<(add node:$LHS, (abdu node:$MHS, node:$RHS))> >;
+defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", abdu>;
defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
@@ -6635,6 +6674,15 @@ def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
(UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}
+def : Pat<(v4i32 (any_fp_to_sint (v4f32 (scalar_to_vector (f32 FPR32:$src))))),
+ (v4i32 (INSERT_SUBREG (IMPLICIT_DEF), (i32 (FCVTZSv1i32 (f32 FPR32:$src))), ssub))>;
+def : Pat<(v4i32 (any_fp_to_uint (v4f32 (scalar_to_vector (f32 FPR32:$src))))),
+ (v4i32 (INSERT_SUBREG (IMPLICIT_DEF), (i32 (FCVTZUv1i32 (f32 FPR32:$src))), ssub))>;
+def : Pat<(v2i64 (any_fp_to_sint (v2f64 (scalar_to_vector (f64 FPR64:$src))))),
+ (v2i64 (INSERT_SUBREG (IMPLICIT_DEF), (i64 (FCVTZSv1i64 (f64 FPR64:$src))), dsub))>;
+def : Pat<(v2i64 (any_fp_to_uint (v2f64 (scalar_to_vector (f64 FPR64:$src))))),
+ (v2i64 (INSERT_SUBREG (IMPLICIT_DEF), (i64 (FCVTZUv1i64 (f64 FPR64:$src))), dsub))>;
+
// int -> float conversion of value in lane 0 of simd vector should use
// correct cvtf variant to avoid costly fpr <-> gpr register transfers.
def : Pat<(f32 (sint_to_fp (i32 (vector_extract (v4i32 FPR128:$Rn), (i64 0))))),
@@ -6759,10 +6807,8 @@ defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
-defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
- AArch64sabd>;
-defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
- AArch64sabd>;
+defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", abds>;
+defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", abds>;
defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
@@ -6780,8 +6826,7 @@ defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
-defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
- AArch64uabd>;
+defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", abdu>;
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",