author    Daniel Paoliello <danpao@microsoft.com>  2024-06-22 11:01:36 -0700
committer GitHub <noreply@github.com>  2024-06-22 11:01:36 -0700
commit    2c9c22c6e295b1176225b63ae4cbbceb216da55e (patch)
tree      40d9f755574894faff45e608fd9e5cad30ea595f
parent    8fa4fe1f995a9bc85666d63e84c094f9a09686b5 (diff)
[ARM64EC] Fix thunks for vector args (#96003)
The checks used when building a thunk to decide whether an arg needed to be cast to/from an integer or redirected via a pointer didn't match how the arg types were changed in `canonicalizeThunkType`. This caused LLVM to ICE when vector types were used as args, due to incorrect types in a call instruction. Instead of duplicating these checks, we now check whether the arg type differs between x64 and AArch64, and then cast or redirect as appropriate.
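
For illustration only (not part of the patch): a minimal, self-contained C++ sketch of the "classify once, reuse everywhere" pattern the patch adopts. `ArgType`, `classify`, and `buildThunk` are simplified stand-ins invented for this sketch, not LLVM APIs; the real code records a `ThunkArgTranslation` per argument when the mangled thunk name is computed, and both thunk builders consume that record instead of re-deriving the decision with predicates that can drift out of sync.

    // Sketch of the pattern (plain C++17, no LLVM dependencies).
    #include <cstdint>
    #include <iostream>
    #include <vector>

    enum class ThunkArgTranslation : uint8_t {
      Direct,             // same representation on both sides
      Bitcast,            // small aggregate/vector: reload as an integer
      PointerIndirection, // large aggregate/vector: pass via a pointer
    };

    // Simplified stand-in for an argument's type: just its store size in
    // bytes and whether it is an aggregate or vector (the real code
    // inspects llvm::Type).
    struct ArgType {
      unsigned SizeBytes;
      bool IsAggregateOrVector;
    };

    // Classify once, in one place, mirroring canonicalizeThunkType's cases.
    ThunkArgTranslation classify(const ArgType &T) {
      if (!T.IsAggregateOrVector && T.SizeBytes <= 8)
        return ThunkArgTranslation::Direct;
      if (T.SizeBytes == 1 || T.SizeBytes == 2 || T.SizeBytes == 4 ||
          T.SizeBytes == 8)
        return ThunkArgTranslation::Bitcast;
      return ThunkArgTranslation::PointerIndirection;
    }

    // Every thunk builder consumes the recorded decisions; none of them
    // re-implements the classification.
    void buildThunk(const std::vector<ThunkArgTranslation> &Translations) {
      for (ThunkArgTranslation TA : Translations) {
        switch (TA) {
        case ThunkArgTranslation::Direct:
          std::cout << "pass as-is\n";
          break;
        case ThunkArgTranslation::Bitcast:
          std::cout << "spill, reload as iN\n";
          break;
        case ThunkArgTranslation::PointerIndirection:
          std::cout << "spill, pass pointer\n";
          break;
        }
      }
    }

    int main() {
      std::vector<ThunkArgTranslation> Translations = {
          classify({8, false}), // i64       -> Direct
          classify({4, true}),  // <4 x i8>  -> Bitcast
          classify({16, true}), // <8 x i16> -> PointerIndirection
      };
      // Before the patch, the builders' ad-hoc checks disagreed with this
      // classification for vectors, producing ill-typed calls and an ICE.
      buildThunk(Translations);
    }
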
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp  153
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll        109
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll         117
3 files changed, 326 insertions, 53 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index 218201f..f2c38b0 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -46,6 +46,18 @@ static cl::opt<bool> GenerateThunks("arm64ec-generate-thunks", cl::Hidden,
namespace {
+enum ThunkArgTranslation : uint8_t {
+ Direct,
+ Bitcast,
+ PointerIndirection,
+};
+
+struct ThunkArgInfo {
+ Type *Arm64Ty;
+ Type *X64Ty;
+ ThunkArgTranslation Translation;
+};
+
class AArch64Arm64ECCallLowering : public ModulePass {
public:
static char ID;
@@ -74,25 +86,30 @@ private:
void getThunkType(FunctionType *FT, AttributeList AttrList,
Arm64ECThunkType TT, raw_ostream &Out,
- FunctionType *&Arm64Ty, FunctionType *&X64Ty);
+ FunctionType *&Arm64Ty, FunctionType *&X64Ty,
+ SmallVector<ThunkArgTranslation> &ArgTranslations);
void getThunkRetType(FunctionType *FT, AttributeList AttrList,
raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy,
SmallVectorImpl<Type *> &Arm64ArgTypes,
- SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr);
+ SmallVectorImpl<Type *> &X64ArgTypes,
+ SmallVector<ThunkArgTranslation> &ArgTranslations,
+ bool &HasSretPtr);
void getThunkArgTypes(FunctionType *FT, AttributeList AttrList,
Arm64ECThunkType TT, raw_ostream &Out,
SmallVectorImpl<Type *> &Arm64ArgTypes,
- SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr);
- void canonicalizeThunkType(Type *T, Align Alignment, bool Ret,
- uint64_t ArgSizeBytes, raw_ostream &Out,
- Type *&Arm64Ty, Type *&X64Ty);
+ SmallVectorImpl<Type *> &X64ArgTypes,
+ SmallVectorImpl<ThunkArgTranslation> &ArgTranslations,
+ bool HasSretPtr);
+ ThunkArgInfo canonicalizeThunkType(Type *T, Align Alignment, bool Ret,
+ uint64_t ArgSizeBytes, raw_ostream &Out);
};
} // end anonymous namespace
void AArch64Arm64ECCallLowering::getThunkType(
FunctionType *FT, AttributeList AttrList, Arm64ECThunkType TT,
- raw_ostream &Out, FunctionType *&Arm64Ty, FunctionType *&X64Ty) {
+ raw_ostream &Out, FunctionType *&Arm64Ty, FunctionType *&X64Ty,
+ SmallVector<ThunkArgTranslation> &ArgTranslations) {
Out << (TT == Arm64ECThunkType::Entry ? "$ientry_thunk$cdecl$"
: "$iexit_thunk$cdecl$");
@@ -111,10 +128,10 @@ void AArch64Arm64ECCallLowering::getThunkType(
bool HasSretPtr = false;
getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes,
- X64ArgTypes, HasSretPtr);
+ X64ArgTypes, ArgTranslations, HasSretPtr);
getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes,
- HasSretPtr);
+ ArgTranslations, HasSretPtr);
Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes, false);
@@ -124,7 +141,8 @@ void AArch64Arm64ECCallLowering::getThunkType(
void AArch64Arm64ECCallLowering::getThunkArgTypes(
FunctionType *FT, AttributeList AttrList, Arm64ECThunkType TT,
raw_ostream &Out, SmallVectorImpl<Type *> &Arm64ArgTypes,
- SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr) {
+ SmallVectorImpl<Type *> &X64ArgTypes,
+ SmallVectorImpl<ThunkArgTranslation> &ArgTranslations, bool HasSretPtr) {
Out << "$";
if (FT->isVarArg()) {
@@ -153,17 +171,20 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes(
for (int i = HasSretPtr ? 1 : 0; i < 4; i++) {
Arm64ArgTypes.push_back(I64Ty);
X64ArgTypes.push_back(I64Ty);
+ ArgTranslations.push_back(ThunkArgTranslation::Direct);
}
// x4
Arm64ArgTypes.push_back(PtrTy);
X64ArgTypes.push_back(PtrTy);
+ ArgTranslations.push_back(ThunkArgTranslation::Direct);
// x5
Arm64ArgTypes.push_back(I64Ty);
if (TT != Arm64ECThunkType::Entry) {
// FIXME: x5 isn't actually used by the x64 side; revisit once we
// have proper isel for varargs
X64ArgTypes.push_back(I64Ty);
+ ArgTranslations.push_back(ThunkArgTranslation::Direct);
}
return;
}
@@ -187,18 +208,20 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes(
uint64_t ArgSizeBytes = 0;
Align ParamAlign = Align();
#endif
- Type *Arm64Ty, *X64Ty;
- canonicalizeThunkType(FT->getParamType(I), ParamAlign,
- /*Ret*/ false, ArgSizeBytes, Out, Arm64Ty, X64Ty);
+ auto [Arm64Ty, X64Ty, ArgTranslation] =
+ canonicalizeThunkType(FT->getParamType(I), ParamAlign,
+ /*Ret*/ false, ArgSizeBytes, Out);
Arm64ArgTypes.push_back(Arm64Ty);
X64ArgTypes.push_back(X64Ty);
+ ArgTranslations.push_back(ArgTranslation);
}
}
void AArch64Arm64ECCallLowering::getThunkRetType(
FunctionType *FT, AttributeList AttrList, raw_ostream &Out,
Type *&Arm64RetTy, Type *&X64RetTy, SmallVectorImpl<Type *> &Arm64ArgTypes,
- SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr) {
+ SmallVectorImpl<Type *> &X64ArgTypes,
+ SmallVector<ThunkArgTranslation> &ArgTranslations, bool &HasSretPtr) {
Type *T = FT->getReturnType();
#if 0
// FIXME: Need more information about argument size; see
@@ -240,13 +263,13 @@ void AArch64Arm64ECCallLowering::getThunkRetType(
// that's a miscompile.)
Type *SRetType = SRetAttr0.getValueAsType();
Align SRetAlign = AttrList.getParamAlignment(0).valueOrOne();
- Type *Arm64Ty, *X64Ty;
canonicalizeThunkType(SRetType, SRetAlign, /*Ret*/ true, ArgSizeBytes,
- Out, Arm64Ty, X64Ty);
+ Out);
Arm64RetTy = VoidTy;
X64RetTy = VoidTy;
Arm64ArgTypes.push_back(FT->getParamType(0));
X64ArgTypes.push_back(FT->getParamType(0));
+ ArgTranslations.push_back(ThunkArgTranslation::Direct);
HasSretPtr = true;
return;
}
@@ -258,8 +281,10 @@ void AArch64Arm64ECCallLowering::getThunkRetType(
return;
}
- canonicalizeThunkType(T, Align(), /*Ret*/ true, ArgSizeBytes, Out, Arm64RetTy,
- X64RetTy);
+ auto info =
+ canonicalizeThunkType(T, Align(), /*Ret*/ true, ArgSizeBytes, Out);
+ Arm64RetTy = info.Arm64Ty;
+ X64RetTy = info.X64Ty;
if (X64RetTy->isPointerTy()) {
// If the X64 type is canonicalized to a pointer, that means it's
// passed/returned indirectly. For a return value, that means it's an
@@ -269,21 +294,33 @@ void AArch64Arm64ECCallLowering::getThunkRetType(
}
}
-void AArch64Arm64ECCallLowering::canonicalizeThunkType(
- Type *T, Align Alignment, bool Ret, uint64_t ArgSizeBytes, raw_ostream &Out,
- Type *&Arm64Ty, Type *&X64Ty) {
+ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
+ Type *T, Align Alignment, bool Ret, uint64_t ArgSizeBytes,
+ raw_ostream &Out) {
+
+ auto direct = [](Type *T) {
+ return ThunkArgInfo{T, T, ThunkArgTranslation::Direct};
+ };
+
+ auto bitcast = [this](Type *Arm64Ty, uint64_t SizeInBytes) {
+ return ThunkArgInfo{Arm64Ty,
+ llvm::Type::getIntNTy(M->getContext(), SizeInBytes * 8),
+ ThunkArgTranslation::Bitcast};
+ };
+
+ auto pointerIndirection = [this](Type *Arm64Ty) {
+ return ThunkArgInfo{Arm64Ty, PtrTy,
+ ThunkArgTranslation::PointerIndirection};
+ };
+
if (T->isFloatTy()) {
Out << "f";
- Arm64Ty = T;
- X64Ty = T;
- return;
+ return direct(T);
}
if (T->isDoubleTy()) {
Out << "d";
- Arm64Ty = T;
- X64Ty = T;
- return;
+ return direct(T);
}
if (T->isFloatingPointTy()) {
@@ -306,16 +343,14 @@ void AArch64Arm64ECCallLowering::canonicalizeThunkType(
Out << (ElementTy->isFloatTy() ? "F" : "D") << TotalSizeBytes;
if (Alignment.value() >= 16 && !Ret)
Out << "a" << Alignment.value();
- Arm64Ty = T;
if (TotalSizeBytes <= 8) {
// Arm64 returns small structs of float/double in float registers;
// X64 uses RAX.
- X64Ty = llvm::Type::getIntNTy(M->getContext(), TotalSizeBytes * 8);
+ return bitcast(T, TotalSizeBytes);
} else {
// Struct is passed directly on Arm64, but indirectly on X64.
- X64Ty = PtrTy;
+ return pointerIndirection(T);
}
- return;
} else if (T->isFloatingPointTy()) {
report_fatal_error("Only 32 and 64 bit floating points are supported for "
"ARM64EC thunks");
@@ -324,9 +359,7 @@ void AArch64Arm64ECCallLowering::canonicalizeThunkType(
if ((T->isIntegerTy() || T->isPointerTy()) && DL.getTypeSizeInBits(T) <= 64) {
Out << "i8";
- Arm64Ty = I64Ty;
- X64Ty = I64Ty;
- return;
+ return direct(I64Ty);
}
unsigned TypeSize = ArgSizeBytes;
@@ -338,13 +371,12 @@ void AArch64Arm64ECCallLowering::canonicalizeThunkType(
if (Alignment.value() >= 16 && !Ret)
Out << "a" << Alignment.value();
// FIXME: Try to canonicalize Arm64Ty more thoroughly?
- Arm64Ty = T;
if (TypeSize == 1 || TypeSize == 2 || TypeSize == 4 || TypeSize == 8) {
// Pass directly in an integer register
- X64Ty = llvm::Type::getIntNTy(M->getContext(), TypeSize * 8);
+ return bitcast(T, TypeSize);
} else {
// Passed directly on Arm64, but indirectly on X64.
- X64Ty = PtrTy;
+ return pointerIndirection(T);
}
}
@@ -355,8 +387,9 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT,
SmallString<256> ExitThunkName;
llvm::raw_svector_ostream ExitThunkStream(ExitThunkName);
FunctionType *Arm64Ty, *X64Ty;
+ SmallVector<ThunkArgTranslation> ArgTranslations;
getThunkType(FT, Attrs, Arm64ECThunkType::Exit, ExitThunkStream, Arm64Ty,
- X64Ty);
+ X64Ty, ArgTranslations);
if (Function *F = M->getFunction(ExitThunkName))
return F;
@@ -387,6 +420,7 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT,
SmallVector<Value *> Args;
// Pass the called function in x9.
+ auto X64TyOffset = 1;
Args.push_back(F->arg_begin());
Type *RetTy = Arm64Ty->getReturnType();
@@ -396,10 +430,14 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT,
// pointer.
if (DL.getTypeStoreSize(RetTy) > 8) {
Args.push_back(IRB.CreateAlloca(RetTy));
+ X64TyOffset++;
}
}
- for (auto &Arg : make_range(F->arg_begin() + 1, F->arg_end())) {
+ for (auto [Arg, X64ArgType, ArgTranslation] : llvm::zip_equal(
+ make_range(F->arg_begin() + 1, F->arg_end()),
+ make_range(X64Ty->param_begin() + X64TyOffset, X64Ty->param_end()),
+ ArgTranslations)) {
// Translate arguments from AArch64 calling convention to x86 calling
// convention.
//
@@ -414,18 +452,20 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT,
// with an attribute.)
//
// The first argument is the called function, stored in x9.
- if (Arg.getType()->isArrayTy() || Arg.getType()->isStructTy() ||
- DL.getTypeStoreSize(Arg.getType()) > 8) {
+ if (ArgTranslation != ThunkArgTranslation::Direct) {
Value *Mem = IRB.CreateAlloca(Arg.getType());
IRB.CreateStore(&Arg, Mem);
- if (DL.getTypeStoreSize(Arg.getType()) <= 8) {
+ if (ArgTranslation == ThunkArgTranslation::Bitcast) {
Type *IntTy = IRB.getIntNTy(DL.getTypeStoreSizeInBits(Arg.getType()));
Args.push_back(IRB.CreateLoad(IntTy, IRB.CreateBitCast(Mem, PtrTy)));
- } else
+ } else {
+ assert(ArgTranslation == ThunkArgTranslation::PointerIndirection);
Args.push_back(Mem);
+ }
} else {
Args.push_back(&Arg);
}
+ assert(Args.back()->getType() == X64ArgType);
}
// FIXME: Transfer necessary attributes? sret? anything else?
@@ -459,8 +499,10 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
SmallString<256> EntryThunkName;
llvm::raw_svector_ostream EntryThunkStream(EntryThunkName);
FunctionType *Arm64Ty, *X64Ty;
+ SmallVector<ThunkArgTranslation> ArgTranslations;
getThunkType(F->getFunctionType(), F->getAttributes(),
- Arm64ECThunkType::Entry, EntryThunkStream, Arm64Ty, X64Ty);
+ Arm64ECThunkType::Entry, EntryThunkStream, Arm64Ty, X64Ty,
+ ArgTranslations);
if (Function *F = M->getFunction(EntryThunkName))
return F;
@@ -472,7 +514,6 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
// Copy MSVC, and always set up a frame pointer. (Maybe this isn't necessary.)
Thunk->addFnAttr("frame-pointer", "all");
- auto &DL = M->getDataLayout();
BasicBlock *BB = BasicBlock::Create(M->getContext(), "", Thunk);
IRBuilder<> IRB(BB);
@@ -481,24 +522,28 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
bool TransformDirectToSRet = X64RetType->isVoidTy() && !RetTy->isVoidTy();
unsigned ThunkArgOffset = TransformDirectToSRet ? 2 : 1;
- unsigned PassthroughArgSize = F->isVarArg() ? 5 : Thunk->arg_size();
+ unsigned PassthroughArgSize =
+ (F->isVarArg() ? 5 : Thunk->arg_size()) - ThunkArgOffset;
+  assert(ArgTranslations.size() == (F->isVarArg() ? 5 : PassthroughArgSize));
// Translate arguments to call.
SmallVector<Value *> Args;
- for (unsigned i = ThunkArgOffset, e = PassthroughArgSize; i != e; ++i) {
- Value *Arg = Thunk->getArg(i);
- Type *ArgTy = Arm64Ty->getParamType(i - ThunkArgOffset);
- if (ArgTy->isArrayTy() || ArgTy->isStructTy() ||
- DL.getTypeStoreSize(ArgTy) > 8) {
+ for (unsigned i = 0; i != PassthroughArgSize; ++i) {
+ Value *Arg = Thunk->getArg(i + ThunkArgOffset);
+ Type *ArgTy = Arm64Ty->getParamType(i);
+ ThunkArgTranslation ArgTranslation = ArgTranslations[i];
+ if (ArgTranslation != ThunkArgTranslation::Direct) {
// Translate array/struct arguments to the expected type.
- if (DL.getTypeStoreSize(ArgTy) <= 8) {
+ if (ArgTranslation == ThunkArgTranslation::Bitcast) {
Value *CastAlloca = IRB.CreateAlloca(ArgTy);
IRB.CreateStore(Arg, IRB.CreateBitCast(CastAlloca, PtrTy));
Arg = IRB.CreateLoad(ArgTy, CastAlloca);
} else {
+ assert(ArgTranslation == ThunkArgTranslation::PointerIndirection);
Arg = IRB.CreateLoad(ArgTy, IRB.CreateBitCast(Arg, PtrTy));
}
}
+ assert(Arg->getType() == ArgTy);
Args.push_back(Arg);
}
@@ -558,8 +603,10 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
llvm::raw_null_ostream NullThunkName;
FunctionType *Arm64Ty, *X64Ty;
+ SmallVector<ThunkArgTranslation> ArgTranslations;
getThunkType(F->getFunctionType(), F->getAttributes(),
- Arm64ECThunkType::GuestExit, NullThunkName, Arm64Ty, X64Ty);
+ Arm64ECThunkType::GuestExit, NullThunkName, Arm64Ty, X64Ty,
+ ArgTranslations);
auto MangledName = getArm64ECMangledFunctionName(F->getName().str());
assert(MangledName && "Can't guest exit to function that's already native");
std::string ThunkName = *MangledName;
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 0cf678f..6aeeeed 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -487,6 +487,109 @@ define void @cxx_method(ptr noundef nonnull align 8 dereferenceable(8) %0, ptr d
ret void
}
+define <4 x i8> @small_vector(<4 x i8> %0) {
+; CHECK-LABEL: .def $ientry_thunk$cdecl$m$m;
+; CHECK: .section .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$m$m
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #192
+; CHECK-NEXT: .seh_stackalloc 192
+; CHECK-NEXT: stp q6, q7, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT: .seh_save_any_reg_p q6, 16
+; CHECK-NEXT: stp q8, q9, [sp, #48] // 32-byte Folded Spill
+; CHECK-NEXT: .seh_save_any_reg_p q8, 48
+; CHECK-NEXT: stp q10, q11, [sp, #80] // 32-byte Folded Spill
+; CHECK-NEXT: .seh_save_any_reg_p q10, 80
+; CHECK-NEXT: stp q12, q13, [sp, #112] // 32-byte Folded Spill
+; CHECK-NEXT: .seh_save_any_reg_p q12, 112
+; CHECK-NEXT: stp q14, q15, [sp, #144] // 32-byte Folded Spill
+; CHECK-NEXT: .seh_save_any_reg_p q14, 144
+; CHECK-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_fplr 176
+; CHECK-NEXT: add x29, sp, #176
+; CHECK-NEXT: .seh_add_fp 176
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: str w0, [sp, #12]
+; CHECK-NEXT: ldr s0, [sp, #12]
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: blr x9
+; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-NEXT: adrp x9, __os_arm64x_dispatch_ret
+; CHECK-NEXT: str s0, [sp, #8]
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: ldr x0, [x9, :lo12:__os_arm64x_dispatch_ret]
+; CHECK-NEXT: .seh_startepilogue
+; CHECK-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_fplr 176
+; CHECK-NEXT: ldp q14, q15, [sp, #144] // 32-byte Folded Reload
+; CHECK-NEXT: .seh_save_any_reg_p q14, 144
+; CHECK-NEXT: ldp q12, q13, [sp, #112] // 32-byte Folded Reload
+; CHECK-NEXT: .seh_save_any_reg_p q12, 112
+; CHECK-NEXT: ldp q10, q11, [sp, #80] // 32-byte Folded Reload
+; CHECK-NEXT: .seh_save_any_reg_p q10, 80
+; CHECK-NEXT: ldp q8, q9, [sp, #48] // 32-byte Folded Reload
+; CHECK-NEXT: .seh_save_any_reg_p q8, 48
+; CHECK-NEXT: ldp q6, q7, [sp, #16] // 32-byte Folded Reload
+; CHECK-NEXT: .seh_save_any_reg_p q6, 16
+; CHECK-NEXT: add sp, sp, #192
+; CHECK-NEXT: .seh_stackalloc 192
+; CHECK-NEXT: .seh_endepilogue
+; CHECK-NEXT: br x0
+; CHECK-NEXT: .seh_endfunclet
+; CHECK-NEXT: .seh_endproc
+start:
+ ret <4 x i8> %0
+}
+
+define <8 x i16> @large_vector(<8 x i16> %0) {
+; CHECK-LABEL: .def $ientry_thunk$cdecl$m16$m16;
+; CHECK: .section .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$m16$m16
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp q6, q7, [sp, #-192]! // 32-byte Folded Spill
+; CHECK-NEXT: .seh_save_any_reg_px q6, 192
+; CHECK-NEXT: stp q8, q9, [sp, #32] // 32-byte Folded Spill
+; CHECK-NEXT: .seh_save_any_reg_p q8, 32
+; CHECK-NEXT: stp q10, q11, [sp, #64] // 32-byte Folded Spill
+; CHECK-NEXT: .seh_save_any_reg_p q10, 64
+; CHECK-NEXT: stp q12, q13, [sp, #96] // 32-byte Folded Spill
+; CHECK-NEXT: .seh_save_any_reg_p q12, 96
+; CHECK-NEXT: stp q14, q15, [sp, #128] // 32-byte Folded Spill
+; CHECK-NEXT: .seh_save_any_reg_p q14, 128
+; CHECK-NEXT: str x19, [sp, #160] // 8-byte Folded Spill
+; CHECK-NEXT: .seh_save_reg x19, 160
+; CHECK-NEXT: stp x29, x30, [sp, #168] // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_fplr 168
+; CHECK-NEXT: add x29, sp, #168
+; CHECK-NEXT: .seh_add_fp 168
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: ldr q0, [x1]
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: blr x9
+; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_ret
+; CHECK-NEXT: str q0, [x19]
+; CHECK-NEXT: ldr x0, [x8, :lo12:__os_arm64x_dispatch_ret]
+; CHECK-NEXT: .seh_startepilogue
+; CHECK-NEXT: ldp x29, x30, [sp, #168] // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_fplr 168
+; CHECK-NEXT: ldr x19, [sp, #160] // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg x19, 160
+; CHECK-NEXT: ldp q14, q15, [sp, #128] // 32-byte Folded Reload
+; CHECK-NEXT: .seh_save_any_reg_p q14, 128
+; CHECK-NEXT: ldp q12, q13, [sp, #96] // 32-byte Folded Reload
+; CHECK-NEXT: .seh_save_any_reg_p q12, 96
+; CHECK-NEXT: ldp q10, q11, [sp, #64] // 32-byte Folded Reload
+; CHECK-NEXT: .seh_save_any_reg_p q10, 64
+; CHECK-NEXT: ldp q8, q9, [sp, #32] // 32-byte Folded Reload
+; CHECK-NEXT: .seh_save_any_reg_p q8, 32
+; CHECK-NEXT: ldp q6, q7, [sp], #192 // 32-byte Folded Reload
+; CHECK-NEXT: .seh_save_any_reg_px q6, 192
+; CHECK-NEXT: .seh_endepilogue
+; CHECK-NEXT: br x0
+; CHECK-NEXT: .seh_endfunclet
+; CHECK-NEXT: .seh_endproc
+start:
+ ret <8 x i16> %0
+}
; Verify the hybrid bitmap
; CHECK-LABEL: .section .hybmp$x,"yi"
@@ -523,3 +626,9 @@ define void @cxx_method(ptr noundef nonnull align 8 dereferenceable(8) %0, ptr d
; CHECK-NEXT: .symidx "#cxx_method"
; CHECK-NEXT: .symidx $ientry_thunk$cdecl$i8$i8i8
; CHECK-NEXT: .word 1
+; CHECK-NEXT: .symidx "#small_vector"
+; CHECK-NEXT: .symidx $ientry_thunk$cdecl$m$m
+; CHECK-NEXT: .word 1
+; CHECK-NEXT: .symidx "#large_vector"
+; CHECK-NEXT: .symidx $ientry_thunk$cdecl$m16$m16
+; CHECK-NEXT: .word 1
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
index 7a40fcd..dcc6758 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
@@ -457,6 +457,109 @@ declare %T2 @simple_struct(%T1, %T2, %T3, %T4) nounwind;
; CHECK-NEXT: .seh_endfunclet
; CHECK-NEXT: .seh_endproc
+declare <4 x i8> @small_vector(<4 x i8> %0) nounwind;
+; CHECK-LABEL: .def $iexit_thunk$cdecl$m$m;
+; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m$m
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: .seh_stackalloc 64
+; CHECK-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_fplr 48
+; CHECK-NEXT: add x29, sp, #48
+; CHECK-NEXT: .seh_add_fp 48
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect
+; CHECK-NEXT: ldr x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: stur s0, [x29, #-4]
+; CHECK-NEXT: blr x16
+; CHECK-NEXT: stur w8, [x29, #-8]
+; CHECK-NEXT: ldur s0, [x29, #-8]
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: .seh_startepilogue
+; CHECK-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_fplr 48
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: .seh_stackalloc 64
+; CHECK-NEXT: .seh_endepilogue
+; CHECK-NEXT: ret
+; CHECK-NEXT: .seh_endfunclet
+; CHECK-NEXT: .seh_endproc
+; CHECK-LABEL: .def "#small_vector$exit_thunk";
+; CHECK: .section .wowthk$aa,"xr",discard,"#small_vector$exit_thunk"
+; CHECK: .weak_anti_dep small_vector
+; CHECK: .weak_anti_dep "#small_vector"
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .seh_save_reg_x x30, 16
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: adrp x8, __os_arm64x_check_icall
+; CHECK-NEXT: adrp x11, small_vector
+; CHECK-NEXT: add x11, x11, :lo12:small_vector
+; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall]
+; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$m$m)
+; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$m$m)
+; CHECK-NEXT: blr x8
+; CHECK-NEXT: .seh_startepilogue
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x30, 16
+; CHECK-NEXT: .seh_endepilogue
+; CHECK-NEXT: br x11
+; CHECK-NEXT: .seh_endfunclet
+; CHECK-NEXT: .seh_endproc
+
+declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
+; CHECK-LABEL: .def $iexit_thunk$cdecl$m16$m16;
+; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m16$m16
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: .seh_stackalloc 80
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_fplr 64
+; CHECK-NEXT: add x29, sp, #64
+; CHECK-NEXT: .seh_add_fp 64
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect
+; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: ldr x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
+; CHECK-NEXT: str q0, [sp, #32]
+; CHECK-NEXT: blr x16
+; CHECK-NEXT: ldur q0, [x29, #-16]
+; CHECK-NEXT: .seh_startepilogue
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_fplr 64
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: .seh_stackalloc 80
+; CHECK-NEXT: .seh_endepilogue
+; CHECK-NEXT: ret
+; CHECK-NEXT: .seh_endfunclet
+; CHECK-NEXT: .seh_endproc
+; CHECK-LABEL: .def "#large_vector$exit_thunk";
+; CHECK: .section .wowthk$aa,"xr",discard,"#large_vector$exit_thunk"
+; CHECK: .weak_anti_dep large_vector
+; CHECK: .weak_anti_dep "#large_vector"
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .seh_save_reg_x x30, 16
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: adrp x8, __os_arm64x_check_icall
+; CHECK-NEXT: adrp x11, large_vector
+; CHECK-NEXT: add x11, x11, :lo12:large_vector
+; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall]
+; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$m16$m16)
+; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$m16$m16)
+; CHECK-NEXT: blr x8
+; CHECK-NEXT: .seh_startepilogue
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x30, 16
+; CHECK-NEXT: .seh_endepilogue
+; CHECK-NEXT: br x11
+; CHECK-NEXT: .seh_endfunclet
+; CHECK-NEXT: .seh_endproc
+
; CHECK-LABEL: .section .hybmp$x,"yi"
; CHECK-NEXT: .symidx "#func_caller"
; CHECK-NEXT: .symidx $ientry_thunk$cdecl$v$v
@@ -515,6 +618,18 @@ declare %T2 @simple_struct(%T1, %T2, %T3, %T4) nounwind;
; CHECK-NEXT: .symidx "#simple_struct$exit_thunk"
; CHECK-NEXT: .symidx simple_struct
; CHECK-NEXT: .word 0
+; CHECK-NEXT: .symidx small_vector
+; CHECK-NEXT: .symidx $iexit_thunk$cdecl$m$m
+; CHECK-NEXT: .word 4
+; CHECK-NEXT: .symidx "#small_vector$exit_thunk"
+; CHECK-NEXT: .symidx small_vector
+; CHECK-NEXT: .word 0
+; CHECK-NEXT: .symidx large_vector
+; CHECK-NEXT: .symidx $iexit_thunk$cdecl$m16$m16
+; CHECK-NEXT: .word 4
+; CHECK-NEXT: .symidx "#large_vector$exit_thunk"
+; CHECK-NEXT: .symidx large_vector
+; CHECK-NEXT: .word 0
define void @func_caller() nounwind {
call void @no_op()
@@ -529,5 +644,7 @@ define void @func_caller() nounwind {
call [2 x i8] @small_array([2 x i8] [i8 0, i8 0], [2 x float] [float 0.0, float 0.0])
call [3 x i64] @large_array([3 x i64] [i64 0, i64 0, i64 0], [2 x double] [double 0.0, double 0.0], [2 x [2 x i64]] [[2 x i64] [i64 0, i64 0], [2 x i64] [i64 0, i64 0]])
call %T2 @simple_struct(%T1 { i16 0 }, %T2 { i32 0, float 0.0 }, %T3 { i64 0, double 0.0 }, %T4 { i64 0, double 0.0, i8 0 })
+ call <4 x i8> @small_vector(<4 x i8> <i8 0, i8 0, i8 0, i8 0>)
+ call <8 x i16> @large_vector(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
ret void
}