diff options
author | Daniel Paoliello <danpao@microsoft.com> | 2024-06-22 11:01:36 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-22 11:01:36 -0700 |
commit | 2c9c22c6e295b1176225b63ae4cbbceb216da55e (patch) | |
tree | 40d9f755574894faff45e608fd9e5cad30ea595f | |
parent | 8fa4fe1f995a9bc85666d63e84c094f9a09686b5 (diff) | |
download | llvm-2c9c22c6e295b1176225b63ae4cbbceb216da55e.zip llvm-2c9c22c6e295b1176225b63ae4cbbceb216da55e.tar.gz llvm-2c9c22c6e295b1176225b63ae4cbbceb216da55e.tar.bz2 |
[ARM64EC] Fix thunks for vector args (#96003)
The checks when building a thunk to decide if an arg needed to be cast
to/from an integer or redirected via a pointer didn't match how arg
types were changed in `canonicalizeThunkType`; this caused LLVM to ICE
when using vector types as args due to incorrect types in a call
instruction.
Instead of duplicating these checks, we should check if the arg type
differs between x64 and AArch64 and then cast or redirect as
appropriate.
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp | 153 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll | 109 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll | 117 |
3 files changed, 326 insertions, 53 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp index 218201f..f2c38b0 100644 --- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp @@ -46,6 +46,18 @@ static cl::opt<bool> GenerateThunks("arm64ec-generate-thunks", cl::Hidden, namespace { +enum ThunkArgTranslation : uint8_t { + Direct, + Bitcast, + PointerIndirection, +}; + +struct ThunkArgInfo { + Type *Arm64Ty; + Type *X64Ty; + ThunkArgTranslation Translation; +}; + class AArch64Arm64ECCallLowering : public ModulePass { public: static char ID; @@ -74,25 +86,30 @@ private: void getThunkType(FunctionType *FT, AttributeList AttrList, Arm64ECThunkType TT, raw_ostream &Out, - FunctionType *&Arm64Ty, FunctionType *&X64Ty); + FunctionType *&Arm64Ty, FunctionType *&X64Ty, + SmallVector<ThunkArgTranslation> &ArgTranslations); void getThunkRetType(FunctionType *FT, AttributeList AttrList, raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy, SmallVectorImpl<Type *> &Arm64ArgTypes, - SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr); + SmallVectorImpl<Type *> &X64ArgTypes, + SmallVector<ThunkArgTranslation> &ArgTranslations, + bool &HasSretPtr); void getThunkArgTypes(FunctionType *FT, AttributeList AttrList, Arm64ECThunkType TT, raw_ostream &Out, SmallVectorImpl<Type *> &Arm64ArgTypes, - SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr); - void canonicalizeThunkType(Type *T, Align Alignment, bool Ret, - uint64_t ArgSizeBytes, raw_ostream &Out, - Type *&Arm64Ty, Type *&X64Ty); + SmallVectorImpl<Type *> &X64ArgTypes, + SmallVectorImpl<ThunkArgTranslation> &ArgTranslations, + bool HasSretPtr); + ThunkArgInfo canonicalizeThunkType(Type *T, Align Alignment, bool Ret, + uint64_t ArgSizeBytes, raw_ostream &Out); }; } // end anonymous namespace void AArch64Arm64ECCallLowering::getThunkType( FunctionType *FT, AttributeList AttrList, Arm64ECThunkType TT, - raw_ostream &Out, 
FunctionType *&Arm64Ty, FunctionType *&X64Ty) { + raw_ostream &Out, FunctionType *&Arm64Ty, FunctionType *&X64Ty, + SmallVector<ThunkArgTranslation> &ArgTranslations) { Out << (TT == Arm64ECThunkType::Entry ? "$ientry_thunk$cdecl$" : "$iexit_thunk$cdecl$"); @@ -111,10 +128,10 @@ void AArch64Arm64ECCallLowering::getThunkType( bool HasSretPtr = false; getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes, - X64ArgTypes, HasSretPtr); + X64ArgTypes, ArgTranslations, HasSretPtr); getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes, - HasSretPtr); + ArgTranslations, HasSretPtr); Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes, false); @@ -124,7 +141,8 @@ void AArch64Arm64ECCallLowering::getThunkType( void AArch64Arm64ECCallLowering::getThunkArgTypes( FunctionType *FT, AttributeList AttrList, Arm64ECThunkType TT, raw_ostream &Out, SmallVectorImpl<Type *> &Arm64ArgTypes, - SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr) { + SmallVectorImpl<Type *> &X64ArgTypes, + SmallVectorImpl<ThunkArgTranslation> &ArgTranslations, bool HasSretPtr) { Out << "$"; if (FT->isVarArg()) { @@ -153,17 +171,20 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes( for (int i = HasSretPtr ? 
1 : 0; i < 4; i++) { Arm64ArgTypes.push_back(I64Ty); X64ArgTypes.push_back(I64Ty); + ArgTranslations.push_back(ThunkArgTranslation::Direct); } // x4 Arm64ArgTypes.push_back(PtrTy); X64ArgTypes.push_back(PtrTy); + ArgTranslations.push_back(ThunkArgTranslation::Direct); // x5 Arm64ArgTypes.push_back(I64Ty); if (TT != Arm64ECThunkType::Entry) { // FIXME: x5 isn't actually used by the x64 side; revisit once we // have proper isel for varargs X64ArgTypes.push_back(I64Ty); + ArgTranslations.push_back(ThunkArgTranslation::Direct); } return; } @@ -187,18 +208,20 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes( uint64_t ArgSizeBytes = 0; Align ParamAlign = Align(); #endif - Type *Arm64Ty, *X64Ty; - canonicalizeThunkType(FT->getParamType(I), ParamAlign, - /*Ret*/ false, ArgSizeBytes, Out, Arm64Ty, X64Ty); + auto [Arm64Ty, X64Ty, ArgTranslation] = + canonicalizeThunkType(FT->getParamType(I), ParamAlign, + /*Ret*/ false, ArgSizeBytes, Out); Arm64ArgTypes.push_back(Arm64Ty); X64ArgTypes.push_back(X64Ty); + ArgTranslations.push_back(ArgTranslation); } } void AArch64Arm64ECCallLowering::getThunkRetType( FunctionType *FT, AttributeList AttrList, raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy, SmallVectorImpl<Type *> &Arm64ArgTypes, - SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr) { + SmallVectorImpl<Type *> &X64ArgTypes, + SmallVector<ThunkArgTranslation> &ArgTranslations, bool &HasSretPtr) { Type *T = FT->getReturnType(); #if 0 // FIXME: Need more information about argument size; see @@ -240,13 +263,13 @@ void AArch64Arm64ECCallLowering::getThunkRetType( // that's a miscompile.) 
Type *SRetType = SRetAttr0.getValueAsType(); Align SRetAlign = AttrList.getParamAlignment(0).valueOrOne(); - Type *Arm64Ty, *X64Ty; canonicalizeThunkType(SRetType, SRetAlign, /*Ret*/ true, ArgSizeBytes, - Out, Arm64Ty, X64Ty); + Out); Arm64RetTy = VoidTy; X64RetTy = VoidTy; Arm64ArgTypes.push_back(FT->getParamType(0)); X64ArgTypes.push_back(FT->getParamType(0)); + ArgTranslations.push_back(ThunkArgTranslation::Direct); HasSretPtr = true; return; } @@ -258,8 +281,10 @@ void AArch64Arm64ECCallLowering::getThunkRetType( return; } - canonicalizeThunkType(T, Align(), /*Ret*/ true, ArgSizeBytes, Out, Arm64RetTy, - X64RetTy); + auto info = + canonicalizeThunkType(T, Align(), /*Ret*/ true, ArgSizeBytes, Out); + Arm64RetTy = info.Arm64Ty; + X64RetTy = info.X64Ty; if (X64RetTy->isPointerTy()) { // If the X64 type is canonicalized to a pointer, that means it's // passed/returned indirectly. For a return value, that means it's an @@ -269,21 +294,33 @@ void AArch64Arm64ECCallLowering::getThunkRetType( } } -void AArch64Arm64ECCallLowering::canonicalizeThunkType( - Type *T, Align Alignment, bool Ret, uint64_t ArgSizeBytes, raw_ostream &Out, - Type *&Arm64Ty, Type *&X64Ty) { +ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType( + Type *T, Align Alignment, bool Ret, uint64_t ArgSizeBytes, + raw_ostream &Out) { + + auto direct = [](Type *T) { + return ThunkArgInfo{T, T, ThunkArgTranslation::Direct}; + }; + + auto bitcast = [this](Type *Arm64Ty, uint64_t SizeInBytes) { + return ThunkArgInfo{Arm64Ty, + llvm::Type::getIntNTy(M->getContext(), SizeInBytes * 8), + ThunkArgTranslation::Bitcast}; + }; + + auto pointerIndirection = [this](Type *Arm64Ty) { + return ThunkArgInfo{Arm64Ty, PtrTy, + ThunkArgTranslation::PointerIndirection}; + }; + if (T->isFloatTy()) { Out << "f"; - Arm64Ty = T; - X64Ty = T; - return; + return direct(T); } if (T->isDoubleTy()) { Out << "d"; - Arm64Ty = T; - X64Ty = T; - return; + return direct(T); } if (T->isFloatingPointTy()) { @@ -306,16 +343,14 @@ 
void AArch64Arm64ECCallLowering::canonicalizeThunkType( Out << (ElementTy->isFloatTy() ? "F" : "D") << TotalSizeBytes; if (Alignment.value() >= 16 && !Ret) Out << "a" << Alignment.value(); - Arm64Ty = T; if (TotalSizeBytes <= 8) { // Arm64 returns small structs of float/double in float registers; // X64 uses RAX. - X64Ty = llvm::Type::getIntNTy(M->getContext(), TotalSizeBytes * 8); + return bitcast(T, TotalSizeBytes); } else { // Struct is passed directly on Arm64, but indirectly on X64. - X64Ty = PtrTy; + return pointerIndirection(T); } - return; } else if (T->isFloatingPointTy()) { report_fatal_error("Only 32 and 64 bit floating points are supported for " "ARM64EC thunks"); @@ -324,9 +359,7 @@ void AArch64Arm64ECCallLowering::canonicalizeThunkType( if ((T->isIntegerTy() || T->isPointerTy()) && DL.getTypeSizeInBits(T) <= 64) { Out << "i8"; - Arm64Ty = I64Ty; - X64Ty = I64Ty; - return; + return direct(I64Ty); } unsigned TypeSize = ArgSizeBytes; @@ -338,13 +371,12 @@ void AArch64Arm64ECCallLowering::canonicalizeThunkType( if (Alignment.value() >= 16 && !Ret) Out << "a" << Alignment.value(); // FIXME: Try to canonicalize Arm64Ty more thoroughly? - Arm64Ty = T; if (TypeSize == 1 || TypeSize == 2 || TypeSize == 4 || TypeSize == 8) { // Pass directly in an integer register - X64Ty = llvm::Type::getIntNTy(M->getContext(), TypeSize * 8); + return bitcast(T, TypeSize); } else { // Passed directly on Arm64, but indirectly on X64. 
- X64Ty = PtrTy; + return pointerIndirection(T); } } @@ -355,8 +387,9 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT, SmallString<256> ExitThunkName; llvm::raw_svector_ostream ExitThunkStream(ExitThunkName); FunctionType *Arm64Ty, *X64Ty; + SmallVector<ThunkArgTranslation> ArgTranslations; getThunkType(FT, Attrs, Arm64ECThunkType::Exit, ExitThunkStream, Arm64Ty, - X64Ty); + X64Ty, ArgTranslations); if (Function *F = M->getFunction(ExitThunkName)) return F; @@ -387,6 +420,7 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT, SmallVector<Value *> Args; // Pass the called function in x9. + auto X64TyOffset = 1; Args.push_back(F->arg_begin()); Type *RetTy = Arm64Ty->getReturnType(); @@ -396,10 +430,14 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT, // pointer. if (DL.getTypeStoreSize(RetTy) > 8) { Args.push_back(IRB.CreateAlloca(RetTy)); + X64TyOffset++; } } - for (auto &Arg : make_range(F->arg_begin() + 1, F->arg_end())) { + for (auto [Arg, X64ArgType, ArgTranslation] : llvm::zip_equal( + make_range(F->arg_begin() + 1, F->arg_end()), + make_range(X64Ty->param_begin() + X64TyOffset, X64Ty->param_end()), + ArgTranslations)) { // Translate arguments from AArch64 calling convention to x86 calling // convention. // @@ -414,18 +452,20 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT, // with an attribute.) // // The first argument is the called function, stored in x9. 
- if (Arg.getType()->isArrayTy() || Arg.getType()->isStructTy() || - DL.getTypeStoreSize(Arg.getType()) > 8) { + if (ArgTranslation != ThunkArgTranslation::Direct) { Value *Mem = IRB.CreateAlloca(Arg.getType()); IRB.CreateStore(&Arg, Mem); - if (DL.getTypeStoreSize(Arg.getType()) <= 8) { + if (ArgTranslation == ThunkArgTranslation::Bitcast) { Type *IntTy = IRB.getIntNTy(DL.getTypeStoreSizeInBits(Arg.getType())); Args.push_back(IRB.CreateLoad(IntTy, IRB.CreateBitCast(Mem, PtrTy))); - } else + } else { + assert(ArgTranslation == ThunkArgTranslation::PointerIndirection); Args.push_back(Mem); + } } else { Args.push_back(&Arg); } + assert(Args.back()->getType() == X64ArgType); } // FIXME: Transfer necessary attributes? sret? anything else? @@ -459,8 +499,10 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) { SmallString<256> EntryThunkName; llvm::raw_svector_ostream EntryThunkStream(EntryThunkName); FunctionType *Arm64Ty, *X64Ty; + SmallVector<ThunkArgTranslation> ArgTranslations; getThunkType(F->getFunctionType(), F->getAttributes(), - Arm64ECThunkType::Entry, EntryThunkStream, Arm64Ty, X64Ty); + Arm64ECThunkType::Entry, EntryThunkStream, Arm64Ty, X64Ty, + ArgTranslations); if (Function *F = M->getFunction(EntryThunkName)) return F; @@ -472,7 +514,6 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) { // Copy MSVC, and always set up a frame pointer. (Maybe this isn't necessary.) Thunk->addFnAttr("frame-pointer", "all"); - auto &DL = M->getDataLayout(); BasicBlock *BB = BasicBlock::Create(M->getContext(), "", Thunk); IRBuilder<> IRB(BB); @@ -481,24 +522,28 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) { bool TransformDirectToSRet = X64RetType->isVoidTy() && !RetTy->isVoidTy(); unsigned ThunkArgOffset = TransformDirectToSRet ? 2 : 1; - unsigned PassthroughArgSize = F->isVarArg() ? 5 : Thunk->arg_size(); + unsigned PassthroughArgSize = + (F->isVarArg() ? 
5 : Thunk->arg_size()) - ThunkArgOffset; + assert(ArgTranslations.size() == F->isVarArg() ? 5 : PassthroughArgSize); // Translate arguments to call. SmallVector<Value *> Args; - for (unsigned i = ThunkArgOffset, e = PassthroughArgSize; i != e; ++i) { - Value *Arg = Thunk->getArg(i); - Type *ArgTy = Arm64Ty->getParamType(i - ThunkArgOffset); - if (ArgTy->isArrayTy() || ArgTy->isStructTy() || - DL.getTypeStoreSize(ArgTy) > 8) { + for (unsigned i = 0; i != PassthroughArgSize; ++i) { + Value *Arg = Thunk->getArg(i + ThunkArgOffset); + Type *ArgTy = Arm64Ty->getParamType(i); + ThunkArgTranslation ArgTranslation = ArgTranslations[i]; + if (ArgTranslation != ThunkArgTranslation::Direct) { // Translate array/struct arguments to the expected type. - if (DL.getTypeStoreSize(ArgTy) <= 8) { + if (ArgTranslation == ThunkArgTranslation::Bitcast) { Value *CastAlloca = IRB.CreateAlloca(ArgTy); IRB.CreateStore(Arg, IRB.CreateBitCast(CastAlloca, PtrTy)); Arg = IRB.CreateLoad(ArgTy, CastAlloca); } else { + assert(ArgTranslation == ThunkArgTranslation::PointerIndirection); Arg = IRB.CreateLoad(ArgTy, IRB.CreateBitCast(Arg, PtrTy)); } } + assert(Arg->getType() == ArgTy); Args.push_back(Arg); } @@ -558,8 +603,10 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) { Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) { llvm::raw_null_ostream NullThunkName; FunctionType *Arm64Ty, *X64Ty; + SmallVector<ThunkArgTranslation> ArgTranslations; getThunkType(F->getFunctionType(), F->getAttributes(), - Arm64ECThunkType::GuestExit, NullThunkName, Arm64Ty, X64Ty); + Arm64ECThunkType::GuestExit, NullThunkName, Arm64Ty, X64Ty, + ArgTranslations); auto MangledName = getArm64ECMangledFunctionName(F->getName().str()); assert(MangledName && "Can't guest exit to function that's already native"); std::string ThunkName = *MangledName; diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll index 0cf678f..6aeeeed 
100644 --- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll +++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll @@ -487,6 +487,109 @@ define void @cxx_method(ptr noundef nonnull align 8 dereferenceable(8) %0, ptr d ret void } +define <4 x i8> @small_vector(<4 x i8> %0) { +; CHECK-LABEL: .def $ientry_thunk$cdecl$m$m; +; CHECK: .section .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$m$m +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #192 +; CHECK-NEXT: .seh_stackalloc 192 +; CHECK-NEXT: stp q6, q7, [sp, #16] // 32-byte Folded Spill +; CHECK-NEXT: .seh_save_any_reg_p q6, 16 +; CHECK-NEXT: stp q8, q9, [sp, #48] // 32-byte Folded Spill +; CHECK-NEXT: .seh_save_any_reg_p q8, 48 +; CHECK-NEXT: stp q10, q11, [sp, #80] // 32-byte Folded Spill +; CHECK-NEXT: .seh_save_any_reg_p q10, 80 +; CHECK-NEXT: stp q12, q13, [sp, #112] // 32-byte Folded Spill +; CHECK-NEXT: .seh_save_any_reg_p q12, 112 +; CHECK-NEXT: stp q14, q15, [sp, #144] // 32-byte Folded Spill +; CHECK-NEXT: .seh_save_any_reg_p q14, 144 +; CHECK-NEXT: stp x29, x30, [sp, #176] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_fplr 176 +; CHECK-NEXT: add x29, sp, #176 +; CHECK-NEXT: .seh_add_fp 176 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: str w0, [sp, #12] +; CHECK-NEXT: ldr s0, [sp, #12] +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: blr x9 +; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-NEXT: adrp x9, __os_arm64x_dispatch_ret +; CHECK-NEXT: str s0, [sp, #8] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: ldr x0, [x9, :lo12:__os_arm64x_dispatch_ret] +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldp x29, x30, [sp, #176] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_fplr 176 +; CHECK-NEXT: ldp q14, q15, [sp, #144] // 32-byte Folded Reload +; CHECK-NEXT: .seh_save_any_reg_p q14, 144 +; CHECK-NEXT: ldp q12, q13, [sp, #112] // 32-byte Folded Reload +; CHECK-NEXT: .seh_save_any_reg_p q12, 112 +; CHECK-NEXT: ldp q10, q11, [sp, #80] // 32-byte Folded 
Reload +; CHECK-NEXT: .seh_save_any_reg_p q10, 80 +; CHECK-NEXT: ldp q8, q9, [sp, #48] // 32-byte Folded Reload +; CHECK-NEXT: .seh_save_any_reg_p q8, 48 +; CHECK-NEXT: ldp q6, q7, [sp, #16] // 32-byte Folded Reload +; CHECK-NEXT: .seh_save_any_reg_p q6, 16 +; CHECK-NEXT: add sp, sp, #192 +; CHECK-NEXT: .seh_stackalloc 192 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: br x0 +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +start: + ret <4 x i8> %0 +} + +define <8 x i16> @large_vector(<8 x i16> %0) { +; CHECK-LABEL: .def $ientry_thunk$cdecl$m16$m16; +; CHECK: .section .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$m16$m16 +; CHECK: // %bb.0: +; CHECK-NEXT: stp q6, q7, [sp, #-192]! // 32-byte Folded Spill +; CHECK-NEXT: .seh_save_any_reg_px q6, 192 +; CHECK-NEXT: stp q8, q9, [sp, #32] // 32-byte Folded Spill +; CHECK-NEXT: .seh_save_any_reg_p q8, 32 +; CHECK-NEXT: stp q10, q11, [sp, #64] // 32-byte Folded Spill +; CHECK-NEXT: .seh_save_any_reg_p q10, 64 +; CHECK-NEXT: stp q12, q13, [sp, #96] // 32-byte Folded Spill +; CHECK-NEXT: .seh_save_any_reg_p q12, 96 +; CHECK-NEXT: stp q14, q15, [sp, #128] // 32-byte Folded Spill +; CHECK-NEXT: .seh_save_any_reg_p q14, 128 +; CHECK-NEXT: str x19, [sp, #160] // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg x19, 160 +; CHECK-NEXT: stp x29, x30, [sp, #168] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_fplr 168 +; CHECK-NEXT: add x29, sp, #168 +; CHECK-NEXT: .seh_add_fp 168 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: blr x9 +; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_ret +; CHECK-NEXT: str q0, [x19] +; CHECK-NEXT: ldr x0, [x8, :lo12:__os_arm64x_dispatch_ret] +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldp x29, x30, [sp, #168] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_fplr 168 +; CHECK-NEXT: ldr x19, [sp, #160] // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg x19, 160 +; CHECK-NEXT: ldp q14, q15, [sp, #128] // 32-byte Folded Reload +; 
CHECK-NEXT: .seh_save_any_reg_p q14, 128 +; CHECK-NEXT: ldp q12, q13, [sp, #96] // 32-byte Folded Reload +; CHECK-NEXT: .seh_save_any_reg_p q12, 96 +; CHECK-NEXT: ldp q10, q11, [sp, #64] // 32-byte Folded Reload +; CHECK-NEXT: .seh_save_any_reg_p q10, 64 +; CHECK-NEXT: ldp q8, q9, [sp, #32] // 32-byte Folded Reload +; CHECK-NEXT: .seh_save_any_reg_p q8, 32 +; CHECK-NEXT: ldp q6, q7, [sp], #192 // 32-byte Folded Reload +; CHECK-NEXT: .seh_save_any_reg_px q6, 192 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: br x0 +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +start: + ret <8 x i16> %0 +} ; Verify the hybrid bitmap ; CHECK-LABEL: .section .hybmp$x,"yi" @@ -523,3 +626,9 @@ define void @cxx_method(ptr noundef nonnull align 8 dereferenceable(8) %0, ptr d ; CHECK-NEXT: .symidx "#cxx_method" ; CHECK-NEXT: .symidx $ientry_thunk$cdecl$i8$i8i8 ; CHECK-NEXT: .word 1 +; CHECK-NEXT: .symidx "#small_vector" +; CHECK-NEXT: .symidx $ientry_thunk$cdecl$m$m +; CHECK-NEXT: .word 1 +; CHECK-NEXT: .symidx "#large_vector" +; CHECK-NEXT: .symidx $ientry_thunk$cdecl$m16$m16 +; CHECK-NEXT: .word 1 diff --git a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll index 7a40fcd..dcc6758 100644 --- a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll +++ b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll @@ -457,6 +457,109 @@ declare %T2 @simple_struct(%T1, %T2, %T3, %T4) nounwind; ; CHECK-NEXT: .seh_endfunclet ; CHECK-NEXT: .seh_endproc +declare <4 x i8> @small_vector(<4 x i8> %0) nounwind; +; CHECK-LABEL: .def $iexit_thunk$cdecl$m$m; +; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m$m +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: .seh_stackalloc 64 +; CHECK-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_fplr 48 +; CHECK-NEXT: add x29, sp, #48 +; CHECK-NEXT: .seh_add_fp 48 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-NEXT: adrp x8, 
__os_arm64x_dispatch_call_no_redirect +; CHECK-NEXT: ldr x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect] +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: stur s0, [x29, #-4] +; CHECK-NEXT: blr x16 +; CHECK-NEXT: stur w8, [x29, #-8] +; CHECK-NEXT: ldur s0, [x29, #-8] +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_fplr 48 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: .seh_stackalloc 64 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +; CHECK-LABEL: .def "#small_vector$exit_thunk"; +; CHECK: .section .wowthk$aa,"xr",discard,"#small_vector$exit_thunk" +; CHECK: .weak_anti_dep small_vector +; CHECK: .weak_anti_dep "#small_vector" +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg_x x30, 16 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: adrp x8, __os_arm64x_check_icall +; CHECK-NEXT: adrp x11, small_vector +; CHECK-NEXT: add x11, x11, :lo12:small_vector +; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] +; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$m$m) +; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$m$m) +; CHECK-NEXT: blr x8 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg_x x30, 16 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: br x11 +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc + +declare <8 x i16> @large_vector(<8 x i16> %0) nounwind; +; CHECK-LABEL: .def $iexit_thunk$cdecl$m16$m16; +; CHECK: .section .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m16$m16 +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: .seh_stackalloc 80 +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_fplr 64 +; CHECK-NEXT: add x29, sp, #64 +; CHECK-NEXT: .seh_add_fp 
64 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: add x1, sp, #32 +; CHECK-NEXT: ldr x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect] +; CHECK-NEXT: str q0, [sp, #32] +; CHECK-NEXT: blr x16 +; CHECK-NEXT: ldur q0, [x29, #-16] +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_fplr 64 +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: .seh_stackalloc 80 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +; CHECK-LABEL: .def "#large_vector$exit_thunk"; +; CHECK: .section .wowthk$aa,"xr",discard,"#large_vector$exit_thunk" +; CHECK: .weak_anti_dep large_vector +; CHECK: .weak_anti_dep "#large_vector" +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg_x x30, 16 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: adrp x8, __os_arm64x_check_icall +; CHECK-NEXT: adrp x11, large_vector +; CHECK-NEXT: add x11, x11, :lo12:large_vector +; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] +; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$m16$m16) +; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$m16$m16) +; CHECK-NEXT: blr x8 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg_x x30, 16 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: br x11 +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc + ; CHECK-LABEL: .section .hybmp$x,"yi" ; CHECK-NEXT: .symidx "#func_caller" ; CHECK-NEXT: .symidx $ientry_thunk$cdecl$v$v @@ -515,6 +618,18 @@ declare %T2 @simple_struct(%T1, %T2, %T3, %T4) nounwind; ; CHECK-NEXT: .symidx "#simple_struct$exit_thunk" ; CHECK-NEXT: .symidx simple_struct ; CHECK-NEXT: .word 0 +; CHECK-NEXT: .symidx small_vector +; CHECK-NEXT: .symidx $iexit_thunk$cdecl$m$m +; CHECK-NEXT: .word 4 +; CHECK-NEXT: .symidx 
"#small_vector$exit_thunk" +; CHECK-NEXT: .symidx small_vector +; CHECK-NEXT: .word 0 +; CHECK-NEXT: .symidx large_vector +; CHECK-NEXT: .symidx $iexit_thunk$cdecl$m16$m16 +; CHECK-NEXT: .word 4 +; CHECK-NEXT: .symidx "#large_vector$exit_thunk" +; CHECK-NEXT: .symidx large_vector +; CHECK-NEXT: .word 0 define void @func_caller() nounwind { call void @no_op() @@ -529,5 +644,7 @@ define void @func_caller() nounwind { call [2 x i8] @small_array([2 x i8] [i8 0, i8 0], [2 x float] [float 0.0, float 0.0]) call [3 x i64] @large_array([3 x i64] [i64 0, i64 0, i64 0], [2 x double] [double 0.0, double 0.0], [2 x [2 x i64]] [[2 x i64] [i64 0, i64 0], [2 x i64] [i64 0, i64 0]]) call %T2 @simple_struct(%T1 { i16 0 }, %T2 { i32 0, float 0.0 }, %T3 { i64 0, double 0.0 }, %T4 { i64 0, double 0.0, i8 0 }) + call <4 x i8> @small_vector(<4 x i8> <i8 0, i8 0, i8 0, i8 0>) + call <8 x i16> @large_vector(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>) ret void } |