| author | Jonathan Thackray <jonathan.thackray@arm.com> | 2025-07-23 22:12:30 +0100 |
|---|---|---|
| committer | Jonathan Thackray <jonathan.thackray@arm.com> | 2025-07-23 22:12:30 +0100 |
| commit | 6e750e57d10acc9560731a082a41d3ba6a71e6c9 (patch) | |
| tree | 27bf34b23baa1b8a794dabe43f5d179a97e81d4f /clang/lib/CodeGen/TargetBuiltins/BuiltinPPC.cpp | |
| parent | f443f561331dc54aaed6897f51d7632d62a5ea95 (diff) | |
[clang] Rename files that macOS libtool warns about (NFC)
As mentioned in https://discourse.llvm.org/t/rfc-rename-source-files-in-clang-lib-codegen-targetbuiltins/87462/,
macOS's libtool warns about source files that have identical names, even
when they live in separate directories. Sadly, there doesn't appear to be
an easy way to disable this warning, so rename these files, as the
warnings are annoying for macOS users.
Fixes #133199.
Diffstat (limited to 'clang/lib/CodeGen/TargetBuiltins/BuiltinPPC.cpp')
-rw-r--r-- | clang/lib/CodeGen/TargetBuiltins/BuiltinPPC.cpp | 1359 |
1 file changed, 1359 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/TargetBuiltins/BuiltinPPC.cpp b/clang/lib/CodeGen/TargetBuiltins/BuiltinPPC.cpp new file mode 100644 index 0000000..c2bef23 --- /dev/null +++ b/clang/lib/CodeGen/TargetBuiltins/BuiltinPPC.cpp @@ -0,0 +1,1359 @@ +//===---------- BuiltinPPC.cpp - Emit LLVM Code for builtins --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Builtin calls as LLVM code. +// +//===----------------------------------------------------------------------===// + +#include "CGBuiltin.h" +#include "clang/Basic/TargetBuiltins.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/IntrinsicsPowerPC.h" +#include "llvm/Support/ScopedPrinter.h" + +using namespace clang; +using namespace CodeGen; +using namespace llvm; + +static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, + unsigned BuiltinID, + const CallExpr *E) { + Value *Addr = CGF.EmitScalarExpr(E->getArg(0)); + + SmallString<64> Asm; + raw_svector_ostream AsmOS(Asm); + llvm::IntegerType *RetType = CGF.Int32Ty; + + switch (BuiltinID) { + case clang::PPC::BI__builtin_ppc_ldarx: + AsmOS << "ldarx "; + RetType = CGF.Int64Ty; + break; + case clang::PPC::BI__builtin_ppc_lwarx: + AsmOS << "lwarx "; + RetType = CGF.Int32Ty; + break; + case clang::PPC::BI__builtin_ppc_lharx: + AsmOS << "lharx "; + RetType = CGF.Int16Ty; + break; + case clang::PPC::BI__builtin_ppc_lbarx: + AsmOS << "lbarx "; + RetType = CGF.Int8Ty; + break; + default: + llvm_unreachable("Expected only PowerPC load reserve intrinsics"); + } + + AsmOS << "$0, ${1:y}"; + + std::string Constraints = "=r,*Z,~{memory}"; + std::string_view MachineClobbers = CGF.getTarget().getClobbers(); + if (!MachineClobbers.empty()) { + Constraints += ','; + Constraints += MachineClobbers; + } + + llvm::Type *PtrType = CGF.UnqualPtrTy; + llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false); + + llvm::InlineAsm *IA = + llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); + llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr}); + CI->addParamAttr( + 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType)); + return CI; +} + +Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + // Do not emit the builtin arguments in the arguments of a function call, + // because the evaluation order of function arguments is not specified in C++. + // This is important when testing to ensure the arguments are emitted in the + // same order every time. 
Eg: + // Instead of: + // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)), + // EmitScalarExpr(E->getArg(1)), "swdiv"); + // Use: + // Value *Op0 = EmitScalarExpr(E->getArg(0)); + // Value *Op1 = EmitScalarExpr(E->getArg(1)); + // return Builder.CreateFDiv(Op0, Op1, "swdiv") + + Intrinsic::ID ID = Intrinsic::not_intrinsic; + +#include "llvm/TargetParser/PPCTargetParser.def" + auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx, + unsigned Mask, CmpInst::Predicate CompOp, + unsigned OpValue) -> Value * { + if (SupportMethod == BUILTIN_PPC_FALSE) + return llvm::ConstantInt::getFalse(ConvertType(E->getType())); + + if (SupportMethod == BUILTIN_PPC_TRUE) + return llvm::ConstantInt::getTrue(ConvertType(E->getType())); + + assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod."); + + llvm::Value *FieldValue = nullptr; + if (SupportMethod == USE_SYS_CONF) { + llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE); + llvm::Constant *SysConf = + CGM.CreateRuntimeVariable(STy, "_system_configuration"); + + // Grab the appropriate field from _system_configuration. + llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, FieldIdx)}; + + FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs); + FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue, + CharUnits::fromQuantity(4)); + } else if (SupportMethod == SYS_CALL) { + llvm::FunctionType *FTy = + llvm::FunctionType::get(Int64Ty, Int32Ty, false); + llvm::FunctionCallee Func = + CGM.CreateRuntimeFunction(FTy, "getsystemcfg"); + + FieldValue = + Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)}); + } + assert(FieldValue && + "SupportMethod value is not defined in PPCTargetParser.def."); + + if (Mask) + FieldValue = Builder.CreateAnd(FieldValue, Mask); + + llvm::Type *ValueType = FieldValue->getType(); + bool IsValueType64Bit = ValueType->isIntegerTy(64); + assert( + (IsValueType64Bit || ValueType->isIntegerTy(32)) && + "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr()."); + + return Builder.CreateICmp( + CompOp, FieldValue, + ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue)); + }; + + switch (BuiltinID) { + default: return nullptr; + + case Builtin::BI__builtin_cpu_is: { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); + llvm::Triple Triple = getTarget().getTriple(); + + typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo; + + auto [LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue] = + static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr) +#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \ + AIXID) \ + .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID}) +#include "llvm/TargetParser/PPCTargetParser.def" + .Default({BUILTIN_PPC_UNSUPPORTED, 0, + BUILTIN_PPC_UNSUPPORTED, 0})); + + if (Triple.isOSAIX()) { + assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) && + "Invalid CPU name. Missed by SemaChecking?"); + return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0, + ICmpInst::ICMP_EQ, AIXIDValue); + } + + assert(Triple.isOSLinux() && + "__builtin_cpu_is() is only supported for AIX and Linux."); + + assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) && + "Invalid CPU name. 
Missed by SemaChecking?"); + + if (LinuxSupportMethod == BUILTIN_PPC_FALSE) + return llvm::ConstantInt::getFalse(ConvertType(E->getType())); + + Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld); + Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is"); + return Builder.CreateICmpEQ(TheCall, + llvm::ConstantInt::get(Int32Ty, LinuxIDValue)); + } + case Builtin::BI__builtin_cpu_supports: { + llvm::Triple Triple = getTarget().getTriple(); + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); + if (Triple.isOSAIX()) { + typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate, + unsigned> + CPUSupportType; + auto [SupportMethod, FieldIdx, Mask, CompOp, Value] = + static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr) +#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \ + VALUE) \ + .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE}) +#include "llvm/TargetParser/PPCTargetParser.def" + .Default({BUILTIN_PPC_FALSE, 0, 0, + CmpInst::Predicate(), 0})); + return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp, + Value); + } + + assert(Triple.isOSLinux() && + "__builtin_cpu_supports() is only supported for AIX and Linux."); + auto [FeatureWord, BitMask] = + StringSwitch<std::pair<unsigned, unsigned>>(CPUStr) +#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \ + .Case(Name, {FA_WORD, Bitmask}) +#include "llvm/TargetParser/PPCTargetParser.def" + .Default({0, 0}); + if (!BitMask) + return Builder.getFalse(); + Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld); + Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports"); + Value *Mask = + Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask)); + return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty)); +#undef PPC_FAWORD_HWCAP +#undef PPC_FAWORD_HWCAP2 +#undef PPC_FAWORD_CPUID + } + + // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we + // call __builtin_readcyclecounter. 
+ case PPC::BI__builtin_ppc_get_timebase: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); + + // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr + case PPC::BI__builtin_altivec_lvx: + case PPC::BI__builtin_altivec_lvxl: + case PPC::BI__builtin_altivec_lvebx: + case PPC::BI__builtin_altivec_lvehx: + case PPC::BI__builtin_altivec_lvewx: + case PPC::BI__builtin_altivec_lvsl: + case PPC::BI__builtin_altivec_lvsr: + case PPC::BI__builtin_vsx_lxvd2x: + case PPC::BI__builtin_vsx_lxvw4x: + case PPC::BI__builtin_vsx_lxvd2x_be: + case PPC::BI__builtin_vsx_lxvw4x_be: + case PPC::BI__builtin_vsx_lxvl: + case PPC::BI__builtin_vsx_lxvll: + { + SmallVector<Value *, 2> Ops; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops.push_back(EmitScalarExpr(E->getArg(1))); + if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl || + BuiltinID == PPC::BI__builtin_vsx_lxvll)) { + Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]); + Ops.pop_back(); + } + + switch (BuiltinID) { + default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); + case PPC::BI__builtin_altivec_lvx: + ID = Intrinsic::ppc_altivec_lvx; + break; + case PPC::BI__builtin_altivec_lvxl: + ID = Intrinsic::ppc_altivec_lvxl; + break; + case PPC::BI__builtin_altivec_lvebx: + ID = Intrinsic::ppc_altivec_lvebx; + break; + case PPC::BI__builtin_altivec_lvehx: + ID = Intrinsic::ppc_altivec_lvehx; + break; + case PPC::BI__builtin_altivec_lvewx: + ID = Intrinsic::ppc_altivec_lvewx; + break; + case PPC::BI__builtin_altivec_lvsl: + ID = Intrinsic::ppc_altivec_lvsl; + break; + case PPC::BI__builtin_altivec_lvsr: + ID = Intrinsic::ppc_altivec_lvsr; + break; + case PPC::BI__builtin_vsx_lxvd2x: + ID = Intrinsic::ppc_vsx_lxvd2x; + break; + case PPC::BI__builtin_vsx_lxvw4x: + ID = Intrinsic::ppc_vsx_lxvw4x; + break; + case PPC::BI__builtin_vsx_lxvd2x_be: + ID = Intrinsic::ppc_vsx_lxvd2x_be; + break; + case PPC::BI__builtin_vsx_lxvw4x_be: + ID = Intrinsic::ppc_vsx_lxvw4x_be; + break; + case PPC::BI__builtin_vsx_lxvl: + ID = Intrinsic::ppc_vsx_lxvl; + break; + case PPC::BI__builtin_vsx_lxvll: + ID = Intrinsic::ppc_vsx_lxvll; + break; + } + llvm::Function *F = CGM.getIntrinsic(ID); + return Builder.CreateCall(F, Ops, ""); + } + + // vec_st, vec_xst_be + case PPC::BI__builtin_altivec_stvx: + case PPC::BI__builtin_altivec_stvxl: + case PPC::BI__builtin_altivec_stvebx: + case PPC::BI__builtin_altivec_stvehx: + case PPC::BI__builtin_altivec_stvewx: + case PPC::BI__builtin_vsx_stxvd2x: + case PPC::BI__builtin_vsx_stxvw4x: + case PPC::BI__builtin_vsx_stxvd2x_be: + case PPC::BI__builtin_vsx_stxvw4x_be: + case PPC::BI__builtin_vsx_stxvl: + case PPC::BI__builtin_vsx_stxvll: + { + SmallVector<Value *, 3> Ops; + Ops.push_back(EmitScalarExpr(E->getArg(0))); + Ops.push_back(EmitScalarExpr(E->getArg(1))); + Ops.push_back(EmitScalarExpr(E->getArg(2))); + if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl || + BuiltinID == PPC::BI__builtin_vsx_stxvll)) { + Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]); + Ops.pop_back(); + } + + switch (BuiltinID) { + default: llvm_unreachable("Unsupported st intrinsic!"); + case PPC::BI__builtin_altivec_stvx: + ID = Intrinsic::ppc_altivec_stvx; + break; + case PPC::BI__builtin_altivec_stvxl: + ID = Intrinsic::ppc_altivec_stvxl; + break; + case PPC::BI__builtin_altivec_stvebx: + ID = Intrinsic::ppc_altivec_stvebx; + break; + case PPC::BI__builtin_altivec_stvehx: + ID = Intrinsic::ppc_altivec_stvehx; + break; + case PPC::BI__builtin_altivec_stvewx: + ID = Intrinsic::ppc_altivec_stvewx; + break; + case PPC::BI__builtin_vsx_stxvd2x: + ID 
= Intrinsic::ppc_vsx_stxvd2x; + break; + case PPC::BI__builtin_vsx_stxvw4x: + ID = Intrinsic::ppc_vsx_stxvw4x; + break; + case PPC::BI__builtin_vsx_stxvd2x_be: + ID = Intrinsic::ppc_vsx_stxvd2x_be; + break; + case PPC::BI__builtin_vsx_stxvw4x_be: + ID = Intrinsic::ppc_vsx_stxvw4x_be; + break; + case PPC::BI__builtin_vsx_stxvl: + ID = Intrinsic::ppc_vsx_stxvl; + break; + case PPC::BI__builtin_vsx_stxvll: + ID = Intrinsic::ppc_vsx_stxvll; + break; + } + llvm::Function *F = CGM.getIntrinsic(ID); + return Builder.CreateCall(F, Ops, ""); + } + case PPC::BI__builtin_vsx_ldrmb: { + // Essentially boils down to performing an unaligned VMX load sequence so + // as to avoid crossing a page boundary and then shuffling the elements + // into the right side of the vector register. + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue(); + llvm::Type *ResTy = ConvertType(E->getType()); + bool IsLE = getTarget().isLittleEndian(); + + // If the user wants the entire vector, just load the entire vector. + if (NumBytes == 16) { + Value *LD = + Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1))); + if (!IsLE) + return LD; + + // Reverse the bytes on LE. + SmallVector<int, 16> RevMask; + for (int Idx = 0; Idx < 16; Idx++) + RevMask.push_back(15 - Idx); + return Builder.CreateShuffleVector(LD, LD, RevMask); + } + + llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx); + llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr + : Intrinsic::ppc_altivec_lvsl); + llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm); + Value *HiMem = Builder.CreateGEP( + Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1)); + Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo"); + Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi"); + Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1"); + + Op0 = IsLE ? HiLd : LoLd; + Op1 = IsLE ? LoLd : HiLd; + Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1"); + Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType()); + + if (IsLE) { + SmallVector<int, 16> Consts; + for (int Idx = 0; Idx < 16; Idx++) { + int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1) + : 16 - (NumBytes - Idx); + Consts.push_back(Val); + } + return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy), + Zero, Consts); + } + SmallVector<Constant *, 16> Consts; + for (int Idx = 0; Idx < 16; Idx++) + Consts.push_back(Builder.getInt8(NumBytes + Idx)); + Value *Mask2 = ConstantVector::get(Consts); + return Builder.CreateBitCast( + Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy); + } + case PPC::BI__builtin_vsx_strmb: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue(); + bool IsLE = getTarget().isLittleEndian(); + auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) { + // Storing the whole vector, simply store it on BE and reverse bytes and + // store on LE. 
+ if (Width == 16) { + Value *StVec = Op2; + if (IsLE) { + SmallVector<int, 16> RevMask; + for (int Idx = 0; Idx < 16; Idx++) + RevMask.push_back(15 - Idx); + StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask); + } + return Builder.CreateStore( + StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1))); + } + auto *ConvTy = Int64Ty; + unsigned NumElts = 0; + switch (Width) { + default: + llvm_unreachable("width for stores must be a power of 2"); + case 8: + ConvTy = Int64Ty; + NumElts = 2; + break; + case 4: + ConvTy = Int32Ty; + NumElts = 4; + break; + case 2: + ConvTy = Int16Ty; + NumElts = 8; + break; + case 1: + ConvTy = Int8Ty; + NumElts = 16; + break; + } + Value *Vec = Builder.CreateBitCast( + Op2, llvm::FixedVectorType::get(ConvTy, NumElts)); + Value *Ptr = + Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset)); + Value *Elt = Builder.CreateExtractElement(Vec, EltNo); + if (IsLE && Width > 1) { + Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy); + Elt = Builder.CreateCall(F, Elt); + } + return Builder.CreateStore( + Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1))); + }; + unsigned Stored = 0; + unsigned RemainingBytes = NumBytes; + Value *Result; + if (NumBytes == 16) + return StoreSubVec(16, 0, 0); + if (NumBytes >= 8) { + Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1); + RemainingBytes -= 8; + Stored += 8; + } + if (RemainingBytes >= 4) { + Result = StoreSubVec(4, NumBytes - Stored - 4, + IsLE ? (Stored >> 2) : 3 - (Stored >> 2)); + RemainingBytes -= 4; + Stored += 4; + } + if (RemainingBytes >= 2) { + Result = StoreSubVec(2, NumBytes - Stored - 2, + IsLE ? (Stored >> 1) : 7 - (Stored >> 1)); + RemainingBytes -= 2; + Stored += 2; + } + if (RemainingBytes) + Result = + StoreSubVec(1, NumBytes - Stored - 1, IsLE ? 
Stored : 15 - Stored); + return Result; + } + // Square root + case PPC::BI__builtin_vsx_xvsqrtsp: + case PPC::BI__builtin_vsx_xvsqrtdp: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + if (Builder.getIsFPConstrained()) { + llvm::Function *F = CGM.getIntrinsic( + Intrinsic::experimental_constrained_sqrt, ResultType); + return Builder.CreateConstrainedFPCall(F, X); + } else { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); + return Builder.CreateCall(F, X); + } + } + // Count leading zeros + case PPC::BI__builtin_altivec_vclzb: + case PPC::BI__builtin_altivec_vclzh: + case PPC::BI__builtin_altivec_vclzw: + case PPC::BI__builtin_altivec_vclzd: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); + return Builder.CreateCall(F, {X, Undef}); + } + case PPC::BI__builtin_altivec_vctzb: + case PPC::BI__builtin_altivec_vctzh: + case PPC::BI__builtin_altivec_vctzw: + case PPC::BI__builtin_altivec_vctzd: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); + return Builder.CreateCall(F, {X, Undef}); + } + case PPC::BI__builtin_altivec_vinsd: + case PPC::BI__builtin_altivec_vinsw: + case PPC::BI__builtin_altivec_vinsd_elt: + case PPC::BI__builtin_altivec_vinsw_elt: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + + bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw || + BuiltinID == PPC::BI__builtin_altivec_vinsd); + + bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw || + BuiltinID == PPC::BI__builtin_altivec_vinsw_elt); + + // The third argument must be a compile time constant. + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); + assert(ArgCI && + "Third Arg to vinsw/vinsd intrinsic must be a constant integer!"); + + // Valid value for the third argument is dependent on the input type and + // builtin called. + int ValidMaxValue = 0; + if (IsUnaligned) + ValidMaxValue = (Is32bit) ? 12 : 8; + else + ValidMaxValue = (Is32bit) ? 3 : 1; + + // Get value of third argument. + int64_t ConstArg = ArgCI->getSExtValue(); + + // Compose range checking error message. + std::string RangeErrMsg = IsUnaligned ? "byte" : "element"; + RangeErrMsg += " number " + llvm::to_string(ConstArg); + RangeErrMsg += " is outside of the valid range [0, "; + RangeErrMsg += llvm::to_string(ValidMaxValue) + "]"; + + // Issue error if third argument is not within the valid range. + if (ConstArg < 0 || ConstArg > ValidMaxValue) + CGM.Error(E->getExprLoc(), RangeErrMsg); + + // Input to vec_replace_elt is an element index, convert to byte index. + if (!IsUnaligned) { + ConstArg *= Is32bit ? 4 : 8; + // Fix the constant according to endianess. + if (getTarget().isLittleEndian()) + ConstArg = (Is32bit ? 12 : 8) - ConstArg; + } + + ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd; + Op2 = ConstantInt::getSigned(Int32Ty, ConstArg); + // Casting input to vector int as per intrinsic definition. + Op0 = + Is32bit + ? 
Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)) + : Builder.CreateBitCast(Op0, + llvm::FixedVectorType::get(Int64Ty, 2)); + return Builder.CreateBitCast( + Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType); + } + case PPC::BI__builtin_altivec_vadduqm: + case PPC::BI__builtin_altivec_vsubuqm: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); + Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1)); + Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1)); + if (BuiltinID == PPC::BI__builtin_altivec_vadduqm) + return Builder.CreateAdd(Op0, Op1, "vadduqm"); + else + return Builder.CreateSub(Op0, Op1, "vsubuqm"); + } + case PPC::BI__builtin_altivec_vaddcuq_c: + case PPC::BI__builtin_altivec_vsubcuq_c: { + SmallVector<Value *, 2> Ops; + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + llvm::Type *V1I128Ty = llvm::FixedVectorType::get( + llvm::IntegerType::get(getLLVMContext(), 128), 1); + Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty)); + Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty)); + ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c) + ? Intrinsic::ppc_altivec_vaddcuq + : Intrinsic::ppc_altivec_vsubcuq; + return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, ""); + } + case PPC::BI__builtin_altivec_vaddeuqm_c: + case PPC::BI__builtin_altivec_vaddecuq_c: + case PPC::BI__builtin_altivec_vsubeuqm_c: + case PPC::BI__builtin_altivec_vsubecuq_c: { + SmallVector<Value *, 3> Ops; + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + llvm::Type *V1I128Ty = llvm::FixedVectorType::get( + llvm::IntegerType::get(getLLVMContext(), 128), 1); + Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty)); + Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty)); + Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty)); + switch (BuiltinID) { + default: + llvm_unreachable("Unsupported intrinsic!"); + case PPC::BI__builtin_altivec_vaddeuqm_c: + ID = Intrinsic::ppc_altivec_vaddeuqm; + break; + case PPC::BI__builtin_altivec_vaddecuq_c: + ID = Intrinsic::ppc_altivec_vaddecuq; + break; + case PPC::BI__builtin_altivec_vsubeuqm_c: + ID = Intrinsic::ppc_altivec_vsubeuqm; + break; + case PPC::BI__builtin_altivec_vsubecuq_c: + ID = Intrinsic::ppc_altivec_vsubecuq; + break; + } + return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, ""); + } + case PPC::BI__builtin_ppc_rldimi: + case PPC::BI__builtin_ppc_rlwimi: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + Value *Op3 = EmitScalarExpr(E->getArg(3)); + // rldimi is 64-bit instruction, expand the intrinsic before isel to + // leverage peephole and avoid legalization efforts. + if (BuiltinID == PPC::BI__builtin_ppc_rldimi && + !getTarget().getTriple().isPPC64()) { + Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType()); + Op2 = Builder.CreateZExt(Op2, Int64Ty); + Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2}); + return Builder.CreateOr(Builder.CreateAnd(Shift, Op3), + Builder.CreateAnd(Op1, Builder.CreateNot(Op3))); + } + return Builder.CreateCall( + CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi + ? 
Intrinsic::ppc_rldimi + : Intrinsic::ppc_rlwimi), + {Op0, Op1, Op2, Op3}); + } + case PPC::BI__builtin_ppc_rlwnm: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm), + {Op0, Op1, Op2}); + } + case PPC::BI__builtin_ppc_poppar4: + case PPC::BI__builtin_ppc_poppar8: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgType = Op0->getType(); + Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); + Value *Tmp = Builder.CreateCall(F, Op0); + + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, + "cast"); + return Result; + } + case PPC::BI__builtin_ppc_cmpb: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + if (getTarget().getTriple().isPPC64()) { + Function *F = + CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty}); + return Builder.CreateCall(F, {Op0, Op1}, "cmpb"); + } + // For 32 bit, emit the code as below: + // %conv = trunc i64 %a to i32 + // %conv1 = trunc i64 %b to i32 + // %shr = lshr i64 %a, 32 + // %conv2 = trunc i64 %shr to i32 + // %shr3 = lshr i64 %b, 32 + // %conv4 = trunc i64 %shr3 to i32 + // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1) + // %conv5 = zext i32 %0 to i64 + // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4) + // %conv614 = zext i32 %1 to i64 + // %shl = shl nuw i64 %conv614, 32 + // %or = or i64 %shl, %conv5 + // ret i64 %or + Function *F = + CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty}); + Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty); + Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty); + Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32); + Value *ArgOneHi = + Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty); + Value *ArgTwoHi = + Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty); + Value *ResLo = Builder.CreateZExt( + Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty); + Value *ResHiShift = Builder.CreateZExt( + Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty); + Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt); + return Builder.CreateOr(ResLo, ResHi); + } + // Copy sign + case PPC::BI__builtin_vsx_xvcpsgnsp: + case PPC::BI__builtin_vsx_xvcpsgndp: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + ID = Intrinsic::copysign; + llvm::Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.CreateCall(F, {X, Y}); + } + // Rounding/truncation + case PPC::BI__builtin_vsx_xvrspip: + case PPC::BI__builtin_vsx_xvrdpip: + case PPC::BI__builtin_vsx_xvrdpim: + case PPC::BI__builtin_vsx_xvrspim: + case PPC::BI__builtin_vsx_xvrdpi: + case PPC::BI__builtin_vsx_xvrspi: + case PPC::BI__builtin_vsx_xvrdpic: + case PPC::BI__builtin_vsx_xvrspic: + case PPC::BI__builtin_vsx_xvrdpiz: + case PPC::BI__builtin_vsx_xvrspiz: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || + BuiltinID == PPC::BI__builtin_vsx_xvrspim) + ID = Builder.getIsFPConstrained() + ? 
Intrinsic::experimental_constrained_floor + : Intrinsic::floor; + else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || + BuiltinID == PPC::BI__builtin_vsx_xvrspi) + ID = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_round + : Intrinsic::round; + else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || + BuiltinID == PPC::BI__builtin_vsx_xvrspic) + ID = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_rint + : Intrinsic::rint; + else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || + BuiltinID == PPC::BI__builtin_vsx_xvrspip) + ID = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_ceil + : Intrinsic::ceil; + else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || + BuiltinID == PPC::BI__builtin_vsx_xvrspiz) + ID = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_trunc + : Intrinsic::trunc; + llvm::Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X) + : Builder.CreateCall(F, X); + } + + // Absolute value + case PPC::BI__builtin_vsx_xvabsdp: + case PPC::BI__builtin_vsx_xvabssp: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); + return Builder.CreateCall(F, X); + } + + // Fastmath by default + case PPC::BI__builtin_ppc_recipdivf: + case PPC::BI__builtin_ppc_recipdivd: + case PPC::BI__builtin_ppc_rsqrtf: + case PPC::BI__builtin_ppc_rsqrtd: { + FastMathFlags FMF = Builder.getFastMathFlags(); + Builder.getFastMathFlags().setFast(); + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + + if (BuiltinID == PPC::BI__builtin_ppc_recipdivf || + BuiltinID == PPC::BI__builtin_ppc_recipdivd) { + Value *Y = EmitScalarExpr(E->getArg(1)); + Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv"); + Builder.getFastMathFlags() &= (FMF); + return FDiv; + } + auto *One = ConstantFP::get(ResultType, 1.0); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); + Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt"); + Builder.getFastMathFlags() &= (FMF); + return FDiv; + } + case PPC::BI__builtin_ppc_alignx: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + ConstantInt *AlignmentCI = cast<ConstantInt>(Op0); + if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment)) + AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(), + llvm::Value::MaximumAlignment); + + emitAlignmentAssumption(Op1, E->getArg(1), + /*The expr loc is sufficient.*/ SourceLocation(), + AlignmentCI, nullptr); + return Op1; + } + case PPC::BI__builtin_ppc_rdlam: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + llvm::Type *Ty = Op0->getType(); + Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false); + Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty); + Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt}); + return Builder.CreateAnd(Rotate, Op2); + } + case PPC::BI__builtin_ppc_load2r: { + Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *LoadIntrinsic = Builder.CreateCall(F, {Op0}); + return Builder.CreateTrunc(LoadIntrinsic, Int16Ty); + } + // FMA variations + case PPC::BI__builtin_ppc_fnmsub: + case PPC::BI__builtin_ppc_fnmsubs: + case PPC::BI__builtin_vsx_xvmaddadp: + case 
PPC::BI__builtin_vsx_xvmaddasp: + case PPC::BI__builtin_vsx_xvnmaddadp: + case PPC::BI__builtin_vsx_xvnmaddasp: + case PPC::BI__builtin_vsx_xvmsubadp: + case PPC::BI__builtin_vsx_xvmsubasp: + case PPC::BI__builtin_vsx_xvnmsubadp: + case PPC::BI__builtin_vsx_xvnmsubasp: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Value *Y = EmitScalarExpr(E->getArg(1)); + Value *Z = EmitScalarExpr(E->getArg(2)); + llvm::Function *F; + if (Builder.getIsFPConstrained()) + F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); + else + F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + switch (BuiltinID) { + case PPC::BI__builtin_vsx_xvmaddadp: + case PPC::BI__builtin_vsx_xvmaddasp: + if (Builder.getIsFPConstrained()) + return Builder.CreateConstrainedFPCall(F, {X, Y, Z}); + else + return Builder.CreateCall(F, {X, Y, Z}); + case PPC::BI__builtin_vsx_xvnmaddadp: + case PPC::BI__builtin_vsx_xvnmaddasp: + if (Builder.getIsFPConstrained()) + return Builder.CreateFNeg( + Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg"); + else + return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg"); + case PPC::BI__builtin_vsx_xvmsubadp: + case PPC::BI__builtin_vsx_xvmsubasp: + if (Builder.getIsFPConstrained()) + return Builder.CreateConstrainedFPCall( + F, {X, Y, Builder.CreateFNeg(Z, "neg")}); + else + return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); + case PPC::BI__builtin_ppc_fnmsub: + case PPC::BI__builtin_ppc_fnmsubs: + case PPC::BI__builtin_vsx_xvnmsubadp: + case PPC::BI__builtin_vsx_xvnmsubasp: + if (Builder.getIsFPConstrained()) + return Builder.CreateFNeg( + Builder.CreateConstrainedFPCall( + F, {X, Y, Builder.CreateFNeg(Z, "neg")}), + "neg"); + else + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z}); + } + llvm_unreachable("Unknown FMA operation"); + return nullptr; // Suppress no-return warning + } + + case PPC::BI__builtin_vsx_insertword: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); + + // Third argument is a compile time constant int. It must be clamped to + // to the range [0, 12]. + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); + assert(ArgCI && + "Third arg to xxinsertw intrinsic must be constant integer"); + const int64_t MaxIndex = 12; + int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex); + + // The builtin semantics don't exactly match the xxinsertw instructions + // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the + // word from the first argument, and inserts it in the second argument. The + // instruction extracts the word from its second input register and inserts + // it into its first input register, so swap the first and second arguments. + std::swap(Op0, Op1); + + // Need to cast the second argument from a vector of unsigned int to a + // vector of long long. + Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2)); + + if (getTarget().isLittleEndian()) { + // Reverse the double words in the vector we will extract from. + Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); + Op0 = Builder.CreateShuffleVector(Op0, Op0, {1, 0}); + + // Reverse the index. + Index = MaxIndex - Index; + } + + // Intrinsic expects the first arg to be a vector of int. 
+ Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)); + Op2 = ConstantInt::getSigned(Int32Ty, Index); + return Builder.CreateCall(F, {Op0, Op1, Op2}); + } + + case PPC::BI__builtin_vsx_extractuword: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); + + // Intrinsic expects the first argument to be a vector of doublewords. + Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); + + // The second argument is a compile time constant int that needs to + // be clamped to the range [0, 12]. + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1); + assert(ArgCI && + "Second Arg to xxextractuw intrinsic must be a constant integer!"); + const int64_t MaxIndex = 12; + int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex); + + if (getTarget().isLittleEndian()) { + // Reverse the index. + Index = MaxIndex - Index; + Op1 = ConstantInt::getSigned(Int32Ty, Index); + + // Emit the call, then reverse the double words of the results vector. + Value *Call = Builder.CreateCall(F, {Op0, Op1}); + + Value *ShuffleCall = + Builder.CreateShuffleVector(Call, Call, {1, 0}); + return ShuffleCall; + } else { + Op1 = ConstantInt::getSigned(Int32Ty, Index); + return Builder.CreateCall(F, {Op0, Op1}); + } + } + + case PPC::BI__builtin_vsx_xxpermdi: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); + assert(ArgCI && "Third arg must be constant integer!"); + + unsigned Index = ArgCI->getZExtValue(); + Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); + Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2)); + + // Account for endianness by treating this as just a shuffle. So we use the + // same indices for both LE and BE in order to produce expected results in + // both cases. + int ElemIdx0 = (Index & 2) >> 1; + int ElemIdx1 = 2 + (Index & 1); + + int ShuffleElts[2] = {ElemIdx0, ElemIdx1}; + Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts); + QualType BIRetType = E->getType(); + auto RetTy = ConvertType(BIRetType); + return Builder.CreateBitCast(ShuffleCall, RetTy); + } + + case PPC::BI__builtin_vsx_xxsldwi: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2); + assert(ArgCI && "Third argument must be a compile time constant"); + unsigned Index = ArgCI->getZExtValue() & 0x3; + Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)); + Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4)); + + // Create a shuffle mask + int ElemIdx0; + int ElemIdx1; + int ElemIdx2; + int ElemIdx3; + if (getTarget().isLittleEndian()) { + // Little endian element N comes from element 8+N-Index of the + // concatenated wide vector (of course, using modulo arithmetic on + // the total number of elements). 
+ ElemIdx0 = (8 - Index) % 8; + ElemIdx1 = (9 - Index) % 8; + ElemIdx2 = (10 - Index) % 8; + ElemIdx3 = (11 - Index) % 8; + } else { + // Big endian ElemIdx<N> = Index + N + ElemIdx0 = Index; + ElemIdx1 = Index + 1; + ElemIdx2 = Index + 2; + ElemIdx3 = Index + 3; + } + + int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3}; + Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts); + QualType BIRetType = E->getType(); + auto RetTy = ConvertType(BIRetType); + return Builder.CreateBitCast(ShuffleCall, RetTy); + } + + case PPC::BI__builtin_pack_vector_int128: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + bool isLittleEndian = getTarget().isLittleEndian(); + Value *PoisonValue = + llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2)); + Value *Res = Builder.CreateInsertElement( + PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0)); + Res = Builder.CreateInsertElement(Res, Op1, + (uint64_t)(isLittleEndian ? 0 : 1)); + return Builder.CreateBitCast(Res, ConvertType(E->getType())); + } + + case PPC::BI__builtin_unpack_vector_int128: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + ConstantInt *Index = cast<ConstantInt>(Op1); + Value *Unpacked = Builder.CreateBitCast( + Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2)); + + if (getTarget().isLittleEndian()) + Index = + ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue()); + + return Builder.CreateExtractElement(Unpacked, Index); + } + + case PPC::BI__builtin_ppc_sthcx: { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx); + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty); + return Builder.CreateCall(F, {Op0, Op1}); + } + + // The PPC MMA builtins take a pointer to a __vector_quad as an argument. + // Some of the MMA instructions accumulate their result into an existing + // accumulator whereas the others generate a new accumulator. So we need to + // use custom code generation to expand a builtin call with a pointer to a + // load (if the corresponding instruction accumulates its result) followed by + // the call to the intrinsic and a store of the result. +#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \ + case PPC::BI__builtin_##Name: +#include "clang/Basic/BuiltinsPPC.def" + { + SmallVector<Value *, 4> Ops; + for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) + if (E->getArg(i)->getType()->isArrayType()) + Ops.push_back( + EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this)); + else + Ops.push_back(EmitScalarExpr(E->getArg(i))); + // The first argument of these two builtins is a pointer used to store their + // result. However, the llvm intrinsics return their result in multiple + // return values. So, here we emit code extracting these values from the + // intrinsic results and storing them using that pointer. 
+ if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc || + BuiltinID == PPC::BI__builtin_vsx_disassemble_pair || + BuiltinID == PPC::BI__builtin_mma_disassemble_pair) { + unsigned NumVecs = 2; + auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair; + if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) { + NumVecs = 4; + Intrinsic = Intrinsic::ppc_mma_disassemble_acc; + } + llvm::Function *F = CGM.getIntrinsic(Intrinsic); + Address Addr = EmitPointerWithAlignment(E->getArg(1)); + Value *Vec = Builder.CreateLoad(Addr); + Value *Call = Builder.CreateCall(F, {Vec}); + llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16); + Value *Ptr = Ops[0]; + for (unsigned i=0; i<NumVecs; i++) { + Value *Vec = Builder.CreateExtractValue(Call, i); + llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i); + Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index); + Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16)); + } + return Call; + } + if (BuiltinID == PPC::BI__builtin_vsx_build_pair || + BuiltinID == PPC::BI__builtin_mma_build_acc) { + // Reverse the order of the operands for LE, so the + // same builtin call can be used on both LE and BE + // without the need for the programmer to swap operands. + // The operands are reversed starting from the second argument, + // the first operand is the pointer to the pair/accumulator + // that is being built. + if (getTarget().isLittleEndian()) + std::reverse(Ops.begin() + 1, Ops.end()); + } + bool Accumulate; + switch (BuiltinID) { + #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \ + case PPC::BI__builtin_##Name: \ + ID = Intrinsic::ppc_##Intr; \ + Accumulate = Acc; \ + break; + #include "clang/Basic/BuiltinsPPC.def" + } + if (BuiltinID == PPC::BI__builtin_vsx_lxvp || + BuiltinID == PPC::BI__builtin_vsx_stxvp || + BuiltinID == PPC::BI__builtin_mma_lxvp || + BuiltinID == PPC::BI__builtin_mma_stxvp) { + if (BuiltinID == PPC::BI__builtin_vsx_lxvp || + BuiltinID == PPC::BI__builtin_mma_lxvp) { + Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]); + } else { + Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]); + } + Ops.pop_back(); + llvm::Function *F = CGM.getIntrinsic(ID); + return Builder.CreateCall(F, Ops, ""); + } + SmallVector<Value*, 4> CallOps; + if (Accumulate) { + Address Addr = EmitPointerWithAlignment(E->getArg(0)); + Value *Acc = Builder.CreateLoad(Addr); + CallOps.push_back(Acc); + } + if (BuiltinID == PPC::BI__builtin_mma_dmmr || + BuiltinID == PPC::BI__builtin_mma_dmxor) { + Address Addr = EmitPointerWithAlignment(E->getArg(1)); + Ops[1] = Builder.CreateLoad(Addr); + } + for (unsigned i=1; i<Ops.size(); i++) + CallOps.push_back(Ops[i]); + llvm::Function *F = CGM.getIntrinsic(ID); + Value *Call = Builder.CreateCall(F, CallOps); + return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign()); + } + + case PPC::BI__builtin_ppc_compare_and_swap: + case PPC::BI__builtin_ppc_compare_and_swaplp: { + Address Addr = EmitPointerWithAlignment(E->getArg(0)); + Address OldValAddr = EmitPointerWithAlignment(E->getArg(1)); + Value *OldVal = Builder.CreateLoad(OldValAddr); + QualType AtomicTy = E->getArg(0)->getType()->getPointeeType(); + LValue LV = MakeAddrLValue(Addr, AtomicTy); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + auto Pair = EmitAtomicCompareExchange( + LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(), + llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true); + // Unlike c11's atomic_compare_exchange, according to + // 
https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp + // > In either case, the contents of the memory location specified by addr + // > are copied into the memory location specified by old_val_addr. + // But it hasn't specified storing to OldValAddr is atomic or not and + // which order to use. Now following XL's codegen, treat it as a normal + // store. + Value *LoadedVal = Pair.first.getScalarVal(); + Builder.CreateStore(LoadedVal, OldValAddr); + return Builder.CreateZExt(Pair.second, Builder.getInt32Ty()); + } + case PPC::BI__builtin_ppc_fetch_and_add: + case PPC::BI__builtin_ppc_fetch_and_addlp: { + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, + llvm::AtomicOrdering::Monotonic); + } + case PPC::BI__builtin_ppc_fetch_and_and: + case PPC::BI__builtin_ppc_fetch_and_andlp: { + return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, + llvm::AtomicOrdering::Monotonic); + } + + case PPC::BI__builtin_ppc_fetch_and_or: + case PPC::BI__builtin_ppc_fetch_and_orlp: { + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, + llvm::AtomicOrdering::Monotonic); + } + case PPC::BI__builtin_ppc_fetch_and_swap: + case PPC::BI__builtin_ppc_fetch_and_swaplp: { + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, + llvm::AtomicOrdering::Monotonic); + } + case PPC::BI__builtin_ppc_ldarx: + case PPC::BI__builtin_ppc_lwarx: + case PPC::BI__builtin_ppc_lharx: + case PPC::BI__builtin_ppc_lbarx: + return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E); + case PPC::BI__builtin_ppc_mfspr: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32 + ? Int32Ty + : Int64Ty; + Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType); + return Builder.CreateCall(F, {Op0}); + } + case PPC::BI__builtin_ppc_mtspr: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32 + ? Int32Ty + : Int64Ty; + Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType); + return Builder.CreateCall(F, {Op0, Op1}); + } + case PPC::BI__builtin_ppc_popcntb: { + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgType = ArgValue->getType(); + Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType}); + return Builder.CreateCall(F, {ArgValue}, "popcntb"); + } + case PPC::BI__builtin_ppc_mtfsf: { + // The builtin takes a uint32 that needs to be cast to an + // f64 to be passed to the intrinsic. 
+ Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf); + return Builder.CreateCall(F, {Op0, Cast}, ""); + } + + case PPC::BI__builtin_ppc_swdiv_nochk: + case PPC::BI__builtin_ppc_swdivs_nochk: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + FastMathFlags FMF = Builder.getFastMathFlags(); + Builder.getFastMathFlags().setFast(); + Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk"); + Builder.getFastMathFlags() &= (FMF); + return FDiv; + } + case PPC::BI__builtin_ppc_fric: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::rint, + Intrinsic::experimental_constrained_rint)) + .getScalarVal(); + case PPC::BI__builtin_ppc_frim: + case PPC::BI__builtin_ppc_frims: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::floor, + Intrinsic::experimental_constrained_floor)) + .getScalarVal(); + case PPC::BI__builtin_ppc_frin: + case PPC::BI__builtin_ppc_frins: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::round, + Intrinsic::experimental_constrained_round)) + .getScalarVal(); + case PPC::BI__builtin_ppc_frip: + case PPC::BI__builtin_ppc_frips: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::ceil, + Intrinsic::experimental_constrained_ceil)) + .getScalarVal(); + case PPC::BI__builtin_ppc_friz: + case PPC::BI__builtin_ppc_frizs: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::trunc, + Intrinsic::experimental_constrained_trunc)) + .getScalarVal(); + case PPC::BI__builtin_ppc_fsqrt: + case PPC::BI__builtin_ppc_fsqrts: + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::sqrt, + Intrinsic::experimental_constrained_sqrt)) + .getScalarVal(); + case PPC::BI__builtin_ppc_test_data_class: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()), + {Op0, Op1}, "test_data_class"); + } + case PPC::BI__builtin_ppc_maxfe: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + Value *Op3 = EmitScalarExpr(E->getArg(3)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe), + {Op0, Op1, Op2, Op3}); + } + case PPC::BI__builtin_ppc_maxfl: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + Value *Op3 = EmitScalarExpr(E->getArg(3)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl), + {Op0, Op1, Op2, Op3}); + } + case PPC::BI__builtin_ppc_maxfs: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + Value *Op3 = EmitScalarExpr(E->getArg(3)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs), + {Op0, Op1, Op2, Op3}); + } + case PPC::BI__builtin_ppc_minfe: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + Value *Op3 = EmitScalarExpr(E->getArg(3)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe), + {Op0, Op1, Op2, Op3}); + } + case PPC::BI__builtin_ppc_minfl: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 
= EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + Value *Op3 = EmitScalarExpr(E->getArg(3)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl), + {Op0, Op1, Op2, Op3}); + } + case PPC::BI__builtin_ppc_minfs: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + Value *Op2 = EmitScalarExpr(E->getArg(2)); + Value *Op3 = EmitScalarExpr(E->getArg(3)); + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs), + {Op0, Op1, Op2, Op3}); + } + case PPC::BI__builtin_ppc_swdiv: + case PPC::BI__builtin_ppc_swdivs: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Op1 = EmitScalarExpr(E->getArg(1)); + return Builder.CreateFDiv(Op0, Op1, "swdiv"); + } + case PPC::BI__builtin_ppc_set_fpscr_rn: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd), + {EmitScalarExpr(E->getArg(0))}); + case PPC::BI__builtin_ppc_mffs: + return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm)); + } +} |