//===---------- PPC.cpp - Emit LLVM Code for builtins ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGBuiltin.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/Support/ScopedPrinter.h"

using namespace clang;
using namespace CodeGen;
using namespace llvm;

static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
                                                unsigned BuiltinID,
                                                const CallExpr *E) {
  Value *Addr = CGF.EmitScalarExpr(E->getArg(0));

  SmallString<64> Asm;
  raw_svector_ostream AsmOS(Asm);
  llvm::IntegerType *RetType = CGF.Int32Ty;

  switch (BuiltinID) {
  case clang::PPC::BI__builtin_ppc_ldarx:
    AsmOS << "ldarx ";
    RetType = CGF.Int64Ty;
    break;
  case clang::PPC::BI__builtin_ppc_lwarx:
    AsmOS << "lwarx ";
    RetType = CGF.Int32Ty;
    break;
  case clang::PPC::BI__builtin_ppc_lharx:
    AsmOS << "lharx ";
    RetType = CGF.Int16Ty;
    break;
  case clang::PPC::BI__builtin_ppc_lbarx:
    AsmOS << "lbarx ";
    RetType = CGF.Int8Ty;
    break;
  default:
    llvm_unreachable("Expected only PowerPC load reserve intrinsics");
  }

  AsmOS << "$0, ${1:y}";

  std::string Constraints = "=r,*Z,~{memory}";
  std::string_view MachineClobbers = CGF.getTarget().getClobbers();
  if (!MachineClobbers.empty()) {
    Constraints += ',';
    Constraints += MachineClobbers;
  }

  llvm::Type *PtrType = CGF.UnqualPtrTy;
  llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);

  llvm::InlineAsm *IA =
      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
  llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
  CI->addParamAttr(
      0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
  return CI;
}

Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  // Do not emit the builtin arguments in the arguments of a function call,
  // because the evaluation order of function arguments is not specified in
  // C++. This is important when testing to ensure the arguments are emitted
  // in the same order every time. Eg:
  // Instead of:
  //  return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
  //                            EmitScalarExpr(E->getArg(1)), "swdiv");
  // Use:
  //  Value *Op0 = EmitScalarExpr(E->getArg(0));
  //  Value *Op1 = EmitScalarExpr(E->getArg(1));
  //  return Builder.CreateFDiv(Op0, Op1, "swdiv")

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

#include "llvm/TargetParser/PPCTargetParser.def"
  auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
                                     unsigned Mask, CmpInst::Predicate CompOp,
                                     unsigned OpValue) -> Value * {
    if (SupportMethod == BUILTIN_PPC_FALSE)
      return llvm::ConstantInt::getFalse(ConvertType(E->getType()));

    if (SupportMethod == BUILTIN_PPC_TRUE)
      return llvm::ConstantInt::getTrue(ConvertType(E->getType()));

    assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");

    llvm::Value *FieldValue = nullptr;
    if (SupportMethod == USE_SYS_CONF) {
      llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
      llvm::Constant *SysConf =
          CGM.CreateRuntimeVariable(STy, "_system_configuration");

      // Grab the appropriate field from _system_configuration.
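      // A sketch of the IR this emits (illustrative; the struct layout comes
      // from PPC_SYSTEMCONFIG_TYPE and FieldIdx from PPCTargetParser.def):
      //   %fld = getelementptr inbounds %struct, ptr @_system_configuration,
      //          i32 0, i32 <FieldIdx>
      //   %val = load i32, ptr %fld, align 4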
      llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
                             ConstantInt::get(Int32Ty, FieldIdx)};

      FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs);
      FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
                                             CharUnits::fromQuantity(4));
    } else if (SupportMethod == SYS_CALL) {
      llvm::FunctionType *FTy =
          llvm::FunctionType::get(Int64Ty, Int32Ty, false);
      llvm::FunctionCallee Func =
          CGM.CreateRuntimeFunction(FTy, "getsystemcfg");

      FieldValue =
          Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});
    }
    assert(FieldValue &&
           "SupportMethod value is not defined in PPCTargetParser.def.");

    if (Mask)
      FieldValue = Builder.CreateAnd(FieldValue, Mask);

    llvm::Type *ValueType = FieldValue->getType();
    bool IsValueType64Bit = ValueType->isIntegerTy(64);
    assert(
        (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
        "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");

    return Builder.CreateICmp(
        CompOp, FieldValue,
        ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
  };

  switch (BuiltinID) {
  default:
    return nullptr;

  case Builtin::BI__builtin_cpu_is: {
    const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
    StringRef CPUStr = cast<StringLiteral>(CPUExpr)->getString();
    llvm::Triple Triple = getTarget().getTriple();

    typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;

    auto [LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue] =
        static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)
#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD,       \
                AIXID)                                                         \
  .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
#include "llvm/TargetParser/PPCTargetParser.def"
                                 .Default({BUILTIN_PPC_UNSUPPORTED, 0,
                                           BUILTIN_PPC_UNSUPPORTED, 0}));

    if (Triple.isOSAIX()) {
      assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
             "Invalid CPU name. Missed by SemaChecking?");
      return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,
                                     ICmpInst::ICMP_EQ, AIXIDValue);
    }

    assert(Triple.isOSLinux() &&
           "__builtin_cpu_is() is only supported for AIX and Linux.");

    assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
           "Invalid CPU name. Missed by SemaChecking?");
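    // On Linux, unless the CPU is known to be unsupported (the
    // BUILTIN_PPC_FALSE check below), the test lowers to a fixed-address load
    // and a compare, roughly (illustrative):
    //   %w = call i32 @llvm.ppc.fixed.addr.ld(i32 PPC_FAWORD_CPUID)
    //   %r = icmp eq i32 %w, <LinuxIDValue>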
Missed by SemaChecking?"); if (LinuxSupportMethod == BUILTIN_PPC_FALSE) return llvm::ConstantInt::getFalse(ConvertType(E->getType())); Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld); Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is"); return Builder.CreateICmpEQ(TheCall, llvm::ConstantInt::get(Int32Ty, LinuxIDValue)); } case Builtin::BI__builtin_cpu_supports: { llvm::Triple Triple = getTarget().getTriple(); const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); StringRef CPUStr = cast(CPUExpr)->getString(); if (Triple.isOSAIX()) { typedef std::tuple CPUSupportType; auto [SupportMethod, FieldIdx, Mask, CompOp, Value] = static_cast(StringSwitch(CPUStr) #define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \ VALUE) \ .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE}) #include "llvm/TargetParser/PPCTargetParser.def" .Default({BUILTIN_PPC_FALSE, 0, 0, CmpInst::Predicate(), 0})); return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp, Value); } assert(Triple.isOSLinux() && "__builtin_cpu_supports() is only supported for AIX and Linux."); auto [FeatureWord, BitMask] = StringSwitch>(CPUStr) #define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \ .Case(Name, {FA_WORD, Bitmask}) #include "llvm/TargetParser/PPCTargetParser.def" .Default({0, 0}); if (!BitMask) return Builder.getFalse(); Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld); Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports"); Value *Mask = Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask)); return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty)); #undef PPC_FAWORD_HWCAP #undef PPC_FAWORD_HWCAP2 #undef PPC_FAWORD_CPUID } // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we // call __builtin_readcyclecounter. 
  case PPC::BI__builtin_ppc_get_timebase:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
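  // For the load builtins below, the byte offset (arg 0) is folded into the
  // base pointer (arg 1) with an i8 GEP before calling the intrinsic; only
  // the length-based lxvl/lxvll variants pass both operands through
  // unchanged. A sketch (illustrative):
  //   vec_ld(off, p)  ->  @llvm.ppc.altivec.lvx(gep i8, p, off)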
  // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
  case PPC::BI__builtin_altivec_lvx:
  case PPC::BI__builtin_altivec_lvxl:
  case PPC::BI__builtin_altivec_lvebx:
  case PPC::BI__builtin_altivec_lvehx:
  case PPC::BI__builtin_altivec_lvewx:
  case PPC::BI__builtin_altivec_lvsl:
  case PPC::BI__builtin_altivec_lvsr:
  case PPC::BI__builtin_vsx_lxvd2x:
  case PPC::BI__builtin_vsx_lxvw4x:
  case PPC::BI__builtin_vsx_lxvd2x_be:
  case PPC::BI__builtin_vsx_lxvw4x_be:
  case PPC::BI__builtin_vsx_lxvl:
  case PPC::BI__builtin_vsx_lxvll: {
    SmallVector<Value *, 2> Ops;
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
          BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
      Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
      Ops.pop_back();
    }

    switch (BuiltinID) {
    default:
      llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
    case PPC::BI__builtin_altivec_lvx:
      ID = Intrinsic::ppc_altivec_lvx;
      break;
    case PPC::BI__builtin_altivec_lvxl:
      ID = Intrinsic::ppc_altivec_lvxl;
      break;
    case PPC::BI__builtin_altivec_lvebx:
      ID = Intrinsic::ppc_altivec_lvebx;
      break;
    case PPC::BI__builtin_altivec_lvehx:
      ID = Intrinsic::ppc_altivec_lvehx;
      break;
    case PPC::BI__builtin_altivec_lvewx:
      ID = Intrinsic::ppc_altivec_lvewx;
      break;
    case PPC::BI__builtin_altivec_lvsl:
      ID = Intrinsic::ppc_altivec_lvsl;
      break;
    case PPC::BI__builtin_altivec_lvsr:
      ID = Intrinsic::ppc_altivec_lvsr;
      break;
    case PPC::BI__builtin_vsx_lxvd2x:
      ID = Intrinsic::ppc_vsx_lxvd2x;
      break;
    case PPC::BI__builtin_vsx_lxvw4x:
      ID = Intrinsic::ppc_vsx_lxvw4x;
      break;
    case PPC::BI__builtin_vsx_lxvd2x_be:
      ID = Intrinsic::ppc_vsx_lxvd2x_be;
      break;
    case PPC::BI__builtin_vsx_lxvw4x_be:
      ID = Intrinsic::ppc_vsx_lxvw4x_be;
      break;
    case PPC::BI__builtin_vsx_lxvl:
      ID = Intrinsic::ppc_vsx_lxvl;
      break;
    case PPC::BI__builtin_vsx_lxvll:
      ID = Intrinsic::ppc_vsx_lxvll;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }

  // vec_st, vec_xst_be
  case PPC::BI__builtin_altivec_stvx:
  case PPC::BI__builtin_altivec_stvxl:
  case PPC::BI__builtin_altivec_stvebx:
  case PPC::BI__builtin_altivec_stvehx:
  case PPC::BI__builtin_altivec_stvewx:
  case PPC::BI__builtin_vsx_stxvd2x:
  case PPC::BI__builtin_vsx_stxvw4x:
  case PPC::BI__builtin_vsx_stxvd2x_be:
  case PPC::BI__builtin_vsx_stxvw4x_be:
  case PPC::BI__builtin_vsx_stxvl:
  case PPC::BI__builtin_vsx_stxvll: {
    SmallVector<Value *, 3> Ops;
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
          BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
      Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
      Ops.pop_back();
    }

    switch (BuiltinID) {
    default:
      llvm_unreachable("Unsupported st intrinsic!");
    case PPC::BI__builtin_altivec_stvx:
      ID = Intrinsic::ppc_altivec_stvx;
      break;
    case PPC::BI__builtin_altivec_stvxl:
      ID = Intrinsic::ppc_altivec_stvxl;
      break;
    case PPC::BI__builtin_altivec_stvebx:
      ID = Intrinsic::ppc_altivec_stvebx;
      break;
    case PPC::BI__builtin_altivec_stvehx:
      ID = Intrinsic::ppc_altivec_stvehx;
      break;
    case PPC::BI__builtin_altivec_stvewx:
      ID = Intrinsic::ppc_altivec_stvewx;
      break;
    case PPC::BI__builtin_vsx_stxvd2x:
      ID = Intrinsic::ppc_vsx_stxvd2x;
      break;
    case PPC::BI__builtin_vsx_stxvw4x:
      ID = Intrinsic::ppc_vsx_stxvw4x;
      break;
    case PPC::BI__builtin_vsx_stxvd2x_be:
      ID = Intrinsic::ppc_vsx_stxvd2x_be;
      break;
    case PPC::BI__builtin_vsx_stxvw4x_be:
      ID = Intrinsic::ppc_vsx_stxvw4x_be;
      break;
    case PPC::BI__builtin_vsx_stxvl:
      ID = Intrinsic::ppc_vsx_stxvl;
      break;
    case PPC::BI__builtin_vsx_stxvll:
      ID = Intrinsic::ppc_vsx_stxvll;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }
  case PPC::BI__builtin_vsx_ldrmb: {
    // Essentially boils down to performing an unaligned VMX load sequence so
    // as to avoid crossing a page boundary and then shuffling the elements
    // into the right side of the vector register.
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
    llvm::Type *ResTy = ConvertType(E->getType());
    bool IsLE = getTarget().isLittleEndian();

    // If the user wants the entire vector, just load the entire vector.
    if (NumBytes == 16) {
      Value *LD =
          Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
      if (!IsLE)
        return LD;

      // Reverse the bytes on LE.
      SmallVector<int, 16> RevMask;
      for (int Idx = 0; Idx < 16; Idx++)
        RevMask.push_back(15 - Idx);
      return Builder.CreateShuffleVector(LD, LD, RevMask);
    }

    llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
    llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
                                                : Intrinsic::ppc_altivec_lvsl);
    llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
    Value *HiMem = Builder.CreateGEP(
        Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
    Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
    Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
    Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");

    Op0 = IsLE ? HiLd : LoLd;
    Op1 = IsLE ? LoLd : HiLd;
    Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
    Constant *Zero =
        llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());

    if (IsLE) {
      SmallVector<int, 16> Consts;
      for (int Idx = 0; Idx < 16; Idx++) {
        int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
                                            : 16 - (NumBytes - Idx);
        Consts.push_back(Val);
      }
      return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
                                         Zero, Consts);
    }
    SmallVector<Constant *, 16> Consts;
    for (int Idx = 0; Idx < 16; Idx++)
      Consts.push_back(Builder.getInt8(NumBytes + Idx));
    Value *Mask2 = ConstantVector::get(Consts);
    return Builder.CreateBitCast(
        Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
  }
  case PPC::BI__builtin_vsx_strmb: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
    bool IsLE = getTarget().isLittleEndian();
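    // The store is decomposed greedily into power-of-two sub-stores; e.g.
    // (illustrative) NumBytes == 11 emits an 8-byte, a 2-byte, and a 1-byte
    // store via the StoreSubVec helper below.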
    auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
      // When storing the whole vector, store it as-is on BE; on LE, reverse
      // the bytes first and then store.
      if (Width == 16) {
        Value *StVec = Op2;
        if (IsLE) {
          SmallVector<int, 16> RevMask;
          for (int Idx = 0; Idx < 16; Idx++)
            RevMask.push_back(15 - Idx);
          StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
        }
        return Builder.CreateStore(
            StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
      }
      auto *ConvTy = Int64Ty;
      unsigned NumElts = 0;
      switch (Width) {
      default:
        llvm_unreachable("width for stores must be a power of 2");
      case 8:
        ConvTy = Int64Ty;
        NumElts = 2;
        break;
      case 4:
        ConvTy = Int32Ty;
        NumElts = 4;
        break;
      case 2:
        ConvTy = Int16Ty;
        NumElts = 8;
        break;
      case 1:
        ConvTy = Int8Ty;
        NumElts = 16;
        break;
      }
      Value *Vec = Builder.CreateBitCast(
          Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
      Value *Ptr =
          Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
      Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
      if (IsLE && Width > 1) {
        Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
        Elt = Builder.CreateCall(F, Elt);
      }
      return Builder.CreateStore(
          Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
    };
    unsigned Stored = 0;
    unsigned RemainingBytes = NumBytes;
    Value *Result;
    if (NumBytes == 16)
      return StoreSubVec(16, 0, 0);
    if (NumBytes >= 8) {
      Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
      RemainingBytes -= 8;
      Stored += 8;
    }
    if (RemainingBytes >= 4) {
      Result = StoreSubVec(4, NumBytes - Stored - 4,
                           IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
      RemainingBytes -= 4;
      Stored += 4;
    }
    if (RemainingBytes >= 2) {
      Result = StoreSubVec(2, NumBytes - Stored - 2,
                           IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
      RemainingBytes -= 2;
      Stored += 2;
    }
    if (RemainingBytes)
      Result =
          StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
    return Result;
  }
  // Square root
  case PPC::BI__builtin_vsx_xvsqrtsp:
  case PPC::BI__builtin_vsx_xvsqrtdp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    if (Builder.getIsFPConstrained()) {
      llvm::Function *F = CGM.getIntrinsic(
          Intrinsic::experimental_constrained_sqrt, ResultType);
      return Builder.CreateConstrainedFPCall(F, X);
    } else {
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
      return Builder.CreateCall(F, X);
    }
  }
  // Count leading zeros
  case PPC::BI__builtin_altivec_vclzb:
  case PPC::BI__builtin_altivec_vclzh:
  case PPC::BI__builtin_altivec_vclzw:
  case PPC::BI__builtin_altivec_vclzd: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }
  case PPC::BI__builtin_altivec_vctzb:
  case PPC::BI__builtin_altivec_vctzh:
  case PPC::BI__builtin_altivec_vctzw:
  case PPC::BI__builtin_altivec_vctzd: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
    return Builder.CreateCall(F, {X, Undef});
  }
  case PPC::BI__builtin_altivec_vinsd:
  case PPC::BI__builtin_altivec_vinsw:
  case PPC::BI__builtin_altivec_vinsd_elt:
  case PPC::BI__builtin_altivec_vinsw_elt: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));

    bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
                        BuiltinID == PPC::BI__builtin_altivec_vinsd);

    bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
                    BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);

    // The third argument must be a compile-time constant.
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
    assert(ArgCI &&
           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");

    // The valid value for the third argument depends on the input type and
    // the builtin called.
    int ValidMaxValue = 0;
    if (IsUnaligned)
      ValidMaxValue = (Is32bit) ? 12 : 8;
    else
      ValidMaxValue = (Is32bit) ? 3 : 1;

    // Get value of third argument.
    int64_t ConstArg = ArgCI->getSExtValue();

    // Compose range checking error message.
    std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
    RangeErrMsg += " number " + llvm::to_string(ConstArg);
    RangeErrMsg += " is outside of the valid range [0, ";
    RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";

    // Issue error if third argument is not within the valid range.
    if (ConstArg < 0 || ConstArg > ValidMaxValue)
      CGM.Error(E->getExprLoc(), RangeErrMsg);

    // Input to vec_replace_elt is an element index, convert to byte index.
    if (!IsUnaligned) {
      ConstArg *= Is32bit ? 4 : 8;
      // Fix the constant according to endianness.
      if (getTarget().isLittleEndian())
        ConstArg = (Is32bit ? 12 : 8) - ConstArg;
    }

    ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
    Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
    // Casting input to vector int as per intrinsic definition.
    Op0 = Is32bit ? Builder.CreateBitCast(
                        Op0, llvm::FixedVectorType::get(Int32Ty, 4))
                  : Builder.CreateBitCast(
                        Op0, llvm::FixedVectorType::get(Int64Ty, 2));
    return Builder.CreateBitCast(
        Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
  }
  case PPC::BI__builtin_altivec_vadduqm:
  case PPC::BI__builtin_altivec_vsubuqm: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
    if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
      return Builder.CreateAdd(Op0, Op1, "vadduqm");
    else
      return Builder.CreateSub(Op0, Op1, "vsubuqm");
  }
  case PPC::BI__builtin_altivec_vaddcuq_c:
  case PPC::BI__builtin_altivec_vsubcuq_c: {
    SmallVector<Value *, 2> Ops;
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
        llvm::IntegerType::get(getLLVMContext(), 128), 1);
    Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
    Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
    ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
             ? Intrinsic::ppc_altivec_vaddcuq
             : Intrinsic::ppc_altivec_vsubcuq;
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
  }
  case PPC::BI__builtin_altivec_vaddeuqm_c:
  case PPC::BI__builtin_altivec_vaddecuq_c:
  case PPC::BI__builtin_altivec_vsubeuqm_c:
  case PPC::BI__builtin_altivec_vsubecuq_c: {
    SmallVector<Value *, 3> Ops;
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
        llvm::IntegerType::get(getLLVMContext(), 128), 1);
    Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
    Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
    Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
    switch (BuiltinID) {
    default:
      llvm_unreachable("Unsupported intrinsic!");
    case PPC::BI__builtin_altivec_vaddeuqm_c:
      ID = Intrinsic::ppc_altivec_vaddeuqm;
      break;
    case PPC::BI__builtin_altivec_vaddecuq_c:
      ID = Intrinsic::ppc_altivec_vaddecuq;
      break;
    case PPC::BI__builtin_altivec_vsubeuqm_c:
      ID = Intrinsic::ppc_altivec_vsubeuqm;
      break;
    case PPC::BI__builtin_altivec_vsubecuq_c:
      ID = Intrinsic::ppc_altivec_vsubecuq;
      break;
    }
    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
  }
  case PPC::BI__builtin_ppc_rldimi:
  case PPC::BI__builtin_ppc_rlwimi: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    Value *Op3 = EmitScalarExpr(E->getArg(3));
    // rldimi is a 64-bit instruction; expand the intrinsic before isel on
    // 32-bit targets to leverage the peephole and avoid legalization effort.
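    // A sketch of that expansion (illustrative):
    //   rldimi(a, b, sh, m)  ==  (rotl64(a, sh) & m) | (b & ~m)
    // where the rotate is emitted as @llvm.fshl.i64(a, a, sh).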
    if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
        !getTarget().getTriple().isPPC64()) {
      Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
      Op2 = Builder.CreateZExt(Op2, Int64Ty);
      Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
      return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
                              Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
    }
    return Builder.CreateCall(
        CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
                             ? Intrinsic::ppc_rldimi
                             : Intrinsic::ppc_rlwimi),
        {Op0, Op1, Op2, Op3});
  }
  case PPC::BI__builtin_ppc_rlwnm: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
                              {Op0, Op1, Op2});
  }
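  // Population parity: the lowering is (illustrative)
  //   poppar(x) == ctpop(x) & 1
  // with the result cast to the return type when the widths differ.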
  case PPC::BI__builtin_ppc_poppar4:
  case PPC::BI__builtin_ppc_poppar8: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = Op0->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
    Value *Tmp = Builder.CreateCall(F, Op0);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result =
          Builder.CreateIntCast(Result, ResultType, /*isSigned*/ true, "cast");
    return Result;
  }
  case PPC::BI__builtin_ppc_cmpb: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    if (getTarget().getTriple().isPPC64()) {
      Function *F =
          CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
      return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
    }
    // For 32 bit, emit the code as below:
    // %conv = trunc i64 %a to i32
    // %conv1 = trunc i64 %b to i32
    // %shr = lshr i64 %a, 32
    // %conv2 = trunc i64 %shr to i32
    // %shr3 = lshr i64 %b, 32
    // %conv4 = trunc i64 %shr3 to i32
    // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
    // %conv5 = zext i32 %0 to i64
    // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
    // %conv614 = zext i32 %1 to i64
    // %shl = shl nuw i64 %conv614, 32
    // %or = or i64 %shl, %conv5
    // ret i64 %or
    Function *F =
        CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
    Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
    Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
    Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
    Value *ArgOneHi =
        Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
    Value *ArgTwoHi =
        Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
    Value *ResLo = Builder.CreateZExt(
        Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
    Value *ResHiShift = Builder.CreateZExt(
        Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
    Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
    return Builder.CreateOr(ResLo, ResHi);
  }
  // Copy sign
  case PPC::BI__builtin_vsx_xvcpsgnsp:
  case PPC::BI__builtin_vsx_xvcpsgndp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    ID = Intrinsic::copysign;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.CreateCall(F, {X, Y});
  }
  // Rounding/truncation
  case PPC::BI__builtin_vsx_xvrspip:
  case PPC::BI__builtin_vsx_xvrdpip:
  case PPC::BI__builtin_vsx_xvrdpim:
  case PPC::BI__builtin_vsx_xvrspim:
  case PPC::BI__builtin_vsx_xvrdpi:
  case PPC::BI__builtin_vsx_xvrspi:
  case PPC::BI__builtin_vsx_xvrdpic:
  case PPC::BI__builtin_vsx_xvrspic:
  case PPC::BI__builtin_vsx_xvrdpiz:
  case PPC::BI__builtin_vsx_xvrspiz: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
        BuiltinID == PPC::BI__builtin_vsx_xvrspim)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_floor
               : Intrinsic::floor;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspi)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_round
               : Intrinsic::round;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspic)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_rint
               : Intrinsic::rint;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspip)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_ceil
               : Intrinsic::ceil;
    else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
             BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
      ID = Builder.getIsFPConstrained()
               ? Intrinsic::experimental_constrained_trunc
               : Intrinsic::trunc;
    llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
    return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
                                        : Builder.CreateCall(F, X);
  }
  // Absolute value
  case PPC::BI__builtin_vsx_xvabsdp:
  case PPC::BI__builtin_vsx_xvabssp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
    return Builder.CreateCall(F, X);
  }
  // Fastmath by default
  case PPC::BI__builtin_ppc_recipdivf:
  case PPC::BI__builtin_ppc_recipdivd:
  case PPC::BI__builtin_ppc_rsqrtf:
  case PPC::BI__builtin_ppc_rsqrtd: {
    FastMathFlags FMF = Builder.getFastMathFlags();
    Builder.getFastMathFlags().setFast();
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));

    if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
        BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
      Value *Y = EmitScalarExpr(E->getArg(1));
      Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
      Builder.getFastMathFlags() &= (FMF);
      return FDiv;
    }
    auto *One = ConstantFP::get(ResultType, 1.0);
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
    Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
    Builder.getFastMathFlags() &= (FMF);
    return FDiv;
  }
  case PPC::BI__builtin_ppc_alignx: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
    if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
      AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
                                     llvm::Value::MaximumAlignment);

    emitAlignmentAssumption(Op1, E->getArg(1),
                            /*The expr loc is sufficient.*/ SourceLocation(),
                            AlignmentCI, nullptr);
    return Op1;
  }
  case PPC::BI__builtin_ppc_rdlam: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    llvm::Type *Ty = Op0->getType();
    Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
    Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
    Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
    return Builder.CreateAnd(Rotate, Op2);
  }
  case PPC::BI__builtin_ppc_load2r: {
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
    return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
  }
  // FMA variations
  case PPC::BI__builtin_ppc_fnmsub:
  case PPC::BI__builtin_ppc_fnmsubs:
  case PPC::BI__builtin_vsx_xvmaddadp:
  case PPC::BI__builtin_vsx_xvmaddasp:
  case PPC::BI__builtin_vsx_xvnmaddadp:
  case PPC::BI__builtin_vsx_xvnmaddasp:
  case PPC::BI__builtin_vsx_xvmsubadp:
  case PPC::BI__builtin_vsx_xvmsubasp:
  case PPC::BI__builtin_vsx_xvnmsubadp:
  case PPC::BI__builtin_vsx_xvnmsubasp: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Z = EmitScalarExpr(E->getArg(2));
    llvm::Function *F;
    if (Builder.getIsFPConstrained())
      F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma,
                           ResultType);
    else
      F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
    switch (BuiltinID) {
    case PPC::BI__builtin_vsx_xvmaddadp:
    case PPC::BI__builtin_vsx_xvmaddasp:
      if (Builder.getIsFPConstrained())
        return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
      else
        return Builder.CreateCall(F, {X, Y, Z});
    case PPC::BI__builtin_vsx_xvnmaddadp:
    case PPC::BI__builtin_vsx_xvnmaddasp:
      if (Builder.getIsFPConstrained())
        return Builder.CreateFNeg(
            Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
      else
        return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
    case PPC::BI__builtin_vsx_xvmsubadp:
    case PPC::BI__builtin_vsx_xvmsubasp:
      if (Builder.getIsFPConstrained())
        return Builder.CreateConstrainedFPCall(
            F, {X, Y, Builder.CreateFNeg(Z, "neg")});
      else
        return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
    case PPC::BI__builtin_ppc_fnmsub:
    case PPC::BI__builtin_ppc_fnmsubs:
    case PPC::BI__builtin_vsx_xvnmsubadp:
    case PPC::BI__builtin_vsx_xvnmsubasp:
      if (Builder.getIsFPConstrained())
        return Builder.CreateFNeg(
            Builder.CreateConstrainedFPCall(
                F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
            "neg");
      else
        return Builder.CreateCall(
            CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
    }
    llvm_unreachable("Unknown FMA operation");
    return nullptr; // Suppress no-return warning
  }

  case PPC::BI__builtin_vsx_insertword: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);

    // Third argument is a compile-time constant int. It must be clamped to
    // the range [0, 12].
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
    assert(ArgCI &&
           "Third arg to xxinsertw intrinsic must be constant integer");
    const int64_t MaxIndex = 12;
    int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);

    // The builtin semantics don't exactly match the xxinsertw instruction's
    // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
    // word from the first argument, and inserts it in the second argument.
    // The instruction extracts the word from its second input register and
    // inserts it into its first input register, so swap the first and second
    // arguments.
    std::swap(Op0, Op1);

    // Need to cast the second argument from a vector of unsigned int to a
    // vector of long long.
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));

    if (getTarget().isLittleEndian()) {
      // Reverse the double words in the vector we will extract from.
      Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
      Op0 = Builder.CreateShuffleVector(Op0, Op0, {1, 0});

      // Reverse the index.
      Index = MaxIndex - Index;
    }

    // Intrinsic expects the first arg to be a vector of int.
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
    Op2 = ConstantInt::getSigned(Int32Ty, Index);
    return Builder.CreateCall(F, {Op0, Op1, Op2});
  }

  case PPC::BI__builtin_vsx_extractuword: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);

    // Intrinsic expects the first argument to be a vector of doublewords.
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));

    // The second argument is a compile-time constant int that needs to
    // be clamped to the range [0, 12].
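    // On little-endian targets the index is additionally reversed and the
    // doublewords of the result are swapped after the call; e.g.
    // (illustrative) a byte index of 4 becomes 12 - 4 = 8, followed by a
    // <1, 0> shuffle.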
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
    assert(ArgCI &&
           "Second Arg to xxextractuw intrinsic must be a constant integer!");
    const int64_t MaxIndex = 12;
    int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);

    if (getTarget().isLittleEndian()) {
      // Reverse the index.
      Index = MaxIndex - Index;
      Op1 = ConstantInt::getSigned(Int32Ty, Index);

      // Emit the call, then reverse the double words of the results vector.
      Value *Call = Builder.CreateCall(F, {Op0, Op1});

      Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, {1, 0});
      return ShuffleCall;
    } else {
      Op1 = ConstantInt::getSigned(Int32Ty, Index);
      return Builder.CreateCall(F, {Op0, Op1});
    }
  }

  case PPC::BI__builtin_vsx_xxpermdi: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
    assert(ArgCI && "Third arg must be constant integer!");

    unsigned Index = ArgCI->getZExtValue();
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));

    // Account for endianness by treating this as just a shuffle. So we use the
    // same indices for both LE and BE in order to produce expected results in
    // both cases.
    int ElemIdx0 = (Index & 2) >> 1;
    int ElemIdx1 = 2 + (Index & 1);

    int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
    Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
    QualType BIRetType = E->getType();
    auto RetTy = ConvertType(BIRetType);
    return Builder.CreateBitCast(ShuffleCall, RetTy);
  }

  case PPC::BI__builtin_vsx_xxsldwi: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
    assert(ArgCI && "Third argument must be a compile time constant");
    unsigned Index = ArgCI->getZExtValue() & 0x3;
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));

    // Create a shuffle mask
    int ElemIdx0;
    int ElemIdx1;
    int ElemIdx2;
    int ElemIdx3;
    if (getTarget().isLittleEndian()) {
      // Little endian element N comes from element 8+N-Index of the
      // concatenated wide vector (of course, using modulo arithmetic on
      // the total number of elements).
      ElemIdx0 = (8 - Index) % 8;
      ElemIdx1 = (9 - Index) % 8;
      ElemIdx2 = (10 - Index) % 8;
      ElemIdx3 = (11 - Index) % 8;
    } else {
      // Big endian ElemIdx<N> = Index + N
      ElemIdx0 = Index;
      ElemIdx1 = Index + 1;
      ElemIdx2 = Index + 2;
      ElemIdx3 = Index + 3;
    }

    int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
    Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
    QualType BIRetType = E->getType();
    auto RetTy = ConvertType(BIRetType);
    return Builder.CreateBitCast(ShuffleCall, RetTy);
  }
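  // __builtin_pack_vector_int128 below picks element positions so that the
  // same source works on both endiannesses; e.g. (illustrative) on LE the
  // first operand is inserted at element 1 of the <2 x i64>, which is the
  // most-significant half of the resulting i128.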
  case PPC::BI__builtin_pack_vector_int128: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    bool isLittleEndian = getTarget().isLittleEndian();
    Value *PoisonValue =
        llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
    Value *Res = Builder.CreateInsertElement(
        PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
    Res = Builder.CreateInsertElement(Res, Op1,
                                      (uint64_t)(isLittleEndian ? 0 : 1));
    return Builder.CreateBitCast(Res, ConvertType(E->getType()));
  }

  case PPC::BI__builtin_unpack_vector_int128: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    ConstantInt *Index = cast<ConstantInt>(Op1);
    Value *Unpacked = Builder.CreateBitCast(
        Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));

    if (getTarget().isLittleEndian())
      Index =
          ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());

    return Builder.CreateExtractElement(Unpacked, Index);
  }

  case PPC::BI__builtin_ppc_sthcx: {
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
    return Builder.CreateCall(F, {Op0, Op1});
  }

  // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
  // Some of the MMA instructions accumulate their result into an existing
  // accumulator whereas the others generate a new accumulator. So we need to
  // use custom code generation to expand a builtin call with a pointer to a
  // load (if the corresponding instruction accumulates its result) followed by
  // the call to the intrinsic and a store of the result.
#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
  case PPC::BI__builtin_##Name:
#include "clang/Basic/BuiltinsPPC.def"
  {
    SmallVector<Value *, 4> Ops;
    for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
      if (E->getArg(i)->getType()->isArrayType())
        Ops.push_back(
            EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
      else
        Ops.push_back(EmitScalarExpr(E->getArg(i)));
    // The first argument of these two builtins is a pointer used to store
    // their result. However, the llvm intrinsics return their result in
    // multiple return values. So, here we emit code extracting these values
    // from the intrinsic results and storing them using that pointer.
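    // For example (illustrative), __builtin_mma_disassemble_acc(vp, &acc)
    // expands to a call to @llvm.ppc.mma.disassemble.acc returning four
    // <16 x i8> values, each of which is stored to vp[0..3].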
    if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
        BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
        BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
      unsigned NumVecs = 2;
      auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
      if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
        NumVecs = 4;
        Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
      }
      llvm::Function *F = CGM.getIntrinsic(Intrinsic);
      Address Addr = EmitPointerWithAlignment(E->getArg(1));
      Value *Vec = Builder.CreateLoad(Addr);
      Value *Call = Builder.CreateCall(F, {Vec});
      llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
      Value *Ptr = Ops[0];
      // Store each returned vector to consecutive 16-byte slots of the
      // destination pointer.
      for (unsigned i = 0; i < NumVecs; i++) {
        Value *Vec = Builder.CreateExtractValue(Call, i);
        llvm::ConstantInt *Index = llvm::ConstantInt::get(IntTy, i);
        Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
        Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
      }
      return Call;
    }
    if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
        BuiltinID == PPC::BI__builtin_mma_build_acc) {
      // Reverse the order of the operands for LE, so the
      // same builtin call works on both LE and BE targets.
      if (getTarget().isLittleEndian())
        std::reverse(Ops.begin() + 1, Ops.end());
    }
    bool Accumulate;
    switch (BuiltinID) {
#define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
  case PPC::BI__builtin_##Name: \
    ID = Intrinsic::Intr; \
    Accumulate = Acc; \
    break;
#include "clang/Basic/BuiltinsPPC.def"
    }
    SmallVector<Value *, 4> CallOps;
    if (Accumulate) {
      Address Addr = EmitPointerWithAlignment(E->getArg(0));
      Value *Acc = Builder.CreateLoad(Addr);
      CallOps.push_back(Acc);
    }
    if (BuiltinID == PPC::BI__builtin_mma_dmmr ||
        BuiltinID == PPC::BI__builtin_mma_dmxor) {
      Address Addr = EmitPointerWithAlignment(E->getArg(1));
      Ops[1] = Builder.CreateLoad(Addr);
    }
    for (unsigned i = 1; i < Ops.size(); i++)
      CallOps.push_back(Ops[i]);
    llvm::Function *F = CGM.getIntrinsic(ID);
    Value *Call = Builder.CreateCall(F, CallOps);
    return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
  }

  case PPC::BI__builtin_ppc_compare_and_swap:
  case PPC::BI__builtin_ppc_compare_and_swaplp: {
    Address Addr = EmitPointerWithAlignment(E->getArg(0));
    Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
    Value *OldVal = Builder.CreateLoad(OldValAddr);
    QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
    LValue LV = MakeAddrLValue(Addr, AtomicTy);
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    auto Pair = EmitAtomicCompareExchange(
        LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
        llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
    // Unlike c11's atomic_compare_exchange, according to
    // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
    // > In either case, the contents of the memory location specified by addr
    // > are copied into the memory location specified by old_val_addr.
    // But it does not specify whether the store to OldValAddr is atomic, nor
    // which ordering to use. Following XL's codegen for now, treat it as a
    // normal store.
    Value *LoadedVal = Pair.first.getScalarVal();
    Builder.CreateStore(LoadedVal, OldValAddr);
    return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
  }
  case PPC::BI__builtin_ppc_fetch_and_add:
  case PPC::BI__builtin_ppc_fetch_and_addlp: {
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
                                 llvm::AtomicOrdering::Monotonic);
  }
  case PPC::BI__builtin_ppc_fetch_and_and:
  case PPC::BI__builtin_ppc_fetch_and_andlp: {
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
                                 llvm::AtomicOrdering::Monotonic);
  }
  case PPC::BI__builtin_ppc_fetch_and_or:
  case PPC::BI__builtin_ppc_fetch_and_orlp: {
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
                                 llvm::AtomicOrdering::Monotonic);
  }
  case PPC::BI__builtin_ppc_fetch_and_swap:
  case PPC::BI__builtin_ppc_fetch_and_swaplp: {
    return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
                                 llvm::AtomicOrdering::Monotonic);
  }
  case PPC::BI__builtin_ppc_ldarx:
  case PPC::BI__builtin_ppc_lwarx:
  case PPC::BI__builtin_ppc_lharx:
  case PPC::BI__builtin_ppc_lbarx:
    return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
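  // mfspr/mtspr move to/from a special-purpose register whose width follows
  // the target pointer width, so the intrinsic is selected as i32 on 32-bit
  // targets and i64 on 64-bit targets (see the datalayout check below).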
  case PPC::BI__builtin_ppc_mfspr: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
                              ? Int32Ty
                              : Int64Ty;
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
    return Builder.CreateCall(F, {Op0});
  }
  case PPC::BI__builtin_ppc_mtspr: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
                              ? Int32Ty
                              : Int64Ty;
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
    return Builder.CreateCall(F, {Op0, Op1});
  }
  case PPC::BI__builtin_ppc_popcntb: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
    return Builder.CreateCall(F, {ArgValue}, "popcntb");
  }
  case PPC::BI__builtin_ppc_mtfsf: {
    // The builtin takes a uint32 that needs to be cast to an
    // f64 to be passed to the intrinsic.
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
    return Builder.CreateCall(F, {Op0, Cast}, "");
  }

  case PPC::BI__builtin_ppc_swdiv_nochk:
  case PPC::BI__builtin_ppc_swdivs_nochk: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    FastMathFlags FMF = Builder.getFastMathFlags();
    Builder.getFastMathFlags().setFast();
    Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
    Builder.getFastMathFlags() &= (FMF);
    return FDiv;
  }
  case PPC::BI__builtin_ppc_fric:
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
                           *this, E, Intrinsic::rint,
                           Intrinsic::experimental_constrained_rint))
        .getScalarVal();
  case PPC::BI__builtin_ppc_frim:
  case PPC::BI__builtin_ppc_frims:
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
                           *this, E, Intrinsic::floor,
                           Intrinsic::experimental_constrained_floor))
        .getScalarVal();
  case PPC::BI__builtin_ppc_frin:
  case PPC::BI__builtin_ppc_frins:
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
                           *this, E, Intrinsic::round,
                           Intrinsic::experimental_constrained_round))
        .getScalarVal();
  case PPC::BI__builtin_ppc_frip:
  case PPC::BI__builtin_ppc_frips:
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
                           *this, E, Intrinsic::ceil,
                           Intrinsic::experimental_constrained_ceil))
        .getScalarVal();
  case PPC::BI__builtin_ppc_friz:
  case PPC::BI__builtin_ppc_frizs:
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
                           *this, E, Intrinsic::trunc,
                           Intrinsic::experimental_constrained_trunc))
        .getScalarVal();
  case PPC::BI__builtin_ppc_fsqrt:
  case PPC::BI__builtin_ppc_fsqrts:
    return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
                           *this, E, Intrinsic::sqrt,
                           Intrinsic::experimental_constrained_sqrt))
        .getScalarVal();
  case PPC::BI__builtin_ppc_test_data_class: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
        {Op0, Op1}, "test_data_class");
  }
  case PPC::BI__builtin_ppc_maxfe: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    Value *Op3 = EmitScalarExpr(E->getArg(3));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
                              {Op0, Op1, Op2, Op3});
  }
  case PPC::BI__builtin_ppc_maxfl: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    Value *Op3 = EmitScalarExpr(E->getArg(3));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
                              {Op0, Op1, Op2, Op3});
  }
  case PPC::BI__builtin_ppc_maxfs: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    Value *Op3 = EmitScalarExpr(E->getArg(3));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
                              {Op0, Op1, Op2, Op3});
  }
  case PPC::BI__builtin_ppc_minfe: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    Value *Op3 = EmitScalarExpr(E->getArg(3));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
                              {Op0, Op1, Op2, Op3});
  }
  case PPC::BI__builtin_ppc_minfl: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    Value *Op3 = EmitScalarExpr(E->getArg(3));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
                              {Op0, Op1, Op2, Op3});
  }
  case PPC::BI__builtin_ppc_minfs: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    Value *Op2 = EmitScalarExpr(E->getArg(2));
    Value *Op3 = EmitScalarExpr(E->getArg(3));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
                              {Op0, Op1, Op2, Op3});
  }
  case PPC::BI__builtin_ppc_swdiv:
  case PPC::BI__builtin_ppc_swdivs: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    return Builder.CreateFDiv(Op0, Op1, "swdiv");
  }
  case PPC::BI__builtin_ppc_set_fpscr_rn:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
                              {EmitScalarExpr(E->getArg(0))});
  case PPC::BI__builtin_ppc_mffs:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
  }
}