Diffstat (limited to 'llvm/lib')
26 files changed, 201 insertions, 86 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8676060..cf221bb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16481,10 +16481,34 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
                                          DAG, DL);
     }
     break;
-  case ISD::AVGFLOORS:
-  case ISD::AVGFLOORU:
   case ISD::AVGCEILS:
   case ISD::AVGCEILU:
+    // trunc (avgceilu (sext (x), sext (y))) -> avgceils(x, y)
+    // trunc (avgceils (zext (x), zext (y))) -> avgceilu(x, y)
+    if (N0.hasOneUse()) {
+      SDValue Op0 = N0.getOperand(0);
+      SDValue Op1 = N0.getOperand(1);
+      if (N0.getOpcode() == ISD::AVGCEILU) {
+        if (TLI.isOperationLegalOrCustom(ISD::AVGCEILS, VT) &&
+            Op0.getOpcode() == ISD::SIGN_EXTEND &&
+            Op1.getOpcode() == ISD::SIGN_EXTEND &&
+            Op0.getOperand(0).getValueType() == VT &&
+            Op1.getOperand(0).getValueType() == VT)
+          return DAG.getNode(ISD::AVGCEILS, DL, VT, Op0.getOperand(0),
+                             Op1.getOperand(0));
+      } else {
+        if (TLI.isOperationLegalOrCustom(ISD::AVGCEILU, VT) &&
+            Op0.getOpcode() == ISD::ZERO_EXTEND &&
+            Op1.getOpcode() == ISD::ZERO_EXTEND &&
+            Op0.getOperand(0).getValueType() == VT &&
+            Op1.getOperand(0).getValueType() == VT)
+          return DAG.getNode(ISD::AVGCEILU, DL, VT, Op0.getOperand(0),
+                             Op1.getOperand(0));
+      }
+    }
+    [[fallthrough]];
+  case ISD::AVGFLOORS:
+  case ISD::AVGFLOORU:
   case ISD::ABDS:
   case ISD::ABDU:
     // (trunc (avg a, b)) -> (avg (trunc a), (trunc b))
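The fold added above rests on a narrowing identity for ceiling averages: computing avgceilu in a wider type on sign-extended operands and truncating gives the same result as avgceils on the narrow type, and dually for avgceils over zero-extends. A standalone sanity check of that identity, exhaustive over i8 (illustrative only, not part of the patch; it assumes C++20's defined arithmetic right shift on negative values):

// Brute-force check: trunc(avgceilu(sext x, sext y)) == avgceils(x, y) and
// trunc(avgceils(zext x, zext y)) == avgceilu(x, y), for all i8 pairs.
#include <cassert>
#include <cstdint>

// ISD::AVGCEIL* semantics: ceil((a + b) / 2), addition done without overflow.
static int8_t avgceils8(int8_t a, int8_t b) {
  return static_cast<int8_t>((int(a) + int(b) + 1) >> 1);
}
static uint8_t avgceilu8(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>((unsigned(a) + unsigned(b) + 1) >> 1);
}
static int16_t avgceils16(int16_t a, int16_t b) {
  return static_cast<int16_t>((int32_t(a) + int32_t(b) + 1) >> 1);
}
static uint16_t avgceilu16(uint16_t a, uint16_t b) {
  return static_cast<uint16_t>((uint32_t(a) + uint32_t(b) + 1) >> 1);
}

int main() {
  for (int x = -128; x <= 127; ++x)
    for (int y = -128; y <= 127; ++y) {
      int8_t sx = int8_t(x), sy = int8_t(y);
      // sext to i16, unsigned ceiling average, trunc back to i8.
      uint16_t w = avgceilu16(uint16_t(int16_t(sx)), uint16_t(int16_t(sy)));
      assert(int8_t(w) == avgceils8(sx, sy));
    }
  for (int x = 0; x <= 255; ++x)
    for (int y = 0; y <= 255; ++y) {
      uint8_t ux = uint8_t(x), uy = uint8_t(y);
      // zext to i16, signed ceiling average, trunc back to i8.
      int16_t w = avgceils16(int16_t(ux), int16_t(uy));
      assert(uint8_t(w) == avgceilu8(ux, uy));
    }
  return 0;
}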
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 1c8383c..54d94b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1466,6 +1466,13 @@ def FeatureClusters : SubtargetFeature<
   "clusters",
   "Has clusters of workgroups support"
 >;
 
+def FeatureWaitsBeforeSystemScopeStores : SubtargetFeature<
+  "waits-before-system-scope-stores",
+  "RequiresWaitsBeforeSystemScopeStores",
+  "true",
+  "Target requires waits for loads and atomics before system scope stores"
+>;
+
 // Dummy feature used to disable assembler instructions.
 def FeatureDisable : SubtargetFeature<"",
   "FeatureDisable","true",
@@ -2060,7 +2067,8 @@ def FeatureISAVersion12 : FeatureSet<
   FeatureMaxHardClauseLength32,
   Feature1_5xVGPRs,
   FeatureMemoryAtomicFAddF32DenormalSupport,
-  FeatureBVHDualAndBVH8Insts
+  FeatureBVHDualAndBVH8Insts,
+  FeatureWaitsBeforeSystemScopeStores,
 ]>;
 
 def FeatureISAVersion12_50 : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index ac660d5..f377b8a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -290,6 +290,7 @@ protected:
   bool Has45BitNumRecordsBufferResource = false;
 
   bool HasClusters = false;
+  bool RequiresWaitsBeforeSystemScopeStores = false;
 
   // Dummy feature to use for assembler in tablegen.
   bool FeatureDisable = false;
@@ -1861,6 +1862,10 @@ public:
   bool has45BitNumRecordsBufferResource() const {
     return Has45BitNumRecordsBufferResource;
   }
+
+  bool requiresWaitsBeforeSystemScopeStores() const {
+    return RequiresWaitsBeforeSystemScopeStores;
+  }
 };
 
 class GCNUserSGPRUsageInfo {
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index a177a42..6ab8d552 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -2673,7 +2673,8 @@ bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
   const unsigned Scope = CPol->getImm() & CPol::SCOPE;
 
   // GFX12.0 only: Extra waits needed before system scope stores.
-  if (!ST.hasGFX1250Insts() && !Atomic && Scope == CPol::SCOPE_SYS)
+  if (ST.requiresWaitsBeforeSystemScopeStores() && !Atomic &&
+      Scope == CPol::SCOPE_SYS)
     Changed |= insertWaitsBeforeSystemScopeStore(MI.getIterator());
 
   return Changed;
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 92af04a..4695a6f 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -825,8 +825,7 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
 def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
       (instrs
     SRADI_rec,
-    SRAWI_rec,
-    SRAWI8_rec
+    SRAWI8_rec, SRAWI_rec
 )>;
 
 // Single crack instructions
@@ -834,8 +833,7 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
 def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
       (instrs
     SRAD_rec,
-    SRAW_rec,
-    SRAW8_rec
+    SRAW8_rec, SRAW_rec
 )>;
 
 // 2-way crack instructions
@@ -883,7 +881,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
 // 3 Cycles ALU operations, 1 input operands
 def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
       (instrs
-    ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, ADDItocL8, LI, LI8,
+    ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8,
     ADDIC, ADDIC8,
     ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
     ADDME, ADDME8,
@@ -1864,7 +1862,7 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read]
       (instrs
     CP_PASTE8_rec, CP_PASTE_rec,
     SLBIEG,
-    TLBIE
+    TLBIE, TLBIE8P9, TLBIEP9
 )>;
 
 // Single crack instructions
@@ -1886,8 +1884,7 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read,
 def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
       (instrs
     ISYNC,
-    SYNCP10,
-    SYNC
+    SYNC, SYNCP10
 )>;
 
 // Expand instructions
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 801ae83..3f5f7d3 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -905,7 +905,7 @@ def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
     SLBIEG,
     STMW,
     STSWI,
-    TLBIE
+    TLBIE, TLBIEP9, TLBIE8P9
 )>;
 
 // Vector Store Instruction
Predicate<"Subtarget->isISA3_1()">; def IsNotISA3_1 : Predicate<"!Subtarget->isISA3_1()">; def IsISAFuture : Predicate<"Subtarget->isISAFuture()">; diff --git a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def index 6bb66bc..043c9e4 100644 --- a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def +++ b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def @@ -29,7 +29,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1, ADDIStocHA8, ADDIdtprelL32, ADDItlsldLADDR32, - ADDItocL8, + ADDItocL, ADDME, ADDME8, ADDME8O, @@ -209,7 +209,9 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1, SRADI, SRADI_32, SRAW, + SRAW8, SRAWI, + SRAWI8, SRD, SRD_rec, SRW, @@ -518,7 +520,7 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1, ADDIStocHA8, ADDIdtprelL32, ADDItlsldLADDR32, - ADDItocL8, + ADDItocL, ADDME, ADDME8, ADDME8O, @@ -747,7 +749,9 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1, SRADI, SRADI_32, SRAW, + SRAW8, SRAWI, + SRAWI8, SRD, SRD_rec, SRW, diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td index fba1c66..98c5f09 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -850,6 +850,26 @@ class XForm_45<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = 0; } +class XForm_RSB5_UIMM2_2UIMM1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, NoItinerary> { + + bits<5> RS; + bits<5> RB; + bits<2> RIC; + bits<1> PRS; + bits<1> R; + + let Pattern = pattern; + + let Inst{6...10} = RS; + let Inst{12...13} = RIC; + let Inst{14} = PRS; + let Inst{15} = R; + let Inst{16...20} = RB; + let Inst{21...30} = xo; +} + class X_FRT5_XO2_XO3_XO10<bits<6> opcode, bits<2> xo1, bits<3> xo2, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 44d1a44..f399811 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -4321,7 +4321,22 @@ def TLBLI : XForm_16b<31, 1010, (outs), (ins gprc:$RB), "tlbli $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>; def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RST, gprc:$RB), - "tlbie $RB,$RST", IIC_SprTLBIE, []>; + "tlbie $RB, $RST", IIC_SprTLBIE, []>, + Requires<[IsNotISA3_0]>; + +let Predicates = [IsISA3_0] in { + def TLBIEP9 : XForm_RSB5_UIMM2_2UIMM1<31, 306, (outs), + (ins gprc:$RB, gprc:$RS, u2imm:$RIC, + u1imm:$PRS, u1imm:$R), + "tlbie $RB, $RS, $RIC, $PRS, $R", []>; + let Interpretation64Bit = 1, isCodeGenOnly = 1 in { + def TLBIE8P9 + : XForm_RSB5_UIMM2_2UIMM1<31, 306, (outs), + (ins g8rc:$RB, g8rc:$RS, u2imm:$RIC, + u1imm:$PRS, u1imm:$R), + "tlbie $RB, $RS, $RIC, $PRS, $R", []>; + } +} def TLBSX : XForm_tlb<914, (outs), (ins gprc:$RA, gprc:$RB), "tlbsx $RA, $RB", IIC_LdStLoad>, Requires<[IsBookE]>; @@ -4669,7 +4684,11 @@ def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>; } -def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>; +def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>, Requires<[IsNotISA3_0]>; +let Predicates = [IsISA3_0] in { + def : InstAlias<"tlbie $RB", (TLBIEP9 R0, gprc:$RB, 0, 0, 0)>; + def : InstAlias<"tlbie $RB, $RS", (TLBIEP9 gprc:$RB, gprc:$RS, 0, 0, 0)>; +} def : InstAlias<"tlbrehi $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 0)>, Requires<[IsPPC4xx]>; diff --git a/llvm/lib/Target/X86/X86.h 
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 706ab2b..51b540a 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -14,7 +14,10 @@
 #ifndef LLVM_LIB_TARGET_X86_X86_H
 #define LLVM_LIB_TARGET_X86_X86_H
 
+#include "llvm/IR/Analysis.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
 
@@ -162,7 +165,17 @@ FunctionPass *createX86WinEHUnwindV2Pass();
 
 /// The pass transforms load/store <256 x i32> to AMX load/store intrinsics
 /// or split the data to two <128 x i32>.
-FunctionPass *createX86LowerAMXTypePass();
+class X86LowerAMXTypePass : public PassInfoMixin<X86LowerAMXTypePass> {
+private:
+  const TargetMachine *TM;
+
+public:
+  X86LowerAMXTypePass(const TargetMachine *TM) : TM(TM) {}
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+  static bool isRequired() { return true; }
+};
+
+FunctionPass *createX86LowerAMXTypeLegacyPass();
 
 /// The pass transforms amx intrinsics to scalar operation if the function has
 /// optnone attribute or it is O0.
diff --git a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
index d979517..2c0443d 100644
--- a/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp
@@ -10,6 +10,7 @@
 /// TODO: Port CodeGen passes to new pass manager.
 //===----------------------------------------------------------------------===//
 
+#include "X86.h"
 #include "X86ISelDAGToDAG.h"
 #include "X86TargetMachine.h"
 
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 0ba71ad..8ffd454 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -46,12 +46,14 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Analysis.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
@@ -64,7 +66,7 @@
 using namespace llvm;
 using namespace PatternMatch;
 
-#define DEBUG_TYPE "lower-amx-type"
+#define DEBUG_TYPE "x86-lower-amx-type"
 
 static bool isAMXCast(Instruction *II) {
   return match(II,
@@ -137,7 +139,7 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function &F) {
 
 class ShapeCalculator {
 private:
-  TargetMachine *TM = nullptr;
+  const TargetMachine *TM = nullptr;
 
   // In AMX intrinsics we let Shape = {Row, Col}, but the
   // RealCol = Col / ElementSize. We may use the RealCol
@@ -145,7 +147,7 @@ private:
   std::map<Value *, Value *> Col2Row, Row2Col;
 
 public:
-  ShapeCalculator(TargetMachine *TargetM) : TM(TargetM) {}
+  ShapeCalculator(const TargetMachine *TargetM) : TM(TargetM) {}
   std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo);
   std::pair<Value *, Value *> getShape(PHINode *Phi);
   Value *getRowFromCol(Instruction *II, Value *V, unsigned Granularity);
@@ -1432,8 +1434,58 @@ bool X86LowerAMXCast::transformAllAMXCast() {
   return Change;
 }
 
+bool lowerAmxType(Function &F, const TargetMachine *TM,
+                  TargetLibraryInfo *TLI) {
+  // Performance optimization: most code doesn't use AMX, so return early if
+  // there are no instructions that produce AMX values. This is sufficient, as
+  // AMX arguments and constants are not allowed -- so any producer of an AMX
+  // value must be an instruction.
+  // TODO: find a cheaper way for this, without looking at all instructions.
+  if (!containsAMXCode(F))
+    return false;
+
+  bool C = false;
+  ShapeCalculator SC(TM);
+  X86LowerAMXCast LAC(F, &SC);
+  C |= LAC.combineAMXcast(TLI);
+  // There might be remaining AMXcast after combineAMXcast and they should be
+  // handled elegantly.
+  C |= LAC.transformAllAMXCast();
+
+  X86LowerAMXType LAT(F, &SC);
+  C |= LAT.visit();
+
+  // Prepare for fast register allocation at O0.
+  // Todo: May better check the volatile model of AMX code, not just
+  // by checking Attribute::OptimizeNone and CodeGenOptLevel::None.
+  if (TM->getOptLevel() == CodeGenOptLevel::None) {
+    // If Front End not use O0 but the Mid/Back end use O0, (e.g.
+    // "Clang -O2 -S -emit-llvm t.c" + "llc t.ll") we should make
+    // sure the amx data is volatile, that is necessary for AMX fast
+    // register allocation.
+    if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
+      X86VolatileTileData VTD(F);
+      C = VTD.volatileTileData() || C;
+    }
+  }
+
+  return C;
+}
+
 } // anonymous namespace
 
+PreservedAnalyses X86LowerAMXTypePass::run(Function &F,
+                                           FunctionAnalysisManager &FAM) {
+  TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+  bool Changed = lowerAmxType(F, TM, &TLI);
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA = PreservedAnalyses::none();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
 namespace {
 
 class X86LowerAMXTypeLegacyPass : public FunctionPass {
@@ -1443,44 +1495,10 @@ public:
   X86LowerAMXTypeLegacyPass() : FunctionPass(ID) {}
 
   bool runOnFunction(Function &F) override {
-    // Performance optimization: most code doesn't use AMX, so return early if
-    // there are no instructions that produce AMX values. This is sufficient, as
-    // AMX arguments and constants are not allowed -- so any producer of an AMX
-    // value must be an instruction.
-    // TODO: find a cheaper way for this, without looking at all instructions.
-    if (!containsAMXCode(F))
-      return false;
-
-    bool C = false;
     TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
     TargetLibraryInfo *TLI =
         &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-
-    ShapeCalculator SC(TM);
-    X86LowerAMXCast LAC(F, &SC);
-    C |= LAC.combineAMXcast(TLI);
-    // There might be remaining AMXcast after combineAMXcast and they should be
-    // handled elegantly.
-    C |= LAC.transformAllAMXCast();
-
-    X86LowerAMXType LAT(F, &SC);
-    C |= LAT.visit();
-
-    // Prepare for fast register allocation at O0.
-    // Todo: May better check the volatile model of AMX code, not just
-    // by checking Attribute::OptimizeNone and CodeGenOptLevel::None.
-    if (TM->getOptLevel() == CodeGenOptLevel::None) {
-      // If Front End not use O0 but the Mid/Back end use O0, (e.g.
-      // "Clang -O2 -S -emit-llvm t.c" + "llc t.ll") we should make
-      // sure the amx data is volatile, that is nessary for AMX fast
-      // register allocation.
-      if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
-        X86VolatileTileData VTD(F);
-        C = VTD.volatileTileData() || C;
-      }
-    }
-
-    return C;
+    return lowerAmxType(F, TM, TLI);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1501,6 +1519,6 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(X86LowerAMXTypeLegacyPass, DEBUG_TYPE, PassName, false,
                     false)
 
-FunctionPass *llvm::createX86LowerAMXTypePass() {
+FunctionPass *llvm::createX86LowerAMXTypeLegacyPass() {
   return new X86LowerAMXTypeLegacyPass();
 }
diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def
index 3f2a433..fc25d55 100644
--- a/llvm/lib/Target/X86/X86PassRegistry.def
+++ b/llvm/lib/Target/X86/X86PassRegistry.def
@@ -12,11 +12,16 @@
 
 // NOTE: NO INCLUDE GUARD DESIRED!
 
+#ifndef FUNCTION_PASS
+#define FUNCTION_PASS(NAME, CREATE_PASS)
+#endif
+FUNCTION_PASS("x86-lower-amx-type", X86LowerAMXTypePass(this))
+#undef FUNCTION_PASS
+
 #ifndef DUMMY_FUNCTION_PASS
 #define DUMMY_FUNCTION_PASS(NAME, CREATE_PASS)
 #endif
 DUMMY_FUNCTION_PASS("lower-amx-intrinsics", X86LowerAMXIntrinsics(*this))
-DUMMY_FUNCTION_PASS("lower-amx-type", X86LowerAMXTypePass(*this))
 DUMMY_FUNCTION_PASS("x86-partial-reduction", X86PartialReduction())
 DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass())
 #undef DUMMY_FUNCTION_PASS
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 8dd6f3d..9a76abc 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -423,7 +423,7 @@ void X86PassConfig::addIRPasses() {
   // We add both pass anyway and when these two passes run, we skip the pass
   // based on the option level and option attribute.
   addPass(createX86LowerAMXIntrinsicsPass());
-  addPass(createX86LowerAMXTypePass());
+  addPass(createX86LowerAMXTypeLegacyPass());
 
   TargetPassConfig::addIRPasses();
 
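Taken together, the X86 changes are a standard new-pass-manager port: the lowering logic moves into a shared lowerAmxType() helper used by both the legacy wrapper (still scheduled in addIRPasses via createX86LowerAMXTypeLegacyPass) and the new X86LowerAMXTypePass, and the FUNCTION_PASS entry should make the pass addressable by name as x86-lower-amx-type wherever the target's pass registry is consulted (for example through opt's -passes pipeline, assuming the X86 TargetMachine registers this file via registerPassBuilderCallbacks). A minimal sketch of driving the ported pass directly; since X86.h is a target-internal header, this is illustrative rather than buildable out of tree:

#include "X86.h"  // target-internal; declares X86LowerAMXTypePass
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

// Run the new-PM AMX type lowering over one function, assuming a
// TargetMachine obtained elsewhere (e.g. from an X86 target setup).
static void runLowerAMXType(Function &F, const TargetMachine *TM) {
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM); // provides TargetLibraryAnalysis

  FunctionPassManager FPM;
  FPM.addPass(X86LowerAMXTypePass(TM)); // constructor added by this patch
  FPM.run(F, FAM);
}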
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index cb6ca72..7c364f8 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1539,7 +1539,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
         IID == Intrinsic::experimental_vp_strided_load) {
       Stride = VPI->getOperand(PtrOpNo + 1);
       // Use the pointer alignment as the element alignment if the stride is a
-      // mutiple of the pointer alignment. Otherwise, the element alignment
+      // multiple of the pointer alignment. Otherwise, the element alignment
       // should be Align(1).
       unsigned PointerAlign = Alignment.valueOrOne().value();
       if (!isa<ConstantInt>(Stride) ||
@@ -2399,7 +2399,7 @@ void ModuleAddressSanitizer::instrumentGlobalsELF(
 
   // Putting globals in a comdat changes the semantic and potentially cause
   // false negative odr violations at link time. If odr indicators are used, we
-  // keep the comdat sections, as link time odr violations will be dectected on
+  // keep the comdat sections, as link time odr violations will be detected on
   // the odr indicator symbols.
   bool UseComdatForGlobalsGC = UseOdrIndicator && !UniqueModuleId.empty();
 
@@ -3858,7 +3858,7 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
     I->eraseFromParent();
   }
 
-  // Replace all uses of AddessReturnedByAlloca with NewAddressPtr.
+  // Replace all uses of AddressReturnedByAlloca with NewAddressPtr.
   AI->replaceAllUsesWith(NewAddressPtr);
 
   // We are done. Erase old alloca from parent.
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 72e8e50..0688bc7 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -359,7 +359,7 @@ class CHR {
     unsigned Count = 0;
     // Find out how many times region R is cloned. Note that if the parent
     // of R is cloned, R is also cloned, but R's clone count is not updated
-    // from the clone of the parent. We need to accumlate all the counts
+    // from the clone of the parent. We need to accumulate all the counts
     // from the ancestors to get the clone count.
     while (R) {
       Count += DuplicationCount[R];
@@ -1513,7 +1513,7 @@ static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
       BI->swapSuccessors();
       // Don't need to swap this in terms of
       // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
-      // mean whehter the branch is likely go into the if-then rather than
+      // mean whether the branch is likely go into the if-then rather than
      // successor0/successor1 and because we can tell which edge is the then or
       // the else one by comparing the destination to the region exit block.
       continue;
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index cf87e35..1e5946a 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -83,7 +83,7 @@ static cl::opt<unsigned>
 // ICP the candidate function even when only a declaration is present.
 static cl::opt<bool> ICPAllowDecls(
     "icp-allow-decls", cl::init(false), cl::Hidden,
-    cl::desc("Promote the target candidate even when the defintion "
+    cl::desc("Promote the target candidate even when the definition "
             " is not available"));
 
 // ICP hot candidate functions only. When setting to false, non-cold functions
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 5e7548b..7795cce 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -139,7 +139,7 @@ cl::opt<bool> ConditionalCounterUpdate(
     cl::init(false));
 
 // If the option is not specified, the default behavior about whether
-// counter promotion is done depends on how instrumentaiton lowering
+// counter promotion is done depends on how instrumentation lowering
 // pipeline is setup, i.e., the default value of true of this option
 // does not mean the promotion will be done by default. Explicitly
 // setting this option can override the default behavior.
@@ -1052,7 +1052,7 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
   GlobalVariable *Name = Ind->getName();
   auto It = ProfileDataMap.find(Name);
   assert(It != ProfileDataMap.end() && It->second.DataVar &&
-         "value profiling detected in function with no counter incerement");
+         "value profiling detected in function with no counter increment");
   GlobalVariable *DataVar = It->second.DataVar;
   uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
index 3c0f185..05616d8 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfInstrumentation.cpp
@@ -490,7 +490,7 @@ void createProfileFileNameVar(Module &M) {
   }
 }
 
-// Set MemprofHistogramFlag as a Global veriable in IR. This makes it accessible
+// Set MemprofHistogramFlag as a Global variable in IR. This makes it accessible
 // to the runtime, changing shadow count behavior.
 void createMemprofHistogramFlagVar(Module &M) {
   const StringRef VarName(MemProfHistogramFlagVar);
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 10b03bb..471c6ec 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3136,7 +3136,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   /// If we don't instrument it and it gets inlined,
   /// our interceptor will not kick in and we will lose the memmove.
   /// If we instrument the call here, but it does not get inlined,
-  /// we will memove the shadow twice: which is bad in case
+  /// we will memmove the shadow twice: which is bad in case
   /// of overlapping regions. So, we simply lower the intrinsic to a call.
   ///
   /// Similar situation exists for memcpy and memset.
@@ -4775,7 +4775,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   // _mm_round_ps / _mm_round_ps.
 
   // Similar to maybeHandleSimpleNomemIntrinsic except
-  // the second argument is guranteed to be a constant integer.
+  // the second argument is guaranteed to be a constant integer.
   void handleRoundPdPsIntrinsic(IntrinsicInst &I) {
     assert(I.getArgOperand(0)->getType() == I.getType());
     assert(I.arg_size() == 2);
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index f5b6686..5f87ed6 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -176,7 +176,7 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
     assert(areAllBBsReachable(
                F, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M)
                       .getManager()) &&
-           "Function has unreacheable basic blocks. The expectation was that "
+           "Function has unreachable basic blocks. The expectation was that "
            "DCE was run before.");
 
     auto It = FlattenedProfile.find(AssignGUIDPass::getGUID(F));
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
index 0a358d4..de7c169 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -253,7 +253,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
   Value *RealContext = nullptr;
 
   StructType *ThisContextType = nullptr;
-  Value *TheRootFuctionData = nullptr;
+  Value *TheRootFunctionData = nullptr;
   Value *ExpectedCalleeTLSAddr = nullptr;
   Value *CallsiteInfoTLSAddr = nullptr;
   const bool HasMusttail = [&F]() {
@@ -283,7 +283,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
           Guid = Builder.getInt64(
              AssignGUIDPass::getGUID(cast<Function>(*Mark->getNameValue())));
           // The type of the context of this function is now knowable since we have
-          // NumCallsites and NumCounters. We delcare it here because it's more
+          // NumCallsites and NumCounters. We declare it here because it's more
           // convenient - we have the Builder.
           ThisContextType = StructType::get(
              F.getContext(),
@@ -291,28 +291,27 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
               ArrayType::get(Builder.getPtrTy(), NumCallsites)});

           // Figure out which way we obtain the context object for this function -
           // if it's an entrypoint, then we call StartCtx, otherwise GetCtx. In the
-          // former case, we also set TheRootFuctionData since we need to release it
-          // at the end (plus it can be used to know if we have an entrypoint or a
-          // regular function)
-          // Don't set a name, they end up taking a lot of space and we don't need
-          // them.
+          // former case, we also set TheRootFunctionData since we need to release
+          // it at the end (plus it can be used to know if we have an entrypoint or
+          // a regular function). Don't set a name, they end up taking a lot of
+          // space and we don't need them.
           // Zero-initialize the FunctionData, except for functions that have
           // musttail calls. There, we set the CtxRoot field to 1, which will be
           // treated as a "can't be set as root".
-          TheRootFuctionData = new GlobalVariable(
+          TheRootFunctionData = new GlobalVariable(
               M, FunctionDataTy, false, GlobalVariable::InternalLinkage,
               HasMusttail ? CannotBeRootInitializer
                           : Constant::getNullValue(FunctionDataTy));

           if (ContextRootSet.contains(&F)) {
             Context = Builder.CreateCall(
-                StartCtx, {TheRootFuctionData, Guid, Builder.getInt32(NumCounters),
+                StartCtx, {TheRootFunctionData, Guid, Builder.getInt32(NumCounters),
                            Builder.getInt32(NumCallsites)});
             ORE.emit(
                 [&] { return OptimizationRemark(DEBUG_TYPE, "Entrypoint", &F); });
           } else {
-            Context = Builder.CreateCall(GetCtx, {TheRootFuctionData, &F, Guid,
+            Context = Builder.CreateCall(GetCtx, {TheRootFunctionData, &F, Guid,
                                                   Builder.getInt32(NumCounters),
                                                   Builder.getInt32(NumCallsites)});
             ORE.emit([&] {
@@ -399,7 +398,7 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
       } else if (!HasMusttail && isa<ReturnInst>(I)) {
         // Remember to release the context if we are an entrypoint.
         IRBuilder<> Builder(&I);
-        Builder.CreateCall(ReleaseCtx, {TheRootFuctionData});
+        Builder.CreateCall(ReleaseCtx, {TheRootFunctionData});
         ContextWasReleased = true;
       }
     }
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 120c4f6..71736cf 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1957,7 +1957,7 @@ static bool InstrumentAllFunctions(
     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
     function_ref<LoopInfo *(Function &)> LookupLI,
     PGOInstrumentationType InstrumentationType) {
-  // For the context-sensitve instrumentation, we should have a separated pass
+  // For the context-sensitive instrumentation, we should have a separated pass
   // (before LTO/ThinLTO linking) to create these variables.
   if (InstrumentationType == PGOInstrumentationType::FDO)
     createIRLevelProfileFlagVar(M, InstrumentationType);
@@ -2248,7 +2248,7 @@ static bool annotateAllFunctions(
       Func.populateCoverage();
       continue;
     }
-    // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
+    // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
     // it means the profile for the function is unrepresentative and this
     // function is actually hot / warm. We will reset the function hot / cold
     // attribute and drop all the profile counters.
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 0d48a35..fd0e9f1 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -355,7 +355,7 @@ static bool isVtableAccess(Instruction *I) {
 }
 
 // Do not instrument known races/"benign races" that come from compiler
-// instrumentatin. The user has no way of suppressing them.
+// instrumentation. The user has no way of suppressing them.
 static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) {
   // Peel off GEPs and BitCasts.
   Addr = Addr->stripInBoundsOffsets();
diff --git a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
index 9471ae3..78d4a57e 100644
--- a/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp
@@ -643,7 +643,7 @@ bool TypeSanitizer::instrumentWithShadowUpdate(
   // doesn't match, then we call the runtime (which may yet determine that
   // the mismatch is okay).
   //
-  // The checks generated below have the following strucutre.
+  // The checks generated below have the following structure.
   //
   // ; First we load the descriptor for the load from shadow memory and
   // ; compare it against the type descriptor for the current access type.
