diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 28 | 
1 files changed, 24 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 6dcbced..b7fa899 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1288,18 +1288,38 @@ void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {  }  void WaitcntBrackets::applyXcnt(const AMDGPU::Waitcnt &Wait) { +  // On entry to a block with multiple predecessors, there may +  // be pending SMEM and VMEM events active at the same time. +  // In such cases, only clear one active event at a time. +  auto applyPendingXcntGroup = [this](unsigned E) { +    unsigned LowerBound = getScoreLB(X_CNT); +    applyWaitcnt(X_CNT, 0); +    PendingEvents |= (1 << E); +    setScoreLB(X_CNT, LowerBound); +  }; +    // Wait on XCNT is redundant if we are already waiting for a load to complete.    // SMEM can return out of order, so only omit XCNT wait if we are waiting till    // zero. -  if (Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP)) -    return applyWaitcnt(X_CNT, 0); +  if (Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP)) { +    if (hasPendingEvent(VMEM_GROUP)) +      applyPendingXcntGroup(VMEM_GROUP); +    else +      applyWaitcnt(X_CNT, 0); +    return; +  }    // If we have pending store we cannot optimize XCnt because we do not wait for    // stores. VMEM loads return in order, so if we only have loads XCnt is    // decremented to the same number as LOADCnt.    if (Wait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) && -      !hasPendingEvent(STORE_CNT)) -    return applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt)); +      !hasPendingEvent(STORE_CNT)) { +    if (hasPendingEvent(SMEM_GROUP)) +      applyPendingXcntGroup(SMEM_GROUP); +    else +      applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt)); +    return; +  }    applyWaitcnt(X_CNT, Wait.XCnt);  }  | 
