//===- MemoryUtils.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Transforms/MemoryUtils.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/IR/Dominance.h"
#include "llvm/ADT/STLExtras.h"

namespace {
/// Helper class to detect if an alloca is inside an mlir::Block that can be
/// reached again before its deallocation points via block successors. This
/// analysis is only valid if the deallocation points are inside (or nested
/// inside) the same region as the alloca because it does not consider region
/// CFG (for instance, the block inside a fir.do_loop is obviously inside a
/// loop, but that loop is not formed by blocks). The dominance of the alloca
/// over its deallocation points implies this pre-condition (although it is
/// more restrictive).
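///
/// As a purely illustrative sketch (hypothetical block CFG, not tied to a
/// particular dialect), the alloca in ^bb1 below is "in a cycle" because ^bb2
/// branches back to ^bb1 before any deallocation point is reached:
///   ^bb1: %1 = fir.alloca ...
///         cf.cond_br %cond, ^bb2, ^bb3
///   ^bb2: cf.br ^bb1        // back edge re-entering the alloca block
///   ^bb3: <region exit / deallocation point>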
class BlockCycleDetector {
public:
  bool allocaIsInCycle(fir::AllocaOp alloca,
                       llvm::ArrayRef<mlir::Operation *> deallocationPoints);

private:
  // Cache for blocks owning allocas that have been analyzed. In many Fortran
  // programs, allocas are usually created in the same blocks and there are no
  // block cycles, so getting a fast "no" is beneficial.
  llvm::DenseMap<mlir::Block *, /*isInCycle*/ bool> analyzed;
};
} // namespace

namespace {
class AllocaReplaceImpl {
public:
  AllocaReplaceImpl(fir::AllocaRewriterCallBack allocaRewriter,
                    fir::DeallocCallBack deallocGenerator)
      : allocaRewriter{allocaRewriter}, deallocGenerator{deallocGenerator} {}
  bool replace(mlir::RewriterBase &, fir::AllocaOp);

private:
  mlir::Region *findDeallocationPointsAndOwner(
      fir::AllocaOp alloca,
      llvm::SmallVectorImpl<mlir::Operation *> &deallocationPoints);
  bool
  allocDominatesDealloc(fir::AllocaOp alloca,
                        llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
    return llvm::all_of(deallocationPoints, [&](mlir::Operation *deallocPoint) {
      return this->dominanceInfo.properlyDominates(alloca.getOperation(),
                                                   deallocPoint);
    });
  }
  void
  genIndirectDeallocation(mlir::RewriterBase &, fir::AllocaOp,
                          llvm::ArrayRef<mlir::Operation *> deallocationPoints,
                          mlir::Value replacement, mlir::Region &owningRegion);

private:
  fir::AllocaRewriterCallBack allocaRewriter;
  fir::DeallocCallBack deallocGenerator;
  mlir::DominanceInfo dominanceInfo;
  BlockCycleDetector blockCycleDetector;
};
} // namespace

static bool
allocaIsInCycleImpl(mlir::Block *allocaBlock,
                    llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
  llvm::DenseSet<mlir::Block *> seen;
  // Insert the deallocation point blocks as "seen" so that the block
  // traversal will stop at them.
  for (mlir::Operation *deallocPoint : deallocationPoints)
    seen.insert(deallocPoint->getBlock());
  if (seen.contains(allocaBlock))
    return false;
  // Traverse the block successor graph starting from the alloca block.
  llvm::SmallVector<mlir::Block *> successors{allocaBlock};
  while (!successors.empty())
    for (mlir::Block *next : successors.pop_back_val()->getSuccessors()) {
      if (next == allocaBlock)
        return true;
      if (auto pair = seen.insert(next); pair.second)
        successors.push_back(next);
    }
  // The traversal did not reach the alloca block again.
  return false;
}
bool BlockCycleDetector::allocaIsInCycle(
    fir::AllocaOp alloca,
    llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
  mlir::Block *allocaBlock = alloca->getBlock();
  auto analyzedPair = analyzed.try_emplace(allocaBlock, /*isInCycle=*/false);
  bool alreadyAnalyzed = !analyzedPair.second;
  bool &isInCycle = analyzedPair.first->second;
  // Fast exit if block was already analyzed and no cycle was found.
  if (alreadyAnalyzed && !isInCycle)
    return false;
  // If the analysis was not done generically for this block, run it and
  // save the result.
  if (!alreadyAnalyzed)
    isInCycle = allocaIsInCycleImpl(allocaBlock, /*deallocationPoints=*/{});
  if (!isInCycle)
    return false;
  // If the generic analysis found a block loop, check whether a deallocation
  // point would be reached before reaching the block again. Do not cache this
  // analysis, since it is specific to the deallocation points found for this
  // alloca.
  return allocaIsInCycleImpl(allocaBlock, deallocationPoints);
}

static bool terminatorYieldsMemory(mlir::Operation &terminator) {
  return llvm::any_of(terminator.getResults(), [](mlir::OpResult res) {
    return fir::conformsWithPassByRef(res.getType());
  });
}

static bool isRegionTerminator(mlir::Operation &terminator) {
  // Using the ReturnLike trait is tempting, but it is not set on all region
  // terminators that matter (like omp::TerminatorOp, which has no results).
  // This may be true for dead code. That is not a correctness issue, and dead
  // code can be eliminated by running region simplification before this
  // utility is used.
  // It may also be true for unreachable-like terminators (e.g., after an abort
  // call related to Fortran STOP). This is also OK: the inserted deallocation
  // will simply never be reached. It is easier for the rest of the code here
  // to assume there is always at least one deallocation point, so keep
  // unreachable terminators.
  return !terminator.hasSuccessors();
}

mlir::Region *AllocaReplaceImpl::findDeallocationPointsAndOwner(
    fir::AllocaOp alloca,
    llvm::SmallVectorImpl<mlir::Operation *> &deallocationPoints) {
  // Step 1: Identify the operation and region owning the alloca.
  mlir::Region *owningRegion = alloca.getOwnerRegion();
  if (!owningRegion)
    return nullptr;
  mlir::Operation *owningOp = owningRegion->getParentOp();
  assert(owningOp && "region expected to be owned");
  // Step 2: Identify the exit points of the owning region; they are the
  // default deallocation points. TODO: detect and use lifetime markers to get
  // earlier deallocation points.
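  // For instance, when the owning region belongs to a func.func, the collected
  // deallocation points are typically its func.return terminators (plus any
  // unreachable-like terminators, as noted in isRegionTerminator above).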
  bool isOpenACCMPRecipe = mlir::isa<mlir::accomp::RecipeInterface>(owningOp);
  for (mlir::Block &block : owningRegion->getBlocks())
    if (mlir::Operation *terminator = block.getTerminator();
        isRegionTerminator(*terminator)) {
      // FIXME: OpenACC and OpenMP privatization recipes are stand-alone
      // operations meant to be "inlined" later; the value they return may
      // be the address of a local alloca. It would be incorrect to insert
      // a deallocation before the terminator (this would introduce a use
      // after free once the recipe is inlined).
      // This probably requires a redesign or special handling on the
      // OpenACC/OpenMP side.
      if (isOpenACCMPRecipe && terminatorYieldsMemory(*terminator))
        return nullptr;
      deallocationPoints.push_back(terminator);
    }
  // If no block terminators without successors have been found, this is an
  // odd region we cannot reason about (such regions have not been seen yet in
  // FIR or mainstream dialects, but MLIR does not really prevent them).
  if (deallocationPoints.empty())
    return nullptr;

  // Step 3: detect block based loops between the allocation and deallocation
  // points, and add a deallocation point on the back edge to avoid memory
  // leaks.
  // The detection avoids doing region CFG analysis by assuming that there may
  // be cycles if the deallocation points are not dominated by the alloca.
  // This leaves the cases where the deallocation points are in the same region
  // as the alloca (or nested inside it); in those cases there may be a back
  // edge between the alloca and a deallocation point via block successors, and
  // an analysis is run to detect them.
  // When a loop is detected, the easiest way to deallocate on the back edge is
  // to store the allocated memory address in a variable (that dominates the
  // loops) and to deallocate the address stored in that variable, if it is
  // set, before executing the allocation (this is what genIndirectDeallocation
  // below generates). This strategy still leads to correct execution in the
  // "false positive" cases.
  // Hence, the alloca is added as a deallocation point when there is no
  // dominance. Note that adding support for lifetime markers, as mentioned
  // above, would reduce the false positives.
  if (!allocDominatesDealloc(alloca, deallocationPoints) ||
      blockCycleDetector.allocaIsInCycle(alloca, deallocationPoints))
    deallocationPoints.push_back(alloca.getOperation());
  return owningRegion;
}

void AllocaReplaceImpl::genIndirectDeallocation(
    mlir::RewriterBase &rewriter, fir::AllocaOp alloca,
    llvm::ArrayRef<mlir::Operation *> deallocationPoints,
    mlir::Value replacement, mlir::Region &owningRegion) {
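  // Shape of the code generated below (illustrative pseudo-FIR sketch, not
  // exact syntax): a !fir.heap<T> pointer variable is created in the region
  // entry block and initialized to null, the replacement address is stored
  // into it right after its creation, and each deallocation point frees the
  // memory only if the variable is non-null:
  //   ^entry:            %ptrVar = fir.alloca !fir.heap<T>
  //                      fir.store <null> to %ptrVar
  //   <after new alloc>: fir.store <replacement> to %ptrVar
  //   <dealloc point>:   %p = fir.load %ptrVar
  //                      fir.if <%p != null> { <deallocGenerator(loc, %p)> }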
  mlir::Location loc = alloca.getLoc();
  auto replacementInsertPoint = rewriter.saveInsertionPoint();
  // Create a pointer variable (of !fir.heap type) in the entry block to store
  // the allocated address, and access it indirectly at the deallocation points
  // that the alloca does not dominate.
  rewriter.setInsertionPointToStart(&owningRegion.front());
  mlir::Type heapType = fir::HeapType::get(alloca.getInType());
  mlir::Value ptrVar = fir::AllocaOp::create(rewriter, loc, heapType);
  mlir::Value nullPtr = fir::ZeroOp::create(rewriter, loc, heapType);
  fir::StoreOp::create(rewriter, loc, nullPtr, ptrVar);
  // TODO: introducing a pointer comparison op in FIR would help generate
  // less IR here.
  mlir::Type intPtrTy = fir::getIntPtrType(rewriter);
  mlir::Value c0 = mlir::arith::ConstantOp::create(
      rewriter, loc, intPtrTy, rewriter.getIntegerAttr(intPtrTy, 0));

  // Store the new storage address right after its creation.
  rewriter.restoreInsertionPoint(replacementInsertPoint);
  mlir::Value castReplacement =
      fir::factory::createConvert(rewriter, loc, heapType, replacement);
  fir::StoreOp::create(rewriter, loc, castReplacement, ptrVar);

  // Generate conditional deallocation at every deallocation point.
  auto genConditionalDealloc = [&](mlir::Location loc) {
    mlir::Value ptrVal = fir::LoadOp::create(rewriter, loc, ptrVar);
    mlir::Value ptrToInt =
        fir::ConvertOp::create(rewriter, loc, intPtrTy, ptrVal);
    mlir::Value isAllocated = mlir::arith::CmpIOp::create(
        rewriter, loc, mlir::arith::CmpIPredicate::ne, ptrToInt, c0);
    auto ifOp = fir::IfOp::create(rewriter, loc, mlir::TypeRange{}, isAllocated,
                                  /*withElseRegion=*/false);
    rewriter.setInsertionPointToStart(&ifOp.getThenRegion().front());
    mlir::Value cast = fir::factory::createConvert(
        rewriter, loc, replacement.getType(), ptrVal);
    deallocGenerator(loc, rewriter, cast);
    // Currently there is no need to reset the pointer var because two
    // deallocation points can never be reached without going through the
    // alloca.
    rewriter.setInsertionPointAfter(ifOp);
  };
  for (mlir::Operation *deallocPoint : deallocationPoints) {
    rewriter.setInsertionPoint(deallocPoint);
    genConditionalDealloc(deallocPoint->getLoc());
  }
}

bool AllocaReplaceImpl::replace(mlir::RewriterBase &rewriter,
                                fir::AllocaOp alloca) {
  llvm::SmallVector<mlir::Operation *> deallocationPoints;
  mlir::Region *owningRegion =
      findDeallocationPointsAndOwner(alloca, deallocationPoints);
  if (!owningRegion)
    return false;
  rewriter.setInsertionPointAfter(alloca.getOperation());
  bool allocaDominatesDeallocs =
      allocDominatesDealloc(alloca, deallocationPoints);
  if (mlir::Value replacement =
          allocaRewriter(rewriter, alloca, allocaDominatesDeallocs)) {
    mlir::Value castReplacement = fir::factory::createConvert(
        rewriter, alloca.getLoc(), alloca.getType(), replacement);
    if (allocaDominatesDeallocs)
      for (mlir::Operation *deallocPoint : deallocationPoints) {
        rewriter.setInsertionPoint(deallocPoint);
        deallocGenerator(deallocPoint->getLoc(), rewriter, replacement);
      }
    else
      genIndirectDeallocation(rewriter, alloca, deallocationPoints, replacement,
                              *owningRegion);
    rewriter.replaceOp(alloca, castReplacement);
  }
  return true;
}

bool fir::replaceAllocas(mlir::RewriterBase &rewriter,
                         mlir::Operation *parentOp,
                         MustRewriteCallBack mustReplace,
                         AllocaRewriterCallBack allocaRewriter,
                         DeallocCallBack deallocGenerator) {
  // If the parent operation is not an alloca owner, the code below would risk
  // modifying IR outside of parentOp.
  if (!fir::AllocaOp::ownsNestedAlloca(parentOp))
    return false;
  auto insertPoint = rewriter.saveInsertionPoint();
  bool replacedAllRequestedAlloca = true;
  AllocaReplaceImpl impl(allocaRewriter, deallocGenerator);
  parentOp->walk([&](fir::AllocaOp alloca) {
    if (mustReplace(alloca))
      replacedAllRequestedAlloca &= impl.replace(rewriter, alloca);
  });
  rewriter.restoreInsertionPoint(insertPoint);
  return replacedAllRequestedAlloca;
}
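
// A minimal usage sketch of fir::replaceAllocas (hypothetical caller: funcOp,
// createReplacementStorage, and genFree are illustrative stand-ins, and the
// lambda parameter types are assumptions; the exact callback typedefs are
// declared in flang/Optimizer/Transforms/MemoryUtils.h):
//
//   bool allReplaced = fir::replaceAllocas(
//       rewriter, funcOp.getOperation(),
//       /*mustReplace=*/[](fir::AllocaOp alloca) { return true; },
//       /*allocaRewriter=*/
//       [&](mlir::OpBuilder &builder, fir::AllocaOp alloca,
//           bool allocaDominatesDeallocs) -> mlir::Value {
//         // Create the replacement storage (e.g. a heap allocation) and
//         // return its address; returning a null Value keeps the alloca.
//         return createReplacementStorage(builder, alloca);
//       },
//       /*deallocGenerator=*/
//       [&](mlir::Location loc, mlir::OpBuilder &builder, mlir::Value addr) {
//         // Free the storage created by allocaRewriter.
//         genFree(builder, loc, addr);
//       });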