//===- AMDGPUAsanInstrumentation.cpp - ASAN related helper functions -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-------------------------------------------------------------===//
#include "AMDGPUAsanInstrumentation.h"
#define DEBUG_TYPE "amdgpu-asan-instrumentation"
using namespace llvm;
namespace llvm {
namespace AMDGPU {
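
// Minimum redzone size implied by the ASan shadow scale; the shadow
// granularity is 1 << AsanScale bytes.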
static uint64_t getRedzoneSizeForScale(int AsanScale) {
// Redzone used for stack and globals is at least 32 bytes.
// For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
return std::max(32U, 1U << AsanScale);
}
static uint64_t getMinRedzoneSizeForGlobal(int AsanScale) {
return getRedzoneSizeForScale(AsanScale);
}
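
// Compute the redzone to place after a global of SizeInBytes bytes: roughly a
// quarter of the object size, clamped to [MinRZ, 256 KiB] and padded so that
// object size plus redzone is a multiple of MinRZ.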
uint64_t getRedzoneSizeForGlobal(int AsanScale, uint64_t SizeInBytes) {
constexpr uint64_t kMaxRZ = 1 << 18;
const uint64_t MinRZ = getMinRedzoneSizeForGlobal(AsanScale);
uint64_t RZ = 0;
if (SizeInBytes <= MinRZ / 2) {
// Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
// at least 32 bytes, optimize when SizeInBytes is less than or equal to
// half of MinRZ.
RZ = MinRZ - SizeInBytes;
} else {
// Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);
// Round up to multiple of MinRZ.
if (SizeInBytes % MinRZ)
RZ += MinRZ - (SizeInBytes % MinRZ);
}
assert((RZ + SizeInBytes) % MinRZ == 0);
return RZ;
}
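
// Map an access size in bits (8, 16, 32, ...) to the index used to select the
// matching __asan_report_* callback (0 for 1-byte, 1 for 2-byte accesses, ...).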
static size_t TypeStoreSizeToSizeIndex(uint32_t TypeSize) {
size_t Res = llvm::countr_zero(TypeSize / 8);
return Res;
}
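
// Create the "asan.report" block. In non-recover mode the branch into the
// block is taken wave-wide (amdgcn.ballot over the per-lane condition) and
// the faulting lanes terminate in amdgcn.unreachable; in recover mode
// execution falls through after reporting.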
static Instruction *genAMDGPUReportBlock(Module &M, IRBuilder<> &IRB,
Value *Cond, bool Recover) {
Value *ReportCond = Cond;
if (!Recover) {
auto *Ballot =
IRB.CreateIntrinsic(Intrinsic::amdgcn_ballot, IRB.getInt64Ty(), {Cond});
ReportCond = IRB.CreateIsNotNull(Ballot);
}
auto *Trm = SplitBlockAndInsertIfThen(
ReportCond, &*IRB.GetInsertPoint(), false,
MDBuilder(M.getContext()).createUnlikelyBranchWeights());
Trm->getParent()->setName("asan.report");
if (Recover)
return Trm;
Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
IRB.SetInsertPoint(Trm);
return IRB.CreateIntrinsic(Intrinsic::amdgcn_unreachable, {});
}
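
// Slow-path check for accesses that do not cover a whole shadow granule:
// compare the offset of the last accessed byte within its granule against the
// shadow value, which encodes how many leading bytes of the granule are
// addressable.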
static Value *createSlowPathCmp(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
Value *AddrLong, Value *ShadowValue,
uint32_t TypeStoreSize, int AsanScale) {
uint64_t Granularity = static_cast<uint64_t>(1) << AsanScale;
// Addr & (Granularity - 1)
Value *LastAccessedByte =
IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
// (Addr & (Granularity - 1)) + size - 1
if (TypeStoreSize / 8 > 1)
LastAccessedByte = IRB.CreateAdd(
LastAccessedByte, ConstantInt::get(IntptrTy, TypeStoreSize / 8 - 1));
// (uint8_t) ((Addr & (Granularity-1)) + size - 1)
LastAccessedByte =
IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
// ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}
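
// Emit the call to the matching __asan_report_{load,store}{N,_n}[_noabort]
// runtime callback for a failing access.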
static Instruction *generateCrashCode(Module &M, IRBuilder<> &IRB,
Type *IntptrTy, Instruction *InsertBefore,
Value *Addr, bool IsWrite,
size_t AccessSizeIndex,
Value *SizeArgument, bool Recover) {
IRB.SetInsertPoint(InsertBefore);
CallInst *Call = nullptr;
SmallString<128> kAsanReportErrorTemplate{"__asan_report_"};
SmallString<64> TypeStr{IsWrite ? "store" : "load"};
SmallString<64> EndingStr{Recover ? "_noabort" : ""};
SmallString<128> AsanErrorCallbackSizedString;
raw_svector_ostream AsanErrorCallbackSizedOS(AsanErrorCallbackSizedString);
AsanErrorCallbackSizedOS << kAsanReportErrorTemplate << TypeStr << "_n"
<< EndingStr;
SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
AttributeList AL2;
FunctionCallee AsanErrorCallbackSized = M.getOrInsertFunction(
AsanErrorCallbackSizedOS.str(),
FunctionType::get(IRB.getVoidTy(), Args2, false), AL2);
SmallVector<Type *, 2> Args1{1, IntptrTy};
AttributeList AL1;
SmallString<128> AsanErrorCallbackString;
raw_svector_ostream AsanErrorCallbackOS(AsanErrorCallbackString);
AsanErrorCallbackOS << kAsanReportErrorTemplate << TypeStr
<< (1ULL << AccessSizeIndex) << EndingStr;
FunctionCallee AsanErrorCallback = M.getOrInsertFunction(
AsanErrorCallbackOS.str(),
FunctionType::get(IRB.getVoidTy(), Args1, false), AL1);
if (SizeArgument) {
Call = IRB.CreateCall(AsanErrorCallbackSized, {Addr, SizeArgument});
} else {
Call = IRB.CreateCall(AsanErrorCallback, Addr);
}
Call->setCannotMerge();
return Call;
}
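
// Translate an application address to its shadow address:
// (Addr >> AsanScale) + AsanOffset.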
static Value *memToShadow(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
Value *Shadow, int AsanScale, uint32_t AsanOffset) {
// Shadow >> scale
Shadow = IRB.CreateLShr(Shadow, AsanScale);
if (AsanOffset == 0)
return Shadow;
// (Shadow >> scale) + offset
Value *ShadowBase = ConstantInt::get(IntptrTy, AsanOffset);
return IRB.CreateAdd(Shadow, ShadowBase);
}
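
// Instrument a single access of a known power-of-two size: load the shadow
// for the address and, if it is non-zero and the slow-path check confirms the
// access reaches past the addressable bytes of the granule, branch to a
// report block that calls into the ASan runtime.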
static void instrumentAddressImpl(Module &M, IRBuilder<> &IRB,
Instruction *OrigIns,
Instruction *InsertBefore, Value *Addr,
Align Alignment, uint32_t TypeStoreSize,
bool IsWrite, Value *SizeArgument,
bool UseCalls, bool Recover, int AsanScale,
int AsanOffset) {
Type *AddrTy = Addr->getType();
Type *IntptrTy = M.getDataLayout().getIntPtrType(
M.getContext(), AddrTy->getPointerAddressSpace());
IRB.SetInsertPoint(InsertBefore);
size_t AccessSizeIndex = TypeStoreSizeToSizeIndex(TypeStoreSize);
Type *ShadowTy = IntegerType::get(M.getContext(),
std::max(8U, TypeStoreSize >> AsanScale));
Type *ShadowPtrTy = PointerType::get(M.getContext(), 0);
Value *AddrLong = IRB.CreatePtrToInt(Addr, IntptrTy);
Value *ShadowPtr =
memToShadow(M, IRB, IntptrTy, AddrLong, AsanScale, AsanOffset);
const uint64_t ShadowAlign =
std::max<uint64_t>(Alignment.value() >> AsanScale, 1);
Value *ShadowValue = IRB.CreateAlignedLoad(
ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy), Align(ShadowAlign));
Value *Cmp = IRB.CreateIsNotNull(ShadowValue);
auto *Cmp2 = createSlowPathCmp(M, IRB, IntptrTy, AddrLong, ShadowValue,
TypeStoreSize, AsanScale);
Cmp = IRB.CreateAnd(Cmp, Cmp2);
Instruction *CrashTerm = genAMDGPUReportBlock(M, IRB, Cmp, Recover);
Instruction *Crash =
generateCrashCode(M, IRB, IntptrTy, CrashTerm, AddrLong, IsWrite,
AccessSizeIndex, SizeArgument, Recover);
Crash->setDebugLoc(OrigIns->getDebugLoc());
}
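
// Instrument a memory access of an arbitrary TypeSize. Accesses of 1, 2, 4, 8
// or 16 bytes with sufficient alignment take the standard shadow check;
// anything else (unusual size, unusual alignment, scalable vectors) is
// handled by checking the first and the last byte of the access.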
void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
Instruction *InsertBefore, Value *Addr, Align Alignment,
TypeSize TypeStoreSize, bool IsWrite,
Value *SizeArgument, bool UseCalls, bool Recover,
int AsanScale, int AsanOffset) {
if (!TypeStoreSize.isScalable()) {
unsigned Granularity = 1 << AsanScale;
const auto FixedSize = TypeStoreSize.getFixedValue();
switch (FixedSize) {
case 8:
case 16:
case 32:
case 64:
case 128:
if (Alignment.value() >= Granularity ||
Alignment.value() >= FixedSize / 8)
return instrumentAddressImpl(
M, IRB, OrigIns, InsertBefore, Addr, Alignment, FixedSize, IsWrite,
SizeArgument, UseCalls, Recover, AsanScale, AsanOffset);
}
}
// Instrument unusual size or unusual alignment.
IRB.SetInsertPoint(InsertBefore);
Type *AddrTy = Addr->getType();
Type *IntptrTy = M.getDataLayout().getIntPtrType(AddrTy);
Value *NumBits = IRB.CreateTypeSize(IntptrTy, TypeStoreSize);
Value *Size = IRB.CreateLShr(NumBits, ConstantInt::get(IntptrTy, 3));
Value *AddrLong = IRB.CreatePtrToInt(Addr, IntptrTy);
Value *SizeMinusOne = IRB.CreateAdd(Size, ConstantInt::get(IntptrTy, -1));
Value *LastByte =
IRB.CreateIntToPtr(IRB.CreateAdd(AddrLong, SizeMinusOne), AddrTy);
instrumentAddressImpl(M, IRB, OrigIns, InsertBefore, Addr, {}, 8, IsWrite,
SizeArgument, UseCalls, Recover, AsanScale, AsanOffset);
instrumentAddressImpl(M, IRB, OrigIns, InsertBefore, LastByte, {}, 8, IsWrite,
SizeArgument, UseCalls, Recover, AsanScale, AsanOffset);
}
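
// Collect the memory operands of I that should be instrumented: ordinary
// loads and stores, atomics, masked/VP vector intrinsics, and AMDGPU buffer
// and global load/store intrinsics, together with their access type,
// alignment and, where applicable, mask and effective vector length.
// A caller is expected to drive instrumentation roughly as follows (a sketch,
// not the exact driver code of the AMDGPU sanitizer pass):
//   SmallVector<InterestingMemoryOperand, 4> Ops;
//   getInterestingMemoryOperands(M, &Inst, Ops);
//   for (InterestingMemoryOperand &Op : Ops)
//     instrumentAddress(M, IRB, Op.getInsn(), Op.getInsn(), Op.getPtr(), ...);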
void getInterestingMemoryOperands(
Module &M, Instruction *I,
SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
const DataLayout &DL = M.getDataLayout();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
LI->getType(), LI->getAlign());
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
SI->getValueOperand()->getType(), SI->getAlign());
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
RMW->getValOperand()->getType(), std::nullopt);
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
XCHG->getCompareOperand()->getType(),
std::nullopt);
} else if (auto *CI = dyn_cast<CallInst>(I)) {
switch (CI->getIntrinsicID()) {
case Intrinsic::masked_load:
case Intrinsic::masked_store:
case Intrinsic::masked_gather:
case Intrinsic::masked_scatter: {
bool IsWrite = CI->getType()->isVoidTy();
// Masked store has an initial operand for the value.
unsigned OpOffset = IsWrite ? 1 : 0;
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = Align(1);
// If the alignment operand is not a constant (e.g. undef), there is no
// alignment guarantee; keep the default Align(1).
if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
Alignment = Op->getMaybeAlignValue();
Value *Mask = CI->getOperand(2 + OpOffset);
Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
break;
}
case Intrinsic::masked_expandload:
case Intrinsic::masked_compressstore: {
bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_compressstore;
unsigned OpOffset = IsWrite ? 1 : 0;
auto *BasePtr = CI->getOperand(OpOffset);
MaybeAlign Alignment = BasePtr->getPointerAlignment(DL);
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
IRBuilder<> IB(I);
Value *Mask = CI->getOperand(1 + OpOffset);
Type *IntptrTy = M.getDataLayout().getIntPtrType(
M.getContext(), BasePtr->getType()->getPointerAddressSpace());
// Use the popcount of Mask as the effective vector length.
Type *ExtTy = VectorType::get(IntptrTy, cast<VectorType>(Ty));
Value *ExtMask = IB.CreateZExt(Mask, ExtTy);
Value *EVL = IB.CreateAddReduce(ExtMask);
Value *TrueMask = ConstantInt::get(Mask->getType(), 1);
Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, TrueMask,
EVL);
break;
}
case Intrinsic::vp_load:
case Intrinsic::vp_store:
case Intrinsic::experimental_vp_strided_load:
case Intrinsic::experimental_vp_strided_store: {
auto *VPI = cast<VPIntrinsic>(CI);
unsigned IID = CI->getIntrinsicID();
bool IsWrite = CI->getType()->isVoidTy();
unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(DL);
Value *Stride = nullptr;
if (IID == Intrinsic::experimental_vp_strided_store ||
IID == Intrinsic::experimental_vp_strided_load) {
Stride = VPI->getOperand(PtrOpNo + 1);
// Use the pointer alignment as the element alignment if the stride is a
// multiple of the pointer alignment. Otherwise, the element alignment
// should be Align(1).
unsigned PointerAlign = Alignment.valueOrOne().value();
if (!isa<ConstantInt>(Stride) ||
cast<ConstantInt>(Stride)->getZExtValue() % PointerAlign != 0)
Alignment = Align(1);
}
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
VPI->getMaskParam(), VPI->getVectorLengthParam(),
Stride);
break;
}
case Intrinsic::vp_gather:
case Intrinsic::vp_scatter: {
auto *VPI = cast<VPIntrinsic>(CI);
unsigned IID = CI->getIntrinsicID();
bool IsWrite = IID == Intrinsic::vp_scatter;
unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = VPI->getPointerAlignment();
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
VPI->getMaskParam(),
VPI->getVectorLengthParam());
break;
}
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_ptr_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format:
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
case Intrinsic::amdgcn_raw_tbuffer_load:
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_ptr_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load_format:
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
case Intrinsic::amdgcn_struct_tbuffer_load:
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
case Intrinsic::amdgcn_s_buffer_load:
case Intrinsic::amdgcn_global_load_tr_b64:
case Intrinsic::amdgcn_global_load_tr_b128: {
unsigned PtrOpNo = 0;
bool IsWrite = false;
Type *Ty = CI->getType();
Value *Ptr = CI->getArgOperand(PtrOpNo);
MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
break;
}
case Intrinsic::amdgcn_raw_tbuffer_store:
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
case Intrinsic::amdgcn_raw_buffer_store:
case Intrinsic::amdgcn_raw_ptr_buffer_store:
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_ptr_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store_format:
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
case Intrinsic::amdgcn_struct_tbuffer_store:
case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
unsigned PtrOpNo = 1;
bool IsWrite = true;
Value *Ptr = CI->getArgOperand(PtrOpNo);
Type *Ty = Ptr->getType();
MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
break;
}
default:
for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (Type *Ty = CI->getParamByRefType(ArgNo)) {
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
} else if (Type *Ty = CI->getParamByValType(ArgNo)) {
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
}
}
}
}
}
} // end namespace AMDGPU
} // end namespace llvm