aboutsummaryrefslogtreecommitdiff
path: root/offload/plugins-nextgen/level_zero/include/L0Device.h
blob: 001a41ba77d7bf141689c5200ed2cfb42e8dc099 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
//===--- Level Zero Target RTL Implementation -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// GenericDevice instantiation for SPIR-V/Xe machine.
//
//===----------------------------------------------------------------------===//

#ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0DEVICE_H
#define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0DEVICE_H

#include "llvm/ADT/SmallVector.h"

#include "PerThreadTable.h"

#include "AsyncQueue.h"
#include "L0Context.h"
#include "L0Program.h"
#include "PluginInterface.h"
#include "TLS.h"

namespace llvm::omp::target::plugin {

using OmpInteropTy = omp_interop_val_t *;
class LevelZeroPluginTy;

// clang-format off
/// PCI device-id prefixes used to recognise device families.
///
/// L0DeviceTy::isDiscrete() casts `PCIId & 0xFF00` to this type, so an
/// enumerator matches when it equals the device id with its low byte cleared.
/// NOTE(review): TGL (0xFF20) has a non-zero low byte and therefore can never
/// equal a value masked with 0xFF00 -- confirm whether that is intended.
enum class PCIIdTy : int32_t {
  None            = 0x0000,
  SKL             = 0x1900,
  KBL             = 0x5900,
  CFL             = 0x3E00,
  CFL_2           = 0x9B00,
  ICX             = 0x8A00,
  TGL             = 0xFF20,
  TGL_2           = 0x9A00,
  DG1             = 0x4900,
  RKL             = 0x4C00,
  ADLS            = 0x4600,
  RTL             = 0xA700,
  MTL             = 0x7D00,
  PVC             = 0x0B00,
  DG2_ATS_M       = 0x4F00,
  DG2_ATS_M_2     = 0x5600,
  LNL             = 0x6400,
  BMG             = 0xE200,
};

/// Device type enumeration common to compiler and runtime.
///
/// Every non-zero enumerator below is a distinct single bit.
/// NOTE(review): presumably this allows several architectures to be combined
/// into one mask -- confirm against the users of this enum.
enum class DeviceArchTy : uint64_t {
  DeviceArch_None   = 0,
  DeviceArch_Gen    = 0x0001, // Gen 9, Gen 11 or Xe
  DeviceArch_XeLPG  = 0x0002,
  DeviceArch_XeHPC  = 0x0004,
  DeviceArch_XeHPG  = 0x0008,
  DeviceArch_Xe2LP  = 0x0010,
  DeviceArch_Xe2HP  = 0x0020,
  DeviceArch_x86_64 = 0x0100
};
// clang-format on

struct L0DeviceIdTy {
  ze_device_handle_t zeId;
  int32_t RootId;
  int32_t SubId;
  int32_t CCSId;

  L0DeviceIdTy(ze_device_handle_t Device, int32_t RootId, int32_t SubId = -1,
               int32_t CCSId = -1)
      : zeId(Device), RootId(RootId), SubId(SubId), CCSId(CCSId) {}
};

class L0DeviceTLSTy {
  /// Command list for each device.
  ze_command_list_handle_t CmdList = nullptr;

  /// Main copy command list for each device.
  ze_command_list_handle_t CopyCmdList = nullptr;

  /// Command queue for each device.
  ze_command_queue_handle_t CmdQueue = nullptr;

  /// Main copy command queue for each device.
  ze_command_queue_handle_t CopyCmdQueue = nullptr;

  /// Immediate command list for each device.
  ze_command_list_handle_t ImmCmdList = nullptr;

  /// Immediate copy command list for each device.
  ze_command_list_handle_t ImmCopyCmdList = nullptr;

public:
  L0DeviceTLSTy() = default;
  ~L0DeviceTLSTy() {
    // assert all fields are nullptr on destruction.
    assert(!CmdList && !CopyCmdList && !CmdQueue && !CopyCmdQueue &&
           !ImmCmdList && !ImmCopyCmdList &&
           "L0DeviceTLSTy destroyed without clearing resources");
  }

  L0DeviceTLSTy(const L0DeviceTLSTy &) = delete;
  L0DeviceTLSTy(L0DeviceTLSTy &&Other) {
    CmdList = std::exchange(Other.CmdList, nullptr);
    CopyCmdList = std::exchange(Other.CopyCmdList, nullptr);
    CmdQueue = std::exchange(Other.CmdQueue, nullptr);
    CopyCmdQueue = std::exchange(Other.CopyCmdQueue, nullptr);
    ImmCmdList = std::exchange(Other.ImmCmdList, nullptr);
    ImmCopyCmdList = std::exchange(Other.ImmCopyCmdList, nullptr);
  }

  Error deinit() {
    // destroy all lists and queues.
    if (CmdList)
      CALL_ZE_RET_ERROR(zeCommandListDestroy, CmdList);
    if (CopyCmdList)
      CALL_ZE_RET_ERROR(zeCommandListDestroy, CopyCmdList);
    if (ImmCmdList)
      CALL_ZE_RET_ERROR(zeCommandListDestroy, ImmCmdList);
    if (ImmCopyCmdList)
      CALL_ZE_RET_ERROR(zeCommandListDestroy, ImmCopyCmdList);
    if (CmdQueue)
      CALL_ZE_RET_ERROR(zeCommandQueueDestroy, CmdQueue);
    if (CopyCmdQueue)
      CALL_ZE_RET_ERROR(zeCommandQueueDestroy, CopyCmdQueue);

    CmdList = nullptr;
    CopyCmdList = nullptr;
    CmdQueue = nullptr;
    CopyCmdQueue = nullptr;
    ImmCmdList = nullptr;
    ImmCopyCmdList = nullptr;

    return Plugin::success();
  }

  L0DeviceTLSTy &operator=(const L0DeviceTLSTy &) = delete;
  L0DeviceTLSTy &operator=(L0DeviceTLSTy &&) = delete;

  ze_command_list_handle_t getCmdList() const { return CmdList; }
  void setCmdList(ze_command_list_handle_t _CmdList) { CmdList = _CmdList; }

  ze_command_list_handle_t getCopyCmdList() const { return CopyCmdList; }
  void setCopyCmdList(ze_command_list_handle_t _CopyCmdList) {
    CopyCmdList = _CopyCmdList;
  }

  ze_command_list_handle_t getImmCmdList() const { return ImmCmdList; }
  void setImmCmdList(ze_command_list_handle_t ImmCmdListIn) {
    ImmCmdList = ImmCmdListIn;
  }

  ze_command_list_handle_t getImmCopyCmdList() const { return ImmCopyCmdList; }
  void setImmCopyCmdList(ze_command_list_handle_t ImmCopyCmdListIn) {
    ImmCopyCmdList = ImmCopyCmdListIn;
  }

  ze_command_queue_handle_t getCmdQueue() const { return CmdQueue; }
  void setCmdQueue(ze_command_queue_handle_t CmdQueueIn) {
    CmdQueue = CmdQueueIn;
  }

  ze_command_queue_handle_t getCopyCmdQueue() const { return CopyCmdQueue; }
  void setCopyCmdQueue(ze_command_queue_handle_t CopyCmdQueueIn) {
    CopyCmdQueue = CopyCmdQueueIn;
  }
};

/// Per-thread container of L0DeviceTLSTy entries.
struct L0DeviceTLSTableTy
    : public PerThreadContainer<std::vector<L0DeviceTLSTy>, 8> {
  /// Forward to PerThreadTable::deinit(), supplying a callback that runs
  /// L0DeviceTLSTy::deinit() on an entry and returns its Error.
  Error deinit() {
    auto ReleaseEntry = [](L0DeviceTLSTy &TLS) { return TLS.deinit(); };
    return PerThreadTable::deinit(ReleaseEntry);
  }
};

/// Level Zero implementation of GenericDeviceTy.
///
/// An instance wraps one ze_device_handle_t together with the L0ContextTy it
/// was created with. It holds the device/compute/memory/cache property
/// structures, the programs and modules loaded for the device, and the device
/// memory allocator, and declares the helpers used to create command lists and
/// queues and to enqueue copies and fills.
class L0DeviceTy final : public GenericDeviceTy {
  // Level Zero Context for this Device.
  L0ContextTy &l0Context;

  // Level Zero handle for this Device.
  ze_device_handle_t zeDevice;
  // Device Properties. Value-initialized here; the constructor sets the
  // stype/pNext headers of each structure.
  ze_device_properties_t DeviceProperties{};
  ze_device_compute_properties_t ComputeProperties{};
  ze_device_memory_properties_t MemoryProperties{};
  ze_device_cache_properties_t CacheProperties{};

  /// Devices' default target allocation kind for internal allocation.
  int32_t AllocKind = TARGET_ALLOC_DEVICE;

  DeviceArchTy DeviceArch = DeviceArchTy::DeviceArch_None;

  std::string DeviceName;

  /// Common indirect access flags for this device.
  ze_kernel_indirect_access_flags_t IndirectAccessFlags = 0;

  /// Device UUID for toplevel devices only.
  std::string DeviceUuid;

  /// L0 Device ID as string.
  std::string zeId;

  /// Sentinel ordinal: an ordinal equal to MaxOrdinal means that no matching
  /// command queue group is recorded (see hasMainCopyEngine()).
  static constexpr uint32_t MaxOrdinal =
      std::numeric_limits<decltype(MaxOrdinal)>::max();
  /// Command queue group ordinal for computing, as an Ordinal-NumQueues pair.
  std::pair<uint32_t, uint32_t> ComputeOrdinal{MaxOrdinal, 0};
  /// Command queue group ordinals for copying.
  std::pair<uint32_t, uint32_t> CopyOrdinal{MaxOrdinal, 0};

  /// Command queue index for each device.
  uint32_t ComputeIndex = 0;

  bool IsAsyncEnabled = false;

  /// Lock for this device.
  std::mutex Mutex;

  /// Contains all modules (possibly from multiple device images) to handle
  /// dynamic link across multiple images
  llvm::SmallVector<ze_module_handle_t> GlobalModules;

  /// L0 programs created for this device
  std::list<L0ProgramTy> Programs;

  /// MemAllocator for this device.
  MemAllocatorTy MemAllocator;

  DeviceArchTy computeArch() const;

  /// Get default compute group ordinal. Returns Ordinal-NumQueues pair.
  std::pair<uint32_t, uint32_t> findComputeOrdinal();

  /// Get copy command queue group ordinal. Returns Ordinal-NumQueues pair.
  std::pair<uint32_t, uint32_t> findCopyOrdinal(bool LinkCopy = false);

public:
  /// Construct the device object.
  ///
  /// Only stores the arguments and prepares the stype/pNext headers of the
  /// property structures; no Level Zero call is made here.
  L0DeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices,
             ze_device_handle_t zeDevice, L0ContextTy &DriverInfo,
             const std::string_view zeId, int32_t ComputeIndex)
      : GenericDeviceTy(Plugin, DeviceId, NumDevices, SPIRVGridValues),
        l0Context(DriverInfo), zeDevice(zeDevice), zeId(zeId),
        ComputeIndex(ComputeIndex) {
    DeviceProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
    DeviceProperties.pNext = nullptr;
    ComputeProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_COMPUTE_PROPERTIES;
    ComputeProperties.pNext = nullptr;
    MemoryProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES;
    MemoryProperties.pNext = nullptr;
    CacheProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_CACHE_PROPERTIES;
    CacheProperties.pNext = nullptr;
  }

  /// Downcast a generic device reference to an L0DeviceTy reference. The cast
  /// is unchecked; the caller must know \p Device really is an L0DeviceTy.
  static L0DeviceTy &makeL0Device(GenericDeviceTy &Device) {
    return static_cast<L0DeviceTy &>(Device);
  }

  /// Return the owning plugin as a LevelZeroPluginTy.
  /// LevelZeroPluginTy is only forward-declared in this header, which is why
  /// a static_cast cannot be used here.
  LevelZeroPluginTy &getPlugin() {
    return reinterpret_cast<LevelZeroPluginTy &>(Plugin);
  }

  L0DeviceTLSTy &getTLS();

  /// Nothing to do for this plugin; always succeeds.
  Error setContext() override { return Plugin::success(); }
  Error initImpl(GenericPluginTy &Plugin) override;
  Error deinitImpl() override;
  ze_device_handle_t getZeDevice() const { return zeDevice; }

  const L0ContextTy &getL0Context() const { return l0Context; }
  L0ContextTy &getL0Context() { return l0Context; }

  /// The returned view / pointer refers to storage owned by this device.
  const std::string_view getName() const { return DeviceName; }
  const char *getNameCStr() const { return DeviceName.c_str(); }

  const char *getArchCStr() const;

  /// The returned view / pointer refers to storage owned by this device.
  const std::string_view getZeId() const { return zeId; }
  const char *getZeIdCStr() const { return zeId.c_str(); }

  std::mutex &getMutex() { return Mutex; }

  uint32_t getComputeIndex() const { return ComputeIndex; }
  ze_kernel_indirect_access_flags_t getIndirectFlags() const {
    return IndirectAccessFlags;
  }

  size_t getNumGlobalModules() const { return GlobalModules.size(); }
  void addGlobalModule(ze_module_handle_t Module) {
    GlobalModules.push_back(Module);
  }
  /// Pointer to the first of getNumGlobalModules() contiguous module handles.
  ze_module_handle_t *getGlobalModulesArray() { return GlobalModules.data(); }

  /// Return the program whose memory buffer compares equal to \p Image, or
  /// nullptr when no program of this device matches.
  L0ProgramTy *getProgramFromImage(MemoryBufferRef Image) {
    for (auto &PGM : Programs)
      if (PGM.getMemoryBuffer() == Image)
        return &PGM;
    return nullptr;
  }

  /// Call loadModuleKernels() on every program in insertion order and stop at
  /// the first error.
  Error buildAllKernels() {
    for (auto &PGM : Programs) {
      if (auto Err = PGM.loadModuleKernels())
        return Err;
    }
    return Plugin::success();
  }

  /// Add a new program to the device, built from \p Builder's ELF image,
  /// global module and modules.
  ///
  /// \returns a reference to the new program, or the error produced by
  /// Builder.getELF(). Programs live in a std::list, so the reference is not
  /// invalidated by later additions.
  Expected<L0ProgramTy &> addProgram(int32_t ImageId,
                                     L0ProgramBuilderTy &Builder) {
    auto ImageOrErr = Builder.getELF();
    if (!ImageOrErr)
      return ImageOrErr.takeError();
    Programs.emplace_back(ImageId, *this, std::move(*ImageOrErr),
                          Builder.getGlobalModule(),
                          std::move(Builder.getModules()));
    return Programs.back();
  }

  /// Return the most recently added program. Must not be called while the
  /// program list is empty.
  const L0ProgramTy &getLastProgram() const { return Programs.back(); }
  L0ProgramTy &getLastProgram() { return Programs.back(); }

  // Device properties getters.
  uint32_t getVendorId() const { return DeviceProperties.vendorId; }
  bool isGPU() const { return DeviceProperties.type == ZE_DEVICE_TYPE_GPU; }

  uint32_t getPCIId() const { return DeviceProperties.deviceId; }
  uint32_t getNumThreadsPerEU() const {
    return DeviceProperties.numThreadsPerEU;
  }
  uint32_t getSIMDWidth() const { return DeviceProperties.physicalEUSimdWidth; }
  uint32_t getNumEUsPerSubslice() const {
    return DeviceProperties.numEUsPerSubslice;
  }
  uint32_t getNumSubslicesPerSlice() const {
    return DeviceProperties.numSubslicesPerSlice;
  }
  uint32_t getNumSlices() const { return DeviceProperties.numSlices; }
  /// Subslices per slice times number of slices.
  uint32_t getNumSubslices() const {
    return DeviceProperties.numSubslicesPerSlice * DeviceProperties.numSlices;
  }
  /// EUs per subslice times total number of subslices.
  uint32_t getNumEUs() const {
    return DeviceProperties.numEUsPerSubslice * getNumSubslices();
  }
  /// Threads per EU times total number of EUs.
  uint32_t getTotalThreads() const {
    return DeviceProperties.numThreadsPerEU * getNumEUs();
  }
  uint32_t getNumThreadsPerSubslice() const {
    return getNumEUsPerSubslice() * getNumThreadsPerEU();
  }
  uint32_t getClockRate() const { return DeviceProperties.coreClockRate; }

  uint32_t getMaxSharedLocalMemory() const {
    return ComputeProperties.maxSharedLocalMemory;
  }
  uint32_t getMaxGroupSize() const {
    return ComputeProperties.maxTotalGroupSize;
  }
  /// Product of the per-dimension group-count limits, saturated at the
  /// largest uint32_t value.
  ///
  /// Each factor is a full 32-bit quantity, so a plain 32-bit multiplication
  /// would silently wrap around. The product is therefore accumulated in
  /// 64 bits and clamped after each step; clamping before the last
  /// multiplication keeps the intermediate value within 64 bits and still
  /// yields 0 when the remaining factor is 0.
  uint32_t getMaxGroupCount() const {
    constexpr uint64_t Limit = std::numeric_limits<uint32_t>::max();
    uint64_t Count =
        static_cast<uint64_t>(getMaxGroupCountX()) * getMaxGroupCountY();
    if (Count > Limit)
      Count = Limit;
    Count *= getMaxGroupCountZ();
    if (Count > Limit)
      Count = Limit;
    return static_cast<uint32_t>(Count);
  }

  uint32_t getMaxGroupSizeX() const { return ComputeProperties.maxGroupSizeX; }
  uint32_t getMaxGroupSizeY() const { return ComputeProperties.maxGroupSizeY; }
  uint32_t getMaxGroupSizeZ() const { return ComputeProperties.maxGroupSizeZ; }
  uint32_t getMaxGroupCountX() const {
    return ComputeProperties.maxGroupCountX;
  }
  uint32_t getMaxGroupCountY() const {
    return ComputeProperties.maxGroupCountY;
  }
  uint32_t getMaxGroupCountZ() const {
    return ComputeProperties.maxGroupCountZ;
  }
  uint32_t getMemoryClockRate() const { return MemoryProperties.maxClockRate; }
  uint64_t getGlobalMemorySize() const { return MemoryProperties.totalSize; }
  size_t getCacheSize() const { return CacheProperties.cacheSize; }
  uint64_t getMaxMemAllocSize() const {
    return DeviceProperties.maxMemAllocSize;
  }

  int32_t getAllocKind() const { return AllocKind; }
  DeviceArchTy getDeviceArch() const { return DeviceArch; }
  bool isDeviceArch(DeviceArchTy Arch) const { return DeviceArch == Arch; }

  /// Return true when \p PCIId, with its low byte masked off, is one of the
  /// PCIIdTy values listed in the switch below.
  static bool isDiscrete(uint32_t PCIId) {
    switch (static_cast<PCIIdTy>(PCIId & 0xFF00)) {
    case PCIIdTy::DG1:
    case PCIIdTy::PVC:
    case PCIIdTy::DG2_ATS_M:
    case PCIIdTy::DG2_ATS_M_2:
    case PCIIdTy::BMG:
      return true;
    default:
      return false;
    }
  }

  /// Query the properties of \p Device and classify its PCI device id.
  /// CALL_ZE_RET is passed `false` as the value to return if the query fails.
  static bool isDiscrete(ze_device_handle_t Device) {
    ze_device_properties_t PR{};
    PR.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
    PR.pNext = nullptr;
    CALL_ZE_RET(false, zeDeviceGetProperties, Device, &PR);
    return isDiscrete(PR.deviceId);
  }

  /// Classify this device using the PCI device id held in DeviceProperties.
  bool isDiscreteDevice() const { return isDiscrete(getPCIId()); }
  bool isDeviceIPorNewer(uint32_t Version) const;

  const std::string_view getUuid() const { return DeviceUuid; }

  uint32_t getComputeEngine() const { return ComputeOrdinal.first; }
  uint32_t getNumComputeQueues() const { return ComputeOrdinal.second; }

  /// True when a copy ordinal other than the MaxOrdinal sentinel is recorded.
  bool hasMainCopyEngine() const { return CopyOrdinal.first != MaxOrdinal; }
  uint32_t getMainCopyEngine() const { return CopyOrdinal.first; }

  /// True when isDeviceIPorNewer() accepts the BMG IP version constant.
  bool deviceRequiresImmCmdList() const {
    constexpr uint32_t BMGIP = 0x05004000;
    return isDeviceIPorNewer(BMGIP);
  }
  bool asyncEnabled() const { return IsAsyncEnabled; }
  // Immediate command lists are unconditionally selected for all three uses.
  bool useImmForCompute() const { return true; }
  bool useImmForCopy() const { return true; }
  bool useImmForInterop() const { return true; }

  void reportDeviceInfo() const;

  // Command queues related functions.
  /// Create a command list with given ordinal and flags.
  Expected<ze_command_list_handle_t>
  createCmdList(ze_context_handle_t Context, ze_device_handle_t Device,
                uint32_t Ordinal, ze_command_list_flags_t Flags,
                const std::string_view DeviceIdStr);

  /// Create a command list with default flags.
  Expected<ze_command_list_handle_t>
  createCmdList(ze_context_handle_t Context, ze_device_handle_t Device,
                uint32_t Ordinal, const std::string_view DeviceIdStr);

  Expected<ze_command_list_handle_t> getCmdList();

  /// Create a command queue with given ordinal and flags.
  Expected<ze_command_queue_handle_t>
  createCmdQueue(ze_context_handle_t Context, ze_device_handle_t Device,
                 uint32_t Ordinal, uint32_t Index,
                 ze_command_queue_flags_t Flags,
                 const std::string_view DeviceIdStr);

  /// Create a command queue with default flags.
  Expected<ze_command_queue_handle_t>
  createCmdQueue(ze_context_handle_t Context, ze_device_handle_t Device,
                 uint32_t Ordinal, uint32_t Index,
                 const std::string_view DeviceIdStr, bool InOrder = false);

  /// Create a new command queue for the given OpenMP device ID.
  Expected<ze_command_queue_handle_t> createCommandQueue(bool InOrder = false);

  /// Create an immediate command list.
  Expected<ze_command_list_handle_t>
  createImmCmdList(uint32_t Ordinal, uint32_t Index, bool InOrder = false);

  /// Create an immediate command list for computing, using this device's
  /// compute engine ordinal and compute index.
  Expected<ze_command_list_handle_t> createImmCmdList(bool InOrder = false) {
    return createImmCmdList(getComputeEngine(), getComputeIndex(), InOrder);
  }

  /// Create an immediate command list for copying.
  Expected<ze_command_list_handle_t> createImmCopyCmdList();
  Expected<ze_command_queue_handle_t> getCmdQueue();
  Expected<ze_command_list_handle_t> getCopyCmdList();
  Expected<ze_command_queue_handle_t> getCopyCmdQueue();
  Expected<ze_command_list_handle_t> getImmCmdList();
  Expected<ze_command_list_handle_t> getImmCopyCmdList();

  /// Enqueue copy command.
  Error enqueueMemCopy(void *Dst, const void *Src, size_t Size,
                       __tgt_async_info *AsyncInfo = nullptr,
                       bool UseCopyEngine = true);

  /// Enqueue asynchronous copy command.
  Error enqueueMemCopyAsync(void *Dst, const void *Src, size_t Size,
                            __tgt_async_info *AsyncInfo, bool CopyTo = true);

  /// Enqueue fill command.
  Error enqueueMemFill(void *Ptr, const void *Pattern, size_t PatternSize,
                       size_t Size);

  /// Driver related functions.

  /// Return the driver handle for this device.
  ze_driver_handle_t getZeDriver() const { return l0Context.getZeDriver(); }

  /// Return context for this device.
  ze_context_handle_t getZeContext() const { return l0Context.getZeContext(); }

  /// Return driver API version for this device.
  ze_api_version_t getDriverAPIVersion() const {
    return l0Context.getDriverAPIVersion();
  }

  /// Return an event from the driver associated to this device.
  Expected<ze_event_handle_t> getEvent() {
    return l0Context.getEventPool().getEvent();
  }

  /// Release event to the pool associated to this device.
  Error releaseEvent(ze_event_handle_t Event) {
    return l0Context.getEventPool().releaseEvent(Event, *this);
  }

  /// Return the staging buffer owned by this device's context.
  StagingBufferTy &getStagingBuffer() { return l0Context.getStagingBuffer(); }

  bool supportsLargeMem() const { return l0Context.supportsLargeMem(); }

  // Allocation related routines.

  /// Data alloc.
  Expected<void *> dataAlloc(
      size_t Size, size_t Align, int32_t Kind, intptr_t Offset, bool UserAlloc,
      bool DevMalloc = false,
      uint32_t MemAdvice = std::numeric_limits<decltype(MemAdvice)>::max(),
      AllocOptionTy AllocOpt = AllocOptionTy::ALLOC_OPT_NONE);

  /// Data delete.
  Error dataDelete(void *Ptr);

  /// Return the memory allocation type for the specified memory location.
  uint32_t getMemAllocType(const void *Ptr) const;

  const MemAllocatorTy &getDeviceMemAllocator() const { return MemAllocator; }
  MemAllocatorTy &getDeviceMemAllocator() { return MemAllocator; }

  /// Select the allocator by allocation kind: the context's host allocator for
  /// TARGET_ALLOC_HOST, this device's allocator for every other kind.
  MemAllocatorTy &getMemAllocator(int32_t Kind) {
    if (Kind == TARGET_ALLOC_HOST)
      return l0Context.getHostMemAllocator();
    return getDeviceMemAllocator();
  }

  /// Select the allocator by the allocation type reported for \p Ptr: the
  /// context's host allocator for ZE_MEMORY_TYPE_HOST, this device's allocator
  /// otherwise.
  MemAllocatorTy &getMemAllocator(const void *Ptr) {
    if (ZE_MEMORY_TYPE_HOST == getMemAllocType(Ptr))
      return l0Context.getHostMemAllocator();
    return getDeviceMemAllocator();
  }

  Error makeMemoryResident(void *Mem, size_t Size);

  // Generic device interface implementation.
  Expected<DeviceImageTy *>
  loadBinaryImpl(std::unique_ptr<MemoryBuffer> &&TgtImage,
                 int32_t ImageId) override;
  Error unloadBinaryImpl(DeviceImageTy *Image) override;
  Expected<void *> allocate(size_t Size, void *HstPtr,
                            TargetAllocTy Kind) override;
  Error free(void *TgtPtr, TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) override;

  /// This plugin does nothing to lock buffers. Do not return an error, just
  /// return the same pointer as the device pointer.
  Expected<void *> dataLockImpl(void *HstPtr, int64_t Size) override {
    return HstPtr;
  }
  /// Counterpart of dataLockImpl(); nothing was locked, so always succeeds.
  Error dataUnlockImpl(void *HstPtr) override { return Plugin::success(); }

  /// Always reports "not pinned" and leaves the output arguments untouched.
  Expected<bool> isPinnedPtrImpl(void *, void *&, void *&,
                                 size_t &) const override {
    // Don't need to do anything, this is handled by the driver.
    return false;
  }

  Expected<bool> isAccessiblePtrImpl(const void *Ptr, size_t Size) override;
  Error dataFence(__tgt_async_info *Async) override;
  Error dataFillImpl(void *TgtPtr, const void *PatternPtr, int64_t PatternSize,
                     int64_t Size,
                     AsyncInfoWrapperTy &AsyncInfoWrapper) override;
  Error synchronizeImpl(__tgt_async_info &AsyncInfo,
                        bool ReleaseQueue) override;
  Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
                       bool *IsQueueWorkCompleted) override;
  Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size,
                       AsyncInfoWrapperTy &AsyncInfoWrapper) override;
  Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size,
                         AsyncInfoWrapperTy &AsyncInfoWrapper) override;
  Error dataExchangeImpl(const void *SrcPtr, GenericDeviceTy &DstDev,
                         void *DstPtr, int64_t Size,
                         AsyncInfoWrapperTy &AsyncInfoWrapper) override;
  Error initAsyncInfoImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) override;
  Expected<bool>
  hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) override;

  /// Host callbacks are not supported; always returns an UNIMPLEMENTED error.
  Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
                            AsyncInfoWrapperTy &AsyncInfo) override {
    return Plugin::error(ErrorCode::UNIMPLEMENTED,
                         "enqueueHostCallImpl not implemented yet");
  }

  // Event routines are used to ensure ordering between data transfers.
  // Instead of adding extra events in the queues, we make sure they're ordered
  // by using the events from the data submission APIs, so we don't need to
  // support these routines.
  // The create/destroy/record/isComplete routines still need to report
  // success to indicate that the events are handled somewhere else; waitEvent
  // and syncEvent should remain unimplemented.
  Expected<bool> isEventCompleteImpl(void *EventPtr,
                                     AsyncInfoWrapperTy &) override {
    return true;
  }

  /// No event object is created; \p EventPtrStorage is left untouched.
  Error createEventImpl(void **EventPtrStorage) override {
    return Plugin::success();
  }
  Error destroyEventImpl(void *EventPtr) override { return Plugin::success(); }
  Error recordEventImpl(void *EventPtr,
                        AsyncInfoWrapperTy &AsyncInfoWrapper) override {
    return Plugin::success();
  }

  /// Deliberately unimplemented; always returns an error.
  Error waitEventImpl(void *EventPtr,
                      AsyncInfoWrapperTy &AsyncInfoWrapper) override {
    return Plugin::error(error::ErrorCode::UNKNOWN, "%s not implemented yet\n",
                         __func__);
  }

  /// Deliberately unimplemented; always returns an error.
  Error syncEventImpl(void *EventPtr) override {
    return Plugin::error(error::ErrorCode::UNKNOWN, "%s not implemented yet\n",
                         __func__);
  }

  Expected<InfoTreeNode> obtainInfoImpl() override;
  /// Reports the core clock rate from the device properties.
  uint64_t getClockFrequency() const override { return getClockRate(); }
  /// Reports getTotalThreads().
  uint64_t getHardwareParallelism() const override { return getTotalThreads(); }
  /// Store the total size from the memory properties in \p DSize.
  Error getDeviceMemorySize(uint64_t &DSize) override {
    DSize = getGlobalMemorySize();
    return Plugin::success();
  }

  /// Always reports a stack size of 0.
  Error getDeviceStackSize(uint64_t &V) override {
    V = 0;
    return Plugin::success();
  }
  Expected<GenericKernelTy &> constructKernel(const char *Name) override;

  /// The requested stack size is ignored; always succeeds.
  Error setDeviceStackSize(uint64_t V) override { return Plugin::success(); }

  Expected<omp_interop_val_t *>
  createInterop(int32_t InteropType, interop_spec_t &InteropSpec) override;
  Error releaseInterop(omp_interop_val_t *Interop) override;

  interop_spec_t selectInteropPreference(int32_t InteropType,
                                         int32_t NumPrefers,
                                         interop_spec_t *Prefers) override;
};

} // namespace llvm::omp::target::plugin
#endif // OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0DEVICE_H