[Flang] Adjust the trampoline size for AArch64 and PPC (#118678)

Set the trampoline size to match that in compiler-rt/lib/builtins/trampoline_setup.c and AArch64 and PPC lowering.
author: ssijaric-nv <ssijaric@nvidia.com> 2025-01-27 08:02:18 -0800
committer: GitHub <noreply@github.com> 2025-01-27 08:02:18 -0800
commit: 16e9601e193e026d7f3f27e87f0adb81acf5969b (patch)
tree: 2242cbab9f70f99387a46735991b34d50e923f95
parent: 178f47143a3b3c547df6d1f07e9707792f5d9fd4 (diff)
download: llvm-16e9601e193e026d7f3f27e87f0adb81acf5969b.zip
llvm-16e9601e193e026d7f3f27e87f0adb81acf5969b.tar.gz
llvm-16e9601e193e026d7f3f27e87f0adb81acf5969b.tar.bz2
4 files changed, 41 insertions, 4 deletions
diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
index c7c2dce..26f4aee 100644
--- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
+++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
@@ -270,10 +270,18 @@ public:
             // Create the thunk.
             auto module = embox->getParentOfType<mlir::ModuleOp>();
             FirOpBuilder builder(rewriter, module);
+            const auto triple{fir::getTargetTriple(module)};
             auto loc = embox.getLoc();
             mlir::Type i8Ty = builder.getI8Type();
             mlir::Type i8Ptr = builder.getRefType(i8Ty);
-            mlir::Type buffTy = SequenceType::get({32}, i8Ty);
+            // For AArch64, PPC32 and PPC64, the thunk is populated by a call to
+            // __trampoline_setup, which is defined in
+            // compiler-rt/lib/builtins/trampoline_setup.c and requires the
+            // thunk size greater than 32 bytes.  For RISCV and x86_64, the
+            // thunk setup doesn't go through __trampoline_setup and fits in 32
+            // bytes.
+            fir::SequenceType::Extent thunkSize = triple.getTrampolineSize();
+            mlir::Type buffTy = SequenceType::get({thunkSize}, i8Ty);
             auto buffer = builder.create<AllocaOp>(loc, buffTy);
             mlir::Value closure =
                 builder.createConvert(loc, i8Ptr, embox.getHost());
diff --git a/flang/test/Fir/boxproc.fir b/flang/test/Fir/boxproc.fir
index 27d8953..d5d7859 100644
--- a/flang/test/Fir/boxproc.fir
+++ b/flang/test/Fir/boxproc.fir
@@ -1,7 +1,11 @@
-// RUN: tco %s | FileCheck %s
+// RUN: %if aarch64-registered-target %{tco --target=aarch64-unknown-linux-gnu %s | FileCheck %s --check-prefixes=CHECK,CHECK-AARCH64 %}
+// RUN: %if x86-registered-target %{tco --target=x86_64-unknown-linux-gnu %s | FileCheck %s --check-prefixes=CHECK,CHECK-X86 %}
+// RUN: %if powerpc-registered-target %{tco --target=powerpc64le-unknown-linux-gnu %s | FileCheck %s --check-prefixes=CHECK,CHECK-PPC %}
 
 // CHECK-LABEL: define void @_QPtest_proc_dummy()
-// CHECK:         %[[VAL_3:.*]] = alloca [32 x i8], i64 1, align 1
+// CHECK-AARCH64: %[[VAL_3:.*]] = alloca [36 x i8], i64 1, align 1
+// CHECK-X86:     %[[VAL_3:.*]] = alloca [32 x i8], i64 1, align 1
+// CHECK-PPC:     %[[VAL_3:.*]] = alloca [4{{[0-8]+}} x i8], i64 1, align 1
 // CHECK:         %[[VAL_1:.*]] = alloca { ptr }, i64 1, align 8
 // CHECK:         %[[VAL_0:.*]] = alloca i32, i64 1, align 4
 // CHECK:         %[[VAL_2:.*]] = getelementptr { ptr }, ptr %[[VAL_1]], i32 0, i32 0
@@ -59,7 +63,9 @@ func.func @_QPtest_proc_dummy_other(%arg0: !fir.boxproc<() -> ()>) {
 }
 
 // CHECK-LABEL: define void @_QPtest_proc_dummy_char()
-// CHECK:         %[[VAL_20:.*]] = alloca [32 x i8], i64 1, align 1
+// CHECK-AARCH64: %[[VAL_20:.*]] = alloca [36 x i8], i64 1, align 1
+// CHECK-X86:     %[[VAL_20:.*]] = alloca [32 x i8], i64 1, align 1
+// CHECK-PPC:     %[[VAL_20:.*]] = alloca [4{{[0-8]+}} x i8], i64 1, align 1
 // CHECK:         %[[VAL_2:.*]] = alloca { { ptr, i64 } }, i64 1, align 8
 // CHECK:         %[[VAL_1:.*]] = alloca [10 x i8], i64 1, align 1
 // CHECK:         %[[VAL_0:.*]] = alloca [40 x i8], i64 1, align 1
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index 8097300..ed6f48f 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -498,6 +498,9 @@ public:
     return getArchPointerBitWidth(getArch());
   }
 
+  /// Returns the trampoline size in bytes for this configuration.
+  unsigned getTrampolineSize() const;
+
   /// Test whether the architecture is 64-bit
   ///
   /// Note that this tests for 64-bit pointer width, and nothing else. Note
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index ed58e72..e9e6f13 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -1711,6 +1711,26 @@ unsigned Triple::getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
   llvm_unreachable("Invalid architecture value");
 }
 
+unsigned Triple::getTrampolineSize() const {
+  switch (getArch()) {
+  default:
+    break;
+  case Triple::ppc:
+  case Triple::ppcle:
+    if (isOSLinux())
+      return 40;
+    break;
+  case Triple::ppc64:
+  case Triple::ppc64le:
+    if (isOSLinux())
+      return 48;
+    break;
+  case Triple::aarch64:
+    return 36;
+  }
+  return 32;
+}
+
 bool Triple::isArch64Bit() const {
   return getArchPointerBitWidth(getArch()) == 64;
 }
author	ssijaric-nv <ssijaric@nvidia.com>	2025-01-27 08:02:18 -0800
committer	GitHub <noreply@github.com>	2025-01-27 08:02:18 -0800
commit	16e9601e193e026d7f3f27e87f0adb81acf5969b (patch)
tree	2242cbab9f70f99387a46735991b34d50e923f95
parent	178f47143a3b3c547df6d1f07e9707792f5d9fd4 (diff)
download	llvm-16e9601e193e026d7f3f27e87f0adb81acf5969b.zip llvm-16e9601e193e026d7f3f27e87f0adb81acf5969b.tar.gz llvm-16e9601e193e026d7f3f27e87f0adb81acf5969b.tar.bz2