[libc] Adjust NVPTX startup code

Summary: The startup code needs to include the environment pointer, so we add it to the arguments. We also need to ensure that the `crt1.o` file is built with `-fgpu-rdc` set so that we can actually use it without undefined reference errors.
author: Joseph Huber <jhuber6@vols.utk.edu> 2023-03-22 19:58:08 -0500
committer: Joseph Huber <jhuber6@vols.utk.edu> 2023-03-22 20:08:08 -0500
commit: ae63b1a5767b89fe5af140365f9e3ccf74feb1f0 (patch)
tree: cce41857a8152796b6a08f06c16c44dc4c777d8b /libc/startup
parent: 9855fe4568770947abf6c465c513dfd4a6c6dca6 (diff)
download: llvm-ae63b1a5767b89fe5af140365f9e3ccf74feb1f0.zip
llvm-ae63b1a5767b89fe5af140365f9e3ccf74feb1f0.tar.gz
llvm-ae63b1a5767b89fe5af140365f9e3ccf74feb1f0.tar.bz2
2 files changed, 10 insertions, 4 deletions
diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt
index f7f58ec..96ab7540 100644
--- a/libc/startup/gpu/nvptx/CMakeLists.txt
+++ b/libc/startup/gpu/nvptx/CMakeLists.txt
@@ -8,6 +8,7 @@ add_startup_object(
     -nogpulib # Do not include any GPU vendor libraries.
     -nostdinc
     -x cuda # Use the CUDA toolchain to emit the `_start` kernel.
+    -fgpu-rdc # Emit relocatable device code from CUDA.
     --offload-device-only
     --offload-arch=${LIBC_GPU_TARGET_ARCHITECTURE}
   NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
@@ -15,4 +16,8 @@ add_startup_object(
 get_fq_target_name(crt1 fq_name)
 
 # Ensure that clang uses the correct linker for this object type.
-target_link_libraries(${fq_name} PUBLIC "--target=${LIBC_GPU_TARGET_TRIPLE}")
+target_link_libraries(${fq_name}
+  PUBLIC
+  "-march=${LIBC_GPU_TARGET_ARCHITECTURE}"
+  "--target=${LIBC_GPU_TARGET_TRIPLE}"
+)
diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp
index 6156942..cf4077c 100644
--- a/libc/startup/gpu/nvptx/start.cpp
+++ b/libc/startup/gpu/nvptx/start.cpp
@@ -6,10 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-extern "C" __attribute__((device)) int main(int argc, char **argv);
+extern "C" __attribute__((device)) int main(int argc, char **argv, char **envp);
 
 // TODO: We shouldn't need to use the CUDA language to emit a kernel for NVPTX.
 extern "C" [[gnu::visibility("protected")]] __attribute__((global)) void
-_start(int argc, char **argv, int *ret) {
-  __atomic_fetch_or(ret, main(argc, argv), __ATOMIC_RELAXED);
+_start(int argc, char **argv, char **envp, int *ret, void *in, void *out,
+       void *buffer) {
+  __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
 }
author	Joseph Huber <jhuber6@vols.utk.edu>	2023-03-22 19:58:08 -0500
committer	Joseph Huber <jhuber6@vols.utk.edu>	2023-03-22 20:08:08 -0500
commit	ae63b1a5767b89fe5af140365f9e3ccf74feb1f0 (patch)
tree	cce41857a8152796b6a08f06c16c44dc4c777d8b /libc/startup
parent	9855fe4568770947abf6c465c513dfd4a6c6dca6 (diff)
download	llvm-ae63b1a5767b89fe5af140365f9e3ccf74feb1f0.zip llvm-ae63b1a5767b89fe5af140365f9e3ccf74feb1f0.tar.gz llvm-ae63b1a5767b89fe5af140365f9e3ccf74feb1f0.tar.bz2