aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/IR/Intrinsics.td4
-rw-r--r--llvm/include/llvm/IR/IntrinsicsNVVM.td10
-rw-r--r--llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll74
-rw-r--r--llvm/test/Feature/intrinsic-noduplicate.ll9
-rw-r--r--llvm/utils/TableGen/CodeGenIntrinsics.h3
-rw-r--r--llvm/utils/TableGen/CodeGenTarget.cpp3
-rw-r--r--llvm/utils/TableGen/IntrinsicEmitter.cpp12
7 files changed, 109 insertions, 6 deletions
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 755c9c2b..94c7b50 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -69,6 +69,10 @@ class ReadNone<int argNo> : IntrinsicProperty {
def IntrNoReturn : IntrinsicProperty;
+// IntrNoduplicate - Calls to this intrinsic cannot be duplicated.
+// Parallels the noduplicate attribute on LLVM IR functions.
+def IntrNoDuplicate : IntrinsicProperty;
+
//===----------------------------------------------------------------------===//
// Types used by intrinsics.
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index a372c22..7f72ce8b 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -730,15 +730,15 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
// Bar.Sync
def int_cuda_syncthreads : GCCBuiltin<"__syncthreads">,
- Intrinsic<[], [], []>;
+ Intrinsic<[], [], [IntrNoDuplicate]>;
def int_nvvm_barrier0 : GCCBuiltin<"__nvvm_bar0">,
- Intrinsic<[], [], []>;
+ Intrinsic<[], [], [IntrNoDuplicate]>;
def int_nvvm_barrier0_popc : GCCBuiltin<"__nvvm_bar0_popc">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoDuplicate]>;
def int_nvvm_barrier0_and : GCCBuiltin<"__nvvm_bar0_and">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoDuplicate]>;
def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoDuplicate]>;
// Membar
def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
diff --git a/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll b/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
new file mode 100644
index 0000000..64745fc
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+
+; Make sure the call to syncthreads is not duplicate here by the LLVM
+; optimizations, because it has the noduplicate attribute set.
+
+; CHECK: call void @llvm.cuda.syncthreads
+; CHECK-NOT: call void @llvm.cuda.syncthreads
+
+; Function Attrs: nounwind
+define void @foo(float* %output) #1 {
+entry:
+ %output.addr = alloca float*, align 8
+ store float* %output, float** %output.addr, align 8
+ %0 = load float** %output.addr, align 8
+ %arrayidx = getelementptr inbounds float* %0, i64 0
+ %1 = load float* %arrayidx, align 4
+ %conv = fpext float %1 to double
+ %cmp = fcmp olt double %conv, 1.000000e+01
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %2 = load float** %output.addr, align 8
+ %3 = load float* %2, align 4
+ %conv1 = fpext float %3 to double
+ %add = fadd double %conv1, 1.000000e+00
+ %conv2 = fptrunc double %add to float
+ store float %conv2, float* %2, align 4
+ br label %if.end
+
+if.else: ; preds = %entry
+ %4 = load float** %output.addr, align 8
+ %5 = load float* %4, align 4
+ %conv3 = fpext float %5 to double
+ %add4 = fadd double %conv3, 2.000000e+00
+ %conv5 = fptrunc double %add4 to float
+ store float %conv5, float* %4, align 4
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ call void @llvm.cuda.syncthreads()
+ %6 = load float** %output.addr, align 8
+ %arrayidx6 = getelementptr inbounds float* %6, i64 0
+ %7 = load float* %arrayidx6, align 4
+ %conv7 = fpext float %7 to double
+ %cmp8 = fcmp olt double %conv7, 1.000000e+01
+ br i1 %cmp8, label %if.then9, label %if.else13
+
+if.then9: ; preds = %if.end
+ %8 = load float** %output.addr, align 8
+ %9 = load float* %8, align 4
+ %conv10 = fpext float %9 to double
+ %add11 = fadd double %conv10, 3.000000e+00
+ %conv12 = fptrunc double %add11 to float
+ store float %conv12, float* %8, align 4
+ br label %if.end17
+
+if.else13: ; preds = %if.end
+ %10 = load float** %output.addr, align 8
+ %11 = load float* %10, align 4
+ %conv14 = fpext float %11 to double
+ %add15 = fadd double %conv14, 4.000000e+00
+ %conv16 = fptrunc double %add15 to float
+ store float %conv16, float* %10, align 4
+ br label %if.end17
+
+if.end17: ; preds = %if.else13, %if.then9
+ ret void
+}
+
+; Function Attrs: noduplicate nounwind
+declare void @llvm.cuda.syncthreads() #2
+
+!0 = metadata !{void (float*)* @foo, metadata !"kernel", i32 1}
+!1 = metadata !{null, metadata !"align", i32 8}
diff --git a/llvm/test/Feature/intrinsic-noduplicate.ll b/llvm/test/Feature/intrinsic-noduplicate.ll
new file mode 100644
index 0000000..9a2b0ab
--- /dev/null
+++ b/llvm/test/Feature/intrinsic-noduplicate.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; Make sure LLVM knows about the noduplicate attribute on the
+; llvm.cuda.syncthreads intrinsic.
+
+declare void @llvm.cuda.syncthreads()
+
+; CHECK: declare void @llvm.cuda.syncthreads() #[[ATTRNUM:[0-9]+]]
+; CHECK: attributes #[[ATTRNUM]] = { noduplicate nounwind }
diff --git a/llvm/utils/TableGen/CodeGenIntrinsics.h b/llvm/utils/TableGen/CodeGenIntrinsics.h
index edbb18b..06daa97 100644
--- a/llvm/utils/TableGen/CodeGenIntrinsics.h
+++ b/llvm/utils/TableGen/CodeGenIntrinsics.h
@@ -73,6 +73,9 @@ namespace llvm {
/// canThrow - True if the intrinsic can throw.
bool canThrow;
+ /// isNoDuplicate - True if the intrinsic is marked as noduplicate.
+ bool isNoDuplicate;
+
/// isNoReturn - True if the intrinsic is no-return.
bool isNoReturn;
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index 1f47675..884af4c 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -446,6 +446,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
isCommutative = false;
canThrow = false;
isNoReturn = false;
+ isNoDuplicate = false;
if (DefName.size() <= 4 ||
std::string(DefName.begin(), DefName.begin() + 4) != "int_")
@@ -570,6 +571,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
isCommutative = true;
else if (Property->getName() == "Throws")
canThrow = true;
+ else if (Property->getName() == "IntrNoDuplicate")
+ isNoDuplicate = true;
else if (Property->getName() == "IntrNoReturn")
isNoReturn = true;
else if (Property->isSubClassOf("NoCapture")) {
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index cf6934c..1b28128 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -502,6 +502,9 @@ struct AttributeComparator {
if (L->canThrow != R->canThrow)
return R->canThrow;
+ if (L->isNoDuplicate != R->isNoDuplicate)
+ return R->isNoDuplicate;
+
if (L->isNoReturn != R->isNoReturn)
return R->isNoReturn;
@@ -616,7 +619,8 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
ModRefKind modRef = getModRefKind(intrinsic);
- if (!intrinsic.canThrow || modRef || intrinsic.isNoReturn) {
+ if (!intrinsic.canThrow || modRef || intrinsic.isNoReturn ||
+ intrinsic.isNoDuplicate) {
OS << " const Attribute::AttrKind Atts[] = {";
bool addComma = false;
if (!intrinsic.canThrow) {
@@ -629,6 +633,12 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
OS << "Attribute::NoReturn";
addComma = true;
}
+ if (intrinsic.isNoDuplicate) {
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::NoDuplicate";
+ addComma = true;
+ }
switch (modRef) {
case MRK_none: break;