Diffstat (limited to 'clang')
-rw-r--r--  clang/docs/ReleaseNotes.rst | 3
-rw-r--r--  clang/include/clang/Basic/AMDGPUTypes.def | 1
-rw-r--r--  clang/include/clang/Basic/Builtins.def | 1
-rw-r--r--  clang/include/clang/Basic/DiagnosticParseKinds.td | 2
-rw-r--r--  clang/include/clang/CIR/Dialect/IR/CIROps.td | 39
-rw-r--r--  clang/include/clang/CIR/MissingFeatures.h | 2
-rw-r--r--  clang/lib/AST/ASTContext.cpp | 4
-rw-r--r--  clang/lib/AST/ByteCode/InterpBuiltin.cpp | 145
-rw-r--r--  clang/lib/AST/ByteCode/Pointer.h | 1
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenClass.cpp | 42
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp | 215
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 11
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenFunction.h | 5
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenModule.cpp | 13
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenStmt.cpp | 1
-rw-r--r--  clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 128
-rw-r--r--  clang/lib/CodeGen/BackendUtil.cpp | 5
-rw-r--r--  clang/lib/CodeGen/CGDebugInfo.cpp | 64
-rw-r--r--  clang/lib/CodeGen/CGDebugInfo.h | 1
-rw-r--r--  clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 27
-rw-r--r--  clang/lib/Driver/Driver.cpp | 3
-rw-r--r--  clang/lib/Frontend/ModuleDependencyCollector.cpp | 11
-rw-r--r--  clang/lib/Headers/avxintrin.h | 23
-rw-r--r--  clang/lib/Parse/ParseExprCXX.cpp | 2
-rw-r--r--  clang/lib/Sema/AnalysisBasedWarnings.cpp | 7
-rw-r--r--  clang/lib/Sema/SemaConcept.cpp | 2
-rw-r--r--  clang/lib/Sema/SemaExpr.cpp | 5
-rw-r--r--  clang/lib/Sema/SemaOpenACCAtomic.cpp | 7
-rw-r--r--  clang/lib/Sema/SemaOpenMP.cpp | 3
-rw-r--r--  clang/lib/Sema/SemaTypeTraits.cpp | 17
-rw-r--r--  clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp | 4
-rw-r--r--  clang/test/Analysis/buffer-overlap-decls.c | 23
-rw-r--r--  clang/test/Analysis/buffer-overlap.c | 7
-rw-r--r--  clang/test/CIR/CodeGen/complex.cpp | 37
-rw-r--r--  clang/test/CIR/CodeGen/delete.cpp | 88
-rw-r--r--  clang/test/CIR/CodeGen/lang-c-cpp.cpp | 4
-rw-r--r--  clang/test/CIR/CodeGen/module-filename.cpp | 11
-rw-r--r--  clang/test/CIR/CodeGen/opt-info-attr.cpp | 12
-rw-r--r--  clang/test/CIR/CodeGen/vbase.cpp | 82
-rw-r--r--  clang/test/CIR/CodeGen/vector-ext.cpp | 20
-rw-r--r--  clang/test/CIR/CodeGen/vector.cpp | 20
-rw-r--r--  clang/test/CIR/IR/global-init.cir | 48
-rw-r--r--  clang/test/CodeGen/X86/avx-builtins.c | 3
-rw-r--r--  clang/test/CodeGen/X86/bmi-builtins.c | 99
-rw-r--r--  clang/test/CodeGen/X86/bmi2-builtins.c | 5
-rw-r--r--  clang/test/CodeGen/X86/tbm-builtins.c | 83
-rw-r--r--  clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c | 17
-rw-r--r--  clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp | 7
-rw-r--r--  clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp | 423
-rw-r--r--  clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp | 103
-rw-r--r--  clang/test/CodeGenCXX/gh56652.cpp | 41
-rw-r--r--  clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl | 21
-rw-r--r--  clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl | 28
-rw-r--r--  clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 3
-rw-r--r--  clang/test/Driver/modules-print-library-module-manifest-path.cpp | 8
-rw-r--r--  clang/test/OpenMP/for_reduction_codegen.cpp | 32
-rw-r--r--  clang/test/Parser/cxx2b-lambdas-ext-warns.cpp | 32
-rw-r--r--  clang/test/SemaCXX/amdgpu-image-rsrc.cpp | 17
-rw-r--r--  clang/test/SemaCXX/decltype.cpp | 78
-rw-r--r--  clang/test/SemaCXX/type-traits.cpp | 43
-rw-r--r--  clang/test/SemaOpenCL/amdgpu-image-rsrc.cl | 13
-rw-r--r--  clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp | 12
-rw-r--r--  clang/tools/clang-scan-deps/ClangScanDeps.cpp | 2
63 files changed, 1879 insertions(+), 337 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 270b5d3..79dc0b2 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -432,6 +432,9 @@ Bug Fixes to C++ Support
- Fix an assertion failure when taking the address on a non-type template parameter argument of
object type. (#GH151531)
- Suppress ``-Wdouble-promotion`` when explicitly asked for with C++ list initialization (#GH33409).
+- Fix the result of ``__builtin_is_implicit_lifetime`` for types with a user-provided constructor. (#GH160610)
+- Correctly deduce return types in ``decltype`` expressions. (#GH160497) (#GH56652) (#GH116319) (#GH161196)
+- Fix a crash in the pre-C++23 warning for attributes before a lambda declarator (#GH161070).
Bug Fixes to AST Handling
^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/AMDGPUTypes.def b/clang/include/clang/Basic/AMDGPUTypes.def
index d3dff446..089a72b 100644
--- a/clang/include/clang/Basic/AMDGPUTypes.def
+++ b/clang/include/clang/Basic/AMDGPUTypes.def
@@ -21,6 +21,7 @@
#endif
AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_buffer_rsrc_t", AMDGPUBufferRsrc, AMDGPUBufferRsrcTy, 128, 128, 8)
+AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_texture_t", AMDGPUTexture, AMDGPUTextureTy, 256, 256, 0)
AMDGPU_NAMED_BARRIER_TYPE("__amdgpu_named_workgroup_barrier_t", AMDGPUNamedWorkgroupBarrier, AMDGPUNamedWorkgroupBarrierTy, 128, 32, 0)
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index 9aad00b..b856ad1 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -34,6 +34,7 @@
// Q -> target builtin type, followed by a character to distinguish the builtin type
// Qa -> AArch64 svcount_t builtin type.
// Qb -> AMDGPU __amdgpu_buffer_rsrc_t builtin type.
+// Qt -> AMDGPU __amdgpu_texture_t builtin type.
// E -> ext_vector, followed by the number of elements and the base type.
// X -> _Complex, followed by the base type.
// Y -> ptrdiff_t
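
A prototype string is read left to right: the return type first, then each parameter. A hedged sketch of how the new `Qt` code could appear in a builtin prototype; this entry is hypothetical, not one added by this patch:

  // Hypothetical .def entry: returns an __amdgpu_texture_t handle and takes a
  // vector of 8 ints ("V8i"); "nc" marks the builtin nothrow and const.
  TARGET_BUILTIN(__builtin_amdgcn_example_make_texture, "QtV8i", "nc", "")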
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 4d9e123..c724136 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1141,7 +1141,7 @@ def warn_cxx23_compat_binding_pack : Warning<
def err_capture_default_first : Error<
"capture default must be first">;
def ext_decl_attrs_on_lambda : ExtWarn<
- "%select{an attribute specifier sequence|%0}1 in this position "
+ "%select{an attribute specifier sequence|%1}0 in this position "
"is a C++23 extension">, InGroup<CXX23AttrsOnLambda>;
def ext_lambda_missing_parens : ExtWarn<
"lambda without a parameter clause is a C++23 extension">,
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index bb39444..e1be08c 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -683,8 +683,8 @@ def CIR_ConditionOp : CIR_Op<"condition", [
//===----------------------------------------------------------------------===//
defvar CIR_YieldableScopes = [
- "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "IfOp", "ScopeOp",
- "SwitchOp", "TernaryOp", "WhileOp"
+ "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "GlobalOp", "IfOp",
+ "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp"
];
def CIR_YieldOp : CIR_Op<"yield", [
@@ -1776,7 +1776,9 @@ def CIR_GlobalLinkageKind : CIR_I32EnumAttr<
// is upstreamed.
def CIR_GlobalOp : CIR_Op<"global", [
- DeclareOpInterfaceMethods<CIRGlobalValueInterface>
+ DeclareOpInterfaceMethods<RegionBranchOpInterface>,
+ DeclareOpInterfaceMethods<CIRGlobalValueInterface>,
+ NoRegionArguments
]> {
let summary = "Declare or define a global variable";
let description = [{
@@ -1807,6 +1809,9 @@ def CIR_GlobalOp : CIR_Op<"global", [
UnitAttr:$dso_local,
OptionalAttr<I64Attr>:$alignment);
+ let regions = (region MaxSizedRegion<1>:$ctorRegion,
+ MaxSizedRegion<1>:$dtorRegion);
+
let assemblyFormat = [{
($sym_visibility^)?
(`` $global_visibility^)?
@@ -1815,24 +1820,34 @@ def CIR_GlobalOp : CIR_Op<"global", [
(`comdat` $comdat^)?
(`dso_local` $dso_local^)?
$sym_name
- custom<GlobalOpTypeAndInitialValue>($sym_type, $initial_value)
+ custom<GlobalOpTypeAndInitialValue>($sym_type, $initial_value,
+ $ctorRegion, $dtorRegion)
attr-dict
}];
let extraClassDeclaration = [{
- bool isDeclaration() { return !getInitialValue(); }
+ bool isDeclaration() {
+ return !getInitialValue() && getCtorRegion().empty() && getDtorRegion().empty();
+ }
bool hasInitializer() { return !isDeclaration(); }
}];
let skipDefaultBuilders = 1;
- let builders = [OpBuilder<(ins
- "llvm::StringRef":$sym_name,
- "mlir::Type":$sym_type,
- CArg<"bool", "false">:$isConstant,
- // CIR defaults to external linkage.
- CArg<"cir::GlobalLinkageKind",
- "cir::GlobalLinkageKind::ExternalLinkage">:$linkage)>];
+ let builders = [
+ OpBuilder<(ins
+ "llvm::StringRef":$sym_name,
+ "mlir::Type":$sym_type,
+ CArg<"bool", "false">:$isConstant,
+ // CIR defaults to external linkage.
+ CArg<"cir::GlobalLinkageKind",
+ "cir::GlobalLinkageKind::ExternalLinkage">:$linkage,
+ CArg<"llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>",
+ "nullptr">:$ctorBuilder,
+ CArg<"llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>",
+ "nullptr">:$dtorBuilder)
+ >
+ ];
let hasVerifier = 1;
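
A minimal sketch of how the new builder overload is meant to be called, assuming an OpBuilder `b` and a Location `loc` (the names and `someCirType` are illustrative, not from the patch):

  // Create a global with a ctor region; the dtor region is left empty.
  auto g = b.create<cir::GlobalOp>(
      loc, "g", someCirType, /*isConstant=*/false,
      cir::GlobalLinkageKind::ExternalLinkage,
      /*ctorBuilder=*/[&](mlir::OpBuilder &ob, mlir::Location l) {
        // ... emit dynamic-initialization ops for the global here ...
      },
      /*dtorBuilder=*/nullptr);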
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 0fac1b2..7e59989 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -208,6 +208,7 @@ struct MissingFeatures {
static bool dataLayoutTypeAllocSize() { return false; }
static bool dataLayoutTypeStoreSize() { return false; }
static bool deferredCXXGlobalInit() { return false; }
+ static bool deleteArray() { return false; }
static bool devirtualizeMemberFunction() { return false; }
static bool ehCleanupFlags() { return false; }
static bool ehCleanupScope() { return false; }
@@ -219,6 +220,7 @@ struct MissingFeatures {
static bool emitCondLikelihoodViaExpectIntrinsic() { return false; }
static bool emitLifetimeMarkers() { return false; }
static bool emitLValueAlignmentAssumption() { return false; }
+ static bool emitNullCheckForDeleteCalls() { return false; }
static bool emitNullabilityCheck() { return false; }
static bool emitTypeCheck() { return false; }
static bool emitTypeMetadataCodeForVCall() { return false; }
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 61dd330..0fd0e7e 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -12590,6 +12590,10 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
Type = Context.AMDGPUBufferRsrcTy;
break;
}
+ case 't': {
+ Type = Context.AMDGPUTextureTy;
+ break;
+ }
default:
llvm_unreachable("Unexpected target builtin type");
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 891344d..a2e97fc 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -1294,95 +1294,6 @@ static bool interp__builtin_assume_aligned(InterpState &S, CodePtr OpPC,
return true;
}
-static bool interp__builtin_ia32_bextr(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call) {
- if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() ||
- !Call->getArg(1)->getType()->isIntegerType())
- return false;
-
- APSInt Index = popToAPSInt(S, Call->getArg(1));
- APSInt Val = popToAPSInt(S, Call->getArg(0));
-
- unsigned BitWidth = Val.getBitWidth();
- uint64_t Shift = Index.extractBitsAsZExtValue(8, 0);
- uint64_t Length = Index.extractBitsAsZExtValue(8, 8);
- Length = Length > BitWidth ? BitWidth : Length;
-
- // Handle out of bounds cases.
- if (Length == 0 || Shift >= BitWidth) {
- pushInteger(S, 0, Call->getType());
- return true;
- }
-
- uint64_t Result = Val.getZExtValue() >> Shift;
- Result &= llvm::maskTrailingOnes<uint64_t>(Length);
- pushInteger(S, Result, Call->getType());
- return true;
-}
-
-static bool interp__builtin_ia32_bzhi(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call) {
- QualType CallType = Call->getType();
- if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() ||
- !Call->getArg(1)->getType()->isIntegerType() ||
- !CallType->isIntegerType())
- return false;
-
- APSInt Idx = popToAPSInt(S, Call->getArg(1));
- APSInt Val = popToAPSInt(S, Call->getArg(0));
-
- unsigned BitWidth = Val.getBitWidth();
- uint64_t Index = Idx.extractBitsAsZExtValue(8, 0);
-
- if (Index < BitWidth)
- Val.clearHighBits(BitWidth - Index);
-
- pushInteger(S, Val, CallType);
- return true;
-}
-
-static bool interp__builtin_ia32_pdep(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call) {
- if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() ||
- !Call->getArg(1)->getType()->isIntegerType())
- return false;
-
- APSInt Mask = popToAPSInt(S, Call->getArg(1));
- APSInt Val = popToAPSInt(S, Call->getArg(0));
-
- unsigned BitWidth = Val.getBitWidth();
- APInt Result = APInt::getZero(BitWidth);
- for (unsigned I = 0, P = 0; I != BitWidth; ++I) {
- if (Mask[I])
- Result.setBitVal(I, Val[P++]);
- }
- pushInteger(S, std::move(Result), Call->getType());
- return true;
-}
-
-static bool interp__builtin_ia32_pext(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call) {
- if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() ||
- !Call->getArg(1)->getType()->isIntegerType())
- return false;
-
- APSInt Mask = popToAPSInt(S, Call->getArg(1));
- APSInt Val = popToAPSInt(S, Call->getArg(0));
-
- unsigned BitWidth = Val.getBitWidth();
- APInt Result = APInt::getZero(BitWidth);
- for (unsigned I = 0, P = 0; I != BitWidth; ++I) {
- if (Mask[I])
- Result.setBitVal(P++, Val[I]);
- }
- pushInteger(S, std::move(Result), Call->getType());
- return true;
-}
-
/// (CarryIn, LHS, RHS, Result)
static bool interp__builtin_ia32_addcarry_subborrow(InterpState &S,
CodePtr OpPC,
@@ -3275,11 +3186,37 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_bextr_u64:
case clang::X86::BI__builtin_ia32_bextri_u32:
case clang::X86::BI__builtin_ia32_bextri_u64:
- return interp__builtin_ia32_bextr(S, OpPC, Frame, Call);
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &Val, const APSInt &Idx) {
+ unsigned BitWidth = Val.getBitWidth();
+ uint64_t Shift = Idx.extractBitsAsZExtValue(8, 0);
+ uint64_t Length = Idx.extractBitsAsZExtValue(8, 8);
+ if (Length > BitWidth) {
+ Length = BitWidth;
+ }
+
+ // Handle out of bounds cases.
+ if (Length == 0 || Shift >= BitWidth)
+ return APInt(BitWidth, 0);
+
+ uint64_t Result = Val.getZExtValue() >> Shift;
+ Result &= llvm::maskTrailingOnes<uint64_t>(Length);
+ return APInt(BitWidth, Result);
+ });
case clang::X86::BI__builtin_ia32_bzhi_si:
case clang::X86::BI__builtin_ia32_bzhi_di:
- return interp__builtin_ia32_bzhi(S, OpPC, Frame, Call);
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &Val, const APSInt &Idx) {
+ unsigned BitWidth = Val.getBitWidth();
+ uint64_t Index = Idx.extractBitsAsZExtValue(8, 0);
+ APSInt Result = Val;
+
+ if (Index < BitWidth)
+ Result.clearHighBits(BitWidth - Index);
+
+ return Result;
+ });
case clang::X86::BI__builtin_ia32_lzcnt_u16:
case clang::X86::BI__builtin_ia32_lzcnt_u32:
@@ -3299,11 +3236,33 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case clang::X86::BI__builtin_ia32_pdep_si:
case clang::X86::BI__builtin_ia32_pdep_di:
- return interp__builtin_ia32_pdep(S, OpPC, Frame, Call);
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &Val, const APSInt &Mask) {
+ unsigned BitWidth = Val.getBitWidth();
+ APInt Result = APInt::getZero(BitWidth);
+
+ for (unsigned I = 0, P = 0; I != BitWidth; ++I) {
+ if (Mask[I])
+ Result.setBitVal(I, Val[P++]);
+ }
+
+ return Result;
+ });
case clang::X86::BI__builtin_ia32_pext_si:
case clang::X86::BI__builtin_ia32_pext_di:
- return interp__builtin_ia32_pext(S, OpPC, Frame, Call);
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &Val, const APSInt &Mask) {
+ unsigned BitWidth = Val.getBitWidth();
+ APInt Result = APInt::getZero(BitWidth);
+
+ for (unsigned I = 0, P = 0; I != BitWidth; ++I) {
+ if (Mask[I])
+ Result.setBitVal(P++, Val[I]);
+ }
+
+ return Result;
+ });
case clang::X86::BI__builtin_ia32_addcarryx_u32:
case clang::X86::BI__builtin_ia32_addcarryx_u64:
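
Routing these through interp__builtin_elementwise_int_binop keeps them constant-foldable in the bytecode interpreter. A sketch of the semantics the lambdas implement, with values worked out by hand (assumes the relevant BMI/BMI2 target features are enabled, as the updated bmi-builtins.c tests presumably do):

  // bextr control byte layout: bits [0,8) = start, bits [8,16) = length.
  static_assert(__builtin_ia32_bextr_u32(0xABCD, 0x0804) == 0xBC); // 8 bits from bit 4
  // pdep scatters the low bits of the value to the set bits of the mask;
  // pext gathers them back.
  static_assert(__builtin_ia32_pdep_si(0b1011, 0xF0) == 0xB0);
  static_assert(__builtin_ia32_pext_si(0xB0, 0xF0) == 0b1011);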
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index af89b66..cd738ce 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -262,6 +262,7 @@ public:
case Storage::Typeid:
return false;
}
+ llvm_unreachable("Unknown clang::interp::Storage enum");
}
/// Checks if the pointer is live.
bool isLive() const {
diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
index cb8fe6c..9d12a13 100644
--- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
@@ -951,28 +951,37 @@ Address CIRGenFunction::getAddressOfBaseClass(
bool nullCheckValue, SourceLocation loc) {
assert(!path.empty() && "Base path should not be empty!");
+ CastExpr::path_const_iterator start = path.begin();
+ const CXXRecordDecl *vBase = nullptr;
+
if ((*path.begin())->isVirtual()) {
- // The implementation here is actually complete, but let's flag this
- // as an error until the rest of the virtual base class support is in place.
- cgm.errorNYI(loc, "getAddrOfBaseClass: virtual base");
- return Address::invalid();
+ vBase = (*start)->getType()->castAsCXXRecordDecl();
+ ++start;
}
// Compute the static offset of the ultimate destination within its
// allocating subobject (the virtual base, if there is one, or else
// the "complete" object that we see).
- CharUnits nonVirtualOffset =
- cgm.computeNonVirtualBaseClassOffset(derived, path);
+ CharUnits nonVirtualOffset = cgm.computeNonVirtualBaseClassOffset(
+ vBase ? vBase : derived, {start, path.end()});
+
+ // If there's a virtual step, we can sometimes "devirtualize" it.
+ // For now, that's limited to when the derived type is final.
+ // TODO: "devirtualize" this for accesses to known-complete objects.
+ if (vBase && derived->hasAttr<FinalAttr>()) {
+ const ASTRecordLayout &layout = getContext().getASTRecordLayout(derived);
+ CharUnits vBaseOffset = layout.getVBaseClassOffset(vBase);
+ nonVirtualOffset += vBaseOffset;
+ vBase = nullptr; // we no longer have a virtual step
+ }
// Get the base pointer type.
mlir::Type baseValueTy = convertType((path.end()[-1])->getType());
assert(!cir::MissingFeatures::addressSpace());
- // The if statement here is redundant now, but it will be needed when we add
- // support for virtual base classes.
// If there is no virtual base, use cir.base_class_addr. It takes care of
// the adjustment and the null pointer check.
- if (nonVirtualOffset.isZero()) {
+ if (nonVirtualOffset.isZero() && !vBase) {
assert(!cir::MissingFeatures::sanitizers());
return builder.createBaseClassAddr(getLoc(loc), value, baseValueTy, 0,
/*assumeNotNull=*/true);
@@ -980,10 +989,17 @@ Address CIRGenFunction::getAddressOfBaseClass(
assert(!cir::MissingFeatures::sanitizers());
- // Apply the offset
- value = builder.createBaseClassAddr(getLoc(loc), value, baseValueTy,
- nonVirtualOffset.getQuantity(),
- /*assumeNotNull=*/true);
+ // Compute the virtual offset.
+ mlir::Value virtualOffset = nullptr;
+ if (vBase) {
+ virtualOffset = cgm.getCXXABI().getVirtualBaseClassOffset(
+ getLoc(loc), *this, value, derived, vBase);
+ }
+
+ // Apply both offsets.
+ value = applyNonVirtualAndVirtualOffset(
+ getLoc(loc), *this, value, nonVirtualOffset, virtualOffset, derived,
+      vBase, baseValueTy, !nullCheckValue);
// Cast to the destination type.
value = value.withElementType(builder, baseValueTy);
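
The source-level shape this enables, as a sketch (the new vbase.cpp test carries the full CHECK lines):

  struct Base { int b; };
  struct Derived final : virtual Base { int d; };
  // Upcasting through the virtual base: because Derived is final, the virtual
  // step folds into a constant offset; otherwise the offset is loaded through
  // the C++ ABI's vtable machinery.
  Base *upcast(Derived *p) { return p; }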
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
index 1f7e3dd..83208bf 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp
@@ -210,6 +210,60 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorCall(
return emitCall(fnInfo, callee, returnValue, args, nullptr, loc);
}
+namespace {
+/// The parameters to pass to a usual operator delete.
+struct UsualDeleteParams {
+ TypeAwareAllocationMode typeAwareDelete = TypeAwareAllocationMode::No;
+ bool destroyingDelete = false;
+ bool size = false;
+ AlignedAllocationMode alignment = AlignedAllocationMode::No;
+};
+} // namespace
+
+// FIXME(cir): this should be shared with LLVM codegen
+static UsualDeleteParams getUsualDeleteParams(const FunctionDecl *fd) {
+ UsualDeleteParams params;
+
+ const FunctionProtoType *fpt = fd->getType()->castAs<FunctionProtoType>();
+ auto ai = fpt->param_type_begin(), ae = fpt->param_type_end();
+
+ if (fd->isTypeAwareOperatorNewOrDelete()) {
+ params.typeAwareDelete = TypeAwareAllocationMode::Yes;
+ assert(ai != ae);
+ ++ai;
+ }
+
+ // The first argument after the type-identity parameter (if any) is
+ // always a void* (or C* for a destroying operator delete for class
+ // type C).
+ ++ai;
+
+ // The next parameter may be a std::destroying_delete_t.
+ if (fd->isDestroyingOperatorDelete()) {
+ params.destroyingDelete = true;
+ assert(ai != ae);
+ ++ai;
+ }
+
+ // Figure out what other parameters we should be implicitly passing.
+ if (ai != ae && (*ai)->isIntegerType()) {
+ params.size = true;
+ ++ai;
+ } else {
+ assert(!isTypeAwareAllocation(params.typeAwareDelete));
+ }
+
+ if (ai != ae && (*ai)->isAlignValT()) {
+ params.alignment = AlignedAllocationMode::Yes;
+ ++ai;
+ } else {
+ assert(!isTypeAwareAllocation(params.typeAwareDelete));
+ }
+
+ assert(ai == ae && "unexpected usual deallocation function parameter");
+ return params;
+}
+
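
For reference, the "usual" deallocation signatures the walk above classifies, as a sketch (aligned allocation shown; a destroying delete would instead take a class pointer plus std::destroying_delete_t):

  #include <cstddef>
  #include <new>
  struct S {
    void operator delete(void *);                                // minimal form
    void operator delete(void *, std::size_t);                   // size = true
    void operator delete(void *, std::align_val_t);              // alignment = Yes
    void operator delete(void *, std::size_t, std::align_val_t); // size + alignment
  };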
static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e,
unsigned minElements,
mlir::Value &numElements,
@@ -332,6 +386,117 @@ static RValue emitNewDeleteCall(CIRGenFunction &cgf,
return rv;
}
+namespace {
+/// Calls the given 'operator delete' on a single object.
+struct CallObjectDelete final : EHScopeStack::Cleanup {
+ mlir::Value ptr;
+ const FunctionDecl *operatorDelete;
+ QualType elementType;
+
+ CallObjectDelete(mlir::Value ptr, const FunctionDecl *operatorDelete,
+ QualType elementType)
+ : ptr(ptr), operatorDelete(operatorDelete), elementType(elementType) {}
+
+ void emit(CIRGenFunction &cgf) override {
+ cgf.emitDeleteCall(operatorDelete, ptr, elementType);
+ }
+
+ // This is a placeholder until EHCleanupScope is implemented.
+ size_t getSize() const override {
+ assert(!cir::MissingFeatures::ehCleanupScope());
+ return sizeof(CallObjectDelete);
+ }
+};
+} // namespace
+
+/// Emit the code for deleting a single object.
+static void emitObjectDelete(CIRGenFunction &cgf, const CXXDeleteExpr *de,
+ Address ptr, QualType elementType) {
+ // C++11 [expr.delete]p3:
+ // If the static type of the object to be deleted is different from its
+ // dynamic type, the static type shall be a base class of the dynamic type
+ // of the object to be deleted and the static type shall have a virtual
+ // destructor or the behavior is undefined.
+ assert(!cir::MissingFeatures::emitTypeCheck());
+
+ const FunctionDecl *operatorDelete = de->getOperatorDelete();
+ assert(!operatorDelete->isDestroyingOperatorDelete());
+
+ // Find the destructor for the type, if applicable. If the
+ // destructor is virtual, we'll just emit the vcall and return.
+ const CXXDestructorDecl *dtor = nullptr;
+ if (const auto *rd = elementType->getAsCXXRecordDecl()) {
+ if (rd->hasDefinition() && !rd->hasTrivialDestructor()) {
+ dtor = rd->getDestructor();
+
+ if (dtor->isVirtual()) {
+ cgf.cgm.errorNYI(de->getSourceRange(),
+ "emitObjectDelete: virtual destructor");
+ }
+ }
+ }
+
+ // Make sure that we call delete even if the dtor throws.
+  // This doesn't have to be a conditional cleanup because we're going
+ // to pop it off in a second.
+ cgf.ehStack.pushCleanup<CallObjectDelete>(
+ NormalAndEHCleanup, ptr.getPointer(), operatorDelete, elementType);
+
+ if (dtor) {
+ cgf.emitCXXDestructorCall(dtor, Dtor_Complete,
+ /*ForVirtualBase=*/false,
+ /*Delegating=*/false, ptr, elementType);
+ } else if (elementType.getObjCLifetime()) {
+ assert(!cir::MissingFeatures::objCLifetime());
+ cgf.cgm.errorNYI(de->getSourceRange(), "emitObjectDelete: ObjCLifetime");
+ }
+
+  // In traditional LLVM codegen, null checks are emitted to skip the delete
+  // call on a null pointer. In CIR we optimize for size by default, so the
+  // null check should be added by this function's callers.
+ assert(!cir::MissingFeatures::emitNullCheckForDeleteCalls());
+
+ cgf.popCleanupBlock();
+}
+
+void CIRGenFunction::emitCXXDeleteExpr(const CXXDeleteExpr *e) {
+ const Expr *arg = e->getArgument();
+ Address ptr = emitPointerWithAlignment(arg);
+
+ // Null check the pointer.
+ //
+ // We could avoid this null check if we can determine that the object
+ // destruction is trivial and doesn't require an array cookie; we can
+ // unconditionally perform the operator delete call in that case. For now, we
+ // assume that deleted pointers are null rarely enough that it's better to
+ // keep the branch. This might be worth revisiting for a -O0 code size win.
+ //
+  // CIR note: for now we emit the code-size-friendly form by default, as
+  // mentioned in `emitObjectDelete`.
+ assert(!cir::MissingFeatures::emitNullCheckForDeleteCalls());
+ QualType deleteTy = e->getDestroyedType();
+
+ // A destroying operator delete overrides the entire operation of the
+ // delete expression.
+ if (e->getOperatorDelete()->isDestroyingOperatorDelete()) {
+ cgm.errorNYI(e->getSourceRange(),
+ "emitCXXDeleteExpr: destroying operator delete");
+ return;
+ }
+
+ // We might be deleting a pointer to array.
+ deleteTy = getContext().getBaseElementType(deleteTy);
+ ptr = ptr.withElementType(builder, convertTypeForMem(deleteTy));
+
+ if (e->isArrayForm()) {
+ assert(!cir::MissingFeatures::deleteArray());
+ cgm.errorNYI(e->getSourceRange(), "emitCXXDeleteExpr: array delete");
+ return;
+ } else {
+ emitObjectDelete(*this, e, ptr, deleteTy);
+ }
+}
+
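
What this handles end to end versus what still falls back to errorNYI, as a sketch in the spirit of the new delete.cpp test:

  struct A { ~A(); };
  void f(A *p) { delete p; }   // lowered: dtor call, then operator delete
  void g(A *p) { delete[] p; } // array form: still errorNYI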
mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) {
// The element type being allocated.
QualType allocType = getContext().getBaseElementType(e->getAllocatedType());
@@ -443,3 +608,53 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) {
allocSizeWithoutCookie);
return result.getPointer();
}
+
+void CIRGenFunction::emitDeleteCall(const FunctionDecl *deleteFD,
+ mlir::Value ptr, QualType deleteTy) {
+ assert(!cir::MissingFeatures::deleteArray());
+
+ const auto *deleteFTy = deleteFD->getType()->castAs<FunctionProtoType>();
+ CallArgList deleteArgs;
+
+ UsualDeleteParams params = getUsualDeleteParams(deleteFD);
+ auto paramTypeIt = deleteFTy->param_type_begin();
+
+ // Pass std::type_identity tag if present
+ if (isTypeAwareAllocation(params.typeAwareDelete))
+ cgm.errorNYI(deleteFD->getSourceRange(),
+ "emitDeleteCall: type aware delete");
+
+ // Pass the pointer itself.
+ QualType argTy = *paramTypeIt++;
+ mlir::Value deletePtr =
+ builder.createBitcast(ptr.getLoc(), ptr, convertType(argTy));
+ deleteArgs.add(RValue::get(deletePtr), argTy);
+
+ // Pass the std::destroying_delete tag if present.
+ if (params.destroyingDelete)
+ cgm.errorNYI(deleteFD->getSourceRange(),
+ "emitDeleteCall: destroying delete");
+
+ // Pass the size if the delete function has a size_t parameter.
+ if (params.size) {
+ QualType sizeType = *paramTypeIt++;
+ CharUnits deleteTypeSize = getContext().getTypeSizeInChars(deleteTy);
+ assert(mlir::isa<cir::IntType>(convertType(sizeType)) &&
+ "expected cir::IntType");
+ cir::ConstantOp size = builder.getConstInt(
+ *currSrcLoc, convertType(sizeType), deleteTypeSize.getQuantity());
+
+ deleteArgs.add(RValue::get(size), sizeType);
+ }
+
+ // Pass the alignment if the delete function has an align_val_t parameter.
+ if (isAlignedAllocation(params.alignment))
+ cgm.errorNYI(deleteFD->getSourceRange(),
+ "emitDeleteCall: aligned allocation");
+
+ assert(paramTypeIt == deleteFTy->param_type_end() &&
+ "unknown parameter to usual delete function");
+
+ // Emit the call to delete.
+ emitNewDeleteCall(*this, deleteFD, deleteFTy, deleteArgs);
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index bd09d78..f4bbced 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -676,6 +676,10 @@ public:
mlir::Value VisitRealImag(const UnaryOperator *e,
QualType promotionType = QualType());
+ mlir::Value VisitUnaryExtension(const UnaryOperator *e) {
+ return Visit(e->getSubExpr());
+ }
+
mlir::Value VisitCXXDefaultInitExpr(CXXDefaultInitExpr *die) {
CIRGenFunction::CXXDefaultInitExprScope scope(cgf, die);
return Visit(die->getExpr());
@@ -687,6 +691,10 @@ public:
mlir::Value VisitCXXNewExpr(const CXXNewExpr *e) {
return cgf.emitCXXNewExpr(e);
}
+ mlir::Value VisitCXXDeleteExpr(const CXXDeleteExpr *e) {
+ cgf.emitCXXDeleteExpr(e);
+ return {};
+ }
mlir::Value VisitCXXThrowExpr(const CXXThrowExpr *e) {
cgf.emitCXXThrowExpr(e);
@@ -1274,9 +1282,6 @@ mlir::Value ScalarExprEmitter::emitPromoted(const Expr *e,
} else if (const auto *uo = dyn_cast<UnaryOperator>(e)) {
switch (uo->getOpcode()) {
case UO_Imag:
- cgf.cgm.errorNYI(e->getSourceRange(),
- "ScalarExprEmitter::emitPromoted unary imag");
- return {};
case UO_Real:
return VisitRealImag(uo, promotionType);
case UO_Minus:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 166435f..ef07db3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1197,6 +1197,8 @@ public:
bool delegating, Address thisAddr,
CallArgList &args, clang::SourceLocation loc);
+ void emitCXXDeleteExpr(const CXXDeleteExpr *e);
+
void emitCXXDestructorCall(const CXXDestructorDecl *dd, CXXDtorType type,
bool forVirtualBase, bool delegating,
Address thisAddr, QualType thisTy);
@@ -1244,6 +1246,9 @@ public:
void emitDelegatingCXXConstructorCall(const CXXConstructorDecl *ctor,
const FunctionArgList &args);
+ void emitDeleteCall(const FunctionDecl *deleteFD, mlir::Value ptr,
+ QualType deleteTy);
+
mlir::LogicalResult emitDoStmt(const clang::DoStmt &s);
/// Emit an expression as an initializer for an object (variable, field, etc.)
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index eef23a0..c977ff9 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -119,6 +119,19 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext,
cir::OptInfoAttr::get(&mlirContext,
cgo.OptimizationLevel,
cgo.OptimizeSize));
+ // Set the module name to be the name of the main file. TranslationUnitDecl
+ // often contains invalid source locations and isn't a reliable source for the
+ // module location.
+ FileID mainFileId = astContext.getSourceManager().getMainFileID();
+ const FileEntry &mainFile =
+ *astContext.getSourceManager().getFileEntryForID(mainFileId);
+ StringRef path = mainFile.tryGetRealPathName();
+ if (!path.empty()) {
+ theModule.setSymName(path);
+ theModule->setLoc(mlir::FileLineColLoc::get(&mlirContext, path,
+ /*line=*/0,
+ /*column=*/0));
+ }
}
CIRGenModule::~CIRGenModule() = default;
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
index e842892..644c383 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
@@ -216,6 +216,7 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s,
case Stmt::OMPSimdDirectiveClass:
case Stmt::OMPTileDirectiveClass:
case Stmt::OMPUnrollDirectiveClass:
+ case Stmt::OMPFuseDirectiveClass:
case Stmt::OMPForDirectiveClass:
case Stmt::OMPForSimdDirectiveClass:
case Stmt::OMPSectionsDirectiveClass:
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 58ef500..fb87036 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -1355,9 +1355,11 @@ mlir::LogicalResult cir::GlobalOp::verify() {
return success();
}
-void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState,
- llvm::StringRef sym_name, mlir::Type sym_type,
- bool isConstant, cir::GlobalLinkageKind linkage) {
+void cir::GlobalOp::build(
+ OpBuilder &odsBuilder, OperationState &odsState, llvm::StringRef sym_name,
+ mlir::Type sym_type, bool isConstant, cir::GlobalLinkageKind linkage,
+ function_ref<void(OpBuilder &, Location)> ctorBuilder,
+ function_ref<void(OpBuilder &, Location)> dtorBuilder) {
odsState.addAttribute(getSymNameAttrName(odsState.name),
odsBuilder.getStringAttr(sym_name));
odsState.addAttribute(getSymTypeAttrName(odsState.name),
@@ -1370,26 +1372,88 @@ void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState,
cir::GlobalLinkageKindAttr::get(odsBuilder.getContext(), linkage);
odsState.addAttribute(getLinkageAttrName(odsState.name), linkageAttr);
+ Region *ctorRegion = odsState.addRegion();
+ if (ctorBuilder) {
+ odsBuilder.createBlock(ctorRegion);
+ ctorBuilder(odsBuilder, odsState.location);
+ }
+
+ Region *dtorRegion = odsState.addRegion();
+ if (dtorBuilder) {
+ odsBuilder.createBlock(dtorRegion);
+ dtorBuilder(odsBuilder, odsState.location);
+ }
+
odsState.addAttribute(getGlobalVisibilityAttrName(odsState.name),
cir::VisibilityAttr::get(odsBuilder.getContext()));
}
+/// Return the regions that may be selected during the flow of control. From
+/// the parent operation, the non-empty ctor and dtor regions are possible
+/// successors; from inside either region, control flows back to the parent.
+void cir::GlobalOp::getSuccessorRegions(
+ mlir::RegionBranchPoint point, SmallVectorImpl<RegionSuccessor> &regions) {
+ // The `ctor` and `dtor` regions always branch back to the parent operation.
+ if (!point.isParent()) {
+ regions.push_back(RegionSuccessor());
+ return;
+ }
+
+ // Don't consider the ctor region if it is empty.
+ Region *ctorRegion = &this->getCtorRegion();
+ if (ctorRegion->empty())
+ ctorRegion = nullptr;
+
+ // Don't consider the dtor region if it is empty.
+  Region *dtorRegion = &this->getDtorRegion();
+ if (dtorRegion->empty())
+ dtorRegion = nullptr;
+
+ // If the condition isn't constant, both regions may be executed.
+ if (ctorRegion)
+ regions.push_back(RegionSuccessor(ctorRegion));
+ if (dtorRegion)
+ regions.push_back(RegionSuccessor(dtorRegion));
+}
+
static void printGlobalOpTypeAndInitialValue(OpAsmPrinter &p, cir::GlobalOp op,
- TypeAttr type,
- Attribute initAttr) {
+ TypeAttr type, Attribute initAttr,
+ mlir::Region &ctorRegion,
+ mlir::Region &dtorRegion) {
+ auto printType = [&]() { p << ": " << type; };
if (!op.isDeclaration()) {
p << "= ";
- // This also prints the type...
- if (initAttr)
- printConstant(p, initAttr);
+ if (!ctorRegion.empty()) {
+ p << "ctor ";
+ printType();
+ p << " ";
+ p.printRegion(ctorRegion,
+ /*printEntryBlockArgs=*/false,
+ /*printBlockTerminators=*/false);
+ } else {
+ // This also prints the type...
+ if (initAttr)
+ printConstant(p, initAttr);
+ }
+
+ if (!dtorRegion.empty()) {
+ p << " dtor ";
+ p.printRegion(dtorRegion,
+ /*printEntryBlockArgs=*/false,
+ /*printBlockTerminators=*/false);
+ }
} else {
- p << ": " << type;
+ printType();
}
}
-static ParseResult
-parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr,
- Attribute &initialValueAttr) {
+static ParseResult parseGlobalOpTypeAndInitialValue(OpAsmParser &parser,
+ TypeAttr &typeAttr,
+ Attribute &initialValueAttr,
+ mlir::Region &ctorRegion,
+ mlir::Region &dtorRegion) {
mlir::Type opTy;
if (parser.parseOptionalEqual().failed()) {
// Absence of equal means a declaration, so we need to parse the type.
@@ -1397,16 +1461,38 @@ parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr,
if (parser.parseColonType(opTy))
return failure();
} else {
- // Parse constant with initializer, examples:
- // cir.global @y = #cir.fp<1.250000e+00> : !cir.double
- // cir.global @rgb = #cir.const_array<[...] : !cir.array<i8 x 3>>
- if (parseConstantValue(parser, initialValueAttr).failed())
- return failure();
+      // Parse constructor, example:
+ // cir.global @rgb = ctor : type { ... }
+ if (!parser.parseOptionalKeyword("ctor")) {
+ if (parser.parseColonType(opTy))
+ return failure();
+ auto parseLoc = parser.getCurrentLocation();
+ if (parser.parseRegion(ctorRegion, /*arguments=*/{}, /*argTypes=*/{}))
+ return failure();
+ if (ensureRegionTerm(parser, ctorRegion, parseLoc).failed())
+ return failure();
+ } else {
+ // Parse constant with initializer, examples:
+ // cir.global @y = 3.400000e+00 : f32
+ // cir.global @rgb = #cir.const_array<[...] : !cir.array<i8 x 3>>
+ if (parseConstantValue(parser, initialValueAttr).failed())
+ return failure();
+
+ assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) &&
+ "Non-typed attrs shouldn't appear here.");
+ auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr);
+ opTy = typedAttr.getType();
+ }
- assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) &&
- "Non-typed attrs shouldn't appear here.");
- auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr);
- opTy = typedAttr.getType();
+ // Parse destructor, example:
+ // dtor { ... }
+ if (!parser.parseOptionalKeyword("dtor")) {
+ auto parseLoc = parser.getCurrentLocation();
+ if (parser.parseRegion(dtorRegion, /*arguments=*/{}, /*argTypes=*/{}))
+ return failure();
+ if (ensureRegionTerm(parser, dtorRegion, parseLoc).failed())
+ return failure();
+ }
}
typeAttr = TypeAttr::get(opTy);
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 57db20f7..64f1917 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1090,8 +1090,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (std::optional<GCOVOptions> Options =
getGCOVOptions(CodeGenOpts, LangOpts))
PB.registerPipelineStartEPCallback(
- [Options](ModulePassManager &MPM, OptimizationLevel Level) {
- MPM.addPass(GCOVProfilerPass(*Options));
+ [this, Options](ModulePassManager &MPM, OptimizationLevel Level) {
+ MPM.addPass(
+ GCOVProfilerPass(*Options, CI.getVirtualFileSystemPtr()));
});
if (std::optional<InstrProfOptions> Options =
getInstrProfOptions(CodeGenOpts, LangOpts))
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 12c7d48..fee6bc0 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -26,6 +26,7 @@
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
+#include "clang/AST/LambdaCapture.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/VTableBuilder.h"
@@ -1903,46 +1904,61 @@ CGDebugInfo::createInlinedSubprogram(StringRef FuncName,
return SP;
}
+llvm::StringRef
+CGDebugInfo::GetLambdaCaptureName(const LambdaCapture &Capture) {
+ if (Capture.capturesThis())
+ return CGM.getCodeGenOpts().EmitCodeView ? "__this" : "this";
+
+ assert(Capture.capturesVariable());
+
+ const ValueDecl *CaptureDecl = Capture.getCapturedVar();
+ assert(CaptureDecl && "Expected valid decl for captured variable.");
+
+ return CaptureDecl->getName();
+}
+
void CGDebugInfo::CollectRecordLambdaFields(
const CXXRecordDecl *CXXDecl, SmallVectorImpl<llvm::Metadata *> &elements,
llvm::DIType *RecordTy) {
// For C++11 Lambdas a Field will be the same as a Capture, but the Capture
// has the name and the location of the variable so we should iterate over
// both concurrently.
- const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(CXXDecl);
RecordDecl::field_iterator Field = CXXDecl->field_begin();
unsigned fieldno = 0;
for (CXXRecordDecl::capture_const_iterator I = CXXDecl->captures_begin(),
E = CXXDecl->captures_end();
I != E; ++I, ++Field, ++fieldno) {
- const LambdaCapture &C = *I;
- if (C.capturesVariable()) {
- SourceLocation Loc = C.getLocation();
- assert(!Field->isBitField() && "lambdas don't have bitfield members!");
- ValueDecl *V = C.getCapturedVar();
- StringRef VName = V->getName();
- llvm::DIFile *VUnit = getOrCreateFile(Loc);
- auto Align = getDeclAlignIfRequired(V, CGM.getContext());
- llvm::DIType *FieldType = createFieldType(
- VName, Field->getType(), Loc, Field->getAccess(),
- layout.getFieldOffset(fieldno), Align, VUnit, RecordTy, CXXDecl);
- elements.push_back(FieldType);
- } else if (C.capturesThis()) {
+ const LambdaCapture &Capture = *I;
+ const uint64_t FieldOffset =
+ CGM.getContext().getASTRecordLayout(CXXDecl).getFieldOffset(fieldno);
+
+ assert(!Field->isBitField() && "lambdas don't have bitfield members!");
+
+ SourceLocation Loc;
+ uint32_t Align = 0;
+
+ if (Capture.capturesThis()) {
// TODO: Need to handle 'this' in some way by probably renaming the
// this of the lambda class and having a field member of 'this' or
// by using AT_object_pointer for the function and having that be
// used as 'this' for semantic references.
- FieldDecl *f = *Field;
- llvm::DIFile *VUnit = getOrCreateFile(f->getLocation());
- QualType type = f->getType();
- StringRef ThisName =
- CGM.getCodeGenOpts().EmitCodeView ? "__this" : "this";
- llvm::DIType *fieldType = createFieldType(
- ThisName, type, f->getLocation(), f->getAccess(),
- layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl);
-
- elements.push_back(fieldType);
+ Loc = Field->getLocation();
+ } else if (Capture.capturesVariable()) {
+ Loc = Capture.getLocation();
+
+ const ValueDecl *CaptureDecl = Capture.getCapturedVar();
+ assert(CaptureDecl && "Expected valid decl for captured variable.");
+
+ Align = getDeclAlignIfRequired(CaptureDecl, CGM.getContext());
+ } else {
+ continue;
}
+
+ llvm::DIFile *VUnit = getOrCreateFile(Loc);
+
+ elements.push_back(createFieldType(
+ GetLambdaCaptureName(Capture), Field->getType(), Loc,
+ Field->getAccess(), FieldOffset, Align, VUnit, RecordTy, CXXDecl));
}
}
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index f860773..78c3eb9 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -397,6 +397,7 @@ private:
void CollectRecordFields(const RecordDecl *Decl, llvm::DIFile *F,
SmallVectorImpl<llvm::Metadata *> &E,
llvm::DICompositeType *RecordTy);
+ llvm::StringRef GetLambdaCaptureName(const LambdaCapture &Capture);
/// If the C++ class has vtable info then insert appropriate debug
/// info entry in EltTys vector.
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 07cf08c..6596ec0 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -192,9 +192,17 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
return CGF.Builder.CreateCall(F, {Src0, Src1});
}
+static inline StringRef mapScopeToSPIRV(StringRef AMDGCNScope) {
+ if (AMDGCNScope == "agent")
+ return "device";
+ if (AMDGCNScope == "wavefront")
+ return "subgroup";
+ return AMDGCNScope;
+}
+
// For processing memory ordering and memory scope arguments of various
// amdgcn builtins.
-// \p Order takes a C++11 comptabile memory-ordering specifier and converts
+// \p Order takes a C++11 compatible memory-ordering specifier and converts
// it into LLVM's memory ordering specifier using atomic C ABI, and writes
// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
// specific SyncScopeID and writes it to \p SSID.
@@ -227,6 +235,8 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
// Some of the atomic builtins take the scope as a string name.
StringRef scp;
if (llvm::getConstantStringInfo(Scope, scp)) {
+ if (getTarget().getTriple().isSPIRV())
+ scp = mapScopeToSPIRV(scp);
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
return;
}
@@ -238,13 +248,19 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
SSID = llvm::SyncScope::System;
break;
case 1: // __MEMORY_SCOPE_DEVICE
- SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+ if (getTarget().getTriple().isSPIRV())
+ SSID = getLLVMContext().getOrInsertSyncScopeID("device");
+ else
+ SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
break;
case 2: // __MEMORY_SCOPE_WRKGRP
SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
break;
case 3: // __MEMORY_SCOPE_WVFRNT
- SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
+ if (getTarget().getTriple().isSPIRV())
+ SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup");
+ else
+ SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
break;
case 4: // __MEMORY_SCOPE_SINGLE
SSID = llvm::SyncScope::SingleThread;
@@ -1510,7 +1526,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
//
// The global/flat cases need to use agent scope to consistently produce
// the native instruction instead of a cmpxchg expansion.
- SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+ if (getTarget().getTriple().isSPIRV())
+ SSID = getLLVMContext().getOrInsertSyncScopeID("device");
+ else
+ SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
AO = AtomicOrdering::Monotonic;
// The v2bf16 builtin uses i16 instead of a natural bfloat type.
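
The effect of the SPIR-V mapping, sketched with the fence builtin (presumably what the updated builtin-amdgcn-fence.cpp test covers): the same call site now selects a different sync scope per target triple.

  void fence_device_scope() {
    __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
    // amdgcn:  fence syncscope("agent") acquire
    // spirv64: fence syncscope("device") acquire (via mapScopeToSPIRV)
  }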
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index f110dba..85a13357 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -6613,6 +6613,9 @@ std::string Driver::GetStdModuleManifestPath(const Compilation &C,
const ToolChain &TC) const {
std::string error = "<NOT PRESENT>";
+ if (C.getArgs().hasArg(options::OPT_nostdlib))
+ return error;
+
switch (TC.GetCXXStdlibType(C.getArgs())) {
case ToolChain::CST_Libcxx: {
auto evaluate = [&](const char *library) -> std::optional<std::string> {
diff --git a/clang/lib/Frontend/ModuleDependencyCollector.cpp b/clang/lib/Frontend/ModuleDependencyCollector.cpp
index 3b363f9..ff37065 100644
--- a/clang/lib/Frontend/ModuleDependencyCollector.cpp
+++ b/clang/lib/Frontend/ModuleDependencyCollector.cpp
@@ -91,10 +91,10 @@ void ModuleDependencyCollector::attachToPreprocessor(Preprocessor &PP) {
std::make_unique<ModuleDependencyMMCallbacks>(*this));
}
-static bool isCaseSensitivePath(StringRef Path) {
+static bool isCaseSensitivePath(llvm::vfs::FileSystem &VFS, StringRef Path) {
SmallString<256> TmpDest = Path, UpperDest, RealDest;
// Remove component traversals, links, etc.
- if (llvm::sys::fs::real_path(Path, TmpDest))
+ if (VFS.getRealPath(Path, TmpDest))
return true; // Current default value in vfs.yaml
Path = TmpDest;
@@ -104,7 +104,7 @@ static bool isCaseSensitivePath(StringRef Path) {
// already expects when sensitivity isn't setup.
for (auto &C : Path)
UpperDest.push_back(toUppercase(C));
- if (!llvm::sys::fs::real_path(UpperDest, RealDest) && Path == RealDest)
+ if (!VFS.getRealPath(UpperDest, RealDest) && Path == RealDest)
return false;
return true;
}
@@ -121,7 +121,8 @@ void ModuleDependencyCollector::writeFileMap() {
// Explicitly set case sensitivity for the YAML writer. For that, find out
// the sensitivity at the path where the headers all collected to.
- VFSWriter.setCaseSensitivity(isCaseSensitivePath(VFSDir));
+ VFSWriter.setCaseSensitivity(
+ isCaseSensitivePath(Canonicalizer.getFileSystem(), VFSDir));
// Do not rely on real path names when executing the crash reproducer scripts
// since we only want to actually use the files we have on the VFS cache.
@@ -153,7 +154,7 @@ std::error_code ModuleDependencyCollector::copyToRoot(StringRef Src,
} else {
// When collecting entries from input vfsoverlays, copy the external
// contents into the cache but still map from the source.
- if (!fs::exists(Dst))
+ if (!Canonicalizer.getFileSystem().exists(Dst))
return std::error_code();
path::append(CacheDst, Dst);
Paths.CopyFrom = Dst;
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index a7f7099..d6ba19a 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -2311,10 +2311,9 @@ _mm256_cvttps_epi32(__m256 __a)
/// \param __a
/// A 256-bit vector of [4 x double].
/// \returns A 64 bit double containing the first element of the input vector.
-static __inline double __DEFAULT_FN_ATTRS
-_mm256_cvtsd_f64(__m256d __a)
-{
- return __a[0];
+static __inline double __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_cvtsd_f64(__m256d __a) {
+ return __a[0];
}
/// Returns the first element of the input vector of [8 x i32].
@@ -2327,11 +2326,10 @@ _mm256_cvtsd_f64(__m256d __a)
/// \param __a
/// A 256-bit vector of [8 x i32].
/// \returns A 32 bit integer containing the first element of the input vector.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_cvtsi256_si32(__m256i __a)
-{
- __v8si __b = (__v8si)__a;
- return __b[0];
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_cvtsi256_si32(__m256i __a) {
+ __v8si __b = (__v8si)__a;
+ return __b[0];
}
/// Returns the first element of the input vector of [8 x float].
@@ -2344,10 +2342,9 @@ _mm256_cvtsi256_si32(__m256i __a)
/// \param __a
/// A 256-bit vector of [8 x float].
/// \returns A 32 bit float containing the first element of the input vector.
-static __inline float __DEFAULT_FN_ATTRS
-_mm256_cvtss_f32(__m256 __a)
-{
- return __a[0];
+static __inline float __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_cvtss_f32(__m256 __a) {
+ return __a[0];
}
/* Vector replicate */
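
With __DEFAULT_FN_ATTRS_CONSTEXPR these element extractions can fold in constant expressions; a sketch of what the updated avx-builtins.c test presumably exercises:

  constexpr __m256d v = {1.25, 2.0, 3.0, 4.0};
  static_assert(_mm256_cvtsd_f64(v) == 1.25, "first element folds at compile time");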
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index 8605ba2..a2c6957 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -1299,7 +1299,7 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer(
Diag(Tok, getLangOpts().CPlusPlus23
? diag::warn_cxx20_compat_decl_attrs_on_lambda
: diag::ext_decl_attrs_on_lambda)
- << Tok.getIdentifierInfo() << Tok.isRegularKeywordAttribute();
+ << Tok.isRegularKeywordAttribute() << Tok.getIdentifierInfo();
MaybeParseCXX11Attributes(D);
}
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 1b66d83..8606227 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -983,10 +983,9 @@ static void DiagUninitUse(Sema &S, const VarDecl *VD, const UninitUse &Use,
case UninitUse::AfterDecl:
case UninitUse::AfterCall:
S.Diag(VD->getLocation(), diag::warn_sometimes_uninit_var)
- << VD->getDeclName() << IsCapturedByBlock
- << (Use.getKind() == UninitUse::AfterDecl ? 4 : 5)
- << const_cast<DeclContext*>(VD->getLexicalDeclContext())
- << VD->getSourceRange();
+ << VD->getDeclName() << IsCapturedByBlock
+ << (Use.getKind() == UninitUse::AfterDecl ? 4 : 5)
+ << VD->getLexicalDeclContext() << VD->getSourceRange();
S.Diag(Use.getUser()->getBeginLoc(), diag::note_uninit_var_use)
<< IsCapturedByBlock << Use.getUser()->getSourceRange();
return;
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index d238b79..dc6d232 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -193,7 +193,7 @@ DiagRecursiveConstraintEval(Sema &S, llvm::FoldingSetNodeID &ID,
// Sema::InstantiatingTemplate::isAlreadyBeingInstantiated function.
if (S.SatisfactionStackContains(Templ, ID)) {
S.Diag(E->getExprLoc(), diag::err_constraint_depends_on_self)
- << const_cast<Expr *>(E) << E->getSourceRange();
+ << E << E->getSourceRange();
return true;
}
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 3b267c1..3302bfc 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -20108,8 +20108,9 @@ static void DoMarkVarDeclReferenced(
bool NeededForConstantEvaluation =
isPotentiallyConstantEvaluatedContext(SemaRef) && UsableInConstantExpr;
- bool NeedDefinition =
- OdrUse == OdrUseContext::Used || NeededForConstantEvaluation;
+ bool NeedDefinition = OdrUse == OdrUseContext::Used ||
+ NeededForConstantEvaluation ||
+ Var->getType()->isUndeducedType();
assert(!isa<VarTemplatePartialSpecializationDecl>(Var) &&
"Can't instantiate a partial template specialization.");
diff --git a/clang/lib/Sema/SemaOpenACCAtomic.cpp b/clang/lib/Sema/SemaOpenACCAtomic.cpp
index a9319dc..ad21129 100644
--- a/clang/lib/Sema/SemaOpenACCAtomic.cpp
+++ b/clang/lib/Sema/SemaOpenACCAtomic.cpp
@@ -454,9 +454,7 @@ class AtomicOperandChecker {
// If nothing matches, error out.
DiagnoseInvalidAtomic(BinInf->FoundExpr->getExprLoc(),
SemaRef.PDiag(diag::note_acc_atomic_mismatch_operand)
- << const_cast<Expr *>(AssignInf.LHS)
- << const_cast<Expr *>(BinInf->LHS)
- << const_cast<Expr *>(BinInf->RHS));
+ << AssignInf.LHS << BinInf->LHS << BinInf->RHS);
return IDACInfo::Fail();
}
@@ -592,8 +590,7 @@ class AtomicOperandChecker {
PartialDiagnostic PD =
SemaRef.PDiag(diag::note_acc_atomic_mismatch_compound_operand)
- << FirstKind << const_cast<Expr *>(FirstX) << SecondKind
- << const_cast<Expr *>(SecondX);
+ << FirstKind << FirstX << SecondKind << SecondX;
return DiagnoseInvalidAtomic(SecondX->getExprLoc(), PD);
}
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index f5feed6..0fa21e8 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -2490,7 +2490,8 @@ VarDecl *SemaOpenMP::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,
DSAStackTy::DSAVarData DVarTop =
DSAStack->getTopDSA(D, DSAStack->isClauseParsingMode());
if (DVarTop.CKind != OMPC_unknown && isOpenMPPrivate(DVarTop.CKind) &&
- (!VD || VD->hasLocalStorage() || !DVarTop.AppliedToPointee))
+ (!VD || VD->hasLocalStorage() ||
+ !(DVarTop.AppliedToPointee && DVarTop.CKind != OMPC_reduction)))
return VD ? VD : cast<VarDecl>(DVarTop.PrivateCopy->getDecl());
// Threadprivate variables must not be captured.
if (isOpenMPThreadPrivate(DVarTop.CKind))
diff --git a/clang/lib/Sema/SemaTypeTraits.cpp b/clang/lib/Sema/SemaTypeTraits.cpp
index c2427dcf..6c798d6 100644
--- a/clang/lib/Sema/SemaTypeTraits.cpp
+++ b/clang/lib/Sema/SemaTypeTraits.cpp
@@ -1163,13 +1163,16 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT,
// - it has at least one trivial eligible constructor and a trivial,
// non-deleted destructor.
const CXXDestructorDecl *Dtor = RD->getDestructor();
- if (UnqualT->isAggregateType())
- if (Dtor && !Dtor->isUserProvided())
- return true;
- if (RD->hasTrivialDestructor() && (!Dtor || !Dtor->isDeleted()))
- if (RD->hasTrivialDefaultConstructor() ||
- RD->hasTrivialCopyConstructor() || RD->hasTrivialMoveConstructor())
- return true;
+ if (UnqualT->isAggregateType() && (!Dtor || !Dtor->isUserProvided()))
+ return true;
+ if (RD->hasTrivialDestructor() && (!Dtor || !Dtor->isDeleted())) {
+ for (CXXConstructorDecl *Ctr : RD->ctors()) {
+ if (Ctr->isIneligibleOrNotSelected() || Ctr->isDeleted())
+ continue;
+ if (Ctr->isTrivial())
+ return true;
+ }
+ }
return false;
}
case UTT_IsIntangibleType:
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 36f316d..0ae784c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -672,6 +672,10 @@ ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
ProgramStateRef stateTrue, stateFalse;
+ if (!First.Expression->getType()->isAnyPointerType() ||
+ !Second.Expression->getType()->isAnyPointerType())
+ return state;
+
// Assume different address spaces cannot overlap.
if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
Second.Expression->getType()->getPointeeType().getAddressSpace())
diff --git a/clang/test/Analysis/buffer-overlap-decls.c b/clang/test/Analysis/buffer-overlap-decls.c
new file mode 100644
index 0000000..4830f4e
--- /dev/null
+++ b/clang/test/Analysis/buffer-overlap-decls.c
@@ -0,0 +1,23 @@
+// RUN: %clang_analyze_cc1 -verify %s -Wno-incompatible-library-redeclaration \
+// RUN: -analyzer-checker=alpha.unix.cstring.BufferOverlap
+// expected-no-diagnostics
+
+typedef typeof(sizeof(int)) size_t;
+
+void memcpy(int dst, int src, size_t size);
+
+void test_memcpy_proxy() {
+ memcpy(42, 42, 42); // no-crash
+}
+
+void strcpy(int dst, char *src);
+
+void test_strcpy_proxy() {
+ strcpy(42, (char *)42); // no-crash
+}
+
+void strxfrm(int dst, char *src, size_t size);
+
+void test_strxfrm_proxy() {
+ strxfrm(42, (char *)42, 42); // no-crash
+}
diff --git a/clang/test/Analysis/buffer-overlap.c b/clang/test/Analysis/buffer-overlap.c
index 8414a76..defb17a 100644
--- a/clang/test/Analysis/buffer-overlap.c
+++ b/clang/test/Analysis/buffer-overlap.c
@@ -96,3 +96,10 @@ void test_snprintf6() {
char b[4] = {0};
snprintf(a, sizeof(a), "%s", b); // no-warning
}
+
+void* memcpy(void* dest, const void* src, size_t count);
+
+void test_memcpy_esoteric() {
+label:
+ memcpy((char *)&&label, (const char *)memcpy, 1);
+}
diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp
index e901631..4c396d3 100644
--- a/clang/test/CIR/CodeGen/complex.cpp
+++ b/clang/test/CIR/CodeGen/complex.cpp
@@ -1270,3 +1270,40 @@ void real_on_scalar_from_real_with_type_promotion() {
// OGCG: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float
// OGCG: %[[A_REAL_F16:.*]] = fptrunc float %[[A_REAL_F32]] to half
// OGCG: store half %[[A_REAL_F16]], ptr %[[B_ADDR]], align 2
+
+void real_on_scalar_from_imag_with_type_promotion() {
+ _Float16 _Complex a;
+ _Float16 b = __real__(__imag__ a);
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
+// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float
+// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
+// CIR: %[[A_IMAG_F32:.*]] = cir.complex.imag %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[A_IMAG_F16:.*]] = cir.cast(floating, %[[A_IMAG_F32]] : !cir.float), !cir.f16
+// CIR: cir.store{{.*}} %[[A_IMAG_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { half, half }, i64 1, align 2
+// LLVM: %[[B_ADDR:.*]] = alloca half, i64 1, align 2
+// LLVM: %[[TMP_A:.*]] = load { half, half }, ptr %[[A_ADDR]], align 2
+// LLVM: %[[A_REAL:.*]] = extractvalue { half, half } %[[TMP_A]], 0
+// LLVM: %[[A_IMAG:.*]] = extractvalue { half, half } %[[TMP_A]], 1
+// LLVM: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float
+// LLVM: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float
+// LLVM: %[[TMP_A_COMPLEX_F32:.*]] = insertvalue { float, float } {{.*}}, float %[[A_REAL_F32]], 0
+// LLVM: %[[A_COMPLEX_F32:.*]] = insertvalue { float, float } %[[TMP_A_COMPLEX_F32]], float %[[A_IMAG_F32]], 1
+// LLVM: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half
+// LLVM: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2
+
+// OGCG: %[[A_ADDR:.*]] = alloca { half, half }, align 2
+// OGCG: %[[B_ADDR:.*]] = alloca half, align 2
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { half, half }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load half, ptr %[[A_IMAG_PTR]], align 2
+// OGCG: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float
+// OGCG: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half
+// OGCG: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2
diff --git a/clang/test/CIR/CodeGen/delete.cpp b/clang/test/CIR/CodeGen/delete.cpp
new file mode 100644
index 0000000..f21d203
--- /dev/null
+++ b/clang/test/CIR/CodeGen/delete.cpp
@@ -0,0 +1,88 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -mconstructor-aliases -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+typedef __typeof(sizeof(int)) size_t;
+
+struct SizedDelete {
+ void operator delete(void*, size_t);
+ int member;
+};
+void test_sized_delete(SizedDelete *x) {
+ delete x;
+}
+
+// SizedDelete::operator delete(void*, unsigned long)
+// CIR: cir.func private @_ZN11SizedDeletedlEPvm(!cir.ptr<!void>, !u64i)
+// LLVM: declare void @_ZN11SizedDeletedlEPvm(ptr, i64)
+
+// CIR: cir.func dso_local @_Z17test_sized_deleteP11SizedDelete
+// CIR: %[[X:.*]] = cir.load{{.*}} %{{.*}}
+// CIR: %[[X_CAST:.*]] = cir.cast(bitcast, %[[X]] : !cir.ptr<!rec_SizedDelete>), !cir.ptr<!void>
+// CIR: %[[OBJ_SIZE:.*]] = cir.const #cir.int<4> : !u64i
+// CIR: cir.call @_ZN11SizedDeletedlEPvm(%[[X_CAST]], %[[OBJ_SIZE]]) nothrow : (!cir.ptr<!void>, !u64i) -> ()
+
+// LLVM: define dso_local void @_Z17test_sized_deleteP11SizedDelete
+// LLVM: %[[X:.*]] = load ptr, ptr %{{.*}}
+// LLVM: call void @_ZN11SizedDeletedlEPvm(ptr %[[X]], i64 4)
+
+// OGCG: define dso_local void @_Z17test_sized_deleteP11SizedDelete
+// OGCG: %[[X:.*]] = load ptr, ptr %{{.*}}
+// OGCG: %[[ISNULL:.*]] = icmp eq ptr %[[X]], null
+// OGCG: br i1 %[[ISNULL]], label %{{.*}}, label %[[DELETE_NOTNULL:.*]]
+// OGCG: [[DELETE_NOTNULL]]:
+// OGCG: call void @_ZN11SizedDeletedlEPvm(ptr noundef %[[X]], i64 noundef 4)
+
+// This function is declared below the call in OGCG.
+// OGCG: declare void @_ZN11SizedDeletedlEPvm(ptr noundef, i64 noundef)
+
+struct Contents {
+ ~Contents() {}
+};
+struct Container {
+ Contents *contents;
+ ~Container();
+};
+Container::~Container() { delete contents; }
+
+// Contents::~Contents()
+// CIR: cir.func comdat linkonce_odr @_ZN8ContentsD2Ev
+// LLVM: define linkonce_odr void @_ZN8ContentsD2Ev
+
+// operator delete(void*, unsigned long)
+// CIR: cir.func private @_ZdlPvm(!cir.ptr<!void>, !u64i)
+// LLVM: declare void @_ZdlPvm(ptr, i64)
+
+// Container::~Container()
+// CIR: cir.func dso_local @_ZN9ContainerD2Ev
+// CIR: %[[THIS:.*]] = cir.load %{{.*}}
+// CIR: %[[CONTENTS_PTR_ADDR:.*]] = cir.get_member %[[THIS]][0] {name = "contents"} : !cir.ptr<!rec_Container> -> !cir.ptr<!cir.ptr<!rec_Contents>>
+// CIR: %[[CONTENTS_PTR:.*]] = cir.load{{.*}} %[[CONTENTS_PTR_ADDR]]
+// CIR: cir.call @_ZN8ContentsD2Ev(%[[CONTENTS_PTR]]) nothrow : (!cir.ptr<!rec_Contents>) -> ()
+// CIR: %[[CONTENTS_CAST:.*]] = cir.cast(bitcast, %[[CONTENTS_PTR]] : !cir.ptr<!rec_Contents>), !cir.ptr<!void>
+// CIR: %[[OBJ_SIZE:.*]] = cir.const #cir.int<1> : !u64i
+// CIR: cir.call @_ZdlPvm(%[[CONTENTS_CAST]], %[[OBJ_SIZE]]) nothrow : (!cir.ptr<!void>, !u64i) -> ()
+
+// LLVM: define dso_local void @_ZN9ContainerD2Ev
+// LLVM: %[[THIS:.*]] = load ptr, ptr %{{.*}}
+// LLVM: %[[CONTENTS_PTR_ADDR:.*]] = getelementptr %struct.Container, ptr %[[THIS]], i32 0, i32 0
+// LLVM: %[[CONTENTS_PTR:.*]] = load ptr, ptr %[[CONTENTS_PTR_ADDR]]
+// LLVM: call void @_ZN8ContentsD2Ev(ptr %[[CONTENTS_PTR]])
+// LLVM: call void @_ZdlPvm(ptr %[[CONTENTS_PTR]], i64 1)
+
+// OGCG: define dso_local void @_ZN9ContainerD2Ev
+// OGCG: %[[THIS:.*]] = load ptr, ptr %{{.*}}
+// OGCG: %[[CONTENTS:.*]] = getelementptr inbounds nuw %struct.Container, ptr %[[THIS]], i32 0, i32 0
+// OGCG: %[[CONTENTS_PTR:.*]] = load ptr, ptr %[[CONTENTS]]
+// OGCG: %[[ISNULL:.*]] = icmp eq ptr %[[CONTENTS_PTR]], null
+// OGCG: br i1 %[[ISNULL]], label %{{.*}}, label %[[DELETE_NOTNULL:.*]]
+// OGCG: [[DELETE_NOTNULL]]:
+// OGCG: call void @_ZN8ContentsD2Ev(ptr noundef nonnull align 1 dereferenceable(1) %[[CONTENTS_PTR]])
+// OGCG: call void @_ZdlPvm(ptr noundef %[[CONTENTS_PTR]], i64 noundef 1)
+
+// These functions are declared/defined below the calls in OGCG.
+// OGCG: define linkonce_odr void @_ZN8ContentsD2Ev
+// OGCG: declare void @_ZdlPvm(ptr noundef, i64 noundef)
diff --git a/clang/test/CIR/CodeGen/lang-c-cpp.cpp b/clang/test/CIR/CodeGen/lang-c-cpp.cpp
index e126932..8931783 100644
--- a/clang/test/CIR/CodeGen/lang-c-cpp.cpp
+++ b/clang/test/CIR/CodeGen/lang-c-cpp.cpp
@@ -3,8 +3,8 @@
// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.c.cir
// RUN: FileCheck --check-prefix=CIR-C --input-file=%t.c.cir %s
-// CIR-CPP: module attributes {{{.*}}cir.lang = #cir.lang<cxx>{{.*}}}
-// CIR-C: module attributes {{{.*}}cir.lang = #cir.lang<c>{{.*}}}
+// CIR-CPP: module{{.*}} attributes {{{.*}}cir.lang = #cir.lang<cxx>{{.*}}}
+// CIR-C: module{{.*}} attributes {{{.*}}cir.lang = #cir.lang<c>{{.*}}}
int main() {
return 0;
diff --git a/clang/test/CIR/CodeGen/module-filename.cpp b/clang/test/CIR/CodeGen/module-filename.cpp
new file mode 100644
index 0000000..05e2e92
--- /dev/null
+++ b/clang/test/CIR/CodeGen/module-filename.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// Normally we avoid matching a test's filename, but here the filename is the
+// entire point of the test, so we wildcard the directory path and check only
+// the filename.
+// CIR: module @"{{.*}}module-filename.cpp"
+
+int main() {
+ return 0;
+}
diff --git a/clang/test/CIR/CodeGen/opt-info-attr.cpp b/clang/test/CIR/CodeGen/opt-info-attr.cpp
index 444286b..97071d7 100644
--- a/clang/test/CIR/CodeGen/opt-info-attr.cpp
+++ b/clang/test/CIR/CodeGen/opt-info-attr.cpp
@@ -13,10 +13,10 @@
void f() {}
-// CHECK-O0: module attributes
+// CHECK-O0: module{{.*}} attributes
// CHECK-O0-NOT: cir.opt_info
-// CHECK-O1: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 1, size = 0>{{.+}}
-// CHECK-O2: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 0>{{.+}}
-// CHECK-O3: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 3, size = 0>{{.+}}
-// CHECK-Os: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 1>{{.+}}
-// CHECK-Oz: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 2>{{.+}}
+// CHECK-O1: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 1, size = 0>{{.+}}
+// CHECK-O2: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 0>{{.+}}
+// CHECK-O3: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 3, size = 0>{{.+}}
+// CHECK-Os: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 1>{{.+}}
+// CHECK-Oz: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 2>{{.+}}
diff --git a/clang/test/CIR/CodeGen/vbase.cpp b/clang/test/CIR/CodeGen/vbase.cpp
index 9139651..4d57f8e 100644
--- a/clang/test/CIR/CodeGen/vbase.cpp
+++ b/clang/test/CIR/CodeGen/vbase.cpp
@@ -13,19 +13,29 @@ public:
class Derived : public virtual Base {};
-// This is just here to force the record types to be emitted.
void f() {
Derived d;
+ d.f();
+}
+
+class DerivedFinal final : public virtual Base {};
+
+void g() {
+ DerivedFinal df;
+ df.f();
}
// CIR: !rec_Base = !cir.record<class "Base" {!cir.vptr}>
// CIR: !rec_Derived = !cir.record<class "Derived" {!rec_Base}>
+// CIR: !rec_DerivedFinal = !cir.record<class "DerivedFinal" {!rec_Base}>
// LLVM: %class.Derived = type { %class.Base }
// LLVM: %class.Base = type { ptr }
+// LLVM: %class.DerivedFinal = type { %class.Base }
// OGCG: %class.Derived = type { %class.Base }
// OGCG: %class.Base = type { ptr }
+// OGCG: %class.DerivedFinal = type { %class.Base }
// Test the constructor handling for a class with a virtual base.
struct A {
@@ -47,6 +57,76 @@ void ppp() { B b; }
// OGCG: @_ZTV1B = linkonce_odr unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr inttoptr (i64 12 to ptr), ptr null, ptr @_ZTI1B] }, comdat, align 8
+// CIR: cir.func {{.*}}@_Z1fv() {
+// CIR: %[[D:.+]] = cir.alloca !rec_Derived, !cir.ptr<!rec_Derived>, ["d", init]
+// CIR: cir.call @_ZN7DerivedC1Ev(%[[D]]) nothrow : (!cir.ptr<!rec_Derived>) -> ()
+// CIR: %[[VPTR_PTR:.+]] = cir.vtable.get_vptr %[[D]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!cir.vptr>
+// CIR: %[[VPTR:.+]] = cir.load {{.*}} %[[VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR: %[[VPTR_I8:.+]] = cir.cast(bitcast, %[[VPTR]] : !cir.vptr), !cir.ptr<!u8i>
+// CIR: %[[NEG32:.+]] = cir.const #cir.int<-32> : !s64i
+// CIR: %[[ADJ_VPTR_I8:.+]] = cir.ptr_stride(%[[VPTR_I8]] : !cir.ptr<!u8i>, %[[NEG32]] : !s64i), !cir.ptr<!u8i>
+// CIR: %[[OFFSET_PTR:.+]] = cir.cast(bitcast, %[[ADJ_VPTR_I8]] : !cir.ptr<!u8i>), !cir.ptr<!s64i>
+// CIR: %[[OFFSET:.+]] = cir.load {{.*}} %[[OFFSET_PTR]] : !cir.ptr<!s64i>, !s64i
+// CIR: %[[D_I8:.+]] = cir.cast(bitcast, %[[D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!u8i>
+// CIR: %[[ADJ_THIS_I8:.+]] = cir.ptr_stride(%[[D_I8]] : !cir.ptr<!u8i>, %[[OFFSET]] : !s64i), !cir.ptr<!u8i>
+// CIR: %[[ADJ_THIS_D:.+]] = cir.cast(bitcast, %[[ADJ_THIS_I8]] : !cir.ptr<!u8i>), !cir.ptr<!rec_Derived>
+// CIR: %[[BASE_THIS:.+]] = cir.cast(bitcast, %[[ADJ_THIS_D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!rec_Base>
+// CIR: %[[BASE_VPTR_PTR:.+]] = cir.vtable.get_vptr %[[BASE_THIS]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr>
+// CIR: %[[BASE_VPTR:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR: %[[SLOT_PTR:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>
+// CIR: %[[FN:.+]] = cir.load {{.*}} %[[SLOT_PTR]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>
+// CIR: cir.call %[[FN]](%[[BASE_THIS]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>, !cir.ptr<!rec_Base>) -> ()
+// CIR: cir.return
+
+// CIR: cir.func {{.*}}@_Z1gv() {
+// CIR: %[[DF:.+]] = cir.alloca !rec_DerivedFinal, !cir.ptr<!rec_DerivedFinal>, ["df", init]
+// CIR: cir.call @_ZN12DerivedFinalC1Ev(%[[DF]]) nothrow : (!cir.ptr<!rec_DerivedFinal>) -> ()
+// CIR: %[[BASE_THIS_2:.+]] = cir.base_class_addr %[[DF]] : !cir.ptr<!rec_DerivedFinal> nonnull [0] -> !cir.ptr<!rec_Base>
+// CIR: %[[BASE_VPTR_PTR_2:.+]] = cir.vtable.get_vptr %[[BASE_THIS_2]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr>
+// CIR: %[[BASE_VPTR_2:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR_2]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR: %[[SLOT_PTR_2:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR_2]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>
+// CIR: %[[FN_2:.+]] = cir.load {{.*}} %[[SLOT_PTR_2]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>
+// CIR: cir.call %[[FN_2]](%[[BASE_THIS_2]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>, !cir.ptr<!rec_Base>) -> ()
+// CIR: cir.return
+
+// LLVM: define {{.*}}void @_Z1fv()
+// LLVM: %[[D:.+]] = alloca {{.*}}
+// LLVM: call void @_ZN7DerivedC1Ev(ptr %[[D]])
+// LLVM: %[[VPTR_ADDR:.+]] = load ptr, ptr %[[D]]
+// LLVM: %[[NEG32_PTR:.+]] = getelementptr i8, ptr %[[VPTR_ADDR]], i64 -32
+// LLVM: %[[OFF:.+]] = load i64, ptr %[[NEG32_PTR]]
+// LLVM: %[[ADJ_THIS:.+]] = getelementptr i8, ptr %[[D]], i64 %[[OFF]]
+// LLVM: %[[VFN_TAB:.+]] = load ptr, ptr %[[ADJ_THIS]]
+// LLVM: %[[SLOT0:.+]] = getelementptr inbounds ptr, ptr %[[VFN_TAB]], i32 0
+// LLVM: %[[VFN:.+]] = load ptr, ptr %[[SLOT0]]
+// LLVM: call void %[[VFN]](ptr %[[ADJ_THIS]])
+// LLVM: ret void
+
+// LLVM: define {{.*}}void @_Z1gv()
+// LLVM: %[[DF:.+]] = alloca {{.*}}
+// LLVM: call void @_ZN12DerivedFinalC1Ev(ptr %[[DF]])
+// LLVM: %[[VPTR2:.+]] = load ptr, ptr %[[DF]]
+// LLVM: %[[SLOT0_2:.+]] = getelementptr inbounds ptr, ptr %[[VPTR2]], i32 0
+// LLVM: %[[VFN2:.+]] = load ptr, ptr %[[SLOT0_2]]
+// LLVM: call void %[[VFN2]](ptr %[[DF]])
+// LLVM: ret void
+
+// OGCG: define {{.*}}void @_Z1fv()
+// OGCG: %[[D:.+]] = alloca {{.*}}
+// OGCG: call void @_ZN7DerivedC1Ev(ptr {{.*}} %[[D]])
+// OGCG: %[[VTABLE:.+]] = load ptr, ptr %[[D]]
+// OGCG: %[[NEG32_PTR:.+]] = getelementptr i8, ptr %[[VTABLE]], i64 -32
+// OGCG: %[[OFF:.+]] = load i64, ptr %[[NEG32_PTR]]
+// OGCG: %[[ADJ_THIS:.+]] = getelementptr inbounds i8, ptr %[[D]], i64 %[[OFF]]
+// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[ADJ_THIS]])
+// OGCG: ret void
+
+// OGCG: define {{.*}}void @_Z1gv()
+// OGCG: %[[DF:.+]] = alloca {{.*}}
+// OGCG: call void @_ZN12DerivedFinalC1Ev(ptr {{.*}} %[[DF]])
+// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[DF]])
+// OGCG: ret void
+
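The two call paths checked above differ only in how `this` reaches the virtual base: for Derived the virtual-base offset must be read out of the vtable at run time, while DerivedFinal's dynamic type is known, so the conversion collapses to a static base-class address and the offset load disappears. A hedged sketch of the adjustment the f() checks encode, using illustrative names rather than anything the compiler emits:

    // Non-final case: the virtual-base offset lives at a negative offset
    // (-32 in the checks above) from the address the vptr points at.
    char *adjust_to_virtual_base(char *obj) {
      char *vptr = *(char **)obj;        // load the vtable pointer
      long off = *(long *)(vptr - 32);   // read the virtual-base offset slot
      return obj + off;                  // adjusted 'this' for the Base subobject
    }
    // Final case (g()): the offset is a compile-time constant, so no vtable
    // load is needed before the virtual call.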
// Constructor for B
// CIR: cir.func comdat linkonce_odr @_ZN1BC1Ev(%arg0: !cir.ptr<!rec_B>
// CIR: %[[THIS_ADDR:.*]] = cir.alloca !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>, ["this", init]
diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp
index 8b5379a..8bca48d 100644
--- a/clang/test/CIR/CodeGen/vector-ext.cpp
+++ b/clang/test/CIR/CodeGen/vector-ext.cpp
@@ -1322,3 +1322,23 @@ void logical_not() {
// OGCG: %[[RESULT:.*]] = icmp eq <4 x i32> %[[TMP_A]], zeroinitializer
// OGCG: %[[RESULT_VI4:.*]] = sext <4 x i1> %[[RESULT]] to <4 x i32>
// OGCG: store <4 x i32> %[[RESULT_VI4]], ptr %[[B_ADDR]], align 16
+
+void unary_extension() {
+ vi4 a;
+ vi4 b = __extension__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16
diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp
index d8fdeea..f242779 100644
--- a/clang/test/CIR/CodeGen/vector.cpp
+++ b/clang/test/CIR/CodeGen/vector.cpp
@@ -1390,3 +1390,23 @@ void logical_not_float() {
// OGCG: %[[RESULT:.*]] = fcmp oeq <4 x float> %[[TMP_A]], zeroinitializer
// OGCG: %[[RESULT_VI4:.*]] = sext <4 x i1> %[[RESULT]] to <4 x i32>
// OGCG: store <4 x i32> %[[RESULT_VI4]], ptr %[[B_ADDR]], align 16
+
+void unary_extension() {
+ vi4 a;
+ vi4 b = __extension__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16
diff --git a/clang/test/CIR/IR/global-init.cir b/clang/test/CIR/IR/global-init.cir
new file mode 100644
index 0000000..727c067
--- /dev/null
+++ b/clang/test/CIR/IR/global-init.cir
@@ -0,0 +1,48 @@
+// RUN: cir-opt --verify-roundtrip %s -o - | FileCheck %s
+
+!u8i = !cir.int<u, 8>
+
+!rec_NeedsCtor = !cir.record<struct "NeedsCtor" padded {!u8i}>
+!rec_NeedsDtor = !cir.record<struct "NeedsDtor" padded {!u8i}>
+!rec_NeedsCtorDtor = !cir.record<struct "NeedsCtorDtor" padded {!u8i}>
+
+module attributes {cir.triple = "x86_64-unknown-linux-gnu"} {
+ cir.func private @_ZN9NeedsCtorC1Ev(!cir.ptr<!rec_NeedsCtor>)
+ cir.global external @needsCtor = ctor : !rec_NeedsCtor {
+ %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor>
+ cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> ()
+ }
+ // CHECK: cir.global external @needsCtor = ctor : !rec_NeedsCtor {
+ // CHECK: %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor>
+ // CHECK: cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> ()
+ // CHECK: }
+
+ cir.func private @_ZN9NeedsDtorD1Ev(!cir.ptr<!rec_NeedsDtor>)
+ cir.global external dso_local @needsDtor = #cir.zero : !rec_NeedsDtor dtor {
+ %0 = cir.get_global @needsDtor : !cir.ptr<!rec_NeedsDtor>
+ cir.call @_ZN9NeedsDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsDtor>) -> ()
+ }
+ // CHECK: cir.global external dso_local @needsDtor = #cir.zero : !rec_NeedsDtor dtor {
+ // CHECK: %0 = cir.get_global @needsDtor : !cir.ptr<!rec_NeedsDtor>
+ // CHECK: cir.call @_ZN9NeedsDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsDtor>) -> ()
+ // CHECK: }
+
+ cir.func private @_ZN13NeedsCtorDtorC1Ev(!cir.ptr<!rec_NeedsCtorDtor>)
+ cir.func private @_ZN13NeedsCtorDtorD1Ev(!cir.ptr<!rec_NeedsCtorDtor>)
+ cir.global external dso_local @needsCtorDtor = ctor : !rec_NeedsCtorDtor {
+ %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor>
+ cir.call @_ZN13NeedsCtorDtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> ()
+ } dtor {
+ %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor>
+ cir.call @_ZN13NeedsCtorDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> ()
+ }
+ // CHECK: cir.func private @_ZN13NeedsCtorDtorC1Ev(!cir.ptr<!rec_NeedsCtorDtor>)
+ // CHECK: cir.func private @_ZN13NeedsCtorDtorD1Ev(!cir.ptr<!rec_NeedsCtorDtor>)
+ // CHECK: cir.global external dso_local @needsCtorDtor = ctor : !rec_NeedsCtorDtor {
+ // CHECK: %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor>
+ // CHECK: cir.call @_ZN13NeedsCtorDtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> ()
+ // CHECK: } dtor {
+ // CHECK: %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor>
+ // CHECK: cir.call @_ZN13NeedsCtorDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> ()
+ // CHECK: }
+}
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 347cd9e..3018bb97 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -985,18 +985,21 @@ double test_mm256_cvtsd_f64(__m256d __a) {
// CHECK: extractelement <4 x double> %{{.*}}, i32 0
return _mm256_cvtsd_f64(__a);
}
+TEST_CONSTEXPR(_mm256_cvtsd_f64((__m256d){8.0, 7.0, 6.0, 5.0}) == 8.0);
int test_mm256_cvtsi256_si32(__m256i __a) {
// CHECK-LABEL: test_mm256_cvtsi256_si32
// CHECK: extractelement <8 x i32> %{{.*}}, i32 0
return _mm256_cvtsi256_si32(__a);
}
+TEST_CONSTEXPR(_mm256_cvtsi256_si32((__m256i)(__v8si){8, 7, 6, 5, 4, 3, 2, 1}) == 8);
float test_mm256_cvtss_f32(__m256 __a) {
// CHECK-LABEL: test_mm256_cvtss_f32
// CHECK: extractelement <8 x float> %{{.*}}, i32 0
return _mm256_cvtss_f32(__a);
}
+TEST_CONSTEXPR(_mm256_cvtss_f32((__m256){8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}) == 8.0f);
__m128i test_mm256_cvttpd_epi32(__m256d A) {
// CHECK-LABEL: test_mm256_cvttpd_epi32
diff --git a/clang/test/CodeGen/X86/bmi-builtins.c b/clang/test/CodeGen/X86/bmi-builtins.c
index ded40ca..d0ae0c7 100644
--- a/clang/test/CodeGen/X86/bmi-builtins.c
+++ b/clang/test/CodeGen/X86/bmi-builtins.c
@@ -1,7 +1,16 @@
-// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT
-// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefix=TZCNT
-// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT
-// RUN: %clang_cc1 -x c++ -std=c++11 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefix=TZCNT
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT
+// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefixes=TZCNT,TZCNT64
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT
+// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefixes=TZCNT,TZCNT64
+
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,TZCNT
+// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=TZCNT,TZCNT64
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,TZCNT
+// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=TZCNT,TZCNT64
#include <immintrin.h>
@@ -48,20 +57,20 @@ unsigned int test_tzcnt_u32(unsigned int __X) {
#ifdef __x86_64__
unsigned long long test__tzcnt_u64(unsigned long long __X) {
-// TZCNT-LABEL: test__tzcnt_u64
-// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
+// TZCNT64-LABEL: test__tzcnt_u64
+// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
return __tzcnt_u64(__X);
}
long long test_mm_tzcnt_64(unsigned long long __X) {
-// TZCNT-LABEL: test_mm_tzcnt_64
-// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
+// TZCNT64-LABEL: test_mm_tzcnt_64
+// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
return _mm_tzcnt_64(__X);
}
unsigned long long test_tzcnt_u64(unsigned long long __X) {
-// TZCNT-LABEL: test_tzcnt_u64
-// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
+// TZCNT64-LABEL: test_tzcnt_u64
+// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
return _tzcnt_u64(__X);
}
#endif
@@ -103,36 +112,36 @@ unsigned int test__blsr_u32(unsigned int __X) {
#ifdef __x86_64__
unsigned long long test__andn_u64(unsigned long __X, unsigned long __Y) {
-// CHECK-LABEL: test__andn_u64
-// CHECK: xor i64 %{{.*}}, -1
-// CHECK: and i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test__andn_u64
+// X64: xor i64 %{{.*}}, -1
+// X64: and i64 %{{.*}}, %{{.*}}
return __andn_u64(__X, __Y);
}
unsigned long long test__bextr_u64(unsigned long __X, unsigned long __Y) {
-// CHECK-LABEL: test__bextr_u64
-// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}})
+// X64-LABEL: test__bextr_u64
+// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}})
return __bextr_u64(__X, __Y);
}
unsigned long long test__blsi_u64(unsigned long long __X) {
-// CHECK-LABEL: test__blsi_u64
-// CHECK: sub i64 0, %{{.*}}
-// CHECK: and i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test__blsi_u64
+// X64: sub i64 0, %{{.*}}
+// X64: and i64 %{{.*}}, %{{.*}}
return __blsi_u64(__X);
}
unsigned long long test__blsmsk_u64(unsigned long long __X) {
-// CHECK-LABEL: test__blsmsk_u64
-// CHECK: sub i64 %{{.*}}, 1
-// CHECK: xor i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test__blsmsk_u64
+// X64: sub i64 %{{.*}}, 1
+// X64: xor i64 %{{.*}}, %{{.*}}
return __blsmsk_u64(__X);
}
unsigned long long test__blsr_u64(unsigned long long __X) {
-// CHECK-LABEL: test__blsr_u64
-// CHECK: sub i64 %{{.*}}, 1
-// CHECK: and i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test__blsr_u64
+// X64: sub i64 %{{.*}}, 1
+// X64: and i64 %{{.*}}, %{{.*}}
return __blsr_u64(__X);
}
#endif
@@ -186,49 +195,49 @@ unsigned int test_blsr_u32(unsigned int __X) {
#ifdef __x86_64__
unsigned long long test_andn_u64(unsigned long __X, unsigned long __Y) {
-// CHECK-LABEL: test_andn_u64
-// CHECK: xor i64 %{{.*}}, -1
-// CHECK: and i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test_andn_u64
+// X64: xor i64 %{{.*}}, -1
+// X64: and i64 %{{.*}}, %{{.*}}
return _andn_u64(__X, __Y);
}
unsigned long long test_bextr_u64(unsigned long __X, unsigned int __Y,
unsigned int __Z) {
-// CHECK-LABEL: test_bextr_u64
-// CHECK: and i32 %{{.*}}, 255
-// CHECK: and i32 %{{.*}}, 255
-// CHECK: shl i32 %{{.*}}, 8
-// CHECK: or i32 %{{.*}}, %{{.*}}
-// CHECK: zext i32 %{{.*}} to i64
-// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}})
+// X64-LABEL: test_bextr_u64
+// X64: and i32 %{{.*}}, 255
+// X64: and i32 %{{.*}}, 255
+// X64: shl i32 %{{.*}}, 8
+// X64: or i32 %{{.*}}, %{{.*}}
+// X64: zext i32 %{{.*}} to i64
+// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}})
return _bextr_u64(__X, __Y, __Z);
}
unsigned long long test_bextr2_u64(unsigned long long __X,
unsigned long long __Y) {
-// CHECK-LABEL: test_bextr2_u64
-// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}})
+// X64-LABEL: test_bextr2_u64
+// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}})
return _bextr2_u64(__X, __Y);
}
unsigned long long test_blsi_u64(unsigned long long __X) {
-// CHECK-LABEL: test_blsi_u64
-// CHECK: sub i64 0, %{{.*}}
-// CHECK: and i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test_blsi_u64
+// X64: sub i64 0, %{{.*}}
+// X64: and i64 %{{.*}}, %{{.*}}
return _blsi_u64(__X);
}
unsigned long long test_blsmsk_u64(unsigned long long __X) {
-// CHECK-LABEL: test_blsmsk_u64
-// CHECK: sub i64 %{{.*}}, 1
-// CHECK: xor i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test_blsmsk_u64
+// X64: sub i64 %{{.*}}, 1
+// X64: xor i64 %{{.*}}, %{{.*}}
return _blsmsk_u64(__X);
}
unsigned long long test_blsr_u64(unsigned long long __X) {
-// CHECK-LABEL: test_blsr_u64
-// CHECK: sub i64 %{{.*}}, 1
-// CHECK: and i64 %{{.*}}, %{{.*}}
+// X64-LABEL: test_blsr_u64
+// X64: sub i64 %{{.*}}, 1
+// X64: and i64 %{{.*}}, %{{.*}}
return _blsr_u64(__X);
}
#endif
diff --git a/clang/test/CodeGen/X86/bmi2-builtins.c b/clang/test/CodeGen/X86/bmi2-builtins.c
index 48424f5..1b2cb90 100644
--- a/clang/test/CodeGen/X86/bmi2-builtins.c
+++ b/clang/test/CodeGen/X86/bmi2-builtins.c
@@ -3,6 +3,11 @@
// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s --check-prefix=B32
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefix=B32
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefix=B32
+
#include <immintrin.h>
diff --git a/clang/test/CodeGen/X86/tbm-builtins.c b/clang/test/CodeGen/X86/tbm-builtins.c
index d916627..89746bf 100644
--- a/clang/test/CodeGen/X86/tbm-builtins.c
+++ b/clang/test/CodeGen/X86/tbm-builtins.c
@@ -1,5 +1,12 @@
-// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK
+
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64
+// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK
#include <x86intrin.h>
@@ -13,14 +20,14 @@ unsigned int test__bextri_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__bextri_u64(unsigned long long a) {
- // CHECK-LABEL: test__bextri_u64
- // CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 2)
+ // X64-LABEL: test__bextri_u64
+ // X64: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 2)
return __bextri_u64(a, 2);
}
unsigned long long test__bextri_u64_bigint(unsigned long long a) {
- // CHECK-LABEL: test__bextri_u64_bigint
- // CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 549755813887)
+ // X64-LABEL: test__bextri_u64_bigint
+ // X64: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 549755813887)
return __bextri_u64(a, 0x7fffffffffLL);
}
#endif
@@ -34,9 +41,9 @@ unsigned int test__blcfill_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__blcfill_u64(unsigned long long a) {
- // CHECK-LABEL: test__blcfill_u64
- // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1
- // CHECK: %{{.*}} = and i64 %{{.*}}, [[TMP]]
+ // X64-LABEL: test__blcfill_u64
+ // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1
+ // X64: %{{.*}} = and i64 %{{.*}}, [[TMP]]
return __blcfill_u64(a);
}
#endif
@@ -51,10 +58,10 @@ unsigned int test__blci_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__blci_u64(unsigned long long a) {
- // CHECK-LABEL: test__blci_u64
- // CHECK: [[TMP1:%.*]] = add i64 %{{.*}}, 1
- // CHECK: [[TMP2:%.*]] = xor i64 [[TMP1]], -1
- // CHECK: %{{.*}} = or i64 %{{.*}}, [[TMP2]]
+ // X64-LABEL: test__blci_u64
+ // X64: [[TMP1:%.*]] = add i64 %{{.*}}, 1
+ // X64: [[TMP2:%.*]] = xor i64 [[TMP1]], -1
+ // X64: %{{.*}} = or i64 %{{.*}}, [[TMP2]]
return __blci_u64(a);
}
#endif
@@ -69,10 +76,10 @@ unsigned int test__blcic_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__blcic_u64(unsigned long long a) {
- // CHECK-LABEL: test__blcic_u64
- // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
- // CHECK: [[TMP2:%.*]] = add i64 %{{.*}}, 1
- // CHECK-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]]
+ // X64-LABEL: test__blcic_u64
+ // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
+ // X64: [[TMP2:%.*]] = add i64 %{{.*}}, 1
+ // X64-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]]
return __blcic_u64(a);
}
#endif
@@ -86,9 +93,9 @@ unsigned int test__blcmsk_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__blcmsk_u64(unsigned long long a) {
- // CHECK-LABEL: test__blcmsk_u64
- // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1
- // CHECK-NEXT: {{.*}} = xor i64 %{{.*}}, [[TMP]]
+ // X64-LABEL: test__blcmsk_u64
+ // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1
+ // X64-NEXT: {{.*}} = xor i64 %{{.*}}, [[TMP]]
return __blcmsk_u64(a);
}
#endif
@@ -102,9 +109,9 @@ unsigned int test__blcs_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__blcs_u64(unsigned long long a) {
- // CHECK-LABEL: test__blcs_u64
- // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1
- // CHECK-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]]
+ // X64-LABEL: test__blcs_u64
+ // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1
+ // X64-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]]
return __blcs_u64(a);
}
#endif
@@ -118,9 +125,9 @@ unsigned int test__blsfill_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__blsfill_u64(unsigned long long a) {
- // CHECK-LABEL: test__blsfill_u64
- // CHECK: [[TMP:%.*]] = sub i64 %{{.*}}, 1
- // CHECK-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]]
+ // X64-LABEL: test__blsfill_u64
+ // X64: [[TMP:%.*]] = sub i64 %{{.*}}, 1
+ // X64-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]]
return __blsfill_u64(a);
}
#endif
@@ -135,10 +142,10 @@ unsigned int test__blsic_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__blsic_u64(unsigned long long a) {
- // CHECK-LABEL: test__blsic_u64
- // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
- // CHECK: [[TMP2:%.*]] = sub i64 %{{.*}}, 1
- // CHECK-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]]
+ // X64-LABEL: test__blsic_u64
+ // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
+ // X64: [[TMP2:%.*]] = sub i64 %{{.*}}, 1
+ // X64-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]]
return __blsic_u64(a);
}
#endif
@@ -153,10 +160,10 @@ unsigned int test__t1mskc_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__t1mskc_u64(unsigned long long a) {
- // CHECK-LABEL: test__t1mskc_u64
- // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
- // CHECK: [[TMP2:%.*]] = add i64 %{{.*}}, 1
- // CHECK-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]]
+ // X64-LABEL: test__t1mskc_u64
+ // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
+ // X64: [[TMP2:%.*]] = add i64 %{{.*}}, 1
+ // X64-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]]
return __t1mskc_u64(a);
}
#endif
@@ -171,10 +178,10 @@ unsigned int test__tzmsk_u32(unsigned int a) {
#ifdef __x86_64__
unsigned long long test__tzmsk_u64(unsigned long long a) {
- // CHECK-LABEL: test__tzmsk_u64
- // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
- // CHECK: [[TMP2:%.*]] = sub i64 %{{.*}}, 1
- // CHECK-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]]
+ // X64-LABEL: test__tzmsk_u64
+ // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1
+ // X64: [[TMP2:%.*]] = sub i64 %{{.*}}, 1
+ // X64-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]]
return __tzmsk_u64(a);
}
#endif
diff --git a/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c b/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c
new file mode 100644
index 0000000..ef68c79
--- /dev/null
+++ b/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s -debug-info-kind=limited | FileCheck %s
+
+// CHECK-LABEL: define dso_local void @test_locals(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[IMG:%.*]] = alloca ptr, align 32, addrspace(5)
+// CHECK-NEXT: [[IMG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IMG]] to ptr
+// CHECK-NEXT: #dbg_declare(ptr addrspace(5) [[IMG]], [[META11:![0-9]+]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), [[META14:![0-9]+]])
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IMG_ASCAST]], align 32, !dbg [[DBG15:![0-9]+]]
+// CHECK-NEXT: ret void, !dbg [[DBG16:![0-9]+]]
+//
+void test_locals(void) {
+ __amdgpu_texture_t img;
+ (void)img;
+}
diff --git a/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp b/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp
new file mode 100644
index 0000000..0dbd517
--- /dev/null
+++ b/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp
@@ -0,0 +1,7 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn %s -emit-llvm -o - | FileCheck %s
+namespace std { class type_info; }
+auto &a = typeid(__amdgpu_texture_t);
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// CHECK: {{.*}}
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
index 5920ced..137a49b 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
@@ -1,7 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: amdgpu-registered-target
+// REQUIRES: spirv-registered-target
// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \
-// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s
+// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s
+// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \
+// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s
// CHECK-LABEL: @_Z29test_non_volatile_parameter32Pj(
// CHECK-NEXT: entry:
@@ -21,6 +24,43 @@
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z29test_non_volatile_parameter32Pj(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]]
+// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter32Pj(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[TMP5]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) {
__UINT32_TYPE__ res;
@@ -47,6 +87,43 @@ __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr)
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z29test_non_volatile_parameter64Py(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter64Py(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) [[TMP1]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) [[TMP5]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) {
__UINT64_TYPE__ res;
@@ -73,6 +150,43 @@ __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr)
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z25test_volatile_parameter32PVj(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[TMP1]], align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load volatile i32, ptr [[TMP5]], align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter32PVj(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i32, ptr addrspace(4) [[TMP1]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i32, ptr addrspace(4) [[TMP5]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ *ptr) {
__UINT32_TYPE__ res;
@@ -99,6 +213,43 @@ __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z25test_volatile_parameter64PVy(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load volatile i64, ptr [[TMP1]], align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load volatile i64, ptr [[TMP5]], align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter64PVy(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i64, ptr addrspace(4) [[TMP1]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i64, ptr addrspace(4) [[TMP5]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ *ptr) {
__UINT64_TYPE__ res;
@@ -116,6 +267,25 @@ __attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_shared32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_shared32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_shared32() {
__attribute__((shared)) __UINT32_TYPE__ val;
@@ -134,6 +304,25 @@ __attribute__((device)) void test_shared32() {
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_shared64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_shared64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_shared64() {
__attribute__((shared)) __UINT64_TYPE__ val;
@@ -153,6 +342,25 @@ __attribute__((device)) __UINT32_TYPE__ global_val32;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_global32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_global32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_global32() {
global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup");
@@ -170,6 +378,25 @@ __attribute__((device)) __UINT64_TYPE__ global_val64;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_global64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_global64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_global64() {
global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup");
@@ -189,6 +416,29 @@ __attribute__((constant)) __UINT32_TYPE__ cval32;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z15test_constant32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4, addrspace(5)
+// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z15test_constant32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_constant32() {
__UINT32_TYPE__ local_val;
@@ -210,6 +460,29 @@ __attribute__((constant)) __UINT64_TYPE__ cval64;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z15test_constant64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8, addrspace(5)
+// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z15test_constant64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_constant64() {
__UINT64_TYPE__ local_val;
@@ -240,6 +513,49 @@ __attribute__((device)) void test_constant64() {
// CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_order32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP8:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP10:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_order32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_order32() {
__attribute__((shared)) __UINT32_TYPE__ val;
@@ -278,6 +594,49 @@ __attribute__((device)) void test_order32() {
// CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_order64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP8:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP10:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_order64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_order64() {
__attribute__((shared)) __UINT64_TYPE__ val;
@@ -310,6 +669,37 @@ __attribute__((device)) void test_order64() {
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_scope32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP4]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_scope32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("device") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("subgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_scope32() {
__attribute__((shared)) __UINT32_TYPE__ val;
@@ -338,6 +728,37 @@ __attribute__((device)) void test_scope32() {
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_scope64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP4]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_scope64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("device") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("subgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_scope64() {
__attribute__((shared)) __UINT64_TYPE__ val;
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
index 1e977dd..dd1ca45 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -1,7 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// REQUIRES: amdgpu-registered-target
+// REQUIRES: spirv-registered-target
// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
-// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s
+// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
+// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s
// CHECK-LABEL: define dso_local void @_Z25test_memory_fence_successv(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -12,6 +15,25 @@
// CHECK-NEXT: fence syncscope("agent") acq_rel
// CHECK-NEXT: fence syncscope("workgroup") release
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z25test_memory_fence_successv(
+// GCN-SAME: ) #[[ATTR0:[0-9]+]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst
+// GCN-NEXT: fence syncscope("agent") acquire
+// GCN-NEXT: fence seq_cst
+// GCN-NEXT: fence syncscope("agent") acq_rel
+// GCN-NEXT: fence syncscope("workgroup") release
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z25test_memory_fence_successv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire
+// AMDGCNSPIRV-NEXT: fence seq_cst
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release
+// AMDGCNSPIRV-NEXT: ret void
//
void test_memory_fence_success() {
@@ -35,6 +57,25 @@ void test_memory_fence_success() {
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z10test_localv(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]]
+// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
+// GCN-NEXT: fence seq_cst, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_localv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_local() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
@@ -58,6 +99,25 @@ void test_local() {
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]]
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z11test_globalv(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]]
+// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META4]]
+// GCN-NEXT: fence seq_cst, !mmra [[META4]]
+// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]]
+// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z11test_globalv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_global() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "global");
@@ -80,6 +140,25 @@ void test_global() {
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z10test_imagev(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
+// GCN-NEXT: fence seq_cst, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_imagev(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_image() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
@@ -99,13 +178,33 @@ void test_image() {
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z10test_mixedv(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_mixedv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_mixed() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "global");
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "local", "global", "local", "local");
}
-//.
// CHECK: [[META3]] = !{!"amdgpu-synchronize-as", !"local"}
// CHECK: [[META4]] = !{!"amdgpu-synchronize-as", !"global"}
// CHECK: [[META5]] = !{[[META4]], [[META3]]}
//.
+// GCN: [[META3]] = !{!"amdgpu-synchronize-as", !"local"}
+// GCN: [[META4]] = !{!"amdgpu-synchronize-as", !"global"}
+// GCN: [[META5]] = !{[[META4]], [[META3]]}
+//.
+// AMDGCNSPIRV: [[META3]] = !{!"amdgpu-synchronize-as", !"local"}
+// AMDGCNSPIRV: [[META4]] = !{!"amdgpu-synchronize-as", !"global"}
+// AMDGCNSPIRV: [[META5]] = !{[[META4]], [[META3]]}
+//.
diff --git a/clang/test/CodeGenCXX/gh56652.cpp b/clang/test/CodeGenCXX/gh56652.cpp
new file mode 100644
index 0000000..06a496e
--- /dev/null
+++ b/clang/test/CodeGenCXX/gh56652.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-elf-gnu %s -emit-llvm -o - | FileCheck %s
+
+namespace GH56652 {
+
+struct foo {};
+
+template <typename T> struct bar {
+ using type = T;
+
+ template <foo> inline static constexpr auto b = true;
+};
+
+template <typename T>
+concept C = requires(T a) { T::template b<foo{}>; };
+
+template <typename T> auto fn(T) {
+ if constexpr (!C<T>)
+ return foo{};
+ else
+ return T{};
+}
+
+auto a = decltype(fn(bar<int>{})){};
+
+} // namespace GH56652
+
+namespace GH116319 {
+
+template <int = 0> struct a {
+  template <class> static constexpr auto b = 2;
+  template <class> static void c() noexcept(noexcept(b<int>)) {}
+};
+
+void test() { a<>::c<int>(); }
+
+
+} // namespace GH116319
+
+// CHECK: %"struct.GH56652::bar" = type { i8 }
+// CHECK: $_ZN8GH1163191aILi0EE1cIiEEvv = comdat any
+// CHECK: @_ZN7GH566521aE = global %"struct.GH56652::bar" undef
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
index 19ab656..7cd3f14 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
@@ -1,13 +1,13 @@
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s
typedef unsigned int uint;
typedef unsigned long ulong;
@@ -50,7 +50,8 @@ void test_s_wait_event_export_ready() {
}
// CHECK-LABEL: @test_global_add_f32
-// CHECK: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
+// GCN: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
+// AMDGCNSPIRV: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("device") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
#if !defined(__SPIRV__)
void test_global_add_f32(float *rtn, global float *addr, float x) {
#else
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 5f202ba..6bb20bf 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -1,9 +1,9 @@
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
@@ -252,9 +252,11 @@ void test_update_dpp_const_int(global int* out, int arg1)
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
-// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
-// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
#if !defined(__SPIRV__)
@@ -293,9 +295,11 @@ void test_ds_faddf(local float *out, float src) {
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
-// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
-// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
@@ -334,9 +338,11 @@ void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) {
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
-// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
-// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 039d032..ab0b0b9 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -1231,7 +1231,8 @@ void test_atomic_inc_dec(__attribute__((address_space(3))) uint *lptr, __attribu
// CHECK: atomicrmw udec_wrap ptr addrspace(3) %lptr, i32 %val syncscope("workgroup") seq_cst, align 4
res = __builtin_amdgcn_atomic_dec32(lptr, val, __ATOMIC_SEQ_CST, "workgroup");
- // CHECK: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4
+ // CHECK-AMDGCN: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4
+ // CHECK-SPIRV: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("device") seq_cst, align 4
res = __builtin_amdgcn_atomic_inc32(gptr, val, __ATOMIC_SEQ_CST, "agent");
// CHECK: atomicrmw udec_wrap ptr addrspace(1) %gptr, i32 %val seq_cst, align 4
diff --git a/clang/test/Driver/modules-print-library-module-manifest-path.cpp b/clang/test/Driver/modules-print-library-module-manifest-path.cpp
index 7606713..af0f124 100644
--- a/clang/test/Driver/modules-print-library-module-manifest-path.cpp
+++ b/clang/test/Driver/modules-print-library-module-manifest-path.cpp
@@ -18,6 +18,14 @@
// RUN: --target=x86_64-linux-gnu 2>&1 \
// RUN: | FileCheck libcxx.cpp
+// check that -nostdlib causes no library-provided module manifest to
+// be reported, even when libc++.modules.json is present.
+// RUN: %clang -print-library-module-manifest-path \
+// RUN: -nostdlib \
+// RUN: -resource-dir=%t/Inputs/usr/lib/x86_64-linux-gnu \
+// RUN: --target=x86_64-linux-gnu 2>&1 \
+// RUN: | FileCheck libcxx-no-module-json.cpp
+
// for macos there is a different directory structure
// where the library and libc++.modules.json file are in lib
// directly but headers are in clang/ver directory which
diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp
index 83632db..cb4bcc9 100644
--- a/clang/test/OpenMP/for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/for_reduction_codegen.cpp
@@ -27,7 +27,6 @@ struct S {
~S() {}
};
-
template <typename T, int length>
T tmain() {
T t;
@@ -60,6 +59,15 @@ T tmain() {
}
extern S<float> **foo();
+int g_arr[10];
+
+void reductionArrayElement() {
+#pragma omp parallel
+#pragma omp for reduction(+:g_arr[1])
+ for (int i = 0; i < 10; i++) {
+ g_arr[1] += i;
+ }
+}
int main() {
#ifdef LAMBDA
@@ -164,6 +172,7 @@ int main() {
#pragma omp for reduction(& : var3)
for (int i = 0; i < 10; ++i)
;
+ reductionArrayElement();
return tmain<int, 42>();
#endif
}
@@ -535,6 +544,26 @@ int main() {
//.
// CHECK4: @.gomp_critical_user_.reduction.var = common global [8 x i32] zeroinitializer, align 8
//.
+
+// CHECK1-LABEL: define {{.*}}reductionArrayElement{{.*}}.omp_outlined{{.*}}
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1: [[G_ARR:%.*]] = alloca i32, align 4
+// CHECK1: [[TMP0:%.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr @g_arr to i64){{.*}}
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[G_ARR:%.*]], i64 [[TMP0]]
+// CHECK1: omp.inner.for.body:
+// CHECK1: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP1]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]],{{.+}}
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4
+// CHECK1: omp.loop.exit:
+// CHECK1-NEXT: call void {{.*}}__kmpc_for_static_fini{{.+}}
+// CHECK1: {{.*}}call i32 {{.*}}__kmpc_reduce{{.+}}
+// CHECK1: omp.reduction.default:
+// CHECK1-NEXT: call void @__kmpc_barrier{{.+}}
+// CHECK1-NEXT: ret void
+//
+
// CHECK1-LABEL: define {{[^@]+}}@main
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
@@ -614,6 +643,7 @@ int main() {
// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @main.omp_outlined.11, ptr [[TMP7]])
// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[VAR3]], align 8
// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @main.omp_outlined.12, ptr [[TMP8]])
+// CHECK1-NEXT: call void {{.*}}reductionArrayElement{{.*}}
// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v()
// CHECK1-NEXT: store i32 [[CALL10]], ptr [[RETVAL]], align 4
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
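A minimal standalone sketch of the construct the new checks cover (a hypothetical file, assuming an OpenMP-enabled compile such as -fopenmp): reducing on an array element privatizes just that element per thread and combines the partial results through the runtime reduce/barrier sequence checked above.

#include <cstdio>

int g_arr[10];

int main() {
  // Each thread works on a private copy of g_arr[1] (initialized to 0
  // for '+'); the copies are combined at loop exit, matching the
  // __kmpc_reduce / __kmpc_barrier calls in the CHECK lines above.
#pragma omp parallel for reduction(+ : g_arr[1])
  for (int i = 0; i < 10; ++i)
    g_arr[1] += i;
  std::printf("%d\n", g_arr[1]); // 45 = 0 + 1 + ... + 9
}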
diff --git a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
index 7ffb7aae..8c7a778 100644
--- a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
+++ b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
@@ -1,9 +1,7 @@
-// RUN: %clang_cc1 -std=c++20 %s -verify=cxx20
-// RUN: %clang_cc1 -std=c++23 %s -verify=cxx23
-// RUN: %clang_cc1 -std=c++23 -Wpre-c++23-compat %s -verify=precxx23
-// RUN: %clang_cc1 -std=c++23 -pedantic %s -verify=cxx23
-
-//cxx23-no-diagnostics
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++20 %s -verify=cxx20
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 %s -verify=cxx23
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 -Wpre-c++23-compat %s -verify=precxx23
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 -pedantic %s -verify=cxx23
auto L1 = [] constexpr {};
// cxx20-warning@-1 {{lambda without a parameter clause is a C++23 extension}}
@@ -14,3 +12,25 @@ auto L3 = [] static {};
// cxx20-warning@-1 {{lambda without a parameter clause is a C++23 extension}}
// cxx20-warning@-2 {{static lambdas are a C++23 extension}}
// precxx23-warning@-3 {{static lambdas are incompatible with C++ standards before C++23}}
+
+namespace GH161070 {
+void t1() { int a = [] __arm_streaming; }
+// precxx23-error@-1 {{'__arm_streaming' cannot be applied to a declaration}}
+// precxx23-error@-2 {{expected body of lambda expression}}
+// cxx23-error@-3 {{'__arm_streaming' cannot be applied to a declaration}}
+// cxx23-error@-4 {{expected body of lambda expression}}
+// cxx20-error@-5 {{'__arm_streaming' cannot be applied to a declaration}}
+// cxx20-error@-6 {{expected body of lambda expression}}
+// cxx20-warning@-7 {{'__arm_streaming' in this position is a C++23 extension}}
+// precxx23-warning@-8 {{'__arm_streaming' in this position is incompatible with C++ standards before C++23}}
+
+void t2() { int a = [] [[assume(true)]]; }
+// precxx23-error@-1 {{'assume' attribute cannot be applied to a declaration}}
+// precxx23-error@-2 {{expected body of lambda expression}}
+// cxx23-error@-3 {{'assume' attribute cannot be applied to a declaration}}
+// cxx23-error@-4 {{expected body of lambda expression}}
+// cxx20-error@-5 {{'assume' attribute cannot be applied to a declaration}}
+// cxx20-error@-6 {{expected body of lambda expression}}
+// cxx20-warning@-7 {{an attribute specifier sequence in this position is a C++23 extension}}
+// precxx23-warning@-8 {{an attribute specifier sequence in this position is incompatible with C++ standards before C++23}}
+}
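For contrast with the deliberately ill-formed cases above, a hedged sketch of the C++23 position being probed: an attribute specifier sequence between the capture list and the parameter clause appertains to the lambda's call operator, so an attribute that is valid on a declaration is accepted there once a body is supplied.

// Valid C++23 (the same position the cxx20 warnings above flag as an extension):
auto ok = [] [[nodiscard]] () -> int { return 1; };
// The test cases instead attach declaration-incompatible attributes and
// omit the lambda body, so each diagnoses both problems.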
diff --git a/clang/test/SemaCXX/amdgpu-image-rsrc.cpp b/clang/test/SemaCXX/amdgpu-image-rsrc.cpp
new file mode 100644
index 0000000..61a82d4
--- /dev/null
+++ b/clang/test/SemaCXX/amdgpu-image-rsrc.cpp
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fsyntax-only -verify -std=gnu++11 -triple amdgcn -Wno-unused-value %s
+
+void foo() {
+ int n = 100;
+ __amdgpu_texture_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_texture_t' with an rvalue of type 'int'}}
+ static_cast<__amdgpu_texture_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_texture_t' is not allowed}}
+ reinterpret_cast<__amdgpu_texture_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_texture_t' is not allowed}}
+ (void)(v + v); // expected-error {{invalid operands to binary expression ('__amdgpu_texture_t' and '__amdgpu_texture_t')}}
+ int x(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_texture_t'}}
+ __amdgpu_texture_t k;
+}
+
+template<class T> void bar(T);
+void use(__amdgpu_texture_t r) { bar(r); }
+struct S { __amdgpu_texture_t r; int a; };
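As the bar/use/S lines above suggest, __amdgpu_texture_t behaves as an opaque handle: values may be declared, copied, passed, stored in classes, and deduced in templates, while arithmetic and casts are rejected. A small sketch of the allowed side, under the same amdgcn target assumptions:

void take(__amdgpu_texture_t);
void ok(__amdgpu_texture_t t) {
  __amdgpu_texture_t copy = t; // copying is fine; t + t or (int)t is not
  take(copy);
}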
diff --git a/clang/test/SemaCXX/decltype.cpp b/clang/test/SemaCXX/decltype.cpp
index 739485b..45a4c4c 100644
--- a/clang/test/SemaCXX/decltype.cpp
+++ b/clang/test/SemaCXX/decltype.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -Wno-c99-designator %s
+// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify -Wno-c99-designator %s
// PR5290
int const f0();
@@ -156,6 +157,8 @@ struct A {
}
};
+
+
// This shouldn't crash.
static_assert(A<int>().f<int>() == 0, "");
// The result should not be dependent.
@@ -163,6 +166,81 @@ static_assert(A<int>().f<int>() != 0, ""); // expected-error {{static assertion
// expected-note@-1 {{expression evaluates to '0 != 0'}}
}
+
+#if __cplusplus >= 201703L
+namespace GH160497 {
+
+template <class> struct S {
+ template <class>
+ inline static auto mem =
+ [] { static_assert(false); // expected-error {{static assertion failed}} \
+ // expected-note {{while substituting into a lambda expression here}}
+ return 42;
+ }();
+};
+
+using T = decltype(S<void>::mem<void>);
+ // expected-note@-1 {{in instantiation of static data member 'GH160497::S<void>::mem<void>' requested here}}
+
+
+template <class> struct S2 {
+ template <class>
+ inline static auto* mem =
+ [] { static_assert(false); // expected-error {{static assertion failed}} \
+ // expected-note {{while substituting into a lambda expression here}}
+ return static_cast<int*>(nullptr);
+ }();
+};
+
+using T2 = decltype(S2<void>::mem<void>);
+// expected-note@-1 {{in instantiation of static data member 'GH160497::S2<void>::mem<void>' requested here}}
+
+template <class> struct S3 {
+ template <class>
+ inline static int mem = // Check we don't instantiate when the type is not deduced.
+ [] { static_assert(false);
+ return 42;
+ }();
+};
+
+using T = decltype(S3<void>::mem<void>);
+}
+
+namespace N1 {
+
+template<class>
+struct S {
+ template<class>
+ inline static auto mem = 42;
+};
+
+using T = decltype(S<void>::mem<void>);
+
+T y = 42;
+
+}
+
+namespace GH161196 {
+
+template <typename> struct A {
+ static constexpr int digits = 0;
+};
+
+template <typename> struct B {
+ template <int, typename MaskInt = int, int = A<MaskInt>::digits>
+ static constexpr auto XBitMask = 0;
+};
+
+struct C {
+ using ReferenceHost = B<int>;
+ template <int> static decltype(ReferenceHost::XBitMask<0>) XBitMask;
+};
+
+void test() { (void)C::XBitMask<0>; }
+
+}
+#endif
+
template<typename>
class conditional {
};
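The S3 case is the crux of GH160497: decltype instantiates a variable template's initializer only when the variable's type must be deduced from it. A distilled sketch of the distinction, with hypothetical names:

template <class> struct D {
  // 'auto' means decltype needs the initializer, so the lambda is
  // instantiated and any static_assert(false) inside it fires.
  template <class> inline static auto deduced = [] { return 42; }();
  // A concrete type needs no deduction; the initializer stays
  // uninstantiated under decltype.
  template <class> inline static int concrete = [] { return 42; }();
};
using A = decltype(D<void>::deduced<void>);  // instantiates the initializer
using B = decltype(D<void>::concrete<void>); // does not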
diff --git a/clang/test/SemaCXX/type-traits.cpp b/clang/test/SemaCXX/type-traits.cpp
index 3f01247..d49330f 100644
--- a/clang/test/SemaCXX/type-traits.cpp
+++ b/clang/test/SemaCXX/type-traits.cpp
@@ -2038,6 +2038,49 @@ void is_implicit_lifetime(int n) {
static_assert(__builtin_is_implicit_lifetime(int * __restrict));
}
+namespace GH160610 {
+class NonAggregate {
+public:
+ NonAggregate() = default;
+
+ NonAggregate(const NonAggregate&) = delete;
+ NonAggregate& operator=(const NonAggregate&) = delete;
+private:
+ int num;
+};
+
+class DataMemberInitializer {
+public:
+ DataMemberInitializer() = default;
+
+ DataMemberInitializer(const DataMemberInitializer&) = delete;
+ DataMemberInitializer& operator=(const DataMemberInitializer&) = delete;
+private:
+ int num = 0;
+};
+
+class UserProvidedConstructor {
+public:
+ UserProvidedConstructor() {}
+
+ UserProvidedConstructor(const UserProvidedConstructor&) = delete;
+ UserProvidedConstructor& operator=(const UserProvidedConstructor&) = delete;
+};
+
+static_assert(__builtin_is_implicit_lifetime(NonAggregate));
+static_assert(!__builtin_is_implicit_lifetime(DataMemberInitializer));
+static_assert(!__builtin_is_implicit_lifetime(UserProvidedConstructor));
+
+#if __cplusplus >= 202002L
+template <typename T>
+class Tpl {
+  Tpl() requires false = default;
+};
+static_assert(!__builtin_is_implicit_lifetime(Tpl<int>));
+
+#endif
+}
+
void is_signed()
{
//static_assert(__is_signed(char));
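A sketch of the rule the new GH160610 expectations exercise (as tested here, not the normative wording): implicit-lifetime needs a trivial eligible constructor plus a trivial destructor. A defaulted default constructor stays trivial for NonAggregate, but a default member initializer or a user-provided body makes it non-trivial, and with the copy and move constructors deleted no trivial constructor remains:

struct Plain { int n; };                 // aggregate, trivial dtor: implicit-lifetime
struct WithNSDMI {
  WithNSDMI() = default;                 // non-trivial: must initialize n
  WithNSDMI(const WithNSDMI &) = delete; // removes the trivial copy ctor too
  int n = 0;
};
static_assert(__builtin_is_implicit_lifetime(Plain));
static_assert(!__builtin_is_implicit_lifetime(WithNSDMI));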
diff --git a/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl b/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl
new file mode 100644
index 0000000..dc56494
--- /dev/null
+++ b/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl
@@ -0,0 +1,13 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -verify -cl-std=CL1.2 -triple amdgcn-amd-amdhsa %s
+// RUN: %clang_cc1 -verify -cl-std=CL2.0 -triple amdgcn-amd-amdhsa %s
+
+void f() {
+ int n = 3;
+ __amdgpu_texture_t v = (__amdgpu_texture_t)0; // expected-error {{used type '__amdgpu_texture_t' where arithmetic or pointer type is required}}
+ int k = v; // expected-error {{initializing '__private int' with an expression of incompatible type '__private __amdgpu_texture_t'}}
+ (void)(v + v); // expected-error {{invalid operands}}
+ __amdgpu_texture_t r;
+ int *p = (int*)r; // expected-error {{operand of type '__amdgpu_texture_t' where arithmetic or pointer type is required}}
+}
diff --git a/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp b/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp
new file mode 100644
index 0000000..51b3f72
--- /dev/null
+++ b/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp
@@ -0,0 +1,12 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -triple amdgcn-amd-amdhsa -fopenmp-is-target-device -Wno-unused-value %s
+
+void foo() {
+#pragma omp target
+ {
+ int n = 5;
+ __amdgpu_texture_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_texture_t' with an rvalue of type 'int'}}
+ (void)(v + v); // expected-error {{invalid operands to binary expression}}
+ }
+}
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 0e2758d..e41f4eb 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -420,7 +420,7 @@ public:
std::vector<ModuleDeps *> NewMDs;
{
std::unique_lock<std::mutex> ul(Lock);
- for (const ModuleDeps &MD : Graph) {
+ for (ModuleDeps &MD : Graph) {
auto I = Modules.find({MD.ID, 0});
if (I != Modules.end()) {
I->first.InputIndex = std::min(I->first.InputIndex, InputIndex);