; RUN: opt %s -strip-debug -o %t.no_debug.ll -S
; RUN: llc -O3 -mcpu=gfx1250 < %s -filetype=obj -o %t.with_debug.o
; RUN: llc -O3 -mcpu=gfx1250 < %t.no_debug.ll -filetype=obj -o %t.no_debug.o
; RUN: llvm-strip %t.with_debug.o %t.no_debug.o
; RUN: cmp %t.with_debug.o %t.no_debug.o

; Ensure that compiling with and without debug generates identical code.
; Test that revertScheduling only updates LiveIntervals if non-debug
; instructions are reordered.
;
; How the lit commands above fit together: opt writes a copy of this module
; with debug info stripped, llc builds an object from each variant at -O3 for
; gfx1250, llvm-strip is run on both objects, and cmp fails the test if the
; two stripped objects differ in any byte.

target triple = "amdgcn-amd-amdhsa"

declare void @llvm.amdgcn.s.barrier() #0

; Kernel under test. The only debug record in the body is the #dbg_value in
; the entry block.
; NOTE(review): this define carries no !dbg attachment, although !5 below is a
; DISubprogram whose linkageName matches this function - confirm whether the
; attachment was dropped from this copy or is intentionally absent.
; NOTE(review): every shufflevector in if.end82 ends at its mask type
; (<14 x i32> or <2 x i32>) with no mask constant after it. The mask operands
; appear to have been lost from this copy and must be restored from the
; original test before the file will parse - confirm against upstream.
define amdgpu_kernel void @_test_revertScheduling(i32 %lda, ptr addrspace(1) %infoA.coerce, i32 %add.ptr13.idx, ptr addrspace(3) %add.ptr10, i32 %i0, i1 %cmp59.not, ptr addrspace(3) %arrayidx53, ptr addrspace(3) %arrayidx75.3.6, <2 x float> %f1, float %f2, <2 x float> %f3, <14 x float> %f4) {
entry:
  %cond13.in.i10.i.i.i = load i16, ptr addrspace(4) null, align 2
  %f5 = tail call i32 @llvm.amdgcn.workitem.id.x()
  ; The debug record names %add.ptr, which is only defined near the end of
  ; if.end82.
    #dbg_value(ptr addrspace(1) %add.ptr, !4, !DIExpression(), !13)
  ; Fourteen addrspace(1) float addresses off a null base, indexed by
  ; (k * %lda + workitem.id.x) for k = 0..13.
  %idxprom = zext i32 %f5 to i64
  %arrayidx = getelementptr float, ptr addrspace(1) null, i64 %idxprom
  %add16.1 = add i32 %lda, %f5
  %idxprom.1 = sext i32 %add16.1 to i64
  %arrayidx.1 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.1
  %mul15.2 = shl i32 %lda, 1
  %add16.2 = add i32 %mul15.2, %f5
  %idxprom.2 = sext i32 %add16.2 to i64
  %arrayidx.2 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.2
  %mul15.3 = mul i32 %lda, 3
  %add16.3 = add i32 %mul15.3, %f5
  %idxprom.3 = sext i32 %add16.3 to i64
  %arrayidx.3 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.3
  %mul15.4 = shl i32 %lda, 2
  %add16.4 = add i32 %mul15.4, %f5
  %idxprom.4 = sext i32 %add16.4 to i64
  %arrayidx.4 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.4
  %mul15.5 = mul i32 %lda, 5
  %add16.5 = add i32 %mul15.5, %f5
  %idxprom.5 = sext i32 %add16.5 to i64
  %arrayidx.5 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.5
  %mul15.6 = mul i32 %lda, 6
  %add16.6 = add i32 %mul15.6, %f5
  %idxprom.6 = sext i32 %add16.6 to i64
  %arrayidx.6 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.6
  %mul15.7 = mul i32 %lda, 7
  %add16.7 = add i32 %mul15.7, %f5
  %idxprom.7 = sext i32 %add16.7 to i64
  %arrayidx.7 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.7
  %mul15.8 = shl i32 %lda, 3
  %add16.8 = add i32 %mul15.8, %f5
  %idxprom.8 = sext i32 %add16.8 to i64
  %arrayidx.8 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.8
  %mul15.9 = mul i32 %lda, 9
  %add16.9 = add i32 %mul15.9, %f5
  %idxprom.9 = sext i32 %add16.9 to i64
  %arrayidx.9 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.9
  %mul15.10 = mul i32 %lda, 10
  %add16.10 = add i32 %mul15.10, %f5
  %idxprom.10 = sext i32 %add16.10 to i64
  %arrayidx.10 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.10
  %mul15.11 = mul i32 %lda, 11
  %add16.11 = add i32 %mul15.11, %f5
  %idxprom.11 = sext i32 %add16.11 to i64
  %arrayidx.11 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.11
  %mul15.12 = mul i32 %lda, 12
  %add16.12 = add i32 %mul15.12, %f5
  %idxprom.12 = sext i32 %add16.12 to i64
  %arrayidx.12 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.12
  %mul15.13 = mul i32 %lda, 13
  %add16.13 = add i32 %mul15.13, %f5
  %idxprom.13 = sext i32 %add16.13 to i64
  %arrayidx.13 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.13
  %f6 = load float, ptr addrspace(1) %arrayidx.13, align 4
  %cmp54 = fcmp oeq float %f6, 0.000000e+00
  br i1 %cmp59.not, label %if.end82, label %for.body71.preheader

for.body71.preheader:                             ; preds = %entry
  ; Reached only when %cmp59.not is false; %f8 has no later use in this file.
  %f7 = load float, ptr addrspace(1) %arrayidx, align 4
  %mul65 = fmul float %f7, 0.000000e+00
  %f8 = insertelement <14 x float> zeroinitializer, float %mul65, i64 0
  br label %if.end82

if.end82:                                         ; preds = %for.body71.preheader, %entry
  ; addrspace(3) loads and stores interleaved with a barrier, an acquire fence
  ; and a release fence, feeding a chain of <2 x float> / <14 x float> math.
  %f9 = load <2 x float>, ptr addrspace(3) null, align 8
  tail call void @llvm.amdgcn.s.barrier()
  %f10 = load float, ptr addrspace(3) %add.ptr10, align 4
  %f11 = load float, ptr addrspace(3) null, align 4
  fence acquire
  %f12 = load float, ptr addrspace(3) %add.ptr10, align 4
  %f13 = load float, ptr addrspace(3) %arrayidx53, align 4
  %cmp54.2 = fcmp oeq float %f10, 0.000000e+00
  %cmp54.1 = fcmp une float %f13, 0.000000e+00
  %spec.select.1 = select i1 %cmp54.1, i32 %add16.13, i32 0
  %spec.select.2 = select i1 %cmp54.2, i32 0, i32 %spec.select.1
  %mul65.3 = fmul float 0.000000e+00, %f2
  %f14 = insertelement <2 x float> zeroinitializer, float %mul65.3, i64 0
  %f15 = fmul <2 x float> zeroinitializer, %f9
  %f16 = shufflevector <2 x float> %f15, <2 x float> zeroinitializer, <14 x i32>
  %f17 = shufflevector <14 x float> zeroinitializer, <14 x float> %f16, <14 x i32>
  %f18 = insertelement <14 x float> %f17, float %f11, i64 13
  %f19 = shufflevector <14 x float> %f18, <14 x float> zeroinitializer, <2 x i32>
  %f20 = load <2 x float>, ptr addrspace(3) null, align 8
  %f21 = fmul contract <2 x float> zeroinitializer, %f20
  store float 0.000000e+00, ptr addrspace(3) null, align 4
  %f22 = load <2 x float>, ptr addrspace(3) %arrayidx75.3.6, align 8
  %f23 = fmul <2 x float> zeroinitializer, %f22
  %f24 = shufflevector <2 x float> %f23, <2 x float> zeroinitializer, <14 x i32>
  %f25 = shufflevector <14 x float> %f4, <14 x float> %f24, <14 x i32>
  %f26 = fsub contract <2 x float> %f19, %f21
  %f27 = shufflevector <2 x float> %f26, <2 x float> zeroinitializer, <14 x i32>
  %f28 = shufflevector <14 x float> %f25, <14 x float> %f27, <14 x i32>
  %f29 = extractelement <14 x float> %f28, i64 0
  %f30 = load <2 x float>, ptr addrspace(3) null, align 4
  %f31 = shufflevector <14 x float> %f28, <14 x float> zeroinitializer, <2 x i32>
  %f32 = fsub <2 x float> zeroinitializer, %f14
  %f33 = fmul contract <2 x float> %f32, zeroinitializer
  %f34 = fsub contract <2 x float> %f31, %f33
  fence release
  %f35 = load <2 x float>, ptr addrspace(3) inttoptr (i32 32 to ptr addrspace(3)), align 8
  %f36 = fmul contract <2 x float> zeroinitializer, %f35
  %f37 = load <2 x float>, ptr addrspace(3) inttoptr (i32 24 to ptr addrspace(3)), align 8
  %f38 = fmul contract <2 x float> %f3, %f37
  %f39 = fsub contract <2 x float> %f33, %f38
  %f40 = shufflevector <2 x float> %f39, <2 x float> zeroinitializer, <14 x i32>
  %f41 = shufflevector <14 x float> zeroinitializer, <14 x float> %f40, <14 x i32>
  %f42 = fsub contract <2 x float> %f1, %f36
  %f43 = shufflevector <2 x float> %f42, <2 x float> zeroinitializer, <14 x i32>
  %f44 = shufflevector <14 x float> %f41, <14 x float> %f43, <14 x i32>
  %f45 = shufflevector <14 x float> %f44, <14 x float> zeroinitializer, <2 x i32>
  %f46 = fmul contract <2 x float> zeroinitializer, %f3
  %f47 = fsub contract <2 x float> %f45, %f46
  %f48 = load <2 x float>, ptr addrspace(3) inttoptr (i32 48 to ptr addrspace(3)), align 8
  %f49 = fsub <2 x float> %f34, %f48
  %f50 = fmul <2 x float> zeroinitializer, %f30
  %f51 = shufflevector <2 x float> %f47, <2 x float> zeroinitializer, <14 x i32>
  %f52 = shufflevector <14 x float> %f44, <14 x float> %f51, <14 x i32>
  %f53 = fsub <2 x float> %f49, %f50
  %f54 = shufflevector <2 x float> %f53, <2 x float> zeroinitializer, <14 x i32>
  %f55 = shufflevector <14 x float> %f52, <14 x float> %f54, <14 x i32>
  %f56 = extractelement <14 x float> %f55, i64 7
  %mul65.7 = fmul float %f56, 0.000000e+00
  %f57 = load <2 x float>, ptr addrspace(3) inttoptr (i32 40 to ptr addrspace(3)), align 8
  store float 0.000000e+00, ptr addrspace(3) %add.ptr10, align 4
  %f58 = insertelement <2 x float> zeroinitializer, float %mul65.7, i64 0
  %f59 = shufflevector <14 x float> %f55, <14 x float> zeroinitializer, <2 x i32>
  %f60 = fmul <2 x float> %f58, %f57
  %f61 = fsub <2 x float> %f59, %f60
  %f62 = load <2 x float>, ptr addrspace(3) %arrayidx53, align 4
  %f63 = fsub <2 x float> %f61, %f62
  %f64 = shufflevector <2 x float> %f63, <2 x float> zeroinitializer, <14 x i32>
  %cmp59.13 = icmp ugt i32 %f5, 0
  %f65 = shufflevector <14 x float> zeroinitializer, <14 x float> %f64, <14 x i32>
  %promotealloca.13 = select i1 %cmp59.13, <14 x float> zeroinitializer, <14 x float> %f65
  %cmp54.3 = fcmp oeq float %f12, 0.000000e+00
  %cmp54.4 = fcmp oeq float %f29, 0.000000e+00
  %spec.select.3 = select i1 %cmp54.3, i32 0, i32 %spec.select.2
  %spec.select.4 = select i1 %cmp54.4, i32 0, i32 %spec.select.3
  %conv.i.i = zext i16 %cond13.in.i10.i.i.i to i32
  %f66 = tail call i32 @llvm.amdgcn.workitem.id.y()
  %add = or i32 %conv.i.i, %f66
  %conv.i = sext i32 %add to i64
  ; %add.ptr is the value named by the #dbg_value record in the entry block.
  %add.ptr = getelementptr i32, ptr addrspace(1) %infoA.coerce, i64 %conv.i
  store i32 %spec.select.4, ptr addrspace(1) %add.ptr, align 4
  ; Stores through %arrayidx .. %arrayidx.12 and a final store to null;
  ; %arrayidx.13 is only ever loaded from (in the entry block).
  store float 0.000000e+00, ptr addrspace(1) %arrayidx, align 4
  store float 0.000000e+00, ptr addrspace(1) %arrayidx.1, align 4
  store float 0.000000e+00, ptr addrspace(1) %arrayidx.2, align 4
  store float %mul65.3, ptr addrspace(1) %arrayidx.3, align 4
  store float 0.000000e+00, ptr addrspace(1) %arrayidx.4, align 4
  store float 0.000000e+00, ptr addrspace(1) %arrayidx.5, align 4
  store float 0.000000e+00, ptr addrspace(1) %arrayidx.6, align 4
  store float 0.000000e+00, ptr addrspace(1) %arrayidx.7, align 4
  store float 0.000000e+00, ptr addrspace(1) %arrayidx.8, align 4
  %f67 = extractelement <14 x float> %promotealloca.13, i64 9
  store float %f67, ptr addrspace(1) %arrayidx.9, align 4
  store float 0.000000e+00, ptr addrspace(1) %arrayidx.10, align 4
  store float 0.000000e+00, ptr addrspace(1) %arrayidx.11, align 4
  store float 0.000000e+00, ptr addrspace(1) %arrayidx.12, align 4
  store float 0.000000e+00, ptr addrspace(1) null, align 4
  ret void
}

declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x() #1
declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y() #1

attributes #0 = { convergent nocallback nofree nounwind willreturn }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

; Debug metadata. !4 (local variable "info") and !13 (line-0 location) are the
; operands of the #dbg_value record; both are scoped to subprogram !5.
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3}

!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "AMD clang version 22.0.0git (ssh://github-emu/AMD-Lightning-Internal/llvm-project 25425 c51a87b7a53a3e8f308402aaffa3ecbc2953305a)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, imports: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "test.cpp", directory: "/tmp", checksumkind: CSK_MD5, checksum: "cc205700bf3536fe4ff21a07daf7e01d")
!2 = !{}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !DILocalVariable(name: "info", scope: !5, file: !6, line: 162, type: !11)
!5 = distinct !DISubprogram(name: "test_revertScheduling", linkageName: "_test_revertScheduling", scope: !7, file: !6, line: 142, type: !9, scopeLine: 150, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, templateParams: !2, retainedNodes: !2)
!6 = !DIFile(filename: "kernels.hpp", directory: "/tmp")
!7 = !DINamespace(name: "v33200", scope: !8, exportSymbols: true)
!8 = !DINamespace(name: "solve", scope: null)
!9 = distinct !DISubroutineType(types: !10)
!10 = !{null}
!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64)
!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!13 = !DILocation(line: 0, scope: !5)