; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks='loop-interchange' -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -disable-output -S
; RUN: FileCheck --input-file=%t %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"

; This is a reduced test case for the example in "large-nested-6d.ll". For a
; full description of the purpose of this test and its complexities, see that
; file.
;
; This reproducer contains the perfectly nested sub-part of that bigger loop
; nest:
;
;   for i=1 to NX
;     for j=1 to NY
;       for IL=1 to NX
;         load GlobC(i,IL,L)
;         load GlobG(i,IL,L)
;         load GlobE(i,IL,L)
;         load GlobI(i,IL,L)
;         for JL=1 to NY
;           load GlobD(j,JL,M)
;           load GlobH(j,JL,M)
;           load GlobF(j,JL,M)
;           load GlobJ(j,JL,M)
;           store GlobL(NY*i+j,NY*IL+JL)
;         End
;       End
;     End
;   End
;
; This reproducer is useful to focus only on the 2nd challenge: the data
; dependence analysis problem, and not worry about the rest of the loop nest
; structure.
;
; TODO:
;
; If loop-interchange is able to deal with imperfectly nested loops, this
; test is redundant and we only need to keep "large-nested-6d.ll".
;
; CHECK: --- !Analysis
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Computed dependence info, invoking the transform.
; CHECK-NEXT: ...
; CHECK-NEXT: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: All loops have dependencies in all directions.
; CHECK-NEXT: ...

; Read-only operand arrays: eight 54x54x54 arrays of double. In @test,
; GlobC/GlobG/GlobE/GlobI are loaded in the IL loop and
; GlobD/GlobH/GlobF/GlobJ are loaded in the innermost JL loop.
@GlobC = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobD = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobE = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobF = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobG = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobH = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobI = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobJ = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
; Output array: 1000x1000 doubles, i.e. 8000 bytes per row. It is the only
; memory written by @test.
@GlobL = local_unnamed_addr global [1000 x [1000 x double]] zeroinitializer

; Four-deep perfectly nested loop (i, j, IL, JL) that stores one double into
; @GlobL per innermost iteration.
;
; Of the ten pointer arguments only %7 and %8 are read; the remaining
; arguments are unused in this reduced reproducer. The i32 loaded from %7
; (sign-extended to %18) bounds the i and IL loops; the i32 loaded from %8
; (sign-extended to %21) bounds the j and JL loops. Presumably these are NX
; and NY from the pseudo-code in the file header -- confirm against
; "large-nested-6d.ll".
;
; Every induction variable starts at 1 and each latch compares the
; pre-increment value against its bound, so each loop runs for 1..bound
; inclusive.
define void @test(ptr noalias readonly captures(none) %0, ptr noalias readonly captures(none) %1, ptr noalias captures(none) %2, ptr noalias captures(none) %3, ptr noalias readonly captures(none) %4, ptr noalias readonly captures(none) %5, ptr noalias readonly captures(none) %6, ptr noalias readonly captures(none) %7, ptr noalias readonly captures(none) %8, ptr noalias readonly captures(none) %9) {
entry:
  ; Load both trip-count bounds and skip the whole nest unless both are > 0.
  %17 = load i32, ptr %7, align 4
  %18 = sext i32 %17 to i64
  %20 = load i32, ptr %8, align 4
  %21 = sext i32 %20 to i64
  %cmp1 = icmp sgt i32 %17, 0
  %cmp2 = icmp sgt i32 %20, 0
  %cond = and i1 %cmp1, %cmp2
  br i1 %cond, label %preheader, label %exit

preheader:
  br label %i.header

; Outermost loop over i.
i.header:
  %i = phi i64 [ %i.next, %i.latch ], [ 1, %preheader ]
  ; %92 = i - 55: the i-dependent part of the flat element index used for
  ; the GlobC/GlobG/GlobE/GlobI loads in IL.header.
  %92 = add nsw i64 -55, %i
  ; %invariant.gep = &GlobL + (i - 1) * %21 doubles: the i-dependent part
  ; of the store address.
  %93 = add nsw i64 %i, -1
  %94 = mul nsw i64 %93, %21
  %invariant.gep = getelementptr double, ptr @GlobL, i64 %94
  br label %j.header

; Second loop over j.
j.header:
  %j = phi i64 [ %j.next, %j.latch ], [ 1, %i.header ]
  ; %95 = j - 55: the j-dependent part of the flat element index used for
  ; the GlobD/GlobH/GlobF/GlobJ loads in JL.body.
  %95 = add nsw i64 -55, %j
  ; Add j doubles to the store address.
  %gep358 = getelementptr double, ptr %invariant.gep, i64 %j
  br label %IL.header

; Third loop over IL.
IL.header:
  %IL = phi i64 [ %IL.next, %IL.latch ], [ 1, %j.header ]
  ; %97 = 54 * IL + i - 55 is a flat element offset from the start of each
  ; array. It depends only on i and IL, so these four loads are invariant
  ; in the JL loop.
  %96 = mul nuw nsw i64 %IL, 54
  %97 = add nsw i64 %92, %96
  %98 = getelementptr double, ptr @GlobC, i64 %97
  %99 = load double, ptr %98, align 8
  %100 = getelementptr double, ptr @GlobG, i64 %97
  %101 = load double, ptr %100, align 8
  %102 = getelementptr double, ptr @GlobE, i64 %97
  %103 = load double, ptr %102, align 8
  %104 = getelementptr double, ptr @GlobI, i64 %97
  %105 = load double, ptr %104, align 8
  ; %107 = (IL - 1) * %21: the IL-dependent part of the GlobL row index.
  %106 = add nsw i64 %IL, -1
  %107 = mul nsw i64 %106, %21
  br label %JL.body

; Innermost loop over JL; header, body and latch are a single block.
JL.body:
  %JL = phi i64 [ %JL.next, %JL.body ], [ 1, %IL.header ]
  ; %110 = 54 * JL + j - 55 is a flat element offset that depends only on
  ; j and JL.
  %109 = mul nuw nsw i64 %JL, 54
  %110 = add nsw i64 %95, %109
  ; Sum of four products, pairing each JL-loop load with one IL-loop load:
  ;   GlobD*GlobC + GlobH*GlobG + GlobF*GlobE + GlobJ*GlobI
  %111 = getelementptr double, ptr @GlobD, i64 %110
  %112 = load double, ptr %111, align 8
  %113 = fmul fast double %112, %99
  %114 = getelementptr double, ptr @GlobH, i64 %110
  %115 = load double, ptr %114, align 8
  %116 = fmul fast double %115, %101
  %117 = fadd fast double %116, %113
  %118 = getelementptr double, ptr @GlobF, i64 %110
  %119 = load double, ptr %118, align 8
  %120 = fmul fast double %119, %103
  %121 = fadd fast double %117, %120
  %122 = getelementptr double, ptr @GlobJ, i64 %110
  %123 = load double, ptr %122, align 8
  %124 = fmul fast double %123, %105
  %125 = fadd fast double %121, %124
  ; Store address in bytes from @GlobL:
  ;   8 * ((i - 1) * %21 + j) + 8000 * (JL + (IL - 1) * %21) - 8008
  ; 8000 is the byte size of one GlobL row (1000 doubles). The trailing
  ; -8008 subtracts one row plus one element.
  %126 = add nsw i64 %JL, %107
  %.idx247.us.us.us.us.us.us = mul nsw i64 %126, 8000
  %gep.us.us.us.us.us.us = getelementptr i8, ptr %gep358, i64 %.idx247.us.us.us.us.us.us
  %127 = getelementptr i8, ptr %gep.us.us.us.us.us.us, i64 -8008
  store double %125, ptr %127, align 8
  %JL.next = add nuw nsw i64 %JL, 1
  ; Exit after the iteration where JL == %21.
  %exitcond.not = icmp eq i64 %JL, %21
  br i1 %exitcond.not, label %IL.latch, label %JL.body

IL.latch:
  %IL.next = add nuw nsw i64 %IL, 1
  ; Exit after the iteration where IL == %18.
  %exitcond320.not = icmp eq i64 %IL, %18
  br i1 %exitcond320.not, label %j.latch, label %IL.header

j.latch:
  %j.next = add nuw nsw i64 %j, 1
  ; Exit after the iteration where j == %21.
  %exitcond324.not = icmp eq i64 %j, %21
  br i1 %exitcond324.not, label %i.latch, label %j.header

i.latch:
  %i.next = add nuw nsw i64 %i, 1
  ; Exit after the iteration where i == %18.
  %exitcond328.not = icmp eq i64 %i, %18
  br i1 %exitcond328.not, label %exit, label %i.header

exit:
  ret void
}