# RUN: llc -run-pass=machine-combiner -mtriple=arm64-unknown-unknown %s -o - | FileCheck %s
# RUN: llc -run-pass=machine-combiner -mtriple=arm64-unknown-unknown -acc-max-width=2 %s -o - | FileCheck %s --check-prefix=NARROW-TREE
# RUN: llc -run-pass=machine-combiner -mtriple=arm64-unknown-unknown -acc-min-depth=100 %s -o - | FileCheck %s --check-prefix=NO-TREE

# This file is run three times through the machine-combiner pass:
#   * default options: the expected output uses three accumulator chains;
#   * -acc-max-width=2: the expected output uses only two accumulator chains;
#   * -acc-min-depth=100: the expected output keeps the original single chain
#     (presumably because the 8-instruction chain is below the minimum depth;
#     confirm against the pass's option description).

# A chain of UABAL instructions that can be reassociated for better ILP.
# Before the optimization, we accumulate in a single long chain.
# After it, the accumulation is split into independent chains (each seeded by
# a UABDL) whose results are combined with ADDv4i32 before the final ADDV.
# CHECK-LABEL: name: uabal_accumulation
# CHECK: [[START1:%.*]]:fpr128 = UABDLv4i16_v4i32
# CHECK: [[START2:%.*]]:fpr128 = UABDLv4i16_v4i32
# CHECK: [[START3:%.*]]:fpr128 = UABDLv4i16_v4i32
# CHECK: [[A1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START1]]
# CHECK: [[B1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START2]]
# CHECK: [[C1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START3]]
# CHECK: [[A2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A1]]
# CHECK: [[B2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[B1]]
# CHECK: [[C2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[C1]]
# CHECK: [[PARTIAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[A2]], killed [[B2]]
# CHECK: [[TOTAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[PARTIAL_SUM]], killed [[C2]]
# CHECK: [[END:%.*]]:fpr32 = ADDVv4i32v killed [[TOTAL_SUM]]

# NARROW-TREE: [[START1:%.*]]:fpr128 = UABDLv4i16_v4i32
# NARROW-TREE: [[START2:%.*]]:fpr128 = UABDLv4i16_v4i32
# NARROW-TREE: [[A1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START1]]
# NARROW-TREE: [[B1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START2]]
# NARROW-TREE: [[A2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A1]]
# NARROW-TREE: [[B2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[B1]]
# NARROW-TREE: [[A3:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A2]]
# NARROW-TREE: [[B3:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[B2]]
# NARROW-TREE: [[A4:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A3]]
# NARROW-TREE: [[PARTIAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[B3]], killed [[A4]]
# NARROW-TREE: [[END:%.*]]:fpr32 = ADDVv4i32v killed [[PARTIAL_SUM]]

# NO-TREE: [[START1:%.*]]:fpr128 = UABDLv4i16_v4i32
# NO-TREE: [[A1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START1]]
# NO-TREE: [[A2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A1]]
# NO-TREE: [[A3:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A2]]
# NO-TREE: [[A4:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A3]]
# NO-TREE: [[A5:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A4]]
# NO-TREE: [[A6:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A5]]
# NO-TREE: [[A7:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A6]]
# NO-TREE: [[A8:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A7]]
# NO-TREE: [[END:%.*]]:fpr32 = ADDVv4i32v killed [[A8]]

---
name: uabal_accumulation
body: |
  bb.0.entry:
    liveins: $x0, $x1, $x2, $x3

    %3:gpr64 = COPY $x3
    %2:gpr64common = COPY $x2
    %1:gpr64 = COPY $x1
    %0:gpr64common = COPY $x0
    %4:fpr64 = LDRDui %0, 0 :: (load (s64))
    %5:fpr64 = LDRDui %2, 0 :: (load (s64))
    %6:gpr64common = ADDXrr %0, %1
    %7:gpr64common = ADDXrr %2, %3
    %8:fpr64 = LDRDui %6, 0 :: (load (s64))
    %9:fpr64 = LDRDui %7, 0 :: (load (s64))
    %10:fpr128 = UABDLv4i16_v4i32 killed %8, killed %9
    %11:fpr128 = UABALv4i16_v4i32 killed %10, killed %4, killed %5
    %12:gpr64common = ADDXrr %6, %1
    %13:gpr64common = ADDXrr %7, %3
    %14:fpr64 = LDRDui %12, 0 :: (load (s64))
    %15:fpr64 = LDRDui %13, 0 :: (load (s64))
    %16:fpr128 = UABALv4i16_v4i32 killed %11, killed %14, killed %15
    %17:gpr64common = ADDXrr %12, %1
    %18:gpr64common = ADDXrr %13, %3
    %19:fpr64 = LDRDui %17, 0 :: (load (s64))
    %20:fpr64 = LDRDui %18, 0 :: (load (s64))
    %21:fpr128 = UABALv4i16_v4i32 killed %16, killed %19, killed %20
    %22:gpr64common = ADDXrr %17, %1
    %23:gpr64common = ADDXrr %18, %3
    %24:fpr64 = LDRDui %22, 0 :: (load (s64))
    %25:fpr64 = LDRDui %23, 0 :: (load (s64))
    %26:fpr128 = UABALv4i16_v4i32 killed %21, killed %24, killed %25
    %27:gpr64common = ADDXrr %22, %1
    %28:gpr64common = ADDXrr %23, %3
    %29:fpr64 = LDRDui %27, 0 :: (load (s64))
    %30:fpr64 = LDRDui %28, 0 :: (load (s64))
    %31:fpr128 = UABALv4i16_v4i32 killed %26, killed %29, killed %30
    %32:gpr64common = ADDXrr %27, %1
    %33:gpr64common = ADDXrr %28, %3
    %34:fpr64 = LDRDui %32, 0 :: (load (s64))
    %35:fpr64 = LDRDui %33, 0 :: (load (s64))
    %36:fpr128 = UABALv4i16_v4i32 killed %31, killed %34, killed %35
    %37:gpr64common = ADDXrr %32, %1
    %38:gpr64common = ADDXrr %33, %3
    %39:fpr64 = LDRDui %37, 0 :: (load (s64))
    %40:fpr64 = LDRDui %38, 0 :: (load (s64))
    %41:fpr128 = UABALv4i16_v4i32 killed %36, killed %39, killed %40
    %42:gpr64common = ADDXrr %37, %1
    %43:gpr64common = ADDXrr %38, %3
    %44:fpr64 = LDRDui %42, 0 :: (load (s64))
    %45:fpr64 = LDRDui %43, 0 :: (load (s64))
    %46:fpr128 = UABALv4i16_v4i32 killed %41, killed %44, killed %45
    %47:fpr32 = ADDVv4i32v killed %46
    %48:fpr128 = IMPLICIT_DEF
    %49:fpr128 = INSERT_SUBREG %48, killed %47, %subreg.ssub
    %50:gpr32all = COPY %49.ssub
    $w0 = COPY %50
    RET_ReallyLR implicit $w0

...

# In this test case the instruction at the top of the chain is not a UABDL
# (it is an ADDv4i32). The checks below expect the chain to still be split
# into three accumulators: the ADDv4i32 result seeds the first one and two
# UABDL instructions seed the others.
# CHECK-LABEL: name: uabal_accumulation_with_different_start
# CHECK: [[START1:%.*]]:fpr128 = ADDv4i32
# CHECK: [[START2:%.*]]:fpr128 = UABDLv4i16_v4i32
# CHECK: [[START3:%.*]]:fpr128 = UABDLv4i16_v4i32
# CHECK: [[A1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START1]]
# CHECK: [[B1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START2]]
# CHECK: [[C1:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[START3]]
# CHECK: [[A2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[A1]]
# CHECK: [[B2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[B1]]
# CHECK: [[C2:%.*]]:fpr128 = UABALv4i16_v4i32 killed [[C1]]
# CHECK: [[PARTIAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[A2]], killed [[B2]]
# CHECK: [[TOTAL_SUM:%.*]]:fpr128 = ADDv4i32 killed [[PARTIAL_SUM]], killed [[C2]]
# CHECK: [[END:%.*]]:fpr32 = ADDVv4i32v killed [[TOTAL_SUM]]

---
name: uabal_accumulation_with_different_start
body: |
  bb.0.entry:
    liveins: $x0, $x1, $x2, $x3

    %3:gpr64 = COPY $x3
    %2:gpr64common = COPY $x2
    %1:gpr64 = COPY $x1
    %0:gpr64common = COPY $x0
    %4:fpr64 = LDRDui %0, 0 :: (load (s64))
    %5:fpr64 = LDRDui %2, 0 :: (load (s64))
    %6:gpr64common = ADDXrr %0, %1
    %7:gpr64common = ADDXrr %2, %3
    %8:fpr128 = LDRQui %6, 0 :: (load (s128))
    %9:fpr128 = LDRQui %7, 0 :: (load (s128))
    %10:fpr128 = ADDv4i32 killed %8, killed %9
    %11:fpr128 = UABALv4i16_v4i32 killed %10, killed %4, killed %5
    %12:gpr64common = ADDXrr %6, %1
    %13:gpr64common = ADDXrr %7, %3
    %14:fpr64 = LDRDui %12, 0 :: (load (s64))
    %15:fpr64 = LDRDui %13, 0 :: (load (s64))
    %16:fpr128 = UABALv4i16_v4i32 killed %11, killed %14, killed %15
    %17:gpr64common = ADDXrr %12, %1
    %18:gpr64common = ADDXrr %13, %3
    %19:fpr64 = LDRDui %17, 0 :: (load (s64))
    %20:fpr64 = LDRDui %18, 0 :: (load (s64))
    %21:fpr128 = UABALv4i16_v4i32 killed %16, killed %19, killed %20
    %22:gpr64common = ADDXrr %17, %1
    %23:gpr64common = ADDXrr %18, %3
    %24:fpr64 = LDRDui %22, 0 :: (load (s64))
    %25:fpr64 = LDRDui %23, 0 :: (load (s64))
    %26:fpr128 = UABALv4i16_v4i32 killed %21, killed %24, killed %25
    %27:gpr64common = ADDXrr %22, %1
    %28:gpr64common = ADDXrr %23, %3
    %29:fpr64 = LDRDui %27, 0 :: (load (s64))
    %30:fpr64 = LDRDui %28, 0 :: (load (s64))
    %31:fpr128 = UABALv4i16_v4i32 killed %26, killed %29, killed %30
    %32:gpr64common = ADDXrr %27, %1
    %33:gpr64common = ADDXrr %28, %3
    %34:fpr64 = LDRDui %32, 0 :: (load (s64))
    %35:fpr64 = LDRDui %33, 0 :: (load (s64))
    %36:fpr128 = UABALv4i16_v4i32 killed %31, killed %34, killed %35
    %37:gpr64common = ADDXrr %32, %1
    %38:gpr64common = ADDXrr %33, %3
    %39:fpr64 = LDRDui %37, 0 :: (load (s64))
    %40:fpr64 = LDRDui %38, 0 :: (load (s64))
    %41:fpr128 = UABALv4i16_v4i32 killed %36, killed %39, killed %40
    %42:gpr64common = ADDXrr %37, %1
    %43:gpr64common = ADDXrr %38, %3
    %44:fpr64 = LDRDui %42, 0 :: (load (s64))
    %45:fpr64 = LDRDui %43, 0 :: (load (s64))
    %46:fpr128 = UABALv4i16_v4i32 killed %41, killed %44, killed %45
    %47:fpr32 = ADDVv4i32v killed %46
    %48:fpr128 = IMPLICIT_DEF
    %49:fpr128 = INSERT_SUBREG %48, killed %47, %subreg.ssub
    %50:gpr32all = COPY %49.ssub
    $w0 = COPY %50
    RET_ReallyLR implicit $w0

...