diff options
author | Nikita Popov <npopov@redhat.com> | 2024-02-05 13:56:59 +0100 |
---|---|---|
committer | Nikita Popov <npopov@redhat.com> | 2024-02-05 13:56:59 +0100 |
commit | b31fffbc7f1e0491bf599e82b7195e320d26e140 (patch) | |
tree | b3b3ed227da35e8d2e80681d732b89a2a55d9ff9 | |
parent | 7bdc80f35c325d148b1ddbdfce7dea8c6ba7af84 (diff) | |
download | llvm-b31fffbc7f1e0491bf599e82b7195e320d26e140.zip llvm-b31fffbc7f1e0491bf599e82b7195e320d26e140.tar.gz llvm-b31fffbc7f1e0491bf599e82b7195e320d26e140.tar.bz2 |
[ARM] Convert tests to opaque pointers (NFC)
112 files changed, 5129 insertions, 5129 deletions
diff --git a/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll index 80812a3..ce23d0e 100644 --- a/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll +++ b/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll @@ -11,7 +11,7 @@ bb74.i: ; preds = %bb88.i, %bb74.i, %entry bb88.i: ; preds = %bb74.i br i1 false, label %mandel.exit, label %bb74.i mandel.exit: ; preds = %bb88.i - %tmp2 = load volatile double, ptr getelementptr ({ double, double }, ptr @accum, i32 0, i32 0), align 8 ; <double> [#uses=1] + %tmp2 = load volatile double, ptr @accum, align 8 ; <double> [#uses=1] %tmp23 = fptosi double %tmp2 to i32 ; <i32> [#uses=1] %tmp5 = tail call i32 (ptr, ...) @printf( ptr @.str, i32 %tmp23 ) ; <i32> [#uses=0] ret i32 0 diff --git a/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll index 5fc3f6e..9029f08 100644 --- a/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll +++ b/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll @@ -1,95 +1,95 @@ ; RUN: llc -mtriple armv6-apple-darwin10 -mattr=+vfp2 -filetype asm -o - %s | FileCheck %s -%struct.EDGE_PAIR = type { %struct.edge_rec*, %struct.edge_rec* } +%struct.EDGE_PAIR = type { ptr, ptr } %struct.VEC2 = type { double, double, double } -%struct.VERTEX = type { %struct.VEC2, %struct.VERTEX*, %struct.VERTEX* } -%struct.edge_rec = type { %struct.VERTEX*, %struct.edge_rec*, i32, i8* } -@avail_edge = internal global %struct.edge_rec* null +%struct.VERTEX = type { %struct.VEC2, ptr, ptr } +%struct.edge_rec = type { ptr, ptr, i32, ptr } +@avail_edge = internal global ptr null @_2E_str7 = internal constant [21 x i8] c"ERROR: Only 1 point!\00", section "__TEXT,__cstring,cstring_literals", align 1 -@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.EDGE_PAIR*, %struct.VERTEX*, %struct.VERTEX*)* @build_delaunay to i8*)], section "llvm.metadata" +@llvm.used = appending global [1 x ptr] [ptr @build_delaunay], section "llvm.metadata" -define void @build_delaunay(%struct.EDGE_PAIR* noalias nocapture sret(%struct.EDGE_PAIR) %agg.result, %struct.VERTEX* %tree, %struct.VERTEX* %extra) nounwind { +define void @build_delaunay(ptr noalias nocapture sret(%struct.EDGE_PAIR) %agg.result, ptr %tree, ptr %extra) nounwind { entry: %delright = alloca %struct.EDGE_PAIR, align 8 %delleft = alloca %struct.EDGE_PAIR, align 8 - %0 = icmp eq %struct.VERTEX* %tree, null + %0 = icmp eq ptr %tree, null br i1 %0, label %bb8, label %bb bb: - %1 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 2 - %2 = load %struct.VERTEX*, %struct.VERTEX** %1, align 4 - %3 = icmp eq %struct.VERTEX* %2, null + %1 = getelementptr %struct.VERTEX, ptr %tree, i32 0, i32 2 + %2 = load ptr, ptr %1, align 4 + %3 = icmp eq ptr %2, null br i1 %3, label %bb7, label %bb1.i bb1.i: - %tree_addr.0.i = phi %struct.VERTEX* [ %5, %bb1.i ], [ %tree, %bb ] - %4 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree_addr.0.i, i32 0, i32 1 - %5 = load %struct.VERTEX*, %struct.VERTEX** %4, align 4 - %6 = icmp eq %struct.VERTEX* %5, null + %tree_addr.0.i = phi ptr [ %5, %bb1.i ], [ %tree, %bb ] + %4 = getelementptr %struct.VERTEX, ptr %tree_addr.0.i, i32 0, i32 1 + %5 = load ptr, ptr %4, align 4 + %6 = icmp eq ptr %5, null br i1 %6, label %get_low.exit, label %bb1.i get_low.exit: - call void @build_delaunay(%struct.EDGE_PAIR* noalias sret(%struct.EDGE_PAIR) %delright, %struct.VERTEX* %2, %struct.VERTEX* %extra) nounwind - %7 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 1 - %8 = load %struct.VERTEX*, %struct.VERTEX** %7, align 4 - call void @build_delaunay(%struct.EDGE_PAIR* noalias sret(%struct.EDGE_PAIR) %delleft, %struct.VERTEX* %8, %struct.VERTEX* %tree) nounwind - %9 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delleft, i32 0, i32 0 - %10 = load %struct.edge_rec*, %struct.edge_rec** %9, align 8 - %11 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delleft, i32 0, i32 1 - %12 = load %struct.edge_rec*, %struct.edge_rec** %11, align 4 - %13 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delright, i32 0, i32 0 - %14 = load %struct.edge_rec*, %struct.edge_rec** %13, align 8 - %15 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delright, i32 0, i32 1 - %16 = load %struct.edge_rec*, %struct.edge_rec** %15, align 4 + call void @build_delaunay(ptr noalias sret(%struct.EDGE_PAIR) %delright, ptr %2, ptr %extra) nounwind + %7 = getelementptr %struct.VERTEX, ptr %tree, i32 0, i32 1 + %8 = load ptr, ptr %7, align 4 + call void @build_delaunay(ptr noalias sret(%struct.EDGE_PAIR) %delleft, ptr %8, ptr %tree) nounwind + %9 = getelementptr %struct.EDGE_PAIR, ptr %delleft, i32 0, i32 0 + %10 = load ptr, ptr %9, align 8 + %11 = getelementptr %struct.EDGE_PAIR, ptr %delleft, i32 0, i32 1 + %12 = load ptr, ptr %11, align 4 + %13 = getelementptr %struct.EDGE_PAIR, ptr %delright, i32 0, i32 0 + %14 = load ptr, ptr %13, align 8 + %15 = getelementptr %struct.EDGE_PAIR, ptr %delright, i32 0, i32 1 + %16 = load ptr, ptr %15, align 4 br label %bb.i bb.i: - %rdi_addr.0.i = phi %struct.edge_rec* [ %14, %get_low.exit ], [ %72, %bb4.i ] - %ldi_addr.1.i = phi %struct.edge_rec* [ %12, %get_low.exit ], [ %ldi_addr.0.i, %bb4.i ] - %17 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdi_addr.0.i, i32 0, i32 0 - %18 = load %struct.VERTEX*, %struct.VERTEX** %17, align 4 - %19 = ptrtoint %struct.edge_rec* %ldi_addr.1.i to i32 - %20 = getelementptr %struct.VERTEX, %struct.VERTEX* %18, i32 0, i32 0, i32 0 - %21 = load double, double* %20, align 4 - %22 = getelementptr %struct.VERTEX, %struct.VERTEX* %18, i32 0, i32 0, i32 1 - %23 = load double, double* %22, align 4 + %rdi_addr.0.i = phi ptr [ %14, %get_low.exit ], [ %72, %bb4.i ] + %ldi_addr.1.i = phi ptr [ %12, %get_low.exit ], [ %ldi_addr.0.i, %bb4.i ] + %17 = getelementptr %struct.edge_rec, ptr %rdi_addr.0.i, i32 0, i32 0 + %18 = load ptr, ptr %17, align 4 + %19 = ptrtoint ptr %ldi_addr.1.i to i32 + %20 = getelementptr %struct.VERTEX, ptr %18, i32 0, i32 0, i32 0 + %21 = load double, ptr %20, align 4 + %22 = getelementptr %struct.VERTEX, ptr %18, i32 0, i32 0, i32 1 + %23 = load double, ptr %22, align 4 br label %bb2.i bb1.i1: - %24 = ptrtoint %struct.edge_rec* %ldi_addr.0.i to i32 + %24 = ptrtoint ptr %ldi_addr.0.i to i32 %25 = add i32 %24, 48 %26 = and i32 %25, 63 %27 = and i32 %24, -64 %28 = or i32 %26, %27 - %29 = inttoptr i32 %28 to %struct.edge_rec* - %30 = getelementptr %struct.edge_rec, %struct.edge_rec* %29, i32 0, i32 1 - %31 = load %struct.edge_rec*, %struct.edge_rec** %30, align 4 - %32 = ptrtoint %struct.edge_rec* %31 to i32 + %29 = inttoptr i32 %28 to ptr + %30 = getelementptr %struct.edge_rec, ptr %29, i32 0, i32 1 + %31 = load ptr, ptr %30, align 4 + %32 = ptrtoint ptr %31 to i32 %33 = add i32 %32, 16 %34 = and i32 %33, 63 %35 = and i32 %32, -64 %36 = or i32 %34, %35 - %37 = inttoptr i32 %36 to %struct.edge_rec* + %37 = inttoptr i32 %36 to ptr br label %bb2.i bb2.i: - %ldi_addr.1.pn.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] + %ldi_addr.1.pn.i = phi ptr [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] %.pn6.in.in.i = phi i32 [ %19, %bb.i ], [ %36, %bb1.i1 ] - %ldi_addr.0.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] + %ldi_addr.0.i = phi ptr [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] %.pn6.in.i = xor i32 %.pn6.in.in.i, 32 - %.pn6.i = inttoptr i32 %.pn6.in.i to %struct.edge_rec* - %t1.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.1.pn.i, i32 0, i32 0 - %t2.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn6.i, i32 0, i32 0 - %t1.0.i = load %struct.VERTEX*, %struct.VERTEX** %t1.0.in.i - %t2.0.i = load %struct.VERTEX*, %struct.VERTEX** %t2.0.in.i - %38 = getelementptr %struct.VERTEX, %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 0 - %39 = load double, double* %38, align 4 - %40 = getelementptr %struct.VERTEX, %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 1 - %41 = load double, double* %40, align 4 - %42 = getelementptr %struct.VERTEX, %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 0 - %43 = load double, double* %42, align 4 - %44 = getelementptr %struct.VERTEX, %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 1 - %45 = load double, double* %44, align 4 + %.pn6.i = inttoptr i32 %.pn6.in.i to ptr + %t1.0.in.i = getelementptr %struct.edge_rec, ptr %ldi_addr.1.pn.i, i32 0, i32 0 + %t2.0.in.i = getelementptr %struct.edge_rec, ptr %.pn6.i, i32 0, i32 0 + %t1.0.i = load ptr, ptr %t1.0.in.i + %t2.0.i = load ptr, ptr %t2.0.in.i + %38 = getelementptr %struct.VERTEX, ptr %t1.0.i, i32 0, i32 0, i32 0 + %39 = load double, ptr %38, align 4 + %40 = getelementptr %struct.VERTEX, ptr %t1.0.i, i32 0, i32 0, i32 1 + %41 = load double, ptr %40, align 4 + %42 = getelementptr %struct.VERTEX, ptr %t2.0.i, i32 0, i32 0, i32 0 + %43 = load double, ptr %42, align 4 + %44 = getelementptr %struct.VERTEX, ptr %t2.0.i, i32 0, i32 0, i32 1 + %45 = load double, ptr %44, align 4 %46 = fsub double %39, %21 %47 = fsub double %45, %23 %48 = fmul double %46, %47 @@ -101,15 +101,15 @@ bb2.i: br i1 %53, label %bb1.i1, label %bb3.i bb3.i: - %54 = ptrtoint %struct.edge_rec* %rdi_addr.0.i to i32 + %54 = ptrtoint ptr %rdi_addr.0.i to i32 %55 = xor i32 %54, 32 - %56 = inttoptr i32 %55 to %struct.edge_rec* - %57 = getelementptr %struct.edge_rec, %struct.edge_rec* %56, i32 0, i32 0 - %58 = load %struct.VERTEX*, %struct.VERTEX** %57, align 4 - %59 = getelementptr %struct.VERTEX, %struct.VERTEX* %58, i32 0, i32 0, i32 0 - %60 = load double, double* %59, align 4 - %61 = getelementptr %struct.VERTEX, %struct.VERTEX* %58, i32 0, i32 0, i32 1 - %62 = load double, double* %61, align 4 + %56 = inttoptr i32 %55 to ptr + %57 = getelementptr %struct.edge_rec, ptr %56, i32 0, i32 0 + %58 = load ptr, ptr %57, align 4 + %59 = getelementptr %struct.VERTEX, ptr %58, i32 0, i32 0, i32 0 + %60 = load double, ptr %59, align 4 + %61 = getelementptr %struct.VERTEX, ptr %58, i32 0, i32 0, i32 1 + %62 = load double, ptr %61, align 4 %63 = fsub double %60, %39 %64 = fsub double %23, %41 %65 = fmul double %63, %64 @@ -121,8 +121,8 @@ bb3.i: br i1 %70, label %bb4.i, label %bb5.i bb4.i: - %71 = getelementptr %struct.edge_rec, %struct.edge_rec* %56, i32 0, i32 1 - %72 = load %struct.edge_rec*, %struct.edge_rec** %71, align 4 + %71 = getelementptr %struct.edge_rec, ptr %56, i32 0, i32 1 + %72 = load ptr, ptr %71, align 4 br label %bb.i bb5.i: @@ -130,145 +130,145 @@ bb5.i: %74 = and i32 %73, 63 %75 = and i32 %55, -64 %76 = or i32 %74, %75 - %77 = inttoptr i32 %76 to %struct.edge_rec* - %78 = getelementptr %struct.edge_rec, %struct.edge_rec* %77, i32 0, i32 1 - %79 = load %struct.edge_rec*, %struct.edge_rec** %78, align 4 - %80 = ptrtoint %struct.edge_rec* %79 to i32 + %77 = inttoptr i32 %76 to ptr + %78 = getelementptr %struct.edge_rec, ptr %77, i32 0, i32 1 + %79 = load ptr, ptr %78, align 4 + %80 = ptrtoint ptr %79 to i32 %81 = add i32 %80, 16 %82 = and i32 %81, 63 %83 = and i32 %80, -64 %84 = or i32 %82, %83 - %85 = inttoptr i32 %84 to %struct.edge_rec* - %86 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 0 - %87 = load %struct.VERTEX*, %struct.VERTEX** %86, align 4 - %88 = call %struct.edge_rec* @alloc_edge() nounwind - %89 = getelementptr %struct.edge_rec, %struct.edge_rec* %88, i32 0, i32 1 - store %struct.edge_rec* %88, %struct.edge_rec** %89, align 4 - %90 = getelementptr %struct.edge_rec, %struct.edge_rec* %88, i32 0, i32 0 - store %struct.VERTEX* %18, %struct.VERTEX** %90, align 4 - %91 = ptrtoint %struct.edge_rec* %88 to i32 + %85 = inttoptr i32 %84 to ptr + %86 = getelementptr %struct.edge_rec, ptr %ldi_addr.0.i, i32 0, i32 0 + %87 = load ptr, ptr %86, align 4 + %88 = call ptr @alloc_edge() nounwind + %89 = getelementptr %struct.edge_rec, ptr %88, i32 0, i32 1 + store ptr %88, ptr %89, align 4 + %90 = getelementptr %struct.edge_rec, ptr %88, i32 0, i32 0 + store ptr %18, ptr %90, align 4 + %91 = ptrtoint ptr %88 to i32 %92 = add i32 %91, 16 - %93 = inttoptr i32 %92 to %struct.edge_rec* + %93 = inttoptr i32 %92 to ptr %94 = add i32 %91, 48 - %95 = inttoptr i32 %94 to %struct.edge_rec* - %96 = getelementptr %struct.edge_rec, %struct.edge_rec* %93, i32 0, i32 1 - store %struct.edge_rec* %95, %struct.edge_rec** %96, align 4 + %95 = inttoptr i32 %94 to ptr + %96 = getelementptr %struct.edge_rec, ptr %93, i32 0, i32 1 + store ptr %95, ptr %96, align 4 %97 = add i32 %91, 32 - %98 = inttoptr i32 %97 to %struct.edge_rec* - %99 = getelementptr %struct.edge_rec, %struct.edge_rec* %98, i32 0, i32 1 - store %struct.edge_rec* %98, %struct.edge_rec** %99, align 4 - %100 = getelementptr %struct.edge_rec, %struct.edge_rec* %98, i32 0, i32 0 - store %struct.VERTEX* %87, %struct.VERTEX** %100, align 4 - %101 = getelementptr %struct.edge_rec, %struct.edge_rec* %95, i32 0, i32 1 - store %struct.edge_rec* %93, %struct.edge_rec** %101, align 4 - %102 = load %struct.edge_rec*, %struct.edge_rec** %89, align 4 - %103 = ptrtoint %struct.edge_rec* %102 to i32 + %98 = inttoptr i32 %97 to ptr + %99 = getelementptr %struct.edge_rec, ptr %98, i32 0, i32 1 + store ptr %98, ptr %99, align 4 + %100 = getelementptr %struct.edge_rec, ptr %98, i32 0, i32 0 + store ptr %87, ptr %100, align 4 + %101 = getelementptr %struct.edge_rec, ptr %95, i32 0, i32 1 + store ptr %93, ptr %101, align 4 + %102 = load ptr, ptr %89, align 4 + %103 = ptrtoint ptr %102 to i32 %104 = add i32 %103, 16 %105 = and i32 %104, 63 %106 = and i32 %103, -64 %107 = or i32 %105, %106 - %108 = inttoptr i32 %107 to %struct.edge_rec* - %109 = getelementptr %struct.edge_rec, %struct.edge_rec* %85, i32 0, i32 1 - %110 = load %struct.edge_rec*, %struct.edge_rec** %109, align 4 - %111 = ptrtoint %struct.edge_rec* %110 to i32 + %108 = inttoptr i32 %107 to ptr + %109 = getelementptr %struct.edge_rec, ptr %85, i32 0, i32 1 + %110 = load ptr, ptr %109, align 4 + %111 = ptrtoint ptr %110 to i32 %112 = add i32 %111, 16 %113 = and i32 %112, 63 %114 = and i32 %111, -64 %115 = or i32 %113, %114 - %116 = inttoptr i32 %115 to %struct.edge_rec* - %117 = getelementptr %struct.edge_rec, %struct.edge_rec* %116, i32 0, i32 1 - %118 = load %struct.edge_rec*, %struct.edge_rec** %117, align 4 - %119 = getelementptr %struct.edge_rec, %struct.edge_rec* %108, i32 0, i32 1 - %120 = load %struct.edge_rec*, %struct.edge_rec** %119, align 4 - store %struct.edge_rec* %118, %struct.edge_rec** %119, align 4 - store %struct.edge_rec* %120, %struct.edge_rec** %117, align 4 - %121 = load %struct.edge_rec*, %struct.edge_rec** %89, align 4 - %122 = load %struct.edge_rec*, %struct.edge_rec** %109, align 4 - store %struct.edge_rec* %121, %struct.edge_rec** %109, align 4 - store %struct.edge_rec* %122, %struct.edge_rec** %89, align 4 + %116 = inttoptr i32 %115 to ptr + %117 = getelementptr %struct.edge_rec, ptr %116, i32 0, i32 1 + %118 = load ptr, ptr %117, align 4 + %119 = getelementptr %struct.edge_rec, ptr %108, i32 0, i32 1 + %120 = load ptr, ptr %119, align 4 + store ptr %118, ptr %119, align 4 + store ptr %120, ptr %117, align 4 + %121 = load ptr, ptr %89, align 4 + %122 = load ptr, ptr %109, align 4 + store ptr %121, ptr %109, align 4 + store ptr %122, ptr %89, align 4 %123 = xor i32 %91, 32 - %124 = inttoptr i32 %123 to %struct.edge_rec* - %125 = getelementptr %struct.edge_rec, %struct.edge_rec* %124, i32 0, i32 1 - %126 = load %struct.edge_rec*, %struct.edge_rec** %125, align 4 - %127 = ptrtoint %struct.edge_rec* %126 to i32 + %124 = inttoptr i32 %123 to ptr + %125 = getelementptr %struct.edge_rec, ptr %124, i32 0, i32 1 + %126 = load ptr, ptr %125, align 4 + %127 = ptrtoint ptr %126 to i32 %128 = add i32 %127, 16 %129 = and i32 %128, 63 %130 = and i32 %127, -64 %131 = or i32 %129, %130 - %132 = inttoptr i32 %131 to %struct.edge_rec* - %133 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 1 - %134 = load %struct.edge_rec*, %struct.edge_rec** %133, align 4 - %135 = ptrtoint %struct.edge_rec* %134 to i32 + %132 = inttoptr i32 %131 to ptr + %133 = getelementptr %struct.edge_rec, ptr %ldi_addr.0.i, i32 0, i32 1 + %134 = load ptr, ptr %133, align 4 + %135 = ptrtoint ptr %134 to i32 %136 = add i32 %135, 16 %137 = and i32 %136, 63 %138 = and i32 %135, -64 %139 = or i32 %137, %138 - %140 = inttoptr i32 %139 to %struct.edge_rec* - %141 = getelementptr %struct.edge_rec, %struct.edge_rec* %140, i32 0, i32 1 - %142 = load %struct.edge_rec*, %struct.edge_rec** %141, align 4 - %143 = getelementptr %struct.edge_rec, %struct.edge_rec* %132, i32 0, i32 1 - %144 = load %struct.edge_rec*, %struct.edge_rec** %143, align 4 - store %struct.edge_rec* %142, %struct.edge_rec** %143, align 4 - store %struct.edge_rec* %144, %struct.edge_rec** %141, align 4 - %145 = load %struct.edge_rec*, %struct.edge_rec** %125, align 4 - %146 = load %struct.edge_rec*, %struct.edge_rec** %133, align 4 - store %struct.edge_rec* %145, %struct.edge_rec** %133, align 4 - store %struct.edge_rec* %146, %struct.edge_rec** %125, align 4 + %140 = inttoptr i32 %139 to ptr + %141 = getelementptr %struct.edge_rec, ptr %140, i32 0, i32 1 + %142 = load ptr, ptr %141, align 4 + %143 = getelementptr %struct.edge_rec, ptr %132, i32 0, i32 1 + %144 = load ptr, ptr %143, align 4 + store ptr %142, ptr %143, align 4 + store ptr %144, ptr %141, align 4 + %145 = load ptr, ptr %125, align 4 + %146 = load ptr, ptr %133, align 4 + store ptr %145, ptr %133, align 4 + store ptr %146, ptr %125, align 4 %147 = and i32 %92, 63 %148 = and i32 %91, -64 %149 = or i32 %147, %148 - %150 = inttoptr i32 %149 to %struct.edge_rec* - %151 = getelementptr %struct.edge_rec, %struct.edge_rec* %150, i32 0, i32 1 - %152 = load %struct.edge_rec*, %struct.edge_rec** %151, align 4 - %153 = ptrtoint %struct.edge_rec* %152 to i32 + %150 = inttoptr i32 %149 to ptr + %151 = getelementptr %struct.edge_rec, ptr %150, i32 0, i32 1 + %152 = load ptr, ptr %151, align 4 + %153 = ptrtoint ptr %152 to i32 %154 = add i32 %153, 16 %155 = and i32 %154, 63 %156 = and i32 %153, -64 %157 = or i32 %155, %156 - %158 = inttoptr i32 %157 to %struct.edge_rec* - %159 = load %struct.VERTEX*, %struct.VERTEX** %90, align 4 - %160 = getelementptr %struct.edge_rec, %struct.edge_rec* %124, i32 0, i32 0 - %161 = load %struct.VERTEX*, %struct.VERTEX** %160, align 4 - %162 = getelementptr %struct.edge_rec, %struct.edge_rec* %16, i32 0, i32 0 - %163 = load %struct.VERTEX*, %struct.VERTEX** %162, align 4 - %164 = icmp eq %struct.VERTEX* %163, %159 - %rdo_addr.0.i = select i1 %164, %struct.edge_rec* %88, %struct.edge_rec* %16 - %165 = getelementptr %struct.edge_rec, %struct.edge_rec* %10, i32 0, i32 0 - %166 = load %struct.VERTEX*, %struct.VERTEX** %165, align 4 - %167 = icmp eq %struct.VERTEX* %166, %161 - %ldo_addr.0.ph.i = select i1 %167, %struct.edge_rec* %124, %struct.edge_rec* %10 + %158 = inttoptr i32 %157 to ptr + %159 = load ptr, ptr %90, align 4 + %160 = getelementptr %struct.edge_rec, ptr %124, i32 0, i32 0 + %161 = load ptr, ptr %160, align 4 + %162 = getelementptr %struct.edge_rec, ptr %16, i32 0, i32 0 + %163 = load ptr, ptr %162, align 4 + %164 = icmp eq ptr %163, %159 + %rdo_addr.0.i = select i1 %164, ptr %88, ptr %16 + %165 = getelementptr %struct.edge_rec, ptr %10, i32 0, i32 0 + %166 = load ptr, ptr %165, align 4 + %167 = icmp eq ptr %166, %161 + %ldo_addr.0.ph.i = select i1 %167, ptr %124, ptr %10 br label %bb9.i bb9.i: - %lcand.2.i = phi %struct.edge_rec* [ %146, %bb5.i ], [ %lcand.1.i, %bb24.i ], [ %739, %bb25.i ] - %rcand.2.i = phi %struct.edge_rec* [ %158, %bb5.i ], [ %666, %bb24.i ], [ %rcand.1.i, %bb25.i ] - %basel.0.i = phi %struct.edge_rec* [ %88, %bb5.i ], [ %595, %bb24.i ], [ %716, %bb25.i ] - %168 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.2.i, i32 0, i32 1 - %169 = load %struct.edge_rec*, %struct.edge_rec** %168, align 4 - %170 = getelementptr %struct.edge_rec, %struct.edge_rec* %basel.0.i, i32 0, i32 0 - %171 = load %struct.VERTEX*, %struct.VERTEX** %170, align 4 - %172 = ptrtoint %struct.edge_rec* %basel.0.i to i32 + %lcand.2.i = phi ptr [ %146, %bb5.i ], [ %lcand.1.i, %bb24.i ], [ %739, %bb25.i ] + %rcand.2.i = phi ptr [ %158, %bb5.i ], [ %666, %bb24.i ], [ %rcand.1.i, %bb25.i ] + %basel.0.i = phi ptr [ %88, %bb5.i ], [ %595, %bb24.i ], [ %716, %bb25.i ] + %168 = getelementptr %struct.edge_rec, ptr %lcand.2.i, i32 0, i32 1 + %169 = load ptr, ptr %168, align 4 + %170 = getelementptr %struct.edge_rec, ptr %basel.0.i, i32 0, i32 0 + %171 = load ptr, ptr %170, align 4 + %172 = ptrtoint ptr %basel.0.i to i32 %173 = xor i32 %172, 32 - %174 = inttoptr i32 %173 to %struct.edge_rec* - %175 = getelementptr %struct.edge_rec, %struct.edge_rec* %174, i32 0, i32 0 - %176 = load %struct.VERTEX*, %struct.VERTEX** %175, align 4 - %177 = ptrtoint %struct.edge_rec* %169 to i32 + %174 = inttoptr i32 %173 to ptr + %175 = getelementptr %struct.edge_rec, ptr %174, i32 0, i32 0 + %176 = load ptr, ptr %175, align 4 + %177 = ptrtoint ptr %169 to i32 %178 = xor i32 %177, 32 - %179 = inttoptr i32 %178 to %struct.edge_rec* - %180 = getelementptr %struct.edge_rec, %struct.edge_rec* %179, i32 0, i32 0 - %181 = load %struct.VERTEX*, %struct.VERTEX** %180, align 4 - %182 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 0 - %183 = load double, double* %182, align 4 - %184 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 1 - %185 = load double, double* %184, align 4 - %186 = getelementptr %struct.VERTEX, %struct.VERTEX* %181, i32 0, i32 0, i32 0 - %187 = load double, double* %186, align 4 - %188 = getelementptr %struct.VERTEX, %struct.VERTEX* %181, i32 0, i32 0, i32 1 - %189 = load double, double* %188, align 4 - %190 = getelementptr %struct.VERTEX, %struct.VERTEX* %176, i32 0, i32 0, i32 0 - %191 = load double, double* %190, align 4 - %192 = getelementptr %struct.VERTEX, %struct.VERTEX* %176, i32 0, i32 0, i32 1 - %193 = load double, double* %192, align 4 + %179 = inttoptr i32 %178 to ptr + %180 = getelementptr %struct.edge_rec, ptr %179, i32 0, i32 0 + %181 = load ptr, ptr %180, align 4 + %182 = getelementptr %struct.VERTEX, ptr %171, i32 0, i32 0, i32 0 + %183 = load double, ptr %182, align 4 + %184 = getelementptr %struct.VERTEX, ptr %171, i32 0, i32 0, i32 1 + %185 = load double, ptr %184, align 4 + %186 = getelementptr %struct.VERTEX, ptr %181, i32 0, i32 0, i32 0 + %187 = load double, ptr %186, align 4 + %188 = getelementptr %struct.VERTEX, ptr %181, i32 0, i32 0, i32 1 + %189 = load double, ptr %188, align 4 + %190 = getelementptr %struct.VERTEX, ptr %176, i32 0, i32 0, i32 0 + %191 = load double, ptr %190, align 4 + %192 = getelementptr %struct.VERTEX, ptr %176, i32 0, i32 0, i32 1 + %193 = load double, ptr %192, align 4 %194 = fsub double %183, %191 %195 = fsub double %189, %193 %196 = fmul double %194, %195 @@ -280,146 +280,146 @@ bb9.i: br i1 %201, label %bb10.i, label %bb13.i bb10.i: - %202 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 2 - %avail_edge.promoted25 = load %struct.edge_rec*, %struct.edge_rec** @avail_edge + %202 = getelementptr %struct.VERTEX, ptr %171, i32 0, i32 0, i32 2 + %avail_edge.promoted25 = load ptr, ptr @avail_edge br label %bb12.i bb11.i: - %203 = ptrtoint %struct.edge_rec* %lcand.0.i to i32 + %203 = ptrtoint ptr %lcand.0.i to i32 %204 = add i32 %203, 16 %205 = and i32 %204, 63 %206 = and i32 %203, -64 %207 = or i32 %205, %206 - %208 = inttoptr i32 %207 to %struct.edge_rec* - %209 = getelementptr %struct.edge_rec, %struct.edge_rec* %208, i32 0, i32 1 - %210 = load %struct.edge_rec*, %struct.edge_rec** %209, align 4 - %211 = ptrtoint %struct.edge_rec* %210 to i32 + %208 = inttoptr i32 %207 to ptr + %209 = getelementptr %struct.edge_rec, ptr %208, i32 0, i32 1 + %210 = load ptr, ptr %209, align 4 + %211 = ptrtoint ptr %210 to i32 %212 = add i32 %211, 16 %213 = and i32 %212, 63 %214 = and i32 %211, -64 %215 = or i32 %213, %214 - %216 = inttoptr i32 %215 to %struct.edge_rec* - %217 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.0.i, i32 0, i32 1 - %218 = load %struct.edge_rec*, %struct.edge_rec** %217, align 4 - %219 = ptrtoint %struct.edge_rec* %218 to i32 + %216 = inttoptr i32 %215 to ptr + %217 = getelementptr %struct.edge_rec, ptr %lcand.0.i, i32 0, i32 1 + %218 = load ptr, ptr %217, align 4 + %219 = ptrtoint ptr %218 to i32 %220 = add i32 %219, 16 %221 = and i32 %220, 63 %222 = and i32 %219, -64 %223 = or i32 %221, %222 - %224 = inttoptr i32 %223 to %struct.edge_rec* - %225 = getelementptr %struct.edge_rec, %struct.edge_rec* %216, i32 0, i32 1 - %226 = load %struct.edge_rec*, %struct.edge_rec** %225, align 4 - %227 = ptrtoint %struct.edge_rec* %226 to i32 + %224 = inttoptr i32 %223 to ptr + %225 = getelementptr %struct.edge_rec, ptr %216, i32 0, i32 1 + %226 = load ptr, ptr %225, align 4 + %227 = ptrtoint ptr %226 to i32 %228 = add i32 %227, 16 %229 = and i32 %228, 63 %230 = and i32 %227, -64 %231 = or i32 %229, %230 - %232 = inttoptr i32 %231 to %struct.edge_rec* - %233 = getelementptr %struct.edge_rec, %struct.edge_rec* %232, i32 0, i32 1 - %234 = load %struct.edge_rec*, %struct.edge_rec** %233, align 4 - %235 = getelementptr %struct.edge_rec, %struct.edge_rec* %224, i32 0, i32 1 - %236 = load %struct.edge_rec*, %struct.edge_rec** %235, align 4 - store %struct.edge_rec* %234, %struct.edge_rec** %235, align 4 - store %struct.edge_rec* %236, %struct.edge_rec** %233, align 4 - %237 = load %struct.edge_rec*, %struct.edge_rec** %217, align 4 - %238 = load %struct.edge_rec*, %struct.edge_rec** %225, align 4 - store %struct.edge_rec* %237, %struct.edge_rec** %225, align 4 - store %struct.edge_rec* %238, %struct.edge_rec** %217, align 4 + %232 = inttoptr i32 %231 to ptr + %233 = getelementptr %struct.edge_rec, ptr %232, i32 0, i32 1 + %234 = load ptr, ptr %233, align 4 + %235 = getelementptr %struct.edge_rec, ptr %224, i32 0, i32 1 + %236 = load ptr, ptr %235, align 4 + store ptr %234, ptr %235, align 4 + store ptr %236, ptr %233, align 4 + %237 = load ptr, ptr %217, align 4 + %238 = load ptr, ptr %225, align 4 + store ptr %237, ptr %225, align 4 + store ptr %238, ptr %217, align 4 %239 = xor i32 %203, 32 %240 = add i32 %239, 16 %241 = and i32 %240, 63 %242 = or i32 %241, %206 - %243 = inttoptr i32 %242 to %struct.edge_rec* - %244 = getelementptr %struct.edge_rec, %struct.edge_rec* %243, i32 0, i32 1 - %245 = load %struct.edge_rec*, %struct.edge_rec** %244, align 4 - %246 = ptrtoint %struct.edge_rec* %245 to i32 + %243 = inttoptr i32 %242 to ptr + %244 = getelementptr %struct.edge_rec, ptr %243, i32 0, i32 1 + %245 = load ptr, ptr %244, align 4 + %246 = ptrtoint ptr %245 to i32 %247 = add i32 %246, 16 %248 = and i32 %247, 63 %249 = and i32 %246, -64 %250 = or i32 %248, %249 - %251 = inttoptr i32 %250 to %struct.edge_rec* - %252 = inttoptr i32 %239 to %struct.edge_rec* - %253 = getelementptr %struct.edge_rec, %struct.edge_rec* %252, i32 0, i32 1 - %254 = load %struct.edge_rec*, %struct.edge_rec** %253, align 4 - %255 = ptrtoint %struct.edge_rec* %254 to i32 + %251 = inttoptr i32 %250 to ptr + %252 = inttoptr i32 %239 to ptr + %253 = getelementptr %struct.edge_rec, ptr %252, i32 0, i32 1 + %254 = load ptr, ptr %253, align 4 + %255 = ptrtoint ptr %254 to i32 %256 = add i32 %255, 16 %257 = and i32 %256, 63 %258 = and i32 %255, -64 %259 = or i32 %257, %258 - %260 = inttoptr i32 %259 to %struct.edge_rec* - %261 = getelementptr %struct.edge_rec, %struct.edge_rec* %251, i32 0, i32 1 - %262 = load %struct.edge_rec*, %struct.edge_rec** %261, align 4 - %263 = ptrtoint %struct.edge_rec* %262 to i32 + %260 = inttoptr i32 %259 to ptr + %261 = getelementptr %struct.edge_rec, ptr %251, i32 0, i32 1 + %262 = load ptr, ptr %261, align 4 + %263 = ptrtoint ptr %262 to i32 %264 = add i32 %263, 16 %265 = and i32 %264, 63 %266 = and i32 %263, -64 %267 = or i32 %265, %266 - %268 = inttoptr i32 %267 to %struct.edge_rec* - %269 = getelementptr %struct.edge_rec, %struct.edge_rec* %268, i32 0, i32 1 - %270 = load %struct.edge_rec*, %struct.edge_rec** %269, align 4 - %271 = getelementptr %struct.edge_rec, %struct.edge_rec* %260, i32 0, i32 1 - %272 = load %struct.edge_rec*, %struct.edge_rec** %271, align 4 - store %struct.edge_rec* %270, %struct.edge_rec** %271, align 4 - store %struct.edge_rec* %272, %struct.edge_rec** %269, align 4 - %273 = load %struct.edge_rec*, %struct.edge_rec** %253, align 4 - %274 = load %struct.edge_rec*, %struct.edge_rec** %261, align 4 - store %struct.edge_rec* %273, %struct.edge_rec** %261, align 4 - store %struct.edge_rec* %274, %struct.edge_rec** %253, align 4 - %275 = inttoptr i32 %206 to %struct.edge_rec* - %276 = getelementptr %struct.edge_rec, %struct.edge_rec* %275, i32 0, i32 1 - store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** %276, align 4 - %277 = getelementptr %struct.edge_rec, %struct.edge_rec* %t.0.i, i32 0, i32 1 - %278 = load %struct.edge_rec*, %struct.edge_rec** %277, align 4 - %.pre.i = load double, double* %182, align 4 - %.pre22.i = load double, double* %184, align 4 + %268 = inttoptr i32 %267 to ptr + %269 = getelementptr %struct.edge_rec, ptr %268, i32 0, i32 1 + %270 = load ptr, ptr %269, align 4 + %271 = getelementptr %struct.edge_rec, ptr %260, i32 0, i32 1 + %272 = load ptr, ptr %271, align 4 + store ptr %270, ptr %271, align 4 + store ptr %272, ptr %269, align 4 + %273 = load ptr, ptr %253, align 4 + %274 = load ptr, ptr %261, align 4 + store ptr %273, ptr %261, align 4 + store ptr %274, ptr %253, align 4 + %275 = inttoptr i32 %206 to ptr + %276 = getelementptr %struct.edge_rec, ptr %275, i32 0, i32 1 + store ptr %avail_edge.tmp.026, ptr %276, align 4 + %277 = getelementptr %struct.edge_rec, ptr %t.0.i, i32 0, i32 1 + %278 = load ptr, ptr %277, align 4 + %.pre.i = load double, ptr %182, align 4 + %.pre22.i = load double, ptr %184, align 4 br label %bb12.i bb12.i: - %avail_edge.tmp.026 = phi %struct.edge_rec* [ %avail_edge.promoted25, %bb10.i ], [ %275, %bb11.i ] + %avail_edge.tmp.026 = phi ptr [ %avail_edge.promoted25, %bb10.i ], [ %275, %bb11.i ] %279 = phi double [ %.pre22.i, %bb11.i ], [ %185, %bb10.i ] %280 = phi double [ %.pre.i, %bb11.i ], [ %183, %bb10.i ] - %lcand.0.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] - %t.0.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ] - %.pn5.in.in.in.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] - %.pn4.in.in.in.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ] - %lcand.2.pn.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] - %.pn5.in.in.i = ptrtoint %struct.edge_rec* %.pn5.in.in.in.i to i32 - %.pn4.in.in.i = ptrtoint %struct.edge_rec* %.pn4.in.in.in.i to i32 + %lcand.0.i = phi ptr [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] + %t.0.i = phi ptr [ %169, %bb10.i ], [ %278, %bb11.i ] + %.pn5.in.in.in.i = phi ptr [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] + %.pn4.in.in.in.i = phi ptr [ %169, %bb10.i ], [ %278, %bb11.i ] + %lcand.2.pn.i = phi ptr [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] + %.pn5.in.in.i = ptrtoint ptr %.pn5.in.in.in.i to i32 + %.pn4.in.in.i = ptrtoint ptr %.pn4.in.in.in.i to i32 %.pn5.in.i = xor i32 %.pn5.in.in.i, 32 %.pn4.in.i = xor i32 %.pn4.in.in.i, 32 - %.pn5.i = inttoptr i32 %.pn5.in.i to %struct.edge_rec* - %.pn4.i = inttoptr i32 %.pn4.in.i to %struct.edge_rec* - %v1.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn5.i, i32 0, i32 0 - %v2.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn4.i, i32 0, i32 0 - %v3.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.2.pn.i, i32 0, i32 0 - %v1.0.i = load %struct.VERTEX*, %struct.VERTEX** %v1.0.in.i - %v2.0.i = load %struct.VERTEX*, %struct.VERTEX** %v2.0.in.i - %v3.0.i = load %struct.VERTEX*, %struct.VERTEX** %v3.0.in.i - %281 = load double, double* %202, align 4 - %282 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 0 - %283 = load double, double* %282, align 4 + %.pn5.i = inttoptr i32 %.pn5.in.i to ptr + %.pn4.i = inttoptr i32 %.pn4.in.i to ptr + %v1.0.in.i = getelementptr %struct.edge_rec, ptr %.pn5.i, i32 0, i32 0 + %v2.0.in.i = getelementptr %struct.edge_rec, ptr %.pn4.i, i32 0, i32 0 + %v3.0.in.i = getelementptr %struct.edge_rec, ptr %lcand.2.pn.i, i32 0, i32 0 + %v1.0.i = load ptr, ptr %v1.0.in.i + %v2.0.i = load ptr, ptr %v2.0.in.i + %v3.0.i = load ptr, ptr %v3.0.in.i + %281 = load double, ptr %202, align 4 + %282 = getelementptr %struct.VERTEX, ptr %v1.0.i, i32 0, i32 0, i32 0 + %283 = load double, ptr %282, align 4 %284 = fsub double %283, %280 - %285 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 1 - %286 = load double, double* %285, align 4 + %285 = getelementptr %struct.VERTEX, ptr %v1.0.i, i32 0, i32 0, i32 1 + %286 = load double, ptr %285, align 4 %287 = fsub double %286, %279 - %288 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 2 - %289 = load double, double* %288, align 4 - %290 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 0 - %291 = load double, double* %290, align 4 + %288 = getelementptr %struct.VERTEX, ptr %v1.0.i, i32 0, i32 0, i32 2 + %289 = load double, ptr %288, align 4 + %290 = getelementptr %struct.VERTEX, ptr %v2.0.i, i32 0, i32 0, i32 0 + %291 = load double, ptr %290, align 4 %292 = fsub double %291, %280 - %293 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 1 - %294 = load double, double* %293, align 4 + %293 = getelementptr %struct.VERTEX, ptr %v2.0.i, i32 0, i32 0, i32 1 + %294 = load double, ptr %293, align 4 %295 = fsub double %294, %279 - %296 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 2 - %297 = load double, double* %296, align 4 - %298 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 0 - %299 = load double, double* %298, align 4 + %296 = getelementptr %struct.VERTEX, ptr %v2.0.i, i32 0, i32 0, i32 2 + %297 = load double, ptr %296, align 4 + %298 = getelementptr %struct.VERTEX, ptr %v3.0.i, i32 0, i32 0, i32 0 + %299 = load double, ptr %298, align 4 %300 = fsub double %299, %280 - %301 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 1 - %302 = load double, double* %301, align 4 + %301 = getelementptr %struct.VERTEX, ptr %v3.0.i, i32 0, i32 0, i32 1 + %302 = load double, ptr %301, align 4 %303 = fsub double %302, %279 - %304 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 2 - %305 = load double, double* %304, align 4 + %304 = getelementptr %struct.VERTEX, ptr %v3.0.i, i32 0, i32 0, i32 2 + %305 = load double, ptr %304, align 4 %306 = fsub double %289, %281 %307 = fmul double %292, %303 %308 = fmul double %295, %300 @@ -441,44 +441,44 @@ bb12.i: br i1 %323, label %bb11.i, label %bb13.loopexit.i bb13.loopexit.i: - store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** @avail_edge - %.pre23.i = load %struct.VERTEX*, %struct.VERTEX** %170, align 4 - %.pre24.i = load %struct.VERTEX*, %struct.VERTEX** %175, align 4 + store ptr %avail_edge.tmp.026, ptr @avail_edge + %.pre23.i = load ptr, ptr %170, align 4 + %.pre24.i = load ptr, ptr %175, align 4 br label %bb13.i bb13.i: - %324 = phi %struct.VERTEX* [ %.pre24.i, %bb13.loopexit.i ], [ %176, %bb9.i ] - %325 = phi %struct.VERTEX* [ %.pre23.i, %bb13.loopexit.i ], [ %171, %bb9.i ] - %lcand.1.i = phi %struct.edge_rec* [ %lcand.0.i, %bb13.loopexit.i ], [ %lcand.2.i, %bb9.i ] - %326 = ptrtoint %struct.edge_rec* %rcand.2.i to i32 + %324 = phi ptr [ %.pre24.i, %bb13.loopexit.i ], [ %176, %bb9.i ] + %325 = phi ptr [ %.pre23.i, %bb13.loopexit.i ], [ %171, %bb9.i ] + %lcand.1.i = phi ptr [ %lcand.0.i, %bb13.loopexit.i ], [ %lcand.2.i, %bb9.i ] + %326 = ptrtoint ptr %rcand.2.i to i32 %327 = add i32 %326, 16 %328 = and i32 %327, 63 %329 = and i32 %326, -64 %330 = or i32 %328, %329 - %331 = inttoptr i32 %330 to %struct.edge_rec* - %332 = getelementptr %struct.edge_rec, %struct.edge_rec* %331, i32 0, i32 1 - %333 = load %struct.edge_rec*, %struct.edge_rec** %332, align 4 - %334 = ptrtoint %struct.edge_rec* %333 to i32 + %331 = inttoptr i32 %330 to ptr + %332 = getelementptr %struct.edge_rec, ptr %331, i32 0, i32 1 + %333 = load ptr, ptr %332, align 4 + %334 = ptrtoint ptr %333 to i32 %335 = add i32 %334, 16 %336 = and i32 %335, 63 %337 = and i32 %334, -64 %338 = or i32 %336, %337 %339 = xor i32 %338, 32 - %340 = inttoptr i32 %339 to %struct.edge_rec* - %341 = getelementptr %struct.edge_rec, %struct.edge_rec* %340, i32 0, i32 0 - %342 = load %struct.VERTEX*, %struct.VERTEX** %341, align 4 - %343 = getelementptr %struct.VERTEX, %struct.VERTEX* %325, i32 0, i32 0, i32 0 - %344 = load double, double* %343, align 4 - %345 = getelementptr %struct.VERTEX, %struct.VERTEX* %325, i32 0, i32 0, i32 1 - %346 = load double, double* %345, align 4 - %347 = getelementptr %struct.VERTEX, %struct.VERTEX* %342, i32 0, i32 0, i32 0 - %348 = load double, double* %347, align 4 - %349 = getelementptr %struct.VERTEX, %struct.VERTEX* %342, i32 0, i32 0, i32 1 - %350 = load double, double* %349, align 4 - %351 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 0 - %352 = load double, double* %351, align 4 - %353 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 1 - %354 = load double, double* %353, align 4 + %340 = inttoptr i32 %339 to ptr + %341 = getelementptr %struct.edge_rec, ptr %340, i32 0, i32 0 + %342 = load ptr, ptr %341, align 4 + %343 = getelementptr %struct.VERTEX, ptr %325, i32 0, i32 0, i32 0 + %344 = load double, ptr %343, align 4 + %345 = getelementptr %struct.VERTEX, ptr %325, i32 0, i32 0, i32 1 + %346 = load double, ptr %345, align 4 + %347 = getelementptr %struct.VERTEX, ptr %342, i32 0, i32 0, i32 0 + %348 = load double, ptr %347, align 4 + %349 = getelementptr %struct.VERTEX, ptr %342, i32 0, i32 0, i32 1 + %350 = load double, ptr %349, align 4 + %351 = getelementptr %struct.VERTEX, ptr %324, i32 0, i32 0, i32 0 + %352 = load double, ptr %351, align 4 + %353 = getelementptr %struct.VERTEX, ptr %324, i32 0, i32 0, i32 1 + %354 = load double, ptr %353, align 4 %355 = fsub double %344, %352 %356 = fsub double %350, %354 %357 = fmul double %355, %356 @@ -490,156 +490,156 @@ bb13.i: br i1 %362, label %bb14.i, label %bb17.i bb14.i: - %363 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 2 - %avail_edge.promoted = load %struct.edge_rec*, %struct.edge_rec** @avail_edge + %363 = getelementptr %struct.VERTEX, ptr %324, i32 0, i32 0, i32 2 + %avail_edge.promoted = load ptr, ptr @avail_edge br label %bb16.i bb15.i: - %364 = ptrtoint %struct.edge_rec* %rcand.0.i to i32 + %364 = ptrtoint ptr %rcand.0.i to i32 %365 = add i32 %364, 16 %366 = and i32 %365, 63 %367 = and i32 %364, -64 %368 = or i32 %366, %367 - %369 = inttoptr i32 %368 to %struct.edge_rec* - %370 = getelementptr %struct.edge_rec, %struct.edge_rec* %369, i32 0, i32 1 - %371 = load %struct.edge_rec*, %struct.edge_rec** %370, align 4 - %372 = ptrtoint %struct.edge_rec* %371 to i32 + %369 = inttoptr i32 %368 to ptr + %370 = getelementptr %struct.edge_rec, ptr %369, i32 0, i32 1 + %371 = load ptr, ptr %370, align 4 + %372 = ptrtoint ptr %371 to i32 %373 = add i32 %372, 16 %374 = and i32 %373, 63 %375 = and i32 %372, -64 %376 = or i32 %374, %375 - %377 = inttoptr i32 %376 to %struct.edge_rec* - %378 = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.0.i, i32 0, i32 1 - %379 = load %struct.edge_rec*, %struct.edge_rec** %378, align 4 - %380 = ptrtoint %struct.edge_rec* %379 to i32 + %377 = inttoptr i32 %376 to ptr + %378 = getelementptr %struct.edge_rec, ptr %rcand.0.i, i32 0, i32 1 + %379 = load ptr, ptr %378, align 4 + %380 = ptrtoint ptr %379 to i32 %381 = add i32 %380, 16 %382 = and i32 %381, 63 %383 = and i32 %380, -64 %384 = or i32 %382, %383 - %385 = inttoptr i32 %384 to %struct.edge_rec* - %386 = getelementptr %struct.edge_rec, %struct.edge_rec* %377, i32 0, i32 1 - %387 = load %struct.edge_rec*, %struct.edge_rec** %386, align 4 - %388 = ptrtoint %struct.edge_rec* %387 to i32 + %385 = inttoptr i32 %384 to ptr + %386 = getelementptr %struct.edge_rec, ptr %377, i32 0, i32 1 + %387 = load ptr, ptr %386, align 4 + %388 = ptrtoint ptr %387 to i32 %389 = add i32 %388, 16 %390 = and i32 %389, 63 %391 = and i32 %388, -64 %392 = or i32 %390, %391 - %393 = inttoptr i32 %392 to %struct.edge_rec* - %394 = getelementptr %struct.edge_rec, %struct.edge_rec* %393, i32 0, i32 1 - %395 = load %struct.edge_rec*, %struct.edge_rec** %394, align 4 - %396 = getelementptr %struct.edge_rec, %struct.edge_rec* %385, i32 0, i32 1 - %397 = load %struct.edge_rec*, %struct.edge_rec** %396, align 4 - store %struct.edge_rec* %395, %struct.edge_rec** %396, align 4 - store %struct.edge_rec* %397, %struct.edge_rec** %394, align 4 - %398 = load %struct.edge_rec*, %struct.edge_rec** %378, align 4 - %399 = load %struct.edge_rec*, %struct.edge_rec** %386, align 4 - store %struct.edge_rec* %398, %struct.edge_rec** %386, align 4 - store %struct.edge_rec* %399, %struct.edge_rec** %378, align 4 + %393 = inttoptr i32 %392 to ptr + %394 = getelementptr %struct.edge_rec, ptr %393, i32 0, i32 1 + %395 = load ptr, ptr %394, align 4 + %396 = getelementptr %struct.edge_rec, ptr %385, i32 0, i32 1 + %397 = load ptr, ptr %396, align 4 + store ptr %395, ptr %396, align 4 + store ptr %397, ptr %394, align 4 + %398 = load ptr, ptr %378, align 4 + %399 = load ptr, ptr %386, align 4 + store ptr %398, ptr %386, align 4 + store ptr %399, ptr %378, align 4 %400 = xor i32 %364, 32 %401 = add i32 %400, 16 %402 = and i32 %401, 63 %403 = or i32 %402, %367 - %404 = inttoptr i32 %403 to %struct.edge_rec* - %405 = getelementptr %struct.edge_rec, %struct.edge_rec* %404, i32 0, i32 1 - %406 = load %struct.edge_rec*, %struct.edge_rec** %405, align 4 - %407 = ptrtoint %struct.edge_rec* %406 to i32 + %404 = inttoptr i32 %403 to ptr + %405 = getelementptr %struct.edge_rec, ptr %404, i32 0, i32 1 + %406 = load ptr, ptr %405, align 4 + %407 = ptrtoint ptr %406 to i32 %408 = add i32 %407, 16 %409 = and i32 %408, 63 %410 = and i32 %407, -64 %411 = or i32 %409, %410 - %412 = inttoptr i32 %411 to %struct.edge_rec* - %413 = inttoptr i32 %400 to %struct.edge_rec* - %414 = getelementptr %struct.edge_rec, %struct.edge_rec* %413, i32 0, i32 1 - %415 = load %struct.edge_rec*, %struct.edge_rec** %414, align 4 - %416 = ptrtoint %struct.edge_rec* %415 to i32 + %412 = inttoptr i32 %411 to ptr + %413 = inttoptr i32 %400 to ptr + %414 = getelementptr %struct.edge_rec, ptr %413, i32 0, i32 1 + %415 = load ptr, ptr %414, align 4 + %416 = ptrtoint ptr %415 to i32 %417 = add i32 %416, 16 %418 = and i32 %417, 63 %419 = and i32 %416, -64 %420 = or i32 %418, %419 - %421 = inttoptr i32 %420 to %struct.edge_rec* - %422 = getelementptr %struct.edge_rec, %struct.edge_rec* %412, i32 0, i32 1 - %423 = load %struct.edge_rec*, %struct.edge_rec** %422, align 4 - %424 = ptrtoint %struct.edge_rec* %423 to i32 + %421 = inttoptr i32 %420 to ptr + %422 = getelementptr %struct.edge_rec, ptr %412, i32 0, i32 1 + %423 = load ptr, ptr %422, align 4 + %424 = ptrtoint ptr %423 to i32 %425 = add i32 %424, 16 %426 = and i32 %425, 63 %427 = and i32 %424, -64 %428 = or i32 %426, %427 - %429 = inttoptr i32 %428 to %struct.edge_rec* - %430 = getelementptr %struct.edge_rec, %struct.edge_rec* %429, i32 0, i32 1 - %431 = load %struct.edge_rec*, %struct.edge_rec** %430, align 4 - %432 = getelementptr %struct.edge_rec, %struct.edge_rec* %421, i32 0, i32 1 - %433 = load %struct.edge_rec*, %struct.edge_rec** %432, align 4 - store %struct.edge_rec* %431, %struct.edge_rec** %432, align 4 - store %struct.edge_rec* %433, %struct.edge_rec** %430, align 4 - %434 = load %struct.edge_rec*, %struct.edge_rec** %414, align 4 - %435 = load %struct.edge_rec*, %struct.edge_rec** %422, align 4 - store %struct.edge_rec* %434, %struct.edge_rec** %422, align 4 - store %struct.edge_rec* %435, %struct.edge_rec** %414, align 4 - %436 = inttoptr i32 %367 to %struct.edge_rec* - %437 = getelementptr %struct.edge_rec, %struct.edge_rec* %436, i32 0, i32 1 - store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** %437, align 4 + %429 = inttoptr i32 %428 to ptr + %430 = getelementptr %struct.edge_rec, ptr %429, i32 0, i32 1 + %431 = load ptr, ptr %430, align 4 + %432 = getelementptr %struct.edge_rec, ptr %421, i32 0, i32 1 + %433 = load ptr, ptr %432, align 4 + store ptr %431, ptr %432, align 4 + store ptr %433, ptr %430, align 4 + %434 = load ptr, ptr %414, align 4 + %435 = load ptr, ptr %422, align 4 + store ptr %434, ptr %422, align 4 + store ptr %435, ptr %414, align 4 + %436 = inttoptr i32 %367 to ptr + %437 = getelementptr %struct.edge_rec, ptr %436, i32 0, i32 1 + store ptr %avail_edge.tmp.0, ptr %437, align 4 %438 = add i32 %t.1.in.i, 16 %439 = and i32 %438, 63 %440 = and i32 %t.1.in.i, -64 %441 = or i32 %439, %440 - %442 = inttoptr i32 %441 to %struct.edge_rec* - %443 = getelementptr %struct.edge_rec, %struct.edge_rec* %442, i32 0, i32 1 - %444 = load %struct.edge_rec*, %struct.edge_rec** %443, align 4 - %445 = ptrtoint %struct.edge_rec* %444 to i32 + %442 = inttoptr i32 %441 to ptr + %443 = getelementptr %struct.edge_rec, ptr %442, i32 0, i32 1 + %444 = load ptr, ptr %443, align 4 + %445 = ptrtoint ptr %444 to i32 %446 = add i32 %445, 16 %447 = and i32 %446, 63 %448 = and i32 %445, -64 %449 = or i32 %447, %448 - %.pre25.i = load double, double* %351, align 4 - %.pre26.i = load double, double* %353, align 4 + %.pre25.i = load double, ptr %351, align 4 + %.pre26.i = load double, ptr %353, align 4 br label %bb16.i bb16.i: - %avail_edge.tmp.0 = phi %struct.edge_rec* [ %avail_edge.promoted, %bb14.i ], [ %436, %bb15.i ] + %avail_edge.tmp.0 = phi ptr [ %avail_edge.promoted, %bb14.i ], [ %436, %bb15.i ] %450 = phi double [ %.pre26.i, %bb15.i ], [ %354, %bb14.i ] %451 = phi double [ %.pre25.i, %bb15.i ], [ %352, %bb14.i ] - %rcand.0.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] + %rcand.0.i = phi ptr [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] %t.1.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ] %.pn3.in.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ] - %.pn.in.in.in.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] - %rcand.2.pn.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] - %t.1.i = inttoptr i32 %t.1.in.i to %struct.edge_rec* - %.pn.in.in.i = ptrtoint %struct.edge_rec* %.pn.in.in.in.i to i32 + %.pn.in.in.in.i = phi ptr [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] + %rcand.2.pn.i = phi ptr [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] + %t.1.i = inttoptr i32 %t.1.in.i to ptr + %.pn.in.in.i = ptrtoint ptr %.pn.in.in.in.i to i32 %.pn3.in.i = xor i32 %.pn3.in.in.i, 32 %.pn.in.i = xor i32 %.pn.in.in.i, 32 - %.pn3.i = inttoptr i32 %.pn3.in.i to %struct.edge_rec* - %.pn.i = inttoptr i32 %.pn.in.i to %struct.edge_rec* - %v1.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn3.i, i32 0, i32 0 - %v2.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn.i, i32 0, i32 0 - %v3.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.2.pn.i, i32 0, i32 0 - %v1.1.i = load %struct.VERTEX*, %struct.VERTEX** %v1.1.in.i - %v2.1.i = load %struct.VERTEX*, %struct.VERTEX** %v2.1.in.i - %v3.1.i = load %struct.VERTEX*, %struct.VERTEX** %v3.1.in.i - %452 = load double, double* %363, align 4 - %453 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 0 - %454 = load double, double* %453, align 4 + %.pn3.i = inttoptr i32 %.pn3.in.i to ptr + %.pn.i = inttoptr i32 %.pn.in.i to ptr + %v1.1.in.i = getelementptr %struct.edge_rec, ptr %.pn3.i, i32 0, i32 0 + %v2.1.in.i = getelementptr %struct.edge_rec, ptr %.pn.i, i32 0, i32 0 + %v3.1.in.i = getelementptr %struct.edge_rec, ptr %rcand.2.pn.i, i32 0, i32 0 + %v1.1.i = load ptr, ptr %v1.1.in.i + %v2.1.i = load ptr, ptr %v2.1.in.i + %v3.1.i = load ptr, ptr %v3.1.in.i + %452 = load double, ptr %363, align 4 + %453 = getelementptr %struct.VERTEX, ptr %v1.1.i, i32 0, i32 0, i32 0 + %454 = load double, ptr %453, align 4 %455 = fsub double %454, %451 - %456 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 1 - %457 = load double, double* %456, align 4 + %456 = getelementptr %struct.VERTEX, ptr %v1.1.i, i32 0, i32 0, i32 1 + %457 = load double, ptr %456, align 4 %458 = fsub double %457, %450 - %459 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 2 - %460 = load double, double* %459, align 4 - %461 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 0 - %462 = load double, double* %461, align 4 + %459 = getelementptr %struct.VERTEX, ptr %v1.1.i, i32 0, i32 0, i32 2 + %460 = load double, ptr %459, align 4 + %461 = getelementptr %struct.VERTEX, ptr %v2.1.i, i32 0, i32 0, i32 0 + %462 = load double, ptr %461, align 4 %463 = fsub double %462, %451 - %464 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 1 - %465 = load double, double* %464, align 4 + %464 = getelementptr %struct.VERTEX, ptr %v2.1.i, i32 0, i32 0, i32 1 + %465 = load double, ptr %464, align 4 %466 = fsub double %465, %450 - %467 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 2 - %468 = load double, double* %467, align 4 - %469 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 0 - %470 = load double, double* %469, align 4 + %467 = getelementptr %struct.VERTEX, ptr %v2.1.i, i32 0, i32 0, i32 2 + %468 = load double, ptr %467, align 4 + %469 = getelementptr %struct.VERTEX, ptr %v3.1.i, i32 0, i32 0, i32 0 + %470 = load double, ptr %469, align 4 %471 = fsub double %470, %451 - %472 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 1 - %473 = load double, double* %472, align 4 + %472 = getelementptr %struct.VERTEX, ptr %v3.1.i, i32 0, i32 0, i32 1 + %473 = load double, ptr %472, align 4 %474 = fsub double %473, %450 - %475 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 2 - %476 = load double, double* %475, align 4 + %475 = getelementptr %struct.VERTEX, ptr %v3.1.i, i32 0, i32 0, i32 2 + %476 = load double, ptr %475, align 4 %477 = fsub double %460, %452 %478 = fmul double %463, %474 %479 = fmul double %466, %471 @@ -661,32 +661,32 @@ bb16.i: br i1 %494, label %bb15.i, label %bb17.loopexit.i bb17.loopexit.i: - store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** @avail_edge - %.pre27.i = load %struct.VERTEX*, %struct.VERTEX** %170, align 4 - %.pre28.i = load %struct.VERTEX*, %struct.VERTEX** %175, align 4 + store ptr %avail_edge.tmp.0, ptr @avail_edge + %.pre27.i = load ptr, ptr %170, align 4 + %.pre28.i = load ptr, ptr %175, align 4 br label %bb17.i bb17.i: - %495 = phi %struct.VERTEX* [ %.pre28.i, %bb17.loopexit.i ], [ %324, %bb13.i ] - %496 = phi %struct.VERTEX* [ %.pre27.i, %bb17.loopexit.i ], [ %325, %bb13.i ] - %rcand.1.i = phi %struct.edge_rec* [ %rcand.0.i, %bb17.loopexit.i ], [ %rcand.2.i, %bb13.i ] - %497 = ptrtoint %struct.edge_rec* %lcand.1.i to i32 + %495 = phi ptr [ %.pre28.i, %bb17.loopexit.i ], [ %324, %bb13.i ] + %496 = phi ptr [ %.pre27.i, %bb17.loopexit.i ], [ %325, %bb13.i ] + %rcand.1.i = phi ptr [ %rcand.0.i, %bb17.loopexit.i ], [ %rcand.2.i, %bb13.i ] + %497 = ptrtoint ptr %lcand.1.i to i32 %498 = xor i32 %497, 32 - %499 = inttoptr i32 %498 to %struct.edge_rec* - %500 = getelementptr %struct.edge_rec, %struct.edge_rec* %499, i32 0, i32 0 - %501 = load %struct.VERTEX*, %struct.VERTEX** %500, align 4 - %502 = getelementptr %struct.VERTEX, %struct.VERTEX* %496, i32 0, i32 0, i32 0 - %503 = load double, double* %502, align 4 - %504 = getelementptr %struct.VERTEX, %struct.VERTEX* %496, i32 0, i32 0, i32 1 - %505 = load double, double* %504, align 4 - %506 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 0 - %507 = load double, double* %506, align 4 - %508 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 1 - %509 = load double, double* %508, align 4 - %510 = getelementptr %struct.VERTEX, %struct.VERTEX* %495, i32 0, i32 0, i32 0 - %511 = load double, double* %510, align 4 - %512 = getelementptr %struct.VERTEX, %struct.VERTEX* %495, i32 0, i32 0, i32 1 - %513 = load double, double* %512, align 4 + %499 = inttoptr i32 %498 to ptr + %500 = getelementptr %struct.edge_rec, ptr %499, i32 0, i32 0 + %501 = load ptr, ptr %500, align 4 + %502 = getelementptr %struct.VERTEX, ptr %496, i32 0, i32 0, i32 0 + %503 = load double, ptr %502, align 4 + %504 = getelementptr %struct.VERTEX, ptr %496, i32 0, i32 0, i32 1 + %505 = load double, ptr %504, align 4 + %506 = getelementptr %struct.VERTEX, ptr %501, i32 0, i32 0, i32 0 + %507 = load double, ptr %506, align 4 + %508 = getelementptr %struct.VERTEX, ptr %501, i32 0, i32 0, i32 1 + %509 = load double, ptr %508, align 4 + %510 = getelementptr %struct.VERTEX, ptr %495, i32 0, i32 0, i32 0 + %511 = load double, ptr %510, align 4 + %512 = getelementptr %struct.VERTEX, ptr %495, i32 0, i32 0, i32 1 + %513 = load double, ptr %512, align 4 %514 = fsub double %503, %511 %515 = fsub double %509, %513 %516 = fmul double %514, %515 @@ -695,15 +695,15 @@ bb17.i: %519 = fmul double %517, %518 %520 = fsub double %516, %519 %521 = fcmp ogt double %520, 0.000000e+00 - %522 = ptrtoint %struct.edge_rec* %rcand.1.i to i32 + %522 = ptrtoint ptr %rcand.1.i to i32 %523 = xor i32 %522, 32 - %524 = inttoptr i32 %523 to %struct.edge_rec* - %525 = getelementptr %struct.edge_rec, %struct.edge_rec* %524, i32 0, i32 0 - %526 = load %struct.VERTEX*, %struct.VERTEX** %525, align 4 - %527 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 0 - %528 = load double, double* %527, align 4 - %529 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 1 - %530 = load double, double* %529, align 4 + %524 = inttoptr i32 %523 to ptr + %525 = getelementptr %struct.edge_rec, ptr %524, i32 0, i32 0 + %526 = load ptr, ptr %525, align 4 + %527 = getelementptr %struct.VERTEX, ptr %526, i32 0, i32 0, i32 0 + %528 = load double, ptr %527, align 4 + %529 = getelementptr %struct.VERTEX, ptr %526, i32 0, i32 0, i32 1 + %530 = load double, ptr %529, align 4 %531 = fsub double %530, %513 %532 = fmul double %514, %531 %533 = fsub double %528, %511 @@ -714,38 +714,38 @@ bb17.i: br i1 %537, label %bb21.i, label %do_merge.exit bb21.i: - %538 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.1.i, i32 0, i32 0 - %539 = load %struct.VERTEX*, %struct.VERTEX** %538, align 4 - %540 = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.1.i, i32 0, i32 0 - %541 = load %struct.VERTEX*, %struct.VERTEX** %540, align 4 + %538 = getelementptr %struct.edge_rec, ptr %lcand.1.i, i32 0, i32 0 + %539 = load ptr, ptr %538, align 4 + %540 = getelementptr %struct.edge_rec, ptr %rcand.1.i, i32 0, i32 0 + %541 = load ptr, ptr %540, align 4 br i1 %521, label %bb22.i, label %bb24.i bb22.i: br i1 %536, label %bb23.i, label %bb25.i bb23.i: - %542 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 2 - %543 = load double, double* %542, align 4 + %542 = getelementptr %struct.VERTEX, ptr %526, i32 0, i32 0, i32 2 + %543 = load double, ptr %542, align 4 %544 = fsub double %507, %528 %545 = fsub double %509, %530 - %546 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 2 - %547 = load double, double* %546, align 4 - %548 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 0 - %549 = load double, double* %548, align 4 + %546 = getelementptr %struct.VERTEX, ptr %501, i32 0, i32 0, i32 2 + %547 = load double, ptr %546, align 4 + %548 = getelementptr %struct.VERTEX, ptr %539, i32 0, i32 0, i32 0 + %549 = load double, ptr %548, align 4 %550 = fsub double %549, %528 - %551 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 1 - %552 = load double, double* %551, align 4 + %551 = getelementptr %struct.VERTEX, ptr %539, i32 0, i32 0, i32 1 + %552 = load double, ptr %551, align 4 %553 = fsub double %552, %530 - %554 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 2 - %555 = load double, double* %554, align 4 - %556 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 0 - %557 = load double, double* %556, align 4 + %554 = getelementptr %struct.VERTEX, ptr %539, i32 0, i32 0, i32 2 + %555 = load double, ptr %554, align 4 + %556 = getelementptr %struct.VERTEX, ptr %541, i32 0, i32 0, i32 0 + %557 = load double, ptr %556, align 4 %558 = fsub double %557, %528 - %559 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 1 - %560 = load double, double* %559, align 4 + %559 = getelementptr %struct.VERTEX, ptr %541, i32 0, i32 0, i32 1 + %560 = load double, ptr %559, align 4 %561 = fsub double %560, %530 - %562 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 2 - %563 = load double, double* %562, align 4 + %562 = getelementptr %struct.VERTEX, ptr %541, i32 0, i32 0, i32 2 + %563 = load double, ptr %562, align 4 %564 = fsub double %547, %543 %565 = fmul double %550, %561 %566 = fmul double %553, %558 @@ -771,101 +771,101 @@ bb24.i: %583 = and i32 %582, 63 %584 = and i32 %522, -64 %585 = or i32 %583, %584 - %586 = inttoptr i32 %585 to %struct.edge_rec* - %587 = getelementptr %struct.edge_rec, %struct.edge_rec* %586, i32 0, i32 1 - %588 = load %struct.edge_rec*, %struct.edge_rec** %587, align 4 - %589 = ptrtoint %struct.edge_rec* %588 to i32 + %586 = inttoptr i32 %585 to ptr + %587 = getelementptr %struct.edge_rec, ptr %586, i32 0, i32 1 + %588 = load ptr, ptr %587, align 4 + %589 = ptrtoint ptr %588 to i32 %590 = add i32 %589, 16 %591 = and i32 %590, 63 %592 = and i32 %589, -64 %593 = or i32 %591, %592 - %594 = inttoptr i32 %593 to %struct.edge_rec* - %595 = call %struct.edge_rec* @alloc_edge() nounwind - %596 = getelementptr %struct.edge_rec, %struct.edge_rec* %595, i32 0, i32 1 - store %struct.edge_rec* %595, %struct.edge_rec** %596, align 4 - %597 = getelementptr %struct.edge_rec, %struct.edge_rec* %595, i32 0, i32 0 - store %struct.VERTEX* %526, %struct.VERTEX** %597, align 4 - %598 = ptrtoint %struct.edge_rec* %595 to i32 + %594 = inttoptr i32 %593 to ptr + %595 = call ptr @alloc_edge() nounwind + %596 = getelementptr %struct.edge_rec, ptr %595, i32 0, i32 1 + store ptr %595, ptr %596, align 4 + %597 = getelementptr %struct.edge_rec, ptr %595, i32 0, i32 0 + store ptr %526, ptr %597, align 4 + %598 = ptrtoint ptr %595 to i32 %599 = add i32 %598, 16 - %600 = inttoptr i32 %599 to %struct.edge_rec* + %600 = inttoptr i32 %599 to ptr %601 = add i32 %598, 48 - %602 = inttoptr i32 %601 to %struct.edge_rec* - %603 = getelementptr %struct.edge_rec, %struct.edge_rec* %600, i32 0, i32 1 - store %struct.edge_rec* %602, %struct.edge_rec** %603, align 4 + %602 = inttoptr i32 %601 to ptr + %603 = getelementptr %struct.edge_rec, ptr %600, i32 0, i32 1 + store ptr %602, ptr %603, align 4 %604 = add i32 %598, 32 - %605 = inttoptr i32 %604 to %struct.edge_rec* - %606 = getelementptr %struct.edge_rec, %struct.edge_rec* %605, i32 0, i32 1 - store %struct.edge_rec* %605, %struct.edge_rec** %606, align 4 - %607 = getelementptr %struct.edge_rec, %struct.edge_rec* %605, i32 0, i32 0 - store %struct.VERTEX* %495, %struct.VERTEX** %607, align 4 - %608 = getelementptr %struct.edge_rec, %struct.edge_rec* %602, i32 0, i32 1 - store %struct.edge_rec* %600, %struct.edge_rec** %608, align 4 - %609 = load %struct.edge_rec*, %struct.edge_rec** %596, align 4 - %610 = ptrtoint %struct.edge_rec* %609 to i32 + %605 = inttoptr i32 %604 to ptr + %606 = getelementptr %struct.edge_rec, ptr %605, i32 0, i32 1 + store ptr %605, ptr %606, align 4 + %607 = getelementptr %struct.edge_rec, ptr %605, i32 0, i32 0 + store ptr %495, ptr %607, align 4 + %608 = getelementptr %struct.edge_rec, ptr %602, i32 0, i32 1 + store ptr %600, ptr %608, align 4 + %609 = load ptr, ptr %596, align 4 + %610 = ptrtoint ptr %609 to i32 %611 = add i32 %610, 16 %612 = and i32 %611, 63 %613 = and i32 %610, -64 %614 = or i32 %612, %613 - %615 = inttoptr i32 %614 to %struct.edge_rec* - %616 = getelementptr %struct.edge_rec, %struct.edge_rec* %594, i32 0, i32 1 - %617 = load %struct.edge_rec*, %struct.edge_rec** %616, align 4 - %618 = ptrtoint %struct.edge_rec* %617 to i32 + %615 = inttoptr i32 %614 to ptr + %616 = getelementptr %struct.edge_rec, ptr %594, i32 0, i32 1 + %617 = load ptr, ptr %616, align 4 + %618 = ptrtoint ptr %617 to i32 %619 = add i32 %618, 16 %620 = and i32 %619, 63 %621 = and i32 %618, -64 %622 = or i32 %620, %621 - %623 = inttoptr i32 %622 to %struct.edge_rec* - %624 = getelementptr %struct.edge_rec, %struct.edge_rec* %623, i32 0, i32 1 - %625 = load %struct.edge_rec*, %struct.edge_rec** %624, align 4 - %626 = getelementptr %struct.edge_rec, %struct.edge_rec* %615, i32 0, i32 1 - %627 = load %struct.edge_rec*, %struct.edge_rec** %626, align 4 - store %struct.edge_rec* %625, %struct.edge_rec** %626, align 4 - store %struct.edge_rec* %627, %struct.edge_rec** %624, align 4 - %628 = load %struct.edge_rec*, %struct.edge_rec** %596, align 4 - %629 = load %struct.edge_rec*, %struct.edge_rec** %616, align 4 - store %struct.edge_rec* %628, %struct.edge_rec** %616, align 4 - store %struct.edge_rec* %629, %struct.edge_rec** %596, align 4 + %623 = inttoptr i32 %622 to ptr + %624 = getelementptr %struct.edge_rec, ptr %623, i32 0, i32 1 + %625 = load ptr, ptr %624, align 4 + %626 = getelementptr %struct.edge_rec, ptr %615, i32 0, i32 1 + %627 = load ptr, ptr %626, align 4 + store ptr %625, ptr %626, align 4 + store ptr %627, ptr %624, align 4 + %628 = load ptr, ptr %596, align 4 + %629 = load ptr, ptr %616, align 4 + store ptr %628, ptr %616, align 4 + store ptr %629, ptr %596, align 4 %630 = xor i32 %598, 32 - %631 = inttoptr i32 %630 to %struct.edge_rec* - %632 = getelementptr %struct.edge_rec, %struct.edge_rec* %631, i32 0, i32 1 - %633 = load %struct.edge_rec*, %struct.edge_rec** %632, align 4 - %634 = ptrtoint %struct.edge_rec* %633 to i32 + %631 = inttoptr i32 %630 to ptr + %632 = getelementptr %struct.edge_rec, ptr %631, i32 0, i32 1 + %633 = load ptr, ptr %632, align 4 + %634 = ptrtoint ptr %633 to i32 %635 = add i32 %634, 16 %636 = and i32 %635, 63 %637 = and i32 %634, -64 %638 = or i32 %636, %637 - %639 = inttoptr i32 %638 to %struct.edge_rec* - %640 = getelementptr %struct.edge_rec, %struct.edge_rec* %174, i32 0, i32 1 - %641 = load %struct.edge_rec*, %struct.edge_rec** %640, align 4 - %642 = ptrtoint %struct.edge_rec* %641 to i32 + %639 = inttoptr i32 %638 to ptr + %640 = getelementptr %struct.edge_rec, ptr %174, i32 0, i32 1 + %641 = load ptr, ptr %640, align 4 + %642 = ptrtoint ptr %641 to i32 %643 = add i32 %642, 16 %644 = and i32 %643, 63 %645 = and i32 %642, -64 %646 = or i32 %644, %645 - %647 = inttoptr i32 %646 to %struct.edge_rec* - %648 = getelementptr %struct.edge_rec, %struct.edge_rec* %647, i32 0, i32 1 - %649 = load %struct.edge_rec*, %struct.edge_rec** %648, align 4 - %650 = getelementptr %struct.edge_rec, %struct.edge_rec* %639, i32 0, i32 1 - %651 = load %struct.edge_rec*, %struct.edge_rec** %650, align 4 - store %struct.edge_rec* %649, %struct.edge_rec** %650, align 4 - store %struct.edge_rec* %651, %struct.edge_rec** %648, align 4 - %652 = load %struct.edge_rec*, %struct.edge_rec** %632, align 4 - %653 = load %struct.edge_rec*, %struct.edge_rec** %640, align 4 - store %struct.edge_rec* %652, %struct.edge_rec** %640, align 4 - store %struct.edge_rec* %653, %struct.edge_rec** %632, align 4 + %647 = inttoptr i32 %646 to ptr + %648 = getelementptr %struct.edge_rec, ptr %647, i32 0, i32 1 + %649 = load ptr, ptr %648, align 4 + %650 = getelementptr %struct.edge_rec, ptr %639, i32 0, i32 1 + %651 = load ptr, ptr %650, align 4 + store ptr %649, ptr %650, align 4 + store ptr %651, ptr %648, align 4 + %652 = load ptr, ptr %632, align 4 + %653 = load ptr, ptr %640, align 4 + store ptr %652, ptr %640, align 4 + store ptr %653, ptr %632, align 4 %654 = add i32 %630, 48 %655 = and i32 %654, 63 %656 = and i32 %598, -64 %657 = or i32 %655, %656 - %658 = inttoptr i32 %657 to %struct.edge_rec* - %659 = getelementptr %struct.edge_rec, %struct.edge_rec* %658, i32 0, i32 1 - %660 = load %struct.edge_rec*, %struct.edge_rec** %659, align 4 - %661 = ptrtoint %struct.edge_rec* %660 to i32 + %658 = inttoptr i32 %657 to ptr + %659 = getelementptr %struct.edge_rec, ptr %658, i32 0, i32 1 + %660 = load ptr, ptr %659, align 4 + %661 = ptrtoint ptr %660 to i32 %662 = add i32 %661, 16 %663 = and i32 %662, 63 %664 = and i32 %661, -64 %665 = or i32 %663, %664 - %666 = inttoptr i32 %665 to %struct.edge_rec* + %666 = inttoptr i32 %665 to ptr br label %bb9.i bb25.i: @@ -873,334 +873,334 @@ bb25.i: %668 = and i32 %667, 63 %669 = and i32 %172, -64 %670 = or i32 %668, %669 - %671 = inttoptr i32 %670 to %struct.edge_rec* - %672 = getelementptr %struct.edge_rec, %struct.edge_rec* %671, i32 0, i32 1 - %673 = load %struct.edge_rec*, %struct.edge_rec** %672, align 4 - %674 = ptrtoint %struct.edge_rec* %673 to i32 + %671 = inttoptr i32 %670 to ptr + %672 = getelementptr %struct.edge_rec, ptr %671, i32 0, i32 1 + %673 = load ptr, ptr %672, align 4 + %674 = ptrtoint ptr %673 to i32 %675 = add i32 %674, 16 %676 = and i32 %675, 63 %677 = and i32 %674, -64 %678 = or i32 %676, %677 - %679 = inttoptr i32 %678 to %struct.edge_rec* - %680 = call %struct.edge_rec* @alloc_edge() nounwind - %681 = getelementptr %struct.edge_rec, %struct.edge_rec* %680, i32 0, i32 1 - store %struct.edge_rec* %680, %struct.edge_rec** %681, align 4 - %682 = getelementptr %struct.edge_rec, %struct.edge_rec* %680, i32 0, i32 0 - store %struct.VERTEX* %501, %struct.VERTEX** %682, align 4 - %683 = ptrtoint %struct.edge_rec* %680 to i32 + %679 = inttoptr i32 %678 to ptr + %680 = call ptr @alloc_edge() nounwind + %681 = getelementptr %struct.edge_rec, ptr %680, i32 0, i32 1 + store ptr %680, ptr %681, align 4 + %682 = getelementptr %struct.edge_rec, ptr %680, i32 0, i32 0 + store ptr %501, ptr %682, align 4 + %683 = ptrtoint ptr %680 to i32 %684 = add i32 %683, 16 - %685 = inttoptr i32 %684 to %struct.edge_rec* + %685 = inttoptr i32 %684 to ptr %686 = add i32 %683, 48 - %687 = inttoptr i32 %686 to %struct.edge_rec* - %688 = getelementptr %struct.edge_rec, %struct.edge_rec* %685, i32 0, i32 1 - store %struct.edge_rec* %687, %struct.edge_rec** %688, align 4 + %687 = inttoptr i32 %686 to ptr + %688 = getelementptr %struct.edge_rec, ptr %685, i32 0, i32 1 + store ptr %687, ptr %688, align 4 %689 = add i32 %683, 32 - %690 = inttoptr i32 %689 to %struct.edge_rec* - %691 = getelementptr %struct.edge_rec, %struct.edge_rec* %690, i32 0, i32 1 - store %struct.edge_rec* %690, %struct.edge_rec** %691, align 4 - %692 = getelementptr %struct.edge_rec, %struct.edge_rec* %690, i32 0, i32 0 - store %struct.VERTEX* %496, %struct.VERTEX** %692, align 4 - %693 = getelementptr %struct.edge_rec, %struct.edge_rec* %687, i32 0, i32 1 - store %struct.edge_rec* %685, %struct.edge_rec** %693, align 4 - %694 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4 - %695 = ptrtoint %struct.edge_rec* %694 to i32 + %690 = inttoptr i32 %689 to ptr + %691 = getelementptr %struct.edge_rec, ptr %690, i32 0, i32 1 + store ptr %690, ptr %691, align 4 + %692 = getelementptr %struct.edge_rec, ptr %690, i32 0, i32 0 + store ptr %496, ptr %692, align 4 + %693 = getelementptr %struct.edge_rec, ptr %687, i32 0, i32 1 + store ptr %685, ptr %693, align 4 + %694 = load ptr, ptr %681, align 4 + %695 = ptrtoint ptr %694 to i32 %696 = add i32 %695, 16 %697 = and i32 %696, 63 %698 = and i32 %695, -64 %699 = or i32 %697, %698 - %700 = inttoptr i32 %699 to %struct.edge_rec* - %701 = getelementptr %struct.edge_rec, %struct.edge_rec* %499, i32 0, i32 1 - %702 = load %struct.edge_rec*, %struct.edge_rec** %701, align 4 - %703 = ptrtoint %struct.edge_rec* %702 to i32 + %700 = inttoptr i32 %699 to ptr + %701 = getelementptr %struct.edge_rec, ptr %499, i32 0, i32 1 + %702 = load ptr, ptr %701, align 4 + %703 = ptrtoint ptr %702 to i32 %704 = add i32 %703, 16 %705 = and i32 %704, 63 %706 = and i32 %703, -64 %707 = or i32 %705, %706 - %708 = inttoptr i32 %707 to %struct.edge_rec* - %709 = getelementptr %struct.edge_rec, %struct.edge_rec* %708, i32 0, i32 1 - %710 = load %struct.edge_rec*, %struct.edge_rec** %709, align 4 - %711 = getelementptr %struct.edge_rec, %struct.edge_rec* %700, i32 0, i32 1 - %712 = load %struct.edge_rec*, %struct.edge_rec** %711, align 4 - store %struct.edge_rec* %710, %struct.edge_rec** %711, align 4 - store %struct.edge_rec* %712, %struct.edge_rec** %709, align 4 - %713 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4 - %714 = load %struct.edge_rec*, %struct.edge_rec** %701, align 4 - store %struct.edge_rec* %713, %struct.edge_rec** %701, align 4 - store %struct.edge_rec* %714, %struct.edge_rec** %681, align 4 + %708 = inttoptr i32 %707 to ptr + %709 = getelementptr %struct.edge_rec, ptr %708, i32 0, i32 1 + %710 = load ptr, ptr %709, align 4 + %711 = getelementptr %struct.edge_rec, ptr %700, i32 0, i32 1 + %712 = load ptr, ptr %711, align 4 + store ptr %710, ptr %711, align 4 + store ptr %712, ptr %709, align 4 + %713 = load ptr, ptr %681, align 4 + %714 = load ptr, ptr %701, align 4 + store ptr %713, ptr %701, align 4 + store ptr %714, ptr %681, align 4 %715 = xor i32 %683, 32 - %716 = inttoptr i32 %715 to %struct.edge_rec* - %717 = getelementptr %struct.edge_rec, %struct.edge_rec* %716, i32 0, i32 1 - %718 = load %struct.edge_rec*, %struct.edge_rec** %717, align 4 - %719 = ptrtoint %struct.edge_rec* %718 to i32 + %716 = inttoptr i32 %715 to ptr + %717 = getelementptr %struct.edge_rec, ptr %716, i32 0, i32 1 + %718 = load ptr, ptr %717, align 4 + %719 = ptrtoint ptr %718 to i32 %720 = add i32 %719, 16 %721 = and i32 %720, 63 %722 = and i32 %719, -64 %723 = or i32 %721, %722 - %724 = inttoptr i32 %723 to %struct.edge_rec* - %725 = getelementptr %struct.edge_rec, %struct.edge_rec* %679, i32 0, i32 1 - %726 = load %struct.edge_rec*, %struct.edge_rec** %725, align 4 - %727 = ptrtoint %struct.edge_rec* %726 to i32 + %724 = inttoptr i32 %723 to ptr + %725 = getelementptr %struct.edge_rec, ptr %679, i32 0, i32 1 + %726 = load ptr, ptr %725, align 4 + %727 = ptrtoint ptr %726 to i32 %728 = add i32 %727, 16 %729 = and i32 %728, 63 %730 = and i32 %727, -64 %731 = or i32 %729, %730 - %732 = inttoptr i32 %731 to %struct.edge_rec* - %733 = getelementptr %struct.edge_rec, %struct.edge_rec* %732, i32 0, i32 1 - %734 = load %struct.edge_rec*, %struct.edge_rec** %733, align 4 - %735 = getelementptr %struct.edge_rec, %struct.edge_rec* %724, i32 0, i32 1 - %736 = load %struct.edge_rec*, %struct.edge_rec** %735, align 4 - store %struct.edge_rec* %734, %struct.edge_rec** %735, align 4 - store %struct.edge_rec* %736, %struct.edge_rec** %733, align 4 - %737 = load %struct.edge_rec*, %struct.edge_rec** %717, align 4 - %738 = load %struct.edge_rec*, %struct.edge_rec** %725, align 4 - store %struct.edge_rec* %737, %struct.edge_rec** %725, align 4 - store %struct.edge_rec* %738, %struct.edge_rec** %717, align 4 - %739 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4 + %732 = inttoptr i32 %731 to ptr + %733 = getelementptr %struct.edge_rec, ptr %732, i32 0, i32 1 + %734 = load ptr, ptr %733, align 4 + %735 = getelementptr %struct.edge_rec, ptr %724, i32 0, i32 1 + %736 = load ptr, ptr %735, align 4 + store ptr %734, ptr %735, align 4 + store ptr %736, ptr %733, align 4 + %737 = load ptr, ptr %717, align 4 + %738 = load ptr, ptr %725, align 4 + store ptr %737, ptr %725, align 4 + store ptr %738, ptr %717, align 4 + %739 = load ptr, ptr %681, align 4 br label %bb9.i do_merge.exit: - %740 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldo_addr.0.ph.i, i32 0, i32 0 - %741 = load %struct.VERTEX*, %struct.VERTEX** %740, align 4 - %742 = icmp eq %struct.VERTEX* %741, %tree_addr.0.i + %740 = getelementptr %struct.edge_rec, ptr %ldo_addr.0.ph.i, i32 0, i32 0 + %741 = load ptr, ptr %740, align 4 + %742 = icmp eq ptr %741, %tree_addr.0.i br i1 %742, label %bb5.loopexit, label %bb2 bb2: - %ldo.07 = phi %struct.edge_rec* [ %747, %bb2 ], [ %ldo_addr.0.ph.i, %do_merge.exit ] - %743 = ptrtoint %struct.edge_rec* %ldo.07 to i32 + %ldo.07 = phi ptr [ %747, %bb2 ], [ %ldo_addr.0.ph.i, %do_merge.exit ] + %743 = ptrtoint ptr %ldo.07 to i32 %744 = xor i32 %743, 32 - %745 = inttoptr i32 %744 to %struct.edge_rec* - %746 = getelementptr %struct.edge_rec, %struct.edge_rec* %745, i32 0, i32 1 - %747 = load %struct.edge_rec*, %struct.edge_rec** %746, align 4 - %748 = getelementptr %struct.edge_rec, %struct.edge_rec* %747, i32 0, i32 0 - %749 = load %struct.VERTEX*, %struct.VERTEX** %748, align 4 - %750 = icmp eq %struct.VERTEX* %749, %tree_addr.0.i + %745 = inttoptr i32 %744 to ptr + %746 = getelementptr %struct.edge_rec, ptr %745, i32 0, i32 1 + %747 = load ptr, ptr %746, align 4 + %748 = getelementptr %struct.edge_rec, ptr %747, i32 0, i32 0 + %749 = load ptr, ptr %748, align 4 + %750 = icmp eq ptr %749, %tree_addr.0.i br i1 %750, label %bb5.loopexit, label %bb2 bb4: - %rdo.05 = phi %struct.edge_rec* [ %755, %bb4 ], [ %rdo_addr.0.i, %bb5.loopexit ] - %751 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdo.05, i32 0, i32 1 - %752 = load %struct.edge_rec*, %struct.edge_rec** %751, align 4 - %753 = ptrtoint %struct.edge_rec* %752 to i32 + %rdo.05 = phi ptr [ %755, %bb4 ], [ %rdo_addr.0.i, %bb5.loopexit ] + %751 = getelementptr %struct.edge_rec, ptr %rdo.05, i32 0, i32 1 + %752 = load ptr, ptr %751, align 4 + %753 = ptrtoint ptr %752 to i32 %754 = xor i32 %753, 32 - %755 = inttoptr i32 %754 to %struct.edge_rec* - %756 = getelementptr %struct.edge_rec, %struct.edge_rec* %755, i32 0, i32 0 - %757 = load %struct.VERTEX*, %struct.VERTEX** %756, align 4 - %758 = icmp eq %struct.VERTEX* %757, %extra + %755 = inttoptr i32 %754 to ptr + %756 = getelementptr %struct.edge_rec, ptr %755, i32 0, i32 0 + %757 = load ptr, ptr %756, align 4 + %758 = icmp eq ptr %757, %extra br i1 %758, label %bb6, label %bb4 bb5.loopexit: - %ldo.0.lcssa = phi %struct.edge_rec* [ %ldo_addr.0.ph.i, %do_merge.exit ], [ %747, %bb2 ] - %759 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdo_addr.0.i, i32 0, i32 0 - %760 = load %struct.VERTEX*, %struct.VERTEX** %759, align 4 - %761 = icmp eq %struct.VERTEX* %760, %extra + %ldo.0.lcssa = phi ptr [ %ldo_addr.0.ph.i, %do_merge.exit ], [ %747, %bb2 ] + %759 = getelementptr %struct.edge_rec, ptr %rdo_addr.0.i, i32 0, i32 0 + %760 = load ptr, ptr %759, align 4 + %761 = icmp eq ptr %760, %extra br i1 %761, label %bb6, label %bb4 bb6: - %rdo.0.lcssa = phi %struct.edge_rec* [ %rdo_addr.0.i, %bb5.loopexit ], [ %755, %bb4 ] - %tmp16 = ptrtoint %struct.edge_rec* %ldo.0.lcssa to i32 - %tmp4 = ptrtoint %struct.edge_rec* %rdo.0.lcssa to i32 + %rdo.0.lcssa = phi ptr [ %rdo_addr.0.i, %bb5.loopexit ], [ %755, %bb4 ] + %tmp16 = ptrtoint ptr %ldo.0.lcssa to i32 + %tmp4 = ptrtoint ptr %rdo.0.lcssa to i32 br label %bb15 bb7: - %762 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 1 - %763 = load %struct.VERTEX*, %struct.VERTEX** %762, align 4 - %764 = icmp eq %struct.VERTEX* %763, null - %765 = call %struct.edge_rec* @alloc_edge() nounwind - %766 = getelementptr %struct.edge_rec, %struct.edge_rec* %765, i32 0, i32 1 - store %struct.edge_rec* %765, %struct.edge_rec** %766, align 4 - %767 = getelementptr %struct.edge_rec, %struct.edge_rec* %765, i32 0, i32 0 + %762 = getelementptr %struct.VERTEX, ptr %tree, i32 0, i32 1 + %763 = load ptr, ptr %762, align 4 + %764 = icmp eq ptr %763, null + %765 = call ptr @alloc_edge() nounwind + %766 = getelementptr %struct.edge_rec, ptr %765, i32 0, i32 1 + store ptr %765, ptr %766, align 4 + %767 = getelementptr %struct.edge_rec, ptr %765, i32 0, i32 0 br i1 %764, label %bb10, label %bb11 bb8: - %768 = call i32 @puts(i8* getelementptr ([21 x i8], [21 x i8]* @_2E_str7, i32 0, i32 0)) nounwind + %768 = call i32 @puts(ptr @_2E_str7) nounwind call void @exit(i32 -1) noreturn nounwind unreachable bb10: - store %struct.VERTEX* %tree, %struct.VERTEX** %767, align 4 - %769 = ptrtoint %struct.edge_rec* %765 to i32 + store ptr %tree, ptr %767, align 4 + %769 = ptrtoint ptr %765 to i32 %770 = add i32 %769, 16 - %771 = inttoptr i32 %770 to %struct.edge_rec* + %771 = inttoptr i32 %770 to ptr %772 = add i32 %769, 48 - %773 = inttoptr i32 %772 to %struct.edge_rec* - %774 = getelementptr %struct.edge_rec, %struct.edge_rec* %771, i32 0, i32 1 - store %struct.edge_rec* %773, %struct.edge_rec** %774, align 4 + %773 = inttoptr i32 %772 to ptr + %774 = getelementptr %struct.edge_rec, ptr %771, i32 0, i32 1 + store ptr %773, ptr %774, align 4 %775 = add i32 %769, 32 - %776 = inttoptr i32 %775 to %struct.edge_rec* - %777 = getelementptr %struct.edge_rec, %struct.edge_rec* %776, i32 0, i32 1 - store %struct.edge_rec* %776, %struct.edge_rec** %777, align 4 - %778 = getelementptr %struct.edge_rec, %struct.edge_rec* %776, i32 0, i32 0 - store %struct.VERTEX* %extra, %struct.VERTEX** %778, align 4 - %779 = getelementptr %struct.edge_rec, %struct.edge_rec* %773, i32 0, i32 1 - store %struct.edge_rec* %771, %struct.edge_rec** %779, align 4 + %776 = inttoptr i32 %775 to ptr + %777 = getelementptr %struct.edge_rec, ptr %776, i32 0, i32 1 + store ptr %776, ptr %777, align 4 + %778 = getelementptr %struct.edge_rec, ptr %776, i32 0, i32 0 + store ptr %extra, ptr %778, align 4 + %779 = getelementptr %struct.edge_rec, ptr %773, i32 0, i32 1 + store ptr %771, ptr %779, align 4 %780 = xor i32 %769, 32 br label %bb15 bb11: - store %struct.VERTEX* %763, %struct.VERTEX** %767, align 4 - %781 = ptrtoint %struct.edge_rec* %765 to i32 + store ptr %763, ptr %767, align 4 + %781 = ptrtoint ptr %765 to i32 %782 = add i32 %781, 16 - %783 = inttoptr i32 %782 to %struct.edge_rec* + %783 = inttoptr i32 %782 to ptr %784 = add i32 %781, 48 - %785 = inttoptr i32 %784 to %struct.edge_rec* - %786 = getelementptr %struct.edge_rec, %struct.edge_rec* %783, i32 0, i32 1 - store %struct.edge_rec* %785, %struct.edge_rec** %786, align 4 + %785 = inttoptr i32 %784 to ptr + %786 = getelementptr %struct.edge_rec, ptr %783, i32 0, i32 1 + store ptr %785, ptr %786, align 4 %787 = add i32 %781, 32 - %788 = inttoptr i32 %787 to %struct.edge_rec* - %789 = getelementptr %struct.edge_rec, %struct.edge_rec* %788, i32 0, i32 1 - store %struct.edge_rec* %788, %struct.edge_rec** %789, align 4 - %790 = getelementptr %struct.edge_rec, %struct.edge_rec* %788, i32 0, i32 0 - store %struct.VERTEX* %tree, %struct.VERTEX** %790, align 4 - %791 = getelementptr %struct.edge_rec, %struct.edge_rec* %785, i32 0, i32 1 - store %struct.edge_rec* %783, %struct.edge_rec** %791, align 4 - %792 = call %struct.edge_rec* @alloc_edge() nounwind - %793 = getelementptr %struct.edge_rec, %struct.edge_rec* %792, i32 0, i32 1 - store %struct.edge_rec* %792, %struct.edge_rec** %793, align 4 - %794 = getelementptr %struct.edge_rec, %struct.edge_rec* %792, i32 0, i32 0 - store %struct.VERTEX* %tree, %struct.VERTEX** %794, align 4 - %795 = ptrtoint %struct.edge_rec* %792 to i32 + %788 = inttoptr i32 %787 to ptr + %789 = getelementptr %struct.edge_rec, ptr %788, i32 0, i32 1 + store ptr %788, ptr %789, align 4 + %790 = getelementptr %struct.edge_rec, ptr %788, i32 0, i32 0 + store ptr %tree, ptr %790, align 4 + %791 = getelementptr %struct.edge_rec, ptr %785, i32 0, i32 1 + store ptr %783, ptr %791, align 4 + %792 = call ptr @alloc_edge() nounwind + %793 = getelementptr %struct.edge_rec, ptr %792, i32 0, i32 1 + store ptr %792, ptr %793, align 4 + %794 = getelementptr %struct.edge_rec, ptr %792, i32 0, i32 0 + store ptr %tree, ptr %794, align 4 + %795 = ptrtoint ptr %792 to i32 %796 = add i32 %795, 16 - %797 = inttoptr i32 %796 to %struct.edge_rec* + %797 = inttoptr i32 %796 to ptr %798 = add i32 %795, 48 - %799 = inttoptr i32 %798 to %struct.edge_rec* - %800 = getelementptr %struct.edge_rec, %struct.edge_rec* %797, i32 0, i32 1 - store %struct.edge_rec* %799, %struct.edge_rec** %800, align 4 + %799 = inttoptr i32 %798 to ptr + %800 = getelementptr %struct.edge_rec, ptr %797, i32 0, i32 1 + store ptr %799, ptr %800, align 4 %801 = add i32 %795, 32 - %802 = inttoptr i32 %801 to %struct.edge_rec* - %803 = getelementptr %struct.edge_rec, %struct.edge_rec* %802, i32 0, i32 1 - store %struct.edge_rec* %802, %struct.edge_rec** %803, align 4 - %804 = getelementptr %struct.edge_rec, %struct.edge_rec* %802, i32 0, i32 0 - store %struct.VERTEX* %extra, %struct.VERTEX** %804, align 4 - %805 = getelementptr %struct.edge_rec, %struct.edge_rec* %799, i32 0, i32 1 - store %struct.edge_rec* %797, %struct.edge_rec** %805, align 4 + %802 = inttoptr i32 %801 to ptr + %803 = getelementptr %struct.edge_rec, ptr %802, i32 0, i32 1 + store ptr %802, ptr %803, align 4 + %804 = getelementptr %struct.edge_rec, ptr %802, i32 0, i32 0 + store ptr %extra, ptr %804, align 4 + %805 = getelementptr %struct.edge_rec, ptr %799, i32 0, i32 1 + store ptr %797, ptr %805, align 4 %806 = xor i32 %781, 32 - %807 = inttoptr i32 %806 to %struct.edge_rec* - %808 = getelementptr %struct.edge_rec, %struct.edge_rec* %807, i32 0, i32 1 - %809 = load %struct.edge_rec*, %struct.edge_rec** %808, align 4 - %810 = ptrtoint %struct.edge_rec* %809 to i32 + %807 = inttoptr i32 %806 to ptr + %808 = getelementptr %struct.edge_rec, ptr %807, i32 0, i32 1 + %809 = load ptr, ptr %808, align 4 + %810 = ptrtoint ptr %809 to i32 %811 = add i32 %810, 16 %812 = and i32 %811, 63 %813 = and i32 %810, -64 %814 = or i32 %812, %813 - %815 = inttoptr i32 %814 to %struct.edge_rec* - %816 = load %struct.edge_rec*, %struct.edge_rec** %793, align 4 - %817 = ptrtoint %struct.edge_rec* %816 to i32 + %815 = inttoptr i32 %814 to ptr + %816 = load ptr, ptr %793, align 4 + %817 = ptrtoint ptr %816 to i32 %818 = add i32 %817, 16 %819 = and i32 %818, 63 %820 = and i32 %817, -64 %821 = or i32 %819, %820 - %822 = inttoptr i32 %821 to %struct.edge_rec* - %823 = getelementptr %struct.edge_rec, %struct.edge_rec* %822, i32 0, i32 1 - %824 = load %struct.edge_rec*, %struct.edge_rec** %823, align 4 - %825 = getelementptr %struct.edge_rec, %struct.edge_rec* %815, i32 0, i32 1 - %826 = load %struct.edge_rec*, %struct.edge_rec** %825, align 4 - store %struct.edge_rec* %824, %struct.edge_rec** %825, align 4 - store %struct.edge_rec* %826, %struct.edge_rec** %823, align 4 - %827 = load %struct.edge_rec*, %struct.edge_rec** %808, align 4 - %828 = load %struct.edge_rec*, %struct.edge_rec** %793, align 4 - store %struct.edge_rec* %827, %struct.edge_rec** %793, align 4 - store %struct.edge_rec* %828, %struct.edge_rec** %808, align 4 + %822 = inttoptr i32 %821 to ptr + %823 = getelementptr %struct.edge_rec, ptr %822, i32 0, i32 1 + %824 = load ptr, ptr %823, align 4 + %825 = getelementptr %struct.edge_rec, ptr %815, i32 0, i32 1 + %826 = load ptr, ptr %825, align 4 + store ptr %824, ptr %825, align 4 + store ptr %826, ptr %823, align 4 + %827 = load ptr, ptr %808, align 4 + %828 = load ptr, ptr %793, align 4 + store ptr %827, ptr %793, align 4 + store ptr %828, ptr %808, align 4 %829 = xor i32 %795, 32 - %830 = inttoptr i32 %829 to %struct.edge_rec* - %831 = getelementptr %struct.edge_rec, %struct.edge_rec* %830, i32 0, i32 0 - %832 = load %struct.VERTEX*, %struct.VERTEX** %831, align 4 + %830 = inttoptr i32 %829 to ptr + %831 = getelementptr %struct.edge_rec, ptr %830, i32 0, i32 0 + %832 = load ptr, ptr %831, align 4 %833 = and i32 %798, 63 %834 = and i32 %795, -64 %835 = or i32 %833, %834 - %836 = inttoptr i32 %835 to %struct.edge_rec* - %837 = getelementptr %struct.edge_rec, %struct.edge_rec* %836, i32 0, i32 1 - %838 = load %struct.edge_rec*, %struct.edge_rec** %837, align 4 - %839 = ptrtoint %struct.edge_rec* %838 to i32 + %836 = inttoptr i32 %835 to ptr + %837 = getelementptr %struct.edge_rec, ptr %836, i32 0, i32 1 + %838 = load ptr, ptr %837, align 4 + %839 = ptrtoint ptr %838 to i32 %840 = add i32 %839, 16 %841 = and i32 %840, 63 %842 = and i32 %839, -64 %843 = or i32 %841, %842 - %844 = inttoptr i32 %843 to %struct.edge_rec* - %845 = load %struct.VERTEX*, %struct.VERTEX** %767, align 4 - %846 = call %struct.edge_rec* @alloc_edge() nounwind - %847 = getelementptr %struct.edge_rec, %struct.edge_rec* %846, i32 0, i32 1 - store %struct.edge_rec* %846, %struct.edge_rec** %847, align 4 - %848 = getelementptr %struct.edge_rec, %struct.edge_rec* %846, i32 0, i32 0 - store %struct.VERTEX* %832, %struct.VERTEX** %848, align 4 - %849 = ptrtoint %struct.edge_rec* %846 to i32 + %844 = inttoptr i32 %843 to ptr + %845 = load ptr, ptr %767, align 4 + %846 = call ptr @alloc_edge() nounwind + %847 = getelementptr %struct.edge_rec, ptr %846, i32 0, i32 1 + store ptr %846, ptr %847, align 4 + %848 = getelementptr %struct.edge_rec, ptr %846, i32 0, i32 0 + store ptr %832, ptr %848, align 4 + %849 = ptrtoint ptr %846 to i32 %850 = add i32 %849, 16 - %851 = inttoptr i32 %850 to %struct.edge_rec* + %851 = inttoptr i32 %850 to ptr %852 = add i32 %849, 48 - %853 = inttoptr i32 %852 to %struct.edge_rec* - %854 = getelementptr %struct.edge_rec, %struct.edge_rec* %851, i32 0, i32 1 - store %struct.edge_rec* %853, %struct.edge_rec** %854, align 4 + %853 = inttoptr i32 %852 to ptr + %854 = getelementptr %struct.edge_rec, ptr %851, i32 0, i32 1 + store ptr %853, ptr %854, align 4 %855 = add i32 %849, 32 - %856 = inttoptr i32 %855 to %struct.edge_rec* - %857 = getelementptr %struct.edge_rec, %struct.edge_rec* %856, i32 0, i32 1 - store %struct.edge_rec* %856, %struct.edge_rec** %857, align 4 - %858 = getelementptr %struct.edge_rec, %struct.edge_rec* %856, i32 0, i32 0 - store %struct.VERTEX* %845, %struct.VERTEX** %858, align 4 - %859 = getelementptr %struct.edge_rec, %struct.edge_rec* %853, i32 0, i32 1 - store %struct.edge_rec* %851, %struct.edge_rec** %859, align 4 - %860 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4 - %861 = ptrtoint %struct.edge_rec* %860 to i32 + %856 = inttoptr i32 %855 to ptr + %857 = getelementptr %struct.edge_rec, ptr %856, i32 0, i32 1 + store ptr %856, ptr %857, align 4 + %858 = getelementptr %struct.edge_rec, ptr %856, i32 0, i32 0 + store ptr %845, ptr %858, align 4 + %859 = getelementptr %struct.edge_rec, ptr %853, i32 0, i32 1 + store ptr %851, ptr %859, align 4 + %860 = load ptr, ptr %847, align 4 + %861 = ptrtoint ptr %860 to i32 %862 = add i32 %861, 16 %863 = and i32 %862, 63 %864 = and i32 %861, -64 %865 = or i32 %863, %864 - %866 = inttoptr i32 %865 to %struct.edge_rec* - %867 = getelementptr %struct.edge_rec, %struct.edge_rec* %844, i32 0, i32 1 - %868 = load %struct.edge_rec*, %struct.edge_rec** %867, align 4 - %869 = ptrtoint %struct.edge_rec* %868 to i32 + %866 = inttoptr i32 %865 to ptr + %867 = getelementptr %struct.edge_rec, ptr %844, i32 0, i32 1 + %868 = load ptr, ptr %867, align 4 + %869 = ptrtoint ptr %868 to i32 %870 = add i32 %869, 16 %871 = and i32 %870, 63 %872 = and i32 %869, -64 %873 = or i32 %871, %872 - %874 = inttoptr i32 %873 to %struct.edge_rec* - %875 = getelementptr %struct.edge_rec, %struct.edge_rec* %874, i32 0, i32 1 - %876 = load %struct.edge_rec*, %struct.edge_rec** %875, align 4 - %877 = getelementptr %struct.edge_rec, %struct.edge_rec* %866, i32 0, i32 1 - %878 = load %struct.edge_rec*, %struct.edge_rec** %877, align 4 - store %struct.edge_rec* %876, %struct.edge_rec** %877, align 4 - store %struct.edge_rec* %878, %struct.edge_rec** %875, align 4 - %879 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4 - %880 = load %struct.edge_rec*, %struct.edge_rec** %867, align 4 - store %struct.edge_rec* %879, %struct.edge_rec** %867, align 4 - store %struct.edge_rec* %880, %struct.edge_rec** %847, align 4 + %874 = inttoptr i32 %873 to ptr + %875 = getelementptr %struct.edge_rec, ptr %874, i32 0, i32 1 + %876 = load ptr, ptr %875, align 4 + %877 = getelementptr %struct.edge_rec, ptr %866, i32 0, i32 1 + %878 = load ptr, ptr %877, align 4 + store ptr %876, ptr %877, align 4 + store ptr %878, ptr %875, align 4 + %879 = load ptr, ptr %847, align 4 + %880 = load ptr, ptr %867, align 4 + store ptr %879, ptr %867, align 4 + store ptr %880, ptr %847, align 4 %881 = xor i32 %849, 32 - %882 = inttoptr i32 %881 to %struct.edge_rec* - %883 = getelementptr %struct.edge_rec, %struct.edge_rec* %882, i32 0, i32 1 - %884 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4 - %885 = ptrtoint %struct.edge_rec* %884 to i32 + %882 = inttoptr i32 %881 to ptr + %883 = getelementptr %struct.edge_rec, ptr %882, i32 0, i32 1 + %884 = load ptr, ptr %883, align 4 + %885 = ptrtoint ptr %884 to i32 %886 = add i32 %885, 16 %887 = and i32 %886, 63 %888 = and i32 %885, -64 %889 = or i32 %887, %888 - %890 = inttoptr i32 %889 to %struct.edge_rec* - %891 = load %struct.edge_rec*, %struct.edge_rec** %766, align 4 - %892 = ptrtoint %struct.edge_rec* %891 to i32 + %890 = inttoptr i32 %889 to ptr + %891 = load ptr, ptr %766, align 4 + %892 = ptrtoint ptr %891 to i32 %893 = add i32 %892, 16 %894 = and i32 %893, 63 %895 = and i32 %892, -64 %896 = or i32 %894, %895 - %897 = inttoptr i32 %896 to %struct.edge_rec* - %898 = getelementptr %struct.edge_rec, %struct.edge_rec* %897, i32 0, i32 1 - %899 = load %struct.edge_rec*, %struct.edge_rec** %898, align 4 - %900 = getelementptr %struct.edge_rec, %struct.edge_rec* %890, i32 0, i32 1 - %901 = load %struct.edge_rec*, %struct.edge_rec** %900, align 4 - store %struct.edge_rec* %899, %struct.edge_rec** %900, align 4 - store %struct.edge_rec* %901, %struct.edge_rec** %898, align 4 - %902 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4 - %903 = load %struct.edge_rec*, %struct.edge_rec** %766, align 4 - store %struct.edge_rec* %902, %struct.edge_rec** %766, align 4 - store %struct.edge_rec* %903, %struct.edge_rec** %883, align 4 - %904 = getelementptr %struct.VERTEX, %struct.VERTEX* %763, i32 0, i32 0, i32 0 - %905 = load double, double* %904, align 4 - %906 = getelementptr %struct.VERTEX, %struct.VERTEX* %763, i32 0, i32 0, i32 1 - %907 = load double, double* %906, align 4 - %908 = getelementptr %struct.VERTEX, %struct.VERTEX* %extra, i32 0, i32 0, i32 0 - %909 = load double, double* %908, align 4 - %910 = getelementptr %struct.VERTEX, %struct.VERTEX* %extra, i32 0, i32 0, i32 1 - %911 = load double, double* %910, align 4 - %912 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 0, i32 0 - %913 = load double, double* %912, align 4 - %914 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 0, i32 1 - %915 = load double, double* %914, align 4 + %897 = inttoptr i32 %896 to ptr + %898 = getelementptr %struct.edge_rec, ptr %897, i32 0, i32 1 + %899 = load ptr, ptr %898, align 4 + %900 = getelementptr %struct.edge_rec, ptr %890, i32 0, i32 1 + %901 = load ptr, ptr %900, align 4 + store ptr %899, ptr %900, align 4 + store ptr %901, ptr %898, align 4 + %902 = load ptr, ptr %883, align 4 + %903 = load ptr, ptr %766, align 4 + store ptr %902, ptr %766, align 4 + store ptr %903, ptr %883, align 4 + %904 = getelementptr %struct.VERTEX, ptr %763, i32 0, i32 0, i32 0 + %905 = load double, ptr %904, align 4 + %906 = getelementptr %struct.VERTEX, ptr %763, i32 0, i32 0, i32 1 + %907 = load double, ptr %906, align 4 + %908 = getelementptr %struct.VERTEX, ptr %extra, i32 0, i32 0, i32 0 + %909 = load double, ptr %908, align 4 + %910 = getelementptr %struct.VERTEX, ptr %extra, i32 0, i32 0, i32 1 + %911 = load double, ptr %910, align 4 + %912 = getelementptr %struct.VERTEX, ptr %tree, i32 0, i32 0, i32 0 + %913 = load double, ptr %912, align 4 + %914 = getelementptr %struct.VERTEX, ptr %tree, i32 0, i32 0, i32 1 + %915 = load double, ptr %914, align 4 %916 = fsub double %905, %913 %917 = fsub double %911, %915 %918 = fmul double %916, %917 @@ -1226,93 +1226,93 @@ bb14: %932 = and i32 %850, 63 %933 = and i32 %849, -64 %934 = or i32 %932, %933 - %935 = inttoptr i32 %934 to %struct.edge_rec* - %936 = getelementptr %struct.edge_rec, %struct.edge_rec* %935, i32 0, i32 1 - %937 = load %struct.edge_rec*, %struct.edge_rec** %936, align 4 - %938 = ptrtoint %struct.edge_rec* %937 to i32 + %935 = inttoptr i32 %934 to ptr + %936 = getelementptr %struct.edge_rec, ptr %935, i32 0, i32 1 + %937 = load ptr, ptr %936, align 4 + %938 = ptrtoint ptr %937 to i32 %939 = add i32 %938, 16 %940 = and i32 %939, 63 %941 = and i32 %938, -64 %942 = or i32 %940, %941 - %943 = inttoptr i32 %942 to %struct.edge_rec* - %944 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4 - %945 = ptrtoint %struct.edge_rec* %944 to i32 + %943 = inttoptr i32 %942 to ptr + %944 = load ptr, ptr %847, align 4 + %945 = ptrtoint ptr %944 to i32 %946 = add i32 %945, 16 %947 = and i32 %946, 63 %948 = and i32 %945, -64 %949 = or i32 %947, %948 - %950 = inttoptr i32 %949 to %struct.edge_rec* - %951 = getelementptr %struct.edge_rec, %struct.edge_rec* %943, i32 0, i32 1 - %952 = load %struct.edge_rec*, %struct.edge_rec** %951, align 4 - %953 = ptrtoint %struct.edge_rec* %952 to i32 + %950 = inttoptr i32 %949 to ptr + %951 = getelementptr %struct.edge_rec, ptr %943, i32 0, i32 1 + %952 = load ptr, ptr %951, align 4 + %953 = ptrtoint ptr %952 to i32 %954 = add i32 %953, 16 %955 = and i32 %954, 63 %956 = and i32 %953, -64 %957 = or i32 %955, %956 - %958 = inttoptr i32 %957 to %struct.edge_rec* - %959 = getelementptr %struct.edge_rec, %struct.edge_rec* %958, i32 0, i32 1 - %960 = load %struct.edge_rec*, %struct.edge_rec** %959, align 4 - %961 = getelementptr %struct.edge_rec, %struct.edge_rec* %950, i32 0, i32 1 - %962 = load %struct.edge_rec*, %struct.edge_rec** %961, align 4 - store %struct.edge_rec* %960, %struct.edge_rec** %961, align 4 - store %struct.edge_rec* %962, %struct.edge_rec** %959, align 4 - %963 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4 - %964 = load %struct.edge_rec*, %struct.edge_rec** %951, align 4 - store %struct.edge_rec* %963, %struct.edge_rec** %951, align 4 - store %struct.edge_rec* %964, %struct.edge_rec** %847, align 4 + %958 = inttoptr i32 %957 to ptr + %959 = getelementptr %struct.edge_rec, ptr %958, i32 0, i32 1 + %960 = load ptr, ptr %959, align 4 + %961 = getelementptr %struct.edge_rec, ptr %950, i32 0, i32 1 + %962 = load ptr, ptr %961, align 4 + store ptr %960, ptr %961, align 4 + store ptr %962, ptr %959, align 4 + %963 = load ptr, ptr %847, align 4 + %964 = load ptr, ptr %951, align 4 + store ptr %963, ptr %951, align 4 + store ptr %964, ptr %847, align 4 %965 = add i32 %881, 16 %966 = and i32 %965, 63 %967 = or i32 %966, %933 - %968 = inttoptr i32 %967 to %struct.edge_rec* - %969 = getelementptr %struct.edge_rec, %struct.edge_rec* %968, i32 0, i32 1 - %970 = load %struct.edge_rec*, %struct.edge_rec** %969, align 4 - %971 = ptrtoint %struct.edge_rec* %970 to i32 + %968 = inttoptr i32 %967 to ptr + %969 = getelementptr %struct.edge_rec, ptr %968, i32 0, i32 1 + %970 = load ptr, ptr %969, align 4 + %971 = ptrtoint ptr %970 to i32 %972 = add i32 %971, 16 %973 = and i32 %972, 63 %974 = and i32 %971, -64 %975 = or i32 %973, %974 - %976 = inttoptr i32 %975 to %struct.edge_rec* - %977 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4 - %978 = ptrtoint %struct.edge_rec* %977 to i32 + %976 = inttoptr i32 %975 to ptr + %977 = load ptr, ptr %883, align 4 + %978 = ptrtoint ptr %977 to i32 %979 = add i32 %978, 16 %980 = and i32 %979, 63 %981 = and i32 %978, -64 %982 = or i32 %980, %981 - %983 = inttoptr i32 %982 to %struct.edge_rec* - %984 = getelementptr %struct.edge_rec, %struct.edge_rec* %976, i32 0, i32 1 - %985 = load %struct.edge_rec*, %struct.edge_rec** %984, align 4 - %986 = ptrtoint %struct.edge_rec* %985 to i32 + %983 = inttoptr i32 %982 to ptr + %984 = getelementptr %struct.edge_rec, ptr %976, i32 0, i32 1 + %985 = load ptr, ptr %984, align 4 + %986 = ptrtoint ptr %985 to i32 %987 = add i32 %986, 16 %988 = and i32 %987, 63 %989 = and i32 %986, -64 %990 = or i32 %988, %989 - %991 = inttoptr i32 %990 to %struct.edge_rec* - %992 = getelementptr %struct.edge_rec, %struct.edge_rec* %991, i32 0, i32 1 - %993 = load %struct.edge_rec*, %struct.edge_rec** %992, align 4 - %994 = getelementptr %struct.edge_rec, %struct.edge_rec* %983, i32 0, i32 1 - %995 = load %struct.edge_rec*, %struct.edge_rec** %994, align 4 - store %struct.edge_rec* %993, %struct.edge_rec** %994, align 4 - store %struct.edge_rec* %995, %struct.edge_rec** %992, align 4 - %996 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4 - %997 = load %struct.edge_rec*, %struct.edge_rec** %984, align 4 - store %struct.edge_rec* %996, %struct.edge_rec** %984, align 4 - store %struct.edge_rec* %997, %struct.edge_rec** %883, align 4 - %998 = inttoptr i32 %933 to %struct.edge_rec* - %999 = load %struct.edge_rec*, %struct.edge_rec** @avail_edge, align 4 - %1000 = getelementptr %struct.edge_rec, %struct.edge_rec* %998, i32 0, i32 1 - store %struct.edge_rec* %999, %struct.edge_rec** %1000, align 4 - store %struct.edge_rec* %998, %struct.edge_rec** @avail_edge, align 4 + %991 = inttoptr i32 %990 to ptr + %992 = getelementptr %struct.edge_rec, ptr %991, i32 0, i32 1 + %993 = load ptr, ptr %992, align 4 + %994 = getelementptr %struct.edge_rec, ptr %983, i32 0, i32 1 + %995 = load ptr, ptr %994, align 4 + store ptr %993, ptr %994, align 4 + store ptr %995, ptr %992, align 4 + %996 = load ptr, ptr %883, align 4 + %997 = load ptr, ptr %984, align 4 + store ptr %996, ptr %984, align 4 + store ptr %997, ptr %883, align 4 + %998 = inttoptr i32 %933 to ptr + %999 = load ptr, ptr @avail_edge, align 4 + %1000 = getelementptr %struct.edge_rec, ptr %998, i32 0, i32 1 + store ptr %999, ptr %1000, align 4 + store ptr %998, ptr @avail_edge, align 4 br label %bb15 bb15: %retval.1.0 = phi i32 [ %780, %bb10 ], [ %829, %bb13 ], [ %829, %bb14 ], [ %tmp4, %bb6 ], [ %849, %bb11 ] %retval.0.0 = phi i32 [ %769, %bb10 ], [ %781, %bb13 ], [ %781, %bb14 ], [ %tmp16, %bb6 ], [ %881, %bb11 ] - %agg.result162 = bitcast %struct.EDGE_PAIR* %agg.result to i64* + %agg.result162 = bitcast ptr %agg.result to ptr %1001 = zext i32 %retval.0.0 to i64 %1002 = zext i32 %retval.1.0 to i64 %1003 = shl i64 %1002, 32 %1004 = or i64 %1003, %1001 - store i64 %1004, i64* %agg.result162, align 4 + store i64 %1004, ptr %agg.result162, align 4 ret void } @@ -1331,8 +1331,8 @@ bb15: ; CHECK: vcmp ; CHECK: vcmp -declare i32 @puts(i8* nocapture) nounwind +declare i32 @puts(ptr nocapture) nounwind declare void @exit(i32) noreturn nounwind -declare %struct.edge_rec* @alloc_edge() nounwind +declare ptr @alloc_edge() nounwind diff --git a/llvm/test/CodeGen/ARM/Windows/wineh-basic.ll b/llvm/test/CodeGen/ARM/Windows/wineh-basic.ll index c4e809f..d0bdd66 100644 --- a/llvm/test/CodeGen/ARM/Windows/wineh-basic.ll +++ b/llvm/test/CodeGen/ARM/Windows/wineh-basic.ll @@ -16,9 +16,9 @@ target triple = "thumbv7--windows-msvc19.0.24210" %class.field = type { i8 } ; Function Attrs: nounwind -define arm_aapcs_vfpcc void @"\01??1field@@AAA@XZ"(%class.field* nocapture readnone %this) unnamed_addr #0 align 2 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { +define arm_aapcs_vfpcc void @"\01??1field@@AAA@XZ"(ptr nocapture readnone %this) unnamed_addr #0 align 2 personality ptr @__CxxFrameHandler3 { entry: - invoke arm_aapcs_vfpcc void @free(i8* null) + invoke arm_aapcs_vfpcc void @free(ptr null) to label %invoke.cont unwind label %terminate invoke.cont: ; preds = %entry @@ -30,7 +30,7 @@ terminate: ; preds = %entry unreachable } -declare arm_aapcs_vfpcc void @free(i8*) local_unnamed_addr #1 +declare arm_aapcs_vfpcc void @free(ptr) local_unnamed_addr #1 declare arm_aapcs_vfpcc i32 @__CxxFrameHandler3(...) diff --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll index 9c2da34..17d1ca6 100644 --- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll +++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll @@ -24,7 +24,7 @@ declare arm_aapcs_vfpcc <16 x i8> @get_inputf32(float) local_unnamed_addr -define arm_aapcs_vfpcc void @aese_zero(<16 x i8>* %0) nounwind { +define arm_aapcs_vfpcc void @aese_zero(ptr %0) nounwind { ; CHECK-FIX-LABEL: aese_zero: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0] @@ -33,14 +33,14 @@ define arm_aapcs_vfpcc void @aese_zero(<16 x i8>* %0) nounwind { ; CHECK-FIX-NEXT: aesmc.8 q8, q9 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0] ; CHECK-FIX-NEXT: bx lr - %2 = load <16 x i8>, <16 x i8>* %0, align 8 + %2 = load <16 x i8>, ptr %0, align 8 %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> zeroinitializer, <16 x i8> %2) %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3) - store <16 x i8> %4, <16 x i8>* %0, align 8 + store <16 x i8> %4, ptr %0, align 8 ret void } -define arm_aapcs_vfpcc void @aese_via_call1(<16 x i8>* %0) nounwind { +define arm_aapcs_vfpcc void @aese_via_call1(ptr %0) nounwind { ; CHECK-FIX-LABEL: aese_via_call1: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: .save {r4, lr} @@ -54,14 +54,14 @@ define arm_aapcs_vfpcc void @aese_via_call1(<16 x i8>* %0) nounwind { ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4] ; CHECK-FIX-NEXT: pop {r4, pc} %2 = call arm_aapcs_vfpcc <16 x i8> @get_input() - %3 = load <16 x i8>, <16 x i8>* %0, align 8 + %3 = load <16 x i8>, ptr %0, align 8 %4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %2, <16 x i8> %3) %5 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %4) - store <16 x i8> %5, <16 x i8>* %0, align 8 + store <16 x i8> %5, ptr %0, align 8 ret void } -define arm_aapcs_vfpcc void @aese_via_call2(half %0, <16 x i8>* %1) nounwind { +define arm_aapcs_vfpcc void @aese_via_call2(half %0, ptr %1) nounwind { ; CHECK-FIX-LABEL: aese_via_call2: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: .save {r4, lr} @@ -75,14 +75,14 @@ define arm_aapcs_vfpcc void @aese_via_call2(half %0, <16 x i8>* %1) nounwind { ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4] ; CHECK-FIX-NEXT: pop {r4, pc} %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0) - %4 = load <16 x i8>, <16 x i8>* %1, align 8 + %4 = load <16 x i8>, ptr %1, align 8 %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4) %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5) - store <16 x i8> %6, <16 x i8>* %1, align 8 + store <16 x i8> %6, ptr %1, align 8 ret void } -define arm_aapcs_vfpcc void @aese_via_call3(float %0, <16 x i8>* %1) nounwind { +define arm_aapcs_vfpcc void @aese_via_call3(float %0, ptr %1) nounwind { ; CHECK-FIX-LABEL: aese_via_call3: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: .save {r4, lr} @@ -96,14 +96,14 @@ define arm_aapcs_vfpcc void @aese_via_call3(float %0, <16 x i8>* %1) nounwind { ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4] ; CHECK-FIX-NEXT: pop {r4, pc} %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0) - %4 = load <16 x i8>, <16 x i8>* %1, align 8 + %4 = load <16 x i8>, ptr %1, align 8 %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4) %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5) - store <16 x i8> %6, <16 x i8>* %1, align 8 + store <16 x i8> %6, ptr %1, align 8 ret void } -define arm_aapcs_vfpcc void @aese_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind { +define arm_aapcs_vfpcc void @aese_once_via_ptr(ptr %0, ptr %1) nounwind { ; CHECK-FIX-LABEL: aese_once_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0] @@ -112,11 +112,11 @@ define arm_aapcs_vfpcc void @aese_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nou ; CHECK-FIX-NEXT: aesmc.8 q8, q9 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: bx lr - %3 = load <16 x i8>, <16 x i8>* %1, align 8 - %4 = load <16 x i8>, <16 x i8>* %0, align 8 + %3 = load <16 x i8>, ptr %1, align 8 + %4 = load <16 x i8>, ptr %0, align 8 %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4) %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5) - store <16 x i8> %6, <16 x i8>* %1, align 8 + store <16 x i8> %6, ptr %1, align 8 ret void } @@ -133,7 +133,7 @@ define arm_aapcs_vfpcc <16 x i8> @aese_once_via_val(<16 x i8> %0, <16 x i8> %1) ret <16 x i8> %4 } -define arm_aapcs_vfpcc void @aese_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind { +define arm_aapcs_vfpcc void @aese_twice_via_ptr(ptr %0, ptr %1) nounwind { ; CHECK-FIX-LABEL: aese_twice_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0] @@ -146,15 +146,15 @@ define arm_aapcs_vfpcc void @aese_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) no ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: bx lr - %3 = load <16 x i8>, <16 x i8>* %1, align 8 - %4 = load <16 x i8>, <16 x i8>* %0, align 8 + %3 = load <16 x i8>, ptr %1, align 8 + %4 = load <16 x i8>, ptr %0, align 8 %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4) %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5) - store <16 x i8> %6, <16 x i8>* %1, align 8 - %7 = load <16 x i8>, <16 x i8>* %0, align 8 + store <16 x i8> %6, ptr %1, align 8 + %7 = load <16 x i8>, ptr %0, align 8 %8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7) %9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8) - store <16 x i8> %9, <16 x i8>* %1, align 8 + store <16 x i8> %9, ptr %1, align 8 ret void } @@ -176,7 +176,7 @@ define arm_aapcs_vfpcc <16 x i8> @aese_twice_via_val(<16 x i8> %0, <16 x i8> %1) ret <16 x i8> %6 } -define arm_aapcs_vfpcc void @aese_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aese_loop_via_ptr(i32 %0, ptr %1, ptr %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_loop_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 @@ -214,11 +214,11 @@ define arm_aapcs_vfpcc void @aese_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>* 6: %7 = phi i32 [ %12, %6 ], [ 0, %3 ] - %8 = load <16 x i8>, <16 x i8>* %2, align 8 - %9 = load <16 x i8>, <16 x i8>* %1, align 8 + %8 = load <16 x i8>, ptr %2, align 8 + %9 = load <16 x i8>, ptr %1, align 8 %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %9) %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10) - store <16 x i8> %11, <16 x i8>* %2, align 8 + store <16 x i8> %11, ptr %2, align 8 %12 = add nuw i32 %7, 1 %13 = icmp eq i32 %12, %0 br i1 %13, label %5, label %6 @@ -256,7 +256,7 @@ define arm_aapcs_vfpcc <16 x i8> @aese_loop_via_val(i32 %0, <16 x i8> %1, <16 x br i1 %13, label %5, label %7 } -define arm_aapcs_vfpcc void @aese_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aese_set8_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_set8_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 @@ -280,17 +280,17 @@ define arm_aapcs_vfpcc void @aese_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: bx lr - %4 = load i8, i8* %0, align 1 - %5 = load <16 x i8>, <16 x i8>* %2, align 8 + %4 = load i8, ptr %0, align 1 + %5 = load <16 x i8>, ptr %2, align 8 %6 = insertelement <16 x i8> %5, i8 %4, i64 0 %7 = insertelement <16 x i8> %1, i8 %4, i64 0 %8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7) %9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8) - store <16 x i8> %9, <16 x i8>* %2, align 8 + store <16 x i8> %9, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-LABEL: aese_set8_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -301,16 +301,16 @@ define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: bx lr - %4 = load <16 x i8>, <16 x i8>* %2, align 8 + %4 = load <16 x i8>, ptr %2, align 8 %5 = insertelement <16 x i8> %4, i8 %0, i64 0 %6 = insertelement <16 x i8> %1, i8 %0, i64 0 %7 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %5, <16 x i8> %6) %8 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %7) - store <16 x i8> %8, <16 x i8>* %2, align 8 + store <16 x i8> %8, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_set8_cond_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -336,13 +336,13 @@ define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x br i1 %0, label %5, label %9 5: - %6 = load i8, i8* %1, align 1 - %7 = load <16 x i8>, <16 x i8>* %3, align 8 + %6 = load i8, ptr %1, align 1 + %7 = load <16 x i8>, ptr %3, align 8 %8 = insertelement <16 x i8> %7, i8 %6, i64 0 br label %11 9: - %10 = load <16 x i8>, <16 x i8>* %3, align 8 + %10 = load <16 x i8>, ptr %3, align 8 br label %11 11: @@ -350,7 +350,7 @@ define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x br i1 %0, label %13, label %16 13: - %14 = load i8, i8* %1, align 1 + %14 = load i8, ptr %1, align 1 %15 = insertelement <16 x i8> %2, i8 %14, i64 0 br label %16 @@ -358,11 +358,11 @@ define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x %17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ] %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %17) %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18) - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_set8_cond_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -381,18 +381,18 @@ define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %5 = load <16 x i8>, <16 x i8>* %3, align 8 + %5 = load <16 x i8>, ptr %3, align 8 %6 = insertelement <16 x i8> %5, i8 %1, i64 0 %7 = select i1 %0, <16 x i8> %6, <16 x i8> %5 %8 = insertelement <16 x i8> %2, i8 %1, i64 0 %9 = select i1 %0, <16 x i8> %8, <16 x i8> %2 %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %9) %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10) - store <16 x i8> %11, <16 x i8>* %3, align 8 + store <16 x i8> %11, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_set8_loop_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -411,19 +411,19 @@ define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2 ; CHECK-FIX-NEXT: @ %bb.3: ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %5 = load i8, i8* %1, align 1 + %5 = load i8, ptr %1, align 1 %6 = insertelement <16 x i8> %2, i8 %5, i64 0 - %7 = getelementptr inbounds <16 x i8>, <16 x i8>* %3, i32 0, i32 0 - store i8 %5, i8* %7, align 8 + %7 = getelementptr inbounds <16 x i8>, ptr %3, i32 0, i32 0 + store i8 %5, ptr %7, align 8 %8 = icmp eq i32 %0, 0 br i1 %8, label %12, label %9 9: - %10 = load <16 x i8>, <16 x i8>* %3, align 8 + %10 = load <16 x i8>, ptr %3, align 8 br label %13 11: - store <16 x i8> %17, <16 x i8>* %3, align 8 + store <16 x i8> %17, ptr %3, align 8 br label %12 12: @@ -439,7 +439,7 @@ define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2 br i1 %19, label %11, label %13 } -define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_set8_loop_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -462,11 +462,11 @@ define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x 6: %7 = insertelement <16 x i8> %2, i8 %1, i64 0 - %8 = load <16 x i8>, <16 x i8>* %3, align 8 + %8 = load <16 x i8>, ptr %3, align 8 br label %11 9: - store <16 x i8> %16, <16 x i8>* %3, align 8 + store <16 x i8> %16, ptr %3, align 8 br label %10 10: @@ -483,7 +483,7 @@ define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x br i1 %18, label %9, label %11 } -define arm_aapcs_vfpcc void @aese_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aese_set16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_set16_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 @@ -507,9 +507,9 @@ define arm_aapcs_vfpcc void @aese_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8> ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: bx lr - %4 = load i16, i16* %0, align 2 - %5 = bitcast <16 x i8>* %2 to <8 x i16>* - %6 = load <8 x i16>, <8 x i16>* %5, align 8 + %4 = load i16, ptr %0, align 2 + %5 = bitcast ptr %2 to ptr + %6 = load <8 x i16>, ptr %5, align 8 %7 = insertelement <8 x i16> %6, i16 %4, i64 0 %8 = bitcast <8 x i16> %7 to <16 x i8> %9 = bitcast <16 x i8> %1 to <8 x i16> @@ -517,11 +517,11 @@ define arm_aapcs_vfpcc void @aese_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8> %11 = bitcast <8 x i16> %10 to <16 x i8> %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11) %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12) - store <16 x i8> %13, <16 x i8>* %2, align 8 + store <16 x i8> %13, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-LABEL: aese_set16_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -532,8 +532,8 @@ define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, <1 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: bx lr - %4 = bitcast <16 x i8>* %2 to <8 x i16>* - %5 = load <8 x i16>, <8 x i16>* %4, align 8 + %4 = bitcast ptr %2 to ptr + %5 = load <8 x i16>, ptr %4, align 8 %6 = insertelement <8 x i16> %5, i16 %0, i64 0 %7 = bitcast <8 x i16> %6 to <16 x i8> %8 = bitcast <16 x i8> %1 to <8 x i16> @@ -541,11 +541,11 @@ define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, <1 %10 = bitcast <8 x i16> %9 to <16 x i8> %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10) %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11) - store <16 x i8> %12, <16 x i8>* %2, align 8 + store <16 x i8> %12, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_set16_cond_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -571,15 +571,15 @@ define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 br i1 %0, label %5, label %10 5: - %6 = load i16, i16* %1, align 2 - %7 = bitcast <16 x i8>* %3 to <8 x i16>* - %8 = load <8 x i16>, <8 x i16>* %7, align 8 + %6 = load i16, ptr %1, align 2 + %7 = bitcast ptr %3 to ptr + %8 = load <8 x i16>, ptr %7, align 8 %9 = insertelement <8 x i16> %8, i16 %6, i64 0 br label %13 10: - %11 = bitcast <16 x i8>* %3 to <8 x i16>* - %12 = load <8 x i16>, <8 x i16>* %11, align 8 + %11 = bitcast ptr %3 to ptr + %12 = load <8 x i16>, ptr %11, align 8 br label %13 13: @@ -587,7 +587,7 @@ define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 br i1 %0, label %15, label %19 15: - %16 = load i16, i16* %1, align 2 + %16 = load i16, ptr %1, align 2 %17 = bitcast <16 x i8> %2 to <8 x i16> %18 = insertelement <8 x i16> %17, i16 %16, i64 0 br label %21 @@ -602,11 +602,11 @@ define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 %24 = bitcast <8 x i16> %22 to <16 x i8> %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24) %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25) - store <16 x i8> %26, <16 x i8>* %3, align 8 + store <16 x i8> %26, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_set16_cond_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -625,8 +625,8 @@ define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %5 = bitcast <16 x i8>* %3 to <8 x i16>* - %6 = load <8 x i16>, <8 x i16>* %5, align 8 + %5 = bitcast ptr %3 to ptr + %6 = load <8 x i16>, ptr %5, align 8 %7 = insertelement <8 x i16> %6, i16 %1, i64 0 %8 = select i1 %0, <8 x i16> %7, <8 x i16> %6 %9 = bitcast <16 x i8> %2 to <8 x i16> @@ -636,11 +636,11 @@ define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %13 = bitcast <8 x i16> %11 to <16 x i8> %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13) %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14) - store <16 x i8> %15, <16 x i8>* %3, align 8 + store <16 x i8> %15, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_set16_loop_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -659,21 +659,21 @@ define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> ; CHECK-FIX-NEXT: @ %bb.3: ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %5 = load i16, i16* %1, align 2 + %5 = load i16, ptr %1, align 2 %6 = bitcast <16 x i8> %2 to <8 x i16> %7 = insertelement <8 x i16> %6, i16 %5, i64 0 %8 = bitcast <8 x i16> %7 to <16 x i8> - %9 = bitcast <16 x i8>* %3 to i16* - store i16 %5, i16* %9, align 8 + %9 = bitcast ptr %3 to ptr + store i16 %5, ptr %9, align 8 %10 = icmp eq i32 %0, 0 br i1 %10, label %14, label %11 11: - %12 = load <16 x i8>, <16 x i8>* %3, align 8 + %12 = load <16 x i8>, ptr %3, align 8 br label %15 13: - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 br label %14 14: @@ -689,7 +689,7 @@ define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> br i1 %21, label %13, label %15 } -define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_set16_loop_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -714,8 +714,8 @@ define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 %7 = bitcast <16 x i8> %2 to <8 x i16> %8 = insertelement <8 x i16> %7, i16 %1, i64 0 %9 = bitcast <8 x i16> %8 to <16 x i8> - %10 = bitcast <16 x i8>* %3 to <8 x i16>* - %11 = bitcast <16 x i8>* %3 to i16* + %10 = bitcast ptr %3 to ptr + %11 = bitcast ptr %3 to ptr br label %13 12: @@ -723,19 +723,19 @@ define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 13: %14 = phi i32 [ 0, %6 ], [ %20, %13 ] - %15 = load <8 x i16>, <8 x i16>* %10, align 8 + %15 = load <8 x i16>, ptr %10, align 8 %16 = insertelement <8 x i16> %15, i16 %1, i64 0 %17 = bitcast <8 x i16> %16 to <16 x i8> - store i16 %1, i16* %11, align 8 + store i16 %1, ptr %11, align 8 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9) %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18) - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 %20 = add nuw i32 %14, 1 %21 = icmp eq i32 %20, %0 br i1 %21, label %12, label %13 } -define arm_aapcs_vfpcc void @aese_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aese_set32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_set32_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 @@ -759,9 +759,9 @@ define arm_aapcs_vfpcc void @aese_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8> ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: bx lr - %4 = load i32, i32* %0, align 4 - %5 = bitcast <16 x i8>* %2 to <4 x i32>* - %6 = load <4 x i32>, <4 x i32>* %5, align 8 + %4 = load i32, ptr %0, align 4 + %5 = bitcast ptr %2 to ptr + %6 = load <4 x i32>, ptr %5, align 8 %7 = insertelement <4 x i32> %6, i32 %4, i64 0 %8 = bitcast <4 x i32> %7 to <16 x i8> %9 = bitcast <16 x i8> %1 to <4 x i32> @@ -769,11 +769,11 @@ define arm_aapcs_vfpcc void @aese_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8> %11 = bitcast <4 x i32> %10 to <16 x i8> %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11) %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12) - store <16 x i8> %13, <16 x i8>* %2, align 8 + store <16 x i8> %13, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-LABEL: aese_set32_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -784,8 +784,8 @@ define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: bx lr - %4 = bitcast <16 x i8>* %2 to <4 x i32>* - %5 = load <4 x i32>, <4 x i32>* %4, align 8 + %4 = bitcast ptr %2 to ptr + %5 = load <4 x i32>, ptr %4, align 8 %6 = insertelement <4 x i32> %5, i32 %0, i64 0 %7 = bitcast <4 x i32> %6 to <16 x i8> %8 = bitcast <16 x i8> %1 to <4 x i32> @@ -793,11 +793,11 @@ define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %10 = bitcast <4 x i32> %9 to <16 x i8> %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10) %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11) - store <16 x i8> %12, <16 x i8>* %2, align 8 + store <16 x i8> %12, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_set32_cond_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -823,15 +823,15 @@ define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 br i1 %0, label %5, label %10 5: - %6 = load i32, i32* %1, align 4 - %7 = bitcast <16 x i8>* %3 to <4 x i32>* - %8 = load <4 x i32>, <4 x i32>* %7, align 8 + %6 = load i32, ptr %1, align 4 + %7 = bitcast ptr %3 to ptr + %8 = load <4 x i32>, ptr %7, align 8 %9 = insertelement <4 x i32> %8, i32 %6, i64 0 br label %13 10: - %11 = bitcast <16 x i8>* %3 to <4 x i32>* - %12 = load <4 x i32>, <4 x i32>* %11, align 8 + %11 = bitcast ptr %3 to ptr + %12 = load <4 x i32>, ptr %11, align 8 br label %13 13: @@ -839,7 +839,7 @@ define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 br i1 %0, label %15, label %19 15: - %16 = load i32, i32* %1, align 4 + %16 = load i32, ptr %1, align 4 %17 = bitcast <16 x i8> %2 to <4 x i32> %18 = insertelement <4 x i32> %17, i32 %16, i64 0 br label %21 @@ -854,11 +854,11 @@ define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 %24 = bitcast <4 x i32> %22 to <16 x i8> %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24) %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25) - store <16 x i8> %26, <16 x i8>* %3, align 8 + store <16 x i8> %26, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_set32_cond_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -877,8 +877,8 @@ define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %5 = bitcast <16 x i8>* %3 to <4 x i32>* - %6 = load <4 x i32>, <4 x i32>* %5, align 8 + %5 = bitcast ptr %3 to ptr + %6 = load <4 x i32>, ptr %5, align 8 %7 = insertelement <4 x i32> %6, i32 %1, i64 0 %8 = select i1 %0, <4 x i32> %7, <4 x i32> %6 %9 = bitcast <16 x i8> %2 to <4 x i32> @@ -888,11 +888,11 @@ define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 %13 = bitcast <4 x i32> %11 to <16 x i8> %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13) %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14) - store <16 x i8> %15, <16 x i8>* %3, align 8 + store <16 x i8> %15, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_set32_loop_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -911,21 +911,21 @@ define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> ; CHECK-FIX-NEXT: @ %bb.3: ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %5 = load i32, i32* %1, align 4 + %5 = load i32, ptr %1, align 4 %6 = bitcast <16 x i8> %2 to <4 x i32> %7 = insertelement <4 x i32> %6, i32 %5, i64 0 %8 = bitcast <4 x i32> %7 to <16 x i8> - %9 = bitcast <16 x i8>* %3 to i32* - store i32 %5, i32* %9, align 8 + %9 = bitcast ptr %3 to ptr + store i32 %5, ptr %9, align 8 %10 = icmp eq i32 %0, 0 br i1 %10, label %14, label %11 11: - %12 = load <16 x i8>, <16 x i8>* %3, align 8 + %12 = load <16 x i8>, ptr %3, align 8 br label %15 13: - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 br label %14 14: @@ -941,7 +941,7 @@ define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> br i1 %21, label %13, label %15 } -define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_set32_loop_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -966,8 +966,8 @@ define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> % %7 = bitcast <16 x i8> %2 to <4 x i32> %8 = insertelement <4 x i32> %7, i32 %1, i64 0 %9 = bitcast <4 x i32> %8 to <16 x i8> - %10 = bitcast <16 x i8>* %3 to <4 x i32>* - %11 = bitcast <16 x i8>* %3 to i32* + %10 = bitcast ptr %3 to ptr + %11 = bitcast ptr %3 to ptr br label %13 12: @@ -975,19 +975,19 @@ define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> % 13: %14 = phi i32 [ 0, %6 ], [ %20, %13 ] - %15 = load <4 x i32>, <4 x i32>* %10, align 8 + %15 = load <4 x i32>, ptr %10, align 8 %16 = insertelement <4 x i32> %15, i32 %1, i64 0 %17 = bitcast <4 x i32> %16 to <16 x i8> - store i32 %1, i32* %11, align 8 + store i32 %1, ptr %11, align 8 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9) %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18) - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 %20 = add nuw i32 %14, 1 %21 = icmp eq i32 %20, %0 br i1 %21, label %12, label %13 } -define arm_aapcs_vfpcc void @aese_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aese_set64_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_set64_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 @@ -1009,9 +1009,9 @@ define arm_aapcs_vfpcc void @aese_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8> ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: bx lr - %4 = load i64, i64* %0, align 8 - %5 = bitcast <16 x i8>* %2 to <2 x i64>* - %6 = load <2 x i64>, <2 x i64>* %5, align 8 + %4 = load i64, ptr %0, align 8 + %5 = bitcast ptr %2 to ptr + %6 = load <2 x i64>, ptr %5, align 8 %7 = insertelement <2 x i64> %6, i64 %4, i64 0 %8 = bitcast <2 x i64> %7 to <16 x i8> %9 = bitcast <16 x i8> %1 to <2 x i64> @@ -1019,11 +1019,11 @@ define arm_aapcs_vfpcc void @aese_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8> %11 = bitcast <2 x i64> %10 to <16 x i8> %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11) %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12) - store <16 x i8> %13, <16 x i8>* %2, align 8 + store <16 x i8> %13, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-LABEL: aese_set64_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -1036,8 +1036,8 @@ define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %4 = bitcast <16 x i8>* %2 to <2 x i64>* - %5 = load <2 x i64>, <2 x i64>* %4, align 8 + %4 = bitcast ptr %2 to ptr + %5 = load <2 x i64>, ptr %4, align 8 %6 = insertelement <2 x i64> %5, i64 %0, i64 0 %7 = bitcast <2 x i64> %6 to <16 x i8> %8 = bitcast <16 x i8> %1 to <2 x i64> @@ -1045,11 +1045,11 @@ define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %10 = bitcast <2 x i64> %9 to <16 x i8> %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10) %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11) - store <16 x i8> %12, <16 x i8>* %2, align 8 + store <16 x i8> %12, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_set64_cond_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 @@ -1091,15 +1091,15 @@ define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 br i1 %0, label %5, label %10 5: - %6 = load i64, i64* %1, align 8 - %7 = bitcast <16 x i8>* %3 to <2 x i64>* - %8 = load <2 x i64>, <2 x i64>* %7, align 8 + %6 = load i64, ptr %1, align 8 + %7 = bitcast ptr %3 to ptr + %8 = load <2 x i64>, ptr %7, align 8 %9 = insertelement <2 x i64> %8, i64 %6, i64 0 br label %13 10: - %11 = bitcast <16 x i8>* %3 to <2 x i64>* - %12 = load <2 x i64>, <2 x i64>* %11, align 8 + %11 = bitcast ptr %3 to ptr + %12 = load <2 x i64>, ptr %11, align 8 br label %13 13: @@ -1107,7 +1107,7 @@ define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 br i1 %0, label %15, label %19 15: - %16 = load i64, i64* %1, align 8 + %16 = load i64, ptr %1, align 8 %17 = bitcast <16 x i8> %2 to <2 x i64> %18 = insertelement <2 x i64> %17, i64 %16, i64 0 br label %21 @@ -1122,11 +1122,11 @@ define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 %24 = bitcast <2 x i64> %22 to <16 x i8> %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24) %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25) - store <16 x i8> %26, <16 x i8>* %3, align 8 + store <16 x i8> %26, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_set64_cond_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -1148,8 +1148,8 @@ define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: bx lr - %5 = bitcast <16 x i8>* %3 to <2 x i64>* - %6 = load <2 x i64>, <2 x i64>* %5, align 8 + %5 = bitcast ptr %3 to ptr + %6 = load <2 x i64>, ptr %5, align 8 %7 = insertelement <2 x i64> %6, i64 %1, i64 0 %8 = select i1 %0, <2 x i64> %7, <2 x i64> %6 %9 = bitcast <16 x i8> %2 to <2 x i64> @@ -1159,11 +1159,11 @@ define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 %13 = bitcast <2 x i64> %11 to <16 x i8> %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13) %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14) - store <16 x i8> %15, <16 x i8>* %3, align 8 + store <16 x i8> %15, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_set64_loop_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 @@ -1206,21 +1206,21 @@ define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> ; CHECK-CORTEX-FIX-NEXT: @ %bb.3: ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r11, pc} - %5 = load i64, i64* %1, align 8 + %5 = load i64, ptr %1, align 8 %6 = bitcast <16 x i8> %2 to <2 x i64> %7 = insertelement <2 x i64> %6, i64 %5, i64 0 %8 = bitcast <2 x i64> %7 to <16 x i8> - %9 = bitcast <16 x i8>* %3 to i64* - store i64 %5, i64* %9, align 8 + %9 = bitcast ptr %3 to ptr + store i64 %5, ptr %9, align 8 %10 = icmp eq i32 %0, 0 br i1 %10, label %14, label %11 11: - %12 = load <16 x i8>, <16 x i8>* %3, align 8 + %12 = load <16 x i8>, ptr %3, align 8 br label %15 13: - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 br label %14 14: @@ -1236,7 +1236,7 @@ define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> br i1 %21, label %13, label %15 } -define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_set64_loop_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -1264,8 +1264,8 @@ define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> % %7 = bitcast <16 x i8> %2 to <2 x i64> %8 = insertelement <2 x i64> %7, i64 %1, i64 0 %9 = bitcast <2 x i64> %8 to <16 x i8> - %10 = bitcast <16 x i8>* %3 to <2 x i64>* - %11 = bitcast <16 x i8>* %3 to i64* + %10 = bitcast ptr %3 to ptr + %11 = bitcast ptr %3 to ptr br label %13 12: @@ -1273,19 +1273,19 @@ define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> % 13: %14 = phi i32 [ 0, %6 ], [ %20, %13 ] - %15 = load <2 x i64>, <2 x i64>* %10, align 8 + %15 = load <2 x i64>, ptr %10, align 8 %16 = insertelement <2 x i64> %15, i64 %1, i64 0 %17 = bitcast <2 x i64> %16 to <16 x i8> - store i64 %1, i64* %11, align 8 + store i64 %1, ptr %11, align 8 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9) %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18) - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 %20 = add nuw i32 %14, 1 %21 = icmp eq i32 %20, %0 br i1 %21, label %12, label %13 } -define arm_aapcs_vfpcc void @aese_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aese_setf16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_setf16_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 @@ -1309,10 +1309,10 @@ define arm_aapcs_vfpcc void @aese_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: bx lr - %4 = bitcast half* %0 to i16* - %5 = load i16, i16* %4, align 2 - %6 = bitcast <16 x i8>* %2 to <8 x i16>* - %7 = load <8 x i16>, <8 x i16>* %6, align 8 + %4 = bitcast ptr %0 to ptr + %5 = load i16, ptr %4, align 2 + %6 = bitcast ptr %2 to ptr + %7 = load <8 x i16>, ptr %6, align 8 %8 = insertelement <8 x i16> %7, i16 %5, i64 0 %9 = bitcast <8 x i16> %8 to <16 x i8> %10 = bitcast <16 x i8> %1 to <8 x i16> @@ -1320,11 +1320,11 @@ define arm_aapcs_vfpcc void @aese_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i %12 = bitcast <8 x i16> %11 to <16 x i8> %13 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %12) %14 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %13) - store <16 x i8> %14, <16 x i8>* %2, align 8 + store <16 x i8> %14, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-LABEL: aese_setf16_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q1, q1, q1 @@ -1336,8 +1336,8 @@ define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, <16 x i8 ; CHECK-FIX-NEXT: aesmc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0] ; CHECK-FIX-NEXT: bx lr - %4 = bitcast <16 x i8>* %2 to <8 x i16>* - %5 = load <8 x i16>, <8 x i16>* %4, align 8 + %4 = bitcast ptr %2 to ptr + %5 = load <8 x i16>, ptr %4, align 8 %6 = bitcast half %0 to i16 %7 = insertelement <8 x i16> %5, i16 %6, i64 0 %8 = bitcast <8 x i16> %7 to <16 x i8> @@ -1346,11 +1346,11 @@ define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, <16 x i8 %11 = bitcast <8 x i16> %10 to <16 x i8> %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11) %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12) - store <16 x i8> %13, <16 x i8>* %2, align 8 + store <16 x i8> %13, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -1532,17 +1532,17 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < br i1 %0, label %5, label %12 5: - %6 = bitcast half* %1 to i16* - %7 = load i16, i16* %6, align 2 - %8 = bitcast <16 x i8>* %3 to <8 x i16>* - %9 = load <8 x i16>, <8 x i16>* %8, align 8 + %6 = bitcast ptr %1 to ptr + %7 = load i16, ptr %6, align 2 + %8 = bitcast ptr %3 to ptr + %9 = load <8 x i16>, ptr %8, align 8 %10 = insertelement <8 x i16> %9, i16 %7, i64 0 %11 = bitcast <8 x i16> %10 to <8 x half> br label %15 12: - %13 = bitcast <16 x i8>* %3 to <8 x half>* - %14 = load <8 x half>, <8 x half>* %13, align 8 + %13 = bitcast ptr %3 to ptr + %14 = load <8 x half>, ptr %13, align 8 br label %15 15: @@ -1550,8 +1550,8 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < br i1 %0, label %17, label %23 17: - %18 = bitcast half* %1 to i16* - %19 = load i16, i16* %18, align 2 + %18 = bitcast ptr %1 to ptr + %19 = load i16, ptr %18, align 2 %20 = bitcast <16 x i8> %2 to <8 x i16> %21 = insertelement <8 x i16> %20, i16 %19, i64 0 %22 = bitcast <8 x i16> %21 to <8 x half> @@ -1567,11 +1567,11 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < %28 = bitcast <8 x half> %26 to <16 x i8> %29 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %27, <16 x i8> %28) %30 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %29) - store <16 x i8> %30, <16 x i8>* %3, align 8 + store <16 x i8> %30, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_val: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -1753,16 +1753,16 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <1 br i1 %0, label %5, label %11 5: - %6 = bitcast <16 x i8>* %3 to <8 x i16>* - %7 = load <8 x i16>, <8 x i16>* %6, align 8 + %6 = bitcast ptr %3 to ptr + %7 = load <8 x i16>, ptr %6, align 8 %8 = bitcast half %1 to i16 %9 = insertelement <8 x i16> %7, i16 %8, i64 0 %10 = bitcast <8 x i16> %9 to <8 x half> br label %14 11: - %12 = bitcast <16 x i8>* %3 to <8 x half>* - %13 = load <8 x half>, <8 x half>* %12, align 8 + %12 = bitcast ptr %3 to ptr + %13 = load <8 x half>, ptr %12, align 8 br label %14 14: @@ -1786,11 +1786,11 @@ define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <1 %26 = bitcast <8 x half> %24 to <16 x i8> %27 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %25, <16 x i8> %26) %28 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %27) - store <16 x i8> %28, <16 x i8>* %3, align 8 + store <16 x i8> %28, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_setf16_loop_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -1809,22 +1809,22 @@ define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8 ; CHECK-FIX-NEXT: @ %bb.3: ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %5 = bitcast half* %1 to i16* - %6 = load i16, i16* %5, align 2 + %5 = bitcast ptr %1 to ptr + %6 = load i16, ptr %5, align 2 %7 = bitcast <16 x i8> %2 to <8 x i16> %8 = insertelement <8 x i16> %7, i16 %6, i64 0 %9 = bitcast <8 x i16> %8 to <16 x i8> - %10 = bitcast <16 x i8>* %3 to i16* - store i16 %6, i16* %10, align 8 + %10 = bitcast ptr %3 to ptr + store i16 %6, ptr %10, align 8 %11 = icmp eq i32 %0, 0 br i1 %11, label %15, label %12 12: - %13 = load <16 x i8>, <16 x i8>* %3, align 8 + %13 = load <16 x i8>, ptr %3, align 8 br label %16 14: - store <16 x i8> %20, <16 x i8>* %3, align 8 + store <16 x i8> %20, ptr %3, align 8 br label %15 15: @@ -1840,7 +1840,7 @@ define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8 br i1 %22, label %14, label %16 } -define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_setf16_loop_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q1, q1, q1 @@ -1867,8 +1867,8 @@ define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %8 = bitcast half %1 to i16 %9 = insertelement <8 x i16> %7, i16 %8, i64 0 %10 = bitcast <8 x i16> %9 to <16 x i8> - %11 = bitcast <16 x i8>* %3 to <8 x i16>* - %12 = bitcast <16 x i8>* %3 to half* + %11 = bitcast ptr %3 to ptr + %12 = bitcast ptr %3 to ptr br label %14 13: @@ -1876,19 +1876,19 @@ define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> 14: %15 = phi i32 [ 0, %6 ], [ %21, %14 ] - %16 = load <8 x i16>, <8 x i16>* %11, align 8 + %16 = load <8 x i16>, ptr %11, align 8 %17 = insertelement <8 x i16> %16, i16 %8, i64 0 %18 = bitcast <8 x i16> %17 to <16 x i8> - store half %1, half* %12, align 8 + store half %1, ptr %12, align 8 %19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %18, <16 x i8> %10) %20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19) - store <16 x i8> %20, <16 x i8>* %3, align 8 + store <16 x i8> %20, ptr %3, align 8 %21 = add nuw i32 %15, 1 %22 = icmp eq i32 %21, %0 br i1 %22, label %13, label %14 } -define arm_aapcs_vfpcc void @aese_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aese_setf32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-LABEL: aese_setf32_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vldr s0, [r0] @@ -1900,9 +1900,9 @@ define arm_aapcs_vfpcc void @aese_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x ; CHECK-FIX-NEXT: aesmc.8 q8, q1 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: bx lr - %4 = load float, float* %0, align 4 - %5 = bitcast <16 x i8>* %2 to <4 x float>* - %6 = load <4 x float>, <4 x float>* %5, align 8 + %4 = load float, ptr %0, align 4 + %5 = bitcast ptr %2 to ptr + %6 = load <4 x float>, ptr %5, align 8 %7 = insertelement <4 x float> %6, float %4, i64 0 %8 = bitcast <4 x float> %7 to <16 x i8> %9 = bitcast <16 x i8> %1 to <4 x float> @@ -1910,11 +1910,11 @@ define arm_aapcs_vfpcc void @aese_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x %11 = bitcast <4 x float> %10 to <16 x i8> %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11) %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12) - store <16 x i8> %13, <16 x i8>* %2, align 8 + store <16 x i8> %13, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-LABEL: aese_setf32_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vmov.f32 s4, s0 @@ -1926,8 +1926,8 @@ define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, <16 x i ; CHECK-FIX-NEXT: aesmc.8 q8, q0 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0] ; CHECK-FIX-NEXT: bx lr - %4 = bitcast <16 x i8>* %2 to <4 x float>* - %5 = load <4 x float>, <4 x float>* %4, align 8 + %4 = bitcast ptr %2 to ptr + %5 = load <4 x float>, ptr %4, align 8 %6 = insertelement <4 x float> %5, float %0, i64 0 %7 = bitcast <4 x float> %6 to <16 x i8> %8 = bitcast <16 x i8> %1 to <4 x float> @@ -1935,11 +1935,11 @@ define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, <16 x i %10 = bitcast <4 x float> %9 to <16 x i8> %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10) %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11) - store <16 x i8> %12, <16 x i8>* %2, align 8 + store <16 x i8> %12, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aese_setf32_cond_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -1965,15 +1965,15 @@ define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, float* %1, br i1 %0, label %5, label %10 5: - %6 = load float, float* %1, align 4 - %7 = bitcast <16 x i8>* %3 to <4 x float>* - %8 = load <4 x float>, <4 x float>* %7, align 8 + %6 = load float, ptr %1, align 4 + %7 = bitcast ptr %3 to ptr + %8 = load <4 x float>, ptr %7, align 8 %9 = insertelement <4 x float> %8, float %6, i64 0 br label %13 10: - %11 = bitcast <16 x i8>* %3 to <4 x float>* - %12 = load <4 x float>, <4 x float>* %11, align 8 + %11 = bitcast ptr %3 to ptr + %12 = load <4 x float>, ptr %11, align 8 br label %13 13: @@ -1981,7 +1981,7 @@ define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, float* %1, br i1 %0, label %15, label %19 15: - %16 = load float, float* %1, align 4 + %16 = load float, ptr %1, align 4 %17 = bitcast <16 x i8> %2 to <4 x float> %18 = insertelement <4 x float> %17, float %16, i64 0 br label %21 @@ -1996,11 +1996,11 @@ define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, float* %1, %24 = bitcast <4 x float> %22 to <16 x i8> %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24) %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25) - store <16 x i8> %26, <16 x i8>* %3, align 8 + store <16 x i8> %26, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_setf32_cond_via_val: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1] @@ -2028,8 +2028,8 @@ define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, < ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q2 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: bx lr - %5 = bitcast <16 x i8>* %3 to <4 x float>* - %6 = load <4 x float>, <4 x float>* %5, align 8 + %5 = bitcast ptr %3 to ptr + %6 = load <4 x float>, ptr %5, align 8 %7 = insertelement <4 x float> %6, float %1, i64 0 %8 = select i1 %0, <4 x float> %7, <4 x float> %6 %9 = bitcast <16 x i8> %2 to <4 x float> @@ -2039,11 +2039,11 @@ define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, < %13 = bitcast <4 x float> %11 to <16 x i8> %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13) %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14) - store <16 x i8> %15, <16 x i8>* %3, align 8 + store <16 x i8> %15, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vldr s4, [r1] @@ -2081,21 +2081,21 @@ define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i ; CHECK-CORTEX-FIX-NEXT: @ %bb.3: ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-CORTEX-FIX-NEXT: bx lr - %5 = load float, float* %1, align 4 + %5 = load float, ptr %1, align 4 %6 = bitcast <16 x i8> %2 to <4 x float> %7 = insertelement <4 x float> %6, float %5, i64 0 %8 = bitcast <4 x float> %7 to <16 x i8> - %9 = bitcast <16 x i8>* %3 to float* - store float %5, float* %9, align 8 + %9 = bitcast ptr %3 to ptr + store float %5, ptr %9, align 8 %10 = icmp eq i32 %0, 0 br i1 %10, label %14, label %11 11: - %12 = load <16 x i8>, <16 x i8>* %3, align 8 + %12 = load <16 x i8>, ptr %3, align 8 br label %15 13: - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 br label %14 14: @@ -2111,7 +2111,7 @@ define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i br i1 %21, label %13, label %15 } -define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_val: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 @@ -2156,8 +2156,8 @@ define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8 %7 = bitcast <16 x i8> %2 to <4 x float> %8 = insertelement <4 x float> %7, float %1, i64 0 %9 = bitcast <4 x float> %8 to <16 x i8> - %10 = bitcast <16 x i8>* %3 to <4 x float>* - %11 = bitcast <16 x i8>* %3 to float* + %10 = bitcast ptr %3 to ptr + %11 = bitcast ptr %3 to ptr br label %13 12: @@ -2165,19 +2165,19 @@ define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8 13: %14 = phi i32 [ 0, %6 ], [ %20, %13 ] - %15 = load <4 x float>, <4 x float>* %10, align 8 + %15 = load <4 x float>, ptr %10, align 8 %16 = insertelement <4 x float> %15, float %1, i64 0 %17 = bitcast <4 x float> %16 to <16 x i8> - store float %1, float* %11, align 8 + store float %1, ptr %11, align 8 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9) %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18) - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 %20 = add nuw i32 %14, 1 %21 = icmp eq i32 %20, %0 br i1 %21, label %12, label %13 } -define arm_aapcs_vfpcc void @aesd_zero(<16 x i8>* %0) nounwind { +define arm_aapcs_vfpcc void @aesd_zero(ptr %0) nounwind { ; CHECK-FIX-LABEL: aesd_zero: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0] @@ -2186,14 +2186,14 @@ define arm_aapcs_vfpcc void @aesd_zero(<16 x i8>* %0) nounwind { ; CHECK-FIX-NEXT: aesimc.8 q8, q9 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0] ; CHECK-FIX-NEXT: bx lr - %2 = load <16 x i8>, <16 x i8>* %0, align 8 + %2 = load <16 x i8>, ptr %0, align 8 %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> zeroinitializer, <16 x i8> %2) %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3) - store <16 x i8> %4, <16 x i8>* %0, align 8 + store <16 x i8> %4, ptr %0, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_via_call1(<16 x i8>* %0) nounwind { +define arm_aapcs_vfpcc void @aesd_via_call1(ptr %0) nounwind { ; CHECK-FIX-LABEL: aesd_via_call1: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: .save {r4, lr} @@ -2207,14 +2207,14 @@ define arm_aapcs_vfpcc void @aesd_via_call1(<16 x i8>* %0) nounwind { ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4] ; CHECK-FIX-NEXT: pop {r4, pc} %2 = call arm_aapcs_vfpcc <16 x i8> @get_input() - %3 = load <16 x i8>, <16 x i8>* %0, align 8 + %3 = load <16 x i8>, ptr %0, align 8 %4 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %2, <16 x i8> %3) %5 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %4) - store <16 x i8> %5, <16 x i8>* %0, align 8 + store <16 x i8> %5, ptr %0, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_via_call2(half %0, <16 x i8>* %1) nounwind { +define arm_aapcs_vfpcc void @aesd_via_call2(half %0, ptr %1) nounwind { ; CHECK-FIX-LABEL: aesd_via_call2: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: .save {r4, lr} @@ -2228,14 +2228,14 @@ define arm_aapcs_vfpcc void @aesd_via_call2(half %0, <16 x i8>* %1) nounwind { ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4] ; CHECK-FIX-NEXT: pop {r4, pc} %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0) - %4 = load <16 x i8>, <16 x i8>* %1, align 8 + %4 = load <16 x i8>, ptr %1, align 8 %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4) %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5) - store <16 x i8> %6, <16 x i8>* %1, align 8 + store <16 x i8> %6, ptr %1, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_via_call3(float %0, <16 x i8>* %1) nounwind { +define arm_aapcs_vfpcc void @aesd_via_call3(float %0, ptr %1) nounwind { ; CHECK-FIX-LABEL: aesd_via_call3: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: .save {r4, lr} @@ -2249,14 +2249,14 @@ define arm_aapcs_vfpcc void @aesd_via_call3(float %0, <16 x i8>* %1) nounwind { ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4] ; CHECK-FIX-NEXT: pop {r4, pc} %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0) - %4 = load <16 x i8>, <16 x i8>* %1, align 8 + %4 = load <16 x i8>, ptr %1, align 8 %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4) %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5) - store <16 x i8> %6, <16 x i8>* %1, align 8 + store <16 x i8> %6, ptr %1, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind { +define arm_aapcs_vfpcc void @aesd_once_via_ptr(ptr %0, ptr %1) nounwind { ; CHECK-FIX-LABEL: aesd_once_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0] @@ -2265,11 +2265,11 @@ define arm_aapcs_vfpcc void @aesd_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nou ; CHECK-FIX-NEXT: aesimc.8 q8, q9 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: bx lr - %3 = load <16 x i8>, <16 x i8>* %1, align 8 - %4 = load <16 x i8>, <16 x i8>* %0, align 8 + %3 = load <16 x i8>, ptr %1, align 8 + %4 = load <16 x i8>, ptr %0, align 8 %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4) %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5) - store <16 x i8> %6, <16 x i8>* %1, align 8 + store <16 x i8> %6, ptr %1, align 8 ret void } @@ -2286,7 +2286,7 @@ define arm_aapcs_vfpcc <16 x i8> @aesd_once_via_val(<16 x i8> %0, <16 x i8> %1) ret <16 x i8> %4 } -define arm_aapcs_vfpcc void @aesd_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind { +define arm_aapcs_vfpcc void @aesd_twice_via_ptr(ptr %0, ptr %1) nounwind { ; CHECK-FIX-LABEL: aesd_twice_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0] @@ -2299,15 +2299,15 @@ define arm_aapcs_vfpcc void @aesd_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) no ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: bx lr - %3 = load <16 x i8>, <16 x i8>* %1, align 8 - %4 = load <16 x i8>, <16 x i8>* %0, align 8 + %3 = load <16 x i8>, ptr %1, align 8 + %4 = load <16 x i8>, ptr %0, align 8 %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4) %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5) - store <16 x i8> %6, <16 x i8>* %1, align 8 - %7 = load <16 x i8>, <16 x i8>* %0, align 8 + store <16 x i8> %6, ptr %1, align 8 + %7 = load <16 x i8>, ptr %0, align 8 %8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7) %9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8) - store <16 x i8> %9, <16 x i8>* %1, align 8 + store <16 x i8> %9, ptr %1, align 8 ret void } @@ -2329,7 +2329,7 @@ define arm_aapcs_vfpcc <16 x i8> @aesd_twice_via_val(<16 x i8> %0, <16 x i8> %1) ret <16 x i8> %6 } -define arm_aapcs_vfpcc void @aesd_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aesd_loop_via_ptr(i32 %0, ptr %1, ptr %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_loop_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 @@ -2367,11 +2367,11 @@ define arm_aapcs_vfpcc void @aesd_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>* 6: %7 = phi i32 [ %12, %6 ], [ 0, %3 ] - %8 = load <16 x i8>, <16 x i8>* %2, align 8 - %9 = load <16 x i8>, <16 x i8>* %1, align 8 + %8 = load <16 x i8>, ptr %2, align 8 + %9 = load <16 x i8>, ptr %1, align 8 %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %9) %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10) - store <16 x i8> %11, <16 x i8>* %2, align 8 + store <16 x i8> %11, ptr %2, align 8 %12 = add nuw i32 %7, 1 %13 = icmp eq i32 %12, %0 br i1 %13, label %5, label %6 @@ -2409,7 +2409,7 @@ define arm_aapcs_vfpcc <16 x i8> @aesd_loop_via_val(i32 %0, <16 x i8> %1, <16 x br i1 %13, label %5, label %7 } -define arm_aapcs_vfpcc void @aesd_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aesd_set8_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_set8_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 @@ -2433,17 +2433,17 @@ define arm_aapcs_vfpcc void @aesd_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: bx lr - %4 = load i8, i8* %0, align 1 - %5 = load <16 x i8>, <16 x i8>* %2, align 8 + %4 = load i8, ptr %0, align 1 + %5 = load <16 x i8>, ptr %2, align 8 %6 = insertelement <16 x i8> %5, i8 %4, i64 0 %7 = insertelement <16 x i8> %1, i8 %4, i64 0 %8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7) %9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8) - store <16 x i8> %9, <16 x i8>* %2, align 8 + store <16 x i8> %9, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-LABEL: aesd_set8_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -2454,16 +2454,16 @@ define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: bx lr - %4 = load <16 x i8>, <16 x i8>* %2, align 8 + %4 = load <16 x i8>, ptr %2, align 8 %5 = insertelement <16 x i8> %4, i8 %0, i64 0 %6 = insertelement <16 x i8> %1, i8 %0, i64 0 %7 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %5, <16 x i8> %6) %8 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %7) - store <16 x i8> %8, <16 x i8>* %2, align 8 + store <16 x i8> %8, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_set8_cond_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -2489,13 +2489,13 @@ define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x br i1 %0, label %5, label %9 5: - %6 = load i8, i8* %1, align 1 - %7 = load <16 x i8>, <16 x i8>* %3, align 8 + %6 = load i8, ptr %1, align 1 + %7 = load <16 x i8>, ptr %3, align 8 %8 = insertelement <16 x i8> %7, i8 %6, i64 0 br label %11 9: - %10 = load <16 x i8>, <16 x i8>* %3, align 8 + %10 = load <16 x i8>, ptr %3, align 8 br label %11 11: @@ -2503,7 +2503,7 @@ define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x br i1 %0, label %13, label %16 13: - %14 = load i8, i8* %1, align 1 + %14 = load i8, ptr %1, align 1 %15 = insertelement <16 x i8> %2, i8 %14, i64 0 br label %16 @@ -2511,11 +2511,11 @@ define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x %17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ] %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %17) %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18) - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_set8_cond_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -2534,18 +2534,18 @@ define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %5 = load <16 x i8>, <16 x i8>* %3, align 8 + %5 = load <16 x i8>, ptr %3, align 8 %6 = insertelement <16 x i8> %5, i8 %1, i64 0 %7 = select i1 %0, <16 x i8> %6, <16 x i8> %5 %8 = insertelement <16 x i8> %2, i8 %1, i64 0 %9 = select i1 %0, <16 x i8> %8, <16 x i8> %2 %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %9) %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10) - store <16 x i8> %11, <16 x i8>* %3, align 8 + store <16 x i8> %11, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_set8_loop_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -2564,19 +2564,19 @@ define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2 ; CHECK-FIX-NEXT: @ %bb.3: ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %5 = load i8, i8* %1, align 1 + %5 = load i8, ptr %1, align 1 %6 = insertelement <16 x i8> %2, i8 %5, i64 0 - %7 = getelementptr inbounds <16 x i8>, <16 x i8>* %3, i32 0, i32 0 - store i8 %5, i8* %7, align 8 + %7 = getelementptr inbounds <16 x i8>, ptr %3, i32 0, i32 0 + store i8 %5, ptr %7, align 8 %8 = icmp eq i32 %0, 0 br i1 %8, label %12, label %9 9: - %10 = load <16 x i8>, <16 x i8>* %3, align 8 + %10 = load <16 x i8>, ptr %3, align 8 br label %13 11: - store <16 x i8> %17, <16 x i8>* %3, align 8 + store <16 x i8> %17, ptr %3, align 8 br label %12 12: @@ -2592,7 +2592,7 @@ define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2 br i1 %19, label %11, label %13 } -define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_set8_loop_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -2615,11 +2615,11 @@ define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x 6: %7 = insertelement <16 x i8> %2, i8 %1, i64 0 - %8 = load <16 x i8>, <16 x i8>* %3, align 8 + %8 = load <16 x i8>, ptr %3, align 8 br label %11 9: - store <16 x i8> %16, <16 x i8>* %3, align 8 + store <16 x i8> %16, ptr %3, align 8 br label %10 10: @@ -2636,7 +2636,7 @@ define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x br i1 %18, label %9, label %11 } -define arm_aapcs_vfpcc void @aesd_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aesd_set16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_set16_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 @@ -2660,9 +2660,9 @@ define arm_aapcs_vfpcc void @aesd_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8> ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: bx lr - %4 = load i16, i16* %0, align 2 - %5 = bitcast <16 x i8>* %2 to <8 x i16>* - %6 = load <8 x i16>, <8 x i16>* %5, align 8 + %4 = load i16, ptr %0, align 2 + %5 = bitcast ptr %2 to ptr + %6 = load <8 x i16>, ptr %5, align 8 %7 = insertelement <8 x i16> %6, i16 %4, i64 0 %8 = bitcast <8 x i16> %7 to <16 x i8> %9 = bitcast <16 x i8> %1 to <8 x i16> @@ -2670,11 +2670,11 @@ define arm_aapcs_vfpcc void @aesd_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8> %11 = bitcast <8 x i16> %10 to <16 x i8> %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11) %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12) - store <16 x i8> %13, <16 x i8>* %2, align 8 + store <16 x i8> %13, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-LABEL: aesd_set16_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -2685,8 +2685,8 @@ define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, <1 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: bx lr - %4 = bitcast <16 x i8>* %2 to <8 x i16>* - %5 = load <8 x i16>, <8 x i16>* %4, align 8 + %4 = bitcast ptr %2 to ptr + %5 = load <8 x i16>, ptr %4, align 8 %6 = insertelement <8 x i16> %5, i16 %0, i64 0 %7 = bitcast <8 x i16> %6 to <16 x i8> %8 = bitcast <16 x i8> %1 to <8 x i16> @@ -2694,11 +2694,11 @@ define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, <1 %10 = bitcast <8 x i16> %9 to <16 x i8> %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10) %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11) - store <16 x i8> %12, <16 x i8>* %2, align 8 + store <16 x i8> %12, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_set16_cond_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -2724,15 +2724,15 @@ define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 br i1 %0, label %5, label %10 5: - %6 = load i16, i16* %1, align 2 - %7 = bitcast <16 x i8>* %3 to <8 x i16>* - %8 = load <8 x i16>, <8 x i16>* %7, align 8 + %6 = load i16, ptr %1, align 2 + %7 = bitcast ptr %3 to ptr + %8 = load <8 x i16>, ptr %7, align 8 %9 = insertelement <8 x i16> %8, i16 %6, i64 0 br label %13 10: - %11 = bitcast <16 x i8>* %3 to <8 x i16>* - %12 = load <8 x i16>, <8 x i16>* %11, align 8 + %11 = bitcast ptr %3 to ptr + %12 = load <8 x i16>, ptr %11, align 8 br label %13 13: @@ -2740,7 +2740,7 @@ define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 br i1 %0, label %15, label %19 15: - %16 = load i16, i16* %1, align 2 + %16 = load i16, ptr %1, align 2 %17 = bitcast <16 x i8> %2 to <8 x i16> %18 = insertelement <8 x i16> %17, i16 %16, i64 0 br label %21 @@ -2755,11 +2755,11 @@ define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 %24 = bitcast <8 x i16> %22 to <16 x i8> %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24) %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25) - store <16 x i8> %26, <16 x i8>* %3, align 8 + store <16 x i8> %26, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_set16_cond_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -2778,8 +2778,8 @@ define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %5 = bitcast <16 x i8>* %3 to <8 x i16>* - %6 = load <8 x i16>, <8 x i16>* %5, align 8 + %5 = bitcast ptr %3 to ptr + %6 = load <8 x i16>, ptr %5, align 8 %7 = insertelement <8 x i16> %6, i16 %1, i64 0 %8 = select i1 %0, <8 x i16> %7, <8 x i16> %6 %9 = bitcast <16 x i8> %2 to <8 x i16> @@ -2789,11 +2789,11 @@ define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %13 = bitcast <8 x i16> %11 to <16 x i8> %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13) %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14) - store <16 x i8> %15, <16 x i8>* %3, align 8 + store <16 x i8> %15, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_set16_loop_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -2812,21 +2812,21 @@ define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> ; CHECK-FIX-NEXT: @ %bb.3: ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %5 = load i16, i16* %1, align 2 + %5 = load i16, ptr %1, align 2 %6 = bitcast <16 x i8> %2 to <8 x i16> %7 = insertelement <8 x i16> %6, i16 %5, i64 0 %8 = bitcast <8 x i16> %7 to <16 x i8> - %9 = bitcast <16 x i8>* %3 to i16* - store i16 %5, i16* %9, align 8 + %9 = bitcast ptr %3 to ptr + store i16 %5, ptr %9, align 8 %10 = icmp eq i32 %0, 0 br i1 %10, label %14, label %11 11: - %12 = load <16 x i8>, <16 x i8>* %3, align 8 + %12 = load <16 x i8>, ptr %3, align 8 br label %15 13: - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 br label %14 14: @@ -2842,7 +2842,7 @@ define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> br i1 %21, label %13, label %15 } -define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_set16_loop_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -2867,8 +2867,8 @@ define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 %7 = bitcast <16 x i8> %2 to <8 x i16> %8 = insertelement <8 x i16> %7, i16 %1, i64 0 %9 = bitcast <8 x i16> %8 to <16 x i8> - %10 = bitcast <16 x i8>* %3 to <8 x i16>* - %11 = bitcast <16 x i8>* %3 to i16* + %10 = bitcast ptr %3 to ptr + %11 = bitcast ptr %3 to ptr br label %13 12: @@ -2876,19 +2876,19 @@ define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 13: %14 = phi i32 [ 0, %6 ], [ %20, %13 ] - %15 = load <8 x i16>, <8 x i16>* %10, align 8 + %15 = load <8 x i16>, ptr %10, align 8 %16 = insertelement <8 x i16> %15, i16 %1, i64 0 %17 = bitcast <8 x i16> %16 to <16 x i8> - store i16 %1, i16* %11, align 8 + store i16 %1, ptr %11, align 8 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9) %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18) - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 %20 = add nuw i32 %14, 1 %21 = icmp eq i32 %20, %0 br i1 %21, label %12, label %13 } -define arm_aapcs_vfpcc void @aesd_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aesd_set32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_set32_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 @@ -2912,9 +2912,9 @@ define arm_aapcs_vfpcc void @aesd_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8> ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: bx lr - %4 = load i32, i32* %0, align 4 - %5 = bitcast <16 x i8>* %2 to <4 x i32>* - %6 = load <4 x i32>, <4 x i32>* %5, align 8 + %4 = load i32, ptr %0, align 4 + %5 = bitcast ptr %2 to ptr + %6 = load <4 x i32>, ptr %5, align 8 %7 = insertelement <4 x i32> %6, i32 %4, i64 0 %8 = bitcast <4 x i32> %7 to <16 x i8> %9 = bitcast <16 x i8> %1 to <4 x i32> @@ -2922,11 +2922,11 @@ define arm_aapcs_vfpcc void @aesd_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8> %11 = bitcast <4 x i32> %10 to <16 x i8> %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11) %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12) - store <16 x i8> %13, <16 x i8>* %2, align 8 + store <16 x i8> %13, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-LABEL: aesd_set32_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -2937,8 +2937,8 @@ define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: bx lr - %4 = bitcast <16 x i8>* %2 to <4 x i32>* - %5 = load <4 x i32>, <4 x i32>* %4, align 8 + %4 = bitcast ptr %2 to ptr + %5 = load <4 x i32>, ptr %4, align 8 %6 = insertelement <4 x i32> %5, i32 %0, i64 0 %7 = bitcast <4 x i32> %6 to <16 x i8> %8 = bitcast <16 x i8> %1 to <4 x i32> @@ -2946,11 +2946,11 @@ define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %10 = bitcast <4 x i32> %9 to <16 x i8> %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10) %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11) - store <16 x i8> %12, <16 x i8>* %2, align 8 + store <16 x i8> %12, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_set32_cond_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -2976,15 +2976,15 @@ define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 br i1 %0, label %5, label %10 5: - %6 = load i32, i32* %1, align 4 - %7 = bitcast <16 x i8>* %3 to <4 x i32>* - %8 = load <4 x i32>, <4 x i32>* %7, align 8 + %6 = load i32, ptr %1, align 4 + %7 = bitcast ptr %3 to ptr + %8 = load <4 x i32>, ptr %7, align 8 %9 = insertelement <4 x i32> %8, i32 %6, i64 0 br label %13 10: - %11 = bitcast <16 x i8>* %3 to <4 x i32>* - %12 = load <4 x i32>, <4 x i32>* %11, align 8 + %11 = bitcast ptr %3 to ptr + %12 = load <4 x i32>, ptr %11, align 8 br label %13 13: @@ -2992,7 +2992,7 @@ define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 br i1 %0, label %15, label %19 15: - %16 = load i32, i32* %1, align 4 + %16 = load i32, ptr %1, align 4 %17 = bitcast <16 x i8> %2 to <4 x i32> %18 = insertelement <4 x i32> %17, i32 %16, i64 0 br label %21 @@ -3007,11 +3007,11 @@ define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 %24 = bitcast <4 x i32> %22 to <16 x i8> %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24) %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25) - store <16 x i8> %26, <16 x i8>* %3, align 8 + store <16 x i8> %26, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_set32_cond_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -3030,8 +3030,8 @@ define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %5 = bitcast <16 x i8>* %3 to <4 x i32>* - %6 = load <4 x i32>, <4 x i32>* %5, align 8 + %5 = bitcast ptr %3 to ptr + %6 = load <4 x i32>, ptr %5, align 8 %7 = insertelement <4 x i32> %6, i32 %1, i64 0 %8 = select i1 %0, <4 x i32> %7, <4 x i32> %6 %9 = bitcast <16 x i8> %2 to <4 x i32> @@ -3041,11 +3041,11 @@ define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 %13 = bitcast <4 x i32> %11 to <16 x i8> %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13) %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14) - store <16 x i8> %15, <16 x i8>* %3, align 8 + store <16 x i8> %15, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_set32_loop_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -3064,21 +3064,21 @@ define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> ; CHECK-FIX-NEXT: @ %bb.3: ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %5 = load i32, i32* %1, align 4 + %5 = load i32, ptr %1, align 4 %6 = bitcast <16 x i8> %2 to <4 x i32> %7 = insertelement <4 x i32> %6, i32 %5, i64 0 %8 = bitcast <4 x i32> %7 to <16 x i8> - %9 = bitcast <16 x i8>* %3 to i32* - store i32 %5, i32* %9, align 8 + %9 = bitcast ptr %3 to ptr + store i32 %5, ptr %9, align 8 %10 = icmp eq i32 %0, 0 br i1 %10, label %14, label %11 11: - %12 = load <16 x i8>, <16 x i8>* %3, align 8 + %12 = load <16 x i8>, ptr %3, align 8 br label %15 13: - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 br label %14 14: @@ -3094,7 +3094,7 @@ define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> br i1 %21, label %13, label %15 } -define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_set32_loop_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -3119,8 +3119,8 @@ define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> % %7 = bitcast <16 x i8> %2 to <4 x i32> %8 = insertelement <4 x i32> %7, i32 %1, i64 0 %9 = bitcast <4 x i32> %8 to <16 x i8> - %10 = bitcast <16 x i8>* %3 to <4 x i32>* - %11 = bitcast <16 x i8>* %3 to i32* + %10 = bitcast ptr %3 to ptr + %11 = bitcast ptr %3 to ptr br label %13 12: @@ -3128,19 +3128,19 @@ define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> % 13: %14 = phi i32 [ 0, %6 ], [ %20, %13 ] - %15 = load <4 x i32>, <4 x i32>* %10, align 8 + %15 = load <4 x i32>, ptr %10, align 8 %16 = insertelement <4 x i32> %15, i32 %1, i64 0 %17 = bitcast <4 x i32> %16 to <16 x i8> - store i32 %1, i32* %11, align 8 + store i32 %1, ptr %11, align 8 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9) %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18) - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 %20 = add nuw i32 %14, 1 %21 = icmp eq i32 %20, %0 br i1 %21, label %12, label %13 } -define arm_aapcs_vfpcc void @aesd_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aesd_set64_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_set64_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 @@ -3162,9 +3162,9 @@ define arm_aapcs_vfpcc void @aesd_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8> ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: bx lr - %4 = load i64, i64* %0, align 8 - %5 = bitcast <16 x i8>* %2 to <2 x i64>* - %6 = load <2 x i64>, <2 x i64>* %5, align 8 + %4 = load i64, ptr %0, align 8 + %5 = bitcast ptr %2 to ptr + %6 = load <2 x i64>, ptr %5, align 8 %7 = insertelement <2 x i64> %6, i64 %4, i64 0 %8 = bitcast <2 x i64> %7 to <16 x i8> %9 = bitcast <16 x i8> %1 to <2 x i64> @@ -3172,11 +3172,11 @@ define arm_aapcs_vfpcc void @aesd_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8> %11 = bitcast <2 x i64> %10 to <16 x i8> %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11) %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12) - store <16 x i8> %13, <16 x i8>* %2, align 8 + store <16 x i8> %13, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-LABEL: aesd_set64_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -3189,8 +3189,8 @@ define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %4 = bitcast <16 x i8>* %2 to <2 x i64>* - %5 = load <2 x i64>, <2 x i64>* %4, align 8 + %4 = bitcast ptr %2 to ptr + %5 = load <2 x i64>, ptr %4, align 8 %6 = insertelement <2 x i64> %5, i64 %0, i64 0 %7 = bitcast <2 x i64> %6 to <16 x i8> %8 = bitcast <16 x i8> %1 to <2 x i64> @@ -3198,11 +3198,11 @@ define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %10 = bitcast <2 x i64> %9 to <16 x i8> %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10) %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11) - store <16 x i8> %12, <16 x i8>* %2, align 8 + store <16 x i8> %12, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_set64_cond_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 @@ -3244,15 +3244,15 @@ define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 br i1 %0, label %5, label %10 5: - %6 = load i64, i64* %1, align 8 - %7 = bitcast <16 x i8>* %3 to <2 x i64>* - %8 = load <2 x i64>, <2 x i64>* %7, align 8 + %6 = load i64, ptr %1, align 8 + %7 = bitcast ptr %3 to ptr + %8 = load <2 x i64>, ptr %7, align 8 %9 = insertelement <2 x i64> %8, i64 %6, i64 0 br label %13 10: - %11 = bitcast <16 x i8>* %3 to <2 x i64>* - %12 = load <2 x i64>, <2 x i64>* %11, align 8 + %11 = bitcast ptr %3 to ptr + %12 = load <2 x i64>, ptr %11, align 8 br label %13 13: @@ -3260,7 +3260,7 @@ define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 br i1 %0, label %15, label %19 15: - %16 = load i64, i64* %1, align 8 + %16 = load i64, ptr %1, align 8 %17 = bitcast <16 x i8> %2 to <2 x i64> %18 = insertelement <2 x i64> %17, i64 %16, i64 0 br label %21 @@ -3275,11 +3275,11 @@ define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 %24 = bitcast <2 x i64> %22 to <16 x i8> %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24) %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25) - store <16 x i8> %26, <16 x i8>* %3, align 8 + store <16 x i8> %26, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_set64_cond_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -3301,8 +3301,8 @@ define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: bx lr - %5 = bitcast <16 x i8>* %3 to <2 x i64>* - %6 = load <2 x i64>, <2 x i64>* %5, align 8 + %5 = bitcast ptr %3 to ptr + %6 = load <2 x i64>, ptr %5, align 8 %7 = insertelement <2 x i64> %6, i64 %1, i64 0 %8 = select i1 %0, <2 x i64> %7, <2 x i64> %6 %9 = bitcast <16 x i8> %2 to <2 x i64> @@ -3312,11 +3312,11 @@ define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 %13 = bitcast <2 x i64> %11 to <16 x i8> %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13) %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14) - store <16 x i8> %15, <16 x i8>* %3, align 8 + store <16 x i8> %15, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_set64_loop_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 @@ -3359,21 +3359,21 @@ define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> ; CHECK-CORTEX-FIX-NEXT: @ %bb.3: ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r11, pc} - %5 = load i64, i64* %1, align 8 + %5 = load i64, ptr %1, align 8 %6 = bitcast <16 x i8> %2 to <2 x i64> %7 = insertelement <2 x i64> %6, i64 %5, i64 0 %8 = bitcast <2 x i64> %7 to <16 x i8> - %9 = bitcast <16 x i8>* %3 to i64* - store i64 %5, i64* %9, align 8 + %9 = bitcast ptr %3 to ptr + store i64 %5, ptr %9, align 8 %10 = icmp eq i32 %0, 0 br i1 %10, label %14, label %11 11: - %12 = load <16 x i8>, <16 x i8>* %3, align 8 + %12 = load <16 x i8>, ptr %3, align 8 br label %15 13: - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 br label %14 14: @@ -3389,7 +3389,7 @@ define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> br i1 %21, label %13, label %15 } -define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_set64_loop_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -3417,8 +3417,8 @@ define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> % %7 = bitcast <16 x i8> %2 to <2 x i64> %8 = insertelement <2 x i64> %7, i64 %1, i64 0 %9 = bitcast <2 x i64> %8 to <16 x i8> - %10 = bitcast <16 x i8>* %3 to <2 x i64>* - %11 = bitcast <16 x i8>* %3 to i64* + %10 = bitcast ptr %3 to ptr + %11 = bitcast ptr %3 to ptr br label %13 12: @@ -3426,19 +3426,19 @@ define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> % 13: %14 = phi i32 [ 0, %6 ], [ %20, %13 ] - %15 = load <2 x i64>, <2 x i64>* %10, align 8 + %15 = load <2 x i64>, ptr %10, align 8 %16 = insertelement <2 x i64> %15, i64 %1, i64 0 %17 = bitcast <2 x i64> %16 to <16 x i8> - store i64 %1, i64* %11, align 8 + store i64 %1, ptr %11, align 8 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9) %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18) - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 %20 = add nuw i32 %14, 1 %21 = icmp eq i32 %20, %0 br i1 %21, label %12, label %13 } -define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 @@ -3462,10 +3462,10 @@ define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: bx lr - %4 = bitcast half* %0 to i16* - %5 = load i16, i16* %4, align 2 - %6 = bitcast <16 x i8>* %2 to <8 x i16>* - %7 = load <8 x i16>, <8 x i16>* %6, align 8 + %4 = bitcast ptr %0 to ptr + %5 = load i16, ptr %4, align 2 + %6 = bitcast ptr %2 to ptr + %7 = load <8 x i16>, ptr %6, align 8 %8 = insertelement <8 x i16> %7, i16 %5, i64 0 %9 = bitcast <8 x i16> %8 to <16 x i8> %10 = bitcast <16 x i8> %1 to <8 x i16> @@ -3473,11 +3473,11 @@ define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i %12 = bitcast <8 x i16> %11 to <16 x i8> %13 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %12) %14 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %13) - store <16 x i8> %14, <16 x i8>* %2, align 8 + store <16 x i8> %14, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-LABEL: aesd_setf16_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q1, q1, q1 @@ -3489,8 +3489,8 @@ define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, <16 x i8 ; CHECK-FIX-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0] ; CHECK-FIX-NEXT: bx lr - %4 = bitcast <16 x i8>* %2 to <8 x i16>* - %5 = load <8 x i16>, <8 x i16>* %4, align 8 + %4 = bitcast ptr %2 to ptr + %5 = load <8 x i16>, ptr %4, align 8 %6 = bitcast half %0 to i16 %7 = insertelement <8 x i16> %5, i16 %6, i64 0 %8 = bitcast <8 x i16> %7 to <16 x i8> @@ -3499,11 +3499,11 @@ define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, <16 x i8 %11 = bitcast <8 x i16> %10 to <16 x i8> %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11) %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12) - store <16 x i8> %13, <16 x i8>* %2, align 8 + store <16 x i8> %13, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -3685,17 +3685,17 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < br i1 %0, label %5, label %12 5: - %6 = bitcast half* %1 to i16* - %7 = load i16, i16* %6, align 2 - %8 = bitcast <16 x i8>* %3 to <8 x i16>* - %9 = load <8 x i16>, <8 x i16>* %8, align 8 + %6 = bitcast ptr %1 to ptr + %7 = load i16, ptr %6, align 2 + %8 = bitcast ptr %3 to ptr + %9 = load <8 x i16>, ptr %8, align 8 %10 = insertelement <8 x i16> %9, i16 %7, i64 0 %11 = bitcast <8 x i16> %10 to <8 x half> br label %15 12: - %13 = bitcast <16 x i8>* %3 to <8 x half>* - %14 = load <8 x half>, <8 x half>* %13, align 8 + %13 = bitcast ptr %3 to ptr + %14 = load <8 x half>, ptr %13, align 8 br label %15 15: @@ -3703,8 +3703,8 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < br i1 %0, label %17, label %23 17: - %18 = bitcast half* %1 to i16* - %19 = load i16, i16* %18, align 2 + %18 = bitcast ptr %1 to ptr + %19 = load i16, ptr %18, align 2 %20 = bitcast <16 x i8> %2 to <8 x i16> %21 = insertelement <8 x i16> %20, i16 %19, i64 0 %22 = bitcast <8 x i16> %21 to <8 x half> @@ -3720,11 +3720,11 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, < %28 = bitcast <8 x half> %26 to <16 x i8> %29 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %27, <16 x i8> %28) %30 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %29) - store <16 x i8> %30, <16 x i8>* %3, align 8 + store <16 x i8> %30, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_val: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -3906,16 +3906,16 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <1 br i1 %0, label %5, label %11 5: - %6 = bitcast <16 x i8>* %3 to <8 x i16>* - %7 = load <8 x i16>, <8 x i16>* %6, align 8 + %6 = bitcast ptr %3 to ptr + %7 = load <8 x i16>, ptr %6, align 8 %8 = bitcast half %1 to i16 %9 = insertelement <8 x i16> %7, i16 %8, i64 0 %10 = bitcast <8 x i16> %9 to <8 x half> br label %14 11: - %12 = bitcast <16 x i8>* %3 to <8 x half>* - %13 = load <8 x half>, <8 x half>* %12, align 8 + %12 = bitcast ptr %3 to ptr + %13 = load <8 x half>, ptr %12, align 8 br label %14 14: @@ -3939,11 +3939,11 @@ define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <1 %26 = bitcast <8 x half> %24 to <16 x i8> %27 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %25, <16 x i8> %26) %28 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %27) - store <16 x i8> %28, <16 x i8>* %3, align 8 + store <16 x i8> %28, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_setf16_loop_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -3962,22 +3962,22 @@ define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8 ; CHECK-FIX-NEXT: @ %bb.3: ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: bx lr - %5 = bitcast half* %1 to i16* - %6 = load i16, i16* %5, align 2 + %5 = bitcast ptr %1 to ptr + %6 = load i16, ptr %5, align 2 %7 = bitcast <16 x i8> %2 to <8 x i16> %8 = insertelement <8 x i16> %7, i16 %6, i64 0 %9 = bitcast <8 x i16> %8 to <16 x i8> - %10 = bitcast <16 x i8>* %3 to i16* - store i16 %6, i16* %10, align 8 + %10 = bitcast ptr %3 to ptr + store i16 %6, ptr %10, align 8 %11 = icmp eq i32 %0, 0 br i1 %11, label %15, label %12 12: - %13 = load <16 x i8>, <16 x i8>* %3, align 8 + %13 = load <16 x i8>, ptr %3, align 8 br label %16 14: - store <16 x i8> %20, <16 x i8>* %3, align 8 + store <16 x i8> %20, ptr %3, align 8 br label %15 15: @@ -3993,7 +3993,7 @@ define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8 br i1 %22, label %14, label %16 } -define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_setf16_loop_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q1, q1, q1 @@ -4020,8 +4020,8 @@ define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %8 = bitcast half %1 to i16 %9 = insertelement <8 x i16> %7, i16 %8, i64 0 %10 = bitcast <8 x i16> %9 to <16 x i8> - %11 = bitcast <16 x i8>* %3 to <8 x i16>* - %12 = bitcast <16 x i8>* %3 to half* + %11 = bitcast ptr %3 to ptr + %12 = bitcast ptr %3 to ptr br label %14 13: @@ -4029,19 +4029,19 @@ define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> 14: %15 = phi i32 [ 0, %6 ], [ %21, %14 ] - %16 = load <8 x i16>, <8 x i16>* %11, align 8 + %16 = load <8 x i16>, ptr %11, align 8 %17 = insertelement <8 x i16> %16, i16 %8, i64 0 %18 = bitcast <8 x i16> %17 to <16 x i8> - store half %1, half* %12, align 8 + store half %1, ptr %12, align 8 %19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %18, <16 x i8> %10) %20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19) - store <16 x i8> %20, <16 x i8>* %3, align 8 + store <16 x i8> %20, ptr %3, align 8 %21 = add nuw i32 %15, 1 %22 = icmp eq i32 %21, %0 br i1 %22, label %13, label %14 } -define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-LABEL: aesd_setf32_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vldr s0, [r0] @@ -4053,9 +4053,9 @@ define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x ; CHECK-FIX-NEXT: aesimc.8 q8, q1 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-FIX-NEXT: bx lr - %4 = load float, float* %0, align 4 - %5 = bitcast <16 x i8>* %2 to <4 x float>* - %6 = load <4 x float>, <4 x float>* %5, align 8 + %4 = load float, ptr %0, align 4 + %5 = bitcast ptr %2 to ptr + %6 = load <4 x float>, ptr %5, align 8 %7 = insertelement <4 x float> %6, float %4, i64 0 %8 = bitcast <4 x float> %7 to <16 x i8> %9 = bitcast <16 x i8> %1 to <4 x float> @@ -4063,11 +4063,11 @@ define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x %11 = bitcast <4 x float> %10 to <16 x i8> %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11) %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12) - store <16 x i8> %13, <16 x i8>* %2, align 8 + store <16 x i8> %13, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, <16 x i8>* %2) nounwind { +define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, ptr %2) nounwind { ; CHECK-FIX-LABEL: aesd_setf32_via_val: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vmov.f32 s4, s0 @@ -4079,8 +4079,8 @@ define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, <16 x i ; CHECK-FIX-NEXT: aesimc.8 q8, q0 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0] ; CHECK-FIX-NEXT: bx lr - %4 = bitcast <16 x i8>* %2 to <4 x float>* - %5 = load <4 x float>, <4 x float>* %4, align 8 + %4 = bitcast ptr %2 to ptr + %5 = load <4 x float>, ptr %4, align 8 %6 = insertelement <4 x float> %5, float %0, i64 0 %7 = bitcast <4 x float> %6 to <16 x i8> %8 = bitcast <16 x i8> %1 to <4 x float> @@ -4088,11 +4088,11 @@ define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, <16 x i %10 = bitcast <4 x float> %9 to <16 x i8> %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10) %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11) - store <16 x i8> %12, <16 x i8>* %2, align 8 + store <16 x i8> %12, ptr %2, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-LABEL: aesd_setf32_cond_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 @@ -4118,15 +4118,15 @@ define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1, br i1 %0, label %5, label %10 5: - %6 = load float, float* %1, align 4 - %7 = bitcast <16 x i8>* %3 to <4 x float>* - %8 = load <4 x float>, <4 x float>* %7, align 8 + %6 = load float, ptr %1, align 4 + %7 = bitcast ptr %3 to ptr + %8 = load <4 x float>, ptr %7, align 8 %9 = insertelement <4 x float> %8, float %6, i64 0 br label %13 10: - %11 = bitcast <16 x i8>* %3 to <4 x float>* - %12 = load <4 x float>, <4 x float>* %11, align 8 + %11 = bitcast ptr %3 to ptr + %12 = load <4 x float>, ptr %11, align 8 br label %13 13: @@ -4134,7 +4134,7 @@ define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1, br i1 %0, label %15, label %19 15: - %16 = load float, float* %1, align 4 + %16 = load float, ptr %1, align 4 %17 = bitcast <16 x i8> %2 to <4 x float> %18 = insertelement <4 x float> %17, float %16, i64 0 br label %21 @@ -4149,11 +4149,11 @@ define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1, %24 = bitcast <4 x float> %22 to <16 x i8> %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24) %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25) - store <16 x i8> %26, <16 x i8>* %3, align 8 + store <16 x i8> %26, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_cond_via_val: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1] @@ -4181,8 +4181,8 @@ define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, < ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] ; CHECK-CORTEX-FIX-NEXT: bx lr - %5 = bitcast <16 x i8>* %3 to <4 x float>* - %6 = load <4 x float>, <4 x float>* %5, align 8 + %5 = bitcast ptr %3 to ptr + %6 = load <4 x float>, ptr %5, align 8 %7 = insertelement <4 x float> %6, float %1, i64 0 %8 = select i1 %0, <4 x float> %7, <4 x float> %6 %9 = bitcast <16 x i8> %2 to <4 x float> @@ -4192,11 +4192,11 @@ define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, < %13 = bitcast <4 x float> %11 to <16 x i8> %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13) %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14) - store <16 x i8> %15, <16 x i8>* %3, align 8 + store <16 x i8> %15, ptr %3, align 8 ret void } -define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vldr s4, [r1] @@ -4234,21 +4234,21 @@ define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i ; CHECK-CORTEX-FIX-NEXT: @ %bb.3: ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-CORTEX-FIX-NEXT: bx lr - %5 = load float, float* %1, align 4 + %5 = load float, ptr %1, align 4 %6 = bitcast <16 x i8> %2 to <4 x float> %7 = insertelement <4 x float> %6, float %5, i64 0 %8 = bitcast <4 x float> %7 to <16 x i8> - %9 = bitcast <16 x i8>* %3 to float* - store float %5, float* %9, align 8 + %9 = bitcast ptr %3 to ptr + store float %5, ptr %9, align 8 %10 = icmp eq i32 %0, 0 br i1 %10, label %14, label %11 11: - %12 = load <16 x i8>, <16 x i8>* %3, align 8 + %12 = load <16 x i8>, ptr %3, align 8 br label %15 13: - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 br label %14 14: @@ -4264,7 +4264,7 @@ define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i br i1 %21, label %13, label %15 } -define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind { +define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_val: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 @@ -4309,8 +4309,8 @@ define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8 %7 = bitcast <16 x i8> %2 to <4 x float> %8 = insertelement <4 x float> %7, float %1, i64 0 %9 = bitcast <4 x float> %8 to <16 x i8> - %10 = bitcast <16 x i8>* %3 to <4 x float>* - %11 = bitcast <16 x i8>* %3 to float* + %10 = bitcast ptr %3 to ptr + %11 = bitcast ptr %3 to ptr br label %13 12: @@ -4318,19 +4318,19 @@ define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8 13: %14 = phi i32 [ 0, %6 ], [ %20, %13 ] - %15 = load <4 x float>, <4 x float>* %10, align 8 + %15 = load <4 x float>, ptr %10, align 8 %16 = insertelement <4 x float> %15, float %1, i64 0 %17 = bitcast <4 x float> %16 to <16 x i8> - store float %1, float* %11, align 8 + store float %1, ptr %11, align 8 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9) %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18) - store <16 x i8> %19, <16 x i8>* %3, align 8 + store <16 x i8> %19, ptr %3, align 8 %20 = add nuw i32 %14, 1 %21 = icmp eq i32 %20, %0 br i1 %21, label %12, label %13 } -define arm_aapcs_vfpcc void @aese_constantisland(<16 x i8>* %0) nounwind { +define arm_aapcs_vfpcc void @aese_constantisland(ptr %0) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aese_constantisland: ; CHECK-FIX-NOSCHED: @ %bb.0: ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r0] @@ -4388,9 +4388,9 @@ define arm_aapcs_vfpcc void @aese_constantisland(<16 x i8>* %0) nounwind { ; CHECK-CORTEX-FIX-NEXT: .byte 13 @ 0xd ; CHECK-CORTEX-FIX-NEXT: .byte 14 @ 0xe ; CHECK-CORTEX-FIX-NEXT: .byte 15 @ 0xf - %2 = load <16 x i8>, <16 x i8>* %0, align 8 + %2 = load <16 x i8>, ptr %0, align 8 %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> %2) %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3) - store <16 x i8> %4, <16 x i8>* %0, align 8 + store <16 x i8> %4, ptr %0, align 8 ret void } diff --git a/llvm/test/CodeGen/ARM/aliases.ll b/llvm/test/CodeGen/ARM/aliases.ll index cc423af..6075ad8 100644 --- a/llvm/test/CodeGen/ARM/aliases.ll +++ b/llvm/test/CodeGen/ARM/aliases.ll @@ -48,7 +48,7 @@ define i32 @foo_f() { @A = alias i64, ptr @bar @structvar = private global {i32, i32} {i32 1, i32 2} -@elem0 = alias i32, getelementptr({i32, i32}, ptr @structvar, i32 0, i32 0) +@elem0 = alias i32, ptr @structvar @elem1 = alias i32, getelementptr({i32, i32}, ptr @structvar, i32 0, i32 1) define i32 @test() { diff --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll index 01d72f1..c8bb10c 100644 --- a/llvm/test/CodeGen/ARM/code-placement.ll +++ b/llvm/test/CodeGen/ARM/code-placement.ll @@ -2,12 +2,12 @@ ; PHI elimination shouldn't break backedge. %struct.list_data_s = type { i16, i16 } -%struct.list_head = type { %struct.list_head*, %struct.list_data_s* } +%struct.list_head = type { ptr, ptr } -define arm_apcscc %struct.list_head* @t1(%struct.list_head* %list) nounwind { +define arm_apcscc ptr @t1(ptr %list) nounwind { entry: ; CHECK-LABEL: t1: - %0 = icmp eq %struct.list_head* %list, null + %0 = icmp eq ptr %list, null br i1 %0, label %bb2, label %bb bb: @@ -15,21 +15,21 @@ bb: ; CHECK: bne LBB0_[[LABEL]] ; CHECK-NOT: b LBB0_[[LABEL]] ; CHECK: bx lr - %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ] - %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ] - %1 = getelementptr inbounds %struct.list_head, %struct.list_head* %list_addr.05, i32 0, i32 0 - %2 = load %struct.list_head*, %struct.list_head** %1, align 4 - store %struct.list_head* %next.04, %struct.list_head** %1, align 4 - %3 = icmp eq %struct.list_head* %2, null + %list_addr.05 = phi ptr [ %2, %bb ], [ %list, %entry ] + %next.04 = phi ptr [ %list_addr.05, %bb ], [ null, %entry ] + %1 = getelementptr inbounds %struct.list_head, ptr %list_addr.05, i32 0, i32 0 + %2 = load ptr, ptr %1, align 4 + store ptr %next.04, ptr %1, align 4 + %3 = icmp eq ptr %2, null br i1 %3, label %bb2, label %bb bb2: - %next.0.lcssa = phi %struct.list_head* [ null, %entry ], [ %list_addr.05, %bb ] - ret %struct.list_head* %next.0.lcssa + %next.0.lcssa = phi ptr [ null, %entry ], [ %list_addr.05, %bb ] + ret ptr %next.0.lcssa } ; Optimize loop entry, eliminate intra loop branches -define i32 @t2(i32 %passes, i32* nocapture %src, i32 %size) nounwind readonly { +define i32 @t2(i32 %passes, ptr nocapture %src, i32 %size) nounwind readonly { entry: ; CHECK-LABEL: t2: %0 = icmp eq i32 %passes, 0 ; <i1> [#uses=1] @@ -42,8 +42,8 @@ bb1: ; preds = %bb2.preheader, %bb1 %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2] %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1] %tmp17 = sub i32 %i.07, %indvar ; <i32> [#uses=1] - %scevgep = getelementptr i32, i32* %src, i32 %tmp17 ; <i32*> [#uses=1] - %1 = load i32, i32* %scevgep, align 4 ; <i32> [#uses=1] + %scevgep = getelementptr i32, ptr %src, i32 %tmp17 ; <i32*> [#uses=1] + %1 = load i32, ptr %scevgep, align 4 ; <i32> [#uses=1] %2 = add nsw i32 %1, %sum.08 ; <i32> [#uses=2] %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2] %exitcond = icmp eq i32 %indvar.next, %size ; <i1> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/constant-island-movwt.mir b/llvm/test/CodeGen/ARM/constant-island-movwt.mir index 2a2b4a9..7d21a4e4 100644 --- a/llvm/test/CodeGen/ARM/constant-island-movwt.mir +++ b/llvm/test/CodeGen/ARM/constant-island-movwt.mir @@ -6,7 +6,7 @@ target datalayout = "e-m:w-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv7-unknown-windows-gnu" - %struct.A = type { [201 x i8*] } + %struct.A = type { [201 x ptr] } @.str.17 = private unnamed_addr constant [10 x i8] c"__ashlhi3\00", align 1 @.str.18 = private unnamed_addr constant [10 x i8] c"__ashlsi3\00", align 1 @@ -147,166 +147,166 @@ @.str.153 = private unnamed_addr constant [9 x i8] c"copysign\00", align 1 ; Function Attrs: nounwind - define arm_aapcs_vfpcc void @func(%struct.A* %obj) #0 { + define arm_aapcs_vfpcc void @func(ptr %obj) #0 { entry: - %arrayidx.i1 = bitcast %struct.A* %obj to i8** - %0 = bitcast i8** %arrayidx.i1 to <4 x i8*>* - store <4 x i8*> <i8* null, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.18, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.19, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.20, i32 0, i32 0)>, <4 x i8*>* %0 - %arrayidx.i62 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 4 - %1 = bitcast i8** %arrayidx.i62 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.21, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.22, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.23, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.24, i32 0, i32 0)>, <4 x i8*>* %1 - %arrayidx.i523 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 8 - %2 = bitcast i8** %arrayidx.i523 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.25, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.26, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.27, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.28, i32 0, i32 0)>, <4 x i8*>* %2 - %arrayidx.i519 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 12 - %3 = bitcast i8** %arrayidx.i519 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.29, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.30, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.31, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.32, i32 0, i32 0)>, <4 x i8*>* %3 - %arrayidx.i515 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 16 - %4 = bitcast i8** %arrayidx.i515 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %4, i8 0, i64 40, i1 false) - %arrayidx.i511 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 20 - %5 = bitcast i8** %arrayidx.i511 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.37, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.38, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.39, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.40, i32 0, i32 0)>, <4 x i8*>* %5 - %arrayidx.i507 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 24 - %6 = bitcast i8** %arrayidx.i507 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.41, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.42, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.43, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.44, i32 0, i32 0)>, <4 x i8*>* %6 - %arrayidx.i503 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 28 - %7 = bitcast i8** %arrayidx.i503 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.45, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.46, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.47, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.48, i32 0, i32 0)>, <4 x i8*>* %7 - %arrayidx.i499 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 32 - %8 = bitcast i8** %arrayidx.i499 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.49, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.50, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.51, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.52, i32 0, i32 0)>, <4 x i8*>* %8 - %arrayidx.i495 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 36 - %9 = bitcast i8** %arrayidx.i495 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.53, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.54, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.55, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.56, i32 0, i32 0)>, <4 x i8*>* %9 - %arrayidx.i491 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 40 - %arrayidx.i481 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 50 - %10 = bitcast i8** %arrayidx.i491 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %10, i8 0, i64 40, i1 false) - %11 = bitcast i8** %arrayidx.i481 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.57, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.58, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.59, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.60, i32 0, i32 0)>, <4 x i8*>* %11 - %arrayidx.i477 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 54 - %12 = bitcast i8** %arrayidx.i477 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.61, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.62, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.63, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.64, i32 0, i32 0)>, <4 x i8*>* %12 - %arrayidx.i473 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 58 - %13 = bitcast i8** %arrayidx.i473 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.65, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.66, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.67, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.68, i32 0, i32 0)>, <4 x i8*>* %13 - %arrayidx.i469 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 62 - %14 = bitcast i8** %arrayidx.i469 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.69, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.70, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.71, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.72, i32 0, i32 0)>, <4 x i8*>* %14 - %arrayidx.i465 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 66 - %15 = bitcast i8** %arrayidx.i465 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.73, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.74, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.75, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.76, i32 0, i32 0)>, <4 x i8*>* %15 - %arrayidx.i461 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 70 - %16 = bitcast i8** %arrayidx.i461 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %16, i8 0, i64 40, i1 false) - %arrayidx.i457 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 74 - %17 = bitcast i8** %arrayidx.i457 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.81, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.81, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.81, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.82, i32 0, i32 0)>, <4 x i8*>* %17 - %arrayidx.i453 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 78 - %18 = bitcast i8** %arrayidx.i453 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.83, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.84, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.84, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.84, i32 0, i32 0)>, <4 x i8*>* %18 - %arrayidx.i449 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 82 - %arrayidx.i445 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 86 - %19 = bitcast i8** %arrayidx.i445 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.88, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.89, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.90, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.91, i32 0, i32 0)>, <4 x i8*>* %19 - %arrayidx.i441 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 90 - %20 = bitcast i8** %arrayidx.i441 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.91, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.91, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.92, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.93, i32 0, i32 0)>, <4 x i8*>* %20 - %arrayidx.i437 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 94 - %21 = bitcast i8** %arrayidx.i437 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %21, i8 0, i64 28, i1 false) - %arrayidx.i433 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 98 - %22 = bitcast i8** %arrayidx.i433 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.96, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.97, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.97, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.97, i32 0, i32 0)>, <4 x i8*>* %22 - %arrayidx.i429 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 102 - %23 = bitcast i8** %arrayidx.i429 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.98, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.99, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.100, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.100, i32 0, i32 0)>, <4 x i8*>* %23 - %arrayidx.i425 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 106 - %24 = bitcast i8** %arrayidx.i425 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %24, i8 0, i64 28, i1 false) - %arrayidx.i421 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 110 - %25 = bitcast i8** %arrayidx.i421 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.103, i32 0, i32 0), i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.103, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.104, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.105, i32 0, i32 0)>, <4 x i8*>* %25 - %arrayidx.i417 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 114 - %26 = bitcast i8** %arrayidx.i417 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.106, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.106, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.106, i32 0, i32 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.107, i32 0, i32 0)>, <4 x i8*>* %26 - %arrayidx.i413 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 118 - %27 = bitcast i8** %arrayidx.i413 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.108, i32 0, i32 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.109, i32 0, i32 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.109, i32 0, i32 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.109, i32 0, i32 0)>, <4 x i8*>* %27 - %arrayidx.i409 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 122 - %28 = bitcast i8** %arrayidx.i409 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.110, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.111, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.112, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.112, i32 0, i32 0)>, <4 x i8*>* %28 - %arrayidx.i405 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 126 - %29 = bitcast i8** %arrayidx.i405 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.112, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.113, i32 0, i32 0), i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.114, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.115, i32 0, i32 0)>, <4 x i8*>* %29 - %arrayidx.i401 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 130 - %30 = bitcast i8** %arrayidx.i401 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.115, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.115, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.116, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.117, i32 0, i32 0)>, <4 x i8*>* %30 - %arrayidx.i397 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 134 - %31 = bitcast i8** %arrayidx.i397 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.118, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.118, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.118, i32 0, i32 0), i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.119, i32 0, i32 0)>, <4 x i8*>* %31 - %arrayidx.i393 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 138 - %32 = bitcast i8** %arrayidx.i393 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.120, i32 0, i32 0), i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.121, i32 0, i32 0), i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.121, i32 0, i32 0), i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.121, i32 0, i32 0)>, <4 x i8*>* %32 - %arrayidx.i389 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 142 - %33 = bitcast i8** %arrayidx.i389 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.122, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.123, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.124, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.124, i32 0, i32 0)>, <4 x i8*>* %33 - %arrayidx.i385 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 146 - %34 = bitcast i8** %arrayidx.i385 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.124, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.125, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.126, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.127, i32 0, i32 0)>, <4 x i8*>* %34 - %arrayidx.i381 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 150 - store i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.127, i32 0, i32 0), i8** %arrayidx.i381 - %arrayidx.i380 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 151 - store i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.127, i32 0, i32 0), i8** %arrayidx.i380 - %arrayidx.i379 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 152 - %arrayidx.i375 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 156 - %arrayidx.i374 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 157 - %arrayidx.i373 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 158 - %arrayidx.i372 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 159 - %35 = bitcast i8** %arrayidx.i379 to i8* - call void @llvm.memset.p0i8.i64(i8* align 4 %35, i8 0, i64 28, i1 false) - store i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.128, i32 0, i32 0), i8** %arrayidx.i372 - %arrayidx.i371 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 160 - store i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.129, i32 0, i32 0), i8** %arrayidx.i371 - %arrayidx.i370 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 161 - %36 = bitcast i8** %arrayidx.i370 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.130, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.130, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.130, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.131, i32 0, i32 0)>, <4 x i8*>* %36 - %arrayidx.i366 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 165 - %37 = bitcast i8** %arrayidx.i366 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.132, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.133, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.133, i32 0, i32 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.133, i32 0, i32 0)>, <4 x i8*>* %37 - %arrayidx.i362 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 169 - %38 = bitcast i8** %arrayidx.i362 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.134, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.135, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.136, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.136, i32 0, i32 0)>, <4 x i8*>* %38 - %arrayidx.i358 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 173 - %39 = bitcast i8** %arrayidx.i358 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.136, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.137, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.138, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.139, i32 0, i32 0)>, <4 x i8*>* %39 - %arrayidx.i354 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 177 - %40 = bitcast i8** %arrayidx.i354 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.139, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.139, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.140, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.141, i32 0, i32 0)>, <4 x i8*>* %40 - %arrayidx.i350 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 181 - %41 = bitcast i8** %arrayidx.i350 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.142, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.142, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.142, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.143, i32 0, i32 0)>, <4 x i8*>* %41 - %arrayidx.i346 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 185 - %42 = bitcast i8** %arrayidx.i346 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.144, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.145, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.145, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.145, i32 0, i32 0)>, <4 x i8*>* %42 - %arrayidx.i342 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 189 - %43 = bitcast i8** %arrayidx.i342 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.146, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.147, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.148, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.148, i32 0, i32 0)>, <4 x i8*>* %43 - %arrayidx.i338 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 193 - %44 = bitcast i8** %arrayidx.i338 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.148, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.149, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.150, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.151, i32 0, i32 0)>, <4 x i8*>* %44 - %arrayidx.i334 = getelementptr inbounds %struct.A, %struct.A* %obj, i32 0, i32 0, i32 197 - %45 = bitcast i8** %arrayidx.i334 to <4 x i8*>* - store <4 x i8*> <i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.151, i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.151, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.152, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.153, i32 0, i32 0)>, <4 x i8*>* %45 + %arrayidx.i1 = bitcast ptr %obj to ptr + %0 = bitcast ptr %arrayidx.i1 to ptr + store <4 x ptr> <ptr null, ptr @.str.18, ptr @.str.19, ptr @.str.20>, ptr %0 + %arrayidx.i62 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 4 + %1 = bitcast ptr %arrayidx.i62 to ptr + store <4 x ptr> <ptr @.str.21, ptr @.str.22, ptr @.str.23, ptr @.str.24>, ptr %1 + %arrayidx.i523 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 8 + %2 = bitcast ptr %arrayidx.i523 to ptr + store <4 x ptr> <ptr @.str.25, ptr @.str.26, ptr @.str.27, ptr @.str.28>, ptr %2 + %arrayidx.i519 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 12 + %3 = bitcast ptr %arrayidx.i519 to ptr + store <4 x ptr> <ptr @.str.29, ptr @.str.30, ptr @.str.31, ptr @.str.32>, ptr %3 + %arrayidx.i515 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 16 + %4 = bitcast ptr %arrayidx.i515 to ptr + call void @llvm.memset.p0.i64(ptr align 4 %4, i8 0, i64 40, i1 false) + %arrayidx.i511 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 20 + %5 = bitcast ptr %arrayidx.i511 to ptr + store <4 x ptr> <ptr @.str.37, ptr @.str.38, ptr @.str.39, ptr @.str.40>, ptr %5 + %arrayidx.i507 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 24 + %6 = bitcast ptr %arrayidx.i507 to ptr + store <4 x ptr> <ptr @.str.41, ptr @.str.42, ptr @.str.43, ptr @.str.44>, ptr %6 + %arrayidx.i503 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 28 + %7 = bitcast ptr %arrayidx.i503 to ptr + store <4 x ptr> <ptr @.str.45, ptr @.str.46, ptr @.str.47, ptr @.str.48>, ptr %7 + %arrayidx.i499 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 32 + %8 = bitcast ptr %arrayidx.i499 to ptr + store <4 x ptr> <ptr @.str.49, ptr @.str.50, ptr @.str.51, ptr @.str.52>, ptr %8 + %arrayidx.i495 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 36 + %9 = bitcast ptr %arrayidx.i495 to ptr + store <4 x ptr> <ptr @.str.53, ptr @.str.54, ptr @.str.55, ptr @.str.56>, ptr %9 + %arrayidx.i491 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 40 + %arrayidx.i481 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 50 + %10 = bitcast ptr %arrayidx.i491 to ptr + call void @llvm.memset.p0.i64(ptr align 4 %10, i8 0, i64 40, i1 false) + %11 = bitcast ptr %arrayidx.i481 to ptr + store <4 x ptr> <ptr @.str.57, ptr @.str.58, ptr @.str.59, ptr @.str.60>, ptr %11 + %arrayidx.i477 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 54 + %12 = bitcast ptr %arrayidx.i477 to ptr + store <4 x ptr> <ptr @.str.61, ptr @.str.62, ptr @.str.63, ptr @.str.64>, ptr %12 + %arrayidx.i473 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 58 + %13 = bitcast ptr %arrayidx.i473 to ptr + store <4 x ptr> <ptr @.str.65, ptr @.str.66, ptr @.str.67, ptr @.str.68>, ptr %13 + %arrayidx.i469 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 62 + %14 = bitcast ptr %arrayidx.i469 to ptr + store <4 x ptr> <ptr @.str.69, ptr @.str.70, ptr @.str.71, ptr @.str.72>, ptr %14 + %arrayidx.i465 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 66 + %15 = bitcast ptr %arrayidx.i465 to ptr + store <4 x ptr> <ptr @.str.73, ptr @.str.74, ptr @.str.75, ptr @.str.76>, ptr %15 + %arrayidx.i461 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 70 + %16 = bitcast ptr %arrayidx.i461 to ptr + call void @llvm.memset.p0.i64(ptr align 4 %16, i8 0, i64 40, i1 false) + %arrayidx.i457 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 74 + %17 = bitcast ptr %arrayidx.i457 to ptr + store <4 x ptr> <ptr @.str.81, ptr @.str.81, ptr @.str.81, ptr @.str.82>, ptr %17 + %arrayidx.i453 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 78 + %18 = bitcast ptr %arrayidx.i453 to ptr + store <4 x ptr> <ptr @.str.83, ptr @.str.84, ptr @.str.84, ptr @.str.84>, ptr %18 + %arrayidx.i449 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 82 + %arrayidx.i445 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 86 + %19 = bitcast ptr %arrayidx.i445 to ptr + store <4 x ptr> <ptr @.str.88, ptr @.str.89, ptr @.str.90, ptr @.str.91>, ptr %19 + %arrayidx.i441 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 90 + %20 = bitcast ptr %arrayidx.i441 to ptr + store <4 x ptr> <ptr @.str.91, ptr @.str.91, ptr @.str.92, ptr @.str.93>, ptr %20 + %arrayidx.i437 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 94 + %21 = bitcast ptr %arrayidx.i437 to ptr + call void @llvm.memset.p0.i64(ptr align 4 %21, i8 0, i64 28, i1 false) + %arrayidx.i433 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 98 + %22 = bitcast ptr %arrayidx.i433 to ptr + store <4 x ptr> <ptr @.str.96, ptr @.str.97, ptr @.str.97, ptr @.str.97>, ptr %22 + %arrayidx.i429 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 102 + %23 = bitcast ptr %arrayidx.i429 to ptr + store <4 x ptr> <ptr @.str.98, ptr @.str.99, ptr @.str.100, ptr @.str.100>, ptr %23 + %arrayidx.i425 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 106 + %24 = bitcast ptr %arrayidx.i425 to ptr + call void @llvm.memset.p0.i64(ptr align 4 %24, i8 0, i64 28, i1 false) + %arrayidx.i421 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 110 + %25 = bitcast ptr %arrayidx.i421 to ptr + store <4 x ptr> <ptr @.str.103, ptr @.str.103, ptr @.str.104, ptr @.str.105>, ptr %25 + %arrayidx.i417 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 114 + %26 = bitcast ptr %arrayidx.i417 to ptr + store <4 x ptr> <ptr @.str.106, ptr @.str.106, ptr @.str.106, ptr @.str.107>, ptr %26 + %arrayidx.i413 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 118 + %27 = bitcast ptr %arrayidx.i413 to ptr + store <4 x ptr> <ptr @.str.108, ptr @.str.109, ptr @.str.109, ptr @.str.109>, ptr %27 + %arrayidx.i409 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 122 + %28 = bitcast ptr %arrayidx.i409 to ptr + store <4 x ptr> <ptr @.str.110, ptr @.str.111, ptr @.str.112, ptr @.str.112>, ptr %28 + %arrayidx.i405 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 126 + %29 = bitcast ptr %arrayidx.i405 to ptr + store <4 x ptr> <ptr @.str.112, ptr @.str.113, ptr @.str.114, ptr @.str.115>, ptr %29 + %arrayidx.i401 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 130 + %30 = bitcast ptr %arrayidx.i401 to ptr + store <4 x ptr> <ptr @.str.115, ptr @.str.115, ptr @.str.116, ptr @.str.117>, ptr %30 + %arrayidx.i397 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 134 + %31 = bitcast ptr %arrayidx.i397 to ptr + store <4 x ptr> <ptr @.str.118, ptr @.str.118, ptr @.str.118, ptr @.str.119>, ptr %31 + %arrayidx.i393 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 138 + %32 = bitcast ptr %arrayidx.i393 to ptr + store <4 x ptr> <ptr @.str.120, ptr @.str.121, ptr @.str.121, ptr @.str.121>, ptr %32 + %arrayidx.i389 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 142 + %33 = bitcast ptr %arrayidx.i389 to ptr + store <4 x ptr> <ptr @.str.122, ptr @.str.123, ptr @.str.124, ptr @.str.124>, ptr %33 + %arrayidx.i385 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 146 + %34 = bitcast ptr %arrayidx.i385 to ptr + store <4 x ptr> <ptr @.str.124, ptr @.str.125, ptr @.str.126, ptr @.str.127>, ptr %34 + %arrayidx.i381 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 150 + store ptr @.str.127, ptr %arrayidx.i381 + %arrayidx.i380 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 151 + store ptr @.str.127, ptr %arrayidx.i380 + %arrayidx.i379 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 152 + %arrayidx.i375 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 156 + %arrayidx.i374 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 157 + %arrayidx.i373 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 158 + %arrayidx.i372 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 159 + %35 = bitcast ptr %arrayidx.i379 to ptr + call void @llvm.memset.p0.i64(ptr align 4 %35, i8 0, i64 28, i1 false) + store ptr @.str.128, ptr %arrayidx.i372 + %arrayidx.i371 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 160 + store ptr @.str.129, ptr %arrayidx.i371 + %arrayidx.i370 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 161 + %36 = bitcast ptr %arrayidx.i370 to ptr + store <4 x ptr> <ptr @.str.130, ptr @.str.130, ptr @.str.130, ptr @.str.131>, ptr %36 + %arrayidx.i366 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 165 + %37 = bitcast ptr %arrayidx.i366 to ptr + store <4 x ptr> <ptr @.str.132, ptr @.str.133, ptr @.str.133, ptr @.str.133>, ptr %37 + %arrayidx.i362 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 169 + %38 = bitcast ptr %arrayidx.i362 to ptr + store <4 x ptr> <ptr @.str.134, ptr @.str.135, ptr @.str.136, ptr @.str.136>, ptr %38 + %arrayidx.i358 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 173 + %39 = bitcast ptr %arrayidx.i358 to ptr + store <4 x ptr> <ptr @.str.136, ptr @.str.137, ptr @.str.138, ptr @.str.139>, ptr %39 + %arrayidx.i354 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 177 + %40 = bitcast ptr %arrayidx.i354 to ptr + store <4 x ptr> <ptr @.str.139, ptr @.str.139, ptr @.str.140, ptr @.str.141>, ptr %40 + %arrayidx.i350 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 181 + %41 = bitcast ptr %arrayidx.i350 to ptr + store <4 x ptr> <ptr @.str.142, ptr @.str.142, ptr @.str.142, ptr @.str.143>, ptr %41 + %arrayidx.i346 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 185 + %42 = bitcast ptr %arrayidx.i346 to ptr + store <4 x ptr> <ptr @.str.144, ptr @.str.145, ptr @.str.145, ptr @.str.145>, ptr %42 + %arrayidx.i342 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 189 + %43 = bitcast ptr %arrayidx.i342 to ptr + store <4 x ptr> <ptr @.str.146, ptr @.str.147, ptr @.str.148, ptr @.str.148>, ptr %43 + %arrayidx.i338 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 193 + %44 = bitcast ptr %arrayidx.i338 to ptr + store <4 x ptr> <ptr @.str.148, ptr @.str.149, ptr @.str.150, ptr @.str.151>, ptr %44 + %arrayidx.i334 = getelementptr inbounds %struct.A, ptr %obj, i32 0, i32 0, i32 197 + %45 = bitcast ptr %arrayidx.i334 to ptr + store <4 x ptr> <ptr @.str.151, ptr @.str.151, ptr @.str.152, ptr @.str.153>, ptr %45 ret void } ; Function Attrs: argmemonly nounwind - declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1 + declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1) #1 ; Function Attrs: nounwind - declare void @llvm.stackprotector(i8*, i8**) #2 + declare void @llvm.stackprotector(ptr, ptr) #2 attributes #0 = { nounwind "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+strict-align,+thumb-mode,+vfp3" } attributes #1 = { argmemonly nounwind } diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-basic.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-basic.ll index ad729c2..2e8a054 100644 --- a/llvm/test/CodeGen/ARM/cortex-a57-misched-basic.ll +++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-basic.ll @@ -41,10 +41,10 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv8r-arm-none-eabi" ; Function Attrs: norecurse nounwind readnone -define hidden i32 @foo(i32 %a, i32 %b, i32 %c, i32* %d) local_unnamed_addr #0 { +define hidden i32 @foo(i32 %a, i32 %b, i32 %c, ptr %d) local_unnamed_addr #0 { entry: %xor = xor i32 %c, %b - %ld = load i32, i32* %d + %ld = load i32, ptr %d %add = add nsw i32 %xor, %ld %div = sdiv i32 %a, %b %sub = sub i32 %div, %add diff --git a/llvm/test/CodeGen/ARM/debug-info-blocks.ll b/llvm/test/CodeGen/ARM/debug-info-blocks.ll index 8ef341f..2f68abe 100644 --- a/llvm/test/CodeGen/ARM/debug-info-blocks.ll +++ b/llvm/test/CodeGen/ARM/debug-info-blocks.ll @@ -16,89 +16,89 @@ target triple = "thumbv7-apple-ios" %0 = type opaque %1 = type { [4 x i32] } -%2 = type <{ i8*, i32, i32, i8*, %struct.Re*, i8*, %3*, %struct.my_struct* }> +%2 = type <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr }> %3 = type opaque %struct.CP = type { float, float } %struct.CR = type { %struct.CP, %struct.CP } %struct.Re = type { i32, i32 } -%struct.__block_byref_mydata = type { i8*, %struct.__block_byref_mydata*, i32, i32, i8*, i8*, %0* } +%struct.__block_byref_mydata = type { ptr, ptr, i32, i32, ptr, ptr, ptr } %struct.my_struct = type opaque -@"\01L_OBJC_SELECTOR_REFERENCES_13" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" +@"\01L_OBJC_SELECTOR_REFERENCES_13" = external hidden global ptr, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" @"OBJC_IVAR_$_MyWork._bounds" = external hidden global i32, section "__DATA, __objc_const", align 4 @"OBJC_IVAR_$_MyWork._data" = external hidden global i32, section "__DATA, __objc_const", align 4 -@"\01L_OBJC_SELECTOR_REFERENCES_222" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" +@"\01L_OBJC_SELECTOR_REFERENCES_222" = external hidden global ptr, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone -declare i8* @objc_msgSend(i8*, i8*, ...) +declare ptr @objc_msgSend(ptr, ptr, ...) declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind -define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %loadedMydata, [4 x i32] %bounds.coerce0, [4 x i32] %data.coerce0) ssp !dbg !23 { - %1 = alloca %0*, align 4 +define hidden void @foobar_func_block_invoke_0(ptr %.block_descriptor, ptr %loadedMydata, [4 x i32] %bounds.coerce0, [4 x i32] %data.coerce0) ssp !dbg !23 { + %1 = alloca ptr, align 4 %bounds = alloca %struct.CR, align 4 %data = alloca %struct.CR, align 4 - call void @llvm.dbg.value(metadata i8* %.block_descriptor, metadata !27, metadata !DIExpression()), !dbg !129 - store %0* %loadedMydata, %0** %1, align 4 - call void @llvm.dbg.declare(metadata %0** %1, metadata !130, metadata !DIExpression()), !dbg !131 - %2 = bitcast %struct.CR* %bounds to %1* - %3 = getelementptr %1, %1* %2, i32 0, i32 0 - store [4 x i32] %bounds.coerce0, [4 x i32]* %3 - call void @llvm.dbg.declare(metadata %struct.CR* %bounds, metadata !132, metadata !DIExpression()), !dbg !133 - %4 = bitcast %struct.CR* %data to %1* - %5 = getelementptr %1, %1* %4, i32 0, i32 0 - store [4 x i32] %data.coerce0, [4 x i32]* %5 - call void @llvm.dbg.declare(metadata %struct.CR* %data, metadata !134, metadata !DIExpression()), !dbg !135 - %6 = bitcast i8* %.block_descriptor to %2* - %7 = getelementptr inbounds %2, %2* %6, i32 0, i32 6 - call void @llvm.dbg.declare(metadata %2* %6, metadata !136, metadata !163), !dbg !137 - call void @llvm.dbg.declare(metadata %2* %6, metadata !138, metadata !164), !dbg !137 - call void @llvm.dbg.declare(metadata %2* %6, metadata !139, metadata !165), !dbg !140 - %8 = load %0*, %0** %1, align 4, !dbg !141 - %9 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_13", !dbg !141 - %10 = bitcast %0* %8 to i8*, !dbg !141 - %11 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %10, i8* %9), !dbg !141 - %12 = bitcast i8* %11 to %0*, !dbg !141 - %13 = getelementptr inbounds %2, %2* %6, i32 0, i32 5, !dbg !141 - %14 = load i8*, i8** %13, !dbg !141 - %15 = bitcast i8* %14 to %struct.__block_byref_mydata*, !dbg !141 - %16 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %15, i32 0, i32 1, !dbg !141 - %17 = load %struct.__block_byref_mydata*, %struct.__block_byref_mydata** %16, !dbg !141 - %18 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %17, i32 0, i32 6, !dbg !141 - store %0* %12, %0** %18, align 4, !dbg !141 - %19 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !143 - %20 = load %3*, %3** %19, align 4, !dbg !143 - %21 = load i32, i32* @"OBJC_IVAR_$_MyWork._data", !dbg !143 - %22 = bitcast %3* %20 to i8*, !dbg !143 - %23 = getelementptr inbounds i8, i8* %22, i32 %21, !dbg !143 - %24 = bitcast i8* %23 to %struct.CR*, !dbg !143 - %25 = bitcast %struct.CR* %24 to i8*, !dbg !143 - %26 = bitcast %struct.CR* %data to i8*, !dbg !143 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 16, i1 false), !dbg !143 - %27 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !144 - %28 = load %3*, %3** %27, align 4, !dbg !144 - %29 = load i32, i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144 - %30 = bitcast %3* %28 to i8*, !dbg !144 - %31 = getelementptr inbounds i8, i8* %30, i32 %29, !dbg !144 - %32 = bitcast i8* %31 to %struct.CR*, !dbg !144 - %33 = bitcast %struct.CR* %32 to i8*, !dbg !144 - %34 = bitcast %struct.CR* %bounds to i8*, !dbg !144 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %33, i8* align 4 %34, i32 16, i1 false), !dbg !144 - %35 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !145 - %36 = load %3*, %3** %35, align 4, !dbg !145 - %37 = getelementptr inbounds %2, %2* %6, i32 0, i32 5, !dbg !145 - %38 = load i8*, i8** %37, !dbg !145 - %39 = bitcast i8* %38 to %struct.__block_byref_mydata*, !dbg !145 - %40 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %39, i32 0, i32 1, !dbg !145 - %41 = load %struct.__block_byref_mydata*, %struct.__block_byref_mydata** %40, !dbg !145 - %42 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %41, i32 0, i32 6, !dbg !145 - %43 = load %0*, %0** %42, align 4, !dbg !145 - %44 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_222", !dbg !145 - %45 = bitcast %3* %36 to i8*, !dbg !145 - call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*)*)(i8* %45, i8* %44, %0* %43), !dbg !145 + call void @llvm.dbg.value(metadata ptr %.block_descriptor, metadata !27, metadata !DIExpression()), !dbg !129 + store ptr %loadedMydata, ptr %1, align 4 + call void @llvm.dbg.declare(metadata ptr %1, metadata !130, metadata !DIExpression()), !dbg !131 + %2 = bitcast ptr %bounds to ptr + %3 = getelementptr %1, ptr %2, i32 0, i32 0 + store [4 x i32] %bounds.coerce0, ptr %3 + call void @llvm.dbg.declare(metadata ptr %bounds, metadata !132, metadata !DIExpression()), !dbg !133 + %4 = bitcast ptr %data to ptr + %5 = getelementptr %1, ptr %4, i32 0, i32 0 + store [4 x i32] %data.coerce0, ptr %5 + call void @llvm.dbg.declare(metadata ptr %data, metadata !134, metadata !DIExpression()), !dbg !135 + %6 = bitcast ptr %.block_descriptor to ptr + %7 = getelementptr inbounds %2, ptr %6, i32 0, i32 6 + call void @llvm.dbg.declare(metadata ptr %6, metadata !136, metadata !163), !dbg !137 + call void @llvm.dbg.declare(metadata ptr %6, metadata !138, metadata !164), !dbg !137 + call void @llvm.dbg.declare(metadata ptr %6, metadata !139, metadata !165), !dbg !140 + %8 = load ptr, ptr %1, align 4, !dbg !141 + %9 = load ptr, ptr @"\01L_OBJC_SELECTOR_REFERENCES_13", !dbg !141 + %10 = bitcast ptr %8 to ptr, !dbg !141 + %11 = call ptr @objc_msgSend(ptr %10, ptr %9), !dbg !141 + %12 = bitcast ptr %11 to ptr, !dbg !141 + %13 = getelementptr inbounds %2, ptr %6, i32 0, i32 5, !dbg !141 + %14 = load ptr, ptr %13, !dbg !141 + %15 = bitcast ptr %14 to ptr, !dbg !141 + %16 = getelementptr inbounds %struct.__block_byref_mydata, ptr %15, i32 0, i32 1, !dbg !141 + %17 = load ptr, ptr %16, !dbg !141 + %18 = getelementptr inbounds %struct.__block_byref_mydata, ptr %17, i32 0, i32 6, !dbg !141 + store ptr %12, ptr %18, align 4, !dbg !141 + %19 = getelementptr inbounds %2, ptr %6, i32 0, i32 6, !dbg !143 + %20 = load ptr, ptr %19, align 4, !dbg !143 + %21 = load i32, ptr @"OBJC_IVAR_$_MyWork._data", !dbg !143 + %22 = bitcast ptr %20 to ptr, !dbg !143 + %23 = getelementptr inbounds i8, ptr %22, i32 %21, !dbg !143 + %24 = bitcast ptr %23 to ptr, !dbg !143 + %25 = bitcast ptr %24 to ptr, !dbg !143 + %26 = bitcast ptr %data to ptr, !dbg !143 + call void @llvm.memcpy.p0.p0.i32(ptr align 4 %25, ptr align 4 %26, i32 16, i1 false), !dbg !143 + %27 = getelementptr inbounds %2, ptr %6, i32 0, i32 6, !dbg !144 + %28 = load ptr, ptr %27, align 4, !dbg !144 + %29 = load i32, ptr @"OBJC_IVAR_$_MyWork._bounds", !dbg !144 + %30 = bitcast ptr %28 to ptr, !dbg !144 + %31 = getelementptr inbounds i8, ptr %30, i32 %29, !dbg !144 + %32 = bitcast ptr %31 to ptr, !dbg !144 + %33 = bitcast ptr %32 to ptr, !dbg !144 + %34 = bitcast ptr %bounds to ptr, !dbg !144 + call void @llvm.memcpy.p0.p0.i32(ptr align 4 %33, ptr align 4 %34, i32 16, i1 false), !dbg !144 + %35 = getelementptr inbounds %2, ptr %6, i32 0, i32 6, !dbg !145 + %36 = load ptr, ptr %35, align 4, !dbg !145 + %37 = getelementptr inbounds %2, ptr %6, i32 0, i32 5, !dbg !145 + %38 = load ptr, ptr %37, !dbg !145 + %39 = bitcast ptr %38 to ptr, !dbg !145 + %40 = getelementptr inbounds %struct.__block_byref_mydata, ptr %39, i32 0, i32 1, !dbg !145 + %41 = load ptr, ptr %40, !dbg !145 + %42 = getelementptr inbounds %struct.__block_byref_mydata, ptr %41, i32 0, i32 6, !dbg !145 + %43 = load ptr, ptr %42, align 4, !dbg !145 + %44 = load ptr, ptr @"\01L_OBJC_SELECTOR_REFERENCES_222", !dbg !145 + %45 = bitcast ptr %36 to ptr, !dbg !145 + call void @objc_msgSend(ptr %45, ptr %44, ptr %43), !dbg !145 ret void, !dbg !146 } diff --git a/llvm/test/CodeGen/ARM/debug-info-d16-reg.ll b/llvm/test/CodeGen/ARM/debug-info-d16-reg.ll index bba2c39..f5e7ffe 100644 --- a/llvm/test/CodeGen/ARM/debug-info-d16-reg.ll +++ b/llvm/test/CodeGen/ARM/debug-info-d16-reg.ll @@ -10,51 +10,51 @@ target triple = "thumbv7-apple-darwin10" @.str = private unnamed_addr constant [11 x i8] c"%p %lf %c\0A\00", align 4 @.str1 = private unnamed_addr constant [6 x i8] c"point\00", align 4 -define i32 @inlineprinter(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize !dbg !9 { +define i32 @inlineprinter(ptr %ptr, double %val, i8 zeroext %c) nounwind optsize !dbg !9 { entry: - tail call void @llvm.dbg.value(metadata i8* %ptr, metadata !19, metadata !DIExpression()), !dbg !26 + tail call void @llvm.dbg.value(metadata ptr %ptr, metadata !19, metadata !DIExpression()), !dbg !26 tail call void @llvm.dbg.value(metadata double %val, metadata !20, metadata !DIExpression()), !dbg !26 tail call void @llvm.dbg.value(metadata i8 %c, metadata !21, metadata !DIExpression()), !dbg !26 %0 = zext i8 %c to i32, !dbg !27 - %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !27 + %1 = tail call i32 (ptr, ...) @printf(ptr @.str, ptr %ptr, double %val, i32 %0) nounwind, !dbg !27 ret i32 0, !dbg !29 } -define i32 @printer(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize noinline !dbg !0 { +define i32 @printer(ptr %ptr, double %val, i8 zeroext %c) nounwind optsize noinline !dbg !0 { entry: - tail call void @llvm.dbg.value(metadata i8* %ptr, metadata !16, metadata !DIExpression()), !dbg !30 + tail call void @llvm.dbg.value(metadata ptr %ptr, metadata !16, metadata !DIExpression()), !dbg !30 tail call void @llvm.dbg.value(metadata double %val, metadata !17, metadata !DIExpression()), !dbg !30 tail call void @llvm.dbg.value(metadata i8 %c, metadata !18, metadata !DIExpression()), !dbg !30 %0 = zext i8 %c to i32, !dbg !31 - %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !31 + %1 = tail call i32 (ptr, ...) @printf(ptr @.str, ptr %ptr, double %val, i32 %0) nounwind, !dbg !31 ret i32 0, !dbg !33 } -declare i32 @printf(i8* nocapture, ...) nounwind +declare i32 @printf(ptr nocapture, ...) nounwind declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone -define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize !dbg !10 { +define i32 @main(i32 %argc, ptr nocapture %argv) nounwind optsize !dbg !10 { entry: tail call void @llvm.dbg.value(metadata i32 %argc, metadata !22, metadata !DIExpression()), !dbg !34 - tail call void @llvm.dbg.value(metadata i8** %argv, metadata !23, metadata !DIExpression()), !dbg !34 + tail call void @llvm.dbg.value(metadata ptr %argv, metadata !23, metadata !DIExpression()), !dbg !34 %0 = sitofp i32 %argc to double, !dbg !35 %1 = fadd double %0, 5.555552e+05, !dbg !35 tail call void @llvm.dbg.value(metadata double %1, metadata !24, metadata !DIExpression()), !dbg !35 - %2 = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str1, i32 0, i32 0)) nounwind, !dbg !36 - %3 = getelementptr inbounds i8, i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !37 + %2 = tail call i32 @puts(ptr @.str1) nounwind, !dbg !36 + %3 = getelementptr inbounds i8, ptr @main, i32 %argc, !dbg !37 %4 = trunc i32 %argc to i8, !dbg !37 %5 = add i8 %4, 97, !dbg !37 - tail call void @llvm.dbg.value(metadata i8* %3, metadata !49, metadata !DIExpression()) nounwind, !dbg !38 + tail call void @llvm.dbg.value(metadata ptr %3, metadata !49, metadata !DIExpression()) nounwind, !dbg !38 tail call void @llvm.dbg.value(metadata double %1, metadata !50, metadata !DIExpression()) nounwind, !dbg !38 tail call void @llvm.dbg.value(metadata i8 %5, metadata !51, metadata !DIExpression()) nounwind, !dbg !38 %6 = zext i8 %5 to i32, !dbg !39 - %7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %3, double %1, i32 %6) nounwind, !dbg !39 - %8 = tail call i32 @printer(i8* %3, double %1, i8 zeroext %5) nounwind, !dbg !40 + %7 = tail call i32 (ptr, ...) @printf(ptr @.str, ptr %3, double %1, i32 %6) nounwind, !dbg !39 + %8 = tail call i32 @printer(ptr %3, double %1, i8 zeroext %5) nounwind, !dbg !40 ret i32 0, !dbg !41 } -declare i32 @puts(i8* nocapture) nounwind +declare i32 @puts(ptr nocapture) nounwind !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!48} diff --git a/llvm/test/CodeGen/ARM/debug-info-s16-reg.ll b/llvm/test/CodeGen/ARM/debug-info-s16-reg.ll index c260b2e..2f25614 100644 --- a/llvm/test/CodeGen/ARM/debug-info-s16-reg.ll +++ b/llvm/test/CodeGen/ARM/debug-info-s16-reg.ll @@ -10,53 +10,53 @@ target triple = "thumbv7-apple-macosx10.6.7" @.str = private unnamed_addr constant [11 x i8] c"%p %lf %c\0A\00" @.str1 = private unnamed_addr constant [6 x i8] c"point\00" -define i32 @inlineprinter(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize ssp !dbg !0 { +define i32 @inlineprinter(ptr %ptr, float %val, i8 zeroext %c) nounwind optsize ssp !dbg !0 { entry: - tail call void @llvm.dbg.value(metadata i8* %ptr, metadata !8, metadata !DIExpression()), !dbg !24 + tail call void @llvm.dbg.value(metadata ptr %ptr, metadata !8, metadata !DIExpression()), !dbg !24 tail call void @llvm.dbg.value(metadata float %val, metadata !10, metadata !DIExpression()), !dbg !25 tail call void @llvm.dbg.value(metadata i8 %c, metadata !12, metadata !DIExpression()), !dbg !26 %conv = fpext float %val to double, !dbg !27 %conv3 = zext i8 %c to i32, !dbg !27 - %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !27 + %call = tail call i32 (ptr, ...) @printf(ptr @.str, ptr %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !27 ret i32 0, !dbg !29 } -declare i32 @printf(i8* nocapture, ...) nounwind optsize +declare i32 @printf(ptr nocapture, ...) nounwind optsize -define i32 @printer(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize noinline ssp !dbg !6 { +define i32 @printer(ptr %ptr, float %val, i8 zeroext %c) nounwind optsize noinline ssp !dbg !6 { entry: - tail call void @llvm.dbg.value(metadata i8* %ptr, metadata !14, metadata !DIExpression()), !dbg !30 + tail call void @llvm.dbg.value(metadata ptr %ptr, metadata !14, metadata !DIExpression()), !dbg !30 tail call void @llvm.dbg.value(metadata float %val, metadata !15, metadata !DIExpression()), !dbg !31 tail call void @llvm.dbg.value(metadata i8 %c, metadata !16, metadata !DIExpression()), !dbg !32 %conv = fpext float %val to double, !dbg !33 %conv3 = zext i8 %c to i32, !dbg !33 - %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !33 + %call = tail call i32 (ptr, ...) @printf(ptr @.str, ptr %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !33 ret i32 0, !dbg !35 } -define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize ssp !dbg !7 { +define i32 @main(i32 %argc, ptr nocapture %argv) nounwind optsize ssp !dbg !7 { entry: tail call void @llvm.dbg.value(metadata i32 %argc, metadata !17, metadata !DIExpression()), !dbg !36 - tail call void @llvm.dbg.value(metadata i8** %argv, metadata !18, metadata !DIExpression()), !dbg !37 + tail call void @llvm.dbg.value(metadata ptr %argv, metadata !18, metadata !DIExpression()), !dbg !37 %conv = sitofp i32 %argc to double, !dbg !38 %add = fadd double %conv, 5.555552e+05, !dbg !38 %conv1 = fptrunc double %add to float, !dbg !38 tail call void @llvm.dbg.value(metadata float %conv1, metadata !22, metadata !DIExpression()), !dbg !38 - %call = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str1, i32 0, i32 0)) nounwind optsize, !dbg !39 - %add.ptr = getelementptr i8, i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !40 + %call = tail call i32 @puts(ptr @.str1) nounwind optsize, !dbg !39 + %add.ptr = getelementptr i8, ptr @main, i32 %argc, !dbg !40 %add5 = add nsw i32 %argc, 97, !dbg !40 %conv6 = trunc i32 %add5 to i8, !dbg !40 - tail call void @llvm.dbg.value(metadata i8* %add.ptr, metadata !58, metadata !DIExpression()) nounwind, !dbg !41 + tail call void @llvm.dbg.value(metadata ptr %add.ptr, metadata !58, metadata !DIExpression()) nounwind, !dbg !41 tail call void @llvm.dbg.value(metadata float %conv1, metadata !60, metadata !DIExpression()) nounwind, !dbg !42 tail call void @llvm.dbg.value(metadata i8 %conv6, metadata !62, metadata !DIExpression()) nounwind, !dbg !43 %conv.i = fpext float %conv1 to double, !dbg !44 %conv3.i = and i32 %add5, 255, !dbg !44 - %call.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %add.ptr, double %conv.i, i32 %conv3.i) nounwind optsize, !dbg !44 - %call14 = tail call i32 @printer(i8* %add.ptr, float %conv1, i8 zeroext %conv6) optsize, !dbg !45 + %call.i = tail call i32 (ptr, ...) @printf(ptr @.str, ptr %add.ptr, double %conv.i, i32 %conv3.i) nounwind optsize, !dbg !44 + %call14 = tail call i32 @printer(ptr %add.ptr, float %conv1, i8 zeroext %conv6) optsize, !dbg !45 ret i32 0, !dbg !46 } -declare i32 @puts(i8* nocapture) nounwind optsize +declare i32 @puts(ptr nocapture) nounwind optsize declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone diff --git a/llvm/test/CodeGen/ARM/dwarf-eh.ll b/llvm/test/CodeGen/ARM/dwarf-eh.ll index 34a2032..75cf0861 100644 --- a/llvm/test/CodeGen/ARM/dwarf-eh.ll +++ b/llvm/test/CodeGen/ARM/dwarf-eh.ll @@ -9,55 +9,55 @@ target triple = "armv5e--netbsd-eabi" %struct.exception = type { i8 } -@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* +@_ZTVN10__cxxabiv117__class_type_infoE = external global ptr @_ZTS9exception = linkonce_odr constant [11 x i8] c"9exception\00" -@_ZTI9exception = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @_ZTS9exception, i32 0, i32 0) } +@_ZTI9exception = linkonce_odr unnamed_addr constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i32 2), ptr @_ZTS9exception } -define void @f() uwtable personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { - %1 = alloca i8* +define void @f() uwtable personality ptr @__gxx_personality_v0 { + %1 = alloca ptr %2 = alloca i32 - %e = alloca %struct.exception*, align 4 + %e = alloca ptr, align 4 invoke void @g() to label %3 unwind label %4 br label %16 - %5 = landingpad { i8*, i32 } - catch i8* bitcast ({ i8*, i8* }* @_ZTI9exception to i8*) - %6 = extractvalue { i8*, i32 } %5, 0 - store i8* %6, i8** %1 - %7 = extractvalue { i8*, i32 } %5, 1 - store i32 %7, i32* %2 + %5 = landingpad { ptr, i32 } + catch ptr @_ZTI9exception + %6 = extractvalue { ptr, i32 } %5, 0 + store ptr %6, ptr %1 + %7 = extractvalue { ptr, i32 } %5, 1 + store i32 %7, ptr %2 br label %8 - %9 = load i32, i32* %2 - %10 = call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTI9exception to i8*)) nounwind + %9 = load i32, ptr %2 + %10 = call i32 @llvm.eh.typeid.for(ptr @_ZTI9exception) nounwind %11 = icmp eq i32 %9, %10 br i1 %11, label %12, label %17 - %13 = load i8*, i8** %1 - %14 = call i8* @__cxa_begin_catch(i8* %13) #3 - %15 = bitcast i8* %14 to %struct.exception* - store %struct.exception* %15, %struct.exception** %e + %13 = load ptr, ptr %1 + %14 = call ptr @__cxa_begin_catch(ptr %13) #3 + %15 = bitcast ptr %14 to ptr + store ptr %15, ptr %e call void @__cxa_end_catch() br label %16 ret void - %18 = load i8*, i8** %1 - %19 = load i32, i32* %2 - %20 = insertvalue { i8*, i32 } undef, i8* %18, 0 - %21 = insertvalue { i8*, i32 } %20, i32 %19, 1 - resume { i8*, i32 } %21 + %18 = load ptr, ptr %1 + %19 = load i32, ptr %2 + %20 = insertvalue { ptr, i32 } undef, ptr %18, 0 + %21 = insertvalue { ptr, i32 } %20, i32 %19, 1 + resume { ptr, i32 } %21 } declare void @g() declare i32 @__gxx_personality_v0(...) -declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone +declare i32 @llvm.eh.typeid.for(ptr) nounwind readnone -declare i8* @__cxa_begin_catch(i8*) +declare ptr @__cxa_begin_catch(ptr) declare void @__cxa_end_catch() diff --git a/llvm/test/CodeGen/ARM/ldrcppic.ll b/llvm/test/CodeGen/ARM/ldrcppic.ll index 0a4c8ab..5ec70b50 100644 --- a/llvm/test/CodeGen/ARM/ldrcppic.ll +++ b/llvm/test/CodeGen/ARM/ldrcppic.ll @@ -23,7 +23,7 @@ define dso_local fastcc void @_ZN15UsecaseSelector26IsAllowedImplDefinedFormatE1 br i1 undef, label %4, label %13 ; <label>:4: ; preds = %3 - %5 = getelementptr inbounds [16 x i32], ptr bitcast (<{ i32, i32, i32, i32, [12 x i32] }>* @_ZN15UsecaseSelector25AllowedImplDefinedFormatsE to ptr), i32 0, i32 undef + %5 = getelementptr inbounds [16 x i32], ptr @_ZN15UsecaseSelector25AllowedImplDefinedFormatsE, i32 0, i32 undef %6 = load i32, ptr %5, align 4 %7 = icmp eq i32 10, %6 br i1 %7, label %9, label %8 diff --git a/llvm/test/CodeGen/ARM/misched-copy-arm.ll b/llvm/test/CodeGen/ARM/misched-copy-arm.ll index dbed465..d830a2c 100644 --- a/llvm/test/CodeGen/ARM/misched-copy-arm.ll +++ b/llvm/test/CodeGen/ARM/misched-copy-arm.ll @@ -9,7 +9,7 @@ ; CHECK: t2ADDrr ; CHECK: t2CMPrr ; CHECK: COPY -define i32 @postinc(i32 %a, i32* nocapture %d, i32 %s) nounwind { +define i32 @postinc(i32 %a, ptr nocapture %d, i32 %s) nounwind { entry: %cmp4 = icmp eq i32 %a, 0 br i1 %cmp4, label %for.end, label %for.body @@ -18,8 +18,8 @@ for.body: ; preds = %entry, %for.body %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %s.05 = phi i32 [ %mul, %for.body ], [ 0, %entry ] %indvars.iv.next = add i32 %indvars.iv, %s - %arrayidx = getelementptr inbounds i32, i32* %d, i32 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %d, i32 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 %mul = mul nsw i32 %0, %s.05 %exitcond = icmp eq i32 %indvars.iv.next, %a br i1 %exitcond, label %for.end, label %for.body @@ -41,16 +41,16 @@ for.end: ; preds = %for.body, %entry %union.rtunion_def = type { i64 } ; Function Attrs: nounwind ssp -declare hidden fastcc void @df_ref_record(i32* nocapture, %struct.rtx_def*, %struct.rtx_def**, %struct.rtx_def*, i32, i32) #0 +declare hidden fastcc void @df_ref_record(ptr nocapture, ptr, ptr, ptr, i32, i32) #0 ; Function Attrs: nounwind ssp -define hidden fastcc void @df_def_record_1(i32* nocapture %df, %struct.rtx_def* %x, %struct.rtx_def* %insn) #0 { +define hidden fastcc void @df_def_record_1(ptr nocapture %df, ptr %x, ptr %insn) #0 { entry: br label %while.cond while.cond: ; preds = %if.end28, %entry - %loc.0 = phi %struct.rtx_def** [ %rtx31, %if.end28 ], [ undef, %entry ] - %dst.0 = phi %struct.rtx_def* [ %0, %if.end28 ], [ undef, %entry ] + %loc.0 = phi ptr [ %rtx31, %if.end28 ], [ undef, %entry ] + %dst.0 = phi ptr [ %0, %if.end28 ], [ undef, %entry ] switch i32 undef, label %if.end47 [ i32 61, label %if.then46 i32 64, label %if.then24 @@ -62,14 +62,14 @@ if.then24: ; preds = %while.cond br label %if.end28 if.end28: ; preds = %if.then24, %while.cond, %while.cond - %dst.1 = phi %struct.rtx_def* [ undef, %if.then24 ], [ %dst.0, %while.cond ], [ %dst.0, %while.cond ] - %arrayidx30 = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %dst.1, i32 0, i32 1, i32 0 - %rtx31 = bitcast %union.rtunion_def* %arrayidx30 to %struct.rtx_def** - %0 = load %struct.rtx_def*, %struct.rtx_def** %rtx31, align 4 + %dst.1 = phi ptr [ undef, %if.then24 ], [ %dst.0, %while.cond ], [ %dst.0, %while.cond ] + %arrayidx30 = getelementptr inbounds %struct.rtx_def, ptr %dst.1, i32 0, i32 1, i32 0 + %rtx31 = bitcast ptr %arrayidx30 to ptr + %0 = load ptr, ptr %rtx31, align 4 br label %while.cond if.then46: ; preds = %while.cond - tail call fastcc void @df_ref_record(i32* %df, %struct.rtx_def* %dst.0, %struct.rtx_def** %loc.0, %struct.rtx_def* %insn, i32 0, i32 undef) + tail call fastcc void @df_ref_record(ptr %df, ptr %dst.0, ptr %loc.0, ptr %insn, i32 0, i32 undef) unreachable if.end47: ; preds = %while.cond diff --git a/llvm/test/CodeGen/ARM/no-register-coalescing-in-returnsTwice.mir b/llvm/test/CodeGen/ARM/no-register-coalescing-in-returnsTwice.mir index 9285bbc..5c59566 100644 --- a/llvm/test/CodeGen/ARM/no-register-coalescing-in-returnsTwice.mir +++ b/llvm/test/CodeGen/ARM/no-register-coalescing-in-returnsTwice.mir @@ -33,55 +33,55 @@ define i32 @main() { entry: %P0 = alloca %struct.S37, align 8 - %0 = bitcast %struct.S37* %P0 to %struct.S18* + %0 = bitcast ptr %P0 to ptr %jb1 = alloca [20 x i64], align 8 %P1 = alloca %struct.S18, align 8 %jb2 = alloca [20 x i64], align 8 - %1 = bitcast %struct.S37* %P0 to i8* - %M2.i = getelementptr inbounds %struct.S37, %struct.S37* %P0, i32 0, i32 2 - %2 = bitcast %struct.S38* %M2.i to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(48) %2, i8 0, i64 48, i1 false) - %M6.i = getelementptr inbounds %struct.S37, %struct.S37* %P0, i32 0, i32 7 - store i32 0, i32* %M6.i, align 8 - %3 = bitcast [20 x i64]* %jb1 to i8* - %arraydecay1 = bitcast [20 x i64]* %jb1 to i64* - %call1 = call i32 @setjmp(i64* nonnull %arraydecay1) + %1 = bitcast ptr %P0 to ptr + %M2.i = getelementptr inbounds %struct.S37, ptr %P0, i32 0, i32 2 + %2 = bitcast ptr %M2.i to ptr + call void @llvm.memset.p0.i64(ptr nonnull align 8 dereferenceable(48) %2, i8 0, i64 48, i1 false) + %M6.i = getelementptr inbounds %struct.S37, ptr %P0, i32 0, i32 7 + store i32 0, ptr %M6.i, align 8 + %3 = bitcast ptr %jb1 to ptr + %arraydecay1 = bitcast ptr %jb1 to ptr + %call1 = call i32 @setjmp(ptr nonnull %arraydecay1) %tobool = icmp eq i32 %call1, 0 br i1 %tobool, label %if.then, label %if.end if.then: ; preds = %entry - %4 = bitcast [20 x i64]* %jb1 to i64* - call void (i64*, %struct.S37*, ...) @_Z3barPx3S37z(i64* nonnull %4, %struct.S37* nonnull byval(%struct.S37) align 8 %P0) + %4 = bitcast ptr %jb1 to ptr + call void (ptr, ptr, ...) @_Z3barPx3S37z(ptr nonnull %4, ptr nonnull byval(%struct.S37) align 8 %P0) unreachable if.end: ; preds = %entry - %5 = bitcast [20 x i64]* %jb1 to i8* - %6 = bitcast %struct.S37* %P0 to i8* + %5 = bitcast ptr %jb1 to ptr + %6 = bitcast ptr %P0 to ptr call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{sp},~{lr}"() - %7 = bitcast %struct.S18* %0 to i8* - %BM0.i = getelementptr inbounds %struct.S18, %struct.S18* %0, i32 0, i32 2 - store double 0.000000e+00, double* %BM0.i, align 8 - %M0.i = getelementptr inbounds %struct.S18, %struct.S18* %0, i32 0, i32 5 - store i32 42, i32* %M0.i, align 8 - %M3.i = getelementptr inbounds %struct.S18, %struct.S18* %0, i32 0, i32 7 - %8 = bitcast %struct.S23* %M3.i to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull align 8 dereferenceable(28) %8, i8 0, i64 28, i1 false) - %9 = bitcast [20 x i64]* %jb1 to i8* - %arraydecay42 = bitcast [20 x i64]* %jb1 to i64* - %call5 = call i32 @setjmp(i64* nonnull %arraydecay42) + %7 = bitcast ptr %0 to ptr + %BM0.i = getelementptr inbounds %struct.S18, ptr %0, i32 0, i32 2 + store double 0.000000e+00, ptr %BM0.i, align 8 + %M0.i = getelementptr inbounds %struct.S18, ptr %0, i32 0, i32 5 + store i32 42, ptr %M0.i, align 8 + %M3.i = getelementptr inbounds %struct.S18, ptr %0, i32 0, i32 7 + %8 = bitcast ptr %M3.i to ptr + call void @llvm.memset.p0.i64(ptr nonnull align 8 dereferenceable(28) %8, i8 0, i64 28, i1 false) + %9 = bitcast ptr %jb1 to ptr + %arraydecay42 = bitcast ptr %jb1 to ptr + %call5 = call i32 @setjmp(ptr nonnull %arraydecay42) %tobool6 = icmp eq i32 %call5, 0 br i1 %tobool6, label %if.then7, label %if.end10 if.then7: ; preds = %if.end - %10 = bitcast [20 x i64]* %jb1 to i64* - call void (i64*, %struct.S18*, ...) @_Z3fooPx3S18z(i64* nonnull %10, %struct.S18* nonnull byval(%struct.S18) align 8 %0) + %10 = bitcast ptr %jb1 to ptr + call void (ptr, ptr, ...) @_Z3fooPx3S18z(ptr nonnull %10, ptr nonnull byval(%struct.S18) align 8 %0) unreachable if.end10: ; preds = %if.end - %11 = bitcast [20 x i64]* %jb1 to i8* - %12 = bitcast %struct.S18* %0 to i8* + %11 = bitcast ptr %jb1 to ptr + %12 = bitcast ptr %0 to ptr ret i32 0 } - declare i32 @setjmp(i64*) - declare void @_Z3barPx3S37z(i64*, %struct.S37* byval(%struct.S37) align 8, ...) - declare void @_Z3fooPx3S18z(i64*, %struct.S18* byval(%struct.S18) align 8, ...) - declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) + declare i32 @setjmp(ptr) + declare void @_Z3barPx3S37z(ptr, ptr byval(%struct.S37) align 8, ...) + declare void @_Z3fooPx3S18z(ptr, ptr byval(%struct.S18) align 8, ...) + declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) ... --- name: main diff --git a/llvm/test/CodeGen/ARM/readonly-aliases.ll b/llvm/test/CodeGen/ARM/readonly-aliases.ll index b12c5ea..11014e9 100644 --- a/llvm/test/CodeGen/ARM/readonly-aliases.ll +++ b/llvm/test/CodeGen/ARM/readonly-aliases.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple thumbv7-unknown-linux-android -filetype asm -o - %s | FileCheck %s @a = protected constant <{ i32, i32 }> <{ i32 0, i32 0 }> -@b = protected alias i32, getelementptr(i32, ptr getelementptr inbounds (<{ i32, i32 }>, <{ i32, i32 }>* @a, i32 0, i32 1), i32 -1) +@b = protected alias i32, getelementptr(i32, ptr getelementptr inbounds (<{ i32, i32 }>, ptr @a, i32 0, i32 1), i32 -1) declare void @f(ptr) diff --git a/llvm/test/CodeGen/ARM/tail-dup-kill-flags.ll b/llvm/test/CodeGen/ARM/tail-dup-kill-flags.ll index bce6cdc4..2ad9ded 100644 --- a/llvm/test/CodeGen/ARM/tail-dup-kill-flags.ll +++ b/llvm/test/CodeGen/ARM/tail-dup-kill-flags.ll @@ -8,19 +8,19 @@ target triple = "thumbv7-apple-ios8.0.0" ; CHECK-LABEL: @test -%struct.cdiff_ctx = type { i8*, %struct.cdiff_node*, %struct.cdiff_node*, %struct.cdiff_node*, %struct.cdiff_node* } -%struct.cdiff_node = type { i32, i8*, i8*, %struct.cdiff_node* } +%struct.cdiff_ctx = type { ptr, ptr, ptr, ptr, ptr } +%struct.cdiff_node = type { i32, ptr, ptr, ptr } declare i32 @logg(i32) -define hidden i32 @test(%struct.cdiff_ctx* nocapture %ctx, %struct.cdiff_node* %tmp10) { +define hidden i32 @test(ptr nocapture %ctx, ptr %tmp10) { bb: br label %.outer bb33: ; preds = %bb92, %.outer %lines.0 = phi i32 [ %tmp37, %bb92 ], [ %lines.0.ph, %.outer ] %tmp37 = add i32 %lines.0, 1 - %tmp39 = load i32, i32* %tmp57, align 4 + %tmp39 = load i32, ptr %tmp57, align 4 %tmp40 = icmp eq i32 %tmp39, %tmp37 br i1 %tmp40, label %bb41, label %bb92 @@ -34,14 +34,14 @@ bb47: ; preds = %bb41 ret i32 -1 bb53: ; preds = %bb41 - %tmp54 = getelementptr inbounds %struct.cdiff_node, %struct.cdiff_node* %del.0.ph, i32 0, i32 3 - %tmp55 = load %struct.cdiff_node*, %struct.cdiff_node** %tmp54, align 4 + %tmp54 = getelementptr inbounds %struct.cdiff_node, ptr %del.0.ph, i32 0, i32 3 + %tmp55 = load ptr, ptr %tmp54, align 4 br label %.outer .outer: ; preds = %bb53, %bb - %del.0.ph = phi %struct.cdiff_node* [ %tmp55, %bb53 ], [ null, %bb ] + %del.0.ph = phi ptr [ %tmp55, %bb53 ], [ null, %bb ] %lines.0.ph = phi i32 [ 1, %bb53 ], [ 0, %bb ] - %tmp57 = getelementptr inbounds %struct.cdiff_node, %struct.cdiff_node* %del.0.ph, i32 0, i32 0 + %tmp57 = getelementptr inbounds %struct.cdiff_node, ptr %del.0.ph, i32 0, i32 0 br label %bb33 bb92: ; preds = %bb33 diff --git a/llvm/test/CodeGen/Thumb/PR36658.mir b/llvm/test/CodeGen/Thumb/PR36658.mir index 26721d6..e298663 100644 --- a/llvm/test/CodeGen/Thumb/PR36658.mir +++ b/llvm/test/CodeGen/Thumb/PR36658.mir @@ -26,12 +26,12 @@ declare i32 @foo2(i32) - declare i32 @foo3(i32*) + declare i32 @foo3(ptr) ; Function Attrs: nounwind optsize - define internal fastcc i32 @foo4(i32* nocapture %ignore_ptr) #0 { + define internal fastcc i32 @foo4(ptr nocapture %ignore_ptr) #0 { entry: - %call = tail call i32 @foo3(i32* undef) + %call = tail call i32 @foo3(ptr undef) switch i32 %call, label %sw.epilog [ i32 120, label %sw.bb i32 48, label %sw.bb73 @@ -62,7 +62,7 @@ br label %while.cond while.cond: ; preds = %while.cond, %sw.bb - %call5 = tail call i32 @foo3(i32* null) + %call5 = tail call i32 @foo3(ptr null) br label %while.cond sw.bb73: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry @@ -74,7 +74,7 @@ br label %while.body83 while.body83: ; preds = %while.body83.preheader, %while.body83 - %call87 = tail call i32 @foo3(i32* null) + %call87 = tail call i32 @foo3(ptr null) br label %while.body83 while.end88: ; preds = %sw.bb73 @@ -82,7 +82,7 @@ unreachable sw.bb91: ; preds = %entry - store i32 1, i32* %ignore_ptr, align 4 + store i32 1, ptr %ignore_ptr, align 4 br label %cleanup sw.bb93: ; preds = %entry @@ -110,7 +110,7 @@ br i1 undef, label %cleanup, label %if.then109 if.then109: ; preds = %sw.bb107 - %call110 = tail call i32 bitcast (i32 (...)* @foo1 to i32 (i8*, i32)*)(i8* undef, i32 %call) + %call110 = tail call i32 @foo1(ptr undef, i32 %call) unreachable sw.epilog: ; preds = %entry @@ -123,7 +123,7 @@ } ; Function Attrs: nounwind - declare void @llvm.stackprotector(i8*, i8**) #1 + declare void @llvm.stackprotector(ptr, ptr) #1 attributes #0 = { nounwind optsize } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/Thumb/branch-to-return.ll b/llvm/test/CodeGen/Thumb/branch-to-return.ll index 5bfccc0..11e8add 100644 --- a/llvm/test/CodeGen/Thumb/branch-to-return.ll +++ b/llvm/test/CodeGen/Thumb/branch-to-return.ll @@ -3,7 +3,7 @@ ; Test the branch to return in BB4 is converted to return. -define i32 @foo(i32* %x, i32 %n) { +define i32 @foo(ptr %x, i32 %n) { ; CHECK-LABEL: foo: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r1, #1 @@ -47,7 +47,7 @@ for.body.preheader: ; preds = %entry br i1 %min.iters.check, label %for.body.preheader1, label %middle.block middle.block: - %x3 = load i32, i32* %x, align 4 + %x3 = load i32, ptr %x, align 4 %cmp.n = icmp eq i32 %n.vec, %n br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader1 @@ -58,8 +58,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %n.vec, %for.body.preheader1 ] %r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 - %v5 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08 + %v5 = load i32, ptr %arrayidx, align 4 %add = add nsw i32 %v5, %r.07 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, %n diff --git a/llvm/test/CodeGen/Thumb/tbb-reuse.mir b/llvm/test/CodeGen/Thumb/tbb-reuse.mir index b18a685..a085c31 100644 --- a/llvm/test/CodeGen/Thumb/tbb-reuse.mir +++ b/llvm/test/CodeGen/Thumb/tbb-reuse.mir @@ -49,7 +49,7 @@ } ; Function Attrs: nounwind - declare void @llvm.stackprotector(i8*, i8**) #0 + declare void @llvm.stackprotector(ptr, ptr) #0 attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll b/llvm/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll index 99daf7e..8ee7cd1 100644 --- a/llvm/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll +++ b/llvm/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll @@ -1,37 +1,37 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -frame-pointer=all -mcpu=cortex-a8 | FileCheck %s ; rdar://10676853 -%struct.Dict_node_struct = type { i8*, %struct.Word_file_struct*, %struct.Exp_struct*, %struct.Dict_node_struct*, %struct.Dict_node_struct* } -%struct.Word_file_struct = type { [60 x i8], i32, %struct.Word_file_struct* } +%struct.Dict_node_struct = type { ptr, ptr, ptr, ptr, ptr } +%struct.Word_file_struct = type { [60 x i8], i32, ptr } %struct.Exp_struct = type { i8, i8, i8, i8, %union.anon } -%union.anon = type { %struct.E_list_struct* } -%struct.E_list_struct = type { %struct.E_list_struct*, %struct.Exp_struct* } +%union.anon = type { ptr } +%struct.E_list_struct = type { ptr, ptr } -@lookup_list = external hidden unnamed_addr global %struct.Dict_node_struct*, align 4 +@lookup_list = external hidden unnamed_addr global ptr, align 4 -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind -define hidden fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %dn, i8* nocapture %s) nounwind ssp { +define hidden fastcc void @rdictionary_lookup(ptr %dn, ptr nocapture %s) nounwind ssp { ; CHECK-LABEL: rdictionary_lookup: entry: br label %tailrecurse tailrecurse: ; preds = %if.then10, %entry - %dn.tr = phi %struct.Dict_node_struct* [ %dn, %entry ], [ %9, %if.then10 ] - %cmp = icmp eq %struct.Dict_node_struct* %dn.tr, null + %dn.tr = phi ptr [ %dn, %entry ], [ %9, %if.then10 ] + %cmp = icmp eq ptr %dn.tr, null br i1 %cmp, label %if.end11, label %if.end if.end: ; preds = %tailrecurse - %string = getelementptr inbounds %struct.Dict_node_struct, %struct.Dict_node_struct* %dn.tr, i32 0, i32 0 - %0 = load i8*, i8** %string, align 4 + %string = getelementptr inbounds %struct.Dict_node_struct, ptr %dn.tr, i32 0, i32 0 + %0 = load ptr, ptr %string, align 4 br label %while.cond.i while.cond.i: ; preds = %while.body.i, %if.end - %1 = phi i8* [ %s, %if.end ], [ %incdec.ptr.i, %while.body.i ] - %storemerge.i = phi i8* [ %0, %if.end ], [ %incdec.ptr6.i, %while.body.i ] - %2 = load i8, i8* %1, align 1 + %1 = phi ptr [ %s, %if.end ], [ %incdec.ptr.i, %while.body.i ] + %storemerge.i = phi ptr [ %0, %if.end ], [ %incdec.ptr6.i, %while.body.i ] + %2 = load i8, ptr %1, align 1 %cmp.i = icmp eq i8 %2, 0 - %.pre.i = load i8, i8* %storemerge.i, align 1 + %.pre.i = load i8, ptr %storemerge.i, align 1 br i1 %cmp.i, label %lor.lhs.false.i, label %land.end.i land.end.i: ; preds = %while.cond.i @@ -39,8 +39,8 @@ land.end.i: ; preds = %while.cond.i br i1 %cmp4.i, label %while.body.i, label %while.end.i while.body.i: ; preds = %land.end.i - %incdec.ptr.i = getelementptr inbounds i8, i8* %1, i32 1 - %incdec.ptr6.i = getelementptr inbounds i8, i8* %storemerge.i, i32 1 + %incdec.ptr.i = getelementptr inbounds i8, ptr %1, i32 1 + %incdec.ptr6.i = getelementptr inbounds i8, ptr %storemerge.i, i32 1 br label %while.cond.i while.end.i: ; preds = %land.end.i @@ -68,22 +68,22 @@ if.end3: ; preds = %dict_match.exit, %l ; CHECK: cmp ; CHECK-NOT: cbnz %storemerge1.i3 = phi i32 [ %sub.i, %dict_match.exit ], [ 0, %lor.lhs.false.i ], [ 0, %while.end.i ] - %right = getelementptr inbounds %struct.Dict_node_struct, %struct.Dict_node_struct* %dn.tr, i32 0, i32 4 - %4 = load %struct.Dict_node_struct*, %struct.Dict_node_struct** %right, align 4 - tail call fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %4, i8* %s) + %right = getelementptr inbounds %struct.Dict_node_struct, ptr %dn.tr, i32 0, i32 4 + %4 = load ptr, ptr %right, align 4 + tail call fastcc void @rdictionary_lookup(ptr %4, ptr %s) %cmp4 = icmp eq i32 %storemerge1.i3, 0 br i1 %cmp4, label %if.then5, label %if.end8 if.then5: ; preds = %if.end3 - %call6 = tail call fastcc i8* @xalloc(i32 20) - %5 = bitcast i8* %call6 to %struct.Dict_node_struct* - %6 = bitcast %struct.Dict_node_struct* %dn.tr to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %call6, i8* align 4 %6, i32 16, i1 false) - %7 = load %struct.Dict_node_struct*, %struct.Dict_node_struct** @lookup_list, align 4 - %right7 = getelementptr inbounds i8, i8* %call6, i32 16 - %8 = bitcast i8* %right7 to %struct.Dict_node_struct** - store %struct.Dict_node_struct* %7, %struct.Dict_node_struct** %8, align 4 - store %struct.Dict_node_struct* %5, %struct.Dict_node_struct** @lookup_list, align 4 + %call6 = tail call fastcc ptr @xalloc(i32 20) + %5 = bitcast ptr %call6 to ptr + %6 = bitcast ptr %dn.tr to ptr + tail call void @llvm.memcpy.p0.p0.i32(ptr align 4 %call6, ptr align 4 %6, i32 16, i1 false) + %7 = load ptr, ptr @lookup_list, align 4 + %right7 = getelementptr inbounds i8, ptr %call6, i32 16 + %8 = bitcast ptr %right7 to ptr + store ptr %7, ptr %8, align 4 + store ptr %5, ptr @lookup_list, align 4 br label %if.then10 if.end8: ; preds = %if.end3 @@ -91,8 +91,8 @@ if.end8: ; preds = %if.end3 br i1 %cmp9, label %if.then10, label %if.end11 if.then10: ; preds = %if.end8, %if.then5, %dict_match.exit - %left = getelementptr inbounds %struct.Dict_node_struct, %struct.Dict_node_struct* %dn.tr, i32 0, i32 3 - %9 = load %struct.Dict_node_struct*, %struct.Dict_node_struct** %left, align 4 + %left = getelementptr inbounds %struct.Dict_node_struct, ptr %dn.tr, i32 0, i32 3 + %9 = load ptr, ptr %left, align 4 br label %tailrecurse if.end11: ; preds = %if.end8, %tailrecurse @@ -100,4 +100,4 @@ if.end11: ; preds = %if.end8, %tailrecur } ; Materializable -declare hidden fastcc i8* @xalloc(i32) nounwind ssp +declare hidden fastcc ptr @xalloc(i32) nounwind ssp diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir index ecaf68d..84ff0d7 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - define hidden i32 @max_min_add_reduce(i8* %input_1_vect, i8* %input_2_vect, i32 %input_1_offset, i32 %input_2_offset, i32* %output, i32 %out_offset, i32 %out_mult, i32 %out_shift, i32 %out_activation_min, i32 %out_activation_max, i32 %block_size) local_unnamed_addr #0 { + define hidden i32 @max_min_add_reduce(ptr %input_1_vect, ptr %input_2_vect, i32 %input_1_offset, i32 %input_2_offset, ptr %output, i32 %out_offset, i32 %out_mult, i32 %out_shift, i32 %out_activation_min, i32 %out_activation_max, i32 %block_size) local_unnamed_addr #0 { entry: %add = add i32 %block_size, 3 %div = lshr i32 %add, 2 @@ -14,7 +14,7 @@ %.splat.i42 = shufflevector <4 x i32> %.splatinsert.i41, <4 x i32> undef, <4 x i32> zeroinitializer %.splatinsert.i = insertelement <4 x i32> undef, i32 %out_activation_max, i32 0 %.splat.i = shufflevector <4 x i32> %.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer - %scevgep = getelementptr i32, i32* %output, i32 -1 + %scevgep = getelementptr i32, ptr %output, i32 -1 br label %for.body for.cond.cleanup: ; preds = %for.body, %entry @@ -22,21 +22,21 @@ for.body: ; preds = %for.body, %for.body.lr.ph %lsr.iv3 = phi i32 [ %lsr.iv.next, %for.body ], [ %div, %for.body.lr.ph ] - %lsr.iv = phi i32* [ %scevgep1, %for.body ], [ %scevgep, %for.body.lr.ph ] - %input_1_vect.addr.052 = phi i8* [ %input_1_vect, %for.body.lr.ph ], [ %add.ptr, %for.body ] - %input_2_vect.addr.051 = phi i8* [ %input_2_vect, %for.body.lr.ph ], [ %add.ptr14, %for.body ] + %lsr.iv = phi ptr [ %scevgep1, %for.body ], [ %scevgep, %for.body.lr.ph ] + %input_1_vect.addr.052 = phi ptr [ %input_1_vect, %for.body.lr.ph ], [ %add.ptr, %for.body ] + %input_2_vect.addr.051 = phi ptr [ %input_2_vect, %for.body.lr.ph ], [ %add.ptr14, %for.body ] %num_elements.049 = phi i32 [ %block_size, %for.body.lr.ph ], [ %sub, %for.body ] - %input_2_cast = bitcast i8* %input_2_vect.addr.051 to <4 x i32>* - %input_1_cast = bitcast i8* %input_1_vect.addr.052 to <4 x i32>* - %scevgep2 = getelementptr i32, i32* %lsr.iv, i32 1 + %input_2_cast = bitcast ptr %input_2_vect.addr.051 to ptr + %input_1_cast = bitcast ptr %input_1_vect.addr.052 to ptr + %scevgep2 = getelementptr i32, ptr %lsr.iv, i32 1 %pred = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %num_elements.049) - %load.1 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %input_1_cast, i32 4, <4 x i1> %pred, <4 x i32> undef) + %load.1 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %input_1_cast, i32 4, <4 x i1> %pred, <4 x i32> undef) %insert.input_1_offset = insertelement <4 x i32> undef, i32 %input_1_offset, i32 0 %splat.input_1_offset = shufflevector <4 x i32> %insert.input_1_offset, <4 x i32> undef, <4 x i32> zeroinitializer %insert.input_2_offset = insertelement <4 x i32> undef, i32 %input_2_offset, i32 0 %splat.input_2_offset = shufflevector <4 x i32> %insert.input_2_offset, <4 x i32> undef, <4 x i32> zeroinitializer %add.1 = add <4 x i32> %load.1, %splat.input_1_offset - %load.2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %input_2_cast, i32 4, <4 x i1> %pred, <4 x i32> undef) + %load.2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %input_2_cast, i32 4, <4 x i1> %pred, <4 x i32> undef) %add.2 = add <4 x i32> %load.2, %splat.input_2_offset %mul = mul <4 x i32> %add.1, %add.2 %insert.output = insertelement <4 x i32> undef, i32 %out_offset, i32 0 @@ -45,19 +45,19 @@ %max = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %add7, <4 x i32> %.splat.i42, i32 1, <4 x i1> %pred, <4 x i32> undef) %min = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %max, <4 x i32> %.splat.i, i32 1, <4 x i1> %pred, <4 x i32> undef) %reduce = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %min) - store i32 %reduce, i32* %scevgep2 - %add.ptr = getelementptr inbounds i8, i8* %input_1_vect.addr.052, i32 4 - %add.ptr14 = getelementptr inbounds i8, i8* %input_2_vect.addr.051, i32 4 + store i32 %reduce, ptr %scevgep2 + %add.ptr = getelementptr inbounds i8, ptr %input_1_vect.addr.052, i32 4 + %add.ptr14 = getelementptr inbounds i8, ptr %input_2_vect.addr.051, i32 4 %sub = add i32 %num_elements.049, -4 %iv.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv3, i32 1) %cmp = icmp ne i32 %iv.next, 0 - %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep1 = getelementptr i32, ptr %lsr.iv, i32 1 %lsr.iv.next = add i32 %lsr.iv3, -1 br i1 %cmp, label %for.body, label %for.cond.cleanup } declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1 - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2 - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #3 + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2 + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #3 declare <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1 declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1 declare i1 @llvm.test.set.loop.iterations.i32(i32) #4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll index dcb57a5..13080fc 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s -define void @arm_cmplx_dot_prod_f32(float* %pSrcA, float* %pSrcB, i32 %numSamples, float* nocapture %realResult, float* nocapture %imagResult) { +define void @arm_cmplx_dot_prod_f32(ptr %pSrcA, ptr %pSrcB, i32 %numSamples, ptr nocapture %realResult, ptr nocapture %imagResult) { ; CHECK-LABEL: arm_cmplx_dot_prod_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r7, lr} @@ -84,51 +84,51 @@ entry: br i1 %cmp, label %while.body.preheader, label %if.else while.body.preheader: ; preds = %entry - %vecSrcA.0.in108 = bitcast float* %pSrcA to <4 x float>* - %vecSrcA.0109 = load <4 x float>, <4 x float>* %vecSrcA.0.in108, align 4 - %vecSrcB.0.in106 = bitcast float* %pSrcB to <4 x float>* - %vecSrcB.0107 = load <4 x float>, <4 x float>* %vecSrcB.0.in106, align 4 - %pSrcB.addr.0105 = getelementptr inbounds float, float* %pSrcB, i32 4 - %pSrcA.addr.0104 = getelementptr inbounds float, float* %pSrcA, i32 4 + %vecSrcA.0.in108 = bitcast ptr %pSrcA to ptr + %vecSrcA.0109 = load <4 x float>, ptr %vecSrcA.0.in108, align 4 + %vecSrcB.0.in106 = bitcast ptr %pSrcB to ptr + %vecSrcB.0107 = load <4 x float>, ptr %vecSrcB.0.in106, align 4 + %pSrcB.addr.0105 = getelementptr inbounds float, ptr %pSrcB, i32 4 + %pSrcA.addr.0104 = getelementptr inbounds float, ptr %pSrcA, i32 4 %shr = lshr i32 %numSamples, 2 br label %while.body while.body: ; preds = %while.body.preheader, %while.body %vecSrcA.0118 = phi <4 x float> [ %vecSrcA.0, %while.body ], [ %vecSrcA.0109, %while.body.preheader ] %vecSrcB.0117 = phi <4 x float> [ %vecSrcB.0, %while.body ], [ %vecSrcB.0107, %while.body.preheader ] - %pSrcB.addr.0116 = phi float* [ %pSrcB.addr.0, %while.body ], [ %pSrcB.addr.0105, %while.body.preheader ] - %pSrcA.addr.0115 = phi float* [ %pSrcA.addr.0, %while.body ], [ %pSrcA.addr.0104, %while.body.preheader ] + %pSrcB.addr.0116 = phi ptr [ %pSrcB.addr.0, %while.body ], [ %pSrcB.addr.0105, %while.body.preheader ] + %pSrcA.addr.0115 = phi ptr [ %pSrcA.addr.0, %while.body ], [ %pSrcA.addr.0104, %while.body.preheader ] %vec_acc.0114 = phi <4 x float> [ %7, %while.body ], [ zeroinitializer, %while.body.preheader ] - %vecSrcB.0.in.in113 = phi float* [ %add.ptr4, %while.body ], [ %pSrcB, %while.body.preheader ] - %vecSrcA.0.in.in112 = phi float* [ %add.ptr3, %while.body ], [ %pSrcA, %while.body.preheader ] + %vecSrcB.0.in.in113 = phi ptr [ %add.ptr4, %while.body ], [ %pSrcB, %while.body.preheader ] + %vecSrcA.0.in.in112 = phi ptr [ %add.ptr3, %while.body ], [ %pSrcA, %while.body.preheader ] %blkCnt.0.in111 = phi i32 [ %blkCnt.0, %while.body ], [ %shr, %while.body.preheader ] %blkCnt.0 = add nsw i32 %blkCnt.0.in111, -1 %0 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> %vec_acc.0114, <4 x float> %vecSrcA.0118, <4 x float> %vecSrcB.0117) - %1 = bitcast float* %pSrcA.addr.0115 to <4 x float>* - %2 = load <4 x float>, <4 x float>* %1, align 4 - %add.ptr3 = getelementptr inbounds float, float* %vecSrcA.0.in.in112, i32 8 + %1 = bitcast ptr %pSrcA.addr.0115 to ptr + %2 = load <4 x float>, ptr %1, align 4 + %add.ptr3 = getelementptr inbounds float, ptr %vecSrcA.0.in.in112, i32 8 %3 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %0, <4 x float> %vecSrcA.0118, <4 x float> %vecSrcB.0117) - %4 = bitcast float* %pSrcB.addr.0116 to <4 x float>* - %5 = load <4 x float>, <4 x float>* %4, align 4 - %add.ptr4 = getelementptr inbounds float, float* %vecSrcB.0.in.in113, i32 8 + %4 = bitcast ptr %pSrcB.addr.0116 to ptr + %5 = load <4 x float>, ptr %4, align 4 + %add.ptr4 = getelementptr inbounds float, ptr %vecSrcB.0.in.in113, i32 8 %6 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> %3, <4 x float> %2, <4 x float> %5) %7 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %6, <4 x float> %2, <4 x float> %5) - %pSrcA.addr.0 = getelementptr inbounds float, float* %vecSrcA.0.in.in112, i32 12 - %pSrcB.addr.0 = getelementptr inbounds float, float* %vecSrcB.0.in.in113, i32 12 - %vecSrcB.0.in = bitcast float* %add.ptr4 to <4 x float>* - %vecSrcB.0 = load <4 x float>, <4 x float>* %vecSrcB.0.in, align 4 - %vecSrcA.0.in = bitcast float* %add.ptr3 to <4 x float>* - %vecSrcA.0 = load <4 x float>, <4 x float>* %vecSrcA.0.in, align 4 + %pSrcA.addr.0 = getelementptr inbounds float, ptr %vecSrcA.0.in.in112, i32 12 + %pSrcB.addr.0 = getelementptr inbounds float, ptr %vecSrcB.0.in.in113, i32 12 + %vecSrcB.0.in = bitcast ptr %add.ptr4 to ptr + %vecSrcB.0 = load <4 x float>, ptr %vecSrcB.0.in, align 4 + %vecSrcA.0.in = bitcast ptr %add.ptr3 to ptr + %vecSrcA.0 = load <4 x float>, ptr %vecSrcA.0.in, align 4 %cmp2 = icmp sgt i32 %blkCnt.0.in111, 2 br i1 %cmp2, label %while.body, label %while.end while.end: ; preds = %while.body %8 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> %7, <4 x float> %vecSrcA.0, <4 x float> %vecSrcB.0) - %9 = bitcast float* %pSrcA.addr.0 to <4 x float>* - %10 = load <4 x float>, <4 x float>* %9, align 4 + %9 = bitcast ptr %pSrcA.addr.0 to ptr + %10 = load <4 x float>, ptr %9, align 4 %11 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %8, <4 x float> %vecSrcA.0, <4 x float> %vecSrcB.0) - %12 = bitcast float* %pSrcB.addr.0 to <4 x float>* - %13 = load <4 x float>, <4 x float>* %12, align 4 + %12 = bitcast ptr %pSrcB.addr.0 to ptr + %13 = load <4 x float>, ptr %12, align 4 %14 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> %11, <4 x float> %10, <4 x float> %13) %15 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %14, <4 x float> %10, <4 x float> %13) %and = shl i32 %numSamples, 1 @@ -138,12 +138,12 @@ while.end: ; preds = %while.body while.body9: ; preds = %while.end %16 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %mul) - %add.ptr10 = getelementptr inbounds float, float* %vecSrcA.0.in.in112, i32 16 - %add.ptr11 = getelementptr inbounds float, float* %vecSrcB.0.in.in113, i32 16 - %17 = bitcast float* %add.ptr10 to <4 x float>* - %18 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* nonnull %17, i32 4, <4 x i1> %16, <4 x float> zeroinitializer) - %19 = bitcast float* %add.ptr11 to <4 x float>* - %20 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* nonnull %19, i32 4, <4 x i1> %16, <4 x float> zeroinitializer) + %add.ptr10 = getelementptr inbounds float, ptr %vecSrcA.0.in.in112, i32 16 + %add.ptr11 = getelementptr inbounds float, ptr %vecSrcB.0.in.in113, i32 16 + %17 = bitcast ptr %add.ptr10 to ptr + %18 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %17, i32 4, <4 x i1> %16, <4 x float> zeroinitializer) + %19 = bitcast ptr %add.ptr11 to ptr + %20 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %19, i32 4, <4 x i1> %16, <4 x float> zeroinitializer) %21 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 0, <4 x float> %15, <4 x float> %18, <4 x float> %20, <4 x i1> %16) %22 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 1, <4 x float> %21, <4 x float> %18, <4 x float> %20, <4 x i1> %16) %cmp8 = icmp ugt i32 %mul, 4 @@ -156,17 +156,17 @@ if.else: ; preds = %entry do.body: ; preds = %do.body, %if.else %blkCnt.2 = phi i32 [ %mul14, %if.else ], [ %sub18, %do.body ] %vec_acc.2 = phi <4 x float> [ zeroinitializer, %if.else ], [ %29, %do.body ] - %pSrcB.addr.2 = phi float* [ %pSrcB, %if.else ], [ %add.ptr17, %do.body ] - %pSrcA.addr.2 = phi float* [ %pSrcA, %if.else ], [ %add.ptr16, %do.body ] + %pSrcB.addr.2 = phi ptr [ %pSrcB, %if.else ], [ %add.ptr17, %do.body ] + %pSrcA.addr.2 = phi ptr [ %pSrcA, %if.else ], [ %add.ptr16, %do.body ] %23 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.2) - %24 = bitcast float* %pSrcA.addr.2 to <4 x float>* - %25 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %24, i32 4, <4 x i1> %23, <4 x float> zeroinitializer) - %26 = bitcast float* %pSrcB.addr.2 to <4 x float>* - %27 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %26, i32 4, <4 x i1> %23, <4 x float> zeroinitializer) + %24 = bitcast ptr %pSrcA.addr.2 to ptr + %25 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %24, i32 4, <4 x i1> %23, <4 x float> zeroinitializer) + %26 = bitcast ptr %pSrcB.addr.2 to ptr + %27 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %26, i32 4, <4 x i1> %23, <4 x float> zeroinitializer) %28 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 0, <4 x float> %vec_acc.2, <4 x float> %25, <4 x float> %27, <4 x i1> %23) %29 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 1, <4 x float> %28, <4 x float> %25, <4 x float> %27, <4 x i1> %23) - %add.ptr16 = getelementptr inbounds float, float* %pSrcA.addr.2, i32 4 - %add.ptr17 = getelementptr inbounds float, float* %pSrcB.addr.2, i32 4 + %add.ptr16 = getelementptr inbounds float, ptr %pSrcA.addr.2, i32 4 + %add.ptr17 = getelementptr inbounds float, ptr %pSrcB.addr.2, i32 4 %sub18 = add nsw i32 %blkCnt.2, -4 %cmp19 = icmp sgt i32 %blkCnt.2, 4 br i1 %cmp19, label %do.body, label %if.end @@ -179,19 +179,19 @@ if.end: ; preds = %do.body, %while.bod %32 = extractelement <4 x float> %vec_acc.3, i32 1 %33 = extractelement <4 x float> %vec_acc.3, i32 3 %add20 = fadd fast float %32, %33 - store float %add, float* %realResult, align 4 - store float %add20, float* %imagResult, align 4 + store float %add, ptr %realResult, align 4 + store float %add20, ptr %imagResult, align 4 ret void while.body9.1: ; preds = %while.body9 %sub12 = add nsw i32 %mul, -4 %34 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %sub12) - %add.ptr10.1 = getelementptr inbounds float, float* %vecSrcA.0.in.in112, i32 20 - %add.ptr11.1 = getelementptr inbounds float, float* %vecSrcB.0.in.in113, i32 20 - %35 = bitcast float* %add.ptr10.1 to <4 x float>* - %36 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* nonnull %35, i32 4, <4 x i1> %34, <4 x float> zeroinitializer) - %37 = bitcast float* %add.ptr11.1 to <4 x float>* - %38 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* nonnull %37, i32 4, <4 x i1> %34, <4 x float> zeroinitializer) + %add.ptr10.1 = getelementptr inbounds float, ptr %vecSrcA.0.in.in112, i32 20 + %add.ptr11.1 = getelementptr inbounds float, ptr %vecSrcB.0.in.in113, i32 20 + %35 = bitcast ptr %add.ptr10.1 to ptr + %36 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %35, i32 4, <4 x i1> %34, <4 x float> zeroinitializer) + %37 = bitcast ptr %add.ptr11.1 to ptr + %38 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr nonnull %37, i32 4, <4 x i1> %34, <4 x float> zeroinitializer) %39 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 0, <4 x float> %22, <4 x float> %36, <4 x float> %38, <4 x i1> %34) %40 = tail call fast <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 1, <4 x float> %39, <4 x float> %36, <4 x float> %38, <4 x i1> %34) br label %if.end @@ -199,5 +199,5 @@ while.body9.1: ; preds = %while.body9 declare <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32, <4 x float>, <4 x float>, <4 x float>) #1 declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1 -declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2 +declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #2 declare <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32, <4 x float>, <4 x float>, <4 x float>, <4 x i1>) #1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir index dabebf4..40231e1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir @@ -2,53 +2,53 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - %struct.arm_biquad_casd_df1_inst_q31 = type { i32*, i32*, i32, i32 } + %struct.arm_biquad_casd_df1_inst_q31 = type { ptr, ptr, i32, i32 } - define hidden void @arm_biquad_cascade_df1_q31(%struct.arm_biquad_casd_df1_inst_q31* nocapture readonly %arg, i32* nocapture readonly %arg1, i32* nocapture %arg2, i32 %arg3) { + define hidden void @arm_biquad_cascade_df1_q31(ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr nocapture %arg2, i32 %arg3) { bb: - %i = bitcast %struct.arm_biquad_casd_df1_inst_q31* %arg to i32** - %i4 = load i32*, i32** %i, align 4 - %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 1 - %i6 = load i32*, i32** %i5, align 4 - %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 2 - %i8 = load i32, i32* %i7, align 4 + %i = bitcast ptr %arg to ptr + %i4 = load ptr, ptr %i, align 4 + %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 1 + %i6 = load ptr, ptr %i5, align 4 + %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 2 + %i8 = load i32, ptr %i7, align 4 %i9 = sub i32 31, %i8 - %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 3 - %i11 = load i32, i32* %i10, align 4 + %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 3 + %i11 = load i32, ptr %i10, align 4 br label %bb12 bb12: ; preds = %bb74, %bb - %i13 = phi i32* [ %i6, %bb ], [ %i18, %bb74 ] - %i14 = phi i32* [ %i4, %bb ], [ %i85, %bb74 ] - %i15 = phi i32* [ %arg1, %bb ], [ %arg2, %bb74 ] + %i13 = phi ptr [ %i6, %bb ], [ %i18, %bb74 ] + %i14 = phi ptr [ %i4, %bb ], [ %i85, %bb74 ] + %i15 = phi ptr [ %arg1, %bb ], [ %arg2, %bb74 ] %i16 = phi i32 [ %i11, %bb ], [ %i89, %bb74 ] - %i18 = getelementptr inbounds i32, i32* %i13, i32 5 - %i19 = load i32, i32* %i14, align 4 - %i20 = getelementptr inbounds i32, i32* %i14, i32 1 - %i21 = load i32, i32* %i20, align 4 - %i22 = getelementptr inbounds i32, i32* %i14, i32 2 - %i23 = load i32, i32* %i22, align 4 - %i24 = getelementptr inbounds i32, i32* %i14, i32 3 - %i25 = load i32, i32* %i24, align 4 + %i18 = getelementptr inbounds i32, ptr %i13, i32 5 + %i19 = load i32, ptr %i14, align 4 + %i20 = getelementptr inbounds i32, ptr %i14, i32 1 + %i21 = load i32, ptr %i20, align 4 + %i22 = getelementptr inbounds i32, ptr %i14, i32 2 + %i23 = load i32, ptr %i22, align 4 + %i24 = getelementptr inbounds i32, ptr %i14, i32 3 + %i25 = load i32, ptr %i24, align 4 %i26 = call i1 @llvm.test.set.loop.iterations.i32(i32 %arg3) br i1 %i26, label %bb27, label %bb74 bb27: ; preds = %bb12 - %i28 = getelementptr inbounds i32, i32* %i13, i32 4 - %i29 = load i32, i32* %i28, align 4 - %i30 = getelementptr inbounds i32, i32* %i13, i32 3 - %i31 = load i32, i32* %i30, align 4 - %i32 = getelementptr inbounds i32, i32* %i13, i32 2 - %i33 = load i32, i32* %i32, align 4 - %i34 = getelementptr inbounds i32, i32* %i13, i32 1 - %i35 = load i32, i32* %i34, align 4 - %i36 = load i32, i32* %i13, align 4 + %i28 = getelementptr inbounds i32, ptr %i13, i32 4 + %i29 = load i32, ptr %i28, align 4 + %i30 = getelementptr inbounds i32, ptr %i13, i32 3 + %i31 = load i32, ptr %i30, align 4 + %i32 = getelementptr inbounds i32, ptr %i13, i32 2 + %i33 = load i32, ptr %i32, align 4 + %i34 = getelementptr inbounds i32, ptr %i13, i32 1 + %i35 = load i32, ptr %i34, align 4 + %i36 = load i32, ptr %i13, align 4 br label %bb37 bb37: ; preds = %bb37, %bb27 %lsr.iv = phi i32 [ %lsr.iv.next, %bb37 ], [ %arg3, %bb27 ] - %i38 = phi i32* [ %i15, %bb27 ], [ %i51, %bb37 ] - %i39 = phi i32* [ %arg2, %bb27 ], [ %i69, %bb37 ] + %i38 = phi ptr [ %i15, %bb27 ], [ %i51, %bb37 ] + %i39 = phi ptr [ %arg2, %bb27 ], [ %i69, %bb37 ] %i40 = phi i32 [ %i25, %bb27 ], [ %i41, %bb37 ] %i41 = phi i32 [ %i23, %bb27 ], [ %i68, %bb37 ] %i42 = phi i32 [ %i21, %bb27 ], [ %i43, %bb37 ] @@ -59,8 +59,8 @@ %i48 = sext i32 %i35 to i64 %i49 = sext i32 %i36 to i64 %i50 = zext i32 %i9 to i64 - %i51 = getelementptr inbounds i32, i32* %i38, i32 1 - %i52 = load i32, i32* %i38, align 4 + %i51 = getelementptr inbounds i32, ptr %i38, i32 1 + %i52 = load i32, ptr %i38, align 4 %i53 = sext i32 %i52 to i64 %i54 = mul nsw i64 %i53, %i49 %i55 = sext i32 %i43 to i64 @@ -77,8 +77,8 @@ %i66 = add i64 %i65, %i54 %i67 = ashr i64 %i66, %i50 %i68 = trunc i64 %i67 to i32 - %i69 = getelementptr inbounds i32, i32* %i39, i32 1 - store i32 %i68, i32* %i39, align 4 + %i69 = getelementptr inbounds i32, ptr %i39, i32 1 + store i32 %i68, ptr %i39, align 4 %i70 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1) %i71 = icmp ne i32 %i70, 0 %lsr.iv.next = add i32 %lsr.iv, -1 @@ -93,20 +93,20 @@ %i76 = phi i32 [ %i21, %bb12 ], [ %i43, %bb72 ] %i77 = phi i32 [ %i23, %bb12 ], [ %i73, %bb72 ] %i78 = phi i32 [ %i25, %bb12 ], [ %i41, %bb72 ] - store i32 %i75, i32* %i14, align 4 - %i79 = bitcast i32* %i14 to i8* - %i80 = getelementptr inbounds i8, i8* %i79, i32 4 - %i81 = bitcast i8* %i80 to i32* - store i32 %i76, i32* %i81, align 4 - %i82 = bitcast i32* %i14 to i8* - %i83 = getelementptr inbounds i8, i8* %i82, i32 8 - %i84 = bitcast i8* %i83 to i32* - store i32 %i77, i32* %i84, align 4 - %i85 = getelementptr inbounds i32, i32* %i14, i32 4 - %i86 = bitcast i32* %i14 to i8* - %i87 = getelementptr inbounds i8, i8* %i86, i32 12 - %i88 = bitcast i8* %i87 to i32* - store i32 %i78, i32* %i88, align 4 + store i32 %i75, ptr %i14, align 4 + %i79 = bitcast ptr %i14 to ptr + %i80 = getelementptr inbounds i8, ptr %i79, i32 4 + %i81 = bitcast ptr %i80 to ptr + store i32 %i76, ptr %i81, align 4 + %i82 = bitcast ptr %i14 to ptr + %i83 = getelementptr inbounds i8, ptr %i82, i32 8 + %i84 = bitcast ptr %i83 to ptr + store i32 %i77, ptr %i84, align 4 + %i85 = getelementptr inbounds i32, ptr %i14, i32 4 + %i86 = bitcast ptr %i14 to ptr + %i87 = getelementptr inbounds i8, ptr %i86, i32 12 + %i88 = bitcast ptr %i87 to ptr + store i32 %i78, ptr %i88, align 4 %i89 = add i32 %i16, -1 %i90 = icmp eq i32 %i89, 0 br i1 %i90, label %bb91, label %bb12 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir index 10574ba..789429a 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir @@ -1,56 +1,56 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - -verify-machineinstrs | FileCheck %s --- | - %struct.arm_biquad_casd_df1_inst_q31 = type { i32*, i32*, i32, i32 } + %struct.arm_biquad_casd_df1_inst_q31 = type { ptr, ptr, i32, i32 } ; Function Attrs: optsize - define hidden void @arm_biquad_cascade_df1_q31(%struct.arm_biquad_casd_df1_inst_q31* nocapture readonly %arg, i32* nocapture readonly %arg1, i32* nocapture %arg2, i32 %arg3) #0 { + define hidden void @arm_biquad_cascade_df1_q31(ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr nocapture %arg2, i32 %arg3) #0 { bb: - %i = bitcast %struct.arm_biquad_casd_df1_inst_q31* %arg to i32** - %i4 = load i32*, i32** %i, align 4 - %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 1 - %i6 = load i32*, i32** %i5, align 4 - %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 2 - %i8 = load i32, i32* %i7, align 4 + %i = bitcast ptr %arg to ptr + %i4 = load ptr, ptr %i, align 4 + %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 1 + %i6 = load ptr, ptr %i5, align 4 + %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 2 + %i8 = load i32, ptr %i7, align 4 %i9 = sub i32 31, %i8 - %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 3 - %i11 = load i32, i32* %i10, align 4 + %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 3 + %i11 = load i32, ptr %i10, align 4 br label %bb12 bb12: ; preds = %bb74, %bb - %i13 = phi i32* [ %i6, %bb ], [ %i18, %bb74 ] - %i14 = phi i32* [ %i4, %bb ], [ %i85, %bb74 ] - %i15 = phi i32* [ %arg1, %bb ], [ %arg2, %bb74 ] + %i13 = phi ptr [ %i6, %bb ], [ %i18, %bb74 ] + %i14 = phi ptr [ %i4, %bb ], [ %i85, %bb74 ] + %i15 = phi ptr [ %arg1, %bb ], [ %arg2, %bb74 ] %i16 = phi i32 [ %i11, %bb ], [ %i89, %bb74 ] - %i18 = getelementptr inbounds i32, i32* %i13, i32 5 - %i19 = load i32, i32* %i14, align 4 - %i20 = getelementptr inbounds i32, i32* %i14, i32 1 - %i21 = load i32, i32* %i20, align 4 - %i22 = getelementptr inbounds i32, i32* %i14, i32 2 - %i23 = load i32, i32* %i22, align 4 - %i24 = getelementptr inbounds i32, i32* %i14, i32 3 - %i25 = load i32, i32* %i24, align 4 + %i18 = getelementptr inbounds i32, ptr %i13, i32 5 + %i19 = load i32, ptr %i14, align 4 + %i20 = getelementptr inbounds i32, ptr %i14, i32 1 + %i21 = load i32, ptr %i20, align 4 + %i22 = getelementptr inbounds i32, ptr %i14, i32 2 + %i23 = load i32, ptr %i22, align 4 + %i24 = getelementptr inbounds i32, ptr %i14, i32 3 + %i25 = load i32, ptr %i24, align 4 %i26 = call { i32, i1 } @llvm.test.start.loop.iterations.i32(i32 %arg3) %i26.0 = extractvalue { i32, i1 } %i26, 0 %i26.1 = extractvalue { i32, i1 } %i26, 1 br i1 %i26.1, label %bb27, label %bb74 bb27: ; preds = %bb12 - %i28 = getelementptr inbounds i32, i32* %i13, i32 4 - %i29 = load i32, i32* %i28, align 4 - %i30 = getelementptr inbounds i32, i32* %i13, i32 3 - %i31 = load i32, i32* %i30, align 4 - %i32 = getelementptr inbounds i32, i32* %i13, i32 2 - %i33 = load i32, i32* %i32, align 4 - %i34 = getelementptr inbounds i32, i32* %i13, i32 1 - %i35 = load i32, i32* %i34, align 4 - %i36 = load i32, i32* %i13, align 4 + %i28 = getelementptr inbounds i32, ptr %i13, i32 4 + %i29 = load i32, ptr %i28, align 4 + %i30 = getelementptr inbounds i32, ptr %i13, i32 3 + %i31 = load i32, ptr %i30, align 4 + %i32 = getelementptr inbounds i32, ptr %i13, i32 2 + %i33 = load i32, ptr %i32, align 4 + %i34 = getelementptr inbounds i32, ptr %i13, i32 1 + %i35 = load i32, ptr %i34, align 4 + %i36 = load i32, ptr %i13, align 4 br label %bb37 bb37: ; preds = %bb37, %bb27 %lsr.iv = phi i32 [ %i70, %bb37 ], [ %i26.0, %bb27 ] - %i38 = phi i32* [ %i15, %bb27 ], [ %i51, %bb37 ] - %i39 = phi i32* [ %arg2, %bb27 ], [ %i69, %bb37 ] + %i38 = phi ptr [ %i15, %bb27 ], [ %i51, %bb37 ] + %i39 = phi ptr [ %arg2, %bb27 ], [ %i69, %bb37 ] %i40 = phi i32 [ %i25, %bb27 ], [ %i41, %bb37 ] %i41 = phi i32 [ %i23, %bb27 ], [ %i68, %bb37 ] %i42 = phi i32 [ %i21, %bb27 ], [ %i43, %bb37 ] @@ -61,8 +61,8 @@ %i48 = sext i32 %i35 to i64 %i49 = sext i32 %i36 to i64 %i50 = zext i32 %i9 to i64 - %i51 = getelementptr inbounds i32, i32* %i38, i32 1 - %i52 = load i32, i32* %i38, align 4 + %i51 = getelementptr inbounds i32, ptr %i38, i32 1 + %i52 = load i32, ptr %i38, align 4 %i53 = sext i32 %i52 to i64 %i54 = mul nsw i64 %i53, %i49 %i55 = sext i32 %i43 to i64 @@ -79,8 +79,8 @@ %i66 = add i64 %i65, %i54 %i67 = ashr i64 %i66, %i50 %i68 = trunc i64 %i67 to i32 - %i69 = getelementptr inbounds i32, i32* %i39, i32 1 - store i32 %i68, i32* %i39, align 4 + %i69 = getelementptr inbounds i32, ptr %i39, i32 1 + store i32 %i68, ptr %i39, align 4 %i70 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1) %i71 = icmp ne i32 %i70, 0 br i1 %i71, label %bb37, label %bb72 @@ -94,20 +94,20 @@ %i76 = phi i32 [ %i21, %bb12 ], [ %i43, %bb72 ] %i77 = phi i32 [ %i23, %bb12 ], [ %i73, %bb72 ] %i78 = phi i32 [ %i25, %bb12 ], [ %i41, %bb72 ] - store i32 %i75, i32* %i14, align 4 - %i79 = bitcast i32* %i14 to i8* - %i80 = getelementptr inbounds i8, i8* %i79, i32 4 - %i81 = bitcast i8* %i80 to i32* - store i32 %i76, i32* %i81, align 4 - %i82 = bitcast i32* %i14 to i8* - %i83 = getelementptr inbounds i8, i8* %i82, i32 8 - %i84 = bitcast i8* %i83 to i32* - store i32 %i77, i32* %i84, align 4 - %i85 = getelementptr inbounds i32, i32* %i14, i32 4 - %i86 = bitcast i32* %i14 to i8* - %i87 = getelementptr inbounds i8, i8* %i86, i32 12 - %i88 = bitcast i8* %i87 to i32* - store i32 %i78, i32* %i88, align 4 + store i32 %i75, ptr %i14, align 4 + %i79 = bitcast ptr %i14 to ptr + %i80 = getelementptr inbounds i8, ptr %i79, i32 4 + %i81 = bitcast ptr %i80 to ptr + store i32 %i76, ptr %i81, align 4 + %i82 = bitcast ptr %i14 to ptr + %i83 = getelementptr inbounds i8, ptr %i82, i32 8 + %i84 = bitcast ptr %i83 to ptr + store i32 %i77, ptr %i84, align 4 + %i85 = getelementptr inbounds i32, ptr %i14, i32 4 + %i86 = bitcast ptr %i14 to ptr + %i87 = getelementptr inbounds i8, ptr %i86, i32 12 + %i88 = bitcast ptr %i87 to ptr + store i32 %i78, ptr %i88, align 4 %i89 = add i32 %i16, -1 %i90 = icmp eq i32 %i89, 0 br i1 %i90, label %bb91, label %bb12 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir index 62a266e..3a8765f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize.mir @@ -2,54 +2,54 @@ # RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - %struct.arm_biquad_casd_df1_inst_q31 = type { i32*, i32*, i32, i32 } + %struct.arm_biquad_casd_df1_inst_q31 = type { ptr, ptr, i32, i32 } ; Function Attrs: optsize - define hidden void @arm_biquad_cascade_df1_q31(%struct.arm_biquad_casd_df1_inst_q31* nocapture readonly %arg, i32* nocapture readonly %arg1, i32* nocapture %arg2, i32 %arg3) #0 { + define hidden void @arm_biquad_cascade_df1_q31(ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr nocapture %arg2, i32 %arg3) #0 { bb: - %i = bitcast %struct.arm_biquad_casd_df1_inst_q31* %arg to i32** - %i4 = load i32*, i32** %i, align 4 - %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 1 - %i6 = load i32*, i32** %i5, align 4 - %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 2 - %i8 = load i32, i32* %i7, align 4 + %i = bitcast ptr %arg to ptr + %i4 = load ptr, ptr %i, align 4 + %i5 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 1 + %i6 = load ptr, ptr %i5, align 4 + %i7 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 2 + %i8 = load i32, ptr %i7, align 4 %i9 = sub i32 31, %i8 - %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, %struct.arm_biquad_casd_df1_inst_q31* %arg, i32 0, i32 3 - %i11 = load i32, i32* %i10, align 4 + %i10 = getelementptr inbounds %struct.arm_biquad_casd_df1_inst_q31, ptr %arg, i32 0, i32 3 + %i11 = load i32, ptr %i10, align 4 br label %bb12 bb12: ; preds = %bb74, %bb - %i13 = phi i32* [ %i6, %bb ], [ %i18, %bb74 ] - %i14 = phi i32* [ %i4, %bb ], [ %i85, %bb74 ] - %i15 = phi i32* [ %arg1, %bb ], [ %arg2, %bb74 ] + %i13 = phi ptr [ %i6, %bb ], [ %i18, %bb74 ] + %i14 = phi ptr [ %i4, %bb ], [ %i85, %bb74 ] + %i15 = phi ptr [ %arg1, %bb ], [ %arg2, %bb74 ] %i16 = phi i32 [ %i11, %bb ], [ %i89, %bb74 ] - %i18 = getelementptr inbounds i32, i32* %i13, i32 5 - %i19 = load i32, i32* %i14, align 4 - %i20 = getelementptr inbounds i32, i32* %i14, i32 1 - %i21 = load i32, i32* %i20, align 4 - %i22 = getelementptr inbounds i32, i32* %i14, i32 2 - %i23 = load i32, i32* %i22, align 4 - %i24 = getelementptr inbounds i32, i32* %i14, i32 3 - %i25 = load i32, i32* %i24, align 4 + %i18 = getelementptr inbounds i32, ptr %i13, i32 5 + %i19 = load i32, ptr %i14, align 4 + %i20 = getelementptr inbounds i32, ptr %i14, i32 1 + %i21 = load i32, ptr %i20, align 4 + %i22 = getelementptr inbounds i32, ptr %i14, i32 2 + %i23 = load i32, ptr %i22, align 4 + %i24 = getelementptr inbounds i32, ptr %i14, i32 3 + %i25 = load i32, ptr %i24, align 4 %i26 = call i1 @llvm.test.set.loop.iterations.i32(i32 %arg3) br i1 %i26, label %bb27, label %bb74 bb27: ; preds = %bb12 - %i28 = getelementptr inbounds i32, i32* %i13, i32 4 - %i29 = load i32, i32* %i28, align 4 - %i30 = getelementptr inbounds i32, i32* %i13, i32 3 - %i31 = load i32, i32* %i30, align 4 - %i32 = getelementptr inbounds i32, i32* %i13, i32 2 - %i33 = load i32, i32* %i32, align 4 - %i34 = getelementptr inbounds i32, i32* %i13, i32 1 - %i35 = load i32, i32* %i34, align 4 - %i36 = load i32, i32* %i13, align 4 + %i28 = getelementptr inbounds i32, ptr %i13, i32 4 + %i29 = load i32, ptr %i28, align 4 + %i30 = getelementptr inbounds i32, ptr %i13, i32 3 + %i31 = load i32, ptr %i30, align 4 + %i32 = getelementptr inbounds i32, ptr %i13, i32 2 + %i33 = load i32, ptr %i32, align 4 + %i34 = getelementptr inbounds i32, ptr %i13, i32 1 + %i35 = load i32, ptr %i34, align 4 + %i36 = load i32, ptr %i13, align 4 br label %bb37 bb37: ; preds = %bb37, %bb27 %lsr.iv = phi i32 [ %lsr.iv.next, %bb37 ], [ %arg3, %bb27 ] - %i38 = phi i32* [ %i15, %bb27 ], [ %i51, %bb37 ] - %i39 = phi i32* [ %arg2, %bb27 ], [ %i69, %bb37 ] + %i38 = phi ptr [ %i15, %bb27 ], [ %i51, %bb37 ] + %i39 = phi ptr [ %arg2, %bb27 ], [ %i69, %bb37 ] %i40 = phi i32 [ %i25, %bb27 ], [ %i41, %bb37 ] %i41 = phi i32 [ %i23, %bb27 ], [ %i68, %bb37 ] %i42 = phi i32 [ %i21, %bb27 ], [ %i43, %bb37 ] @@ -60,8 +60,8 @@ %i48 = sext i32 %i35 to i64 %i49 = sext i32 %i36 to i64 %i50 = zext i32 %i9 to i64 - %i51 = getelementptr inbounds i32, i32* %i38, i32 1 - %i52 = load i32, i32* %i38, align 4 + %i51 = getelementptr inbounds i32, ptr %i38, i32 1 + %i52 = load i32, ptr %i38, align 4 %i53 = sext i32 %i52 to i64 %i54 = mul nsw i64 %i53, %i49 %i55 = sext i32 %i43 to i64 @@ -78,8 +78,8 @@ %i66 = add i64 %i65, %i54 %i67 = ashr i64 %i66, %i50 %i68 = trunc i64 %i67 to i32 - %i69 = getelementptr inbounds i32, i32* %i39, i32 1 - store i32 %i68, i32* %i39, align 4 + %i69 = getelementptr inbounds i32, ptr %i39, i32 1 + store i32 %i68, ptr %i39, align 4 %i70 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1) %i71 = icmp ne i32 %i70, 0 %lsr.iv.next = add i32 %lsr.iv, -1 @@ -94,20 +94,20 @@ %i76 = phi i32 [ %i21, %bb12 ], [ %i43, %bb72 ] %i77 = phi i32 [ %i23, %bb12 ], [ %i73, %bb72 ] %i78 = phi i32 [ %i25, %bb12 ], [ %i41, %bb72 ] - store i32 %i75, i32* %i14, align 4 - %i79 = bitcast i32* %i14 to i8* - %i80 = getelementptr inbounds i8, i8* %i79, i32 4 - %i81 = bitcast i8* %i80 to i32* - store i32 %i76, i32* %i81, align 4 - %i82 = bitcast i32* %i14 to i8* - %i83 = getelementptr inbounds i8, i8* %i82, i32 8 - %i84 = bitcast i8* %i83 to i32* - store i32 %i77, i32* %i84, align 4 - %i85 = getelementptr inbounds i32, i32* %i14, i32 4 - %i86 = bitcast i32* %i14 to i8* - %i87 = getelementptr inbounds i8, i8* %i86, i32 12 - %i88 = bitcast i8* %i87 to i32* - store i32 %i78, i32* %i88, align 4 + store i32 %i75, ptr %i14, align 4 + %i79 = bitcast ptr %i14 to ptr + %i80 = getelementptr inbounds i8, ptr %i79, i32 4 + %i81 = bitcast ptr %i80 to ptr + store i32 %i76, ptr %i81, align 4 + %i82 = bitcast ptr %i14 to ptr + %i83 = getelementptr inbounds i8, ptr %i82, i32 8 + %i84 = bitcast ptr %i83 to ptr + store i32 %i77, ptr %i84, align 4 + %i85 = getelementptr inbounds i32, ptr %i14, i32 4 + %i86 = bitcast ptr %i14 to ptr + %i87 = getelementptr inbounds i8, ptr %i86, i32 12 + %i88 = bitcast ptr %i87 to ptr + store i32 %i78, ptr %i88, align 4 %i89 = add i32 %i16, -1 %i90 = icmp eq i32 %i89, 0 br i1 %i90, label %bb91, label %bb12 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll index 5804858..79665af 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve %s -o - | FileCheck %s -define dso_local i32 @test_500_504(i32* nocapture readonly %x) { +define dso_local i32 @test_500_504(ptr nocapture readonly %x) { ; CHECK-LABEL: test_500_504: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -38,9 +38,9 @@ vector.body: ; preds = %vector.body, %entry %index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 0, %entry ], [ %4, %vector.body ] %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 500) - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %2 = select <4 x i1> %active.lane.mask, <4 x i32> %wide.masked.load, <4 x i32> zeroinitializer %3 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %2) %4 = add i32 %3, %vec.phi @@ -52,7 +52,7 @@ for.cond.cleanup: ; preds = %vector.body ret i32 %4 } -define dso_local i32 @test_501_504(i32* nocapture readonly %x) { +define dso_local i32 @test_501_504(ptr nocapture readonly %x) { ; CHECK-LABEL: test_501_504: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -75,9 +75,9 @@ vector.body: ; preds = %vector.body, %entry %index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 0, %entry ], [ %4, %vector.body ] %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 501) - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %2 = select <4 x i1> %active.lane.mask, <4 x i32> %wide.masked.load, <4 x i32> zeroinitializer %3 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %2) %4 = add i32 %3, %vec.phi @@ -89,7 +89,7 @@ for.cond.cleanup: ; preds = %vector.body ret i32 %4 } -define dso_local i32 @test_502_504(i32* nocapture readonly %x) { +define dso_local i32 @test_502_504(ptr nocapture readonly %x) { ; CHECK-LABEL: test_502_504: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -112,9 +112,9 @@ vector.body: ; preds = %vector.body, %entry %index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 0, %entry ], [ %4, %vector.body ] %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 502) - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %2 = select <4 x i1> %active.lane.mask, <4 x i32> %wide.masked.load, <4 x i32> zeroinitializer %3 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %2) %4 = add i32 %3, %vec.phi @@ -126,7 +126,7 @@ for.cond.cleanup: ; preds = %vector.body ret i32 %4 } -define dso_local i32 @test_503_504(i32* nocapture readonly %x) { +define dso_local i32 @test_503_504(ptr nocapture readonly %x) { ; CHECK-LABEL: test_503_504: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -149,9 +149,9 @@ vector.body: ; preds = %vector.body, %entry %index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 0, %entry ], [ %4, %vector.body ] %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 503) - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %2 = select <4 x i1> %active.lane.mask, <4 x i32> %wide.masked.load, <4 x i32> zeroinitializer %3 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %2) %4 = add i32 %3, %vec.phi @@ -163,7 +163,7 @@ for.cond.cleanup: ; preds = %vector.body ret i32 %4 } -define dso_local i32 @test_504_504(i32* nocapture readonly %x) { +define dso_local i32 @test_504_504(ptr nocapture readonly %x) { ; CHECK-LABEL: test_504_504: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -186,9 +186,9 @@ vector.body: ; preds = %vector.body, %entry %index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 0, %entry ], [ %4, %vector.body ] %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 504) - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %2 = select <4 x i1> %active.lane.mask, <4 x i32> %wide.masked.load, <4 x i32> zeroinitializer %3 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %2) %4 = add i32 %3, %vec.phi @@ -201,5 +201,5 @@ for.cond.cleanup: ; preds = %vector.body } declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) -declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/count_dominates_start.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/count_dominates_start.mir index b20b3d14..6307db7 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/count_dominates_start.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/count_dominates_start.mir @@ -3,7 +3,7 @@ --- | - define i32 @test(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) { + define i32 @test(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) { entry: %cmp10 = icmp sgt i32 %n, 0 %0 = add i32 %n, 7 @@ -19,25 +19,25 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv3 = phi i16* [ %scevgep4, %vector.body ], [ %x, %vector.ph ] - %lsr.iv1 = phi i16* [ %scevgep, %vector.body ], [ %y, %vector.ph ] + %lsr.iv3 = phi ptr [ %scevgep4, %vector.body ], [ %x, %vector.ph ] + %lsr.iv1 = phi ptr [ %scevgep, %vector.body ], [ %y, %vector.ph ] %vec.phi = phi i32 [ 0, %vector.ph ], [ %16, %vector.body ] %7 = phi i32 [ %6, %vector.ph ], [ %17, %vector.body ] %8 = phi i32 [ %n, %vector.ph ], [ %10, %vector.body ] - %lsr.iv12 = bitcast i16* %lsr.iv1 to <8 x i16>* - %lsr.iv35 = bitcast i16* %lsr.iv3 to <8 x i16>* + %lsr.iv12 = bitcast ptr %lsr.iv1 to ptr + %lsr.iv35 = bitcast ptr %lsr.iv3 to ptr %9 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %8) %10 = sub i32 %8, 8 - %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv35, i32 2, <8 x i1> %9, <8 x i16> undef) + %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv35, i32 2, <8 x i1> %9, <8 x i16> undef) %11 = sext <8 x i16> %wide.masked.load to <8 x i32> - %wide.masked.load13 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv12, i32 2, <8 x i1> %9, <8 x i16> undef) + %wide.masked.load13 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv12, i32 2, <8 x i1> %9, <8 x i16> undef) %12 = sext <8 x i16> %wide.masked.load13 to <8 x i32> %13 = mul nsw <8 x i32> %12, %11 %14 = select <8 x i1> %9, <8 x i32> %13, <8 x i32> zeroinitializer %15 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %14) %16 = add i32 %15, %vec.phi - %scevgep = getelementptr i16, i16* %lsr.iv1, i32 8 - %scevgep4 = getelementptr i16, i16* %lsr.iv3, i32 8 + %scevgep = getelementptr i16, ptr %lsr.iv1, i32 8 + %scevgep4 = getelementptr i16, ptr %lsr.iv3, i32 8 %17 = call i32 @llvm.loop.decrement.reg.i32(i32 %7, i32 1) %18 = icmp ne i32 %17, 0 br i1 %18, label %vector.body, label %for.cond.cleanup @@ -48,7 +48,7 @@ } declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) - declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) + declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>) declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir index d71a829..d578115 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/disjoint-vcmp.mir @@ -3,7 +3,7 @@ # --- | @mask = external global i16 - define dso_local void @test(i32* noalias nocapture %arg, i32* noalias nocapture readonly %arg1, i32 %arg2, i32* noalias nocapture readonly %arg3) local_unnamed_addr #0 { + define dso_local void @test(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 %arg2, ptr noalias nocapture readonly %arg3) local_unnamed_addr #0 { bb: %tmp = icmp eq i32 %arg2, 0 %tmp1 = add i32 %arg2, 3 @@ -12,50 +12,50 @@ %tmp4 = add i32 %tmp3, -4 %tmp5 = lshr i32 %tmp4, 2 %tmp6 = add nuw nsw i32 %tmp5, 1 - %mask.gep9 = bitcast i16* @mask to i16* - %mask.load = load i16, i16* %mask.gep9 + %mask.gep9 = bitcast ptr @mask to ptr + %mask.load = load i16, ptr %mask.gep9 %conv.mask = zext i16 %mask.load to i32 %invariant.mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %conv.mask) br i1 %tmp, label %bb27, label %bb3 bb3: ; preds = %bb %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp6) - %scevgep1 = getelementptr i32, i32* %arg3, i32 -4 + %scevgep1 = getelementptr i32, ptr %arg3, i32 -4 br label %bb9 bb9: ; preds = %bb9, %bb3 - %lsr.iv4 = phi i32* [ %scevgep6, %bb9 ], [ %scevgep1, %bb3 ] - %lsr.iv2 = phi i32* [ %scevgep3, %bb9 ], [ %arg1, %bb3 ] - %lsr.iv = phi i32* [ %scevgep, %bb9 ], [ %arg, %bb3 ] + %lsr.iv4 = phi ptr [ %scevgep6, %bb9 ], [ %scevgep1, %bb3 ] + %lsr.iv2 = phi ptr [ %scevgep3, %bb9 ], [ %arg1, %bb3 ] + %lsr.iv = phi ptr [ %scevgep, %bb9 ], [ %arg, %bb3 ] %tmp7 = phi i32 [ %start, %bb3 ], [ %tmp12, %bb9 ] %tmp8 = phi i32 [ %arg2, %bb3 ], [ %tmp11, %bb9 ] - %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>* - %lsr.iv24 = bitcast i32* %lsr.iv2 to <4 x i32>* - %lsr.iv47 = bitcast i32* %lsr.iv4 to <4 x i32>* + %lsr.iv1 = bitcast ptr %lsr.iv to ptr + %lsr.iv24 = bitcast ptr %lsr.iv2 to ptr + %lsr.iv47 = bitcast ptr %lsr.iv4 to ptr %vctp = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp8) %and = and <4 x i1> %vctp, %invariant.mask %tmp11 = sub i32 %tmp8, 4 - %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv24, i32 4, <4 x i1> %and, <4 x i32> undef) - %tmp22 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1, i32 4, <4 x i1> %and, <4 x i32> undef) + %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv24, i32 4, <4 x i1> %and, <4 x i32> undef) + %tmp22 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1, i32 4, <4 x i1> %and, <4 x i32> undef) %tmp23 = mul nsw <4 x i32> %tmp22, %tmp17 - %scevgep2 = getelementptr <4 x i32>, <4 x i32>* %lsr.iv47, i32 1 - %load.limits = load <4 x i32>, <4 x i32>* %scevgep2 + %scevgep2 = getelementptr <4 x i32>, ptr %lsr.iv47, i32 1 + %load.limits = load <4 x i32>, ptr %scevgep2 %0 = insertelement <4 x i32> undef, i32 %conv.mask, i32 0 %1 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer %bad.icmp = icmp ule <4 x i32> %load.limits, %1 - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp23, <4 x i32>* %lsr.iv1, i32 4, <4 x i1> %bad.icmp) + call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp23, ptr %lsr.iv1, i32 4, <4 x i1> %bad.icmp) %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %tmp7, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 4 - %scevgep6 = getelementptr i32, i32* %lsr.iv4, i32 4 + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 + %scevgep3 = getelementptr i32, ptr %lsr.iv2, i32 4 + %scevgep6 = getelementptr i32, ptr %lsr.iv4, i32 4 br i1 %tmp13, label %bb9, label %bb27 bb27: ; preds = %bb9, %bb ret void } - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) declare <4 x i1> @llvm.arm.mve.vctp32(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir index 56bb50a..d540235 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update.mir @@ -7,7 +7,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-arm-unknown-eabi" - define dso_local void @use_before_def(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { + define dso_local void @use_before_def(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { entry: %cmp8 = icmp sgt i32 %N, 0 %0 = add i32 %N, 3 @@ -23,23 +23,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ] - %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ] - %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ] + %lsr.iv17 = phi ptr [ %scevgep18, %vector.body ], [ %A, %vector.ph ] + %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %C, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %B, %vector.ph ] %6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* - %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* - %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>* + %lsr.iv13 = bitcast ptr %lsr.iv to ptr + %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr + %lsr.iv1719 = bitcast ptr %lsr.iv17 to ptr %8 = call <4 x i1> @llvm.arm.vctp32(i32 %7) %9 = sub i32 %7, 4 - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef) - %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef) %10 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv1719, i32 4, <4 x i1> %8) - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4 - %scevgep18 = getelementptr i32, i32* %lsr.iv17, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv1719, i32 4, <4 x i1> %8) + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 + %scevgep15 = getelementptr i32, ptr %lsr.iv14, i32 4 + %scevgep18 = getelementptr i32, ptr %lsr.iv17, i32 4 %11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %12 = icmp ne i32 %11, 0 br i1 %12, label %vector.body, label %for.cond.cleanup @@ -50,9 +50,9 @@ declare i32 @llvm.start.loop.iterations.i32(i32) declare <4 x i1> @llvm.arm.vctp32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) - declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) ... --- diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir index f7db9a6..4998b5b 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir @@ -7,17 +7,17 @@ # CHECK: LETP --- | - %struct.DCT_InstanceTypeDef = type { float*, i32, i32 } + %struct.DCT_InstanceTypeDef = type { ptr, i32, i32 } ; Function Attrs: nofree nounwind - define hidden arm_aapcs_vfpcc void @test(%struct.DCT_InstanceTypeDef* nocapture readonly %S, float* %pIn, float* nocapture %pOut) { + define hidden arm_aapcs_vfpcc void @test(ptr nocapture readonly %S, ptr %pIn, ptr nocapture %pOut) { entry: - %NumInputs = getelementptr inbounds %struct.DCT_InstanceTypeDef, %struct.DCT_InstanceTypeDef* %S, i32 0, i32 2 - %0 = load i32, i32* %NumInputs, align 4 - %NumFilters = getelementptr inbounds %struct.DCT_InstanceTypeDef, %struct.DCT_InstanceTypeDef* %S, i32 0, i32 1 - %1 = load i32, i32* %NumFilters, align 4 - %pDCTCoefs34 = bitcast %struct.DCT_InstanceTypeDef* %S to float** - %2 = load float*, float** %pDCTCoefs34, align 4 + %NumInputs = getelementptr inbounds %struct.DCT_InstanceTypeDef, ptr %S, i32 0, i32 2 + %0 = load i32, ptr %NumInputs, align 4 + %NumFilters = getelementptr inbounds %struct.DCT_InstanceTypeDef, ptr %S, i32 0, i32 1 + %1 = load i32, ptr %NumFilters, align 4 + %pDCTCoefs34 = bitcast ptr %S to ptr + %2 = load ptr, ptr %pDCTCoefs34, align 4 %3 = add i32 %0, 3 %4 = icmp slt i32 %0, 4 %smin36 = select i1 %4, i32 %0, i32 4 @@ -29,14 +29,14 @@ do.body: ; preds = %do.body, %entry %count.0 = phi i32 [ %0, %entry ], [ %12, %do.body ] - %pInT.0 = phi float* [ %pIn, %entry ], [ %add.ptr, %do.body ] + %pInT.0 = phi ptr [ %pIn, %entry ], [ %add.ptr, %do.body ] %sumVec.0 = phi <4 x float> [ zeroinitializer, %entry ], [ %11, %do.body ] %8 = phi i32 [ %start1, %entry ], [ %13, %do.body ] - %pInT.033 = bitcast float* %pInT.0 to <4 x float>* + %pInT.033 = bitcast ptr %pInT.0 to ptr %9 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %count.0) - %10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pInT.033, i32 4, <4 x i1> %9, <4 x float> zeroinitializer) + %10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %pInT.033, i32 4, <4 x i1> %9, <4 x float> zeroinitializer) %11 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %sumVec.0, <4 x float> %10, <4 x i1> %9, <4 x float> undef) - %add.ptr = getelementptr inbounds float, float* %pInT.0, i32 4 + %add.ptr = getelementptr inbounds float, ptr %pInT.0, i32 4 %12 = add i32 %count.0, -4 %13 = call i32 @llvm.loop.decrement.reg.i32(i32 %8, i32 1) %14 = icmp ne i32 %13, 0 @@ -50,27 +50,27 @@ %add1 = fadd fast float %add, %17 %18 = extractelement <4 x float> %11, i32 3 %add2 = fadd fast float %add1, %18 - %19 = load float, float* %2, align 4 + %19 = load float, ptr %2, align 4 %mul = fmul fast float %19, %add2 - store float %mul, float* %pOut, align 4 + store float %mul, ptr %pOut, align 4 %sub4 = add i32 %1, -4 %cmp5201 = icmp ugt i32 %sub4, 1 br i1 %cmp5201, label %for.body.lr.ph, label %for.cond54.preheader for.body.lr.ph: ; preds = %do.end - %scevgep = getelementptr float, float* %pIn, i32 4 + %scevgep = getelementptr float, ptr %pIn, i32 4 %20 = add i32 %0, 4 - %scevgep5 = getelementptr float, float* %2, i32 %20 + %scevgep5 = getelementptr float, ptr %2, i32 %20 %21 = shl i32 %0, 4 %22 = shl i32 %0, 1 %23 = add i32 %22, 4 - %scevgep12 = getelementptr float, float* %2, i32 %23 + %scevgep12 = getelementptr float, ptr %2, i32 %23 %24 = mul i32 %0, 3 %25 = add i32 %24, 4 - %scevgep19 = getelementptr float, float* %2, i32 %25 + %scevgep19 = getelementptr float, ptr %2, i32 %25 %26 = shl i32 %0, 2 %27 = add i32 %26, 4 - %scevgep26 = getelementptr float, float* %2, i32 %27 + %scevgep26 = getelementptr float, ptr %2, i32 %27 %28 = add i32 %0, -1 %29 = add i32 %0, -4 %30 = icmp slt i32 %29, 4 @@ -95,78 +95,78 @@ br label %for.body56 for.body: ; preds = %do.end33, %for.body.lr.ph - %lsr.iv27 = phi float* [ %88, %do.end33 ], [ %scevgep26, %for.body.lr.ph ] - %lsr.iv20 = phi float* [ %87, %do.end33 ], [ %scevgep19, %for.body.lr.ph ] - %lsr.iv13 = phi float* [ %86, %do.end33 ], [ %scevgep12, %for.body.lr.ph ] - %lsr.iv6 = phi float* [ %85, %do.end33 ], [ %scevgep5, %for.body.lr.ph ] + %lsr.iv27 = phi ptr [ %88, %do.end33 ], [ %scevgep26, %for.body.lr.ph ] + %lsr.iv20 = phi ptr [ %87, %do.end33 ], [ %scevgep19, %for.body.lr.ph ] + %lsr.iv13 = phi ptr [ %86, %do.end33 ], [ %scevgep12, %for.body.lr.ph ] + %lsr.iv6 = phi ptr [ %85, %do.end33 ], [ %scevgep5, %for.body.lr.ph ] %k.0202 = phi i32 [ 1, %for.body.lr.ph ], [ %add53, %do.end33 ] - %39 = bitcast float* %pIn to <4 x float>* + %39 = bitcast ptr %pIn to ptr %mul7 = mul i32 %k.0202, %0 - %arrayidx8 = getelementptr inbounds float, float* %2, i32 %mul7 + %arrayidx8 = getelementptr inbounds float, ptr %2, i32 %mul7 %add9 = add nuw nsw i32 %k.0202, 1 %mul10 = mul i32 %add9, %0 - %arrayidx11 = getelementptr inbounds float, float* %2, i32 %mul10 + %arrayidx11 = getelementptr inbounds float, ptr %2, i32 %mul10 %add12 = add nuw nsw i32 %k.0202, 2 %mul13 = mul i32 %add12, %0 - %arrayidx14 = getelementptr inbounds float, float* %2, i32 %mul13 + %arrayidx14 = getelementptr inbounds float, ptr %2, i32 %mul13 %add15 = add i32 %k.0202, 3 %mul16 = mul i32 %add15, %0 - %arrayidx17 = getelementptr inbounds float, float* %2, i32 %mul16 - %40 = load <4 x float>, <4 x float>* %39, align 4 - %41 = bitcast float* %arrayidx8 to <4 x float>* - %42 = load <4 x float>, <4 x float>* %41, align 4 + %arrayidx17 = getelementptr inbounds float, ptr %2, i32 %mul16 + %40 = load <4 x float>, ptr %39, align 4 + %41 = bitcast ptr %arrayidx8 to ptr + %42 = load <4 x float>, ptr %41, align 4 %43 = fmul fast <4 x float> %42, %40 - %44 = bitcast float* %arrayidx11 to <4 x float>* - %45 = load <4 x float>, <4 x float>* %44, align 4 + %44 = bitcast ptr %arrayidx11 to ptr + %45 = load <4 x float>, ptr %44, align 4 %46 = fmul fast <4 x float> %45, %40 - %47 = bitcast float* %arrayidx14 to <4 x float>* - %48 = load <4 x float>, <4 x float>* %47, align 4 + %47 = bitcast ptr %arrayidx14 to ptr + %48 = load <4 x float>, ptr %47, align 4 %49 = fmul fast <4 x float> %48, %40 - %50 = bitcast float* %arrayidx17 to <4 x float>* - %51 = load <4 x float>, <4 x float>* %50, align 4 + %50 = bitcast ptr %arrayidx17 to ptr + %51 = load <4 x float>, ptr %50, align 4 %52 = fmul fast <4 x float> %51, %40 %start2 = call i32 @llvm.start.loop.iterations.i32(i32 %33) br label %do.body24 do.body24: ; preds = %do.body24, %for.body - %lsr.iv30 = phi float* [ %scevgep31, %do.body24 ], [ %lsr.iv27, %for.body ] - %lsr.iv23 = phi float* [ %scevgep24, %do.body24 ], [ %lsr.iv20, %for.body ] - %lsr.iv16 = phi float* [ %scevgep17, %do.body24 ], [ %lsr.iv13, %for.body ] - %lsr.iv9 = phi float* [ %scevgep10, %do.body24 ], [ %lsr.iv6, %for.body ] - %lsr.iv = phi float* [ %scevgep3, %do.body24 ], [ %scevgep, %for.body ] + %lsr.iv30 = phi ptr [ %scevgep31, %do.body24 ], [ %lsr.iv27, %for.body ] + %lsr.iv23 = phi ptr [ %scevgep24, %do.body24 ], [ %lsr.iv20, %for.body ] + %lsr.iv16 = phi ptr [ %scevgep17, %do.body24 ], [ %lsr.iv13, %for.body ] + %lsr.iv9 = phi ptr [ %scevgep10, %do.body24 ], [ %lsr.iv6, %for.body ] + %lsr.iv = phi ptr [ %scevgep3, %do.body24 ], [ %scevgep, %for.body ] %sumVec0.0 = phi <4 x float> [ %43, %for.body ], [ %56, %do.body24 ] %sumVec1.0 = phi <4 x float> [ %46, %for.body ], [ %58, %do.body24 ] %sumVec2.0 = phi <4 x float> [ %49, %for.body ], [ %60, %do.body24 ] %sumVec3.0 = phi <4 x float> [ %52, %for.body ], [ %62, %do.body24 ] %53 = phi i32 [ %start2, %for.body ], [ %63, %do.body24 ] - %lsr.iv4 = bitcast float* %lsr.iv to <4 x float>* - %lsr.iv911 = bitcast float* %lsr.iv9 to <4 x float>* - %lsr.iv1618 = bitcast float* %lsr.iv16 to <4 x float>* - %lsr.iv2325 = bitcast float* %lsr.iv23 to <4 x float>* - %lsr.iv3032 = bitcast float* %lsr.iv30 to <4 x float>* - %54 = load <4 x float>, <4 x float>* %lsr.iv4, align 4 - %55 = load <4 x float>, <4 x float>* %lsr.iv911, align 4 + %lsr.iv4 = bitcast ptr %lsr.iv to ptr + %lsr.iv911 = bitcast ptr %lsr.iv9 to ptr + %lsr.iv1618 = bitcast ptr %lsr.iv16 to ptr + %lsr.iv2325 = bitcast ptr %lsr.iv23 to ptr + %lsr.iv3032 = bitcast ptr %lsr.iv30 to ptr + %54 = load <4 x float>, ptr %lsr.iv4, align 4 + %55 = load <4 x float>, ptr %lsr.iv911, align 4 %56 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %55, <4 x float> %sumVec0.0) - %57 = load <4 x float>, <4 x float>* %lsr.iv1618, align 4 + %57 = load <4 x float>, ptr %lsr.iv1618, align 4 %58 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %57, <4 x float> %sumVec1.0) - %59 = load <4 x float>, <4 x float>* %lsr.iv2325, align 4 + %59 = load <4 x float>, ptr %lsr.iv2325, align 4 %60 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %59, <4 x float> %sumVec2.0) - %61 = load <4 x float>, <4 x float>* %lsr.iv3032, align 4 + %61 = load <4 x float>, ptr %lsr.iv3032, align 4 %62 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %61, <4 x float> %sumVec3.0) - %scevgep3 = getelementptr float, float* %lsr.iv, i32 4 - %scevgep10 = getelementptr float, float* %lsr.iv9, i32 4 - %scevgep17 = getelementptr float, float* %lsr.iv16, i32 4 - %scevgep24 = getelementptr float, float* %lsr.iv23, i32 4 - %scevgep31 = getelementptr float, float* %lsr.iv30, i32 4 + %scevgep3 = getelementptr float, ptr %lsr.iv, i32 4 + %scevgep10 = getelementptr float, ptr %lsr.iv9, i32 4 + %scevgep17 = getelementptr float, ptr %lsr.iv16, i32 4 + %scevgep24 = getelementptr float, ptr %lsr.iv23, i32 4 + %scevgep31 = getelementptr float, ptr %lsr.iv30, i32 4 %63 = call i32 @llvm.loop.decrement.reg.i32(i32 %53, i32 1) %64 = icmp ne i32 %63, 0 br i1 %64, label %do.body24, label %do.end33 do.end33: ; preds = %do.body24 - %65 = bitcast float* %lsr.iv27 to i1* - %66 = bitcast float* %lsr.iv20 to i1* - %67 = bitcast float* %lsr.iv13 to i1* - %68 = bitcast float* %lsr.iv6 to i1* + %65 = bitcast ptr %lsr.iv27 to ptr + %66 = bitcast ptr %lsr.iv20 to ptr + %67 = bitcast ptr %lsr.iv13 to ptr + %68 = bitcast ptr %lsr.iv6 to ptr %69 = extractelement <4 x float> %56, i32 0 %70 = extractelement <4 x float> %56, i32 1 %add34 = fadd fast float %69, %70 @@ -174,8 +174,8 @@ %add35 = fadd fast float %add34, %71 %72 = extractelement <4 x float> %56, i32 3 %add36 = fadd fast float %add35, %72 - %arrayidx37 = getelementptr inbounds float, float* %pOut, i32 %k.0202 - store float %add36, float* %arrayidx37, align 4 + %arrayidx37 = getelementptr inbounds float, ptr %pOut, i32 %k.0202 + store float %add36, ptr %arrayidx37, align 4 %73 = extractelement <4 x float> %58, i32 0 %74 = extractelement <4 x float> %58, i32 1 %add38 = fadd fast float %73, %74 @@ -183,8 +183,8 @@ %add39 = fadd fast float %add38, %75 %76 = extractelement <4 x float> %58, i32 3 %add40 = fadd fast float %add39, %76 - %arrayidx42 = getelementptr inbounds float, float* %pOut, i32 %add9 - store float %add40, float* %arrayidx42, align 4 + %arrayidx42 = getelementptr inbounds float, ptr %pOut, i32 %add9 + store float %add40, ptr %arrayidx42, align 4 %77 = extractelement <4 x float> %60, i32 0 %78 = extractelement <4 x float> %60, i32 1 %add43 = fadd fast float %77, %78 @@ -192,8 +192,8 @@ %add44 = fadd fast float %add43, %79 %80 = extractelement <4 x float> %60, i32 3 %add45 = fadd fast float %add44, %80 - %arrayidx47 = getelementptr inbounds float, float* %pOut, i32 %add12 - store float %add45, float* %arrayidx47, align 4 + %arrayidx47 = getelementptr inbounds float, ptr %pOut, i32 %add12 + store float %add45, ptr %arrayidx47, align 4 %81 = extractelement <4 x float> %62, i32 0 %82 = extractelement <4 x float> %62, i32 1 %add48 = fadd fast float %81, %82 @@ -201,41 +201,41 @@ %add49 = fadd fast float %add48, %83 %84 = extractelement <4 x float> %62, i32 3 %add50 = fadd fast float %add49, %84 - %arrayidx52 = getelementptr inbounds float, float* %pOut, i32 %add15 - store float %add50, float* %arrayidx52, align 4 + %arrayidx52 = getelementptr inbounds float, ptr %pOut, i32 %add15 + store float %add50, ptr %arrayidx52, align 4 %add53 = add i32 %k.0202, 4 - %scevgep8 = getelementptr i1, i1* %68, i32 %21 - %85 = bitcast i1* %scevgep8 to float* - %scevgep15 = getelementptr i1, i1* %67, i32 %21 - %86 = bitcast i1* %scevgep15 to float* - %scevgep22 = getelementptr i1, i1* %66, i32 %21 - %87 = bitcast i1* %scevgep22 to float* - %scevgep29 = getelementptr i1, i1* %65, i32 %21 - %88 = bitcast i1* %scevgep29 to float* + %scevgep8 = getelementptr i1, ptr %68, i32 %21 + %85 = bitcast ptr %scevgep8 to ptr + %scevgep15 = getelementptr i1, ptr %67, i32 %21 + %86 = bitcast ptr %scevgep15 to ptr + %scevgep22 = getelementptr i1, ptr %66, i32 %21 + %87 = bitcast ptr %scevgep22 to ptr + %scevgep29 = getelementptr i1, ptr %65, i32 %21 + %88 = bitcast ptr %scevgep29 to ptr %cmp5 = icmp ult i32 %add53, %sub4 br i1 %cmp5, label %for.body, label %for.cond54.preheader for.body56: ; preds = %for.body56.preheader, %do.end66 %k.1200 = phi i32 [ %inc, %do.end66 ], [ %k.0.lcssa, %for.body56.preheader ] %mul57 = mul i32 %k.1200, %0 - %arrayidx58 = getelementptr inbounds float, float* %2, i32 %mul57 + %arrayidx58 = getelementptr inbounds float, ptr %2, i32 %mul57 %start3 = call i32 @llvm.start.loop.iterations.i32(i32 %38) br label %do.body59 do.body59: ; preds = %do.body59, %for.body56 %count.2 = phi i32 [ %0, %for.body56 ], [ %94, %do.body59 ] - %pInT.2 = phi float* [ %pIn, %for.body56 ], [ %add.ptr61, %do.body59 ] - %pCos0.1 = phi float* [ %arrayidx58, %for.body56 ], [ %add.ptr62, %do.body59 ] + %pInT.2 = phi ptr [ %pIn, %for.body56 ], [ %add.ptr61, %do.body59 ] + %pCos0.1 = phi ptr [ %arrayidx58, %for.body56 ], [ %add.ptr62, %do.body59 ] %sumVec.1 = phi <4 x float> [ zeroinitializer, %for.body56 ], [ %93, %do.body59 ] %89 = phi i32 [ %start3, %for.body56 ], [ %95, %do.body59 ] - %pInT.21 = bitcast float* %pInT.2 to <4 x float>* - %pCos0.12 = bitcast float* %pCos0.1 to <4 x float>* + %pInT.21 = bitcast ptr %pInT.2 to ptr + %pCos0.12 = bitcast ptr %pCos0.1 to ptr %90 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %count.2) - %91 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pInT.21, i32 4, <4 x i1> %90, <4 x float> zeroinitializer) - %92 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pCos0.12, i32 4, <4 x i1> %90, <4 x float> zeroinitializer) + %91 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %pInT.21, i32 4, <4 x i1> %90, <4 x float> zeroinitializer) + %92 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %pCos0.12, i32 4, <4 x i1> %90, <4 x float> zeroinitializer) %93 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %91, <4 x float> %92, <4 x float> %sumVec.1, <4 x i1> %90) - %add.ptr61 = getelementptr inbounds float, float* %pInT.2, i32 4 - %add.ptr62 = getelementptr inbounds float, float* %pCos0.1, i32 4 + %add.ptr61 = getelementptr inbounds float, ptr %pInT.2, i32 4 + %add.ptr62 = getelementptr inbounds float, ptr %pCos0.1, i32 4 %94 = add i32 %count.2, -4 %95 = call i32 @llvm.loop.decrement.reg.i32(i32 %89, i32 1) %96 = icmp ne i32 %95, 0 @@ -249,8 +249,8 @@ %add68 = fadd fast float %add67, %99 %100 = extractelement <4 x float> %93, i32 3 %add69 = fadd fast float %add68, %100 - %arrayidx70 = getelementptr inbounds float, float* %pOut, i32 %k.1200 - store float %add69, float* %arrayidx70, align 4 + %arrayidx70 = getelementptr inbounds float, ptr %pOut, i32 %k.1200 + store float %add69, ptr %arrayidx70, align 4 %inc = add nuw i32 %k.1200, 1 %exitcond.not = icmp eq i32 %inc, %1 br i1 %exitcond.not, label %for.end72, label %for.body56 @@ -260,7 +260,7 @@ } declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1 - declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2 + declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #2 declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #3 declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x float>, <4 x i1>) #1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir index 6955007..fe156fe 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extract-element.mir @@ -5,7 +5,7 @@ # predication. --- | - define dso_local i32 @no_vpsel_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + define dso_local i32 @no_vpsel_liveout(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -22,22 +22,22 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* - %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr + %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> - %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32> %tmp12 = mul nsw <4 x i32> %tmp11, %tmp10 %tmp13 = add <4 x i32> %tmp12, %vec.phi - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 - %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4 %tmp14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp15 = icmp ne i32 %tmp14, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -51,7 +51,7 @@ %res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp16, %middle.block ] ret i32 %res.0.lcssa } - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) declare <4 x i1> @llvm.arm.mve.vctp32(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir index 4d3f0ac..e529a69 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-16.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --- | - define dso_local void @incorrect_sub_16(i16* noalias nocapture %A, i16* noalias nocapture readonly %B, i16* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { + define dso_local void @incorrect_sub_16(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { entry: %cmp8 = icmp sgt i32 %N, 0 %0 = add i32 %N, 3 @@ -17,23 +17,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv17 = phi i16* [ %scevgep18, %vector.body ], [ %A, %vector.ph ] - %lsr.iv14 = phi i16* [ %scevgep15, %vector.body ], [ %C, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %B, %vector.ph ] + %lsr.iv17 = phi ptr [ %scevgep18, %vector.body ], [ %A, %vector.ph ] + %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %C, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %B, %vector.ph ] %6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv13 = bitcast i16* %lsr.iv to <8 x i16>* - %lsr.iv1416 = bitcast i16* %lsr.iv14 to <8 x i16>* - %lsr.iv1719 = bitcast i16* %lsr.iv17 to <8 x i16>* + %lsr.iv13 = bitcast ptr %lsr.iv to ptr + %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr + %lsr.iv1719 = bitcast ptr %lsr.iv17 to ptr %8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %7) %9 = sub i32 %7, 7 - %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv13, i32 4, <8 x i1> %8, <8 x i16> undef) - %wide.masked.load12 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv1416, i32 4, <8 x i1> %8, <8 x i16> undef) + %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv13, i32 4, <8 x i1> %8, <8 x i16> undef) + %wide.masked.load12 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv1416, i32 4, <8 x i1> %8, <8 x i16> undef) %10 = add nsw <8 x i16> %wide.masked.load12, %wide.masked.load - call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %10, <8 x i16>* %lsr.iv1719, i32 4, <8 x i1> %8) - %scevgep = getelementptr i16, i16* %lsr.iv, i32 8 - %scevgep15 = getelementptr i16, i16* %lsr.iv14, i32 8 - %scevgep18 = getelementptr i16, i16* %lsr.iv17, i32 8 + call void @llvm.masked.store.v8i16.p0(<8 x i16> %10, ptr %lsr.iv1719, i32 4, <8 x i1> %8) + %scevgep = getelementptr i16, ptr %lsr.iv, i32 8 + %scevgep15 = getelementptr i16, ptr %lsr.iv14, i32 8 + %scevgep18 = getelementptr i16, ptr %lsr.iv17, i32 8 %11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %12 = icmp ne i32 %11, 0 br i1 %12, label %vector.body, label %for.cond.cleanup @@ -44,8 +44,8 @@ declare i32 @llvm.start.loop.iterations.i32(i32) declare <8 x i1> @llvm.arm.mve.vctp16(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) - declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) - declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) + declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>) + declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>) ... --- name: incorrect_sub_16 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir index 7ea07bd..51844a7 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-32.mir @@ -8,7 +8,7 @@ # We should optimise away the SUB --- | - define dso_local void @incorrect_sub_32(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { + define dso_local void @incorrect_sub_32(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { entry: %cmp8 = icmp sgt i32 %N, 0 %0 = add i32 %N, 3 @@ -24,23 +24,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ] - %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ] - %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ] + %lsr.iv17 = phi ptr [ %scevgep18, %vector.body ], [ %A, %vector.ph ] + %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %C, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %B, %vector.ph ] %6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* - %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* - %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>* + %lsr.iv13 = bitcast ptr %lsr.iv to ptr + %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr + %lsr.iv1719 = bitcast ptr %lsr.iv17 to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 5 - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef) - %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef) %10 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv1719, i32 4, <4 x i1> %8) - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4 - %scevgep18 = getelementptr i32, i32* %lsr.iv17, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv1719, i32 4, <4 x i1> %8) + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 + %scevgep15 = getelementptr i32, ptr %lsr.iv14, i32 4 + %scevgep18 = getelementptr i32, ptr %lsr.iv17, i32 4 %11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %12 = icmp ne i32 %11, 0 br i1 %12, label %vector.body, label %for.cond.cleanup @@ -51,8 +51,8 @@ declare i32 @llvm.start.loop.iterations.i32(i32) declare <4 x i1> @llvm.arm.mve.vctp32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) ... --- diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir index eb57831..305c31b 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/incorrect-sub-8.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --- | - define dso_local void @incorrect_sub_8(i8* noalias nocapture %A, i8* noalias nocapture readonly %B, i8* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { + define dso_local void @incorrect_sub_8(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { entry: %cmp8 = icmp sgt i32 %N, 0 %0 = add i32 %N, 3 @@ -17,23 +17,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv17 = phi i8* [ %scevgep18, %vector.body ], [ %A, %vector.ph ] - %lsr.iv14 = phi i8* [ %scevgep15, %vector.body ], [ %C, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %B, %vector.ph ] + %lsr.iv17 = phi ptr [ %scevgep18, %vector.body ], [ %A, %vector.ph ] + %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %C, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %B, %vector.ph ] %6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv13 = bitcast i8* %lsr.iv to <16 x i8>* - %lsr.iv1416 = bitcast i8* %lsr.iv14 to <16 x i8>* - %lsr.iv1719 = bitcast i8* %lsr.iv17 to <16 x i8>* + %lsr.iv13 = bitcast ptr %lsr.iv to ptr + %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr + %lsr.iv1719 = bitcast ptr %lsr.iv17 to ptr %8 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %7) %9 = sub i32 %7, 15 - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv13, i32 4, <16 x i1> %8, <16 x i8> undef) - %wide.masked.load12 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv1416, i32 4, <16 x i1> %8, <16 x i8> undef) + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv13, i32 4, <16 x i1> %8, <16 x i8> undef) + %wide.masked.load12 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv1416, i32 4, <16 x i1> %8, <16 x i8> undef) %10 = add nsw <16 x i8> %wide.masked.load12, %wide.masked.load - call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %10, <16 x i8>* %lsr.iv1719, i32 4, <16 x i1> %8) - %scevgep = getelementptr i8, i8* %lsr.iv, i32 16 - %scevgep15 = getelementptr i8, i8* %lsr.iv14, i32 16 - %scevgep18 = getelementptr i8, i8* %lsr.iv17, i32 16 + call void @llvm.masked.store.v16i8.p0(<16 x i8> %10, ptr %lsr.iv1719, i32 4, <16 x i1> %8) + %scevgep = getelementptr i8, ptr %lsr.iv, i32 16 + %scevgep15 = getelementptr i8, ptr %lsr.iv14, i32 16 + %scevgep18 = getelementptr i8, ptr %lsr.iv17, i32 16 %11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %12 = icmp ne i32 %11, 0 br i1 %12, label %vector.body, label %for.cond.cleanup @@ -44,9 +44,9 @@ declare i32 @llvm.start.loop.iterations.i32(i32) declare <16 x i1> @llvm.arm.mve.vctp8(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) - declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) - declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>) - declare void @llvm.stackprotector(i8*, i8**) + declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) + declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>) + declare void @llvm.stackprotector(ptr, ptr) ... --- name: incorrect_sub_8 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-1.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-1.mir index d9b8ca2..6ef6ba1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-1.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-1.mir @@ -4,7 +4,7 @@ # Test that VPNOTs cannot be within a tail predicated loop. --- | - define dso_local void @inloop_vpnot(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %c, i16* nocapture readonly %d, i32* nocapture %e, i32 %N) local_unnamed_addr #0 { + define dso_local void @inloop_vpnot(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture readonly %c, ptr nocapture readonly %d, ptr nocapture %e, i32 %N) local_unnamed_addr #0 { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -21,39 +21,39 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv.e = phi i32* [ %scevgep.e, %vector.body ], [ %e, %vector.ph ] - %lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ] - %lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ] - %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv.e = phi ptr [ %scevgep.e, %vector.body ], [ %e, %vector.ph ] + %lsr.iv.d = phi ptr [ %scevgep.d, %vector.body ], [ %d, %vector.ph ] + %lsr.iv.c = phi ptr [ %scevgep.c, %vector.body ], [ %c, %vector.ph ] + %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp14, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* - %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* - %lsr.iv1820.c = bitcast i16* %lsr.iv.c to <4 x i16>* - %lsr.iv17.d = bitcast i16* %lsr.iv.d to <4 x i16>* - %lsr.cast.e = bitcast i32* %lsr.iv.e to <4 x i32>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr + %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr + %lsr.iv1820.c = bitcast ptr %lsr.iv.c to ptr + %lsr.iv17.d = bitcast ptr %lsr.iv.d to ptr + %lsr.cast.e = bitcast ptr %lsr.iv.e to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> - %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32> - %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %sext.load.c = sext <4 x i16> %wide.masked.load.c to <4 x i32> - %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %sext.load.d = sext <4 x i16> %wide.masked.load.d to <4 x i32> %tmp12 = mul nsw <4 x i32> %tmp11, %tmp10 %mul.2 = mul nsw <4 x i32> %sext.load.c, %sext.load.d %tmp13 = add <4 x i32> %tmp12, %mul.2 %tmp14 = add <4 x i32> %tmp13, %vec.phi %vpnot = xor <4 x i1> %tmp8, <i1 true, i1 true, i1 true, i1 true> - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp14, <4 x i32>* %lsr.cast.e, i32 4, <4 x i1> %vpnot) - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 - %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 - %scevgep.c = getelementptr i16, i16* %lsr.iv.c, i32 4 - %scevgep.d = getelementptr i16, i16* %lsr.iv.d, i32 4 - %scevgep.e = getelementptr i32, i32* %lsr.iv.e, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp14, ptr %lsr.cast.e, i32 4, <4 x i1> %vpnot) + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4 + %scevgep.c = getelementptr i16, ptr %lsr.iv.c, i32 4 + %scevgep.d = getelementptr i16, ptr %lsr.iv.d, i32 4 + %scevgep.e = getelementptr i32, ptr %lsr.iv.e, i32 4 %tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp16 = icmp ne i32 %tmp15, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -62,8 +62,8 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1 - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2 + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1 + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-2.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-2.mir index 92d5998..6681e0a 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-2.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-2.mir @@ -4,7 +4,7 @@ # Test that a predicated VPNOT cannot be in a tail predicated loop. --- | - define dso_local void @inloop_vpnot(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %c, i16* nocapture readonly %d, i32* nocapture %e, i32 %N) local_unnamed_addr #0 { + define dso_local void @inloop_vpnot(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture readonly %c, ptr nocapture readonly %d, ptr nocapture %e, i32 %N) local_unnamed_addr #0 { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -21,39 +21,39 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv.e = phi i32* [ %scevgep.e, %vector.body ], [ %e, %vector.ph ] - %lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ] - %lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ] - %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv.e = phi ptr [ %scevgep.e, %vector.body ], [ %e, %vector.ph ] + %lsr.iv.d = phi ptr [ %scevgep.d, %vector.body ], [ %d, %vector.ph ] + %lsr.iv.c = phi ptr [ %scevgep.c, %vector.body ], [ %c, %vector.ph ] + %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp14, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* - %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* - %lsr.iv1820.c = bitcast i16* %lsr.iv.c to <4 x i16>* - %lsr.iv17.d = bitcast i16* %lsr.iv.d to <4 x i16>* - %lsr.cast.e = bitcast i32* %lsr.iv.e to <4 x i32>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr + %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr + %lsr.iv1820.c = bitcast ptr %lsr.iv.c to ptr + %lsr.iv17.d = bitcast ptr %lsr.iv.d to ptr + %lsr.cast.e = bitcast ptr %lsr.iv.e to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> - %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32> - %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %sext.load.c = sext <4 x i16> %wide.masked.load.c to <4 x i32> - %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %sext.load.d = sext <4 x i16> %wide.masked.load.d to <4 x i32> %tmp12 = mul nsw <4 x i32> %tmp11, %tmp10 %mul.2 = mul nsw <4 x i32> %sext.load.c, %sext.load.d %tmp13 = add <4 x i32> %tmp12, %mul.2 %tmp14 = add <4 x i32> %tmp13, %vec.phi %vpnot = xor <4 x i1> %tmp8, <i1 true, i1 true, i1 true, i1 true> - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp14, <4 x i32>* %lsr.cast.e, i32 4, <4 x i1> %vpnot) - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 - %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 - %scevgep.c = getelementptr i16, i16* %lsr.iv.c, i32 4 - %scevgep.d = getelementptr i16, i16* %lsr.iv.d, i32 4 - %scevgep.e = getelementptr i32, i32* %lsr.iv.e, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp14, ptr %lsr.cast.e, i32 4, <4 x i1> %vpnot) + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4 + %scevgep.c = getelementptr i16, ptr %lsr.iv.c, i32 4 + %scevgep.d = getelementptr i16, ptr %lsr.iv.d, i32 4 + %scevgep.e = getelementptr i32, ptr %lsr.iv.e, i32 4 %tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp16 = icmp ne i32 %tmp15, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -62,8 +62,8 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1 - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2 + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1 + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-3.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-3.mir index 2a8aa84..d0716d8 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-3.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpnot-3.mir @@ -4,7 +4,7 @@ # Test that a VPNOT is not added to a max sized VPT block. --- | - define dso_local void @inloop_vpnot(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %c, i16* nocapture readonly %d, i32* nocapture %e, i32 %N) local_unnamed_addr #0 { + define dso_local void @inloop_vpnot(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture readonly %c, ptr nocapture readonly %d, ptr nocapture %e, i32 %N) local_unnamed_addr #0 { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -21,39 +21,39 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv.e = phi i32* [ %scevgep.e, %vector.body ], [ %e, %vector.ph ] - %lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ] - %lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ] - %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv.e = phi ptr [ %scevgep.e, %vector.body ], [ %e, %vector.ph ] + %lsr.iv.d = phi ptr [ %scevgep.d, %vector.body ], [ %d, %vector.ph ] + %lsr.iv.c = phi ptr [ %scevgep.c, %vector.body ], [ %c, %vector.ph ] + %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp14, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* - %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* - %lsr.iv1820.c = bitcast i16* %lsr.iv.c to <4 x i16>* - %lsr.iv17.d = bitcast i16* %lsr.iv.d to <4 x i16>* - %lsr.cast.e = bitcast i32* %lsr.iv.e to <4 x i32>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr + %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr + %lsr.iv1820.c = bitcast ptr %lsr.iv.c to ptr + %lsr.iv17.d = bitcast ptr %lsr.iv.d to ptr + %lsr.cast.e = bitcast ptr %lsr.iv.e to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> - %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32> - %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %sext.load.c = sext <4 x i16> %wide.masked.load.c to <4 x i32> - %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %sext.load.d = sext <4 x i16> %wide.masked.load.d to <4 x i32> %tmp12 = mul nsw <4 x i32> %tmp11, %tmp10 %mul.2 = mul nsw <4 x i32> %sext.load.c, %sext.load.d %tmp13 = add <4 x i32> %tmp12, %mul.2 %tmp14 = add <4 x i32> %tmp13, %vec.phi %vpnot = xor <4 x i1> %tmp8, <i1 true, i1 true, i1 true, i1 true> - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp14, <4 x i32>* %lsr.cast.e, i32 4, <4 x i1> %vpnot) - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 - %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 - %scevgep.c = getelementptr i16, i16* %lsr.iv.c, i32 4 - %scevgep.d = getelementptr i16, i16* %lsr.iv.d, i32 4 - %scevgep.e = getelementptr i32, i32* %lsr.iv.e, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp14, ptr %lsr.cast.e, i32 4, <4 x i1> %vpnot) + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4 + %scevgep.c = getelementptr i16, ptr %lsr.iv.c, i32 4 + %scevgep.d = getelementptr i16, ptr %lsr.iv.d, i32 4 + %scevgep.e = getelementptr i32, ptr %lsr.iv.e, i32 4 %tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp16 = icmp ne i32 %tmp15, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -62,8 +62,8 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1 - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2 + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1 + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir index 46a011d..2608276 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir @@ -4,7 +4,7 @@ # General test for vpsel exclusion from tail predication --- | - define dso_local i32 @vpsel_after_vpt(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %c, i16* nocapture readonly %d, i32 %N) local_unnamed_addr #0 { + define dso_local i32 @vpsel_after_vpt(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture readonly %c, ptr nocapture readonly %d, i32 %N) local_unnamed_addr #0 { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -21,35 +21,35 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ] - %lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ] - %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv.d = phi ptr [ %scevgep.d, %vector.body ], [ %d, %vector.ph ] + %lsr.iv.c = phi ptr [ %scevgep.c, %vector.body ], [ %c, %vector.ph ] + %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp14, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* - %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* - %lsr.iv1820.c = bitcast i16* %lsr.iv.c to <4 x i16>* - %lsr.iv17.d = bitcast i16* %lsr.iv.d to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr + %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr + %lsr.iv1820.c = bitcast ptr %lsr.iv.c to ptr + %lsr.iv17.d = bitcast ptr %lsr.iv.d to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> - %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32> - %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %sext.load.c = sext <4 x i16> %wide.masked.load.c to <4 x i32> - %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %sext.load.d = sext <4 x i16> %wide.masked.load.d to <4 x i32> %tmp12 = mul nsw <4 x i32> %tmp11, %tmp10 %mul.2 = mul nsw <4 x i32> %sext.load.c, %sext.load.d %tmp13 = add <4 x i32> %tmp12, %mul.2 %acc = add <4 x i32> %tmp13, %vec.phi %tmp14 = select <4 x i1> %tmp8, <4 x i32> %acc, <4 x i32> %vec.phi - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 - %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 - %scevgep.c = getelementptr i16, i16* %lsr.iv.c, i32 4 - %scevgep.d = getelementptr i16, i16* %lsr.iv.d, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4 + %scevgep.c = getelementptr i16, ptr %lsr.iv.c, i32 4 + %scevgep.d = getelementptr i16, ptr %lsr.iv.d, i32 4 %tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp16 = icmp ne i32 %tmp15, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -63,7 +63,7 @@ %res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp17, %middle.block ] ret i32 %res.0.lcssa } - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1 + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir index dd9fc35..a28abae 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir @@ -6,7 +6,7 @@ # the block. --- | - define dso_local i32 @vpsel_after_vpt(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %c, i16* nocapture readonly %d, i32 %N) local_unnamed_addr #0 { + define dso_local i32 @vpsel_after_vpt(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture readonly %c, ptr nocapture readonly %d, i32 %N) local_unnamed_addr #0 { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -23,35 +23,35 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv.d = phi i16* [ %scevgep.d, %vector.body ], [ %d, %vector.ph ] - %lsr.iv.c = phi i16* [ %scevgep.c, %vector.body ], [ %c, %vector.ph ] - %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv.d = phi ptr [ %scevgep.d, %vector.body ], [ %d, %vector.ph ] + %lsr.iv.c = phi ptr [ %scevgep.c, %vector.body ], [ %c, %vector.ph ] + %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp14, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* - %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* - %lsr.iv1820.c = bitcast i16* %lsr.iv.c to <4 x i16>* - %lsr.iv17.d = bitcast i16* %lsr.iv.d to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr + %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr + %lsr.iv1820.c = bitcast ptr %lsr.iv.c to ptr + %lsr.iv17.d = bitcast ptr %lsr.iv.d to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> - %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32> - %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load.c = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820.c, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %sext.load.c = sext <4 x i16> %wide.masked.load.c to <4 x i32> - %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load.d = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17.d, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %sext.load.d = sext <4 x i16> %wide.masked.load.d to <4 x i32> %tmp12 = mul nsw <4 x i32> %tmp11, %tmp10 %mul.2 = mul nsw <4 x i32> %sext.load.c, %sext.load.d %tmp13 = add <4 x i32> %tmp12, %mul.2 %acc = add <4 x i32> %tmp13, %vec.phi %tmp14 = select <4 x i1> %tmp8, <4 x i32> %acc, <4 x i32> %vec.phi - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 - %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 - %scevgep.c = getelementptr i16, i16* %lsr.iv.c, i32 4 - %scevgep.d = getelementptr i16, i16* %lsr.iv.d, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4 + %scevgep.c = getelementptr i16, ptr %lsr.iv.c, i32 4 + %scevgep.d = getelementptr i16, ptr %lsr.iv.d, i32 4 %tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp16 = icmp ne i32 %tmp15, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -65,7 +65,7 @@ %res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp17, %middle.block ] ret i32 %res.0.lcssa } - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1 + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir index 2890b72..f4d6ce6 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - define dso_local <4 x i32> @invariant_use_store(i16* nocapture readonly %a, i32* %c, i32 %N, <4 x i32> %pass) { + define dso_local <4 x i32> @invariant_use_store(ptr nocapture readonly %a, ptr %c, i32 %N, <4 x i32> %pass) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -19,21 +19,21 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv20 = phi i32* [ %scevgep20, %vector.body ], [ %c, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv20 = phi ptr [ %scevgep20, %vector.body ], [ %c, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* - %lsr.store = bitcast i32* %lsr.iv20 to <4 x i32>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr + %lsr.store = bitcast ptr %lsr.iv20 to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> %tmp12 = mul nsw <4 x i32> %pass, %tmp10 %tmp13 = add <4 x i32> %tmp12, %vec.phi - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp13, <4 x i32>* %lsr.store, i32 4, <4 x i1> %tmp8) - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 - %scevgep20 = getelementptr i32, i32* %lsr.iv20, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp13, ptr %lsr.store, i32 4, <4 x i1> %tmp8) + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 + %scevgep20 = getelementptr i32, ptr %lsr.iv20, i32 4 %tmp14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp15 = icmp ne i32 %tmp14, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -43,7 +43,7 @@ ret <4 x i32> %pass } - define dso_local i32 @invariant_mul_use_reduce(i16* nocapture readonly %a, i32* %c, i32 %N, <4 x i32> %pass) { + define dso_local i32 @invariant_mul_use_reduce(ptr nocapture readonly %a, ptr %c, i32 %N, <4 x i32> %pass) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -60,16 +60,16 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> %tmp12 = mul nsw <4 x i32> %pass, %tmp10 %tmp13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp12) - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 %tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp16 = icmp ne i32 %tmp15, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -80,7 +80,7 @@ ret i32 %res } - define dso_local i32 @invariant_add_use_reduce(i16* nocapture readonly %a, i32* %c, i32 %N, <4 x i32> %pass) { + define dso_local i32 @invariant_add_use_reduce(ptr nocapture readonly %a, ptr %c, i32 %N, <4 x i32> %pass) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -97,16 +97,16 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> %tmp12 = add nsw <4 x i32> %pass, %tmp10 %tmp13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp12) - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 %tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp16 = icmp ne i32 %tmp15, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -118,8 +118,8 @@ } declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) declare <4 x i1> @llvm.arm.mve.vctp32(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir index ae13493..b1749fd 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - define hidden arm_aapcs_vfpcc void @it_block_store_count_before_start(float* %pSrc, float* %pDst, i32 %blockSize, i32* %iter.addr) #0 { + define hidden arm_aapcs_vfpcc void @it_block_store_count_before_start(ptr %pSrc, ptr %pDst, i32 %blockSize, ptr %iter.addr) #0 { entry: %mul = shl i32 %blockSize, 1 %0 = add i32 %mul, 3 @@ -11,23 +11,23 @@ %2 = sub i32 %0, %smin %3 = lshr i32 %2, 2 %4 = add nuw nsw i32 %3, 1 - store i32 %4, i32* %iter.addr, align 4 + store i32 %4, ptr %iter.addr, align 4 %start = call i32 @llvm.start.loop.iterations.i32(i32 %4) br label %do.body do.body: ; preds = %do.body, %entry %lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %start, %entry ] %blkCnt.0 = phi i32 [ %mul, %entry ], [ %sub, %do.body ] - %pDst.addr.0 = phi float* [ %pDst, %entry ], [ %add.ptr4, %do.body ] - %pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ] - %pDst.addr.01 = bitcast float* %pDst.addr.0 to <4 x float>* - %pSrc.addr.02 = bitcast float* %pSrc.addr.0 to <4 x float>* + %pDst.addr.0 = phi ptr [ %pDst, %entry ], [ %add.ptr4, %do.body ] + %pSrc.addr.0 = phi ptr [ %pSrc, %entry ], [ %add.ptr, %do.body ] + %pDst.addr.01 = bitcast ptr %pDst.addr.0 to ptr + %pSrc.addr.02 = bitcast ptr %pSrc.addr.0 to ptr %5 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0) - %6 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.02, i32 4, <4 x i1> %5, <4 x float> undef) + %6 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %pSrc.addr.02, i32 4, <4 x i1> %5, <4 x float> undef) %7 = fmul <4 x float> %6, %6 - tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %7, <4 x float>* %pDst.addr.01, i32 4, <4 x i1> %5) - %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4 - %add.ptr4 = getelementptr inbounds float, float* %pDst.addr.0, i32 4 + tail call void @llvm.masked.store.v4f32.p0(<4 x float> %7, ptr %pDst.addr.01, i32 4, <4 x i1> %5) + %add.ptr = getelementptr inbounds float, ptr %pSrc.addr.0, i32 4 + %add.ptr4 = getelementptr inbounds float, ptr %pDst.addr.0, i32 4 %sub = add nsw i32 %blkCnt.0, -4 %8 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1) %9 = icmp ne i32 %8, 0 @@ -38,7 +38,7 @@ ret void } - define hidden arm_aapcs_vfpcc void @it_block_store_count_after_start(float* %pSrc, float* %pDst, i32 %blockSize, i32* %iter.addr) #0 { + define hidden arm_aapcs_vfpcc void @it_block_store_count_after_start(ptr %pSrc, ptr %pDst, i32 %blockSize, ptr %iter.addr) #0 { entry: %mul = shl i32 %blockSize, 1 %0 = add i32 %mul, 3 @@ -48,22 +48,22 @@ %3 = lshr i32 %2, 2 %4 = add nuw nsw i32 %3, 1 %start = call i32 @llvm.start.loop.iterations.i32(i32 %4) - store i32 %4, i32* %iter.addr, align 4 + store i32 %4, ptr %iter.addr, align 4 br label %do.body do.body: ; preds = %do.body, %entry %lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %start, %entry ] %blkCnt.0 = phi i32 [ %mul, %entry ], [ %sub, %do.body ] - %pDst.addr.0 = phi float* [ %pDst, %entry ], [ %add.ptr4, %do.body ] - %pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ] - %pDst.addr.01 = bitcast float* %pDst.addr.0 to <4 x float>* - %pSrc.addr.02 = bitcast float* %pSrc.addr.0 to <4 x float>* + %pDst.addr.0 = phi ptr [ %pDst, %entry ], [ %add.ptr4, %do.body ] + %pSrc.addr.0 = phi ptr [ %pSrc, %entry ], [ %add.ptr, %do.body ] + %pDst.addr.01 = bitcast ptr %pDst.addr.0 to ptr + %pSrc.addr.02 = bitcast ptr %pSrc.addr.0 to ptr %5 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0) - %6 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.02, i32 4, <4 x i1> %5, <4 x float> undef) + %6 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %pSrc.addr.02, i32 4, <4 x i1> %5, <4 x float> undef) %7 = fmul <4 x float> %6, %6 - tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %7, <4 x float>* %pDst.addr.01, i32 4, <4 x i1> %5) - %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4 - %add.ptr4 = getelementptr inbounds float, float* %pDst.addr.0, i32 4 + tail call void @llvm.masked.store.v4f32.p0(<4 x float> %7, ptr %pDst.addr.01, i32 4, <4 x i1> %5) + %add.ptr = getelementptr inbounds float, ptr %pSrc.addr.0, i32 4 + %add.ptr4 = getelementptr inbounds float, ptr %pDst.addr.0, i32 4 %sub = add nsw i32 %blkCnt.0, -4 %8 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1) %9 = icmp ne i32 %8, 0 @@ -78,10 +78,10 @@ declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1 ; Function Attrs: argmemonly nounwind readonly willreturn - declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2 + declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #2 ; Function Attrs: argmemonly nounwind willreturn writeonly - declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>) #3 + declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32 immarg, <4 x i1>) #3 ; Function Attrs: noduplicate nounwind declare i32 @llvm.start.loop.iterations.i32(i32) #4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp-reordered.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp-reordered.mir index dbe0ac4..f715476 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp-reordered.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp-reordered.mir @@ -4,7 +4,7 @@ # TODO: We should be able to handle the VCMP -> VPST -> VCMP -> VCTP case. --- | - define dso_local arm_aapcs_vfpcc void @test(i32* noalias nocapture %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + define dso_local arm_aapcs_vfpcc void @test(ptr noalias nocapture %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -25,12 +25,12 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv3 = phi i32* [ %scevgep4, %vector.body ], [ %b, %vector.ph ] - %lsr.iv1 = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv3 = phi ptr [ %scevgep4, %vector.body ], [ %b, %vector.ph ] + %lsr.iv1 = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ] %elts.rem = phi i32 [ %N, %vector.ph ], [ %elts.rem.next, %vector.body ] - %lsr.iv12 = bitcast i32* %lsr.iv1 to <4 x i32>* - %lsr.iv35 = bitcast i32* %lsr.iv3 to <4 x i32>* + %lsr.iv12 = bitcast ptr %lsr.iv1 to ptr + %lsr.iv35 = bitcast ptr %lsr.iv3 to ptr %tmp7 = insertelement <4 x i32> undef, i32 %div, i32 0 %tmp8 = shufflevector <4 x i32> %tmp7, <4 x i32> undef, <4 x i32> zeroinitializer %tmp9 = icmp ult <4 x i32> %vec.ind, %tmp8 @@ -38,12 +38,12 @@ %tmp10 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %elts.rem) %tmp11 = and <4 x i1> %tmp9, %tmp10 %pred = and <4 x i1> %tmp11, %lower - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv35, i32 4, <4 x i1> %pred, <4 x i32> undef) - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %wide.masked.load, <4 x i32>* %lsr.iv12, i32 4, <4 x i1> %pred) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv35, i32 4, <4 x i1> %pred, <4 x i32> undef) + call void @llvm.masked.store.v4i32.p0(<4 x i32> %wide.masked.load, ptr %lsr.iv12, i32 4, <4 x i1> %pred) %vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4> %elts.rem.next = sub i32 %elts.rem, 4 - %scevgep = getelementptr i32, i32* %lsr.iv1, i32 4 - %scevgep4 = getelementptr i32, i32* %lsr.iv3, i32 4 + %scevgep = getelementptr i32, ptr %lsr.iv1, i32 4 + %scevgep4 = getelementptr i32, ptr %lsr.iv3, i32 4 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv, -1 @@ -53,8 +53,8 @@ ret void } - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) declare <4 x i1> @llvm.arm.mve.vctp32(i32) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir index be8fd89..551cf31 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-two-vcmp.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s --- | - define dso_local arm_aapcs_vfpcc void @test(i32* noalias nocapture %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + define dso_local arm_aapcs_vfpcc void @test(ptr noalias nocapture %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -23,12 +23,12 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv3 = phi i32* [ %scevgep4, %vector.body ], [ %b, %vector.ph ] - %lsr.iv1 = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv3 = phi ptr [ %scevgep4, %vector.body ], [ %b, %vector.ph ] + %lsr.iv1 = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ] %elts.rem = phi i32 [ %N, %vector.ph ], [ %elts.rem.next, %vector.body ] - %lsr.iv12 = bitcast i32* %lsr.iv1 to <4 x i32>* - %lsr.iv35 = bitcast i32* %lsr.iv3 to <4 x i32>* + %lsr.iv12 = bitcast ptr %lsr.iv1 to ptr + %lsr.iv35 = bitcast ptr %lsr.iv3 to ptr %tmp7 = insertelement <4 x i32> undef, i32 %div, i32 0 %tmp8 = shufflevector <4 x i32> %tmp7, <4 x i32> undef, <4 x i32> zeroinitializer %tmp9 = icmp ult <4 x i32> %vec.ind, %tmp8 @@ -36,12 +36,12 @@ %tmp10 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %elts.rem) %tmp11 = and <4 x i1> %tmp9, %tmp10 %pred = and <4 x i1> %tmp11, %lower - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv35, i32 4, <4 x i1> %pred, <4 x i32> undef) - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %wide.masked.load, <4 x i32>* %lsr.iv12, i32 4, <4 x i1> %pred) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv35, i32 4, <4 x i1> %pred, <4 x i32> undef) + call void @llvm.masked.store.v4i32.p0(<4 x i32> %wide.masked.load, ptr %lsr.iv12, i32 4, <4 x i1> %pred) %vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4> %elts.rem.next = sub i32 %elts.rem, 4 - %scevgep = getelementptr i32, i32* %lsr.iv1, i32 4 - %scevgep4 = getelementptr i32, i32* %lsr.iv3, i32 4 + %scevgep = getelementptr i32, ptr %lsr.iv1, i32 4 + %scevgep4 = getelementptr i32, ptr %lsr.iv3, i32 4 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv, -1 @@ -51,8 +51,8 @@ ret void } - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) declare <4 x i1> @llvm.arm.mve.vctp32(i32) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir index ec39618..5ede970 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/iv-vcmp.mir @@ -3,7 +3,7 @@ --- | ; Function Attrs: nofree norecurse nounwind - define dso_local arm_aapcs_vfpcc void @test(i32* noalias nocapture %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + define dso_local arm_aapcs_vfpcc void @test(ptr noalias nocapture %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 { entry: %cmp9 = icmp eq i32 %N, 0 %0 = add i32 %N, 3 @@ -23,24 +23,24 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv3 = phi i32* [ %scevgep4, %vector.body ], [ %b, %vector.ph ] - %lsr.iv1 = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv3 = phi ptr [ %scevgep4, %vector.body ], [ %b, %vector.ph ] + %lsr.iv1 = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ] %elts.rem = phi i32 [ %N, %vector.ph ], [ %elts.rem.next, %vector.body ] %6 = phi i32 [ %start, %vector.ph ], [ %12, %vector.body ] - %lsr.iv35 = bitcast i32* %lsr.iv3 to <4 x i32>* - %lsr.iv12 = bitcast i32* %lsr.iv1 to <4 x i32>* + %lsr.iv35 = bitcast ptr %lsr.iv3 to ptr + %lsr.iv12 = bitcast ptr %lsr.iv1 to ptr %7 = insertelement <4 x i32> undef, i32 %div, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = icmp ult <4 x i32> %vec.ind, %8 %10 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %elts.rem) %11 = and <4 x i1> %9, %10 - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv35, i32 4, <4 x i1> %11, <4 x i32> undef) - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %wide.masked.load, <4 x i32>* %lsr.iv12, i32 4, <4 x i1> %11) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv35, i32 4, <4 x i1> %11, <4 x i32> undef) + call void @llvm.masked.store.v4i32.p0(<4 x i32> %wide.masked.load, ptr %lsr.iv12, i32 4, <4 x i1> %11) %vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4> %elts.rem.next = sub i32 %elts.rem, 4 - %scevgep = getelementptr i32, i32* %lsr.iv1, i32 4 - %scevgep4 = getelementptr i32, i32* %lsr.iv3, i32 4 + %scevgep = getelementptr i32, ptr %lsr.iv1, i32 4 + %scevgep4 = getelementptr i32, ptr %lsr.iv3, i32 4 %12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %13 = icmp ne i32 %12, 0 br i1 %13, label %vector.body, label %for.cond.cleanup @@ -49,8 +49,8 @@ ret void } - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) declare <4 x i1> @llvm.arm.mve.vctp32(i32) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/livereg-no-loop-def.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/livereg-no-loop-def.mir index 6322ddf..1e0c546 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/livereg-no-loop-def.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/livereg-no-loop-def.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s --- | - define dso_local <4 x i32> @exit_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32* %c, i32 %N, <4 x i32> %pass) { + define dso_local <4 x i32> @exit_liveout(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr %c, i32 %N, <4 x i32> %pass) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -19,26 +19,26 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] - %lsr.iv20 = phi i32* [ %scevgep20, %vector.body ], [ %c, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv20 = phi ptr [ %scevgep20, %vector.body ], [ %c, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* - %lsr.store = bitcast i32* %lsr.iv20 to <4 x i32>* - %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr + %lsr.store = bitcast ptr %lsr.iv20 to ptr + %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> - %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32> %tmp12 = mul nsw <4 x i32> %tmp11, %tmp10 %tmp13 = add <4 x i32> %tmp12, %vec.phi - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp13, <4 x i32>* %lsr.store, i32 4, <4 x i1> %tmp8) - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 - %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 - %scevgep20 = getelementptr i32, i32* %lsr.iv20, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp13, ptr %lsr.store, i32 4, <4 x i1> %tmp8) + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4 + %scevgep20 = getelementptr i32, ptr %lsr.iv20, i32 4 %tmp14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp15 = icmp ne i32 %tmp14, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -48,8 +48,8 @@ ret <4 x i32> %pass } - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) declare <4 x i1> @llvm.arm.mve.vctp32(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir index 34c8a25..4278cfc 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s --- | - define dso_local arm_aapcscc void @test1(i32* nocapture %arg, i32* nocapture readonly %arg1, i32* nocapture readonly %arg2, i32 %arg3) { + define dso_local arm_aapcscc void @test1(ptr nocapture %arg, ptr nocapture readonly %arg1, ptr nocapture readonly %arg2, i32 %arg3) { bb: %tmp = icmp eq i32 %arg3, 0 br i1 %tmp, label %bb27, label %bb4 @@ -28,15 +28,15 @@ br i1 %tmp15, label %bb27, label %bb16 bb16: ; preds = %bb13 - %tmp17 = getelementptr inbounds i32, i32* %arg1, i32 %tmp14 - %tmp18 = load i32, i32* %tmp17, align 4 - %tmp19 = getelementptr inbounds i32, i32* %arg2, i32 %tmp14 - %tmp20 = load i32, i32* %tmp19, align 4 + %tmp17 = getelementptr inbounds i32, ptr %arg1, i32 %tmp14 + %tmp18 = load i32, ptr %tmp17, align 4 + %tmp19 = getelementptr inbounds i32, ptr %arg2, i32 %tmp14 + %tmp20 = load i32, ptr %tmp19, align 4 %tmp21 = xor i32 %tmp20, %tmp18 - %tmp22 = getelementptr inbounds i32, i32* %arg, i32 %tmp14 - %tmp23 = load i32, i32* %tmp22, align 4 + %tmp22 = getelementptr inbounds i32, ptr %arg, i32 %tmp14 + %tmp23 = load i32, ptr %tmp22, align 4 %tmp24 = add nsw i32 %tmp23, %tmp21 - store i32 %tmp24, i32* %tmp22, align 4 + store i32 %tmp24, ptr %tmp22, align 4 %tmp25 = add nuw i32 %tmp14, 1 %tmp26 = icmp eq i32 %tmp6, 1 br i1 %tmp26, label %bb27, label %bb57 @@ -48,69 +48,69 @@ %lsr.iv15 = phi i32 [ %lsr.iv.next16, %bb28 ], [ %start, %bb12 ] %lsr.iv = phi i32 [ %lsr.iv.next, %bb28 ], [ 0, %bb12 ] %tmp29 = phi i32 [ 0, %bb12 ], [ %tmp54, %bb28 ] - %0 = bitcast i32* %arg1 to i8* - %1 = bitcast i32* %arg2 to i8* - %2 = bitcast i32* %arg to i8* - %uglygep14 = getelementptr i8, i8* %0, i32 %lsr.iv - %uglygep1415 = bitcast i8* %uglygep14 to i32* - %scevgep617 = bitcast i32* %uglygep1415 to i32* - %tmp34 = load i32, i32* %scevgep617, align 4 - %uglygep8 = getelementptr i8, i8* %1, i32 %lsr.iv - %uglygep89 = bitcast i8* %uglygep8 to i32* - %scevgep418 = bitcast i32* %uglygep89 to i32* - %tmp35 = load i32, i32* %scevgep418, align 4 + %0 = bitcast ptr %arg1 to ptr + %1 = bitcast ptr %arg2 to ptr + %2 = bitcast ptr %arg to ptr + %uglygep14 = getelementptr i8, ptr %0, i32 %lsr.iv + %uglygep1415 = bitcast ptr %uglygep14 to ptr + %scevgep617 = bitcast ptr %uglygep1415 to ptr + %tmp34 = load i32, ptr %scevgep617, align 4 + %uglygep8 = getelementptr i8, ptr %1, i32 %lsr.iv + %uglygep89 = bitcast ptr %uglygep8 to ptr + %scevgep418 = bitcast ptr %uglygep89 to ptr + %tmp35 = load i32, ptr %scevgep418, align 4 %tmp36 = xor i32 %tmp35, %tmp34 - %uglygep2 = getelementptr i8, i8* %2, i32 %lsr.iv - %uglygep23 = bitcast i8* %uglygep2 to i32* - %scevgep219 = bitcast i32* %uglygep23 to i32* - %tmp37 = load i32, i32* %scevgep219, align 4 + %uglygep2 = getelementptr i8, ptr %2, i32 %lsr.iv + %uglygep23 = bitcast ptr %uglygep2 to ptr + %scevgep219 = bitcast ptr %uglygep23 to ptr + %tmp37 = load i32, ptr %scevgep219, align 4 %tmp38 = add nsw i32 %tmp37, %tmp36 - store i32 %tmp38, i32* %scevgep219, align 4 - %uglygep33 = getelementptr i8, i8* %0, i32 %lsr.iv - %uglygep3334 = bitcast i8* %uglygep33 to i32* - %scevgep14 = getelementptr i32, i32* %uglygep3334, i32 1 - %tmp39 = load i32, i32* %scevgep14, align 4 - %uglygep27 = getelementptr i8, i8* %1, i32 %lsr.iv - %uglygep2728 = bitcast i8* %uglygep27 to i32* - %scevgep11 = getelementptr i32, i32* %uglygep2728, i32 1 - %tmp40 = load i32, i32* %scevgep11, align 4 + store i32 %tmp38, ptr %scevgep219, align 4 + %uglygep33 = getelementptr i8, ptr %0, i32 %lsr.iv + %uglygep3334 = bitcast ptr %uglygep33 to ptr + %scevgep14 = getelementptr i32, ptr %uglygep3334, i32 1 + %tmp39 = load i32, ptr %scevgep14, align 4 + %uglygep27 = getelementptr i8, ptr %1, i32 %lsr.iv + %uglygep2728 = bitcast ptr %uglygep27 to ptr + %scevgep11 = getelementptr i32, ptr %uglygep2728, i32 1 + %tmp40 = load i32, ptr %scevgep11, align 4 %tmp41 = xor i32 %tmp40, %tmp39 - %uglygep20 = getelementptr i8, i8* %2, i32 %lsr.iv - %uglygep2021 = bitcast i8* %uglygep20 to i32* - %scevgep9 = getelementptr i32, i32* %uglygep2021, i32 1 - %tmp42 = load i32, i32* %scevgep9, align 4 + %uglygep20 = getelementptr i8, ptr %2, i32 %lsr.iv + %uglygep2021 = bitcast ptr %uglygep20 to ptr + %scevgep9 = getelementptr i32, ptr %uglygep2021, i32 1 + %tmp42 = load i32, ptr %scevgep9, align 4 %tmp43 = add nsw i32 %tmp42, %tmp41 - store i32 %tmp43, i32* %scevgep9, align 4 - %uglygep30 = getelementptr i8, i8* %0, i32 %lsr.iv - %uglygep3031 = bitcast i8* %uglygep30 to i32* - %scevgep12 = getelementptr i32, i32* %uglygep3031, i32 2 - %tmp44 = load i32, i32* %scevgep12, align 4 - %uglygep24 = getelementptr i8, i8* %1, i32 %lsr.iv - %uglygep2425 = bitcast i8* %uglygep24 to i32* - %scevgep10 = getelementptr i32, i32* %uglygep2425, i32 2 - %tmp45 = load i32, i32* %scevgep10, align 4 + store i32 %tmp43, ptr %scevgep9, align 4 + %uglygep30 = getelementptr i8, ptr %0, i32 %lsr.iv + %uglygep3031 = bitcast ptr %uglygep30 to ptr + %scevgep12 = getelementptr i32, ptr %uglygep3031, i32 2 + %tmp44 = load i32, ptr %scevgep12, align 4 + %uglygep24 = getelementptr i8, ptr %1, i32 %lsr.iv + %uglygep2425 = bitcast ptr %uglygep24 to ptr + %scevgep10 = getelementptr i32, ptr %uglygep2425, i32 2 + %tmp45 = load i32, ptr %scevgep10, align 4 %tmp46 = xor i32 %tmp45, %tmp44 - %uglygep17 = getelementptr i8, i8* %2, i32 %lsr.iv - %uglygep1718 = bitcast i8* %uglygep17 to i32* - %scevgep8 = getelementptr i32, i32* %uglygep1718, i32 2 - %tmp47 = load i32, i32* %scevgep8, align 4 + %uglygep17 = getelementptr i8, ptr %2, i32 %lsr.iv + %uglygep1718 = bitcast ptr %uglygep17 to ptr + %scevgep8 = getelementptr i32, ptr %uglygep1718, i32 2 + %tmp47 = load i32, ptr %scevgep8, align 4 %tmp48 = add nsw i32 %tmp47, %tmp46 - store i32 %tmp48, i32* %scevgep8, align 4 - %uglygep11 = getelementptr i8, i8* %0, i32 %lsr.iv - %uglygep1112 = bitcast i8* %uglygep11 to i32* - %scevgep5 = getelementptr i32, i32* %uglygep1112, i32 3 - %tmp49 = load i32, i32* %scevgep5, align 4 - %uglygep5 = getelementptr i8, i8* %1, i32 %lsr.iv - %uglygep56 = bitcast i8* %uglygep5 to i32* - %scevgep3 = getelementptr i32, i32* %uglygep56, i32 3 - %tmp50 = load i32, i32* %scevgep3, align 4 + store i32 %tmp48, ptr %scevgep8, align 4 + %uglygep11 = getelementptr i8, ptr %0, i32 %lsr.iv + %uglygep1112 = bitcast ptr %uglygep11 to ptr + %scevgep5 = getelementptr i32, ptr %uglygep1112, i32 3 + %tmp49 = load i32, ptr %scevgep5, align 4 + %uglygep5 = getelementptr i8, ptr %1, i32 %lsr.iv + %uglygep56 = bitcast ptr %uglygep5 to ptr + %scevgep3 = getelementptr i32, ptr %uglygep56, i32 3 + %tmp50 = load i32, ptr %scevgep3, align 4 %tmp51 = xor i32 %tmp50, %tmp49 - %uglygep = getelementptr i8, i8* %2, i32 %lsr.iv - %uglygep1 = bitcast i8* %uglygep to i32* - %scevgep1 = getelementptr i32, i32* %uglygep1, i32 3 - %tmp52 = load i32, i32* %scevgep1, align 4 + %uglygep = getelementptr i8, ptr %2, i32 %lsr.iv + %uglygep1 = bitcast ptr %uglygep to ptr + %scevgep1 = getelementptr i32, ptr %uglygep1, i32 3 + %tmp52 = load i32, ptr %scevgep1, align 4 %tmp53 = add nsw i32 %tmp52, %tmp51 - store i32 %tmp53, i32* %scevgep1, align 4 + store i32 %tmp53, ptr %scevgep1, align 4 %tmp54 = add nuw i32 %tmp29, 4 %lsr.iv.next = add i32 %lsr.iv, 16 %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv15, i32 1) @@ -119,29 +119,29 @@ br i1 %tmp56, label %bb28, label %bb13 bb57: ; preds = %bb16 - %tmp58 = getelementptr inbounds i32, i32* %arg1, i32 %tmp25 - %tmp59 = load i32, i32* %tmp58, align 4 - %tmp60 = getelementptr inbounds i32, i32* %arg2, i32 %tmp25 - %tmp61 = load i32, i32* %tmp60, align 4 + %tmp58 = getelementptr inbounds i32, ptr %arg1, i32 %tmp25 + %tmp59 = load i32, ptr %tmp58, align 4 + %tmp60 = getelementptr inbounds i32, ptr %arg2, i32 %tmp25 + %tmp61 = load i32, ptr %tmp60, align 4 %tmp62 = xor i32 %tmp61, %tmp59 - %tmp63 = getelementptr inbounds i32, i32* %arg, i32 %tmp25 - %tmp64 = load i32, i32* %tmp63, align 4 + %tmp63 = getelementptr inbounds i32, ptr %arg, i32 %tmp25 + %tmp64 = load i32, ptr %tmp63, align 4 %tmp65 = add nsw i32 %tmp64, %tmp62 - store i32 %tmp65, i32* %tmp63, align 4 + store i32 %tmp65, ptr %tmp63, align 4 %tmp66 = add nuw i32 %tmp14, 2 %tmp67 = icmp eq i32 %tmp6, 2 br i1 %tmp67, label %bb27, label %bb68 bb68: ; preds = %bb57 - %tmp69 = getelementptr inbounds i32, i32* %arg1, i32 %tmp66 - %tmp70 = load i32, i32* %tmp69, align 4 - %tmp71 = getelementptr inbounds i32, i32* %arg2, i32 %tmp66 - %tmp72 = load i32, i32* %tmp71, align 4 + %tmp69 = getelementptr inbounds i32, ptr %arg1, i32 %tmp66 + %tmp70 = load i32, ptr %tmp69, align 4 + %tmp71 = getelementptr inbounds i32, ptr %arg2, i32 %tmp66 + %tmp72 = load i32, ptr %tmp71, align 4 %tmp73 = xor i32 %tmp72, %tmp70 - %tmp74 = getelementptr inbounds i32, i32* %arg, i32 %tmp66 - %tmp75 = load i32, i32* %tmp74, align 4 + %tmp74 = getelementptr inbounds i32, ptr %arg, i32 %tmp66 + %tmp75 = load i32, ptr %tmp74, align 4 %tmp76 = add nsw i32 %tmp75, %tmp73 - store i32 %tmp76, i32* %tmp74, align 4 + store i32 %tmp76, ptr %tmp74, align 4 br label %bb27 } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir index 214eb48..14c383f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s --- | - define dso_local arm_aapcscc i32 @test1(i32* nocapture %arg, i32* nocapture readonly %arg1, i32* nocapture readonly %arg2, i32 %arg3) { + define dso_local arm_aapcscc i32 @test1(ptr nocapture %arg, ptr nocapture readonly %arg1, ptr nocapture readonly %arg2, i32 %arg3) { bb: %tmp = icmp eq i32 %arg3, 0 br i1 %tmp, label %bb27, label %bb4 @@ -28,15 +28,15 @@ br i1 %tmp15, label %bb27, label %bb16 bb16: ; preds = %bb13 - %tmp17 = getelementptr inbounds i32, i32* %arg1, i32 %tmp14 - %tmp18 = load i32, i32* %tmp17, align 4 - %tmp19 = getelementptr inbounds i32, i32* %arg2, i32 %tmp14 - %tmp20 = load i32, i32* %tmp19, align 4 + %tmp17 = getelementptr inbounds i32, ptr %arg1, i32 %tmp14 + %tmp18 = load i32, ptr %tmp17, align 4 + %tmp19 = getelementptr inbounds i32, ptr %arg2, i32 %tmp14 + %tmp20 = load i32, ptr %tmp19, align 4 %tmp21 = xor i32 %tmp20, %tmp18 - %tmp22 = getelementptr inbounds i32, i32* %arg, i32 %tmp14 - %tmp23 = load i32, i32* %tmp22, align 4 + %tmp22 = getelementptr inbounds i32, ptr %arg, i32 %tmp14 + %tmp23 = load i32, ptr %tmp22, align 4 %tmp24 = add nsw i32 %tmp23, %tmp21 - store i32 %tmp24, i32* %tmp22, align 4 + store i32 %tmp24, ptr %tmp22, align 4 %tmp25 = add nuw i32 %tmp14, 1 %tmp26 = icmp eq i32 %tmp6, 1 br i1 %tmp26, label %bb27, label %bb57 @@ -49,69 +49,69 @@ %lsr.iv15 = phi i32 [ %lsr.iv.next16, %bb28 ], [ %start, %bb12 ] %lsr.iv = phi i32 [ %lsr.iv.next, %bb28 ], [ 0, %bb12 ] %tmp29 = phi i32 [ 0, %bb12 ], [ %tmp54, %bb28 ] - %0 = bitcast i32* %arg1 to i8* - %1 = bitcast i32* %arg2 to i8* - %2 = bitcast i32* %arg to i8* - %uglygep14 = getelementptr i8, i8* %0, i32 %lsr.iv - %uglygep1415 = bitcast i8* %uglygep14 to i32* - %scevgep617 = bitcast i32* %uglygep1415 to i32* - %tmp34 = load i32, i32* %scevgep617, align 4 - %uglygep8 = getelementptr i8, i8* %1, i32 %lsr.iv - %uglygep89 = bitcast i8* %uglygep8 to i32* - %scevgep418 = bitcast i32* %uglygep89 to i32* - %tmp35 = load i32, i32* %scevgep418, align 4 + %0 = bitcast ptr %arg1 to ptr + %1 = bitcast ptr %arg2 to ptr + %2 = bitcast ptr %arg to ptr + %uglygep14 = getelementptr i8, ptr %0, i32 %lsr.iv + %uglygep1415 = bitcast ptr %uglygep14 to ptr + %scevgep617 = bitcast ptr %uglygep1415 to ptr + %tmp34 = load i32, ptr %scevgep617, align 4 + %uglygep8 = getelementptr i8, ptr %1, i32 %lsr.iv + %uglygep89 = bitcast ptr %uglygep8 to ptr + %scevgep418 = bitcast ptr %uglygep89 to ptr + %tmp35 = load i32, ptr %scevgep418, align 4 %tmp36 = xor i32 %tmp35, %tmp34 - %uglygep2 = getelementptr i8, i8* %2, i32 %lsr.iv - %uglygep23 = bitcast i8* %uglygep2 to i32* - %scevgep219 = bitcast i32* %uglygep23 to i32* - %tmp37 = load i32, i32* %scevgep219, align 4 + %uglygep2 = getelementptr i8, ptr %2, i32 %lsr.iv + %uglygep23 = bitcast ptr %uglygep2 to ptr + %scevgep219 = bitcast ptr %uglygep23 to ptr + %tmp37 = load i32, ptr %scevgep219, align 4 %tmp38 = add nsw i32 %tmp37, %tmp36 - store i32 %tmp38, i32* %scevgep219, align 4 - %uglygep33 = getelementptr i8, i8* %0, i32 %lsr.iv - %uglygep3334 = bitcast i8* %uglygep33 to i32* - %scevgep14 = getelementptr i32, i32* %uglygep3334, i32 1 - %tmp39 = load i32, i32* %scevgep14, align 4 - %uglygep27 = getelementptr i8, i8* %1, i32 %lsr.iv - %uglygep2728 = bitcast i8* %uglygep27 to i32* - %scevgep11 = getelementptr i32, i32* %uglygep2728, i32 1 - %tmp40 = load i32, i32* %scevgep11, align 4 + store i32 %tmp38, ptr %scevgep219, align 4 + %uglygep33 = getelementptr i8, ptr %0, i32 %lsr.iv + %uglygep3334 = bitcast ptr %uglygep33 to ptr + %scevgep14 = getelementptr i32, ptr %uglygep3334, i32 1 + %tmp39 = load i32, ptr %scevgep14, align 4 + %uglygep27 = getelementptr i8, ptr %1, i32 %lsr.iv + %uglygep2728 = bitcast ptr %uglygep27 to ptr + %scevgep11 = getelementptr i32, ptr %uglygep2728, i32 1 + %tmp40 = load i32, ptr %scevgep11, align 4 %tmp41 = xor i32 %tmp40, %tmp39 - %uglygep20 = getelementptr i8, i8* %2, i32 %lsr.iv - %uglygep2021 = bitcast i8* %uglygep20 to i32* - %scevgep9 = getelementptr i32, i32* %uglygep2021, i32 1 - %tmp42 = load i32, i32* %scevgep9, align 4 + %uglygep20 = getelementptr i8, ptr %2, i32 %lsr.iv + %uglygep2021 = bitcast ptr %uglygep20 to ptr + %scevgep9 = getelementptr i32, ptr %uglygep2021, i32 1 + %tmp42 = load i32, ptr %scevgep9, align 4 %tmp43 = add nsw i32 %tmp42, %tmp41 - store i32 %tmp43, i32* %scevgep9, align 4 - %uglygep30 = getelementptr i8, i8* %0, i32 %lsr.iv - %uglygep3031 = bitcast i8* %uglygep30 to i32* - %scevgep12 = getelementptr i32, i32* %uglygep3031, i32 2 - %tmp44 = load i32, i32* %scevgep12, align 4 - %uglygep24 = getelementptr i8, i8* %1, i32 %lsr.iv - %uglygep2425 = bitcast i8* %uglygep24 to i32* - %scevgep10 = getelementptr i32, i32* %uglygep2425, i32 2 - %tmp45 = load i32, i32* %scevgep10, align 4 + store i32 %tmp43, ptr %scevgep9, align 4 + %uglygep30 = getelementptr i8, ptr %0, i32 %lsr.iv + %uglygep3031 = bitcast ptr %uglygep30 to ptr + %scevgep12 = getelementptr i32, ptr %uglygep3031, i32 2 + %tmp44 = load i32, ptr %scevgep12, align 4 + %uglygep24 = getelementptr i8, ptr %1, i32 %lsr.iv + %uglygep2425 = bitcast ptr %uglygep24 to ptr + %scevgep10 = getelementptr i32, ptr %uglygep2425, i32 2 + %tmp45 = load i32, ptr %scevgep10, align 4 %tmp46 = xor i32 %tmp45, %tmp44 - %uglygep17 = getelementptr i8, i8* %2, i32 %lsr.iv - %uglygep1718 = bitcast i8* %uglygep17 to i32* - %scevgep8 = getelementptr i32, i32* %uglygep1718, i32 2 - %tmp47 = load i32, i32* %scevgep8, align 4 + %uglygep17 = getelementptr i8, ptr %2, i32 %lsr.iv + %uglygep1718 = bitcast ptr %uglygep17 to ptr + %scevgep8 = getelementptr i32, ptr %uglygep1718, i32 2 + %tmp47 = load i32, ptr %scevgep8, align 4 %tmp48 = add nsw i32 %tmp47, %tmp46 - store i32 %tmp48, i32* %scevgep8, align 4 - %uglygep11 = getelementptr i8, i8* %0, i32 %lsr.iv - %uglygep1112 = bitcast i8* %uglygep11 to i32* - %scevgep5 = getelementptr i32, i32* %uglygep1112, i32 3 - %tmp49 = load i32, i32* %scevgep5, align 4 - %uglygep5 = getelementptr i8, i8* %1, i32 %lsr.iv - %uglygep56 = bitcast i8* %uglygep5 to i32* - %scevgep3 = getelementptr i32, i32* %uglygep56, i32 3 - %tmp50 = load i32, i32* %scevgep3, align 4 + store i32 %tmp48, ptr %scevgep8, align 4 + %uglygep11 = getelementptr i8, ptr %0, i32 %lsr.iv + %uglygep1112 = bitcast ptr %uglygep11 to ptr + %scevgep5 = getelementptr i32, ptr %uglygep1112, i32 3 + %tmp49 = load i32, ptr %scevgep5, align 4 + %uglygep5 = getelementptr i8, ptr %1, i32 %lsr.iv + %uglygep56 = bitcast ptr %uglygep5 to ptr + %scevgep3 = getelementptr i32, ptr %uglygep56, i32 3 + %tmp50 = load i32, ptr %scevgep3, align 4 %tmp51 = xor i32 %tmp50, %tmp49 - %uglygep = getelementptr i8, i8* %2, i32 %lsr.iv - %uglygep1 = bitcast i8* %uglygep to i32* - %scevgep1 = getelementptr i32, i32* %uglygep1, i32 3 - %tmp52 = load i32, i32* %scevgep1, align 4 + %uglygep = getelementptr i8, ptr %2, i32 %lsr.iv + %uglygep1 = bitcast ptr %uglygep to ptr + %scevgep1 = getelementptr i32, ptr %uglygep1, i32 3 + %tmp52 = load i32, ptr %scevgep1, align 4 %tmp53 = add nsw i32 %tmp52, %tmp51 - store i32 %tmp53, i32* %scevgep1, align 4 + store i32 %tmp53, ptr %scevgep1, align 4 %tmp54 = add nuw i32 %tmp29, 4 %lsr.iv.next = add i32 %lsr.iv, 16 %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv15, i32 1) @@ -120,29 +120,29 @@ br i1 %tmp56, label %bb28, label %bb13 bb57: ; preds = %bb16 - %tmp58 = getelementptr inbounds i32, i32* %arg1, i32 %tmp25 - %tmp59 = load i32, i32* %tmp58, align 4 - %tmp60 = getelementptr inbounds i32, i32* %arg2, i32 %tmp25 - %tmp61 = load i32, i32* %tmp60, align 4 + %tmp58 = getelementptr inbounds i32, ptr %arg1, i32 %tmp25 + %tmp59 = load i32, ptr %tmp58, align 4 + %tmp60 = getelementptr inbounds i32, ptr %arg2, i32 %tmp25 + %tmp61 = load i32, ptr %tmp60, align 4 %tmp62 = xor i32 %tmp61, %tmp59 - %tmp63 = getelementptr inbounds i32, i32* %arg, i32 %tmp25 - %tmp64 = load i32, i32* %tmp63, align 4 + %tmp63 = getelementptr inbounds i32, ptr %arg, i32 %tmp25 + %tmp64 = load i32, ptr %tmp63, align 4 %tmp65 = add nsw i32 %tmp64, %tmp62 - store i32 %tmp65, i32* %tmp63, align 4 + store i32 %tmp65, ptr %tmp63, align 4 %tmp66 = add nuw i32 %tmp14, 2 %tmp67 = icmp eq i32 %tmp6, 2 br i1 %tmp67, label %bb27, label %bb68 bb68: ; preds = %bb57 - %tmp69 = getelementptr inbounds i32, i32* %arg1, i32 %tmp66 - %tmp70 = load i32, i32* %tmp69, align 4 - %tmp71 = getelementptr inbounds i32, i32* %arg2, i32 %tmp66 - %tmp72 = load i32, i32* %tmp71, align 4 + %tmp69 = getelementptr inbounds i32, ptr %arg1, i32 %tmp66 + %tmp70 = load i32, ptr %tmp69, align 4 + %tmp71 = getelementptr inbounds i32, ptr %arg2, i32 %tmp66 + %tmp72 = load i32, ptr %tmp71, align 4 %tmp73 = xor i32 %tmp72, %tmp70 - %tmp74 = getelementptr inbounds i32, i32* %arg, i32 %tmp66 - %tmp75 = load i32, i32* %tmp74, align 4 + %tmp74 = getelementptr inbounds i32, ptr %arg, i32 %tmp66 + %tmp75 = load i32, ptr %tmp74, align 4 %tmp76 = add nsw i32 %tmp75, %tmp73 - store i32 %tmp76, i32* %tmp74, align 4 + store i32 %tmp76, ptr %tmp74, align 4 br label %bb27 } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-liveout.mir index 8a1a3ed..98f6dbc 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-liveout.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s --- | - define dso_local arm_aapcscc i32 @test1(i32* nocapture %arg, i32* nocapture readonly %arg1, i32* nocapture readonly %arg2, i32 %arg3) { + define dso_local arm_aapcscc i32 @test1(ptr nocapture %arg, ptr nocapture readonly %arg1, ptr nocapture readonly %arg2, i32 %arg3) { bb: %tmp = icmp eq i32 %arg3, 0 br i1 %tmp, label %bb27, label %bb4 @@ -28,15 +28,15 @@ br i1 %tmp15, label %bb27, label %bb16 bb16: ; preds = %bb13 - %tmp17 = getelementptr inbounds i32, i32* %arg1, i32 %tmp14 - %tmp18 = load i32, i32* %tmp17, align 4 - %tmp19 = getelementptr inbounds i32, i32* %arg2, i32 %tmp14 - %tmp20 = load i32, i32* %tmp19, align 4 + %tmp17 = getelementptr inbounds i32, ptr %arg1, i32 %tmp14 + %tmp18 = load i32, ptr %tmp17, align 4 + %tmp19 = getelementptr inbounds i32, ptr %arg2, i32 %tmp14 + %tmp20 = load i32, ptr %tmp19, align 4 %tmp21 = xor i32 %tmp20, %tmp18 - %tmp22 = getelementptr inbounds i32, i32* %arg, i32 %tmp14 - %tmp23 = load i32, i32* %tmp22, align 4 + %tmp22 = getelementptr inbounds i32, ptr %arg, i32 %tmp14 + %tmp23 = load i32, ptr %tmp22, align 4 %tmp24 = add nsw i32 %tmp23, %tmp21 - store i32 %tmp24, i32* %tmp22, align 4 + store i32 %tmp24, ptr %tmp22, align 4 %tmp25 = add nuw i32 %tmp14, 1 %tmp26 = icmp eq i32 %tmp6, 1 br i1 %tmp26, label %bb27, label %bb57 @@ -49,69 +49,69 @@ %lsr.iv15 = phi i32 [ %lsr.iv.next16, %bb28 ], [ %start, %bb12 ] %lsr.iv = phi i32 [ %lsr.iv.next, %bb28 ], [ 0, %bb12 ] %tmp29 = phi i32 [ 0, %bb12 ], [ %tmp54, %bb28 ] - %0 = bitcast i32* %arg1 to i8* - %1 = bitcast i32* %arg2 to i8* - %2 = bitcast i32* %arg to i8* - %uglygep14 = getelementptr i8, i8* %0, i32 %lsr.iv - %uglygep1415 = bitcast i8* %uglygep14 to i32* - %scevgep617 = bitcast i32* %uglygep1415 to i32* - %tmp34 = load i32, i32* %scevgep617, align 4 - %uglygep8 = getelementptr i8, i8* %1, i32 %lsr.iv - %uglygep89 = bitcast i8* %uglygep8 to i32* - %scevgep418 = bitcast i32* %uglygep89 to i32* - %tmp35 = load i32, i32* %scevgep418, align 4 + %0 = bitcast ptr %arg1 to ptr + %1 = bitcast ptr %arg2 to ptr + %2 = bitcast ptr %arg to ptr + %uglygep14 = getelementptr i8, ptr %0, i32 %lsr.iv + %uglygep1415 = bitcast ptr %uglygep14 to ptr + %scevgep617 = bitcast ptr %uglygep1415 to ptr + %tmp34 = load i32, ptr %scevgep617, align 4 + %uglygep8 = getelementptr i8, ptr %1, i32 %lsr.iv + %uglygep89 = bitcast ptr %uglygep8 to ptr + %scevgep418 = bitcast ptr %uglygep89 to ptr + %tmp35 = load i32, ptr %scevgep418, align 4 %tmp36 = xor i32 %tmp35, %tmp34 - %uglygep2 = getelementptr i8, i8* %2, i32 %lsr.iv - %uglygep23 = bitcast i8* %uglygep2 to i32* - %scevgep219 = bitcast i32* %uglygep23 to i32* - %tmp37 = load i32, i32* %scevgep219, align 4 + %uglygep2 = getelementptr i8, ptr %2, i32 %lsr.iv + %uglygep23 = bitcast ptr %uglygep2 to ptr + %scevgep219 = bitcast ptr %uglygep23 to ptr + %tmp37 = load i32, ptr %scevgep219, align 4 %tmp38 = add nsw i32 %tmp37, %tmp36 - store i32 %tmp38, i32* %scevgep219, align 4 - %uglygep33 = getelementptr i8, i8* %0, i32 %lsr.iv - %uglygep3334 = bitcast i8* %uglygep33 to i32* - %scevgep14 = getelementptr i32, i32* %uglygep3334, i32 1 - %tmp39 = load i32, i32* %scevgep14, align 4 - %uglygep27 = getelementptr i8, i8* %1, i32 %lsr.iv - %uglygep2728 = bitcast i8* %uglygep27 to i32* - %scevgep11 = getelementptr i32, i32* %uglygep2728, i32 1 - %tmp40 = load i32, i32* %scevgep11, align 4 + store i32 %tmp38, ptr %scevgep219, align 4 + %uglygep33 = getelementptr i8, ptr %0, i32 %lsr.iv + %uglygep3334 = bitcast ptr %uglygep33 to ptr + %scevgep14 = getelementptr i32, ptr %uglygep3334, i32 1 + %tmp39 = load i32, ptr %scevgep14, align 4 + %uglygep27 = getelementptr i8, ptr %1, i32 %lsr.iv + %uglygep2728 = bitcast ptr %uglygep27 to ptr + %scevgep11 = getelementptr i32, ptr %uglygep2728, i32 1 + %tmp40 = load i32, ptr %scevgep11, align 4 %tmp41 = xor i32 %tmp40, %tmp39 - %uglygep20 = getelementptr i8, i8* %2, i32 %lsr.iv - %uglygep2021 = bitcast i8* %uglygep20 to i32* - %scevgep9 = getelementptr i32, i32* %uglygep2021, i32 1 - %tmp42 = load i32, i32* %scevgep9, align 4 + %uglygep20 = getelementptr i8, ptr %2, i32 %lsr.iv + %uglygep2021 = bitcast ptr %uglygep20 to ptr + %scevgep9 = getelementptr i32, ptr %uglygep2021, i32 1 + %tmp42 = load i32, ptr %scevgep9, align 4 %tmp43 = add nsw i32 %tmp42, %tmp41 - store i32 %tmp43, i32* %scevgep9, align 4 - %uglygep30 = getelementptr i8, i8* %0, i32 %lsr.iv - %uglygep3031 = bitcast i8* %uglygep30 to i32* - %scevgep12 = getelementptr i32, i32* %uglygep3031, i32 2 - %tmp44 = load i32, i32* %scevgep12, align 4 - %uglygep24 = getelementptr i8, i8* %1, i32 %lsr.iv - %uglygep2425 = bitcast i8* %uglygep24 to i32* - %scevgep10 = getelementptr i32, i32* %uglygep2425, i32 2 - %tmp45 = load i32, i32* %scevgep10, align 4 + store i32 %tmp43, ptr %scevgep9, align 4 + %uglygep30 = getelementptr i8, ptr %0, i32 %lsr.iv + %uglygep3031 = bitcast ptr %uglygep30 to ptr + %scevgep12 = getelementptr i32, ptr %uglygep3031, i32 2 + %tmp44 = load i32, ptr %scevgep12, align 4 + %uglygep24 = getelementptr i8, ptr %1, i32 %lsr.iv + %uglygep2425 = bitcast ptr %uglygep24 to ptr + %scevgep10 = getelementptr i32, ptr %uglygep2425, i32 2 + %tmp45 = load i32, ptr %scevgep10, align 4 %tmp46 = xor i32 %tmp45, %tmp44 - %uglygep17 = getelementptr i8, i8* %2, i32 %lsr.iv - %uglygep1718 = bitcast i8* %uglygep17 to i32* - %scevgep8 = getelementptr i32, i32* %uglygep1718, i32 2 - %tmp47 = load i32, i32* %scevgep8, align 4 + %uglygep17 = getelementptr i8, ptr %2, i32 %lsr.iv + %uglygep1718 = bitcast ptr %uglygep17 to ptr + %scevgep8 = getelementptr i32, ptr %uglygep1718, i32 2 + %tmp47 = load i32, ptr %scevgep8, align 4 %tmp48 = add nsw i32 %tmp47, %tmp46 - store i32 %tmp48, i32* %scevgep8, align 4 - %uglygep11 = getelementptr i8, i8* %0, i32 %lsr.iv - %uglygep1112 = bitcast i8* %uglygep11 to i32* - %scevgep5 = getelementptr i32, i32* %uglygep1112, i32 3 - %tmp49 = load i32, i32* %scevgep5, align 4 - %uglygep5 = getelementptr i8, i8* %1, i32 %lsr.iv - %uglygep56 = bitcast i8* %uglygep5 to i32* - %scevgep3 = getelementptr i32, i32* %uglygep56, i32 3 - %tmp50 = load i32, i32* %scevgep3, align 4 + store i32 %tmp48, ptr %scevgep8, align 4 + %uglygep11 = getelementptr i8, ptr %0, i32 %lsr.iv + %uglygep1112 = bitcast ptr %uglygep11 to ptr + %scevgep5 = getelementptr i32, ptr %uglygep1112, i32 3 + %tmp49 = load i32, ptr %scevgep5, align 4 + %uglygep5 = getelementptr i8, ptr %1, i32 %lsr.iv + %uglygep56 = bitcast ptr %uglygep5 to ptr + %scevgep3 = getelementptr i32, ptr %uglygep56, i32 3 + %tmp50 = load i32, ptr %scevgep3, align 4 %tmp51 = xor i32 %tmp50, %tmp49 - %uglygep = getelementptr i8, i8* %2, i32 %lsr.iv - %uglygep1 = bitcast i8* %uglygep to i32* - %scevgep1 = getelementptr i32, i32* %uglygep1, i32 3 - %tmp52 = load i32, i32* %scevgep1, align 4 + %uglygep = getelementptr i8, ptr %2, i32 %lsr.iv + %uglygep1 = bitcast ptr %uglygep to ptr + %scevgep1 = getelementptr i32, ptr %uglygep1, i32 3 + %tmp52 = load i32, ptr %scevgep1, align 4 %tmp53 = add nsw i32 %tmp52, %tmp51 - store i32 %tmp53, i32* %scevgep1, align 4 + store i32 %tmp53, ptr %scevgep1, align 4 %tmp54 = add nuw i32 %tmp29, 4 %lsr.iv.next = add i32 %lsr.iv, 16 %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv15, i32 1) @@ -120,29 +120,29 @@ br i1 %tmp56, label %bb28, label %bb13 bb57: ; preds = %bb16 - %tmp58 = getelementptr inbounds i32, i32* %arg1, i32 %tmp25 - %tmp59 = load i32, i32* %tmp58, align 4 - %tmp60 = getelementptr inbounds i32, i32* %arg2, i32 %tmp25 - %tmp61 = load i32, i32* %tmp60, align 4 + %tmp58 = getelementptr inbounds i32, ptr %arg1, i32 %tmp25 + %tmp59 = load i32, ptr %tmp58, align 4 + %tmp60 = getelementptr inbounds i32, ptr %arg2, i32 %tmp25 + %tmp61 = load i32, ptr %tmp60, align 4 %tmp62 = xor i32 %tmp61, %tmp59 - %tmp63 = getelementptr inbounds i32, i32* %arg, i32 %tmp25 - %tmp64 = load i32, i32* %tmp63, align 4 + %tmp63 = getelementptr inbounds i32, ptr %arg, i32 %tmp25 + %tmp64 = load i32, ptr %tmp63, align 4 %tmp65 = add nsw i32 %tmp64, %tmp62 - store i32 %tmp65, i32* %tmp63, align 4 + store i32 %tmp65, ptr %tmp63, align 4 %tmp66 = add nuw i32 %tmp14, 2 %tmp67 = icmp eq i32 %tmp6, 2 br i1 %tmp67, label %bb27, label %bb68 bb68: ; preds = %bb57 - %tmp69 = getelementptr inbounds i32, i32* %arg1, i32 %tmp66 - %tmp70 = load i32, i32* %tmp69, align 4 - %tmp71 = getelementptr inbounds i32, i32* %arg2, i32 %tmp66 - %tmp72 = load i32, i32* %tmp71, align 4 + %tmp69 = getelementptr inbounds i32, ptr %arg1, i32 %tmp66 + %tmp70 = load i32, ptr %tmp69, align 4 + %tmp71 = getelementptr inbounds i32, ptr %arg2, i32 %tmp66 + %tmp72 = load i32, ptr %tmp71, align 4 %tmp73 = xor i32 %tmp72, %tmp70 - %tmp74 = getelementptr inbounds i32, i32* %arg, i32 %tmp66 - %tmp75 = load i32, i32* %tmp74, align 4 + %tmp74 = getelementptr inbounds i32, ptr %arg, i32 %tmp66 + %tmp75 = load i32, ptr %tmp74, align 4 %tmp76 = add nsw i32 %tmp75, %tmp73 - store i32 %tmp76, i32* %tmp74, align 4 + store i32 %tmp76, ptr %tmp74, align 4 br label %bb27 } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir index c8d03fd..d137467 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - define dso_local arm_aapcs_vfpcc float @insert_after_vdup_1(float* nocapture readonly %a, float* nocapture readonly %b, float %init, i32 %N) { + define dso_local arm_aapcs_vfpcc float @insert_after_vdup_1(ptr nocapture readonly %a, ptr nocapture readonly %b, float %init, i32 %N) { entry: %cmp8.not = icmp eq i32 %N, 0 %0 = add i32 %N, 3 @@ -19,22 +19,22 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv13 = phi float* [ %scevgep14, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi float* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv13 = phi ptr [ %scevgep14, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x float> [ %6, %vector.ph ], [ %13, %vector.body ] %7 = phi i32 [ %start, %vector.ph ], [ %14, %vector.body ] %8 = phi i32 [ %N, %vector.ph ], [ %10, %vector.body ] - %lsr.iv12 = bitcast float* %lsr.iv to <4 x float>* - %lsr.iv1315 = bitcast float* %lsr.iv13 to <4 x float>* + %lsr.iv12 = bitcast ptr %lsr.iv to ptr + %lsr.iv1315 = bitcast ptr %lsr.iv13 to ptr %9 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %8) %10 = sub i32 %8, 4 - %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %lsr.iv12, i32 4, <4 x i1> %9, <4 x float> undef) - %wide.masked.load11 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %lsr.iv1315, i32 4, <4 x i1> %9, <4 x float> undef) + %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %lsr.iv12, i32 4, <4 x i1> %9, <4 x float> undef) + %wide.masked.load11 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %lsr.iv1315, i32 4, <4 x i1> %9, <4 x float> undef) %11 = fmul fast <4 x float> %wide.masked.load11, %wide.masked.load %12 = fadd fast <4 x float> %11, %vec.phi %13 = select <4 x i1> %9, <4 x float> %12, <4 x float> %vec.phi - %scevgep = getelementptr float, float* %lsr.iv, i32 4 - %scevgep14 = getelementptr float, float* %lsr.iv13, i32 4 + %scevgep = getelementptr float, ptr %lsr.iv, i32 4 + %scevgep14 = getelementptr float, ptr %lsr.iv13, i32 4 %14 = call i32 @llvm.loop.decrement.reg.i32(i32 %7, i32 1) %15 = icmp ne i32 %14, 0 br i1 %15, label %vector.body, label %middle.block @@ -49,7 +49,7 @@ } ; Function Attrs: norecurse nounwind readonly - define dso_local arm_aapcs_vfpcc float @insert_after_vdup_2(float* nocapture readonly %a, float* nocapture readonly %b, float %init, i32 %N) local_unnamed_addr #0 { + define dso_local arm_aapcs_vfpcc float @insert_after_vdup_2(ptr nocapture readonly %a, ptr nocapture readonly %b, float %init, i32 %N) local_unnamed_addr #0 { entry: %shr = lshr i32 %N, 2 %cmp9.not = icmp eq i32 %shr, 0 @@ -67,22 +67,22 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv14 = phi float* [ %scevgep15, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi float* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x float> [ %6, %vector.ph ], [ %13, %vector.body ] %7 = phi i32 [ %start, %vector.ph ], [ %14, %vector.body ] %8 = phi i32 [ %shr, %vector.ph ], [ %10, %vector.body ] - %lsr.iv13 = bitcast float* %lsr.iv to <4 x float>* - %lsr.iv1416 = bitcast float* %lsr.iv14 to <4 x float>* + %lsr.iv13 = bitcast ptr %lsr.iv to ptr + %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr %9 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %8) %10 = sub i32 %8, 4 - %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %lsr.iv13, i32 4, <4 x i1> %9, <4 x float> undef) - %wide.masked.load12 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %lsr.iv1416, i32 4, <4 x i1> %9, <4 x float> undef) + %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %lsr.iv13, i32 4, <4 x i1> %9, <4 x float> undef) + %wide.masked.load12 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %lsr.iv1416, i32 4, <4 x i1> %9, <4 x float> undef) %11 = fmul fast <4 x float> %wide.masked.load12, %wide.masked.load %12 = fadd fast <4 x float> %11, %vec.phi %13 = select <4 x i1> %9, <4 x float> %12, <4 x float> %vec.phi - %scevgep = getelementptr float, float* %lsr.iv, i32 4 - %scevgep15 = getelementptr float, float* %lsr.iv14, i32 4 + %scevgep = getelementptr float, ptr %lsr.iv, i32 4 + %scevgep15 = getelementptr float, ptr %lsr.iv14, i32 4 %14 = call i32 @llvm.loop.decrement.reg.i32(i32 %7, i32 1) %15 = icmp ne i32 %14, 0 br i1 %15, label %vector.body, label %middle.block @@ -97,7 +97,7 @@ } declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) - declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) + declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir index 3a0bc9d..8cf8589 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix-debug.mir @@ -2,14 +2,14 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - define dso_local arm_aapcscc void @test_debug(i32 %d, i32* %e, i16* nocapture readonly %k, i16* nocapture readonly %l) !dbg !15 { + define dso_local arm_aapcscc void @test_debug(i32 %d, ptr %e, ptr nocapture readonly %k, ptr nocapture readonly %l) !dbg !15 { entry: call void @llvm.dbg.value(metadata i32 %d, metadata !23, metadata !DIExpression()), !dbg !32 - call void @llvm.dbg.value(metadata i32* %e, metadata !24, metadata !DIExpression()), !dbg !32 - call void @llvm.dbg.value(metadata i16* %k, metadata !25, metadata !DIExpression()), !dbg !32 - call void @llvm.dbg.value(metadata i16* %l, metadata !26, metadata !DIExpression()), !dbg !32 + call void @llvm.dbg.value(metadata ptr %e, metadata !24, metadata !DIExpression()), !dbg !32 + call void @llvm.dbg.value(metadata ptr %k, metadata !25, metadata !DIExpression()), !dbg !32 + call void @llvm.dbg.value(metadata ptr %l, metadata !26, metadata !DIExpression()), !dbg !32 call void @llvm.dbg.value(metadata i16 0, metadata !29, metadata !DIExpression()), !dbg !32 - %call = tail call arm_aapcscc signext i16 @get_input(i32 %d, i32* %e, i16 signext 0) #4, !dbg !33 + %call = tail call arm_aapcscc signext i16 @get_input(i32 %d, ptr %e, i16 signext 0) #4, !dbg !33 call void @llvm.dbg.value(metadata i16 %call, metadata !28, metadata !DIExpression()), !dbg !32 call void @llvm.dbg.value(metadata i32 0, metadata !30, metadata !DIExpression()), !dbg !32 %cmp30 = icmp sgt i32 %d, 0, !dbg !34 @@ -20,52 +20,52 @@ br label %for.cond1.preheader.us, !dbg !37 for.cond1.preheader.us: ; preds = %for.cond1.preheader.us.preheader, %for.cond1.for.inc9_crit_edge.us - %lsr.iv2 = phi i16* [ %k, %for.cond1.preheader.us.preheader ], [ %9, %for.cond1.for.inc9_crit_edge.us ] + %lsr.iv2 = phi ptr [ %k, %for.cond1.preheader.us.preheader ], [ %9, %for.cond1.for.inc9_crit_edge.us ] %i.031.us = phi i32 [ %inc10.us, %for.cond1.for.inc9_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ] call void @llvm.dbg.value(metadata i32 %i.031.us, metadata !30, metadata !DIExpression()), !dbg !32 call void @llvm.dbg.value(metadata i32 0, metadata !31, metadata !DIExpression()), !dbg !32 - %arrayidx7.us = getelementptr inbounds i32, i32* %e, i32 %i.031.us, !dbg !38 - %arrayidx7.promoted.us = load i32, i32* %arrayidx7.us, align 4, !dbg !41 + %arrayidx7.us = getelementptr inbounds i32, ptr %e, i32 %i.031.us, !dbg !38 + %arrayidx7.promoted.us = load i32, ptr %arrayidx7.us, align 4, !dbg !41 %start = call i32 @llvm.start.loop.iterations.i32(i32 %d), !dbg !46 br label %for.body3.us, !dbg !46 for.body3.us: ; preds = %for.body3.us, %for.cond1.preheader.us - %lsr.iv5 = phi i16* [ %scevgep6, %for.body3.us ], [ %lsr.iv2, %for.cond1.preheader.us ], !dbg !32 - %lsr.iv1 = phi i16* [ %scevgep, %for.body3.us ], [ %l, %for.cond1.preheader.us ], !dbg !32 + %lsr.iv5 = phi ptr [ %scevgep6, %for.body3.us ], [ %lsr.iv2, %for.cond1.preheader.us ], !dbg !32 + %lsr.iv1 = phi ptr [ %scevgep, %for.body3.us ], [ %l, %for.cond1.preheader.us ], !dbg !32 %add829.us = phi i32 [ %arrayidx7.promoted.us, %for.cond1.preheader.us ], [ %add8.us, %for.body3.us ], !dbg !32 %1 = phi i32 [ %start, %for.cond1.preheader.us ], [ %4, %for.body3.us ], !dbg !32 call void @llvm.dbg.value(metadata i32 undef, metadata !31, metadata !DIExpression()), !dbg !32 - %2 = load i16, i16* %lsr.iv5, align 2, !dbg !47 + %2 = load i16, ptr %lsr.iv5, align 2, !dbg !47 %conv.us = sext i16 %2 to i32, !dbg !47 - %3 = load i16, i16* %lsr.iv1, align 2, !dbg !50 + %3 = load i16, ptr %lsr.iv1, align 2, !dbg !50 %conv5.us = sext i16 %3 to i32, !dbg !50 %mul6.us = mul nsw i32 %conv5.us, %conv.us, !dbg !51 %add8.us = add nsw i32 %mul6.us, %add829.us, !dbg !41 call void @llvm.dbg.value(metadata i32 undef, metadata !31, metadata !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value)), !dbg !32 - %scevgep = getelementptr i16, i16* %lsr.iv1, i32 1, !dbg !52 - %scevgep6 = getelementptr i16, i16* %lsr.iv5, i32 1, !dbg !52 + %scevgep = getelementptr i16, ptr %lsr.iv1, i32 1, !dbg !52 + %scevgep6 = getelementptr i16, ptr %lsr.iv5, i32 1, !dbg !52 %4 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %1, i32 1), !dbg !46 %5 = icmp ne i32 %4, 0, !dbg !46 br i1 %5, label %for.body3.us, label %for.cond1.for.inc9_crit_edge.us, !dbg !46, !llvm.loop !53 for.cond1.for.inc9_crit_edge.us: ; preds = %for.body3.us - %6 = bitcast i16* %lsr.iv2 to i1* + %6 = bitcast ptr %lsr.iv2 to ptr %sunkaddr = mul i32 %i.031.us, 4, !dbg !41 - %7 = bitcast i32* %e to i8*, !dbg !41 - %sunkaddr7 = getelementptr inbounds i8, i8* %7, i32 %sunkaddr, !dbg !41 - %8 = bitcast i8* %sunkaddr7 to i32*, !dbg !41 - store i32 %add8.us, i32* %8, align 4, !dbg !41 + %7 = bitcast ptr %e to ptr, !dbg !41 + %sunkaddr7 = getelementptr inbounds i8, ptr %7, i32 %sunkaddr, !dbg !41 + %8 = bitcast ptr %sunkaddr7 to ptr, !dbg !41 + store i32 %add8.us, ptr %8, align 4, !dbg !41 %inc10.us = add nuw nsw i32 %i.031.us, 1, !dbg !55 call void @llvm.dbg.value(metadata i32 %inc10.us, metadata !30, metadata !DIExpression()), !dbg !32 - %scevgep4 = getelementptr i1, i1* %6, i32 %0, !dbg !37 - %9 = bitcast i1* %scevgep4 to i16*, !dbg !37 + %scevgep4 = getelementptr i1, ptr %6, i32 %0, !dbg !37 + %9 = bitcast ptr %scevgep4 to ptr, !dbg !37 %exitcond33 = icmp eq i32 %inc10.us, %d, !dbg !34 br i1 %exitcond33, label %for.end11, label %for.cond1.preheader.us, !dbg !37, !llvm.loop !56 for.end11: ; preds = %for.cond1.for.inc9_crit_edge.us, %entry ret void, !dbg !58 } - declare !dbg !4 dso_local arm_aapcscc signext i16 @get_input(i32, i32*, i16 signext) + declare !dbg !4 dso_local arm_aapcscc signext i16 @get_input(i32, ptr, i16 signext) declare void @llvm.dbg.value(metadata, metadata, metadata) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir index 070a207..4d3f2e2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir @@ -4,7 +4,7 @@ # A decent sized test to handle a matrix, with scalar and vector low-overhead loops. --- | - define dso_local arm_aapcs_vfpcc signext i16 @matrix_test(i32 %d, i32* nocapture %e, i16* nocapture readonly %k, i16* nocapture readonly %l) { + define dso_local arm_aapcs_vfpcc signext i16 @matrix_test(i32 %d, ptr nocapture %e, ptr nocapture readonly %k, ptr nocapture readonly %l) { entry: %cmp19.i = icmp sgt i32 %d, 0 br i1 %cmp19.i, label %for.body.i.preheader, label %c.exit.thread @@ -14,16 +14,16 @@ br label %for.body.i c.exit.thread: ; preds = %entry - %call169 = tail call arm_aapcs_vfpcc signext i16 bitcast (i16 (...)* @crc16 to i16 (i32)*)(i32 0) + %call169 = tail call arm_aapcs_vfpcc signext i16 @crc16(i32 0) %conv270 = sext i16 %call169 to i32 br label %c.exit59 for.body.i: ; preds = %for.body.i, %for.body.i.preheader - %lsr.iv15 = phi i32* [ %e, %for.body.i.preheader ], [ %scevgep16, %for.body.i ] + %lsr.iv15 = phi ptr [ %e, %for.body.i.preheader ], [ %scevgep16, %for.body.i ] %h.022.i = phi i16 [ %h.1.i, %for.body.i ], [ 0, %for.body.i.preheader ] %f.020.i = phi i32 [ %f.1.i, %for.body.i ], [ undef, %for.body.i.preheader ] %0 = phi i32 [ %start1, %for.body.i.preheader ], [ %2, %for.body.i ] - %1 = load i32, i32* %lsr.iv15, align 4 + %1 = load i32, ptr %lsr.iv15, align 4 %add.i = add nsw i32 %1, %f.020.i %cmp1.i = icmp sgt i32 %add.i, 0 %cmp3.i = icmp sgt i32 %1, 0 @@ -31,7 +31,7 @@ %narrow.i = and i1 %cmp3.i, %cmp1.i %add6.i = zext i1 %narrow.i to i16 %h.1.i = add i16 %h.022.i, %add6.i - %scevgep16 = getelementptr i32, i32* %lsr.iv15, i32 1 + %scevgep16 = getelementptr i32, ptr %lsr.iv15, i32 1 %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) %3 = icmp ne i32 %2, 0 br i1 %3, label %for.body.i, label %c.exit @@ -39,7 +39,7 @@ c.exit: ; preds = %for.body.i %4 = icmp sgt i32 %d, 0 %phitmp = sext i16 %h.1.i to i32 - %call1 = tail call arm_aapcs_vfpcc signext i16 bitcast (i16 (...)* @crc16 to i16 (i32)*)(i32 %phitmp) + %call1 = tail call arm_aapcs_vfpcc signext i16 @crc16(i32 %phitmp) %conv2 = sext i16 %call1 to i32 br i1 %4, label %for.cond4.preheader.us.preheader, label %c.exit59 @@ -55,32 +55,32 @@ br label %for.cond4.preheader.us for.cond4.preheader.us: ; preds = %middle.block, %for.cond4.preheader.us.preheader - %lsr.iv7 = phi i16* [ %28, %middle.block ], [ %k, %for.cond4.preheader.us.preheader ] + %lsr.iv7 = phi ptr [ %28, %middle.block ], [ %k, %for.cond4.preheader.us.preheader ] %i.064.us = phi i32 [ %inc15.us, %middle.block ], [ 0, %for.cond4.preheader.us.preheader ] - %arrayidx12.us = getelementptr inbounds i32, i32* %e, i32 %i.064.us - %arrayidx12.promoted.us = load i32, i32* %arrayidx12.us, align 4 + %arrayidx12.us = getelementptr inbounds i32, ptr %e, i32 %i.064.us + %arrayidx12.promoted.us = load i32, ptr %arrayidx12.us, align 4 %11 = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %arrayidx12.promoted.us, i32 0 %start2 = call i32 @llvm.start.loop.iterations.i32(i32 %8) br label %vector.body vector.body: ; preds = %vector.body, %for.cond4.preheader.us - %lsr.iv10 = phi i16* [ %scevgep11, %vector.body ], [ %lsr.iv7, %for.cond4.preheader.us ] - %lsr.iv4 = phi i16* [ %scevgep5, %vector.body ], [ %l, %for.cond4.preheader.us ] + %lsr.iv10 = phi ptr [ %scevgep11, %vector.body ], [ %lsr.iv7, %for.cond4.preheader.us ] + %lsr.iv4 = phi ptr [ %scevgep5, %vector.body ], [ %l, %for.cond4.preheader.us ] %vec.phi = phi <4 x i32> [ %11, %for.cond4.preheader.us ], [ %19, %vector.body ] %12 = phi i32 [ %start2, %for.cond4.preheader.us ], [ %20, %vector.body ] %13 = phi i32 [ %d, %for.cond4.preheader.us ], [ %15, %vector.body ] - %lsr.iv1012 = bitcast i16* %lsr.iv10 to <4 x i16>* - %lsr.iv46 = bitcast i16* %lsr.iv4 to <4 x i16>* + %lsr.iv1012 = bitcast ptr %lsr.iv10 to ptr + %lsr.iv46 = bitcast ptr %lsr.iv4 to ptr %14 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %13) %15 = sub i32 %13, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1012, i32 2, <4 x i1> %14, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1012, i32 2, <4 x i1> %14, <4 x i16> undef) %16 = sext <4 x i16> %wide.masked.load to <4 x i32> - %wide.masked.load76 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv46, i32 2, <4 x i1> %14, <4 x i16> undef) + %wide.masked.load76 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv46, i32 2, <4 x i1> %14, <4 x i16> undef) %17 = sext <4 x i16> %wide.masked.load76 to <4 x i32> %18 = mul nsw <4 x i32> %17, %16 %19 = add <4 x i32> %18, %vec.phi - %scevgep5 = getelementptr i16, i16* %lsr.iv4, i32 4 - %scevgep11 = getelementptr i16, i16* %lsr.iv10, i32 4 + %scevgep5 = getelementptr i16, ptr %lsr.iv4, i32 4 + %scevgep11 = getelementptr i16, ptr %lsr.iv10, i32 4 %20 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %12, i32 1) %21 = icmp ne i32 %20, 0 br i1 %21, label %vector.body, label %middle.block @@ -89,17 +89,17 @@ %vec.phi.lcssa = phi <4 x i32> [ %vec.phi, %vector.body ] %.lcssa = phi <4 x i32> [ %19, %vector.body ] %22 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %10) - %23 = bitcast i16* %lsr.iv7 to i1* + %23 = bitcast ptr %lsr.iv7 to ptr %24 = select <4 x i1> %22, <4 x i32> %.lcssa, <4 x i32> %vec.phi.lcssa %25 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %24) %sunkaddr = mul i32 %i.064.us, 4 - %26 = bitcast i32* %e to i8* - %sunkaddr17 = getelementptr inbounds i8, i8* %26, i32 %sunkaddr - %27 = bitcast i8* %sunkaddr17 to i32* - store i32 %25, i32* %27, align 4 + %26 = bitcast ptr %e to ptr + %sunkaddr17 = getelementptr inbounds i8, ptr %26, i32 %sunkaddr + %27 = bitcast ptr %sunkaddr17 to ptr + store i32 %25, ptr %27, align 4 %inc15.us = add nuw nsw i32 %i.064.us, 1 - %scevgep9 = getelementptr i1, i1* %23, i32 %5 - %28 = bitcast i1* %scevgep9 to i16* + %scevgep9 = getelementptr i1, ptr %23, i32 %5 + %28 = bitcast ptr %scevgep9 to ptr %exitcond66 = icmp eq i32 %inc15.us, %d br i1 %exitcond66, label %for.end16, label %for.cond4.preheader.us @@ -112,11 +112,11 @@ br label %for.body.i57 for.body.i57: ; preds = %for.body.i57, %for.body.i57.preheader - %lsr.iv1 = phi i32* [ %e, %for.body.i57.preheader ], [ %scevgep, %for.body.i57 ] + %lsr.iv1 = phi ptr [ %e, %for.body.i57.preheader ], [ %scevgep, %for.body.i57 ] %h.022.i44 = phi i16 [ %h.1.i54, %for.body.i57 ], [ 0, %for.body.i57.preheader ] %f.020.i46 = phi i32 [ %f.1.i51, %for.body.i57 ], [ undef, %for.body.i57.preheader ] %30 = phi i32 [ %start3, %for.body.i57.preheader ], [ %32, %for.body.i57 ] - %31 = load i32, i32* %lsr.iv1, align 4 + %31 = load i32, ptr %lsr.iv1, align 4 %add.i48 = add nsw i32 %31, %f.020.i46 %cmp1.i49 = icmp sgt i32 %add.i48, 0 %cmp3.i50 = icmp sgt i32 %31, 0 @@ -124,7 +124,7 @@ %narrow.i52 = and i1 %cmp3.i50, %cmp1.i49 %add6.i53 = zext i1 %narrow.i52 to i16 %h.1.i54 = add i16 %h.022.i44, %add6.i53 - %scevgep = getelementptr i32, i32* %lsr.iv1, i32 1 + %scevgep = getelementptr i32, ptr %lsr.iv1, i32 1 %32 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %30, i32 1) %33 = icmp ne i32 %32, 0 br i1 %33, label %for.body.i57, label %c.exit59.loopexit @@ -136,11 +136,11 @@ c.exit59: ; preds = %c.exit59.loopexit, %for.end16, %c.exit, %c.exit.thread %conv27173 = phi i32 [ %conv2, %for.end16 ], [ %conv2, %c.exit59.loopexit ], [ %conv2, %c.exit ], [ %conv270, %c.exit.thread ] %h.0.lcssa.i58 = phi i32 [ 0, %for.end16 ], [ %phitmp67, %c.exit59.loopexit ], [ 0, %c.exit ], [ 0, %c.exit.thread ] - %call19 = tail call arm_aapcs_vfpcc signext i16 bitcast (i16 (...)* @crc16 to i16 (i32, i32)*)(i32 %h.0.lcssa.i58, i32 %conv27173) + %call19 = tail call arm_aapcs_vfpcc signext i16 @crc16(i32 %h.0.lcssa.i58, i32 %conv27173) ret i16 %call19 } declare dso_local arm_aapcs_vfpcc signext i16 @crc16(...) local_unnamed_addr #0 - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1 + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir index af76970..0580a97 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir @@ -4,7 +4,7 @@ # TODOD: As far as I can tell this test is fine. The tail predicating the second loop means we remove the instruction that would otherwise block the first. --- | - define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) #0 { + define arm_aapcs_vfpcc void @arm_var_f32_mve(ptr %pSrc, i32 %blockSize, ptr nocapture %pResult) #0 { entry: %0 = add i32 %blockSize, 3 %1 = icmp slt i32 %blockSize, 4 @@ -23,13 +23,13 @@ do.body.i: ; preds = %do.body.i, %entry %blkCnt.0.i = phi i32 [ %13, %do.body.i ], [ %blockSize, %entry ] %sumVec.0.i = phi <4 x float> [ %12, %do.body.i ], [ zeroinitializer, %entry ] - %pSrc.addr.0.i = phi float* [ %add.ptr.i, %do.body.i ], [ %pSrc, %entry ] + %pSrc.addr.0.i = phi ptr [ %add.ptr.i, %do.body.i ], [ %pSrc, %entry ] %9 = phi i32 [ %start1, %entry ], [ %14, %do.body.i ] - %pSrc.addr.0.i2 = bitcast float* %pSrc.addr.0.i to <4 x float>* + %pSrc.addr.0.i2 = bitcast ptr %pSrc.addr.0.i to ptr %10 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0.i) - %11 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.0.i2, i32 4, <4 x i1> %10, <4 x float> zeroinitializer) + %11 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %pSrc.addr.0.i2, i32 4, <4 x i1> %10, <4 x float> zeroinitializer) %12 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %sumVec.0.i, <4 x float> %11, <4 x i1> %10, <4 x float> %sumVec.0.i) - %add.ptr.i = getelementptr inbounds float, float* %pSrc.addr.0.i, i32 4 + %add.ptr.i = getelementptr inbounds float, ptr %pSrc.addr.0.i, i32 4 %13 = add i32 %blkCnt.0.i, -4 %14 = call i32 @llvm.loop.decrement.reg.i32(i32 %9, i32 1) %15 = icmp ne i32 %14, 0 @@ -50,14 +50,14 @@ do.body: ; preds = %do.body, %arm_mean_f32_mve.exit %blkCnt.0 = phi i32 [ %blockSize, %arm_mean_f32_mve.exit ], [ %26, %do.body ] %sumVec.0 = phi <4 x float> [ zeroinitializer, %arm_mean_f32_mve.exit ], [ %25, %do.body ] - %pSrc.addr.0 = phi float* [ %pSrc, %arm_mean_f32_mve.exit ], [ %add.ptr, %do.body ] + %pSrc.addr.0 = phi ptr [ %pSrc, %arm_mean_f32_mve.exit ], [ %add.ptr, %do.body ] %21 = phi i32 [ %start2, %arm_mean_f32_mve.exit ], [ %27, %do.body ] - %pSrc.addr.01 = bitcast float* %pSrc.addr.0 to <4 x float>* + %pSrc.addr.01 = bitcast ptr %pSrc.addr.0 to ptr %22 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0) - %23 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.01, i32 4, <4 x i1> %22, <4 x float> zeroinitializer) + %23 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %pSrc.addr.01, i32 4, <4 x i1> %22, <4 x float> zeroinitializer) %24 = tail call fast <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %23, <4 x float> %20, <4 x i1> %22, <4 x float> undef) %25 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %24, <4 x float> %24, <4 x float> %sumVec.0, <4 x i1> %22) - %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4 + %add.ptr = getelementptr inbounds float, ptr %pSrc.addr.0, i32 4 %26 = add i32 %blkCnt.0, -4 %27 = call i32 @llvm.loop.decrement.reg.i32(i32 %21, i32 1) %28 = icmp ne i32 %27, 0 @@ -69,7 +69,7 @@ %sub2 = add i32 %blockSize, -1 %conv = uitofp i32 %sub2 to float %div = fdiv fast float %add2.i, %conv - store float %div, float* %pResult, align 4 + store float %div, ptr %pResult, align 4 ret void } @@ -83,7 +83,7 @@ declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1 ; Function Attrs: argmemonly nounwind readonly willreturn - declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2 + declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir index 6f7a8cd..ae8acdd 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-lr-terminator.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - define dso_local arm_aapcs_vfpcc void @start_before_elems(i32* noalias nocapture %a, i8* nocapture readonly %b, i8* nocapture readonly %c, i32 %N) local_unnamed_addr #0 { + define dso_local arm_aapcs_vfpcc void @start_before_elems(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) local_unnamed_addr #0 { entry: %div = lshr i32 %N, 1 %cmp9 = icmp eq i32 %div, 0 @@ -18,25 +18,25 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ] %7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ] - %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>* + %lsr.iv1 = bitcast ptr %lsr.iv to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 4 - %scevgep4 = getelementptr i8, i8* %b, i32 %index - %scevgep45 = bitcast i8* %scevgep4 to <4 x i8>* - %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %scevgep45, i32 1, <4 x i1> %8, <4 x i8> undef) + %scevgep4 = getelementptr i8, ptr %b, i32 %index + %scevgep45 = bitcast ptr %scevgep4 to ptr + %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %scevgep45, i32 1, <4 x i1> %8, <4 x i8> undef) %10 = zext <4 x i8> %wide.masked.load to <4 x i32> - %scevgep2 = getelementptr i8, i8* %c, i32 %index - %scevgep23 = bitcast i8* %scevgep2 to <4 x i8>* - %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %scevgep23, i32 1, <4 x i1> %8, <4 x i8> undef) + %scevgep2 = getelementptr i8, ptr %c, i32 %index + %scevgep23 = bitcast ptr %scevgep2 to ptr + %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %scevgep23, i32 1, <4 x i1> %8, <4 x i8> undef) %11 = zext <4 x i8> %wide.masked.load13 to <4 x i32> %12 = mul nuw nsw <4 x i32> %11, %10 - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %12, <4 x i32>* %lsr.iv1, i32 4, <4 x i1> %8) + call void @llvm.masked.store.v4i32.p0(<4 x i32> %12, ptr %lsr.iv1, i32 4, <4 x i1> %8) %index.next = add i32 %index, 4 - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 %13 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %14 = icmp ne i32 %13, 0 br i1 %14, label %vector.body, label %for.cond.cleanup @@ -44,8 +44,8 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } - declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) declare <4 x i1> @llvm.arm.mve.vctp32(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll index 9162d4a..bd461eb 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s -define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) { +define arm_aapcs_vfpcc void @arm_var_f32_mve(ptr %pSrc, i32 %blockSize, ptr nocapture %pResult) { ; CHECK-LABEL: arm_var_f32_mve: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -42,13 +42,13 @@ entry: do.body.i: ; preds = %entry, %do.body.i %blkCnt.0.i = phi i32 [ %sub.i, %do.body.i ], [ %blockSize, %entry ] %sumVec.0.i = phi <4 x float> [ %3, %do.body.i ], [ zeroinitializer, %entry ] - %pSrc.addr.0.i = phi float* [ %add.ptr.i, %do.body.i ], [ %pSrc, %entry ] + %pSrc.addr.0.i = phi ptr [ %add.ptr.i, %do.body.i ], [ %pSrc, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0.i) - %1 = bitcast float* %pSrc.addr.0.i to <4 x float>* - %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %1 = bitcast ptr %pSrc.addr.0.i to ptr + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) %3 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %sumVec.0.i, <4 x float> %2, <4 x i1> %0, <4 x float> %sumVec.0.i) %sub.i = add nsw i32 %blkCnt.0.i, -4 - %add.ptr.i = getelementptr inbounds float, float* %pSrc.addr.0.i, i32 4 + %add.ptr.i = getelementptr inbounds float, ptr %pSrc.addr.0.i, i32 4 %cmp.i = icmp sgt i32 %blkCnt.0.i, 4 br i1 %cmp.i, label %do.body.i, label %arm_mean_f32_mve.exit @@ -64,14 +64,14 @@ arm_mean_f32_mve.exit: ; preds = %do.body.i do.body: ; preds = %do.body, %arm_mean_f32_mve.exit %blkCnt.0 = phi i32 [ %blockSize, %arm_mean_f32_mve.exit ], [ %sub, %do.body ] %sumVec.0 = phi <4 x float> [ zeroinitializer, %arm_mean_f32_mve.exit ], [ %9, %do.body ] - %pSrc.addr.0 = phi float* [ %pSrc, %arm_mean_f32_mve.exit ], [ %add.ptr, %do.body ] + %pSrc.addr.0 = phi ptr [ %pSrc, %arm_mean_f32_mve.exit ], [ %add.ptr, %do.body ] %5 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0) - %6 = bitcast float* %pSrc.addr.0 to <4 x float>* - %7 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %6, i32 4, <4 x i1> %5, <4 x float> zeroinitializer) + %6 = bitcast ptr %pSrc.addr.0 to ptr + %7 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %6, i32 4, <4 x i1> %5, <4 x float> zeroinitializer) %8 = tail call fast <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %7, <4 x float> %.splat, <4 x i1> %5, <4 x float> undef) %9 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %8, <4 x float> %8, <4 x float> %sumVec.0, <4 x i1> %5) %sub = add nsw i32 %blkCnt.0, -4 - %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4 + %add.ptr = getelementptr inbounds float, ptr %pSrc.addr.0, i32 4 %cmp1 = icmp sgt i32 %blkCnt.0, 4 br i1 %cmp1, label %do.body, label %do.end @@ -84,7 +84,7 @@ do.end: ; preds = %do.body br label %cleanup cleanup: ; preds = %entry, %do.end - store float %div, float* %pResult, align 4 + store float %div, ptr %pResult, align 4 ret void } @@ -94,7 +94,7 @@ declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x fl declare <4 x i1> @llvm.arm.mve.vctp32(i32) -declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) +declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir index 08353c8..e9f0dbe 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-def-before-start.mir @@ -5,7 +5,7 @@ # that the correct value is used for the dlstp. --- | - define dso_local arm_aapcs_vfpcc void @start_before_elems(i32* noalias nocapture %a, i8* nocapture readonly %b, i8* nocapture readonly %c, i32 %N) local_unnamed_addr #0 { + define dso_local arm_aapcs_vfpcc void @start_before_elems(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) local_unnamed_addr #0 { entry: %div = lshr i32 %N, 1 %cmp9 = icmp eq i32 %div, 0 @@ -22,25 +22,25 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ] %7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ] - %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>* + %lsr.iv1 = bitcast ptr %lsr.iv to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 4 - %scevgep4 = getelementptr i8, i8* %b, i32 %index - %scevgep45 = bitcast i8* %scevgep4 to <4 x i8>* - %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %scevgep45, i32 1, <4 x i1> %8, <4 x i8> undef) + %scevgep4 = getelementptr i8, ptr %b, i32 %index + %scevgep45 = bitcast ptr %scevgep4 to ptr + %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %scevgep45, i32 1, <4 x i1> %8, <4 x i8> undef) %10 = zext <4 x i8> %wide.masked.load to <4 x i32> - %scevgep2 = getelementptr i8, i8* %c, i32 %index - %scevgep23 = bitcast i8* %scevgep2 to <4 x i8>* - %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %scevgep23, i32 1, <4 x i1> %8, <4 x i8> undef) + %scevgep2 = getelementptr i8, ptr %c, i32 %index + %scevgep23 = bitcast ptr %scevgep2 to ptr + %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %scevgep23, i32 1, <4 x i1> %8, <4 x i8> undef) %11 = zext <4 x i8> %wide.masked.load13 to <4 x i32> %12 = mul nuw nsw <4 x i32> %11, %10 - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %12, <4 x i32>* %lsr.iv1, i32 4, <4 x i1> %8) + call void @llvm.masked.store.v4i32.p0(<4 x i32> %12, ptr %lsr.iv1, i32 4, <4 x i1> %8) %index.next = add i32 %index, 4 - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 %13 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %14 = icmp ne i32 %13, 0 br i1 %14, label %vector.body, label %for.cond.cleanup @@ -48,8 +48,8 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } - declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>) #1 - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2 + declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>) #1 + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir index 26b8879..fef4daf 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir @@ -5,7 +5,7 @@ # that the correct value is used for the dlstp. --- | - define dso_local arm_aapcs_vfpcc void @start_before_elems(i32* noalias nocapture %a, i8* nocapture readonly %b, i8* nocapture readonly %c, i32 %N) local_unnamed_addr #0 { + define dso_local arm_aapcs_vfpcc void @start_before_elems(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) local_unnamed_addr #0 { entry: %div = lshr i32 %N, 1 %cmp9 = icmp eq i32 %div, 0 @@ -22,25 +22,25 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ] %7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ] - %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>* + %lsr.iv1 = bitcast ptr %lsr.iv to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 4 - %scevgep4 = getelementptr i8, i8* %b, i32 %index - %scevgep45 = bitcast i8* %scevgep4 to <4 x i8>* - %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %scevgep45, i32 1, <4 x i1> %8, <4 x i8> undef) + %scevgep4 = getelementptr i8, ptr %b, i32 %index + %scevgep45 = bitcast ptr %scevgep4 to ptr + %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %scevgep45, i32 1, <4 x i1> %8, <4 x i8> undef) %10 = zext <4 x i8> %wide.masked.load to <4 x i32> - %scevgep2 = getelementptr i8, i8* %c, i32 %index - %scevgep23 = bitcast i8* %scevgep2 to <4 x i8>* - %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %scevgep23, i32 1, <4 x i1> %8, <4 x i8> undef) + %scevgep2 = getelementptr i8, ptr %c, i32 %index + %scevgep23 = bitcast ptr %scevgep2 to ptr + %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %scevgep23, i32 1, <4 x i1> %8, <4 x i8> undef) %11 = zext <4 x i8> %wide.masked.load13 to <4 x i32> %12 = mul nuw nsw <4 x i32> %11, %10 - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %12, <4 x i32>* %lsr.iv1, i32 4, <4 x i1> %8) + call void @llvm.masked.store.v4i32.p0(<4 x i32> %12, ptr %lsr.iv1, i32 4, <4 x i1> %8) %index.next = add i32 %index, 4 - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 %13 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %14 = icmp ne i32 %13, 0 br i1 %14, label %vector.body, label %for.cond.cleanup @@ -48,8 +48,8 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } - declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>) #1 - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2 + declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>) #1 + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-block-cond-iter-count.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-block-cond-iter-count.mir index f17496c..6b16683b 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-block-cond-iter-count.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-block-cond-iter-count.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - define dso_local arm_aapcs_vfpcc void @multi_cond_iter_count(i32* nocapture %0, i32* nocapture readonly %1, i32 %2, i32 %3) local_unnamed_addr #0 { + define dso_local arm_aapcs_vfpcc void @multi_cond_iter_count(ptr nocapture %0, ptr nocapture readonly %1, i32 %2, i32 %3) local_unnamed_addr #0 { %5 = icmp eq i32 %3, 2 %6 = select i1 %5, i32 2, i32 4 %7 = icmp eq i32 %3, 4 @@ -12,10 +12,10 @@ br i1 %10, label %64, label %11 11: ; preds = %4 - %12 = getelementptr i32, i32* %0, i32 %9 - %13 = getelementptr i32, i32* %1, i32 %9 - %14 = icmp ugt i32* %13, %0 - %15 = icmp ugt i32* %12, %1 + %12 = getelementptr i32, ptr %0, i32 %9 + %13 = getelementptr i32, ptr %1, i32 %9 + %14 = icmp ugt ptr %13, %0 + %15 = icmp ugt ptr %12, %1 %16 = and i1 %14, %15 %17 = add i32 %9, 3 %18 = lshr i32 %17, 2 @@ -44,20 +44,20 @@ br label %33 33: ; preds = %33, %32 - %34 = phi i32* [ %46, %33 ], [ %0, %32 ] - %35 = phi i32* [ %45, %33 ], [ %1, %32 ] + %34 = phi ptr [ %46, %33 ], [ %0, %32 ] + %35 = phi ptr [ %45, %33 ], [ %1, %32 ] %36 = phi i32 [ %start2, %32 ], [ %47, %33 ] %37 = phi i32 [ %9, %32 ], [ %41, %33 ] - %38 = bitcast i32* %34 to <4 x i32>* - %39 = bitcast i32* %35 to <4 x i32>* + %38 = bitcast ptr %34 to ptr + %39 = bitcast ptr %35 to ptr %40 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %37) %41 = sub i32 %37, 4 - %42 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %39, i32 4, <4 x i1> %40, <4 x i32> undef) - %43 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %38, i32 4, <4 x i1> %40, <4 x i32> undef) + %42 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %39, i32 4, <4 x i1> %40, <4 x i32> undef) + %43 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %38, i32 4, <4 x i1> %40, <4 x i32> undef) %44 = mul nsw <4 x i32> %43, %42 - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %44, <4 x i32>* %38, i32 4, <4 x i1> %40) - %45 = getelementptr i32, i32* %35, i32 4 - %46 = getelementptr i32, i32* %34, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %44, ptr %38, i32 4, <4 x i1> %40) + %45 = getelementptr i32, ptr %35, i32 4 + %46 = getelementptr i32, ptr %34, i32 4 %47 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %36, i32 1) %48 = icmp ne i32 %47, 0 br i1 %48, label %33, label %64 @@ -68,19 +68,19 @@ br i1 %51, label %64, label %52 52: ; preds = %49 - %53 = getelementptr inbounds i32, i32* %1, i32 %50 - %54 = load i32, i32* %53, align 4 - %55 = getelementptr inbounds i32, i32* %0, i32 %50 - %56 = load i32, i32* %55, align 4 + %53 = getelementptr inbounds i32, ptr %1, i32 %50 + %54 = load i32, ptr %53, align 4 + %55 = getelementptr inbounds i32, ptr %0, i32 %50 + %56 = load i32, ptr %55, align 4 %57 = mul nsw i32 %56, %54 - store i32 %57, i32* %55, align 4 + store i32 %57, ptr %55, align 4 %58 = add nuw i32 %50, 1 - %59 = getelementptr inbounds i32, i32* %1, i32 %58 - %60 = load i32, i32* %59, align 4 - %61 = getelementptr inbounds i32, i32* %0, i32 %58 - %62 = load i32, i32* %61, align 4 + %59 = getelementptr inbounds i32, ptr %1, i32 %58 + %60 = load i32, ptr %59, align 4 + %61 = getelementptr inbounds i32, ptr %0, i32 %58 + %62 = load i32, ptr %61, align 4 %63 = mul nsw i32 %62, %60 - store i32 %63, i32* %61, align 4 + store i32 %63, ptr %61, align 4 br label %64 64: ; preds = %33, %52, %49, %4 @@ -90,48 +90,48 @@ %66 = phi i32 [ %108, %65 ], [ 0, %31 ] %67 = phi i32 [ 0, %31 ], [ %107, %65 ] %68 = phi i32 [ %start1, %31 ], [ %109, %65 ] - %69 = bitcast i32* %0 to i8* - %70 = bitcast i32* %1 to i8* - %71 = getelementptr i8, i8* %70, i32 %66 - %72 = bitcast i8* %71 to i32* - %73 = bitcast i32* %72 to i32* - %74 = load i32, i32* %73, align 4 - %75 = getelementptr i8, i8* %69, i32 %66 - %76 = bitcast i8* %75 to i32* - %77 = bitcast i32* %76 to i32* - %78 = load i32, i32* %77, align 4 + %69 = bitcast ptr %0 to ptr + %70 = bitcast ptr %1 to ptr + %71 = getelementptr i8, ptr %70, i32 %66 + %72 = bitcast ptr %71 to ptr + %73 = bitcast ptr %72 to ptr + %74 = load i32, ptr %73, align 4 + %75 = getelementptr i8, ptr %69, i32 %66 + %76 = bitcast ptr %75 to ptr + %77 = bitcast ptr %76 to ptr + %78 = load i32, ptr %77, align 4 %79 = mul nsw i32 %78, %74 - store i32 %79, i32* %77, align 4 - %80 = getelementptr i8, i8* %70, i32 %66 - %81 = bitcast i8* %80 to i32* - %82 = getelementptr i32, i32* %81, i32 1 - %83 = load i32, i32* %82, align 4 - %84 = getelementptr i8, i8* %69, i32 %66 - %85 = bitcast i8* %84 to i32* - %86 = getelementptr i32, i32* %85, i32 1 - %87 = load i32, i32* %86, align 4 + store i32 %79, ptr %77, align 4 + %80 = getelementptr i8, ptr %70, i32 %66 + %81 = bitcast ptr %80 to ptr + %82 = getelementptr i32, ptr %81, i32 1 + %83 = load i32, ptr %82, align 4 + %84 = getelementptr i8, ptr %69, i32 %66 + %85 = bitcast ptr %84 to ptr + %86 = getelementptr i32, ptr %85, i32 1 + %87 = load i32, ptr %86, align 4 %88 = mul nsw i32 %87, %83 - store i32 %88, i32* %86, align 4 - %89 = getelementptr i8, i8* %70, i32 %66 - %90 = bitcast i8* %89 to i32* - %91 = getelementptr i32, i32* %90, i32 2 - %92 = load i32, i32* %91, align 4 - %93 = getelementptr i8, i8* %69, i32 %66 - %94 = bitcast i8* %93 to i32* - %95 = getelementptr i32, i32* %94, i32 2 - %96 = load i32, i32* %95, align 4 + store i32 %88, ptr %86, align 4 + %89 = getelementptr i8, ptr %70, i32 %66 + %90 = bitcast ptr %89 to ptr + %91 = getelementptr i32, ptr %90, i32 2 + %92 = load i32, ptr %91, align 4 + %93 = getelementptr i8, ptr %69, i32 %66 + %94 = bitcast ptr %93 to ptr + %95 = getelementptr i32, ptr %94, i32 2 + %96 = load i32, ptr %95, align 4 %97 = mul nsw i32 %96, %92 - store i32 %97, i32* %95, align 4 - %98 = getelementptr i8, i8* %70, i32 %66 - %99 = bitcast i8* %98 to i32* - %100 = getelementptr i32, i32* %99, i32 3 - %101 = load i32, i32* %100, align 4 - %102 = getelementptr i8, i8* %69, i32 %66 - %103 = bitcast i8* %102 to i32* - %104 = getelementptr i32, i32* %103, i32 3 - %105 = load i32, i32* %104, align 4 + store i32 %97, ptr %95, align 4 + %98 = getelementptr i8, ptr %70, i32 %66 + %99 = bitcast ptr %98 to ptr + %100 = getelementptr i32, ptr %99, i32 3 + %101 = load i32, ptr %100, align 4 + %102 = getelementptr i8, ptr %69, i32 %66 + %103 = bitcast ptr %102 to ptr + %104 = getelementptr i32, ptr %103, i32 3 + %105 = load i32, ptr %104, align 4 %106 = mul nsw i32 %105, %101 - store i32 %106, i32* %104, align 4 + store i32 %106, ptr %104, align 4 %107 = add nuw i32 %67, 4 %108 = add i32 %66, 16 %109 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %68, i32 1) @@ -139,8 +139,8 @@ br i1 %110, label %65, label %49 } - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #1 - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2 + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #1 + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-cond-iter-count.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-cond-iter-count.mir index 5ce9a63..a530c1e 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-cond-iter-count.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multi-cond-iter-count.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s --- | - define dso_local arm_aapcs_vfpcc void @multi_cond_iter_count(i32* noalias nocapture %0, i32* nocapture readonly %1, i32 %2, i32 %3) { + define dso_local arm_aapcs_vfpcc void @multi_cond_iter_count(ptr noalias nocapture %0, ptr nocapture readonly %1, i32 %2, i32 %3) { %5 = icmp eq i32 %3, 2 %6 = select i1 %5, i32 2, i32 4 %7 = icmp eq i32 %3, 4 @@ -22,20 +22,20 @@ br label %18 18: ; preds = %18, %17 - %19 = phi i32* [ %31, %18 ], [ %0, %17 ] - %20 = phi i32* [ %30, %18 ], [ %1, %17 ] + %19 = phi ptr [ %31, %18 ], [ %0, %17 ] + %20 = phi ptr [ %30, %18 ], [ %1, %17 ] %21 = phi i32 [ %start, %17 ], [ %32, %18 ] %22 = phi i32 [ %9, %17 ], [ %26, %18 ] - %23 = bitcast i32* %19 to <4 x i32>* - %24 = bitcast i32* %20 to <4 x i32>* + %23 = bitcast ptr %19 to ptr + %24 = bitcast ptr %20 to ptr %25 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %22) %26 = sub i32 %22, 4 - %27 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %24, i32 4, <4 x i1> %25, <4 x i32> undef) - %28 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %23, i32 4, <4 x i1> %25, <4 x i32> undef) + %27 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %24, i32 4, <4 x i1> %25, <4 x i32> undef) + %28 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %23, i32 4, <4 x i1> %25, <4 x i32> undef) %29 = mul nsw <4 x i32> %28, %27 - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %29, <4 x i32>* %23, i32 4, <4 x i1> %25) - %30 = getelementptr i32, i32* %20, i32 4 - %31 = getelementptr i32, i32* %19, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %29, ptr %23, i32 4, <4 x i1> %25) + %30 = getelementptr i32, ptr %20, i32 4 + %31 = getelementptr i32, ptr %19, i32 4 %32 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %21, i32 1) %33 = icmp ne i32 %32, 0 br i1 %33, label %18, label %34 @@ -43,8 +43,8 @@ 34: ; preds = %18, %4 ret void } - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) declare <4 x i1> @llvm.arm.mve.vctp32(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiple-do-loops.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiple-do-loops.mir index 6e8ad08..c2f8cc0 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiple-do-loops.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/multiple-do-loops.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s --- | - define dso_local arm_aapcs_vfpcc void @test1(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr { + define dso_local arm_aapcs_vfpcc void @test1(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) local_unnamed_addr { entry: %cmp30 = icmp eq i32 %N, 0 %0 = add i32 %N, 3 @@ -18,23 +18,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv68 = phi i32* [ %scevgep69, %vector.body ], [ %a, %vector.ph ] - %lsr.iv65 = phi i32* [ %scevgep66, %vector.body ], [ %c, %vector.ph ] - %lsr.iv62 = phi i32* [ %scevgep63, %vector.body ], [ %b, %vector.ph ] + %lsr.iv68 = phi ptr [ %scevgep69, %vector.body ], [ %a, %vector.ph ] + %lsr.iv65 = phi ptr [ %scevgep66, %vector.body ], [ %c, %vector.ph ] + %lsr.iv62 = phi ptr [ %scevgep63, %vector.body ], [ %b, %vector.ph ] %6 = phi i32 [ %start1, %vector.ph ], [ %11, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv6870 = bitcast i32* %lsr.iv68 to <4 x i32>* - %lsr.iv6567 = bitcast i32* %lsr.iv65 to <4 x i32>* - %lsr.iv6264 = bitcast i32* %lsr.iv62 to <4 x i32>* + %lsr.iv6870 = bitcast ptr %lsr.iv68 to ptr + %lsr.iv6567 = bitcast ptr %lsr.iv65 to ptr + %lsr.iv6264 = bitcast ptr %lsr.iv62 to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 4 - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv6264, i32 4, <4 x i1> %8, <4 x i32> undef) - %wide.masked.load35 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv6567, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv6264, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load35 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv6567, i32 4, <4 x i1> %8, <4 x i32> undef) %10 = mul nsw <4 x i32> %wide.masked.load35, %wide.masked.load - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv6870, i32 4, <4 x i1> %8) - %scevgep63 = getelementptr i32, i32* %lsr.iv62, i32 4 - %scevgep66 = getelementptr i32, i32* %lsr.iv65, i32 4 - %scevgep69 = getelementptr i32, i32* %lsr.iv68, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv6870, i32 4, <4 x i1> %8) + %scevgep63 = getelementptr i32, ptr %lsr.iv62, i32 4 + %scevgep66 = getelementptr i32, ptr %lsr.iv65, i32 4 + %scevgep69 = getelementptr i32, ptr %lsr.iv68, i32 4 %11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %12 = icmp ne i32 %11, 0 br i1 %12, label %vector.body, label %for.cond4.preheader @@ -54,25 +54,25 @@ br label %vector.body38 vector.body38: ; preds = %vector.body38, %vector.ph39 - %lsr.iv59 = phi i32* [ %scevgep60, %vector.body38 ], [ %a, %vector.ph39 ] - %lsr.iv56 = phi i32* [ %scevgep57, %vector.body38 ], [ %c, %vector.ph39 ] - %lsr.iv = phi i32* [ %scevgep, %vector.body38 ], [ %b, %vector.ph39 ] + %lsr.iv59 = phi ptr [ %scevgep60, %vector.body38 ], [ %a, %vector.ph39 ] + %lsr.iv56 = phi ptr [ %scevgep57, %vector.body38 ], [ %c, %vector.ph39 ] + %lsr.iv = phi ptr [ %scevgep, %vector.body38 ], [ %b, %vector.ph39 ] %20 = phi i32 [ %start2, %vector.ph39 ], [ %26, %vector.body38 ] %21 = phi i32 [ %N, %vector.ph39 ], [ %23, %vector.body38 ] - %lsr.iv5961 = bitcast i32* %lsr.iv59 to <4 x i32>* - %lsr.iv5658 = bitcast i32* %lsr.iv56 to <4 x i32>* - %lsr.iv55 = bitcast i32* %lsr.iv to <4 x i32>* + %lsr.iv5961 = bitcast ptr %lsr.iv59 to ptr + %lsr.iv5658 = bitcast ptr %lsr.iv56 to ptr + %lsr.iv55 = bitcast ptr %lsr.iv to ptr %22 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %21) %23 = sub i32 %21, 4 - %wide.masked.load52 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv55, i32 4, <4 x i1> %22, <4 x i32> undef) - %wide.masked.load53 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv5658, i32 4, <4 x i1> %22, <4 x i32> undef) + %wide.masked.load52 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv55, i32 4, <4 x i1> %22, <4 x i32> undef) + %wide.masked.load53 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv5658, i32 4, <4 x i1> %22, <4 x i32> undef) %24 = xor <4 x i32> %wide.masked.load53, %wide.masked.load52 - %wide.masked.load54 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv5961, i32 4, <4 x i1> %22, <4 x i32> undef) + %wide.masked.load54 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv5961, i32 4, <4 x i1> %22, <4 x i32> undef) %25 = add nsw <4 x i32> %wide.masked.load54, %24 - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %25, <4 x i32>* %lsr.iv5961, i32 4, <4 x i1> %22) - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep57 = getelementptr i32, i32* %lsr.iv56, i32 4 - %scevgep60 = getelementptr i32, i32* %lsr.iv59, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %25, ptr %lsr.iv5961, i32 4, <4 x i1> %22) + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 + %scevgep57 = getelementptr i32, ptr %lsr.iv56, i32 4 + %scevgep60 = getelementptr i32, ptr %lsr.iv59, i32 4 %26 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %20, i32 1) %27 = icmp ne i32 %26, 0 br i1 %27, label %vector.body38, label %for.cond.cleanup6 @@ -81,7 +81,7 @@ ret void } ; Function Attrs: nofree norecurse nounwind - define dso_local arm_aapcs_vfpcc void @test2(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr { + define dso_local arm_aapcs_vfpcc void @test2(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) local_unnamed_addr { entry: %div = lshr i32 %N, 1 %cmp30 = icmp eq i32 %div, 0 @@ -98,23 +98,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv68 = phi i32* [ %scevgep69, %vector.body ], [ %a, %vector.ph ] - %lsr.iv65 = phi i32* [ %scevgep66, %vector.body ], [ %c, %vector.ph ] - %lsr.iv62 = phi i32* [ %scevgep63, %vector.body ], [ %b, %vector.ph ] + %lsr.iv68 = phi ptr [ %scevgep69, %vector.body ], [ %a, %vector.ph ] + %lsr.iv65 = phi ptr [ %scevgep66, %vector.body ], [ %c, %vector.ph ] + %lsr.iv62 = phi ptr [ %scevgep63, %vector.body ], [ %b, %vector.ph ] %6 = phi i32 [ %start1, %vector.ph ], [ %11, %vector.body ] %7 = phi i32 [ %div, %vector.ph ], [ %9, %vector.body ] - %lsr.iv6870 = bitcast i32* %lsr.iv68 to <4 x i32>* - %lsr.iv6567 = bitcast i32* %lsr.iv65 to <4 x i32>* - %lsr.iv6264 = bitcast i32* %lsr.iv62 to <4 x i32>* + %lsr.iv6870 = bitcast ptr %lsr.iv68 to ptr + %lsr.iv6567 = bitcast ptr %lsr.iv65 to ptr + %lsr.iv6264 = bitcast ptr %lsr.iv62 to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 4 - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv6264, i32 4, <4 x i1> %8, <4 x i32> undef) - %wide.masked.load35 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv6567, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv6264, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load35 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv6567, i32 4, <4 x i1> %8, <4 x i32> undef) %10 = mul nsw <4 x i32> %wide.masked.load35, %wide.masked.load - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv6870, i32 4, <4 x i1> %8) - %scevgep63 = getelementptr i32, i32* %lsr.iv62, i32 4 - %scevgep66 = getelementptr i32, i32* %lsr.iv65, i32 4 - %scevgep69 = getelementptr i32, i32* %lsr.iv68, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv6870, i32 4, <4 x i1> %8) + %scevgep63 = getelementptr i32, ptr %lsr.iv62, i32 4 + %scevgep66 = getelementptr i32, ptr %lsr.iv65, i32 4 + %scevgep69 = getelementptr i32, ptr %lsr.iv68, i32 4 %11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %12 = icmp ne i32 %11, 0 br i1 %12, label %vector.body, label %for.cond4.preheader @@ -134,25 +134,25 @@ br label %vector.body38 vector.body38: ; preds = %vector.body38, %vector.ph39 - %lsr.iv59 = phi i32* [ %scevgep60, %vector.body38 ], [ %a, %vector.ph39 ] - %lsr.iv56 = phi i32* [ %scevgep57, %vector.body38 ], [ %c, %vector.ph39 ] - %lsr.iv = phi i32* [ %scevgep, %vector.body38 ], [ %b, %vector.ph39 ] + %lsr.iv59 = phi ptr [ %scevgep60, %vector.body38 ], [ %a, %vector.ph39 ] + %lsr.iv56 = phi ptr [ %scevgep57, %vector.body38 ], [ %c, %vector.ph39 ] + %lsr.iv = phi ptr [ %scevgep, %vector.body38 ], [ %b, %vector.ph39 ] %19 = phi i32 [ %start2, %vector.ph39 ], [ %25, %vector.body38 ] %20 = phi i32 [ %N, %vector.ph39 ], [ %22, %vector.body38 ] - %lsr.iv5961 = bitcast i32* %lsr.iv59 to <4 x i32>* - %lsr.iv5658 = bitcast i32* %lsr.iv56 to <4 x i32>* - %lsr.iv55 = bitcast i32* %lsr.iv to <4 x i32>* + %lsr.iv5961 = bitcast ptr %lsr.iv59 to ptr + %lsr.iv5658 = bitcast ptr %lsr.iv56 to ptr + %lsr.iv55 = bitcast ptr %lsr.iv to ptr %21 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %20) %22 = sub i32 %20, 4 - %wide.masked.load52 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv55, i32 4, <4 x i1> %21, <4 x i32> undef) - %wide.masked.load53 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv5658, i32 4, <4 x i1> %21, <4 x i32> undef) + %wide.masked.load52 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv55, i32 4, <4 x i1> %21, <4 x i32> undef) + %wide.masked.load53 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv5658, i32 4, <4 x i1> %21, <4 x i32> undef) %23 = xor <4 x i32> %wide.masked.load53, %wide.masked.load52 - %wide.masked.load54 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv5961, i32 4, <4 x i1> %21, <4 x i32> undef) + %wide.masked.load54 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv5961, i32 4, <4 x i1> %21, <4 x i32> undef) %24 = add nsw <4 x i32> %wide.masked.load54, %23 - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %24, <4 x i32>* %lsr.iv5961, i32 4, <4 x i1> %21) - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep57 = getelementptr i32, i32* %lsr.iv56, i32 4 - %scevgep60 = getelementptr i32, i32* %lsr.iv59, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %24, ptr %lsr.iv5961, i32 4, <4 x i1> %21) + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 + %scevgep57 = getelementptr i32, ptr %lsr.iv56, i32 4 + %scevgep60 = getelementptr i32, ptr %lsr.iv59, i32 4 %25 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %19, i32 1) %26 = icmp ne i32 %25, 0 br i1 %26, label %vector.body38, label %for.cond.cleanup6 @@ -161,7 +161,7 @@ ret void } ; Function Attrs: nofree norecurse nounwind - define dso_local arm_aapcs_vfpcc void @test3(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr { + define dso_local arm_aapcs_vfpcc void @test3(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) local_unnamed_addr { entry: %cmp54 = icmp eq i32 %N, 0 %0 = add i32 %N, 3 @@ -177,23 +177,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv123 = phi i32* [ %scevgep124, %vector.body ], [ %a, %vector.ph ] - %lsr.iv120 = phi i32* [ %scevgep121, %vector.body ], [ %c, %vector.ph ] - %lsr.iv117 = phi i32* [ %scevgep118, %vector.body ], [ %b, %vector.ph ] + %lsr.iv123 = phi ptr [ %scevgep124, %vector.body ], [ %a, %vector.ph ] + %lsr.iv120 = phi ptr [ %scevgep121, %vector.body ], [ %c, %vector.ph ] + %lsr.iv117 = phi ptr [ %scevgep118, %vector.body ], [ %b, %vector.ph ] %6 = phi i32 [ %start1, %vector.ph ], [ %11, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv123125 = bitcast i32* %lsr.iv123 to <4 x i32>* - %lsr.iv120122 = bitcast i32* %lsr.iv120 to <4 x i32>* - %lsr.iv117119 = bitcast i32* %lsr.iv117 to <4 x i32>* + %lsr.iv123125 = bitcast ptr %lsr.iv123 to ptr + %lsr.iv120122 = bitcast ptr %lsr.iv120 to ptr + %lsr.iv117119 = bitcast ptr %lsr.iv117 to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 4 - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv117119, i32 4, <4 x i1> %8, <4 x i32> undef) - %wide.masked.load62 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv120122, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv117119, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load62 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv120122, i32 4, <4 x i1> %8, <4 x i32> undef) %10 = mul nsw <4 x i32> %wide.masked.load62, %wide.masked.load - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv123125, i32 4, <4 x i1> %8) - %scevgep118 = getelementptr i32, i32* %lsr.iv117, i32 4 - %scevgep121 = getelementptr i32, i32* %lsr.iv120, i32 4 - %scevgep124 = getelementptr i32, i32* %lsr.iv123, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv123125, i32 4, <4 x i1> %8) + %scevgep118 = getelementptr i32, ptr %lsr.iv117, i32 4 + %scevgep121 = getelementptr i32, ptr %lsr.iv120, i32 4 + %scevgep124 = getelementptr i32, ptr %lsr.iv123, i32 4 %11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %12 = icmp ne i32 %11, 0 br i1 %12, label %vector.body, label %for.cond4.preheader @@ -214,25 +214,25 @@ br label %vector.body65 vector.body65: ; preds = %vector.body65, %vector.ph66 - %lsr.iv114 = phi i32* [ %scevgep115, %vector.body65 ], [ %a, %vector.ph66 ] - %lsr.iv111 = phi i32* [ %scevgep112, %vector.body65 ], [ %c, %vector.ph66 ] - %lsr.iv108 = phi i32* [ %scevgep109, %vector.body65 ], [ %b, %vector.ph66 ] + %lsr.iv114 = phi ptr [ %scevgep115, %vector.body65 ], [ %a, %vector.ph66 ] + %lsr.iv111 = phi ptr [ %scevgep112, %vector.body65 ], [ %c, %vector.ph66 ] + %lsr.iv108 = phi ptr [ %scevgep109, %vector.body65 ], [ %b, %vector.ph66 ] %19 = phi i32 [ %start2, %vector.ph66 ], [ %25, %vector.body65 ] %20 = phi i32 [ %div, %vector.ph66 ], [ %22, %vector.body65 ] - %lsr.iv114116 = bitcast i32* %lsr.iv114 to <4 x i32>* - %lsr.iv111113 = bitcast i32* %lsr.iv111 to <4 x i32>* - %lsr.iv108110 = bitcast i32* %lsr.iv108 to <4 x i32>* + %lsr.iv114116 = bitcast ptr %lsr.iv114 to ptr + %lsr.iv111113 = bitcast ptr %lsr.iv111 to ptr + %lsr.iv108110 = bitcast ptr %lsr.iv108 to ptr %21 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %20) %22 = sub i32 %20, 4 - %wide.masked.load79 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv108110, i32 4, <4 x i1> %21, <4 x i32> undef) - %wide.masked.load80 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv111113, i32 4, <4 x i1> %21, <4 x i32> undef) + %wide.masked.load79 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv108110, i32 4, <4 x i1> %21, <4 x i32> undef) + %wide.masked.load80 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv111113, i32 4, <4 x i1> %21, <4 x i32> undef) %23 = xor <4 x i32> %wide.masked.load80, %wide.masked.load79 - %wide.masked.load81 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv114116, i32 4, <4 x i1> %21, <4 x i32> undef) + %wide.masked.load81 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv114116, i32 4, <4 x i1> %21, <4 x i32> undef) %24 = add nsw <4 x i32> %wide.masked.load81, %23 - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %24, <4 x i32>* %lsr.iv114116, i32 4, <4 x i1> %21) - %scevgep109 = getelementptr i32, i32* %lsr.iv108, i32 4 - %scevgep112 = getelementptr i32, i32* %lsr.iv111, i32 4 - %scevgep115 = getelementptr i32, i32* %lsr.iv114, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %24, ptr %lsr.iv114116, i32 4, <4 x i1> %21) + %scevgep109 = getelementptr i32, ptr %lsr.iv108, i32 4 + %scevgep112 = getelementptr i32, ptr %lsr.iv111, i32 4 + %scevgep115 = getelementptr i32, ptr %lsr.iv114, i32 4 %25 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %19, i32 1) %26 = icmp ne i32 %25, 0 br i1 %26, label %vector.body65, label %for.cond15.preheader @@ -252,25 +252,25 @@ br label %vector.body84 vector.body84: ; preds = %vector.body84, %vector.ph85 - %lsr.iv105 = phi i32* [ %scevgep106, %vector.body84 ], [ %a, %vector.ph85 ] - %lsr.iv102 = phi i32* [ %scevgep103, %vector.body84 ], [ %c, %vector.ph85 ] - %lsr.iv = phi i32* [ %scevgep, %vector.body84 ], [ %b, %vector.ph85 ] + %lsr.iv105 = phi ptr [ %scevgep106, %vector.body84 ], [ %a, %vector.ph85 ] + %lsr.iv102 = phi ptr [ %scevgep103, %vector.body84 ], [ %c, %vector.ph85 ] + %lsr.iv = phi ptr [ %scevgep, %vector.body84 ], [ %b, %vector.ph85 ] %34 = phi i32 [ %start3, %vector.ph85 ], [ %40, %vector.body84 ] %35 = phi i32 [ %N, %vector.ph85 ], [ %37, %vector.body84 ] - %lsr.iv105107 = bitcast i32* %lsr.iv105 to <4 x i32>* - %lsr.iv102104 = bitcast i32* %lsr.iv102 to <4 x i32>* - %lsr.iv101 = bitcast i32* %lsr.iv to <4 x i32>* + %lsr.iv105107 = bitcast ptr %lsr.iv105 to ptr + %lsr.iv102104 = bitcast ptr %lsr.iv102 to ptr + %lsr.iv101 = bitcast ptr %lsr.iv to ptr %36 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %35) %37 = sub i32 %35, 4 - %wide.masked.load98 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv101, i32 4, <4 x i1> %36, <4 x i32> undef) - %wide.masked.load99 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv102104, i32 4, <4 x i1> %36, <4 x i32> undef) - %wide.masked.load100 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv105107, i32 4, <4 x i1> %36, <4 x i32> undef) + %wide.masked.load98 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv101, i32 4, <4 x i1> %36, <4 x i32> undef) + %wide.masked.load99 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv102104, i32 4, <4 x i1> %36, <4 x i32> undef) + %wide.masked.load100 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv105107, i32 4, <4 x i1> %36, <4 x i32> undef) %38 = add <4 x i32> %wide.masked.load99, %wide.masked.load98 %39 = sub <4 x i32> %wide.masked.load100, %38 - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %39, <4 x i32>* %lsr.iv105107, i32 4, <4 x i1> %36) - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep103 = getelementptr i32, i32* %lsr.iv102, i32 4 - %scevgep106 = getelementptr i32, i32* %lsr.iv105, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %39, ptr %lsr.iv105107, i32 4, <4 x i1> %36) + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 + %scevgep103 = getelementptr i32, ptr %lsr.iv102, i32 4 + %scevgep106 = getelementptr i32, ptr %lsr.iv105, i32 4 %40 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %34, i32 1) %41 = icmp ne i32 %40, 0 br i1 %41, label %vector.body84, label %for.cond.cleanup17 @@ -278,8 +278,8 @@ for.cond.cleanup17: ; preds = %vector.body84, %entry, %for.cond15.preheader ret void } - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) declare <4 x i1> @llvm.arm.mve.vctp32(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll index cfa6eb3..715f656 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s -define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture readonly %b, i32 %N) { +define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, ptr nocapture readonly %b, i32 %N) { ; CHECK-LABEL: test_acc_scalar_char: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 @@ -45,10 +45,10 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ] - %0 = getelementptr inbounds i8, i8* %b, i32 %index + %0 = getelementptr inbounds i8, ptr %b, i32 %index %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <4 x i8>* - %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %2, i32 1, <4 x i1> %1, <4 x i8> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %2, i32 1, <4 x i1> %1, <4 x i8> undef) %3 = zext <4 x i8> %wide.masked.load to <4 x i32> %4 = mul nuw nsw <4 x i32> %broadcast.splat13, %3 %5 = add nuw nsw <4 x i32> %4, %vec.phi @@ -66,7 +66,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr ret i32 %res.0.lcssa } -define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, i16* nocapture readonly %b, i32 %N) { +define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, ptr nocapture readonly %b, i32 %N) { ; CHECK-LABEL: test_acc_scalar_short: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 @@ -110,10 +110,10 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ] - %0 = getelementptr inbounds i16, i16* %b, i32 %index + %0 = getelementptr inbounds i16, ptr %b, i32 %index %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = sext <4 x i16> %wide.masked.load to <4 x i32> %4 = mul nsw <4 x i32> %broadcast.splat13, %3 %5 = add nsw <4 x i32> %4, %vec.phi @@ -131,7 +131,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr ret i32 %res.0.lcssa } -define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, i8* nocapture readonly %b, i32 %N) { +define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, ptr nocapture readonly %b, i32 %N) { ; CHECK-LABEL: test_acc_scalar_uchar: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 @@ -175,10 +175,10 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ] - %0 = getelementptr inbounds i8, i8* %b, i32 %index + %0 = getelementptr inbounds i8, ptr %b, i32 %index %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <4 x i8>* - %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %2, i32 1, <4 x i1> %1, <4 x i8> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %2, i32 1, <4 x i1> %1, <4 x i8> undef) %3 = zext <4 x i8> %wide.masked.load to <4 x i32> %4 = mul nuw nsw <4 x i32> %broadcast.splat13, %3 %5 = add nuw nsw <4 x i32> %4, %vec.phi @@ -196,7 +196,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr ret i32 %res.0.lcssa } -define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, i16* nocapture readonly %b, i32 %N) { +define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, ptr nocapture readonly %b, i32 %N) { ; CHECK-LABEL: test_acc_scalar_ushort: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 @@ -240,10 +240,10 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ] - %0 = getelementptr inbounds i16, i16* %b, i32 %index + %0 = getelementptr inbounds i16, ptr %b, i32 %index %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = zext <4 x i16> %wide.masked.load to <4 x i32> %4 = mul nsw <4 x i32> %broadcast.splat13, %3 %5 = add nsw <4 x i32> %4, %vec.phi @@ -261,7 +261,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr ret i32 %res.0.lcssa } -define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, i32* nocapture readonly %b, i32 %N) { +define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, ptr nocapture readonly %b, i32 %N) { ; CHECK-LABEL: test_acc_scalar_int: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 @@ -304,10 +304,10 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %4, %vector.body ] - %0 = getelementptr inbounds i32, i32* %b, i32 %index + %0 = getelementptr inbounds i32, ptr %b, i32 %index %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = mul nsw <4 x i32> %wide.masked.load, %broadcast.splat12 %4 = add nsw <4 x i32> %3, %vec.phi %index.next = add i32 %index, 4 @@ -324,7 +324,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr ret i32 %res.0.lcssa } -define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly %a, i8* nocapture readonly %b, i8 zeroext %c, i32* nocapture %res, i32 %N) { +define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(ptr nocapture readonly %a, ptr nocapture readonly %b, i8 zeroext %c, ptr nocapture %res, i32 %N) { ; CHECK-LABEL: test_vec_mul_scalar_add_char: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} @@ -411,21 +411,21 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly ; CHECK-NEXT: .LBB5_11: @ %for.cond.cleanup ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} entry: - %res12 = bitcast i32* %res to i8* + %res12 = bitcast ptr %res to ptr %cmp10 = icmp eq i32 %N, 0 br i1 %cmp10, label %for.cond.cleanup, label %for.body.lr.ph for.body.lr.ph: ; preds = %entry %conv3 = zext i8 %c to i32 - %scevgep = getelementptr i32, i32* %res, i32 %N - %scevgep13 = bitcast i32* %scevgep to i8* - %scevgep14 = getelementptr i8, i8* %a, i32 %N - %scevgep15 = getelementptr i8, i8* %b, i32 %N - %bound0 = icmp ugt i8* %scevgep14, %res12 - %bound1 = icmp ugt i8* %scevgep13, %a + %scevgep = getelementptr i32, ptr %res, i32 %N + %scevgep13 = bitcast ptr %scevgep to ptr + %scevgep14 = getelementptr i8, ptr %a, i32 %N + %scevgep15 = getelementptr i8, ptr %b, i32 %N + %bound0 = icmp ugt ptr %scevgep14, %res12 + %bound1 = icmp ugt ptr %scevgep13, %a %found.conflict = and i1 %bound0, %bound1 - %bound016 = icmp ugt i8* %scevgep15, %res12 - %bound117 = icmp ugt i8* %scevgep13, %b + %bound016 = icmp ugt ptr %scevgep15, %res12 + %bound117 = icmp ugt ptr %scevgep13, %b %found.conflict18 = and i1 %bound016, %bound117 %conflict.rdx = or i1 %found.conflict, %found.conflict18 br i1 %conflict.rdx, label %for.body.preheader, label %vector.ph @@ -449,20 +449,20 @@ vector.ph: ; preds = %for.body.lr.ph vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %2 = getelementptr inbounds i8, i8* %a, i32 %index + %2 = getelementptr inbounds i8, ptr %a, i32 %index %3 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %4 = bitcast i8* %2 to <4 x i8>* - %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %4, i32 1, <4 x i1> %3, <4 x i8> undef) + %4 = bitcast ptr %2 to ptr + %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %4, i32 1, <4 x i1> %3, <4 x i8> undef) %5 = zext <4 x i8> %wide.masked.load to <4 x i32> - %6 = getelementptr inbounds i8, i8* %b, i32 %index - %7 = bitcast i8* %6 to <4 x i8>* - %wide.masked.load21 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %7, i32 1, <4 x i1> %3, <4 x i8> undef) + %6 = getelementptr inbounds i8, ptr %b, i32 %index + %7 = bitcast ptr %6 to ptr + %wide.masked.load21 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %7, i32 1, <4 x i1> %3, <4 x i8> undef) %8 = zext <4 x i8> %wide.masked.load21 to <4 x i32> %9 = mul nuw nsw <4 x i32> %8, %5 %10 = add nuw nsw <4 x i32> %9, %broadcast.splat23 - %11 = getelementptr inbounds i32, i32* %res, i32 %index - %12 = bitcast i32* %11 to <4 x i32>* - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %12, i32 4, <4 x i1> %3) + %11 = getelementptr inbounds i32, ptr %res, i32 %index + %12 = bitcast ptr %11 to ptr + call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %12, i32 4, <4 x i1> %3) %index.next = add i32 %index, 4 %13 = icmp eq i32 %index.next, %n.vec br i1 %13, label %for.cond.cleanup, label %vector.body @@ -475,16 +475,16 @@ for.cond.cleanup.loopexit.unr-lcssa: ; preds = %for.body, %for.body for.body.epil: ; preds = %for.cond.cleanup.loopexit.unr-lcssa, %for.body.epil %i.011.epil = phi i32 [ %inc.epil, %for.body.epil ], [ %i.011.unr, %for.cond.cleanup.loopexit.unr-lcssa ] %epil.iter = phi i32 [ %epil.iter.sub, %for.body.epil ], [ %xtraiter, %for.cond.cleanup.loopexit.unr-lcssa ] - %arrayidx.epil = getelementptr inbounds i8, i8* %a, i32 %i.011.epil - %14 = load i8, i8* %arrayidx.epil, align 1 + %arrayidx.epil = getelementptr inbounds i8, ptr %a, i32 %i.011.epil + %14 = load i8, ptr %arrayidx.epil, align 1 %conv.epil = zext i8 %14 to i32 - %arrayidx1.epil = getelementptr inbounds i8, i8* %b, i32 %i.011.epil - %15 = load i8, i8* %arrayidx1.epil, align 1 + %arrayidx1.epil = getelementptr inbounds i8, ptr %b, i32 %i.011.epil + %15 = load i8, ptr %arrayidx1.epil, align 1 %conv2.epil = zext i8 %15 to i32 %mul.epil = mul nuw nsw i32 %conv2.epil, %conv.epil %add.epil = add nuw nsw i32 %mul.epil, %conv3 - %arrayidx4.epil = getelementptr inbounds i32, i32* %res, i32 %i.011.epil - store i32 %add.epil, i32* %arrayidx4.epil, align 4 + %arrayidx4.epil = getelementptr inbounds i32, ptr %res, i32 %i.011.epil + store i32 %add.epil, ptr %arrayidx4.epil, align 4 %inc.epil = add nuw i32 %i.011.epil, 1 %epil.iter.sub = add i32 %epil.iter, -1 %epil.iter.cmp = icmp eq i32 %epil.iter.sub, 0 @@ -496,56 +496,56 @@ for.cond.cleanup: ; preds = %vector.body, %for.c for.body: ; preds = %for.body, %for.body.preheader.new %i.011 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ] %niter = phi i32 [ %unroll_iter, %for.body.preheader.new ], [ %niter.nsub.3, %for.body ] - %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.011 - %16 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, ptr %a, i32 %i.011 + %16 = load i8, ptr %arrayidx, align 1 %conv = zext i8 %16 to i32 - %arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.011 - %17 = load i8, i8* %arrayidx1, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %b, i32 %i.011 + %17 = load i8, ptr %arrayidx1, align 1 %conv2 = zext i8 %17 to i32 %mul = mul nuw nsw i32 %conv2, %conv %add = add nuw nsw i32 %mul, %conv3 - %arrayidx4 = getelementptr inbounds i32, i32* %res, i32 %i.011 - store i32 %add, i32* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %res, i32 %i.011 + store i32 %add, ptr %arrayidx4, align 4 %inc = or disjoint i32 %i.011, 1 - %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %inc - %18 = load i8, i8* %arrayidx.1, align 1 + %arrayidx.1 = getelementptr inbounds i8, ptr %a, i32 %inc + %18 = load i8, ptr %arrayidx.1, align 1 %conv.1 = zext i8 %18 to i32 - %arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %inc - %19 = load i8, i8* %arrayidx1.1, align 1 + %arrayidx1.1 = getelementptr inbounds i8, ptr %b, i32 %inc + %19 = load i8, ptr %arrayidx1.1, align 1 %conv2.1 = zext i8 %19 to i32 %mul.1 = mul nuw nsw i32 %conv2.1, %conv.1 %add.1 = add nuw nsw i32 %mul.1, %conv3 - %arrayidx4.1 = getelementptr inbounds i32, i32* %res, i32 %inc - store i32 %add.1, i32* %arrayidx4.1, align 4 + %arrayidx4.1 = getelementptr inbounds i32, ptr %res, i32 %inc + store i32 %add.1, ptr %arrayidx4.1, align 4 %inc.1 = or disjoint i32 %i.011, 2 - %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %inc.1 - %20 = load i8, i8* %arrayidx.2, align 1 + %arrayidx.2 = getelementptr inbounds i8, ptr %a, i32 %inc.1 + %20 = load i8, ptr %arrayidx.2, align 1 %conv.2 = zext i8 %20 to i32 - %arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %inc.1 - %21 = load i8, i8* %arrayidx1.2, align 1 + %arrayidx1.2 = getelementptr inbounds i8, ptr %b, i32 %inc.1 + %21 = load i8, ptr %arrayidx1.2, align 1 %conv2.2 = zext i8 %21 to i32 %mul.2 = mul nuw nsw i32 %conv2.2, %conv.2 %add.2 = add nuw nsw i32 %mul.2, %conv3 - %arrayidx4.2 = getelementptr inbounds i32, i32* %res, i32 %inc.1 - store i32 %add.2, i32* %arrayidx4.2, align 4 + %arrayidx4.2 = getelementptr inbounds i32, ptr %res, i32 %inc.1 + store i32 %add.2, ptr %arrayidx4.2, align 4 %inc.2 = or disjoint i32 %i.011, 3 - %arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %inc.2 - %22 = load i8, i8* %arrayidx.3, align 1 + %arrayidx.3 = getelementptr inbounds i8, ptr %a, i32 %inc.2 + %22 = load i8, ptr %arrayidx.3, align 1 %conv.3 = zext i8 %22 to i32 - %arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %inc.2 - %23 = load i8, i8* %arrayidx1.3, align 1 + %arrayidx1.3 = getelementptr inbounds i8, ptr %b, i32 %inc.2 + %23 = load i8, ptr %arrayidx1.3, align 1 %conv2.3 = zext i8 %23 to i32 %mul.3 = mul nuw nsw i32 %conv2.3, %conv.3 %add.3 = add nuw nsw i32 %mul.3, %conv3 - %arrayidx4.3 = getelementptr inbounds i32, i32* %res, i32 %inc.2 - store i32 %add.3, i32* %arrayidx4.3, align 4 + %arrayidx4.3 = getelementptr inbounds i32, ptr %res, i32 %inc.2 + store i32 %add.3, ptr %arrayidx4.3, align 4 %inc.3 = add nuw i32 %i.011, 4 %niter.nsub.3 = add i32 %niter, -4 %niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0 br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body } -define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_short(i16* nocapture readonly %a, i16* nocapture readonly %b, i16 signext %c, i32* nocapture %res, i32 %N) { +define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_short(ptr nocapture readonly %a, ptr nocapture readonly %b, i16 signext %c, ptr nocapture %res, i32 %N) { ; CHECK-LABEL: test_vec_mul_scalar_add_short: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r4, lr} @@ -578,20 +578,20 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %0 = getelementptr inbounds i16, i16* %a, i32 %index + %0 = getelementptr inbounds i16, ptr %a, i32 %index %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = sext <4 x i16> %wide.masked.load to <4 x i32> - %4 = getelementptr inbounds i16, i16* %b, i32 %index - %5 = bitcast i16* %4 to <4 x i16>* - %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %5, i32 2, <4 x i1> %1, <4 x i16> undef) + %4 = getelementptr inbounds i16, ptr %b, i32 %index + %5 = bitcast ptr %4 to ptr + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %5, i32 2, <4 x i1> %1, <4 x i16> undef) %6 = sext <4 x i16> %wide.masked.load14 to <4 x i32> %7 = mul nsw <4 x i32> %6, %3 %8 = add nsw <4 x i32> %7, %broadcast.splat16 - %9 = getelementptr inbounds i32, i32* %res, i32 %index - %10 = bitcast i32* %9 to <4 x i32>* - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %8, <4 x i32>* %10, i32 4, <4 x i1> %1) + %9 = getelementptr inbounds i32, ptr %res, i32 %index + %10 = bitcast ptr %9 to ptr + call void @llvm.masked.store.v4i32.p0(<4 x i32> %8, ptr %10, i32 4, <4 x i1> %1) %index.next = add i32 %index, 4 %11 = icmp eq i32 %index.next, %n.vec br i1 %11, label %for.cond.cleanup, label %vector.body @@ -600,7 +600,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonly %a, i8* nocapture readonly %b, i8 zeroext %c, i32* nocapture %res, i32 %N) { +define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(ptr nocapture readonly %a, ptr nocapture readonly %b, i8 zeroext %c, ptr nocapture %res, i32 %N) { ; CHECK-LABEL: test_vec_mul_scalar_add_uchar: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} @@ -687,21 +687,21 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl ; CHECK-NEXT: .LBB7_11: @ %for.cond.cleanup ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} entry: - %res12 = bitcast i32* %res to i8* + %res12 = bitcast ptr %res to ptr %cmp10 = icmp eq i32 %N, 0 br i1 %cmp10, label %for.cond.cleanup, label %for.body.lr.ph for.body.lr.ph: ; preds = %entry %conv3 = zext i8 %c to i32 - %scevgep = getelementptr i32, i32* %res, i32 %N - %scevgep13 = bitcast i32* %scevgep to i8* - %scevgep14 = getelementptr i8, i8* %a, i32 %N - %scevgep15 = getelementptr i8, i8* %b, i32 %N - %bound0 = icmp ugt i8* %scevgep14, %res12 - %bound1 = icmp ugt i8* %scevgep13, %a + %scevgep = getelementptr i32, ptr %res, i32 %N + %scevgep13 = bitcast ptr %scevgep to ptr + %scevgep14 = getelementptr i8, ptr %a, i32 %N + %scevgep15 = getelementptr i8, ptr %b, i32 %N + %bound0 = icmp ugt ptr %scevgep14, %res12 + %bound1 = icmp ugt ptr %scevgep13, %a %found.conflict = and i1 %bound0, %bound1 - %bound016 = icmp ugt i8* %scevgep15, %res12 - %bound117 = icmp ugt i8* %scevgep13, %b + %bound016 = icmp ugt ptr %scevgep15, %res12 + %bound117 = icmp ugt ptr %scevgep13, %b %found.conflict18 = and i1 %bound016, %bound117 %conflict.rdx = or i1 %found.conflict, %found.conflict18 br i1 %conflict.rdx, label %for.body.preheader, label %vector.ph @@ -725,20 +725,20 @@ vector.ph: ; preds = %for.body.lr.ph vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %2 = getelementptr inbounds i8, i8* %a, i32 %index + %2 = getelementptr inbounds i8, ptr %a, i32 %index %3 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %4 = bitcast i8* %2 to <4 x i8>* - %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %4, i32 1, <4 x i1> %3, <4 x i8> undef) + %4 = bitcast ptr %2 to ptr + %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %4, i32 1, <4 x i1> %3, <4 x i8> undef) %5 = zext <4 x i8> %wide.masked.load to <4 x i32> - %6 = getelementptr inbounds i8, i8* %b, i32 %index - %7 = bitcast i8* %6 to <4 x i8>* - %wide.masked.load21 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %7, i32 1, <4 x i1> %3, <4 x i8> undef) + %6 = getelementptr inbounds i8, ptr %b, i32 %index + %7 = bitcast ptr %6 to ptr + %wide.masked.load21 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %7, i32 1, <4 x i1> %3, <4 x i8> undef) %8 = zext <4 x i8> %wide.masked.load21 to <4 x i32> %9 = mul nuw nsw <4 x i32> %8, %5 %10 = add nuw nsw <4 x i32> %9, %broadcast.splat23 - %11 = getelementptr inbounds i32, i32* %res, i32 %index - %12 = bitcast i32* %11 to <4 x i32>* - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %12, i32 4, <4 x i1> %3) + %11 = getelementptr inbounds i32, ptr %res, i32 %index + %12 = bitcast ptr %11 to ptr + call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %12, i32 4, <4 x i1> %3) %index.next = add i32 %index, 4 %13 = icmp eq i32 %index.next, %n.vec br i1 %13, label %for.cond.cleanup, label %vector.body @@ -751,16 +751,16 @@ for.cond.cleanup.loopexit.unr-lcssa: ; preds = %for.body, %for.body for.body.epil: ; preds = %for.cond.cleanup.loopexit.unr-lcssa, %for.body.epil %i.011.epil = phi i32 [ %inc.epil, %for.body.epil ], [ %i.011.unr, %for.cond.cleanup.loopexit.unr-lcssa ] %epil.iter = phi i32 [ %epil.iter.sub, %for.body.epil ], [ %xtraiter, %for.cond.cleanup.loopexit.unr-lcssa ] - %arrayidx.epil = getelementptr inbounds i8, i8* %a, i32 %i.011.epil - %14 = load i8, i8* %arrayidx.epil, align 1 + %arrayidx.epil = getelementptr inbounds i8, ptr %a, i32 %i.011.epil + %14 = load i8, ptr %arrayidx.epil, align 1 %conv.epil = zext i8 %14 to i32 - %arrayidx1.epil = getelementptr inbounds i8, i8* %b, i32 %i.011.epil - %15 = load i8, i8* %arrayidx1.epil, align 1 + %arrayidx1.epil = getelementptr inbounds i8, ptr %b, i32 %i.011.epil + %15 = load i8, ptr %arrayidx1.epil, align 1 %conv2.epil = zext i8 %15 to i32 %mul.epil = mul nuw nsw i32 %conv2.epil, %conv.epil %add.epil = add nuw nsw i32 %mul.epil, %conv3 - %arrayidx4.epil = getelementptr inbounds i32, i32* %res, i32 %i.011.epil - store i32 %add.epil, i32* %arrayidx4.epil, align 4 + %arrayidx4.epil = getelementptr inbounds i32, ptr %res, i32 %i.011.epil + store i32 %add.epil, ptr %arrayidx4.epil, align 4 %inc.epil = add nuw i32 %i.011.epil, 1 %epil.iter.sub = add i32 %epil.iter, -1 %epil.iter.cmp = icmp eq i32 %epil.iter.sub, 0 @@ -772,56 +772,56 @@ for.cond.cleanup: ; preds = %vector.body, %for.c for.body: ; preds = %for.body, %for.body.preheader.new %i.011 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ] %niter = phi i32 [ %unroll_iter, %for.body.preheader.new ], [ %niter.nsub.3, %for.body ] - %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.011 - %16 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, ptr %a, i32 %i.011 + %16 = load i8, ptr %arrayidx, align 1 %conv = zext i8 %16 to i32 - %arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.011 - %17 = load i8, i8* %arrayidx1, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %b, i32 %i.011 + %17 = load i8, ptr %arrayidx1, align 1 %conv2 = zext i8 %17 to i32 %mul = mul nuw nsw i32 %conv2, %conv %add = add nuw nsw i32 %mul, %conv3 - %arrayidx4 = getelementptr inbounds i32, i32* %res, i32 %i.011 - store i32 %add, i32* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %res, i32 %i.011 + store i32 %add, ptr %arrayidx4, align 4 %inc = or disjoint i32 %i.011, 1 - %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %inc - %18 = load i8, i8* %arrayidx.1, align 1 + %arrayidx.1 = getelementptr inbounds i8, ptr %a, i32 %inc + %18 = load i8, ptr %arrayidx.1, align 1 %conv.1 = zext i8 %18 to i32 - %arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %inc - %19 = load i8, i8* %arrayidx1.1, align 1 + %arrayidx1.1 = getelementptr inbounds i8, ptr %b, i32 %inc + %19 = load i8, ptr %arrayidx1.1, align 1 %conv2.1 = zext i8 %19 to i32 %mul.1 = mul nuw nsw i32 %conv2.1, %conv.1 %add.1 = add nuw nsw i32 %mul.1, %conv3 - %arrayidx4.1 = getelementptr inbounds i32, i32* %res, i32 %inc - store i32 %add.1, i32* %arrayidx4.1, align 4 + %arrayidx4.1 = getelementptr inbounds i32, ptr %res, i32 %inc + store i32 %add.1, ptr %arrayidx4.1, align 4 %inc.1 = or disjoint i32 %i.011, 2 - %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %inc.1 - %20 = load i8, i8* %arrayidx.2, align 1 + %arrayidx.2 = getelementptr inbounds i8, ptr %a, i32 %inc.1 + %20 = load i8, ptr %arrayidx.2, align 1 %conv.2 = zext i8 %20 to i32 - %arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %inc.1 - %21 = load i8, i8* %arrayidx1.2, align 1 + %arrayidx1.2 = getelementptr inbounds i8, ptr %b, i32 %inc.1 + %21 = load i8, ptr %arrayidx1.2, align 1 %conv2.2 = zext i8 %21 to i32 %mul.2 = mul nuw nsw i32 %conv2.2, %conv.2 %add.2 = add nuw nsw i32 %mul.2, %conv3 - %arrayidx4.2 = getelementptr inbounds i32, i32* %res, i32 %inc.1 - store i32 %add.2, i32* %arrayidx4.2, align 4 + %arrayidx4.2 = getelementptr inbounds i32, ptr %res, i32 %inc.1 + store i32 %add.2, ptr %arrayidx4.2, align 4 %inc.2 = or disjoint i32 %i.011, 3 - %arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %inc.2 - %22 = load i8, i8* %arrayidx.3, align 1 + %arrayidx.3 = getelementptr inbounds i8, ptr %a, i32 %inc.2 + %22 = load i8, ptr %arrayidx.3, align 1 %conv.3 = zext i8 %22 to i32 - %arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %inc.2 - %23 = load i8, i8* %arrayidx1.3, align 1 + %arrayidx1.3 = getelementptr inbounds i8, ptr %b, i32 %inc.2 + %23 = load i8, ptr %arrayidx1.3, align 1 %conv2.3 = zext i8 %23 to i32 %mul.3 = mul nuw nsw i32 %conv2.3, %conv.3 %add.3 = add nuw nsw i32 %mul.3, %conv3 - %arrayidx4.3 = getelementptr inbounds i32, i32* %res, i32 %inc.2 - store i32 %add.3, i32* %arrayidx4.3, align 4 + %arrayidx4.3 = getelementptr inbounds i32, ptr %res, i32 %inc.2 + store i32 %add.3, ptr %arrayidx4.3, align 4 %inc.3 = add nuw i32 %i.011, 4 %niter.nsub.3 = add i32 %niter, -4 %niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0 br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body } -define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_ushort(i16* nocapture readonly %a, i16* nocapture readonly %b, i16 signext %c, i32* nocapture %res, i32 %N) { +define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_ushort(ptr nocapture readonly %a, ptr nocapture readonly %b, i16 signext %c, ptr nocapture %res, i32 %N) { ; CHECK-LABEL: test_vec_mul_scalar_add_ushort: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r4, lr} @@ -854,20 +854,20 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %0 = getelementptr inbounds i16, i16* %a, i32 %index + %0 = getelementptr inbounds i16, ptr %a, i32 %index %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <4 x i16>* - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %2, i32 2, <4 x i1> %1, <4 x i16> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %2, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = zext <4 x i16> %wide.masked.load to <4 x i32> - %4 = getelementptr inbounds i16, i16* %b, i32 %index - %5 = bitcast i16* %4 to <4 x i16>* - %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %5, i32 2, <4 x i1> %1, <4 x i16> undef) + %4 = getelementptr inbounds i16, ptr %b, i32 %index + %5 = bitcast ptr %4 to ptr + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %5, i32 2, <4 x i1> %1, <4 x i16> undef) %6 = zext <4 x i16> %wide.masked.load14 to <4 x i32> %7 = mul nuw nsw <4 x i32> %6, %3 %8 = add nsw <4 x i32> %7, %broadcast.splat16 - %9 = getelementptr inbounds i32, i32* %res, i32 %index - %10 = bitcast i32* %9 to <4 x i32>* - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %8, <4 x i32>* %10, i32 4, <4 x i1> %1) + %9 = getelementptr inbounds i32, ptr %res, i32 %index + %10 = bitcast ptr %9 to ptr + call void @llvm.masked.store.v4i32.p0(<4 x i32> %8, ptr %10, i32 4, <4 x i1> %1) %index.next = add i32 %index, 4 %11 = icmp eq i32 %index.next, %n.vec br i1 %11, label %for.cond.cleanup, label %vector.body @@ -876,7 +876,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %c, i32* nocapture %res, i32 %N) { +define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %c, ptr nocapture %res, i32 %N) { ; CHECK-LABEL: test_vec_mul_scalar_add_int: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} @@ -969,14 +969,14 @@ entry: br i1 %cmp8, label %for.cond.cleanup, label %vector.memcheck vector.memcheck: ; preds = %entry - %scevgep = getelementptr i32, i32* %res, i32 %N - %scevgep13 = getelementptr i32, i32* %a, i32 %N - %scevgep16 = getelementptr i32, i32* %b, i32 %N - %bound0 = icmp ugt i32* %scevgep13, %res - %bound1 = icmp ugt i32* %scevgep, %a + %scevgep = getelementptr i32, ptr %res, i32 %N + %scevgep13 = getelementptr i32, ptr %a, i32 %N + %scevgep16 = getelementptr i32, ptr %b, i32 %N + %bound0 = icmp ugt ptr %scevgep13, %res + %bound1 = icmp ugt ptr %scevgep, %a %found.conflict = and i1 %bound0, %bound1 - %bound018 = icmp ugt i32* %scevgep16, %res - %bound119 = icmp ugt i32* %scevgep, %b + %bound018 = icmp ugt ptr %scevgep16, %res + %bound119 = icmp ugt ptr %scevgep, %b %found.conflict20 = and i1 %bound018, %bound119 %conflict.rdx = or i1 %found.conflict, %found.conflict20 br i1 %conflict.rdx, label %for.body.preheader, label %vector.ph @@ -1000,18 +1000,18 @@ vector.ph: ; preds = %vector.memcheck vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %2 = getelementptr inbounds i32, i32* %a, i32 %index + %2 = getelementptr inbounds i32, ptr %a, i32 %index %3 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %4 = bitcast i32* %2 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %4, i32 4, <4 x i1> %3, <4 x i32> undef) - %5 = getelementptr inbounds i32, i32* %b, i32 %index - %6 = bitcast i32* %5 to <4 x i32>* - %wide.masked.load23 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %6, i32 4, <4 x i1> %3, <4 x i32> undef) + %4 = bitcast ptr %2 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %4, i32 4, <4 x i1> %3, <4 x i32> undef) + %5 = getelementptr inbounds i32, ptr %b, i32 %index + %6 = bitcast ptr %5 to ptr + %wide.masked.load23 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %6, i32 4, <4 x i1> %3, <4 x i32> undef) %7 = mul nsw <4 x i32> %wide.masked.load23, %wide.masked.load %8 = add nsw <4 x i32> %7, %broadcast.splat25 - %9 = getelementptr inbounds i32, i32* %res, i32 %index - %10 = bitcast i32* %9 to <4 x i32>* - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %8, <4 x i32>* %10, i32 4, <4 x i1> %3) + %9 = getelementptr inbounds i32, ptr %res, i32 %index + %10 = bitcast ptr %9 to ptr + call void @llvm.masked.store.v4i32.p0(<4 x i32> %8, ptr %10, i32 4, <4 x i1> %3) %index.next = add i32 %index, 4 %11 = icmp eq i32 %index.next, %n.vec br i1 %11, label %for.cond.cleanup, label %vector.body @@ -1024,14 +1024,14 @@ for.cond.cleanup.loopexit.unr-lcssa: ; preds = %for.body, %for.body for.body.epil: ; preds = %for.cond.cleanup.loopexit.unr-lcssa, %for.body.epil %i.09.epil = phi i32 [ %inc.epil, %for.body.epil ], [ %i.09.unr, %for.cond.cleanup.loopexit.unr-lcssa ] %epil.iter = phi i32 [ %epil.iter.sub, %for.body.epil ], [ %xtraiter, %for.cond.cleanup.loopexit.unr-lcssa ] - %arrayidx.epil = getelementptr inbounds i32, i32* %a, i32 %i.09.epil - %12 = load i32, i32* %arrayidx.epil, align 4 - %arrayidx1.epil = getelementptr inbounds i32, i32* %b, i32 %i.09.epil - %13 = load i32, i32* %arrayidx1.epil, align 4 + %arrayidx.epil = getelementptr inbounds i32, ptr %a, i32 %i.09.epil + %12 = load i32, ptr %arrayidx.epil, align 4 + %arrayidx1.epil = getelementptr inbounds i32, ptr %b, i32 %i.09.epil + %13 = load i32, ptr %arrayidx1.epil, align 4 %mul.epil = mul nsw i32 %13, %12 %add.epil = add nsw i32 %mul.epil, %c - %arrayidx2.epil = getelementptr inbounds i32, i32* %res, i32 %i.09.epil - store i32 %add.epil, i32* %arrayidx2.epil, align 4 + %arrayidx2.epil = getelementptr inbounds i32, ptr %res, i32 %i.09.epil + store i32 %add.epil, ptr %arrayidx2.epil, align 4 %inc.epil = add nuw i32 %i.09.epil, 1 %epil.iter.sub = add i32 %epil.iter, -1 %epil.iter.cmp = icmp eq i32 %epil.iter.sub, 0 @@ -1043,48 +1043,48 @@ for.cond.cleanup: ; preds = %vector.body, %for.c for.body: ; preds = %for.body, %for.body.preheader.new %i.09 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ] %niter = phi i32 [ %unroll_iter, %for.body.preheader.new ], [ %niter.nsub.3, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.09 - %14 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %b, i32 %i.09 - %15 = load i32, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.09 + %14 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %b, i32 %i.09 + %15 = load i32, ptr %arrayidx1, align 4 %mul = mul nsw i32 %15, %14 %add = add nsw i32 %mul, %c - %arrayidx2 = getelementptr inbounds i32, i32* %res, i32 %i.09 - store i32 %add, i32* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %res, i32 %i.09 + store i32 %add, ptr %arrayidx2, align 4 %inc = or disjoint i32 %i.09, 1 - %arrayidx.1 = getelementptr inbounds i32, i32* %a, i32 %inc - %16 = load i32, i32* %arrayidx.1, align 4 - %arrayidx1.1 = getelementptr inbounds i32, i32* %b, i32 %inc - %17 = load i32, i32* %arrayidx1.1, align 4 + %arrayidx.1 = getelementptr inbounds i32, ptr %a, i32 %inc + %16 = load i32, ptr %arrayidx.1, align 4 + %arrayidx1.1 = getelementptr inbounds i32, ptr %b, i32 %inc + %17 = load i32, ptr %arrayidx1.1, align 4 %mul.1 = mul nsw i32 %17, %16 %add.1 = add nsw i32 %mul.1, %c - %arrayidx2.1 = getelementptr inbounds i32, i32* %res, i32 %inc - store i32 %add.1, i32* %arrayidx2.1, align 4 + %arrayidx2.1 = getelementptr inbounds i32, ptr %res, i32 %inc + store i32 %add.1, ptr %arrayidx2.1, align 4 %inc.1 = or disjoint i32 %i.09, 2 - %arrayidx.2 = getelementptr inbounds i32, i32* %a, i32 %inc.1 - %18 = load i32, i32* %arrayidx.2, align 4 - %arrayidx1.2 = getelementptr inbounds i32, i32* %b, i32 %inc.1 - %19 = load i32, i32* %arrayidx1.2, align 4 + %arrayidx.2 = getelementptr inbounds i32, ptr %a, i32 %inc.1 + %18 = load i32, ptr %arrayidx.2, align 4 + %arrayidx1.2 = getelementptr inbounds i32, ptr %b, i32 %inc.1 + %19 = load i32, ptr %arrayidx1.2, align 4 %mul.2 = mul nsw i32 %19, %18 %add.2 = add nsw i32 %mul.2, %c - %arrayidx2.2 = getelementptr inbounds i32, i32* %res, i32 %inc.1 - store i32 %add.2, i32* %arrayidx2.2, align 4 + %arrayidx2.2 = getelementptr inbounds i32, ptr %res, i32 %inc.1 + store i32 %add.2, ptr %arrayidx2.2, align 4 %inc.2 = or disjoint i32 %i.09, 3 - %arrayidx.3 = getelementptr inbounds i32, i32* %a, i32 %inc.2 - %20 = load i32, i32* %arrayidx.3, align 4 - %arrayidx1.3 = getelementptr inbounds i32, i32* %b, i32 %inc.2 - %21 = load i32, i32* %arrayidx1.3, align 4 + %arrayidx.3 = getelementptr inbounds i32, ptr %a, i32 %inc.2 + %20 = load i32, ptr %arrayidx.3, align 4 + %arrayidx1.3 = getelementptr inbounds i32, ptr %b, i32 %inc.2 + %21 = load i32, ptr %arrayidx1.3, align 4 %mul.3 = mul nsw i32 %21, %20 %add.3 = add nsw i32 %mul.3, %c - %arrayidx2.3 = getelementptr inbounds i32, i32* %res, i32 %inc.2 - store i32 %add.3, i32* %arrayidx2.3, align 4 + %arrayidx2.3 = getelementptr inbounds i32, ptr %res, i32 %inc.2 + store i32 %add.3, ptr %arrayidx2.3, align 4 %inc.3 = add nuw i32 %i.09, 4 %niter.nsub.3 = add i32 %niter, -4 %niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0 br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body } -define dso_local arm_aapcs_vfpcc void @test_v8i8_to_v8i16(i16* noalias nocapture %a, i8* nocapture readonly %b, i8* nocapture readonly %c, i32 %N) { +define dso_local arm_aapcs_vfpcc void @test_v8i8_to_v8i16(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) { ; CHECK-LABEL: test_v8i8_to_v8i16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} @@ -1113,19 +1113,19 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %0 = getelementptr inbounds i8, i8* %b, i32 %index + %0 = getelementptr inbounds i8, ptr %b, i32 %index %1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <8 x i8>* - %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %2, i32 1, <8 x i1> %1, <8 x i8> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %2, i32 1, <8 x i1> %1, <8 x i8> undef) %3 = zext <8 x i8> %wide.masked.load to <8 x i16> - %4 = getelementptr inbounds i8, i8* %c, i32 %index - %5 = bitcast i8* %4 to <8 x i8>* - %wide.masked.load14 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %5, i32 1, <8 x i1> %1, <8 x i8> undef) + %4 = getelementptr inbounds i8, ptr %c, i32 %index + %5 = bitcast ptr %4 to ptr + %wide.masked.load14 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %5, i32 1, <8 x i1> %1, <8 x i8> undef) %6 = zext <8 x i8> %wide.masked.load14 to <8 x i16> %7 = mul nuw <8 x i16> %6, %3 - %8 = getelementptr inbounds i16, i16* %a, i32 %index - %9 = bitcast i16* %8 to <8 x i16>* - call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %7, <8 x i16>* %9, i32 2, <8 x i1> %1) + %8 = getelementptr inbounds i16, ptr %a, i32 %index + %9 = bitcast ptr %8 to ptr + call void @llvm.masked.store.v8i16.p0(<8 x i16> %7, ptr %9, i32 2, <8 x i1> %1) %index.next = add i32 %index, 8 %10 = icmp eq i32 %index.next, %n.vec br i1 %10, label %for.cond.cleanup, label %vector.body @@ -1134,12 +1134,12 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>) -declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>) -declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) -declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) -declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) -declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) +declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>) +declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>) +declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) +declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>) +declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir index b599829..0c50a95 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-cbnz.mir @@ -3,68 +3,68 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=-lob %s -run-pass=arm-cp-islands --verify-machineinstrs -o - | FileCheck %s --check-prefix=CHECK-NOLOB --- | - %struct.head_s = type { %struct.head_s*, %struct.data_s* } + %struct.head_s = type { ptr, ptr } %struct.data_s = type { i16, i16 } - define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr { + define dso_local arm_aapcscc ptr @search(ptr readonly %list, ptr nocapture readonly %info) local_unnamed_addr { entry: - %idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1 - %0 = load i16, i16* %idx, align 2 + %idx = getelementptr inbounds %struct.data_s, ptr %info, i32 0, i32 1 + %0 = load i16, ptr %idx, align 2 %cmp = icmp sgt i16 %0, -1 br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader while.cond9.preheader: ; preds = %entry - %1 = icmp eq %struct.head_s* %list, null + %1 = icmp eq ptr %list, null br i1 %1, label %return, label %land.rhs11.lr.ph land.rhs11.lr.ph: ; preds = %while.cond9.preheader - %data16143 = bitcast %struct.data_s* %info to i16* - %2 = load i16, i16* %data16143, align 2 + %data16143 = bitcast ptr %info to ptr + %2 = load i16, ptr %data16143, align 2 %conv15 = sext i16 %2 to i32 br label %land.rhs11 while.cond.preheader: ; preds = %entry - %3 = icmp eq %struct.head_s* %list, null + %3 = icmp eq ptr %list, null br i1 %3, label %return, label %land.rhs.preheader land.rhs.preheader: ; preds = %while.cond.preheader br label %land.rhs land.rhs: ; preds = %land.rhs.preheader, %while.body - %list.addr.033 = phi %struct.head_s* [ %6, %while.body ], [ %list, %land.rhs.preheader ] - %info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1 - %4 = load %struct.data_s*, %struct.data_s** %info2, align 4 - %idx3 = getelementptr inbounds %struct.data_s, %struct.data_s* %4, i32 0, i32 1 - %5 = load i16, i16* %idx3, align 2 + %list.addr.033 = phi ptr [ %6, %while.body ], [ %list, %land.rhs.preheader ] + %info2 = getelementptr inbounds %struct.head_s, ptr %list.addr.033, i32 0, i32 1 + %4 = load ptr, ptr %info2, align 4 + %idx3 = getelementptr inbounds %struct.data_s, ptr %4, i32 0, i32 1 + %5 = load i16, ptr %idx3, align 2 %cmp7 = icmp eq i16 %5, %0 br i1 %cmp7, label %return, label %while.body while.body: ; preds = %land.rhs - %next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s** - %6 = load %struct.head_s*, %struct.head_s** %next4, align 4 - %tobool = icmp ne %struct.head_s* %6, null + %next4 = bitcast ptr %list.addr.033 to ptr + %6 = load ptr, ptr %next4, align 4 + %tobool = icmp ne ptr %6, null br i1 %tobool, label %return, label %land.rhs land.rhs11: ; preds = %while.body19, %land.rhs11.lr.ph - %list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %10, %while.body19 ] - %info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1 - %7 = load %struct.data_s*, %struct.data_s** %info12, align 4 - %data165 = bitcast %struct.data_s* %7 to i16* - %8 = load i16, i16* %data165, align 2 + %list.addr.136 = phi ptr [ %list, %land.rhs11.lr.ph ], [ %10, %while.body19 ] + %info12 = getelementptr inbounds %struct.head_s, ptr %list.addr.136, i32 0, i32 1 + %7 = load ptr, ptr %info12, align 4 + %data165 = bitcast ptr %7 to ptr + %8 = load i16, ptr %data165, align 2 %9 = and i16 %8, 255 %and = zext i16 %9 to i32 %cmp16 = icmp eq i32 %and, %conv15 br i1 %cmp16, label %return, label %while.body19 while.body19: ; preds = %land.rhs11 - %next206 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s** - %10 = load %struct.head_s*, %struct.head_s** %next206, align 4 - %tobool10 = icmp eq %struct.head_s* %10, null + %next206 = bitcast ptr %list.addr.136 to ptr + %10 = load ptr, ptr %next206, align 4 + %tobool10 = icmp eq ptr %10, null br i1 %tobool10, label %return, label %land.rhs11 return: ; preds = %while.body19, %land.rhs11, %while.body, %land.rhs, %while.cond.preheader, %while.cond9.preheader - %retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ null, %while.body ], [ %list.addr.033, %land.rhs ], [ null, %while.body19 ], [ %list.addr.136, %land.rhs11 ] - ret %struct.head_s* %retval.0 + %retval.0 = phi ptr [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ null, %while.body ], [ %list.addr.033, %land.rhs ], [ null, %while.body19 ], [ %list.addr.136, %land.rhs11 ] + ret ptr %retval.0 } ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir index 465d080..0ea3b26 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-reorder.mir @@ -2,68 +2,68 @@ # RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-cp-islands %s -o - --verify-machineinstrs | FileCheck %s --- | - %struct.head_s = type { %struct.head_s*, %struct.data_s* } + %struct.head_s = type { ptr, ptr } %struct.data_s = type { i16, i16 } - define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr { + define dso_local arm_aapcscc ptr @search(ptr readonly %list, ptr nocapture readonly %info) local_unnamed_addr { entry: - %idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1 - %tmp = load i16, i16* %idx, align 2 + %idx = getelementptr inbounds %struct.data_s, ptr %info, i32 0, i32 1 + %tmp = load i16, ptr %idx, align 2 %cmp = icmp sgt i16 %tmp, -1 br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader while.cond9.preheader: ; preds = %entry - %0 = icmp eq %struct.head_s* %list, null + %0 = icmp eq ptr %list, null br i1 %0, label %return, label %land.rhs11.lr.ph land.rhs11.lr.ph: ; preds = %while.cond9.preheader - %data16143 = bitcast %struct.data_s* %info to i16* - %tmp1 = load i16, i16* %data16143, align 2 + %data16143 = bitcast ptr %info to ptr + %tmp1 = load i16, ptr %data16143, align 2 %conv15 = sext i16 %tmp1 to i32 br label %land.rhs11 while.cond.preheader: ; preds = %entry - %1 = icmp eq %struct.head_s* %list, null + %1 = icmp eq ptr %list, null br i1 %1, label %return, label %land.rhs.preheader land.rhs.preheader: ; preds = %while.cond.preheader br label %land.rhs while.body: ; preds = %land.rhs - %next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s** - %tmp4 = load %struct.head_s*, %struct.head_s** %next4, align 4 - %tobool = icmp eq %struct.head_s* %tmp4, null + %next4 = bitcast ptr %list.addr.033 to ptr + %tmp4 = load ptr, ptr %next4, align 4 + %tobool = icmp eq ptr %tmp4, null br i1 %tobool, label %return, label %land.rhs land.rhs: ; preds = %land.rhs.preheader, %while.body - %list.addr.033 = phi %struct.head_s* [ %tmp4, %while.body ], [ %list, %land.rhs.preheader ] - %info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1 - %tmp2 = load %struct.data_s*, %struct.data_s** %info2, align 4 - %idx3 = getelementptr inbounds %struct.data_s, %struct.data_s* %tmp2, i32 0, i32 1 - %tmp3 = load i16, i16* %idx3, align 2 + %list.addr.033 = phi ptr [ %tmp4, %while.body ], [ %list, %land.rhs.preheader ] + %info2 = getelementptr inbounds %struct.head_s, ptr %list.addr.033, i32 0, i32 1 + %tmp2 = load ptr, ptr %info2, align 4 + %idx3 = getelementptr inbounds %struct.data_s, ptr %tmp2, i32 0, i32 1 + %tmp3 = load i16, ptr %idx3, align 2 %cmp7 = icmp eq i16 %tmp3, %tmp br i1 %cmp7, label %return, label %while.body while.body19: ; preds = %land.rhs11 - %next205 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s** - %tmp8 = load %struct.head_s*, %struct.head_s** %next205, align 4 - %tobool10 = icmp eq %struct.head_s* %tmp8, null + %next205 = bitcast ptr %list.addr.136 to ptr + %tmp8 = load ptr, ptr %next205, align 4 + %tobool10 = icmp eq ptr %tmp8, null br i1 %tobool10, label %return, label %land.rhs11 land.rhs11: ; preds = %while.body19, %land.rhs11.lr.ph - %list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %tmp8, %while.body19 ] - %info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1 - %tmp5 = load %struct.data_s*, %struct.data_s** %info12, align 4 - %data166 = bitcast %struct.data_s* %tmp5 to i16* - %tmp6 = load i16, i16* %data166, align 2 + %list.addr.136 = phi ptr [ %list, %land.rhs11.lr.ph ], [ %tmp8, %while.body19 ] + %info12 = getelementptr inbounds %struct.head_s, ptr %list.addr.136, i32 0, i32 1 + %tmp5 = load ptr, ptr %info12, align 4 + %data166 = bitcast ptr %tmp5 to ptr + %tmp6 = load i16, ptr %data166, align 2 %2 = and i16 %tmp6, 255 %and = zext i16 %2 to i32 %cmp16 = icmp eq i32 %and, %conv15 br i1 %cmp16, label %return, label %while.body19 return: ; preds = %land.rhs11, %while.body19, %land.rhs, %while.body, %while.cond.preheader, %while.cond9.preheader - %retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ null, %while.body ], [ %list.addr.033, %land.rhs ], [ null, %while.body19 ], [ %list.addr.136, %land.rhs11 ] - ret %struct.head_s* %retval.0 + %retval.0 = phi ptr [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ null, %while.body ], [ %list.addr.033, %land.rhs ], [ null, %while.body19 ], [ %list.addr.136, %land.rhs11 ] + ret ptr %retval.0 } ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir index 3d53b0a..87694e3 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec.mir @@ -6,69 +6,69 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-unknown-unknown" - %struct.head_s = type { %struct.head_s*, %struct.data_s* } + %struct.head_s = type { ptr, ptr } %struct.data_s = type { i16, i16 } ; Function Attrs: norecurse nounwind readonly - define dso_local arm_aapcscc %struct.head_s* @search(%struct.head_s* readonly %list, %struct.data_s* nocapture readonly %info) local_unnamed_addr { + define dso_local arm_aapcscc ptr @search(ptr readonly %list, ptr nocapture readonly %info) local_unnamed_addr { entry: - %idx = getelementptr inbounds %struct.data_s, %struct.data_s* %info, i32 0, i32 1 - %0 = load i16, i16* %idx, align 2 + %idx = getelementptr inbounds %struct.data_s, ptr %info, i32 0, i32 1 + %0 = load i16, ptr %idx, align 2 %cmp = icmp sgt i16 %0, -1 br i1 %cmp, label %while.cond.preheader, label %while.cond9.preheader while.cond9.preheader: ; preds = %entry - %1 = icmp eq %struct.head_s* %list, null + %1 = icmp eq ptr %list, null br i1 %1, label %return, label %land.rhs11.lr.ph land.rhs11.lr.ph: ; preds = %while.cond9.preheader - %data16143 = bitcast %struct.data_s* %info to i16* - %2 = load i16, i16* %data16143, align 2 + %data16143 = bitcast ptr %info to ptr + %2 = load i16, ptr %data16143, align 2 %conv15 = sext i16 %2 to i32 br label %land.rhs11 while.cond.preheader: ; preds = %entry - %3 = icmp eq %struct.head_s* %list, null + %3 = icmp eq ptr %list, null br i1 %3, label %return, label %land.rhs.preheader land.rhs.preheader: ; preds = %while.cond.preheader br label %land.rhs land.rhs: ; preds = %land.rhs.preheader, %while.body - %list.addr.033 = phi %struct.head_s* [ %6, %while.body ], [ %list, %land.rhs.preheader ] - %info2 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.033, i32 0, i32 1 - %4 = load %struct.data_s*, %struct.data_s** %info2, align 4 - %idx3 = getelementptr inbounds %struct.data_s, %struct.data_s* %4, i32 0, i32 1 - %5 = load i16, i16* %idx3, align 2 + %list.addr.033 = phi ptr [ %6, %while.body ], [ %list, %land.rhs.preheader ] + %info2 = getelementptr inbounds %struct.head_s, ptr %list.addr.033, i32 0, i32 1 + %4 = load ptr, ptr %info2, align 4 + %idx3 = getelementptr inbounds %struct.data_s, ptr %4, i32 0, i32 1 + %5 = load i16, ptr %idx3, align 2 %cmp7 = icmp eq i16 %5, %0 br i1 %cmp7, label %return, label %while.body while.body: ; preds = %land.rhs - %next4 = bitcast %struct.head_s* %list.addr.033 to %struct.head_s** - %6 = load %struct.head_s*, %struct.head_s** %next4, align 4 - %tobool = icmp eq %struct.head_s* %6, null + %next4 = bitcast ptr %list.addr.033 to ptr + %6 = load ptr, ptr %next4, align 4 + %tobool = icmp eq ptr %6, null br i1 %tobool, label %return, label %land.rhs land.rhs11: ; preds = %while.body19, %land.rhs11.lr.ph - %list.addr.136 = phi %struct.head_s* [ %list, %land.rhs11.lr.ph ], [ %10, %while.body19 ] - %info12 = getelementptr inbounds %struct.head_s, %struct.head_s* %list.addr.136, i32 0, i32 1 - %7 = load %struct.data_s*, %struct.data_s** %info12, align 4 - %data165 = bitcast %struct.data_s* %7 to i16* - %8 = load i16, i16* %data165, align 2 + %list.addr.136 = phi ptr [ %list, %land.rhs11.lr.ph ], [ %10, %while.body19 ] + %info12 = getelementptr inbounds %struct.head_s, ptr %list.addr.136, i32 0, i32 1 + %7 = load ptr, ptr %info12, align 4 + %data165 = bitcast ptr %7 to ptr + %8 = load i16, ptr %data165, align 2 %9 = and i16 %8, 255 %and = zext i16 %9 to i32 %cmp16 = icmp eq i32 %and, %conv15 br i1 %cmp16, label %return, label %while.body19 while.body19: ; preds = %land.rhs11 - %next206 = bitcast %struct.head_s* %list.addr.136 to %struct.head_s** - %10 = load %struct.head_s*, %struct.head_s** %next206, align 4 - %tobool10 = icmp eq %struct.head_s* %10, null + %next206 = bitcast ptr %list.addr.136 to ptr + %10 = load ptr, ptr %next206, align 4 + %tobool10 = icmp eq ptr %10, null br i1 %tobool10, label %return, label %land.rhs11 return: ; preds = %while.body19, %land.rhs11, %while.body, %land.rhs, %while.cond.preheader, %while.cond9.preheader - %retval.0 = phi %struct.head_s* [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ] - ret %struct.head_s* %retval.0 + %retval.0 = phi ptr [ null, %while.cond.preheader ], [ null, %while.cond9.preheader ], [ %list.addr.033, %land.rhs ], [ null, %while.body ], [ %list.addr.136, %land.rhs11 ], [ null, %while.body19 ] + ret ptr %retval.0 } ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir index 7d89865..e445598 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-vpsel-liveout.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - define dso_local i32 @no_vpsel_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + define dso_local i32 @no_vpsel_liveout(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -18,22 +18,22 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* - %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr + %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> - %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32> %tmp12 = mul nsw <4 x i32> %tmp11, %tmp10 %tmp13 = add <4 x i32> %tmp12, %vec.phi - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 - %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4 %tmp14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp15 = icmp ne i32 %tmp14, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -47,7 +47,7 @@ %res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp16, %middle.block ] ret i32 %res.0.lcssa } - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1 + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir index e0a4830..1401937 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve %s -run-pass=arm-low-overhead-loops -o - --verify-machineinstrs | FileCheck %s --- | - define dso_local arm_aapcs_vfpcc zeroext i8 @non_masked_load(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) { + define dso_local arm_aapcs_vfpcc zeroext i8 @non_masked_load(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) { entry: %cmp11 = icmp eq i32 %N, 0 %0 = add i32 %N, 15 @@ -20,21 +20,21 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv20 = phi i8* [ %scevgep21, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv20 = phi ptr [ %scevgep21, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <16 x i8> [ zeroinitializer, %vector.ph ], [ %13, %vector.body ] %8 = phi i32 [ %start, %vector.ph ], [ %14, %vector.body ] %9 = phi i32 [ %N, %vector.ph ], [ %11, %vector.body ] - %lsr.iv2022 = bitcast i8* %lsr.iv20 to <16 x i8>* - %lsr.iv19 = bitcast i8* %lsr.iv to <16 x i8>* + %lsr.iv2022 = bitcast ptr %lsr.iv20 to ptr + %lsr.iv19 = bitcast ptr %lsr.iv to ptr %10 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %9) %11 = sub i32 %9, 16 - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv19, i32 1, <16 x i1> %10, <16 x i8> undef) - %wide.load16 = load <16 x i8>, <16 x i8>* %lsr.iv2022 + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv19, i32 1, <16 x i1> %10, <16 x i8> undef) + %wide.load16 = load <16 x i8>, ptr %lsr.iv2022 %12 = add <16 x i8> %wide.masked.load, %vec.phi %13 = add <16 x i8> %12, %wide.load16 - %scevgep = getelementptr i8, i8* %lsr.iv, i32 16 - %scevgep21 = getelementptr i8, i8* %lsr.iv20, i32 16 + %scevgep = getelementptr i8, ptr %lsr.iv, i32 16 + %scevgep21 = getelementptr i8, ptr %lsr.iv20, i32 16 %14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %8, i32 1) %15 = icmp ne i32 %14, 0 br i1 %15, label %vector.body, label %middle.block @@ -52,7 +52,7 @@ ret i8 %res.0.lcssa } - declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) #1 + declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) #1 declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir index 3ee066a..d0959c0 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-store.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s --- | - define dso_local arm_aapcs_vfpcc void @non_masked_store(i8* noalias nocapture %res, i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) { + define dso_local arm_aapcs_vfpcc void @non_masked_store(ptr noalias nocapture %res, ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) { entry: %cmp10 = icmp eq i32 %N, 0 %0 = add i32 %N, 15 @@ -18,23 +18,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv19 = phi i8* [ %scevgep20, %vector.body ], [ %res, %vector.ph ] - %lsr.iv16 = phi i8* [ %scevgep17, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv19 = phi ptr [ %scevgep20, %vector.body ], [ %res, %vector.ph ] + %lsr.iv16 = phi ptr [ %scevgep17, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv1921 = bitcast i8* %lsr.iv19 to <16 x i8>* - %lsr.iv1618 = bitcast i8* %lsr.iv16 to <16 x i8>* - %lsr.iv15 = bitcast i8* %lsr.iv to <16 x i8>* + %lsr.iv1921 = bitcast ptr %lsr.iv19 to ptr + %lsr.iv1618 = bitcast ptr %lsr.iv16 to ptr + %lsr.iv15 = bitcast ptr %lsr.iv to ptr %8 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %7) %9 = sub i32 %7, 16 - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv15, i32 1, <16 x i1> %8, <16 x i8> undef) - %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv1618, i32 1, <16 x i1> %8, <16 x i8> undef) + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv15, i32 1, <16 x i1> %8, <16 x i8> undef) + %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv1618, i32 1, <16 x i1> %8, <16 x i8> undef) %10 = add <16 x i8> %wide.masked.load14, %wide.masked.load - store <16 x i8> %10, <16 x i8>* %lsr.iv1921 - %scevgep = getelementptr i8, i8* %lsr.iv, i32 16 - %scevgep17 = getelementptr i8, i8* %lsr.iv16, i32 16 - %scevgep20 = getelementptr i8, i8* %lsr.iv19, i32 16 + store <16 x i8> %10, ptr %lsr.iv1921 + %scevgep = getelementptr i8, ptr %lsr.iv, i32 16 + %scevgep17 = getelementptr i8, ptr %lsr.iv16, i32 16 + %scevgep20 = getelementptr i8, ptr %lsr.iv19, i32 16 %11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %12 = icmp ne i32 %11, 0 br i1 %12, label %vector.body, label %for.cond.cleanup @@ -43,8 +43,8 @@ ret void } - declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) - declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>) + declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) + declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) declare <16 x i1> @llvm.arm.mve.vctp8(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-invariant.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-invariant.mir index 31b7ee2..911e1d6 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-invariant.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-invariant.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - define dso_local <4 x i32> @invariant_predicated_add_use(i16* nocapture readonly %a, i32* %c, i32 %N, <4 x i32> %pass) #0 { + define dso_local <4 x i32> @invariant_predicated_add_use(ptr nocapture readonly %a, ptr %c, i32 %N, <4 x i32> %pass) #0 { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -19,15 +19,15 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> %acc.next = tail call <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32> %pass, <4 x i32> %tmp10, <4 x i1> %tmp8, <4 x i32> undef) - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 %tmp11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp12 = icmp ne i32 %tmp11, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -38,7 +38,7 @@ ret <4 x i32> %res } - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) declare <4 x i1> @llvm.arm.mve.vctp32(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll index c5d63a2..9194d78 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout-unknown-lanes.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -O3 -tail-predication=force-enabled-no-reductions %s -o - | FileCheck %s -define arm_aapcs_vfpcc <4 x float> @arm_max_no_idx_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) { +define arm_aapcs_vfpcc <4 x float> @arm_max_no_idx_f32_mve(ptr %pSrc, i32 %blockSize, ptr nocapture %pResult) { ; CHECK-LABEL: arm_max_no_idx_f32_mve: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -24,12 +24,12 @@ entry: do.body: ; preds = %do.body, %entry %blockSize.addr.0 = phi i32 [ %blockSize, %entry ], [ %sub, %do.body ] %curExtremValVec.0 = phi <4 x float> [ <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>, %entry ], [ %3, %do.body ] - %pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ] + %pSrc.addr.0 = phi ptr [ %pSrc, %entry ], [ %add.ptr, %do.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blockSize.addr.0) - %1 = bitcast float* %pSrc.addr.0 to <4 x float>* - %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %1 = bitcast ptr %pSrc.addr.0 to ptr + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) %3 = tail call fast <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> %curExtremValVec.0, i32 0, <4 x i1> %0, <4 x float> %curExtremValVec.0) - %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4 + %add.ptr = getelementptr inbounds float, ptr %pSrc.addr.0, i32 4 %sub = add i32 %blockSize.addr.0, -4 %cmp = icmp sgt i32 %sub, 0 br i1 %cmp, label %do.body, label %do.end @@ -40,6 +40,6 @@ do.end: ; preds = %do.body declare <4 x i1> @llvm.arm.mve.vctp32(i32) -declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) +declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) declare <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float>, <4 x float>, i32, <4 x i1>, <4 x float>) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir index 13ba359..35cc7dd 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/predicated-liveout.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s --verify-machineinstrs -o - | FileCheck %s --- | - define i16 @predicated_livout(i8* %input_1_vect, i8* %input_2_vect, i32 %block_size) #0 { + define i16 @predicated_livout(ptr %input_1_vect, ptr %input_2_vect, i32 %block_size) #0 { entry: %rnd.up = add i32 %block_size, 7 %div = lshr i32 %rnd.up, 3 @@ -14,21 +14,21 @@ for.body: ; preds = %for.body.preheader, %for.body %lsr.iv = phi i32 [ 0, %for.body.preheader ], [ %lsr.iv.next, %for.body ] - %input_1_vect.addr.052 = phi i8* [ %add.ptr, %for.body ], [ %input_1_vect, %for.body.preheader ] - %input_2_vect.addr.051 = phi i8* [ %add.ptr14, %for.body ], [ %input_2_vect, %for.body.preheader ] + %input_1_vect.addr.052 = phi ptr [ %add.ptr, %for.body ], [ %input_1_vect, %for.body.preheader ] + %input_2_vect.addr.051 = phi ptr [ %add.ptr14, %for.body ], [ %input_2_vect, %for.body.preheader ] %num_elements.049 = phi i32 [ %sub, %for.body ], [ %block_size, %for.body.preheader ] %acc = phi <8 x i16> [ %acc.next, %for.body ], [ zeroinitializer, %for.body.preheader ] - %input_2_cast = bitcast i8* %input_2_vect.addr.051 to <8 x i8>* - %input_1_cast = bitcast i8* %input_1_vect.addr.052 to <8 x i8>* + %input_2_cast = bitcast ptr %input_2_vect.addr.051 to ptr + %input_1_cast = bitcast ptr %input_1_vect.addr.052 to ptr %pred = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %num_elements.049) - %load.1 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %input_1_cast, i32 1, <8 x i1> %pred, <8 x i8> undef) + %load.1 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %input_1_cast, i32 1, <8 x i1> %pred, <8 x i8> undef) %zext.load.1 = zext <8 x i8> %load.1 to <8 x i16> - %load.2 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %input_2_cast, i32 1, <8 x i1> %pred, <8 x i8> undef) + %load.2 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %input_2_cast, i32 1, <8 x i1> %pred, <8 x i8> undef) %zext.load.2 = zext <8 x i8> %load.2 to <8 x i16> %add = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %zext.load.1, <8 x i16> %zext.load.2, <8 x i1> %pred, <8 x i16> undef) %acc.next = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %add, <8 x i16> %acc, <8 x i1> %pred, <8 x i16> undef) - %add.ptr = getelementptr inbounds i8, i8* %input_1_vect.addr.052, i32 8 - %add.ptr14 = getelementptr inbounds i8, i8* %input_2_vect.addr.051, i32 8 + %add.ptr = getelementptr inbounds i8, ptr %input_1_vect.addr.052, i32 8 + %add.ptr14 = getelementptr inbounds i8, ptr %input_2_vect.addr.051, i32 8 %sub = add i32 %num_elements.049, -8 %iv.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv, i32 1) %cmp = icmp ne i32 %iv.next, 0 @@ -44,7 +44,7 @@ } declare <8 x i1> @llvm.arm.mve.vctp16(i32) #1 - declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>) #2 + declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>) #2 declare i1 @llvm.test.set.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) #4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir index 4ac6c60..3ef1569 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions-vpt-liveout.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - define dso_local arm_aapcs_vfpcc i32 @mul_var_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) { + define dso_local arm_aapcs_vfpcc i32 @mul_var_i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) { entry: %cmp9.not = icmp eq i32 %N, 0 %0 = add i32 %N, 3 @@ -18,24 +18,24 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv14 = phi i8* [ %scevgep15, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ] %6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv13 = bitcast i8* %lsr.iv to <4 x i8>* - %lsr.iv1416 = bitcast i8* %lsr.iv14 to <4 x i8>* + %lsr.iv13 = bitcast ptr %lsr.iv to ptr + %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 4 - %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %lsr.iv13, i32 1, <4 x i1> %8, <4 x i8> undef) + %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %lsr.iv13, i32 1, <4 x i1> %8, <4 x i8> undef) %10 = zext <4 x i8> %wide.masked.load to <4 x i32> - %wide.masked.load12 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %lsr.iv1416, i32 1, <4 x i1> %8, <4 x i8> undef) + %wide.masked.load12 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %lsr.iv1416, i32 1, <4 x i1> %8, <4 x i8> undef) %11 = zext <4 x i8> %wide.masked.load12 to <4 x i32> %12 = mul nuw nsw <4 x i32> %11, %10 %13 = select <4 x i1> %8, <4 x i32> %12, <4 x i32> zeroinitializer %14 = add <4 x i32> %vec.phi, %13 - %scevgep = getelementptr i8, i8* %lsr.iv, i32 4 - %scevgep15 = getelementptr i8, i8* %lsr.iv14, i32 4 + %scevgep = getelementptr i8, ptr %lsr.iv, i32 4 + %scevgep15 = getelementptr i8, ptr %lsr.iv14, i32 4 %15 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1) %16 = icmp ne i32 %15, 0 br i1 %16, label %vector.body, label %middle.block @@ -49,7 +49,7 @@ ret i32 %res.0.lcssa } - define dso_local arm_aapcs_vfpcc i32 @add_var_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) { + define dso_local arm_aapcs_vfpcc i32 @add_var_i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) { entry: %cmp10.not = icmp eq i32 %N, 0 %0 = add i32 %N, 3 @@ -65,24 +65,24 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv15 = phi i8* [ %scevgep16, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv15 = phi ptr [ %scevgep16, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ] %6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv14 = bitcast i8* %lsr.iv to <4 x i8>* - %lsr.iv1517 = bitcast i8* %lsr.iv15 to <4 x i8>* + %lsr.iv14 = bitcast ptr %lsr.iv to ptr + %lsr.iv1517 = bitcast ptr %lsr.iv15 to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 4 - %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %lsr.iv14, i32 1, <4 x i1> %8, <4 x i8> undef) + %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %lsr.iv14, i32 1, <4 x i1> %8, <4 x i8> undef) %10 = zext <4 x i8> %wide.masked.load to <4 x i32> - %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %lsr.iv1517, i32 1, <4 x i1> %8, <4 x i8> undef) + %wide.masked.load13 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %lsr.iv1517, i32 1, <4 x i1> %8, <4 x i8> undef) %11 = zext <4 x i8> %wide.masked.load13 to <4 x i32> %12 = add <4 x i32> %vec.phi, %10 %13 = add <4 x i32> %12, %11 %14 = select <4 x i1> %8, <4 x i32> %13, <4 x i32> %vec.phi - %scevgep = getelementptr i8, i8* %lsr.iv, i32 4 - %scevgep16 = getelementptr i8, i8* %lsr.iv15, i32 4 + %scevgep = getelementptr i8, ptr %lsr.iv, i32 4 + %scevgep16 = getelementptr i8, ptr %lsr.iv15, i32 4 %15 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1) %16 = icmp ne i32 %15, 0 br i1 %16, label %vector.body, label %middle.block @@ -96,7 +96,7 @@ ret i32 %res.0.lcssa } - define dso_local arm_aapcs_vfpcc i32 @mul_var_i16(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) { + define dso_local arm_aapcs_vfpcc i32 @mul_var_i16(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) { entry: %cmp9.not = icmp eq i32 %N, 0 %0 = add i32 %N, 3 @@ -112,24 +112,24 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv14 = phi i16* [ %scevgep15, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ] %6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv13 = bitcast i16* %lsr.iv to <4 x i16>* - %lsr.iv1416 = bitcast i16* %lsr.iv14 to <4 x i16>* + %lsr.iv13 = bitcast ptr %lsr.iv to ptr + %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv13, i32 2, <4 x i1> %8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv13, i32 2, <4 x i1> %8, <4 x i16> undef) %10 = sext <4 x i16> %wide.masked.load to <4 x i32> - %wide.masked.load12 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1416, i32 2, <4 x i1> %8, <4 x i16> undef) + %wide.masked.load12 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1416, i32 2, <4 x i1> %8, <4 x i16> undef) %11 = sext <4 x i16> %wide.masked.load12 to <4 x i32> %12 = mul nsw <4 x i32> %11, %10 %13 = select <4 x i1> %8, <4 x i32> %12, <4 x i32> zeroinitializer %14 = add <4 x i32> %vec.phi, %13 - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 - %scevgep15 = getelementptr i16, i16* %lsr.iv14, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 + %scevgep15 = getelementptr i16, ptr %lsr.iv14, i32 4 %15 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1) %16 = icmp ne i32 %15, 0 br i1 %16, label %vector.body, label %middle.block @@ -143,7 +143,7 @@ ret i32 %res.0.lcssa } - define dso_local arm_aapcs_vfpcc i32 @add_var_i16(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) { + define dso_local arm_aapcs_vfpcc i32 @add_var_i16(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) { entry: %cmp10.not = icmp eq i32 %N, 0 %0 = add i32 %N, 3 @@ -159,24 +159,24 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv15 = phi i16* [ %scevgep16, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv15 = phi ptr [ %scevgep16, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %14, %vector.body ] %6 = phi i32 [ %start, %vector.ph ], [ %15, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv14 = bitcast i16* %lsr.iv to <4 x i16>* - %lsr.iv1517 = bitcast i16* %lsr.iv15 to <4 x i16>* + %lsr.iv14 = bitcast ptr %lsr.iv to ptr + %lsr.iv1517 = bitcast ptr %lsr.iv15 to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv14, i32 2, <4 x i1> %8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv14, i32 2, <4 x i1> %8, <4 x i16> undef) %10 = sext <4 x i16> %wide.masked.load to <4 x i32> - %wide.masked.load13 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1517, i32 2, <4 x i1> %8, <4 x i16> undef) + %wide.masked.load13 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1517, i32 2, <4 x i1> %8, <4 x i16> undef) %11 = sext <4 x i16> %wide.masked.load13 to <4 x i32> %12 = add <4 x i32> %vec.phi, %10 %13 = add <4 x i32> %12, %11 %14 = select <4 x i1> %8, <4 x i32> %13, <4 x i32> %vec.phi - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 - %scevgep16 = getelementptr i16, i16* %lsr.iv15, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 + %scevgep16 = getelementptr i16, ptr %lsr.iv15, i32 4 %15 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1) %16 = icmp ne i32 %15, 0 br i1 %16, label %vector.body, label %middle.block @@ -191,7 +191,7 @@ } ; Function Attrs: norecurse nounwind readonly - define dso_local arm_aapcs_vfpcc i32 @mul_var_i32(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + define dso_local arm_aapcs_vfpcc i32 @mul_var_i32(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 { entry: %cmp8.not = icmp eq i32 %N, 0 %0 = add i32 %N, 3 @@ -207,22 +207,22 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv13 = phi i32* [ %scevgep14, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv13 = phi ptr [ %scevgep14, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ] %6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv12 = bitcast i32* %lsr.iv to <4 x i32>* - %lsr.iv1315 = bitcast i32* %lsr.iv13 to <4 x i32>* + %lsr.iv12 = bitcast ptr %lsr.iv to ptr + %lsr.iv1315 = bitcast ptr %lsr.iv13 to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 4 - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv12, i32 4, <4 x i1> %8, <4 x i32> undef) - %wide.masked.load11 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1315, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv12, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load11 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1315, i32 4, <4 x i1> %8, <4 x i32> undef) %10 = mul nsw <4 x i32> %wide.masked.load11, %wide.masked.load %11 = select <4 x i1> %8, <4 x i32> %10, <4 x i32> zeroinitializer %12 = add <4 x i32> %vec.phi, %11 - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep14 = getelementptr i32, i32* %lsr.iv13, i32 4 + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 + %scevgep14 = getelementptr i32, ptr %lsr.iv13, i32 4 %13 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1) %14 = icmp ne i32 %13, 0 br i1 %14, label %vector.body, label %middle.block @@ -237,7 +237,7 @@ } ; Function Attrs: norecurse nounwind readonly - define dso_local arm_aapcs_vfpcc i32 @add_var_i32(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + define dso_local arm_aapcs_vfpcc i32 @add_var_i32(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 { entry: %cmp9.not = icmp eq i32 %N, 0 %0 = add i32 %N, 3 @@ -253,22 +253,22 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ] %6 = phi i32 [ %start, %vector.ph ], [ %13, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* - %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* + %lsr.iv13 = bitcast ptr %lsr.iv to ptr + %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 4 - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef) - %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef) %10 = add <4 x i32> %wide.masked.load, %vec.phi %11 = add <4 x i32> %10, %wide.masked.load12 %12 = select <4 x i1> %8, <4 x i32> %11, <4 x i32> %vec.phi - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4 + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 + %scevgep15 = getelementptr i32, ptr %lsr.iv14, i32 4 %13 = call i32 @llvm.loop.decrement.reg.i32(i32 %6, i32 1) %14 = icmp ne i32 %13, 0 br i1 %14, label %vector.body, label %middle.block @@ -282,9 +282,9 @@ ret i32 %res.0.lcssa } - declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>) - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) + declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>) + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll index 93119ea..3c4af10 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s -define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_add_add_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr { +define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_add_add_v16i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr { ; CHECK-LABEL: one_loop_add_add_v16i8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} @@ -35,13 +35,13 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <16 x i8> [ zeroinitializer, %vector.ph ], [ %i5, %vector.body ] - %i = getelementptr inbounds i8, i8* %a, i32 %index + %i = getelementptr inbounds i8, ptr %a, i32 %index %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %N) - %i1 = bitcast i8* %i to <16 x i8>* - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %i1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) - %i2 = getelementptr inbounds i8, i8* %b, i32 %index - %i3 = bitcast i8* %i2 to <16 x i8>* - %wide.masked.load16 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %i3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %i1 = bitcast ptr %i to ptr + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %i1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %i2 = getelementptr inbounds i8, ptr %b, i32 %index + %i3 = bitcast ptr %i2 to ptr + %wide.masked.load16 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %i3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %i4 = add <16 x i8> %wide.masked.load, %wide.masked.load16 %i5 = select <16 x i1> %active.lane.mask, <16 x i8> %i4, <16 x i8> %vec.phi %i6 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %i5) @@ -57,7 +57,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr ret i8 %res.0.lcssa } -define dso_local arm_aapcs_vfpcc signext i16 @one_loop_add_add_v8i16(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr { +define dso_local arm_aapcs_vfpcc signext i16 @one_loop_add_add_v8i16(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr { ; CHECK-LABEL: one_loop_add_add_v8i16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 @@ -103,14 +103,14 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph ], [ %i7, %vector.body ] - %i = getelementptr inbounds i8, i8* %a, i32 %index + %i = getelementptr inbounds i8, ptr %a, i32 %index %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %i1 = bitcast i8* %i to <8 x i8>* - %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) + %i1 = bitcast ptr %i to ptr + %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) %i2 = zext <8 x i8> %wide.masked.load to <8 x i16> - %i3 = getelementptr inbounds i8, i8* %b, i32 %index - %i4 = bitcast i8* %i3 to <8 x i8>* - %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) + %i3 = getelementptr inbounds i8, ptr %b, i32 %index + %i4 = bitcast ptr %i3 to ptr + %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) %i5 = zext <8 x i8> %wide.masked.load17 to <8 x i16> %i6 = add <8 x i16> %vec.phi, %i2 %i7 = add <8 x i16> %i6, %i5 @@ -128,7 +128,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr ret i16 %res.0.lcssa } -define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_sub_add_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr { +define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_sub_add_v16i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr { ; CHECK-LABEL: one_loop_sub_add_v16i8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 @@ -174,13 +174,13 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <16 x i8> [ zeroinitializer, %vector.ph ], [ %i5, %vector.body ] - %i = getelementptr inbounds i8, i8* %a, i32 %index + %i = getelementptr inbounds i8, ptr %a, i32 %index %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %N) - %i1 = bitcast i8* %i to <16 x i8>* - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %i1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) - %i2 = getelementptr inbounds i8, i8* %b, i32 %index - %i3 = bitcast i8* %i2 to <16 x i8>* - %wide.masked.load16 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %i3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %i1 = bitcast ptr %i to ptr + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %i1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %i2 = getelementptr inbounds i8, ptr %b, i32 %index + %i3 = bitcast ptr %i2 to ptr + %wide.masked.load16 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %i3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %i4 = sub <16 x i8> %wide.masked.load, %wide.masked.load16 %i5 = add <16 x i8> %i4, %vec.phi %index.next = add i32 %index, 16 @@ -197,7 +197,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr ret i8 %res.0.lcssa } -define dso_local arm_aapcs_vfpcc signext i16 @one_loop_sub_add_v8i16(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr { +define dso_local arm_aapcs_vfpcc signext i16 @one_loop_sub_add_v8i16(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr { ; CHECK-LABEL: one_loop_sub_add_v8i16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 @@ -243,14 +243,14 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph ], [ %i7, %vector.body ] - %i = getelementptr inbounds i8, i8* %a, i32 %index + %i = getelementptr inbounds i8, ptr %a, i32 %index %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %i1 = bitcast i8* %i to <8 x i8>* - %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) + %i1 = bitcast ptr %i to ptr + %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) %i2 = zext <8 x i8> %wide.masked.load to <8 x i16> - %i3 = getelementptr inbounds i8, i8* %b, i32 %index - %i4 = bitcast i8* %i3 to <8 x i8>* - %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) + %i3 = getelementptr inbounds i8, ptr %b, i32 %index + %i4 = bitcast ptr %i3 to ptr + %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) %i5 = zext <8 x i8> %wide.masked.load17 to <8 x i16> %i6 = sub <8 x i16> %i5, %i2 %i7 = add <8 x i16> %i6, %vec.phi @@ -268,7 +268,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr ret i16 %res.0.lcssa } -define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_mul_add_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr { +define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_mul_add_v16i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr { ; CHECK-LABEL: one_loop_mul_add_v16i8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 @@ -314,13 +314,13 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <16 x i8> [ zeroinitializer, %vector.ph ], [ %i5, %vector.body ] - %i = getelementptr inbounds i8, i8* %a, i32 %index + %i = getelementptr inbounds i8, ptr %a, i32 %index %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %N) - %i1 = bitcast i8* %i to <16 x i8>* - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %i1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) - %i2 = getelementptr inbounds i8, i8* %b, i32 %index - %i3 = bitcast i8* %i2 to <16 x i8>* - %wide.masked.load15 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %i3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %i1 = bitcast ptr %i to ptr + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %i1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %i2 = getelementptr inbounds i8, ptr %b, i32 %index + %i3 = bitcast ptr %i2 to ptr + %wide.masked.load15 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %i3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %i4 = mul <16 x i8> %wide.masked.load15, %wide.masked.load %i5 = add <16 x i8> %i4, %vec.phi %index.next = add i32 %index, 16 @@ -337,7 +337,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr ret i8 %res.0.lcssa } -define dso_local arm_aapcs_vfpcc signext i16 @one_loop_mul_add_v8i16(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr { +define dso_local arm_aapcs_vfpcc signext i16 @one_loop_mul_add_v8i16(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr { ; CHECK-LABEL: one_loop_mul_add_v8i16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 @@ -383,14 +383,14 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph ], [ %i7, %vector.body ] - %i = getelementptr inbounds i8, i8* %a, i32 %index + %i = getelementptr inbounds i8, ptr %a, i32 %index %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %i1 = bitcast i8* %i to <8 x i8>* - %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) + %i1 = bitcast ptr %i to ptr + %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) %i2 = zext <8 x i8> %wide.masked.load to <8 x i16> - %i3 = getelementptr inbounds i8, i8* %b, i32 %index - %i4 = bitcast i8* %i3 to <8 x i8>* - %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) + %i3 = getelementptr inbounds i8, ptr %b, i32 %index + %i4 = bitcast ptr %i3 to ptr + %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) %i5 = zext <8 x i8> %wide.masked.load17 to <8 x i16> %i6 = mul <8 x i16> %i5, %i2 %i7 = add <8 x i16> %i6, %vec.phi @@ -408,7 +408,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr ret i16 %res.0.lcssa } -define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr { +define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr { ; CHECK-LABEL: two_loops_mul_add_v4i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 @@ -479,14 +479,14 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %i7, %vector.body ] - %i = getelementptr inbounds i8, i8* %a, i32 %index + %i = getelementptr inbounds i8, ptr %a, i32 %index %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %i1 = bitcast i8* %i to <4 x i8>* - %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %i1, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef) + %i1 = bitcast ptr %i to ptr + %wide.masked.load = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %i1, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef) %i2 = zext <4 x i8> %wide.masked.load to <4 x i32> - %i3 = getelementptr inbounds i8, i8* %b, i32 %index - %i4 = bitcast i8* %i3 to <4 x i8>* - %wide.masked.load43 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %i4, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef) + %i3 = getelementptr inbounds i8, ptr %b, i32 %index + %i4 = bitcast ptr %i3 to ptr + %wide.masked.load43 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %i4, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef) %i5 = zext <4 x i8> %wide.masked.load43 to <4 x i32> %i6 = mul nuw nsw <4 x i32> %i5, %i2 %i7 = add <4 x i32> %i6, %vec.phi @@ -508,14 +508,14 @@ vector.ph47: ; preds = %middle.block vector.body46: ; preds = %vector.body46, %vector.ph47 %index51 = phi i32 [ 0, %vector.ph47 ], [ %index.next52, %vector.body46 ] %vec.phi60 = phi <4 x i32> [ %i11, %vector.ph47 ], [ %i19, %vector.body46 ] - %i12 = getelementptr inbounds i8, i8* %a, i32 %index51 + %i12 = getelementptr inbounds i8, ptr %a, i32 %index51 %active.lane.mask61 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index51, i32 %N) - %i13 = bitcast i8* %i12 to <4 x i8>* - %wide.masked.load62 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %i13, i32 1, <4 x i1> %active.lane.mask61, <4 x i8> undef) + %i13 = bitcast ptr %i12 to ptr + %wide.masked.load62 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %i13, i32 1, <4 x i1> %active.lane.mask61, <4 x i8> undef) %i14 = zext <4 x i8> %wide.masked.load62 to <4 x i32> - %i15 = getelementptr inbounds i8, i8* %b, i32 %index51 - %i16 = bitcast i8* %i15 to <4 x i8>* - %wide.masked.load63 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %i16, i32 1, <4 x i1> %active.lane.mask61, <4 x i8> undef) + %i15 = getelementptr inbounds i8, ptr %b, i32 %index51 + %i16 = bitcast ptr %i15 to ptr + %wide.masked.load63 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %i16, i32 1, <4 x i1> %active.lane.mask61, <4 x i8> undef) %i17 = zext <4 x i8> %wide.masked.load63 to <4 x i32> %i18 = mul nuw nsw <4 x i32> %i17, %i14 %i19 = add <4 x i32> %i18, %vec.phi60 @@ -533,7 +533,7 @@ for.cond.cleanup7: ; preds = %middle.block44, %mi ret i32 %res.1.lcssa } -define dso_local arm_aapcs_vfpcc void @two_reductions_mul_add_v8i16(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr { +define dso_local arm_aapcs_vfpcc void @two_reductions_mul_add_v8i16(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr { ; CHECK-LABEL: two_reductions_mul_add_v8i16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r4, lr} @@ -591,14 +591,14 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <8 x i16> [ zeroinitializer, %vector.ph ], [ %i8, %vector.body ] %vec.phi.1 = phi <8 x i16> [ zeroinitializer, %vector.ph ], [ %i9, %vector.body ] - %i = getelementptr inbounds i8, i8* %a, i32 %index + %i = getelementptr inbounds i8, ptr %a, i32 %index %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %i1 = bitcast i8* %i to <8 x i8>* - %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) + %i1 = bitcast ptr %i to ptr + %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i1, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) %i2 = zext <8 x i8> %wide.masked.load to <8 x i16> - %i3 = getelementptr inbounds i8, i8* %b, i32 %index - %i4 = bitcast i8* %i3 to <8 x i8>* - %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) + %i3 = getelementptr inbounds i8, ptr %b, i32 %index + %i4 = bitcast ptr %i3 to ptr + %wide.masked.load17 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %i4, i32 1, <8 x i1> %active.lane.mask, <8 x i8> undef) %i5 = zext <8 x i8> %wide.masked.load17 to <8 x i16> %i6 = mul <8 x i16> %i5, %i2 %i7 = sub <8 x i16> %i5, %i2 @@ -619,15 +619,15 @@ for.cond.cleanup: ; preds = %middle.block, %entr %res.0.lcssa = phi i16 [ 0, %entry ], [ %i12, %middle.block ] %res.1.lcssa = phi i16 [ 0, %entry ], [ %i14, %middle.block ] %trunc.res.0 = trunc i16 %res.0.lcssa to i8 - store i8 %trunc.res.0, i8* %a + store i8 %trunc.res.0, ptr %a %trunc.res.1 = trunc i16 %res.1.lcssa to i8 - store i8 %trunc.res.1, i8* %b + store i8 %trunc.res.1, ptr %b ret void } %struct.date = type { i32, i32, i32, i32 } @days = internal unnamed_addr constant [2 x [13 x i32]] [[13 x i32] [i32 0, i32 31, i32 28, i32 31, i32 30, i32 31, i32 30, i32 31, i32 31, i32 30, i32 31, i32 30, i32 31], [13 x i32] [i32 0, i32 31, i32 29, i32 31, i32 30, i32 31, i32 30, i32 31, i32 31, i32 30, i32 31, i32 30, i32 31]], align 4 -define i32 @wrongop(%struct.date* nocapture readonly %pd) { +define i32 @wrongop(ptr nocapture readonly %pd) { ; CHECK-LABEL: wrongop: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r4, lr} @@ -682,10 +682,10 @@ define i32 @wrongop(%struct.date* nocapture readonly %pd) { ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: pop {r4, pc} entry: - %day1 = getelementptr inbounds %struct.date, %struct.date* %pd, i32 0, i32 0 - %0 = load i32, i32* %day1, align 4 - %year = getelementptr inbounds %struct.date, %struct.date* %pd, i32 0, i32 2 - %1 = load i32, i32* %year, align 4 + %day1 = getelementptr inbounds %struct.date, ptr %pd, i32 0, i32 0 + %0 = load i32, ptr %day1, align 4 + %year = getelementptr inbounds %struct.date, ptr %pd, i32 0, i32 2 + %1 = load i32, ptr %year, align 4 %2 = and i32 %1, 3 %cmp = icmp ne i32 %2, 0 %rem3 = srem i32 %1, 100 @@ -701,8 +701,8 @@ lor.rhs: ; preds = %entry lor.end: ; preds = %entry, %lor.rhs %3 = phi i32 [ %phi.cast, %lor.rhs ], [ 1, %entry ] - %month = getelementptr inbounds %struct.date, %struct.date* %pd, i32 0, i32 1 - %4 = load i32, i32* %month, align 4 + %month = getelementptr inbounds %struct.date, ptr %pd, i32 0, i32 1 + %4 = load i32, ptr %month, align 4 %cmp820 = icmp sgt i32 %4, 0 br i1 %cmp820, label %vector.ph, label %for.end @@ -715,10 +715,10 @@ vector.ph: ; preds = %lor.end vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ %5, %vector.ph ], [ %8, %vector.body ] - %6 = getelementptr inbounds [2 x [13 x i32]], [2 x [13 x i32]]* @days, i32 0, i32 %3, i32 %index + %6 = getelementptr inbounds [2 x [13 x i32]], ptr @days, i32 0, i32 %3, i32 %index %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %4) - %7 = bitcast i32* %6 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* nonnull %7, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %7 = bitcast ptr %6 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr nonnull %7, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %8 = add <4 x i32> %wide.masked.load, %vec.phi %index.next = add i32 %index, 4 %9 = icmp eq i32 %index.next, %n.vec @@ -734,13 +734,13 @@ for.end: ; preds = %middle.block, %lor. ret i32 %day.0.lcssa } -declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32) -declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) +declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) -declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>) +declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>) declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) -declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>) +declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>) declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-header.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-header.mir index 616c16b..eec1c39 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-header.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-header.mir @@ -13,7 +13,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main" - define void @header_not_target_unrolled_loop(i32* nocapture %v, i32 %n) { + define void @header_not_target_unrolled_loop(ptr nocapture %v, i32 %n) { entry: %cmp56 = icmp sgt i32 %n, 1 br i1 %cmp56, label %for.cond1.preheader.preheader, label %for.end20 @@ -36,13 +36,13 @@ for.cond4.preheader.preheader: ; preds = %for.cond1.preheader %tmp2 = mul i32 %gap.057, -4 %tmp6 = mul i32 %gap.057, -2 - %scevgep1 = getelementptr i32, i32* %v, i32 %gap.057 + %scevgep1 = getelementptr i32, ptr %v, i32 %gap.057 %0 = shl i32 %gap.057, 2 br label %for.cond4.preheader for.cond4.preheader: ; preds = %for.inc16, %for.cond4.preheader.preheader - %lsr.iv2 = phi i32* [ %scevgep3, %for.inc16 ], [ %scevgep1, %for.cond4.preheader.preheader ] - %lsr.iv = phi i32* [ %v, %for.cond4.preheader.preheader ], [ %scevgep, %for.inc16 ] + %lsr.iv2 = phi ptr [ %scevgep3, %for.inc16 ], [ %scevgep1, %for.cond4.preheader.preheader ] + %lsr.iv = phi ptr [ %v, %for.cond4.preheader.preheader ], [ %scevgep, %for.inc16 ] %i.053 = phi i32 [ %inc, %for.inc16 ], [ %gap.057, %for.cond4.preheader.preheader ] %tmp8 = phi i32 [ %start, %for.cond4.preheader.preheader ], [ %tmp16, %for.inc16 ] %j.048 = sub nsw i32 %i.053, %gap.057 @@ -55,26 +55,26 @@ land.rhs: ; preds = %land.rhs.preheader, %for.body8 %lsr.iv4 = phi i32 [ %lsr.iv.next, %for.body8 ], [ 0, %land.rhs.preheader ] %j.051 = phi i32 [ %j.0, %for.body8 ], [ %j.048, %land.rhs.preheader ] - %1 = bitcast i32* %lsr.iv2 to i8* - %2 = bitcast i32* %lsr.iv to i8* - %uglygep3 = getelementptr i8, i8* %2, i32 %lsr.iv4 - %uglygep34 = bitcast i8* %uglygep3 to i32* - %tmp9 = load i32, i32* %uglygep34, align 4 - %uglygep1 = getelementptr i8, i8* %1, i32 %lsr.iv4 - %uglygep12 = bitcast i8* %uglygep1 to i32* - %tmp12 = load i32, i32* %uglygep12, align 4 + %1 = bitcast ptr %lsr.iv2 to ptr + %2 = bitcast ptr %lsr.iv to ptr + %uglygep3 = getelementptr i8, ptr %2, i32 %lsr.iv4 + %uglygep34 = bitcast ptr %uglygep3 to ptr + %tmp9 = load i32, ptr %uglygep34, align 4 + %uglygep1 = getelementptr i8, ptr %1, i32 %lsr.iv4 + %uglygep12 = bitcast ptr %uglygep1 to ptr + %tmp12 = load i32, ptr %uglygep12, align 4 %cmp7 = icmp sgt i32 %tmp9, %tmp12 br i1 %cmp7, label %for.body8, label %for.inc16 for.body8: ; preds = %land.rhs - %3 = bitcast i32* %lsr.iv2 to i8* - %4 = bitcast i32* %lsr.iv to i8* - %sunkaddr = getelementptr i8, i8* %4, i32 %lsr.iv4 - %5 = bitcast i8* %sunkaddr to i32* - store i32 %tmp12, i32* %5, align 4 - %uglygep = getelementptr i8, i8* %3, i32 %lsr.iv4 - %uglygep6 = bitcast i8* %uglygep to i32* - store i32 %tmp9, i32* %uglygep6, align 4 + %3 = bitcast ptr %lsr.iv2 to ptr + %4 = bitcast ptr %lsr.iv to ptr + %sunkaddr = getelementptr i8, ptr %4, i32 %lsr.iv4 + %5 = bitcast ptr %sunkaddr to ptr + store i32 %tmp12, ptr %5, align 4 + %uglygep = getelementptr i8, ptr %3, i32 %lsr.iv4 + %uglygep6 = bitcast ptr %uglygep to ptr + store i32 %tmp9, ptr %uglygep6, align 4 %j.0 = sub nsw i32 %j.051, %gap.057 %lsr.iv.next = add i32 %lsr.iv4, %0 %cmp5 = icmp sgt i32 %j.0, -1 @@ -82,10 +82,10 @@ for.inc16: ; preds = %for.body8, %land.rhs, %for.cond4.preheader %inc = add nsw i32 %i.053, 1 - %scevgep = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep = getelementptr i32, ptr %lsr.iv, i32 1 %tmp16 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %tmp8, i32 1) %tmp17 = icmp ne i32 %tmp16, 0 - %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 1 + %scevgep3 = getelementptr i32, ptr %lsr.iv2, i32 1 br i1 %tmp17, label %for.cond4.preheader, label %for.cond.loopexit for.end20: ; preds = %for.cond.loopexit, %entry @@ -99,7 +99,7 @@ declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 ; Function Attrs: nounwind - declare void @llvm.stackprotector(i8*, i8**) #1 + declare void @llvm.stackprotector(ptr, ptr) #1 attributes #0 = { noduplicate nounwind } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir index 5966df9..12bc894 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-debug.mir @@ -1,12 +1,12 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s --- | - define dso_local arm_aapcs_vfpcc void @skip_debug(i32* nocapture %a, i16* nocapture readonly %b, i32 %N) !dbg !8 { + define dso_local arm_aapcs_vfpcc void @skip_debug(ptr nocapture %a, ptr nocapture readonly %b, i32 %N) !dbg !8 { entry: - call void @llvm.dbg.value(metadata i32* %a, metadata !17, metadata !DIExpression()), !dbg !23 - call void @llvm.dbg.value(metadata i16* %b, metadata !18, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata ptr %a, metadata !17, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata ptr %b, metadata !18, metadata !DIExpression()), !dbg !23 call void @llvm.dbg.value(metadata i32 %N, metadata !19, metadata !DIExpression()), !dbg !23 - %0 = load i32, i32* %a, align 4, !dbg !24 + %0 = load i32, ptr %a, align 4, !dbg !24 call void @llvm.dbg.value(metadata i32 %0, metadata !20, metadata !DIExpression()), !dbg !23 call void @llvm.dbg.value(metadata i32 0, metadata !21, metadata !DIExpression()), !dbg !29 %cmp7 = icmp eq i32 %N, 0, !dbg !30 @@ -26,17 +26,17 @@ br label %vector.body, !dbg !32 vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %b, %vector.ph ], !dbg !33 + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %b, %vector.ph ], !dbg !33 %vec.phi = phi <4 x i32> [ %7, %vector.ph ], [ %15, %vector.body ] %10 = phi i32 [ %start, %vector.ph ], [ %16, %vector.body ] %11 = phi i32 [ %N, %vector.ph ], [ %13, %vector.body ] - %lsr.iv14 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv14 = bitcast ptr %lsr.iv to ptr %12 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %11), !dbg !34 %13 = sub i32 %11, 4, !dbg !34 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv14, i32 2, <4 x i1> %12, <4 x i16> undef), !dbg !34 + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv14, i32 2, <4 x i1> %12, <4 x i16> undef), !dbg !34 %14 = sext <4 x i16> %wide.masked.load to <4 x i32>, !dbg !34 %15 = sub <4 x i32> %vec.phi, %14, !dbg !38 - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4, !dbg !33 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4, !dbg !33 %16 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %10, i32 1), !dbg !33 %17 = icmp ne i32 %16, 0, !dbg !33 br i1 %17, label %vector.body, label %middle.block, !dbg !33 @@ -52,12 +52,12 @@ for.cond.cleanup: ; preds = %middle.block, %entry %temp.0.lcssa = phi i32 [ %0, %entry ], [ %20, %middle.block ], !dbg !23 call void @llvm.dbg.value(metadata i32 %temp.0.lcssa, metadata !20, metadata !DIExpression()), !dbg !23 - store i32 %temp.0.lcssa, i32* %a, align 4, !dbg !42 + store i32 %temp.0.lcssa, ptr %a, align 4, !dbg !42 ret void, !dbg !43 } declare void @llvm.dbg.value(metadata, metadata, metadata) - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir index 1bfe8f9..6c9cd15 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/skip-vpt-debug.mir @@ -7,11 +7,11 @@ target triple = "thumbv8.1m.main-arm-none-eabihf" ; Function Attrs: nofree norecurse nounwind optsize - define hidden void @arm_max_no_idx_f32(float* nocapture readonly %pSrc, i32 %blockSize, float* nocapture %pResult) local_unnamed_addr #0 !dbg !13 { + define hidden void @arm_max_no_idx_f32(ptr nocapture readonly %pSrc, i32 %blockSize, ptr nocapture %pResult) local_unnamed_addr #0 !dbg !13 { entry: - call void @llvm.dbg.value(metadata float* %pSrc, metadata !24, metadata !DIExpression()), !dbg !29 + call void @llvm.dbg.value(metadata ptr %pSrc, metadata !24, metadata !DIExpression()), !dbg !29 call void @llvm.dbg.value(metadata i32 %blockSize, metadata !25, metadata !DIExpression()), !dbg !29 - call void @llvm.dbg.value(metadata float* %pResult, metadata !26, metadata !DIExpression()), !dbg !29 + call void @llvm.dbg.value(metadata ptr %pResult, metadata !26, metadata !DIExpression()), !dbg !29 call void @llvm.dbg.value(metadata float 0x3810000000000000, metadata !27, metadata !DIExpression()), !dbg !29 %cmp.not7 = icmp eq i32 %blockSize, 0, !dbg !30 br i1 %cmp.not7, label %while.end, label %vector.ph, !dbg !31 @@ -26,18 +26,18 @@ br label %vector.body, !dbg !31 vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv1 = phi float* [ %scevgep, %vector.body ], [ %pSrc, %vector.ph ] + %lsr.iv1 = phi ptr [ %scevgep, %vector.body ], [ %pSrc, %vector.ph ] %vec.phi = phi <4 x float> [ <float 0x3810000000000000, float 0x3810000000000000, float 0x3810000000000000, float 0x3810000000000000>, %vector.ph ], [ %10, %vector.body ] %4 = phi i32 [ %3, %vector.ph ], [ %11, %vector.body ] %5 = phi i32 [ %blockSize, %vector.ph ], [ %7, %vector.body ] - %lsr.iv12 = bitcast float* %lsr.iv1 to <4 x float>* + %lsr.iv12 = bitcast ptr %lsr.iv1 to ptr %6 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %5) %7 = sub i32 %5, 4 - %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %lsr.iv12, i32 4, <4 x i1> %6, <4 x float> poison), !dbg !32, !tbaa !34 + %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %lsr.iv12, i32 4, <4 x i1> %6, <4 x float> poison), !dbg !32, !tbaa !34 %8 = fcmp nnan ninf nsz olt <4 x float> %vec.phi, %wide.masked.load, !dbg !38 %9 = and <4 x i1> %6, %8, !dbg !40 %10 = select <4 x i1> %9, <4 x float> %wide.masked.load, <4 x float> %vec.phi, !dbg !40 - %scevgep = getelementptr float, float* %lsr.iv1, i32 4 + %scevgep = getelementptr float, ptr %lsr.iv1, i32 4 %11 = call i32 @llvm.loop.decrement.reg.i32(i32 %4, i32 1) %12 = icmp ne i32 %11, 0 br i1 %12, label %vector.body, label %middle.block, !llvm.loop !41 @@ -48,7 +48,7 @@ while.end: ; preds = %middle.block, %entry %maxValue.0.lcssa = phi float [ 0x3810000000000000, %entry ], [ %13, %middle.block ], !dbg !29 - store float %maxValue.0.lcssa, float* %pResult, align 4, !dbg !45, !tbaa !34 + store float %maxValue.0.lcssa, ptr %pResult, align 4, !dbg !45, !tbaa !34 ret void, !dbg !46 } @@ -59,7 +59,7 @@ declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #2 ; Function Attrs: argmemonly nofree nosync nounwind readonly willreturn - declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #3 + declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #3 ; Function Attrs: nofree nosync nounwind readnone willreturn declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) #2 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/subreg-liveness.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/subreg-liveness.mir index 3b142e7..046b5bf 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/subreg-liveness.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/subreg-liveness.mir @@ -5,7 +5,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-arm-none-eabi" - define i32 @test(i8* nocapture readnone %x, i32* noalias %y, i32 %n, <4 x i32> %0, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, <4 x i32> %p) { + define i32 @test(ptr nocapture readnone %x, ptr noalias %y, i32 %n, <4 x i32> %0, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, <4 x i32> %p) { entry: %cmp13 = icmp sgt i32 %n, 0 br i1 %cmp13, label %while.body.preheader, label %while.end @@ -20,15 +20,15 @@ br label %while.body while.body: ; preds = %while.body.preheader, %while.body - %y.addr.016 = phi i32* [ %add.ptr, %while.body ], [ %y, %while.body.preheader ] + %y.addr.016 = phi ptr [ %add.ptr, %while.body ], [ %y, %while.body.preheader ] %s.015 = phi <4 x i32> [ %mul, %while.body ], [ <i32 1, i32 1, i32 1, i32 1>, %while.body.preheader ] %n.addr.014 = phi i32 [ %12, %while.body ], [ %n, %while.body.preheader ] %9 = phi i32 [ %8, %while.body.preheader ], [ %13, %while.body ] - %y.addr.0161 = bitcast i32* %y.addr.016 to <4 x i32>* + %y.addr.0161 = bitcast ptr %y.addr.016 to ptr %10 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %n.addr.014) - %11 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %y.addr.0161, i32 4, <4 x i1> %10, <4 x i32> zeroinitializer) + %11 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %y.addr.0161, i32 4, <4 x i1> %10, <4 x i32> zeroinitializer) %mul = mul <4 x i32> %11, %s.015 - %add.ptr = getelementptr inbounds i32, i32* %y.addr.016, i32 4 + %add.ptr = getelementptr inbounds i32, ptr %y.addr.016, i32 4 %12 = add i32 %n.addr.014, -4 %13 = call i32 @llvm.loop.decrement.reg.i32(i32 %9, i32 1) %14 = icmp ne i32 %13, 0 @@ -43,7 +43,7 @@ } declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1 - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2 + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2 declare i32 @llvm.smin.i32(i32, i32) #3 declare i32 @llvm.start.loop.iterations.i32(i32) #4 declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll index 7777d7a..c355f8f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tp-multiple-vpst.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s -define dso_local arm_aapcs_vfpcc i32 @minmaxval4(i32* nocapture readonly %x, i32* nocapture %minp) { +define dso_local arm_aapcs_vfpcc i32 @minmaxval4(ptr nocapture readonly %x, ptr nocapture %minp) { ; CHECK-LABEL: minmaxval4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} @@ -31,10 +31,10 @@ vector.body: ; preds = %vector.body, %entry %index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, %entry ], [ %5, %vector.body ] %vec.phi29 = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %entry ], [ %7, %vector.body ] - %0 = getelementptr inbounds i32, i32* %x, i32 %index + %0 = getelementptr inbounds i32, ptr %x, i32 %index %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 10) - %1 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %2 = icmp sgt <4 x i32> %wide.masked.load, %vec.phi29 %3 = icmp slt <4 x i32> %wide.masked.load, %vec.phi %4 = and <4 x i1> %active.lane.mask, %3 @@ -48,12 +48,12 @@ vector.body: ; preds = %vector.body, %entry middle.block: ; preds = %vector.body %9 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %7) %10 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %5) - store i32 %10, i32* %minp, align 4 + store i32 %10, ptr %minp, align 4 ret i32 %9 } declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #1 -declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2 +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2 declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) #3 declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) #3 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir index 57cfaa88..9afdce1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredicated-max.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve %s -run-pass=arm-low-overhead-loops -o - | FileCheck %s --- | - define dso_local void @variant_max_use(i16* nocapture readonly %a, i16* %c, i32 %N) #0 { + define dso_local void @variant_max_use(ptr nocapture readonly %a, ptr %c, i32 %N) #0 { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -19,17 +19,17 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] - %lsr.iv.2 = phi i16* [ %scevgep.2, %vector.body ], [ %c, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv.2 = phi ptr [ %scevgep.2, %vector.body ], [ %c, %vector.ph ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <8 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %tmp7) %tmp9 = sub i32 %tmp7, 8 - %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef) + %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef) %min = tail call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %wide.masked.load) - store i16 %min, i16* %lsr.iv.2 - %scevgep = getelementptr i16, i16* %lsr.iv, i32 8 - %scevgep.2 = getelementptr i16, i16* %lsr.iv.2, i32 1 + store i16 %min, ptr %lsr.iv.2 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 8 + %scevgep.2 = getelementptr i16, ptr %lsr.iv.2, i32 1 %tmp10 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp11 = icmp ne i32 %tmp10, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -39,7 +39,7 @@ ret void } - declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) + declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) declare <8 x i1> @llvm.arm.mve.vctp16(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll index 888fbcc..6b5b6b2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -tail-predication=enabled --verify-machineinstrs %s -o - | FileCheck %s -define void @arm_cmplx_mag_squared_q15_mve(i16* %pSrc, i16* %pDst, i32 %blockSize) { +define void @arm_cmplx_mag_squared_q15_mve(ptr %pSrc, ptr %pDst, i32 %blockSize) { ; CHECK-LABEL: arm_cmplx_mag_squared_q15_mve: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} @@ -24,20 +24,20 @@ entry: do.body: ; preds = %do.body, %entry %blockSize.addr.0 = phi i32 [ %blockSize, %entry ], [ %sub, %do.body ] - %pDst.addr.0 = phi i16* [ %pDst, %entry ], [ %add.ptr7, %do.body ] - %pSrc.addr.0 = phi i16* [ %pSrc, %entry ], [ %add.ptr, %do.body ] + %pDst.addr.0 = phi ptr [ %pDst, %entry ], [ %add.ptr7, %do.body ] + %pSrc.addr.0 = phi ptr [ %pSrc, %entry ], [ %add.ptr, %do.body ] %0 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %blockSize.addr.0) - %1 = tail call { <8 x i16>, <8 x i16> } @llvm.arm.mve.vld2q.v8i16.p0i16(i16* %pSrc.addr.0) + %1 = tail call { <8 x i16>, <8 x i16> } @llvm.arm.mve.vld2q.v8i16.p0(ptr %pSrc.addr.0) %2 = extractvalue { <8 x i16>, <8 x i16> } %1, 0 %3 = extractvalue { <8 x i16>, <8 x i16> } %1, 1 %4 = tail call <8 x i16> @llvm.arm.mve.mulh.predicated.v8i16.v8i1(<8 x i16> %2, <8 x i16> %2, i32 0, <8 x i1> %0, <8 x i16> undef) %5 = tail call <8 x i16> @llvm.arm.mve.mulh.predicated.v8i16.v8i1(<8 x i16> %3, <8 x i16> %3, i32 0, <8 x i1> %0, <8 x i16> undef) %6 = tail call <8 x i16> @llvm.arm.mve.qadd.predicated.v8i16.v8i1(<8 x i16> %4, <8 x i16> %5, i32 0, <8 x i1> %0, <8 x i16> undef) %7 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %6, i32 1, i32 0, <8 x i1> %0, <8 x i16> undef) - %8 = bitcast i16* %pDst.addr.0 to <8 x i16>* - tail call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %7, <8 x i16>* %8, i32 2, <8 x i1> %0) - %add.ptr = getelementptr inbounds i16, i16* %pSrc.addr.0, i32 16 - %add.ptr7 = getelementptr inbounds i16, i16* %pDst.addr.0, i32 8 + %8 = bitcast ptr %pDst.addr.0 to ptr + tail call void @llvm.masked.store.v8i16.p0(<8 x i16> %7, ptr %8, i32 2, <8 x i1> %0) + %add.ptr = getelementptr inbounds i16, ptr %pSrc.addr.0, i32 16 + %add.ptr7 = getelementptr inbounds i16, ptr %pDst.addr.0, i32 8 %sub = add i32 %blockSize.addr.0, -8 %cmp = icmp sgt i32 %sub, 0 br i1 %cmp, label %do.body, label %do.end @@ -46,7 +46,7 @@ do.end: ; preds = %do.body ret void } -define i32 @bad(i32* readonly %x, i32* nocapture readonly %y, i32 %n) { +define i32 @bad(ptr readonly %x, ptr nocapture readonly %y, i32 %n) { ; CHECK-LABEL: bad: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} @@ -78,15 +78,15 @@ entry: do.body: ; preds = %do.body, %entry %s.0 = phi i32 [ 0, %entry ], [ %5, %do.body ] %n.addr.0 = phi i32 [ %n, %entry ], [ %sub, %do.body ] - %y.addr.0 = phi i32* [ %y, %entry ], [ %add.ptr1, %do.body ] - %x.addr.0 = phi i32* [ %x, %entry ], [ %add.ptr, %do.body ] + %y.addr.0 = phi ptr [ %y, %entry ], [ %add.ptr1, %do.body ] + %x.addr.0 = phi ptr [ %x, %entry ], [ %add.ptr, %do.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %n.addr.0) - %1 = bitcast i32* %x.addr.0 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.0, i32 4 - %3 = bitcast i32* %y.addr.0 to <4 x i32>* - %4 = load <4 x i32>, <4 x i32>* %3, align 4 - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.0, i32 4 + %1 = bitcast ptr %x.addr.0 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.0, i32 4 + %3 = bitcast ptr %y.addr.0 to ptr + %4 = load <4 x i32>, ptr %3, align 4 + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.0, i32 4 %5 = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 0, i32 %s.0, <4 x i32> %2, <4 x i32> %4) %sub = add nsw i32 %n.addr.0, -4 %cmp = icmp sgt i32 %n.addr.0, 4 @@ -96,7 +96,7 @@ do.end: ; preds = %do.body ret i32 %5 } -define i32 @good(i32* readonly %x, i32* readonly %y, i32 %n) { +define i32 @good(ptr readonly %x, ptr readonly %y, i32 %n) { ; CHECK-LABEL: good: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} @@ -117,15 +117,15 @@ entry: do.body: ; preds = %do.body, %entry %s.0 = phi i32 [ 0, %entry ], [ %5, %do.body ] %n.addr.0 = phi i32 [ %n, %entry ], [ %sub, %do.body ] - %y.addr.0 = phi i32* [ %y, %entry ], [ %add.ptr1, %do.body ] - %x.addr.0 = phi i32* [ %x, %entry ], [ %add.ptr, %do.body ] + %y.addr.0 = phi ptr [ %y, %entry ], [ %add.ptr1, %do.body ] + %x.addr.0 = phi ptr [ %x, %entry ], [ %add.ptr, %do.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %n.addr.0) - %1 = bitcast i32* %x.addr.0 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.0, i32 4 - %3 = bitcast i32* %y.addr.0 to <4 x i32>* - %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.0, i32 4 + %1 = bitcast ptr %x.addr.0 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.0, i32 4 + %3 = bitcast ptr %y.addr.0 to ptr + %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.0, i32 4 %5 = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 0, i32 %s.0, <4 x i32> %2, <4 x i32> %4) %sub = add nsw i32 %n.addr.0, -4 %cmp = icmp sgt i32 %n.addr.0, 4 @@ -135,7 +135,7 @@ do.end: ; preds = %do.body ret i32 %5 } -define i32 @good2(i32* nocapture readonly %x, i32* nocapture readonly %y, i32 %n) { +define i32 @good2(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) { ; CHECK-LABEL: good2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} @@ -156,15 +156,15 @@ entry: do.body: ; preds = %do.body, %entry %s.0 = phi i32 [ 0, %entry ], [ %5, %do.body ] %n.addr.0 = phi i32 [ %n, %entry ], [ %sub, %do.body ] - %y.addr.0 = phi i32* [ %y, %entry ], [ %add.ptr1, %do.body ] - %x.addr.0 = phi i32* [ %x, %entry ], [ %add.ptr, %do.body ] + %y.addr.0 = phi ptr [ %y, %entry ], [ %add.ptr1, %do.body ] + %x.addr.0 = phi ptr [ %x, %entry ], [ %add.ptr, %do.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %n.addr.0) - %1 = bitcast i32* %x.addr.0 to <4 x i32>* - %2 = load <4 x i32>, <4 x i32>* %1, align 4 - %add.ptr = getelementptr inbounds i32, i32* %x.addr.0, i32 4 - %3 = bitcast i32* %y.addr.0 to <4 x i32>* - %4 = load <4 x i32>, <4 x i32>* %3, align 4 - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.0, i32 4 + %1 = bitcast ptr %x.addr.0 to ptr + %2 = load <4 x i32>, ptr %1, align 4 + %add.ptr = getelementptr inbounds i32, ptr %x.addr.0, i32 4 + %3 = bitcast ptr %y.addr.0 to ptr + %4 = load <4 x i32>, ptr %3, align 4 + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.0, i32 4 %5 = tail call i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 %s.0, <4 x i32> %2, <4 x i32> %4, <4 x i1> %0) %sub = add nsw i32 %n.addr.0, -4 %cmp = icmp sgt i32 %n.addr.0, 4 @@ -175,12 +175,12 @@ do.end: ; preds = %do.body } declare <8 x i1> @llvm.arm.mve.vctp16(i32) -declare { <8 x i16>, <8 x i16> } @llvm.arm.mve.vld2q.v8i16.p0i16(i16*) +declare { <8 x i16>, <8 x i16> } @llvm.arm.mve.vld2q.v8i16.p0(ptr) declare <8 x i16> @llvm.arm.mve.mulh.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>, <8 x i16>) declare <8 x i16> @llvm.arm.mve.qadd.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>, <8 x i16>) declare <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>) -declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) +declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>) declare i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i1>) declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1 -declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2 +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2 declare i32 @llvm.arm.mve.vmldava.v4i32(i32, i32, i32, i32, <4 x i32>, <4 x i32>) #1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unrolled-and-vector.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unrolled-and-vector.mir index 7e2eda8..482a87e 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unrolled-and-vector.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unrolled-and-vector.mir @@ -2,20 +2,20 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - define dso_local arm_aapcs_vfpcc void @unrolled_and_vector(i8* nocapture %res, i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) { + define dso_local arm_aapcs_vfpcc void @unrolled_and_vector(ptr nocapture %res, ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) { entry: %cmp10 = icmp eq i32 %N, 0 br i1 %cmp10, label %for.cond.cleanup, label %vector.memcheck vector.memcheck: ; preds = %entry - %scevgep = getelementptr i8, i8* %res, i32 %N - %scevgep12 = getelementptr i8, i8* %a, i32 %N - %scevgep13 = getelementptr i8, i8* %b, i32 %N - %bound0 = icmp ugt i8* %scevgep12, %res - %bound1 = icmp ugt i8* %scevgep, %a + %scevgep = getelementptr i8, ptr %res, i32 %N + %scevgep12 = getelementptr i8, ptr %a, i32 %N + %scevgep13 = getelementptr i8, ptr %b, i32 %N + %bound0 = icmp ugt ptr %scevgep12, %res + %bound1 = icmp ugt ptr %scevgep, %a %found.conflict = and i1 %bound0, %bound1 - %bound014 = icmp ugt i8* %scevgep13, %res - %bound115 = icmp ugt i8* %scevgep, %b + %bound014 = icmp ugt ptr %scevgep13, %res + %bound115 = icmp ugt ptr %scevgep, %b %found.conflict16 = and i1 %bound014, %bound115 %conflict.rdx = or i1 %found.conflict, %found.conflict16 %0 = add i32 %N, 15 @@ -45,23 +45,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv50 = phi i8* [ %scevgep51, %vector.body ], [ %res, %vector.ph ] - %lsr.iv47 = phi i8* [ %scevgep48, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep45, %vector.body ], [ %a, %vector.ph ] + %lsr.iv50 = phi ptr [ %scevgep51, %vector.body ], [ %res, %vector.ph ] + %lsr.iv47 = phi ptr [ %scevgep48, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep45, %vector.body ], [ %a, %vector.ph ] %12 = phi i32 [ %start2, %vector.ph ], [ %17, %vector.body ] %13 = phi i32 [ %N, %vector.ph ], [ %15, %vector.body ] - %lsr.iv5052 = bitcast i8* %lsr.iv50 to <16 x i8>* - %lsr.iv4749 = bitcast i8* %lsr.iv47 to <16 x i8>* - %lsr.iv46 = bitcast i8* %lsr.iv to <16 x i8>* + %lsr.iv5052 = bitcast ptr %lsr.iv50 to ptr + %lsr.iv4749 = bitcast ptr %lsr.iv47 to ptr + %lsr.iv46 = bitcast ptr %lsr.iv to ptr %14 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %13) %15 = sub i32 %13, 16 - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv46, i32 1, <16 x i1> %14, <16 x i8> undef) - %wide.masked.load19 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv4749, i32 1, <16 x i1> %14, <16 x i8> undef) + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv46, i32 1, <16 x i1> %14, <16 x i8> undef) + %wide.masked.load19 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv4749, i32 1, <16 x i1> %14, <16 x i8> undef) %16 = add <16 x i8> %wide.masked.load19, %wide.masked.load - call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %16, <16 x i8>* %lsr.iv5052, i32 1, <16 x i1> %14) - %scevgep45 = getelementptr i8, i8* %lsr.iv, i32 16 - %scevgep48 = getelementptr i8, i8* %lsr.iv47, i32 16 - %scevgep51 = getelementptr i8, i8* %lsr.iv50, i32 16 + call void @llvm.masked.store.v16i8.p0(<16 x i8> %16, ptr %lsr.iv5052, i32 1, <16 x i1> %14) + %scevgep45 = getelementptr i8, ptr %lsr.iv, i32 16 + %scevgep48 = getelementptr i8, ptr %lsr.iv47, i32 16 + %scevgep51 = getelementptr i8, ptr %lsr.iv50, i32 16 %17 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %12, i32 1) %18 = icmp ne i32 %17, 0 br i1 %18, label %vector.body, label %for.cond.cleanup @@ -72,13 +72,13 @@ br i1 %lcmp.mod, label %for.cond.cleanup, label %for.body.epil for.body.epil: ; preds = %for.cond.cleanup.loopexit.unr-lcssa - %arrayidx.epil = getelementptr inbounds i8, i8* %a, i32 %i.011.unr - %19 = load i8, i8* %arrayidx.epil, align 1 - %arrayidx1.epil = getelementptr inbounds i8, i8* %b, i32 %i.011.unr - %20 = load i8, i8* %arrayidx1.epil, align 1 + %arrayidx.epil = getelementptr inbounds i8, ptr %a, i32 %i.011.unr + %19 = load i8, ptr %arrayidx.epil, align 1 + %arrayidx1.epil = getelementptr inbounds i8, ptr %b, i32 %i.011.unr + %20 = load i8, ptr %arrayidx1.epil, align 1 %add.epil = add i8 %20, %19 - %arrayidx4.epil = getelementptr inbounds i8, i8* %res, i32 %i.011.unr - store i8 %add.epil, i8* %arrayidx4.epil, align 1 + %arrayidx4.epil = getelementptr inbounds i8, ptr %res, i32 %i.011.unr + store i8 %add.epil, ptr %arrayidx4.epil, align 1 %inc.epil = add nuw i32 %i.011.unr, 1 %epil.iter.cmp = icmp eq i32 %xtraiter, 1 br i1 %epil.iter.cmp, label %for.cond.cleanup, label %for.body.epil.1 @@ -89,76 +89,76 @@ for.body: ; preds = %for.body, %for.body.preheader.new %i.011 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ] %21 = phi i32 [ %start1, %for.body.preheader.new ], [ %30, %for.body ] - %scevgep23 = getelementptr i8, i8* %a, i32 %i.011 - %scevgep2453 = bitcast i8* %scevgep23 to i8* - %22 = load i8, i8* %scevgep2453, align 1 - %scevgep27 = getelementptr i8, i8* %b, i32 %i.011 - %scevgep2854 = bitcast i8* %scevgep27 to i8* - %23 = load i8, i8* %scevgep2854, align 1 + %scevgep23 = getelementptr i8, ptr %a, i32 %i.011 + %scevgep2453 = bitcast ptr %scevgep23 to ptr + %22 = load i8, ptr %scevgep2453, align 1 + %scevgep27 = getelementptr i8, ptr %b, i32 %i.011 + %scevgep2854 = bitcast ptr %scevgep27 to ptr + %23 = load i8, ptr %scevgep2854, align 1 %add = add i8 %23, %22 - %scevgep31 = getelementptr i8, i8* %res, i32 %i.011 - %scevgep3255 = bitcast i8* %scevgep31 to i8* - store i8 %add, i8* %scevgep3255, align 1 - %scevgep39 = getelementptr i8, i8* %a, i32 %i.011 - %scevgep40 = getelementptr i8, i8* %scevgep39, i32 1 - %24 = load i8, i8* %scevgep40, align 1 - %scevgep41 = getelementptr i8, i8* %b, i32 %i.011 - %scevgep42 = getelementptr i8, i8* %scevgep41, i32 1 - %25 = load i8, i8* %scevgep42, align 1 + %scevgep31 = getelementptr i8, ptr %res, i32 %i.011 + %scevgep3255 = bitcast ptr %scevgep31 to ptr + store i8 %add, ptr %scevgep3255, align 1 + %scevgep39 = getelementptr i8, ptr %a, i32 %i.011 + %scevgep40 = getelementptr i8, ptr %scevgep39, i32 1 + %24 = load i8, ptr %scevgep40, align 1 + %scevgep41 = getelementptr i8, ptr %b, i32 %i.011 + %scevgep42 = getelementptr i8, ptr %scevgep41, i32 1 + %25 = load i8, ptr %scevgep42, align 1 %add.1 = add i8 %25, %24 - %scevgep43 = getelementptr i8, i8* %res, i32 %i.011 - %scevgep44 = getelementptr i8, i8* %scevgep43, i32 1 - store i8 %add.1, i8* %scevgep44, align 1 - %scevgep33 = getelementptr i8, i8* %a, i32 %i.011 - %scevgep34 = getelementptr i8, i8* %scevgep33, i32 2 - %26 = load i8, i8* %scevgep34, align 1 - %scevgep35 = getelementptr i8, i8* %b, i32 %i.011 - %scevgep36 = getelementptr i8, i8* %scevgep35, i32 2 - %27 = load i8, i8* %scevgep36, align 1 + %scevgep43 = getelementptr i8, ptr %res, i32 %i.011 + %scevgep44 = getelementptr i8, ptr %scevgep43, i32 1 + store i8 %add.1, ptr %scevgep44, align 1 + %scevgep33 = getelementptr i8, ptr %a, i32 %i.011 + %scevgep34 = getelementptr i8, ptr %scevgep33, i32 2 + %26 = load i8, ptr %scevgep34, align 1 + %scevgep35 = getelementptr i8, ptr %b, i32 %i.011 + %scevgep36 = getelementptr i8, ptr %scevgep35, i32 2 + %27 = load i8, ptr %scevgep36, align 1 %add.2 = add i8 %27, %26 - %scevgep37 = getelementptr i8, i8* %res, i32 %i.011 - %scevgep38 = getelementptr i8, i8* %scevgep37, i32 2 - store i8 %add.2, i8* %scevgep38, align 1 - %scevgep21 = getelementptr i8, i8* %a, i32 %i.011 - %scevgep22 = getelementptr i8, i8* %scevgep21, i32 3 - %28 = load i8, i8* %scevgep22, align 1 - %scevgep25 = getelementptr i8, i8* %b, i32 %i.011 - %scevgep26 = getelementptr i8, i8* %scevgep25, i32 3 - %29 = load i8, i8* %scevgep26, align 1 + %scevgep37 = getelementptr i8, ptr %res, i32 %i.011 + %scevgep38 = getelementptr i8, ptr %scevgep37, i32 2 + store i8 %add.2, ptr %scevgep38, align 1 + %scevgep21 = getelementptr i8, ptr %a, i32 %i.011 + %scevgep22 = getelementptr i8, ptr %scevgep21, i32 3 + %28 = load i8, ptr %scevgep22, align 1 + %scevgep25 = getelementptr i8, ptr %b, i32 %i.011 + %scevgep26 = getelementptr i8, ptr %scevgep25, i32 3 + %29 = load i8, ptr %scevgep26, align 1 %add.3 = add i8 %29, %28 - %scevgep29 = getelementptr i8, i8* %res, i32 %i.011 - %scevgep30 = getelementptr i8, i8* %scevgep29, i32 3 - store i8 %add.3, i8* %scevgep30, align 1 + %scevgep29 = getelementptr i8, ptr %res, i32 %i.011 + %scevgep30 = getelementptr i8, ptr %scevgep29, i32 3 + store i8 %add.3, ptr %scevgep30, align 1 %inc.3 = add nuw i32 %i.011, 4 %30 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %21, i32 1) %31 = icmp ne i32 %30, 0 br i1 %31, label %for.body, label %for.cond.cleanup.loopexit.unr-lcssa for.body.epil.1: ; preds = %for.body.epil - %arrayidx.epil.1 = getelementptr inbounds i8, i8* %a, i32 %inc.epil - %32 = load i8, i8* %arrayidx.epil.1, align 1 - %arrayidx1.epil.1 = getelementptr inbounds i8, i8* %b, i32 %inc.epil - %33 = load i8, i8* %arrayidx1.epil.1, align 1 + %arrayidx.epil.1 = getelementptr inbounds i8, ptr %a, i32 %inc.epil + %32 = load i8, ptr %arrayidx.epil.1, align 1 + %arrayidx1.epil.1 = getelementptr inbounds i8, ptr %b, i32 %inc.epil + %33 = load i8, ptr %arrayidx1.epil.1, align 1 %add.epil.1 = add i8 %33, %32 - %arrayidx4.epil.1 = getelementptr inbounds i8, i8* %res, i32 %inc.epil - store i8 %add.epil.1, i8* %arrayidx4.epil.1, align 1 + %arrayidx4.epil.1 = getelementptr inbounds i8, ptr %res, i32 %inc.epil + store i8 %add.epil.1, ptr %arrayidx4.epil.1, align 1 %inc.epil.1 = add nuw i32 %i.011.unr, 2 %epil.iter.cmp.1 = icmp eq i32 %xtraiter, 2 br i1 %epil.iter.cmp.1, label %for.cond.cleanup, label %for.body.epil.2 for.body.epil.2: ; preds = %for.body.epil.1 - %arrayidx.epil.2 = getelementptr inbounds i8, i8* %a, i32 %inc.epil.1 - %34 = load i8, i8* %arrayidx.epil.2, align 1 - %arrayidx1.epil.2 = getelementptr inbounds i8, i8* %b, i32 %inc.epil.1 - %35 = load i8, i8* %arrayidx1.epil.2, align 1 + %arrayidx.epil.2 = getelementptr inbounds i8, ptr %a, i32 %inc.epil.1 + %34 = load i8, ptr %arrayidx.epil.2, align 1 + %arrayidx1.epil.2 = getelementptr inbounds i8, ptr %b, i32 %inc.epil.1 + %35 = load i8, ptr %arrayidx1.epil.2, align 1 %add.epil.2 = add i8 %35, %34 - %arrayidx4.epil.2 = getelementptr inbounds i8, i8* %res, i32 %inc.epil.1 - store i8 %add.epil.2, i8* %arrayidx4.epil.2, align 1 + %arrayidx4.epil.2 = getelementptr inbounds i8, ptr %res, i32 %inc.epil.1 + store i8 %add.epil.2, ptr %arrayidx4.epil.2, align 1 br label %for.cond.cleanup } - declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) #1 - declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>) #2 + declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) #1 + declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 declare <16 x i1> @llvm.arm.mve.vctp8(i32) #4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir index 2d1c743..ec17e1e 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vaddv.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - define dso_local void @legal_vaddv_s32(i16* nocapture readonly %a, i32* %c, i32 %N) { + define dso_local void @legal_vaddv_s32(ptr nocapture readonly %a, ptr %c, i32 %N) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -18,18 +18,18 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] - %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp10) - store i32 %tmp11, i32* %store.addr - %store.next = getelementptr i32, i32* %store.addr, i32 1 - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 + store i32 %tmp11, ptr %store.addr + %store.next = getelementptr i32, ptr %store.addr, i32 1 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -39,7 +39,7 @@ ret void } - define dso_local void @legal_vaddv_s16(i16* nocapture readonly %a, i32* %c, i32 %N) { + define dso_local void @legal_vaddv_s16(ptr nocapture readonly %a, ptr %c, i32 %N) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -56,18 +56,18 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] - %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <8 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %tmp7) %tmp9 = sub i32 %tmp7, 8 - %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef) + %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef) %sext = sext <8 x i16> %wide.masked.load to <8 x i32> %tmp11 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %sext) - store i32 %tmp11, i32* %store.addr - %store.next = getelementptr i32, i32* %store.addr, i32 1 - %scevgep = getelementptr i16, i16* %lsr.iv, i32 8 + store i32 %tmp11, ptr %store.addr + %store.next = getelementptr i32, ptr %store.addr, i32 1 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 8 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -77,7 +77,7 @@ ret void } - define dso_local void @legal_vaddv_s8(i8* nocapture readonly %a, i32* %c, i32 %N) { + define dso_local void @legal_vaddv_s8(ptr nocapture readonly %a, ptr %c, i32 %N) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 7 @@ -94,18 +94,18 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ] - %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i8* %lsr.iv to <16 x i8>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %tmp7) %tmp9 = sub i32 %tmp7, 16 - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef) + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef) %sext = sext <16 x i8> %wide.masked.load to <16 x i32> %tmp11 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %sext) - store i32 %tmp11, i32* %store.addr - %store.next = getelementptr i32, i32* %store.addr, i32 1 - %scevgep = getelementptr i8, i8* %lsr.iv, i32 16 + store i32 %tmp11, ptr %store.addr + %store.next = getelementptr i32, ptr %store.addr, i32 1 + %scevgep = getelementptr i8, ptr %lsr.iv, i32 16 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -115,7 +115,7 @@ ret void } - define dso_local i32 @legal_vaddva_s32(i16* nocapture readonly %a, i32 %N) { + define dso_local i32 @legal_vaddva_s32(ptr nocapture readonly %a, i32 %N) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -132,17 +132,17 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] %acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp10) %acc.next = add i32 %tmp11, %acc - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -153,7 +153,7 @@ ret i32 %res } - define dso_local void @illegal_vaddv_s32(i16* nocapture readonly %a, i32* %c, i32 %N) { + define dso_local void @illegal_vaddv_s32(ptr nocapture readonly %a, ptr %c, i32 %N) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -170,19 +170,19 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] - %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> %not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1> %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not) - store i32 %tmp11, i32* %store.addr - %store.next = getelementptr i32, i32* %store.addr, i32 1 - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 + store i32 %tmp11, ptr %store.addr + %store.next = getelementptr i32, ptr %store.addr, i32 1 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -192,7 +192,7 @@ ret void } - define dso_local i32 @illegal_vaddva_s32(i16* nocapture readonly %a, i32 %N) { + define dso_local i32 @illegal_vaddva_s32(ptr nocapture readonly %a, i32 %N) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -209,18 +209,18 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] %acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> %not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1> %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not) %acc.next = add i32 %tmp11, %acc - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -231,7 +231,7 @@ ret i32 %res } - define dso_local void @illegal_vaddv_u32(i16* nocapture readonly %a, i32* %c, i32 %N) { + define dso_local void @illegal_vaddv_u32(ptr nocapture readonly %a, ptr %c, i32 %N) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -248,19 +248,19 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] - %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = zext <4 x i16> %wide.masked.load to <4 x i32> %not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1> %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not) - store i32 %tmp11, i32* %store.addr - %store.next = getelementptr i32, i32* %store.addr, i32 1 - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 + store i32 %tmp11, ptr %store.addr + %store.next = getelementptr i32, ptr %store.addr, i32 1 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -270,7 +270,7 @@ ret void } - define dso_local i32 @illegal_vaddva_u32(i16* nocapture readonly %a, i32 %N) { + define dso_local i32 @illegal_vaddva_u32(ptr nocapture readonly %a, i32 %N) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -287,18 +287,18 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] %acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = zext <4 x i16> %wide.masked.load to <4 x i32> %not = xor <4 x i32> %tmp10, <i32 -1, i32 -1, i32 -1, i32 -1> %tmp11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %not) %acc.next = add i32 %tmp11, %acc - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -309,7 +309,7 @@ ret i32 %res } - define dso_local void @illegal_vaddv_s16(i8* nocapture readonly %a, i32* %c, i32 %N, <8 x i16> %pass) { + define dso_local void @illegal_vaddv_s16(ptr nocapture readonly %a, ptr %c, i32 %N, <8 x i16> %pass) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -326,20 +326,20 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ] - %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i8* %lsr.iv to <8 x i8>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %tmp7) %tmp9 = sub i32 %tmp7, 8 - %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv17, i32 1, <8 x i1> %tmp8, <8 x i8> undef) + %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %lsr.iv17, i32 1, <8 x i1> %tmp8, <8 x i8> undef) %sext.wide = sext <8 x i8> %wide.masked.load to <8 x i16> %sub = sub <8 x i16> %sext.wide, %pass %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub) %sext.reduce = sext i16 %reduce to i32 - store i32 %sext.reduce, i32* %store.addr - %store.next = getelementptr i32, i32* %store.addr, i32 1 - %scevgep = getelementptr i8, i8* %lsr.iv, i32 8 + store i32 %sext.reduce, ptr %store.addr + %store.next = getelementptr i32, ptr %store.addr, i32 1 + %scevgep = getelementptr i8, ptr %lsr.iv, i32 8 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -349,7 +349,7 @@ ret void } - define dso_local i32 @illegal_vaddva_s16(i8* nocapture readonly %a, i32 %N, <8 x i16> %pass) { + define dso_local i32 @illegal_vaddva_s16(ptr nocapture readonly %a, i32 %N, <8 x i16> %pass) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -366,19 +366,19 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] %acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ] - %lsr.iv17 = bitcast i8* %lsr.iv to <8 x i8>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %tmp7) %tmp9 = sub i32 %tmp7, 8 - %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv17, i32 1, <8 x i1> %tmp8, <8 x i8> undef) + %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %lsr.iv17, i32 1, <8 x i1> %tmp8, <8 x i8> undef) %sext.wide = sext <8 x i8> %wide.masked.load to <8 x i16> %sub = sub <8 x i16> %sext.wide, %pass %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub) %sext.reduce = sext i16 %reduce to i32 %acc.next = add i32 %sext.reduce, %acc - %scevgep = getelementptr i8, i8* %lsr.iv, i32 8 + %scevgep = getelementptr i8, ptr %lsr.iv, i32 8 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -389,7 +389,7 @@ ret i32 %res } - define dso_local void @illegal_vaddv_u16(i16* nocapture readonly %a, i32* %c, i32 %N, <8 x i16> %pass) { + define dso_local void @illegal_vaddv_u16(ptr nocapture readonly %a, ptr %c, i32 %N, <8 x i16> %pass) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -406,19 +406,19 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] - %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <8 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %tmp7) %tmp9 = sub i32 %tmp7, 8 - %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef) + %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef) %sub = sub <8 x i16> %wide.masked.load, %pass %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub) %zext.reduce = zext i16 %reduce to i32 - store i32 %zext.reduce, i32* %store.addr - %store.next = getelementptr i32, i32* %store.addr, i32 1 - %scevgep = getelementptr i16, i16* %lsr.iv, i32 8 + store i32 %zext.reduce, ptr %store.addr + %store.next = getelementptr i32, ptr %store.addr, i32 1 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 8 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -428,7 +428,7 @@ ret void } - define dso_local i32 @illegal_vaddva_u16(i16* nocapture readonly %a, i32 %N, <8 x i16> %pass) { + define dso_local i32 @illegal_vaddva_u16(ptr nocapture readonly %a, i32 %N, <8 x i16> %pass) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -445,18 +445,18 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] %acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <8 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %tmp7) %tmp9 = sub i32 %tmp7, 8 - %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef) + %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv17, i32 2, <8 x i1> %tmp8, <8 x i16> undef) %sub = sub <8 x i16> %wide.masked.load, %pass %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %sub) %zext.reduce = zext i16 %reduce to i32 %acc.next = add i32 %zext.reduce, %acc - %scevgep = getelementptr i16, i16* %lsr.iv, i32 8 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 8 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -467,7 +467,7 @@ ret i32 %res } - define dso_local void @illegal_vaddv_s8(i8* nocapture readonly %a, i32* %c, i32 %N, <16 x i8> %pass) { + define dso_local void @illegal_vaddv_s8(ptr nocapture readonly %a, ptr %c, i32 %N, <16 x i8> %pass) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 7 @@ -484,19 +484,19 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ] - %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i8* %lsr.iv to <16 x i8>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %tmp7) %tmp9 = sub i32 %tmp7, 16 - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef) + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef) %xor = xor <16 x i8> %wide.masked.load, %pass %reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor) %sext.reduce = sext i8 %reduce to i32 - store i32 %sext.reduce, i32* %store.addr - %store.next = getelementptr i32, i32* %store.addr, i32 1 - %scevgep = getelementptr i8, i8* %lsr.iv, i32 16 + store i32 %sext.reduce, ptr %store.addr + %store.next = getelementptr i32, ptr %store.addr, i32 1 + %scevgep = getelementptr i8, ptr %lsr.iv, i32 16 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -506,7 +506,7 @@ ret void } - define dso_local i32 @illegal_vaddva_s8(i8* nocapture readonly %a, i32 %N, <16 x i8> %pass) { + define dso_local i32 @illegal_vaddva_s8(ptr nocapture readonly %a, i32 %N, <16 x i8> %pass) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 7 @@ -523,18 +523,18 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] %acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ] - %lsr.iv17 = bitcast i8* %lsr.iv to <16 x i8>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %tmp7) %tmp9 = sub i32 %tmp7, 16 - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef) + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef) %xor = xor <16 x i8> %wide.masked.load, %pass %reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor) %sext.reduce = sext i8 %reduce to i32 %acc.next = add i32 %sext.reduce, %acc - %scevgep = getelementptr i8, i8* %lsr.iv, i32 16 + %scevgep = getelementptr i8, ptr %lsr.iv, i32 16 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -545,7 +545,7 @@ ret i32 %res } - define dso_local void @illegal_vaddv_u8(i8* nocapture readonly %a, i32* %c, i32 %N, <16 x i8> %pass) { + define dso_local void @illegal_vaddv_u8(ptr nocapture readonly %a, ptr %c, i32 %N, <16 x i8> %pass) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 7 @@ -562,19 +562,19 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ] - %store.addr = phi i32* [ %c, %vector.ph ], [ %store.next, %vector.body ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %store.addr = phi ptr [ %c, %vector.ph ], [ %store.next, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i8* %lsr.iv to <16 x i8>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %tmp7) %tmp9 = sub i32 %tmp7, 16 - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef) + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef) %xor = xor <16 x i8> %wide.masked.load, %pass %reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor) %zext.reduce = zext i8 %reduce to i32 - store i32 %zext.reduce, i32* %store.addr - %store.next = getelementptr i32, i32* %store.addr, i32 1 - %scevgep = getelementptr i8, i8* %lsr.iv, i32 16 + store i32 %zext.reduce, ptr %store.addr + %store.next = getelementptr i32, ptr %store.addr, i32 1 + %scevgep = getelementptr i8, ptr %lsr.iv, i32 16 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -584,7 +584,7 @@ ret void } - define dso_local i32 @illegal_vaddva_u8(i8* nocapture readonly %a, i32 %N, <16 x i8> %pass) { + define dso_local i32 @illegal_vaddva_u8(ptr nocapture readonly %a, i32 %N, <16 x i8> %pass) { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 7 @@ -601,18 +601,18 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] %acc = phi i32 [ 0, %vector.ph ], [ %acc.next, %vector.body ] - %lsr.iv17 = bitcast i8* %lsr.iv to <16 x i8>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr %tmp8 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %tmp7) %tmp9 = sub i32 %tmp7, 16 - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef) + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %lsr.iv17, i32 1, <16 x i1> %tmp8, <16 x i8> undef) %xor = xor <16 x i8> %wide.masked.load, %pass %reduce = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %xor) %zext.reduce = zext i8 %reduce to i32 %acc.next = add i32 %zext.reduce, %acc - %scevgep = getelementptr i8, i8* %lsr.iv, i32 16 + %scevgep = getelementptr i8, ptr %lsr.iv, i32 16 %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -623,7 +623,7 @@ ret i32 %res } - define hidden i32 @regalloc_legality_vaddva_u32(i16* %x, i16* %y, i32 %n) { + define hidden i32 @regalloc_legality_vaddva_u32(ptr %x, ptr %y, i32 %n) { entry: %cmp22 = icmp sgt i32 %n, 0 %0 = add i32 %n, 3 @@ -639,23 +639,23 @@ br label %while.body while.body: ; preds = %while.body.preheader, %while.body - %x.addr.026 = phi i16* [ %add.ptr, %while.body ], [ %x, %while.body.preheader ] - %y.addr.025 = phi i16* [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ] + %x.addr.026 = phi ptr [ %add.ptr, %while.body ], [ %x, %while.body.preheader ] + %y.addr.025 = phi ptr [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ] %n.addr.023 = phi i32 [ %sub, %while.body ], [ %n, %while.body.preheader ] %acc = phi i32 [ %acc.next, %while.body ], [ 0, %while.body.preheader ] %5 = phi i32 [ %start, %while.body.preheader ], [ %6, %while.body ] - %tmp3 = bitcast i16* %y.addr.025 to <4 x i16>* - %tmp1 = bitcast i16* %x.addr.026 to <4 x i16>* + %tmp3 = bitcast ptr %y.addr.025 to ptr + %tmp1 = bitcast ptr %x.addr.026 to ptr %tmp = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %n.addr.023) - %tmp2 = tail call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %tmp1, i32 2, <4 x i1> %tmp, <4 x i16> zeroinitializer) + %tmp2 = tail call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %tmp1, i32 2, <4 x i1> %tmp, <4 x i16> zeroinitializer) %zext.wide.1 = zext <4 x i16> %tmp2 to <4 x i32> - %tmp4 = tail call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %tmp3, i32 2, <4 x i1> %tmp, <4 x i16> zeroinitializer) + %tmp4 = tail call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %tmp3, i32 2, <4 x i1> %tmp, <4 x i16> zeroinitializer) %zext.wide.2 = zext <4 x i16> %tmp4 to <4 x i32> %or = or <4 x i32> %zext.wide.1, %zext.wide.2 %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %or) %acc.next = add i32 %reduce, %acc - %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 4 - %add.ptr4 = getelementptr inbounds i16, i16* %y.addr.025, i32 4 + %add.ptr = getelementptr inbounds i16, ptr %x.addr.026, i32 4 + %add.ptr4 = getelementptr inbounds i16, ptr %y.addr.025, i32 4 %sub = add nsw i32 %n.addr.023, -4 %6 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %5, i32 1) %7 = icmp ne i32 %6, 0 @@ -666,7 +666,7 @@ ret i32 %res } - define hidden i32 @regalloc_legality_vaddv_u16(i16* %x, i16* %y, i32 %n) { + define hidden i32 @regalloc_legality_vaddv_u16(ptr %x, ptr %y, i32 %n) { entry: %cmp22 = icmp sgt i32 %n, 0 %0 = add i32 %n, 7 @@ -682,22 +682,22 @@ br label %while.body while.body: ; preds = %while.body.preheader, %while.body - %x.addr.026 = phi i16* [ %add.ptr, %while.body ], [ %x, %while.body.preheader ] - %y.addr.025 = phi i16* [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ] + %x.addr.026 = phi ptr [ %add.ptr, %while.body ], [ %x, %while.body.preheader ] + %y.addr.025 = phi ptr [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ] %n.addr.023 = phi i32 [ %sub, %while.body ], [ %n, %while.body.preheader ] %acc = phi i32 [ %acc.next, %while.body ], [ 0, %while.body.preheader ] %5 = phi i32 [ %start, %while.body.preheader ], [ %6, %while.body ] - %tmp3 = bitcast i16* %y.addr.025 to <8 x i16>* - %tmp1 = bitcast i16* %x.addr.026 to <8 x i16>* + %tmp3 = bitcast ptr %y.addr.025 to ptr + %tmp1 = bitcast ptr %x.addr.026 to ptr %tmp = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %n.addr.023) - %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer) - %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer) + %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer) + %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer) %or = or <8 x i16> %tmp2, %tmp4 %reduce = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %or) %zext.reduce = zext i16 %reduce to i32 %acc.next = add i32 %zext.reduce, %acc - %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 8 - %add.ptr4 = getelementptr inbounds i16, i16* %y.addr.025, i32 8 + %add.ptr = getelementptr inbounds i16, ptr %x.addr.026, i32 8 + %add.ptr4 = getelementptr inbounds i16, ptr %y.addr.025, i32 8 %sub = add nsw i32 %n.addr.023, -8 %6 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %5, i32 1) %7 = icmp ne i32 %6, 0 @@ -708,7 +708,7 @@ ret i32 %res } - define hidden i32 @regalloc_illegality_vaddva_s32(i16* %x, i16* %y, i16* %z, i32 %n) { + define hidden i32 @regalloc_illegality_vaddva_s32(ptr %x, ptr %y, ptr %z, i32 %n) { entry: %cmp22 = icmp sgt i32 %n, 0 %0 = add i32 %n, 7 @@ -724,23 +724,23 @@ br label %while.body while.body: ; preds = %while.body.preheader, %while.body - %x.addr.026 = phi i16* [ %add.ptr, %while.body ], [ %x, %while.body.preheader ] - %y.addr.025 = phi i16* [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ] + %x.addr.026 = phi ptr [ %add.ptr, %while.body ], [ %x, %while.body.preheader ] + %y.addr.025 = phi ptr [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ] %n.addr.023 = phi i32 [ %sub, %while.body ], [ %n, %while.body.preheader ] %acc = phi i32 [ %acc.next, %while.body ], [ 0, %while.body.preheader ] %5 = phi i32 [ %start, %while.body.preheader ], [ %6, %while.body ] - %tmp3 = bitcast i16* %y.addr.025 to <8 x i16>* - %tmp1 = bitcast i16* %x.addr.026 to <8 x i16>* + %tmp3 = bitcast ptr %y.addr.025 to ptr + %tmp1 = bitcast ptr %x.addr.026 to ptr %tmp = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %n.addr.023) - %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer) - %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer) + %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer) + %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer) %tmp5 = tail call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp4, i32 0, i32 1) %tmp6 = tail call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp4, i32 0, i32 0) %mul = add <4 x i32> %tmp5, %tmp6 %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %mul) %acc.next = add i32 %reduce, %acc - %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 8 - %add.ptr4 = getelementptr inbounds i16, i16* %y.addr.025, i32 8 + %add.ptr = getelementptr inbounds i16, ptr %x.addr.026, i32 8 + %add.ptr4 = getelementptr inbounds i16, ptr %y.addr.025, i32 8 %sub = add nsw i32 %n.addr.023, -8 %6 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %5, i32 1) %7 = icmp ne i32 %6, 0 @@ -751,7 +751,7 @@ ret i32 %res } - define hidden i32 @illegal_vmull_non_zero(i16* %x, i16* %y, i16* %z, i32 %n) { + define hidden i32 @illegal_vmull_non_zero(ptr %x, ptr %y, ptr %z, i32 %n) { entry: %cmp22 = icmp sgt i32 %n, 0 %0 = add i32 %n, 7 @@ -767,21 +767,21 @@ br label %while.body while.body: ; preds = %while.body.preheader, %while.body - %x.addr.026 = phi i16* [ %add.ptr, %while.body ], [ %x, %while.body.preheader ] - %y.addr.025 = phi i16* [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ] + %x.addr.026 = phi ptr [ %add.ptr, %while.body ], [ %x, %while.body.preheader ] + %y.addr.025 = phi ptr [ %add.ptr4, %while.body ], [ %y, %while.body.preheader ] %n.addr.023 = phi i32 [ %sub, %while.body ], [ %n, %while.body.preheader ] %acc = phi i32 [ %acc.next, %while.body ], [ 0, %while.body.preheader ] %5 = phi i32 [ %start, %while.body.preheader ], [ %6, %while.body ] - %tmp3 = bitcast i16* %y.addr.025 to <8 x i16>* - %tmp1 = bitcast i16* %x.addr.026 to <8 x i16>* + %tmp3 = bitcast ptr %y.addr.025 to ptr + %tmp1 = bitcast ptr %x.addr.026 to ptr %tmp = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %n.addr.023) - %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer) - %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer) + %tmp2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp1, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer) + %tmp4 = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %tmp3, i32 2, <8 x i1> %tmp, <8 x i16> zeroinitializer) %mul = tail call <4 x i32> @llvm.arm.mve.vmull.v4i32.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp4, i32 0, i32 1) %reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %mul) %acc.next = add i32 %reduce, %acc - %add.ptr = getelementptr inbounds i16, i16* %x.addr.026, i32 8 - %add.ptr4 = getelementptr inbounds i16, i16* %y.addr.025, i32 8 + %add.ptr = getelementptr inbounds i16, ptr %x.addr.026, i32 8 + %add.ptr4 = getelementptr inbounds i16, ptr %y.addr.025, i32 8 %sub = add nsw i32 %n.addr.023, -8 %6 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %5, i32 1) %7 = icmp ne i32 %6, 0 @@ -792,12 +792,12 @@ ret i32 %res } - declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>) - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) - declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) - declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) - declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>) + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) + declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>) + declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) + declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination.ll index c8001df..e0a61b1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vcmp-vpst-combination.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -tail-predication=force-enabled-no-reductions -o - %s | FileCheck %s -define arm_aapcs_vfpcc <16 x i8> @vcmp_vpst_combination(<16 x i8>* %pSrc, i16 zeroext %blockSize, i8* nocapture %pResult, i32* nocapture %pIndex) { +define arm_aapcs_vfpcc <16 x i8> @vcmp_vpst_combination(ptr %pSrc, i16 zeroext %blockSize, ptr nocapture %pResult, ptr nocapture %pIndex) { ; CHECK-LABEL: vcmp_vpst_combination: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -28,7 +28,7 @@ do.body: ; preds = %do.body, %entry %curExtremValVec.0 = phi <16 x i8> [ <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, %entry ], [ %6, %do.body ] %blkCnt.0 = phi i32 [ %conv, %entry ], [ %sub2, %do.body ] %2 = tail call <16 x i1> @llvm.arm.mve.vctp8(i32 %blkCnt.0) - %3 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %pSrc, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer) + %3 = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %pSrc, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer) %4 = icmp sle <16 x i8> %3, %curExtremValVec.0 %5 = and <16 x i1> %4, %2 %6 = tail call <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8> %3, <16 x i8> %3, <16 x i1> %5, <16 x i8> %curExtremValVec.0) @@ -41,7 +41,7 @@ do.end: ; preds = %do.body ret <16 x i8> %6 } -define i32 @vcmp_new_vpst_combination(i32 %len, i32* nocapture readonly %arr) { +define i32 @vcmp_new_vpst_combination(i32 %len, ptr nocapture readonly %arr) { ; CHECK-LABEL: vcmp_new_vpst_combination: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -82,9 +82,9 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 0, %vector.ph ], [ %5, %vector.body ] %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %len) - %0 = getelementptr inbounds i32, i32* %arr, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %0 = getelementptr inbounds i32, ptr %arr, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %2 = icmp ne <4 x i32> %wide.masked.load, zeroinitializer %narrow = and <4 x i1> %active.lane.mask, %2 %3 = zext <4 x i1> %narrow to <4 x i32> @@ -101,7 +101,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) -declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) @@ -109,6 +109,6 @@ declare { <16 x i8>, i32 } @llvm.arm.mve.vidup.v16i8(i32, i32) declare <16 x i1> @llvm.arm.mve.vctp8(i32) -declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) +declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) declare <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir index 32ea68a..25f6448 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-add-operand-liveout.mir @@ -5,7 +5,7 @@ # then used by the add in the exit - making the vctp operands equivalent. --- | - define dso_local i32 @wrong_vctp_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + define dso_local i32 @wrong_vctp_liveout(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 { entry: %cmp9 = icmp eq i32 %N, 0 %0 = add i32 %N, 3 @@ -22,22 +22,22 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ] %6 = phi i32 [ %N, %vector.ph ], [ %8, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* - %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr + %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr %7 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %6) %8 = sub i32 %6, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %7, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %7, <4 x i16> undef) %9 = sext <4 x i16> %wide.masked.load to <4 x i32> - %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %7, <4 x i16> undef) + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %7, <4 x i16> undef) %10 = sext <4 x i16> %wide.masked.load14 to <4 x i32> %11 = mul nsw <4 x i32> %10, %9 %12 = add <4 x i32> %11, %vec.phi - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 - %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4 %13 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %14 = icmp ne i32 %13, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -54,7 +54,7 @@ %res.0.lcssa = phi i32 [ 0, %entry ], [ %18, %middle.block ] ret i32 %res.0.lcssa } - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir index 1fb505b..ad3e5fd 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir @@ -3,7 +3,7 @@ --- | ; Function Attrs: nofree norecurse nounwind - define dso_local void @test(i32* noalias nocapture %arg, i32* noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) local_unnamed_addr #0 { + define dso_local void @test(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) local_unnamed_addr #0 { bb: %tmp = icmp eq i32 %arg2, 0 %tmp1 = add i32 %arg2, 3 @@ -21,32 +21,32 @@ br label %bb9 bb9: ; preds = %bb9, %bb3 - %lsr.iv2 = phi i32* [ %scevgep3, %bb9 ], [ %arg1, %bb3 ] - %lsr.iv = phi i32* [ %scevgep, %bb9 ], [ %arg, %bb3 ] + %lsr.iv2 = phi ptr [ %scevgep3, %bb9 ], [ %arg1, %bb3 ] + %lsr.iv = phi ptr [ %scevgep, %bb9 ], [ %arg, %bb3 ] %tmp7 = phi i32 [ %start, %bb3 ], [ %tmp12, %bb9 ] %tmp8 = phi i32 [ %arg2, %bb3 ], [ %tmp11, %bb9 ] - %lsr.iv24 = bitcast i32* %lsr.iv2 to <4 x i32>* - %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>* + %lsr.iv24 = bitcast ptr %lsr.iv2 to ptr + %lsr.iv1 = bitcast ptr %lsr.iv to ptr %vctp = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp8) %and = and <4 x i1> %vctp, %invariant.mask %tmp11 = sub i32 %tmp8, 4 - %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv24, i32 4, <4 x i1> %and, <4 x i32> undef) + %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv24, i32 4, <4 x i1> %and, <4 x i32> undef) %tmp18 = icmp ne <4 x i32> %tmp17, zeroinitializer %tmp20 = and <4 x i1> %tmp18, %vctp - %tmp22 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1, i32 4, <4 x i1> %tmp20, <4 x i32> undef) + %tmp22 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1, i32 4, <4 x i1> %tmp20, <4 x i32> undef) %tmp23 = mul nsw <4 x i32> %tmp22, %tmp17 - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp23, <4 x i32>* %lsr.iv1, i32 4, <4 x i1> %tmp20) + call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp23, ptr %lsr.iv1, i32 4, <4 x i1> %tmp20) %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %tmp7, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 4 + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 + %scevgep3 = getelementptr i32, ptr %lsr.iv2, i32 4 br i1 %tmp13, label %bb9, label %bb27 bb27: ; preds = %bb9, %bb ret void } - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) declare <4 x i1> @llvm.arm.mve.vctp32(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt.mir index 0615fce..7af7971 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s --- | ; Function Attrs: nofree norecurse nounwind - define dso_local void @test_vldr_p0(i32* noalias nocapture %arg, i32* noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) local_unnamed_addr #0 { + define dso_local void @test_vldr_p0(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) local_unnamed_addr #0 { bb: %tmp = icmp eq i32 %arg2, 0 %tmp1 = add i32 %arg2, 3 @@ -20,30 +20,30 @@ br label %bb9 bb9: ; preds = %bb9, %bb3 - %lsr.iv2 = phi i32* [ %scevgep3, %bb9 ], [ %arg1, %bb3 ] - %lsr.iv = phi i32* [ %scevgep, %bb9 ], [ %arg, %bb3 ] + %lsr.iv2 = phi ptr [ %scevgep3, %bb9 ], [ %arg1, %bb3 ] + %lsr.iv = phi ptr [ %scevgep, %bb9 ], [ %arg, %bb3 ] %tmp7 = phi i32 [ %start, %bb3 ], [ %tmp12, %bb9 ] %tmp8 = phi i32 [ %arg2, %bb3 ], [ %tmp11, %bb9 ] - %lsr.iv24 = bitcast i32* %lsr.iv2 to <4 x i32>* - %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>* + %lsr.iv24 = bitcast ptr %lsr.iv2 to ptr + %lsr.iv1 = bitcast ptr %lsr.iv to ptr %vctp = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp8) %and = and <4 x i1> %vctp, %invariant.mask %tmp11 = sub i32 %tmp8, 4 - %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv24, i32 4, <4 x i1> %and, <4 x i32> undef) - %tmp22 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1, i32 4, <4 x i1> %and, <4 x i32> undef) + %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv24, i32 4, <4 x i1> %and, <4 x i32> undef) + %tmp22 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1, i32 4, <4 x i1> %and, <4 x i32> undef) %tmp23 = mul nsw <4 x i32> %tmp22, %tmp17 - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %tmp23, <4 x i32>* %lsr.iv1, i32 4, <4 x i1> %and) + call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp23, ptr %lsr.iv1, i32 4, <4 x i1> %and) %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %tmp7, i32 1) %tmp13 = icmp ne i32 %tmp12, 0 - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 4 + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 + %scevgep3 = getelementptr i32, ptr %lsr.iv2, i32 4 br i1 %tmp13, label %bb9, label %bb27 bb27: ; preds = %bb9, %bb ret void } - define dso_local void @test_vstr_p0(i32* noalias nocapture %arg, i32* noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) { + define dso_local void @test_vstr_p0(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) { bb: unreachable bb3: ; preds = %bb @@ -54,7 +54,7 @@ ret void } - define dso_local void @test_vmsr_p0(i32* noalias nocapture %arg, i32* noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) { + define dso_local void @test_vmsr_p0(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) { bb: unreachable bb3: ; preds = %bb @@ -65,7 +65,7 @@ ret void } - define dso_local void @test_vmrs_p0(i32* noalias nocapture %arg, i32* noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) { + define dso_local void @test_vmrs_p0(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) { bb: unreachable bb3: ; preds = %bb @@ -76,8 +76,8 @@ ret void } - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #1 - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #2 + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #1 + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir index a9f4d7c..5153320 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subi3.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve,+lob -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --- | - define dso_local void @vctp_tsubi3(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { + define dso_local void @vctp_tsubi3(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { entry: %cmp8 = icmp sgt i32 %N, 0 %0 = add i32 %N, 3 @@ -18,23 +18,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ] - %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ] - %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ] + %lsr.iv17 = phi ptr [ %scevgep18, %vector.body ], [ %A, %vector.ph ] + %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %C, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %B, %vector.ph ] %6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* - %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* - %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>* + %lsr.iv13 = bitcast ptr %lsr.iv to ptr + %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr + %lsr.iv1719 = bitcast ptr %lsr.iv17 to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 5 - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef) - %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef) %10 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv1719, i32 4, <4 x i1> %8) - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4 - %scevgep18 = getelementptr i32, i32* %lsr.iv17, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv1719, i32 4, <4 x i1> %8) + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 + %scevgep15 = getelementptr i32, ptr %lsr.iv14, i32 4 + %scevgep18 = getelementptr i32, ptr %lsr.iv17, i32 4 %11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %12 = icmp ne i32 %11, 0 br i1 %12, label %vector.body, label %for.cond.cleanup @@ -45,9 +45,9 @@ declare i32 @llvm.start.loop.iterations.i32(i32) #1 declare <4 x i1> @llvm.arm.mve.vctp32(i32) #2 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 - declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3 - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #4 - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #3 + declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3 + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #4 + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #3 ... --- diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir index d995f11..303d936 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve,+lob -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --- | - define dso_local void @vctp_tsubi3(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { + define dso_local void @vctp_tsubi3(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { entry: %cmp8 = icmp sgt i32 %N, 0 %0 = add i32 %N, 3 @@ -18,23 +18,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ] - %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ] - %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ] + %lsr.iv17 = phi ptr [ %scevgep18, %vector.body ], [ %A, %vector.ph ] + %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %C, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %B, %vector.ph ] %6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* - %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* - %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>* + %lsr.iv13 = bitcast ptr %lsr.iv to ptr + %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr + %lsr.iv1719 = bitcast ptr %lsr.iv17 to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 5 - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef) - %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef) %10 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv1719, i32 4, <4 x i1> %8) - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4 - %scevgep18 = getelementptr i32, i32* %lsr.iv17, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv1719, i32 4, <4 x i1> %8) + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 + %scevgep15 = getelementptr i32, ptr %lsr.iv14, i32 4 + %scevgep18 = getelementptr i32, ptr %lsr.iv17, i32 4 %11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %12 = icmp ne i32 %11, 0 br i1 %12, label %vector.body, label %for.cond.cleanup @@ -45,8 +45,8 @@ declare i32 @llvm.start.loop.iterations.i32(i32) declare <4 x i1> @llvm.arm.mve.vctp32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) ... --- diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir index 48e161d..2516e5b 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-subri12.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve,+lob -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --- | - define dso_local void @vctp_tsubi3(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { + define dso_local void @vctp_tsubi3(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 { entry: %cmp8 = icmp sgt i32 %N, 0 %0 = add i32 %N, 3 @@ -18,23 +18,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv17 = phi i32* [ %scevgep18, %vector.body ], [ %A, %vector.ph ] - %lsr.iv14 = phi i32* [ %scevgep15, %vector.body ], [ %C, %vector.ph ] - %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %B, %vector.ph ] + %lsr.iv17 = phi ptr [ %scevgep18, %vector.body ], [ %A, %vector.ph ] + %lsr.iv14 = phi ptr [ %scevgep15, %vector.body ], [ %C, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %B, %vector.ph ] %6 = phi i32 [ %start, %vector.ph ], [ %11, %vector.body ] %7 = phi i32 [ %N, %vector.ph ], [ %9, %vector.body ] - %lsr.iv13 = bitcast i32* %lsr.iv to <4 x i32>* - %lsr.iv1416 = bitcast i32* %lsr.iv14 to <4 x i32>* - %lsr.iv1719 = bitcast i32* %lsr.iv17 to <4 x i32>* + %lsr.iv13 = bitcast ptr %lsr.iv to ptr + %lsr.iv1416 = bitcast ptr %lsr.iv14 to ptr + %lsr.iv1719 = bitcast ptr %lsr.iv17 to ptr %8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %7) %9 = sub i32 %7, 5 - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef) - %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv13, i32 4, <4 x i1> %8, <4 x i32> undef) + %wide.masked.load12 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1416, i32 4, <4 x i1> %8, <4 x i32> undef) %10 = add nsw <4 x i32> %wide.masked.load12, %wide.masked.load - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %lsr.iv1719, i32 4, <4 x i1> %8) - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep15 = getelementptr i32, i32* %lsr.iv14, i32 4 - %scevgep18 = getelementptr i32, i32* %lsr.iv17, i32 4 + call void @llvm.masked.store.v4i32.p0(<4 x i32> %10, ptr %lsr.iv1719, i32 4, <4 x i1> %8) + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 + %scevgep15 = getelementptr i32, ptr %lsr.iv14, i32 4 + %scevgep18 = getelementptr i32, ptr %lsr.iv17, i32 4 %11 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %6, i32 1) %12 = icmp ne i32 %11, 0 br i1 %12, label %vector.body, label %for.cond.cleanup @@ -45,8 +45,8 @@ declare i32 @llvm.start.loop.iterations.i32(i32) declare <4 x i1> @llvm.arm.mve.vctp32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) ... --- diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir index 3b2e776..4b015f8 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp16-reduce.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s --- | - define dso_local arm_aapcs_vfpcc signext i16 @wrong_liveout_shift(i8* nocapture readonly %b, i8* nocapture readonly %c, i32 %N) { + define dso_local arm_aapcs_vfpcc signext i16 @wrong_liveout_shift(ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) { entry: %cmp11 = icmp eq i32 %N, 0 %0 = add i32 %N, 7 @@ -20,23 +20,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv20 = phi i8* [ %scevgep21, %vector.body ], [ %c, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %b, %vector.ph ] + %lsr.iv20 = phi ptr [ %scevgep21, %vector.body ], [ %c, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %b, %vector.ph ] %vec.phi = phi <8 x i16> [ <i16 32767, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, %vector.ph ], [ %15, %vector.body ] %8 = phi i32 [ %start, %vector.ph ], [ %16, %vector.body ] %9 = phi i32 [ %N, %vector.ph ], [ %11, %vector.body ] - %lsr.iv2022 = bitcast i8* %lsr.iv20 to <8 x i8>* - %lsr.iv19 = bitcast i8* %lsr.iv to <8 x i8>* + %lsr.iv2022 = bitcast ptr %lsr.iv20 to ptr + %lsr.iv19 = bitcast ptr %lsr.iv to ptr %10 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %9) %11 = sub i32 %9, 8 - %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv19, i32 1, <8 x i1> %10, <8 x i8> undef) + %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %lsr.iv19, i32 1, <8 x i1> %10, <8 x i8> undef) %12 = zext <8 x i8> %wide.masked.load to <8 x i16> - %wide.masked.load16 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv2022, i32 1, <8 x i1> %10, <8 x i8> undef) + %wide.masked.load16 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %lsr.iv2022, i32 1, <8 x i1> %10, <8 x i8> undef) %13 = zext <8 x i8> %wide.masked.load16 to <8 x i16> %14 = mul nuw <8 x i16> %13, %12 %15 = sub <8 x i16> %vec.phi, %14 - %scevgep = getelementptr i8, i8* %lsr.iv, i32 8 - %scevgep21 = getelementptr i8, i8* %lsr.iv20, i32 8 + %scevgep = getelementptr i8, ptr %lsr.iv, i32 8 + %scevgep21 = getelementptr i8, ptr %lsr.iv20, i32 8 %16 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %8, i32 1) %17 = icmp ne i32 %16, 0 br i1 %17, label %vector.body, label %middle.block @@ -53,7 +53,7 @@ %a.0.lcssa = phi i16 [ 32767, %entry ], [ %20, %middle.block ] ret i16 %a.0.lcssa } - declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>) + declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>) declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll index 015af0b..0f14200 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -tail-predication=enabled --verify-machineinstrs %s -o - | FileCheck %s -define dso_local i32 @mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) { +define dso_local i32 @mul_reduce_add(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) { ; CHECK-LABEL: mul_reduce_add: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 @@ -44,13 +44,13 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %6, %vector.body ] - %0 = getelementptr inbounds i32, i32* %a, i32 %index + %0 = getelementptr inbounds i32, ptr %a, i32 %index %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) - %3 = getelementptr inbounds i32, i32* %b, i32 %index - %4 = bitcast i32* %3 to <4 x i32>* - %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %4, i32 4, <4 x i1> %1, <4 x i32> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x i32> undef) + %3 = getelementptr inbounds i32, ptr %b, i32 %index + %4 = bitcast ptr %3 to ptr + %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %4, i32 4, <4 x i1> %1, <4 x i32> undef) %5 = mul nsw <4 x i32> %wide.masked.load13, %wide.masked.load %6 = add nsw <4 x i32> %5, %vec.phi %index.next = add i32 %index, 4 @@ -67,7 +67,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr ret i32 %res.0.lcssa } -define dso_local i32 @mul_reduce_add_const(i32* noalias nocapture readonly %a, i32 %b, i32 %N) { +define dso_local i32 @mul_reduce_add_const(ptr noalias nocapture readonly %a, i32 %b, i32 %N) { ; CHECK-LABEL: mul_reduce_add_const: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 @@ -108,10 +108,10 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ] - %0 = getelementptr inbounds i32, i32* %a, i32 %index + %0 = getelementptr inbounds i32, ptr %a, i32 %index %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = add nsw <4 x i32> %wide.masked.load, %vec.phi %index.next = add i32 %index, 4 %4 = icmp eq i32 %index.next, %n.vec @@ -127,7 +127,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr ret i32 %res.0.lcssa } -define dso_local i32 @add_reduce_add_const(i32* noalias nocapture readonly %a, i32 %b, i32 %N) { +define dso_local i32 @add_reduce_add_const(ptr noalias nocapture readonly %a, i32 %b, i32 %N) { ; CHECK-LABEL: add_reduce_add_const: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 @@ -168,10 +168,10 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ] - %0 = getelementptr inbounds i32, i32* %a, i32 %index + %0 = getelementptr inbounds i32, ptr %a, i32 %index %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = add nsw <4 x i32> %wide.masked.load, %vec.phi %index.next = add i32 %index, 4 %4 = icmp eq i32 %index.next, %n.vec @@ -187,7 +187,7 @@ for.cond.cleanup: ; preds = %middle.block, %entr ret i32 %res.0.lcssa } -define dso_local void @vector_mul_const(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %c, i32 %N) { +define dso_local void @vector_mul_const(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %c, i32 %N) { ; CHECK-LABEL: vector_mul_const: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} @@ -217,14 +217,14 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %0 = getelementptr inbounds i32, i32* %b, i32 %index + %0 = getelementptr inbounds i32, ptr %b, i32 %index %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = mul nsw <4 x i32> %wide.masked.load, %broadcast.splat11 - %4 = getelementptr inbounds i32, i32* %a, i32 %index - %5 = bitcast i32* %4 to <4 x i32>* - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %5, i32 4, <4 x i1> %1) + %4 = getelementptr inbounds i32, ptr %a, i32 %index + %5 = bitcast ptr %4 to ptr + call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %5, i32 4, <4 x i1> %1) %index.next = add i32 %index, 4 %6 = icmp eq i32 %index.next, %n.vec br i1 %6, label %for.cond.cleanup, label %vector.body @@ -233,7 +233,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -define dso_local void @vector_add_const(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %c, i32 %N) { +define dso_local void @vector_add_const(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %c, i32 %N) { ; CHECK-LABEL: vector_add_const: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} @@ -263,14 +263,14 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %0 = getelementptr inbounds i32, i32* %b, i32 %index + %0 = getelementptr inbounds i32, ptr %b, i32 %index %1 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %2 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %1, <4 x i32> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = add nsw <4 x i32> %wide.masked.load, %broadcast.splat11 - %4 = getelementptr inbounds i32, i32* %a, i32 %index - %5 = bitcast i32* %4 to <4 x i32>* - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %5, i32 4, <4 x i1> %1) + %4 = getelementptr inbounds i32, ptr %a, i32 %index + %5 = bitcast ptr %4 to ptr + call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %5, i32 4, <4 x i1> %1) %index.next = add i32 %index, 4 %6 = icmp eq i32 %index.next, %n.vec br i1 %6, label %for.cond.cleanup, label %vector.body @@ -279,7 +279,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -define dso_local arm_aapcs_vfpcc void @vector_mul_vector_i8(i8* noalias nocapture %a, i8* noalias nocapture readonly %b, i8* noalias nocapture readonly %c, i32 %N) { +define dso_local arm_aapcs_vfpcc void @vector_mul_vector_i8(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i32 %N) { ; CHECK-LABEL: vector_mul_vector_i8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} @@ -308,17 +308,17 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %0 = getelementptr inbounds i8, i8* %b, i32 %index + %0 = getelementptr inbounds i8, ptr %b, i32 %index %1 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %N) - %2 = bitcast i8* %0 to <16 x i8>* - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %2, i32 1, <16 x i1> %1, <16 x i8> undef) - %3 = getelementptr inbounds i8, i8* %c, i32 %index - %4 = bitcast i8* %3 to <16 x i8>* - %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %4, i32 1, <16 x i1> %1, <16 x i8> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %2, i32 1, <16 x i1> %1, <16 x i8> undef) + %3 = getelementptr inbounds i8, ptr %c, i32 %index + %4 = bitcast ptr %3 to ptr + %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %4, i32 1, <16 x i1> %1, <16 x i8> undef) %5 = mul <16 x i8> %wide.masked.load14, %wide.masked.load - %6 = getelementptr inbounds i8, i8* %a, i32 %index - %7 = bitcast i8* %6 to <16 x i8>* - call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %5, <16 x i8>* %7, i32 1, <16 x i1> %1) + %6 = getelementptr inbounds i8, ptr %a, i32 %index + %7 = bitcast ptr %6 to ptr + call void @llvm.masked.store.v16i8.p0(<16 x i8> %5, ptr %7, i32 1, <16 x i1> %1) %index.next = add i32 %index, 16 %8 = icmp eq i32 %index.next, %n.vec br i1 %8, label %for.cond.cleanup, label %vector.body @@ -328,7 +328,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry } ; Function Attrs: nofree norecurse nounwind -define dso_local arm_aapcs_vfpcc void @vector_mul_vector_i16(i16* noalias nocapture %a, i16* noalias nocapture readonly %b, i16* noalias nocapture readonly %c, i32 %N) local_unnamed_addr #0 { +define dso_local arm_aapcs_vfpcc void @vector_mul_vector_i16(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: vector_mul_vector_i16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} @@ -357,17 +357,17 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %0 = getelementptr inbounds i16, i16* %b, i32 %index + %0 = getelementptr inbounds i16, ptr %b, i32 %index %1 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %N) - %2 = bitcast i16* %0 to <8 x i16>* - %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %2, i32 2, <8 x i1> %1, <8 x i16> undef) - %3 = getelementptr inbounds i16, i16* %c, i32 %index - %4 = bitcast i16* %3 to <8 x i16>* - %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %4, i32 2, <8 x i1> %1, <8 x i16> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %2, i32 2, <8 x i1> %1, <8 x i16> undef) + %3 = getelementptr inbounds i16, ptr %c, i32 %index + %4 = bitcast ptr %3 to ptr + %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %4, i32 2, <8 x i1> %1, <8 x i16> undef) %5 = mul <8 x i16> %wide.masked.load14, %wide.masked.load - %6 = getelementptr inbounds i16, i16* %a, i32 %index - %7 = bitcast i16* %6 to <8 x i16>* - call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %5, <8 x i16>* %7, i32 2, <8 x i1> %1) + %6 = getelementptr inbounds i16, ptr %a, i32 %index + %7 = bitcast ptr %6 to ptr + call void @llvm.masked.store.v8i16.p0(<8 x i16> %5, ptr %7, i32 2, <8 x i1> %1) %index.next = add i32 %index, 8 %8 = icmp eq i32 %index.next, %n.vec br i1 %8, label %for.cond.cleanup, label %vector.body @@ -376,12 +376,12 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) -declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) -declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) -declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>) -declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) -declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) +declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) +declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>) +declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>) +declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmaxmin_vpred_r.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmaxmin_vpred_r.mir index e6e6834..1f9cde8 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmaxmin_vpred_r.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmaxmin_vpred_r.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --- | - define hidden i32 @arm_elementwise_mul_s8(i8* %input_1_vect, i8* %input_2_vect, i32 %input_1_offset, i32 %input_2_offset, i8* %output, i32 %out_offset, i32 %out_mult, i32 %out_shift, i32 %out_activation_min, i32 %out_activation_max, i32 %block_size) local_unnamed_addr #0 { + define hidden i32 @arm_elementwise_mul_s8(ptr %input_1_vect, ptr %input_2_vect, i32 %input_1_offset, i32 %input_2_offset, ptr %output, i32 %out_offset, i32 %out_mult, i32 %out_shift, i32 %out_activation_min, i32 %out_activation_max, i32 %block_size) local_unnamed_addr #0 { entry: %add = add i32 %block_size, 3 %div = lshr i32 %add, 2 @@ -20,22 +20,22 @@ ret i32 0 for.body: ; preds = %for.body, %for.body.lr.ph - %input_1_vect.addr.052 = phi i8* [ %input_1_vect, %for.body.lr.ph ], [ %add.ptr, %for.body ] - %input_2_vect.addr.051 = phi i8* [ %input_2_vect, %for.body.lr.ph ], [ %add.ptr14, %for.body ] - %output.addr.050 = phi i8* [ %output, %for.body.lr.ph ], [ %add.ptr15, %for.body ] + %input_1_vect.addr.052 = phi ptr [ %input_1_vect, %for.body.lr.ph ], [ %add.ptr, %for.body ] + %input_2_vect.addr.051 = phi ptr [ %input_2_vect, %for.body.lr.ph ], [ %add.ptr14, %for.body ] + %output.addr.050 = phi ptr [ %output, %for.body.lr.ph ], [ %add.ptr15, %for.body ] %num_elements.049 = phi i32 [ %block_size, %for.body.lr.ph ], [ %sub, %for.body ] %iv = phi i32 [ %div, %for.body.lr.ph ], [ %iv.next, %for.body ] - %output_cast = bitcast i8* %output.addr.050 to <4 x i32>* - %input_2_cast = bitcast i8* %input_2_vect.addr.051 to <4 x i32>* - %input_1_cast = bitcast i8* %input_1_vect.addr.052 to <4 x i32>* + %output_cast = bitcast ptr %output.addr.050 to ptr + %input_2_cast = bitcast ptr %input_2_vect.addr.051 to ptr + %input_1_cast = bitcast ptr %input_1_vect.addr.052 to ptr %pred = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %num_elements.049) - %load.1 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %input_1_cast, i32 4, <4 x i1> %pred, <4 x i32> undef) + %load.1 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %input_1_cast, i32 4, <4 x i1> %pred, <4 x i32> undef) %insert.input_1_offset = insertelement <4 x i32> undef, i32 %input_1_offset, i32 0 %splat.input_1_offset = shufflevector <4 x i32> %insert.input_1_offset, <4 x i32> undef, <4 x i32> zeroinitializer %insert.input_2_offset = insertelement <4 x i32> undef, i32 %input_2_offset, i32 0 %splat.input_2_offset = shufflevector <4 x i32> %insert.input_2_offset, <4 x i32> undef, <4 x i32> zeroinitializer %add.1 = add <4 x i32> %load.1, %splat.input_1_offset - %load.2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %input_2_cast, i32 4, <4 x i1> %pred, <4 x i32> undef) + %load.2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %input_2_cast, i32 4, <4 x i1> %pred, <4 x i32> undef) %add.2 = add <4 x i32> %load.2, %splat.input_2_offset %mul = mul <4 x i32> %add.1, %add.2 %insert.output = insertelement <4 x i32> undef, i32 %out_offset, i32 0 @@ -43,23 +43,23 @@ %add7 = add <4 x i32> %mul, %splat.output %max = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %add7, <4 x i32> %.splat.i42, i32 1, <4 x i1> %pred, <4 x i32> undef) %min = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %max, <4 x i32> %.splat.i, i32 1, <4 x i1> %pred, <4 x i32> undef) - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %min, <4 x i32>* %output_cast, i32 4, <4 x i1> %pred) - %add.ptr = getelementptr inbounds i8, i8* %input_1_vect.addr.052, i32 4 - %add.ptr14 = getelementptr inbounds i8, i8* %input_2_vect.addr.051, i32 4 - %add.ptr15 = getelementptr inbounds i8, i8* %output.addr.050, i32 4 + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %min, ptr %output_cast, i32 4, <4 x i1> %pred) + %add.ptr = getelementptr inbounds i8, ptr %input_1_vect.addr.052, i32 4 + %add.ptr14 = getelementptr inbounds i8, ptr %input_2_vect.addr.051, i32 4 + %add.ptr15 = getelementptr inbounds i8, ptr %output.addr.050, i32 4 %sub = add i32 %num_elements.049, -4 %iv.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %iv, i32 1) %cmp = icmp ne i32 %iv.next, 0 br i1 %cmp, label %for.body, label %for.cond.cleanup } declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1 - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2 - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #3 + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2 + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #3 declare <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1 declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1 declare i1 @llvm.test.set.loop.iterations.i32(i32) #4 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #4 - declare void @llvm.stackprotector(i8*, i8**) #5 + declare void @llvm.stackprotector(ptr, ptr) #5 ... --- name: arm_elementwise_mul_s8 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir index 082095f..4d3593a 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s --- | - define hidden i32 @vmldava_in_vpt(i8* %input_1_vect, i8* %input_2_vect, i32 %input_1_offset, i32 %input_2_offset, i32 %out_offset, i32 %out_mult, i32 %out_shift, i32 %out_activation_min, i32 %out_activation_max, i32 %block_size) local_unnamed_addr #0 { + define hidden i32 @vmldava_in_vpt(ptr %input_1_vect, ptr %input_2_vect, i32 %input_1_offset, i32 %input_2_offset, i32 %out_offset, i32 %out_mult, i32 %out_shift, i32 %out_activation_min, i32 %out_activation_max, i32 %block_size) local_unnamed_addr #0 { entry: %add = add i32 %block_size, 3 %div = lshr i32 %add, 2 @@ -24,20 +24,20 @@ for.body: ; preds = %for.body, %for.body.lr.ph %lsr.iv = phi i32 [ %iv.next, %for.body ], [ %wls0, %for.body.lr.ph ] - %input_1_vect.addr.052 = phi i8* [ %input_1_vect, %for.body.lr.ph ], [ %add.ptr, %for.body ] - %input_2_vect.addr.051 = phi i8* [ %input_2_vect, %for.body.lr.ph ], [ %add.ptr14, %for.body ] + %input_1_vect.addr.052 = phi ptr [ %input_1_vect, %for.body.lr.ph ], [ %add.ptr, %for.body ] + %input_2_vect.addr.051 = phi ptr [ %input_2_vect, %for.body.lr.ph ], [ %add.ptr14, %for.body ] %num_elements.049 = phi i32 [ %block_size, %for.body.lr.ph ], [ %sub, %for.body ] %acc = phi i32 [ 0, %for.body.lr.ph ], [ %acc.next, %for.body ] - %input_2_cast = bitcast i8* %input_2_vect.addr.051 to <4 x i32>* - %input_1_cast = bitcast i8* %input_1_vect.addr.052 to <4 x i32>* + %input_2_cast = bitcast ptr %input_2_vect.addr.051 to ptr + %input_1_cast = bitcast ptr %input_1_vect.addr.052 to ptr %pred = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %num_elements.049) - %load.1 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %input_1_cast, i32 4, <4 x i1> %pred, <4 x i32> undef) + %load.1 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %input_1_cast, i32 4, <4 x i1> %pred, <4 x i32> undef) %insert.input_1_offset = insertelement <4 x i32> undef, i32 %input_1_offset, i32 0 %splat.input_1_offset = shufflevector <4 x i32> %insert.input_1_offset, <4 x i32> undef, <4 x i32> zeroinitializer %insert.input_2_offset = insertelement <4 x i32> undef, i32 %input_2_offset, i32 0 %splat.input_2_offset = shufflevector <4 x i32> %insert.input_2_offset, <4 x i32> undef, <4 x i32> zeroinitializer %add.1 = add <4 x i32> %load.1, %splat.input_1_offset - %load.2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %input_2_cast, i32 4, <4 x i1> %pred, <4 x i32> undef) + %load.2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %input_2_cast, i32 4, <4 x i1> %pred, <4 x i32> undef) %add.2 = add <4 x i32> %load.2, %splat.input_2_offset %mul = mul <4 x i32> %add.1, %add.2 %insert.output = insertelement <4 x i32> undef, i32 %out_offset, i32 0 @@ -46,16 +46,16 @@ %max = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %add7, <4 x i32> %.splat.i42, i32 1, <4 x i1> %pred, <4 x i32> undef) %min = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %max, <4 x i32> %.splat.i, i32 1, <4 x i1> %pred, <4 x i32> undef) %acc.next = call i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 %acc, <4 x i32> %min, <4 x i32> %max, <4 x i1> %pred) - %add.ptr = getelementptr inbounds i8, i8* %input_1_vect.addr.052, i32 4 - %add.ptr14 = getelementptr inbounds i8, i8* %input_2_vect.addr.051, i32 4 + %add.ptr = getelementptr inbounds i8, ptr %input_1_vect.addr.052, i32 4 + %add.ptr14 = getelementptr inbounds i8, ptr %input_2_vect.addr.051, i32 4 %sub = add i32 %num_elements.049, -4 %iv.next = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1) %cmp = icmp ne i32 %iv.next, 0 br i1 %cmp, label %for.body, label %for.cond.cleanup } declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1 - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2 - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #3 + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2 + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #3 declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1 declare <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1 declare i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i1>) #1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-block-debug.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-block-debug.mir index 9f2a7d9..34821c1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-block-debug.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-block-debug.mir @@ -7,37 +7,37 @@ %struct.arm_2d_size_t = type { i16, i16 } - define void @__arm_2d_impl_rgb16_cl_key_1x1_paving_x_mirror_xx(i16* noalias %pSource, i16 signext %iSourceStride, i16* noalias %pTarget, i16 signext %iTargetStride, %struct.arm_2d_size_t* noalias nocapture readonly %ptSrcCopySize, %struct.arm_2d_size_t* noalias nocapture readonly %ptDstCopySize, i16 zeroext %Colour) local_unnamed_addr #0 !dbg !12 { + define void @__arm_2d_impl_rgb16_cl_key_1x1_paving_x_mirror_xx(ptr noalias %pSource, i16 signext %iSourceStride, ptr noalias %pTarget, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptSrcCopySize, ptr noalias nocapture readonly %ptDstCopySize, i16 zeroext %Colour) local_unnamed_addr #0 !dbg !12 { entry: - call void @llvm.dbg.value(metadata i16* %pSource, metadata !33, metadata !DIExpression()), !dbg !62 + call void @llvm.dbg.value(metadata ptr %pSource, metadata !33, metadata !DIExpression()), !dbg !62 call void @llvm.dbg.value(metadata i16 %iSourceStride, metadata !34, metadata !DIExpression()), !dbg !62 - call void @llvm.dbg.value(metadata i16* %pTarget, metadata !35, metadata !DIExpression()), !dbg !62 + call void @llvm.dbg.value(metadata ptr %pTarget, metadata !35, metadata !DIExpression()), !dbg !62 call void @llvm.dbg.value(metadata i16 %iTargetStride, metadata !36, metadata !DIExpression()), !dbg !62 - call void @llvm.dbg.value(metadata %struct.arm_2d_size_t* %ptSrcCopySize, metadata !37, metadata !DIExpression()), !dbg !62 - call void @llvm.dbg.value(metadata %struct.arm_2d_size_t* %ptDstCopySize, metadata !38, metadata !DIExpression()), !dbg !62 + call void @llvm.dbg.value(metadata ptr %ptSrcCopySize, metadata !37, metadata !DIExpression()), !dbg !62 + call void @llvm.dbg.value(metadata ptr %ptDstCopySize, metadata !38, metadata !DIExpression()), !dbg !62 call void @llvm.dbg.value(metadata i16 %Colour, metadata !39, metadata !DIExpression()), !dbg !62 call void @llvm.dbg.value(metadata i16 undef, metadata !40, metadata !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value)), !dbg !62 call void @llvm.dbg.value(metadata i32 0, metadata !43, metadata !DIExpression()), !dbg !63 - %iHeight = getelementptr inbounds %struct.arm_2d_size_t, %struct.arm_2d_size_t* %ptDstCopySize, i32 0, i32 1 - %0 = load i16, i16* %iHeight, align 2, !tbaa !64 + %iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptDstCopySize, i32 0, i32 1 + %0 = load i16, ptr %iHeight, align 2, !tbaa !64 %conv1 = sext i16 %0 to i32 %conv8 = sext i16 %iSourceStride to i32 %conv10 = sext i16 %iTargetStride to i32 - call void @llvm.dbg.value(metadata i16* %pSource, metadata !33, metadata !DIExpression()), !dbg !62 - call void @llvm.dbg.value(metadata i16* %pTarget, metadata !35, metadata !DIExpression()), !dbg !62 + call void @llvm.dbg.value(metadata ptr %pSource, metadata !33, metadata !DIExpression()), !dbg !62 + call void @llvm.dbg.value(metadata ptr %pTarget, metadata !35, metadata !DIExpression()), !dbg !62 call void @llvm.dbg.value(metadata i32 0, metadata !43, metadata !DIExpression()), !dbg !63 %cmp34 = icmp sgt i16 %0, 0, !dbg !69 br i1 %cmp34, label %for.body.lr.ph, label %for.cond.cleanup, !dbg !70 for.body.lr.ph: ; preds = %entry - %iWidth2 = bitcast %struct.arm_2d_size_t* %ptSrcCopySize to i16*, !dbg !71 - %1 = load i16, i16* %iWidth2, align 2, !dbg !71, !tbaa !72 + %iWidth2 = bitcast ptr %ptSrcCopySize to ptr, !dbg !71 + %1 = load i16, ptr %iWidth2, align 2, !dbg !71, !tbaa !72 call void @llvm.dbg.value(metadata i16 %1, metadata !40, metadata !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value)), !dbg !62 %conv = sext i16 %1 to i32, !dbg !73 call void @llvm.dbg.value(metadata i32 %conv, metadata !40, metadata !DIExpression()), !dbg !62 %sub = add nsw i32 %conv, -1 - %iWidth33 = bitcast %struct.arm_2d_size_t* %ptDstCopySize to i16* - %2 = load i16, i16* %iWidth33, align 2, !tbaa !72 + %iWidth33 = bitcast ptr %ptDstCopySize to ptr + %2 = load i16, ptr %iWidth33, align 2, !tbaa !72 %conv4 = sext i16 %2 to i32 %3 = tail call { <8 x i16>, i32 } @llvm.arm.mve.vddup.v8i16(i32 %sub, i32 1), !dbg !62 %4 = add i32 %conv4, 7, !dbg !70 @@ -51,14 +51,14 @@ ret void, !dbg !74 for.body: ; preds = %do.end, %for.body.lr.ph - %pSource.addr.037 = phi i16* [ %pSource, %for.body.lr.ph ], [ %add.ptr9, %do.end ] - %pTarget.addr.036 = phi i16* [ %pTarget, %for.body.lr.ph ], [ %add.ptr11, %do.end ] + %pSource.addr.037 = phi ptr [ %pSource, %for.body.lr.ph ], [ %add.ptr9, %do.end ] + %pTarget.addr.036 = phi ptr [ %pTarget, %for.body.lr.ph ], [ %add.ptr11, %do.end ] %y.035 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %do.end ] - call void @llvm.dbg.value(metadata i16* %pSource.addr.037, metadata !33, metadata !DIExpression()), !dbg !62 - call void @llvm.dbg.value(metadata i16* %pTarget.addr.036, metadata !35, metadata !DIExpression()), !dbg !62 + call void @llvm.dbg.value(metadata ptr %pSource.addr.037, metadata !33, metadata !DIExpression()), !dbg !62 + call void @llvm.dbg.value(metadata ptr %pTarget.addr.036, metadata !35, metadata !DIExpression()), !dbg !62 call void @llvm.dbg.value(metadata i32 %y.035, metadata !43, metadata !DIExpression()), !dbg !63 - call void @llvm.dbg.value(metadata i16* %pTarget.addr.036, metadata !46, metadata !DIExpression()), !dbg !75 - call void @llvm.dbg.value(metadata i16* %pSource.addr.037, metadata !49, metadata !DIExpression()), !dbg !75 + call void @llvm.dbg.value(metadata ptr %pTarget.addr.036, metadata !46, metadata !DIExpression()), !dbg !75 + call void @llvm.dbg.value(metadata ptr %pSource.addr.037, metadata !49, metadata !DIExpression()), !dbg !75 call void @llvm.dbg.value(metadata i32 %conv4, metadata !50, metadata !DIExpression()), !dbg !75 call void @llvm.dbg.value(metadata i32 undef, metadata !51, metadata !DIExpression()), !dbg !75 call void @llvm.dbg.value(metadata <8 x i16> undef, metadata !52, metadata !DIExpression()), !dbg !75 @@ -66,20 +66,20 @@ br label %do.body, !dbg !76 do.body: ; preds = %do.body, %for.body - %pDst.0 = phi i16* [ %pTarget.addr.036, %for.body ], [ %add.ptr, %do.body ], !dbg !75 + %pDst.0 = phi ptr [ %pTarget.addr.036, %for.body ], [ %add.ptr, %do.body ], !dbg !75 %dstWidth.0 = phi i32 [ %conv4, %for.body ], [ %sub5, %do.body ], !dbg !75 %.pn = phi { <8 x i16>, i32 } [ %3, %for.body ], [ %12, %do.body ] %9 = phi i32 [ %8, %for.body ], [ %17, %do.body ], !dbg !75 - %pDst.01 = bitcast i16* %pDst.0 to <8 x i16>*, !dbg !75 + %pDst.01 = bitcast ptr %pDst.0 to ptr, !dbg !75 %offset.0 = extractvalue { <8 x i16>, i32 } %.pn, 0, !dbg !75 %curOffsetIdx.0 = extractvalue { <8 x i16>, i32 } %.pn, 1, !dbg !75 call void @llvm.dbg.value(metadata <8 x i16> %offset.0, metadata !52, metadata !DIExpression()), !dbg !75 call void @llvm.dbg.value(metadata i32 %curOffsetIdx.0, metadata !51, metadata !DIExpression()), !dbg !75 call void @llvm.dbg.value(metadata i32 %dstWidth.0, metadata !50, metadata !DIExpression()), !dbg !75 - call void @llvm.dbg.value(metadata i16* %pDst.0, metadata !46, metadata !DIExpression()), !dbg !75 + call void @llvm.dbg.value(metadata ptr %pDst.0, metadata !46, metadata !DIExpression()), !dbg !75 %10 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %dstWidth.0), !dbg !77 call void @llvm.dbg.value(metadata i32 undef, metadata !58, metadata !DIExpression()), !dbg !78 - %11 = tail call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %pSource.addr.037, <8 x i16> %offset.0, i32 16, i32 1, i32 1, <8 x i1> %10), !dbg !79 + %11 = tail call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %pSource.addr.037, <8 x i16> %offset.0, i32 16, i32 1, i32 1, <8 x i1> %10), !dbg !79 call void @llvm.dbg.value(metadata <8 x i16> %11, metadata !61, metadata !DIExpression()), !dbg !78 %12 = tail call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> undef, i32 %curOffsetIdx.0, i32 1, <8 x i1> %10), !dbg !80 call void @llvm.dbg.value(metadata i32 undef, metadata !51, metadata !DIExpression()), !dbg !75 @@ -89,9 +89,9 @@ %15 = icmp ne <8 x i16> %11, %14, !dbg !81 %16 = and <8 x i1> %15, %10, !dbg !81 call void @llvm.dbg.value(metadata i32 undef, metadata !58, metadata !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !78 - tail call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %11, <8 x i16>* %pDst.01, i32 2, <8 x i1> %16), !dbg !82 - %add.ptr = getelementptr inbounds i16, i16* %pDst.0, i32 8, !dbg !83 - call void @llvm.dbg.value(metadata i16* %add.ptr, metadata !46, metadata !DIExpression()), !dbg !75 + tail call void @llvm.masked.store.v8i16.p0(<8 x i16> %11, ptr %pDst.01, i32 2, <8 x i1> %16), !dbg !82 + %add.ptr = getelementptr inbounds i16, ptr %pDst.0, i32 8, !dbg !83 + call void @llvm.dbg.value(metadata ptr %add.ptr, metadata !46, metadata !DIExpression()), !dbg !75 %sub5 = add nsw i32 %dstWidth.0, -8, !dbg !84 call void @llvm.dbg.value(metadata i32 %sub5, metadata !50, metadata !DIExpression()), !dbg !75 %17 = call i32 @llvm.loop.decrement.reg.i32(i32 %9, i32 1), !dbg !85 @@ -99,10 +99,10 @@ br i1 %18, label %do.body, label %do.end, !dbg !85, !llvm.loop !86 do.end: ; preds = %do.body - %add.ptr9 = getelementptr inbounds i16, i16* %pSource.addr.037, i32 %conv8, !dbg !89 - call void @llvm.dbg.value(metadata i16* %add.ptr9, metadata !33, metadata !DIExpression()), !dbg !62 - %add.ptr11 = getelementptr inbounds i16, i16* %pTarget.addr.036, i32 %conv10, !dbg !90 - call void @llvm.dbg.value(metadata i16* %add.ptr11, metadata !35, metadata !DIExpression()), !dbg !62 + %add.ptr9 = getelementptr inbounds i16, ptr %pSource.addr.037, i32 %conv8, !dbg !89 + call void @llvm.dbg.value(metadata ptr %add.ptr9, metadata !33, metadata !DIExpression()), !dbg !62 + %add.ptr11 = getelementptr inbounds i16, ptr %pTarget.addr.036, i32 %conv10, !dbg !90 + call void @llvm.dbg.value(metadata ptr %add.ptr11, metadata !35, metadata !DIExpression()), !dbg !62 %inc = add nuw nsw i32 %y.035, 1, !dbg !91 call void @llvm.dbg.value(metadata i32 %inc, metadata !43, metadata !DIExpression()), !dbg !63 %exitcond.not = icmp eq i32 %inc, %conv1, !dbg !69 @@ -111,9 +111,9 @@ declare { <8 x i16>, i32 } @llvm.arm.mve.vddup.v8i16(i32, i32) declare <8 x i1> @llvm.arm.mve.vctp16(i32) - declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16*, <8 x i16>, i32, i32, i32, <8 x i1>) + declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr, <8 x i16>, i32, i32, i32, <8 x i1>) declare { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>) - declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) + declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>) declare void @llvm.dbg.value(metadata, metadata, metadata) declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.start.loop.iterations.i32(i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-revert-placement.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-revert-placement.mir index b6e5ca3..f1a40dd 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-revert-placement.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wls-revert-placement.mir @@ -48,8 +48,8 @@ for.body.lr.ph.split.us.split.us: ; preds = %for.body.lr.ph.split.us %2 = sext i32 %cond11 to i64 %const19 = bitcast i32 1514690832 to i32 - store i64 %2, i64* @var_76, align 8 - store i8 %frombool, i8* @var_77, align 1 + store i64 %2, ptr @var_76, align 8 + store i8 %frombool, ptr @var_77, align 1 %3 = add i32 %i, %const19 %4 = add nsw i32 %k, -1 %xtraiter154 = and i32 %k, 3 @@ -61,19 +61,19 @@ br label %for.body.us.us for.body.us.us: ; preds = %for.body.us.us, %for.body.lr.ph.split.us.split.us.new - %lsr.iv = phi [12 x [12 x i8]]* [ %6, %for.body.us.us ], [ bitcast (i8* getelementptr inbounds ([12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 2, i32 0) to [12 x [12 x i8]]*), %for.body.lr.ph.split.us.split.us.new ] + %lsr.iv = phi ptr [ %6, %for.body.us.us ], [ getelementptr inbounds ([12 x [12 x i8]], ptr @arr_163, i32 0, i32 2, i32 0), %for.body.lr.ph.split.us.split.us.new ] %ac.085.us.us = phi i32 [ 0, %for.body.lr.ph.split.us.split.us.new ], [ %add43.us.us.3, %for.body.us.us ] - %lsr.iv3 = bitcast [12 x [12 x i8]]* %lsr.iv to i8* - %scevgep6 = getelementptr i8, i8* %lsr.iv3, i32 -24 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep6, i8 %conv29, i32 %3, i1 false) - %scevgep5 = getelementptr i8, i8* %lsr.iv3, i32 -12 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep5, i8 %conv29, i32 %3, i1 false) - call void @llvm.memset.p0i8.i32(i8* align 1 %lsr.iv3, i8 %conv29, i32 %3, i1 false) - %scevgep4 = getelementptr i8, i8* %lsr.iv3, i32 12 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep4, i8 %conv29, i32 %3, i1 false) + %lsr.iv3 = bitcast ptr %lsr.iv to ptr + %scevgep6 = getelementptr i8, ptr %lsr.iv3, i32 -24 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep6, i8 %conv29, i32 %3, i1 false) + %scevgep5 = getelementptr i8, ptr %lsr.iv3, i32 -12 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep5, i8 %conv29, i32 %3, i1 false) + call void @llvm.memset.p0.i32(ptr align 1 %lsr.iv3, i8 %conv29, i32 %3, i1 false) + %scevgep4 = getelementptr i8, ptr %lsr.iv3, i32 12 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep4, i8 %conv29, i32 %3, i1 false) %add43.us.us.3 = add nuw i32 %ac.085.us.us, 4 - %scevgep2 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* %lsr.iv, i32 0, i32 4, i32 0 - %6 = bitcast i8* %scevgep2 to [12 x [12 x i8]]* + %scevgep2 = getelementptr [12 x [12 x i8]], ptr %lsr.iv, i32 0, i32 4, i32 0 + %6 = bitcast ptr %scevgep2 to ptr %niter159.ncmp.3 = icmp eq i32 %unroll_iter158, %add43.us.us.3 br i1 %niter159.ncmp.3, label %for.cond.for.cond45.preheader_crit_edge.loopexit135.unr-lcssa, label %for.body.us.us @@ -102,36 +102,36 @@ br label %for.body.us.us115 for.body.us.us115: ; preds = %for.body.us.us115, %for.body.us.us115.preheader.new - %lsr.iv7 = phi [12 x [12 x i8]]* [ %12, %for.body.us.us115 ], [ bitcast (i8* getelementptr inbounds ([12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 2, i32 0) to [12 x [12 x i8]]*), %for.body.us.us115.preheader.new ] + %lsr.iv7 = phi ptr [ %12, %for.body.us.us115 ], [ getelementptr inbounds ([12 x [12 x i8]], ptr @arr_163, i32 0, i32 2, i32 0), %for.body.us.us115.preheader.new ] %ac.085.us.us116 = phi i32 [ 0, %for.body.us.us115.preheader.new ], [ %add43.us.us120.3, %for.body.us.us115 ] - %lsr.iv79 = bitcast [12 x [12 x i8]]* %lsr.iv7 to i8* - %scevgep12 = getelementptr i8, i8* %lsr.iv79, i32 -24 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep12, i8 %conv29, i32 %8, i1 false) - %scevgep11 = getelementptr i8, i8* %lsr.iv79, i32 -12 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep11, i8 %conv29, i32 %8, i1 false) - call void @llvm.memset.p0i8.i32(i8* align 1 %lsr.iv79, i8 %conv29, i32 %8, i1 false) - %scevgep10 = getelementptr i8, i8* %lsr.iv79, i32 12 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep10, i8 %conv29, i32 %8, i1 false) + %lsr.iv79 = bitcast ptr %lsr.iv7 to ptr + %scevgep12 = getelementptr i8, ptr %lsr.iv79, i32 -24 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep12, i8 %conv29, i32 %8, i1 false) + %scevgep11 = getelementptr i8, ptr %lsr.iv79, i32 -12 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep11, i8 %conv29, i32 %8, i1 false) + call void @llvm.memset.p0.i32(ptr align 1 %lsr.iv79, i8 %conv29, i32 %8, i1 false) + %scevgep10 = getelementptr i8, ptr %lsr.iv79, i32 12 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep10, i8 %conv29, i32 %8, i1 false) %add43.us.us120.3 = add nuw i32 %ac.085.us.us116, 4 - %scevgep8 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* %lsr.iv7, i32 0, i32 4, i32 0 - %12 = bitcast i8* %scevgep8 to [12 x [12 x i8]]* + %scevgep8 = getelementptr [12 x [12 x i8]], ptr %lsr.iv7, i32 0, i32 4, i32 0 + %12 = bitcast ptr %scevgep8 to ptr %niter153.ncmp.3 = icmp eq i32 %unroll_iter152, %add43.us.us120.3 br i1 %niter153.ncmp.3, label %for.cond.for.cond45.preheader_crit_edge.loopexit.unr-lcssa, label %for.body.us.us115 for.body.us: ; preds = %for.body.us, %for.body.us.preheader.new - %lsr.iv13 = phi [12 x [12 x i8]]* [ %13, %for.body.us ], [ bitcast (i8* getelementptr inbounds ([12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 2, i32 0) to [12 x [12 x i8]]*), %for.body.us.preheader.new ] + %lsr.iv13 = phi ptr [ %13, %for.body.us ], [ getelementptr inbounds ([12 x [12 x i8]], ptr @arr_163, i32 0, i32 2, i32 0), %for.body.us.preheader.new ] %ac.085.us = phi i32 [ 0, %for.body.us.preheader.new ], [ %add43.us.3, %for.body.us ] - %lsr.iv1315 = bitcast [12 x [12 x i8]]* %lsr.iv13 to i8* - %scevgep18 = getelementptr i8, i8* %lsr.iv1315, i32 -24 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep18, i8 %conv29, i32 %8, i1 false) - %scevgep17 = getelementptr i8, i8* %lsr.iv1315, i32 -12 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep17, i8 %conv29, i32 %8, i1 false) - call void @llvm.memset.p0i8.i32(i8* align 1 %lsr.iv1315, i8 %conv29, i32 %8, i1 false) - %scevgep16 = getelementptr i8, i8* %lsr.iv1315, i32 12 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep16, i8 %conv29, i32 %8, i1 false) + %lsr.iv1315 = bitcast ptr %lsr.iv13 to ptr + %scevgep18 = getelementptr i8, ptr %lsr.iv1315, i32 -24 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep18, i8 %conv29, i32 %8, i1 false) + %scevgep17 = getelementptr i8, ptr %lsr.iv1315, i32 -12 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep17, i8 %conv29, i32 %8, i1 false) + call void @llvm.memset.p0.i32(ptr align 1 %lsr.iv1315, i8 %conv29, i32 %8, i1 false) + %scevgep16 = getelementptr i8, ptr %lsr.iv1315, i32 12 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep16, i8 %conv29, i32 %8, i1 false) %add43.us.3 = add nuw i32 %ac.085.us, 4 - %scevgep14 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* %lsr.iv13, i32 0, i32 4, i32 0 - %13 = bitcast i8* %scevgep14 to [12 x [12 x i8]]* + %scevgep14 = getelementptr [12 x [12 x i8]], ptr %lsr.iv13, i32 0, i32 4, i32 0 + %13 = bitcast ptr %scevgep14 to ptr %niter.ncmp.3 = icmp eq i32 %unroll_iter, %add43.us.3 br i1 %niter.ncmp.3, label %for.cond.for.cond45.preheader_crit_edge.loopexit147.unr-lcssa, label %for.body.us @@ -142,8 +142,8 @@ for.body.lr.ph.split.split.us: ; preds = %for.body.lr.ph.split %15 = icmp eq i32 %m, 0 %16 = sext i32 %cond11 to i64 - store i64 %16, i64* @var_76, align 8 - store i8 %frombool, i8* @var_77, align 1 + store i64 %16, ptr @var_76, align 8 + store i8 %frombool, ptr @var_77, align 1 %spec.select = select i1 %15, i32 %lor.ext, i32 %conv36 br label %for.cond.for.cond45.preheader_crit_edge @@ -158,8 +158,8 @@ br i1 %lcmp.mod157.not, label %for.cond.for.cond45.preheader_crit_edge.loopexit135, label %for.body.us.us.epil for.body.us.us.epil: ; preds = %for.cond.for.cond45.preheader_crit_edge.loopexit135.unr-lcssa - %scevgep140.epil = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %ac.085.us.us.unr, i32 0 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep140.epil, i8 %conv29, i32 %3, i1 false) + %scevgep140.epil = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %ac.085.us.us.unr, i32 0 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep140.epil, i8 %conv29, i32 %3, i1 false) %epil.iter.cmp156.not = icmp eq i32 %xtraiter154, 1 br i1 %epil.iter.cmp156.not, label %for.cond.for.cond45.preheader_crit_edge.loopexit135, label %for.body.us.us.epil.1 @@ -174,8 +174,8 @@ br i1 %lcmp.mod151.not, label %for.cond.for.cond45.preheader_crit_edge, label %for.body.us.us115.epil for.body.us.us115.epil: ; preds = %for.cond.for.cond45.preheader_crit_edge.loopexit.unr-lcssa - %scevgep138.epil = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %ac.085.us.us116.unr, i32 0 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep138.epil, i8 %conv29, i32 %8, i1 false) + %scevgep138.epil = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %ac.085.us.us116.unr, i32 0 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep138.epil, i8 %conv29, i32 %8, i1 false) %epil.iter.cmp150.not = icmp eq i32 %xtraiter148, 1 br i1 %epil.iter.cmp150.not, label %for.cond.for.cond45.preheader_crit_edge, label %for.body.us.us115.epil.1 @@ -185,14 +185,14 @@ br i1 %lcmp.mod.not, label %for.cond.for.cond45.preheader_crit_edge, label %for.body.us.epil for.body.us.epil: ; preds = %for.cond.for.cond45.preheader_crit_edge.loopexit147.unr-lcssa - %scevgep.epil = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %ac.085.us.unr, i32 0 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.epil, i8 %conv29, i32 %8, i1 false) + %scevgep.epil = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %ac.085.us.unr, i32 0 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep.epil, i8 %conv29, i32 %8, i1 false) %epil.iter.cmp.not = icmp eq i32 %xtraiter148, 1 br i1 %epil.iter.cmp.not, label %for.cond.for.cond45.preheader_crit_edge, label %for.body.us.epil.1 for.cond.for.cond45.preheader_crit_edge: ; preds = %for.body.us.us115.epil.2, %for.body.us.us115.epil.1, %for.body.us.epil.2, %for.body.us.epil.1, %for.body.us.epil, %for.cond.for.cond45.preheader_crit_edge.loopexit147.unr-lcssa, %for.body.us.us115.epil, %for.cond.for.cond45.preheader_crit_edge.loopexit.unr-lcssa, %for.cond.for.cond45.preheader_crit_edge.loopexit135, %for.body.lr.ph.split.split, %for.body.lr.ph.split.split.us %.us-phi = phi i32 [ %cond41.us.us, %for.cond.for.cond45.preheader_crit_edge.loopexit135 ], [ %spec.select, %for.body.lr.ph.split.split.us ], [ %spec.select143, %for.body.lr.ph.split.split ], [ %lor.ext, %for.body.us.us115.epil ], [ %lor.ext, %for.body.us.us115.epil.1 ], [ %lor.ext, %for.body.us.us115.epil.2 ], [ %lor.ext, %for.cond.for.cond45.preheader_crit_edge.loopexit.unr-lcssa ], [ %conv36, %for.body.us.epil ], [ %conv36, %for.body.us.epil.1 ], [ %conv36, %for.body.us.epil.2 ], [ %conv36, %for.cond.for.cond45.preheader_crit_edge.loopexit147.unr-lcssa ] - store i32 %.us-phi, i32* @var_81, align 4 + store i32 %.us-phi, ptr @var_81, align 4 br label %for.cond45.preheader for.cond45.preheader: ; preds = %for.cond.for.cond45.preheader_crit_edge, %entry @@ -216,69 +216,69 @@ %tobool47.not = icmp eq i32 %conv46, 0 %cond51 = select i1 %tobool47.not, i32 %l, i32 %h %idxprom = and i32 %cond51, 255 - %arrayidx59 = getelementptr inbounds [22 x i8], [22 x i8]* @arr_239, i32 0, i32 %idxprom - store i8 %conv58, i8* %arrayidx59, align 1 + %arrayidx59 = getelementptr inbounds [22 x i8], ptr @arr_239, i32 0, i32 %idxprom + store i8 %conv58, ptr %arrayidx59, align 1 %conv46.1 = and i32 %cond51, 255 %tobool47.not.1 = icmp eq i32 %conv46.1, 0 %cond51.1 = select i1 %tobool47.not.1, i32 %l, i32 %h %idxprom.1 = and i32 %cond51.1, 255 - %arrayidx59.1 = getelementptr inbounds [22 x i8], [22 x i8]* @arr_239, i32 0, i32 %idxprom.1 - store i8 %conv58, i8* %arrayidx59.1, align 1 + %arrayidx59.1 = getelementptr inbounds [22 x i8], ptr @arr_239, i32 0, i32 %idxprom.1 + store i8 %conv58, ptr %arrayidx59.1, align 1 %conv46.2 = and i32 %cond51.1, 255 %tobool47.not.2 = icmp eq i32 %conv46.2, 0 %cond51.2 = select i1 %tobool47.not.2, i32 %l, i32 %h %idxprom.2 = and i32 %cond51.2, 255 - %arrayidx59.2 = getelementptr inbounds [22 x i8], [22 x i8]* @arr_239, i32 0, i32 %idxprom.2 - store i8 %conv58, i8* %arrayidx59.2, align 1 + %arrayidx59.2 = getelementptr inbounds [22 x i8], ptr @arr_239, i32 0, i32 %idxprom.2 + store i8 %conv58, ptr %arrayidx59.2, align 1 %conv46.3 = and i32 %cond51.2, 255 %tobool47.not.3 = icmp eq i32 %conv46.3, 0 %cond51.3 = select i1 %tobool47.not.3, i32 %l, i32 %h %idxprom.3 = and i32 %cond51.3, 255 - %arrayidx59.3 = getelementptr inbounds [22 x i8], [22 x i8]* @arr_239, i32 0, i32 %idxprom.3 - store i8 %conv58, i8* %arrayidx59.3, align 1 + %arrayidx59.3 = getelementptr inbounds [22 x i8], ptr @arr_239, i32 0, i32 %idxprom.3 + store i8 %conv58, ptr %arrayidx59.3, align 1 br label %for.cond45 for.body.us.epil.1: ; preds = %for.body.us.epil %add43.us.epil = add nuw nsw i32 %ac.085.us.unr, 1 - %scevgep.epil.1 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %add43.us.epil, i32 0 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.epil.1, i8 %conv29, i32 %8, i1 false) + %scevgep.epil.1 = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %add43.us.epil, i32 0 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep.epil.1, i8 %conv29, i32 %8, i1 false) %epil.iter.cmp.1.not = icmp eq i32 %xtraiter148, 2 br i1 %epil.iter.cmp.1.not, label %for.cond.for.cond45.preheader_crit_edge, label %for.body.us.epil.2 for.body.us.epil.2: ; preds = %for.body.us.epil.1 %add43.us.epil.1 = add nuw nsw i32 %ac.085.us.unr, 2 - %scevgep.epil.2 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %add43.us.epil.1, i32 0 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.epil.2, i8 %conv29, i32 %8, i1 false) + %scevgep.epil.2 = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %add43.us.epil.1, i32 0 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep.epil.2, i8 %conv29, i32 %8, i1 false) br label %for.cond.for.cond45.preheader_crit_edge for.body.us.us115.epil.1: ; preds = %for.body.us.us115.epil %add43.us.us120.epil = add nuw nsw i32 %ac.085.us.us116.unr, 1 - %scevgep138.epil.1 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %add43.us.us120.epil, i32 0 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep138.epil.1, i8 %conv29, i32 %8, i1 false) + %scevgep138.epil.1 = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %add43.us.us120.epil, i32 0 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep138.epil.1, i8 %conv29, i32 %8, i1 false) %epil.iter.cmp150.1.not = icmp eq i32 %xtraiter148, 2 br i1 %epil.iter.cmp150.1.not, label %for.cond.for.cond45.preheader_crit_edge, label %for.body.us.us115.epil.2 for.body.us.us115.epil.2: ; preds = %for.body.us.us115.epil.1 %add43.us.us120.epil.1 = add nuw nsw i32 %ac.085.us.us116.unr, 2 - %scevgep138.epil.2 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %add43.us.us120.epil.1, i32 0 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep138.epil.2, i8 %conv29, i32 %8, i1 false) + %scevgep138.epil.2 = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %add43.us.us120.epil.1, i32 0 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep138.epil.2, i8 %conv29, i32 %8, i1 false) br label %for.cond.for.cond45.preheader_crit_edge for.body.us.us.epil.1: ; preds = %for.body.us.us.epil %add43.us.us.epil = add nuw nsw i32 %ac.085.us.us.unr, 1 - %scevgep140.epil.1 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %add43.us.us.epil, i32 0 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep140.epil.1, i8 %conv29, i32 %3, i1 false) + %scevgep140.epil.1 = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %add43.us.us.epil, i32 0 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep140.epil.1, i8 %conv29, i32 %3, i1 false) %epil.iter.cmp156.1.not = icmp eq i32 %xtraiter154, 2 br i1 %epil.iter.cmp156.1.not, label %for.cond.for.cond45.preheader_crit_edge.loopexit135, label %for.body.us.us.epil.2 for.body.us.us.epil.2: ; preds = %for.body.us.us.epil.1 %add43.us.us.epil.1 = add nuw nsw i32 %ac.085.us.us.unr, 2 - %scevgep140.epil.2 = getelementptr [12 x [12 x i8]], [12 x [12 x i8]]* @arr_163, i32 0, i32 %add43.us.us.epil.1, i32 0 - call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep140.epil.2, i8 %conv29, i32 %3, i1 false) + %scevgep140.epil.2 = getelementptr [12 x [12 x i8]], ptr @arr_163, i32 0, i32 %add43.us.us.epil.1, i32 0 + call void @llvm.memset.p0.i32(ptr align 1 %scevgep140.epil.2, i8 %conv29, i32 %3, i1 false) br label %for.cond.for.cond45.preheader_crit_edge.loopexit135 } - declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg) + declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg) ... --- diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir index b5998e3..8076caa 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s --- | - define dso_local arm_aapcs_vfpcc void @test_wlstp8(i8* noalias nocapture %a, i8* noalias nocapture readonly %b, i8* noalias nocapture readonly %c, i32 %N) { + define dso_local arm_aapcs_vfpcc void @test_wlstp8(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i32 %N) { entry: %0 = add i32 %N, 15 %1 = lshr i32 %0, 4 @@ -22,16 +22,16 @@ %5 = phi i32 [ %N, %vector.ph ], [ %7, %vector.body ] %6 = call <16 x i1> @llvm.arm.vctp8(i32 %5) %7 = sub i32 %5, 16 - %scevgep4 = getelementptr i8, i8* %b, i32 %index - %scevgep45 = bitcast i8* %scevgep4 to <16 x i8>* - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %scevgep45, i32 1, <16 x i1> %6, <16 x i8> undef) - %scevgep2 = getelementptr i8, i8* %c, i32 %index - %scevgep23 = bitcast i8* %scevgep2 to <16 x i8>* - %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %scevgep23, i32 1, <16 x i1> %6, <16 x i8> undef) + %scevgep4 = getelementptr i8, ptr %b, i32 %index + %scevgep45 = bitcast ptr %scevgep4 to ptr + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %scevgep45, i32 1, <16 x i1> %6, <16 x i8> undef) + %scevgep2 = getelementptr i8, ptr %c, i32 %index + %scevgep23 = bitcast ptr %scevgep2 to ptr + %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %scevgep23, i32 1, <16 x i1> %6, <16 x i8> undef) %tmp5 = mul <16 x i8> %wide.masked.load14, %wide.masked.load - %scevgep = getelementptr i8, i8* %a, i32 %index - %scevgep1 = bitcast i8* %scevgep to <16 x i8>* - call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %tmp5, <16 x i8>* %scevgep1, i32 1, <16 x i1> %6) + %scevgep = getelementptr i8, ptr %a, i32 %index + %scevgep1 = bitcast ptr %scevgep to ptr + call void @llvm.masked.store.v16i8.p0(<16 x i8> %tmp5, ptr %scevgep1, i32 1, <16 x i1> %6) %index.next = add i32 %index, 16 %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1) %tmp8 = icmp eq i32 %loop.dec, 0 @@ -41,7 +41,7 @@ ret void } - define dso_local arm_aapcs_vfpcc void @test_wlstp16(i16* noalias nocapture %a, i16* noalias nocapture readonly %b, i16* noalias nocapture readonly %c, i32 %N) { + define dso_local arm_aapcs_vfpcc void @test_wlstp16(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i32 %N) { entry: %0 = add i32 %N, 7 %1 = lshr i32 %0, 3 @@ -56,32 +56,32 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv5 = phi i16* [ %scevgep6, %vector.body ], [ %b, %vector.ph ] - %lsr.iv2 = phi i16* [ %scevgep3, %vector.body ], [ %c, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv5 = phi ptr [ %scevgep6, %vector.body ], [ %b, %vector.ph ] + %lsr.iv2 = phi ptr [ %scevgep3, %vector.body ], [ %c, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %count = phi i32 [ %n.vec, %vector.ph ], [ %loop.dec, %vector.body ] %5 = phi i32 [ %N, %vector.ph ], [ %7, %vector.body ] - %lsr.iv57 = bitcast i16* %lsr.iv5 to <8 x i16>* - %lsr.iv24 = bitcast i16* %lsr.iv2 to <8 x i16>* - %lsr.iv1 = bitcast i16* %lsr.iv to <8 x i16>* + %lsr.iv57 = bitcast ptr %lsr.iv5 to ptr + %lsr.iv24 = bitcast ptr %lsr.iv2 to ptr + %lsr.iv1 = bitcast ptr %lsr.iv to ptr %6 = call <8 x i1> @llvm.arm.vctp16(i32 %5) %7 = sub i32 %5, 8 - %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv57, i32 2, <8 x i1> %6, <8 x i16> undef) - %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv24, i32 2, <8 x i1> %6, <8 x i16> undef) + %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv57, i32 2, <8 x i1> %6, <8 x i16> undef) + %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %lsr.iv24, i32 2, <8 x i1> %6, <8 x i16> undef) %tmp5 = mul <8 x i16> %wide.masked.load14, %wide.masked.load - call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %tmp5, <8 x i16>* %lsr.iv1, i32 2, <8 x i1> %6) + call void @llvm.masked.store.v8i16.p0(<8 x i16> %tmp5, ptr %lsr.iv1, i32 2, <8 x i1> %6) %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1) %tmp8 = icmp eq i32 %loop.dec, 0 - %scevgep = getelementptr i16, i16* %lsr.iv, i32 8 - %scevgep3 = getelementptr i16, i16* %lsr.iv2, i32 8 - %scevgep6 = getelementptr i16, i16* %lsr.iv5, i32 8 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 8 + %scevgep3 = getelementptr i16, ptr %lsr.iv2, i32 8 + %scevgep6 = getelementptr i16, ptr %lsr.iv5, i32 8 br i1 %tmp8, label %for.cond.cleanup, label %vector.body for.cond.cleanup: ; preds = %vector.body, %entry ret void } - define dso_local i32 @test_wlstp32(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) { + define dso_local i32 @test_wlstp32(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) { entry: %0 = add i32 %N, 3 %1 = lshr i32 %0, 2 @@ -96,23 +96,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv2 = phi i32* [ %scevgep3, %vector.body ], [ %a, %vector.ph ] - %lsr.iv = phi i32* [ %scevgep, %vector.body ], [ %b, %vector.ph ] + %lsr.iv2 = phi ptr [ %scevgep3, %vector.body ], [ %a, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %b, %vector.ph ] %count = phi i32 [ %n.vec, %vector.ph ], [ %loop.dec, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp6, %vector.body ] %5 = phi i32 [ %N, %vector.ph ], [ %7, %vector.body ] - %lsr.iv24 = bitcast i32* %lsr.iv2 to <4 x i32>* - %lsr.iv1 = bitcast i32* %lsr.iv to <4 x i32>* + %lsr.iv24 = bitcast ptr %lsr.iv2 to ptr + %lsr.iv1 = bitcast ptr %lsr.iv to ptr %6 = call <4 x i1> @llvm.arm.vctp32(i32 %5) %7 = sub i32 %5, 4 - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv24, i32 4, <4 x i1> %6, <4 x i32> undef) - %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %lsr.iv1, i32 4, <4 x i1> %6, <4 x i32> undef) + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv24, i32 4, <4 x i1> %6, <4 x i32> undef) + %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1, i32 4, <4 x i1> %6, <4 x i32> undef) %tmp5 = mul nsw <4 x i32> %wide.masked.load13, %wide.masked.load %tmp6 = add nsw <4 x i32> %tmp5, %vec.phi %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1) %tmp7 = icmp eq i32 %loop.dec, 0 - %scevgep = getelementptr i32, i32* %lsr.iv, i32 4 - %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 4 + %scevgep = getelementptr i32, ptr %lsr.iv, i32 4 + %scevgep3 = getelementptr i32, ptr %lsr.iv2, i32 4 br i1 %tmp7, label %middle.block, label %vector.body middle.block: ; preds = %vector.body @@ -128,15 +128,15 @@ declare i1 @llvm.test.set.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) - declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) - declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) - declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) - declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>) - declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) - declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) + declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) + declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>) + declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) + declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>) + declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>) + declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare <16 x i1> @llvm.arm.vctp8(i32) - declare void @llvm.stackprotector(i8*, i8**) + declare void @llvm.stackprotector(ptr, ptr) declare <8 x i1> @llvm.arm.vctp16(i32) declare <4 x i1> @llvm.arm.vctp32(i32) ... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir index 6d4c644..3e1fd79 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-liveout-lsr-shift.mir @@ -2,7 +2,7 @@ # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s --- | - define dso_local arm_aapcs_vfpcc signext i16 @wrong_liveout_shift(i8* nocapture readonly %b, i8* nocapture readonly %c, i32 %N) { + define dso_local arm_aapcs_vfpcc signext i16 @wrong_liveout_shift(ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) { entry: %cmp11 = icmp eq i32 %N, 0 %0 = add i32 %N, 7 @@ -20,23 +20,23 @@ br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %lsr.iv20 = phi i8* [ %scevgep21, %vector.body ], [ %c, %vector.ph ] - %lsr.iv = phi i8* [ %scevgep, %vector.body ], [ %b, %vector.ph ] + %lsr.iv20 = phi ptr [ %scevgep21, %vector.body ], [ %c, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %b, %vector.ph ] %vec.phi = phi <8 x i16> [ <i16 32767, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, %vector.ph ], [ %15, %vector.body ] %8 = phi i32 [ %start, %vector.ph ], [ %16, %vector.body ] %9 = phi i32 [ %N, %vector.ph ], [ %11, %vector.body ] - %lsr.iv2022 = bitcast i8* %lsr.iv20 to <8 x i8>* - %lsr.iv19 = bitcast i8* %lsr.iv to <8 x i8>* + %lsr.iv2022 = bitcast ptr %lsr.iv20 to ptr + %lsr.iv19 = bitcast ptr %lsr.iv to ptr %10 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %9) %11 = sub i32 %9, 8 - %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv19, i32 1, <8 x i1> %10, <8 x i8> undef) + %wide.masked.load = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %lsr.iv19, i32 1, <8 x i1> %10, <8 x i8> undef) %12 = zext <8 x i8> %wide.masked.load to <8 x i16> - %wide.masked.load16 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %lsr.iv2022, i32 1, <8 x i1> %10, <8 x i8> undef) + %wide.masked.load16 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %lsr.iv2022, i32 1, <8 x i1> %10, <8 x i8> undef) %13 = zext <8 x i8> %wide.masked.load16 to <8 x i16> %14 = mul nuw <8 x i16> %13, %12 %15 = sub <8 x i16> %vec.phi, %14 - %scevgep = getelementptr i8, i8* %lsr.iv, i32 8 - %scevgep21 = getelementptr i8, i8* %lsr.iv20, i32 8 + %scevgep = getelementptr i8, ptr %lsr.iv, i32 8 + %scevgep21 = getelementptr i8, ptr %lsr.iv20, i32 8 %16 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %8, i32 1) %17 = icmp ne i32 %16, 0 br i1 %17, label %vector.body, label %middle.block @@ -53,7 +53,7 @@ %a.0.lcssa = phi i16 [ 32767, %entry ], [ %20, %middle.block ] ret i16 %a.0.lcssa } - declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>) + declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>) declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) declare i32 @llvm.start.loop.iterations.i32(i32) declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir index 76b08a6..2aab1ed 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-opcode-liveout.mir @@ -5,7 +5,7 @@ # is too complex to process for now. --- | - define dso_local i32 @wrong_vctp_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + define dso_local i32 @wrong_vctp_liveout(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 { entry: %cmp9 = icmp eq i32 %N, 0 %tmp = add i32 %N, 3 @@ -22,22 +22,22 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %tmp13, %vector.body ] %tmp7 = phi i32 [ %N, %vector.ph ], [ %tmp9, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* - %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr + %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr %tmp8 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp7) %tmp9 = sub i32 %tmp7, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32> - %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %tmp8, <4 x i16> undef) %tmp11 = sext <4 x i16> %wide.masked.load14 to <4 x i32> %tmp12 = mul nsw <4 x i32> %tmp11, %tmp10 %tmp13 = add <4 x i32> %tmp12, %vec.phi - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 - %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4 %tmp14 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %tmp15 = icmp ne i32 %tmp14, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -59,7 +59,7 @@ %res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp18, %middle.block ] ret i32 %res.0.lcssa } - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1 + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir index ae88700..b0a0ccb 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wrong-vctp-operand-liveout.mir @@ -4,7 +4,7 @@ # The VCTP uses r2, which is redefined in the loop. --- | - define dso_local i32 @wrong_vctp_liveout(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %N) local_unnamed_addr #0 { + define dso_local i32 @wrong_vctp_liveout(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N) local_unnamed_addr #0 { entry: %cmp9 = icmp eq i32 %N, 0 %0 = add i32 %N, 3 @@ -21,22 +21,22 @@ vector.body: ; preds = %vector.body, %vector.ph %lsr.iv1 = phi i32 [ %lsr.iv.next, %vector.body ], [ %start, %vector.ph ] - %lsr.iv18 = phi i16* [ %scevgep19, %vector.body ], [ %b, %vector.ph ] - %lsr.iv = phi i16* [ %scevgep, %vector.body ], [ %a, %vector.ph ] + %lsr.iv18 = phi ptr [ %scevgep19, %vector.body ], [ %b, %vector.ph ] + %lsr.iv = phi ptr [ %scevgep, %vector.body ], [ %a, %vector.ph ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ] %6 = phi i32 [ %N, %vector.ph ], [ %8, %vector.body ] - %lsr.iv17 = bitcast i16* %lsr.iv to <4 x i16>* - %lsr.iv1820 = bitcast i16* %lsr.iv18 to <4 x i16>* + %lsr.iv17 = bitcast ptr %lsr.iv to ptr + %lsr.iv1820 = bitcast ptr %lsr.iv18 to ptr %7 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %6) %8 = sub i32 %6, 4 - %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %7, <4 x i16> undef) + %wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv17, i32 2, <4 x i1> %7, <4 x i16> undef) %9 = sext <4 x i16> %wide.masked.load to <4 x i32> - %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv1820, i32 2, <4 x i1> %7, <4 x i16> undef) + %wide.masked.load14 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %lsr.iv1820, i32 2, <4 x i1> %7, <4 x i16> undef) %10 = sext <4 x i16> %wide.masked.load14 to <4 x i32> %11 = mul nsw <4 x i32> %10, %9 %12 = add <4 x i32> %11, %vec.phi - %scevgep = getelementptr i16, i16* %lsr.iv, i32 4 - %scevgep19 = getelementptr i16, i16* %lsr.iv18, i32 4 + %scevgep = getelementptr i16, ptr %lsr.iv, i32 4 + %scevgep19 = getelementptr i16, ptr %lsr.iv18, i32 4 %13 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1) %14 = icmp ne i32 %13, 0 %lsr.iv.next = add nsw i32 %lsr.iv1, -1 @@ -52,7 +52,7 @@ %res.0.lcssa = phi i32 [ 0, %entry ], [ %17, %middle.block ] ret i32 %res.0.lcssa } - declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1 + declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) #1 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2 declare i32 @llvm.start.loop.iterations.i32(i32) #3 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll index cdfd2c8..b91800c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll @@ -3,7 +3,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" -define arm_aapcs_vfpcc void @push_out_add_sub_block(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +define arm_aapcs_vfpcc void @push_out_add_sub_block(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_add_sub_block( ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> <i32 0, i32 2, i32 4, i32 6>, <i32 6, i32 6, i32 6, i32 6> @@ -45,11 +45,11 @@ vector.body: ; preds = %vector.body, %vecto lower.block: ; preds = %vector.body %1 = add <4 x i32> %vec.ind, <i32 6, i32 6, i32 6, i32 6> - %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) - %3 = getelementptr inbounds i32, i32* %dst, i32 %index - %4 = bitcast i32* %3 to <4 x i32>* - store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4 + %2 = getelementptr inbounds i32, ptr %data, <4 x i32> %1 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + %3 = getelementptr inbounds i32, ptr %dst, i32 %index + %4 = bitcast ptr %3 to ptr + store <4 x i32> %wide.masked.gather, ptr %4, align 4 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8> br label %vector.body.end @@ -62,7 +62,7 @@ end: ret void; } -define arm_aapcs_vfpcc void @push_out_add_sub_block_commutedphi(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +define arm_aapcs_vfpcc void @push_out_add_sub_block_commutedphi(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_add_sub_block_commutedphi( ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: [[PUSHEDOUTADD:%.*]] = add <4 x i32> <i32 0, i32 2, i32 4, i32 6>, <i32 6, i32 6, i32 6, i32 6> @@ -104,11 +104,11 @@ vector.body: ; preds = %vector.body, %vecto lower.block: ; preds = %vector.body %1 = add <4 x i32> %vec.ind, <i32 6, i32 6, i32 6, i32 6> - %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) - %3 = getelementptr inbounds i32, i32* %dst, i32 %index - %4 = bitcast i32* %3 to <4 x i32>* - store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4 + %2 = getelementptr inbounds i32, ptr %data, <4 x i32> %1 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + %3 = getelementptr inbounds i32, ptr %dst, i32 %index + %4 = bitcast ptr %3 to ptr + store <4 x i32> %wide.masked.gather, ptr %4, align 4 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8> br label %vector.body.end @@ -121,7 +121,7 @@ end: ret void; } -define arm_aapcs_vfpcc void @push_out_mul_sub_block(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +define arm_aapcs_vfpcc void @push_out_mul_sub_block(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_mul_sub_block( ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: [[PUSHEDOUTMUL:%.*]] = mul <4 x i32> <i32 0, i32 2, i32 4, i32 6>, <i32 3, i32 3, i32 3, i32 3> @@ -166,11 +166,11 @@ vector.body: ; preds = %vector.body, %vecto lower.block: ; preds = %vector.body %1 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3> %2 = add <4 x i32> %1, <i32 6, i32 6, i32 6, i32 6> - %3 = getelementptr inbounds i32, i32* %data, <4 x i32> %2 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) - %4 = getelementptr inbounds i32, i32* %dst, i32 %index - %5 = bitcast i32* %4 to <4 x i32>* - store <4 x i32> %wide.masked.gather, <4 x i32>* %5, align 4 + %3 = getelementptr inbounds i32, ptr %data, <4 x i32> %2 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + %4 = getelementptr inbounds i32, ptr %dst, i32 %index + %5 = bitcast ptr %4 to ptr + store <4 x i32> %wide.masked.gather, ptr %5, align 4 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8> br label %vector.body.end @@ -184,7 +184,7 @@ end: } -define arm_aapcs_vfpcc void @push_out_mul_sub_loop(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +define arm_aapcs_vfpcc void @push_out_mul_sub_loop(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @push_out_mul_sub_loop( ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -232,11 +232,11 @@ vector.2.ph: vector.2.body: ; preds = %vector.body %0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3> %1 = add <4 x i32> %0, <i32 6, i32 6, i32 6, i32 6> - %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) - %3 = getelementptr inbounds i32, i32* %dst, i32 %index - %4 = bitcast i32* %3 to <4 x i32>* - store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4 + %2 = getelementptr inbounds i32, ptr %data, <4 x i32> %1 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + %3 = getelementptr inbounds i32, ptr %dst, i32 %index + %4 = bitcast ptr %3 to ptr + store <4 x i32> %wide.masked.gather, ptr %4, align 4 br label %vector.2.body.end vector.2.body.end: ; preds = %lower.block @@ -254,7 +254,7 @@ end: ret void; } -define arm_aapcs_vfpcc void @invariant_add(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +define arm_aapcs_vfpcc void @invariant_add(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: @invariant_add( ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -282,11 +282,11 @@ vector.body: ; preds = %vector.body, %vecto %vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ] %l0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3> %l1 = add <4 x i32> %l0, %vec.ind - %l2 = getelementptr inbounds i32, i32* %data, <4 x i32> %l1 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %l2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) - %l3 = getelementptr inbounds i32, i32* %dst, i32 %index - %l4 = bitcast i32* %l3 to <4 x i32>* - store <4 x i32> %wide.masked.gather, <4 x i32>* %l4, align 4 + %l2 = getelementptr inbounds i32, ptr %data, <4 x i32> %l1 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %l2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + %l3 = getelementptr inbounds i32, ptr %dst, i32 %index + %l4 = bitcast ptr %l3 to ptr + store <4 x i32> %wide.masked.gather, ptr %l4, align 4 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8> %l5 = icmp eq i32 %index.next, %n.vec @@ -296,7 +296,7 @@ end: ret void; } -define void @gatherload(i32 %n, i32 %m, i32* nocapture %a, i32* nocapture readonly %b, i32 %call.us.us) { +define void @gatherload(i32 %n, i32 %m, ptr nocapture %a, ptr nocapture readonly %b, i32 %call.us.us) { ; CHECK-LABEL: @gatherload( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0 @@ -360,7 +360,7 @@ define void @gatherload(i32 %n, i32 %m, i32* nocapture %a, i32* nocapture readon ; CHECK-NEXT: ret void ; entry: - %a57 = bitcast i32* %a to i8* + %a57 = bitcast ptr %a to ptr %cmp38 = icmp sgt i32 %n, 0 br i1 %cmp38, label %for.body.lr.ph, label %for.end16 @@ -370,11 +370,11 @@ for.body.lr.ph: ; preds = %entry for.body.us.us.preheader: ; preds = %for.body.lr.ph %0 = shl nuw i32 %m, 2 - %scevgep = getelementptr i32, i32* %a, i32 %m - %scevgep64 = getelementptr i32, i32* %b, i32 %m + %scevgep = getelementptr i32, ptr %a, i32 %m + %scevgep64 = getelementptr i32, ptr %b, i32 %m %min.iters.check = icmp ult i32 %m, 4 - %bound0 = icmp ugt i32* %scevgep64, %a - %bound1 = icmp ugt i32* %scevgep, %b + %bound0 = icmp ugt ptr %scevgep64, %a + %bound1 = icmp ugt ptr %scevgep, %b %found.conflict = and i1 %bound0, %bound1 %n.vec = and i32 %m, -4 %cmp.n = icmp eq i32 %n.vec, %m @@ -384,21 +384,21 @@ for.body.us.us: ; preds = %for.body.us.us.preh %i.039.us.us = phi i32 [ %inc15.us.us, %for.cond5.for.end13_crit_edge.us.us ], [ 0, %for.body.us.us.preheader ] %1 = add i32 0, 0 %vla.us.us = alloca i32, i32 %call.us.us, align 4 - %vla.us.us56 = bitcast i32* %vla.us.us to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* nonnull align 4 %vla.us.us56, i8* align 4 %a57, i32 %0, i1 false) + %vla.us.us56 = bitcast ptr %vla.us.us to ptr + call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 4 %vla.us.us56, ptr align 4 %a57, i32 %0, i1 false) %brmerge = select i1 %min.iters.check, i1 true, i1 %found.conflict br i1 %brmerge, label %for.body7.us.us.preheader, label %vector.body vector.body: ; preds = %for.body.us.us, %vector.body %index = phi i32 [ %index.next, %vector.body ], [ 0, %for.body.us.us ] - %2 = getelementptr inbounds i32, i32* %b, i32 %index - %3 = bitcast i32* %2 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %3, align 4 - %4 = getelementptr inbounds i32, i32* %vla.us.us, <4 x i32> %wide.load - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %4, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) - %5 = getelementptr inbounds i32, i32* %a, i32 %index - %6 = bitcast i32* %5 to <4 x i32>* - store <4 x i32> %wide.masked.gather, <4 x i32>* %6, align 4 + %2 = getelementptr inbounds i32, ptr %b, i32 %index + %3 = bitcast ptr %2 to ptr + %wide.load = load <4 x i32>, ptr %3, align 4 + %4 = getelementptr inbounds i32, ptr %vla.us.us, <4 x i32> %wide.load + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %4, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + %5 = getelementptr inbounds i32, ptr %a, i32 %index + %6 = bitcast ptr %5 to ptr + store <4 x i32> %wide.masked.gather, ptr %6, align 4 %index.next = add nuw i32 %index, 4 %7 = icmp eq i32 %index.next, %n.vec br i1 %7, label %middle.block, label %vector.body @@ -412,12 +412,12 @@ for.body7.us.us.preheader: ; preds = %for.body.us.us, %mi for.body7.us.us: ; preds = %for.body7.us.us.preheader, %for.body7.us.us %j.137.us.us = phi i32 [ %inc12.us.us, %for.body7.us.us ], [ %j.137.us.us.ph, %for.body7.us.us.preheader ] - %arrayidx8.us.us = getelementptr inbounds i32, i32* %b, i32 %j.137.us.us - %8 = load i32, i32* %arrayidx8.us.us, align 4 - %arrayidx9.us.us = getelementptr inbounds i32, i32* %vla.us.us, i32 %8 - %9 = load i32, i32* %arrayidx9.us.us, align 4 - %arrayidx10.us.us = getelementptr inbounds i32, i32* %a, i32 %j.137.us.us - store i32 %9, i32* %arrayidx10.us.us, align 4 + %arrayidx8.us.us = getelementptr inbounds i32, ptr %b, i32 %j.137.us.us + %8 = load i32, ptr %arrayidx8.us.us, align 4 + %arrayidx9.us.us = getelementptr inbounds i32, ptr %vla.us.us, i32 %8 + %9 = load i32, ptr %arrayidx9.us.us, align 4 + %arrayidx10.us.us = getelementptr inbounds i32, ptr %a, i32 %j.137.us.us + store i32 %9, ptr %arrayidx10.us.us, align 4 %inc12.us.us = add nuw nsw i32 %j.137.us.us, 1 %exitcond58.not = icmp eq i32 %inc12.us.us, %m br i1 %exitcond58.not, label %for.cond5.for.end13_crit_edge.us.us, label %for.body7.us.us @@ -437,5 +437,5 @@ for.end16: ; preds = %for.body, %for.cond ret void } -declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) -declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) +declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1) diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll index a89d352..18c8a8a 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll @@ -16,7 +16,7 @@ -define arm_aapcs_vfpcc void @push_out_mul_gather(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +define arm_aapcs_vfpcc void @push_out_mul_gather(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: push_out_mul_gather: ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: adr r3, .LCPI0_0 @@ -45,11 +45,11 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ] %0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3> - %1 = getelementptr inbounds i32, i32* %data, <4 x i32> %0 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) - %2 = getelementptr inbounds i32, i32* %dst, i32 %index - %3 = bitcast i32* %2 to <4 x i32>* - store <4 x i32> %wide.masked.gather, <4 x i32>* %3, align 4 + %1 = getelementptr inbounds i32, ptr %data, <4 x i32> %0 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + %2 = getelementptr inbounds i32, ptr %dst, i32 %index + %3 = bitcast ptr %2 to ptr + store <4 x i32> %wide.masked.gather, ptr %3, align 4 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8> %4 = icmp eq i32 %index.next, %n.vec @@ -59,7 +59,7 @@ end: ret void; } -define arm_aapcs_vfpcc void @push_out_add_gather(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +define arm_aapcs_vfpcc void @push_out_add_gather(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: push_out_add_gather: ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: adr r3, .LCPI1_0 @@ -88,11 +88,11 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ] %0 = add <4 x i32> %vec.ind, <i32 6, i32 6, i32 6, i32 6> - %1 = getelementptr inbounds i32, i32* %data, <4 x i32> %0 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) - %2 = getelementptr inbounds i32, i32* %dst, i32 %index - %3 = bitcast i32* %2 to <4 x i32>* - store <4 x i32> %wide.masked.gather, <4 x i32>* %3, align 4 + %1 = getelementptr inbounds i32, ptr %data, <4 x i32> %0 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + %2 = getelementptr inbounds i32, ptr %dst, i32 %index + %3 = bitcast ptr %2 to ptr + store <4 x i32> %wide.masked.gather, ptr %3, align 4 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8> %4 = icmp eq i32 %index.next, %n.vec @@ -102,7 +102,7 @@ end: ret void; } -define arm_aapcs_vfpcc void @push_out_mul_add_gather(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +define arm_aapcs_vfpcc void @push_out_mul_add_gather(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: push_out_mul_add_gather: ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: adr r3, .LCPI2_0 @@ -132,11 +132,11 @@ vector.body: ; preds = %vector.body, %vecto %vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ] %0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3> %1 = add <4 x i32> %0, <i32 6, i32 6, i32 6, i32 6> - %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) - %3 = getelementptr inbounds i32, i32* %dst, i32 %index - %4 = bitcast i32* %3 to <4 x i32>* - store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4 + %2 = getelementptr inbounds i32, ptr %data, <4 x i32> %1 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + %3 = getelementptr inbounds i32, ptr %dst, i32 %index + %4 = bitcast ptr %3 to ptr + store <4 x i32> %wide.masked.gather, ptr %4, align 4 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8> %5 = icmp eq i32 %index.next, %n.vec @@ -146,7 +146,7 @@ end: ret void; } -define arm_aapcs_vfpcc void @push_out_mul_scatter(i32* noalias nocapture readonly %data, +define arm_aapcs_vfpcc void @push_out_mul_scatter(ptr noalias nocapture readonly %data, ; CHECK-LABEL: push_out_mul_scatter: ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: adr r1, .LCPI3_0 @@ -166,7 +166,7 @@ define arm_aapcs_vfpcc void @push_out_mul_scatter(i32* noalias nocapture readonl ; CHECK-NEXT: .long 4294967224 @ 0xffffffb8 ; CHECK-NEXT: .long 4294967248 @ 0xffffffd0 ; CHECK-NEXT: .long 4294967272 @ 0xffffffe8 - i32* noalias nocapture %dst, i32 %n.vec, + ptr noalias nocapture %dst, i32 %n.vec, <4 x i32> %to.store) { vector.ph: ; preds = %for.body.preheader @@ -176,8 +176,8 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ] %0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3> - %1 = getelementptr inbounds i32, i32* %data, <4 x i32> %0 - call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %to.store, <4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) + %1 = getelementptr inbounds i32, ptr %data, <4 x i32> %0 + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %to.store, <4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8> %2 = icmp eq i32 %index.next, %n.vec @@ -187,7 +187,7 @@ end: ret void; } -define arm_aapcs_vfpcc void @push_out_add_scatter(i32* noalias nocapture readonly %data, +define arm_aapcs_vfpcc void @push_out_add_scatter(ptr noalias nocapture readonly %data, ; CHECK-LABEL: push_out_add_scatter: ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: adr r1, .LCPI4_0 @@ -207,7 +207,7 @@ define arm_aapcs_vfpcc void @push_out_add_scatter(i32* noalias nocapture readonl ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 8 @ 0x8 ; CHECK-NEXT: .long 16 @ 0x10 - i32* noalias nocapture %dst, i32 %n.vec, + ptr noalias nocapture %dst, i32 %n.vec, <4 x i32> %to.store) { vector.ph: ; preds = %for.body.preheader @@ -217,8 +217,8 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ] %0 = add <4 x i32> %vec.ind, <i32 6, i32 6, i32 6, i32 6> - %1 = getelementptr inbounds i32, i32* %data, <4 x i32> %0 - call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %to.store, <4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) + %1 = getelementptr inbounds i32, ptr %data, <4 x i32> %0 + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %to.store, <4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8> %2 = icmp eq i32 %index.next, %n.vec @@ -228,7 +228,7 @@ end: ret void; } -define arm_aapcs_vfpcc void @push_out_mul_gather_scatter(i32* noalias nocapture readonly %data, +define arm_aapcs_vfpcc void @push_out_mul_gather_scatter(ptr noalias nocapture readonly %data, ; CHECK-LABEL: push_out_mul_gather_scatter: ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: adr r1, .LCPI5_0 @@ -251,7 +251,7 @@ define arm_aapcs_vfpcc void @push_out_mul_gather_scatter(i32* noalias nocapture ; CHECK-NEXT: .long 6 @ 0x6 ; CHECK-NEXT: .long 12 @ 0xc ; CHECK-NEXT: .long 18 @ 0x12 - i32* noalias nocapture %dst, i32 %n.vec) { + ptr noalias nocapture %dst, i32 %n.vec) { vector.ph: ; preds = %for.body.preheader br label %vector.body @@ -260,9 +260,9 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ] %0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3> - %1 = getelementptr inbounds i32, i32* %data, <4 x i32> %0 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) - call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %wide.masked.gather, <4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) + %1 = getelementptr inbounds i32, ptr %data, <4 x i32> %0 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %wide.masked.gather, <4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8> %2 = icmp eq i32 %index.next, %n.vec @@ -272,7 +272,7 @@ end: ret void; } -define arm_aapcs_vfpcc void @push_out_add_sub_block(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { +define arm_aapcs_vfpcc void @push_out_add_sub_block(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: push_out_add_sub_block: ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: adr r3, .LCPI6_0 @@ -304,11 +304,11 @@ vector.body: ; preds = %vector.body, %vecto lower.block: ; preds = %vector.body %0 = add <4 x i32> %vec.ind, <i32 6, i32 6, i32 6, i32 6> - %1 = getelementptr inbounds i32, i32* %data, <4 x i32> %0 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) - %2 = getelementptr inbounds i32, i32* %dst, i32 %index - %3 = bitcast i32* %2 to <4 x i32>* - store <4 x i32> %wide.masked.gather, <4 x i32>* %3, align 4 + %1 = getelementptr inbounds i32, ptr %data, <4 x i32> %0 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + %2 = getelementptr inbounds i32, ptr %dst, i32 %index + %3 = bitcast ptr %2 to ptr + store <4 x i32> %wide.masked.gather, ptr %3, align 4 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8> br label %vector.body.end @@ -321,7 +321,7 @@ end: ret void; } -define arm_aapcs_vfpcc void @non_gatscat_use1(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec, <4 x i32>* %x) { +define arm_aapcs_vfpcc void @non_gatscat_use1(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec, ptr %x) { ; CHECK-LABEL: non_gatscat_use1: ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: .save {r4, lr} @@ -365,13 +365,13 @@ vector.body: ; preds = %vector.body, %vecto %vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ] %0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3> %1 = add <4 x i32> %0, <i32 6, i32 6, i32 6, i32 6> - %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) - %3 = getelementptr inbounds i32, i32* %dst, i32 %index - %4 = bitcast i32* %3 to <4 x i32>* - store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4 + %2 = getelementptr inbounds i32, ptr %data, <4 x i32> %1 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + %3 = getelementptr inbounds i32, ptr %dst, i32 %index + %4 = bitcast ptr %3 to ptr + store <4 x i32> %wide.masked.gather, ptr %4, align 4 %non_gatscat_use = mul <4 x i32> %0, <i32 3, i32 3, i32 3, i32 3> - store <4 x i32> %non_gatscat_use, <4 x i32>* %x, align 4 + store <4 x i32> %non_gatscat_use, ptr %x, align 4 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8> %5 = icmp eq i32 %index.next, %n.vec @@ -381,7 +381,7 @@ end: ret void; } -define arm_aapcs_vfpcc void @non_gatscat_use2(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec, <4 x i32>* %x) { +define arm_aapcs_vfpcc void @non_gatscat_use2(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec, ptr %x) { ; CHECK-LABEL: non_gatscat_use2: ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: .save {r4, r5, r7, lr} @@ -428,13 +428,13 @@ vector.body: ; preds = %vector.body, %vecto %vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ] %0 = mul <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3> %1 = add <4 x i32> %0, <i32 6, i32 6, i32 6, i32 6> - %2 = getelementptr inbounds i32, i32* %data, <4 x i32> %1 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) - %3 = getelementptr inbounds i32, i32* %dst, i32 %index - %4 = bitcast i32* %3 to <4 x i32>* - store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4 + %2 = getelementptr inbounds i32, ptr %data, <4 x i32> %1 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + %3 = getelementptr inbounds i32, ptr %dst, i32 %index + %4 = bitcast ptr %3 to ptr + store <4 x i32> %wide.masked.gather, ptr %4, align 4 %non_gatscat_use = mul <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3> - store <4 x i32> %non_gatscat_use, <4 x i32>* %x, align 4 + store <4 x i32> %non_gatscat_use, ptr %x, align 4 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 8, i32 8, i32 8, i32 8> %5 = icmp eq i32 %index.next, %n.vec @@ -444,7 +444,7 @@ end: ret void; } -define dso_local void @arm_mat_mult_q31(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %n, i32 %m, i32 %l) local_unnamed_addr #0 { +define dso_local void @arm_mat_mult_q31(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n, i32 %m, i32 %l) local_unnamed_addr #0 { ; CHECK-LABEL: arm_mat_mult_q31: ; CHECK: @ %bb.0: @ %for.cond8.preheader.us.us.preheader.preheader ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} @@ -568,12 +568,12 @@ vector.body: ; preds = %vector.body, %vecto %vec.ind = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, %vector.ph ], [ %vec.ind.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %9, %vector.body ] %3 = add <4 x i32> %vec.ind, %broadcast.splat - %4 = getelementptr inbounds i32, i32* %A, <4 x i32> %3 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %4, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), !tbaa !3 + %4 = getelementptr inbounds i32, ptr %A, <4 x i32> %3 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %4, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), !tbaa !3 %5 = mul <4 x i32> %vec.ind, %broadcast.splat87 %6 = add <4 x i32> %5, %broadcast.splat89 - %7 = getelementptr inbounds i32, i32* %B, <4 x i32> %6 - %wide.masked.gather90 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %7, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), !tbaa !3 + %7 = getelementptr inbounds i32, ptr %B, <4 x i32> %6 + %wide.masked.gather90 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %7, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef), !tbaa !3 %8 = mul nsw <4 x i32> %wide.masked.gather90, %wide.masked.gather %9 = add <4 x i32> %8, %vec.phi %index.next = add i32 %index, 4 @@ -585,8 +585,8 @@ middle.block: ; preds = %vector.body %11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %9) ;for.cond8.for.cond.cleanup10_crit_edge.us.us: ; preds = %for.body11.us.us, %middle.block %add19.us.us = add i32 %j.051.us.us, %mul18.us - %arrayidx20.us.us = getelementptr inbounds i32, i32* %C, i32 %add19.us.us - store i32 %11, i32* %arrayidx20.us.us, align 4, !tbaa !3 + %arrayidx20.us.us = getelementptr inbounds i32, ptr %C, i32 %add19.us.us + store i32 %11, ptr %arrayidx20.us.us, align 4, !tbaa !3 %inc.us.us = add nuw nsw i32 %j.051.us.us, 1 %exitcond = icmp eq i32 %inc.us.us, %m br i1 %exitcond, label %for.cond4.for.cond.cleanup6_crit_edge.us, label %vector.ph @@ -595,7 +595,7 @@ for.end25: ; preds = %for.cond4.for.cond. ret void } -define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16* noalias nocapture readonly %B, i16* noalias nocapture %C, i32 %n, i32 %m, i32 %l) local_unnamed_addr #0 { +define dso_local void @arm_mat_mult_q15(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n, i32 %m, i32 %l) local_unnamed_addr #0 { ; CHECK-LABEL: arm_mat_mult_q15: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -772,9 +772,9 @@ for.cond1.preheader.us: ; preds = %for.cond1.for.cond. br i1 %cmp642, label %for.cond5.preheader.us73.preheader, label %for.cond5.preheader.us.us for.cond5.preheader.us73.preheader: ; preds = %for.cond1.preheader.us - %scevgep = getelementptr i16, i16* %C, i32 %1 - %scevgep82 = bitcast i16* %scevgep to i8* - call void @llvm.memset.p0i8.i32(i8* align 2 %scevgep82, i8 0, i32 %0, i1 false) + %scevgep = getelementptr i16, ptr %C, i32 %1 + %scevgep82 = bitcast ptr %scevgep to ptr + call void @llvm.memset.p0.i32(ptr align 2 %scevgep82, i8 0, i32 %0, i1 false) br label %for.cond1.for.cond.cleanup3_crit_edge.us for.cond1.for.cond.cleanup3_crit_edge.us: ; preds = %for.cond5.for.cond.cleanup7_crit_edge.us.us, %for.cond5.preheader.us73.preheader @@ -801,14 +801,14 @@ vector.body: ; preds = %vector.body, %vecto %vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %11, %vector.body ] %2 = add i32 %index, %mul.us - %3 = getelementptr inbounds i16, i16* %A, i32 %2 - %4 = bitcast i16* %3 to <4 x i16>* - %wide.load = load <4 x i16>, <4 x i16>* %4, align 2, !tbaa !3 + %3 = getelementptr inbounds i16, ptr %A, i32 %2 + %4 = bitcast ptr %3 to ptr + %wide.load = load <4 x i16>, ptr %4, align 2, !tbaa !3 %5 = sext <4 x i16> %wide.load to <4 x i32> %6 = mul <4 x i32> %vec.ind, %broadcast.splat %7 = add <4 x i32> %6, %broadcast.splat86 - %8 = getelementptr inbounds i16, i16* %B, <4 x i32> %7 - %wide.masked.gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %8, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef), !tbaa !3 + %8 = getelementptr inbounds i16, ptr %B, <4 x i32> %7 + %wide.masked.gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %8, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef), !tbaa !3 %9 = sext <4 x i16> %wide.masked.gather to <4 x i32> %10 = mul nsw <4 x i32> %9, %5 %11 = add <4 x i32> %10, %vec.phi @@ -825,8 +825,8 @@ for.cond5.for.cond.cleanup7_crit_edge.us.us: ; preds = %for.body8.us.us, %m %add14.us.us.lcssa = phi i32 [ %13, %middle.block ], [ %add14.us.us, %for.body8.us.us ] %conv15.us.us = trunc i32 %add14.us.us.lcssa to i16 %add17.us.us = add i32 %j.046.us.us, %1 - %arrayidx18.us.us = getelementptr inbounds i16, i16* %C, i32 %add17.us.us - store i16 %conv15.us.us, i16* %arrayidx18.us.us, align 2, !tbaa !3 + %arrayidx18.us.us = getelementptr inbounds i16, ptr %C, i32 %add17.us.us + store i16 %conv15.us.us, ptr %arrayidx18.us.us, align 2, !tbaa !3 %inc20.us.us = add nuw nsw i32 %j.046.us.us, 1 %exitcond83 = icmp eq i32 %inc20.us.us, %m br i1 %exitcond83, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.cond5.preheader.us.us @@ -835,13 +835,13 @@ for.body8.us.us: ; preds = %for.body8.us.us.pre %k.044.us.us = phi i32 [ %inc.us.us, %for.body8.us.us ], [ %k.044.us.us.ph, %for.body8.us.us.preheader ] %sum.043.us.us = phi i32 [ %add14.us.us, %for.body8.us.us ], [ %sum.043.us.us.ph, %for.body8.us.us.preheader ] %add.us.us = add i32 %k.044.us.us, %mul.us - %arrayidx.us.us = getelementptr inbounds i16, i16* %A, i32 %add.us.us - %14 = load i16, i16* %arrayidx.us.us, align 2, !tbaa !3 + %arrayidx.us.us = getelementptr inbounds i16, ptr %A, i32 %add.us.us + %14 = load i16, ptr %arrayidx.us.us, align 2, !tbaa !3 %conv.us.us = sext i16 %14 to i32 %mul9.us.us = mul i32 %k.044.us.us, %m %add10.us.us = add i32 %mul9.us.us, %j.046.us.us - %arrayidx11.us.us = getelementptr inbounds i16, i16* %B, i32 %add10.us.us - %15 = load i16, i16* %arrayidx11.us.us, align 2, !tbaa !3 + %arrayidx11.us.us = getelementptr inbounds i16, ptr %B, i32 %add10.us.us + %15 = load i16, ptr %arrayidx11.us.us, align 2, !tbaa !3 %conv12.us.us = sext i16 %15 to i32 %mul13.us.us = mul nsw i32 %conv12.us.us, %conv.us.us %add14.us.us = add nsw i32 %mul13.us.us, %sum.043.us.us @@ -853,7 +853,7 @@ for.cond.cleanup: ; preds = %for.cond1.for.cond. ret void } -define hidden arm_aapcs_vfpcc i32 @arm_depthwise_conv_s8(i8* nocapture readonly %input, i16 zeroext %input_x, i16 zeroext %input_y, i16 zeroext %input_ch, i8* nocapture readonly %kernel, i16 zeroext %output_ch, i16 zeroext %ch_mult, i16 zeroext %kernel_x, i16 zeroext %kernel_y, i16 zeroext %pad_x, i16 zeroext %pad_y, i16 zeroext %stride_x, i16 zeroext %stride_y, i32* nocapture readonly %bias, i8* nocapture %output, i32* nocapture readonly %output_shift, i32* nocapture readonly %output_mult, i16 zeroext %output_x, i16 zeroext %output_y, i32 %output_offset, i32 %input_offset, i32 %output_activation_min, i32 %output_activation_max, i16 zeroext %dilation_x, i16 zeroext %dilation_y, i16* nocapture readnone %buffer_a) local_unnamed_addr #0 { +define hidden arm_aapcs_vfpcc i32 @arm_depthwise_conv_s8(ptr nocapture readonly %input, i16 zeroext %input_x, i16 zeroext %input_y, i16 zeroext %input_ch, ptr nocapture readonly %kernel, i16 zeroext %output_ch, i16 zeroext %ch_mult, i16 zeroext %kernel_x, i16 zeroext %kernel_y, i16 zeroext %pad_x, i16 zeroext %pad_y, i16 zeroext %stride_x, i16 zeroext %stride_y, ptr nocapture readonly %bias, ptr nocapture %output, ptr nocapture readonly %output_shift, ptr nocapture readonly %output_mult, i16 zeroext %output_x, i16 zeroext %output_y, i32 %output_offset, i32 %input_offset, i32 %output_activation_min, i32 %output_activation_max, i16 zeroext %dilation_x, i16 zeroext %dilation_y, ptr nocapture readnone %buffer_a) local_unnamed_addr #0 { ; CHECK-LABEL: arm_depthwise_conv_s8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -1074,12 +1074,12 @@ vector.body: ; preds = %vector.body, %vecto %tmp79 = add nsw <4 x i32> %vec.ind, %broadcast.splat68 %tmp80 = mul nsw <4 x i32> %broadcast.splat70, %tmp79 %tmp81 = add nsw <4 x i32> %tmp80, %broadcast.splat72 - %tmp82 = getelementptr inbounds i8, i8* %input, <4 x i32> %tmp78 - %wide.masked.gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %tmp82, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef) + %tmp82 = getelementptr inbounds i8, ptr %input, <4 x i32> %tmp78 + %wide.masked.gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %tmp82, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef) %tmp83 = sext <4 x i8> %wide.masked.gather to <4 x i32> %tmp84 = add nsw <4 x i32> %broadcast.splat74, %tmp83 - %tmp85 = getelementptr inbounds i8, i8* %kernel, <4 x i32> %tmp81 - %wide.masked.gather75 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %tmp85, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef) + %tmp85 = getelementptr inbounds i8, ptr %kernel, <4 x i32> %tmp81 + %wide.masked.gather75 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %tmp85, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef) %tmp86 = sext <4 x i8> %wide.masked.gather75 to <4 x i32> %tmp87 = mul nsw <4 x i32> %tmp84, %tmp86 %tmp88 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp87) @@ -1104,11 +1104,11 @@ if.end: ; preds = %for.cond.cleanup9.i ret i32 0 } -declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) -declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>) -declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32 immarg, <4 x i1>, <4 x i8>) #3 +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) +declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>) +declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x i8>) #3 declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) -declare void @llvm.memset.p0i8.i32(i8* align 2, i8, i32, i1) +declare void @llvm.memset.p0.i32(ptr align 2, i8, i32, i1) -declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>) diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll index 66c41bb..4b845294 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -enable-mem-access-versioning=false -tail-predication=force-enabled %s -o - | FileCheck %s -define dso_local void @mve_gather_qi_wb(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %n, i32 %m, i32 %l) { +define dso_local void @mve_gather_qi_wb(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n, i32 %m, i32 %l) { ; CHECK-LABEL: mve_gather_qi_wb: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} @@ -33,21 +33,21 @@ define dso_local void @mve_gather_qi_wb(i32* noalias nocapture readonly %A, i32* ; CHECK-NEXT: .long 4294967288 @ 0xfffffff8 entry: ; preds = %middle. %add.us.us = add i32 4, %n - %arrayidx.us.us = getelementptr inbounds i32, i32* %C, i32 %add.us.us + %arrayidx.us.us = getelementptr inbounds i32, ptr %C, i32 %add.us.us br label %vector.body vector.body: ; preds = %vector.body, %entry %index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %7, %vector.body ] %vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %entry ], [ %vec.ind.next, %vector.body ] %0 = add i32 %index, %n - %1 = getelementptr inbounds i32, i32* %A, i32 %0 + %1 = getelementptr inbounds i32, ptr %A, i32 %0 %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast i32* %1 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %2 = bitcast ptr %1 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %3 = mul <4 x i32> %vec.ind, <i32 5, i32 5, i32 5, i32 5> %4 = add <4 x i32> %3, <i32 3, i32 3, i32 3, i32 3> - %5 = getelementptr inbounds i32, i32* %B, <4 x i32> %4 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %5, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %5 = getelementptr inbounds i32, ptr %B, <4 x i32> %4 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %5, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %6 = mul nsw <4 x i32> %wide.masked.gather, %wide.masked.load %7 = add <4 x i32> %vec.phi, %6 %index.next = add i32 %index, 4 @@ -57,7 +57,7 @@ vector.body: ; preds = %vector.body, %entry middle.block: ; preds = %vector.body %9 = select <4 x i1> %active.lane.mask, <4 x i32> %7, <4 x i32> %vec.phi %10 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7) - store i32 %10, i32* %arrayidx.us.us, align 4 + store i32 %10, ptr %arrayidx.us.us, align 4 %inc21.us.us = add nuw i32 4, 1 %exitcond81.not = icmp eq i32 %inc21.us.us, %n br label %end @@ -65,7 +65,7 @@ end: ; preds = %middle.block ret void } -define dso_local void @mve_gatherscatter_offset(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %n, i32 %m, i32 %l) { +define dso_local void @mve_gatherscatter_offset(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n, i32 %m, i32 %l) { ; CHECK-LABEL: mve_gatherscatter_offset: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} @@ -102,23 +102,23 @@ define dso_local void @mve_gatherscatter_offset(i32* noalias nocapture readonly ; CHECK-NEXT: .long 18 @ 0x12 entry: ; preds = %middle. %add.us.us = add i32 4, %n - %arrayidx.us.us = getelementptr inbounds i32, i32* %C, i32 %add.us.us + %arrayidx.us.us = getelementptr inbounds i32, ptr %C, i32 %add.us.us br label %vector.body vector.body: ; preds = %vector.body, %entry %index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %7, %vector.body ] %vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %entry ], [ %vec.ind.next, %vector.body ] %0 = add i32 %index, %n - %1 = getelementptr inbounds i32, i32* %A, i32 %0 + %1 = getelementptr inbounds i32, ptr %A, i32 %0 %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast i32* %1 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %2 = bitcast ptr %1 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %3 = mul <4 x i32> %vec.ind, <i32 5, i32 5, i32 5, i32 5> %4 = add <4 x i32> %3, <i32 3, i32 3, i32 3, i32 3> - %5 = getelementptr inbounds i32, i32* %B, <4 x i32> %4 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %5, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %5 = getelementptr inbounds i32, ptr %B, <4 x i32> %4 + %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %5, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %6 = mul nsw <4 x i32> %wide.masked.gather, %wide.masked.load - call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %6, <4 x i32*> %5, i32 4, <4 x i1> %active.lane.mask) + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %6, <4 x ptr> %5, i32 4, <4 x i1> %active.lane.mask) %7 = add <4 x i32> %vec.phi, %6 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4> @@ -127,7 +127,7 @@ vector.body: ; preds = %vector.body, %entry middle.block: ; preds = %vector.body %9 = select <4 x i1> %active.lane.mask, <4 x i32> %7, <4 x i32> %vec.phi %10 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7) - store i32 %10, i32* %arrayidx.us.us, align 4 + store i32 %10, ptr %arrayidx.us.us, align 4 %inc21.us.us = add nuw i32 4, 1 %exitcond81.not = icmp eq i32 %inc21.us.us, %n br label %end @@ -135,7 +135,7 @@ end: ; preds = %middle.block ret void } -define dso_local void @mve_scatter_qi(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %n, i32 %m, i32 %l) { +define dso_local void @mve_scatter_qi(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n, i32 %m, i32 %l) { ; CHECK-LABEL: mve_scatter_qi: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} @@ -172,22 +172,22 @@ define dso_local void @mve_scatter_qi(i32* noalias nocapture readonly %A, i32* n ; CHECK-NEXT: .long 4294967288 @ 0xfffffff8 entry: ; preds = %middle. %add.us.us = add i32 4, %n - %arrayidx.us.us = getelementptr inbounds i32, i32* %C, i32 %add.us.us + %arrayidx.us.us = getelementptr inbounds i32, ptr %C, i32 %add.us.us br label %vector.body vector.body: ; preds = %vector.body, %entry %index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %7, %vector.body ] %vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %entry ], [ %vec.ind.next, %vector.body ] %0 = add i32 %index, %n - %1 = getelementptr inbounds i32, i32* %A, i32 %0 + %1 = getelementptr inbounds i32, ptr %A, i32 %0 %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %2 = bitcast i32* %1 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %2 = bitcast ptr %1 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %2, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %3 = mul <4 x i32> %vec.ind, <i32 5, i32 5, i32 5, i32 5> %4 = add <4 x i32> %3, <i32 3, i32 3, i32 3, i32 3> - %5 = getelementptr inbounds i32, i32* %B, <4 x i32> %4 + %5 = getelementptr inbounds i32, ptr %B, <4 x i32> %4 %6 = mul nsw <4 x i32> <i32 3, i32 3, i32 3, i32 3>, %wide.masked.load - call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %6, <4 x i32*> %5, i32 4, <4 x i1> %active.lane.mask) + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %6, <4 x ptr> %5, i32 4, <4 x i1> %active.lane.mask) %7 = add <4 x i32> %vec.phi, %6 %index.next = add i32 %index, 4 %vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4> @@ -196,7 +196,7 @@ vector.body: ; preds = %vector.body, %entry middle.block: ; preds = %vector.body %9 = select <4 x i1> %active.lane.mask, <4 x i32> %7, <4 x i32> %vec.phi %10 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7) - store i32 %10, i32* %arrayidx.us.us, align 4 + store i32 %10, ptr %arrayidx.us.us, align 4 %inc21.us.us = add nuw i32 4, 1 %exitcond81.not = icmp eq i32 %inc21.us.us, %n br label %end @@ -204,7 +204,7 @@ end: ; preds = %middle.block ret void } -define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocapture %w, i32 %N) { +define void @justoffsets(ptr noalias nocapture readonly %r, ptr noalias nocapture %w, i32 %N) { ; CHECK-LABEL: justoffsets: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -293,17 +293,17 @@ vector.ph: ; preds = %vector.memcheck br label %vector.body vector.body: ; preds = %vector.body, %vector.ph - %pointer.phi = phi i8* [ %r, %vector.ph ], [ %ptr.ind, %vector.body ] - %pointer.phi55 = phi i8* [ %w, %vector.ph ], [ %ptr.ind56, %vector.body ] + %pointer.phi = phi ptr [ %r, %vector.ph ], [ %ptr.ind, %vector.body ] + %pointer.phi55 = phi ptr [ %w, %vector.ph ], [ %ptr.ind56, %vector.body ] %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %l1 = getelementptr i8, i8* %pointer.phi, <4 x i32> <i32 0, i32 3, i32 6, i32 9> - %l2 = getelementptr i8, i8* %pointer.phi55, <4 x i32> <i32 0, i32 3, i32 6, i32 9> - %l3 = getelementptr inbounds i8, <4 x i8*> %l1, i32 1 + %l1 = getelementptr i8, ptr %pointer.phi, <4 x i32> <i32 0, i32 3, i32 6, i32 9> + %l2 = getelementptr i8, ptr %pointer.phi55, <4 x i32> <i32 0, i32 3, i32 6, i32 9> + %l3 = getelementptr inbounds i8, <4 x ptr> %l1, i32 1 %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %N) - %wide.masked.gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %l1, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef) - %l4 = getelementptr inbounds i8, <4 x i8*> %l1, i32 2 - %wide.masked.gather57 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %l3, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef) - %wide.masked.gather58 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %l4, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef) + %wide.masked.gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %l1, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef) + %l4 = getelementptr inbounds i8, <4 x ptr> %l1, i32 2 + %wide.masked.gather57 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %l3, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef) + %wide.masked.gather58 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %l4, i32 1, <4 x i1> %active.lane.mask, <4 x i8> undef) %l5 = zext <4 x i8> %wide.masked.gather to <4 x i32> %l6 = mul nuw nsw <4 x i32> %l5, <i32 19595, i32 19595, i32 19595, i32 19595> %l7 = zext <4 x i8> %wide.masked.gather57 to <4 x i32> @@ -331,15 +331,15 @@ vector.body: ; preds = %vector.body, %vecto %l29 = add nsw <4 x i32> %l28, %l26 %l30 = lshr <4 x i32> %l29, <i32 16, i32 16, i32 16, i32 16> %l31 = trunc <4 x i32> %l30 to <4 x i8> - %l32 = getelementptr inbounds i8, <4 x i8*> %l2, i32 1 - call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %l15, <4 x i8*> %l2, i32 1, <4 x i1> %active.lane.mask) - %l33 = getelementptr inbounds i8, <4 x i8*> %l2, i32 2 - call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %l23, <4 x i8*> %l32, i32 1, <4 x i1> %active.lane.mask) - call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %l31, <4 x i8*> %l33, i32 1, <4 x i1> %active.lane.mask) + %l32 = getelementptr inbounds i8, <4 x ptr> %l2, i32 1 + call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %l15, <4 x ptr> %l2, i32 1, <4 x i1> %active.lane.mask) + %l33 = getelementptr inbounds i8, <4 x ptr> %l2, i32 2 + call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %l23, <4 x ptr> %l32, i32 1, <4 x i1> %active.lane.mask) + call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %l31, <4 x ptr> %l33, i32 1, <4 x i1> %active.lane.mask) %index.next = add i32 %index, 4 %l34 = icmp eq i32 %index.next, %n.vec - %ptr.ind = getelementptr i8, i8* %pointer.phi, i32 12 - %ptr.ind56 = getelementptr i8, i8* %pointer.phi55, i32 12 + %ptr.ind = getelementptr i8, ptr %pointer.phi, i32 12 + %ptr.ind56 = getelementptr i8, ptr %pointer.phi55, i32 12 br i1 %l34, label %for.cond.cleanup, label %vector.body for.cond.cleanup: ; preds = %vector.body, %for.body, %entry @@ -347,9 +347,9 @@ for.cond.cleanup: ; preds = %vector.body, %for.b } declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) -declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) -declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>) +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) +declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>) declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) -declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) -declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>) -declare void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8>, <4 x i8*>, i32, <4 x i1>) +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>) +declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32, <4 x i1>) diff --git a/llvm/test/CodeGen/Thumb2/mve-phireg.ll b/llvm/test/CodeGen/Thumb2/mve-phireg.ll index d2f79fc..dad856c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-phireg.ll +++ b/llvm/test/CodeGen/Thumb2/mve-phireg.ll @@ -92,8 +92,8 @@ vector.body: ; preds = %vector.body, %entry %0 = and <8 x i32> %vec.ind, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> %1 = icmp eq <8 x i32> %0, zeroinitializer %2 = select <8 x i1> %1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6> - %3 = bitcast i16* undef to <8 x i16>* - store <8 x i16> %2, <8 x i16>* %3, align 2 + %3 = bitcast ptr undef to ptr + store <8 x i16> %2, ptr %3, align 2 %4 = icmp eq i32 undef, 128 br i1 %4, label %for.cond4.preheader, label %vector.body @@ -101,7 +101,7 @@ for.cond4.preheader: ; preds = %vector.body br i1 undef, label %vector.body105, label %for.body10 for.cond4.loopexit: ; preds = %for.body10 - %call5 = call arm_aapcs_vfpcc i32 bitcast (i32 (...)* @l to i32 ()*)() + %call5 = call arm_aapcs_vfpcc i32 @l() br label %vector.body105 for.body10: ; preds = %for.body10, %for.cond4.preheader @@ -128,7 +128,7 @@ vector.body115: ; preds = %vector.body115, %ve @a = external dso_local global i32, align 4 -@b = dso_local local_unnamed_addr global i32 ptrtoint (i32* @a to i32), align 4 +@b = dso_local local_unnamed_addr global i32 ptrtoint (ptr @a to i32), align 4 @c = dso_local global i32 2, align 4 @d = dso_local global i32 2, align 4 @@ -211,43 +211,43 @@ define dso_local i32 @e() #0 { ; CHECK-NEXT: .long 0x00000000 @ float 0 entry: %f = alloca i16, align 2 - %g = alloca [3 x [8 x [4 x i16*]]], align 4 - store i16 4, i16* %f, align 2 - %0 = load i32, i32* @c, align 4 - %1 = load i32, i32* @d, align 4 - %arrayinit.element7 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 0, i32 1, i32 1 - %2 = bitcast i16** %arrayinit.element7 to i32* - store i32 %0, i32* %2, align 4 - %arrayinit.element8 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 0, i32 1, i32 2 - store i16* null, i16** %arrayinit.element8, align 4 - %3 = bitcast i16** undef to i32* - store i32 %1, i32* %3, align 4 - %4 = bitcast i16** undef to i32* - store i32 %0, i32* %4, align 4 - %arrayinit.element13 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 0, i32 2, i32 2 - %5 = bitcast i16** %arrayinit.element13 to <4 x i16*>* - store <4 x i16*> <i16* inttoptr (i32 4 to i16*), i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @c to i16*), i16* null>, <4 x i16*>* %5, align 4 - %arrayinit.element24 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 0, i32 4, i32 2 - %6 = bitcast i16** %arrayinit.element24 to <4 x i16*>* - store <4 x i16*> <i16* bitcast (i32* @d to i16*), i16* null, i16* bitcast (i32* @d to i16*), i16* bitcast (i32 ()* @e to i16*)>, <4 x i16*>* %6, align 4 - %7 = bitcast i16** undef to <4 x i16*>* - store <4 x i16*> <i16* inttoptr (i32 4 to i16*), i16* bitcast (i32 ()* @e to i16*), i16* bitcast (i32* @c to i16*), i16* null>, <4 x i16*>* %7, align 4 - %8 = bitcast i16** undef to <4 x i16*>* - store <4 x i16*> <i16* bitcast (i32* @c to i16*), i16* bitcast (i32 ()* @e to i16*), i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @c to i16*)>, <4 x i16*>* %8, align 4 - %9 = bitcast i16** undef to <4 x i16*>* - store <4 x i16*> <i16* bitcast (i32 ()* @e to i16*), i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @c to i16*)>, <4 x i16*>* %9, align 4 - %10 = bitcast i16** undef to <4 x i16*>* - store <4 x i16*> <i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @c to i16*), i16* null, i16* bitcast (i32 ()* @e to i16*)>, <4 x i16*>* %10, align 4 - call void @llvm.memset.p0i8.i32(i8* nonnull align 4 dereferenceable(64) undef, i8 0, i32 64, i1 false) - %11 = bitcast i16** undef to <4 x i16*>* - store <4 x i16*> <i16* bitcast (i32* @d to i16*), i16* bitcast (i32 ()* @e to i16*), i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @d to i16*)>, <4 x i16*>* %11, align 4 - %12 = bitcast i16** undef to <4 x i16*>* - store <4 x i16*> <i16* null, i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @c to i16*)>, <4 x i16*>* %12, align 4 - %13 = bitcast i16** undef to <4 x i16*>* - store <4 x i16*> <i16* bitcast (i32* @c to i16*), i16* bitcast (i32* @d to i16*), i16* bitcast (i32* @c to i16*), i16* null>, <4 x i16*>* %13, align 4 - %arrayinit.begin78 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 2, i32 3, i32 0 - store i16* inttoptr (i32 4 to i16*), i16** %arrayinit.begin78, align 4 - store i32 0, i32* @b, align 4 + %g = alloca [3 x [8 x [4 x ptr]]], align 4 + store i16 4, ptr %f, align 2 + %0 = load i32, ptr @c, align 4 + %1 = load i32, ptr @d, align 4 + %arrayinit.element7 = getelementptr inbounds [3 x [8 x [4 x ptr]]], ptr %g, i32 0, i32 0, i32 1, i32 1 + %2 = bitcast ptr %arrayinit.element7 to ptr + store i32 %0, ptr %2, align 4 + %arrayinit.element8 = getelementptr inbounds [3 x [8 x [4 x ptr]]], ptr %g, i32 0, i32 0, i32 1, i32 2 + store ptr null, ptr %arrayinit.element8, align 4 + %3 = bitcast ptr undef to ptr + store i32 %1, ptr %3, align 4 + %4 = bitcast ptr undef to ptr + store i32 %0, ptr %4, align 4 + %arrayinit.element13 = getelementptr inbounds [3 x [8 x [4 x ptr]]], ptr %g, i32 0, i32 0, i32 2, i32 2 + %5 = bitcast ptr %arrayinit.element13 to ptr + store <4 x ptr> <ptr inttoptr (i32 4 to ptr), ptr @c, ptr @c, ptr null>, ptr %5, align 4 + %arrayinit.element24 = getelementptr inbounds [3 x [8 x [4 x ptr]]], ptr %g, i32 0, i32 0, i32 4, i32 2 + %6 = bitcast ptr %arrayinit.element24 to ptr + store <4 x ptr> <ptr @d, ptr null, ptr @d, ptr @e>, ptr %6, align 4 + %7 = bitcast ptr undef to ptr + store <4 x ptr> <ptr inttoptr (i32 4 to ptr), ptr @e, ptr @c, ptr null>, ptr %7, align 4 + %8 = bitcast ptr undef to ptr + store <4 x ptr> <ptr @c, ptr @e, ptr @c, ptr @c>, ptr %8, align 4 + %9 = bitcast ptr undef to ptr + store <4 x ptr> <ptr @e, ptr @c, ptr @c, ptr @c>, ptr %9, align 4 + %10 = bitcast ptr undef to ptr + store <4 x ptr> <ptr @c, ptr @c, ptr null, ptr @e>, ptr %10, align 4 + call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(64) undef, i8 0, i32 64, i1 false) + %11 = bitcast ptr undef to ptr + store <4 x ptr> <ptr @d, ptr @e, ptr @c, ptr @d>, ptr %11, align 4 + %12 = bitcast ptr undef to ptr + store <4 x ptr> <ptr null, ptr @c, ptr @c, ptr @c>, ptr %12, align 4 + %13 = bitcast ptr undef to ptr + store <4 x ptr> <ptr @c, ptr @d, ptr @c, ptr null>, ptr %13, align 4 + %arrayinit.begin78 = getelementptr inbounds [3 x [8 x [4 x ptr]]], ptr %g, i32 0, i32 2, i32 3, i32 0 + store ptr inttoptr (i32 4 to ptr), ptr %arrayinit.begin78, align 4 + store i32 0, ptr @b, align 4 br label %for.cond for.cond: ; preds = %for.cond, %entry @@ -255,10 +255,10 @@ for.cond: ; preds = %for.cond, %entry } ; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg) #1 +declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg) #1 ; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1 +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1 declare arm_aapcs_vfpcc i32 @l(...) diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll index 39deded..dd63b85 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll @@ -3,7 +3,7 @@ ; Check some loop postinc's for properly distributed post-incs -define i32 @vaddv(i32* nocapture readonly %data, i32 %N) { +define i32 @vaddv(ptr nocapture readonly %data, i32 %N) { ; CHECK-LABEL: vaddv: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -35,25 +35,25 @@ for.cond.cleanup: ; preds = %for.body, %entry ret i32 %x.0.lcssa for.body: ; preds = %entry, %for.body - %data.addr.014 = phi i32* [ %add.ptr1, %for.body ], [ %data, %entry ] + %data.addr.014 = phi ptr [ %add.ptr1, %for.body ], [ %data, %entry ] %i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ] %x.012 = phi i32 [ %7, %for.body ], [ 0, %entry ] - %0 = bitcast i32* %data.addr.014 to <4 x i32>* - %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %0 = bitcast ptr %data.addr.014 to ptr + %1 = load <4 x i32>, ptr %0, align 4 %2 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %1, i32 0) %3 = add i32 %2, %x.012 - %add.ptr = getelementptr inbounds i32, i32* %data.addr.014, i32 4 - %4 = bitcast i32* %add.ptr to <4 x i32>* - %5 = load <4 x i32>, <4 x i32>* %4, align 4 + %add.ptr = getelementptr inbounds i32, ptr %data.addr.014, i32 4 + %4 = bitcast ptr %add.ptr to ptr + %5 = load <4 x i32>, ptr %4, align 4 %6 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %5, i32 0) %7 = add i32 %3, %6 - %add.ptr1 = getelementptr inbounds i32, i32* %data.addr.014, i32 8 + %add.ptr1 = getelementptr inbounds i32, ptr %data.addr.014, i32 8 %inc = add nuw nsw i32 %i.013, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.cond.cleanup, label %for.body } -define void @arm_cmplx_dot_prod_q15(i16* nocapture readonly %pSrcA, i16* nocapture readonly %pSrcB, i32 %numSamples, i32* nocapture %realResult, i32* nocapture %imagResult) { +define void @arm_cmplx_dot_prod_q15(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, i32 %numSamples, ptr nocapture %realResult, ptr nocapture %imagResult) { ; CHECK-LABEL: arm_cmplx_dot_prod_q15: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -129,42 +129,42 @@ entry: %mul = shl i32 %numSamples, 1 %sub = add i32 %mul, -8 %shr = lshr i32 %sub, 3 - %vecSrcB.0.in102 = bitcast i16* %pSrcB to <8 x i16>* - %vecSrcB.0103 = load <8 x i16>, <8 x i16>* %vecSrcB.0.in102, align 2 - %vecSrcA.0.in104 = bitcast i16* %pSrcA to <8 x i16>* - %vecSrcA.0105 = load <8 x i16>, <8 x i16>* %vecSrcA.0.in104, align 2 + %vecSrcB.0.in102 = bitcast ptr %pSrcB to ptr + %vecSrcB.0103 = load <8 x i16>, ptr %vecSrcB.0.in102, align 2 + %vecSrcA.0.in104 = bitcast ptr %pSrcA to ptr + %vecSrcA.0105 = load <8 x i16>, ptr %vecSrcA.0.in104, align 2 %cmp106 = icmp eq i32 %shr, 0 br i1 %cmp106, label %while.end, label %while.body.preheader while.body.preheader: ; preds = %entry %0 = shl i32 %shr, 4 - %scevgep = getelementptr i16, i16* %pSrcA, i32 %0 + %scevgep = getelementptr i16, ptr %pSrcA, i32 %0 br label %while.body while.body: ; preds = %while.body.preheader, %while.body %vecSrcA.0115 = phi <8 x i16> [ %vecSrcA.0, %while.body ], [ %vecSrcA.0105, %while.body.preheader ] %vecSrcB.0114 = phi <8 x i16> [ %vecSrcB.0, %while.body ], [ %vecSrcB.0103, %while.body.preheader ] - %vecSrcB.0.in.in113 = phi i16* [ %add.ptr3, %while.body ], [ %pSrcB, %while.body.preheader ] - %vecSrcA.0.in.in112 = phi i16* [ %add.ptr2, %while.body ], [ %pSrcA, %while.body.preheader ] + %vecSrcB.0.in.in113 = phi ptr [ %add.ptr3, %while.body ], [ %pSrcB, %while.body.preheader ] + %vecSrcA.0.in.in112 = phi ptr [ %add.ptr2, %while.body ], [ %pSrcA, %while.body.preheader ] %accImag.0.off32111 = phi i32 [ %15, %while.body ], [ 0, %while.body.preheader ] %accImag.0.off0110 = phi i32 [ %16, %while.body ], [ 0, %while.body.preheader ] %accReal.0.off32109 = phi i32 [ %12, %while.body ], [ 0, %while.body.preheader ] %accReal.0.off0108 = phi i32 [ %13, %while.body ], [ 0, %while.body.preheader ] %blkCnt.0107 = phi i32 [ %dec, %while.body ], [ %shr, %while.body.preheader ] - %pSrcB.addr.0 = getelementptr inbounds i16, i16* %vecSrcB.0.in.in113, i32 8 - %pSrcA.addr.0 = getelementptr inbounds i16, i16* %vecSrcA.0.in.in112, i32 8 + %pSrcB.addr.0 = getelementptr inbounds i16, ptr %vecSrcB.0.in.in113, i32 8 + %pSrcA.addr.0 = getelementptr inbounds i16, ptr %vecSrcA.0.in.in112, i32 8 %1 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 %accReal.0.off0108, i32 %accReal.0.off32109, <8 x i16> %vecSrcA.0115, <8 x i16> %vecSrcB.0114) %2 = extractvalue { i32, i32 } %1, 1 %3 = extractvalue { i32, i32 } %1, 0 - %4 = bitcast i16* %pSrcA.addr.0 to <8 x i16>* - %5 = load <8 x i16>, <8 x i16>* %4, align 2 - %add.ptr2 = getelementptr inbounds i16, i16* %vecSrcA.0.in.in112, i32 16 + %4 = bitcast ptr %pSrcA.addr.0 to ptr + %5 = load <8 x i16>, ptr %4, align 2 + %add.ptr2 = getelementptr inbounds i16, ptr %vecSrcA.0.in.in112, i32 16 %6 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 1, i32 %accImag.0.off0110, i32 %accImag.0.off32111, <8 x i16> %vecSrcA.0115, <8 x i16> %vecSrcB.0114) %7 = extractvalue { i32, i32 } %6, 1 %8 = extractvalue { i32, i32 } %6, 0 - %9 = bitcast i16* %pSrcB.addr.0 to <8 x i16>* - %10 = load <8 x i16>, <8 x i16>* %9, align 2 - %add.ptr3 = getelementptr inbounds i16, i16* %vecSrcB.0.in.in113, i32 16 + %9 = bitcast ptr %pSrcB.addr.0 to ptr + %10 = load <8 x i16>, ptr %9, align 2 + %add.ptr3 = getelementptr inbounds i16, ptr %vecSrcB.0.in.in113, i32 16 %11 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 %3, i32 %2, <8 x i16> %5, <8 x i16> %10) %12 = extractvalue { i32, i32 } %11, 1 %13 = extractvalue { i32, i32 } %11, 0 @@ -172,15 +172,15 @@ while.body: ; preds = %while.body.preheade %15 = extractvalue { i32, i32 } %14, 1 %16 = extractvalue { i32, i32 } %14, 0 %dec = add nsw i32 %blkCnt.0107, -1 - %vecSrcB.0.in = bitcast i16* %add.ptr3 to <8 x i16>* - %vecSrcB.0 = load <8 x i16>, <8 x i16>* %vecSrcB.0.in, align 2 - %vecSrcA.0.in = bitcast i16* %add.ptr2 to <8 x i16>* - %vecSrcA.0 = load <8 x i16>, <8 x i16>* %vecSrcA.0.in, align 2 + %vecSrcB.0.in = bitcast ptr %add.ptr3 to ptr + %vecSrcB.0 = load <8 x i16>, ptr %vecSrcB.0.in, align 2 + %vecSrcA.0.in = bitcast ptr %add.ptr2 to ptr + %vecSrcA.0 = load <8 x i16>, ptr %vecSrcA.0.in, align 2 %cmp = icmp eq i32 %dec, 0 br i1 %cmp, label %while.cond.while.end_crit_edge, label %while.body while.cond.while.end_crit_edge: ; preds = %while.body - %scevgep136 = getelementptr i16, i16* %pSrcB, i32 %0 + %scevgep136 = getelementptr i16, ptr %pSrcB, i32 %0 br label %while.end while.end: ; preds = %while.cond.while.end_crit_edge, %entry @@ -188,8 +188,8 @@ while.end: ; preds = %while.cond.while.en %accReal.0.off32.lcssa = phi i32 [ %12, %while.cond.while.end_crit_edge ], [ 0, %entry ] %accImag.0.off0.lcssa = phi i32 [ %16, %while.cond.while.end_crit_edge ], [ 0, %entry ] %accImag.0.off32.lcssa = phi i32 [ %15, %while.cond.while.end_crit_edge ], [ 0, %entry ] - %vecSrcA.0.in.in.lcssa = phi i16* [ %scevgep, %while.cond.while.end_crit_edge ], [ %pSrcA, %entry ] - %vecSrcB.0.in.in.lcssa = phi i16* [ %scevgep136, %while.cond.while.end_crit_edge ], [ %pSrcB, %entry ] + %vecSrcA.0.in.in.lcssa = phi ptr [ %scevgep, %while.cond.while.end_crit_edge ], [ %pSrcA, %entry ] + %vecSrcB.0.in.in.lcssa = phi ptr [ %scevgep136, %while.cond.while.end_crit_edge ], [ %pSrcB, %entry ] %vecSrcB.0.lcssa = phi <8 x i16> [ %vecSrcB.0, %while.cond.while.end_crit_edge ], [ %vecSrcB.0103, %entry ] %vecSrcA.0.lcssa = phi <8 x i16> [ %vecSrcA.0, %while.cond.while.end_crit_edge ], [ %vecSrcA.0105, %entry ] %17 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 %accReal.0.off0.lcssa, i32 %accReal.0.off32.lcssa, <8 x i16> %vecSrcA.0.lcssa, <8 x i16> %vecSrcB.0.lcssa) @@ -215,19 +215,19 @@ while.end: ; preds = %while.cond.while.en br i1 %cmp1095, label %while.end34, label %while.body11 while.body11: ; preds = %while.end, %while.body11 - %pSrcA.addr.1100 = phi i16* [ %incdec.ptr12, %while.body11 ], [ %vecSrcA.0.in.in.lcssa, %while.end ] - %pSrcB.addr.199 = phi i16* [ %incdec.ptr14, %while.body11 ], [ %vecSrcB.0.in.in.lcssa, %while.end ] + %pSrcA.addr.1100 = phi ptr [ %incdec.ptr12, %while.body11 ], [ %vecSrcA.0.in.in.lcssa, %while.end ] + %pSrcB.addr.199 = phi ptr [ %incdec.ptr14, %while.body11 ], [ %vecSrcB.0.in.in.lcssa, %while.end ] %accImag.198 = phi i64 [ %add32, %while.body11 ], [ %30, %while.end ] %accReal.197 = phi i64 [ %sub27, %while.body11 ], [ %23, %while.end ] %blkCnt.196 = phi i32 [ %dec33, %while.body11 ], [ %shr8, %while.end ] - %incdec.ptr = getelementptr inbounds i16, i16* %pSrcA.addr.1100, i32 1 - %31 = load i16, i16* %pSrcA.addr.1100, align 2 - %incdec.ptr12 = getelementptr inbounds i16, i16* %pSrcA.addr.1100, i32 2 - %32 = load i16, i16* %incdec.ptr, align 2 - %incdec.ptr13 = getelementptr inbounds i16, i16* %pSrcB.addr.199, i32 1 - %33 = load i16, i16* %pSrcB.addr.199, align 2 - %incdec.ptr14 = getelementptr inbounds i16, i16* %pSrcB.addr.199, i32 2 - %34 = load i16, i16* %incdec.ptr13, align 2 + %incdec.ptr = getelementptr inbounds i16, ptr %pSrcA.addr.1100, i32 1 + %31 = load i16, ptr %pSrcA.addr.1100, align 2 + %incdec.ptr12 = getelementptr inbounds i16, ptr %pSrcA.addr.1100, i32 2 + %32 = load i16, ptr %incdec.ptr, align 2 + %incdec.ptr13 = getelementptr inbounds i16, ptr %pSrcB.addr.199, i32 1 + %33 = load i16, ptr %pSrcB.addr.199, align 2 + %incdec.ptr14 = getelementptr inbounds i16, ptr %pSrcB.addr.199, i32 2 + %34 = load i16, ptr %incdec.ptr13, align 2 %conv = sext i16 %31 to i32 %conv15 = sext i16 %33 to i32 %mul16 = mul nsw i32 %conv15, %conv @@ -258,13 +258,13 @@ while.end34.loopexit: ; preds = %while.body11 while.end34: ; preds = %while.end34.loopexit, %while.end %accReal.1.lcssa.off6 = phi i32 [ %extract.t, %while.end ], [ %extract.t128, %while.end34.loopexit ] %accImag.1.lcssa.off6 = phi i32 [ %extract.t130, %while.end ], [ %extract.t132, %while.end34.loopexit ] - store i32 %accReal.1.lcssa.off6, i32* %realResult, align 4 - store i32 %accImag.1.lcssa.off6, i32* %imagResult, align 4 + store i32 %accReal.1.lcssa.off6, ptr %realResult, align 4 + store i32 %accImag.1.lcssa.off6, ptr %imagResult, align 4 ret void } -define void @fma8(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %n) { +define void @fma8(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n) { ; CHECK-LABEL: fma8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, lr} @@ -335,18 +335,18 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %0 = getelementptr inbounds float, float* %A, i32 %index - %1 = bitcast float* %0 to <8 x float>* - %wide.load = load <8 x float>, <8 x float>* %1, align 4 - %2 = getelementptr inbounds float, float* %B, i32 %index - %3 = bitcast float* %2 to <8 x float>* - %wide.load10 = load <8 x float>, <8 x float>* %3, align 4 + %0 = getelementptr inbounds float, ptr %A, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <8 x float>, ptr %1, align 4 + %2 = getelementptr inbounds float, ptr %B, i32 %index + %3 = bitcast ptr %2 to ptr + %wide.load10 = load <8 x float>, ptr %3, align 4 %4 = fmul fast <8 x float> %wide.load10, %wide.load - %5 = getelementptr inbounds float, float* %C, i32 %index - %6 = bitcast float* %5 to <8 x float>* - %wide.load11 = load <8 x float>, <8 x float>* %6, align 4 + %5 = getelementptr inbounds float, ptr %C, i32 %index + %6 = bitcast ptr %5 to ptr + %wide.load11 = load <8 x float>, ptr %6, align 4 %7 = fadd fast <8 x float> %wide.load11, %4 - store <8 x float> %7, <8 x float>* %6, align 4 + store <8 x float> %7, ptr %6, align 4 %index.next = add i32 %index, 8 %8 = icmp eq i32 %index.next, %n.vec br i1 %8, label %middle.block, label %vector.body @@ -360,15 +360,15 @@ for.cond.cleanup: ; preds = %for.body, %middle.b for.body: ; preds = %for.body.preheader12, %for.body %i.09 = phi i32 [ %inc, %for.body ], [ %i.09.ph, %for.body.preheader12 ] - %arrayidx = getelementptr inbounds float, float* %A, i32 %i.09 - %9 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.09 - %10 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.09 + %9 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.09 + %10 = load float, ptr %arrayidx1, align 4 %mul = fmul fast float %10, %9 - %arrayidx2 = getelementptr inbounds float, float* %C, i32 %i.09 - %11 = load float, float* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %C, i32 %i.09 + %11 = load float, ptr %arrayidx2, align 4 %add = fadd fast float %11, %mul - store float %add, float* %arrayidx2, align 4 + store float %add, ptr %arrayidx2, align 4 %inc = add nuw nsw i32 %i.09, 1 %exitcond = icmp eq i32 %inc, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll index 2aa183c..82a186b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll @@ -4,7 +4,7 @@ ; Check some LSR loop postinc ; fma loop with a destination that is the same as one of the sources -define void @fma(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %n) { +define void @fma(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n) { ; CHECK-LABEL: fma: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, lr} @@ -70,19 +70,19 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %0 = getelementptr inbounds float, float* %A, i32 %index - %1 = bitcast float* %0 to <4 x float>* - %wide.load = load <4 x float>, <4 x float>* %1, align 4 - %2 = getelementptr inbounds float, float* %B, i32 %index - %3 = bitcast float* %2 to <4 x float>* - %wide.load10 = load <4 x float>, <4 x float>* %3, align 4 + %0 = getelementptr inbounds float, ptr %A, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x float>, ptr %1, align 4 + %2 = getelementptr inbounds float, ptr %B, i32 %index + %3 = bitcast ptr %2 to ptr + %wide.load10 = load <4 x float>, ptr %3, align 4 %4 = fmul fast <4 x float> %wide.load10, %wide.load - %5 = getelementptr inbounds float, float* %C, i32 %index - %6 = bitcast float* %5 to <4 x float>* - %wide.load11 = load <4 x float>, <4 x float>* %6, align 4 + %5 = getelementptr inbounds float, ptr %C, i32 %index + %6 = bitcast ptr %5 to ptr + %wide.load11 = load <4 x float>, ptr %6, align 4 %7 = fadd fast <4 x float> %wide.load11, %4 - %8 = bitcast float* %5 to <4 x float>* - store <4 x float> %7, <4 x float>* %8, align 4 + %8 = bitcast ptr %5 to ptr + store <4 x float> %7, ptr %8, align 4 %index.next = add i32 %index, 4 %9 = icmp eq i32 %index.next, %n.vec br i1 %9, label %middle.block, label %vector.body @@ -96,15 +96,15 @@ for.cond.cleanup: ; preds = %for.body, %middle.b for.body: ; preds = %for.body.preheader12, %for.body %i.09 = phi i32 [ %inc, %for.body ], [ %i.09.ph, %for.body.preheader12 ] - %arrayidx = getelementptr inbounds float, float* %A, i32 %i.09 - %10 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.09 - %11 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.09 + %10 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.09 + %11 = load float, ptr %arrayidx1, align 4 %mul = fmul fast float %11, %10 - %arrayidx2 = getelementptr inbounds float, float* %C, i32 %i.09 - %12 = load float, float* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %C, i32 %i.09 + %12 = load float, ptr %arrayidx2, align 4 %add = fadd fast float %12, %mul - store float %add, float* %arrayidx2, align 4 + store float %add, ptr %arrayidx2, align 4 %inc = add nuw nsw i32 %i.09, 1 %exitcond = icmp eq i32 %inc, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -113,7 +113,7 @@ for.body: ; preds = %for.body.preheader1 ; Same as above but tail predicated ; FIXME: The postinc here is put on the load, not the store. An extra mov is needed in the loop because of it. -define void @fma_tailpred(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %n) { +define void @fma_tailpred(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %n) { ; CHECK-LABEL: fma_tailpred: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} @@ -173,20 +173,20 @@ vector.body: ; preds = %vector.body, %vecto %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer %induction = or <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3> - %0 = getelementptr inbounds float, float* %A, i32 %index + %0 = getelementptr inbounds float, ptr %A, i32 %index %1 = icmp ule <4 x i32> %induction, %broadcast.splat11 - %2 = bitcast float* %0 to <4 x float>* - %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> undef) - %3 = getelementptr inbounds float, float* %B, i32 %index - %4 = bitcast float* %3 to <4 x float>* - %wide.masked.load12 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %4, i32 4, <4 x i1> %1, <4 x float> undef) + %2 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x float> undef) + %3 = getelementptr inbounds float, ptr %B, i32 %index + %4 = bitcast ptr %3 to ptr + %wide.masked.load12 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %4, i32 4, <4 x i1> %1, <4 x float> undef) %5 = fmul fast <4 x float> %wide.masked.load12, %wide.masked.load - %6 = getelementptr inbounds float, float* %C, i32 %index - %7 = bitcast float* %6 to <4 x float>* - %wide.masked.load13 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %7, i32 4, <4 x i1> %1, <4 x float> undef) + %6 = getelementptr inbounds float, ptr %C, i32 %index + %7 = bitcast ptr %6 to ptr + %wide.masked.load13 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %7, i32 4, <4 x i1> %1, <4 x float> undef) %8 = fadd fast <4 x float> %wide.masked.load13, %5 - %9 = bitcast float* %6 to <4 x float>* - call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %8, <4 x float>* %9, i32 4, <4 x i1> %1) + %9 = bitcast ptr %6 to ptr + call void @llvm.masked.store.v4f32.p0(<4 x float> %8, ptr %9, i32 4, <4 x i1> %1) %index.next = add i32 %index, 4 %10 = icmp eq i32 %index.next, %n.vec br i1 %10, label %for.cond.cleanup, label %vector.body @@ -197,7 +197,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ; Multiple loads of the loop with a common base -define i8* @test(i8* nocapture readonly %input_row, i8* nocapture readonly %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, i32 %col_offset, i16 signext %activation_min, i16 zeroext %row_len, i32* nocapture readonly %bias, i8* returned %out) { +define ptr @test(ptr nocapture readonly %input_row, ptr nocapture readonly %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, i32 %col_offset, i16 signext %activation_min, i16 zeroext %row_len, ptr nocapture readonly %bias, ptr returned %out) { ; CHECK-LABEL: test: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -291,11 +291,11 @@ for.cond.preheader: ; preds = %entry for.body.lr.ph: ; preds = %for.cond.preheader %conv5 = zext i16 %row_len to i32 - %add.ptr9 = getelementptr inbounds i8, i8* %input_col, i32 %conv5 + %add.ptr9 = getelementptr inbounds i8, ptr %input_col, i32 %conv5 %mul11 = shl nuw nsw i32 %conv5, 1 - %add.ptr12 = getelementptr inbounds i8, i8* %input_col, i32 %mul11 + %add.ptr12 = getelementptr inbounds i8, ptr %input_col, i32 %mul11 %mul14 = mul nuw nsw i32 %conv5, 3 - %add.ptr15 = getelementptr inbounds i8, i8* %input_col, i32 %mul14 + %add.ptr15 = getelementptr inbounds i8, ptr %input_col, i32 %mul14 %add = add nuw nsw i32 %conv5, 7 %div = lshr i32 %add, 3 %conv25 = trunc i32 %col_offset to i16 @@ -306,14 +306,14 @@ for.body.lr.ph: ; preds = %for.cond.preheader for.body: ; preds = %for.cond.cleanup23, %for.body.lr.ph %i_out_ch.0116 = phi i32 [ 0, %for.body.lr.ph ], [ %inc37, %for.cond.cleanup23 ] %i_row_loop.0115 = phi i32 [ undef, %for.body.lr.ph ], [ %i_row_loop.1.lcssa, %for.cond.cleanup23 ] - %arrayidx = getelementptr inbounds i32, i32* %bias, i32 %i_out_ch.0116 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %bias, i32 %i_out_ch.0116 + %0 = load i32, ptr %arrayidx, align 4 %cmp2199 = icmp slt i32 %i_row_loop.0115, %div br i1 %cmp2199, label %for.body24.preheader, label %for.cond.cleanup23 for.body24.preheader: ; preds = %for.body %mul = mul nuw nsw i32 %i_out_ch.0116, %conv5 - %add.ptr = getelementptr inbounds i8, i8* %input_row, i32 %mul + %add.ptr = getelementptr inbounds i8, ptr %input_row, i32 %mul br label %for.body24 for.cond.cleanup23: ; preds = %for.body24, %for.body @@ -326,46 +326,46 @@ for.cond.cleanup23: ; preds = %for.body24, %for.bo %add32 = add nsw i32 %add31, %acc_2.0.lcssa %add33 = add nsw i32 %add32, %acc_3.0.lcssa %conv34 = trunc i32 %add33 to i8 - %arrayidx35 = getelementptr inbounds i8, i8* %out, i32 %i_out_ch.0116 - store i8 %conv34, i8* %arrayidx35, align 1 + %arrayidx35 = getelementptr inbounds i8, ptr %out, i32 %i_out_ch.0116 + store i8 %conv34, ptr %arrayidx35, align 1 %inc37 = add nuw nsw i32 %i_out_ch.0116, 1 %exitcond120 = icmp eq i32 %inc37, %conv2 br i1 %exitcond120, label %if.end, label %for.body for.body24: ; preds = %for.body24, %for.body24.preheader - %ip_r0.0109 = phi i8* [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ] - %ip_c0.0108 = phi i8* [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ] - %ip_c1.0107 = phi i8* [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ] - %ip_c2.0106 = phi i8* [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ] + %ip_r0.0109 = phi ptr [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ] + %ip_c0.0108 = phi ptr [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ] + %ip_c1.0107 = phi ptr [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ] + %ip_c2.0106 = phi ptr [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ] %i_row_loop.1105 = phi i32 [ %inc, %for.body24 ], [ %i_row_loop.0115, %for.body24.preheader ] - %ip_c3.0104 = phi i8* [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ] + %ip_c3.0104 = phi ptr [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ] %acc_3.0103 = phi i32 [ %23, %for.body24 ], [ %0, %for.body24.preheader ] %acc_2.0102 = phi i32 [ %22, %for.body24 ], [ %0, %for.body24.preheader ] %acc_1.0101 = phi i32 [ %21, %for.body24 ], [ %0, %for.body24.preheader ] %acc_0.0100 = phi i32 [ %20, %for.body24 ], [ %0, %for.body24.preheader ] - %1 = bitcast i8* %ip_r0.0109 to <8 x i8>* - %2 = load <8 x i8>, <8 x i8>* %1, align 1 + %1 = bitcast ptr %ip_r0.0109 to ptr + %2 = load <8 x i8>, ptr %1, align 1 %3 = sext <8 x i8> %2 to <8 x i16> - %add.ptr26 = getelementptr inbounds i8, i8* %ip_r0.0109, i32 8 - %4 = bitcast i8* %ip_c0.0108 to <8 x i8>* - %5 = load <8 x i8>, <8 x i8>* %4, align 1 + %add.ptr26 = getelementptr inbounds i8, ptr %ip_r0.0109, i32 8 + %4 = bitcast ptr %ip_c0.0108 to ptr + %5 = load <8 x i8>, ptr %4, align 1 %6 = sext <8 x i8> %5 to <8 x i16> - %add.ptr27 = getelementptr inbounds i8, i8* %ip_c0.0108, i32 8 + %add.ptr27 = getelementptr inbounds i8, ptr %ip_c0.0108, i32 8 %7 = add <8 x i16> %.splat, %6 - %8 = bitcast i8* %ip_c1.0107 to <8 x i8>* - %9 = load <8 x i8>, <8 x i8>* %8, align 1 + %8 = bitcast ptr %ip_c1.0107 to ptr + %9 = load <8 x i8>, ptr %8, align 1 %10 = sext <8 x i8> %9 to <8 x i16> - %add.ptr28 = getelementptr inbounds i8, i8* %ip_c1.0107, i32 8 + %add.ptr28 = getelementptr inbounds i8, ptr %ip_c1.0107, i32 8 %11 = add <8 x i16> %.splat, %10 - %12 = bitcast i8* %ip_c2.0106 to <8 x i8>* - %13 = load <8 x i8>, <8 x i8>* %12, align 1 + %12 = bitcast ptr %ip_c2.0106 to ptr + %13 = load <8 x i8>, ptr %12, align 1 %14 = sext <8 x i8> %13 to <8 x i16> - %add.ptr29 = getelementptr inbounds i8, i8* %ip_c2.0106, i32 8 + %add.ptr29 = getelementptr inbounds i8, ptr %ip_c2.0106, i32 8 %15 = add <8 x i16> %.splat, %14 - %16 = bitcast i8* %ip_c3.0104 to <8 x i8>* - %17 = load <8 x i8>, <8 x i8>* %16, align 1 + %16 = bitcast ptr %ip_c3.0104 to ptr + %17 = load <8 x i8>, ptr %16, align 1 %18 = sext <8 x i8> %17 to <8 x i16> - %add.ptr30 = getelementptr inbounds i8, i8* %ip_c3.0104, i32 8 + %add.ptr30 = getelementptr inbounds i8, ptr %ip_c3.0104, i32 8 %19 = add <8 x i16> %.splat, %18 %20 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_0.0100, <8 x i16> %3, <8 x i16> %7) %21 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_1.0101, <8 x i16> %3, <8 x i16> %11) @@ -376,11 +376,11 @@ for.body24: ; preds = %for.body24, %for.bo br i1 %exitcond, label %for.cond.cleanup23, label %for.body24 if.end: ; preds = %for.cond.cleanup23, %for.cond.preheader, %entry - ret i8* %out + ret ptr %out } ; Same as above with optsize -define i8* @test_optsize(i8* nocapture readonly %input_row, i8* nocapture readonly %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, i32 %col_offset, i16 signext %activation_min, i16 zeroext %row_len, i32* nocapture readonly %bias, i8* returned %out) optsize { +define ptr @test_optsize(ptr nocapture readonly %input_row, ptr nocapture readonly %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, i32 %col_offset, i16 signext %activation_min, i16 zeroext %row_len, ptr nocapture readonly %bias, ptr returned %out) optsize { ; CHECK-LABEL: test_optsize: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -473,11 +473,11 @@ for.cond.preheader: ; preds = %entry for.body.lr.ph: ; preds = %for.cond.preheader %conv5 = zext i16 %row_len to i32 - %add.ptr9 = getelementptr inbounds i8, i8* %input_col, i32 %conv5 + %add.ptr9 = getelementptr inbounds i8, ptr %input_col, i32 %conv5 %mul11 = shl nuw nsw i32 %conv5, 1 - %add.ptr12 = getelementptr inbounds i8, i8* %input_col, i32 %mul11 + %add.ptr12 = getelementptr inbounds i8, ptr %input_col, i32 %mul11 %mul14 = mul nuw nsw i32 %conv5, 3 - %add.ptr15 = getelementptr inbounds i8, i8* %input_col, i32 %mul14 + %add.ptr15 = getelementptr inbounds i8, ptr %input_col, i32 %mul14 %add = add nuw nsw i32 %conv5, 7 %div = lshr i32 %add, 3 %conv25 = trunc i32 %col_offset to i16 @@ -488,14 +488,14 @@ for.body.lr.ph: ; preds = %for.cond.preheader for.body: ; preds = %for.cond.cleanup23, %for.body.lr.ph %i_out_ch.0116 = phi i32 [ 0, %for.body.lr.ph ], [ %inc37, %for.cond.cleanup23 ] %i_row_loop.0115 = phi i32 [ undef, %for.body.lr.ph ], [ %i_row_loop.1.lcssa, %for.cond.cleanup23 ] - %arrayidx = getelementptr inbounds i32, i32* %bias, i32 %i_out_ch.0116 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %bias, i32 %i_out_ch.0116 + %0 = load i32, ptr %arrayidx, align 4 %cmp2199 = icmp slt i32 %i_row_loop.0115, %div br i1 %cmp2199, label %for.body24.preheader, label %for.cond.cleanup23 for.body24.preheader: ; preds = %for.body %mul = mul nuw nsw i32 %i_out_ch.0116, %conv5 - %add.ptr = getelementptr inbounds i8, i8* %input_row, i32 %mul + %add.ptr = getelementptr inbounds i8, ptr %input_row, i32 %mul br label %for.body24 for.cond.cleanup23: ; preds = %for.body24, %for.body @@ -508,46 +508,46 @@ for.cond.cleanup23: ; preds = %for.body24, %for.bo %add32 = add nsw i32 %add31, %acc_2.0.lcssa %add33 = add nsw i32 %add32, %acc_3.0.lcssa %conv34 = trunc i32 %add33 to i8 - %arrayidx35 = getelementptr inbounds i8, i8* %out, i32 %i_out_ch.0116 - store i8 %conv34, i8* %arrayidx35, align 1 + %arrayidx35 = getelementptr inbounds i8, ptr %out, i32 %i_out_ch.0116 + store i8 %conv34, ptr %arrayidx35, align 1 %inc37 = add nuw nsw i32 %i_out_ch.0116, 1 %exitcond120 = icmp eq i32 %inc37, %conv2 br i1 %exitcond120, label %if.end, label %for.body for.body24: ; preds = %for.body24, %for.body24.preheader - %ip_r0.0109 = phi i8* [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ] - %ip_c0.0108 = phi i8* [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ] - %ip_c1.0107 = phi i8* [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ] - %ip_c2.0106 = phi i8* [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ] + %ip_r0.0109 = phi ptr [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ] + %ip_c0.0108 = phi ptr [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ] + %ip_c1.0107 = phi ptr [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ] + %ip_c2.0106 = phi ptr [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ] %i_row_loop.1105 = phi i32 [ %inc, %for.body24 ], [ %i_row_loop.0115, %for.body24.preheader ] - %ip_c3.0104 = phi i8* [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ] + %ip_c3.0104 = phi ptr [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ] %acc_3.0103 = phi i32 [ %23, %for.body24 ], [ %0, %for.body24.preheader ] %acc_2.0102 = phi i32 [ %22, %for.body24 ], [ %0, %for.body24.preheader ] %acc_1.0101 = phi i32 [ %21, %for.body24 ], [ %0, %for.body24.preheader ] %acc_0.0100 = phi i32 [ %20, %for.body24 ], [ %0, %for.body24.preheader ] - %1 = bitcast i8* %ip_r0.0109 to <8 x i8>* - %2 = load <8 x i8>, <8 x i8>* %1, align 1 + %1 = bitcast ptr %ip_r0.0109 to ptr + %2 = load <8 x i8>, ptr %1, align 1 %3 = sext <8 x i8> %2 to <8 x i16> - %add.ptr26 = getelementptr inbounds i8, i8* %ip_r0.0109, i32 8 - %4 = bitcast i8* %ip_c0.0108 to <8 x i8>* - %5 = load <8 x i8>, <8 x i8>* %4, align 1 + %add.ptr26 = getelementptr inbounds i8, ptr %ip_r0.0109, i32 8 + %4 = bitcast ptr %ip_c0.0108 to ptr + %5 = load <8 x i8>, ptr %4, align 1 %6 = sext <8 x i8> %5 to <8 x i16> - %add.ptr27 = getelementptr inbounds i8, i8* %ip_c0.0108, i32 8 + %add.ptr27 = getelementptr inbounds i8, ptr %ip_c0.0108, i32 8 %7 = add <8 x i16> %.splat, %6 - %8 = bitcast i8* %ip_c1.0107 to <8 x i8>* - %9 = load <8 x i8>, <8 x i8>* %8, align 1 + %8 = bitcast ptr %ip_c1.0107 to ptr + %9 = load <8 x i8>, ptr %8, align 1 %10 = sext <8 x i8> %9 to <8 x i16> - %add.ptr28 = getelementptr inbounds i8, i8* %ip_c1.0107, i32 8 + %add.ptr28 = getelementptr inbounds i8, ptr %ip_c1.0107, i32 8 %11 = add <8 x i16> %.splat, %10 - %12 = bitcast i8* %ip_c2.0106 to <8 x i8>* - %13 = load <8 x i8>, <8 x i8>* %12, align 1 + %12 = bitcast ptr %ip_c2.0106 to ptr + %13 = load <8 x i8>, ptr %12, align 1 %14 = sext <8 x i8> %13 to <8 x i16> - %add.ptr29 = getelementptr inbounds i8, i8* %ip_c2.0106, i32 8 + %add.ptr29 = getelementptr inbounds i8, ptr %ip_c2.0106, i32 8 %15 = add <8 x i16> %.splat, %14 - %16 = bitcast i8* %ip_c3.0104 to <8 x i8>* - %17 = load <8 x i8>, <8 x i8>* %16, align 1 + %16 = bitcast ptr %ip_c3.0104 to ptr + %17 = load <8 x i8>, ptr %16, align 1 %18 = sext <8 x i8> %17 to <8 x i16> - %add.ptr30 = getelementptr inbounds i8, i8* %ip_c3.0104, i32 8 + %add.ptr30 = getelementptr inbounds i8, ptr %ip_c3.0104, i32 8 %19 = add <8 x i16> %.splat, %18 %20 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_0.0100, <8 x i16> %3, <8 x i16> %7) %21 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %acc_1.0101, <8 x i16> %3, <8 x i16> %11) @@ -558,12 +558,12 @@ for.body24: ; preds = %for.body24, %for.bo br i1 %exitcond, label %for.cond.cleanup23, label %for.body24 if.end: ; preds = %for.cond.cleanup23, %for.cond.preheader, %entry - ret i8* %out + ret ptr %out } ; Similar but predicated -define i32 @arm_nn_mat_mul_core_4x_s8(i32 %row_elements, i32 %offset, i8* %row_base, i8* %col_base, i32* nocapture readnone %sum_col, i32* nocapture %output) { +define i32 @arm_nn_mat_mul_core_4x_s8(i32 %row_elements, i32 %offset, ptr %row_base, ptr %col_base, ptr nocapture readnone %sum_col, ptr nocapture %output) { ; CHECK-LABEL: arm_nn_mat_mul_core_4x_s8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r10, lr} @@ -612,10 +612,10 @@ entry: for.body.preheader: ; preds = %entry %mul2 = mul nsw i32 %offset, 3 - %add.ptr3 = getelementptr inbounds i8, i8* %row_base, i32 %mul2 + %add.ptr3 = getelementptr inbounds i8, ptr %row_base, i32 %mul2 %mul = shl nsw i32 %offset, 1 - %add.ptr1 = getelementptr inbounds i8, i8* %row_base, i32 %mul - %add.ptr = getelementptr inbounds i8, i8* %row_base, i32 %offset + %add.ptr1 = getelementptr inbounds i8, ptr %row_base, i32 %mul + %add.ptr = getelementptr inbounds i8, ptr %row_base, i32 %offset %0 = icmp sgt i32 %div, 1 %smax = select i1 %0, i32 %div, i32 1 br label %for.body @@ -625,54 +625,54 @@ for.cond.cleanup: ; preds = %for.body, %entry %acc_n.sroa.9.0.lcssa = phi i32 [ 0, %entry ], [ %12, %for.body ] %acc_n.sroa.6.0.lcssa = phi i32 [ 0, %entry ], [ %9, %for.body ] %acc_n.sroa.0.0.lcssa = phi i32 [ 0, %entry ], [ %6, %for.body ] - store i32 %acc_n.sroa.0.0.lcssa, i32* %output, align 4 - %arrayidx19 = getelementptr inbounds i32, i32* %output, i32 1 - store i32 %acc_n.sroa.6.0.lcssa, i32* %arrayidx19, align 4 - %arrayidx21 = getelementptr inbounds i32, i32* %output, i32 2 - store i32 %acc_n.sroa.9.0.lcssa, i32* %arrayidx21, align 4 - %arrayidx23 = getelementptr inbounds i32, i32* %output, i32 3 - store i32 %acc_n.sroa.12.0.lcssa, i32* %arrayidx23, align 4 + store i32 %acc_n.sroa.0.0.lcssa, ptr %output, align 4 + %arrayidx19 = getelementptr inbounds i32, ptr %output, i32 1 + store i32 %acc_n.sroa.6.0.lcssa, ptr %arrayidx19, align 4 + %arrayidx21 = getelementptr inbounds i32, ptr %output, i32 2 + store i32 %acc_n.sroa.9.0.lcssa, ptr %arrayidx21, align 4 + %arrayidx23 = getelementptr inbounds i32, ptr %output, i32 3 + store i32 %acc_n.sroa.12.0.lcssa, ptr %arrayidx23, align 4 ret i32 0 for.body: ; preds = %for.body, %for.body.preheader - %col_base.addr.095 = phi i8* [ %add.ptr4, %for.body ], [ %col_base, %for.body.preheader ] + %col_base.addr.095 = phi ptr [ %add.ptr4, %for.body ], [ %col_base, %for.body.preheader ] %acc_n.sroa.0.094 = phi i32 [ %6, %for.body ], [ 0, %for.body.preheader ] %acc_n.sroa.6.093 = phi i32 [ %9, %for.body ], [ 0, %for.body.preheader ] %acc_n.sroa.9.092 = phi i32 [ %12, %for.body ], [ 0, %for.body.preheader ] %i.091 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %row_elem.090 = phi i32 [ %sub, %for.body ], [ %row_elements, %for.body.preheader ] %acc_n.sroa.12.089 = phi i32 [ %15, %for.body ], [ 0, %for.body.preheader ] - %ip_row_3.088 = phi i8* [ %add.ptr15, %for.body ], [ %add.ptr3, %for.body.preheader ] - %ip_row_2.087 = phi i8* [ %add.ptr14, %for.body ], [ %add.ptr1, %for.body.preheader ] - %ip_row_1.086 = phi i8* [ %add.ptr13, %for.body ], [ %add.ptr, %for.body.preheader ] - %ip_row_0.085 = phi i8* [ %add.ptr12, %for.body ], [ %row_base, %for.body.preheader ] + %ip_row_3.088 = phi ptr [ %add.ptr15, %for.body ], [ %add.ptr3, %for.body.preheader ] + %ip_row_2.087 = phi ptr [ %add.ptr14, %for.body ], [ %add.ptr1, %for.body.preheader ] + %ip_row_1.086 = phi ptr [ %add.ptr13, %for.body ], [ %add.ptr, %for.body.preheader ] + %ip_row_0.085 = phi ptr [ %add.ptr12, %for.body ], [ %row_base, %for.body.preheader ] %1 = tail call <16 x i1> @llvm.arm.mve.vctp8(i32 %row_elem.090) %sub = add nsw i32 %row_elem.090, -16 - %2 = bitcast i8* %col_base.addr.095 to <16 x i8>* - %3 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %2, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) - %add.ptr4 = getelementptr inbounds i8, i8* %col_base.addr.095, i32 16 - %4 = bitcast i8* %ip_row_0.085 to <16 x i8>* - %5 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %4, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) + %2 = bitcast ptr %col_base.addr.095 to ptr + %3 = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %2, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) + %add.ptr4 = getelementptr inbounds i8, ptr %col_base.addr.095, i32 16 + %4 = bitcast ptr %ip_row_0.085 to ptr + %5 = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %4, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) %6 = tail call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 0, i32 %acc_n.sroa.0.094, <16 x i8> %5, <16 x i8> %3, <16 x i1> %1) - %7 = bitcast i8* %ip_row_1.086 to <16 x i8>* - %8 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %7, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) + %7 = bitcast ptr %ip_row_1.086 to ptr + %8 = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %7, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) %9 = tail call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 0, i32 %acc_n.sroa.6.093, <16 x i8> %8, <16 x i8> %3, <16 x i1> %1) - %10 = bitcast i8* %ip_row_2.087 to <16 x i8>* - %11 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %10, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) + %10 = bitcast ptr %ip_row_2.087 to ptr + %11 = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %10, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) %12 = tail call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 0, i32 %acc_n.sroa.9.092, <16 x i8> %11, <16 x i8> %3, <16 x i1> %1) - %13 = bitcast i8* %ip_row_3.088 to <16 x i8>* - %14 = tail call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %13, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) + %13 = bitcast ptr %ip_row_3.088 to ptr + %14 = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %13, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) %15 = tail call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 0, i32 %acc_n.sroa.12.089, <16 x i8> %14, <16 x i8> %3, <16 x i1> %1) - %add.ptr12 = getelementptr inbounds i8, i8* %ip_row_0.085, i32 16 - %add.ptr13 = getelementptr inbounds i8, i8* %ip_row_1.086, i32 16 - %add.ptr14 = getelementptr inbounds i8, i8* %ip_row_2.087, i32 16 - %add.ptr15 = getelementptr inbounds i8, i8* %ip_row_3.088, i32 16 + %add.ptr12 = getelementptr inbounds i8, ptr %ip_row_0.085, i32 16 + %add.ptr13 = getelementptr inbounds i8, ptr %ip_row_1.086, i32 16 + %add.ptr14 = getelementptr inbounds i8, ptr %ip_row_2.087, i32 16 + %add.ptr15 = getelementptr inbounds i8, ptr %ip_row_3.088, i32 16 %inc = add nuw nsw i32 %i.091, 1 %exitcond = icmp eq i32 %inc, %smax br i1 %exitcond, label %for.cond.cleanup, label %for.body } -define i8* @signext(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, i32* nocapture readnone %output_shift, i32* nocapture readnone %output_mult, i32 %out_offset, i32 %col_offset, i32 %row_offset, i16 signext %activation_min, i16 signext %activation_max, i16 zeroext %row_len, i32* nocapture readonly %bias, i8* returned %out) { +define ptr @signext(ptr %input_row, ptr %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, ptr nocapture readnone %output_shift, ptr nocapture readnone %output_mult, i32 %out_offset, i32 %col_offset, i32 %row_offset, i16 signext %activation_min, i16 signext %activation_max, i16 zeroext %row_len, ptr nocapture readonly %bias, ptr returned %out) { ; CHECK-LABEL: signext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -770,11 +770,11 @@ for.cond.preheader: ; preds = %entry for.body.lr.ph: ; preds = %for.cond.preheader %conv5 = zext i16 %row_len to i32 - %add.ptr9 = getelementptr inbounds i8, i8* %input_col, i32 %conv5 + %add.ptr9 = getelementptr inbounds i8, ptr %input_col, i32 %conv5 %mul11 = shl nuw nsw i32 %conv5, 1 - %add.ptr12 = getelementptr inbounds i8, i8* %input_col, i32 %mul11 + %add.ptr12 = getelementptr inbounds i8, ptr %input_col, i32 %mul11 %mul14 = mul nuw nsw i32 %conv5, 3 - %add.ptr15 = getelementptr inbounds i8, i8* %input_col, i32 %mul14 + %add.ptr15 = getelementptr inbounds i8, ptr %input_col, i32 %mul14 %add = add nuw nsw i32 %conv5, 7 %div = lshr i32 %add, 3 %conv25 = trunc i32 %col_offset to i16 @@ -785,14 +785,14 @@ for.body.lr.ph: ; preds = %for.cond.preheader for.body: ; preds = %for.cond.cleanup23, %for.body.lr.ph %i_out_ch.0129 = phi i32 [ 0, %for.body.lr.ph ], [ %inc37, %for.cond.cleanup23 ] %i_row_loop.0128 = phi i32 [ undef, %for.body.lr.ph ], [ %i_row_loop.1.lcssa, %for.cond.cleanup23 ] - %arrayidx = getelementptr inbounds i32, i32* %bias, i32 %i_out_ch.0129 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %bias, i32 %i_out_ch.0129 + %0 = load i32, ptr %arrayidx, align 4 %cmp21111 = icmp slt i32 %i_row_loop.0128, %div br i1 %cmp21111, label %for.body24.preheader, label %for.cond.cleanup23 for.body24.preheader: ; preds = %for.body %mul = mul nuw nsw i32 %i_out_ch.0129, %conv5 - %add.ptr = getelementptr inbounds i8, i8* %input_row, i32 %mul + %add.ptr = getelementptr inbounds i8, ptr %input_row, i32 %mul br label %for.body24 for.cond.cleanup23: ; preds = %for.body24, %for.body @@ -805,49 +805,49 @@ for.cond.cleanup23: ; preds = %for.body24, %for.bo %add32 = add nsw i32 %add31, %acc_2.0.lcssa %add33 = add nsw i32 %add32, %acc_3.0.lcssa %conv34 = trunc i32 %add33 to i8 - %arrayidx35 = getelementptr inbounds i8, i8* %out, i32 %i_out_ch.0129 - store i8 %conv34, i8* %arrayidx35, align 1 + %arrayidx35 = getelementptr inbounds i8, ptr %out, i32 %i_out_ch.0129 + store i8 %conv34, ptr %arrayidx35, align 1 %inc37 = add nuw nsw i32 %i_out_ch.0129, 1 %exitcond133 = icmp eq i32 %inc37, %conv2 br i1 %exitcond133, label %if.end, label %for.body for.body24: ; preds = %for.body24, %for.body24.preheader %row_len_tmp.0122 = phi i32 [ %sub, %for.body24 ], [ %conv5, %for.body24.preheader ] - %ip_r0.0121 = phi i8* [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ] - %ip_c0.0120 = phi i8* [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ] - %ip_c1.0119 = phi i8* [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ] - %ip_c2.0118 = phi i8* [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ] + %ip_r0.0121 = phi ptr [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ] + %ip_c0.0120 = phi ptr [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ] + %ip_c1.0119 = phi ptr [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ] + %ip_c2.0118 = phi ptr [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ] %i_row_loop.1117 = phi i32 [ %inc, %for.body24 ], [ %i_row_loop.0128, %for.body24.preheader ] - %ip_c3.0116 = phi i8* [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ] + %ip_c3.0116 = phi ptr [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ] %acc_3.0115 = phi i32 [ %24, %for.body24 ], [ %0, %for.body24.preheader ] %acc_2.0114 = phi i32 [ %23, %for.body24 ], [ %0, %for.body24.preheader ] %acc_1.0113 = phi i32 [ %22, %for.body24 ], [ %0, %for.body24.preheader ] %acc_0.0112 = phi i32 [ %21, %for.body24 ], [ %0, %for.body24.preheader ] %1 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %row_len_tmp.0122) %sub = add nsw i32 %row_len_tmp.0122, -8 - %2 = bitcast i8* %ip_r0.0121 to <8 x i8>* - %3 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %2, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) + %2 = bitcast ptr %ip_r0.0121 to ptr + %3 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %2, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) %4 = sext <8 x i8> %3 to <8 x i16> - %add.ptr26 = getelementptr inbounds i8, i8* %ip_r0.0121, i32 8 - %5 = bitcast i8* %ip_c0.0120 to <8 x i8>* - %6 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %5, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) + %add.ptr26 = getelementptr inbounds i8, ptr %ip_r0.0121, i32 8 + %5 = bitcast ptr %ip_c0.0120 to ptr + %6 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %5, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) %7 = sext <8 x i8> %6 to <8 x i16> - %add.ptr27 = getelementptr inbounds i8, i8* %ip_c0.0120, i32 8 + %add.ptr27 = getelementptr inbounds i8, ptr %ip_c0.0120, i32 8 %8 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %7, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef) - %9 = bitcast i8* %ip_c1.0119 to <8 x i8>* - %10 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %9, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) + %9 = bitcast ptr %ip_c1.0119 to ptr + %10 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %9, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) %11 = sext <8 x i8> %10 to <8 x i16> - %add.ptr28 = getelementptr inbounds i8, i8* %ip_c1.0119, i32 8 + %add.ptr28 = getelementptr inbounds i8, ptr %ip_c1.0119, i32 8 %12 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %11, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef) - %13 = bitcast i8* %ip_c2.0118 to <8 x i8>* - %14 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %13, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) + %13 = bitcast ptr %ip_c2.0118 to ptr + %14 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %13, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) %15 = sext <8 x i8> %14 to <8 x i16> - %add.ptr29 = getelementptr inbounds i8, i8* %ip_c2.0118, i32 8 + %add.ptr29 = getelementptr inbounds i8, ptr %ip_c2.0118, i32 8 %16 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %15, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef) - %17 = bitcast i8* %ip_c3.0116 to <8 x i8>* - %18 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %17, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) + %17 = bitcast ptr %ip_c3.0116 to ptr + %18 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %17, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) %19 = sext <8 x i8> %18 to <8 x i16> - %add.ptr30 = getelementptr inbounds i8, i8* %ip_c3.0116, i32 8 + %add.ptr30 = getelementptr inbounds i8, ptr %ip_c3.0116, i32 8 %20 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %19, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef) %21 = tail call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 %acc_0.0112, <8 x i16> %4, <8 x i16> %8, <8 x i1> %1) %22 = tail call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 %acc_1.0113, <8 x i16> %4, <8 x i16> %12, <8 x i1> %1) @@ -858,10 +858,10 @@ for.body24: ; preds = %for.body24, %for.bo br i1 %exitcond, label %for.cond.cleanup23, label %for.body24 if.end: ; preds = %for.cond.cleanup23, %for.cond.preheader, %entry - ret i8* %out + ret ptr %out } -define i8* @signext_optsize(i8* %input_row, i8* %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, i32* nocapture readnone %output_shift, i32* nocapture readnone %output_mult, i32 %out_offset, i32 %col_offset, i32 %row_offset, i16 signext %activation_min, i16 signext %activation_max, i16 zeroext %row_len, i32* nocapture readonly %bias, i8* returned %out) optsize { +define ptr @signext_optsize(ptr %input_row, ptr %input_col, i16 zeroext %output_ch, i16 zeroext %num_cols, ptr nocapture readnone %output_shift, ptr nocapture readnone %output_mult, i32 %out_offset, i32 %col_offset, i32 %row_offset, i16 signext %activation_min, i16 signext %activation_max, i16 zeroext %row_len, ptr nocapture readonly %bias, ptr returned %out) optsize { ; CHECK-LABEL: signext_optsize: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -958,11 +958,11 @@ for.cond.preheader: ; preds = %entry for.body.lr.ph: ; preds = %for.cond.preheader %conv5 = zext i16 %row_len to i32 - %add.ptr9 = getelementptr inbounds i8, i8* %input_col, i32 %conv5 + %add.ptr9 = getelementptr inbounds i8, ptr %input_col, i32 %conv5 %mul11 = shl nuw nsw i32 %conv5, 1 - %add.ptr12 = getelementptr inbounds i8, i8* %input_col, i32 %mul11 + %add.ptr12 = getelementptr inbounds i8, ptr %input_col, i32 %mul11 %mul14 = mul nuw nsw i32 %conv5, 3 - %add.ptr15 = getelementptr inbounds i8, i8* %input_col, i32 %mul14 + %add.ptr15 = getelementptr inbounds i8, ptr %input_col, i32 %mul14 %add = add nuw nsw i32 %conv5, 7 %div = lshr i32 %add, 3 %conv25 = trunc i32 %col_offset to i16 @@ -973,14 +973,14 @@ for.body.lr.ph: ; preds = %for.cond.preheader for.body: ; preds = %for.cond.cleanup23, %for.body.lr.ph %i_out_ch.0129 = phi i32 [ 0, %for.body.lr.ph ], [ %inc37, %for.cond.cleanup23 ] %i_row_loop.0128 = phi i32 [ undef, %for.body.lr.ph ], [ %i_row_loop.1.lcssa, %for.cond.cleanup23 ] - %arrayidx = getelementptr inbounds i32, i32* %bias, i32 %i_out_ch.0129 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %bias, i32 %i_out_ch.0129 + %0 = load i32, ptr %arrayidx, align 4 %cmp21111 = icmp slt i32 %i_row_loop.0128, %div br i1 %cmp21111, label %for.body24.preheader, label %for.cond.cleanup23 for.body24.preheader: ; preds = %for.body %mul = mul nuw nsw i32 %i_out_ch.0129, %conv5 - %add.ptr = getelementptr inbounds i8, i8* %input_row, i32 %mul + %add.ptr = getelementptr inbounds i8, ptr %input_row, i32 %mul br label %for.body24 for.cond.cleanup23: ; preds = %for.body24, %for.body @@ -993,49 +993,49 @@ for.cond.cleanup23: ; preds = %for.body24, %for.bo %add32 = add nsw i32 %add31, %acc_2.0.lcssa %add33 = add nsw i32 %add32, %acc_3.0.lcssa %conv34 = trunc i32 %add33 to i8 - %arrayidx35 = getelementptr inbounds i8, i8* %out, i32 %i_out_ch.0129 - store i8 %conv34, i8* %arrayidx35, align 1 + %arrayidx35 = getelementptr inbounds i8, ptr %out, i32 %i_out_ch.0129 + store i8 %conv34, ptr %arrayidx35, align 1 %inc37 = add nuw nsw i32 %i_out_ch.0129, 1 %exitcond133 = icmp eq i32 %inc37, %conv2 br i1 %exitcond133, label %if.end, label %for.body for.body24: ; preds = %for.body24, %for.body24.preheader %row_len_tmp.0122 = phi i32 [ %sub, %for.body24 ], [ %conv5, %for.body24.preheader ] - %ip_r0.0121 = phi i8* [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ] - %ip_c0.0120 = phi i8* [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ] - %ip_c1.0119 = phi i8* [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ] - %ip_c2.0118 = phi i8* [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ] + %ip_r0.0121 = phi ptr [ %add.ptr26, %for.body24 ], [ %add.ptr, %for.body24.preheader ] + %ip_c0.0120 = phi ptr [ %add.ptr27, %for.body24 ], [ %input_col, %for.body24.preheader ] + %ip_c1.0119 = phi ptr [ %add.ptr28, %for.body24 ], [ %add.ptr9, %for.body24.preheader ] + %ip_c2.0118 = phi ptr [ %add.ptr29, %for.body24 ], [ %add.ptr12, %for.body24.preheader ] %i_row_loop.1117 = phi i32 [ %inc, %for.body24 ], [ %i_row_loop.0128, %for.body24.preheader ] - %ip_c3.0116 = phi i8* [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ] + %ip_c3.0116 = phi ptr [ %add.ptr30, %for.body24 ], [ %add.ptr15, %for.body24.preheader ] %acc_3.0115 = phi i32 [ %24, %for.body24 ], [ %0, %for.body24.preheader ] %acc_2.0114 = phi i32 [ %23, %for.body24 ], [ %0, %for.body24.preheader ] %acc_1.0113 = phi i32 [ %22, %for.body24 ], [ %0, %for.body24.preheader ] %acc_0.0112 = phi i32 [ %21, %for.body24 ], [ %0, %for.body24.preheader ] %1 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %row_len_tmp.0122) %sub = add nsw i32 %row_len_tmp.0122, -8 - %2 = bitcast i8* %ip_r0.0121 to <8 x i8>* - %3 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %2, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) + %2 = bitcast ptr %ip_r0.0121 to ptr + %3 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %2, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) %4 = sext <8 x i8> %3 to <8 x i16> - %add.ptr26 = getelementptr inbounds i8, i8* %ip_r0.0121, i32 8 - %5 = bitcast i8* %ip_c0.0120 to <8 x i8>* - %6 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %5, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) + %add.ptr26 = getelementptr inbounds i8, ptr %ip_r0.0121, i32 8 + %5 = bitcast ptr %ip_c0.0120 to ptr + %6 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %5, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) %7 = sext <8 x i8> %6 to <8 x i16> - %add.ptr27 = getelementptr inbounds i8, i8* %ip_c0.0120, i32 8 + %add.ptr27 = getelementptr inbounds i8, ptr %ip_c0.0120, i32 8 %8 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %7, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef) - %9 = bitcast i8* %ip_c1.0119 to <8 x i8>* - %10 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %9, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) + %9 = bitcast ptr %ip_c1.0119 to ptr + %10 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %9, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) %11 = sext <8 x i8> %10 to <8 x i16> - %add.ptr28 = getelementptr inbounds i8, i8* %ip_c1.0119, i32 8 + %add.ptr28 = getelementptr inbounds i8, ptr %ip_c1.0119, i32 8 %12 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %11, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef) - %13 = bitcast i8* %ip_c2.0118 to <8 x i8>* - %14 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %13, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) + %13 = bitcast ptr %ip_c2.0118 to ptr + %14 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %13, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) %15 = sext <8 x i8> %14 to <8 x i16> - %add.ptr29 = getelementptr inbounds i8, i8* %ip_c2.0118, i32 8 + %add.ptr29 = getelementptr inbounds i8, ptr %ip_c2.0118, i32 8 %16 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %15, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef) - %17 = bitcast i8* %ip_c3.0116 to <8 x i8>* - %18 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %17, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) + %17 = bitcast ptr %ip_c3.0116 to ptr + %18 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %17, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) %19 = sext <8 x i8> %18 to <8 x i16> - %add.ptr30 = getelementptr inbounds i8, i8* %ip_c3.0116, i32 8 + %add.ptr30 = getelementptr inbounds i8, ptr %ip_c3.0116, i32 8 %20 = tail call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> %19, <8 x i16> %.splat.i, <8 x i1> %1, <8 x i16> undef) %21 = tail call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 %acc_0.0112, <8 x i16> %4, <8 x i16> %8, <8 x i1> %1) %22 = tail call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 %acc_1.0113, <8 x i16> %4, <8 x i16> %12, <8 x i1> %1) @@ -1046,11 +1046,11 @@ for.body24: ; preds = %for.body24, %for.bo br i1 %exitcond, label %for.cond.cleanup23, label %for.body24 if.end: ; preds = %for.cond.cleanup23, %for.cond.preheader, %entry - ret i8* %out + ret ptr %out } -%struct.arm_cfft_instance_f32 = type { i16, float*, i16*, i16, i32*, i32*, i32*, float*, float*, float* } -define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cfft_instance_f32Pfjf(%struct.arm_cfft_instance_f32* nocapture readonly %0, float* %1, i32 %2, float %3) { +%struct.arm_cfft_instance_f32 = type { i16, ptr, ptr, i16, ptr, ptr, ptr, ptr, ptr, ptr } +define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cfft_instance_f32Pfjf(ptr nocapture readonly %0, ptr %1, i32 %2, float %3) { ; CHECK-LABEL: _Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cfft_instance_f32Pfjf: ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -1210,12 +1210,12 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf 6: ; preds = %4 %7 = lshr i32 %2, 2 - %8 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 7 - %9 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 4 - %10 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 8 - %11 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 5 - %12 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 9 - %13 = getelementptr inbounds %struct.arm_cfft_instance_f32, %struct.arm_cfft_instance_f32* %0, i32 0, i32 6 + %8 = getelementptr inbounds %struct.arm_cfft_instance_f32, ptr %0, i32 0, i32 7 + %9 = getelementptr inbounds %struct.arm_cfft_instance_f32, ptr %0, i32 0, i32 4 + %10 = getelementptr inbounds %struct.arm_cfft_instance_f32, ptr %0, i32 0, i32 8 + %11 = getelementptr inbounds %struct.arm_cfft_instance_f32, ptr %0, i32 0, i32 5 + %12 = getelementptr inbounds %struct.arm_cfft_instance_f32, ptr %0, i32 0, i32 9 + %13 = getelementptr inbounds %struct.arm_cfft_instance_f32, ptr %0, i32 0, i32 6 br label %14 14: ; preds = %6, %40 @@ -1235,7 +1235,7 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf br i1 %25, label %40, label %45 26: ; preds = %40, %4 - %27 = ptrtoint float* %1 to i32 + %27 = ptrtoint ptr %1 to i32 %28 = insertelement <4 x i32> undef, i32 %27, i32 0 %29 = shufflevector <4 x i32> %28, <4 x i32> undef, <4 x i32> zeroinitializer %30 = add <4 x i32> %29, <i32 -64, i32 -60, i32 -32, i32 -28> @@ -1261,76 +1261,76 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf 45: ; preds = %21, %113 %46 = phi i32 [ %114, %113 ], [ 0, %21 ] - %47 = load float*, float** %8, align 4 - %48 = load i32*, i32** %9, align 4 - %49 = getelementptr inbounds i32, i32* %48, i32 %18 - %50 = load i32, i32* %49, align 4 - %51 = getelementptr inbounds float, float* %47, i32 %50 - %52 = load float*, float** %10, align 4 - %53 = load i32*, i32** %11, align 4 - %54 = getelementptr inbounds i32, i32* %53, i32 %18 - %55 = load i32, i32* %54, align 4 - %56 = getelementptr inbounds float, float* %52, i32 %55 - %57 = load float*, float** %12, align 4 - %58 = load i32*, i32** %13, align 4 - %59 = getelementptr inbounds i32, i32* %58, i32 %18 - %60 = load i32, i32* %59, align 4 - %61 = getelementptr inbounds float, float* %57, i32 %60 + %47 = load ptr, ptr %8, align 4 + %48 = load ptr, ptr %9, align 4 + %49 = getelementptr inbounds i32, ptr %48, i32 %18 + %50 = load i32, ptr %49, align 4 + %51 = getelementptr inbounds float, ptr %47, i32 %50 + %52 = load ptr, ptr %10, align 4 + %53 = load ptr, ptr %11, align 4 + %54 = getelementptr inbounds i32, ptr %53, i32 %18 + %55 = load i32, ptr %54, align 4 + %56 = getelementptr inbounds float, ptr %52, i32 %55 + %57 = load ptr, ptr %12, align 4 + %58 = load ptr, ptr %13, align 4 + %59 = getelementptr inbounds i32, ptr %58, i32 %18 + %60 = load i32, ptr %59, align 4 + %61 = getelementptr inbounds float, ptr %57, i32 %60 %62 = mul i32 %22, %46 - %63 = getelementptr inbounds float, float* %1, i32 %62 - %64 = getelementptr inbounds float, float* %63, i32 %23 - %65 = getelementptr inbounds float, float* %64, i32 %23 - %66 = getelementptr inbounds float, float* %65, i32 %23 + %63 = getelementptr inbounds float, ptr %1, i32 %62 + %64 = getelementptr inbounds float, ptr %63, i32 %23 + %65 = getelementptr inbounds float, ptr %64, i32 %23 + %66 = getelementptr inbounds float, ptr %65, i32 %23 br label %67 67: ; preds = %45, %67 - %68 = phi float* [ %63, %45 ], [ %89, %67 ] - %69 = phi float* [ %65, %45 ], [ %103, %67 ] - %70 = phi float* [ %66, %45 ], [ %110, %67 ] - %71 = phi float* [ %64, %45 ], [ %96, %67 ] - %72 = phi float* [ %61, %45 ], [ %107, %67 ] - %73 = phi float* [ %56, %45 ], [ %93, %67 ] - %74 = phi float* [ %51, %45 ], [ %100, %67 ] + %68 = phi ptr [ %63, %45 ], [ %89, %67 ] + %69 = phi ptr [ %65, %45 ], [ %103, %67 ] + %70 = phi ptr [ %66, %45 ], [ %110, %67 ] + %71 = phi ptr [ %64, %45 ], [ %96, %67 ] + %72 = phi ptr [ %61, %45 ], [ %107, %67 ] + %73 = phi ptr [ %56, %45 ], [ %93, %67 ] + %74 = phi ptr [ %51, %45 ], [ %100, %67 ] %75 = phi i32 [ %24, %45 ], [ %111, %67 ] - %76 = bitcast float* %69 to <4 x float>* - %77 = bitcast float* %68 to <4 x float>* - %78 = load <4 x float>, <4 x float>* %76, align 4 - %79 = load <4 x float>, <4 x float>* %77, align 4 - %80 = bitcast float* %71 to <4 x float>* - %81 = load <4 x float>, <4 x float>* %80, align 4 - %82 = bitcast float* %70 to <4 x float>* - %83 = load <4 x float>, <4 x float>* %82, align 4 + %76 = bitcast ptr %69 to ptr + %77 = bitcast ptr %68 to ptr + %78 = load <4 x float>, ptr %76, align 4 + %79 = load <4 x float>, ptr %77, align 4 + %80 = bitcast ptr %71 to ptr + %81 = load <4 x float>, ptr %80, align 4 + %82 = bitcast ptr %70 to ptr + %83 = load <4 x float>, ptr %82, align 4 %84 = fadd <4 x float> %78, %79 %85 = fsub <4 x float> %79, %78 %86 = fadd <4 x float> %81, %83 %87 = fsub <4 x float> %81, %83 %88 = fadd <4 x float> %84, %86 - store <4 x float> %88, <4 x float>* %77, align 4 - %89 = getelementptr inbounds float, float* %68, i32 4 + store <4 x float> %88, ptr %77, align 4 + %89 = getelementptr inbounds float, ptr %68, i32 4 %90 = fsub <4 x float> %84, %86 - %91 = bitcast float* %73 to <4 x float>* - %92 = load <4 x float>, <4 x float>* %91, align 4 - %93 = getelementptr inbounds float, float* %73, i32 4 + %91 = bitcast ptr %73 to ptr + %92 = load <4 x float>, ptr %91, align 4 + %93 = getelementptr inbounds float, ptr %73, i32 4 %94 = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %92, <4 x float> %90) %95 = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %94, <4 x float> %92, <4 x float> %90) - store <4 x float> %95, <4 x float>* %80, align 4 - %96 = getelementptr inbounds float, float* %71, i32 4 + store <4 x float> %95, ptr %80, align 4 + %96 = getelementptr inbounds float, ptr %71, i32 4 %97 = tail call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 0, <4 x float> %85, <4 x float> %87) - %98 = bitcast float* %74 to <4 x float>* - %99 = load <4 x float>, <4 x float>* %98, align 4 - %100 = getelementptr inbounds float, float* %74, i32 4 + %98 = bitcast ptr %74 to ptr + %99 = load <4 x float>, ptr %98, align 4 + %100 = getelementptr inbounds float, ptr %74, i32 4 %101 = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %99, <4 x float> %97) %102 = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %101, <4 x float> %99, <4 x float> %97) - store <4 x float> %102, <4 x float>* %76, align 4 - %103 = getelementptr inbounds float, float* %69, i32 4 + store <4 x float> %102, ptr %76, align 4 + %103 = getelementptr inbounds float, ptr %69, i32 4 %104 = tail call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 1, <4 x float> %85, <4 x float> %87) - %105 = bitcast float* %72 to <4 x float>* - %106 = load <4 x float>, <4 x float>* %105, align 4 - %107 = getelementptr inbounds float, float* %72, i32 4 + %105 = bitcast ptr %72 to ptr + %106 = load <4 x float>, ptr %105, align 4 + %107 = getelementptr inbounds float, ptr %72, i32 4 %108 = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %106, <4 x float> %104) %109 = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> %108, <4 x float> %106, <4 x float> %104) - store <4 x float> %109, <4 x float>* %82, align 4 - %110 = getelementptr inbounds float, float* %70, i32 4 + store <4 x float> %109, ptr %82, align 4 + %110 = getelementptr inbounds float, ptr %70, i32 4 %111 = add nsw i32 %75, -1 %112 = icmp eq i32 %111, 0 br i1 %112, label %113, label %67 @@ -1378,10 +1378,10 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf declare <16 x i1> @llvm.arm.mve.vctp8(i32) declare <8 x i1> @llvm.arm.mve.vctp16(i32) declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>) -declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) -declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>) -declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) -declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>) +declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) +declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>) +declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) +declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32 immarg, <4 x i1>) declare i32 @llvm.vector.reduce.add.v16i8(<16 x i32> %ext4) declare i32 @llvm.arm.mve.vmldava.v8i16(i32, i32, i32, i32, <8 x i16>, <8 x i16>) declare i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32, i32, i32, i32, <16 x i8>, <16 x i8>, <16 x i1>) diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll index 35e6dd4..bf6468b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -verify-machineinstrs -o - | FileCheck %s -define void @arm_min_helium_f32(float* %pSrc, i32 %blockSize, float* nocapture %pResult, i32* nocapture %pIndex) { +define void @arm_min_helium_f32(ptr %pSrc, i32 %blockSize, ptr nocapture %pResult, ptr nocapture %pIndex) { ; CHECK-LABEL: arm_min_helium_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r6, r7, lr} @@ -48,17 +48,17 @@ do.body: ; preds = %do.body, %entry %indexVec.0 = phi <4 x i32> [ %1, %entry ], [ %11, %do.body ] %2 = phi <4 x float> [ zeroinitializer, %entry ], [ %10, %do.body ] %blkCnt.0 = phi i32 [ %blockSize, %entry ], [ %sub, %do.body ] - %pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ] + %pSrc.addr.0 = phi ptr [ %pSrc, %entry ], [ %add.ptr, %do.body ] %3 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0) - %4 = bitcast float* %pSrc.addr.0 to <4 x float>* - %5 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %4, i32 4, <4 x i1> %3, <4 x float> zeroinitializer) + %4 = bitcast ptr %pSrc.addr.0 to ptr + %5 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %4, i32 4, <4 x i1> %3, <4 x float> zeroinitializer) %6 = fcmp fast ole <4 x float> %5, %curExtremValVec.0 %7 = and <4 x i1> %6, %3 %8 = select fast <4 x i1> %7, <4 x float> %5, <4 x float> %curExtremValVec.0 %9 = bitcast <4 x i32> %indexVec.0 to <4 x float> %10 = select fast <4 x i1> %7, <4 x float> %9, <4 x float> %2 %11 = add <4 x i32> %indexVec.0, <i32 4, i32 4, i32 4, i32 4> - %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4 + %add.ptr = getelementptr inbounds float, ptr %pSrc.addr.0, i32 4 %sub = add nsw i32 %blkCnt.0, -4 %cmp = icmp sgt i32 %blkCnt.0, 4 br i1 %cmp, label %do.body, label %do.end @@ -73,13 +73,13 @@ do.end: ; preds = %do.body %.splat2 = shufflevector <4 x i32> %.splatinsert1, <4 x i32> undef, <4 x i32> zeroinitializer %15 = select <4 x i1> %14, <4 x i32> %12, <4 x i32> %.splat2 %16 = tail call i32 @llvm.arm.mve.minv.v4i32(i32 %blockSize, <4 x i32> %15, i32 1) - store i32 %16, i32* %pIndex, align 4 - store float %13, float* %pResult, align 4 + store i32 %16, ptr %pIndex, align 4 + store float %13, ptr %pResult, align 4 ret void } declare { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32, i32) #1 declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1 -declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2 +declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #2 declare float @llvm.arm.mve.minnmv.f32.v4f32(float, <4 x float>) #1 declare i32 @llvm.arm.mve.minv.v4i32(i32, <4 x i32>, i32) #1 diff --git a/llvm/test/CodeGen/Thumb2/mve-qrintrsplat.ll b/llvm/test/CodeGen/Thumb2/mve-qrintrsplat.ll index f3e8ea8..29c4fb9 100644 --- a/llvm/test/CodeGen/Thumb2/mve-qrintrsplat.ll +++ b/llvm/test/CodeGen/Thumb2/mve-qrintrsplat.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s -define void @vaddq(i32* %x, i32* %y, i32 %n) { +define void @vaddq(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vaddq: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -28,23 +28,23 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast i32* %x.addr.014 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4 %3 = add <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10> - %4 = bitcast i32* %y.addr.013 to <4 x i32>* - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4 + %4 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vadd(i32* %s1, i32 %c0, i32 %N) { +define void @vadd(ptr %s1, i32 %c0, i32 %N) { ; CHECK-LABEL: vadd: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -72,14 +72,14 @@ while.body.lr.ph: ; preds = %entry br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) - %1 = bitcast i32* %s1.addr.013 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %1 = bitcast ptr %s1.addr.013 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) %3 = tail call <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, <4 x i1> %0, <4 x i32> %2) - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) - %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4 %sub = add nsw i32 %N.addr.012, -4 %cmp = icmp sgt i32 %N.addr.012, 4 br i1 %cmp, label %while.body, label %while.end @@ -88,7 +88,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @vsubq(i32* %x, i32* %y, i32 %n) { +define void @vsubq(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vsubq: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -115,23 +115,23 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast i32* %x.addr.014 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4 %3 = sub <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10> - %4 = bitcast i32* %y.addr.013 to <4 x i32>* - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4 + %4 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vsub(i32* %s1, i32 %N) { +define void @vsub(ptr %s1, i32 %N) { ; CHECK-LABEL: vsub: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -158,14 +158,14 @@ while.body.lr.ph: ; preds = %entry br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) - %1 = bitcast i32* %s1.addr.013 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %1 = bitcast ptr %s1.addr.013 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) %3 = tail call <4 x i32> @llvm.arm.mve.sub.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i1> %0, <4 x i32> %2) - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) - %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4 %sub = add nsw i32 %N.addr.012, -4 %cmp = icmp sgt i32 %N.addr.012, 4 br i1 %cmp, label %while.body, label %while.end @@ -174,7 +174,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @vmulq(i32* %x, i32* %y, i32 %n) { +define void @vmulq(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vmulq: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -201,23 +201,23 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast i32* %x.addr.014 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4 %3 = mul <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10> - %4 = bitcast i32* %y.addr.013 to <4 x i32>* - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4 + %4 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vmul(i32* %s1, i32 %N) { +define void @vmul(ptr %s1, i32 %N) { ; CHECK-LABEL: vmul: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -244,14 +244,14 @@ while.body.lr.ph: ; preds = %entry br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) - %1 = bitcast i32* %s1.addr.013 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %1 = bitcast ptr %s1.addr.013 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) %3 = tail call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i1> %0, <4 x i32> %2) - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) - %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4 %sub = add nsw i32 %N.addr.012, -4 %cmp = icmp sgt i32 %N.addr.012, 4 br i1 %cmp, label %while.body, label %while.end @@ -260,7 +260,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @vqaddq(i32* %x, i32* %y, i32 %n) { +define void @vqaddq(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vqaddq: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -287,23 +287,23 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast i32* %x.addr.014 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4 %3 = tail call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>) - %4 = bitcast i32* %y.addr.013 to <4 x i32>* - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4 + %4 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vqaddqu(i32* %x, i32* %y, i32 %n) { +define void @vqaddqu(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vqaddqu: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -330,23 +330,23 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast i32* %x.addr.014 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4 %3 = tail call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>) - %4 = bitcast i32* %y.addr.013 to <4 x i32>* - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4 + %4 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vqadd(i32* %s1, i32 %N) { +define void @vqadd(ptr %s1, i32 %N) { ; CHECK-LABEL: vqadd: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -373,14 +373,14 @@ while.body.lr.ph: ; preds = %entry br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) - %1 = bitcast i32* %s1.addr.013 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %1 = bitcast ptr %s1.addr.013 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) %3 = tail call <4 x i32> @llvm.arm.mve.qadd.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, i32 0, <4 x i1> %0, <4 x i32> %2) - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) - %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4 %sub = add nsw i32 %N.addr.012, -4 %cmp = icmp sgt i32 %N.addr.012, 4 br i1 %cmp, label %while.body, label %while.end @@ -389,7 +389,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @vqsubq(i32* %x, i32* %y, i32 %n) { +define void @vqsubq(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vqsubq: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -416,23 +416,23 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast i32* %x.addr.014 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4 %3 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>) - %4 = bitcast i32* %y.addr.013 to <4 x i32>* - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4 + %4 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vqsubqu(i32* %x, i32* %y, i32 %n) { +define void @vqsubqu(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vqsubqu: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -459,23 +459,23 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast i32* %x.addr.014 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4 %3 = tail call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>) - %4 = bitcast i32* %y.addr.013 to <4 x i32>* - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4 + %4 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vqsub(i32* %s1, i32 %N) { +define void @vqsub(ptr %s1, i32 %N) { ; CHECK-LABEL: vqsub: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -502,14 +502,14 @@ while.body.lr.ph: ; preds = %entry br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) - %1 = bitcast i32* %s1.addr.013 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %1 = bitcast ptr %s1.addr.013 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) %3 = tail call <4 x i32> @llvm.arm.mve.qsub.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, i32 0, <4 x i1> %0, <4 x i32> %2) - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) - %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4 %sub = add nsw i32 %N.addr.012, -4 %cmp = icmp sgt i32 %N.addr.012, 4 br i1 %cmp, label %while.body, label %while.end @@ -518,7 +518,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @vhaddq(i32* %x, i32* %y, i32 %n) { +define void @vhaddq(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vhaddq: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -545,23 +545,23 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast i32* %x.addr.014 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4 %3 = tail call <4 x i32> @llvm.arm.mve.vhadd.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, i32 0) - %4 = bitcast i32* %y.addr.013 to <4 x i32>* - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4 + %4 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vhadd(i32* %s1, i32 %N) { +define void @vhadd(ptr %s1, i32 %N) { ; CHECK-LABEL: vhadd: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -588,14 +588,14 @@ while.body.lr.ph: ; preds = %entry br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) - %1 = bitcast i32* %s1.addr.013 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %1 = bitcast ptr %s1.addr.013 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) %3 = tail call <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, i32 0, <4 x i1> %0, <4 x i32> %2) - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) - %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4 %sub = add nsw i32 %N.addr.012, -4 %cmp = icmp sgt i32 %N.addr.012, 4 br i1 %cmp, label %while.body, label %while.end @@ -604,7 +604,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @vhsubq(i32* %x, i32* %y, i32 %n) { +define void @vhsubq(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vhsubq: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -631,23 +631,23 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast i32* %x.addr.014 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4 %3 = tail call <4 x i32> @llvm.arm.mve.vhsub.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, i32 0) - %4 = bitcast i32* %y.addr.013 to <4 x i32>* - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4 + %4 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vhsub(i32* %s1, i32 %N) { +define void @vhsub(ptr %s1, i32 %N) { ; CHECK-LABEL: vhsub: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -674,14 +674,14 @@ while.body.lr.ph: ; preds = %entry br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) - %1 = bitcast i32* %s1.addr.013 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %1 = bitcast ptr %s1.addr.013 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) %3 = tail call <4 x i32> @llvm.arm.mve.hsub.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, i32 0, <4 x i1> %0, <4 x i32> %2) - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) - %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4 %sub = add nsw i32 %N.addr.012, -4 %cmp = icmp sgt i32 %N.addr.012, 4 br i1 %cmp, label %while.body, label %while.end @@ -690,7 +690,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @vqdmullbq(i32* %x, i32* %y, i32 %n) { +define void @vqdmullbq(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vqdmullbq: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -717,25 +717,25 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast i32* %x.addr.014 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4 %3 = tail call <2 x i64> @llvm.arm.mve.vqdmull.v2i64.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, i32 0) %4 = bitcast <2 x i64> %3 to <4 x i32> - %5 = bitcast i32* %y.addr.013 to <4 x i32>* - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %4, <4 x i32>* %5, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4 + %5 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %4, ptr %5, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vqdmull(i32* %s1, i32 %N) { +define void @vqdmull(ptr %s1, i32 %N) { ; CHECK-LABEL: vqdmull: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -762,17 +762,17 @@ while.body.lr.ph: ; preds = %entry br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) - %1 = bitcast i32* %s1.addr.013 to <4 x i16>* - %2 = tail call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %1, i32 2, <4 x i1> %0, <4 x i16> zeroinitializer) + %1 = bitcast ptr %s1.addr.013 to ptr + %2 = tail call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %1, i32 2, <4 x i1> %0, <4 x i16> zeroinitializer) %3 = sext <4 x i16> %2 to <4 x i32> %4 = bitcast <4 x i32> %3 to <8 x i16> %5 = tail call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %4, <8 x i16> <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>, i32 0, <4 x i1> %0, <4 x i32> %3) - %6 = bitcast i32* %s1.addr.013 to <4 x i32>* - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %5, <4 x i32>* %6, i32 4, <4 x i1> %0) - %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + %6 = bitcast ptr %s1.addr.013 to ptr + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %5, ptr %6, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4 %sub = add nsw i32 %N.addr.012, -4 %cmp = icmp sgt i32 %N.addr.012, 4 br i1 %cmp, label %while.body, label %while.end @@ -781,7 +781,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @vqdmulhq(i32* %x, i32* %y, i32 %n) { +define void @vqdmulhq(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vqdmulhq: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -808,23 +808,23 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast i32* %x.addr.014 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4 %3 = tail call <4 x i32> @llvm.arm.mve.vqdmulh.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>) - %4 = bitcast i32* %y.addr.013 to <4 x i32>* - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4 + %4 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vqdmulh(i32* %s1, i32 %N) { +define void @vqdmulh(ptr %s1, i32 %N) { ; CHECK-LABEL: vqdmulh: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -851,14 +851,14 @@ while.body.lr.ph: ; preds = %entry br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) - %1 = bitcast i32* %s1.addr.013 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %1 = bitcast ptr %s1.addr.013 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) %3 = tail call <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i1> %0, <4 x i32> %2) - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) - %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4 %sub = add nsw i32 %N.addr.012, -4 %cmp = icmp sgt i32 %N.addr.012, 4 br i1 %cmp, label %while.body, label %while.end @@ -867,7 +867,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @vqrdmulhq(i32* %x, i32* %y, i32 %n) { +define void @vqrdmulhq(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vqrdmulhq: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -894,23 +894,23 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast i32* %x.addr.014 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.014, i32 4 %3 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>) - %4 = bitcast i32* %y.addr.013 to <4 x i32>* - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %4, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.013, i32 4 + %4 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %4, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vqrdmulh(i32* %s1, i32 %N) { +define void @vqrdmulh(ptr %s1, i32 %N) { ; CHECK-LABEL: vqrdmulh: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -937,14 +937,14 @@ while.body.lr.ph: ; preds = %entry br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) - %1 = bitcast i32* %s1.addr.013 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %1 = bitcast ptr %s1.addr.013 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) %3 = tail call <4 x i32> @llvm.arm.mve.qrdmulh.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i1> %0, <4 x i32> %2) - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0) - %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4 + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds i32, ptr %s1.addr.013, i32 4 %sub = add nsw i32 %N.addr.012, -4 %cmp = icmp sgt i32 %N.addr.012, 4 br i1 %cmp, label %while.body, label %while.end @@ -953,7 +953,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @vmlaq(i32* %x, i32* %y, i32 %n) { +define void @vmlaq(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vmlaq: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -981,25 +981,25 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.017 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.016 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.017 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.016 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.015 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.015) - %1 = bitcast i32* %x.addr.017 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.017, i32 4 - %3 = bitcast i32* %y.addr.016 to <4 x i32>* - %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %1 = bitcast ptr %x.addr.017 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.017, i32 4 + %3 = bitcast ptr %y.addr.016 to ptr + %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) %5 = mul <4 x i32> %4, <i32 10, i32 10, i32 10, i32 10> %6 = add <4 x i32> %5, %2 - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %6, <4 x i32>* %3, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.016, i32 4 + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %6, ptr %3, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.016, i32 4 %sub = add nsw i32 %i.015, -4 %cmp = icmp sgt i32 %i.015, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vmlaqp(i32* %x, i32* %y, i32 %n) { +define void @vmlaqp(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vmlaqp: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1027,24 +1027,24 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.018 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.017 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.018 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.017 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.016 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.016) - %1 = bitcast i32* %x.addr.018 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.018, i32 4 - %3 = bitcast i32* %y.addr.017 to <4 x i32>* - %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %1 = bitcast ptr %x.addr.018 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.018, i32 4 + %3 = bitcast ptr %y.addr.017 to ptr + %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) %5 = tail call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %4, i32 10, <4 x i1> %0) - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %5, <4 x i32>* %3, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.017, i32 4 + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %5, ptr %3, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.017, i32 4 %sub = add nsw i32 %i.016, -4 %cmp = icmp sgt i32 %i.016, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vmlasq(i32* %x, i32* %y, i32 %n) { +define void @vmlasq(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vmlasq: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1072,25 +1072,25 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.017 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.016 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.017 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.016 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.015 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.015) - %1 = bitcast i32* %x.addr.017 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.017, i32 4 - %3 = bitcast i32* %y.addr.016 to <4 x i32>* - %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %1 = bitcast ptr %x.addr.017 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.017, i32 4 + %3 = bitcast ptr %y.addr.016 to ptr + %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) %5 = mul <4 x i32> %4, %2 %6 = add <4 x i32> %5, <i32 10, i32 10, i32 10, i32 10> - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %6, <4 x i32>* %3, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.016, i32 4 + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %6, ptr %3, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.016, i32 4 %sub = add nsw i32 %i.015, -4 %cmp = icmp sgt i32 %i.015, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vmlasqp(i32* %x, i32* %y, i32 %n) { +define void @vmlasqp(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vmlasqp: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1118,24 +1118,24 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.018 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.017 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.018 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.017 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.016 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.016) - %1 = bitcast i32* %x.addr.018 to <4 x i32>* - %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) - %add.ptr = getelementptr inbounds i32, i32* %x.addr.018, i32 4 - %3 = bitcast i32* %y.addr.017 to <4 x i32>* - %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %1 = bitcast ptr %x.addr.018 to ptr + %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) + %add.ptr = getelementptr inbounds i32, ptr %x.addr.018, i32 4 + %3 = bitcast ptr %y.addr.017 to ptr + %4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %3, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer) %5 = tail call <4 x i32> @llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %4, i32 10, <4 x i1> %0) - tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %5, <4 x i32>* %3, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.017, i32 4 + tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %5, ptr %3, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.017, i32 4 %sub = add nsw i32 %i.016, -4 %cmp = icmp sgt i32 %i.016, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vaddqf(float* %x, float* %y, i32 %n) { +define void @vaddqf(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vaddqf: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1162,23 +1162,23 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi float* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi float* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast float* %x.addr.014 to <4 x float>* - %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) - %add.ptr = getelementptr inbounds float, float* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %add.ptr = getelementptr inbounds float, ptr %x.addr.014, i32 4 %3 = fadd fast <4 x float> %2, <float 10.0, float 10.0, float 10.0, float 10.0> - %4 = bitcast float* %y.addr.013 to <4 x float>* - tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %4, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds float, float* %y.addr.013, i32 4 + %4 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4f32.p0(<4 x float> %3, ptr %4, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds float, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vaddf(float* %s1, i32 %N) { +define void @vaddf(ptr %s1, i32 %N) { ; CHECK-LABEL: vaddf: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1206,14 +1206,14 @@ while.body.lr.ph: ; preds = %entry br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.013 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) - %1 = bitcast float* %s1.addr.013 to <4 x float>* - %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %1 = bitcast ptr %s1.addr.013 to ptr + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) %3 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>, <4 x i1> %0, <4 x float> %2) - tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %1, i32 4, <4 x i1> %0) - %add.ptr = getelementptr inbounds float, float* %s1.addr.013, i32 4 + tail call void @llvm.masked.store.v4f32.p0(<4 x float> %3, ptr %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds float, ptr %s1.addr.013, i32 4 %sub = add nsw i32 %N.addr.012, -4 %cmp = icmp sgt i32 %N.addr.012, 4 br i1 %cmp, label %while.body, label %while.end @@ -1222,7 +1222,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @vsubqf(float* %x, float* %y, i32 %n) { +define void @vsubqf(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vsubqf: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1249,23 +1249,23 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi float* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi float* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast float* %x.addr.014 to <4 x float>* - %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) - %add.ptr = getelementptr inbounds float, float* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %add.ptr = getelementptr inbounds float, ptr %x.addr.014, i32 4 %3 = fsub fast <4 x float> %2, <float 10.0, float 10.0, float 10.0, float 10.0> - %4 = bitcast float* %y.addr.013 to <4 x float>* - tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %4, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds float, float* %y.addr.013, i32 4 + %4 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4f32.p0(<4 x float> %3, ptr %4, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds float, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vsubf(float* %s1, i32 %N) { +define void @vsubf(ptr %s1, i32 %N) { ; CHECK-LABEL: vsubf: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1293,14 +1293,14 @@ while.body.lr.ph: ; preds = %entry br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.013 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) - %1 = bitcast float* %s1.addr.013 to <4 x float>* - %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %1 = bitcast ptr %s1.addr.013 to ptr + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) %3 = tail call fast <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>, <4 x i1> %0, <4 x float> %2) - tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %1, i32 4, <4 x i1> %0) - %add.ptr = getelementptr inbounds float, float* %s1.addr.013, i32 4 + tail call void @llvm.masked.store.v4f32.p0(<4 x float> %3, ptr %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds float, ptr %s1.addr.013, i32 4 %sub = add nsw i32 %N.addr.012, -4 %cmp = icmp sgt i32 %N.addr.012, 4 br i1 %cmp, label %while.body, label %while.end @@ -1309,7 +1309,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @vmulqf(float* %x, float* %y, i32 %n) { +define void @vmulqf(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vmulqf: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1336,23 +1336,23 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.014 = phi float* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.013 = phi float* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.014 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.013 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.012 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.012) - %1 = bitcast float* %x.addr.014 to <4 x float>* - %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) - %add.ptr = getelementptr inbounds float, float* %x.addr.014, i32 4 + %1 = bitcast ptr %x.addr.014 to ptr + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %add.ptr = getelementptr inbounds float, ptr %x.addr.014, i32 4 %3 = fmul fast <4 x float> %2, <float 10.0, float 10.0, float 10.0, float 10.0> - %4 = bitcast float* %y.addr.013 to <4 x float>* - tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %4, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds float, float* %y.addr.013, i32 4 + %4 = bitcast ptr %y.addr.013 to ptr + tail call void @llvm.masked.store.v4f32.p0(<4 x float> %3, ptr %4, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds float, ptr %y.addr.013, i32 4 %sub = add nsw i32 %i.012, -4 %cmp = icmp sgt i32 %i.012, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vmulf(float* %s1, i32 %N) { +define void @vmulf(ptr %s1, i32 %N) { ; CHECK-LABEL: vmulf: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1380,14 +1380,14 @@ while.body.lr.ph: ; preds = %entry br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.013 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.013 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012) - %1 = bitcast float* %s1.addr.013 to <4 x float>* - %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %1 = bitcast ptr %s1.addr.013 to ptr + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) %3 = tail call fast <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>, <4 x i1> %0, <4 x float> %2) - tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %1, i32 4, <4 x i1> %0) - %add.ptr = getelementptr inbounds float, float* %s1.addr.013, i32 4 + tail call void @llvm.masked.store.v4f32.p0(<4 x float> %3, ptr %1, i32 4, <4 x i1> %0) + %add.ptr = getelementptr inbounds float, ptr %s1.addr.013, i32 4 %sub = add nsw i32 %N.addr.012, -4 %cmp = icmp sgt i32 %N.addr.012, 4 br i1 %cmp, label %while.body, label %while.end @@ -1396,7 +1396,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @vfmaq(float* %x, float* %y, i32 %n) { +define void @vfmaq(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vfmaq: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1424,24 +1424,24 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.017 = phi float* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.016 = phi float* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.017 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.016 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.015 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.015) - %1 = bitcast float* %x.addr.017 to <4 x float>* - %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) - %add.ptr = getelementptr inbounds float, float* %x.addr.017, i32 4 - %3 = bitcast float* %y.addr.016 to <4 x float>* - %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %3, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %1 = bitcast ptr %x.addr.017 to ptr + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %add.ptr = getelementptr inbounds float, ptr %x.addr.017, i32 4 + %3 = bitcast ptr %y.addr.016 to ptr + %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %3, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) %5 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %4, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>, <4 x float> %2) - tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %5, <4 x float>* %3, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds float, float* %y.addr.016, i32 4 + tail call void @llvm.masked.store.v4f32.p0(<4 x float> %5, ptr %3, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds float, ptr %y.addr.016, i32 4 %sub = add nsw i32 %i.015, -4 %cmp = icmp sgt i32 %i.015, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vfma(float* %s1, float* %s2, i32 %N) { +define void @vfma(ptr %s1, ptr %s2, i32 %N) { ; CHECK-LABEL: vfma: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1466,19 +1466,19 @@ entry: br i1 %cmp12, label %while.body.lr.ph, label %while.end while.body.lr.ph: ; preds = %entry - %0 = bitcast float* %s2 to <4 x float>* + %0 = bitcast ptr %s2 to ptr br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.014 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.014 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.013 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %1 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.013) - %2 = bitcast float* %s1.addr.014 to <4 x float>* - %3 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> zeroinitializer) - %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %1, <4 x float> zeroinitializer) + %2 = bitcast ptr %s1.addr.014 to ptr + %3 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x float> zeroinitializer) + %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %0, i32 4, <4 x i1> %1, <4 x float> zeroinitializer) %5 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %4, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>, <4 x float> %3, <4 x i1> %1) - tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %5, <4 x float>* %2, i32 4, <4 x i1> %1) - %add.ptr = getelementptr inbounds float, float* %s1.addr.014, i32 4 + tail call void @llvm.masked.store.v4f32.p0(<4 x float> %5, ptr %2, i32 4, <4 x i1> %1) + %add.ptr = getelementptr inbounds float, ptr %s1.addr.014, i32 4 %sub = add nsw i32 %N.addr.013, -4 %cmp = icmp sgt i32 %N.addr.013, 4 br i1 %cmp, label %while.body, label %while.end @@ -1487,7 +1487,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @vfmasq(float* %x, float* %y, i32 %n) { +define void @vfmasq(ptr %x, ptr %y, i32 %n) { ; CHECK-LABEL: vfmasq: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1516,24 +1516,24 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %x.addr.017 = phi float* [ %add.ptr, %for.body ], [ %x, %entry ] - %y.addr.016 = phi float* [ %add.ptr1, %for.body ], [ %y, %entry ] + %x.addr.017 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ] + %y.addr.016 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ] %i.015 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i.015) - %1 = bitcast float* %x.addr.017 to <4 x float>* - %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) - %add.ptr = getelementptr inbounds float, float* %x.addr.017, i32 4 - %3 = bitcast float* %y.addr.016 to <4 x float>* - %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %3, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %1 = bitcast ptr %x.addr.017 to ptr + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %add.ptr = getelementptr inbounds float, ptr %x.addr.017, i32 4 + %3 = bitcast ptr %y.addr.016 to ptr + %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %3, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) %5 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %2, <4 x float> %4, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>) - tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %5, <4 x float>* %3, i32 4, <4 x i1> %0) - %add.ptr1 = getelementptr inbounds float, float* %y.addr.016, i32 4 + tail call void @llvm.masked.store.v4f32.p0(<4 x float> %5, ptr %3, i32 4, <4 x i1> %0) + %add.ptr1 = getelementptr inbounds float, ptr %y.addr.016, i32 4 %sub = add nsw i32 %i.015, -4 %cmp = icmp sgt i32 %i.015, 4 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @vfmas(float* %s1, float* %s2, i32 %N) { +define void @vfmas(ptr %s1, ptr %s2, i32 %N) { ; CHECK-LABEL: vfmas: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1559,19 +1559,19 @@ entry: br i1 %cmp12, label %while.body.lr.ph, label %while.end while.body.lr.ph: ; preds = %entry - %0 = bitcast float* %s2 to <4 x float>* + %0 = bitcast ptr %s2 to ptr br label %while.body while.body: ; preds = %while.body.lr.ph, %while.body - %s1.addr.014 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %s1.addr.014 = phi ptr [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ] %N.addr.013 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ] %1 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.013) - %2 = bitcast float* %s1.addr.014 to <4 x float>* - %3 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> zeroinitializer) - %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %1, <4 x float> zeroinitializer) + %2 = bitcast ptr %s1.addr.014 to ptr + %3 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %2, i32 4, <4 x i1> %1, <4 x float> zeroinitializer) + %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %0, i32 4, <4 x i1> %1, <4 x float> zeroinitializer) %5 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %3, <4 x float> %4, <4 x float> <float 10.0, float 10.0, float 10.0, float 10.0>, <4 x i1> %1) - tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %5, <4 x float>* %2, i32 4, <4 x i1> %1) - %add.ptr = getelementptr inbounds float, float* %s1.addr.014, i32 4 + tail call void @llvm.masked.store.v4f32.p0(<4 x float> %5, ptr %2, i32 4, <4 x i1> %1) + %add.ptr = getelementptr inbounds float, ptr %s1.addr.014, i32 4 %sub = add nsw i32 %N.addr.013, -4 %cmp = icmp sgt i32 %N.addr.013, 4 br i1 %cmp, label %while.body, label %while.end @@ -1580,7 +1580,7 @@ while.end: ; preds = %while.body, %entry ret void } -define void @rgbconvert(i32* noalias %pwSourceBase, i16 signext %iSourceStride, i16* noalias %phwTargetBase, i16 signext %iTargetStride, i16 %iHeight, i16 %iWidth) { +define void @rgbconvert(ptr noalias %pwSourceBase, i16 signext %iSourceStride, ptr noalias %phwTargetBase, i16 signext %iTargetStride, i16 %iHeight, i16 %iWidth) { ; CHECK-LABEL: rgbconvert: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -1655,18 +1655,18 @@ for.cond.cleanup: ; preds = %do.end, %entry ret void for.body: ; preds = %for.body.lr.ph, %do.end - %pwSourceBase.addr.040 = phi i32* [ %pwSourceBase, %for.body.lr.ph ], [ %add.ptr10, %do.end ] - %phwTargetBase.addr.039 = phi i16* [ %phwTargetBase, %for.body.lr.ph ], [ %add.ptr12, %do.end ] + %pwSourceBase.addr.040 = phi ptr [ %pwSourceBase, %for.body.lr.ph ], [ %add.ptr10, %do.end ] + %phwTargetBase.addr.039 = phi ptr [ %phwTargetBase, %for.body.lr.ph ], [ %add.ptr12, %do.end ] %y.038 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %do.end ] br label %do.body do.body: ; preds = %do.body, %for.body - %pTarget.0 = phi i16* [ %phwTargetBase.addr.039, %for.body ], [ %add.ptr6, %do.body ] - %pSource.0 = phi i32* [ %pwSourceBase.addr.040, %for.body ], [ %add.ptr, %do.body ] + %pTarget.0 = phi ptr [ %phwTargetBase.addr.039, %for.body ], [ %add.ptr6, %do.body ] + %pSource.0 = phi ptr [ %pwSourceBase.addr.040, %for.body ], [ %add.ptr, %do.body ] %blkCnt.0 = phi i32 [ %conv2, %for.body ], [ %sub, %do.body ] %l2 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0) - %l3 = bitcast i32* %pSource.0 to <4 x i32>* - %l4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %l3, i32 4, <4 x i1> %l2, <4 x i32> zeroinitializer) + %l3 = bitcast ptr %pSource.0 to ptr + %l4 = tail call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %l3, i32 4, <4 x i1> %l2, <4 x i32> zeroinitializer) %l5 = tail call <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32> %l4, <4 x i32> <i32 268435456, i32 268435456, i32 268435456, i32 268435456>, <4 x i1> %l2, <4 x i32> undef) %and = and <4 x i32> %l5, <i32 31, i32 31, i32 31, i32 31> %l6 = tail call <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32> %l4, <4 x i32> <i32 67108864, i32 67108864, i32 67108864, i32 67108864>, <4 x i1> %l2, <4 x i32> undef) @@ -1676,29 +1676,29 @@ do.body: ; preds = %do.body, %for.body %or = or <4 x i32> %and3, %and %or5 = or <4 x i32> %or, %and4 %l8 = trunc <4 x i32> %or5 to <4 x i16> - %l9 = bitcast i16* %pTarget.0 to <4 x i16>* - tail call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %l8, <4 x i16>* %l9, i32 2, <4 x i1> %l2) - %add.ptr = getelementptr inbounds i32, i32* %pSource.0, i32 4 - %add.ptr6 = getelementptr inbounds i16, i16* %pTarget.0, i32 4 + %l9 = bitcast ptr %pTarget.0 to ptr + tail call void @llvm.masked.store.v4i16.p0(<4 x i16> %l8, ptr %l9, i32 2, <4 x i1> %l2) + %add.ptr = getelementptr inbounds i32, ptr %pSource.0, i32 4 + %add.ptr6 = getelementptr inbounds i16, ptr %pTarget.0, i32 4 %sub = add nsw i32 %blkCnt.0, -4 %cmp7 = icmp sgt i32 %blkCnt.0, 4 br i1 %cmp7, label %do.body, label %do.end do.end: ; preds = %do.body - %add.ptr10 = getelementptr inbounds i32, i32* %pwSourceBase.addr.040, i32 %conv9 - %add.ptr12 = getelementptr inbounds i16, i16* %phwTargetBase.addr.039, i32 %conv11 + %add.ptr10 = getelementptr inbounds i32, ptr %pwSourceBase.addr.040, i32 %conv9 + %add.ptr12 = getelementptr inbounds i16, ptr %phwTargetBase.addr.039, i32 %conv11 %inc = add nuw nsw i32 %y.038, 1 %exitcond.not = icmp eq i32 %inc, %conv br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } declare <4 x i1> @llvm.arm.mve.vctp32(i32) -declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) -declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) -declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) -declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) -declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>) -declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32 immarg, <4 x i1>) #3 +declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) +declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) +declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) +declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32 immarg, <4 x i1>) +declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32 immarg, <4 x i1>) #3 declare <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) declare <4 x i32> @llvm.arm.mve.sub.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add-combine.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add-combine.ll index 8a8b6c5..37ef4a9 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add-combine.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add-combine.ll @@ -13,7 +13,7 @@ entry: %reduce1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg2) %reduce2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg3) %add1 = add i32 %reduce1, %reduce2 - store i32 %add1, i32* %ptr, align 4 + store i32 %add1, ptr %ptr, align 4 %add2 = add i32 %add1, %arg1 ret i32 %add2 } @@ -30,7 +30,7 @@ entry: %reduce1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg2) %reduce2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg3) %add1 = add i32 %reduce1, %reduce2 - store i32 %add1, i32* %ptr, align 4 + store i32 %add1, ptr %ptr, align 4 %add2 = add i32 %arg1, %add1 ret i32 %add2 } @@ -48,7 +48,7 @@ entry: %reduce1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg3) %reduce2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %arg4) %add1 = add i32 %arg1, %reduce1 - store i32 %add1, i32* %ptr, align 4 + store i32 %add1, ptr %ptr, align 4 %add2 = add i32 %arg2, %reduce2 %add3 = add i32 %add1, %add2 ret i32 %add3 @@ -66,13 +66,13 @@ define arm_aapcs_vfpcc i32 @test4(ptr %ptr, i32 %arg1, ptr %arg2) { ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: bx lr entry: - %load1 = load <4 x i32>, <4 x i32>* %arg2, align 4 - %gep = getelementptr inbounds i32, i32* %arg2, i32 1 - %load2 = load <4 x i32>, <4 x i32>* %gep, align 4 + %load1 = load <4 x i32>, ptr %arg2, align 4 + %gep = getelementptr inbounds i32, ptr %arg2, i32 1 + %load2 = load <4 x i32>, ptr %gep, align 4 %reduce1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %load1) %reduce2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %load2) %add1 = add i32 %arg1, %reduce1 - store i32 %add1, i32* %ptr, align 4 + store i32 %add1, ptr %ptr, align 4 %add2 = add i32 %add1, %reduce2 ret i32 %add2 } @@ -89,13 +89,13 @@ define arm_aapcs_vfpcc i32 @test5(ptr %ptr, i32 %arg1, ptr %arg2) { ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: bx lr entry: - %load1 = load <4 x i32>, <4 x i32>* %arg2, align 4 - %gep = getelementptr inbounds i32, i32* %arg2, i32 1 - %load2 = load <4 x i32>, <4 x i32>* %gep, align 4 + %load1 = load <4 x i32>, ptr %arg2, align 4 + %gep = getelementptr inbounds i32, ptr %arg2, i32 1 + %load2 = load <4 x i32>, ptr %gep, align 4 %reduce1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %load1) %reduce2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %load2) %add1 = add i32 %arg1, %reduce2 - store i32 %add1, i32* %ptr, align 4 + store i32 %add1, ptr %ptr, align 4 %add2 = add i32 %add1, %reduce1 ret i32 %add2 } diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll index 6ab1a93..c7661a1 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -tail-predication=enabled -verify-machineinstrs %s -o - | FileCheck %s -define i32 @add_i32(i32* nocapture readonly %x, i32 %n) { +define i32 @add_i32(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: add_i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -60,9 +60,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 0, %vector.ph ], [ %3, %vector.body ] - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i32>, ptr %1, align 4 %2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %wide.load) %3 = add i32 %2, %vec.phi %index.next = add i32 %index, 4 @@ -81,8 +81,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 - %5 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08 + %5 = load i32, ptr %arrayidx, align 4 %add = add nsw i32 %5, %r.07 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, %n @@ -93,7 +93,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret i32 %r.0.lcssa } -define i32 @mul_i32(i32* nocapture readonly %x, i32 %n) { +define i32 @mul_i32(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: mul_i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} @@ -152,9 +152,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, %vector.ph ], [ %2, %vector.body ] - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i32>, ptr %1, align 4 %2 = mul <4 x i32> %wide.load, %vec.phi %index.next = add i32 %index, 4 %3 = icmp eq i32 %index.next, %n.vec @@ -173,8 +173,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 - %5 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08 + %5 = load i32, ptr %arrayidx, align 4 %add = mul nsw i32 %5, %r.07 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, %n @@ -185,7 +185,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret i32 %r.0.lcssa } -define i32 @and_i32(i32* nocapture readonly %x, i32 %n) { +define i32 @and_i32(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: and_i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} @@ -249,9 +249,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, %vector.ph ], [ %2, %vector.body ] - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i32>, ptr %1, align 4 %2 = and <4 x i32> %wide.load, %vec.phi %index.next = add i32 %index, 4 %3 = icmp eq i32 %index.next, %n.vec @@ -270,8 +270,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 - %5 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08 + %5 = load i32, ptr %arrayidx, align 4 %add = and i32 %5, %r.07 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, %n @@ -282,7 +282,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret i32 %r.0.lcssa } -define i32 @or_i32(i32* nocapture readonly %x, i32 %n) { +define i32 @or_i32(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: or_i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} @@ -346,9 +346,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %2, %vector.body ] - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i32>, ptr %1, align 4 %2 = or <4 x i32> %wide.load, %vec.phi %index.next = add i32 %index, 4 %3 = icmp eq i32 %index.next, %n.vec @@ -367,8 +367,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 - %5 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08 + %5 = load i32, ptr %arrayidx, align 4 %add = or i32 %5, %r.07 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, %n @@ -379,7 +379,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret i32 %r.0.lcssa } -define i32 @xor_i32(i32* nocapture readonly %x, i32 %n) { +define i32 @xor_i32(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: xor_i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} @@ -443,9 +443,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %2, %vector.body ] - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i32>, ptr %1, align 4 %2 = xor <4 x i32> %wide.load, %vec.phi %index.next = add i32 %index, 4 %3 = icmp eq i32 %index.next, %n.vec @@ -464,8 +464,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 - %5 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08 + %5 = load i32, ptr %arrayidx, align 4 %add = xor i32 %5, %r.07 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, %n @@ -476,7 +476,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret i32 %r.0.lcssa } -define float @fadd_f32(float* nocapture readonly %x, i32 %n) { +define float @fadd_f32(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: fadd_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -542,9 +542,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x float> [ zeroinitializer, %vector.ph ], [ %2, %vector.body ] - %0 = getelementptr inbounds float, float* %x, i32 %index - %1 = bitcast float* %0 to <4 x float>* - %wide.load = load <4 x float>, <4 x float>* %1, align 4 + %0 = getelementptr inbounds float, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x float>, ptr %1, align 4 %2 = fadd fast <4 x float> %wide.load, %vec.phi %index.next = add i32 %index, 4 %3 = icmp eq i32 %index.next, %n.vec @@ -563,8 +563,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi float [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds float, float* %x, i32 %i.08 - %5 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.08 + %5 = load float, ptr %arrayidx, align 4 %add = fadd fast float %5, %r.07 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, %n @@ -575,7 +575,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret float %r.0.lcssa } -define float @fmul_f32(float* nocapture readonly %x, i32 %n) { +define float @fmul_f32(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: fmul_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -637,9 +637,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x float> [ <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %vector.ph ], [ %2, %vector.body ] - %0 = getelementptr inbounds float, float* %x, i32 %index - %1 = bitcast float* %0 to <4 x float>* - %wide.load = load <4 x float>, <4 x float>* %1, align 4 + %0 = getelementptr inbounds float, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x float>, ptr %1, align 4 %2 = fmul fast <4 x float> %wide.load, %vec.phi %index.next = add i32 %index, 4 %3 = icmp eq i32 %index.next, %n.vec @@ -658,8 +658,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi float [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds float, float* %x, i32 %i.08 - %5 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.08 + %5 = load float, ptr %arrayidx, align 4 %add = fmul fast float %5, %r.07 %inc = add nuw nsw i32 %i.08, 1 %exitcond = icmp eq i32 %inc, %n @@ -670,7 +670,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret float %r.0.lcssa } -define i32 @smin_i32(i32* nocapture readonly %x, i32 %n) { +define i32 @smin_i32(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: smin_i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -732,9 +732,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, %vector.ph ], [ %3, %vector.body ] - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i32>, ptr %1, align 4 %2 = icmp slt <4 x i32> %vec.phi, %wide.load %3 = select <4 x i1> %2, <4 x i32> %vec.phi, <4 x i32> %wide.load %index.next = add i32 %index, 4 @@ -754,8 +754,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 - %6 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08 + %6 = load i32, ptr %arrayidx, align 4 %c = icmp slt i32 %r.07, %6 %add = select i1 %c, i32 %r.07, i32 %6 %inc = add nuw nsw i32 %i.08, 1 @@ -767,7 +767,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret i32 %r.0.lcssa } -define i32 @smin_i32_inloop(i32* nocapture readonly %x, i32 %n) { +define i32 @smin_i32_inloop(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: smin_i32_inloop: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -827,9 +827,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 2147483647, %vector.ph ], [ %3, %vector.body ] - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i32>, ptr %1, align 4 %l5 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %wide.load) %2 = icmp slt i32 %vec.phi, %l5 %3 = select i1 %2, i32 %vec.phi, i32 %l5 @@ -850,8 +850,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 - %6 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08 + %6 = load i32, ptr %arrayidx, align 4 %c = icmp slt i32 %r.07, %6 %add = select i1 %c, i32 %r.07, i32 %6 %inc = add nuw nsw i32 %i.08, 1 @@ -863,7 +863,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret i32 %r.0.lcssa } -define i32 @smax_i32(i32* nocapture readonly %x, i32 %n) { +define i32 @smax_i32(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: smax_i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -925,9 +925,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %vector.ph ], [ %3, %vector.body ] - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i32>, ptr %1, align 4 %2 = icmp sgt <4 x i32> %vec.phi, %wide.load %3 = select <4 x i1> %2, <4 x i32> %vec.phi, <4 x i32> %wide.load %index.next = add i32 %index, 4 @@ -947,8 +947,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 - %6 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08 + %6 = load i32, ptr %arrayidx, align 4 %c = icmp sgt i32 %r.07, %6 %add = select i1 %c, i32 %r.07, i32 %6 %inc = add nuw nsw i32 %i.08, 1 @@ -960,7 +960,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret i32 %r.0.lcssa } -define i32 @smax_i32_inloop(i32* nocapture readonly %x, i32 %n) { +define i32 @smax_i32_inloop(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: smax_i32_inloop: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1020,9 +1020,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ -2147483648, %vector.ph ], [ %3, %vector.body ] - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i32>, ptr %1, align 4 %l5 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %wide.load) %2 = icmp sgt i32 %vec.phi, %l5 %3 = select i1 %2, i32 %vec.phi, i32 %l5 @@ -1043,8 +1043,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 - %6 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08 + %6 = load i32, ptr %arrayidx, align 4 %c = icmp sgt i32 %r.07, %6 %add = select i1 %c, i32 %r.07, i32 %6 %inc = add nuw nsw i32 %i.08, 1 @@ -1056,7 +1056,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret i32 %r.0.lcssa } -define i32 @umin_i32(i32* nocapture readonly %x, i32 %n) { +define i32 @umin_i32(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: umin_i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1118,9 +1118,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, %vector.ph ], [ %3, %vector.body ] - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i32>, ptr %1, align 4 %2 = icmp ult <4 x i32> %vec.phi, %wide.load %3 = select <4 x i1> %2, <4 x i32> %vec.phi, <4 x i32> %wide.load %index.next = add i32 %index, 4 @@ -1140,8 +1140,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 - %6 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08 + %6 = load i32, ptr %arrayidx, align 4 %c = icmp ult i32 %r.07, %6 %add = select i1 %c, i32 %r.07, i32 %6 %inc = add nuw nsw i32 %i.08, 1 @@ -1153,7 +1153,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret i32 %r.0.lcssa } -define i32 @umin_i32_inloop(i32* nocapture readonly %x, i32 %n) { +define i32 @umin_i32_inloop(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: umin_i32_inloop: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1213,9 +1213,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ -1, %vector.ph ], [ %3, %vector.body ] - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i32>, ptr %1, align 4 %l5 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %wide.load) %2 = icmp ult i32 %vec.phi, %l5 %3 = select i1 %2, i32 %vec.phi, i32 %l5 @@ -1236,8 +1236,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 - %6 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08 + %6 = load i32, ptr %arrayidx, align 4 %c = icmp ugt i32 %r.07, %6 %add = select i1 %c, i32 %r.07, i32 %6 %inc = add nuw nsw i32 %i.08, 1 @@ -1249,7 +1249,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret i32 %r.0.lcssa } -define i32 @umax_i32(i32* nocapture readonly %x, i32 %n) { +define i32 @umax_i32(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: umax_i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1311,9 +1311,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ] - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i32>, ptr %1, align 4 %2 = icmp ugt <4 x i32> %vec.phi, %wide.load %3 = select <4 x i1> %2, <4 x i32> %vec.phi, <4 x i32> %wide.load %index.next = add i32 %index, 4 @@ -1333,8 +1333,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 - %6 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08 + %6 = load i32, ptr %arrayidx, align 4 %c = icmp ugt i32 %r.07, %6 %add = select i1 %c, i32 %r.07, i32 %6 %inc = add nuw nsw i32 %i.08, 1 @@ -1346,7 +1346,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret i32 %r.0.lcssa } -define i32 @umax_i32_inloop(i32* nocapture readonly %x, i32 %n) { +define i32 @umax_i32_inloop(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: umax_i32_inloop: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1406,9 +1406,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 0, %vector.ph ], [ %3, %vector.body ] - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.load = load <4 x i32>, <4 x i32>* %1, align 4 + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i32>, ptr %1, align 4 %l5 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %wide.load) %2 = icmp ugt i32 %vec.phi, %l5 %3 = select i1 %2, i32 %vec.phi, i32 %l5 @@ -1429,8 +1429,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi i32 [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 - %6 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %i.08 + %6 = load i32, ptr %arrayidx, align 4 %c = icmp ugt i32 %r.07, %6 %add = select i1 %c, i32 %r.07, i32 %6 %inc = add nuw nsw i32 %i.08, 1 @@ -1442,7 +1442,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret i32 %r.0.lcssa } -define float @fmin_f32(float* nocapture readonly %x, i32 %n) { +define float @fmin_f32(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: fmin_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1511,9 +1511,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x float> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ] - %0 = getelementptr inbounds float, float* %x, i32 %index - %1 = bitcast float* %0 to <4 x float>* - %wide.load = load <4 x float>, <4 x float>* %1, align 4 + %0 = getelementptr inbounds float, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x float>, ptr %1, align 4 %2 = fcmp ult <4 x float> %vec.phi, %wide.load %3 = select <4 x i1> %2, <4 x float> %vec.phi, <4 x float> %wide.load %index.next = add i32 %index, 4 @@ -1533,8 +1533,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi float [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds float, float* %x, i32 %i.08 - %6 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.08 + %6 = load float, ptr %arrayidx, align 4 %c = fcmp ult float %r.07, %6 %add = select i1 %c, float %r.07, float %6 %inc = add nuw nsw i32 %i.08, 1 @@ -1546,7 +1546,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret float %r.0.lcssa } -define float @fmax_f32(float* nocapture readonly %x, i32 %n) { +define float @fmax_f32(ptr nocapture readonly %x, i32 %n) { ; CHECK-LABEL: fmax_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1615,9 +1615,9 @@ vector.ph: ; preds = %for.body.preheader vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi <4 x float> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ] - %0 = getelementptr inbounds float, float* %x, i32 %index - %1 = bitcast float* %0 to <4 x float>* - %wide.load = load <4 x float>, <4 x float>* %1, align 4 + %0 = getelementptr inbounds float, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x float>, ptr %1, align 4 %2 = fcmp ugt <4 x float> %vec.phi, %wide.load %3 = select <4 x i1> %2, <4 x float> %vec.phi, <4 x float> %wide.load %index.next = add i32 %index, 4 @@ -1637,8 +1637,8 @@ for.body.preheader1: ; preds = %middle.block, %for. for.body: ; preds = %for.body.preheader1, %for.body %i.08 = phi i32 [ %inc, %for.body ], [ %i.08.ph, %for.body.preheader1 ] %r.07 = phi float [ %add, %for.body ], [ %r.07.ph, %for.body.preheader1 ] - %arrayidx = getelementptr inbounds float, float* %x, i32 %i.08 - %6 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %x, i32 %i.08 + %6 = load float, ptr %arrayidx, align 4 %c = fcmp ugt float %r.07, %6 %add = select i1 %c, float %r.07, float %6 %inc = add nuw nsw i32 %i.08, 1 @@ -1650,7 +1650,7 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret float %r.0.lcssa } -define i32 @add4i32(i32* noalias nocapture readonly %x, i32 %n) { +define i32 @add4i32(ptr noalias nocapture readonly %x, i32 %n) { ; CHECK-LABEL: add4i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1684,9 +1684,9 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 0, %vector.ph ], [ %4, %vector.body ] %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %2 = select <4 x i1> %active.lane.mask, <4 x i32> %wide.masked.load, <4 x i32> zeroinitializer %3 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %2) %4 = add i32 %3, %vec.phi @@ -1699,7 +1699,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret i32 %s.0.lcssa } -define i32 @mla4i32(i32* noalias nocapture readonly %x, i32* noalias nocapture readonly %y, i32 %n) { +define i32 @mla4i32(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) { ; CHECK-LABEL: mla4i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1734,12 +1734,12 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 0, %vector.ph ], [ %7, %vector.body ] %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) - %2 = getelementptr inbounds i32, i32* %y, i32 %index - %3 = bitcast i32* %2 to <4 x i32>* - %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %2 = getelementptr inbounds i32, ptr %y, i32 %index + %3 = bitcast ptr %2 to ptr + %wide.masked.load13 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %3, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %4 = mul nsw <4 x i32> %wide.masked.load13, %wide.masked.load %5 = select <4 x i1> %active.lane.mask, <4 x i32> %4, <4 x i32> zeroinitializer %6 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %5) @@ -1753,7 +1753,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret i32 %s.0.lcssa } -define i32 @add8i32(i16* noalias nocapture readonly %x, i32 %n) { +define i32 @add8i32(ptr noalias nocapture readonly %x, i32 %n) { ; CHECK-LABEL: add8i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1787,9 +1787,9 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 0, %vector.ph ], [ %5, %vector.body ] %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i16, i16* %x, i32 %index - %1 = bitcast i16* %0 to <8 x i16>* - %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) + %0 = getelementptr inbounds i16, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) %2 = sext <8 x i16> %wide.masked.load to <8 x i32> %3 = select <8 x i1> %active.lane.mask, <8 x i32> %2, <8 x i32> zeroinitializer %4 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %3) @@ -1803,7 +1803,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret i32 %s.0.lcssa } -define i32 @mla8i32(i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y, i32 %n) { +define i32 @mla8i32(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) { ; CHECK-LABEL: mla8i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1838,13 +1838,13 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 0, %vector.ph ], [ %9, %vector.body ] %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i16, i16* %x, i32 %index - %1 = bitcast i16* %0 to <8 x i16>* - %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) + %0 = getelementptr inbounds i16, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) %2 = sext <8 x i16> %wide.masked.load to <8 x i32> - %3 = getelementptr inbounds i16, i16* %y, i32 %index - %4 = bitcast i16* %3 to <8 x i16>* - %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %4, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) + %3 = getelementptr inbounds i16, ptr %y, i32 %index + %4 = bitcast ptr %3 to ptr + %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %4, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) %5 = sext <8 x i16> %wide.masked.load14 to <8 x i32> %6 = mul nsw <8 x i32> %5, %2 %7 = select <8 x i1> %active.lane.mask, <8 x i32> %6, <8 x i32> zeroinitializer @@ -1859,7 +1859,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret i32 %s.0.lcssa } -define i32 @add16i32(i8* noalias nocapture readonly %x, i32 %n) { +define i32 @add16i32(ptr noalias nocapture readonly %x, i32 %n) { ; CHECK-LABEL: add16i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1893,9 +1893,9 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 0, %vector.ph ], [ %5, %vector.body ] %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i8, i8* %x, i32 %index - %1 = bitcast i8* %0 to <16 x i8>* - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %0 = getelementptr inbounds i8, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %2 = zext <16 x i8> %wide.masked.load to <16 x i32> %3 = select <16 x i1> %active.lane.mask, <16 x i32> %2, <16 x i32> zeroinitializer %4 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %3) @@ -1909,7 +1909,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret i32 %s.0.lcssa } -define i32 @mla16i32(i8* noalias nocapture readonly %x, i8* noalias nocapture readonly %y, i32 %n) { +define i32 @mla16i32(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) { ; CHECK-LABEL: mla16i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1944,13 +1944,13 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i32 [ 0, %vector.ph ], [ %9, %vector.body ] %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i8, i8* %x, i32 %index - %1 = bitcast i8* %0 to <16 x i8>* - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %0 = getelementptr inbounds i8, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %2 = zext <16 x i8> %wide.masked.load to <16 x i32> - %3 = getelementptr inbounds i8, i8* %y, i32 %index - %4 = bitcast i8* %3 to <16 x i8>* - %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %4, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %3 = getelementptr inbounds i8, ptr %y, i32 %index + %4 = bitcast ptr %3 to ptr + %wide.masked.load14 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %4, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %5 = zext <16 x i8> %wide.masked.load14 to <16 x i32> %6 = mul nuw nsw <16 x i32> %5, %2 %7 = select <16 x i1> %active.lane.mask, <16 x i32> %6, <16 x i32> zeroinitializer @@ -1965,7 +1965,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret i32 %s.0.lcssa } -define signext i16 @add8i16(i16* noalias nocapture readonly %x, i32 %n) { +define signext i16 @add8i16(ptr noalias nocapture readonly %x, i32 %n) { ; CHECK-LABEL: add8i16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -1999,9 +1999,9 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i16 [ 0, %vector.ph ], [ %4, %vector.body ] %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i16, i16* %x, i32 %index - %1 = bitcast i16* %0 to <8 x i16>* - %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) + %0 = getelementptr inbounds i16, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) %2 = select <8 x i1> %active.lane.mask, <8 x i16> %wide.masked.load, <8 x i16> zeroinitializer %3 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %2) %4 = add i16 %3, %vec.phi @@ -2014,7 +2014,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret i16 %s.0.lcssa } -define signext i16 @mla8i16(i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y, i32 %n) { +define signext i16 @mla8i16(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) { ; CHECK-LABEL: mla8i16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -2049,12 +2049,12 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i16 [ 0, %vector.ph ], [ %7, %vector.body ] %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i16, i16* %x, i32 %index - %1 = bitcast i16* %0 to <8 x i16>* - %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) - %2 = getelementptr inbounds i16, i16* %y, i32 %index - %3 = bitcast i16* %2 to <8 x i16>* - %wide.masked.load16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %3, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) + %0 = getelementptr inbounds i16, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) + %2 = getelementptr inbounds i16, ptr %y, i32 %index + %3 = bitcast ptr %2 to ptr + %wide.masked.load16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %3, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) %4 = mul <8 x i16> %wide.masked.load16, %wide.masked.load %5 = select <8 x i1> %active.lane.mask, <8 x i16> %4, <8 x i16> zeroinitializer %6 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %5) @@ -2068,7 +2068,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret i16 %s.0.lcssa } -define signext i16 @add16i16(i8* noalias nocapture readonly %x, i32 %n) { +define signext i16 @add16i16(ptr noalias nocapture readonly %x, i32 %n) { ; CHECK-LABEL: add16i16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -2102,9 +2102,9 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i16 [ 0, %vector.ph ], [ %5, %vector.body ] %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i8, i8* %x, i32 %index - %1 = bitcast i8* %0 to <16 x i8>* - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %0 = getelementptr inbounds i8, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %2 = zext <16 x i8> %wide.masked.load to <16 x i16> %3 = select <16 x i1> %active.lane.mask, <16 x i16> %2, <16 x i16> zeroinitializer %4 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %3) @@ -2118,7 +2118,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret i16 %s.0.lcssa } -define signext i16 @mla16i16(i8* noalias nocapture readonly %x, i8* noalias nocapture readonly %y, i32 %n) { +define signext i16 @mla16i16(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) { ; CHECK-LABEL: mla16i16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -2153,13 +2153,13 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i16 [ 0, %vector.ph ], [ %9, %vector.body ] %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i8, i8* %x, i32 %index - %1 = bitcast i8* %0 to <16 x i8>* - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %0 = getelementptr inbounds i8, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %2 = zext <16 x i8> %wide.masked.load to <16 x i16> - %3 = getelementptr inbounds i8, i8* %y, i32 %index - %4 = bitcast i8* %3 to <16 x i8>* - %wide.masked.load18 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %4, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %3 = getelementptr inbounds i8, ptr %y, i32 %index + %4 = bitcast ptr %3 to ptr + %wide.masked.load18 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %4, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %5 = zext <16 x i8> %wide.masked.load18 to <16 x i16> %6 = mul nuw <16 x i16> %5, %2 %7 = select <16 x i1> %active.lane.mask, <16 x i16> %6, <16 x i16> zeroinitializer @@ -2174,7 +2174,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret i16 %s.0.lcssa } -define zeroext i8 @add16i8(i8* noalias nocapture readonly %x, i32 %n) { +define zeroext i8 @add16i8(ptr noalias nocapture readonly %x, i32 %n) { ; CHECK-LABEL: add16i8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -2208,9 +2208,9 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i8 [ 0, %vector.ph ], [ %4, %vector.body ] %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i8, i8* %x, i32 %index - %1 = bitcast i8* %0 to <16 x i8>* - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %0 = getelementptr inbounds i8, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %2 = select <16 x i1> %active.lane.mask, <16 x i8> %wide.masked.load, <16 x i8> zeroinitializer %3 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %2) %4 = add i8 %3, %vec.phi @@ -2223,7 +2223,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret i8 %s.0.lcssa } -define zeroext i8 @mla16i8(i8* noalias nocapture readonly %x, i8* noalias nocapture readonly %y, i32 %n) { +define zeroext i8 @mla16i8(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) { ; CHECK-LABEL: mla16i8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -2258,12 +2258,12 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i8 [ 0, %vector.ph ], [ %7, %vector.body ] %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i8, i8* %x, i32 %index - %1 = bitcast i8* %0 to <16 x i8>* - %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) - %2 = getelementptr inbounds i8, i8* %y, i32 %index - %3 = bitcast i8* %2 to <16 x i8>* - %wide.masked.load15 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %0 = getelementptr inbounds i8, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %1, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) + %2 = getelementptr inbounds i8, ptr %y, i32 %index + %3 = bitcast ptr %2 to ptr + %wide.masked.load15 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %3, i32 1, <16 x i1> %active.lane.mask, <16 x i8> undef) %4 = mul <16 x i8> %wide.masked.load15, %wide.masked.load %5 = select <16 x i1> %active.lane.mask, <16 x i8> %4, <16 x i8> zeroinitializer %6 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %5) @@ -2277,7 +2277,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret i8 %s.0.lcssa } -define i64 @add4i64(i32* noalias nocapture readonly %x, i32 %n) { +define i64 @add4i64(ptr noalias nocapture readonly %x, i32 %n) { ; CHECK-LABEL: add4i64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -2313,9 +2313,9 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i64 [ 0, %vector.ph ], [ %5, %vector.body ] %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %2 = sext <4 x i32> %wide.masked.load to <4 x i64> %3 = select <4 x i1> %active.lane.mask, <4 x i64> %2, <4 x i64> zeroinitializer %4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %3) @@ -2329,7 +2329,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret i64 %s.0.lcssa } -define i64 @mla4i64(i32* noalias nocapture readonly %x, i32* noalias nocapture readonly %y, i32 %n) { +define i64 @mla4i64(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) { ; CHECK-LABEL: mla4i64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -2366,13 +2366,13 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i64 [ 0, %vector.ph ], [ %9, %vector.body ] %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i32, i32* %x, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %0 = getelementptr inbounds i32, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %2 = sext <4 x i32> %wide.masked.load to <4 x i64> - %3 = getelementptr inbounds i32, i32* %y, i32 %index - %4 = bitcast i32* %3 to <4 x i32>* - %wide.masked.load14 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %4, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) + %3 = getelementptr inbounds i32, ptr %y, i32 %index + %4 = bitcast ptr %3 to ptr + %wide.masked.load14 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %4, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef) %5 = sext <4 x i32> %wide.masked.load14 to <4 x i64> %6 = mul nsw <4 x i64> %5, %2 %7 = select <4 x i1> %active.lane.mask, <4 x i64> %6, <4 x i64> zeroinitializer @@ -2387,7 +2387,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret i64 %s.0.lcssa } -define i64 @mla8i64(i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y, i32 %n) { +define i64 @mla8i64(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, i32 %n) { ; CHECK-LABEL: mla8i64: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -2424,13 +2424,13 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %vec.phi = phi i64 [ 0, %vector.ph ], [ %9, %vector.body ] %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i16, i16* %x, i32 %index - %1 = bitcast i16* %0 to <8 x i16>* - %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) + %0 = getelementptr inbounds i16, ptr %x, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) %2 = sext <8 x i16> %wide.masked.load to <8 x i64> - %3 = getelementptr inbounds i16, i16* %y, i32 %index - %4 = bitcast i16* %3 to <8 x i16>* - %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %4, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) + %3 = getelementptr inbounds i16, ptr %y, i32 %index + %4 = bitcast ptr %3 to ptr + %wide.masked.load14 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %4, i32 2, <8 x i1> %active.lane.mask, <8 x i16> undef) %5 = sext <8 x i16> %wide.masked.load14 to <8 x i64> %6 = mul nsw <8 x i64> %5, %2 %7 = select <8 x i1> %active.lane.mask, <8 x i64> %6, <8 x i64> zeroinitializer @@ -2446,12 +2446,12 @@ for.cond.cleanup: ; preds = %vector.body, %entry } declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #1 -declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2 +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2 declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) #1 -declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) #2 +declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>) #2 declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) #3 declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32) #1 -declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) #2 +declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) #2 declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) #3 declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) #3 declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) #3 diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll index e6cb002..04be18e 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll @@ -191,7 +191,7 @@ define arm_aapcs_vfpcc <8 x half> @minpredf16_c(<8 x half> %a, <8 x half> %b) { ; Loops -define void @loop_absmax32(float* nocapture readonly %0, i32 %1, float* nocapture %2) { +define void @loop_absmax32(ptr nocapture readonly %0, i32 %1, ptr nocapture %2) { ; CHECK-LABEL: loop_absmax32: ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r7, lr} @@ -224,10 +224,10 @@ define void @loop_absmax32(float* nocapture readonly %0, i32 %1, float* nocaptur 6: ; preds = %3, %6 %7 = phi i32 [ %16, %6 ], [ %4, %3 ] %8 = phi <4 x float> [ %15, %6 ], [ zeroinitializer, %3 ] - %9 = phi float* [ %12, %6 ], [ %0, %3 ] - %10 = bitcast float* %9 to <4 x float>* - %11 = load <4 x float>, <4 x float>* %10, align 4 - %12 = getelementptr inbounds float, float* %9, i32 4 + %9 = phi ptr [ %12, %6 ], [ %0, %3 ] + %10 = bitcast ptr %9 to ptr + %11 = load <4 x float>, ptr %10, align 4 + %12 = getelementptr inbounds float, ptr %9, i32 4 %13 = tail call fast <4 x float> @llvm.fabs.v4f32(<4 x float> %11) %14 = tail call fast <4 x float> @llvm.fabs.v4f32(<4 x float> %8) %15 = tail call fast <4 x float> @llvm.maxnum.v4f32(<4 x float> %14, <4 x float> %13) @@ -238,11 +238,11 @@ define void @loop_absmax32(float* nocapture readonly %0, i32 %1, float* nocaptur 18: ; preds = %6, %3 %19 = phi <4 x float> [ zeroinitializer, %3 ], [ %15, %6 ] %20 = tail call fast float @llvm.arm.mve.maxnmav.f32.v4f32(float 0.000000e+00, <4 x float> %19) - store float %20, float* %2, align 4 + store float %20, ptr %2, align 4 ret void } -define void @loop_absmax32_c(float* nocapture readonly %0, i32 %1, float* nocapture %2) { +define void @loop_absmax32_c(ptr nocapture readonly %0, i32 %1, ptr nocapture %2) { ; CHECK-LABEL: loop_absmax32_c: ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r7, lr} @@ -275,10 +275,10 @@ define void @loop_absmax32_c(float* nocapture readonly %0, i32 %1, float* nocapt 6: ; preds = %3, %6 %7 = phi i32 [ %16, %6 ], [ %4, %3 ] %8 = phi <4 x float> [ %15, %6 ], [ zeroinitializer, %3 ] - %9 = phi float* [ %12, %6 ], [ %0, %3 ] - %10 = bitcast float* %9 to <4 x float>* - %11 = load <4 x float>, <4 x float>* %10, align 4 - %12 = getelementptr inbounds float, float* %9, i32 4 + %9 = phi ptr [ %12, %6 ], [ %0, %3 ] + %10 = bitcast ptr %9 to ptr + %11 = load <4 x float>, ptr %10, align 4 + %12 = getelementptr inbounds float, ptr %9, i32 4 %13 = tail call fast <4 x float> @llvm.fabs.v4f32(<4 x float> %11) %14 = tail call fast <4 x float> @llvm.fabs.v4f32(<4 x float> %8) %15 = tail call fast <4 x float> @llvm.maxnum.v4f32(<4 x float> %13, <4 x float> %14) @@ -289,11 +289,11 @@ define void @loop_absmax32_c(float* nocapture readonly %0, i32 %1, float* nocapt 18: ; preds = %6, %3 %19 = phi <4 x float> [ zeroinitializer, %3 ], [ %15, %6 ] %20 = tail call fast float @llvm.arm.mve.maxnmav.f32.v4f32(float 0.000000e+00, <4 x float> %19) - store float %20, float* %2, align 4 + store float %20, ptr %2, align 4 ret void } -define void @loop_absmax32_pred(float* %0, i32 %1, float* nocapture %2) { +define void @loop_absmax32_pred(ptr %0, i32 %1, ptr nocapture %2) { ; CHECK-LABEL: loop_absmax32_pred: ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r7, lr} @@ -320,11 +320,11 @@ define void @loop_absmax32_pred(float* %0, i32 %1, float* nocapture %2) { 4: ; preds = %4, %3 %5 = phi <4 x float> [ zeroinitializer, %3 ], [ %12, %4 ] %6 = phi i32 [ %1, %3 ], [ %13, %4 ] - %7 = phi float* [ %0, %3 ], [ %11, %4 ] + %7 = phi ptr [ %0, %3 ], [ %11, %4 ] %8 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %6) - %9 = bitcast float* %7 to <4 x float>* - %10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %9, i32 4, <4 x i1> %8, <4 x float> zeroinitializer) - %11 = getelementptr inbounds float, float* %7, i32 4 + %9 = bitcast ptr %7 to ptr + %10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %9, i32 4, <4 x i1> %8, <4 x float> zeroinitializer) + %11 = getelementptr inbounds float, ptr %7, i32 4 %12 = tail call fast <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> %5, <4 x float> %10, <4 x i1> %8) %13 = add nsw i32 %6, -4 %14 = icmp sgt i32 %6, 4 @@ -332,11 +332,11 @@ define void @loop_absmax32_pred(float* %0, i32 %1, float* nocapture %2) { 15: ; preds = %4 %16 = tail call fast float @llvm.arm.mve.maxnmav.f32.v4f32(float 0.000000e+00, <4 x float> %12) - store float %16, float* %2, align 4 + store float %16, ptr %2, align 4 ret void } -define void @loop_absmax32_pred_c(float* %0, i32 %1, float* nocapture %2) { +define void @loop_absmax32_pred_c(ptr %0, i32 %1, ptr nocapture %2) { ; CHECK-LABEL: loop_absmax32_pred_c: ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r7, lr} @@ -364,11 +364,11 @@ define void @loop_absmax32_pred_c(float* %0, i32 %1, float* nocapture %2) { 4: ; preds = %4, %3 %5 = phi <4 x float> [ zeroinitializer, %3 ], [ %12, %4 ] %6 = phi i32 [ %1, %3 ], [ %13, %4 ] - %7 = phi float* [ %0, %3 ], [ %11, %4 ] + %7 = phi ptr [ %0, %3 ], [ %11, %4 ] %8 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %6) - %9 = bitcast float* %7 to <4 x float>* - %10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %9, i32 4, <4 x i1> %8, <4 x float> zeroinitializer) - %11 = getelementptr inbounds float, float* %7, i32 4 + %9 = bitcast ptr %7 to ptr + %10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0(ptr %9, i32 4, <4 x i1> %8, <4 x float> zeroinitializer) + %11 = getelementptr inbounds float, ptr %7, i32 4 %12 = tail call fast <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float> %10, <4 x float> %5, <4 x i1> %8) %13 = add nsw i32 %6, -4 %14 = icmp sgt i32 %6, 4 @@ -376,7 +376,7 @@ define void @loop_absmax32_pred_c(float* %0, i32 %1, float* nocapture %2) { 15: ; preds = %4 %16 = tail call fast float @llvm.arm.mve.maxnmav.f32.v4f32(float 0.000000e+00, <4 x float> %12) - store float %16, float* %2, align 4 + store float %16, ptr %2, align 4 ret void } @@ -385,7 +385,7 @@ define void @loop_absmax32_pred_c(float* %0, i32 %1, float* nocapture %2) { -define void @loop_absmax16(half* nocapture readonly %0, i32 %1, half* nocapture %2) { +define void @loop_absmax16(ptr nocapture readonly %0, i32 %1, ptr nocapture %2) { ; CHECK-LABEL: loop_absmax16: ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r7, lr} @@ -418,10 +418,10 @@ define void @loop_absmax16(half* nocapture readonly %0, i32 %1, half* nocapture 6: ; preds = %3, %6 %7 = phi i32 [ %16, %6 ], [ %4, %3 ] %8 = phi <8 x half> [ %15, %6 ], [ zeroinitializer, %3 ] - %9 = phi half* [ %12, %6 ], [ %0, %3 ] - %10 = bitcast half* %9 to <8 x half>* - %11 = load <8 x half>, <8 x half>* %10, align 4 - %12 = getelementptr inbounds half, half* %9, i32 4 + %9 = phi ptr [ %12, %6 ], [ %0, %3 ] + %10 = bitcast ptr %9 to ptr + %11 = load <8 x half>, ptr %10, align 4 + %12 = getelementptr inbounds half, ptr %9, i32 4 %13 = tail call fast <8 x half> @llvm.fabs.v8f16(<8 x half> %11) %14 = tail call fast <8 x half> @llvm.fabs.v8f16(<8 x half> %8) %15 = tail call fast <8 x half> @llvm.maxnum.v8f16(<8 x half> %14, <8 x half> %13) @@ -432,11 +432,11 @@ define void @loop_absmax16(half* nocapture readonly %0, i32 %1, half* nocapture 18: ; preds = %6, %3 %19 = phi <8 x half> [ zeroinitializer, %3 ], [ %15, %6 ] %20 = tail call fast half @llvm.arm.mve.maxnmav.f16.v8f16(half 0.000000e+00, <8 x half> %19) - store half %20, half* %2, align 4 + store half %20, ptr %2, align 4 ret void } -define void @loop_absmax16_c(half* nocapture readonly %0, i32 %1, half* nocapture %2) { +define void @loop_absmax16_c(ptr nocapture readonly %0, i32 %1, ptr nocapture %2) { ; CHECK-LABEL: loop_absmax16_c: ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r7, lr} @@ -469,10 +469,10 @@ define void @loop_absmax16_c(half* nocapture readonly %0, i32 %1, half* nocaptur 6: ; preds = %3, %6 %7 = phi i32 [ %16, %6 ], [ %4, %3 ] %8 = phi <8 x half> [ %15, %6 ], [ zeroinitializer, %3 ] - %9 = phi half* [ %12, %6 ], [ %0, %3 ] - %10 = bitcast half* %9 to <8 x half>* - %11 = load <8 x half>, <8 x half>* %10, align 4 - %12 = getelementptr inbounds half, half* %9, i32 4 + %9 = phi ptr [ %12, %6 ], [ %0, %3 ] + %10 = bitcast ptr %9 to ptr + %11 = load <8 x half>, ptr %10, align 4 + %12 = getelementptr inbounds half, ptr %9, i32 4 %13 = tail call fast <8 x half> @llvm.fabs.v8f16(<8 x half> %11) %14 = tail call fast <8 x half> @llvm.fabs.v8f16(<8 x half> %8) %15 = tail call fast <8 x half> @llvm.maxnum.v8f16(<8 x half> %13, <8 x half> %14) @@ -483,11 +483,11 @@ define void @loop_absmax16_c(half* nocapture readonly %0, i32 %1, half* nocaptur 18: ; preds = %6, %3 %19 = phi <8 x half> [ zeroinitializer, %3 ], [ %15, %6 ] %20 = tail call fast half @llvm.arm.mve.maxnmav.f16.v8f16(half 0.000000e+00, <8 x half> %19) - store half %20, half* %2, align 4 + store half %20, ptr %2, align 4 ret void } -define void @loop_absmax16_pred(half* %0, i32 %1, half* nocapture %2) { +define void @loop_absmax16_pred(ptr %0, i32 %1, ptr nocapture %2) { ; CHECK-LABEL: loop_absmax16_pred: ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r7, lr} @@ -514,11 +514,11 @@ define void @loop_absmax16_pred(half* %0, i32 %1, half* nocapture %2) { 4: ; preds = %4, %3 %5 = phi <8 x half> [ zeroinitializer, %3 ], [ %12, %4 ] %6 = phi i32 [ %1, %3 ], [ %13, %4 ] - %7 = phi half* [ %0, %3 ], [ %11, %4 ] + %7 = phi ptr [ %0, %3 ], [ %11, %4 ] %8 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %6) - %9 = bitcast half* %7 to <8 x half>* - %10 = tail call fast <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %9, i32 4, <8 x i1> %8, <8 x half> zeroinitializer) - %11 = getelementptr inbounds half, half* %7, i32 4 + %9 = bitcast ptr %7 to ptr + %10 = tail call fast <8 x half> @llvm.masked.load.v8f16.p0(ptr %9, i32 4, <8 x i1> %8, <8 x half> zeroinitializer) + %11 = getelementptr inbounds half, ptr %7, i32 4 %12 = tail call fast <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> %5, <8 x half> %10, <8 x i1> %8) %13 = add nsw i32 %6, -8 %14 = icmp sgt i32 %6, 8 @@ -526,11 +526,11 @@ define void @loop_absmax16_pred(half* %0, i32 %1, half* nocapture %2) { 15: ; preds = %4 %16 = tail call fast half @llvm.arm.mve.maxnmav.f16.v8f16(half 0.000000e+00, <8 x half> %12) - store half %16, half* %2, align 4 + store half %16, ptr %2, align 4 ret void } -define void @loop_absmax16_pred_c(half* %0, i32 %1, half* nocapture %2) { +define void @loop_absmax16_pred_c(ptr %0, i32 %1, ptr nocapture %2) { ; CHECK-LABEL: loop_absmax16_pred_c: ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r7, lr} @@ -558,11 +558,11 @@ define void @loop_absmax16_pred_c(half* %0, i32 %1, half* nocapture %2) { 4: ; preds = %4, %3 %5 = phi <8 x half> [ zeroinitializer, %3 ], [ %12, %4 ] %6 = phi i32 [ %1, %3 ], [ %13, %4 ] - %7 = phi half* [ %0, %3 ], [ %11, %4 ] + %7 = phi ptr [ %0, %3 ], [ %11, %4 ] %8 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %6) - %9 = bitcast half* %7 to <8 x half>* - %10 = tail call fast <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %9, i32 4, <8 x i1> %8, <8 x half> zeroinitializer) - %11 = getelementptr inbounds half, half* %7, i32 4 + %9 = bitcast ptr %7 to ptr + %10 = tail call fast <8 x half> @llvm.masked.load.v8f16.p0(ptr %9, i32 4, <8 x i1> %8, <8 x half> zeroinitializer) + %11 = getelementptr inbounds half, ptr %7, i32 4 %12 = tail call fast <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half> %10, <8 x half> %5, <8 x i1> %8) %13 = add nsw i32 %6, -8 %14 = icmp sgt i32 %6, 8 @@ -570,7 +570,7 @@ define void @loop_absmax16_pred_c(half* %0, i32 %1, half* nocapture %2) { 15: ; preds = %4 %16 = tail call fast half @llvm.arm.mve.maxnmav.f16.v8f16(half 0.000000e+00, <8 x half> %12) - store half %16, half* %2, align 4 + store half %16, ptr %2, align 4 ret void } @@ -579,7 +579,7 @@ define void @loop_absmax16_pred_c(half* %0, i32 %1, half* nocapture %2) { declare <4 x i1> @llvm.arm.mve.vctp32(i32) -declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) +declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) declare <4 x float> @llvm.arm.mve.vminnma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>) declare <4 x float> @llvm.arm.mve.vmaxnma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>) declare float @llvm.arm.mve.maxnmav.f32.v4f32(float, <4 x float>) @@ -588,7 +588,7 @@ declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) declare <8 x i1> @llvm.arm.mve.vctp16(i32) -declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32 immarg, <8 x i1>, <8 x half>) +declare <8 x half> @llvm.masked.load.v8f16.p0(ptr, i32 immarg, <8 x i1>, <8 x half>) declare <8 x half> @llvm.arm.mve.vminnma.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>) declare <8 x half> @llvm.arm.mve.vmaxnma.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>) declare half @llvm.arm.mve.maxnmav.f16.v8f16(half, <8 x half>) diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll b/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll index ac4abdb..f51f6c0 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s -define void @vmovl_s32(i32* noalias nocapture %d, i32* nocapture readonly %s, i32 %n) { +define void @vmovl_s32(ptr noalias nocapture %d, ptr nocapture readonly %s, i32 %n) { ; CHECK-LABEL: vmovl_s32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -31,14 +31,14 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i32, i32* %s, i32 %index - %1 = bitcast i32* %0 to <4 x i32>* - %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> poison) + %0 = getelementptr inbounds i32, ptr %s, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> poison) %2 = shl <4 x i32> %wide.masked.load, <i32 16, i32 16, i32 16, i32 16> %3 = ashr exact <4 x i32> %2, <i32 16, i32 16, i32 16, i32 16> - %4 = getelementptr inbounds i32, i32* %d, i32 %index - %5 = bitcast i32* %4 to <4 x i32>* - call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %5, i32 4, <4 x i1> %active.lane.mask) + %4 = getelementptr inbounds i32, ptr %d, i32 %index + %5 = bitcast ptr %4 to ptr + call void @llvm.masked.store.v4i32.p0(<4 x i32> %3, ptr %5, i32 4, <4 x i1> %active.lane.mask) %index.next = add i32 %index, 4 %6 = icmp eq i32 %index.next, %n.vec br i1 %6, label %for.cond.cleanup, label %vector.body @@ -48,7 +48,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry } -define void @vmovl_u16(i16* noalias nocapture %d, i16* nocapture readonly %s, i32 %n) { +define void @vmovl_u16(ptr noalias nocapture %d, ptr nocapture readonly %s, i32 %n) { ; CHECK-LABEL: vmovl_u16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -78,13 +78,13 @@ vector.ph: ; preds = %entry vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %n) - %0 = getelementptr inbounds i16, i16* %s, i32 %index - %1 = bitcast i16* %0 to <8 x i16>* - %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> poison) + %0 = getelementptr inbounds i16, ptr %s, i32 %index + %1 = bitcast ptr %0 to ptr + %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %1, i32 2, <8 x i1> %active.lane.mask, <8 x i16> poison) %2 = and <8 x i16> %wide.masked.load, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> - %3 = getelementptr inbounds i16, i16* %d, i32 %index - %4 = bitcast i16* %3 to <8 x i16>* - call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %2, <8 x i16>* %4, i32 2, <8 x i1> %active.lane.mask) + %3 = getelementptr inbounds i16, ptr %d, i32 %index + %4 = bitcast ptr %3 to ptr + call void @llvm.masked.store.v8i16.p0(<8 x i16> %2, ptr %4, i32 2, <8 x i1> %active.lane.mask) %index.next = add i32 %index, 8 %5 = icmp eq i32 %index.next, %n.vec br i1 %5, label %for.cond.cleanup, label %vector.body @@ -93,7 +93,7 @@ for.cond.cleanup: ; preds = %vector.body, %entry ret void } -define void @vmovl_16to32(i16* %d, i16* %s, i32 %n) { +define void @vmovl_16to32(ptr %d, ptr %s, i32 %n) { ; CHECK-LABEL: vmovl_16to32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -131,25 +131,25 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void for.body: ; preds = %entry, %for.body - %d.addr.016 = phi i16* [ %add.ptr3, %for.body ], [ %d, %entry ] - %s.addr.015 = phi i16* [ %add.ptr, %for.body ], [ %s, %entry ] + %d.addr.016 = phi ptr [ %add.ptr3, %for.body ], [ %d, %entry ] + %s.addr.015 = phi ptr [ %add.ptr, %for.body ], [ %s, %entry ] %i.014 = phi i32 [ %sub, %for.body ], [ %n, %entry ] %0 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %i.014) - %1 = bitcast i16* %s.addr.015 to <8 x i16>* - %2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %1, i32 2, <8 x i1> %0, <8 x i16> <i16 0, i16 poison, i16 0, i16 poison, i16 0, i16 poison, i16 0, i16 poison>) - %add.ptr = getelementptr inbounds i16, i16* %s.addr.015, i32 8 + %1 = bitcast ptr %s.addr.015 to ptr + %2 = tail call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %1, i32 2, <8 x i1> %0, <8 x i16> <i16 0, i16 poison, i16 0, i16 poison, i16 0, i16 poison, i16 0, i16 poison>) + %add.ptr = getelementptr inbounds i16, ptr %s.addr.015, i32 8 %3 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> %4 = sext <4 x i16> %3 to <4 x i32> %5 = bitcast <4 x i32> %4 to <8 x i16> - %6 = bitcast i16* %d.addr.016 to <8 x i16>* - tail call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %5, <8 x i16>* %6, i32 2, <8 x i1> %0) - %add.ptr3 = getelementptr inbounds i16, i16* %d.addr.016, i32 8 + %6 = bitcast ptr %d.addr.016 to ptr + tail call void @llvm.masked.store.v8i16.p0(<8 x i16> %5, ptr %6, i32 2, <8 x i1> %0) + %add.ptr3 = getelementptr inbounds i16, ptr %d.addr.016, i32 8 %sub = add nsw i32 %i.014, -8 %cmp = icmp sgt i32 %i.014, 8 br i1 %cmp, label %for.body, label %for.cond.cleanup } -define void @sunken_vmovl(i8* noalias %pTarget, i16 signext %iTargetStride, i8* noalias %pchAlpha, i16 signext %iAlphaStride, i16 %0, i8 zeroext %Colour) { +define void @sunken_vmovl(ptr noalias %pTarget, i16 signext %iTargetStride, ptr noalias %pchAlpha, i16 signext %iAlphaStride, i16 %0, i8 zeroext %Colour) { ; CHECK-LABEL: sunken_vmovl: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} @@ -177,34 +177,34 @@ define void @sunken_vmovl(i8* noalias %pTarget, i16 signext %iTargetStride, i8* entry: %conv3 = sext i16 %0 to i32 %1 = zext i8 %Colour to i32 - %2 = bitcast i8* %pTarget to <8 x i8>* - %3 = load <8 x i8>, <8 x i8>* %2, align 1 - %4 = bitcast i8* %pchAlpha to <8 x i8>* - %5 = load <8 x i8>, <8 x i8>* %4, align 1 + %2 = bitcast ptr %pTarget to ptr + %3 = load <8 x i8>, ptr %2, align 1 + %4 = bitcast ptr %pchAlpha to ptr + %5 = load <8 x i8>, ptr %4, align 1 br label %do.body do.body: ; preds = %do.body, %entry - %pchAlpha.addr.0.pn = phi i8* [ %pchAlpha, %entry ], [ %pAlpha.0, %do.body ] - %pTarget8.0 = phi i8* [ %pTarget, %entry ], [ %add.ptr5, %do.body ] + %pchAlpha.addr.0.pn = phi ptr [ %pchAlpha, %entry ], [ %pAlpha.0, %do.body ] + %pTarget8.0 = phi ptr [ %pTarget, %entry ], [ %add.ptr5, %do.body ] %blkCnt.0 = phi i32 [ %conv3, %entry ], [ %sub, %do.body ] %vecTarget.0.in = phi <8 x i8> [ %3, %entry ], [ %10, %do.body ] %vecTransp.0.in = phi <8 x i8> [ %5, %entry ], [ %13, %do.body ] %vecTransp.0 = zext <8 x i8> %vecTransp.0.in to <8 x i16> %vecTarget.0 = zext <8 x i8> %vecTarget.0.in to <8 x i16> - %pAlpha.0 = getelementptr inbounds i8, i8* %pchAlpha.addr.0.pn, i32 8 + %pAlpha.0 = getelementptr inbounds i8, ptr %pchAlpha.addr.0.pn, i32 8 %6 = tail call <8 x i1> @llvm.arm.mve.vctp16(i32 %blkCnt.0) %7 = tail call <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16> <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>, <8 x i16> %vecTransp.0, <8 x i1> %6, <8 x i16> undef) %8 = tail call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> %vecTarget.0, <8 x i16> %7, <8 x i1> %6, <8 x i16> undef) - %add.ptr5 = getelementptr inbounds i8, i8* %pTarget8.0, i32 8 - %9 = bitcast i8* %add.ptr5 to <8 x i8>* - %10 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* nonnull %9, i32 1, <8 x i1> %6, <8 x i8> zeroinitializer) + %add.ptr5 = getelementptr inbounds i8, ptr %pTarget8.0, i32 8 + %9 = bitcast ptr %add.ptr5 to ptr + %10 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr nonnull %9, i32 1, <8 x i1> %6, <8 x i8> zeroinitializer) %11 = tail call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> %8, <8 x i16> %vecTransp.0, i32 %1, <8 x i1> %6) - %12 = bitcast i8* %pAlpha.0 to <8 x i8>* - %13 = tail call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* nonnull %12, i32 1, <8 x i1> %6, <8 x i8> zeroinitializer) + %12 = bitcast ptr %pAlpha.0 to ptr + %13 = tail call <8 x i8> @llvm.masked.load.v8i8.p0(ptr nonnull %12, i32 1, <8 x i1> %6, <8 x i8> zeroinitializer) %14 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %11, i32 8, i32 1, <8 x i1> %6, <8 x i16> %11) %15 = trunc <8 x i16> %14 to <8 x i8> - %16 = bitcast i8* %pTarget8.0 to <8 x i8>* - tail call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %15, <8 x i8>* %16, i32 1, <8 x i1> %6) + %16 = bitcast ptr %pTarget8.0 to ptr + tail call void @llvm.masked.store.v8i8.p0(<8 x i8> %15, ptr %16, i32 1, <8 x i1> %6) %sub = add nsw i32 %blkCnt.0, -8 %cmp9 = icmp sgt i32 %blkCnt.0, 8 br i1 %cmp9, label %do.body, label %do.end @@ -214,15 +214,15 @@ do.end: ; preds = %do.body } declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) #1 -declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) #2 -declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) #3 +declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>) #2 +declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>) #3 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #1 -declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) #2 -declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) #3 +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) #2 +declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) #3 declare <8 x i1> @llvm.arm.mve.vctp16(i32) declare <8 x i16> @llvm.arm.mve.sub.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) declare <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) -declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>) +declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>) declare <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>) declare <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>) -declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32 immarg, <8 x i1>) +declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32 immarg, <8 x i1>) diff --git a/llvm/test/CodeGen/Thumb2/scavenge-lr.mir b/llvm/test/CodeGen/Thumb2/scavenge-lr.mir index 68b2cf8..5513bed 100644 --- a/llvm/test/CodeGen/Thumb2/scavenge-lr.mir +++ b/llvm/test/CodeGen/Thumb2/scavenge-lr.mir @@ -29,42 +29,42 @@ %S = type { [32 x i8] } - define void @f(%S* %arg) { + define void @f(ptr %arg) { entry: - %ppp..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -8 - %ppp..sroa_cast248 = bitcast %S* %ppp..sroa_idx to <8 x float>* - %ppp.copyload = load <8 x float>, <8 x float>* %ppp..sroa_cast248, align 32 + %ppp..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 -8 + %ppp..sroa_cast248 = bitcast ptr %ppp..sroa_idx to ptr + %ppp.copyload = load <8 x float>, ptr %ppp..sroa_cast248, align 32 - %xxx..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -5 - %xxx..sroa_cast248 = bitcast %S* %xxx..sroa_idx to <8 x float>* - %xxx.copyload = load <8 x float>, <8 x float>* %xxx..sroa_cast248, align 32 + %xxx..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 -5 + %xxx..sroa_cast248 = bitcast ptr %xxx..sroa_idx to ptr + %xxx.copyload = load <8 x float>, ptr %xxx..sroa_cast248, align 32 - %yyy..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -2 - %yyy..sroa_cast244 = bitcast %S* %yyy..sroa_idx to <8 x float>* - %yyy.copyload = load <8 x float>, <8 x float>* %yyy..sroa_cast244, align 32 + %yyy..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 -2 + %yyy..sroa_cast244 = bitcast ptr %yyy..sroa_idx to ptr + %yyy.copyload = load <8 x float>, ptr %yyy..sroa_cast244, align 32 - %zzz..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -7 - %zzz..sroa_cast241 = bitcast %S* %zzz..sroa_idx to <8 x float>* - %zzz.copyload = load <8 x float>, <8 x float>* %zzz..sroa_cast241, align 32 + %zzz..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 -7 + %zzz..sroa_cast241 = bitcast ptr %zzz..sroa_idx to ptr + %zzz.copyload = load <8 x float>, ptr %zzz..sroa_cast241, align 32 - %www..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -4 - %www..sroa_cast238 = bitcast %S* %www..sroa_idx to <8 x float>* - %www.copyload = load <8 x float>, <8 x float>* %www..sroa_cast238, align 32 + %www..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 -4 + %www..sroa_cast238 = bitcast ptr %www..sroa_idx to ptr + %www.copyload = load <8 x float>, ptr %www..sroa_cast238, align 32 - %uuu..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 1 - %uuu..sroa_cast235 = bitcast %S* %uuu..sroa_idx to <8 x float>* - %uuu.copyload = load <8 x float>, <8 x float>* %uuu..sroa_cast235, align 32 + %uuu..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 1 + %uuu..sroa_cast235 = bitcast ptr %uuu..sroa_idx to ptr + %uuu.copyload = load <8 x float>, ptr %uuu..sroa_cast235, align 32 - %vvv..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -6 - %vvv..sroa_cast230 = bitcast %S* %vvv..sroa_idx to <8 x float>* - %vvv.copyload = load <8 x float>, <8 x float>* %vvv..sroa_cast230, align 32 + %vvv..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 -6 + %vvv..sroa_cast230 = bitcast ptr %vvv..sroa_idx to ptr + %vvv.copyload = load <8 x float>, ptr %vvv..sroa_cast230, align 32 - %ttt..sroa_idx = getelementptr inbounds %S, %S* %arg, i32 -3 - %ttt..sroa_cast226 = bitcast %S* %ttt..sroa_idx to <8 x float>* - %ttt.copyload = load <8 x float>, <8 x float>* %ttt..sroa_cast226, align 32 + %ttt..sroa_idx = getelementptr inbounds %S, ptr %arg, i32 -3 + %ttt..sroa_cast226 = bitcast ptr %ttt..sroa_idx to ptr + %ttt.copyload = load <8 x float>, ptr %ttt..sroa_cast226, align 32 - %sss..sroa_cast223 = bitcast %S* %arg to <8 x float>* - %sss.copyload = load <8 x float>, <8 x float>* %sss..sroa_cast223, align 32 + %sss..sroa_cast223 = bitcast ptr %arg to ptr + %sss.copyload = load <8 x float>, ptr %sss..sroa_cast223, align 32 %mul.i = fmul <8 x float> %ppp.copyload, %www.copyload %mul.i185 = fmul <8 x float> %xxx.copyload, %uuu.copyload @@ -75,31 +75,31 @@ %div.i = fdiv <8 x float> zeroinitializer, %add.i %mul.i153 = fmul <8 x float> %uuu.copyload, %div.i - store <8 x float> %mul.i153, <8 x float>* %ppp..sroa_cast248, align 32 + store <8 x float> %mul.i153, ptr %ppp..sroa_cast248, align 32 %mul.i147 = fmul <8 x float> %uuu.copyload, %vvv.copyload %mul.i141 = fmul <8 x float> %zzz.copyload, %sss.copyload %mul.i135 = fmul <8 x float> %mul.i141, %div.i %sub.i129 = fsub <8 x float> %mul.i147, %mul.i135 - store <8 x float> %sub.i129, <8 x float>* %zzz..sroa_cast241, align 32 - store <8 x float> %div.i, <8 x float>* %vvv..sroa_cast230, align 32 - store <8 x float> %div.i, <8 x float>* %xxx..sroa_cast248, align 32 + store <8 x float> %sub.i129, ptr %zzz..sroa_cast241, align 32 + store <8 x float> %div.i, ptr %vvv..sroa_cast230, align 32 + store <8 x float> %div.i, ptr %xxx..sroa_cast248, align 32 %mul.i123 = fmul <8 x float> %yyy.copyload, %vvv.copyload %mul.i117 = fmul <8 x float> %mul.i123, %div.i %sub.i111 = fsub <8 x float> %sss.copyload, %mul.i117 - store <8 x float> %sub.i111, <8 x float>* %www..sroa_cast238, align 32 + store <8 x float> %sub.i111, ptr %www..sroa_cast238, align 32 %mul.i105 = fmul <8 x float> %ppp.copyload, %ttt.copyload %mul.i99 = fmul <8 x float> %mul.i105, %div.i %sub.i93 = fsub <8 x float> %xxx.copyload, %mul.i99 - store <8 x float> %sub.i93, <8 x float>* %ttt..sroa_cast226, align 32 + store <8 x float> %sub.i93, ptr %ttt..sroa_cast226, align 32 %mul.i81 = fmul <8 x float> %yyy.copyload, %www.copyload %mul.i75 = fmul <8 x float> %mul.i81, %div.i %sub.i = fsub <8 x float> %mul.i185, %mul.i75 - store <8 x float> %sub.i, <8 x float>* %yyy..sroa_cast244, align 32 + store <8 x float> %sub.i, ptr %yyy..sroa_cast244, align 32 ret void } diff --git a/llvm/test/CodeGen/Thumb2/t2-teq-reduce.mir b/llvm/test/CodeGen/Thumb2/t2-teq-reduce.mir index e3b4ec2..15fe20c 100644 --- a/llvm/test/CodeGen/Thumb2/t2-teq-reduce.mir +++ b/llvm/test/CodeGen/Thumb2/t2-teq-reduce.mir @@ -5,82 +5,82 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8m.main" - %list_head = type { %list_head*, %list_data* } + %list_head = type { ptr, ptr } %list_data = type { i16, i16 } - define %list_head* @reg_reg_it_block(%list_head* %a, i16 zeroext %b) { + define ptr @reg_reg_it_block(ptr %a, i16 zeroext %b) { entry: br label %while.begin while.begin: ; preds = %while.body.end, %entry - %list.addr.i = phi %list_head* [ %ld.5, %while.body.end ], [ %a, %entry ] - %info.i = getelementptr inbounds %list_head, %list_head* %list.addr.i, i32 0, i32 1 - %ld.0 = load %list_data*, %list_data** %info.i, align 4 - %data16.i1 = bitcast %list_data* %ld.0 to i16* - %ld.1 = load i16, i16* %data16.i1, align 2 + %list.addr.i = phi ptr [ %ld.5, %while.body.end ], [ %a, %entry ] + %info.i = getelementptr inbounds %list_head, ptr %list.addr.i, i32 0, i32 1 + %ld.0 = load ptr, ptr %info.i, align 4 + %data16.i1 = bitcast ptr %ld.0 to ptr + %ld.1 = load i16, ptr %data16.i1, align 2 %xor.1 = xor i16 %ld.1, %b %cmp.i = icmp eq i16 %xor.1, 0 br i1 %cmp.i, label %exit, label %while.body.a while.body.a: ; preds = %while.begin - %next.i2 = bitcast %list_head* %list.addr.i to %list_head** - %ld.2 = load %list_head*, %list_head** %next.i2, align 4 - %cmp.i.1 = icmp eq %list_head* %ld.2, null + %next.i2 = bitcast ptr %list.addr.i to ptr + %ld.2 = load ptr, ptr %next.i2, align 4 + %cmp.i.1 = icmp eq ptr %ld.2, null br i1 %cmp.i.1, label %exit, label %it.block it.block: ; preds = %while.body.a - %info.i.1 = getelementptr inbounds %list_head, %list_head* %ld.2, i32 0, i32 1 - %ld.3 = load %list_data*, %list_data** %info.i.1, align 4 - %data16.i.13 = bitcast %list_data* %ld.3 to i16* - %ld.4 = load i16, i16* %data16.i.13, align 2 + %info.i.1 = getelementptr inbounds %list_head, ptr %ld.2, i32 0, i32 1 + %ld.3 = load ptr, ptr %info.i.1, align 4 + %data16.i.13 = bitcast ptr %ld.3 to ptr + %ld.4 = load i16, ptr %data16.i.13, align 2 %xor.2 = xor i16 %ld.4, %b %cmp.i.2 = icmp eq i16 %xor.2, 0 br i1 %cmp.i.2, label %exit, label %while.body.end while.body.end: ; preds = %it.block - %next.i.14 = bitcast %list_head* %ld.2 to %list_head** - %ld.5 = load %list_head*, %list_head** %next.i.14, align 4 - %cmp.i.3 = icmp eq %list_head* %ld.5, null + %next.i.14 = bitcast ptr %ld.2 to ptr + %ld.5 = load ptr, ptr %next.i.14, align 4 + %cmp.i.3 = icmp eq ptr %ld.5, null br i1 %cmp.i.3, label %exit, label %while.begin exit: ; preds = %while.body.end, %it.block, %while.body.a, %while.begin - %res = phi %list_head* [ %list.addr.i, %while.begin ], [ %ld.2, %while.body.a ], [ %ld.2, %it.block ], [ %ld.5, %while.body.end ] - ret %list_head* %res + %res = phi ptr [ %list.addr.i, %while.begin ], [ %ld.2, %while.body.a ], [ %ld.2, %it.block ], [ %ld.5, %while.body.end ] + ret ptr %res } - define i16 @op_not_killed(%list_head* %a, i16 zeroext %b) { + define i16 @op_not_killed(ptr %a, i16 zeroext %b) { entry: br label %while.begin while.begin: ; preds = %while.body.end, %entry - %list.addr.i = phi %list_head* [ %ld.5, %while.body.end ], [ %a, %entry ] - %info.i = getelementptr inbounds %list_head, %list_head* %list.addr.i, i32 0, i32 1 - %ld.0 = load %list_data*, %list_data** %info.i, align 4 - %data16.i1 = bitcast %list_data* %ld.0 to i16* - %ld.1 = load i16, i16* %data16.i1, align 2 + %list.addr.i = phi ptr [ %ld.5, %while.body.end ], [ %a, %entry ] + %info.i = getelementptr inbounds %list_head, ptr %list.addr.i, i32 0, i32 1 + %ld.0 = load ptr, ptr %info.i, align 4 + %data16.i1 = bitcast ptr %ld.0 to ptr + %ld.1 = load i16, ptr %data16.i1, align 2 %xor.1 = xor i16 %ld.1, %b %cmp.i = icmp eq i16 %xor.1, 0 br i1 %cmp.i, label %exit, label %while.body.a while.body.a: ; preds = %while.begin - %next.i2 = bitcast %list_head* %list.addr.i to %list_head** - %ld.2 = load %list_head*, %list_head** %next.i2, align 4 - %cmp.i.1 = icmp eq %list_head* %ld.2, null + %next.i2 = bitcast ptr %list.addr.i to ptr + %ld.2 = load ptr, ptr %next.i2, align 4 + %cmp.i.1 = icmp eq ptr %ld.2, null br i1 %cmp.i.1, label %exit, label %it.block it.block: ; preds = %while.body.a - %info.i.1 = getelementptr inbounds %list_head, %list_head* %ld.2, i32 0, i32 1 - %ld.3 = load %list_data*, %list_data** %info.i.1, align 4 - %data16.i.13 = bitcast %list_data* %ld.3 to i16* - %ld.4 = load i16, i16* %data16.i.13, align 2 + %info.i.1 = getelementptr inbounds %list_head, ptr %ld.2, i32 0, i32 1 + %ld.3 = load ptr, ptr %info.i.1, align 4 + %data16.i.13 = bitcast ptr %ld.3 to ptr + %ld.4 = load i16, ptr %data16.i.13, align 2 %xor.2 = xor i16 %ld.4, %b %cmp.i.2 = icmp eq i16 %xor.2, 0 br i1 %cmp.i.2, label %exit, label %while.body.end while.body.end: ; preds = %it.block - %next.i.14 = bitcast %list_head* %ld.2 to %list_head** - %ld.5 = load %list_head*, %list_head** %next.i.14, align 4 - %cmp.i.3 = icmp eq %list_head* %ld.5, null + %next.i.14 = bitcast ptr %ld.2 to ptr + %ld.5 = load ptr, ptr %next.i.14, align 4 + %cmp.i.3 = icmp eq ptr %ld.5, null br i1 %cmp.i.3, label %exit, label %while.begin exit: ; preds = %while.body.end, %it.block, %while.body.a, %while.begin |