| author    | WANG Rui <wangrui@loongson.cn>                   | 2025-09-26 09:16:46 +0800 |
|-----------|--------------------------------------------------|---------------------------|
| committer | WANG Rui <wangrui@loongson.cn>                   | 2025-09-26 09:16:46 +0800 |
| commit    | aec52219a8b7c60e8d2dff2440b5c4c44596b377 (patch) |                           |
| tree      | 36727c3cbc750d7e25f4c48d4c2ae2dbc7f9b601         |                           |
| parent    | 663414817b2669cee9d828c909822d3b4b9f5bbb (diff)  |                           |
[LoongArch][NFC] Pre-commit tests for vector sign and zero extensions (branch: users/hev/test-vec-ext)
| -rw-r--r-- | llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll | 1074 |
| -rw-r--r-- | llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll | 1206 |
| -rw-r--r-- | llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll  |  443 |
| -rw-r--r-- | llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll  |  356 |
4 files changed, 2919 insertions(+), 160 deletions(-)
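
The diff below consists of tests whose assertions were autogenerated, as the NOTE headers in the files declare (utils/update_llc_test_checks.py, UTC_ARGS: --version 5). As a sketch of the usual workflow for tests like these — the build-directory name "build" is an assumption, adjust it to your checkout layout — the checks can be regenerated and exercised roughly as follows:

    # Regenerate the autogenerated CHECK lines (sketch; assumes an in-tree
    # build directory named "build" containing llc).
    llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc \
        llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll \
        llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll \
        llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll \
        llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll

    # Exercise one file the same way its RUN line does (LA64 prefixes shown).
    build/bin/llc --mtriple=loongarch64 --mattr=+lasx \
        llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll -o - | \
      build/bin/FileCheck --check-prefixes=CHECK,LA64 \
        llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
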
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
new file mode 100644
index 0000000..953e6c4
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
@@ -0,0 +1,1074 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @load_sext_2i8_to_2i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_2i8_to_2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.h $a0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
+; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
+; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
+; CHECK-NEXT: vslli.d $vr0, $vr0, 56
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 56
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+  %A = load <2 x i8>, ptr %ptr
+  %B = sext <2 x i8> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_2i16_to_2i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_2i16_to_2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.w $a0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
+; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
+; CHECK-NEXT: vslli.d $vr0, $vr0, 48
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 48
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+  %A = load <2 x i16>, ptr %ptr
+  %B = sext <2 x i16> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_2i32_to_2i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT: vslli.d $vr0, $vr0, 32
+; LA32-NEXT: vsrai.d $vr0, $vr0, 32
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_sext_2i32_to_2i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vshuf4i.w $vr0, $vr0, 16
+; LA64-NEXT: vslli.d $vr0, $vr0, 32
+; LA64-NEXT: vsrai.d $vr0, $vr0, 32
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <2 x i32>, ptr %ptr
+  %B = sext <2 x i32> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i8_to_4i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_4i8_to_4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.w $a0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
+; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
+; CHECK-NEXT: vslli.w $vr0, $vr0, 24
+; CHECK-NEXT: vsrai.w $vr0, $vr0, 24
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+  %A = load <4 x i8>, ptr %ptr
+  %B = sext <4 x i8> %A to <4 x i32>
+  store <4 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i8_to_4i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_4i8_to_4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.w $a0, $a0, 0
+; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68
+; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
+; CHECK-NEXT: xvslli.d $xr0, $xr0, 56
+; CHECK-NEXT: xvsrai.d $xr0, $xr0, 56
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+  %A = load <4 x i8>, ptr %ptr
+  %B = sext <4 x i8> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_4i16_to_4i32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0
+; LA32-NEXT: vslli.w $vr0, $vr0, 16
+; LA32-NEXT: vsrai.w $vr0, $vr0, 16
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_sext_4i16_to_4i32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vilvl.h $vr0, $vr0, $vr0
+; LA64-NEXT: vslli.w $vr0, $vr0, 16
+; LA64-NEXT: vsrai.w $vr0, $vr0, 16
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <4 x i16>, ptr %ptr
+  %B = sext <4 x i16> %A to <4 x i32>
+  store <4 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i16_to_4i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_4i16_to_4i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI6_0)
+; LA32-NEXT: xvld $xr0, $a3, %pc_lo12(.LCPI6_0)
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: xvpermi.d $xr1, $xr1, 68
+; LA32-NEXT: xvshuf.h $xr0, $xr0, $xr1
+; LA32-NEXT: xvslli.d $xr0, $xr0, 48
+; LA32-NEXT: xvsrai.d $xr0, $xr0, 48
+; LA32-NEXT: xvst $xr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_sext_4i16_to_4i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI6_0)
+; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI6_0)
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
+; LA64-NEXT: xvshuf.h $xr0, $xr0, $xr1
+; LA64-NEXT: xvslli.d $xr0, $xr0, 48
+; LA64-NEXT: xvsrai.d $xr0, $xr0, 48
+; LA64-NEXT: xvst $xr0, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <4 x i16>, ptr %ptr
+  %B = sext <4 x i16> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_4i32_to_4i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vextrins.w $vr1, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT: srai.w $a0, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: vextrins.w $vr1, $vr0, 35
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT: srai.w $a0, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: srai.w $a0, $a0, 31
+; LA32-NEXT: vori.b $vr2, $vr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
+; LA32-NEXT: vextrins.w $vr2, $vr0, 33
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
+; LA32-NEXT: srai.w $a0, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA32-NEXT: xvst $xr2, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_sext_4i32_to_4i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 1
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT: xvst $xr2, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <4 x i32>, ptr %ptr
+  %B = sext <4 x i32> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_8i8_to_8i16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0
+; LA32-NEXT: vslli.h $vr0, $vr0, 8
+; LA32-NEXT: vsrai.h $vr0, $vr0, 8
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_sext_8i8_to_8i16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0
+; LA64-NEXT: vslli.h $vr0, $vr0, 8
+; LA64-NEXT: vsrai.h $vr0, $vr0, 8
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = sext <8 x i8> %A to <8 x i16>
+  store <8 x i16> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i8_to_8i32(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_8i8_to_8i32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI9_0)
+; LA32-NEXT: xvld $xr0, $a3, %pc_lo12(.LCPI9_0)
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: xvpermi.d $xr1, $xr1, 68
+; LA32-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
+; LA32-NEXT: xvslli.w $xr0, $xr0, 24
+; LA32-NEXT: xvsrai.w $xr0, $xr0, 24
+; LA32-NEXT: xvst $xr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_sext_8i8_to_8i32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI9_0)
+; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI9_0)
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
+; LA64-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
+; LA64-NEXT: xvslli.w $xr0, $xr0, 24
+; LA64-NEXT: xvsrai.w $xr0, $xr0, 24
+; LA64-NEXT: xvst $xr0, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = sext <8 x i8> %A to <8 x i32>
+  store <8 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i8_to_8i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_8i8_to_8i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: xvpermi.d $xr1, $xr0, 68
+; LA32-NEXT: # kill: def $vr0 killed $vr0 killed $xr0
+; LA32-NEXT: pcalau12i $a2, %pc_hi20(.LCPI10_0)
+; LA32-NEXT: xvld $xr2, $a2, %pc_lo12(.LCPI10_0)
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vreplvei.w $vr0, $vr0, 1
+; LA32-NEXT: xvpermi.d $xr0, $xr0, 68
+; LA32-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr2
+; LA32-NEXT: xvslli.d $xr0, $xr0, 56
+; LA32-NEXT: xvsrai.d $xr0, $xr0, 56
+; LA32-NEXT: xvshuf.b $xr1, $xr0, $xr1, $xr2
+; LA32-NEXT: xvslli.d $xr1, $xr1, 56
+; LA32-NEXT: xvsrai.d $xr1, $xr1, 56
+; LA32-NEXT: xvst $xr1, $a1, 0
+; LA32-NEXT: xvst $xr0, $a1, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_sext_8i8_to_8i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI10_0)
+; LA64-NEXT: xvld $xr0, $a2, %pc_lo12(.LCPI10_0)
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vsrli.d $vr2, $vr1, 32
+; LA64-NEXT: xvpermi.d $xr2, $xr2, 68
+; LA64-NEXT: xvshuf.b $xr2, $xr0, $xr2, $xr0
+; LA64-NEXT: xvslli.d $xr2, $xr2, 56
+; LA64-NEXT: xvsrai.d $xr2, $xr2, 56
+; LA64-NEXT: xvpermi.d $xr1, $xr1, 68
+; LA64-NEXT: xvshuf.b $xr0, $xr0, $xr1, $xr0
+; LA64-NEXT: xvslli.d $xr0, $xr0, 56
+; LA64-NEXT: xvsrai.d $xr0, $xr0, 56
+; LA64-NEXT: xvst $xr0, $a1, 0
+; LA64-NEXT: xvst $xr2, $a1, 32
+; LA64-NEXT: ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = sext <8 x i8> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i16_to_8i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_8i16_to_8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
+; CHECK-NEXT: xvst $xr2, $a1, 0
+; CHECK-NEXT: ret
+entry:
+  %A = load <8 x i16>, ptr %ptr
+  %B = sext <8 x i16> %A to <8 x i32>
+  store <8 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i16_to_8i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_8i16_to_8i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2
+; LA32-NEXT: ext.w.h $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT: srai.w $a2, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
+; LA32-NEXT: vpickve2gr.h $a2, $vr0, 3
+; LA32-NEXT: ext.w.h $a2, $a2
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT: srai.w $a3, $a2, 31
+; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 3
+; LA32-NEXT: vpickve2gr.h $a3, $vr0, 0
+; LA32-NEXT: ext.w.h $a3, $a3
+; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 0
+; LA32-NEXT: vpickve2gr.h $a4, $vr0, 1
+; LA32-NEXT: ext.w.h $a4, $a4
+; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 1
+; LA32-NEXT: srai.w $a3, $a3, 31
+; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 1
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
+; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 2
+; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 3
+; LA32-NEXT: srai.w $a0, $a4, 31
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6
+; LA32-NEXT: ext.w.h $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT: srai.w $a2, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 1
+; LA32-NEXT: vpickve2gr.h $a2, $vr0, 7
+; LA32-NEXT: ext.w.h $a2, $a2
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT: srai.w $a3, $a2, 31
+; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 3
+; LA32-NEXT: vpickve2gr.h $a3, $vr0, 4
+; LA32-NEXT: ext.w.h $a3, $a3
+; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 0
+; LA32-NEXT: vpickve2gr.h $a4, $vr0, 5
+; LA32-NEXT: ext.w.h $a4, $a4
+; LA32-NEXT: vinsgr2vr.w $vr3, $a4, 1
+; LA32-NEXT: srai.w $a3, $a3, 31
+; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 1
+; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 2
+; LA32-NEXT: vinsgr2vr.w $vr3, $a4, 2
+; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 3
+; LA32-NEXT: srai.w $a0, $a4, 31
+; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 3
+; LA32-NEXT: xvpermi.q $xr3, $xr1, 2
+; LA32-NEXT: xvst $xr3, $a1, 32
+; LA32-NEXT: xvst $xr2, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_sext_8i16_to_8i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT: xvpermi.q $xr3, $xr1, 2
+; LA64-NEXT: xvst $xr3, $a1, 32
+; LA64-NEXT: xvst $xr2, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <8 x i16>, ptr %ptr
+  %B = sext <8 x i16> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i32_to_8i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_8i32_to_8i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vextrins.w $vr2, $vr1, 2
+; LA32-NEXT: vpickve2gr.w $a0, $vr1, 2
+; LA32-NEXT: srai.w $a0, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
+; LA32-NEXT: vextrins.w $vr2, $vr1, 35
+; LA32-NEXT: vpickve2gr.w $a0, $vr1, 3
+; LA32-NEXT: srai.w $a0, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT: vpickve2gr.w $a0, $vr1, 0
+; LA32-NEXT: srai.w $a0, $a0, 31
+; LA32-NEXT: vori.b $vr3, $vr1, 0
+; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 1
+; LA32-NEXT: vextrins.w $vr3, $vr1, 33
+; LA32-NEXT: vpickve2gr.w $a0, $vr1, 1
+; LA32-NEXT: srai.w $a0, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 3
+; LA32-NEXT: xvpermi.q $xr3, $xr2, 2
+; LA32-NEXT: vextrins.w $vr1, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT: srai.w $a0, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: vextrins.w $vr1, $vr0, 35
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT: srai.w $a0, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: srai.w $a0, $a0, 31
+; LA32-NEXT: vori.b $vr2, $vr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
+; LA32-NEXT: vextrins.w $vr2, $vr0, 33
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
+; LA32-NEXT: srai.w $a0, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA32-NEXT: xvst $xr2, $a1, 0
+; LA32-NEXT: xvst $xr3, $a1, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_sext_8i32_to_8i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vpickve2gr.w $a0, $vr1, 2
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.w $a0, $vr1, 3
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: vpickve2gr.w $a0, $vr1, 0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT: vpickve2gr.w $a0, $vr1, 1
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT: xvpermi.q $xr3, $xr2, 2
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 1
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT: xvst $xr2, $a1, 0
+; LA64-NEXT: xvst $xr3, $a1, 32
+; LA64-NEXT: ret
+entry:
+  %A = load <8 x i32>, ptr %ptr
+  %B = sext <8 x i32> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_16i8_to_16i16(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_16i8_to_16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 2
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 3
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 4
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 5
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 6
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 7
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 0
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 1
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 2
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 3
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 4
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 5
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 6
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 7
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
+; CHECK-NEXT: xvst $xr2, $a1, 0
+; CHECK-NEXT: ret
+entry:
+  %A = load <16 x i8>, ptr %ptr
+  %B = sext <16 x i8> %A to <16 x i16>
+  store <16 x i16> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_16i8_to_16i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_16i8_to_16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 0
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 1
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 2
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
+; CHECK-NEXT: ext.w.b $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 3
+; CHECK-NEXT: xvpermi.q $xr3, $xr1, 2
+; CHECK-NEXT: xvst $xr3, $a1, 32
+; CHECK-NEXT: xvst $xr2, $a1, 0
+; CHECK-NEXT: ret
+entry:
+  %A = load <16 x i8>, ptr %ptr
+  %B = sext <16 x i8> %A to <16 x i32>
+  store <16 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_16i8_to_16i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_16i8_to_16i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr1, $a0, 0
+; LA32-NEXT: vpickve2gr.b $a0, $vr1, 2
+; LA32-NEXT: ext.w.b $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
+; LA32-NEXT: srai.w $a2, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 1
+; LA32-NEXT: vpickve2gr.b $a2, $vr1, 3
+; LA32-NEXT: ext.w.b $a2, $a2
+; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 2
+; LA32-NEXT: srai.w $a3, $a2, 31
+; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 3
+; LA32-NEXT: vpickve2gr.b $a3, $vr1, 0
+; LA32-NEXT: ext.w.b $a3, $a3
+; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 0
+; LA32-NEXT: vpickve2gr.b $a4, $vr1, 1
+; LA32-NEXT: ext.w.b $a4, $a4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
+; LA32-NEXT: srai.w $a3, $a3, 31
+; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 1
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 3
+; LA32-NEXT: srai.w $a0, $a4, 31
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 3
+; LA32-NEXT: xvpermi.q $xr0, $xr2, 2
+; LA32-NEXT: vpickve2gr.b $a0, $vr1, 6
+; LA32-NEXT: ext.w.b $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
+; LA32-NEXT: srai.w $a2, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
+; LA32-NEXT: vpickve2gr.b $a2, $vr1, 7
+; LA32-NEXT: ext.w.b $a2, $a2
+; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
+; LA32-NEXT: srai.w $a3, $a2, 31
+; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
+; LA32-NEXT: vpickve2gr.b $a3, $vr1, 4
+; LA32-NEXT: ext.w.b $a3, $a3
+; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 0
+; LA32-NEXT: vpickve2gr.b $a4, $vr1, 5
+; LA32-NEXT: ext.w.b $a4, $a4
+; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 1
+; LA32-NEXT: srai.w $a3, $a3, 31
+; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 1
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
+; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 2
+; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 3
+; LA32-NEXT: srai.w $a0, $a4, 31
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT: xvpermi.q $xr2, $xr3, 2
+; LA32-NEXT: vpickve2gr.b $a0, $vr1, 10
+; LA32-NEXT: ext.w.b $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
+; LA32-NEXT: srai.w $a2, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
+; LA32-NEXT: vpickve2gr.b $a2, $vr1, 11
+; LA32-NEXT: ext.w.b $a2, $a2
+; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
+; LA32-NEXT: srai.w $a3, $a2, 31
+; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
+; LA32-NEXT: vpickve2gr.b $a3, $vr1, 8
+; LA32-NEXT: ext.w.b $a3, $a3
+; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 0
+; LA32-NEXT: vpickve2gr.b $a4, $vr1, 9
+; LA32-NEXT: ext.w.b $a4, $a4
+; LA32-NEXT: vinsgr2vr.w $vr4, $a4, 1
+; LA32-NEXT: srai.w $a3, $a3, 31
+; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 1
+; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 2
+; LA32-NEXT: vinsgr2vr.w $vr4, $a4, 2
+; LA32-NEXT: vinsgr2vr.w $vr4, $a2, 3
+; LA32-NEXT: srai.w $a0, $a4, 31
+; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 3
+; LA32-NEXT: xvpermi.q $xr4, $xr3, 2
+; LA32-NEXT: vpickve2gr.b $a0, $vr1, 14
+; LA32-NEXT: ext.w.b $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
+; LA32-NEXT: srai.w $a2, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
+; LA32-NEXT: vpickve2gr.b $a2, $vr1, 15
+; LA32-NEXT: ext.w.b $a2, $a2
+; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
+; LA32-NEXT: srai.w $a3, $a2, 31
+; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
+; LA32-NEXT: vpickve2gr.b $a3, $vr1, 12
+; LA32-NEXT: ext.w.b $a3, $a3
+; LA32-NEXT: vinsgr2vr.w $vr5, $a3, 0
+; LA32-NEXT: vpickve2gr.b $a4, $vr1, 13
+; LA32-NEXT: ext.w.b $a4, $a4
+; LA32-NEXT: vinsgr2vr.w $vr5, $a4, 1
+; LA32-NEXT: srai.w $a3, $a3, 31
+; LA32-NEXT: vinsgr2vr.w $vr5, $a3, 1
+; LA32-NEXT: vinsgr2vr.w $vr5, $a0, 2
+; LA32-NEXT: vinsgr2vr.w $vr5, $a4, 2
+; LA32-NEXT: vinsgr2vr.w $vr5, $a2, 3
+; LA32-NEXT: srai.w $a0, $a4, 31
+; LA32-NEXT: vinsgr2vr.w $vr5, $a0, 3
+; LA32-NEXT: xvpermi.q $xr5, $xr3, 2
+; LA32-NEXT: xvst $xr5, $a1, 96
+; LA32-NEXT: xvst $xr4, $a1, 64
+; LA32-NEXT: xvst $xr2, $a1, 32
+; LA32-NEXT: xvst $xr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_sext_16i8_to_16i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 2
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 3
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 1
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 6
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 7
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 4
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 5
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT: xvpermi.q $xr3, $xr2, 2
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 10
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 11
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 8
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 9
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 1
+; LA64-NEXT: xvpermi.q $xr4, $xr2, 2
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 14
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 15
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 12
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 13
+; LA64-NEXT: ext.w.b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 1
+; LA64-NEXT: xvpermi.q $xr5, $xr2, 2
+; LA64-NEXT: xvst $xr5, $a1, 96
+; LA64-NEXT: xvst $xr4, $a1, 64
+; LA64-NEXT: xvst $xr3, $a1, 32
+; LA64-NEXT: xvst $xr1, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <16 x i8>, ptr %ptr
+  %B = sext <16 x i8> %A to <16 x i64>
+  store <16 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_16i16_to_16i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_16i16_to_16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 4
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
+; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 5
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
+; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 6
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
+; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 7
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
+; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 0
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 0
+; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 1
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 1
+; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 2
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 2
+; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 3
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 3
+; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2
+; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
+; CHECK-NEXT: ext.w.h $a0, $a0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
+; CHECK-NEXT: xvst $xr2, $a1, 0
+; CHECK-NEXT: xvst $xr3, $a1, 32
+; CHECK-NEXT: ret
+entry:
+  %A = load <16 x i16>, ptr %ptr
+  %B = sext <16 x i16> %A to <16 x i32>
+  store <16 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_16i16_to_16i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_16i16_to_16i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr1, $a0, 0
+; LA32-NEXT: xvpermi.q $xr3, $xr1, 1
+; LA32-NEXT: vpickve2gr.h $a0, $vr3, 2
+; LA32-NEXT: ext.w.h $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
+; LA32-NEXT: srai.w $a2, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 1
+; LA32-NEXT: vpickve2gr.h $a2, $vr3, 3
+; LA32-NEXT: ext.w.h $a2, $a2
+; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 2
+; LA32-NEXT: srai.w $a3, $a2, 31
+; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 3
+; LA32-NEXT: vpickve2gr.h $a3, $vr3, 0
+; LA32-NEXT: ext.w.h $a3, $a3
+; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 0
+; LA32-NEXT: vpickve2gr.h $a4, $vr3, 1
+; LA32-NEXT: ext.w.h $a4, $a4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 1
+; LA32-NEXT: srai.w $a3, $a3, 31
+; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 1
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 3
+; LA32-NEXT: srai.w $a0, $a4, 31
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 3
+; LA32-NEXT: xvpermi.q $xr0, $xr2, 2
+; LA32-NEXT: vpickve2gr.h $a0, $vr3, 6
+; LA32-NEXT: ext.w.h $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 0
+; LA32-NEXT: srai.w $a2, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr4, $a2, 1
+; LA32-NEXT: vpickve2gr.h $a2, $vr3, 7
+; LA32-NEXT: ext.w.h $a2, $a2
+; LA32-NEXT: vinsgr2vr.w $vr4, $a2, 2
+; LA32-NEXT: srai.w $a3, $a2, 31
+; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 3
+; LA32-NEXT: vpickve2gr.h $a3, $vr3, 4
+; LA32-NEXT: ext.w.h $a3, $a3
+; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 0
+; LA32-NEXT: vpickve2gr.h $a4, $vr3, 5
+; LA32-NEXT: ext.w.h $a4, $a4
+; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 1
+; LA32-NEXT: srai.w $a3, $a3, 31
+; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 1
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
+; LA32-NEXT: vinsgr2vr.w $vr2, $a4, 2
+; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 3
+; LA32-NEXT: srai.w $a0, $a4, 31
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT: xvpermi.q $xr2, $xr4, 2
+; LA32-NEXT: vpickve2gr.h $a0, $vr1, 2
+; LA32-NEXT: ext.w.h $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
+; LA32-NEXT: srai.w $a2, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
+; LA32-NEXT: vpickve2gr.h $a2, $vr1, 3
+; LA32-NEXT: ext.w.h $a2, $a2
+; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
+; LA32-NEXT: srai.w $a3, $a2, 31
+; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
+; LA32-NEXT: vpickve2gr.h $a3, $vr1, 0
+; LA32-NEXT: ext.w.h $a3, $a3
+; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 0
+; LA32-NEXT: vpickve2gr.h $a4, $vr1, 1
+; LA32-NEXT: ext.w.h $a4, $a4
+; LA32-NEXT: vinsgr2vr.w $vr4, $a4, 1
+; LA32-NEXT: srai.w $a3, $a3, 31
+; LA32-NEXT: vinsgr2vr.w $vr4, $a3, 1
+; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 2
+; LA32-NEXT: vinsgr2vr.w $vr4, $a4, 2
+; LA32-NEXT: vinsgr2vr.w $vr4, $a2, 3
+; LA32-NEXT: srai.w $a0, $a4, 31
+; LA32-NEXT: vinsgr2vr.w $vr4, $a0, 3
+; LA32-NEXT: xvpermi.q $xr4, $xr3, 2
+; LA32-NEXT: vpickve2gr.h $a0, $vr1, 6
+; LA32-NEXT: ext.w.h $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0
+; LA32-NEXT: srai.w $a2, $a0, 31
+; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 1
+; LA32-NEXT: vpickve2gr.h $a2, $vr1, 7
+; LA32-NEXT: ext.w.h $a2, $a2
+; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
+; LA32-NEXT: srai.w $a3, $a2, 31
+; LA32-NEXT: vinsgr2vr.w $vr3, $a3, 3
+; LA32-NEXT: vpickve2gr.h $a3, $vr1, 4
+; LA32-NEXT: ext.w.h $a3, $a3
+; LA32-NEXT: vinsgr2vr.w $vr5, $a3, 0
+; LA32-NEXT: vpickve2gr.h $a4, $vr1, 5
+; LA32-NEXT: ext.w.h $a4, $a4
+; LA32-NEXT: vinsgr2vr.w $vr5, $a4, 1
+; LA32-NEXT: srai.w $a3, $a3, 31
+; LA32-NEXT: vinsgr2vr.w $vr5, $a3, 1
+; LA32-NEXT: vinsgr2vr.w $vr5, $a0, 2
+; LA32-NEXT: vinsgr2vr.w $vr5, $a4, 2
+; LA32-NEXT: vinsgr2vr.w $vr5, $a2, 3
+; LA32-NEXT: srai.w $a0, $a4, 31
+; LA32-NEXT: vinsgr2vr.w $vr5, $a0, 3
+; LA32-NEXT: xvpermi.q $xr5, $xr3, 2
+; LA32-NEXT: xvst $xr5, $a1, 32
+; LA32-NEXT: xvst $xr4, $a1, 0
+; LA32-NEXT: xvst $xr2, $a1, 96
+; LA32-NEXT: xvst $xr0, $a1, 64
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_sext_16i16_to_16i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr2, $xr0, 1
+; LA64-NEXT: vpickve2gr.h $a0, $vr2, 2
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr2, 3
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT: vpickve2gr.h $a0, $vr2, 0
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr2, 1
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: xvpermi.q $xr1, $xr3, 2
+; LA64-NEXT: vpickve2gr.h $a0, $vr2, 6
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr2, 7
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT: vpickve2gr.h $a0, $vr2, 4
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr2, 5
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 1
+; LA64-NEXT: xvpermi.q $xr4, $xr3, 2
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT: xvpermi.q $xr3, $xr2, 2
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5
+; LA64-NEXT: ext.w.h $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 1
+; LA64-NEXT: xvpermi.q $xr5, $xr2, 2
+; LA64-NEXT: xvst $xr5, $a1, 32
+; LA64-NEXT: xvst $xr3, $a1, 0
+; LA64-NEXT: xvst $xr4, $a1, 96
+; LA64-NEXT: xvst $xr1, $a1, 64
+; LA64-NEXT: ret
+entry:
+  %A = load <16 x i16>, ptr %ptr
+  %B = sext <16 x i16> %A to <16 x i64>
+  store <16 x i64> %B, ptr %dst
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll
new file mode 100644
index 0000000..f0548cc
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll
@@ -0,0 +1,1206 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @load_zext_2i8_to_2i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_2i8_to_2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.h $a0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT: vrepli.b $vr1, 0
+; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+  %A = load <2 x i8>, ptr %ptr
+  %B = zext <2 x i8> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_2i16_to_2i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_2i16_to_2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.w $a0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: vrepli.b $vr1, 0
+; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+  %A = load <2 x i16>, ptr %ptr
+  %B = zext <2 x i16> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_2i32_to_2i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vrepli.b $vr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_zext_2i32_to_2i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vrepli.b $vr1, 0
+; LA64-NEXT: vilvl.w $vr0, $vr1, $vr0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <2 x i32>, ptr %ptr
+  %B = zext <2 x i32> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_4i8_to_4i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_4i8_to_4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.w $a0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: vrepli.b $vr1, 0
+; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+entry:
+  %A = load <4 x i8>, ptr %ptr
+  %B = zext <4 x i8> %A to <4 x i32>
+  store <4 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_4i8_to_4i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_4i8_to_4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.w $a0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT: xvrepli.b $xr1, 0
+; CHECK-NEXT: xvreplgr2vr.b $xr2, $a0
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 18
+; CHECK-NEXT: xvextrins.b $xr1, $xr2, 0
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
+; CHECK-NEXT: xvreplgr2vr.b $xr2, $a0
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 18
+; CHECK-NEXT: xvextrins.b $xr1, $xr2, 136
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
+; CHECK-NEXT: xvreplgr2vr.b $xr2, $a0
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 48
+; CHECK-NEXT: xvextrins.b $xr1, $xr2, 0
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
+; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT: xvextrins.b $xr1, $xr0, 136
+; CHECK-NEXT: xvst $xr1, $a1, 0
+; CHECK-NEXT: ret
+entry:
+  %A = load <4 x i8>, ptr %ptr
+  %B = zext <4 x i8> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_4i16_to_4i32(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_4i16_to_4i32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vrepli.b $vr1, 0
+; LA32-NEXT: vilvl.h $vr0, $vr1, $vr0
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_zext_4i16_to_4i32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vrepli.b $vr1, 0
+; LA64-NEXT: vilvl.h $vr0, $vr1, $vr0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <4 x i16>, ptr %ptr
+  %B = zext <4 x i16> %A to <4 x i32>
+  store <4 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_4i16_to_4i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_4i16_to_4i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0
+; LA32-NEXT: xvrepli.b $xr1, 0
+; LA32-NEXT: xvreplgr2vr.h $xr2, $a0
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT: xvextrins.h $xr1, $xr2, 0
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
+; LA32-NEXT: xvreplgr2vr.h $xr2, $a0
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT: xvextrins.h $xr1, $xr2, 68
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2
+; LA32-NEXT: xvreplgr2vr.h $xr2, $a0
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 48
+; LA32-NEXT: xvextrins.h $xr1, $xr2, 0
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3
+; LA32-NEXT: xvreplgr2vr.h $xr0, $a0
+; LA32-NEXT: xvpermi.q $xr0, $xr1, 48
+; LA32-NEXT: xvextrins.h $xr1, $xr0, 68
+; LA32-NEXT: xvst $xr1, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_zext_4i16_to_4i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
+; LA64-NEXT: xvrepli.b $xr1, 0
+; LA64-NEXT: xvreplgr2vr.h $xr2, $a0
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
+; LA64-NEXT: xvextrins.h $xr1, $xr2, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
+; LA64-NEXT: xvreplgr2vr.h $xr2, $a0
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
+; LA64-NEXT: xvextrins.h $xr1, $xr2, 68
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
+; LA64-NEXT: xvreplgr2vr.h $xr2, $a0
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 48
+; LA64-NEXT: xvextrins.h $xr1, $xr2, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
+; LA64-NEXT: xvreplgr2vr.h $xr0, $a0
+; LA64-NEXT: xvpermi.q $xr0, $xr1, 48
+; LA64-NEXT: xvextrins.h $xr1, $xr0, 68
+; LA64-NEXT: xvst $xr1, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <4 x i16>, ptr %ptr
+  %B = zext <4 x i16> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_4i32_to_4i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_4i32_to_4i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: xvrepli.b $xr1, 0
+; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
+; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 2
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 4
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 6
+; LA32-NEXT: xvst $xr1, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_zext_4i32_to_4i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 1
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT: xvst $xr2, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <4 x i32>, ptr %ptr
+  %B = zext <4 x i32> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i8_to_8i16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vrepli.b $vr1, 0
+; LA32-NEXT: vilvl.b $vr0, $vr1, $vr0
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_zext_8i8_to_8i16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vrepli.b $vr1, 0
+; LA64-NEXT: vilvl.b $vr0, $vr1, $vr0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = zext <8 x i8> %A to <8 x i16>
+  store <8 x i16> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i8_to_8i32(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i8_to_8i32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vpickve2gr.b $a0, $vr0, 0
+; LA32-NEXT: xvrepli.b $xr1, 0
+; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT: xvextrins.b $xr1, $xr2, 0
+; LA32-NEXT: vpickve2gr.b $a0, $vr0, 1
+; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT: xvextrins.b $xr1, $xr2, 68
+; LA32-NEXT: vpickve2gr.b $a0, $vr0, 2
+; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT: xvextrins.b $xr1, $xr2, 136
+; LA32-NEXT: vpickve2gr.b $a0, $vr0, 3
+; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT: xvextrins.b $xr1, $xr2, 204
+; LA32-NEXT: vpickve2gr.b $a0, $vr0, 4
+; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 48
+; LA32-NEXT: xvextrins.b $xr1, $xr2, 0
+; LA32-NEXT: vpickve2gr.b $a0, $vr0, 5
+; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 48
+; LA32-NEXT: xvextrins.b $xr1, $xr2, 68
+; LA32-NEXT: vpickve2gr.b $a0, $vr0, 6
+; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 48
+; LA32-NEXT: xvextrins.b $xr1, $xr2, 136
+; LA32-NEXT: vpickve2gr.b $a0, $vr0, 7
+; LA32-NEXT: xvreplgr2vr.b $xr0, $a0
+; LA32-NEXT: xvpermi.q $xr0, $xr1, 48
+; LA32-NEXT: xvextrins.b $xr1, $xr0, 204
+; LA32-NEXT: xvst $xr1, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_zext_8i8_to_8i32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0
+; LA64-NEXT: xvrepli.b $xr1, 0
+; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
+; LA64-NEXT: xvextrins.b $xr1, $xr2, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 1
+; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
+; LA64-NEXT: xvextrins.b $xr1, $xr2, 68
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 2
+; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
+; LA64-NEXT: xvextrins.b $xr1, $xr2, 136
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 3
+; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 18
+; LA64-NEXT: xvextrins.b $xr1, $xr2, 204
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 4
+; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 48
+; LA64-NEXT: xvextrins.b $xr1, $xr2, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 5
+; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 48
+; LA64-NEXT: xvextrins.b $xr1, $xr2, 68
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 6
+; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 48
+; LA64-NEXT: xvextrins.b $xr1, $xr2, 136
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 7
+; LA64-NEXT: xvreplgr2vr.b $xr0, $a0
+; LA64-NEXT: xvpermi.q $xr0, $xr1, 48
+; LA64-NEXT: xvextrins.b $xr1, $xr0, 204
+; LA64-NEXT: xvst $xr1, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = zext <8 x i8> %A to <8 x i32>
+  store <8 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i8_to_8i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i8_to_8i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vpickve2gr.b $a2, $vr0, 0
+; LA32-NEXT: vpickve2gr.b $a3, $vr0, 1
+; LA32-NEXT: vpickve2gr.b $a4, $vr0, 2
+; LA32-NEXT: vpickve2gr.b $a5, $vr0, 3
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vreplvei.w $vr0, $vr0, 1
+; LA32-NEXT: vpickve2gr.b $a0, $vr0, 0
+; LA32-NEXT: xvrepli.b $xr1, 0
+; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT: xvori.b $xr3, $xr1, 0
+; LA32-NEXT: xvextrins.b $xr3, $xr2, 0
+; LA32-NEXT: vpickve2gr.b $a0, $vr0, 1
+; LA32-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT: vpickve2gr.b $a0, $vr0, 2
+; LA32-NEXT: xvreplgr2vr.b $xr4, $a0
+; LA32-NEXT: vpickve2gr.b $a0, $vr0, 3
+; LA32-NEXT: xvreplgr2vr.b $xr0, $a0
+; LA32-NEXT: xvpermi.q $xr2, $xr3, 18
+; LA32-NEXT: xvextrins.b $xr3, $xr2, 136
+; LA32-NEXT: xvreplgr2vr.b $xr2, $a2
+; LA32-NEXT: xvpermi.q $xr4, $xr3, 48
+; LA32-NEXT: xvextrins.b $xr3, $xr4, 0
+; LA32-NEXT: xvreplgr2vr.b $xr4, $a3
+; LA32-NEXT: xvpermi.q $xr0, $xr3, 48
+; LA32-NEXT: xvextrins.b $xr3, $xr0, 136
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT: xvextrins.b $xr1, $xr2, 0
+; LA32-NEXT: xvpermi.q $xr4, $xr1, 18
+; LA32-NEXT: xvextrins.b $xr1, $xr4, 136
+; LA32-NEXT: xvreplgr2vr.b $xr0, $a4
+; LA32-NEXT: xvpermi.q $xr0, $xr1, 48
+; LA32-NEXT: xvextrins.b $xr1, $xr0, 0
+; LA32-NEXT: xvreplgr2vr.b $xr0, $a5
+; LA32-NEXT: xvpermi.q $xr0, $xr1, 48
+; LA32-NEXT: xvextrins.b $xr1, $xr0, 136
+; LA32-NEXT: xvst $xr1, $a1, 0
+; LA32-NEXT: xvst $xr3, $a1, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_zext_8i8_to_8i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vsrli.d $vr1, $vr0, 32
+; LA64-NEXT: vpickve2gr.b $a0, $vr1, 0
+; LA64-NEXT: xvrepli.b $xr2, 0
+; LA64-NEXT: xvreplgr2vr.b $xr3, $a0
+; LA64-NEXT: xvpermi.q $xr3, $xr2, 18
+; LA64-NEXT: xvori.b $xr4, $xr2, 0
+; LA64-NEXT: xvextrins.b $xr4, $xr3, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr1, 1
+; LA64-NEXT: xvreplgr2vr.b $xr3, $a0
+; LA64-NEXT: xvpermi.q $xr3, $xr4, 18
+; LA64-NEXT: xvextrins.b $xr4, $xr3, 136
+; LA64-NEXT: vpickve2gr.b $a0, $vr1, 2
+; LA64-NEXT: xvreplgr2vr.b $xr3, $a0
+; LA64-NEXT: xvpermi.q $xr3, $xr4, 48
+; LA64-NEXT: xvextrins.b $xr4, $xr3, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr1, 3
+; LA64-NEXT: xvreplgr2vr.b $xr1, $a0
+; LA64-NEXT: xvpermi.q $xr1, $xr4, 48
+; LA64-NEXT: xvextrins.b $xr4, $xr1, 136
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0
+; LA64-NEXT: xvreplgr2vr.b $xr1, $a0
+; LA64-NEXT: xvpermi.q $xr1, $xr2, 18
+; LA64-NEXT: xvextrins.b $xr2, $xr1, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 1
+; LA64-NEXT: xvreplgr2vr.b $xr1, $a0
+; LA64-NEXT: xvpermi.q $xr1, $xr2, 18
+; LA64-NEXT: xvextrins.b $xr2, $xr1, 136
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 2
+; LA64-NEXT: xvreplgr2vr.b $xr1, $a0
+; LA64-NEXT: xvpermi.q $xr1, $xr2, 48
+; LA64-NEXT: xvextrins.b $xr2, $xr1, 0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 3
+; LA64-NEXT: xvreplgr2vr.b $xr0, $a0
+; LA64-NEXT: xvpermi.q $xr0, $xr2, 48
+; LA64-NEXT: xvextrins.b $xr2, $xr0, 136
+; LA64-NEXT: xvst $xr2, $a1, 0
+; LA64-NEXT: xvst $xr4, $a1, 32
+; LA64-NEXT: ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = zext <8 x i8> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i16_to_8i32(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i16_to_8i32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 4
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 5
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 7
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA32-NEXT: xvst $xr2, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_zext_8i16_to_8i32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 2
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 1
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 2
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 3
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT: xvst $xr2, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <8 x i16>, ptr %ptr
+  %B = zext <8 x i16> %A to <8 x i32>
+  store <8 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i16_to_8i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i16_to_8i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0
+; LA32-NEXT: xvrepli.b $xr1, 0
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: xvori.b $xr2, $xr1, 0
+; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 0
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 2
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 4
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 6
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 4
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 0
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 5
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 2
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 4
+; LA32-NEXT: vpickve2gr.h $a0, $vr0, 7
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 6
+; LA32-NEXT: xvst $xr1, $a1, 32
+; LA32-NEXT: xvst $xr2, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_zext_8i16_to_8i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT: xvpermi.q $xr3, $xr1, 2
+; LA64-NEXT: xvst $xr3, $a1, 32
+; LA64-NEXT: xvst $xr2, $a1, 0
+; LA64-NEXT: ret
+entry:
+  %A = load <8 x i16>, ptr %ptr
+  %B = zext <8 x i16> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i32_to_8i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i32_to_8i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: xvrepli.b $xr2, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr1, 0
+; LA32-NEXT: xvori.b $xr3, $xr2, 0
+; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr1, 1
+; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 2
+; LA32-NEXT: vpickve2gr.w $a0, $vr1, 2
+; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 4
+; LA32-NEXT: vpickve2gr.w $a0, $vr1, 3
+; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 6
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
+; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 2
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 4
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 6
+; LA32-NEXT: xvst $xr2, $a1, 0
+; LA32-NEXT: xvst $xr3, $a1, 32
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_zext_8i32_to_8i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vpickve2gr.w $a0, $vr1, 2
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.w $a0, $vr1, 3
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: vpickve2gr.w $a0, $vr1, 0
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT: vpickve2gr.w $a0, $vr1, 1
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT: xvpermi.q $xr3, $xr2, 2
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 1
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT: xvst $xr2, $a1, 0
+; LA64-NEXT: xvst $xr3, $a1, 32
+; LA64-NEXT: ret
+entry:
+  %A = load <8 x i32>, ptr %ptr
+  %B = zext <8 x i32> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_16i8_to_16i16(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_16i8_to_16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
+; CHECK-NEXT: andi $a0, $a0,
255 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 2 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 3 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 4 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 5 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 6 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 7 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 1 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 2 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 3 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 4 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 5 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 6 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 7 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2 +; CHECK-NEXT: xvst $xr2, $a1, 0 +; CHECK-NEXT: ret +entry: + %A = load <16 x i8>, ptr %ptr + %B = zext <16 x i8> %A to <16 x i16> + store <16 x i16> %B, ptr %dst + ret void +} + +define void @load_zext_16i8_to_16i32(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_zext_16i8_to_16i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 3 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 1 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr1, $a0, 2 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: 
vinsgr2vr.w $vr1, $a0, 3 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 1 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 2 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11 +; CHECK-NEXT: andi $a0, $a0, 255 +; CHECK-NEXT: vinsgr2vr.w $vr3, $a0, 3 +; CHECK-NEXT: xvpermi.q $xr3, $xr1, 2 +; CHECK-NEXT: xvst $xr3, $a1, 32 +; CHECK-NEXT: xvst $xr2, $a1, 0 +; CHECK-NEXT: ret +entry: + %A = load <16 x i8>, ptr %ptr + %B = zext <16 x i8> %A to <16 x i32> + store <16 x i32> %B, ptr %dst + ret void +} + +define void @load_zext_16i8_to_16i64(ptr %ptr, ptr %dst) { +; LA32-LABEL: load_zext_16i8_to_16i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr1, $a0, 0 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 0 +; LA32-NEXT: xvrepli.b $xr2, 0 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvori.b $xr0, $xr2, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 0 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 1 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 2 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 2 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 4 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 3 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 6 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 4 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvori.b $xr3, $xr2, 0 +; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 0 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 5 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 2 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 6 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 4 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 7 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 6 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 8 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvori.b $xr4, $xr2, 0 +; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 0 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 9 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 2 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 10 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 4 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 11 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 6 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 12 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 0 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 13 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 2 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 14 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 4 +; LA32-NEXT: vpickve2gr.b $a0, $vr1, 15 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 6 +; LA32-NEXT: xvst $xr2, $a1, 96 +; LA32-NEXT: xvst $xr4, $a1, 64 +; LA32-NEXT: xvst $xr3, $a1, 32 +; LA32-NEXT: xvst $xr0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_zext_16i8_to_16i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a0, 0 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 2 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 3 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr1, 
$a0, 0 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 1 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1 +; LA64-NEXT: xvpermi.q $xr1, $xr2, 2 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 6 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 7 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 4 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 5 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1 +; LA64-NEXT: xvpermi.q $xr3, $xr2, 2 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 10 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 11 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 8 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 0 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 9 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 1 +; LA64-NEXT: xvpermi.q $xr4, $xr2, 2 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 14 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 15 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 12 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 0 +; LA64-NEXT: vpickve2gr.b $a0, $vr0, 13 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 1 +; LA64-NEXT: xvpermi.q $xr5, $xr2, 2 +; LA64-NEXT: xvst $xr5, $a1, 96 +; LA64-NEXT: xvst $xr4, $a1, 64 +; LA64-NEXT: xvst $xr3, $a1, 32 +; LA64-NEXT: xvst $xr1, $a1, 0 +; LA64-NEXT: ret +entry: + %A = load <16 x i8>, ptr %ptr + %B = zext <16 x i8> %A to <16 x i64> + store <16 x i64> %B, ptr %dst + ret void +} + +define void @load_zext_16i16_to_16i32(ptr %ptr, ptr %dst) { +; LA32-LABEL: load_zext_16i16_to_16i32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvld $xr0, $a0, 0 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 1 +; LA32-NEXT: vpickve2gr.h $a0, $vr1, 4 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0 +; LA32-NEXT: vpickve2gr.h $a0, $vr1, 5 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1 +; LA32-NEXT: vpickve2gr.h $a0, $vr1, 6 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2 +; LA32-NEXT: vpickve2gr.h $a0, $vr1, 7 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3 +; LA32-NEXT: vpickve2gr.h $a0, $vr1, 0 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 0 +; LA32-NEXT: vpickve2gr.h $a0, $vr1, 1 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 1 +; LA32-NEXT: vpickve2gr.h $a0, $vr1, 2 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr3, $a0, 2 +; LA32-NEXT: vpickve2gr.h $a0, $vr1, 3 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; 
LA32-NEXT: vinsgr2vr.w $vr3, $a0, 3 +; LA32-NEXT: xvpermi.q $xr3, $xr2, 2 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 4 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 5 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 7 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3 +; LA32-NEXT: xvpermi.q $xr2, $xr1, 2 +; LA32-NEXT: xvst $xr2, $a1, 0 +; LA32-NEXT: xvst $xr3, $a1, 32 +; LA32-NEXT: ret +; +; LA64-LABEL: load_zext_16i16_to_16i32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a0, 0 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 1 +; LA64-NEXT: vpickve2gr.h $a0, $vr1, 4 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 0 +; LA64-NEXT: vpickve2gr.h $a0, $vr1, 5 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 1 +; LA64-NEXT: vpickve2gr.h $a0, $vr1, 6 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 2 +; LA64-NEXT: vpickve2gr.h $a0, $vr1, 7 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 3 +; LA64-NEXT: vpickve2gr.h $a0, $vr1, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr3, $a0, 0 +; LA64-NEXT: vpickve2gr.h $a0, $vr1, 1 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr3, $a0, 1 +; LA64-NEXT: vpickve2gr.h $a0, $vr1, 2 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr3, $a0, 2 +; LA64-NEXT: vpickve2gr.h $a0, $vr1, 3 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr3, $a0, 3 +; LA64-NEXT: xvpermi.q $xr3, $xr2, 2 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 0 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 1 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 2 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 3 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 0 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 1 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 2 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.w $vr2, $a0, 3 +; LA64-NEXT: xvpermi.q $xr2, $xr1, 2 +; LA64-NEXT: xvst $xr2, $a1, 0 +; LA64-NEXT: xvst $xr3, $a1, 32 +; LA64-NEXT: ret +entry: + %A = load <16 x i16>, ptr %ptr + %B = zext <16 x i16> %A to <16 x i32> + store <16 x i32> %B, ptr %dst + ret void +} + +define void @load_zext_16i16_to_16i64(ptr %ptr, 
ptr %dst) { +; LA32-LABEL: load_zext_16i16_to_16i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvld $xr0, $a0, 0 +; LA32-NEXT: xvpermi.q $xr3, $xr0, 1 +; LA32-NEXT: vpickve2gr.h $a0, $vr3, 0 +; LA32-NEXT: xvrepli.b $xr2, 0 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvori.b $xr1, $xr2, 0 +; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 0 +; LA32-NEXT: vpickve2gr.h $a0, $vr3, 1 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 2 +; LA32-NEXT: vpickve2gr.h $a0, $vr3, 2 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 4 +; LA32-NEXT: vpickve2gr.h $a0, $vr3, 3 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvinsgr2vr.w $xr1, $a0, 6 +; LA32-NEXT: vpickve2gr.h $a0, $vr3, 4 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvori.b $xr4, $xr2, 0 +; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 0 +; LA32-NEXT: vpickve2gr.h $a0, $vr3, 5 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 2 +; LA32-NEXT: vpickve2gr.h $a0, $vr3, 6 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 4 +; LA32-NEXT: vpickve2gr.h $a0, $vr3, 7 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvinsgr2vr.w $xr4, $a0, 6 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 0 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvori.b $xr3, $xr2, 0 +; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 0 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 2 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 2 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 4 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 3 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvinsgr2vr.w $xr3, $a0, 6 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 4 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 0 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 5 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 2 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 6 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 4 +; LA32-NEXT: vpickve2gr.h $a0, $vr0, 7 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: xvinsgr2vr.w $xr2, $a0, 6 +; LA32-NEXT: xvst $xr2, $a1, 32 +; LA32-NEXT: xvst $xr3, $a1, 0 +; LA32-NEXT: xvst $xr4, $a1, 96 +; LA32-NEXT: xvst $xr1, $a1, 64 +; LA32-NEXT: ret +; +; LA64-LABEL: load_zext_16i16_to_16i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a0, 0 +; LA64-NEXT: xvpermi.q $xr2, $xr0, 1 +; LA64-NEXT: vpickve2gr.h $a0, $vr2, 2 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0 +; LA64-NEXT: vpickve2gr.h $a0, $vr2, 3 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1 +; LA64-NEXT: vpickve2gr.h $a0, $vr2, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0 +; LA64-NEXT: vpickve2gr.h $a0, $vr2, 1 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1 +; LA64-NEXT: xvpermi.q $xr1, $xr3, 2 +; LA64-NEXT: vpickve2gr.h $a0, $vr2, 6 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0 +; LA64-NEXT: vpickve2gr.h $a0, $vr2, 7 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1 +; LA64-NEXT: vpickve2gr.h $a0, 
$vr2, 4 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 0 +; LA64-NEXT: vpickve2gr.h $a0, $vr2, 5 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr4, $a0, 1 +; LA64-NEXT: xvpermi.q $xr4, $xr3, 2 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 2 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 3 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 0 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 1 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr3, $a0, 1 +; LA64-NEXT: xvpermi.q $xr3, $xr2, 2 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 6 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 7 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 4 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 0 +; LA64-NEXT: vpickve2gr.h $a0, $vr0, 5 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vinsgr2vr.d $vr5, $a0, 1 +; LA64-NEXT: xvpermi.q $xr5, $xr2, 2 +; LA64-NEXT: xvst $xr5, $a1, 32 +; LA64-NEXT: xvst $xr3, $a1, 0 +; LA64-NEXT: xvst $xr4, $a1, 96 +; LA64-NEXT: xvst $xr1, $a1, 64 +; LA64-NEXT: ret +entry: + %A = load <16 x i16>, ptr %ptr + %B = zext <16 x i16> %A to <16 x i64> + store <16 x i64> %B, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll index dce6dc9..cadaf2f 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64 define void @load_sext_2i8_to_2i64(ptr %ptr, ptr %dst) { @@ -21,68 +21,90 @@ entry: ret void } -define void @load_sext_4i8_to_4i32(ptr %ptr, ptr %dst) { -; CHECK-LABEL: load_sext_4i8_to_4i32: +define void @load_sext_2i16_to_2i64(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_sext_2i16_to_2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vslli.w $vr0, $vr0, 24 -; CHECK-NEXT: vsrai.w $vr0, $vr0, 24 +; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0 +; CHECK-NEXT: vslli.d $vr0, $vr0, 48 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 48 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: - %A = load <4 x i8>, ptr %ptr - %B = sext <4 x i8> %A to <4 x i32> - store <4 x i32> %B, ptr %dst + %A = load <2 x i16>, ptr %ptr + %B = sext <2 x i16> %A to <2 x i64> + store <2 x i64> %B, ptr %dst ret void } -define 
void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) { -; LA32-LABEL: load_sext_8i8_to_8i16: +define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) { +; LA32-LABEL: load_sext_2i32_to_2i64: ; LA32: # %bb.0: # %entry ; LA32-NEXT: ld.w $a2, $a0, 0 ; LA32-NEXT: ld.w $a0, $a0, 4 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 -; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 -; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0 -; LA32-NEXT: vslli.h $vr0, $vr0, 8 -; LA32-NEXT: vsrai.h $vr0, $vr0, 8 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2 +; LA32-NEXT: vslli.d $vr0, $vr0, 32 +; LA32-NEXT: vsrai.d $vr0, $vr0, 32 ; LA32-NEXT: vst $vr0, $a1, 0 ; LA32-NEXT: ret ; -; LA64-LABEL: load_sext_8i8_to_8i16: +; LA64-LABEL: load_sext_2i32_to_2i64: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ld.d $a0, $a0, 0 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0 -; LA64-NEXT: vslli.h $vr0, $vr0, 8 -; LA64-NEXT: vsrai.h $vr0, $vr0, 8 +; LA64-NEXT: vshuf4i.w $vr0, $vr0, 16 +; LA64-NEXT: vslli.d $vr0, $vr0, 32 +; LA64-NEXT: vsrai.d $vr0, $vr0, 32 ; LA64-NEXT: vst $vr0, $a1, 0 ; LA64-NEXT: ret entry: - %A = load <8 x i8>, ptr %ptr - %B = sext <8 x i8> %A to <8 x i16> - store <8 x i16> %B, ptr %dst + %A = load <2 x i32>, ptr %ptr + %B = sext <2 x i32> %A to <2 x i64> + store <2 x i64> %B, ptr %dst ret void } -define void @load_sext_2i16_to_2i64(ptr %ptr, ptr %dst) { -; CHECK-LABEL: load_sext_2i16_to_2i64: +define void @load_sext_4i8_to_4i32(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_sext_4i8_to_4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 ; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0 -; CHECK-NEXT: vslli.d $vr0, $vr0, 48 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 48 +; CHECK-NEXT: vslli.w $vr0, $vr0, 24 +; CHECK-NEXT: vsrai.w $vr0, $vr0, 24 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: - %A = load <2 x i16>, ptr %ptr - %B = sext <2 x i16> %A to <2 x i64> - store <2 x i64> %B, ptr %dst + %A = load <4 x i8>, ptr %ptr + %B = sext <4 x i8> %A to <4 x i32> + store <4 x i32> %B, ptr %dst + ret void +} + +define void @load_sext_4i8_to_4i64(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_sext_4i8_to_4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld.w $a0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0 +; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0 +; CHECK-NEXT: vilvl.w $vr1, $vr0, $vr0 +; CHECK-NEXT: vslli.d $vr1, $vr1, 56 +; CHECK-NEXT: vsrai.d $vr1, $vr1, 56 +; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr0 +; CHECK-NEXT: vslli.d $vr0, $vr0, 56 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 56 +; CHECK-NEXT: vst $vr0, $a1, 16 +; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: ret +entry: + %A = load <4 x i8>, ptr %ptr + %B = sext <4 x i8> %A to <4 x i64> + store <4 x i64> %B, ptr %dst ret void } @@ -115,31 +137,270 @@ entry: ret void } -define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) { -; LA32-LABEL: load_sext_2i32_to_2i64: +define void @load_sext_4i16_to_4i64(ptr %ptr, ptr %dst) { +; LA32-LABEL: load_sext_4i16_to_4i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0 +; LA32-NEXT: vilvl.w $vr0, $vr0, $vr0 +; LA32-NEXT: vslli.d $vr0, $vr0, 48 +; LA32-NEXT: vsrai.d $vr0, $vr0, 48 +; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0 +; LA32-NEXT: vilvl.h $vr1, $vr1, $vr1 +; LA32-NEXT: vilvl.w $vr1, $vr1, $vr1 +; LA32-NEXT: vslli.d $vr1, $vr1, 48 +; LA32-NEXT: vsrai.d $vr1, $vr1, 48 +; LA32-NEXT: vst 
$vr1, $a1, 16 +; LA32-NEXT: vst $vr0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_sext_4i16_to_4i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vilvl.h $vr0, $vr0, $vr0 +; LA64-NEXT: vilvl.w $vr1, $vr0, $vr0 +; LA64-NEXT: vslli.d $vr1, $vr1, 48 +; LA64-NEXT: vsrai.d $vr1, $vr1, 48 +; LA64-NEXT: vilvh.w $vr0, $vr0, $vr0 +; LA64-NEXT: vslli.d $vr0, $vr0, 48 +; LA64-NEXT: vsrai.d $vr0, $vr0, 48 +; LA64-NEXT: vst $vr0, $a1, 16 +; LA64-NEXT: vst $vr1, $a1, 0 +; LA64-NEXT: ret +entry: + %A = load <4 x i16>, ptr %ptr + %B = sext <4 x i16> %A to <4 x i64> + store <4 x i64> %B, ptr %dst + ret void +} + +define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_sext_4i32_to_4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 16 +; CHECK-NEXT: vslli.d $vr1, $vr1, 32 +; CHECK-NEXT: vsrai.d $vr1, $vr1, 32 +; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 50 +; CHECK-NEXT: vslli.d $vr0, $vr0, 32 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 32 +; CHECK-NEXT: vst $vr0, $a1, 16 +; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: ret +entry: + %A = load <4 x i32>, ptr %ptr + %B = sext <4 x i32> %A to <4 x i64> + store <4 x i64> %B, ptr %dst + ret void +} + +define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) { +; LA32-LABEL: load_sext_8i8_to_8i16: ; LA32: # %bb.0: # %entry ; LA32-NEXT: ld.w $a2, $a0, 0 ; LA32-NEXT: ld.w $a0, $a0, 4 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 -; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2 -; LA32-NEXT: vslli.d $vr0, $vr0, 32 -; LA32-NEXT: vsrai.d $vr0, $vr0, 32 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0 +; LA32-NEXT: vslli.h $vr0, $vr0, 8 +; LA32-NEXT: vsrai.h $vr0, $vr0, 8 ; LA32-NEXT: vst $vr0, $a1, 0 ; LA32-NEXT: ret ; -; LA64-LABEL: load_sext_2i32_to_2i64: +; LA64-LABEL: load_sext_8i8_to_8i16: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ld.d $a0, $a0, 0 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; LA64-NEXT: vshuf4i.w $vr0, $vr0, 16 -; LA64-NEXT: vslli.d $vr0, $vr0, 32 -; LA64-NEXT: vsrai.d $vr0, $vr0, 32 +; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0 +; LA64-NEXT: vslli.h $vr0, $vr0, 8 +; LA64-NEXT: vsrai.h $vr0, $vr0, 8 ; LA64-NEXT: vst $vr0, $a1, 0 ; LA64-NEXT: ret entry: - %A = load <2 x i32>, ptr %ptr - %B = sext <2 x i32> %A to <2 x i64> - store <2 x i64> %B, ptr %dst + %A = load <8 x i8>, ptr %ptr + %B = sext <8 x i8> %A to <8 x i16> + store <8 x i16> %B, ptr %dst + ret void +} + +define void @load_sext_8i8_to_8i32(ptr %ptr, ptr %dst) { +; LA32-LABEL: load_sext_8i8_to_8i32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0 +; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0 +; LA32-NEXT: vslli.w $vr0, $vr0, 24 +; LA32-NEXT: vsrai.w $vr0, $vr0, 24 +; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0 +; LA32-NEXT: vilvl.b $vr1, $vr1, $vr1 +; LA32-NEXT: vilvl.h $vr1, $vr1, $vr1 +; LA32-NEXT: vslli.w $vr1, $vr1, 24 +; LA32-NEXT: vsrai.w $vr1, $vr1, 24 +; LA32-NEXT: vst $vr1, $a1, 16 +; LA32-NEXT: vst $vr0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_sext_8i8_to_8i32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0 +; LA64-NEXT: vilvl.h $vr1, $vr0, $vr0 +; LA64-NEXT: vslli.w $vr1, $vr1, 24 +; LA64-NEXT: vsrai.w $vr1, $vr1, 24 +; LA64-NEXT: vilvh.h $vr0, $vr0, $vr0 +; LA64-NEXT: vslli.w $vr0, $vr0, 24 +; LA64-NEXT: vsrai.w $vr0, $vr0, 24 +; LA64-NEXT: vst $vr0, $a1, 16 
+; LA64-NEXT: vst $vr1, $a1, 0 +; LA64-NEXT: ret +entry: + %A = load <8 x i8>, ptr %ptr + %B = sext <8 x i8> %A to <8 x i32> + store <8 x i32> %B, ptr %dst + ret void +} + +define void @load_sext_8i8_to_8i64(ptr %ptr, ptr %dst) { +; LA32-LABEL: load_sext_8i8_to_8i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0 +; LA32-NEXT: vilvl.h $vr1, $vr0, $vr0 +; LA32-NEXT: vilvl.w $vr2, $vr1, $vr1 +; LA32-NEXT: vslli.d $vr2, $vr2, 56 +; LA32-NEXT: vsrai.d $vr2, $vr2, 56 +; LA32-NEXT: vilvh.w $vr1, $vr1, $vr1 +; LA32-NEXT: vslli.d $vr1, $vr1, 56 +; LA32-NEXT: vsrai.d $vr1, $vr1, 56 +; LA32-NEXT: vilvh.h $vr0, $vr0, $vr0 +; LA32-NEXT: vilvl.w $vr3, $vr0, $vr0 +; LA32-NEXT: vslli.d $vr3, $vr3, 56 +; LA32-NEXT: vsrai.d $vr3, $vr3, 56 +; LA32-NEXT: vilvh.w $vr0, $vr0, $vr0 +; LA32-NEXT: vslli.d $vr0, $vr0, 56 +; LA32-NEXT: vsrai.d $vr0, $vr0, 56 +; LA32-NEXT: vst $vr0, $a1, 48 +; LA32-NEXT: vst $vr3, $a1, 32 +; LA32-NEXT: vst $vr1, $a1, 16 +; LA32-NEXT: vst $vr2, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_sext_8i8_to_8i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0 +; LA64-NEXT: vilvl.h $vr1, $vr0, $vr0 +; LA64-NEXT: vilvl.w $vr2, $vr1, $vr1 +; LA64-NEXT: vslli.d $vr2, $vr2, 56 +; LA64-NEXT: vsrai.d $vr2, $vr2, 56 +; LA64-NEXT: vilvh.w $vr1, $vr1, $vr1 +; LA64-NEXT: vslli.d $vr1, $vr1, 56 +; LA64-NEXT: vsrai.d $vr1, $vr1, 56 +; LA64-NEXT: vilvh.h $vr0, $vr0, $vr0 +; LA64-NEXT: vilvl.w $vr3, $vr0, $vr0 +; LA64-NEXT: vslli.d $vr3, $vr3, 56 +; LA64-NEXT: vsrai.d $vr3, $vr3, 56 +; LA64-NEXT: vilvh.w $vr0, $vr0, $vr0 +; LA64-NEXT: vslli.d $vr0, $vr0, 56 +; LA64-NEXT: vsrai.d $vr0, $vr0, 56 +; LA64-NEXT: vst $vr0, $a1, 48 +; LA64-NEXT: vst $vr3, $a1, 32 +; LA64-NEXT: vst $vr1, $a1, 16 +; LA64-NEXT: vst $vr2, $a1, 0 +; LA64-NEXT: ret +entry: + %A = load <8 x i8>, ptr %ptr + %B = sext <8 x i8> %A to <8 x i64> + store <8 x i64> %B, ptr %dst + ret void +} + +define void @load_sext_8i16_to_8i32(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_sext_8i16_to_8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vilvl.h $vr1, $vr0, $vr0 +; CHECK-NEXT: vslli.w $vr1, $vr1, 16 +; CHECK-NEXT: vsrai.w $vr1, $vr1, 16 +; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0 +; CHECK-NEXT: vslli.w $vr0, $vr0, 16 +; CHECK-NEXT: vsrai.w $vr0, $vr0, 16 +; CHECK-NEXT: vst $vr0, $a1, 16 +; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: ret +entry: + %A = load <8 x i16>, ptr %ptr + %B = sext <8 x i16> %A to <8 x i32> + store <8 x i32> %B, ptr %dst + ret void +} + +define void @load_sext_8i16_to_8i64(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_sext_8i16_to_8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vilvl.h $vr1, $vr0, $vr0 +; CHECK-NEXT: vilvl.w $vr2, $vr1, $vr1 +; CHECK-NEXT: vslli.d $vr2, $vr2, 48 +; CHECK-NEXT: vsrai.d $vr2, $vr2, 48 +; CHECK-NEXT: vilvh.w $vr1, $vr1, $vr1 +; CHECK-NEXT: vslli.d $vr1, $vr1, 48 +; CHECK-NEXT: vsrai.d $vr1, $vr1, 48 +; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0 +; CHECK-NEXT: vilvl.w $vr3, $vr0, $vr0 +; CHECK-NEXT: vslli.d $vr3, $vr3, 48 +; CHECK-NEXT: vsrai.d $vr3, $vr3, 48 +; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr0 +; CHECK-NEXT: vslli.d $vr0, $vr0, 48 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 48 +; CHECK-NEXT: vst $vr0, $a1, 48 +; CHECK-NEXT: vst $vr3, $a1, 32 +; CHECK-NEXT: vst $vr1, $a1, 16 +; CHECK-NEXT: vst $vr2, 
$a1, 0 +; CHECK-NEXT: ret +entry: + %A = load <8 x i16>, ptr %ptr + %B = sext <8 x i16> %A to <8 x i64> + store <8 x i64> %B, ptr %dst + ret void +} + +define void @load_sext_8i32_to_8i64(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_sext_8i32_to_8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vld $vr1, $a0, 16 +; CHECK-NEXT: vshuf4i.w $vr2, $vr0, 16 +; CHECK-NEXT: vslli.d $vr2, $vr2, 32 +; CHECK-NEXT: vsrai.d $vr2, $vr2, 32 +; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 50 +; CHECK-NEXT: vslli.d $vr0, $vr0, 32 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 32 +; CHECK-NEXT: vshuf4i.w $vr3, $vr1, 16 +; CHECK-NEXT: vslli.d $vr3, $vr3, 32 +; CHECK-NEXT: vsrai.d $vr3, $vr3, 32 +; CHECK-NEXT: vshuf4i.w $vr1, $vr1, 50 +; CHECK-NEXT: vslli.d $vr1, $vr1, 32 +; CHECK-NEXT: vsrai.d $vr1, $vr1, 32 +; CHECK-NEXT: vst $vr1, $a1, 48 +; CHECK-NEXT: vst $vr3, $a1, 32 +; CHECK-NEXT: vst $vr0, $a1, 16 +; CHECK-NEXT: vst $vr2, $a1, 0 +; CHECK-NEXT: ret +entry: + %A = load <8 x i32>, ptr %ptr + %B = sext <8 x i32> %A to <8 x i64> + store <8 x i64> %B, ptr %dst ret void } @@ -243,72 +504,80 @@ entry: ret void } -define void @load_sext_8i16_to_8i32(ptr %ptr, ptr %dst) { -; CHECK-LABEL: load_sext_8i16_to_8i32: +define void @load_sext_16i16_to_16i32(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_sext_16i16_to_16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vilvl.h $vr1, $vr0, $vr0 -; CHECK-NEXT: vslli.w $vr1, $vr1, 16 -; CHECK-NEXT: vsrai.w $vr1, $vr1, 16 +; CHECK-NEXT: vld $vr1, $a0, 16 +; CHECK-NEXT: vilvl.h $vr2, $vr0, $vr0 +; CHECK-NEXT: vslli.w $vr2, $vr2, 16 +; CHECK-NEXT: vsrai.w $vr2, $vr2, 16 ; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.w $vr0, $vr0, 16 ; CHECK-NEXT: vsrai.w $vr0, $vr0, 16 +; CHECK-NEXT: vilvl.h $vr3, $vr1, $vr1 +; CHECK-NEXT: vslli.w $vr3, $vr3, 16 +; CHECK-NEXT: vsrai.w $vr3, $vr3, 16 +; CHECK-NEXT: vilvh.h $vr1, $vr1, $vr1 +; CHECK-NEXT: vslli.w $vr1, $vr1, 16 +; CHECK-NEXT: vsrai.w $vr1, $vr1, 16 +; CHECK-NEXT: vst $vr1, $a1, 48 +; CHECK-NEXT: vst $vr3, $a1, 32 ; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: vst $vr2, $a1, 0 ; CHECK-NEXT: ret entry: - %A = load <8 x i16>, ptr %ptr - %B = sext <8 x i16> %A to <8 x i32> - store <8 x i32> %B, ptr %dst + %A = load <16 x i16>, ptr %ptr + %B = sext <16 x i16> %A to <16 x i32> + store <16 x i32> %B, ptr %dst ret void } -define void @load_sext_8i16_to_8i64(ptr %ptr, ptr %dst) { -; CHECK-LABEL: load_sext_8i16_to_8i64: +define void @load_sext_16i16_to_16i64(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_sext_16i16_to_16i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vilvl.h $vr1, $vr0, $vr0 -; CHECK-NEXT: vilvl.w $vr2, $vr1, $vr1 +; CHECK-NEXT: vld $vr1, $a0, 16 +; CHECK-NEXT: vilvl.h $vr2, $vr0, $vr0 +; CHECK-NEXT: vilvl.w $vr3, $vr2, $vr2 +; CHECK-NEXT: vslli.d $vr3, $vr3, 48 +; CHECK-NEXT: vsrai.d $vr3, $vr3, 48 +; CHECK-NEXT: vilvh.w $vr2, $vr2, $vr2 ; CHECK-NEXT: vslli.d $vr2, $vr2, 48 ; CHECK-NEXT: vsrai.d $vr2, $vr2, 48 -; CHECK-NEXT: vilvh.w $vr1, $vr1, $vr1 -; CHECK-NEXT: vslli.d $vr1, $vr1, 48 -; CHECK-NEXT: vsrai.d $vr1, $vr1, 48 ; CHECK-NEXT: vilvh.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vilvl.w $vr3, $vr0, $vr0 -; CHECK-NEXT: vslli.d $vr3, $vr3, 48 -; CHECK-NEXT: vsrai.d $vr3, $vr3, 48 +; CHECK-NEXT: vilvl.w $vr4, $vr0, $vr0 +; CHECK-NEXT: vslli.d $vr4, $vr4, 48 +; CHECK-NEXT: vsrai.d $vr4, $vr4, 48 ; CHECK-NEXT: vilvh.w $vr0, $vr0, $vr0 ; CHECK-NEXT: vslli.d $vr0, $vr0, 48 ; CHECK-NEXT: vsrai.d $vr0, $vr0, 48 +; CHECK-NEXT: 
vilvl.h $vr5, $vr1, $vr1 +; CHECK-NEXT: vilvl.w $vr6, $vr5, $vr5 +; CHECK-NEXT: vslli.d $vr6, $vr6, 48 +; CHECK-NEXT: vsrai.d $vr6, $vr6, 48 +; CHECK-NEXT: vilvh.w $vr5, $vr5, $vr5 +; CHECK-NEXT: vslli.d $vr5, $vr5, 48 +; CHECK-NEXT: vsrai.d $vr5, $vr5, 48 +; CHECK-NEXT: vilvh.h $vr1, $vr1, $vr1 +; CHECK-NEXT: vilvl.w $vr7, $vr1, $vr1 +; CHECK-NEXT: vslli.d $vr7, $vr7, 48 +; CHECK-NEXT: vsrai.d $vr7, $vr7, 48 +; CHECK-NEXT: vilvh.w $vr1, $vr1, $vr1 +; CHECK-NEXT: vslli.d $vr1, $vr1, 48 +; CHECK-NEXT: vsrai.d $vr1, $vr1, 48 +; CHECK-NEXT: vst $vr1, $a1, 112 +; CHECK-NEXT: vst $vr7, $a1, 96 +; CHECK-NEXT: vst $vr5, $a1, 80 +; CHECK-NEXT: vst $vr6, $a1, 64 ; CHECK-NEXT: vst $vr0, $a1, 48 -; CHECK-NEXT: vst $vr3, $a1, 32 -; CHECK-NEXT: vst $vr1, $a1, 16 -; CHECK-NEXT: vst $vr2, $a1, 0 -; CHECK-NEXT: ret -entry: - %A = load <8 x i16>, ptr %ptr - %B = sext <8 x i16> %A to <8 x i64> - store <8 x i64> %B, ptr %dst - ret void -} - -define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) { -; CHECK-LABEL: load_sext_4i32_to_4i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 16 -; CHECK-NEXT: vslli.d $vr1, $vr1, 32 -; CHECK-NEXT: vsrai.d $vr1, $vr1, 32 -; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 50 -; CHECK-NEXT: vslli.d $vr0, $vr0, 32 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 32 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr1, $a1, 0 +; CHECK-NEXT: vst $vr4, $a1, 32 +; CHECK-NEXT: vst $vr2, $a1, 16 +; CHECK-NEXT: vst $vr3, $a1, 0 ; CHECK-NEXT: ret entry: - %A = load <4 x i32>, ptr %ptr - %B = sext <4 x i32> %A to <4 x i64> - store <4 x i64> %B, ptr %dst + %A = load <16 x i16>, ptr %ptr + %B = sext <16 x i16> %A to <16 x i64> + store <16 x i64> %B, ptr %dst ret void } diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll index 602c0f1..2ace0bf 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll @@ -2,7 +2,6 @@ ; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 ; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64 - define void @load_zext_2i8_to_2i64(ptr %ptr, ptr %dst) { ; CHECK-LABEL: load_zext_2i8_to_2i64: ; CHECK: # %bb.0: # %entry @@ -21,64 +20,83 @@ entry: ret void } -define void @load_zext_4i8_to_4i32(ptr %ptr, ptr %dst) { -; CHECK-LABEL: load_zext_4i8_to_4i32: +define void @load_zext_2i16_to_2i64(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_zext_2i16_to_2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 ; CHECK-NEXT: vrepli.b $vr1, 0 -; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0 ; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: - %A = load <4 x i8>, ptr %ptr - %B = zext <4 x i8> %A to <4 x i32> - store <4 x i32> %B, ptr %dst + %A = load <2 x i16>, ptr %ptr + %B = zext <2 x i16> %A to <2 x i64> + store <2 x i64> %B, ptr %dst ret void } -define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) { -; LA32-LABEL: load_zext_8i8_to_8i16: +define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) { +; LA32-LABEL: load_zext_2i32_to_2i64: ; LA32: # %bb.0: # %entry ; LA32-NEXT: ld.w $a2, $a0, 0 ; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: vrepli.b $vr0, 0 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 -; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 -; LA32-NEXT: vrepli.b $vr1, 0 -; LA32-NEXT: vilvl.b $vr0, $vr1, $vr0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2 ; 
LA32-NEXT: vst $vr0, $a1, 0 ; LA32-NEXT: ret ; -; LA64-LABEL: load_zext_8i8_to_8i16: +; LA64-LABEL: load_zext_2i32_to_2i64: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ld.d $a0, $a0, 0 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 ; LA64-NEXT: vrepli.b $vr1, 0 -; LA64-NEXT: vilvl.b $vr0, $vr1, $vr0 +; LA64-NEXT: vilvl.w $vr0, $vr1, $vr0 ; LA64-NEXT: vst $vr0, $a1, 0 ; LA64-NEXT: ret entry: - %A = load <8 x i8>, ptr %ptr - %B = zext <8 x i8> %A to <8 x i16> - store <8 x i16> %B, ptr %dst + %A = load <2 x i32>, ptr %ptr + %B = zext <2 x i32> %A to <2 x i64> + store <2 x i64> %B, ptr %dst ret void } -define void @load_zext_2i16_to_2i64(ptr %ptr, ptr %dst) { -; CHECK-LABEL: load_zext_2i16_to_2i64: +define void @load_zext_4i8_to_4i32(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_zext_4i8_to_4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 ; CHECK-NEXT: vrepli.b $vr1, 0 +; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0 ; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0 -; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0 ; CHECK-NEXT: vst $vr0, $a1, 0 ; CHECK-NEXT: ret entry: - %A = load <2 x i16>, ptr %ptr - %B = zext <2 x i16> %A to <2 x i64> - store <2 x i64> %B, ptr %dst + %A = load <4 x i8>, ptr %ptr + %B = zext <4 x i8> %A to <4 x i32> + store <4 x i32> %B, ptr %dst + ret void +} + +define void @load_zext_4i8_to_4i64(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_zext_4i8_to_4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld.w $a0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; CHECK-NEXT: vrepli.b $vr1, 0 +; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vilvl.w $vr2, $vr1, $vr0 +; CHECK-NEXT: vilvh.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 16 +; CHECK-NEXT: vst $vr2, $a1, 0 +; CHECK-NEXT: ret +entry: + %A = load <4 x i8>, ptr %ptr + %B = zext <4 x i8> %A to <4 x i64> + store <4 x i64> %B, ptr %dst ret void } @@ -109,29 +127,222 @@ entry: ret void } -define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) { -; LA32-LABEL: load_zext_2i32_to_2i64: +define void @load_zext_4i16_to_4i64(ptr %ptr, ptr %dst) { +; LA32-LABEL: load_zext_4i16_to_4i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: vrepli.b $vr1, 0 +; LA32-NEXT: vilvl.h $vr0, $vr1, $vr0 +; LA32-NEXT: vilvl.w $vr0, $vr1, $vr0 +; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0 +; LA32-NEXT: vilvl.h $vr2, $vr1, $vr2 +; LA32-NEXT: vilvl.w $vr1, $vr1, $vr2 +; LA32-NEXT: vst $vr1, $a1, 16 +; LA32-NEXT: vst $vr0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_zext_4i16_to_4i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vrepli.b $vr1, 0 +; LA64-NEXT: vilvl.h $vr0, $vr1, $vr0 +; LA64-NEXT: vilvl.w $vr2, $vr1, $vr0 +; LA64-NEXT: vilvh.w $vr0, $vr1, $vr0 +; LA64-NEXT: vst $vr0, $a1, 16 +; LA64-NEXT: vst $vr2, $a1, 0 +; LA64-NEXT: ret +entry: + %A = load <4 x i16>, ptr %ptr + %B = zext <4 x i16> %A to <4 x i64> + store <4 x i64> %B, ptr %dst + ret void +} + +define void @load_zext_4i32_to_4i64(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_zext_4i32_to_4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vrepli.b $vr1, 0 +; CHECK-NEXT: vilvl.w $vr2, $vr1, $vr0 +; CHECK-NEXT: vilvh.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 16 +; CHECK-NEXT: vst $vr2, $a1, 0 +; CHECK-NEXT: ret +entry: + %A = load <4 x i32>, ptr %ptr + %B = zext <4 x i32> %A to <4 x i64> + store <4 x i64> %B, ptr %dst + ret void +} + +define void 
@load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) { +; LA32-LABEL: load_zext_8i8_to_8i16: ; LA32: # %bb.0: # %entry ; LA32-NEXT: ld.w $a2, $a0, 0 ; LA32-NEXT: ld.w $a0, $a0, 4 -; LA32-NEXT: vrepli.b $vr0, 0 ; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 -; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; LA32-NEXT: vrepli.b $vr1, 0 +; LA32-NEXT: vilvl.b $vr0, $vr1, $vr0 ; LA32-NEXT: vst $vr0, $a1, 0 ; LA32-NEXT: ret ; -; LA64-LABEL: load_zext_2i32_to_2i64: +; LA64-LABEL: load_zext_8i8_to_8i16: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ld.d $a0, $a0, 0 ; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 ; LA64-NEXT: vrepli.b $vr1, 0 -; LA64-NEXT: vilvl.w $vr0, $vr1, $vr0 +; LA64-NEXT: vilvl.b $vr0, $vr1, $vr0 ; LA64-NEXT: vst $vr0, $a1, 0 ; LA64-NEXT: ret entry: - %A = load <2 x i32>, ptr %ptr - %B = zext <2 x i32> %A to <2 x i64> - store <2 x i64> %B, ptr %dst + %A = load <8 x i8>, ptr %ptr + %B = zext <8 x i8> %A to <8 x i16> + store <8 x i16> %B, ptr %dst + ret void +} + +define void @load_zext_8i8_to_8i32(ptr %ptr, ptr %dst) { +; LA32-LABEL: load_zext_8i8_to_8i32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: vrepli.b $vr1, 0 +; LA32-NEXT: vilvl.b $vr0, $vr1, $vr0 +; LA32-NEXT: vilvl.h $vr0, $vr1, $vr0 +; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0 +; LA32-NEXT: vilvl.b $vr2, $vr1, $vr2 +; LA32-NEXT: vilvl.h $vr1, $vr1, $vr2 +; LA32-NEXT: vst $vr1, $a1, 16 +; LA32-NEXT: vst $vr0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_zext_8i8_to_8i32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vrepli.b $vr1, 0 +; LA64-NEXT: vilvl.b $vr0, $vr1, $vr0 +; LA64-NEXT: vilvl.h $vr2, $vr1, $vr0 +; LA64-NEXT: vilvh.h $vr0, $vr1, $vr0 +; LA64-NEXT: vst $vr0, $a1, 16 +; LA64-NEXT: vst $vr2, $a1, 0 +; LA64-NEXT: ret +entry: + %A = load <8 x i8>, ptr %ptr + %B = zext <8 x i8> %A to <8 x i32> + store <8 x i32> %B, ptr %dst + ret void +} + +define void @load_zext_8i8_to_8i64(ptr %ptr, ptr %dst) { +; LA32-LABEL: load_zext_8i8_to_8i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; LA32-NEXT: vrepli.b $vr1, 0 +; LA32-NEXT: vilvl.b $vr0, $vr1, $vr0 +; LA32-NEXT: vilvl.h $vr2, $vr1, $vr0 +; LA32-NEXT: vilvl.w $vr3, $vr1, $vr2 +; LA32-NEXT: vilvh.w $vr2, $vr1, $vr2 +; LA32-NEXT: vilvh.h $vr0, $vr1, $vr0 +; LA32-NEXT: vilvl.w $vr4, $vr1, $vr0 +; LA32-NEXT: vilvh.w $vr0, $vr1, $vr0 +; LA32-NEXT: vst $vr0, $a1, 48 +; LA32-NEXT: vst $vr4, $a1, 32 +; LA32-NEXT: vst $vr2, $a1, 16 +; LA32-NEXT: vst $vr3, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_zext_8i8_to_8i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vrepli.b $vr1, 0 +; LA64-NEXT: vilvl.b $vr0, $vr1, $vr0 +; LA64-NEXT: vilvl.h $vr2, $vr1, $vr0 +; LA64-NEXT: vilvl.w $vr3, $vr1, $vr2 +; LA64-NEXT: vilvh.w $vr2, $vr1, $vr2 +; LA64-NEXT: vilvh.h $vr0, $vr1, $vr0 +; LA64-NEXT: vilvl.w $vr4, $vr1, $vr0 +; LA64-NEXT: vilvh.w $vr0, $vr1, $vr0 +; LA64-NEXT: vst $vr0, $a1, 48 +; LA64-NEXT: vst $vr4, $a1, 32 +; LA64-NEXT: vst $vr2, $a1, 16 +; LA64-NEXT: vst $vr3, $a1, 0 +; LA64-NEXT: ret +entry: + %A = load <8 x i8>, ptr %ptr + %B = zext <8 x i8> %A to <8 x i64> + store <8 x i64> %B, ptr %dst + ret void +} + +define void @load_zext_8i16_to_8i32(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_zext_8i16_to_8i32: +; CHECK: # %bb.0: # %entry 
+; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vrepli.b $vr1, 0 +; CHECK-NEXT: vilvl.h $vr2, $vr1, $vr0 +; CHECK-NEXT: vilvh.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 16 +; CHECK-NEXT: vst $vr2, $a1, 0 +; CHECK-NEXT: ret +entry: + %A = load <8 x i16>, ptr %ptr + %B = zext <8 x i16> %A to <8 x i32> + store <8 x i32> %B, ptr %dst + ret void +} + +define void @load_zext_8i16_to_8i64(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_zext_8i16_to_8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vrepli.b $vr1, 0 +; CHECK-NEXT: vilvl.h $vr2, $vr1, $vr0 +; CHECK-NEXT: vilvl.w $vr3, $vr1, $vr2 +; CHECK-NEXT: vilvh.w $vr2, $vr1, $vr2 +; CHECK-NEXT: vilvh.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vilvl.w $vr4, $vr1, $vr0 +; CHECK-NEXT: vilvh.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 48 +; CHECK-NEXT: vst $vr4, $a1, 32 +; CHECK-NEXT: vst $vr2, $a1, 16 +; CHECK-NEXT: vst $vr3, $a1, 0 +; CHECK-NEXT: ret +entry: + %A = load <8 x i16>, ptr %ptr + %B = zext <8 x i16> %A to <8 x i64> + store <8 x i64> %B, ptr %dst + ret void +} + +define void @load_zext_8i32_to_8i64(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_zext_8i32_to_8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vld $vr1, $a0, 16 +; CHECK-NEXT: vrepli.b $vr2, 0 +; CHECK-NEXT: vilvl.w $vr3, $vr2, $vr0 +; CHECK-NEXT: vilvh.w $vr0, $vr2, $vr0 +; CHECK-NEXT: vilvl.w $vr4, $vr2, $vr1 +; CHECK-NEXT: vilvh.w $vr1, $vr2, $vr1 +; CHECK-NEXT: vst $vr1, $a1, 48 +; CHECK-NEXT: vst $vr4, $a1, 32 +; CHECK-NEXT: vst $vr0, $a1, 16 +; CHECK-NEXT: vst $vr3, $a1, 0 +; CHECK-NEXT: ret +entry: + %A = load <8 x i32>, ptr %ptr + %B = zext <8 x i32> %A to <8 x i64> + store <8 x i64> %B, ptr %dst ret void } @@ -210,59 +421,58 @@ entry: ret void } -define void @load_zext_8i16_to_8i32(ptr %ptr, ptr %dst) { -; CHECK-LABEL: load_zext_8i16_to_8i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vrepli.b $vr1, 0 -; CHECK-NEXT: vilvl.h $vr2, $vr1, $vr0 -; CHECK-NEXT: vilvh.h $vr0, $vr1, $vr0 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr2, $a1, 0 -; CHECK-NEXT: ret -entry: - %A = load <8 x i16>, ptr %ptr - %B = zext <8 x i16> %A to <8 x i32> - store <8 x i32> %B, ptr %dst - ret void -} - -define void @load_zext_8i16_to_8i64(ptr %ptr, ptr %dst) { -; CHECK-LABEL: load_zext_8i16_to_8i64: +define void @load_zext_16i16_to_16i32(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_zext_16i16_to_16i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vrepli.b $vr1, 0 -; CHECK-NEXT: vilvl.h $vr2, $vr1, $vr0 -; CHECK-NEXT: vilvl.w $vr3, $vr1, $vr2 -; CHECK-NEXT: vilvh.w $vr2, $vr1, $vr2 -; CHECK-NEXT: vilvh.h $vr0, $vr1, $vr0 -; CHECK-NEXT: vilvl.w $vr4, $vr1, $vr0 -; CHECK-NEXT: vilvh.w $vr0, $vr1, $vr0 -; CHECK-NEXT: vst $vr0, $a1, 48 +; CHECK-NEXT: vld $vr1, $a0, 16 +; CHECK-NEXT: vrepli.b $vr2, 0 +; CHECK-NEXT: vilvl.h $vr3, $vr2, $vr0 +; CHECK-NEXT: vilvh.h $vr0, $vr2, $vr0 +; CHECK-NEXT: vilvl.h $vr4, $vr2, $vr1 +; CHECK-NEXT: vilvh.h $vr1, $vr2, $vr1 +; CHECK-NEXT: vst $vr1, $a1, 48 ; CHECK-NEXT: vst $vr4, $a1, 32 -; CHECK-NEXT: vst $vr2, $a1, 16 +; CHECK-NEXT: vst $vr0, $a1, 16 ; CHECK-NEXT: vst $vr3, $a1, 0 ; CHECK-NEXT: ret entry: - %A = load <8 x i16>, ptr %ptr - %B = zext <8 x i16> %A to <8 x i64> - store <8 x i64> %B, ptr %dst + %A = load <16 x i16>, ptr %ptr + %B = zext <16 x i16> %A to <16 x i32> + store <16 x i32> %B, ptr %dst ret void } -define void @load_zext_4i32_to_4i64(ptr %ptr, ptr %dst) { -; CHECK-LABEL: load_zext_4i32_to_4i64: +define void 
@load_zext_16i16_to_16i64(ptr %ptr, ptr %dst) { +; CHECK-LABEL: load_zext_16i16_to_16i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vrepli.b $vr1, 0 -; CHECK-NEXT: vilvl.w $vr2, $vr1, $vr0 -; CHECK-NEXT: vilvh.w $vr0, $vr1, $vr0 -; CHECK-NEXT: vst $vr0, $a1, 16 -; CHECK-NEXT: vst $vr2, $a1, 0 +; CHECK-NEXT: vld $vr1, $a0, 16 +; CHECK-NEXT: vrepli.b $vr2, 0 +; CHECK-NEXT: vilvl.h $vr3, $vr2, $vr0 +; CHECK-NEXT: vilvl.w $vr4, $vr2, $vr3 +; CHECK-NEXT: vilvh.w $vr3, $vr2, $vr3 +; CHECK-NEXT: vilvh.h $vr0, $vr2, $vr0 +; CHECK-NEXT: vilvl.w $vr5, $vr2, $vr0 +; CHECK-NEXT: vilvh.w $vr0, $vr2, $vr0 +; CHECK-NEXT: vilvl.h $vr6, $vr2, $vr1 +; CHECK-NEXT: vilvl.w $vr7, $vr2, $vr6 +; CHECK-NEXT: vilvh.w $vr6, $vr2, $vr6 +; CHECK-NEXT: vilvh.h $vr1, $vr2, $vr1 +; CHECK-NEXT: vilvl.w $vr8, $vr2, $vr1 +; CHECK-NEXT: vilvh.w $vr1, $vr2, $vr1 +; CHECK-NEXT: vst $vr1, $a1, 112 +; CHECK-NEXT: vst $vr8, $a1, 96 +; CHECK-NEXT: vst $vr6, $a1, 80 +; CHECK-NEXT: vst $vr7, $a1, 64 +; CHECK-NEXT: vst $vr0, $a1, 48 +; CHECK-NEXT: vst $vr5, $a1, 32 +; CHECK-NEXT: vst $vr3, $a1, 16 +; CHECK-NEXT: vst $vr4, $a1, 0 ; CHECK-NEXT: ret entry: - %A = load <4 x i32>, ptr %ptr - %B = zext <4 x i32> %A to <4 x i64> - store <4 x i64> %B, ptr %dst + %A = load <16 x i16>, ptr %ptr + %B = zext <16 x i16> %A to <16 x i64> + store <16 x i64> %B, ptr %dst ret void }
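Every test in the diffs above follows one shape: load a narrow vector, sign- or zero-extend it to a wider element type, and store the widened result. Two lowering idioms recur throughout the CHECK lines, and the sketch below reconstructs both from the assertions themselves; the file and function names here are illustrative only and are not part of the diff.

; extend-idioms.ll (hypothetical file, not in the commit)
; Reproduce with an llc built with the LoongArch backend, mirroring the RUN
; lines in the test files above:
;   llc --mtriple=loongarch64 --mattr=+lsx extend-idioms.ll -o -

define void @sext_idiom_4i8_to_4i32(ptr %ptr, ptr %dst) {
entry:
  ; sext currently lowers to a self-interleave that widens each lane, followed
  ; by a shift-left / arithmetic-shift-right pair that replicates the sign bit
  ; (compare load_sext_4i8_to_4i32 above):
  ;   vilvl.b $vr0, $vr0, $vr0   ; widen i8 lanes to i16 (low half)
  ;   vilvl.h $vr0, $vr0, $vr0   ; widen i16 lanes to i32
  ;   vslli.w $vr0, $vr0, 24     ; move each byte to the top of its word
  ;   vsrai.w $vr0, $vr0, 24     ; arithmetic shift sign-extends it back down
  %a = load <4 x i8>, ptr %ptr
  %b = sext <4 x i8> %a to <4 x i32>
  store <4 x i32> %b, ptr %dst
  ret void
}

define void @zext_idiom_4i8_to_4i32(ptr %ptr, ptr %dst) {
entry:
  ; zext instead interleaves with an all-zero register, so the high bits of
  ; every widened lane are already zero (compare load_zext_4i8_to_4i32 above):
  ;   vrepli.b $vr1, 0           ; materialize a zero vector
  ;   vilvl.b $vr0, $vr1, $vr0   ; zero-widen i8 lanes to i16
  ;   vilvl.h $vr0, $vr1, $vr0   ; zero-widen i16 lanes to i32
  %a = load <4 x i8>, ptr %ptr
  %b = zext <4 x i8> %a to <4 x i32>
  store <4 x i32> %b, ptr %dst
  ret void
}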