; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=powerpc64le < %s | FileCheck %s

; Code-generation test for the llvm.masked.udiv intrinsic on powerpc64le.
; Every function below forwards its three arguments (%x, %y and the mask %m)
; to the intrinsic and returns the result unchanged; the assertions pin the
; exact instruction sequence llc is expected to emit. They are autogenerated
; (see the note on the first line), so regenerate them with that script
; rather than editing them by hand.
; NOTE(review): the short labels in front of each function ("Legal",
; "Splitting", ...) presumably name the type-legalization path that the
; vector type is meant to exercise - confirm against the legalizer.

; Legal
; <4 x i32> case. The expected code shifts the mask register left and then
; arithmetically right (vslw/vsraw), blends a splat of 1 (vspltisw 5, 1) with
; another vector through xxsel under that mask, and issues four scalar divwu.
; NOTE(review): the blend presumably substitutes 1 for the divisor in
; masked-off lanes - confirm against the intrinsic's definition.
define <4 x i32> @udiv_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i1> %m) {
; CHECK-LABEL: udiv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xxleqv 32, 32, 32
; CHECK-NEXT: vspltisw 5, 1
; CHECK-NEXT: xxsldwi 1, 34, 34, 1
; CHECK-NEXT: vslw 4, 4, 0
; CHECK-NEXT: xxswapd 4, 34
; CHECK-NEXT: xxsldwi 6, 34, 34, 3
; CHECK-NEXT: mffprwz 4, 1
; CHECK-NEXT: vsraw 4, 4, 0
; CHECK-NEXT: xxsel 0, 37, 35, 36
; CHECK-NEXT: xxsldwi 2, 0, 0, 1
; CHECK-NEXT: xxswapd 3, 0
; CHECK-NEXT: xxsldwi 5, 0, 0, 3
; CHECK-NEXT: mffprwz 3, 2
; CHECK-NEXT: mffprwz 5, 3
; CHECK-NEXT: divwu 3, 4, 3
; CHECK-NEXT: mffprwz 4, 4
; CHECK-NEXT: divwu 4, 4, 5
; CHECK-NEXT: mfvsrwz 5, 34
; CHECK-NEXT: rldimi 4, 3, 32, 0
; CHECK-NEXT: mffprwz 3, 5
; CHECK-NEXT: mtfprd 1, 4
; CHECK-NEXT: mffprwz 4, 6
; CHECK-NEXT: divwu 3, 4, 3
; CHECK-NEXT: mffprwz 4, 0
; CHECK-NEXT: divwu 4, 5, 4
; CHECK-NEXT: rldimi 4, 3, 32, 0
; CHECK-NEXT: mtfprd 0, 4
; CHECK-NEXT: xxmrghd 34, 0, 1
; CHECK-NEXT: blr
  %res = call <4 x i32> @llvm.masked.udiv(<4 x i32> %x, <4 x i32> %y, <4 x i1> %m)
  ret <4 x i32> %res
}

; <2 x i64> case (listed under the same "Legal" label as the function above).
; The expected code widens the splat of 1 with vupklsw, blends through a
; single xxsel, and issues two scalar divdu.
define <2 x i64> @udiv_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i1> %m) {
; CHECK-LABEL: udiv_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xxleqv 32, 32, 32
; CHECK-NEXT: vspltisw 5, 1
; CHECK-NEXT: mfvsrd 4, 34
; CHECK-NEXT: xxswapd 2, 34
; CHECK-NEXT: vsld 4, 4, 0
; CHECK-NEXT: vsrad 4, 4, 0
; CHECK-NEXT: vupklsw 5, 5
; CHECK-NEXT: xxsel 0, 37, 35, 36
; CHECK-NEXT: mffprd 3, 0
; CHECK-NEXT: divdu 3, 4, 3
; CHECK-NEXT: mffprd 4, 2
; CHECK-NEXT: xxswapd 1, 0
; CHECK-NEXT: mtfprd 0, 3
; CHECK-NEXT: mffprd 3, 1
; CHECK-NEXT: divdu 3, 4, 3
; CHECK-NEXT: mtfprd 1, 3
; CHECK-NEXT: xxmrghd 34, 0, 1
; CHECK-NEXT: blr
  %res = call <2 x i64> @llvm.masked.udiv(<2 x i64> %x, <2 x i64> %y, <2 x i1> %m)
  ret <2 x i64> %res
}

; Splitting
; <4 x i64> case. The expected code separates the mask into two registers
; (xxmrglw/xxmrghw), performs two xxsel blends, issues four scalar divdu, and
; produces its result in two vector registers (34 and 35).
define <4 x i64> @udiv_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i1> %m) {
; CHECK-LABEL: udiv_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xxmrglw 32, 38, 38
; CHECK-NEXT: xxleqv 39, 39, 39
; CHECK-NEXT: xxmrghw 33, 38, 38
; CHECK-NEXT: mfvsrd 3, 34
; CHECK-NEXT: vspltisw 6, 1
; CHECK-NEXT: mfvsrd 4, 35
; CHECK-NEXT: xxswapd 2, 34
; CHECK-NEXT: xxswapd 4, 35
; CHECK-NEXT: vsld 0, 0, 7
; CHECK-NEXT: mffprd 5, 2
; CHECK-NEXT: vsrad 0, 0, 7
; CHECK-NEXT: vupklsw 6, 6
; CHECK-NEXT: xxsel 0, 38, 36, 32
; CHECK-NEXT: vsld 4, 1, 7
; CHECK-NEXT: mffprd 6, 0
; CHECK-NEXT: vsrad 4, 4, 7
; CHECK-NEXT: divdu 3, 3, 6
; CHECK-NEXT: xxswapd 3, 0
; CHECK-NEXT: mtfprd 0, 3
; CHECK-NEXT: xxsel 1, 38, 37, 36
; CHECK-NEXT: mffprd 6, 1
; CHECK-NEXT: divdu 4, 4, 6
; CHECK-NEXT: mffprd 6, 3
; CHECK-NEXT: divdu 5, 5, 6
; CHECK-NEXT: mtfprd 2, 5
; CHECK-NEXT: xxswapd 5, 1
; CHECK-NEXT: mtfprd 1, 4
; CHECK-NEXT: mffprd 3, 5
; CHECK-NEXT: mffprd 4, 4
; CHECK-NEXT: divdu 3, 4, 3
; CHECK-NEXT: xxmrghd 34, 0, 2
; CHECK-NEXT: mtfprd 0, 3
; CHECK-NEXT: xxmrghd 35, 1, 0
; CHECK-NEXT: blr
  %res = call <4 x i64> @llvm.masked.udiv(<4 x i64> %x, <4 x i64> %y, <4 x i1> %m)
  ret <4 x i64> %res
}

; Widening
; <2 x i32> case. The expected code first rearranges the mask with a vperm
; whose control vector is loaded from the constant pool (.LCPI3_0), then
; follows the same vslw/vsraw + xxsel shape as udiv_v4i32 and still issues
; four divwu, i.e. the division is carried out on four word lanes.
define <2 x i32> @udiv_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i1> %m) {
; CHECK-LABEL: udiv_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LCPI3_0@toc@ha
; CHECK-NEXT: xxlxor 32, 32, 32
; CHECK-NEXT: xxsldwi 1, 34, 34, 1
; CHECK-NEXT: addi 3, 3, .LCPI3_0@toc@l
; CHECK-NEXT: mffprwz 4, 1
; CHECK-NEXT: xxswapd 4, 34
; CHECK-NEXT: xxsldwi 6, 34, 34, 3
; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: xxswapd 37, 0
; CHECK-NEXT: vperm 4, 0, 4, 5
; CHECK-NEXT: xxleqv 32, 32, 32
; CHECK-NEXT: vspltisw 5, 1
; CHECK-NEXT: vslw 4, 4, 0
; CHECK-NEXT: vsraw 4, 4, 0
; CHECK-NEXT: xxsel 0, 37, 35, 36
; CHECK-NEXT: xxsldwi 2, 0, 0, 1
; CHECK-NEXT: xxswapd 3, 0
; CHECK-NEXT: xxsldwi 5, 0, 0, 3
; CHECK-NEXT: mffprwz 3, 2
; CHECK-NEXT: mffprwz 5, 3
; CHECK-NEXT: divwu 3, 4, 3
; CHECK-NEXT: mffprwz 4, 4
; CHECK-NEXT: divwu 4, 4, 5
; CHECK-NEXT: mfvsrwz 5, 34
; CHECK-NEXT: rldimi 4, 3, 32, 0
; CHECK-NEXT: mffprwz 3, 5
; CHECK-NEXT: mtfprd 1, 4
; CHECK-NEXT: mffprwz 4, 6
; CHECK-NEXT: divwu 3, 4, 3
; CHECK-NEXT: mffprwz 4, 0
; CHECK-NEXT: divwu 4, 5, 4
; CHECK-NEXT: rldimi 4, 3, 32, 0
; CHECK-NEXT: mtfprd 0, 4
; CHECK-NEXT: xxmrghd 34, 0, 1
; CHECK-NEXT: blr
  %res = call <2 x i32> @llvm.masked.udiv(<2 x i32> %x, <2 x i32> %y, <2 x i1> %m)
  ret <2 x i32> %res
}

; Promotion
; <4 x i16> case. The expected code works on halfword lanes (vmrghh, vslh,
; vsrah, vspltish), blends through xxsel, extracts 16-bit fields with
; rldicl/clrldi, clears them with clrlwi, and issues eight divwu before
; re-merging the results.
define <4 x i16> @udiv_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i1> %m) {
; CHECK-LABEL: udiv_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xxswapd 0, 36
; CHECK-NEXT: xxsldwi 1, 36, 36, 1
; CHECK-NEXT: mfvsrwz 3, 36
; CHECK-NEXT: li 7, 0
; CHECK-NEXT: xxsldwi 2, 36, 36, 3
; CHECK-NEXT: mffprwz 4, 0
; CHECK-NEXT: mffprwz 5, 1
; CHECK-NEXT: mffprwz 6, 2
; CHECK-NEXT: mtvsrd 36, 3
; CHECK-NEXT: mtvsrd 37, 4
; CHECK-NEXT: mtvsrd 32, 5
; CHECK-NEXT: mfvsrd 5, 34
; CHECK-NEXT: rldicl 8, 5, 48, 48
; CHECK-NEXT: rldicl 9, 5, 32, 48
; CHECK-NEXT: clrlwi 8, 8, 16
; CHECK-NEXT: clrlwi 9, 9, 16
; CHECK-NEXT: vmrghh 5, 0, 5
; CHECK-NEXT: mtvsrd 32, 6
; CHECK-NEXT: vmrghh 4, 0, 4
; CHECK-NEXT: mtvsrd 32, 7
; CHECK-NEXT: clrldi 7, 5, 48
; CHECK-NEXT: rldicl 5, 5, 16, 48
; CHECK-NEXT: clrlwi 7, 7, 16
; CHECK-NEXT: clrlwi 5, 5, 16
; CHECK-NEXT: xxmrglw 1, 36, 37
; CHECK-NEXT: vspltish 4, 15
; CHECK-NEXT: vsplth 0, 0, 3
; CHECK-NEXT: xxspltw 0, 32, 3
; CHECK-NEXT: vspltish 0, 1
; CHECK-NEXT: xxmrgld 37, 0, 1
; CHECK-NEXT: xxswapd 1, 34
; CHECK-NEXT: vslh 5, 5, 4
; CHECK-NEXT: mffprd 3, 1
; CHECK-NEXT: vsrah 4, 5, 4
; CHECK-NEXT: clrldi 10, 3, 48
; CHECK-NEXT: rldicl 11, 3, 48, 48
; CHECK-NEXT: clrlwi 10, 10, 16
; CHECK-NEXT: clrlwi 11, 11, 16
; CHECK-NEXT: xxsel 0, 32, 35, 36
; CHECK-NEXT: mffprd 6, 0
; CHECK-NEXT: clrldi 12, 6, 48
; CHECK-NEXT: clrlwi 12, 12, 16
; CHECK-NEXT: divwu 7, 7, 12
; CHECK-NEXT: rldicl 12, 6, 48, 48
; CHECK-NEXT: clrlwi 12, 12, 16
; CHECK-NEXT: divwu 8, 8, 12
; CHECK-NEXT: xxswapd 2, 0
; CHECK-NEXT: mffprd 4, 2
; CHECK-NEXT: rldicl 12, 6, 32, 48
; CHECK-NEXT: rldicl 6, 6, 16, 48
; CHECK-NEXT: clrlwi 6, 6, 16
; CHECK-NEXT: clrlwi 12, 12, 16
; CHECK-NEXT: divwu 5, 5, 6
; CHECK-NEXT: clrldi 6, 4, 48
; CHECK-NEXT: divwu 9, 9, 12
; CHECK-NEXT: rldicl 12, 3, 32, 48
; CHECK-NEXT: rldicl 3, 3, 16, 48
; CHECK-NEXT: clrlwi 6, 6, 16
; CHECK-NEXT: clrlwi 12, 12, 16
; CHECK-NEXT: clrlwi 3, 3, 16
; CHECK-NEXT: divwu 6, 10, 6
; CHECK-NEXT: rldicl 10, 4, 48, 48
; CHECK-NEXT: clrlwi 10, 10, 16
; CHECK-NEXT: mtvsrd 34, 7
; CHECK-NEXT: divwu 10, 11, 10
; CHECK-NEXT: rldicl 11, 4, 32, 48
; CHECK-NEXT: rldicl 4, 4, 16, 48
; CHECK-NEXT: clrlwi 11, 11, 16
; CHECK-NEXT: clrlwi 4, 4, 16
; CHECK-NEXT: mtvsrd 35, 8
; CHECK-NEXT: divwu 11, 12, 11
; CHECK-NEXT: divwu 3, 3, 4
; CHECK-NEXT: mtvsrd 36, 9
; CHECK-NEXT: mtvsrd 37, 5
; CHECK-NEXT: mtvsrd 32, 6
; CHECK-NEXT: vmrghh 2, 3, 2
; CHECK-NEXT: vmrghh 3, 5, 4
; CHECK-NEXT: mtvsrd 36, 10
; CHECK-NEXT: mtvsrd 37, 11
; CHECK-NEXT: xxmrglw 0, 35, 34
; CHECK-NEXT: vmrghh 4, 4, 0
; CHECK-NEXT: mtvsrd 32, 3
; CHECK-NEXT: vmrghh 5, 0, 5
; CHECK-NEXT: xxmrglw 1, 37, 36
; CHECK-NEXT: xxmrgld 34, 0, 1
; CHECK-NEXT: blr
  %res = call <4 x i16> @llvm.masked.udiv(<4 x i16> %x, <4 x i16> %y, <4 x i1> %m)
  ret <4 x i16> %res
}

; Scalarization
; <1 x i64> case. The expected code is entirely scalar: andi. tests bit 0 of
; r5, iselgt picks between r4 and the constant 1 loaded by li, and a single
; divdu produces the result.
; NOTE(review): the function name "udiv_v1i164" looks like a typo for
; "udiv_v1i64". It is left unchanged here because renaming it also changes
; the label assertion; fix both together if the typo is confirmed.
define <1 x i64> @udiv_v1i164(<1 x i64> %x, <1 x i64> %y, <1 x i1> %m) {
; CHECK-LABEL: udiv_v1i164:
; CHECK: # %bb.0:
; CHECK-NEXT: andi. 5, 5, 1
; CHECK-NEXT: li 5, 1
; CHECK-NEXT: iselgt 4, 4, 5
; CHECK-NEXT: divdu 3, 3, 4
; CHECK-NEXT: blr
  %res = call <1 x i64> @llvm.masked.udiv(<1 x i64> %x, <1 x i64> %y, <1 x i1> %m)
  ret <1 x i64> %res
}

; Expansion
; <2 x i128> case; the only function here marked nounwind. The expected code
; sets up a 128-byte stack frame, spills r29/r30 and vector registers
; 61-63, tests the mask bits with andi., uses iselgt/isel to choose between
; register values and the constants held in r30 (1) and r29 (0), and calls
; __udivti3 twice before restoring the saved state.
define <2 x i128> @udiv_v2i128(<2 x i128> %x, <2 x i128> %y, <2 x i1> %m) nounwind {
; CHECK-LABEL: udiv_v2i128:
; CHECK: # %bb.0:
; CHECK-NEXT: mfocrf 12, 32
; CHECK-NEXT: stw 12, 8(1)
; CHECK-NEXT: mflr 0
; CHECK-NEXT: stdu 1, -128(1)
; CHECK-NEXT: li 3, 48
; CHECK-NEXT: std 0, 144(1)
; CHECK-NEXT: xxswapd 0, 38
; CHECK-NEXT: xxswapd 1, 37
; CHECK-NEXT: std 30, 112(1) # 8-byte Folded Spill
; CHECK-NEXT: li 30, 1
; CHECK-NEXT: std 29, 104(1) # 8-byte Folded Spill
; CHECK-NEXT: li 29, 0
; CHECK-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill
; CHECK-NEXT: li 3, 64
; CHECK-NEXT: mfvsrd 4, 35
; CHECK-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill
; CHECK-NEXT: li 3, 80
; CHECK-NEXT: vmr 30, 2
; CHECK-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
; CHECK-NEXT: mffprd 3, 0
; CHECK-NEXT: vmr 31, 4
; CHECK-NEXT: andi. 3, 3, 1
; CHECK-NEXT: mfvsrd 3, 38
; CHECK-NEXT: crmove 8, 1
; CHECK-NEXT: andi. 3, 3, 1
; CHECK-NEXT: mffprd 3, 1
; CHECK-NEXT: iselgt 5, 3, 30
; CHECK-NEXT: mfvsrd 3, 37
; CHECK-NEXT: xxswapd 0, 35
; CHECK-NEXT: iselgt 6, 3, 29
; CHECK-NEXT: mffprd 3, 0
; CHECK-NEXT: bl __udivti3
; CHECK-NEXT: nop
; CHECK-NEXT: xxswapd 0, 63
; CHECK-NEXT: mtfprd 1, 3
; CHECK-NEXT: mtfprd 2, 4
; CHECK-NEXT: mfvsrd 4, 62
; CHECK-NEXT: mffprd 3, 0
; CHECK-NEXT: isel 5, 3, 30, 8
; CHECK-NEXT: mfvsrd 3, 63
; CHECK-NEXT: isel 6, 3, 29, 8
; CHECK-NEXT: xxswapd 0, 62
; CHECK-NEXT: mffprd 3, 0
; CHECK-NEXT: xxmrghd 61, 2, 1
; CHECK-NEXT: bl __udivti3
; CHECK-NEXT: nop
; CHECK-NEXT: mtfprd 0, 3
; CHECK-NEXT: li 3, 80
; CHECK-NEXT: mtfprd 1, 4
; CHECK-NEXT: ld 30, 112(1) # 8-byte Folded Reload
; CHECK-NEXT: vmr 3, 29
; CHECK-NEXT: ld 29, 104(1) # 8-byte Folded Reload
; CHECK-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
; CHECK-NEXT: li 3, 64
; CHECK-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload
; CHECK-NEXT: li 3, 48
; CHECK-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload
; CHECK-NEXT: xxmrghd 34, 1, 0
; CHECK-NEXT: addi 1, 1, 128
; CHECK-NEXT: ld 0, 16(1)
; CHECK-NEXT: lwz 12, 8(1)
; CHECK-NEXT: mtlr 0
; CHECK-NEXT: mtocrf 32, 12
; CHECK-NEXT: blr
  %res = call <2 x i128> @llvm.masked.udiv(<2 x i128> %x, <2 x i128> %y, <2 x i1> %m)
  ret <2 x i128> %res
}

; Promotion and widening
; <3 x i10> case. The expected code reads one byte from the stack
; (lbz 11, 96(1)), assembles vectors from GPRs, ANDs both operands with a
; constant built by vspltisw/vsrw (xxland), blends through xxsel, issues
; three divwu, and moves the three results back into r3, r4 and r5.
; NOTE(review): the xxland constant is presumably the 10-bit mask 0x3FF -
; confirm.
define <3 x i10> @udiv_v3i10(<3 x i10> %x, <3 x i10> %y, <3 x i1> %m) {
; CHECK-LABEL: udiv_v3i10:
; CHECK: # %bb.0:
; CHECK-NEXT: lbz 11, 96(1)
; CHECK-NEXT: li 12, 0
; CHECK-NEXT: rldimi 9, 10, 32, 0
; CHECK-NEXT: addis 10, 2, .LCPI7_0@toc@ha
; CHECK-NEXT: mtvsrwz 33, 8
; CHECK-NEXT: vspltisw 4, -10
; CHECK-NEXT: xxleqv 38, 38, 38
; CHECK-NEXT: addi 10, 10, .LCPI7_0@toc@l
; CHECK-NEXT: mtfprd 0, 9
; CHECK-NEXT: vsrw 4, 4, 4
; CHECK-NEXT: rldimi 11, 12, 32, 0
; CHECK-NEXT: lxvd2x 1, 0, 10
; CHECK-NEXT: mtfprd 2, 11
; CHECK-NEXT: xxswapd 34, 1
; CHECK-NEXT: mtfprwz 1, 7
; CHECK-NEXT: xxmrghd 35, 2, 0
; CHECK-NEXT: mtfprwz 0, 6
; CHECK-NEXT: vslw 3, 3, 6
; CHECK-NEXT: vsraw 3, 3, 6
; CHECK-NEXT: xxmrghw 37, 1, 0
; CHECK-NEXT: mtfprwz 0, 3
; CHECK-NEXT: mtfprwz 1, 4
; CHECK-NEXT: vperm 5, 1, 5, 2
; CHECK-NEXT: mtvsrwz 33, 5
; CHECK-NEXT: xxmrghw 32, 1, 0
; CHECK-NEXT: xxland 0, 37, 36
; CHECK-NEXT: vperm 0, 1, 0, 2
; CHECK-NEXT: vspltisw 1, 1
; CHECK-NEXT: xxland 1, 32, 36
; CHECK-NEXT: xxsel 0, 33, 0, 35
; CHECK-NEXT: xxswapd 2, 0
; CHECK-NEXT: xxswapd 3, 1
; CHECK-NEXT: xxsldwi 4, 0, 0, 1
; CHECK-NEXT: xxsldwi 5, 1, 1, 1
; CHECK-NEXT: mffprwz 3, 2
; CHECK-NEXT: mffprwz 4, 3
; CHECK-NEXT: divwu 3, 4, 3
; CHECK-NEXT: mffprwz 4, 5
; CHECK-NEXT: mtfprwz 2, 3
; CHECK-NEXT: mffprwz 3, 4
; CHECK-NEXT: divwu 3, 4, 3
; CHECK-NEXT: mffprwz 4, 1
; CHECK-NEXT: mtfprwz 3, 3
; CHECK-NEXT: mffprwz 3, 0
; CHECK-NEXT: divwu 3, 4, 3
; CHECK-NEXT: mtvsrwz 36, 3
; CHECK-NEXT: xxmrghw 35, 3, 2
; CHECK-NEXT: vperm 2, 4, 3, 2
; CHECK-NEXT: mfvsrwz 5, 34
; CHECK-NEXT: xxswapd 0, 34
; CHECK-NEXT: xxsldwi 1, 34, 34, 1
; CHECK-NEXT: mffprwz 3, 0
; CHECK-NEXT: mffprwz 4, 1
; CHECK-NEXT: blr
  %res = call <3 x i10> @llvm.masked.udiv(<3 x i10> %x, <3 x i10> %y, <3 x i1> %m)
  ret <3 x i10> %res
}