; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE4
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE4
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX,AVX512

; PR34072 - failure to canonicalize to (sub (shuffle a, b),(shuffle a, b)) for optimal horizontal sub patterns (with undemanded elements)

; NOTE(review): the shufflevector FileCheck patterns below stop at the mask type
; and carry no mask operand. FileCheck matches a pattern anywhere within a line,
; so they still match but do not pin the masks; rerun
; utils/update_test_checks.py to regenerate them.
; NOTE(review): the mask on each function's final shufflevector follows the lane
; order spelled in the function name (hex digit = source lane, 'u' = poison
; lane) - TODO confirm against the upstream copy of this test.

;
; v8i16
;

; All eight lanes of the horizontal-sub vector are used, in order.
define <8 x i16> @sub_v8i16_01234567(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @sub_v8i16_01234567(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %a0 = extractelement <8 x i16> %a, i32 0
  %a1 = extractelement <8 x i16> %a, i32 1
  %a2 = extractelement <8 x i16> %a, i32 2
  %a3 = extractelement <8 x i16> %a, i32 3
  %a4 = extractelement <8 x i16> %a, i32 4
  %a5 = extractelement <8 x i16> %a, i32 5
  %a6 = extractelement <8 x i16> %a, i32 6
  %a7 = extractelement <8 x i16> %a, i32 7
  %a01 = sub i16 %a0, %a1
  %a23 = sub i16 %a2, %a3
  %a45 = sub i16 %a4, %a5
  %a67 = sub i16 %a6, %a7
  %b0 = extractelement <8 x i16> %b, i32 0
  %b1 = extractelement <8 x i16> %b, i32 1
  %b2 = extractelement <8 x i16> %b, i32 2
  %b3 = extractelement <8 x i16> %b, i32 3
  %b4 = extractelement <8 x i16> %b, i32 4
  %b5 = extractelement <8 x i16> %b, i32 5
  %b6 = extractelement <8 x i16> %b, i32 6
  %b7 = extractelement <8 x i16> %b, i32 7
  %b01 = sub i16 %b0, %b1
  %b23 = sub i16 %b2, %b3
  %b45 = sub i16 %b4, %b5
  %b67 = sub i16 %b6, %b7
  %hsub0 = insertelement <8 x i16> poison, i16 %a01, i32 0
  %hsub1 = insertelement <8 x i16> %hsub0, i16 %a23, i32 1
  %hsub2 = insertelement <8 x i16> %hsub1, i16 %a45, i32 2
  %hsub3 = insertelement <8 x i16> %hsub2, i16 %a67, i32 3
  %hsub4 = insertelement <8 x i16> %hsub3, i16 %b01, i32 4
  %hsub5 = insertelement <8 x i16> %hsub4, i16 %b23, i32 5
  %hsub6 = insertelement <8 x i16> %hsub5, i16 %b45, i32 6
  %hsub7 = insertelement <8 x i16> %hsub6, i16 %b67, i32 7
  %result = shufflevector <8 x i16> %hsub7, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %result
}

; Lane 0 of the result is left poison; lanes 1-7 are used in order.
define <8 x i16> @sub_v8i16_u1234567(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: @sub_v8i16_u1234567(
; SSE2-NEXT:    [[SHIFT3:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32>
; SSE2-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[A]], [[SHIFT3]]
; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32>
; SSE2-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32>
; SSE2-NEXT:    [[HSUB22:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
; SSE2-NEXT:    [[HSUB3:%.*]] = shufflevector <8 x i16> [[HSUB22]], <8 x i16> [[TMP6]], <8 x i32>
; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32>
; SSE2-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32>
; SSE2-NEXT:    [[TMP3:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
; SSE2-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i16> [[HSUB3]], <8 x i16> [[TMP3]], <8 x i32>
; SSE2-NEXT:    ret <8 x i16> [[RESULT]]
;
; SSE4-LABEL: @sub_v8i16_u1234567(
; SSE4-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32>
; SSE4-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32>
; SSE4-NEXT:    [[TMP7:%.*]] = sub <8 x i16> [[TMP5]], [[TMP6]]
; SSE4-NEXT:    ret <8 x i16> [[TMP7]]
;
; AVX-LABEL: @sub_v8i16_u1234567(
; AVX-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i32>
; AVX-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32>
; AVX-NEXT:    [[TMP7:%.*]] = sub <8 x i16> [[TMP5]], [[TMP6]]
; AVX-NEXT:    ret <8 x i16> [[TMP7]]
;
  %a0 = extractelement <8 x i16> %a, i32 0
  %a1 = extractelement <8 x i16> %a, i32 1
  %a2 = extractelement <8 x i16> %a, i32 2
  %a3 = extractelement <8 x i16> %a, i32 3
  %a4 = extractelement <8 x i16> %a, i32 4
  %a5 = extractelement <8 x i16> %a, i32 5
  %a6 = extractelement <8 x i16> %a, i32 6
  %a7 = extractelement <8 x i16> %a, i32 7
  %a01 = sub i16 %a0, %a1
  %a23 = sub i16 %a2, %a3
  %a45 = sub i16 %a4, %a5
  %a67 = sub i16 %a6, %a7
  %b0 = extractelement <8 x i16> %b, i32 0
  %b1 = extractelement <8 x i16> %b, i32 1
  %b2 = extractelement <8 x i16> %b, i32 2
  %b3 = extractelement <8 x i16> %b, i32 3
  %b4 = extractelement <8 x i16> %b, i32 4
  %b5 = extractelement <8 x i16> %b, i32 5
  %b6 = extractelement <8 x i16> %b, i32 6
  %b7 = extractelement <8 x i16> %b, i32 7
  %b01 = sub i16 %b0, %b1
  %b23 = sub i16 %b2, %b3
  %b45 = sub i16 %b4, %b5
  %b67 = sub i16 %b6, %b7
  %hsub0 = insertelement <8 x i16> poison, i16 %a01, i32 0
  %hsub1 = insertelement <8 x i16> %hsub0, i16 %a23, i32 1
  %hsub2 = insertelement <8 x i16> %hsub1, i16 %a45, i32 2
  %hsub3 = insertelement <8 x i16> %hsub2, i16 %a67, i32 3
  %hsub4 = insertelement <8 x i16> %hsub3, i16 %b01, i32 4
  %hsub5 = insertelement <8 x i16> %hsub4, i16 %b23, i32 5
  %hsub6 = insertelement <8 x i16> %hsub5, i16 %b45, i32 6
  %hsub7 = insertelement <8 x i16> %hsub6, i16 %b67, i32 7
  %result = shufflevector <8 x i16> %hsub7, <8 x i16> %a, <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %result
}

; Lanes are used in reverse order, with result lane 2 left poison.
define <8 x i16> @sub_v8i16_76u43210(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: @sub_v8i16_76u43210(
; SSE2-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32>
; SSE2-NEXT:    [[TMP1:%.*]] = sub <8 x i16> [[A]], [[SHIFT]]
; SSE2-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> poison, <8 x i32>
; SSE2-NEXT:    [[TMP2:%.*]] = sub <8 x i16> [[B]], [[SHIFT2]]
; SSE2-NEXT:    [[SHIFT3:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32>
; SSE2-NEXT:    [[TMP3:%.*]] = sub <8 x i16> [[SHIFT3]], [[B]]
; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32>
; SSE2-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32>
; SSE2-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
; SSE2-NEXT:    [[HSUB41:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP6]], <8 x i32>
; SSE2-NEXT:    [[HSUB6:%.*]] = shufflevector <8 x i16> [[HSUB41]], <8 x i16> [[TMP2]], <8 x i32>
; SSE2-NEXT:    [[HSUB7:%.*]] = shufflevector <8 x i16> [[HSUB6]], <8 x i16> [[TMP3]], <8 x i32>
; SSE2-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i16> [[HSUB7]], <8 x i16> poison, <8 x i32>
; SSE2-NEXT:    ret <8 x i16> [[RESULT]]
;
; SSE4-LABEL: @sub_v8i16_76u43210(
; SSE4-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32>
; SSE4-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32>
; SSE4-NEXT:    [[HSUB22:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
; SSE4-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B:%.*]], <8 x i32>
; SSE4-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32>
; SSE4-NEXT:    [[TMP5:%.*]] = sub <8 x i16> [[TMP3]], [[TMP4]]
; SSE4-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> [[HSUB22]], <8 x i32>
; SSE4-NEXT:    ret <8 x i16> [[RESULT]]
;
; AVX-LABEL: @sub_v8i16_76u43210(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <8 x i32>
; AVX-NEXT:    [[HSUB22:%.*]] = sub <8 x i16> [[TMP1]], [[TMP2]]
; AVX-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B:%.*]], <8 x i32>
; AVX-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32>
; AVX-NEXT:    [[TMP5:%.*]] = sub <8 x i16> [[TMP3]], [[TMP4]]
; AVX-NEXT:    [[RESULT:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> [[HSUB22]], <8 x i32>
; AVX-NEXT:    ret <8 x i16> [[RESULT]]
;
  %a0 = extractelement <8 x i16> %a, i32 0
  %a1 = extractelement <8 x i16> %a, i32 1
  %a2 = extractelement <8 x i16> %a, i32 2
  %a3 = extractelement <8 x i16> %a, i32 3
  %a4 = extractelement <8 x i16> %a, i32 4
  %a5 = extractelement <8 x i16> %a, i32 5
  %a6 = extractelement <8 x i16> %a, i32 6
  %a7 = extractelement <8 x i16> %a, i32 7
  %a01 = sub i16 %a0, %a1
  %a23 = sub i16 %a2, %a3
  %a45 = sub i16 %a4, %a5
  %a67 = sub i16 %a6, %a7
  %b0 = extractelement <8 x i16> %b, i32 0
  %b1 = extractelement <8 x i16> %b, i32 1
  %b2 = extractelement <8 x i16> %b, i32 2
  %b3 = extractelement <8 x i16> %b, i32 3
  %b4 = extractelement <8 x i16> %b, i32 4
  %b5 = extractelement <8 x i16> %b, i32 5
  %b6 = extractelement <8 x i16> %b, i32 6
  %b7 = extractelement <8 x i16> %b, i32 7
  %b01 = sub i16 %b0, %b1
  %b23 = sub i16 %b2, %b3
  %b45 = sub i16 %b4, %b5
  %b67 = sub i16 %b6, %b7
  %hsub0 = insertelement <8 x i16> poison, i16 %a01, i32 0
  %hsub1 = insertelement <8 x i16> %hsub0, i16 %a23, i32 1
  %hsub2 = insertelement <8 x i16> %hsub1, i16 %a45, i32 2
  %hsub3 = insertelement <8 x i16> %hsub2, i16 %a67, i32 3
  %hsub4 = insertelement <8 x i16> %hsub3, i16 %b01, i32 4
  %hsub5 = insertelement <8 x i16> %hsub4, i16 %b23, i32 5
  %hsub6 = insertelement <8 x i16> %hsub5, i16 %b45, i32 6
  %hsub7 = insertelement <8 x i16> %hsub6, i16 %b67, i32 7
  %result = shufflevector <8 x i16> %hsub7, <8 x i16> %a, <8 x i32> <i32 7, i32 6, i32 poison, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %result
}

;
; v16i16
;

; All sixteen lanes of the horizontal-sub vector are used, in order.
define <16 x i16> @sub_v16i16_0123456789ABCDEF(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: @sub_v16i16_0123456789ABCDEF(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
;
  %a0 = extractelement <16 x i16> %a, i32 0
  %a1 = extractelement <16 x i16> %a, i32 1
  %a2 = extractelement <16 x i16> %a, i32 2
  %a3 = extractelement <16 x i16> %a, i32 3
  %a4 = extractelement <16 x i16> %a, i32 4
  %a5 = extractelement <16 x i16> %a, i32 5
  %a6 = extractelement <16 x i16> %a, i32 6
  %a7 = extractelement <16 x i16> %a, i32 7
  %a8 = extractelement <16 x i16> %a, i32 8
  %a9 = extractelement <16 x i16> %a, i32 9
  %aA = extractelement <16 x i16> %a, i32 10
  %aB = extractelement <16 x i16> %a, i32 11
  %aC = extractelement <16 x i16> %a, i32 12
  %aD = extractelement <16 x i16> %a, i32 13
  %aE = extractelement <16 x i16> %a, i32 14
  %aF = extractelement <16 x i16> %a, i32 15
  %a01 = sub i16 %a0, %a1
  %a23 = sub i16 %a2, %a3
  %a45 = sub i16 %a4, %a5
  %a67 = sub i16 %a6, %a7
  %a89 = sub i16 %a8, %a9
  %aAB = sub i16 %aA, %aB
  %aCD = sub i16 %aC, %aD
  %aEF = sub i16 %aE, %aF
  %b0 = extractelement <16 x i16> %b, i32 0
  %b1 = extractelement <16 x i16> %b, i32 1
  %b2 = extractelement <16 x i16> %b, i32 2
  %b3 = extractelement <16 x i16> %b, i32 3
  %b4 = extractelement <16 x i16> %b, i32 4
  %b5 = extractelement <16 x i16> %b, i32 5
  %b6 = extractelement <16 x i16> %b, i32 6
  %b7 = extractelement <16 x i16> %b, i32 7
  %b8 = extractelement <16 x i16> %b, i32 8
  %b9 = extractelement <16 x i16> %b, i32 9
  %bA = extractelement <16 x i16> %b, i32 10
  %bB = extractelement <16 x i16> %b, i32 11
  %bC = extractelement <16 x i16> %b, i32 12
  %bD = extractelement <16 x i16> %b, i32 13
  %bE = extractelement <16 x i16> %b, i32 14
  %bF = extractelement <16 x i16> %b, i32 15
  %b01 = sub i16 %b0, %b1
  %b23 = sub i16 %b2, %b3
  %b45 = sub i16 %b4, %b5
  %b67 = sub i16 %b6, %b7
  %b89 = sub i16 %b8, %b9
  %bAB = sub i16 %bA, %bB
  %bCD = sub i16 %bC, %bD
  %bEF = sub i16 %bE, %bF
  %hsub0 = insertelement <16 x i16> poison, i16 %a01, i32 0
  %hsub1 = insertelement <16 x i16> %hsub0, i16 %a23, i32 1
  %hsub2 = insertelement <16 x i16> %hsub1, i16 %a45, i32 2
  %hsub3 = insertelement <16 x i16> %hsub2, i16 %a67, i32 3
  %hsub4 = insertelement <16 x i16> %hsub3, i16 %b01, i32 4
  %hsub5 = insertelement <16 x i16> %hsub4, i16 %b23, i32 5
  %hsub6 = insertelement <16 x i16> %hsub5, i16 %b45, i32 6
  %hsub7 = insertelement <16 x i16> %hsub6, i16 %b67, i32 7
  %hsub8 = insertelement <16 x i16> %hsub7, i16 %a89, i32 8
  %hsub9 = insertelement <16 x i16> %hsub8, i16 %aAB, i32 9
  %hsubA = insertelement <16 x i16> %hsub9, i16 %aCD, i32 10
  %hsubB = insertelement <16 x i16> %hsubA, i16 %aEF, i32 11
  %hsubC = insertelement <16 x i16> %hsubB, i16 %b89, i32 12
  %hsubD = insertelement <16 x i16> %hsubC, i16 %bAB, i32 13
  %hsubE = insertelement <16 x i16> %hsubD, i16 %bCD, i32 14
  %hsubF = insertelement <16 x i16> %hsubE, i16 %bEF, i32 15
  %result = shufflevector <16 x i16> %hsubF, <16 x i16> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %result
}

; Lanes are used in order, with result lanes 4 and 10 left poison.
define <16 x i16> @sub_v16i16_0123u56789uBCDEF(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: @sub_v16i16_0123u56789uBCDEF(
; SSE2-NEXT:    [[BE:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 14
; SSE2-NEXT:    [[BF:%.*]] = extractelement <16 x i16> [[B]], i64 15
; SSE2-NEXT:    [[BEF:%.*]] = sub i16 [[BE]], [[BF]]
; SSE2-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[B]], <16 x i16> [[B1:%.*]], <16 x i32>
; SSE2-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i16> [[B]], <16 x i16> [[B1]], <16 x i32>
; SSE2-NEXT:    [[HSUB8:%.*]] = sub <16 x i16> [[TMP3]], [[TMP7]]
; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[B]], <16 x i16> poison, <16 x i32>
; SSE2-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[B]], <16 x i16> poison, <16 x i32>
; SSE2-NEXT:    [[TMP6:%.*]] = sub <16 x i16> [[TMP4]], [[TMP5]]
; SSE2-NEXT:    [[HSUB92:%.*]] = shufflevector <16 x i16> [[HSUB8]], <16 x i16> [[TMP6]], <16 x i32>
; SSE2-NEXT:    [[HSUBB:%.*]] = insertelement <16 x i16> [[HSUB92]], i16 [[BEF]], i64 11
; SSE2-NEXT:    [[TMP10:%.*]] = shufflevector <16 x i16> [[B1]], <16 x i16> poison, <16 x i32>
; SSE2-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i16> [[B1]], <16 x i16> poison, <16 x i32>
; SSE2-NEXT:    [[TMP9:%.*]] = sub <16 x i16> [[TMP10]], [[TMP8]]
; SSE2-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[HSUBB]], <16 x i16> [[TMP9]], <16 x i32>
; SSE2-NEXT:    ret <16 x i16> [[RESULT]]
;
; SSE4-LABEL: @sub_v16i16_0123u56789uBCDEF(
; SSE4-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32>
; SSE4-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32>
; SSE4-NEXT:    [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP4]], <16 x i16> [[A]], <16 x i32>
; SSE4-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; SSE4-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i16> [[TMP6]], <16 x i16> [[B]], <16 x i32>
; SSE4-NEXT:    [[TMP9:%.*]] = sub <16 x i16> [[TMP7]], [[TMP8]]
; SSE4-NEXT:    ret <16 x i16> [[TMP9]]
;
; AVX-LABEL: @sub_v16i16_0123u56789uBCDEF(
; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32>
; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; AVX-NEXT:    [[RESULT:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; AVX-NEXT:    ret <16 x i16> [[RESULT]]
;
  %a0 = extractelement <16 x i16> %a, i32 0
  %a1 = extractelement <16 x i16> %a, i32 1
  %a2 = extractelement <16 x i16> %a, i32 2
  %a3 = extractelement <16 x i16> %a, i32 3
  %a4 = extractelement <16 x i16> %a, i32 4
  %a5 = extractelement <16 x i16> %a, i32 5
  %a6 = extractelement <16 x i16> %a, i32 6
  %a7 = extractelement <16 x i16> %a, i32 7
  %a8 = extractelement <16 x i16> %a, i32 8
  %a9 = extractelement <16 x i16> %a, i32 9
  %aA = extractelement <16 x i16> %a, i32 10
  %aB = extractelement <16 x i16> %a, i32 11
  %aC = extractelement <16 x i16> %a, i32 12
  %aD = extractelement <16 x i16> %a, i32 13
  %aE = extractelement <16 x i16> %a, i32 14
  %aF = extractelement <16 x i16> %a, i32 15
  %a01 = sub i16 %a0, %a1
  %a23 = sub i16 %a2, %a3
  %a45 = sub i16 %a4, %a5
  %a67 = sub i16 %a6, %a7
  %a89 = sub i16 %a8, %a9
  %aAB = sub i16 %aA, %aB
  %aCD = sub i16 %aC, %aD
  %aEF = sub i16 %aE, %aF
  %b0 = extractelement <16 x i16> %b, i32 0
  %b1 = extractelement <16 x i16> %b, i32 1
  %b2 = extractelement <16 x i16> %b, i32 2
  %b3 = extractelement <16 x i16> %b, i32 3
  %b4 = extractelement <16 x i16> %b, i32 4
  %b5 = extractelement <16 x i16> %b, i32 5
  %b6 = extractelement <16 x i16> %b, i32 6
  %b7 = extractelement <16 x i16> %b, i32 7
  %b8 = extractelement <16 x i16> %b, i32 8
  %b9 = extractelement <16 x i16> %b, i32 9
  %bA = extractelement <16 x i16> %b, i32 10
  %bB = extractelement <16 x i16> %b, i32 11
  %bC = extractelement <16 x i16> %b, i32 12
  %bD = extractelement <16 x i16> %b, i32 13
  %bE = extractelement <16 x i16> %b, i32 14
  %bF = extractelement <16 x i16> %b, i32 15
  %b01 = sub i16 %b0, %b1
  %b23 = sub i16 %b2, %b3
  %b45 = sub i16 %b4, %b5
  %b67 = sub i16 %b6, %b7
  %b89 = sub i16 %b8, %b9
  %bAB = sub i16 %bA, %bB
  %bCD = sub i16 %bC, %bD
  %bEF = sub i16 %bE, %bF
  %hsub0 = insertelement <16 x i16> poison, i16 %a01, i32 0
  %hsub1 = insertelement <16 x i16> %hsub0, i16 %a23, i32 1
  %hsub2 = insertelement <16 x i16> %hsub1, i16 %a45, i32 2
  %hsub3 = insertelement <16 x i16> %hsub2, i16 %a67, i32 3
  %hsub4 = insertelement <16 x i16> %hsub3, i16 %b01, i32 4
  %hsub5 = insertelement <16 x i16> %hsub4, i16 %b23, i32 5
  %hsub6 = insertelement <16 x i16> %hsub5, i16 %b45, i32 6
  %hsub7 = insertelement <16 x i16> %hsub6, i16 %b67, i32 7
  %hsub8 = insertelement <16 x i16> %hsub7, i16 %a89, i32 8
  %hsub9 = insertelement <16 x i16> %hsub8, i16 %aAB, i32 9
  %hsubA = insertelement <16 x i16> %hsub9, i16 %aCD, i32 10
  %hsubB = insertelement <16 x i16> %hsubA, i16 %aEF, i32 11
  %hsubC = insertelement <16 x i16> %hsubB, i16 %b89, i32 12
  %hsubD = insertelement <16 x i16> %hsubC, i16 %bAB, i32 13
  %hsubE = insertelement <16 x i16> %hsubD, i16 %bCD, i32 14
  %hsubF = insertelement <16 x i16> %hsubE, i16 %bEF, i32 15
  %result = shufflevector <16 x i16> %hsubF, <16 x i16> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 8, i32 9, i32 poison, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %result
}

; Lanes are used in reverse order, with result lanes 2 and 14 left poison.
define <16 x i16> @sub_v16i16_FEuCBA98765432u0(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: @sub_v16i16_FEuCBA98765432u0(
; SSE2-NEXT:    [[BC:%.*]] = extractelement <16 x i16> [[B:%.*]], i64 12
; SSE2-NEXT:    [[BD:%.*]] = extractelement <16 x i16> [[B]], i64 13
; SSE2-NEXT:    [[BE:%.*]] = extractelement <16 x i16> [[B]], i64 14
; SSE2-NEXT:    [[BF:%.*]] = extractelement <16 x i16> [[B]], i64 15
; SSE2-NEXT:    [[BCD:%.*]] = sub i16 [[BC]], [[BD]]
; SSE2-NEXT:    [[BEF:%.*]] = sub i16 [[BE]], [[BF]]
; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B]], <16 x i32>
; SSE2-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; SSE2-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[A]], <16 x i32>
; SSE2-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32>
; SSE2-NEXT:    [[HSUB8:%.*]] = sub <16 x i16> [[TMP3]], [[TMP7]]
; SSE2-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; SSE2-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; SSE2-NEXT:    [[TMP6:%.*]] = sub <16 x i16> [[TMP4]], [[TMP5]]
; SSE2-NEXT:    [[HSUBC1:%.*]] = shufflevector <16 x i16> [[HSUB8]], <16 x i16> [[TMP6]], <16 x i32>
; SSE2-NEXT:    [[HSUBE:%.*]] = insertelement <16 x i16> [[HSUBC1]], i16 [[BCD]], i64 14
; SSE2-NEXT:    [[HSUBF:%.*]] = insertelement <16 x i16> [[HSUBE]], i16 [[BEF]], i64 15
; SSE2-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[HSUBF]], <16 x i16> poison, <16 x i32>
; SSE2-NEXT:    ret <16 x i16> [[RESULT]]
;
; SSE4-LABEL: @sub_v16i16_FEuCBA98765432u0(
; SSE4-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32>
; SSE4-NEXT:    [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32>
; SSE4-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; SSE4-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32>
; SSE4-NEXT:    [[TMP6:%.*]] = sub <16 x i16> [[TMP4]], [[TMP5]]
; SSE4-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; SSE4-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; SSE4-NEXT:    [[TMP9:%.*]] = sub <16 x i16> [[TMP7]], [[TMP8]]
; SSE4-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[TMP6]], <16 x i32>
; SSE4-NEXT:    ret <16 x i16> [[RESULT]]
;
; AVX2-LABEL: @sub_v16i16_FEuCBA98765432u0(
; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32>
; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; AVX2-NEXT:    [[HSUBA:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; AVX2-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; AVX2-NEXT:    [[TMP5:%.*]] = sub <16 x i16> [[TMP3]], [[TMP4]]
; AVX2-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> [[HSUBA]], <16 x i32>
; AVX2-NEXT:    ret <16 x i16> [[RESULT]]
;
; AVX512-LABEL: @sub_v16i16_FEuCBA98765432u0(
; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32>
; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; AVX512-NEXT:    [[HSUBA2:%.*]] = sub <16 x i16> [[TMP1]], [[TMP2]]
; AVX512-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; AVX512-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32>
; AVX512-NEXT:    [[TMP5:%.*]] = sub <16 x i16> [[TMP3]], [[TMP4]]
; AVX512-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> [[HSUBA2]], <16 x i32>
; AVX512-NEXT:    ret <16 x i16> [[RESULT]]
;
  %a0 = extractelement <16 x i16> %a, i32 0
  %a1 = extractelement <16 x i16> %a, i32 1
  %a2 = extractelement <16 x i16> %a, i32 2
  %a3 = extractelement <16 x i16> %a, i32 3
  %a4 = extractelement <16 x i16> %a, i32 4
  %a5 = extractelement <16 x i16> %a, i32 5
  %a6 = extractelement <16 x i16> %a, i32 6
  %a7 = extractelement <16 x i16> %a, i32 7
  %a8 = extractelement <16 x i16> %a, i32 8
  %a9 = extractelement <16 x i16> %a, i32 9
  %aA = extractelement <16 x i16> %a, i32 10
  %aB = extractelement <16 x i16> %a, i32 11
  %aC = extractelement <16 x i16> %a, i32 12
  %aD = extractelement <16 x i16> %a, i32 13
  %aE = extractelement <16 x i16> %a, i32 14
  %aF = extractelement <16 x i16> %a, i32 15
  %a01 = sub i16 %a0, %a1
  %a23 = sub i16 %a2, %a3
  %a45 = sub i16 %a4, %a5
  %a67 = sub i16 %a6, %a7
  %a89 = sub i16 %a8, %a9
  %aAB = sub i16 %aA, %aB
  %aCD = sub i16 %aC, %aD
  %aEF = sub i16 %aE, %aF
  %b0 = extractelement <16 x i16> %b, i32 0
  %b1 = extractelement <16 x i16> %b, i32 1
  %b2 = extractelement <16 x i16> %b, i32 2
  %b3 = extractelement <16 x i16> %b, i32 3
  %b4 = extractelement <16 x i16> %b, i32 4
  %b5 = extractelement <16 x i16> %b, i32 5
  %b6 = extractelement <16 x i16> %b, i32 6
  %b7 = extractelement <16 x i16> %b, i32 7
  %b8 = extractelement <16 x i16> %b, i32 8
  %b9 = extractelement <16 x i16> %b, i32 9
  %bA = extractelement <16 x i16> %b, i32 10
  %bB = extractelement <16 x i16> %b, i32 11
  %bC = extractelement <16 x i16> %b, i32 12
  %bD = extractelement <16 x i16> %b, i32 13
  %bE = extractelement <16 x i16> %b, i32 14
  %bF = extractelement <16 x i16> %b, i32 15
  %b01 = sub i16 %b0, %b1
  %b23 = sub i16 %b2, %b3
  %b45 = sub i16 %b4, %b5
  %b67 = sub i16 %b6, %b7
  %b89 = sub i16 %b8, %b9
  %bAB = sub i16 %bA, %bB
  %bCD = sub i16 %bC, %bD
  %bEF = sub i16 %bE, %bF
  %hsub0 = insertelement <16 x i16> poison, i16 %a01, i32 0
  %hsub1 = insertelement <16 x i16> %hsub0, i16 %a23, i32 1
  %hsub2 = insertelement <16 x i16> %hsub1, i16 %a45, i32 2
  %hsub3 = insertelement <16 x i16> %hsub2, i16 %a67, i32 3
  %hsub4 = insertelement <16 x i16> %hsub3, i16 %b01, i32 4
  %hsub5 = insertelement <16 x i16> %hsub4, i16 %b23, i32 5
  %hsub6 = insertelement <16 x i16> %hsub5, i16 %b45, i32 6
  %hsub7 = insertelement <16 x i16> %hsub6, i16 %b67, i32 7
  %hsub8 = insertelement <16 x i16> %hsub7, i16 %a89, i32 8
  %hsub9 = insertelement <16 x i16> %hsub8, i16 %aAB, i32 9
  %hsubA = insertelement <16 x i16> %hsub9, i16 %aCD, i32 10
  %hsubB = insertelement <16 x i16> %hsubA, i16 %aEF, i32 11
  %hsubC = insertelement <16 x i16> %hsubB, i16 %b89, i32 12
  %hsubD = insertelement <16 x i16> %hsubC, i16 %bAB, i32 13
  %hsubE = insertelement <16 x i16> %hsubD, i16 %bCD, i32 14
  %hsubF = insertelement <16 x i16> %hsubE, i16 %bEF, i32 15
  %result = shufflevector <16 x i16> %hsubF, <16 x i16> %a, <16 x i32> <i32 15, i32 14, i32 poison, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 poison, i32 0>
  ret <16 x i16> %result
}

;
; v4i32
;

; All four lanes of the horizontal-sub vector are used, in order.
define <4 x i32> @sub_v4i32_0123(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @sub_v4i32_0123(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %a01 = sub i32 %a0, %a1
  %a23 = sub i32 %a2, %a3
  %b0 = extractelement <4 x i32> %b, i32 0
  %b1 = extractelement <4 x i32> %b, i32 1
  %b2 = extractelement <4 x i32> %b, i32 2
  %b3 = extractelement <4 x i32> %b, i32 3
  %b01 = sub i32 %b0, %b1
  %b23 = sub i32 %b2, %b3
  %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 0
  %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1
  %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2
  %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3
  %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %result
}

; Lane 0 of the result is left poison; lanes 1-3 are used in order.
define <4 x i32> @sub_v4i32_u123(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @sub_v4i32_u123(
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
;
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %a01 = sub i32 %a0, %a1
  %a23 = sub i32 %a2, %a3
  %b0 = extractelement <4 x i32> %b, i32 0
  %b1 = extractelement <4 x i32> %b, i32 1
  %b2 = extractelement <4 x i32> %b, i32 2
  %b3 = extractelement <4 x i32> %b, i32 3
  %b01 = sub i32 %b0, %b1
  %b23 = sub i32 %b2, %b3
  %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 0
  %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1
  %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2
  %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3
  %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> <i32 poison, i32 1, i32 2, i32 3>
  ret <4 x i32> %result
}

; Lane 1 of the result is left poison; lanes 0, 2 and 3 are used in order.
define <4 x i32> @sub_v4i32_0u23(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @sub_v4i32_0u23(
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
;
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %a01 = sub i32 %a0, %a1
  %a23 = sub i32 %a2, %a3
  %b0 = extractelement <4 x i32> %b, i32 0
  %b1 = extractelement <4 x i32> %b, i32 1
  %b2 = extractelement <4 x i32> %b, i32 2
  %b3 = extractelement <4 x i32> %b, i32 3
  %b01 = sub i32 %b0, %b1
  %b23 = sub i32 %b2, %b3
  %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 0
  %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1
  %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2
  %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3
  %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> <i32 0, i32 poison, i32 2, i32 3>
  ret <4 x i32> %result
}

define <4 x i32>
@sub_v4i32_01u3(<4 x i32> %a, <4 x i32> %b) { ; SSE2-LABEL: @sub_v4i32_01u3( ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> ; SSE2-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] ; SSE2-NEXT: ret <4 x i32> [[TMP4]] ; ; SSE4-LABEL: @sub_v4i32_01u3( ; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> ; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> ; SSE4-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] ; SSE4-NEXT: ret <4 x i32> [[TMP4]] ; ; AVX2-LABEL: @sub_v4i32_01u3( ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> ; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> ; AVX2-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] ; AVX2-NEXT: ret <4 x i32> [[TMP4]] ; ; AVX512-LABEL: @sub_v4i32_01u3( ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> ; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> ; AVX512-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] ; AVX512-NEXT: ret <4 x i32> [[TMP4]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 %a2 = extractelement <4 x i32> %a, i32 2 %a3 = extractelement <4 x i32> %a, i32 3 %a01 = sub i32 %a0, %a1 %a23 = sub i32 %a2, %a3 %b0 = extractelement <4 x i32> %b, i32 0 %b1 = extractelement <4 x i32> %b, i32 1 %b2 = extractelement <4 x i32> %b, i32 2 %b3 = extractelement <4 x i32> %b, i32 3 %b01 = sub i32 %b0, %b1 %b23 = sub i32 %b2, %b3 %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 0 %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1 %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2 %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3 %result = shufflevector <4 x i32> %hsub3, <4 x i32> 
%a, <4 x i32> ret <4 x i32> %result } define <4 x i32> @sub_v4i32_012u(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @sub_v4i32_012u( ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret <4 x i32> [[TMP4]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 %a2 = extractelement <4 x i32> %a, i32 2 %a3 = extractelement <4 x i32> %a, i32 3 %a01 = sub i32 %a0, %a1 %a23 = sub i32 %a2, %a3 %b0 = extractelement <4 x i32> %b, i32 0 %b1 = extractelement <4 x i32> %b, i32 1 %b2 = extractelement <4 x i32> %b, i32 2 %b3 = extractelement <4 x i32> %b, i32 3 %b01 = sub i32 %b0, %b1 %b23 = sub i32 %b2, %b3 %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 0 %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1 %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2 %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3 %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> ret <4 x i32> %result } define <4 x i32> @sub_v4i32_uu23(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @sub_v4i32_uu23( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[RESULT1:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[RESULT1]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 %a2 = extractelement <4 x i32> %a, i32 2 %a3 = extractelement <4 x i32> %a, i32 3 %a01 = sub i32 %a0, %a1 %a23 = sub i32 %a2, %a3 %b0 = extractelement <4 x i32> %b, i32 0 %b1 = extractelement <4 x i32> %b, i32 1 %b2 = extractelement <4 x i32> %b, i32 2 %b3 = extractelement <4 x i32> %b, i32 3 %b01 = sub i32 %b0, %b1 %b23 = sub i32 %b2, %b3 %hsub0 = insertelement <4 x i32> 
poison, i32 %a01, i32 0 %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1 %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2 %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3 %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> ret <4 x i32> %result } define <4 x i32> @sub_v4i32_01uu(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @sub_v4i32_01uu( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 %a2 = extractelement <4 x i32> %a, i32 2 %a3 = extractelement <4 x i32> %a, i32 3 %a01 = sub i32 %a0, %a1 %a23 = sub i32 %a2, %a3 %b0 = extractelement <4 x i32> %b, i32 0 %b1 = extractelement <4 x i32> %b, i32 1 %b2 = extractelement <4 x i32> %b, i32 2 %b3 = extractelement <4 x i32> %b, i32 3 %b01 = sub i32 %b0, %b1 %b23 = sub i32 %b2, %b3 %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 0 %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1 %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2 %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3 %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> ret <4 x i32> %result } define <4 x i32> @sub_v4i32_32u0(<4 x i32> %a, <4 x i32> %b) { ; SSE2-LABEL: @sub_v4i32_32u0( ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], <4 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[A]], <4 x i32> ; SSE2-NEXT: [[RESULT1:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] ; SSE2-NEXT: ret <4 x i32> [[RESULT1]] ; ; SSE4-LABEL: @sub_v4i32_32u0( ; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], <4 x i32> ; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[A]], <4 x i32> ; 
SSE4-NEXT: [[RESULT:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] ; SSE4-NEXT: ret <4 x i32> [[RESULT]] ; ; AVX2-LABEL: @sub_v4i32_32u0( ; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], <4 x i32> ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[A]], <4 x i32> ; AVX2-NEXT: [[RESULT:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] ; AVX2-NEXT: ret <4 x i32> [[RESULT]] ; ; AVX512-LABEL: @sub_v4i32_32u0( ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], <4 x i32> ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[A]], <4 x i32> ; AVX512-NEXT: [[RESULT1:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] ; AVX512-NEXT: ret <4 x i32> [[RESULT1]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 %a2 = extractelement <4 x i32> %a, i32 2 %a3 = extractelement <4 x i32> %a, i32 3 %a01 = sub i32 %a0, %a1 %a23 = sub i32 %a2, %a3 %b0 = extractelement <4 x i32> %b, i32 0 %b1 = extractelement <4 x i32> %b, i32 1 %b2 = extractelement <4 x i32> %b, i32 2 %b3 = extractelement <4 x i32> %b, i32 3 %b01 = sub i32 %b0, %b1 %b23 = sub i32 %b2, %b3 %hsub0 = insertelement <4 x i32> poison, i32 %a01, i32 0 %hsub1 = insertelement <4 x i32> %hsub0, i32 %a23, i32 1 %hsub2 = insertelement <4 x i32> %hsub1, i32 %b01, i32 2 %hsub3 = insertelement <4 x i32> %hsub2, i32 %b23, i32 3 %result = shufflevector <4 x i32> %hsub3, <4 x i32> %a, <4 x i32> ret <4 x i32> %result } ; ; v8i32 ; define <8 x i32> @sub_v8i32_01234567(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: @sub_v8i32_01234567( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <8 x i32> [[TMP3]] ; %a0 = extractelement <8 x i32> %a, i32 0 %a1 = extractelement <8 x i32> %a, i32 1 %a2 = extractelement <8 x i32> %a, 
i32 2 %a3 = extractelement <8 x i32> %a, i32 3 %a4 = extractelement <8 x i32> %a, i32 4 %a5 = extractelement <8 x i32> %a, i32 5 %a6 = extractelement <8 x i32> %a, i32 6 %a7 = extractelement <8 x i32> %a, i32 7 %a01 = sub i32 %a0, %a1 %a23 = sub i32 %a2, %a3 %a45 = sub i32 %a4, %a5 %a67 = sub i32 %a6, %a7 %b0 = extractelement <8 x i32> %b, i32 0 %b1 = extractelement <8 x i32> %b, i32 1 %b2 = extractelement <8 x i32> %b, i32 2 %b3 = extractelement <8 x i32> %b, i32 3 %b4 = extractelement <8 x i32> %b, i32 4 %b5 = extractelement <8 x i32> %b, i32 5 %b6 = extractelement <8 x i32> %b, i32 6 %b7 = extractelement <8 x i32> %b, i32 7 %b01 = sub i32 %b0, %b1 %b23 = sub i32 %b2, %b3 %b45 = sub i32 %b4, %b5 %b67 = sub i32 %b6, %b7 %hsub0 = insertelement <8 x i32> poison, i32 %a01, i32 0 %hsub1 = insertelement <8 x i32> %hsub0, i32 %a23, i32 1 %hsub2 = insertelement <8 x i32> %hsub1, i32 %b01, i32 2 %hsub3 = insertelement <8 x i32> %hsub2, i32 %b23, i32 3 %hsub4 = insertelement <8 x i32> %hsub3, i32 %a45, i32 4 %hsub5 = insertelement <8 x i32> %hsub4, i32 %a67, i32 5 %hsub6 = insertelement <8 x i32> %hsub5, i32 %b45, i32 6 %hsub7 = insertelement <8 x i32> %hsub6, i32 %b67, i32 7 %result = shufflevector <8 x i32> %hsub7, <8 x i32> %a, <8 x i32> ret <8 x i32> %result } define <8 x i32> @sub_v8i32_01234u67(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: @sub_v8i32_01234u67( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> ; CHECK-NEXT: [[RESULT:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <8 x i32> [[RESULT]] ; %a0 = extractelement <8 x i32> %a, i32 0 %a1 = extractelement <8 x i32> %a, i32 1 %a2 = extractelement <8 x i32> %a, i32 2 %a3 = extractelement <8 x i32> %a, i32 3 %a4 = extractelement <8 x i32> %a, i32 4 %a5 = extractelement <8 x i32> %a, i32 5 %a6 = extractelement <8 x i32> %a, i32 6 %a7 = extractelement <8 x i32> %a, i32 7 %a01 
= sub i32 %a0, %a1 %a23 = sub i32 %a2, %a3 %a45 = sub i32 %a4, %a5 %a67 = sub i32 %a6, %a7 %b0 = extractelement <8 x i32> %b, i32 0 %b1 = extractelement <8 x i32> %b, i32 1 %b2 = extractelement <8 x i32> %b, i32 2 %b3 = extractelement <8 x i32> %b, i32 3 %b4 = extractelement <8 x i32> %b, i32 4 %b5 = extractelement <8 x i32> %b, i32 5 %b6 = extractelement <8 x i32> %b, i32 6 %b7 = extractelement <8 x i32> %b, i32 7 %b01 = sub i32 %b0, %b1 %b23 = sub i32 %b2, %b3 %b45 = sub i32 %b4, %b5 %b67 = sub i32 %b6, %b7 %hsub0 = insertelement <8 x i32> poison, i32 %a01, i32 0 %hsub1 = insertelement <8 x i32> %hsub0, i32 %a23, i32 1 %hsub2 = insertelement <8 x i32> %hsub1, i32 %b01, i32 2 %hsub3 = insertelement <8 x i32> %hsub2, i32 %b23, i32 3 %hsub4 = insertelement <8 x i32> %hsub3, i32 %a45, i32 4 %hsub5 = insertelement <8 x i32> %hsub4, i32 %a67, i32 5 %hsub6 = insertelement <8 x i32> %hsub5, i32 %b45, i32 6 %hsub7 = insertelement <8 x i32> %hsub6, i32 %b67, i32 7 %result = shufflevector <8 x i32> %hsub7, <8 x i32> %a, <8 x i32> ret <8 x i32> %result } ; ; v4f32 ; define <4 x float> @sub_v4f32_0123(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @sub_v4f32_0123( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <4 x float> [[TMP3]] ; %a0 = extractelement <4 x float> %a, i32 0 %a1 = extractelement <4 x float> %a, i32 1 %a2 = extractelement <4 x float> %a, i32 2 %a3 = extractelement <4 x float> %a, i32 3 %a01 = fsub float %a0, %a1 %a23 = fsub float %a2, %a3 %b0 = extractelement <4 x float> %b, i32 0 %b1 = extractelement <4 x float> %b, i32 1 %b2 = extractelement <4 x float> %b, i32 2 %b3 = extractelement <4 x float> %b, i32 3 %b01 = fsub float %b0, %b1 %b23 = fsub float %b2, %b3 %hsub0 = insertelement <4 x float> poison, float %a01, i32 0 %hsub1 = 
insertelement <4 x float> %hsub0, float %a23, i32 1 %hsub2 = insertelement <4 x float> %hsub1, float %b01, i32 2 %hsub3 = insertelement <4 x float> %hsub2, float %b23, i32 3 %result = shufflevector <4 x float> %hsub3, <4 x float> %a, <4 x i32> ret <4 x float> %result } define <4 x float> @sub_v4f32_u123(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @sub_v4f32_u123( ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret <4 x float> [[TMP4]] ; %a0 = extractelement <4 x float> %a, i32 0 %a1 = extractelement <4 x float> %a, i32 1 %a2 = extractelement <4 x float> %a, i32 2 %a3 = extractelement <4 x float> %a, i32 3 %a01 = fsub float %a0, %a1 %a23 = fsub float %a2, %a3 %b0 = extractelement <4 x float> %b, i32 0 %b1 = extractelement <4 x float> %b, i32 1 %b2 = extractelement <4 x float> %b, i32 2 %b3 = extractelement <4 x float> %b, i32 3 %b01 = fsub float %b0, %b1 %b23 = fsub float %b2, %b3 %hsub0 = insertelement <4 x float> poison, float %a01, i32 0 %hsub1 = insertelement <4 x float> %hsub0, float %a23, i32 1 %hsub2 = insertelement <4 x float> %hsub1, float %b01, i32 2 %hsub3 = insertelement <4 x float> %hsub2, float %b23, i32 3 %result = shufflevector <4 x float> %hsub3, <4 x float> %a, <4 x i32> ret <4 x float> %result } define <4 x float> @sub_v4f32_0u23(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @sub_v4f32_0u23( ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret <4 x float> [[TMP4]] ; %a0 = extractelement <4 x float> %a, i32 0 %a1 = extractelement <4 x float> %a, i32 1 %a2 = extractelement <4 x float> %a, i32 2 %a3 = 
extractelement <4 x float> %a, i32 3 %a01 = fsub float %a0, %a1 %a23 = fsub float %a2, %a3 %b0 = extractelement <4 x float> %b, i32 0 %b1 = extractelement <4 x float> %b, i32 1 %b2 = extractelement <4 x float> %b, i32 2 %b3 = extractelement <4 x float> %b, i32 3 %b01 = fsub float %b0, %b1 %b23 = fsub float %b2, %b3 %hsub0 = insertelement <4 x float> poison, float %a01, i32 0 %hsub1 = insertelement <4 x float> %hsub0, float %a23, i32 1 %hsub2 = insertelement <4 x float> %hsub1, float %b01, i32 2 %hsub3 = insertelement <4 x float> %hsub2, float %b23, i32 3 %result = shufflevector <4 x float> %hsub3, <4 x float> %a, <4 x i32> ret <4 x float> %result } define <4 x float> @sub_v4f32_01u3(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @sub_v4f32_01u3( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> ; CHECK-NEXT: [[RESULT1:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <4 x float> [[RESULT1]] ; %a0 = extractelement <4 x float> %a, i32 0 %a1 = extractelement <4 x float> %a, i32 1 %a2 = extractelement <4 x float> %a, i32 2 %a3 = extractelement <4 x float> %a, i32 3 %a01 = fsub float %a0, %a1 %a23 = fsub float %a2, %a3 %b0 = extractelement <4 x float> %b, i32 0 %b1 = extractelement <4 x float> %b, i32 1 %b2 = extractelement <4 x float> %b, i32 2 %b3 = extractelement <4 x float> %b, i32 3 %b01 = fsub float %b0, %b1 %b23 = fsub float %b2, %b3 %hsub0 = insertelement <4 x float> poison, float %a01, i32 0 %hsub1 = insertelement <4 x float> %hsub0, float %a23, i32 1 %hsub2 = insertelement <4 x float> %hsub1, float %b01, i32 2 %hsub3 = insertelement <4 x float> %hsub2, float %b23, i32 3 %result = shufflevector <4 x float> %hsub3, <4 x float> %a, <4 x i32> ret <4 x float> %result } define <4 x float> @sub_v4f32_012u(<4 x float> %a, <4 x float> %b) { ; SSE2-LABEL: @sub_v4f32_012u( ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x 
float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> ; SSE2-NEXT: [[RESULT1:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] ; SSE2-NEXT: ret <4 x float> [[RESULT1]] ; ; SSE4-LABEL: @sub_v4f32_012u( ; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> ; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> ; SSE4-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP2]], [[TMP3]] ; SSE4-NEXT: ret <4 x float> [[TMP4]] ; ; AVX2-LABEL: @sub_v4f32_012u( ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> ; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> ; AVX2-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP2]], [[TMP3]] ; AVX2-NEXT: ret <4 x float> [[TMP4]] ; ; AVX512-LABEL: @sub_v4f32_012u( ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> ; AVX512-NEXT: [[RESULT1:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] ; AVX512-NEXT: ret <4 x float> [[RESULT1]] ; %a0 = extractelement <4 x float> %a, i32 0 %a1 = extractelement <4 x float> %a, i32 1 %a2 = extractelement <4 x float> %a, i32 2 %a3 = extractelement <4 x float> %a, i32 3 %a01 = fsub float %a0, %a1 %a23 = fsub float %a2, %a3 %b0 = extractelement <4 x float> %b, i32 0 %b1 = extractelement <4 x float> %b, i32 1 %b2 = extractelement <4 x float> %b, i32 2 %b3 = extractelement <4 x float> %b, i32 3 %b01 = fsub float %b0, %b1 %b23 = fsub float %b2, %b3 %hsub0 = insertelement <4 x float> poison, float %a01, i32 0 %hsub1 = insertelement <4 x float> %hsub0, float %a23, i32 1 %hsub2 = insertelement <4 x float> %hsub1, float %b01, i32 2 %hsub3 = insertelement <4 x float> %hsub2, float %b23, i32 3 %result = shufflevector <4 x float> %hsub3, <4 x float> %a, <4 x 
i32> ret <4 x float> %result } define <4 x float> @sub_v4f32_uu23(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @sub_v4f32_uu23( ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[RESULT1:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[RESULT2:%.*]] = fsub <4 x float> [[TMP2]], [[RESULT1]] ; CHECK-NEXT: ret <4 x float> [[RESULT2]] ; %a0 = extractelement <4 x float> %a, i32 0 %a1 = extractelement <4 x float> %a, i32 1 %a2 = extractelement <4 x float> %a, i32 2 %a3 = extractelement <4 x float> %a, i32 3 %a01 = fsub float %a0, %a1 %a23 = fsub float %a2, %a3 %b0 = extractelement <4 x float> %b, i32 0 %b1 = extractelement <4 x float> %b, i32 1 %b2 = extractelement <4 x float> %b, i32 2 %b3 = extractelement <4 x float> %b, i32 3 %b01 = fsub float %b0, %b1 %b23 = fsub float %b2, %b3 %hsub0 = insertelement <4 x float> poison, float %a01, i32 0 %hsub1 = insertelement <4 x float> %hsub0, float %a23, i32 1 %hsub2 = insertelement <4 x float> %hsub1, float %b01, i32 2 %hsub3 = insertelement <4 x float> %hsub2, float %b23, i32 3 %result = shufflevector <4 x float> %hsub3, <4 x float> %a, <4 x i32> ret <4 x float> %result } define <4 x float> @sub_v4f32_01uu(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @sub_v4f32_01uu( ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP2]], [[TMP3]] ; CHECK-NEXT: ret <4 x float> [[TMP4]] ; %a0 = extractelement <4 x float> %a, i32 0 %a1 = extractelement <4 x float> %a, i32 1 %a2 = extractelement <4 x float> %a, i32 2 %a3 = extractelement <4 x float> %a, i32 3 %a01 = fsub float %a0, %a1 %a23 = fsub float %a2, %a3 %b0 = extractelement <4 x float> %b, i32 0 %b1 = extractelement <4 x float> %b, i32 1 %b2 = extractelement <4 x float> %b, i32 2 %b3 = 
extractelement <4 x float> %b, i32 3 %b01 = fsub float %b0, %b1 %b23 = fsub float %b2, %b3 %hsub0 = insertelement <4 x float> poison, float %a01, i32 0 %hsub1 = insertelement <4 x float> %hsub0, float %a23, i32 1 %hsub2 = insertelement <4 x float> %hsub1, float %b01, i32 2 %hsub3 = insertelement <4 x float> %hsub2, float %b23, i32 3 %result = shufflevector <4 x float> %hsub3, <4 x float> %a, <4 x i32> ret <4 x float> %result } ; ; v8f32 ; define <8 x float> @sub_v8f32_01234567(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: @sub_v8f32_01234567( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <8 x float> [[TMP3]] ; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 %a2 = extractelement <8 x float> %a, i32 2 %a3 = extractelement <8 x float> %a, i32 3 %a4 = extractelement <8 x float> %a, i32 4 %a5 = extractelement <8 x float> %a, i32 5 %a6 = extractelement <8 x float> %a, i32 6 %a7 = extractelement <8 x float> %a, i32 7 %a01 = fsub float %a0, %a1 %a23 = fsub float %a2, %a3 %a45 = fsub float %a4, %a5 %a67 = fsub float %a6, %a7 %b0 = extractelement <8 x float> %b, i32 0 %b1 = extractelement <8 x float> %b, i32 1 %b2 = extractelement <8 x float> %b, i32 2 %b3 = extractelement <8 x float> %b, i32 3 %b4 = extractelement <8 x float> %b, i32 4 %b5 = extractelement <8 x float> %b, i32 5 %b6 = extractelement <8 x float> %b, i32 6 %b7 = extractelement <8 x float> %b, i32 7 %b01 = fsub float %b0, %b1 %b23 = fsub float %b2, %b3 %b45 = fsub float %b4, %b5 %b67 = fsub float %b6, %b7 %hsub0 = insertelement <8 x float> poison, float %a01, i32 0 %hsub1 = insertelement <8 x float> %hsub0, float %a23, i32 1 %hsub2 = insertelement <8 x float> %hsub1, float %b01, i32 2 %hsub3 = insertelement <8 x float> %hsub2, float %b23, i32 3 
%hsub4 = insertelement <8 x float> %hsub3, float %a45, i32 4 %hsub5 = insertelement <8 x float> %hsub4, float %a67, i32 5 %hsub6 = insertelement <8 x float> %hsub5, float %b45, i32 6 %hsub7 = insertelement <8 x float> %hsub6, float %b67, i32 7 %result = shufflevector <8 x float> %hsub7, <8 x float> %a, <8 x i32> ret <8 x float> %result } define <8 x float> @sub_v8f32_012u4567(<8 x float> %a, <8 x float> %b) { ; SSE2-LABEL: @sub_v8f32_012u4567( ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> poison, <2 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> ; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x float> [[TMP1]], [[TMP2]] ; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B]], <8 x i32> ; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> ; SSE2-NEXT: [[TMP6:%.*]] = fsub <8 x float> [[TMP5]], [[TMP8]] ; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <8 x i32> ; SSE2-NEXT: [[RESULT:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> [[TMP7]], <8 x i32> ; SSE2-NEXT: ret <8 x float> [[RESULT]] ; ; SSE4-LABEL: @sub_v8f32_012u4567( ; SSE4-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> ; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> ; SSE4-NEXT: [[TMP9:%.*]] = fsub <8 x float> [[TMP8]], [[TMP5]] ; SSE4-NEXT: ret <8 x float> [[TMP9]] ; ; AVX-LABEL: @sub_v8f32_012u4567( ; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> ; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> ; AVX-NEXT: [[TMP7:%.*]] = fsub <8 x float> [[TMP5]], [[TMP6]] ; AVX-NEXT: ret <8 x float> [[TMP7]] ; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 %a2 = extractelement <8 x float> %a, i32 2 %a3 = extractelement <8 x float> %a, i32 
3 %a4 = extractelement <8 x float> %a, i32 4 %a5 = extractelement <8 x float> %a, i32 5 %a6 = extractelement <8 x float> %a, i32 6 %a7 = extractelement <8 x float> %a, i32 7 %a01 = fsub float %a0, %a1 %a23 = fsub float %a2, %a3 %a45 = fsub float %a4, %a5 %a67 = fsub float %a6, %a7 %b0 = extractelement <8 x float> %b, i32 0 %b1 = extractelement <8 x float> %b, i32 1 %b2 = extractelement <8 x float> %b, i32 2 %b3 = extractelement <8 x float> %b, i32 3 %b4 = extractelement <8 x float> %b, i32 4 %b5 = extractelement <8 x float> %b, i32 5 %b6 = extractelement <8 x float> %b, i32 6 %b7 = extractelement <8 x float> %b, i32 7 %b01 = fsub float %b0, %b1 %b23 = fsub float %b2, %b3 %b45 = fsub float %b4, %b5 %b67 = fsub float %b6, %b7 %hsub0 = insertelement <8 x float> poison, float %a01, i32 0 %hsub1 = insertelement <8 x float> %hsub0, float %a23, i32 1 %hsub2 = insertelement <8 x float> %hsub1, float %b01, i32 2 %hsub3 = insertelement <8 x float> %hsub2, float %b23, i32 3 %hsub4 = insertelement <8 x float> %hsub3, float %a45, i32 4 %hsub5 = insertelement <8 x float> %hsub4, float %a67, i32 5 %hsub6 = insertelement <8 x float> %hsub5, float %b45, i32 6 %hsub7 = insertelement <8 x float> %hsub6, float %b67, i32 7 %result = shufflevector <8 x float> %hsub7, <8 x float> %a, <8 x i32> ret <8 x float> %result } define <8 x float> @sub_v8f32_76u43210(<8 x float> %a, <8 x float> %b) { ; SSE2-LABEL: @sub_v8f32_76u43210( ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> ; SSE2-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[TMP1]], [[TMP2]] ; SSE2-NEXT: [[B0:%.*]] = extractelement <8 x float> [[B:%.*]], i64 0 ; SSE2-NEXT: [[B1:%.*]] = extractelement <8 x float> [[B]], i64 1 ; SSE2-NEXT: [[B01:%.*]] = fsub float [[B0]], [[B1]] ; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B]], <8 x float> poison, <2 x i32> ; SSE2-NEXT: [[TMP5:%.*]] = shufflevector 
<8 x float> [[B]], <8 x float> poison, <2 x i32> ; SSE2-NEXT: [[TMP6:%.*]] = fsub <2 x float> [[TMP4]], [[TMP5]] ; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> ; SSE2-NEXT: [[HSUB4:%.*]] = insertelement <8 x float> [[TMP7]], float [[B01]], i64 4 ; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> ; SSE2-NEXT: [[RESULT:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> [[HSUB4]], <8 x i32> ; SSE2-NEXT: ret <8 x float> [[RESULT]] ; ; SSE4-LABEL: @sub_v8f32_76u43210( ; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> [[A:%.*]], <8 x i32> ; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[B]], <8 x float> [[A]], <8 x i32> ; SSE4-NEXT: [[TMP6:%.*]] = fsub <8 x float> [[TMP4]], [[TMP5]] ; SSE4-NEXT: ret <8 x float> [[TMP6]] ; ; AVX-LABEL: @sub_v8f32_76u43210( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> [[A:%.*]], <8 x i32> ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[B]], <8 x float> [[A]], <8 x i32> ; AVX-NEXT: [[RESULT:%.*]] = fsub <8 x float> [[TMP1]], [[TMP2]] ; AVX-NEXT: ret <8 x float> [[RESULT]] ; %a0 = extractelement <8 x float> %a, i32 0 %a1 = extractelement <8 x float> %a, i32 1 %a2 = extractelement <8 x float> %a, i32 2 %a3 = extractelement <8 x float> %a, i32 3 %a4 = extractelement <8 x float> %a, i32 4 %a5 = extractelement <8 x float> %a, i32 5 %a6 = extractelement <8 x float> %a, i32 6 %a7 = extractelement <8 x float> %a, i32 7 %a01 = fsub float %a0, %a1 %a23 = fsub float %a2, %a3 %a45 = fsub float %a4, %a5 %a67 = fsub float %a6, %a7 %b0 = extractelement <8 x float> %b, i32 0 %b1 = extractelement <8 x float> %b, i32 1 %b2 = extractelement <8 x float> %b, i32 2 %b3 = extractelement <8 x float> %b, i32 3 %b4 = extractelement <8 x float> %b, i32 4 %b5 = extractelement <8 x float> %b, i32 5 %b6 = extractelement <8 x float> %b, i32 6 %b7 = extractelement <8 x float> %b, i32 7 %b01 = fsub float %b0, 
%b1 %b23 = fsub float %b2, %b3 %b45 = fsub float %b4, %b5 %b67 = fsub float %b6, %b7 %hsub0 = insertelement <8 x float> poison, float %a01, i32 0 %hsub1 = insertelement <8 x float> %hsub0, float %a23, i32 1 %hsub2 = insertelement <8 x float> %hsub1, float %a45, i32 2 %hsub3 = insertelement <8 x float> %hsub2, float %a67, i32 3 %hsub4 = insertelement <8 x float> %hsub3, float %b01, i32 4 %hsub5 = insertelement <8 x float> %hsub4, float %b23, i32 5 %hsub6 = insertelement <8 x float> %hsub5, float %b45, i32 6 %hsub7 = insertelement <8 x float> %hsub6, float %b67, i32 7 %result = shufflevector <8 x float> %hsub7, <8 x float> %a, <8 x i32> ret <8 x float> %result } ; ; v2f64 ; define <2 x double> @sub_v2f64_01(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @sub_v2f64_01( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x double> [[TMP3]] ; %a0 = extractelement <2 x double> %a, i32 0 %a1 = extractelement <2 x double> %a, i32 1 %a01 = fsub double %a0, %a1 %b0 = extractelement <2 x double> %b, i32 0 %b1 = extractelement <2 x double> %b, i32 1 %b01 = fsub double %b0, %b1 %hsub0 = insertelement <2 x double> poison, double %a01, i32 0 %hsub1 = insertelement <2 x double> %hsub0, double %b01, i32 1 %result = shufflevector <2 x double> %hsub1, <2 x double> %a, <2 x i32> ret <2 x double> %result } define <2 x double> @sub_v2f64_u1(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @sub_v2f64_u1( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[RESULT1:%.*]] = fsub <2 x double> [[TMP1]], [[B]] ; CHECK-NEXT: ret <2 x double> [[RESULT1]] ; %a0 = extractelement <2 x double> %a, i32 0 %a1 = extractelement <2 x double> %a, i32 1 %a01 = fsub double %a0, %a1 %b0 = extractelement <2 x double> 
%b, i32 0 %b1 = extractelement <2 x double> %b, i32 1 %b01 = fsub double %b0, %b1 %hsub0 = insertelement <2 x double> poison, double %a01, i32 0 %hsub1 = insertelement <2 x double> %hsub0, double %b01, i32 1 %result = shufflevector <2 x double> %hsub1, <2 x double> %a, <2 x i32> ret <2 x double> %result } define <2 x double> @sub_v2f64_0u(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @sub_v2f64_0u( ; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <2 x double> [[TMP1:%.*]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[RESULT1:%.*]] = fsub <2 x double> [[TMP1]], [[RESULT]] ; CHECK-NEXT: ret <2 x double> [[RESULT1]] ; %a0 = extractelement <2 x double> %a, i32 0 %a1 = extractelement <2 x double> %a, i32 1 %a01 = fsub double %a0, %a1 %b0 = extractelement <2 x double> %b, i32 0 %b1 = extractelement <2 x double> %b, i32 1 %b01 = fsub double %b0, %b1 %hsub0 = insertelement <2 x double> poison, double %a01, i32 0 %hsub1 = insertelement <2 x double> %hsub0, double %b01, i32 1 %result = shufflevector <2 x double> %hsub1, <2 x double> %a, <2 x i32> ret <2 x double> %result } ; ; v4f64 ; define <4 x double> @sub_v4f64_0123(<4 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @sub_v4f64_0123( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <4 x double> [[TMP3]] ; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 %a2 = extractelement <4 x double> %a, i32 2 %a3 = extractelement <4 x double> %a, i32 3 %a01 = fsub double %a0, %a1 %a23 = fsub double %a2, %a3 %b0 = extractelement <4 x double> %b, i32 0 %b1 = extractelement <4 x double> %b, i32 1 %b2 = extractelement <4 x double> %b, i32 2 %b3 = extractelement <4 x double> %b, i32 3 %b01 = fsub double %b0, %b1 %b23 = fsub double %b2, %b3 %hsub0 = insertelement <4 x
double> poison, double %a01, i32 0 %hsub1 = insertelement <4 x double> %hsub0, double %b01, i32 1 %hsub2 = insertelement <4 x double> %hsub1, double %a23, i32 2 %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3 %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> ret <4 x double> %result } define <4 x double> @sub_v4f64_u123(<4 x double> %a, <4 x double> %b) { ; SSE2-LABEL: @sub_v4f64_u123( ; SSE2-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 ; SSE2-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A:%.*]], <2 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <2 x i32> ; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] ; SSE2-NEXT: [[B23:%.*]] = fsub double [[B2]], [[B3]] ; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[B23]], i64 3 ; SSE2-NEXT: ret <4 x double> [[RESULT]] ; ; SSE4-LABEL: @sub_v4f64_u123( ; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[A:%.*]], <4 x i32> ; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <4 x i32> ; SSE4-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP2]], [[TMP4]] ; SSE4-NEXT: ret <4 x double> [[TMP3]] ; ; AVX-LABEL: @sub_v4f64_u123( ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[A:%.*]], <4 x i32> ; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <4 x i32> ; AVX-NEXT: [[TMP4:%.*]] = fsub <4 x double> [[TMP2]], [[TMP3]] ; AVX-NEXT: ret <4 x double> [[TMP4]] ; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 %a2 = extractelement <4 x double> %a, i32 2 %a3 = extractelement <4 x double> %a, i32 3 %a01 = fsub double %a0, %a1 %a23 = fsub double %a2, %a3 %b0 =
extractelement <4 x double> %b, i32 0 %b1 = extractelement <4 x double> %b, i32 1 %b2 = extractelement <4 x double> %b, i32 2 %b3 = extractelement <4 x double> %b, i32 3 %b01 = fsub double %b0, %b1 %b23 = fsub double %b2, %b3 %hsub0 = insertelement <4 x double> poison, double %a01, i32 0 %hsub1 = insertelement <4 x double> %hsub0, double %b01, i32 1 %hsub2 = insertelement <4 x double> %hsub1, double %a23, i32 2 %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3 %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> ret <4 x double> %result } define <4 x double> @sub_v4f64_0u23(<4 x double> %a, <4 x double> %b) { ; SSE2-LABEL: @sub_v4f64_0u23( ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> poison, <2 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> ; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] ; SSE2-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 ; SSE2-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 ; SSE2-NEXT: [[B23:%.*]] = fsub double [[B2]], [[B3]] ; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[B23]], i64 3 ; SSE2-NEXT: ret <4 x double> [[RESULT]] ; ; SSE4-LABEL: @sub_v4f64_0u23( ; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> ; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> ; SSE4-NEXT: [[TMP4:%.*]] = fsub <4 x double> [[TMP2]], [[TMP3]] ; SSE4-NEXT: ret <4 x double> [[TMP4]] ; ; AVX-LABEL: @sub_v4f64_0u23( ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> ; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> ; AVX-NEXT: [[TMP4:%.*]] = fsub <4 x double> [[TMP2]], [[TMP3]] ; AVX-NEXT: ret <4 x double>
[[TMP4]] ; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 %a2 = extractelement <4 x double> %a, i32 2 %a3 = extractelement <4 x double> %a, i32 3 %a01 = fsub double %a0, %a1 %a23 = fsub double %a2, %a3 %b0 = extractelement <4 x double> %b, i32 0 %b1 = extractelement <4 x double> %b, i32 1 %b2 = extractelement <4 x double> %b, i32 2 %b3 = extractelement <4 x double> %b, i32 3 %b01 = fsub double %b0, %b1 %b23 = fsub double %b2, %b3 %hsub0 = insertelement <4 x double> poison, double %a01, i32 0 %hsub1 = insertelement <4 x double> %hsub0, double %b01, i32 1 %hsub2 = insertelement <4 x double> %hsub1, double %a23, i32 2 %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3 %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> ret <4 x double> %result } define <4 x double> @sub_v4f64_01u3(<4 x double> %a, <4 x double> %b) { ; SSE2-LABEL: @sub_v4f64_01u3( ; SSE2-NEXT: [[B2:%.*]] = extractelement <4 x double> [[B:%.*]], i64 2 ; SSE2-NEXT: [[B3:%.*]] = extractelement <4 x double> [[B]], i64 3 ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B]], <2 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> ; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] ; SSE2-NEXT: [[B23:%.*]] = fsub double [[B2]], [[B3]] ; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[B23]], i64 3 ; SSE2-NEXT: ret <4 x double> [[RESULT]] ; ; SSE4-LABEL: @sub_v4f64_01u3( ; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> ; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> ; SSE4-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP2]], [[TMP4]] ; SSE4-NEXT: ret <4 x double> [[TMP3]] ; ; AVX-LABEL: @sub_v4f64_01u3( ; AVX-NEXT: [[TMP2:%.*]] =
shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> ; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> ; AVX-NEXT: [[TMP4:%.*]] = fsub <4 x double> [[TMP2]], [[TMP3]] ; AVX-NEXT: ret <4 x double> [[TMP4]] ; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 %a2 = extractelement <4 x double> %a, i32 2 %a3 = extractelement <4 x double> %a, i32 3 %a01 = fsub double %a0, %a1 %a23 = fsub double %a2, %a3 %b0 = extractelement <4 x double> %b, i32 0 %b1 = extractelement <4 x double> %b, i32 1 %b2 = extractelement <4 x double> %b, i32 2 %b3 = extractelement <4 x double> %b, i32 3 %b01 = fsub double %b0, %b1 %b23 = fsub double %b2, %b3 %hsub0 = insertelement <4 x double> poison, double %a01, i32 0 %hsub1 = insertelement <4 x double> %hsub0, double %b01, i32 1 %hsub2 = insertelement <4 x double> %hsub1, double %a23, i32 2 %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3 %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> ret <4 x double> %result } define <4 x double> @sub_v4f64_012u(<4 x double> %a, <4 x double> %b) { ; SSE2-LABEL: @sub_v4f64_012u( ; SSE2-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A:%.*]], i64 2 ; SSE2-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i64 3 ; SSE2-NEXT: [[A23:%.*]] = fsub double [[A2]], [[A3]] ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B:%.*]], <2 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> ; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] ; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double> [[TMP4]], double [[A23]], i64 2 ; SSE2-NEXT: ret <4 x double> [[RESULT]] ; ; SSE4-LABEL: @sub_v4f64_012u( ; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> ;
SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> ; SSE4-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP2]], [[TMP4]] ; SSE4-NEXT: ret <4 x double> [[TMP3]] ; ; AVX-LABEL: @sub_v4f64_012u( ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> ; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> ; AVX-NEXT: [[TMP4:%.*]] = fsub <4 x double> [[TMP2]], [[TMP3]] ; AVX-NEXT: ret <4 x double> [[TMP4]] ; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 %a2 = extractelement <4 x double> %a, i32 2 %a3 = extractelement <4 x double> %a, i32 3 %a01 = fsub double %a0, %a1 %a23 = fsub double %a2, %a3 %b0 = extractelement <4 x double> %b, i32 0 %b1 = extractelement <4 x double> %b, i32 1 %b2 = extractelement <4 x double> %b, i32 2 %b3 = extractelement <4 x double> %b, i32 3 %b01 = fsub double %b0, %b1 %b23 = fsub double %b2, %b3 %hsub0 = insertelement <4 x double> poison, double %a01, i32 0 %hsub1 = insertelement <4 x double> %hsub0, double %b01, i32 1 %hsub2 = insertelement <4 x double> %hsub1, double %a23, i32 2 %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3 %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> ret <4 x double> %result } define <4 x double> @sub_v4f64_uu23(<4 x double> %a, <4 x double> %b) { ; SSE2-LABEL: @sub_v4f64_uu23( ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> ; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] ; SSE2-NEXT: [[RESULT1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; SSE2-NEXT: ret <4 x double> [[RESULT1]] ; ; SSE4-LABEL: @sub_v4f64_uu23( ; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> ; SSE4-NEXT: [[TMP3:%.*]] =
shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> ; SSE4-NEXT: [[RESULT1:%.*]] = fsub <4 x double> [[TMP2]], [[TMP3]] ; SSE4-NEXT: ret <4 x double> [[RESULT1]] ; ; AVX-LABEL: @sub_v4f64_uu23( ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> ; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> ; AVX-NEXT: [[RESULT1:%.*]] = fsub <4 x double> [[TMP2]], [[TMP3]] ; AVX-NEXT: ret <4 x double> [[RESULT1]] ; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 %a2 = extractelement <4 x double> %a, i32 2 %a3 = extractelement <4 x double> %a, i32 3 %a01 = fsub double %a0, %a1 %a23 = fsub double %a2, %a3 %b0 = extractelement <4 x double> %b, i32 0 %b1 = extractelement <4 x double> %b, i32 1 %b2 = extractelement <4 x double> %b, i32 2 %b3 = extractelement <4 x double> %b, i32 3 %b01 = fsub double %b0, %b1 %b23 = fsub double %b2, %b3 %hsub0 = insertelement <4 x double> poison, double %a01, i32 0 %hsub1 = insertelement <4 x double> %hsub0, double %b01, i32 1 %hsub2 = insertelement <4 x double> %hsub1, double %a23, i32 2 %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3 %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> ret <4 x double> %result } define <4 x double> @sub_v4f64_01uu(<4 x double> %a, <4 x double> %b) { ; SSE2-LABEL: @sub_v4f64_01uu( ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> ; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] ; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; SSE2-NEXT: ret <4 x double> [[TMP4]] ; ; SSE4-LABEL: @sub_v4f64_01uu( ; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> ; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x
double> [[A]], <4 x double> [[B]], <4 x i32> ; SSE4-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP2]], [[TMP4]] ; SSE4-NEXT: ret <4 x double> [[TMP3]] ; ; AVX-LABEL: @sub_v4f64_01uu( ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> ; AVX-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> ; AVX-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP2]], [[TMP4]] ; AVX-NEXT: ret <4 x double> [[TMP3]] ; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 %a2 = extractelement <4 x double> %a, i32 2 %a3 = extractelement <4 x double> %a, i32 3 %a01 = fsub double %a0, %a1 %a23 = fsub double %a2, %a3 %b0 = extractelement <4 x double> %b, i32 0 %b1 = extractelement <4 x double> %b, i32 1 %b2 = extractelement <4 x double> %b, i32 2 %b3 = extractelement <4 x double> %b, i32 3 %b01 = fsub double %b0, %b1 %b23 = fsub double %b2, %b3 %hsub0 = insertelement <4 x double> poison, double %a01, i32 0 %hsub1 = insertelement <4 x double> %hsub0, double %b01, i32 1 %hsub2 = insertelement <4 x double> %hsub1, double %a23, i32 2 %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3 %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> ret <4 x double> %result } define <4 x double> @sub_v4f64_32u0(<4 x double> %a, <4 x double> %b) { ; SSE2-LABEL: @sub_v4f64_32u0( ; SSE2-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i64 0 ; SSE2-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i64 1 ; SSE2-NEXT: [[A01:%.*]] = fsub double [[A0]], [[A1]] ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <2 x i32> ; SSE2-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP1]], [[TMP2]] ; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; SSE2-NEXT: [[RESULT:%.*]] = insertelement <4 x double>
[[TMP4]], double [[A01]], i64 3 ; SSE2-NEXT: ret <4 x double> [[RESULT]] ; ; SSE4-LABEL: @sub_v4f64_32u0( ; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[A:%.*]], <4 x i32> ; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <4 x i32> ; SSE4-NEXT: [[RESULT:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]] ; SSE4-NEXT: ret <4 x double> [[RESULT]] ; ; AVX-LABEL: @sub_v4f64_32u0( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[A:%.*]], <4 x i32> ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[A]], <4 x i32> ; AVX-NEXT: [[RESULT:%.*]] = fsub <4 x double> [[TMP1]], [[TMP2]] ; AVX-NEXT: ret <4 x double> [[RESULT]] ; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 %a2 = extractelement <4 x double> %a, i32 2 %a3 = extractelement <4 x double> %a, i32 3 %a01 = fsub double %a0, %a1 %a23 = fsub double %a2, %a3 %b0 = extractelement <4 x double> %b, i32 0 %b1 = extractelement <4 x double> %b, i32 1 %b2 = extractelement <4 x double> %b, i32 2 %b3 = extractelement <4 x double> %b, i32 3 %b01 = fsub double %b0, %b1 %b23 = fsub double %b2, %b3 %hsub0 = insertelement <4 x double> poison, double %a01, i32 0 %hsub1 = insertelement <4 x double> %hsub0, double %a23, i32 1 %hsub2 = insertelement <4 x double> %hsub1, double %b01, i32 2 %hsub3 = insertelement <4 x double> %hsub2, double %b23, i32 3 %result = shufflevector <4 x double> %hsub3, <4 x double> %a, <4 x i32> ret <4 x double> %result }
;
; NOTE(review) - summary of the floating-point tests above, based only on the
; IR visible in this file.
;
; Each test extracts adjacent element pairs from %a and %b, subtracts each pair
; with a scalar fsub (%a01, %a23, %b01, %b23), rebuilds a vector with an
; insertelement chain starting from poison, and ends with a shufflevector of
; that vector and %a whose result is returned.
;
; The v4f64 tests insert the scalar results in the order a01, b01, a23, b23.
; The one exception is @sub_v4f64_32u0, which inserts a01, a23, b01, b23.
;
; The shufflevector mask operands are not visible in this copy of the text.
; Presumably they follow the suffix of each function name (for example u123
; or 32u0, with u marking an undemanded lane) - confirm against upstream.
;
; Expected output for the baseline x86-64 run (SSE2 prefix) keeps a 2-element
; vector fsub, plus one scalar fsub in most of the partially demanded v4f64
; cases. The SSE4 and AVX prefixes expect a single 4-element fsub of two
; shuffles of the inputs.