[CostModel][X86] We don't need a scale factor for SLM extract costs

D74976 will handle larger vector types, but since SLM doesn't support AVX+ then we will always be extracting from 128-bit vectors so don't need to scale the cost.
This commit is contained in:
Simon Pilgrim 2020-02-24 14:22:48 +00:00
parent 7efabe5c7d
commit b82438872b
4 changed files with 35 additions and 74 deletions

View File

@ -2423,7 +2423,7 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
MVT MScalarTy = LT.second.getScalarType();
if (ST->isSLM())
if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy))
return LT.first * Entry->Cost;
return Entry->Cost;
}
// Add to the base cost if we know that the extracted element of a vector is

View File

@ -533,9 +533,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256,
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 12, i32 13>
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> <i32 14, i32 15>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@ -557,9 +557,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256,
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 28, i32 29>
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> <i32 30, i32 31>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
@ -1328,9 +1328,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 28, i32 29>
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> <i32 30, i32 31>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 6, i32 7, i32 8, i32 9>
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>

View File

@ -245,12 +245,12 @@ define i32 @extract_i64(i32 %arg) {
; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4
; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; GLM-LABEL: 'extract_i64'
@ -408,14 +408,14 @@ define i32 @extract_i32(i32 %arg) {
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; GLM-LABEL: 'extract_i32'
@ -581,17 +581,17 @@ define i32 @extract_i16(i32 %arg) {
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; GLM-LABEL: 'extract_i16'
@ -787,21 +787,21 @@ define i32 @extract_i8(i32 %arg) {
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48
; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63
; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; GLM-LABEL: 'extract_i8'

View File

@ -247,50 +247,11 @@ define <8 x float> @fneg_fabs(<8 x float> %a) {
}
define <8 x i32> @sext_zext(<8 x i16> %a) {
; SSE-LABEL: @sext_zext(
; SSE-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
; SSE-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <8 x i32> [[R7]]
;
; SLM-LABEL: @sext_zext(
; SLM-NEXT: [[A0:%.*]] = extractelement <8 x i16> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <8 x i16> [[A]], i32 1
; SLM-NEXT: [[A2:%.*]] = extractelement <8 x i16> [[A]], i32 2
; SLM-NEXT: [[A3:%.*]] = extractelement <8 x i16> [[A]], i32 3
; SLM-NEXT: [[A4:%.*]] = extractelement <8 x i16> [[A]], i32 4
; SLM-NEXT: [[A5:%.*]] = extractelement <8 x i16> [[A]], i32 5
; SLM-NEXT: [[A6:%.*]] = extractelement <8 x i16> [[A]], i32 6
; SLM-NEXT: [[A7:%.*]] = extractelement <8 x i16> [[A]], i32 7
; SLM-NEXT: [[AB0:%.*]] = sext i16 [[A0]] to i32
; SLM-NEXT: [[AB1:%.*]] = sext i16 [[A1]] to i32
; SLM-NEXT: [[AB2:%.*]] = sext i16 [[A2]] to i32
; SLM-NEXT: [[AB3:%.*]] = sext i16 [[A3]] to i32
; SLM-NEXT: [[AB4:%.*]] = zext i16 [[A4]] to i32
; SLM-NEXT: [[AB5:%.*]] = zext i16 [[A5]] to i32
; SLM-NEXT: [[AB6:%.*]] = zext i16 [[A6]] to i32
; SLM-NEXT: [[AB7:%.*]] = zext i16 [[A7]] to i32
; SLM-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
; SLM-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
; SLM-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
; SLM-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
; SLM-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
; SLM-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
; SLM-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
; SLM-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
; SLM-NEXT: ret <8 x i32> [[R7]]
;
; AVX-LABEL: @sext_zext(
; AVX-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
; AVX-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX-NEXT: ret <8 x i32> [[R7]]
;
; AVX512-LABEL: @sext_zext(
; AVX512-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
; AVX512-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[R7]]
; CHECK-LABEL: @sext_zext(
; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[R7]]
;
%a0 = extractelement <8 x i16> %a, i32 0
%a1 = extractelement <8 x i16> %a, i32 1