forked from OSchip/llvm-project
[X86][SSE] There is no zmm addsubpd/addsubps instruction.
Replace the assert in combineShuffleToAddSub with an early out.

llvm-svn: 256922
This commit is contained in:
parent eaabd64a11
commit 267163e713
|
@ -23684,9 +23684,13 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
|
||||||
/// the operands which explicitly discard the lanes which are unused by this
|
/// the operands which explicitly discard the lanes which are unused by this
|
||||||
/// operation to try to flow through the rest of the combiner the fact that
|
/// operation to try to flow through the rest of the combiner the fact that
|
||||||
/// they're unused.
|
/// they're unused.
|
||||||
static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
|
static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget *Subtarget,
|
||||||
|
SelectionDAG &DAG) {
|
||||||
SDLoc DL(N);
|
SDLoc DL(N);
|
||||||
EVT VT = N->getValueType(0);
|
EVT VT = N->getValueType(0);
|
||||||
|
if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
|
||||||
|
(!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
// We only handle target-independent shuffles.
|
// We only handle target-independent shuffles.
|
||||||
// FIXME: It would be easy and harmless to use the target shuffle mask
|
// FIXME: It would be easy and harmless to use the target shuffle mask
|
||||||
|
@ -23728,12 +23732,6 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
|
||||||
isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15})))
|
isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15})))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
// Only specific types are legal at this point, assert so we notice if and
|
|
||||||
// when these change.
|
|
||||||
assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v8f32 ||
|
|
||||||
VT == MVT::v4f64) &&
|
|
||||||
"Unknown vector type encountered!");
|
|
||||||
|
|
||||||
return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
|
return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23753,8 +23751,8 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
|
||||||
|
|
||||||
// If we have legalized the vector types, look for blends of FADD and FSUB
|
// If we have legalized the vector types, look for blends of FADD and FSUB
|
||||||
// nodes that we can fuse into an ADDSUB node.
|
// nodes that we can fuse into an ADDSUB node.
|
||||||
if (TLI.isTypeLegal(VT) && Subtarget->hasSSE3())
|
if (TLI.isTypeLegal(VT))
|
||||||
if (SDValue AddSub = combineShuffleToAddSub(N, DAG))
|
if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG))
|
||||||
return AddSub;
|
return AddSub;
|
||||||
|
|
||||||
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
|
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
|
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
|
||||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX
|
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
|
||||||
|
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512
|
||||||
|
|
||||||
; Test ADDSUB ISel patterns.
|
; Test ADDSUB ISel patterns.
|
||||||
|
|
||||||
|
@ -101,6 +102,62 @@ define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 {
|
||||||
ret <2 x double> %vecinit2
|
ret <2 x double> %vecinit2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define <16 x float> @test5(<16 x float> %A, <16 x float> %B) {
|
||||||
|
; SSE-LABEL: test5:
|
||||||
|
; SSE: # BB#0:
|
||||||
|
; SSE-NEXT: addsubps %xmm4, %xmm0
|
||||||
|
; SSE-NEXT: addsubps %xmm5, %xmm1
|
||||||
|
; SSE-NEXT: addsubps %xmm6, %xmm2
|
||||||
|
; SSE-NEXT: addsubps %xmm7, %xmm3
|
||||||
|
; SSE-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX1-LABEL: test5:
|
||||||
|
; AVX1: # BB#0:
|
||||||
|
; AVX1-NEXT: vaddsubps %ymm2, %ymm0, %ymm0
|
||||||
|
; AVX1-NEXT: vaddsubps %ymm3, %ymm1, %ymm1
|
||||||
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512-LABEL: test5:
|
||||||
|
; AVX512: # BB#0:
|
||||||
|
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm2
|
||||||
|
; AVX512-NEXT: vsubps %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,17,2,19,4,21,6,23,8,25,10,27,12,29,14,31]
|
||||||
|
; AVX512-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0
|
||||||
|
; AVX512-NEXT: retq
|
||||||
|
%add = fadd <16 x float> %A, %B
|
||||||
|
%sub = fsub <16 x float> %A, %B
|
||||||
|
%vecinit2 = shufflevector <16 x float> %sub, <16 x float> %add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
|
||||||
|
ret <16 x float> %vecinit2
|
||||||
|
}
|
||||||
|
|
||||||
|
define <8 x double> @test6(<8 x double> %A, <8 x double> %B) {
|
||||||
|
; SSE-LABEL: test6:
|
||||||
|
; SSE: # BB#0:
|
||||||
|
; SSE-NEXT: addsubpd %xmm4, %xmm0
|
||||||
|
; SSE-NEXT: addsubpd %xmm5, %xmm1
|
||||||
|
; SSE-NEXT: addsubpd %xmm6, %xmm2
|
||||||
|
; SSE-NEXT: addsubpd %xmm7, %xmm3
|
||||||
|
; SSE-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX1-LABEL: test6:
|
||||||
|
; AVX1: # BB#0:
|
||||||
|
; AVX1-NEXT: vaddsubpd %ymm2, %ymm0, %ymm0
|
||||||
|
; AVX1-NEXT: vaddsubpd %ymm3, %ymm1, %ymm1
|
||||||
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512-LABEL: test6:
|
||||||
|
; AVX512: # BB#0:
|
||||||
|
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm2
|
||||||
|
; AVX512-NEXT: vsubpd %zmm1, %zmm0, %zmm0
|
||||||
|
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,9,2,11,4,13,6,15]
|
||||||
|
; AVX512-NEXT: vpermt2pd %zmm2, %zmm1, %zmm0
|
||||||
|
; AVX512-NEXT: retq
|
||||||
|
%add = fadd <8 x double> %A, %B
|
||||||
|
%sub = fsub <8 x double> %A, %B
|
||||||
|
%vecinit2 = shufflevector <8 x double> %sub, <8 x double> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
|
||||||
|
ret <8 x double> %vecinit2
|
||||||
|
}
|
||||||
|
|
||||||
define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
|
define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
|
||||||
; SSE-LABEL: test1b:
|
; SSE-LABEL: test1b:
|
||||||
; SSE: # BB#0:
|
; SSE: # BB#0:
|
||||||
|
|
Loading…
Reference in New Issue