forked from OSchip/llvm-project
[X86][SSE] combineExtractWithShuffle - extract(bitcast(broadcast(x))) --> x
Removes some unnecessary GPR<-->FPU traffic.
This commit is contained in:
parent
58991ba773
commit
5340434c94
|
@@ -37102,11 +37102,24 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
|
|||
|
||||
SDValue SrcBC = peekThroughBitcasts(Src);
|
||||
|
||||
// Handle extract(broadcast(scalar_value)), it doesn't matter what index is.
|
||||
// Handle extract(bitcast(broadcast(scalar_value))).
|
||||
if (X86ISD::VBROADCAST == SrcBC.getOpcode()) {
|
||||
SDValue SrcOp = SrcBC.getOperand(0);
|
||||
if (SrcOp.getValueSizeInBits() == VT.getSizeInBits())
|
||||
return DAG.getBitcast(VT, SrcOp);
|
||||
|
||||
EVT SrcOpVT = SrcOp.getValueType();
|
||||
if (SrcOpVT.isScalarInteger() && VT.isInteger() &&
|
||||
(SrcOpVT.getSizeInBits() % SrcSVT.getSizeInBits()) == 0) {
|
||||
unsigned Scale = SrcOpVT.getSizeInBits() / SrcSVT.getSizeInBits();
|
||||
unsigned Offset = IdxC.urem(Scale) * SrcSVT.getSizeInBits();
|
||||
// TODO support non-zero offsets.
|
||||
if (Offset == 0) {
|
||||
SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, SrcVT.getScalarType());
|
||||
SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, VT);
|
||||
return SrcOp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we're extracting a single element from a broadcast load and there are
|
||||
|
@@ -37126,7 +37139,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
|
|||
}
|
||||
}
|
||||
|
||||
// Handle extract(scalar_to_vector(scalar_value)) for integers.
|
||||
// Handle extract(bitcast(scalar_to_vector(scalar_value))) for integers.
|
||||
// TODO: Move to DAGCombine?
|
||||
if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() &&
|
||||
SrcBC.getValueType().isInteger() &&
|
||||
|
|
|
@@ -77,9 +77,7 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
|
|||
; AVX512-NEXT: movzbl %cl, %eax
|
||||
; AVX512-NEXT: shrl $2, %eax
|
||||
; AVX512-NEXT: andl $3, %eax
|
||||
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
|
||||
; AVX512-NEXT: andl $3, %ecx
|
||||
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; AVX512-NEXT: addb %cl, %al
|
||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512-NEXT: retq
|
||||
|
@@ -124,9 +122,7 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
|
|||
; AVX512-NEXT: kmovd %k0, %ecx
|
||||
; AVX512-NEXT: movzbl %cl, %eax
|
||||
; AVX512-NEXT: shrl $4, %eax
|
||||
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
|
||||
; AVX512-NEXT: andl $15, %ecx
|
||||
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; AVX512-NEXT: addb %cl, %al
|
||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512-NEXT: retq
|
||||
|
@@ -214,9 +210,7 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
|
|||
; AVX512-NEXT: movzbl %cl, %eax
|
||||
; AVX512-NEXT: shrl $2, %eax
|
||||
; AVX512-NEXT: andl $3, %eax
|
||||
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
|
||||
; AVX512-NEXT: andl $3, %ecx
|
||||
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; AVX512-NEXT: addb %cl, %al
|
||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512-NEXT: vzeroupper
|
||||
|
@@ -264,9 +258,7 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
|
|||
; AVX512-NEXT: kmovd %k0, %ecx
|
||||
; AVX512-NEXT: movzbl %cl, %eax
|
||||
; AVX512-NEXT: shrl $4, %eax
|
||||
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
|
||||
; AVX512-NEXT: andl $15, %ecx
|
||||
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; AVX512-NEXT: addb %cl, %al
|
||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512-NEXT: vzeroupper
|
||||
|
@@ -451,9 +443,7 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
|
|||
; AVX512-NEXT: kmovd %k0, %ecx
|
||||
; AVX512-NEXT: movzbl %cl, %eax
|
||||
; AVX512-NEXT: shrl $4, %eax
|
||||
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
|
||||
; AVX512-NEXT: andl $15, %ecx
|
||||
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
|
||||
; AVX512-NEXT: addb %cl, %al
|
||||
; AVX512-NEXT: # kill: def $al killed $al killed $eax
|
||||
; AVX512-NEXT: vzeroupper
|
||||
|
|
Loading…
Reference in New Issue