forked from OSchip/llvm-project
[X86][AVX] isHorizontalBinOp - relax lane-crossing limits for AVX1-only targets.
Permit lane-crossing post shuffles on AVX1-only targets as long as, within each 128-bit destination lane, every element comes from the same source lane. For the v8f32/v4f64 cases, such shuffles can be lowered efficiently with the LowerShuffleAsLanePermuteAnd* style methods.
This commit is contained in:
parent
98827feddb
commit
36750ba5bd
|
@ -10661,6 +10661,35 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
|
||||||
return isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), Mask);
|
return isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), Mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Test whether elements in each LaneSizeInBits lane in this shuffle mask come
|
||||||
|
/// from multiple lanes - this is different to isLaneCrossingShuffleMask to
|
||||||
|
/// better support 'repeated mask + lane permute' style shuffles.
|
||||||
|
static bool isMultiLaneShuffleMask(unsigned LaneSizeInBits,
|
||||||
|
unsigned ScalarSizeInBits,
|
||||||
|
ArrayRef<int> Mask) {
|
||||||
|
assert(LaneSizeInBits && ScalarSizeInBits &&
|
||||||
|
(LaneSizeInBits % ScalarSizeInBits) == 0 &&
|
||||||
|
"Illegal shuffle lane size");
|
||||||
|
int NumElts = Mask.size();
|
||||||
|
int NumEltsPerLane = LaneSizeInBits / ScalarSizeInBits;
|
||||||
|
int NumLanes = NumElts / NumEltsPerLane;
|
||||||
|
if (NumLanes > 1) {
|
||||||
|
for (int i = 0; i != NumLanes; ++i) {
|
||||||
|
int SrcLane = -1;
|
||||||
|
for (int j = 0; j != NumEltsPerLane; ++j) {
|
||||||
|
int M = Mask[(i * NumEltsPerLane) + j];
|
||||||
|
if (M < 0)
|
||||||
|
continue;
|
||||||
|
int Lane = (M % NumElts) / NumEltsPerLane;
|
||||||
|
if (SrcLane >= 0 && SrcLane != Lane)
|
||||||
|
return true;
|
||||||
|
SrcLane = Lane;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/// Test whether a shuffle mask is equivalent within each sub-lane.
|
/// Test whether a shuffle mask is equivalent within each sub-lane.
|
||||||
///
|
///
|
||||||
/// This checks a shuffle mask to see if it is performing the same
|
/// This checks a shuffle mask to see if it is performing the same
|
||||||
|
@ -44598,12 +44627,9 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
|
||||||
if (IsIdentityPostShuffle)
|
if (IsIdentityPostShuffle)
|
||||||
PostShuffleMask.clear();
|
PostShuffleMask.clear();
|
||||||
|
|
||||||
// Avoid 128-bit lane crossing if pre-AVX2 and FP (integer will split), unless
|
// Avoid 128-bit multi lane shuffles if pre-AVX2 and FP (integer will split).
|
||||||
// the shuffle can widen to shuffle entire lanes, which should still be quick.
|
|
||||||
if (!IsIdentityPostShuffle && !Subtarget.hasAVX2() && VT.isFloatingPoint() &&
|
if (!IsIdentityPostShuffle && !Subtarget.hasAVX2() && VT.isFloatingPoint() &&
|
||||||
isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(),
|
isMultiLaneShuffleMask(128, VT.getScalarSizeInBits(), PostShuffleMask))
|
||||||
PostShuffleMask) &&
|
|
||||||
!canScaleShuffleElements(PostShuffleMask, 2))
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Assume a SingleSource HOP if we only shuffle one input and don't need to
|
// Assume a SingleSource HOP if we only shuffle one input and don't need to
|
||||||
|
|
|
@ -64,11 +64,9 @@ define <8 x float> @hadd_reverse_v8f32(<8 x float> %a0, <8 x float> %a1) {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: hadd_reverse_v8f32:
|
; AVX1-LABEL: hadd_reverse_v8f32:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm0[3,1],ymm1[3,1],ymm0[7,5],ymm1[7,5]
|
; AVX1-NEXT: vhaddps %ymm1, %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm2[2,3,0,1]
|
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
|
||||||
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0],ymm1[2,0],ymm0[6,4],ymm1[6,4]
|
|
||||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||||
; AVX1-NEXT: vaddps %ymm0, %ymm2, %ymm0
|
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: hadd_reverse_v8f32:
|
; AVX2-LABEL: hadd_reverse_v8f32:
|
||||||
|
|
Loading…
Reference in New Issue