[X86] Add xop/avx2 shifts to X86TargetLowering::isBinOp

Allows shuffle combining through per-element shift nodes

This exposed a number of issues when shuffle combining target intrinsics that are only lowered to nodes later, during legalization. In particular, shuffle combining and SimplifyDemandedVectorElts were being called after canonicalizeShuffleWithBinOps, meaning that shuffles didn't have a chance to be combined away before the shuffle(binop(x,y)) -> binop(shuffle(x),shuffle(y)) fold.
This commit is contained in:
Simon Pilgrim 2022-01-13 17:29:41 +00:00
parent e2c78f99c4
commit 08212dbc44
3 changed files with 15 additions and 18 deletions

View File

@ -33081,6 +33081,11 @@ bool X86TargetLowering::isBinOp(unsigned Opcode) const {
case X86ISD::FMAX: case X86ISD::FMAX:
case X86ISD::FMIN: case X86ISD::FMIN:
case X86ISD::FANDN: case X86ISD::FANDN:
case X86ISD::VPSHA:
case X86ISD::VPSHL:
case X86ISD::VSHLV:
case X86ISD::VSRLV:
case X86ISD::VSRAV:
return true; return true;
} }
@ -38931,9 +38936,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG)) if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG))
return R; return R;
if (SDValue R = canonicalizeShuffleWithBinOps(N, DAG, DL))
return R;
// Handle specific target shuffles. // Handle specific target shuffles.
switch (Opcode) { switch (Opcode) {
case X86ISD::MOVDDUP: { case X86ISD::MOVDDUP: {
@ -39898,6 +39900,12 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
DCI)) DCI))
return SDValue(N, 0); return SDValue(N, 0);
// Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)).
// Perform this after other shuffle combines to allow inner shuffles to be
// combined away first.
if (SDValue BinOp = canonicalizeShuffleWithBinOps(Op, DAG, SDLoc(N)))
return BinOp;
} }
return SDValue(); return SDValue();

View File

@ -113,7 +113,7 @@ define <4 x i32> @demandedelts_vpsravd(<4 x i32> %a0, <4 x i32> %a1) {
define <4 x i64> @demandedelts_vpsrlvq(<4 x i64> %a0, <4 x i64> %a1) { define <4 x i64> @demandedelts_vpsrlvq(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: demandedelts_vpsrlvq: ; CHECK-LABEL: demandedelts_vpsrlvq:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0 ; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%shuffle = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> zeroinitializer %shuffle = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> zeroinitializer
@ -123,16 +123,13 @@ define <4 x i64> @demandedelts_vpsrlvq(<4 x i64> %a0, <4 x i64> %a1) {
} }
; ;
; isBinOp Handling (TODO) ; isBinOp Handling
; ;
define <4 x i32> @binop_shuffle_vpsllvd(<4 x i32> %a0, <4 x i32> %a1) { define <4 x i32> @binop_shuffle_vpsllvd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: binop_shuffle_vpsllvd: ; CHECK-LABEL: binop_shuffle_vpsllvd:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%shuffle0 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> %shuffle0 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%shuffle1 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> %shuffle1 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@ -144,10 +141,7 @@ define <4 x i32> @binop_shuffle_vpsllvd(<4 x i32> %a0, <4 x i32> %a1) {
define <8 x i32> @binop_shuffle_vpsravd(<8 x i32> %a0, <8 x i32> %a1) { define <8 x i32> @binop_shuffle_vpsravd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: binop_shuffle_vpsravd: ; CHECK-LABEL: binop_shuffle_vpsravd:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; CHECK-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%shuffle0 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> %shuffle0 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
%shuffle1 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> %shuffle1 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
@ -159,10 +153,7 @@ define <8 x i32> @binop_shuffle_vpsravd(<8 x i32> %a0, <8 x i32> %a1) {
define <4 x i64> @binop_shuffle_vpsrlvq(<4 x i64> %a0, <4 x i64> %a1) { define <4 x i64> @binop_shuffle_vpsrlvq(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: binop_shuffle_vpsrlvq: ; CHECK-LABEL: binop_shuffle_vpsrlvq:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,2,1,0]
; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%shuffle0 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> %shuffle0 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%shuffle1 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> %shuffle1 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>

View File

@ -31,16 +31,13 @@ define <4 x i32> @demandedelts_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
} }
; ;
; isBinOp Handling (TODO) ; isBinOp Handling
; ;
define <8 x i16> @binop_shuffle_vpshaw(<8 x i16> %a0, <8 x i16> %a1) { define <8 x i16> @binop_shuffle_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: binop_shuffle_vpshaw: ; CHECK-LABEL: binop_shuffle_vpshaw:
; CHECK: # %bb.0: ; CHECK: # %bb.0:
; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
; CHECK-NEXT: vpshlw %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vpshlw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%shuffle0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> %shuffle0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
%shuffle1 = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> %shuffle1 = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
@ -49,6 +46,7 @@ define <8 x i16> @binop_shuffle_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
ret <8 x i16> %result ret <8 x i16> %result
} }
; TODO - canonicalizeShuffleWithBinOps - handle scaled shuffle masks.
define <2 x i64> @binop_shuffle_vpshlq(<2 x i64> %a0, <2 x i64> %a1) { define <2 x i64> @binop_shuffle_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: binop_shuffle_vpshlq: ; CHECK-LABEL: binop_shuffle_vpshlq:
; CHECK: # %bb.0: ; CHECK: # %bb.0: