[X86][AVX] combineSignExtendInReg - promote mask arithmetic before v4i64 canonicalization
We rely on the combine (sext_in_reg (v4i64 a/sext (v4i32 x)), v4i1) -> (v4i64 sext (v4i32 sext_in_reg (v4i32 x, ExtraVT))) to avoid complex v4i64 ashr codegen, but doing so prevents v4i64 comparison mask promotion. Ensure we attempt to promote before canonicalizing the (hopefully now redundant) sext_in_reg. Helps with the poor codegen in PR45808.
This commit is contained in:
parent 751da4d596
commit b8a725274c
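For context, the pattern behind PR45808 is a v4i1 comparison mask that is modified with boolean arithmetic and then widened back to v4i64 sign bits for a blend. A minimal LLVM IR sketch of that shape follows; only the icmp sgt <4 x i64> line is taken from the test diff further down, while the function name, the xor constant, and the select are illustrative rather than the exact PR45808 reproducer:

define <4 x i64> @pr45808_sketch(<4 x i64> %a, <4 x i64> %b) {
  ; 64-bit element compare produces a v4i1 mask
  %mask = icmp sgt <4 x i64> %a, %b
  ; boolean arithmetic on the v4i1 mask (illustrative constant)
  %flip = xor <4 x i1> %mask, <i1 true, i1 true, i1 false, i1 false>
  ; selecting 64-bit elements needs the mask back as v4i64 sign bits
  %sel = select <4 x i1> %flip, <4 x i64> %a, <4 x i64> %b
  ret <4 x i64> %sel
}

Previously the mask arithmetic was performed on a mask narrowed to v4i32 and then re-extended (the vpackssdw/vpmovsxdq sequence visible in the AVX1 checks below); promoting it to v4i64 first makes the later sext_in_reg redundant and avoids that round trip.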
@@ -44615,7 +44615,7 @@ static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
   //(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) ->
   // (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT)))
   if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
-      N0.getOpcode() == ISD::SIGN_EXTEND)) {
+                           N0.getOpcode() == ISD::SIGN_EXTEND)) {
     SDValue N00 = N0.getOperand(0);
 
     // EXTLOAD has a better solution on AVX2,
@@ -44624,9 +44624,14 @@ static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
       if (!ISD::isNormalLoad(N00.getNode()))
         return SDValue();
 
+    // Attempt to promote any comparison mask ops before moving the
+    // SIGN_EXTEND_INREG in the way.
+    if (SDValue Promote = PromoteMaskArithmetic(N0.getNode(), DAG, Subtarget))
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Promote, N1);
+
     if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
-      SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
-                                N00, N1);
+      SDValue Tmp =
+          DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, N00, N1);
       return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
     }
   }
@@ -72,12 +72,10 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT:    vpslld $31, %xmm2, %xmm2
-; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm3
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpsllq $63, %xmm3, %xmm3
+; AVX1-NEXT:    vpsllq $63, %xmm2, %xmm2
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
@@ -85,11 +83,8 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
 ; AVX2-LABEL: PR45808:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
-; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
-; AVX2-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vpxor {{.*}}(%rip), %xmm2, %xmm2
-; AVX2-NEXT:    vpslld $31, %xmm2, %xmm2
-; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
+; AVX2-NEXT:    vpxor {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT:    vpsllq $63, %ymm2, %ymm2
 ; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
   %3 = icmp sgt <4 x i64> %0, %1