[X86] Remove PALIGNR/VALIGN handling from combineBitcastForMaskedOp and move to isel patterns instead. Prefer 128-bit VALIGND/VALIGNQ over PALIGNR during lowering when possible.
llvm-svn: 317299
commit 333897ec31 (parent 2fda36a18e)
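Background on the immediate rescaling used throughout the patch: VALIGNQ counts its rotation in 64-bit elements, VALIGND in 32-bit elements, and VPALIGNR in bytes, so one rotation can be re-expressed at a different granularity by scaling the immediate, provided the result is still a whole number of the new elements. This is the arithmetic behind the removed combineBitcastForMaskedOp code and the ValignqImm32XForm/ValignqImm8XForm/ValigndImm8XForm fragments added below, and it shows up in the updated test, where valignq $3 selects the same ymm elements as valignd $6. A small standalone C++ sketch of that relationship (the helper name is invented for illustration and is not part of the patch):

#include <cassert>
#include <cstdint>

// Hypothetical helper (not in the patch): rescale a rotation immediate from
// one element width to another. Returns false when the rotation is not
// representable at the destination width, mirroring the divisibility check
// performed by the DAG combine code this patch removes.
static bool rescaleAlignImm(uint64_t Imm, unsigned FromEltBits,
                            unsigned ToEltBits, uint64_t &NewImm) {
  uint64_t ShiftBits = Imm * FromEltBits;  // rotation expressed in bits
  if (ShiftBits % ToEltBits != 0)
    return false;                          // e.g. valignd $1 has no valignq form
  NewImm = ShiftBits / ToEltBits;
  return true;
}

int main() {
  uint64_t NewImm;
  // valignq $3 selects the same elements as valignd $6 (qwords -> dwords, x2).
  assert(rescaleAlignImm(3, 64, 32, NewImm) && NewImm == 6);
  // Within 128 bits, valignq $1 matches vpalignr $8 (qwords -> bytes, x8).
  assert(rescaleAlignImm(1, 64, 8, NewImm) && NewImm == 8);
  // An odd dword rotation cannot be expressed as a qword rotation.
  assert(!rescaleAlignImm(1, 32, 64, NewImm));
  return 0;
}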
@@ -10716,10 +10716,16 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 
   // Try to use byte rotation instructions.
   // Its more profitable for pre-SSSE3 to use shuffles/unpacks.
-  if (Subtarget.hasSSSE3())
+  if (Subtarget.hasSSSE3()) {
+    if (Subtarget.hasVLX())
+      if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v2i64, V1, V2,
+                                                      Mask, Subtarget, DAG))
+        return Rotate;
+
     if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
             DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
       return Rotate;
+  }
 
   // If we have direct support for blends, we should lower by decomposing into
   // a permute. That will be faster than the domain cross.
@@ -11016,10 +11022,16 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
 
   // Try to use byte rotation instructions.
   // Its more profitable for pre-SSSE3 to use shuffles/unpacks.
-  if (Subtarget.hasSSSE3())
+  if (Subtarget.hasSSSE3()) {
+    if (Subtarget.hasVLX())
+      if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i32, V1, V2,
+                                                      Mask, Subtarget, DAG))
+        return Rotate;
+
     if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
             DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
       return Rotate;
+  }
 
   // Assume that a single SHUFPS is faster than an alternative sequence of
   // multiple instructions (even if the CPU has a domain penalty).
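Both hunks above let the 128-bit integer shuffle lowering try a whole-vector element rotate (lowerVectorShuffleAsRotate, i.e. VALIGND/VALIGNQ) before falling back to a byte rotate (PALIGNR) when VLX is available. As a rough idea of what such a rotate match checks, here is a simplified standalone sketch; it is not the LLVM implementation, which also tries the swapped operand order, handles whole-undef halves, and derives the rotation from the first defined lane instead of brute force:

#include <vector>

// Simplified sketch (not LLVM's matcher): return the element rotation amount R
// if, for every defined lane i, Mask[i] == i + R, i.e. the result is the low
// N elements of the concatenation (V2 high : V1 low) shifted right by R
// elements -- exactly what VALIGND/VALIGNQ compute. Returns -1 if no rotation
// fits. Mask convention: 0..N-1 select from V1, N..2N-1 from V2, -1 is undef.
static int matchShuffleAsElementRotate(const std::vector<int> &Mask) {
  const int N = static_cast<int>(Mask.size());
  for (int R = 1; R < N; ++R) {
    bool Matches = true;
    for (int i = 0; i < N && Matches; ++i)
      Matches = Mask[i] < 0 || Mask[i] == i + R;
    if (Matches)
      return R;
  }
  return -1; // not a rotation of the two inputs
}

// Example: a v4i32 mask {2, 3, 4, 5} is a rotation by two dwords, so it can be
// lowered to VALIGND $2 (or, after widening, VALIGNQ $1) instead of VPALIGNR.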
@@ -30674,26 +30686,6 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
 
   unsigned Opcode = Op.getOpcode();
   switch (Opcode) {
-  case X86ISD::PALIGNR:
-    // PALIGNR can be converted to VALIGND/Q for 128-bit vectors.
-    if (!VT.is128BitVector())
-      return false;
-    Opcode = X86ISD::VALIGN;
-    LLVM_FALLTHROUGH;
-  case X86ISD::VALIGN: {
-    if (EltVT != MVT::i32 && EltVT != MVT::i64)
-      return false;
-    uint64_t Imm = Op.getConstantOperandVal(2);
-    MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
-    unsigned ShiftAmt = Imm * OpEltVT.getSizeInBits();
-    unsigned EltSize = EltVT.getSizeInBits();
-    // Make sure we can represent the same shift with the new VT.
-    if ((ShiftAmt % EltSize) != 0)
-      return false;
-    Imm = ShiftAmt / EltSize;
-    return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
-                                    DAG.getConstant(Imm, DL, MVT::i8));
-  }
   case X86ISD::SHUF128: {
     if (EltVT.getSizeInBits() != 32 && EltVT.getSizeInBits() != 64)
       return false;
@@ -8911,6 +8911,123 @@ defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" ,
                                           avx512vl_i8_info, avx512vl_i8_info>,
                                           EVEX_CD8<8, CD8VF>;
 
+// Fragments to help convert valignq into masked valignd. Or valignq/valignd
+// into vpalignr.
+def ValignqImm32XForm : SDNodeXForm<imm, [{
+  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
+}]>;
+def ValignqImm8XForm : SDNodeXForm<imm, [{
+  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
+}]>;
+def ValigndImm8XForm : SDNodeXForm<imm, [{
+  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
+}]>;
+
+multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
+                                        X86VectorVTInfo From, X86VectorVTInfo To,
+                                        SDNodeXForm ImmXForm> {
+  def : Pat<(To.VT (vselect To.KRCWM:$mask,
+                            (bitconvert
+                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
+                                              imm:$src3))),
+                            To.RC:$src0)),
+            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
+                                                  To.RC:$src1, To.RC:$src2,
+                                                  (ImmXForm imm:$src3))>;
+
+  def : Pat<(To.VT (vselect To.KRCWM:$mask,
+                            (bitconvert
+                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
+                                              imm:$src3))),
+                            To.ImmAllZerosV)),
+            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
+                                                   To.RC:$src1, To.RC:$src2,
+                                                   (ImmXForm imm:$src3))>;
+
+  def : Pat<(To.VT (vselect To.KRCWM:$mask,
+                            (bitconvert
+                             (From.VT (OpNode From.RC:$src1,
+                                              (bitconvert (To.LdFrag addr:$src2)),
+                                              imm:$src3))),
+                            To.RC:$src0)),
+            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
+                                                  To.RC:$src1, addr:$src2,
+                                                  (ImmXForm imm:$src3))>;
+
+  def : Pat<(To.VT (vselect To.KRCWM:$mask,
+                            (bitconvert
+                             (From.VT (OpNode From.RC:$src1,
+                                              (bitconvert (To.LdFrag addr:$src2)),
+                                              imm:$src3))),
+                            To.ImmAllZerosV)),
+            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
+                                                   To.RC:$src1, addr:$src2,
+                                                   (ImmXForm imm:$src3))>;
+}
+
+multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
+                                           X86VectorVTInfo From,
+                                           X86VectorVTInfo To,
+                                           SDNodeXForm ImmXForm> :
+      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
+  def : Pat<(From.VT (OpNode From.RC:$src1,
+                             (bitconvert (To.VT (X86VBroadcast
+                                                 (To.ScalarLdFrag addr:$src2)))),
+                             imm:$src3)),
+            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
+                                                  (ImmXForm imm:$src3))>;
+
+  def : Pat<(To.VT (vselect To.KRCWM:$mask,
+                            (bitconvert
+                             (From.VT (OpNode From.RC:$src1,
+                                              (bitconvert
+                                               (To.VT (X86VBroadcast
+                                                       (To.ScalarLdFrag addr:$src2)))),
+                                              imm:$src3))),
+                            To.RC:$src0)),
+            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
+                                                   To.RC:$src1, addr:$src2,
+                                                   (ImmXForm imm:$src3))>;
+
+  def : Pat<(To.VT (vselect To.KRCWM:$mask,
+                            (bitconvert
+                             (From.VT (OpNode From.RC:$src1,
+                                              (bitconvert
+                                               (To.VT (X86VBroadcast
+                                                       (To.ScalarLdFrag addr:$src2)))),
+                                              imm:$src3))),
+                            To.ImmAllZerosV)),
+            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
+                                                    To.RC:$src1, addr:$src2,
+                                                    (ImmXForm imm:$src3))>;
+}
+
+let Predicates = [HasAVX512] in {
+  // For 512-bit we lower to the widest element type we can. So we only need
+  // to handle converting valignq to valignd.
+  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
+                                         v16i32_info, ValignqImm32XForm>;
+}
+
+let Predicates = [HasVLX] in {
+  // For 128-bit we lower to the widest element type we can. So we only need
+  // to handle converting valignq to valignd.
+  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
+                                         v4i32x_info, ValignqImm32XForm>;
+  // For 256-bit we lower to the widest element type we can. So we only need
+  // to handle converting valignq to valignd.
+  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
+                                         v8i32x_info, ValignqImm32XForm>;
+}
+
+let Predicates = [HasVLX, HasBWI] in {
+  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
+  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
+                                      v16i8x_info, ValignqImm8XForm>;
+  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
+                                      v16i8x_info, ValigndImm8XForm>;
+}
+
 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" ,
                                           avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
 
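As a usage-level illustration of what these patterns catch: a masked 128-bit dword align written with intrinsics (a hypothetical example, not taken from the patch; assumes an AVX-512VL target, e.g. -mavx512f -mavx512vl) becomes a vselect wrapped around a VALIGN node. When shuffle lowering widens the rotation to qword elements, the patterns above rescale the immediate so the dword write-mask can still be folded into a single masked valignd (or, with BWI, a vpalignr) instead of being rewritten in a DAG combine.

#include <immintrin.h>

// Hypothetical example (not from the patch): rotate the concatenation b:a
// right by two dwords under a dword write-mask. The immediate must be a
// compile-time constant. Even if the shuffle is widened to a qword rotate
// internally, the masked-lowering patterns let isel emit one masked valignd
// with the immediate scaled back to dword units.
__m128i masked_dword_align(__m128i src, __mmask8 k, __m128i a, __m128i b) {
  return _mm_mask_alignr_epi32(src, k, a, b, 2);
}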
@@ -4712,8 +4712,8 @@ declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32,
 define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: valignd $6, %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0x7d,0x28,0x03,0xd9,0x06]
-; CHECK-NEXT: ## ymm3 = ymm1[6,7],ymm0[0,1,2,3,4,5]
+; CHECK-NEXT: valignq $3, %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
+; CHECK-NEXT: ## ymm3 = ymm1[3],ymm0[0,1,2]
 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT: valignd $6, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06]
 ; CHECK-NEXT: ## ymm2 {%k1} = ymm1[6,7],ymm0[0,1,2,3,4,5]