[X86][SSE] Add shuffle combining support for ISD::ANY_EXTEND_VECTOR_INREG

Reuses what we already have in place for ISD::ZERO_EXTEND_VECTOR_INREG, just with a different sentinel (undef rather than zero) for the extended upper lanes.

llvm-svn: 361734
parent 7b883b7ed0
commit a044410f37
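In short: DecodeZeroExtendMask gains an IsAnyExtend flag, and the only behavioural difference is which sentinel pads the widened lanes — SM_SentinelZero for zero-extends, SM_SentinelUndef for any-extends. Below is a minimal standalone sketch of that decode logic; the names are illustrative stand-ins, and only the two SM_* sentinels and the loop shape come from the patch itself.

#include <cstdio>
#include <vector>

// Stand-ins for LLVM's SM_SentinelUndef / SM_SentinelZero.
enum : int { SentinelUndef = -1, SentinelZero = -2 };

// Mirrors the patched DecodeZeroExtendMask: each source element i is kept,
// and the (Scale - 1) widened lanes after it are padded with a sentinel.
static void decodeExtendMask(unsigned SrcBits, unsigned DstBits,
                             unsigned NumDstElts, bool IsAnyExtend,
                             std::vector<int> &Mask) {
  unsigned Scale = DstBits / SrcBits;
  for (unsigned i = 0; i != NumDstElts; ++i) {
    Mask.push_back(i);
    for (unsigned j = 1; j != Scale; ++j)
      Mask.push_back(IsAnyExtend ? SentinelUndef : SentinelZero);
  }
}

int main() {
  // v4i16 -> v4i32 (the PMOVZXWD shape that appears in the tests below).
  std::vector<int> ZExt, AExt;
  decodeExtendMask(16, 32, 4, /*IsAnyExtend=*/false, ZExt); // 0 -2 1 -2 2 -2 3 -2
  decodeExtendMask(16, 32, 4, /*IsAnyExtend=*/true, AExt);  // 0 -1 1 -1 2 -1 3 -1
  for (int M : ZExt)
    std::printf("%d ", M);
  std::printf("\n");
  for (int M : AExt)
    std::printf("%d ", M);
  std::printf("\n");
}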
llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp

@@ -1202,7 +1202,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
     LLVM_FALLTHROUGH;
   CASE_PMOVZX(PMOVZXBW, m)
-    DecodeZeroExtendMask(8, 16, getRegOperandNumElts(MI, 16, 0), ShuffleMask);
+    DecodeZeroExtendMask(8, 16, getRegOperandNumElts(MI, 16, 0), false,
+                         ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;

@@ -1210,7 +1211,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
     LLVM_FALLTHROUGH;
   CASE_PMOVZX(PMOVZXBD, m)
-    DecodeZeroExtendMask(8, 32, getRegOperandNumElts(MI, 32, 0), ShuffleMask);
+    DecodeZeroExtendMask(8, 32, getRegOperandNumElts(MI, 32, 0), false,
+                         ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;

@@ -1218,7 +1220,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
     LLVM_FALLTHROUGH;
   CASE_PMOVZX(PMOVZXBQ, m)
-    DecodeZeroExtendMask(8, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask);
+    DecodeZeroExtendMask(8, 64, getRegOperandNumElts(MI, 64, 0), false,
+                         ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;

@@ -1226,7 +1229,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
     LLVM_FALLTHROUGH;
   CASE_PMOVZX(PMOVZXWD, m)
-    DecodeZeroExtendMask(16, 32, getRegOperandNumElts(MI, 32, 0), ShuffleMask);
+    DecodeZeroExtendMask(16, 32, getRegOperandNumElts(MI, 32, 0), false,
+                         ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;

@@ -1234,7 +1238,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
     LLVM_FALLTHROUGH;
   CASE_PMOVZX(PMOVZXWQ, m)
-    DecodeZeroExtendMask(16, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask);
+    DecodeZeroExtendMask(16, 64, getRegOperandNumElts(MI, 64, 0), false,
+                         ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;

@@ -1242,7 +1247,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
     LLVM_FALLTHROUGH;
   CASE_PMOVZX(PMOVZXDQ, m)
-    DecodeZeroExtendMask(32, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask);
+    DecodeZeroExtendMask(32, 64, getRegOperandNumElts(MI, 64, 0), false,
+                         ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
   }
llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp

@@ -383,7 +383,8 @@ void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
 }

 void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
-                          unsigned NumDstElts, SmallVectorImpl<int> &Mask) {
+                          unsigned NumDstElts, bool IsAnyExtend,
+                          SmallVectorImpl<int> &Mask) {
   unsigned Scale = DstScalarBits / SrcScalarBits;
   assert(SrcScalarBits < DstScalarBits &&
          "Expected zero extension mask to increase scalar size");

@@ -391,7 +392,7 @@ void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
   for (unsigned i = 0; i != NumDstElts; i++) {
     Mask.push_back(i);
     for (unsigned j = 1; j != Scale; j++)
-      Mask.push_back(SM_SentinelZero);
+      Mask.push_back(IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero);
   }
 }
llvm/lib/Target/X86/Utils/X86ShuffleDecode.h

@@ -136,7 +136,7 @@ void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,

 /// Decode a zero extension instruction as a shuffle mask.
 void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
-                          unsigned NumDstElts,
+                          unsigned NumDstElts, bool IsAnyExtend,
                           SmallVectorImpl<int> &ShuffleMask);

 /// Decode a move lower and zero upper instruction as a shuffle mask.
llvm/lib/Target/X86/X86ISelLowering.cpp

@@ -6849,17 +6849,20 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
     return true;
   }
   case ISD::ZERO_EXTEND:
-  case ISD::ZERO_EXTEND_VECTOR_INREG: {
+  case ISD::ZERO_EXTEND_VECTOR_INREG:
+  case ISD::ANY_EXTEND_VECTOR_INREG: {
     SDValue Src = N.getOperand(0);
     EVT SrcVT = Src.getValueType();

-    // Zero-extended source must be a simple vector.
+    // Extended source must be a simple vector.
     if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 ||
         (SrcVT.getScalarSizeInBits() % 8) != 0)
       return false;

     unsigned NumSrcBitsPerElt = SrcVT.getScalarSizeInBits();
-    DecodeZeroExtendMask(NumSrcBitsPerElt, NumBitsPerElt, NumElts, Mask);
+    bool IsAnyExtend = (ISD::ANY_EXTEND_VECTOR_INREG == Opcode);
+    DecodeZeroExtendMask(NumSrcBitsPerElt, NumBitsPerElt, NumElts, IsAnyExtend,
+                         Mask);

     if (NumSizeInBits != SrcVT.getSizeInBits()) {
       assert((NumSizeInBits % SrcVT.getSizeInBits()) == 0 &&
@@ -43259,7 +43262,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,

   // Attempt to combine as a shuffle.
   // TODO: SSE41 support
-  if (Subtarget.hasAVX() && N->getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+  if (Subtarget.hasAVX()) {
     SDValue Op(N, 0);
     if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
       if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
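The getFauxShuffleMask change is what actually feeds the combiner: ANY_EXTEND_VECTOR_INREG nodes now decode to a faux shuffle mask whose padding lanes are undef instead of zero. The point of the distinction is matching freedom — an undef lane unifies with anything, while a zero lane only matches zero. A hedged sketch of that rule follows; it is illustrative only, not LLVM's actual matching code.

// Stand-ins for LLVM's SM_SentinelUndef / SM_SentinelZero.
enum : int { SentinelUndef = -1, SentinelZero = -2 };

// Whether one decoded mask lane is satisfied by a candidate shuffle's lane.
static bool laneCompatible(int Decoded, int Candidate) {
  if (Decoded == SentinelUndef)
    return true;                      // any-extend padding: anything goes
  if (Decoded == SentinelZero)
    return Candidate == SentinelZero; // zero-extend padding must stay zero
  return Decoded == Candidate;        // real elements must line up exactly
}

That extra freedom is also why combineExtInVec can drop its ZERO_EXTEND_VECTOR_INREG-only guard above and hand every in-reg extension to combineX86ShufflesRecursively.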
llvm/test/CodeGen/X86/shrink_vmul.ll

@@ -479,11 +479,10 @@ define void @mul_2xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl c, %esi
-; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X86-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
 ; X86-AVX-NEXT:    retl

@@ -503,11 +502,10 @@ define void @mul_2xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX-LABEL: mul_2xi16:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X64-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; X64-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rdx,4)
 ; X64-AVX-NEXT:    retq
 entry:

@@ -1167,10 +1165,9 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl c, %esi
 ; X86-AVX-NEXT:    vpmovsxwq (%edx,%ecx), %xmm0
-; X86-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; X86-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
 ; X86-AVX-NEXT:    retl

@@ -1195,10 +1192,9 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
 ; X64-AVX-NEXT:    vpmovsxwq (%rdi,%rdx), %xmm0
-; X64-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; X64-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
 ; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rdx,4)
 ; X64-AVX-NEXT:    retq
 entry:

@@ -1813,9 +1809,9 @@ define void @mul_2xi16_varconst1(i8* nocapture readonly %a, i64 %index) {
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT:    vpmuludq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
 ; X86-AVX-NEXT:    retl
 ;

@@ -1834,9 +1830,9 @@ define void @mul_2xi16_varconst1(i8* nocapture readonly %a, i64 %index) {
 ; X64-AVX-LABEL: mul_2xi16_varconst1:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
 entry:

@@ -1941,9 +1937,9 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT:    vpmuludq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
 ; X86-AVX-NEXT:    retl
 ;

@@ -1962,9 +1958,9 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
 ; X64-AVX-LABEL: mul_2xi16_varconst3:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
 entry:
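The shrink_vmul.ll churn is all one pattern: a vmovd + vpmovzxwd + vpmulld sequence becomes vpmovzxwq + vpmuludq, with a vpshufd compressing the 64-bit products back to dwords — one instruction shorter per multiply. A hedged scalar sketch of why the two lowerings agree per lane (plain C++, assumes nothing beyond the instruction semantics described above):

#include <cassert>
#include <cstdint>

int main() {
  // One lane of mul_2xi16: both i16 inputs are zero-extended and multiplied
  // down to an i32 result.
  uint16_t A = 0xFFFF, B = 0xFFFF; // worst case for overflow
  // Old shape: PMOVZXWD + PMULLD -- a 32x32 -> low-32 multiply per lane.
  uint32_t MulLd = (uint32_t)A * (uint32_t)B;
  // New shape: PMOVZXWQ + PMULUDQ -- an unsigned 32x32 -> 64 multiply per
  // qword lane; the trailing VPSHUFD keeps only the low 32 bits.
  uint64_t MulUdq = (uint64_t)A * (uint64_t)B;
  assert((uint32_t)MulUdq == MulLd); // the low halves always agree
  return 0;
}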