[X86] Move splat addends from the gather/scatter index operand to the base address

This can avoid a vector add and a constant pool load, or an explicit
broadcast in the case of a non-constant addend.

Also reverse the transform any time we encounter a constant index addend
that can't be moved to the base. In that case, pull the constant from the
base into the index. This reduces the code size needed for the
displacement, since we needed the index add anyway. Limit this to a scale
of 1 to avoid divisibility and wrap issues.

Authored by Craig.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D111595
This commit is contained in:
parent b288d08fbb
commit 79f9dfef0d
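As an illustration of the forward transform described above, here is a minimal sketch (not taken from this patch's tests; the function name is invented) in the same typed-pointer IR the tests below use. The splat +4 on the index, at an 8-byte element stride, can become a 4 * 8 = 32-byte adjustment of the scalar base instead of a vector add:

define <8 x i64> @splat_addend(i64* %base, <8 x i64> %i) {
  ; Index is %i plus a splat of 4. Previously the splat needed a constant
  ; pool load and a vector add; with this patch it folds into the base.
  %idx = add <8 x i64> %i, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
  %ptrs = getelementptr i64, i64* %base, <8 x i64> %idx
  %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i64> undef)
  ret <8 x i64> %v
}

declare <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>)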
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:

@@ -216,6 +216,8 @@ namespace {
   bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
   bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                unsigned Depth);
+  bool matchVectorAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+                                     unsigned Depth);
   bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
   bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                   SDValue &Scale, SDValue &Index, SDValue &Disp,
@@ -2468,10 +2470,18 @@ bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
   return false;
 }
 
-/// Helper for selectVectorAddr. Handles things that can be folded into a
-/// gather scatter address. The index register and scale should have already
-/// been handled.
-bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
+bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N,
+                                                    X86ISelAddressMode &AM,
+                                                    unsigned Depth) {
+  SDLoc dl(N);
+  LLVM_DEBUG({
+    dbgs() << "MatchVectorAddress: ";
+    AM.dump(CurDAG);
+  });
+  // Limit recursion.
+  if (Depth > 5)
+    return matchAddressBase(N, AM);
+
   // TODO: Support other operations.
   switch (N.getOpcode()) {
   case ISD::Constant: {
@@ -2484,11 +2494,40 @@ bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
     if (!matchWrapper(N, AM))
       return false;
     break;
+  case ISD::ADD: {
+    // Add an artificial use to this node so that we can keep track of
+    // it if it gets CSE'd with a different node.
+    HandleSDNode Handle(N);
+
+    X86ISelAddressMode Backup = AM;
+    if (!matchVectorAddressRecursively(N.getOperand(0), AM, Depth + 1) &&
+        !matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
+                                       Depth + 1))
+      return false;
+    AM = Backup;
+
+    // Try again after commuting the operands.
+    if (!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
+                                       Depth + 1) &&
+        !matchVectorAddressRecursively(Handle.getValue().getOperand(0), AM,
+                                       Depth + 1))
+      return false;
+    AM = Backup;
+
+    N = Handle.getValue();
+  }
   }
 
   return matchAddressBase(N, AM);
 }
 
+/// Helper for selectVectorAddr. Handles things that can be folded into a
+/// gather/scatter address. The index register and scale should have already
+/// been handled.
+bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
+  return matchVectorAddressRecursively(N, AM, 0);
+}
+
 bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr,
                                        SDValue IndexOp, SDValue ScaleOp,
                                        SDValue &Base, SDValue &Scale,
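A hedged sketch of what the new ISD::ADD handling buys during address matching (illustrative only, not one of the patch's tests; it reuses the gather declaration from the sketch above): when the scalar base of a gather is itself `base + constant`, the recursive matcher can now fold the constant into the instruction's displacement field instead of keeping a separate add. The pr45906 update later in this diff shows the end result of this kind of folding.

define <8 x i64> @base_plus_16(i64* %base, <8 x i64> %i) {
  ; The uniform base is %base + 16 bytes; the +16 can land in the gather's
  ; displacement while %base stays in the base register.
  %p = getelementptr inbounds i64, i64* %base, i64 2
  %ptrs = getelementptr i64, i64* %p, <8 x i64> %i
  %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i64> undef)
  ret <8 x i64> %v
}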
llvm/lib/Target/X86/X86ISelLowering.cpp:

@@ -50283,6 +50283,48 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+  // Try to move splat constant adders from the index operand to the base
+  // pointer operand. Taking care to multiply by the scale. We can only do
+  // this when index element type is the same as the pointer type.
+  // Otherwise we need to be sure the math doesn't wrap before the scale.
+  if (Index.getOpcode() == ISD::ADD &&
+      Index.getValueType().getVectorElementType() == PtrVT &&
+      isa<ConstantSDNode>(Scale)) {
+    uint64_t ScaleAmt = cast<ConstantSDNode>(Scale)->getZExtValue();
+    if (auto *BV = dyn_cast<BuildVectorSDNode>(Index.getOperand(1))) {
+      BitVector UndefElts;
+      if (ConstantSDNode *C = BV->getConstantSplatNode(&UndefElts)) {
+        // FIXME: Allow non-constant?
+        if (UndefElts.none()) {
+          // Apply the scale.
+          APInt Adder = C->getAPIntValue() * ScaleAmt;
+          // Add it to the existing base.
+          Base = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
+                             DAG.getConstant(Adder, DL, PtrVT));
+          Index = Index.getOperand(0);
+          return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
+        }
+      }
+
+      // It's also possible base is just a constant. In that case, just
+      // replace it with 0 and move the displacement into the index.
+      if (BV->isConstant() && isa<ConstantSDNode>(Base) &&
+          isOneConstant(Scale)) {
+        SDValue Splat = DAG.getSplatBuildVector(Index.getValueType(), DL, Base);
+        // Combine the constant build_vector and the constant base.
+        Splat = DAG.getNode(ISD::ADD, DL, Index.getValueType(),
+                            Index.getOperand(1), Splat);
+        // Add to the LHS of the original Index add.
+        Index = DAG.getNode(ISD::ADD, DL, Index.getValueType(),
+                            Index.getOperand(0), Splat);
+        Base = DAG.getConstant(0, DL, Base.getValueType());
+        return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
+      }
+    }
+  }
+
   if (DCI.isBeforeLegalizeOps()) {
     unsigned IndexWidth = Index.getScalarValueSizeInBits();
 
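The reverse direction can be read off the `test_const_fold` case added at the end of this diff. A worked sketch of its address arithmetic (`%struct.ST2` is `{ i32, i32 }`, so the element stride is 8 and field 1 sits at byte offset 4; the index is pre-scaled by `vpsllq $3`, leaving the gather at scale 1):

; addr_k = %base + 8 * (%i1_k + k) + 4,          k = 0..7
;        = (%base + 8 * %i1_k) + (8 * k + 4)

The per-lane constant <4, 12, 20, ..., 60> already requires one vector add from the constant pool, so the +4 from the struct offset rides along in that constant instead of being encoded as a displacement on the gather.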
llvm/test/CodeGen/X86/masked_gather_scatter.ll:

@@ -476,10 +476,9 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
 ; KNL_64-NEXT:    vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
 ; KNL_64-NEXT:    vpmuldq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1
 ; KNL_64-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
-; KNL_64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0
-; KNL_64-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
+; KNL_64-NEXT:    vpaddq %zmm0, %zmm4, %zmm1
 ; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
-; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
+; KNL_64-NEXT:    vpgatherqd 72(,%zmm1), %ymm0 {%k1}
 ; KNL_64-NEXT:    retq
 ;
 ; KNL_32-LABEL: test9:
@@ -491,12 +490,10 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
 ; KNL_32-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [820,820,820,820,820,820,820,820]
 ; KNL_32-NEXT:    vpmulld %ymm3, %ymm0, %ymm0
 ; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
-; KNL_32-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [68,68,68,68,68,68,68,68]
-; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
 ; KNL_32-NEXT:    vpaddd %ymm0, %ymm2, %ymm1
 ; KNL_32-NEXT:    movw $255, %ax
 ; KNL_32-NEXT:    kmovw %eax, %k1
-; KNL_32-NEXT:    vpgatherdd (,%zmm1), %zmm0 {%k1}
+; KNL_32-NEXT:    vpgatherdd 68(,%zmm1), %zmm0 {%k1}
 ; KNL_32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; KNL_32-NEXT:    retl
 ;
@@ -507,10 +504,9 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
 ; SKX_SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
 ; SKX_SMALL-NEXT:    vpmuldq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1
 ; SKX_SMALL-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
-; SKX_SMALL-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
-; SKX_SMALL-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
+; SKX_SMALL-NEXT:    vpaddq %zmm0, %zmm2, %zmm1
 ; SKX_SMALL-NEXT:    kxnorw %k0, %k0, %k1
-; SKX_SMALL-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_SMALL-NEXT:    vpgatherqd 72(,%zmm1), %ymm0 {%k1}
 ; SKX_SMALL-NEXT:    retq
 ;
 ; SKX_LARGE-LABEL: test9:
@@ -522,11 +518,9 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
 ; SKX_LARGE-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
 ; SKX_LARGE-NEXT:    vpmullq (%rax){1to8}, %zmm0, %zmm0
 ; SKX_LARGE-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
-; SKX_LARGE-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
-; SKX_LARGE-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
-; SKX_LARGE-NEXT:    vpaddq (%rax){1to8}, %zmm0, %zmm1
+; SKX_LARGE-NEXT:    vpaddq %zmm0, %zmm2, %zmm1
 ; SKX_LARGE-NEXT:    kxnorw %k0, %k0, %k1
-; SKX_LARGE-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_LARGE-NEXT:    vpgatherqd 72(,%zmm1), %ymm0 {%k1}
 ; SKX_LARGE-NEXT:    retq
 ;
 ; SKX_32-LABEL: test9:
@@ -535,10 +529,9 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
 ; SKX_32-NEXT:    vpmovqd %zmm0, %ymm0
 ; SKX_32-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
 ; SKX_32-NEXT:    vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0
-; SKX_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
-; SKX_32-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm1
+; SKX_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm1
 ; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
-; SKX_32-NEXT:    vpgatherdd (,%ymm1), %ymm0 {%k1}
+; SKX_32-NEXT:    vpgatherdd 68(,%ymm1), %ymm0 {%k1}
 ; SKX_32-NEXT:    retl
 entry:
   %broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0
@@ -562,10 +555,9 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
 ; KNL_64-NEXT:    vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
 ; KNL_64-NEXT:    vpmuldq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1
 ; KNL_64-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
-; KNL_64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0
-; KNL_64-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
+; KNL_64-NEXT:    vpaddq %zmm0, %zmm4, %zmm1
 ; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
-; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
+; KNL_64-NEXT:    vpgatherqd 72(,%zmm1), %ymm0 {%k1}
 ; KNL_64-NEXT:    retq
 ;
 ; KNL_32-LABEL: test10:
@@ -577,12 +569,10 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
 ; KNL_32-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [820,820,820,820,820,820,820,820]
 ; KNL_32-NEXT:    vpmulld %ymm3, %ymm0, %ymm0
 ; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
-; KNL_32-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [68,68,68,68,68,68,68,68]
-; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
 ; KNL_32-NEXT:    vpaddd %ymm0, %ymm2, %ymm1
 ; KNL_32-NEXT:    movw $255, %ax
 ; KNL_32-NEXT:    kmovw %eax, %k1
-; KNL_32-NEXT:    vpgatherdd (,%zmm1), %zmm0 {%k1}
+; KNL_32-NEXT:    vpgatherdd 68(,%zmm1), %zmm0 {%k1}
 ; KNL_32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; KNL_32-NEXT:    retl
 ;
@@ -593,10 +583,9 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
 ; SKX_SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
 ; SKX_SMALL-NEXT:    vpmuldq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1
 ; SKX_SMALL-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
-; SKX_SMALL-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
-; SKX_SMALL-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
+; SKX_SMALL-NEXT:    vpaddq %zmm0, %zmm2, %zmm1
 ; SKX_SMALL-NEXT:    kxnorw %k0, %k0, %k1
-; SKX_SMALL-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_SMALL-NEXT:    vpgatherqd 72(,%zmm1), %ymm0 {%k1}
 ; SKX_SMALL-NEXT:    retq
 ;
 ; SKX_LARGE-LABEL: test10:
@@ -608,11 +597,9 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
 ; SKX_LARGE-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
 ; SKX_LARGE-NEXT:    vpmullq (%rax){1to8}, %zmm0, %zmm0
 ; SKX_LARGE-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
-; SKX_LARGE-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
-; SKX_LARGE-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
-; SKX_LARGE-NEXT:    vpaddq (%rax){1to8}, %zmm0, %zmm1
+; SKX_LARGE-NEXT:    vpaddq %zmm0, %zmm2, %zmm1
 ; SKX_LARGE-NEXT:    kxnorw %k0, %k0, %k1
-; SKX_LARGE-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_LARGE-NEXT:    vpgatherqd 72(,%zmm1), %ymm0 {%k1}
 ; SKX_LARGE-NEXT:    retq
 ;
 ; SKX_32-LABEL: test10:
@@ -621,10 +608,9 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
 ; SKX_32-NEXT:    vpmovqd %zmm0, %ymm0
 ; SKX_32-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
 ; SKX_32-NEXT:    vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0
-; SKX_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
-; SKX_32-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm1
+; SKX_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm1
 ; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
-; SKX_32-NEXT:    vpgatherdd (,%ymm1), %ymm0 {%k1}
+; SKX_32-NEXT:    vpgatherdd 68(,%ymm1), %ymm0 {%k1}
 ; SKX_32-NEXT:    retl
 entry:
   %broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0
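In the test9/test10 checks above, the constant that used to be materialized as a splat vector add (e.g. the broadcast of [68,68,...,68] on the 32-bit runs) now appears as the gather's scalar displacement instead: `vpgatherdd 68(,%zmm1)`, and `vpgatherqd 72(,%zmm1)` on the 64-bit runs. Each configuration loses one vector add plus the splat materialization.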
@@ -5125,39 +5111,30 @@ declare void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float>, <8 x float*>, i32 i
 define <8 x i64> @pr45906(<8 x %struct.foo*> %ptr) {
 ; KNL_64-LABEL: pr45906:
 ; KNL_64:       # %bb.0: # %bb
-; KNL_64-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
 ; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
-; KNL_64-NEXT:    vpgatherqq (,%zmm1), %zmm0 {%k1}
+; KNL_64-NEXT:    vpgatherqq 8(,%zmm0), %zmm1 {%k1}
+; KNL_64-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; KNL_64-NEXT:    retq
 ;
 ; KNL_32-LABEL: pr45906:
 ; KNL_32:       # %bb.0: # %bb
-; KNL_32-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
-; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm1
 ; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
-; KNL_32-NEXT:    vpgatherdq (,%ymm1), %zmm0 {%k1}
+; KNL_32-NEXT:    vpgatherdq 4(,%ymm0), %zmm1 {%k1}
+; KNL_32-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; KNL_32-NEXT:    retl
 ;
-; SKX_SMALL-LABEL: pr45906:
-; SKX_SMALL:       # %bb.0: # %bb
-; SKX_SMALL-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
-; SKX_SMALL-NEXT:    kxnorw %k0, %k0, %k1
-; SKX_SMALL-NEXT:    vpgatherqq (,%zmm1), %zmm0 {%k1}
-; SKX_SMALL-NEXT:    retq
-;
-; SKX_LARGE-LABEL: pr45906:
-; SKX_LARGE:       # %bb.0: # %bb
-; SKX_LARGE-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
-; SKX_LARGE-NEXT:    vpaddq (%rax){1to8}, %zmm0, %zmm1
-; SKX_LARGE-NEXT:    kxnorw %k0, %k0, %k1
-; SKX_LARGE-NEXT:    vpgatherqq (,%zmm1), %zmm0 {%k1}
-; SKX_LARGE-NEXT:    retq
+; SKX-LABEL: pr45906:
+; SKX:       # %bb.0: # %bb
+; SKX-NEXT:    kxnorw %k0, %k0, %k1
+; SKX-NEXT:    vpgatherqq 8(,%zmm0), %zmm1 {%k1}
+; SKX-NEXT:    vmovdqa64 %zmm1, %zmm0
+; SKX-NEXT:    retq
 ;
 ; SKX_32-LABEL: pr45906:
 ; SKX_32:       # %bb.0: # %bb
-; SKX_32-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm1
 ; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
-; SKX_32-NEXT:    vpgatherdq (,%ymm1), %zmm0 {%k1}
+; SKX_32-NEXT:    vpgatherdq 4(,%ymm0), %zmm1 {%k1}
+; SKX_32-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; SKX_32-NEXT:    retl
 bb:
   %tmp = getelementptr inbounds %struct.foo, <8 x %struct.foo*> %ptr, i64 0, i32 1
@@ -5165,3 +5142,69 @@ bb:
   ret <8 x i64> %tmp1
 }
 declare <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>)
+
+%struct.ST2 = type { i32, i32 }
+
+; Make sure we don't use a displacement on the gather. The constant from the
+; struct offset should be folded into the constant pool load for the vector
+; add.
+define <8 x i32> @test_const_fold(%struct.ST2* %base, <8 x i64> %i1) {
+; KNL_64-LABEL: test_const_fold:
+; KNL_64:       # %bb.0: # %entry
+; KNL_64-NEXT:    vpsllq $3, %zmm0, %zmm0
+; KNL_64-NEXT:    vpbroadcastq %rdi, %zmm1
+; KNL_64-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
+; KNL_64-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
+; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
+; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
+; KNL_64-NEXT:    retq
+;
+; KNL_32-LABEL: test_const_fold:
+; KNL_32:       # %bb.0: # %entry
+; KNL_32-NEXT:    vpmovqd %zmm0, %ymm0
+; KNL_32-NEXT:    vpslld $3, %ymm0, %ymm0
+; KNL_32-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %ymm1
+; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; KNL_32-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
+; KNL_32-NEXT:    movw $255, %ax
+; KNL_32-NEXT:    kmovw %eax, %k1
+; KNL_32-NEXT:    vpgatherdd (,%zmm1), %zmm0 {%k1}
+; KNL_32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; KNL_32-NEXT:    retl
+;
+; SKX_SMALL-LABEL: test_const_fold:
+; SKX_SMALL:       # %bb.0: # %entry
+; SKX_SMALL-NEXT:    vpsllq $3, %zmm0, %zmm0
+; SKX_SMALL-NEXT:    vpbroadcastq %rdi, %zmm1
+; SKX_SMALL-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
+; SKX_SMALL-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
+; SKX_SMALL-NEXT:    kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_SMALL-NEXT:    retq
+;
+; SKX_LARGE-LABEL: test_const_fold:
+; SKX_LARGE:       # %bb.0: # %entry
+; SKX_LARGE-NEXT:    vpsllq $3, %zmm0, %zmm0
+; SKX_LARGE-NEXT:    vpbroadcastq %rdi, %zmm1
+; SKX_LARGE-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
+; SKX_LARGE-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
+; SKX_LARGE-NEXT:    vpaddq (%rax), %zmm0, %zmm1
+; SKX_LARGE-NEXT:    kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_LARGE-NEXT:    retq
+;
+; SKX_32-LABEL: test_const_fold:
+; SKX_32:       # %bb.0: # %entry
+; SKX_32-NEXT:    vpmovqd %zmm0, %ymm0
+; SKX_32-NEXT:    vpslld $3, %ymm0, %ymm0
+; SKX_32-NEXT:    vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0
+; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
+; SKX_32-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
+; SKX_32-NEXT:    vpgatherdd (,%ymm1), %ymm0 {%k1}
+; SKX_32-NEXT:    retl
+entry:
+  %add = add <8 x i64> %i1, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
+  %arrayidx = getelementptr %struct.ST2, %struct.ST2* %base, <8 x i64> %add, i32 1
+  %res = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+  ret <8 x i32> %res
+}