[X86] combineX86GatherScatter - only fold scale if the index isn't extended
As mentioned on D108539, when the gather indices are smaller than the pointer size, they are sign-extended BEFORE the scale is applied, making the general fold unsafe. If the index has sufficient sign bits, folding the scale could still be safe - I'll investigate this.
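To illustrate the problem, here is a minimal standalone sketch (not part of the patch; the index value is hypothetical and chosen so the shift overflows the 32-bit lane). The hardware sign-extends the 32-bit index lane to 64 bits after the shift has already happened in 32 bits, so hoisting the shift into the scale can change the computed address:

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical 32-bit gather index whose shifted value no longer fits in a
  // signed 32-bit lane.
  int32_t Idx = 0x20000000;

  // Original node: index lane = Idx << 2 (computed in 32 bits), scale = 1.
  // The hardware sign-extends the already-shifted 32-bit lane to 64 bits.
  int64_t OriginalOffset = (int64_t)(int32_t)((uint32_t)Idx << 2) * 1;

  // After the (unsafe) fold: index lane = Idx, scale = 4.
  // Now the sign-extension happens before the scaling.
  int64_t FoldedOffset = (int64_t)Idx * 4;

  // Prints "-2147483648 vs 2147483648" - the two offsets disagree.
  printf("%lld vs %lld\n", (long long)OriginalOffset, (long long)FoldedOffset);
  return 0;
}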
commit 154c036ebb
parent 96808c69a1
@@ -50230,16 +50230,20 @@ static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const X86Subtarget &Subtarget) {
   auto *MemOp = cast<X86MaskedGatherScatterSDNode>(N);
+  SDValue BasePtr = MemOp->getBasePtr();
   SDValue Index = MemOp->getIndex();
   SDValue Scale = MemOp->getScale();
   SDValue Mask = MemOp->getMask();
 
   // Attempt to fold an index scale into the scale value directly.
+  // For smaller indices, implicit sext is performed BEFORE scale, preventing
+  // this fold under most circumstances.
   // TODO: Move this into X86DAGToDAGISel::matchVectorAddressRecursively?
   if ((Index.getOpcode() == X86ISD::VSHLI ||
        (Index.getOpcode() == ISD::ADD &&
         Index.getOperand(0) == Index.getOperand(1))) &&
-      isa<ConstantSDNode>(Scale)) {
+      isa<ConstantSDNode>(Scale) &&
+      BasePtr.getScalarValueSizeInBits() == Index.getScalarValueSizeInBits()) {
     unsigned ShiftAmt =
         Index.getOpcode() == ISD::ADD ? 1 : Index.getConstantOperandVal(1);
     uint64_t ScaleAmt = cast<ConstantSDNode>(Scale)->getZExtValue();
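For comparison, when the index is already pointer-sized there is no implicit extension and the address arithmetic wraps modulo 2^64, so folding a shift of the index into the scale is exact - which is what the new width check above guards for. A small standalone sketch (hypothetical base and index values, not part of the patch):

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Address math as performed for a pointer-sized (64-bit) index: no implicit
// sign-extension, everything wraps modulo 2^64.
static uint64_t addr(uint64_t Base, int64_t Index, uint64_t Scale) {
  return Base + (uint64_t)Index * Scale;
}

int main() {
  uint64_t Base = 0x1000;
  for (int64_t Idx : {int64_t(-7), int64_t(3), int64_t(1) << 62}) {
    // Original node: index = Idx << 2, scale = 1.
    // Folded node:   index = Idx,      scale = 4.
    assert(addr(Base, (int64_t)((uint64_t)Idx << 2), 1) == addr(Base, Idx, 4));
  }
  return 0;
}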
@@ -4992,9 +4992,9 @@ define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) {
 define <8 x float> @scaleidx_x86gather(float* %base, <8 x i32> %index, <8 x i32> %imask) nounwind {
 ; KNL_64-LABEL: scaleidx_x86gather:
 ; KNL_64: # %bb.0:
-; KNL_64-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; KNL_64-NEXT: vgatherdps %ymm1, (%rdi,%ymm0,4), %ymm2
-; KNL_64-NEXT: vmovaps %ymm2, %ymm0
+; KNL_64-NEXT: vpslld $2, %ymm0, %ymm2
+; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; KNL_64-NEXT: vgatherdps %ymm1, (%rdi,%ymm2), %ymm0
 ; KNL_64-NEXT: retq
 ;
 ; KNL_32-LABEL: scaleidx_x86gather:
@@ -5007,9 +5007,9 @@ define <8 x float> @scaleidx_x86gather(float* %base, <8 x i32> %index, <8 x i32>
 ;
 ; SKX-LABEL: scaleidx_x86gather:
 ; SKX: # %bb.0:
-; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; SKX-NEXT: vgatherdps %ymm1, (%rdi,%ymm0,4), %ymm2
-; SKX-NEXT: vmovaps %ymm2, %ymm0
+; SKX-NEXT: vpslld $2, %ymm0, %ymm2
+; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; SKX-NEXT: vgatherdps %ymm1, (%rdi,%ymm2), %ymm0
 ; SKX-NEXT: retq
 ;
 ; SKX_32-LABEL: scaleidx_x86gather:
@@ -5068,7 +5068,8 @@ define void @scaleidx_x86scatter(<16 x float> %value, float* %base, <16 x i32> %
 ; KNL_64-LABEL: scaleidx_x86scatter:
 ; KNL_64: # %bb.0:
 ; KNL_64-NEXT: kmovw %esi, %k1
-; KNL_64-NEXT: vscatterdps %zmm0, (%rdi,%zmm1,4) {%k1}
+; KNL_64-NEXT: vpaddd %zmm1, %zmm1, %zmm1
+; KNL_64-NEXT: vscatterdps %zmm0, (%rdi,%zmm1,2) {%k1}
 ; KNL_64-NEXT: vzeroupper
 ; KNL_64-NEXT: retq
 ;
@@ -5083,7 +5084,8 @@ define void @scaleidx_x86scatter(<16 x float> %value, float* %base, <16 x i32> %
 ; SKX-LABEL: scaleidx_x86scatter:
 ; SKX: # %bb.0:
 ; SKX-NEXT: kmovw %esi, %k1
-; SKX-NEXT: vscatterdps %zmm0, (%rdi,%zmm1,4) {%k1}
+; SKX-NEXT: vpaddd %zmm1, %zmm1, %zmm1
+; SKX-NEXT: vscatterdps %zmm0, (%rdi,%zmm1,2) {%k1}
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
 ;
@@ -5129,8 +5131,9 @@ define void @scaleidx_scatter(<8 x float> %value, float* %base, <8 x i32> %index
 ;
 ; SKX-LABEL: scaleidx_scatter:
 ; SKX: # %bb.0:
+; SKX-NEXT: vpaddd %ymm1, %ymm1, %ymm1
 ; SKX-NEXT: kmovw %esi, %k1
-; SKX-NEXT: vscatterdps %ymm0, (%rdi,%ymm1,8) {%k1}
+; SKX-NEXT: vscatterdps %ymm0, (%rdi,%ymm1,4) {%k1}
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
 ;