diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e922cb356dfe..7954290f0625 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -50230,16 +50230,20 @@ static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const X86Subtarget &Subtarget) {
   auto *MemOp = cast<X86MaskedGatherScatterSDNode>(N);
+  SDValue BasePtr = MemOp->getBasePtr();
   SDValue Index = MemOp->getIndex();
   SDValue Scale = MemOp->getScale();
   SDValue Mask = MemOp->getMask();
 
   // Attempt to fold an index scale into the scale value directly.
+  // For smaller indices, implicit sext is performed BEFORE scale, preventing
+  // this fold under most circumstances.
   // TODO: Move this into X86DAGToDAGISel::matchVectorAddressRecursively?
   if ((Index.getOpcode() == X86ISD::VSHLI ||
        (Index.getOpcode() == ISD::ADD &&
         Index.getOperand(0) == Index.getOperand(1))) &&
-      isa<ConstantSDNode>(Scale)) {
+      isa<ConstantSDNode>(Scale) &&
+      BasePtr.getScalarValueSizeInBits() == Index.getScalarValueSizeInBits()) {
     unsigned ShiftAmt =
         Index.getOpcode() == ISD::ADD ? 1 : Index.getConstantOperandVal(1);
     uint64_t ScaleAmt = cast<ConstantSDNode>(Scale)->getZExtValue();
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index fbe02af64e3d..387513db012a 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -4992,9 +4992,9 @@ define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) {
 define <8 x float> @scaleidx_x86gather(float* %base, <8 x i32> %index, <8 x i32> %imask) nounwind {
 ; KNL_64-LABEL: scaleidx_x86gather:
 ; KNL_64:       # %bb.0:
-; KNL_64-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; KNL_64-NEXT:    vgatherdps %ymm1, (%rdi,%ymm0,4), %ymm2
-; KNL_64-NEXT:    vmovaps %ymm2, %ymm0
+; KNL_64-NEXT:    vpslld $2, %ymm0, %ymm2
+; KNL_64-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; KNL_64-NEXT:    vgatherdps %ymm1, (%rdi,%ymm2), %ymm0
 ; KNL_64-NEXT:    retq
 ;
 ; KNL_32-LABEL: scaleidx_x86gather:
@@ -5007,9 +5007,9 @@ define <8 x float> @scaleidx_x86gather(float* %base, <8 x i32> %index, <8 x i32>
 ;
 ; SKX-LABEL: scaleidx_x86gather:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; SKX-NEXT:    vgatherdps %ymm1, (%rdi,%ymm0,4), %ymm2
-; SKX-NEXT:    vmovaps %ymm2, %ymm0
+; SKX-NEXT:    vpslld $2, %ymm0, %ymm2
+; SKX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; SKX-NEXT:    vgatherdps %ymm1, (%rdi,%ymm2), %ymm0
 ; SKX-NEXT:    retq
 ;
 ; SKX_32-LABEL: scaleidx_x86gather:
@@ -5068,7 +5068,8 @@ define void @scaleidx_x86scatter(<16 x float> %value, float* %base, <16 x i32> %
 ; KNL_64-LABEL: scaleidx_x86scatter:
 ; KNL_64:       # %bb.0:
 ; KNL_64-NEXT:    kmovw %esi, %k1
-; KNL_64-NEXT:    vscatterdps %zmm0, (%rdi,%zmm1,4) {%k1}
+; KNL_64-NEXT:    vpaddd %zmm1, %zmm1, %zmm1
+; KNL_64-NEXT:    vscatterdps %zmm0, (%rdi,%zmm1,2) {%k1}
 ; KNL_64-NEXT:    vzeroupper
 ; KNL_64-NEXT:    retq
 ;
@@ -5083,7 +5084,8 @@ define void @scaleidx_x86scatter(<16 x float> %value, float* %base, <16 x i32> %
 ; SKX-LABEL: scaleidx_x86scatter:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovw %esi, %k1
-; SKX-NEXT:    vscatterdps %zmm0, (%rdi,%zmm1,4) {%k1}
+; SKX-NEXT:    vpaddd %zmm1, %zmm1, %zmm1
+; SKX-NEXT:    vscatterdps %zmm0, (%rdi,%zmm1,2) {%k1}
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
 ;
@@ -5129,8 +5131,9 @@ define void @scaleidx_scatter(<8 x float> %value, float* %base, <8 x i32> %index
 ;
 ; SKX-LABEL: scaleidx_scatter:
 ; SKX:       # %bb.0:
+; SKX-NEXT:    vpaddd %ymm1, %ymm1, %ymm1
 ; SKX-NEXT:    kmovw %esi, %k1
-; SKX-NEXT:    vscatterdps %ymm0, (%rdi,%ymm1,8) {%k1}
+; SKX-NEXT:    vscatterdps %ymm0, (%rdi,%ymm1,4) {%k1}
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
 ;
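
Illustrative note (not part of the patch): a minimal C++ sketch, with made-up values, of why the new BasePtr/Index width check is needed. When the index elements are narrower than the pointer, the hardware sign-extends the index before applying the scale, whereas the shift in the DAG happens in the narrow index type and can wrap, so shifting-then-extending and extending-then-scaling give different addresses.

    // Sketch only; values are hypothetical.
    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t Idx = 0x40000000; // hypothetical 32-bit index element

      // Original pattern: shl by 2 in 32 bits (wraps to 0), then implicit sext,
      // then scale 1.
      int64_t ShiftThenExt = int64_t(int32_t(uint32_t(Idx) << 2)) * 1;

      // Folded pattern: implicit sext first, then scale 4.
      int64_t ExtThenScale = int64_t(Idx) * 4;

      // 0 vs 0x100000000 -- the two address computations disagree, so the fold
      // is only performed when the index is as wide as the base pointer.
      assert(ShiftThenExt != ExtThenScale);
      return 0;
    }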