forked from OSchip/llvm-project
[SelectionDAG][X86] Fix incorrect offset generated for VMASKMOV
When creating the high MachineMemOperand for an MSTORE/MLOAD, we supplied it with the original PointerInfo even though the pointer itself had been incremented. This patch adds the proper offset to the PointerInfo. llvm-svn: 325135
This commit is contained in:
parent
7f246e003a
commit
7e5d525bd5
|
@@ -6835,12 +6835,12 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
|
|||
|
||||
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
|
||||
MST->isCompressingStore());
|
||||
unsigned HiOffset = LoMemVT.getStoreSize();
|
||||
|
||||
MMO = DAG.getMachineFunction().
|
||||
getMachineMemOperand(MST->getPointerInfo(),
|
||||
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
|
||||
SecondHalfAlignment, MST->getAAInfo(),
|
||||
MST->getRanges());
|
||||
MMO = DAG.getMachineFunction().getMachineMemOperand(
|
||||
MST->getPointerInfo().getWithOffset(HiOffset),
|
||||
MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
|
||||
MST->getAAInfo(), MST->getRanges());
|
||||
|
||||
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
|
||||
MST->isTruncatingStore(),
|
||||
|
@@ -6985,11 +6985,12 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
|
|||
|
||||
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
|
||||
MLD->isExpandingLoad());
|
||||
unsigned HiOffset = LoMemVT.getStoreSize();
|
||||
|
||||
MMO = DAG.getMachineFunction().
|
||||
getMachineMemOperand(MLD->getPointerInfo(),
|
||||
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
|
||||
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
|
||||
MMO = DAG.getMachineFunction().getMachineMemOperand(
|
||||
MLD->getPointerInfo().getWithOffset(HiOffset),
|
||||
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
|
||||
MLD->getAAInfo(), MLD->getRanges());
|
||||
|
||||
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
|
||||
ISD::NON_EXTLOAD, MLD->isExpandingLoad());
|
||||
|
|
|
@@ -1210,16 +1210,16 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
|
|||
|
||||
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
|
||||
MLD->isExpandingLoad());
|
||||
unsigned HiOffset = LoMemVT.getStoreSize();
|
||||
|
||||
MMO = DAG.getMachineFunction().
|
||||
getMachineMemOperand(MLD->getPointerInfo(),
|
||||
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
|
||||
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
|
||||
MMO = DAG.getMachineFunction().getMachineMemOperand(
|
||||
MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad,
|
||||
HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(),
|
||||
MLD->getRanges());
|
||||
|
||||
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
|
||||
ExtType, MLD->isExpandingLoad());
|
||||
|
||||
|
||||
// Build a factor node to remember that this load is independent of the
|
||||
// other one.
|
||||
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
|
||||
|
@@ -1928,10 +1928,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
|
|||
|
||||
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
|
||||
N->isCompressingStore());
|
||||
MMO = DAG.getMachineFunction().
|
||||
getMachineMemOperand(N->getPointerInfo(),
|
||||
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
|
||||
SecondHalfAlignment, N->getAAInfo(), N->getRanges());
|
||||
unsigned HiOffset = LoMemVT.getStoreSize();
|
||||
|
||||
MMO = DAG.getMachineFunction().getMachineMemOperand(
|
||||
N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
|
||||
HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(),
|
||||
N->getRanges());
|
||||
|
||||
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
|
||||
N->isTruncatingStore(), N->isCompressingStore());
|
||||
|
|
|
@@ -388,11 +388,11 @@ define void @test18(double* %base, <16 x double> %V, <16 x i1> %mask) {
|
|||
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
|
||||
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
|
||||
; KNL-NEXT: kshiftrw $8, %k1, %k2
|
||||
; KNL-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
|
||||
; KNL-NEXT: kmovw %k1, %eax
|
||||
; KNL-NEXT: movzbl %al, %eax
|
||||
; KNL-NEXT: popcntl %eax, %eax
|
||||
; KNL-NEXT: vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
|
||||
; KNL-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
|
||||
; KNL-NEXT: retq
|
||||
call void @llvm.masked.compressstore.v16f64(<16 x double> %V, double* %base, <16 x i1> %mask)
|
||||
ret void
|
||||
|
|
|
@@ -9,8 +9,8 @@ define void @test_v16f() local_unnamed_addr {
|
|||
; CHECK: bb.0.bb:
|
||||
; CHECK: [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
|
||||
; CHECK: [[VMASKMOVPSYrm:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
|
||||
; CHECK: [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
|
||||
; CHECK: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPSYrm1]] :: (store 32 into %ir.stack_output_vec, align 4)
|
||||
; CHECK: [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec + 32, align 4)
|
||||
; CHECK: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPSYrm1]] :: (store 32 into %ir.stack_output_vec + 32, align 4)
|
||||
; CHECK: VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPSYrm]] :: (store 32 into %ir.stack_output_vec, align 4)
|
||||
; CHECK: RET 0
|
||||
bb:
|
||||
|
@@ -29,8 +29,8 @@ define void @test_v8d() local_unnamed_addr {
|
|||
; CHECK: bb.0.bb:
|
||||
; CHECK: [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
|
||||
; CHECK: [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
|
||||
; CHECK: [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
|
||||
; CHECK: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPDYrm1]] :: (store 32 into %ir.stack_output_vec, align 4)
|
||||
; CHECK: [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec + 32, align 4)
|
||||
; CHECK: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPDYrm1]] :: (store 32 into %ir.stack_output_vec + 32, align 4)
|
||||
; CHECK: VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPDYrm]] :: (store 32 into %ir.stack_output_vec, align 4)
|
||||
; CHECK: RET 0
|
||||
bb:
|
||||
|
|
Loading…
Reference in New Issue