forked from OSchip/llvm-project
X86: do not fold load instructions such as [V]MOVS[S|D] into other instructions
when the destination register is wider than the memory load. These load instructions read only m32 or m64 and zero the upper bits of the destination, whereas the instruction they would be folded into may expect an m128 operand. rdar://12721174 llvm-svn: 168710
This commit is contained in:
parent
0ee1b50949
commit
5b4628201f
|
@ -3982,6 +3982,21 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
|||
break;
|
||||
}
|
||||
default: {
|
||||
if ((LoadMI->getOpcode() == X86::MOVSSrm ||
|
||||
LoadMI->getOpcode() == X86::VMOVSSrm) &&
|
||||
MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
|
||||
> 4)
|
||||
// These instructions only load 32 bits, we can't fold them if the
|
||||
// destination register is wider than 32 bits (4 bytes).
|
||||
return NULL;
|
||||
if ((LoadMI->getOpcode() == X86::MOVSDrm ||
|
||||
LoadMI->getOpcode() == X86::VMOVSDrm) &&
|
||||
MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
|
||||
> 8)
|
||||
// These instructions only load 64 bits, we can't fold them if the
|
||||
// destination register is wider than 64 bits (8 bytes).
|
||||
return NULL;
|
||||
|
||||
// Folding a normal load. Just copy the load's address operands.
|
||||
unsigned NumOps = LoadMI->getDesc().getNumOperands();
|
||||
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s
|
||||
|
||||
; rdar://12721174
|
||||
; We should not fold movss into pshufd since pshufd expects m128 while movss
|
||||
; loads from m32.
|
||||
; Regression reproducer for rdar://12721174.
; Loads a <4 x float> through %source, takes element 0, inserts it at index 1
; of a zero-initialized <2 x float> temporary, writes that vector through
; %dest, then reloads both lanes from %dest and passes them to @ext.
; The FileCheck directives at the top of the body require the function label,
; then a movss, then a pshufd in llc's output, i.e. the scalar load must
; remain a separate instruction instead of becoming pshufd's memory operand.
define void @sample_test(<4 x float>* %source, <2 x float>* %dest) nounwind {
|
||||
; CHECK: sample_test
|
||||
; CHECK: movss
|
||||
; CHECK: pshufd
|
||||
entry:
|
||||
; Stack slots for the two pointer arguments and the <2 x float> temporary.
%source.addr = alloca <4 x float>*, align 8
|
||||
%dest.addr = alloca <2 x float>*, align 8
|
||||
%tmp = alloca <2 x float>, align 8
|
||||
store <4 x float>* %source, <4 x float>** %source.addr, align 8
|
||||
store <2 x float>* %dest, <2 x float>** %dest.addr, align 8
|
||||
store <2 x float> zeroinitializer, <2 x float>* %tmp, align 8
|
||||
; Load the full 4-element source vector, but use only element 0 of it.
%0 = load <4 x float>** %source.addr, align 8
|
||||
%arrayidx = getelementptr inbounds <4 x float>* %0, i64 0
|
||||
%1 = load <4 x float>* %arrayidx, align 16
|
||||
%2 = extractelement <4 x float> %1, i32 0
|
||||
; Place that scalar in lane 1 of the temporary; lane 0 keeps the zero stored above.
%3 = load <2 x float>* %tmp, align 8
|
||||
%4 = insertelement <2 x float> %3, float %2, i32 1
|
||||
store <2 x float> %4, <2 x float>* %tmp, align 8
|
||||
; Copy the temporary out through %dest.
%5 = load <2 x float>* %tmp, align 8
|
||||
%6 = load <2 x float>** %dest.addr, align 8
|
||||
%arrayidx1 = getelementptr inbounds <2 x float>* %6, i64 0
|
||||
store <2 x float> %5, <2 x float>* %arrayidx1, align 8
|
||||
; Reload the destination vector twice and extract lane 0 and lane 1 separately.
%7 = load <2 x float>** %dest.addr, align 8
|
||||
%arrayidx2 = getelementptr inbounds <2 x float>* %7, i64 0
|
||||
%8 = load <2 x float>* %arrayidx2, align 8
|
||||
%vecext = extractelement <2 x float> %8, i32 0
|
||||
%9 = load <2 x float>** %dest.addr, align 8
|
||||
%arrayidx3 = getelementptr inbounds <2 x float>* %9, i64 0
|
||||
%10 = load <2 x float>* %arrayidx3, align 8
|
||||
%vecext4 = extractelement <2 x float> %10, i32 1
|
||||
; Pass both lanes to the external function @ext.
call void @ext(float %vecext, float %vecext4)
|
||||
ret void
|
||||
}
|
||||
declare void @ext(float, float)
|
Loading…
Reference in New Issue