Sometimes isPredicable lies to us and tells us we don't need the operands.
Go ahead and add them on when we might want to use them and let later passes
remove them.

Fixes rdar://9118569

llvm-svn: 127518
parent fcc34cacd9
commit 174d872702
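For context, the "operands" the message refers to are ARM's two default predicate operands: a condition code and the register it is read from. The AddDefaultPred call in the diff below is what appends them. A minimal sketch of that helper, assuming the ARMBaseInstrInfo.h definition from this era of the tree (the exact signature is recalled, not taken from this commit):

    // Sketch (assumed-era API): append the "always" predicate, i.e. condition
    // code ARMCC::AL plus a null condition register, so the instruction
    // executes unconditionally but still has its predicate operand slots filled.
    static inline const MachineInstrBuilder &
    AddDefaultPred(const MachineInstrBuilder &MIB) {
      return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
    }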
@@ -123,14 +123,15 @@ class ARMFastISel : public FastISel {
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm);
-    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
-                                    const TargetRegisterClass *RC,
-                                    uint64_t Imm);
     virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm);
+    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+                                    const TargetRegisterClass *RC,
+                                    uint64_t Imm);
+
     virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx);
@@ -193,6 +194,7 @@ class ARMFastISel : public FastISel {
 
     // OptionalDef handling routines.
   private:
+    bool isARMNEONPred(const MachineInstr *MI);
     bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
     const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
     void AddLoadStoreOperands(EVT VT, Address &Addr,
@@ -221,6 +223,21 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
   return true;
 }
 
+bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
+  const TargetInstrDesc &TID = MI->getDesc();
+
+  // If we're a thumb2 or not NEON function we were handled via isPredicable.
+  if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
+       AFI->isThumb2Function())
+    return false;
+
+  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i)
+    if (TID.OpInfo[i].isPredicate())
+      return true;
+
+  return false;
+}
+
 // If the machine is predicable go ahead and add the predicate operands, if
 // it needs default CC operands add those.
 // TODO: If we want to support thumb1 then we'll need to deal with optional
@@ -230,10 +247,12 @@ const MachineInstrBuilder &
 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
   MachineInstr *MI = &*MIB;
 
-  // Do we use a predicate?
-  if (TII.isPredicable(MI))
+  // Do we use a predicate? or...
+  // Are we NEON in ARM mode and have a predicate operand? If so, I know
+  // we're not predicable but add it anyways.
+  if (TII.isPredicable(MI) || isARMNEONPred(MI))
     AddDefaultPred(MIB);
 
   // Do we optionally set a predicate? Preds is size > 0 iff the predicate
   // defines CPSR. All other OptionalDefines in ARM are the CCR register.
   bool CPSR = false;
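Every FastEmitInst_* helper in ARMFastISel routes its freshly built instruction through AddOptionalDefs, so the extended check above is the single point where ARM-mode NEON instructions emitted by fast-isel now pick up their default predicate operands. A hedged sketch of such a call site (the exact body of FastEmitInst_r in this revision is an assumption; it only illustrates where AddOptionalDefs sits in the flow):

    // Build the instruction, then let AddOptionalDefs append the default
    // predicate and any optional CPSR/CCR defs the descriptor calls for.
    unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                         const TargetRegisterClass *RC,
                                         unsigned Op0, bool Op0IsKill) {
      unsigned ResultReg = createResultReg(RC);
      const TargetInstrDesc &II = TII.get(MachineInstOpcode);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                        .addReg(Op0, Op0IsKill * RegState::Kill));
      return ResultReg;
    }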
@@ -0,0 +1,60 @@
+; ModuleID = 'test.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "armv7-apple-darwin"
+
+define i32 @main() nounwind ssp {
+entry:
+  %retval = alloca i32, align 4
+  %X = alloca <4 x i32>, align 16
+  %Y = alloca <4 x float>, align 16
+  store i32 0, i32* %retval
+  %tmp = load <4 x i32>* %X, align 16
+  call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y)
+  %0 = load i32* %retval
+  ret i32 %0
+}
+
+define internal void @__aa(<4 x i32> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
+entry:
+  %__a.addr.i = alloca <4 x i32>, align 16
+  %v.addr = alloca <4 x i32>, align 16
+  %p.addr = alloca i8*, align 4
+  %offset.addr = alloca i32, align 4
+  %constants.addr = alloca <4 x float>*, align 4
+  store <4 x i32> %v, <4 x i32>* %v.addr, align 16
+  store i8* %p, i8** %p.addr, align 4
+  store i32 %offset, i32* %offset.addr, align 4
+  store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
+  %tmp = load <4 x i32>* %v.addr, align 16
+  store <4 x i32> %tmp, <4 x i32>* %__a.addr.i, align 16
+  %tmp.i = load <4 x i32>* %__a.addr.i, align 16
+  %0 = bitcast <4 x i32> %tmp.i to <16 x i8>
+  %1 = bitcast <16 x i8> %0 to <4 x i32>
+  %vcvt.i = sitofp <4 x i32> %1 to <4 x float>
+  %tmp1 = load i8** %p.addr, align 4
+  %tmp2 = load i32* %offset.addr, align 4
+  %tmp3 = load <4 x float>** %constants.addr, align 4
+  call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3)
+  ret void
+}
+
+define internal void @__bb(<4 x float> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
+entry:
+  %v.addr = alloca <4 x float>, align 16
+  %p.addr = alloca i8*, align 4
+  %offset.addr = alloca i32, align 4
+  %constants.addr = alloca <4 x float>*, align 4
+  %data = alloca i64, align 4
+  store <4 x float> %v, <4 x float>* %v.addr, align 16
+  store i8* %p, i8** %p.addr, align 4
+  store i32 %offset, i32* %offset.addr, align 4
+  store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
+  %tmp = load i64* %data, align 4
+  %tmp1 = load i8** %p.addr, align 4
+  %tmp2 = load i32* %offset.addr, align 4
+  %add.ptr = getelementptr i8* %tmp1, i32 %tmp2
+  %0 = bitcast i8* %add.ptr to i64*
+  %arrayidx = getelementptr inbounds i64* %0, i32 0
+  store i64 %tmp, i64* %arrayidx
+  ret void
+}
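The new test (its file name and RUN line are not shown in this excerpt) was presumably reduced from the rdar://9118569 reproducer: the <4 x i32> to <4 x float> sitofp in @__aa becomes an ARM-mode NEON VCVT, exactly the kind of instruction whose predicate operands the new isARMNEONPred path supplies. A plausible way to exercise it by hand, with the flags being a guess rather than the test's actual RUN line:

    llc -O0 -mtriple=armv7-apple-darwin < test.ll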