Sometimes isPredicable lies to us and tells us we don't need the operands.

Go ahead and add them on when we might want to use them and let
later passes remove them.

Fixes rdar://9118569

llvm-svn: 127518
This commit is contained in:
Eric Christopher 2011-03-12 01:09:29 +00:00
parent fcc34cacd9
commit 174d872702
2 changed files with 85 additions and 6 deletions

View File

@ -123,14 +123,15 @@ class ARMFastISel : public FastISel {
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
const ConstantFP *FPImm);
virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm);
virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
uint64_t Imm);
virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm);
virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
unsigned Op0, bool Op0IsKill,
uint32_t Idx);
@ -193,6 +194,7 @@ class ARMFastISel : public FastISel {
// OptionalDef handling routines.
private:
bool isARMNEONPred(const MachineInstr *MI);
bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
void AddLoadStoreOperands(EVT VT, Address &Addr,
@ -221,6 +223,21 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
return true;
}
bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
const TargetInstrDesc &TID = MI->getDesc();
// If we're a thumb2 or not NEON function we were handled via isPredicable.
if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
AFI->isThumb2Function())
return false;
for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i)
if (TID.OpInfo[i].isPredicate())
return true;
return false;
}
// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
@ -230,10 +247,12 @@ const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
MachineInstr *MI = &*MIB;
// Do we use a predicate?
if (TII.isPredicable(MI))
// Do we use a predicate? or...
// Are we NEON in ARM mode and have a predicate operand? If so, I know
// we're not predicable but add it anyways.
if (TII.isPredicable(MI) || isARMNEONPred(MI))
AddDefaultPred(MIB);
// Do we optionally set a predicate? Preds is size > 0 iff the predicate
// defines CPSR. All other OptionalDefines in ARM are the CCR register.
bool CPSR = false;

View File

@ -0,0 +1,60 @@
; ModuleID = 'test.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-n32"
target triple = "armv7-apple-darwin"
define i32 @main() nounwind ssp {
entry:
%retval = alloca i32, align 4
%X = alloca <4 x i32>, align 16
%Y = alloca <4 x float>, align 16
store i32 0, i32* %retval
%tmp = load <4 x i32>* %X, align 16
call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y)
%0 = load i32* %retval
ret i32 %0
}
define internal void @__aa(<4 x i32> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
entry:
%__a.addr.i = alloca <4 x i32>, align 16
%v.addr = alloca <4 x i32>, align 16
%p.addr = alloca i8*, align 4
%offset.addr = alloca i32, align 4
%constants.addr = alloca <4 x float>*, align 4
store <4 x i32> %v, <4 x i32>* %v.addr, align 16
store i8* %p, i8** %p.addr, align 4
store i32 %offset, i32* %offset.addr, align 4
store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
%tmp = load <4 x i32>* %v.addr, align 16
store <4 x i32> %tmp, <4 x i32>* %__a.addr.i, align 16
%tmp.i = load <4 x i32>* %__a.addr.i, align 16
%0 = bitcast <4 x i32> %tmp.i to <16 x i8>
%1 = bitcast <16 x i8> %0 to <4 x i32>
%vcvt.i = sitofp <4 x i32> %1 to <4 x float>
%tmp1 = load i8** %p.addr, align 4
%tmp2 = load i32* %offset.addr, align 4
%tmp3 = load <4 x float>** %constants.addr, align 4
call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3)
ret void
}
define internal void @__bb(<4 x float> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
entry:
%v.addr = alloca <4 x float>, align 16
%p.addr = alloca i8*, align 4
%offset.addr = alloca i32, align 4
%constants.addr = alloca <4 x float>*, align 4
%data = alloca i64, align 4
store <4 x float> %v, <4 x float>* %v.addr, align 16
store i8* %p, i8** %p.addr, align 4
store i32 %offset, i32* %offset.addr, align 4
store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
%tmp = load i64* %data, align 4
%tmp1 = load i8** %p.addr, align 4
%tmp2 = load i32* %offset.addr, align 4
%add.ptr = getelementptr i8* %tmp1, i32 %tmp2
%0 = bitcast i8* %add.ptr to i64*
%arrayidx = getelementptr inbounds i64* %0, i32 0
store i64 %tmp, i64* %arrayidx
ret void
}