Rewrite fast-isel integer cast handling to handle more cases, and to be simpler and more consistent.

The practical effects here are that x86-64 fast-isel can now handle trunc from i8 to i1, and ARM fast-isel can handle many more constructs involving integers narrower than 32 bits (including loads, stores, and many integer casts).

rdar://9437928.

llvm-svn: 132099
This commit is contained in:
Eli Friedman 2011-05-25 23:49:02 +00:00
parent fa63d3096d
commit c70355195c
5 changed files with 205 additions and 59 deletions

View File

@ -111,8 +111,8 @@ unsigned FastISel::getRegForValue(const Value *V) {
// of whether FastISel can handle them.
MVT VT = RealVT.getSimpleVT();
if (!TLI.isTypeLegal(VT)) {
// Promote MVT::i1 to a legal type though, because it's common and easy.
if (VT == MVT::i1)
// Handle integer promotions, though, because they're common and easy.
if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
else
return 0;
@ -653,21 +653,13 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
// Unhandled type. Halt "fast" selection and bail.
return false;
// Check if the destination type is legal. Or as a special case,
// it may be i1 if we're doing a truncate because that's
// easy and somewhat common.
// Check if the destination type is legal.
if (!TLI.isTypeLegal(DstVT))
if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE)
// Unhandled type. Halt "fast" selection and bail.
return false;
return false;
// Check if the source operand is legal. Or as a special case,
// it may be i1 if we're doing zero-extension because that's
// easy and somewhat common.
// Check if the source operand is legal.
if (!TLI.isTypeLegal(SrcVT))
if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND)
// Unhandled type. Halt "fast" selection and bail.
return false;
return false;
unsigned InputReg = getRegForValue(I->getOperand(0));
if (!InputReg)
@ -676,18 +668,6 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
// If the operand is i1, arrange for the high bits in the register to be zero.
if (SrcVT == MVT::i1) {
SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT);
InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill);
if (!InputReg)
return false;
InputRegIsKill = true;
}
// If the result is i1, truncate to the target's type for i1 first.
if (DstVT == MVT::i1)
DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT);
unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
DstVT.getSimpleVT(),
Opcode,

View File

@ -167,6 +167,7 @@ class ARMFastISel : public FastISel {
bool SelectCall(const Instruction *I);
bool SelectSelect(const Instruction *I);
bool SelectRet(const Instruction *I);
bool SelectIntCast(const Instruction *I);
// Utility routines.
private:
@ -1129,7 +1130,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
MVT SourceVT;
if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
(isTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
(isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
unsigned OpReg = getRegForValue(TI->getOperand(0));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@ -1948,6 +1949,77 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
}
/// Select an integer cast (trunc, zext or sext) between promotable integers.
///
/// On ARM, in general, integer casts don't involve legal types; this code
/// handles promotable integers. The high bits for a type smaller than the
/// register size are assumed to be undefined.
///
/// \param I  the cast instruction to select.
/// \returns true if a register was recorded for \p I via UpdateValueMap,
///          false if this routine does not handle the cast.
bool ARMFastISel::SelectIntCast(const Instruction *I) {
  const Type *DestTy = I->getType();
  Value *Op = I->getOperand(0);
  const Type *SrcTy = Op->getType();

  EVT SrcVT, DestVT;
  SrcVT = TLI.getValueType(SrcTy, true);
  DestVT = TLI.getValueType(DestTy, true);

  if (isa<TruncInst>(I)) {
    // Only i32/i16/i8 -> i16/i8/i1 truncations are handled.
    if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
      return false;
    if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
      return false;

    unsigned SrcReg = getRegForValue(Op);
    if (!SrcReg) return false;

    // Because the high bits are undefined, a truncate doesn't generate
    // any code.
    UpdateValueMap(I, SrcReg);
    return true;
  }

  // Extensions: the destination must be i32, i16 or i8.
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // The source may be an extended (non-simple) value type, e.g. for an
  // unusual width such as i24. getSimpleVT() is only valid on simple types,
  // so reject anything else before switching on it.
  if (!SrcVT.isSimple())
    return false;

  unsigned Opc;
  bool isZext = isa<ZExtInst>(I);
  bool isBoolZext = false;
  switch (SrcVT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i16:
    if (isZext)
      Opc = isThumb ? ARM::t2UXTHr : ARM::UXTHr;
    else
      Opc = isThumb ? ARM::t2SXTHr : ARM::SXTHr;
    break;
  case MVT::i8:
    if (isZext)
      Opc = isThumb ? ARM::t2UXTBr : ARM::UXTBr;
    else
      Opc = isThumb ? ARM::t2SXTBr : ARM::SXTBr;
    break;
  case MVT::i1:
    // Only zero-extension from i1 is handled; it is emitted as an AND with
    // the immediate 1 (added below). Sign-extension from i1 is rejected.
    if (isZext) {
      Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
      isBoolZext = true;
      break;
    }
    return false;
  }

  // FIXME: We could save an instruction in many cases by special-casing
  // load instructions.
  unsigned SrcReg = getRegForValue(Op);
  if (!SrcReg) return false;

  // The result always lives in a register of the i32 register class,
  // whatever the destination width.
  unsigned DestReg = createResultReg(TLI.getRegClassFor(MVT::i32));
  MachineInstrBuilder MIB;
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
        .addReg(SrcReg);
  if (isBoolZext)
    MIB.addImm(1);
  AddOptionalDefs(MIB);
  UpdateValueMap(I, DestReg);
  return true;
}
// TODO: SoftFP support.
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
@ -1985,6 +2057,10 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
return SelectSelect(I);
case Instruction::Ret:
return SelectRet(I);
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
return SelectIntCast(I);
default: break;
}
return false;

View File

@ -936,18 +936,31 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
bool X86FastISel::X86SelectZExt(const Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
if (I->getType()->isIntegerTy(8) &&
I->getOperand(0)->getType()->isIntegerTy(1)) {
unsigned ResultReg = getRegForValue(I->getOperand(0));
if (ResultReg == 0) return false;
// Set the high bits to zero.
ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
if (ResultReg == 0) return false;
UpdateValueMap(I, ResultReg);
return true;
if (!I->getOperand(0)->getType()->isIntegerTy(1))
return false;
EVT DstVT = TLI.getValueType(I->getType());
if (!TLI.isTypeLegal(DstVT))
return false;
unsigned ResultReg = getRegForValue(I->getOperand(0));
if (ResultReg == 0)
return false;
// Set the high bits to zero.
ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
if (ResultReg == 0)
return false;
if (DstVT != MVT::i8) {
ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
ResultReg, /*Kill=*/true);
if (ResultReg == 0)
return false;
}
return false;
UpdateValueMap(I, ResultReg);
return true;
}
@ -1229,18 +1242,13 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
}
bool X86FastISel::X86SelectTrunc(const Instruction *I) {
if (Subtarget->is64Bit())
// All other cases should be handled by the tblgen generated code.
return false;
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
// This code only handles truncation to byte right now.
// This code only handles truncation to byte.
if (DstVT != MVT::i8 && DstVT != MVT::i1)
// All other cases should be handled by the tblgen generated code.
return false;
if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
// All other cases should be handled by the tblgen generated code.
if (!TLI.isTypeLegal(SrcVT))
return false;
unsigned InputReg = getRegForValue(I->getOperand(0));
@ -1248,16 +1256,26 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
// Unhandled operand. Halt "fast" selection and bail.
return false;
// First issue a copy to GR16_ABCD or GR32_ABCD.
const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
unsigned CopyReg = createResultReg(CopyRC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
CopyReg).addReg(InputReg);
if (SrcVT == MVT::i8) {
// Truncate from i8 to i1; no code needed.
UpdateValueMap(I, InputReg);
return true;
}
// Then issue an extract_subreg.
if (!Subtarget->is64Bit()) {
// If we're on x86-32, we can't extract an i8 from a general register.
// First issue a copy to GR16_ABCD or GR32_ABCD.
const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
unsigned CopyReg = createResultReg(CopyRC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
CopyReg).addReg(InputReg);
InputReg = CopyReg;
}
// Issue an extract_subreg.
unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
CopyReg, /*Kill=*/true,
InputReg, /*Kill=*/true,
X86::sub_8bit);
if (!ResultReg)
return false;

View File

@ -61,3 +61,74 @@ b3:
; THUMB: orr {{.*}} #4
; ARM: orr {{.*}} #4
}
; test3: integer casts, stores and loads on integers narrower than 32 bits.
; Each basic block below is followed by the instruction mnemonics expected,
; in order, for both check prefixes.
define void @test3(i32 %tmp, i32* %ptr1, i16* %ptr2, i8* %ptr3) nounwind {
; THUMB: test3:
; ARM: test3:
; bb1: truncate i32 -> i16 -> i8 -> i1, then zero-extend back up through
; i8, i16 and i32, storing each widened value. No checks are listed for the
; truncates themselves; the zext from i1 is expected to show up as an "and".
bb1:
%a1 = trunc i32 %tmp to i16
%a2 = trunc i16 %a1 to i8
%a3 = trunc i8 %a2 to i1
%a4 = zext i1 %a3 to i8
store i8 %a4, i8* %ptr3
%a5 = zext i8 %a4 to i16
store i16 %a5, i16* %ptr2
%a6 = zext i16 %a5 to i32
store i32 %a6, i32* %ptr1
br label %bb2
; THUMB: and
; THUMB: strb
; THUMB: uxtb
; THUMB: strh
; THUMB: uxth
; ARM: and
; ARM: strb
; ARM: uxtb
; ARM: strh
; ARM: uxth
; bb2: truncate i32 -> i16 -> i8, then sign-extend back up through i16 and
; i32, storing the i8 value and each widened value.
bb2:
%b1 = trunc i32 %tmp to i16
%b2 = trunc i16 %b1 to i8
store i8 %b2, i8* %ptr3
%b3 = sext i8 %b2 to i16
store i16 %b3, i16* %ptr2
%b4 = sext i16 %b3 to i32
store i32 %b4, i32* %ptr1
br label %bb3
; THUMB: strb
; THUMB: sxtb
; THUMB: strh
; THUMB: sxth
; ARM: strb
; ARM: sxtb
; ARM: strh
; ARM: sxth
; bb3: load i8, i16 and i32 values, zero-extend the i8 and sign-extend the
; i16 to i32, then combine them with add/sub and store the i32 result.
bb3:
%c1 = load i8* %ptr3
%c2 = load i16* %ptr2
%c3 = load i32* %ptr1
%c4 = zext i8 %c1 to i32
%c5 = sext i16 %c2 to i32
%c6 = add i32 %c4, %c5
%c7 = sub i32 %c3, %c6
store i32 %c7, i32* %ptr1
ret void
; THUMB: ldrb
; THUMB: ldrh
; THUMB: uxtb
; THUMB: sxth
; THUMB: add
; THUMB: sub
; ARM: ldrb
; ARM: ldrh
; ARM: uxtb
; ARM: sxth
; ARM: add
; ARM: sub
}

View File

@ -1,14 +1,15 @@
; RUN: llc < %s -march=x86 -fast-isel | FileCheck %s
; RUN: llc < %s -march=x86 -fast-isel -fast-isel-abort | FileCheck %s
; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort | FileCheck %s
declare i64 @test1a(i64)
declare i32 @test1a(i32)
define i32 @test1(i64 %x) nounwind {
define i32 @test1(i32 %x) nounwind {
; CHECK: test1:
; CHECK: andb $1, %
%y = add i64 %x, -3
%t = call i64 @test1a(i64 %y)
%s = mul i64 %t, 77
%z = trunc i64 %s to i1
%y = add i32 %x, -3
%t = call i32 @test1a(i32 %y)
%s = mul i32 %t, 77
%z = trunc i32 %s to i1
br label %next
next: ; preds = %0