Rewrite fast-isel integer cast handling to handle more cases, and to be simpler and more consistent.

The practical effects here are that x86-64 fast-isel can now handle trunc from i8 to i1, and ARM fast-isel can handle many more constructs involving integers narrower than 32 bits (including loads, stores, and many integer casts). rdar://9437928 . llvm-svn: 132099
2011-05-25 23:49:02 +00:00 · 2011-05-25 23:49:02 +00:00 · c70355195c
parent fa63d3096d
commit c70355195c
5 changed files with 205 additions and 59 deletions
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@ -111,8 +111,8 @@ unsigned FastISel::getRegForValue(const Value *V) {
  // of whether FastISel can handle them.
  MVT VT = RealVT.getSimpleVT();
  if (!TLI.isTypeLegal(VT)) {
-    // Promote MVT::i1 to a legal type though, because it's common and easy.
-    if (VT == MVT::i1)
+    // Handle integer promotions, though, because they're common and easy.
+    if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
      VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
    else
      return 0;
@ -653,21 +653,13 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
    // Unhandled type. Halt "fast" selection and bail.
    return false;

-  // Check if the destination type is legal. Or as a special case,
-  // it may be i1 if we're doing a truncate because that's
-  // easy and somewhat common.
+  // Check if the destination type is legal.
  if (!TLI.isTypeLegal(DstVT))
-    if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE)
-      // Unhandled type. Halt "fast" selection and bail.
-      return false;
+    return false;

-  // Check if the source operand is legal. Or as a special case,
-  // it may be i1 if we're doing zero-extension because that's
-  // easy and somewhat common.
+  // Check if the source operand is legal.
  if (!TLI.isTypeLegal(SrcVT))
-    if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND)
-      // Unhandled type. Halt "fast" selection and bail.
-      return false;
+    return false;

  unsigned InputReg = getRegForValue(I->getOperand(0));
  if (!InputReg)
@ -676,18 +668,6 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {

  bool InputRegIsKill = hasTrivialKill(I->getOperand(0));

-  // If the operand is i1, arrange for the high bits in the register to be zero.
-  if (SrcVT == MVT::i1) {
-   SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT);
-   InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill);
-   if (!InputReg)
-     return false;
-   InputRegIsKill = true;
-  }
-  // If the result is i1, truncate to the target's type for i1 first.
-  if (DstVT == MVT::i1)
-    DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT);
-
  unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
                                  DstVT.getSimpleVT(),
                                  Opcode,
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@ -167,6 +167,7 @@ class ARMFastISel : public FastISel {
    bool SelectCall(const Instruction *I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);
+    bool SelectIntCast(const Instruction *I);

    // Utility routines.
  private:
@ -1129,7 +1130,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
-        (isTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
+        (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
      unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
      unsigned OpReg = getRegForValue(TI->getOperand(0));
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@ -1948,6 +1949,77 @@ bool ARMFastISel::SelectCall(const Instruction *I) {

 }

+bool ARMFastISel::SelectIntCast(const Instruction *I) {
+  // On ARM, in general, integer casts don't involve legal types; this code
+  // handles promotable integers.  The high bits for a type smaller than
+  // the register size are assumed to be undefined.
+  const Type *DestTy = I->getType();
+  Value *Op = I->getOperand(0);
+  const Type *SrcTy = Op->getType();
+
+  EVT SrcVT, DestVT;
+  SrcVT = TLI.getValueType(SrcTy, true);
+  DestVT = TLI.getValueType(DestTy, true);
+
+  if (isa<TruncInst>(I)) {
+    if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
+      return false;
+    if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+      return false;
+
+    unsigned SrcReg = getRegForValue(Op);
+    if (!SrcReg) return false;
+
+    // Because the high bits are undefined, a truncate doesn't generate
+    // any code.
+    UpdateValueMap(I, SrcReg);
+    return true;
+  } 
+  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+    return false;
+
+  unsigned Opc;
+  bool isZext = isa<ZExtInst>(I);
+  bool isBoolZext = false;
+  switch (SrcVT.getSimpleVT().SimpleTy) {
+  default: return false;
+  case MVT::i16:
+    if (isZext)
+      Opc = isThumb ? ARM::t2UXTHr : ARM::UXTHr;
+    else
+      Opc = isThumb ? ARM::t2SXTHr : ARM::SXTHr;
+    break;
+  case MVT::i8:
+    if (isZext)
+      Opc = isThumb ? ARM::t2UXTBr : ARM::UXTBr;
+    else
+      Opc = isThumb ? ARM::t2SXTBr : ARM::SXTBr;
+    break;
+  case MVT::i1:
+    if (isZext) {
+      Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+      isBoolZext = true;
+      break;
+    }
+    return false;
+  }
+
+  // FIXME: We could save an instruction in many cases by special-casing
+  // load instructions.
+  unsigned SrcReg = getRegForValue(Op);
+  if (!SrcReg) return false;
+
+  unsigned DestReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+  MachineInstrBuilder MIB;
+  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+        .addReg(SrcReg);
+  if (isBoolZext)
+    MIB.addImm(1);
+  AddOptionalDefs(MIB);
+  UpdateValueMap(I, DestReg);
+  return true;
+}
+
 // TODO: SoftFP support.
 bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {

@ -1985,6 +2057,10 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
      return SelectSelect(I);
    case Instruction::Ret:
      return SelectRet(I);
+    case Instruction::Trunc:
+    case Instruction::ZExt:
+    case Instruction::SExt:
+      return SelectIntCast(I);
    default: break;
  }
  return false;
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@ -936,18 +936,31 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {

 bool X86FastISel::X86SelectZExt(const Instruction *I) {
  // Handle zero-extension from i1 to i8, which is common.
-  if (I->getType()->isIntegerTy(8) &&
-      I->getOperand(0)->getType()->isIntegerTy(1)) {
-    unsigned ResultReg = getRegForValue(I->getOperand(0));
-    if (ResultReg == 0) return false;
-    // Set the high bits to zero.
-    ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
-    if (ResultReg == 0) return false;
-    UpdateValueMap(I, ResultReg);
-    return true;
+  if (!I->getOperand(0)->getType()->isIntegerTy(1)) 
+    return false;
+
+  EVT DstVT = TLI.getValueType(I->getType());
+  if (!TLI.isTypeLegal(DstVT))
+    return false;
+
+  unsigned ResultReg = getRegForValue(I->getOperand(0));
+  if (ResultReg == 0)
+    return false;
+
+  // Set the high bits to zero.
+  ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
+  if (ResultReg == 0)
+    return false;
+
+  if (DstVT != MVT::i8) {
+    ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
+                           ResultReg, /*Kill=*/true);
+    if (ResultReg == 0)
+      return false;
  }

-  return false;
+  UpdateValueMap(I, ResultReg);
+  return true;
 }


@ -1229,18 +1242,13 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
 }

 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
-  if (Subtarget->is64Bit())
-    // All other cases should be handled by the tblgen generated code.
-    return false;
  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(I->getType());

-  // This code only handles truncation to byte right now.
+  // This code only handles truncation to byte.
  if (DstVT != MVT::i8 && DstVT != MVT::i1)
-    // All other cases should be handled by the tblgen generated code.
    return false;
-  if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
-    // All other cases should be handled by the tblgen generated code.
+  if (!TLI.isTypeLegal(SrcVT))
    return false;

  unsigned InputReg = getRegForValue(I->getOperand(0));
@ -1248,16 +1256,26 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
    // Unhandled operand.  Halt "fast" selection and bail.
    return false;

-  // First issue a copy to GR16_ABCD or GR32_ABCD.
-  const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
-    ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
-  unsigned CopyReg = createResultReg(CopyRC);
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-          CopyReg).addReg(InputReg);
+  if (SrcVT == MVT::i8) {
+    // Truncate from i8 to i1; no code needed.
+    UpdateValueMap(I, InputReg);
+    return true;
+  }

-  // Then issue an extract_subreg.
+  if (!Subtarget->is64Bit()) {
+    // If we're on x86-32; we can't extract an i8 from a general register.
+    // First issue a copy to GR16_ABCD or GR32_ABCD.
+    const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
+      ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
+    unsigned CopyReg = createResultReg(CopyRC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            CopyReg).addReg(InputReg);
+    InputReg = CopyReg;
+  }
+
+  // Issue an extract_subreg.
  unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
-                                                  CopyReg, /*Kill=*/true,
+                                                  InputReg, /*Kill=*/true,
                                                  X86::sub_8bit);
  if (!ResultReg)
    return false;
--- a/llvm/test/CodeGen/ARM/fast-isel.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel.ll
@ -61,3 +61,74 @@ b3:
 ; THUMB: orr {{.*}} #4
 ; ARM: orr {{.*}} #4
 }
+
+define void @test3(i32 %tmp, i32* %ptr1, i16* %ptr2, i8* %ptr3) nounwind {
+; THUMB: test3:
+; ARM: test3:
+
+bb1:
+  %a1 = trunc i32 %tmp to i16
+  %a2 = trunc i16 %a1 to i8
+  %a3 = trunc i8 %a2 to i1
+  %a4 = zext i1 %a3 to i8
+  store i8 %a4, i8* %ptr3
+  %a5 = zext i8 %a4 to i16
+  store i16 %a5, i16* %ptr2
+  %a6 = zext i16 %a5 to i32
+  store i32 %a6, i32* %ptr1
+  br label %bb2
+
+; THUMB: and
+; THUMB: strb
+; THUMB: uxtb
+; THUMB: strh
+; THUMB: uxth
+; ARM: and
+; ARM: strb
+; ARM: uxtb
+; ARM: strh
+; ARM: uxth
+
+bb2:
+  %b1 = trunc i32 %tmp to i16
+  %b2 = trunc i16 %b1 to i8
+  store i8 %b2, i8* %ptr3
+  %b3 = sext i8 %b2 to i16
+  store i16 %b3, i16* %ptr2
+  %b4 = sext i16 %b3 to i32
+  store i32 %b4, i32* %ptr1
+  br label %bb3
+
+; THUMB: strb
+; THUMB: sxtb
+; THUMB: strh
+; THUMB: sxth
+; ARM: strb
+; ARM: sxtb
+; ARM: strh
+; ARM: sxth
+
+bb3:
+  %c1 = load i8* %ptr3
+  %c2 = load i16* %ptr2
+  %c3 = load i32* %ptr1
+  %c4 = zext i8 %c1 to i32
+  %c5 = sext i16 %c2 to i32
+  %c6 = add i32 %c4, %c5
+  %c7 = sub i32 %c3, %c6
+  store i32 %c7, i32* %ptr1
+  ret void
+
+; THUMB: ldrb
+; THUMB: ldrh
+; THUMB: uxtb
+; THUMB: sxth
+; THUMB: add
+; THUMB: sub
+; ARM: ldrb
+; ARM: ldrh
+; ARM: uxtb
+; ARM: sxth
+; ARM: add
+; ARM: sub
+}
--- a/llvm/test/CodeGen/X86/fast-isel-i1.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-i1.ll
@ -1,14 +1,15 @@
-; RUN: llc < %s -march=x86 -fast-isel | FileCheck %s
+; RUN: llc < %s -march=x86 -fast-isel -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort | FileCheck %s

-declare i64 @test1a(i64)
+declare i32 @test1a(i32)

-define i32 @test1(i64 %x) nounwind {
+define i32 @test1(i32 %x) nounwind {
 ; CHECK: test1:
 ; CHECK: andb $1, %
-	%y = add i64 %x, -3
-	%t = call i64 @test1a(i64 %y)
-	%s = mul i64 %t, 77
-	%z = trunc i64 %s to i1
+	%y = add i32 %x, -3
+	%t = call i32 @test1a(i32 %y)
+	%s = mul i32 %t, 77
+	%z = trunc i32 %s to i1
 	br label %next

 next:		; preds = %0