X86: change zext moves to use sub-register infrastructure.

32-bit writes on amd64 zero out the high bits of the corresponding 64-bit
register. LLVM makes use of this for zero-extension, but until now relied on
custom MCLowering and other code to fix up instructions. Now that we have proper
handling of sub-registers, this can be done by creating SUBREG_TO_REG
instructions at selection time.

There should be no change in functionality.

llvm-svn: 182921
This commit is contained in:
Tim Northover 2013-05-30 10:43:18 +00:00
parent 46af5a2cdc
commit 04eb4234fc
5 changed files with 72 additions and 69 deletions

View File

@ -1005,10 +1005,6 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
} }
bool X86FastISel::X86SelectZExt(const Instruction *I) { bool X86FastISel::X86SelectZExt(const Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
if (!I->getOperand(0)->getType()->isIntegerTy(1))
return false;
EVT DstVT = TLI.getValueType(I->getType()); EVT DstVT = TLI.getValueType(I->getType());
if (!TLI.isTypeLegal(DstVT)) if (!TLI.isTypeLegal(DstVT))
return false; return false;
@ -1017,12 +1013,37 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
if (ResultReg == 0) if (ResultReg == 0)
return false; return false;
// Set the high bits to zero. // Handle zero-extension from i1 to i8, which is common.
ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()).getSimpleVT();
if (ResultReg == 0) if (SrcVT.SimpleTy == MVT::i1) {
return false; // Set the high bits to zero.
ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
if (DstVT != MVT::i8) { if (ResultReg == 0)
return false;
}
if (DstVT == MVT::i64) {
// Handle extension to 64-bits via sub-register shenanigans.
unsigned MovInst;
switch (SrcVT.SimpleTy) {
case MVT::i8: MovInst = X86::MOVZX32rr8; break;
case MVT::i16: MovInst = X86::MOVZX32rr16; break;
case MVT::i32: MovInst = X86::MOV32rr; break;
default: llvm_unreachable("Unexpected zext to i64 source type");
}
unsigned Result32 = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovInst), Result32)
.addReg(ResultReg);
ResultReg = createResultReg(&X86::GR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::SUBREG_TO_REG),
ResultReg)
.addImm(0).addReg(Result32).addImm(X86::sub_32bit);
} else if (DstVT != MVT::i8) {
ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND, ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
ResultReg, /*Kill=*/true); ResultReg, /*Kill=*/true);
if (ResultReg == 0) if (ResultReg == 0)

View File

@ -1119,7 +1119,8 @@ defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>; def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>; def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; def : Pat<(zextloadi64i1 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
// extload bool -> extload byte // extload bool -> extload byte
// When extloading from 16-bit and smaller memory locations into 64-bit // When extloading from 16-bit and smaller memory locations into 64-bit
@ -1133,14 +1134,16 @@ def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>; def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>; def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
// For other extloads, use subregs, since the high contents of the register are // For other extloads, use subregs, since the high contents of the register are
// defined after an extload. // defined after an extload.
def : Pat<(extloadi64i1 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i8 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i16 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i32 addr:$src), def : Pat<(extloadi64i32 addr:$src),
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
sub_32bit)>;
// anyext. Define these to do an explicit zero-extend to // anyext. Define these to do an explicit zero-extend to
// avoid partial-register updates. // avoid partial-register updates.
@ -1152,8 +1155,10 @@ def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
def : Pat<(i32 (anyext GR16:$src)), def : Pat<(i32 (anyext GR16:$src)),
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>; (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>; def : Pat<(i64 (anyext GR8 :$src)),
def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>; (SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>;
def : Pat<(i64 (anyext GR16:$src)),
(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>;
def : Pat<(i64 (anyext GR32:$src)), def : Pat<(i64 (anyext GR32:$src)),
(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
@ -1318,13 +1323,19 @@ def : Pat<(and GR16:$src1, 0xff),
// r & (2^32-1) ==> movz // r & (2^32-1) ==> movz
def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
(MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; (SUBREG_TO_REG (i64 0),
(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
sub_32bit)>;
// r & (2^16-1) ==> movz // r & (2^16-1) ==> movz
def : Pat<(and GR64:$src, 0xffff), def : Pat<(and GR64:$src, 0xffff),
(MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>; (SUBREG_TO_REG (i64 0),
(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
sub_32bit)>;
// r & (2^8-1) ==> movz // r & (2^8-1) ==> movz
def : Pat<(and GR64:$src, 0xff), def : Pat<(and GR64:$src, 0xff),
(MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>; (SUBREG_TO_REG (i64 0),
(MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),
sub_32bit)>;
// r & (2^8-1) ==> movz // r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff), def : Pat<(and GR32:$src1, 0xff),
(MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>, (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,

View File

@ -149,38 +149,24 @@ def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
TB, Sched<[WriteALULd]>; TB, Sched<[WriteALULd]>;
// FIXME: These should be Pat patterns. // 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a
let isCodeGenOnly = 1 in { // 32-bit register.
def : Pat<(i64 (zext GR8:$src)),
(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8:$src), sub_32bit)>;
def : Pat<(zextloadi64i8 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
// Use movzbl instead of movzbq when the destination is a register; it's def : Pat<(i64 (zext GR16:$src)),
// equivalent due to implicit zero-extending, and it has a smaller encoding. (SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16:$src), sub_32bit)>;
def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), def : Pat<(zextloadi64i16 addr:$src),
"", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB, (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
Sched<[WriteALU]>;
def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>,
TB, Sched<[WriteALULd]>;
// Use movzwl instead of movzwq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
Sched<[WriteALU]>;
def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"", [(set GR64:$dst, (zextloadi64i16 addr:$src))],
IIC_MOVZX>, TB, Sched<[WriteALULd]>;
// There's no movzlq instruction, but movl can be used for this purpose, using
// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
// zero-extension, however this isn't possible when the 32-bit value is
// defined by a truncate or is copied from something where the high bits aren't
// necessarily all zero. In such cases, we fall back to these explicit zext
// instructions.
def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
"", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>,
Sched<[WriteALU]>;
def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"", [(set GR64:$dst, (zextloadi64i32 addr:$src))],
IIC_MOVZX>, Sched<[WriteALULd]>;
}
// The preferred way to do 32-bit-to-64-bit zero extension on x86-64 is to use a
// SUBREG_TO_REG to utilize implicit zero-extension, however this isn't possible
// when the 32-bit value is defined by a truncate or is copied from something
// where the high bits aren't necessarily all zero. In such cases, we fall back
// to these explicit zext instructions.
def : Pat<(i64 (zext GR32:$src)),
(SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src), sub_32bit)>;
def : Pat<(i64 (zextloadi64i32 addr:$src)),
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;

View File

@ -451,9 +451,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
{ X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
{ X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
{ X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
{ X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
{ X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
{ X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 }, { X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 },
{ X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 }, { X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 },
{ X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 }, { X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 },
@ -1381,7 +1378,6 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
case X86::MOVSX32rr8: case X86::MOVSX32rr8:
case X86::MOVZX32rr8: case X86::MOVZX32rr8:
case X86::MOVSX64rr8: case X86::MOVSX64rr8:
case X86::MOVZX64rr8:
if (!TM.getSubtarget<X86Subtarget>().is64Bit()) if (!TM.getSubtarget<X86Subtarget>().is64Bit())
// It's not always legal to reference the low 8-bit of the larger // It's not always legal to reference the low 8-bit of the larger
// register in 32-bit mode. // register in 32-bit mode.
@ -1389,9 +1385,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
case X86::MOVSX32rr16: case X86::MOVSX32rr16:
case X86::MOVZX32rr16: case X86::MOVZX32rr16:
case X86::MOVSX64rr16: case X86::MOVSX64rr16:
case X86::MOVZX64rr16: case X86::MOVSX64rr32: {
case X86::MOVSX64rr32:
case X86::MOVZX64rr32: {
if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg()) if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
// Be conservative. // Be conservative.
return false; return false;
@ -1404,17 +1398,14 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
case X86::MOVSX32rr8: case X86::MOVSX32rr8:
case X86::MOVZX32rr8: case X86::MOVZX32rr8:
case X86::MOVSX64rr8: case X86::MOVSX64rr8:
case X86::MOVZX64rr8:
SubIdx = X86::sub_8bit; SubIdx = X86::sub_8bit;
break; break;
case X86::MOVSX32rr16: case X86::MOVSX32rr16:
case X86::MOVZX32rr16: case X86::MOVZX32rr16:
case X86::MOVSX64rr16: case X86::MOVSX64rr16:
case X86::MOVZX64rr16:
SubIdx = X86::sub_16bit; SubIdx = X86::sub_16bit;
break; break;
case X86::MOVSX64rr32: case X86::MOVSX64rr32:
case X86::MOVZX64rr32:
SubIdx = X86::sub_32bit; SubIdx = X86::sub_32bit;
break; break;
} }

View File

@ -388,13 +388,7 @@ ReSimplify:
assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 && assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
"LEA has segment specified!"); "LEA has segment specified!");
break; break;
case X86::MOVZX64rr32: LowerSubReg32_Op0(OutMI, X86::MOV32rr); break;
case X86::MOVZX64rm32: LowerSubReg32_Op0(OutMI, X86::MOV32rm); break;
case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break; case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break;
case X86::MOVZX64rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break;
case X86::MOVZX64rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break;
case X86::MOVZX64rr16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr16); break;
case X86::MOVZX64rm16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm16); break;
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break; case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break; case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;