forked from OSchip/llvm-project
X86: change zext moves to use sub-register infrastructure.
32-bit writes on amd64 zero out the high bits of the corresponding 64-bit register. LLVM makes use of this for zero-extension, but until now relied on custom MCLowering and other code to fix up instructions. Now that we have proper handling of sub-registers, this can be done by creating SUBREG_TO_REG instructions at selection-time. Should be no change in functionality. llvm-svn: 182921
This commit is contained in:
parent
46af5a2cdc
commit
04eb4234fc
|
@ -1005,10 +1005,6 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool X86FastISel::X86SelectZExt(const Instruction *I) {
|
bool X86FastISel::X86SelectZExt(const Instruction *I) {
|
||||||
// Handle zero-extension from i1 to i8, which is common.
|
|
||||||
if (!I->getOperand(0)->getType()->isIntegerTy(1))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
EVT DstVT = TLI.getValueType(I->getType());
|
EVT DstVT = TLI.getValueType(I->getType());
|
||||||
if (!TLI.isTypeLegal(DstVT))
|
if (!TLI.isTypeLegal(DstVT))
|
||||||
return false;
|
return false;
|
||||||
|
@ -1017,12 +1013,37 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
|
||||||
if (ResultReg == 0)
|
if (ResultReg == 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Set the high bits to zero.
|
// Handle zero-extension from i1 to i8, which is common.
|
||||||
ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
|
MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()).getSimpleVT();
|
||||||
if (ResultReg == 0)
|
if (SrcVT.SimpleTy == MVT::i1) {
|
||||||
return false;
|
// Set the high bits to zero.
|
||||||
|
ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
|
||||||
|
SrcVT = MVT::i8;
|
||||||
|
|
||||||
if (DstVT != MVT::i8) {
|
if (ResultReg == 0)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (DstVT == MVT::i64) {
|
||||||
|
// Handle extension to 64-bits via sub-register shenanigans.
|
||||||
|
unsigned MovInst;
|
||||||
|
|
||||||
|
switch (SrcVT.SimpleTy) {
|
||||||
|
case MVT::i8: MovInst = X86::MOVZX32rr8; break;
|
||||||
|
case MVT::i16: MovInst = X86::MOVZX32rr16; break;
|
||||||
|
case MVT::i32: MovInst = X86::MOV32rr; break;
|
||||||
|
default: llvm_unreachable("Unexpected zext to i64 source type");
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned Result32 = createResultReg(&X86::GR32RegClass);
|
||||||
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovInst), Result32)
|
||||||
|
.addReg(ResultReg);
|
||||||
|
|
||||||
|
ResultReg = createResultReg(&X86::GR64RegClass);
|
||||||
|
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::SUBREG_TO_REG),
|
||||||
|
ResultReg)
|
||||||
|
.addImm(0).addReg(Result32).addImm(X86::sub_32bit);
|
||||||
|
} else if (DstVT != MVT::i8) {
|
||||||
ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
|
ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
|
||||||
ResultReg, /*Kill=*/true);
|
ResultReg, /*Kill=*/true);
|
||||||
if (ResultReg == 0)
|
if (ResultReg == 0)
|
||||||
|
|
|
@ -1119,7 +1119,8 @@ defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
|
||||||
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
|
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
|
||||||
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
|
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
|
||||||
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
|
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
|
||||||
def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
|
def : Pat<(zextloadi64i1 addr:$src),
|
||||||
|
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
|
||||||
|
|
||||||
// extload bool -> extload byte
|
// extload bool -> extload byte
|
||||||
// When extloading from 16-bit and smaller memory locations into 64-bit
|
// When extloading from 16-bit and smaller memory locations into 64-bit
|
||||||
|
@ -1133,14 +1134,16 @@ def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
|
||||||
def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
|
def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
|
||||||
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
|
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
|
||||||
|
|
||||||
def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
|
|
||||||
def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>;
|
|
||||||
def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
|
|
||||||
// For other extloads, use subregs, since the high contents of the register are
|
// For other extloads, use subregs, since the high contents of the register are
|
||||||
// defined after an extload.
|
// defined after an extload.
|
||||||
|
def : Pat<(extloadi64i1 addr:$src),
|
||||||
|
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
|
||||||
|
def : Pat<(extloadi64i8 addr:$src),
|
||||||
|
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
|
||||||
|
def : Pat<(extloadi64i16 addr:$src),
|
||||||
|
(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
|
||||||
def : Pat<(extloadi64i32 addr:$src),
|
def : Pat<(extloadi64i32 addr:$src),
|
||||||
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
|
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
|
||||||
sub_32bit)>;
|
|
||||||
|
|
||||||
// anyext. Define these to do an explicit zero-extend to
|
// anyext. Define these to do an explicit zero-extend to
|
||||||
// avoid partial-register updates.
|
// avoid partial-register updates.
|
||||||
|
@ -1152,8 +1155,10 @@ def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
|
||||||
def : Pat<(i32 (anyext GR16:$src)),
|
def : Pat<(i32 (anyext GR16:$src)),
|
||||||
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
|
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
|
||||||
|
|
||||||
def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
|
def : Pat<(i64 (anyext GR8 :$src)),
|
||||||
def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
|
(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>;
|
||||||
|
def : Pat<(i64 (anyext GR16:$src)),
|
||||||
|
(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>;
|
||||||
def : Pat<(i64 (anyext GR32:$src)),
|
def : Pat<(i64 (anyext GR32:$src)),
|
||||||
(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
|
(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
|
||||||
|
|
||||||
|
@ -1318,13 +1323,19 @@ def : Pat<(and GR16:$src1, 0xff),
|
||||||
|
|
||||||
// r & (2^32-1) ==> movz
|
// r & (2^32-1) ==> movz
|
||||||
def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
|
def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
|
||||||
(MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
|
(SUBREG_TO_REG (i64 0),
|
||||||
|
(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
|
||||||
|
sub_32bit)>;
|
||||||
// r & (2^16-1) ==> movz
|
// r & (2^16-1) ==> movz
|
||||||
def : Pat<(and GR64:$src, 0xffff),
|
def : Pat<(and GR64:$src, 0xffff),
|
||||||
(MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
|
(SUBREG_TO_REG (i64 0),
|
||||||
|
(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
|
||||||
|
sub_32bit)>;
|
||||||
// r & (2^8-1) ==> movz
|
// r & (2^8-1) ==> movz
|
||||||
def : Pat<(and GR64:$src, 0xff),
|
def : Pat<(and GR64:$src, 0xff),
|
||||||
(MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
|
(SUBREG_TO_REG (i64 0),
|
||||||
|
(MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),
|
||||||
|
sub_32bit)>;
|
||||||
// r & (2^8-1) ==> movz
|
// r & (2^8-1) ==> movz
|
||||||
def : Pat<(and GR32:$src1, 0xff),
|
def : Pat<(and GR32:$src1, 0xff),
|
||||||
(MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
|
(MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
|
||||||
|
|
|
@ -149,38 +149,24 @@ def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
|
||||||
"movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
|
"movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
|
||||||
TB, Sched<[WriteALULd]>;
|
TB, Sched<[WriteALULd]>;
|
||||||
|
|
||||||
// FIXME: These should be Pat patterns.
|
// 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a
|
||||||
let isCodeGenOnly = 1 in {
|
// 32-bit register.
|
||||||
|
def : Pat<(i64 (zext GR8:$src)),
|
||||||
|
(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8:$src), sub_32bit)>;
|
||||||
|
def : Pat<(zextloadi64i8 addr:$src),
|
||||||
|
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
|
||||||
|
|
||||||
// Use movzbl instead of movzbq when the destination is a register; it's
|
def : Pat<(i64 (zext GR16:$src)),
|
||||||
// equivalent due to implicit zero-extending, and it has a smaller encoding.
|
(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16:$src), sub_32bit)>;
|
||||||
def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
|
def : Pat<(zextloadi64i16 addr:$src),
|
||||||
"", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
|
(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
|
||||||
Sched<[WriteALU]>;
|
|
||||||
def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
|
|
||||||
"", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>,
|
|
||||||
TB, Sched<[WriteALULd]>;
|
|
||||||
// Use movzwl instead of movzwq when the destination is a register; it's
|
|
||||||
// equivalent due to implicit zero-extending, and it has a smaller encoding.
|
|
||||||
def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
|
|
||||||
"", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
|
|
||||||
Sched<[WriteALU]>;
|
|
||||||
def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
|
|
||||||
"", [(set GR64:$dst, (zextloadi64i16 addr:$src))],
|
|
||||||
IIC_MOVZX>, TB, Sched<[WriteALULd]>;
|
|
||||||
|
|
||||||
// There's no movzlq instruction, but movl can be used for this purpose, using
|
|
||||||
// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
|
|
||||||
// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
|
|
||||||
// zero-extension, however this isn't possible when the 32-bit value is
|
|
||||||
// defined by a truncate or is copied from something where the high bits aren't
|
|
||||||
// necessarily all zero. In such cases, we fall back to these explicit zext
|
|
||||||
// instructions.
|
|
||||||
def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
|
|
||||||
"", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>,
|
|
||||||
Sched<[WriteALU]>;
|
|
||||||
def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
|
|
||||||
"", [(set GR64:$dst, (zextloadi64i32 addr:$src))],
|
|
||||||
IIC_MOVZX>, Sched<[WriteALULd]>;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// The preferred way to do 32-bit-to-64-bit zero extension on x86-64 is to use a
|
||||||
|
// SUBREG_TO_REG to utilize implicit zero-extension, however this isn't possible
|
||||||
|
// when the 32-bit value is defined by a truncate or is copied from something
|
||||||
|
// where the high bits aren't necessarily all zero. In such cases, we fall back
|
||||||
|
// to these explicit zext instructions.
|
||||||
|
def : Pat<(i64 (zext GR32:$src)),
|
||||||
|
(SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src), sub_32bit)>;
|
||||||
|
def : Pat<(i64 (zextloadi64i32 addr:$src)),
|
||||||
|
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
|
||||||
|
|
|
@ -451,9 +451,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||||
{ X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
|
{ X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
|
||||||
{ X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
|
{ X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
|
||||||
{ X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
|
{ X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
|
||||||
{ X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
|
|
||||||
{ X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
|
|
||||||
{ X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
|
|
||||||
{ X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 },
|
{ X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 },
|
||||||
{ X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 },
|
{ X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 },
|
||||||
{ X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 },
|
{ X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 },
|
||||||
|
@ -1381,7 +1378,6 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
|
||||||
case X86::MOVSX32rr8:
|
case X86::MOVSX32rr8:
|
||||||
case X86::MOVZX32rr8:
|
case X86::MOVZX32rr8:
|
||||||
case X86::MOVSX64rr8:
|
case X86::MOVSX64rr8:
|
||||||
case X86::MOVZX64rr8:
|
|
||||||
if (!TM.getSubtarget<X86Subtarget>().is64Bit())
|
if (!TM.getSubtarget<X86Subtarget>().is64Bit())
|
||||||
// It's not always legal to reference the low 8-bit of the larger
|
// It's not always legal to reference the low 8-bit of the larger
|
||||||
// register in 32-bit mode.
|
// register in 32-bit mode.
|
||||||
|
@ -1389,9 +1385,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
|
||||||
case X86::MOVSX32rr16:
|
case X86::MOVSX32rr16:
|
||||||
case X86::MOVZX32rr16:
|
case X86::MOVZX32rr16:
|
||||||
case X86::MOVSX64rr16:
|
case X86::MOVSX64rr16:
|
||||||
case X86::MOVZX64rr16:
|
case X86::MOVSX64rr32: {
|
||||||
case X86::MOVSX64rr32:
|
|
||||||
case X86::MOVZX64rr32: {
|
|
||||||
if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
|
if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
|
||||||
// Be conservative.
|
// Be conservative.
|
||||||
return false;
|
return false;
|
||||||
|
@ -1404,17 +1398,14 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
|
||||||
case X86::MOVSX32rr8:
|
case X86::MOVSX32rr8:
|
||||||
case X86::MOVZX32rr8:
|
case X86::MOVZX32rr8:
|
||||||
case X86::MOVSX64rr8:
|
case X86::MOVSX64rr8:
|
||||||
case X86::MOVZX64rr8:
|
|
||||||
SubIdx = X86::sub_8bit;
|
SubIdx = X86::sub_8bit;
|
||||||
break;
|
break;
|
||||||
case X86::MOVSX32rr16:
|
case X86::MOVSX32rr16:
|
||||||
case X86::MOVZX32rr16:
|
case X86::MOVZX32rr16:
|
||||||
case X86::MOVSX64rr16:
|
case X86::MOVSX64rr16:
|
||||||
case X86::MOVZX64rr16:
|
|
||||||
SubIdx = X86::sub_16bit;
|
SubIdx = X86::sub_16bit;
|
||||||
break;
|
break;
|
||||||
case X86::MOVSX64rr32:
|
case X86::MOVSX64rr32:
|
||||||
case X86::MOVZX64rr32:
|
|
||||||
SubIdx = X86::sub_32bit;
|
SubIdx = X86::sub_32bit;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -388,13 +388,7 @@ ReSimplify:
|
||||||
assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
|
assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
|
||||||
"LEA has segment specified!");
|
"LEA has segment specified!");
|
||||||
break;
|
break;
|
||||||
case X86::MOVZX64rr32: LowerSubReg32_Op0(OutMI, X86::MOV32rr); break;
|
|
||||||
case X86::MOVZX64rm32: LowerSubReg32_Op0(OutMI, X86::MOV32rm); break;
|
|
||||||
case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break;
|
case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break;
|
||||||
case X86::MOVZX64rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break;
|
|
||||||
case X86::MOVZX64rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break;
|
|
||||||
case X86::MOVZX64rr16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr16); break;
|
|
||||||
case X86::MOVZX64rm16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm16); break;
|
|
||||||
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
|
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
|
||||||
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
|
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue