forked from OSchip/llvm-project
[x86][slh] Move isDataInvariant* functions
Patch by Zola Bridges!

From the review:
"""
I moved these functions to X86InstrInfo.cpp, so they are available from another pass. In addition, this is a step toward resolving the FIXME to move this metadata to the instruction tables.

This is the final step to make these two data invariance checks available for non-SLH passes. The other two steps were here:
- https://reviews.llvm.org/D70283
- https://reviews.llvm.org/D75650

Tested via llvm-lit llvm/test/CodeGen/X86/speculative-load-hardening*
"""

Differential Revision: https://reviews.llvm.org/D75654
This commit is contained in:
parent
bb0ec1daff
commit
174c3eb69f
llvm/lib/Target/X86
|
@ -135,6 +135,491 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
|
|||
return false;
|
||||
}
|
||||
|
||||
bool X86InstrInfo::isDataInvariant(MachineInstr &MI) {
  // Allow-list keyed on the opcode: every opcode named below yields true, and
  // any opcode that is not named reaches the conservative default at the end.
  switch (MI.getOpcode()) {
  // Target-independent operations that trivially lower to data-invariant
  // instructions.
  case TargetOpcode::COPY:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:

  // Integer multiply is believed to be constant time on x86. It gets its own
  // group because it is perhaps the most surprising entry. These set flags.
  case X86::IMUL16rr: case X86::IMUL16rri8: case X86::IMUL16rri:
  case X86::IMUL32rr: case X86::IMUL32rri8: case X86::IMUL32rri:
  case X86::IMUL64rr: case X86::IMUL64rri32: case X86::IMUL64rri8:

  // Bit scanning and counting. Somewhat surprisingly these are believed to be
  // constant time on x86 even though they scan across bits and do fairly
  // complex work such as popcnt. These set flags.
  case X86::BSF16rr: case X86::BSF32rr: case X86::BSF64rr:
  case X86::BSR16rr: case X86::BSR32rr: case X86::BSR64rr:
  case X86::LZCNT16rr: case X86::LZCNT32rr: case X86::LZCNT64rr:
  case X86::POPCNT16rr: case X86::POPCNT32rr: case X86::POPCNT64rr:
  case X86::TZCNT16rr: case X86::TZCNT32rr: case X86::TZCNT64rr:

  // Bit manipulation. Effectively combinations of basic arithmetic ops, so
  // they should still execute in constant time. These also set flags.
  case X86::BLCFILL32rr: case X86::BLCFILL64rr:
  case X86::BLCI32rr: case X86::BLCI64rr:
  case X86::BLCIC32rr: case X86::BLCIC64rr:
  case X86::BLCMSK32rr: case X86::BLCMSK64rr:
  case X86::BLCS32rr: case X86::BLCS64rr:
  case X86::BLSFILL32rr: case X86::BLSFILL64rr:
  case X86::BLSI32rr: case X86::BLSI64rr:
  case X86::BLSIC32rr: case X86::BLSIC64rr:
  case X86::BLSMSK32rr: case X86::BLSMSK64rr:
  case X86::BLSR32rr: case X86::BLSR64rr:
  case X86::TZMSK32rr: case X86::TZMSK64rr:

  // Bit extracting and clearing. Should execute in constant time; sets flags.
  case X86::BEXTR32rr: case X86::BEXTR64rr:
  case X86::BEXTRI32ri: case X86::BEXTRI64ri:
  case X86::BZHI32rr: case X86::BZHI64rr:

  // Shifts and rotates, one line per operand form.
  case X86::ROL8r1: case X86::ROL16r1: case X86::ROL32r1: case X86::ROL64r1:
  case X86::ROL8rCL: case X86::ROL16rCL: case X86::ROL32rCL: case X86::ROL64rCL:
  case X86::ROL8ri: case X86::ROL16ri: case X86::ROL32ri: case X86::ROL64ri:
  case X86::ROR8r1: case X86::ROR16r1: case X86::ROR32r1: case X86::ROR64r1:
  case X86::ROR8rCL: case X86::ROR16rCL: case X86::ROR32rCL: case X86::ROR64rCL:
  case X86::ROR8ri: case X86::ROR16ri: case X86::ROR32ri: case X86::ROR64ri:
  case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1: case X86::SAR64r1:
  case X86::SAR8rCL: case X86::SAR16rCL: case X86::SAR32rCL: case X86::SAR64rCL:
  case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri: case X86::SAR64ri:
  case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1: case X86::SHL64r1:
  case X86::SHL8rCL: case X86::SHL16rCL: case X86::SHL32rCL: case X86::SHL64rCL:
  case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri: case X86::SHL64ri:
  case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1: case X86::SHR64r1:
  case X86::SHR8rCL: case X86::SHR16rCL: case X86::SHR32rCL: case X86::SHR64rCL:
  case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri: case X86::SHR64ri:
  case X86::SHLD16rrCL: case X86::SHLD32rrCL: case X86::SHLD64rrCL:
  case X86::SHLD16rri8: case X86::SHLD32rri8: case X86::SHLD64rri8:
  case X86::SHRD16rrCL: case X86::SHRD32rrCL: case X86::SHRD64rrCL:
  case X86::SHRD16rri8: case X86::SHRD32rri8: case X86::SHRD64rri8:

  // Basic arithmetic: constant time on the input, but does set flags.
  case X86::ADC8rr: case X86::ADC8ri:
  case X86::ADC16rr: case X86::ADC16ri: case X86::ADC16ri8:
  case X86::ADC32rr: case X86::ADC32ri: case X86::ADC32ri8:
  case X86::ADC64rr: case X86::ADC64ri8: case X86::ADC64ri32:
  case X86::ADD8rr: case X86::ADD8ri:
  case X86::ADD16rr: case X86::ADD16ri: case X86::ADD16ri8:
  case X86::ADD32rr: case X86::ADD32ri: case X86::ADD32ri8:
  case X86::ADD64rr: case X86::ADD64ri8: case X86::ADD64ri32:
  case X86::AND8rr: case X86::AND8ri:
  case X86::AND16rr: case X86::AND16ri: case X86::AND16ri8:
  case X86::AND32rr: case X86::AND32ri: case X86::AND32ri8:
  case X86::AND64rr: case X86::AND64ri8: case X86::AND64ri32:
  case X86::OR8rr: case X86::OR8ri:
  case X86::OR16rr: case X86::OR16ri: case X86::OR16ri8:
  case X86::OR32rr: case X86::OR32ri: case X86::OR32ri8:
  case X86::OR64rr: case X86::OR64ri8: case X86::OR64ri32:
  case X86::SBB8rr: case X86::SBB8ri:
  case X86::SBB16rr: case X86::SBB16ri: case X86::SBB16ri8:
  case X86::SBB32rr: case X86::SBB32ri: case X86::SBB32ri8:
  case X86::SBB64rr: case X86::SBB64ri8: case X86::SBB64ri32:
  case X86::SUB8rr: case X86::SUB8ri:
  case X86::SUB16rr: case X86::SUB16ri: case X86::SUB16ri8:
  case X86::SUB32rr: case X86::SUB32ri: case X86::SUB32ri8:
  case X86::SUB64rr: case X86::SUB64ri8: case X86::SUB64ri32:
  case X86::XOR8rr: case X86::XOR8ri:
  case X86::XOR16rr: case X86::XOR16ri: case X86::XOR16ri8:
  case X86::XOR32rr: case X86::XOR32ri: case X86::XOR32ri8:
  case X86::XOR64rr: case X86::XOR64ri8: case X86::XOR64ri32:

  // Arithmetic with just 32-bit and 64-bit variants and no immediates.
  case X86::ADCX32rr: case X86::ADCX64rr:
  case X86::ADOX32rr: case X86::ADOX64rr:
  case X86::ANDN32rr: case X86::ANDN64rr:

  // Unary arithmetic operations.
  case X86::DEC8r: case X86::DEC16r: case X86::DEC32r: case X86::DEC64r:
  case X86::INC8r: case X86::INC16r: case X86::INC32r: case X86::INC64r:
  case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r:

  // NOT is the odd one out among the arithmetic ops: it doesn't set EFLAGS.
  case X86::NOT8r: case X86::NOT16r: case X86::NOT32r: case X86::NOT64r:

  // Moves used to zero or sign extend things. The _NOREX variants are left
  // out on purpose, as that register constraint can't be handled anyway.
  case X86::MOVSX16rr8: case X86::MOVSX32rr8: case X86::MOVSX32rr16:
  case X86::MOVSX64rr8: case X86::MOVSX64rr16: case X86::MOVSX64rr32:
  case X86::MOVZX16rr8: case X86::MOVZX32rr8: case X86::MOVZX32rr16:
  case X86::MOVZX64rr8: case X86::MOVZX64rr16:
  case X86::MOV32rr:

  // Arithmetic that is both constant time and leaves flags untouched.
  case X86::RORX32ri: case X86::RORX64ri:
  case X86::SARX32rr: case X86::SARX64rr:
  case X86::SHLX32rr: case X86::SHLX64rr:
  case X86::SHRX32rr: case X86::SHRX64rr:

  // LEA doesn't actually access memory, and its arithmetic is constant time.
  case X86::LEA16r: case X86::LEA32r: case X86::LEA64_32r: case X86::LEA64r:
    return true;

  default:
    // Anything not listed above is assumed not to be data invariant.
    return false;
  }
}
|
||||
|
||||
bool X86InstrInfo::isDataInvariantLoad(MachineInstr &MI) {
  // Allow-list keyed on the opcode: every memory-operand opcode named below
  // yields true, and any opcode that is not named reaches the conservative
  // default at the end.
  switch (MI.getOpcode()) {
  // Integer multiply is believed to be constant time on x86 w.r.t. the loaded
  // data. It gets its own group because it is perhaps the most surprising
  // entry. These set flags.
  case X86::IMUL16rm: case X86::IMUL16rmi8: case X86::IMUL16rmi:
  case X86::IMUL32rm: case X86::IMUL32rmi8: case X86::IMUL32rmi:
  case X86::IMUL64rm: case X86::IMUL64rmi32: case X86::IMUL64rmi8:

  // Bit scanning and counting. Somewhat surprisingly these are believed to be
  // constant time on x86 even though they scan across bits and do fairly
  // complex work such as popcnt. These set flags.
  case X86::BSF16rm: case X86::BSF32rm: case X86::BSF64rm:
  case X86::BSR16rm: case X86::BSR32rm: case X86::BSR64rm:
  case X86::LZCNT16rm: case X86::LZCNT32rm: case X86::LZCNT64rm:
  case X86::POPCNT16rm: case X86::POPCNT32rm: case X86::POPCNT64rm:
  case X86::TZCNT16rm: case X86::TZCNT32rm: case X86::TZCNT64rm:

  // Bit manipulation. Effectively combinations of basic arithmetic ops, so
  // they should still execute in constant time. These also set flags.
  case X86::BLCFILL32rm: case X86::BLCFILL64rm:
  case X86::BLCI32rm: case X86::BLCI64rm:
  case X86::BLCIC32rm: case X86::BLCIC64rm:
  case X86::BLCMSK32rm: case X86::BLCMSK64rm:
  case X86::BLCS32rm: case X86::BLCS64rm:
  case X86::BLSFILL32rm: case X86::BLSFILL64rm:
  case X86::BLSI32rm: case X86::BLSI64rm:
  case X86::BLSIC32rm: case X86::BLSIC64rm:
  case X86::BLSMSK32rm: case X86::BLSMSK64rm:
  case X86::BLSR32rm: case X86::BLSR64rm:
  case X86::TZMSK32rm: case X86::TZMSK64rm:

  // Bit extracting and clearing. Should execute in constant time; sets flags.
  case X86::BEXTR32rm: case X86::BEXTR64rm:
  case X86::BEXTRI32mi: case X86::BEXTRI64mi:
  case X86::BZHI32rm: case X86::BZHI64rm:

  // Basic arithmetic: constant time on the input, but does set flags.
  case X86::ADC8rm: case X86::ADC16rm: case X86::ADC32rm: case X86::ADC64rm:
  case X86::ADCX32rm: case X86::ADCX64rm:
  case X86::ADD8rm: case X86::ADD16rm: case X86::ADD32rm: case X86::ADD64rm:
  case X86::ADOX32rm: case X86::ADOX64rm:
  case X86::AND8rm: case X86::AND16rm: case X86::AND32rm: case X86::AND64rm:
  case X86::ANDN32rm: case X86::ANDN64rm:
  case X86::OR8rm: case X86::OR16rm: case X86::OR32rm: case X86::OR64rm:
  case X86::SBB8rm: case X86::SBB16rm: case X86::SBB32rm: case X86::SBB64rm:
  case X86::SUB8rm: case X86::SUB16rm: case X86::SUB32rm: case X86::SUB64rm:
  case X86::XOR8rm: case X86::XOR16rm: case X86::XOR32rm: case X86::XOR64rm:

  // Integer multiply that leaves flags alone is still believed to be constant
  // time on x86. Called out separately because it is among the most
  // surprising instructions to exhibit that behavior.
  case X86::MULX32rm: case X86::MULX64rm:

  // Arithmetic that is both constant time and leaves flags untouched.
  case X86::RORX32mi: case X86::RORX64mi:
  case X86::SARX32rm: case X86::SARX64rm:
  case X86::SHLX32rm: case X86::SHLX64rm:
  case X86::SHRX32rm: case X86::SHRX64rm:

  // Conversions are believed to be constant time and don't set flags.
  case X86::CVTTSD2SI64rm: case X86::VCVTTSD2SI64rm:
  case X86::VCVTTSD2SI64Zrm:
  case X86::CVTTSD2SIrm: case X86::VCVTTSD2SIrm: case X86::VCVTTSD2SIZrm:
  case X86::CVTTSS2SI64rm: case X86::VCVTTSS2SI64rm:
  case X86::VCVTTSS2SI64Zrm:
  case X86::CVTTSS2SIrm: case X86::VCVTTSS2SIrm: case X86::VCVTTSS2SIZrm:
  case X86::CVTSI2SDrm: case X86::VCVTSI2SDrm: case X86::VCVTSI2SDZrm:
  case X86::CVTSI2SSrm: case X86::VCVTSI2SSrm: case X86::VCVTSI2SSZrm:
  case X86::CVTSI642SDrm: case X86::VCVTSI642SDrm: case X86::VCVTSI642SDZrm:
  case X86::CVTSI642SSrm: case X86::VCVTSI642SSrm: case X86::VCVTSI642SSZrm:
  case X86::CVTSS2SDrm: case X86::VCVTSS2SDrm: case X86::VCVTSS2SDZrm:
  case X86::CVTSD2SSrm: case X86::VCVTSD2SSrm: case X86::VCVTSD2SSZrm:

  // AVX512 added unsigned integer conversions.
  case X86::VCVTTSD2USI64Zrm: case X86::VCVTTSD2USIZrm:
  case X86::VCVTTSS2USI64Zrm: case X86::VCVTTSS2USIZrm:
  case X86::VCVTUSI2SDZrm: case X86::VCVTUSI642SDZrm:
  case X86::VCVTUSI2SSZrm: case X86::VCVTUSI642SSZrm:

  // Loads to register don't set flags.
  case X86::MOV8rm: case X86::MOV8rm_NOREX:
  case X86::MOV16rm: case X86::MOV32rm: case X86::MOV64rm:
  case X86::MOVSX16rm8:
  case X86::MOVSX32rm16: case X86::MOVSX32rm8: case X86::MOVSX32rm8_NOREX:
  case X86::MOVSX64rm16: case X86::MOVSX64rm32: case X86::MOVSX64rm8:
  case X86::MOVZX16rm8:
  case X86::MOVZX32rm16: case X86::MOVZX32rm8: case X86::MOVZX32rm8_NOREX:
  case X86::MOVZX64rm16: case X86::MOVZX64rm8:
    return true;

  default:
    // Anything not listed above is assumed to leak the load immediately.
    return false;
  }
}
|
||||
|
||||
int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
|
||||
const MachineFunction *MF = MI.getParent()->getParent();
|
||||
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
|
||||
|
|
|
@ -183,6 +183,35 @@ public:
|
|||
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
|
||||
unsigned &DstReg, unsigned &SubIdx) const override;
|
||||
|
||||
/// Returns true if the instruction has no behavior (specified or otherwise)
|
||||
/// that is based on the value of any of its register operands.
|
||||
///
|
||||
/// Instructions are considered data invariant even if they set EFLAGS.
|
||||
///
|
||||
/// A classical example of something that is inherently not data invariant is
|
||||
/// an indirect jump -- the destination is loaded into icache based on the
|
||||
/// bits set in the jump destination register.
|
||||
///
|
||||
/// FIXME: This should become part of our instruction tables.
|
||||
static bool isDataInvariant(MachineInstr &MI);
|
||||
|
||||
/// Returns true if the instruction has no behavior (specified or otherwise)
|
||||
/// that is based on the value loaded from memory or the value of any
|
||||
/// non-address register operands.
|
||||
///
|
||||
/// For example, it is not data invariant if the latency depends on the
|
||||
/// particular bits set in any of the registers *or* any of the bits loaded
|
||||
/// from memory.
|
||||
///
|
||||
/// Instructions are considered data invariant even if they set EFLAGS.
|
||||
///
|
||||
/// A classical example of something that is inherently not data invariant is
|
||||
/// an indirect jump -- the destination is loaded into icache based on the
|
||||
/// bits set in the jump destination register.
|
||||
///
|
||||
/// FIXME: This should become part of our instruction tables.
|
||||
static bool isDataInvariantLoad(MachineInstr &MI);
|
||||
|
||||
unsigned isLoadFromStackSlot(const MachineInstr &MI,
|
||||
int &FrameIndex) const override;
|
||||
unsigned isLoadFromStackSlot(const MachineInstr &MI,
|
||||
|
|
|
@ -1200,374 +1200,6 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
|
|||
return CMovs;
|
||||
}
|
||||
|
||||
/// Returns true if the instruction has no behavior (specified or otherwise)
|
||||
/// that is based on the value of any of its register operands
|
||||
///
|
||||
/// Instructions are considered data invariant even if they set EFLAGS.
|
||||
///
|
||||
/// A classical example of something that is inherently not data invariant is an
|
||||
/// indirect jump -- the destination is loaded into icache based on the bits set
|
||||
/// in the jump destination register.
|
||||
///
|
||||
/// FIXME: This should become part of our instruction tables.
|
||||
static bool isDataInvariant(MachineInstr &MI) {
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
// By default, assume that the instruction is not data invariant.
|
||||
return false;
|
||||
|
||||
// Some target-independent operations that trivially lower to data-invariant
|
||||
// instructions.
|
||||
case TargetOpcode::COPY:
|
||||
case TargetOpcode::INSERT_SUBREG:
|
||||
case TargetOpcode::SUBREG_TO_REG:
|
||||
return true;
|
||||
|
||||
// On x86 it is believed that imul is constant time w.r.t. the loaded data.
|
||||
// However, they set flags and are perhaps the most surprisingly constant
|
||||
// time operations so we call them out here separately.
|
||||
case X86::IMUL16rr:
|
||||
case X86::IMUL16rri8:
|
||||
case X86::IMUL16rri:
|
||||
case X86::IMUL32rr:
|
||||
case X86::IMUL32rri8:
|
||||
case X86::IMUL32rri:
|
||||
case X86::IMUL64rr:
|
||||
case X86::IMUL64rri32:
|
||||
case X86::IMUL64rri8:
|
||||
|
||||
// Bit scanning and counting instructions that are somewhat surprisingly
|
||||
// constant time as they scan across bits and do other fairly complex
|
||||
// operations like popcnt, but are believed to be constant time on x86.
|
||||
// However, these set flags.
|
||||
case X86::BSF16rr:
|
||||
case X86::BSF32rr:
|
||||
case X86::BSF64rr:
|
||||
case X86::BSR16rr:
|
||||
case X86::BSR32rr:
|
||||
case X86::BSR64rr:
|
||||
case X86::LZCNT16rr:
|
||||
case X86::LZCNT32rr:
|
||||
case X86::LZCNT64rr:
|
||||
case X86::POPCNT16rr:
|
||||
case X86::POPCNT32rr:
|
||||
case X86::POPCNT64rr:
|
||||
case X86::TZCNT16rr:
|
||||
case X86::TZCNT32rr:
|
||||
case X86::TZCNT64rr:
|
||||
|
||||
// Bit manipulation instructions are effectively combinations of basic
|
||||
// arithmetic ops, and should still execute in constant time. These also
|
||||
// set flags.
|
||||
case X86::BLCFILL32rr:
|
||||
case X86::BLCFILL64rr:
|
||||
case X86::BLCI32rr:
|
||||
case X86::BLCI64rr:
|
||||
case X86::BLCIC32rr:
|
||||
case X86::BLCIC64rr:
|
||||
case X86::BLCMSK32rr:
|
||||
case X86::BLCMSK64rr:
|
||||
case X86::BLCS32rr:
|
||||
case X86::BLCS64rr:
|
||||
case X86::BLSFILL32rr:
|
||||
case X86::BLSFILL64rr:
|
||||
case X86::BLSI32rr:
|
||||
case X86::BLSI64rr:
|
||||
case X86::BLSIC32rr:
|
||||
case X86::BLSIC64rr:
|
||||
case X86::BLSMSK32rr:
|
||||
case X86::BLSMSK64rr:
|
||||
case X86::BLSR32rr:
|
||||
case X86::BLSR64rr:
|
||||
case X86::TZMSK32rr:
|
||||
case X86::TZMSK64rr:
|
||||
|
||||
// Bit extracting and clearing instructions should execute in constant time,
|
||||
// and set flags.
|
||||
case X86::BEXTR32rr:
|
||||
case X86::BEXTR64rr:
|
||||
case X86::BEXTRI32ri:
|
||||
case X86::BEXTRI64ri:
|
||||
case X86::BZHI32rr:
|
||||
case X86::BZHI64rr:
|
||||
|
||||
// Shift and rotate.
|
||||
case X86::ROL8r1: case X86::ROL16r1: case X86::ROL32r1: case X86::ROL64r1:
|
||||
case X86::ROL8rCL: case X86::ROL16rCL: case X86::ROL32rCL: case X86::ROL64rCL:
|
||||
case X86::ROL8ri: case X86::ROL16ri: case X86::ROL32ri: case X86::ROL64ri:
|
||||
case X86::ROR8r1: case X86::ROR16r1: case X86::ROR32r1: case X86::ROR64r1:
|
||||
case X86::ROR8rCL: case X86::ROR16rCL: case X86::ROR32rCL: case X86::ROR64rCL:
|
||||
case X86::ROR8ri: case X86::ROR16ri: case X86::ROR32ri: case X86::ROR64ri:
|
||||
case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1: case X86::SAR64r1:
|
||||
case X86::SAR8rCL: case X86::SAR16rCL: case X86::SAR32rCL: case X86::SAR64rCL:
|
||||
case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri: case X86::SAR64ri:
|
||||
case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1: case X86::SHL64r1:
|
||||
case X86::SHL8rCL: case X86::SHL16rCL: case X86::SHL32rCL: case X86::SHL64rCL:
|
||||
case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri: case X86::SHL64ri:
|
||||
case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1: case X86::SHR64r1:
|
||||
case X86::SHR8rCL: case X86::SHR16rCL: case X86::SHR32rCL: case X86::SHR64rCL:
|
||||
case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri: case X86::SHR64ri:
|
||||
case X86::SHLD16rrCL: case X86::SHLD32rrCL: case X86::SHLD64rrCL:
|
||||
case X86::SHLD16rri8: case X86::SHLD32rri8: case X86::SHLD64rri8:
|
||||
case X86::SHRD16rrCL: case X86::SHRD32rrCL: case X86::SHRD64rrCL:
|
||||
case X86::SHRD16rri8: case X86::SHRD32rri8: case X86::SHRD64rri8:
|
||||
|
||||
// Basic arithmetic is constant time on the input but does set flags.
|
||||
case X86::ADC8rr: case X86::ADC8ri:
|
||||
case X86::ADC16rr: case X86::ADC16ri: case X86::ADC16ri8:
|
||||
case X86::ADC32rr: case X86::ADC32ri: case X86::ADC32ri8:
|
||||
case X86::ADC64rr: case X86::ADC64ri8: case X86::ADC64ri32:
|
||||
case X86::ADD8rr: case X86::ADD8ri:
|
||||
case X86::ADD16rr: case X86::ADD16ri: case X86::ADD16ri8:
|
||||
case X86::ADD32rr: case X86::ADD32ri: case X86::ADD32ri8:
|
||||
case X86::ADD64rr: case X86::ADD64ri8: case X86::ADD64ri32:
|
||||
case X86::AND8rr: case X86::AND8ri:
|
||||
case X86::AND16rr: case X86::AND16ri: case X86::AND16ri8:
|
||||
case X86::AND32rr: case X86::AND32ri: case X86::AND32ri8:
|
||||
case X86::AND64rr: case X86::AND64ri8: case X86::AND64ri32:
|
||||
case X86::OR8rr: case X86::OR8ri:
|
||||
case X86::OR16rr: case X86::OR16ri: case X86::OR16ri8:
|
||||
case X86::OR32rr: case X86::OR32ri: case X86::OR32ri8:
|
||||
case X86::OR64rr: case X86::OR64ri8: case X86::OR64ri32:
|
||||
case X86::SBB8rr: case X86::SBB8ri:
|
||||
case X86::SBB16rr: case X86::SBB16ri: case X86::SBB16ri8:
|
||||
case X86::SBB32rr: case X86::SBB32ri: case X86::SBB32ri8:
|
||||
case X86::SBB64rr: case X86::SBB64ri8: case X86::SBB64ri32:
|
||||
case X86::SUB8rr: case X86::SUB8ri:
|
||||
case X86::SUB16rr: case X86::SUB16ri: case X86::SUB16ri8:
|
||||
case X86::SUB32rr: case X86::SUB32ri: case X86::SUB32ri8:
|
||||
case X86::SUB64rr: case X86::SUB64ri8: case X86::SUB64ri32:
|
||||
case X86::XOR8rr: case X86::XOR8ri:
|
||||
case X86::XOR16rr: case X86::XOR16ri: case X86::XOR16ri8:
|
||||
case X86::XOR32rr: case X86::XOR32ri: case X86::XOR32ri8:
|
||||
case X86::XOR64rr: case X86::XOR64ri8: case X86::XOR64ri32:
|
||||
// Arithmetic with just 32-bit and 64-bit variants and no immediates.
|
||||
case X86::ADCX32rr: case X86::ADCX64rr:
|
||||
case X86::ADOX32rr: case X86::ADOX64rr:
|
||||
case X86::ANDN32rr: case X86::ANDN64rr:
|
||||
// Unary arithmetic operations.
|
||||
case X86::DEC8r: case X86::DEC16r: case X86::DEC32r: case X86::DEC64r:
|
||||
case X86::INC8r: case X86::INC16r: case X86::INC32r: case X86::INC64r:
|
||||
case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r:
|
||||
|
||||
// Unlike other arithmetic, NOT doesn't set EFLAGS.
|
||||
case X86::NOT8r: case X86::NOT16r: case X86::NOT32r: case X86::NOT64r:
|
||||
|
||||
// Various move instructions used to zero or sign extend things. Note that we
|
||||
// intentionally don't support the _NOREX variants as we can't handle that
|
||||
// register constraint anyways.
|
||||
case X86::MOVSX16rr8:
|
||||
case X86::MOVSX32rr8: case X86::MOVSX32rr16:
|
||||
case X86::MOVSX64rr8: case X86::MOVSX64rr16: case X86::MOVSX64rr32:
|
||||
case X86::MOVZX16rr8:
|
||||
case X86::MOVZX32rr8: case X86::MOVZX32rr16:
|
||||
case X86::MOVZX64rr8: case X86::MOVZX64rr16:
|
||||
case X86::MOV32rr:
|
||||
|
||||
// Arithmetic instructions that are both constant time and don't set flags.
|
||||
case X86::RORX32ri:
|
||||
case X86::RORX64ri:
|
||||
case X86::SARX32rr:
|
||||
case X86::SARX64rr:
|
||||
case X86::SHLX32rr:
|
||||
case X86::SHLX64rr:
|
||||
case X86::SHRX32rr:
|
||||
case X86::SHRX64rr:
|
||||
|
||||
// LEA doesn't actually access memory, and its arithmetic is constant time.
|
||||
case X86::LEA16r:
|
||||
case X86::LEA32r:
|
||||
case X86::LEA64_32r:
|
||||
case X86::LEA64r:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the instruction has no behavior (specified or otherwise)
|
||||
/// that is based on the value loaded from memory or the value of any
|
||||
/// non-address register operands.
|
||||
///
|
||||
/// For example, if the latency of the instruction is dependent on the
|
||||
/// particular bits set in any of the registers *or* any of the bits loaded from
|
||||
/// memory.
|
||||
///
|
||||
/// Instructions are considered data invariant even if they set EFLAGS.
|
||||
///
|
||||
/// A classical example of something that is inherently not data invariant is an
|
||||
/// indirect jump -- the destination is loaded into icache based on the bits set
|
||||
/// in the jump destination register.
|
||||
///
|
||||
/// FIXME: This should become part of our instruction tables.
|
||||
static bool isDataInvariantLoad(MachineInstr &MI) {
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
// By default, assume that the load will immediately leak.
|
||||
return false;
|
||||
|
||||
// On x86 it is believed that imul is constant time w.r.t. the loaded data.
|
||||
// However, they set flags and are perhaps the most surprisingly constant
|
||||
// time operations so we call them out here separately.
|
||||
case X86::IMUL16rm:
|
||||
case X86::IMUL16rmi8:
|
||||
case X86::IMUL16rmi:
|
||||
case X86::IMUL32rm:
|
||||
case X86::IMUL32rmi8:
|
||||
case X86::IMUL32rmi:
|
||||
case X86::IMUL64rm:
|
||||
case X86::IMUL64rmi32:
|
||||
case X86::IMUL64rmi8:
|
||||
|
||||
// Bit scanning and counting instructions that are somewhat surprisingly
|
||||
// constant time as they scan across bits and do other fairly complex
|
||||
// operations like popcnt, but are believed to be constant time on x86.
|
||||
// However, these set flags.
|
||||
case X86::BSF16rm:
|
||||
case X86::BSF32rm:
|
||||
case X86::BSF64rm:
|
||||
case X86::BSR16rm:
|
||||
case X86::BSR32rm:
|
||||
case X86::BSR64rm:
|
||||
case X86::LZCNT16rm:
|
||||
case X86::LZCNT32rm:
|
||||
case X86::LZCNT64rm:
|
||||
case X86::POPCNT16rm:
|
||||
case X86::POPCNT32rm:
|
||||
case X86::POPCNT64rm:
|
||||
case X86::TZCNT16rm:
|
||||
case X86::TZCNT32rm:
|
||||
case X86::TZCNT64rm:
|
||||
|
||||
// Bit manipulation instructions are effectively combinations of basic
|
||||
// arithmetic ops, and should still execute in constant time. These also
|
||||
// set flags.
|
||||
case X86::BLCFILL32rm:
|
||||
case X86::BLCFILL64rm:
|
||||
case X86::BLCI32rm:
|
||||
case X86::BLCI64rm:
|
||||
case X86::BLCIC32rm:
|
||||
case X86::BLCIC64rm:
|
||||
case X86::BLCMSK32rm:
|
||||
case X86::BLCMSK64rm:
|
||||
case X86::BLCS32rm:
|
||||
case X86::BLCS64rm:
|
||||
case X86::BLSFILL32rm:
|
||||
case X86::BLSFILL64rm:
|
||||
case X86::BLSI32rm:
|
||||
case X86::BLSI64rm:
|
||||
case X86::BLSIC32rm:
|
||||
case X86::BLSIC64rm:
|
||||
case X86::BLSMSK32rm:
|
||||
case X86::BLSMSK64rm:
|
||||
case X86::BLSR32rm:
|
||||
case X86::BLSR64rm:
|
||||
case X86::TZMSK32rm:
|
||||
case X86::TZMSK64rm:
|
||||
|
||||
// Bit extracting and clearing instructions should execute in constant time,
|
||||
// and set flags.
|
||||
case X86::BEXTR32rm:
|
||||
case X86::BEXTR64rm:
|
||||
case X86::BEXTRI32mi:
|
||||
case X86::BEXTRI64mi:
|
||||
case X86::BZHI32rm:
|
||||
case X86::BZHI64rm:
|
||||
|
||||
// Basic arithmetic is constant time on the input but does set flags.
|
||||
case X86::ADC8rm:
|
||||
case X86::ADC16rm:
|
||||
case X86::ADC32rm:
|
||||
case X86::ADC64rm:
|
||||
case X86::ADCX32rm:
|
||||
case X86::ADCX64rm:
|
||||
case X86::ADD8rm:
|
||||
case X86::ADD16rm:
|
||||
case X86::ADD32rm:
|
||||
case X86::ADD64rm:
|
||||
case X86::ADOX32rm:
|
||||
case X86::ADOX64rm:
|
||||
case X86::AND8rm:
|
||||
case X86::AND16rm:
|
||||
case X86::AND32rm:
|
||||
case X86::AND64rm:
|
||||
case X86::ANDN32rm:
|
||||
case X86::ANDN64rm:
|
||||
case X86::OR8rm:
|
||||
case X86::OR16rm:
|
||||
case X86::OR32rm:
|
||||
case X86::OR64rm:
|
||||
case X86::SBB8rm:
|
||||
case X86::SBB16rm:
|
||||
case X86::SBB32rm:
|
||||
case X86::SBB64rm:
|
||||
case X86::SUB8rm:
|
||||
case X86::SUB16rm:
|
||||
case X86::SUB32rm:
|
||||
case X86::SUB64rm:
|
||||
case X86::XOR8rm:
|
||||
case X86::XOR16rm:
|
||||
case X86::XOR32rm:
|
||||
case X86::XOR64rm:
|
||||
|
||||
// Integer multiply w/o affecting flags is still believed to be constant
|
||||
// time on x86. Called out separately as this is among the most surprising
|
||||
// instructions to exhibit that behavior.
|
||||
case X86::MULX32rm:
|
||||
case X86::MULX64rm:
|
||||
|
||||
// Arithmetic instructions that are both constant time and don't set flags.
|
||||
case X86::RORX32mi:
|
||||
case X86::RORX64mi:
|
||||
case X86::SARX32rm:
|
||||
case X86::SARX64rm:
|
||||
case X86::SHLX32rm:
|
||||
case X86::SHLX64rm:
|
||||
case X86::SHRX32rm:
|
||||
case X86::SHRX64rm:
|
||||
|
||||
// Conversions are believed to be constant time and don't set flags.
|
||||
case X86::CVTTSD2SI64rm: case X86::VCVTTSD2SI64rm: case X86::VCVTTSD2SI64Zrm:
|
||||
case X86::CVTTSD2SIrm: case X86::VCVTTSD2SIrm: case X86::VCVTTSD2SIZrm:
|
||||
case X86::CVTTSS2SI64rm: case X86::VCVTTSS2SI64rm: case X86::VCVTTSS2SI64Zrm:
|
||||
case X86::CVTTSS2SIrm: case X86::VCVTTSS2SIrm: case X86::VCVTTSS2SIZrm:
|
||||
case X86::CVTSI2SDrm: case X86::VCVTSI2SDrm: case X86::VCVTSI2SDZrm:
|
||||
case X86::CVTSI2SSrm: case X86::VCVTSI2SSrm: case X86::VCVTSI2SSZrm:
|
||||
case X86::CVTSI642SDrm: case X86::VCVTSI642SDrm: case X86::VCVTSI642SDZrm:
|
||||
case X86::CVTSI642SSrm: case X86::VCVTSI642SSrm: case X86::VCVTSI642SSZrm:
|
||||
case X86::CVTSS2SDrm: case X86::VCVTSS2SDrm: case X86::VCVTSS2SDZrm:
|
||||
case X86::CVTSD2SSrm: case X86::VCVTSD2SSrm: case X86::VCVTSD2SSZrm:
|
||||
// AVX512 added unsigned integer conversions.
|
||||
case X86::VCVTTSD2USI64Zrm:
|
||||
case X86::VCVTTSD2USIZrm:
|
||||
case X86::VCVTTSS2USI64Zrm:
|
||||
case X86::VCVTTSS2USIZrm:
|
||||
case X86::VCVTUSI2SDZrm:
|
||||
case X86::VCVTUSI642SDZrm:
|
||||
case X86::VCVTUSI2SSZrm:
|
||||
case X86::VCVTUSI642SSZrm:
|
||||
|
||||
// Loads to register don't set flags.
|
||||
case X86::MOV8rm:
|
||||
case X86::MOV8rm_NOREX:
|
||||
case X86::MOV16rm:
|
||||
case X86::MOV32rm:
|
||||
case X86::MOV64rm:
|
||||
case X86::MOVSX16rm8:
|
||||
case X86::MOVSX32rm16:
|
||||
case X86::MOVSX32rm8:
|
||||
case X86::MOVSX32rm8_NOREX:
|
||||
case X86::MOVSX64rm16:
|
||||
case X86::MOVSX64rm32:
|
||||
case X86::MOVSX64rm8:
|
||||
case X86::MOVZX16rm8:
|
||||
case X86::MOVZX32rm16:
|
||||
case X86::MOVZX32rm8:
|
||||
case X86::MOVZX32rm8_NOREX:
|
||||
case X86::MOVZX64rm16:
|
||||
case X86::MOVZX64rm8:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if the MI has EFLAGS as a register def operand and it's live,
|
||||
// otherwise it returns false
|
||||
static bool isEFLAGSDefLive(const MachineInstr &MI) {
|
||||
|
@ -1732,7 +1364,7 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
|
|||
// address registers, queue it up to be hardened post-load. Notably,
|
||||
// even once hardened this won't introduce a useful dependency that
|
||||
// could prune out subsequent loads.
|
||||
if (EnablePostLoadHardening && isDataInvariantLoad(MI) &&
|
||||
if (EnablePostLoadHardening && X86InstrInfo::isDataInvariantLoad(MI) &&
|
||||
!isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 &&
|
||||
MI.getOperand(0).isReg() &&
|
||||
canHardenRegister(MI.getOperand(0).getReg()) &&
|
||||
|
@ -1791,7 +1423,7 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
|
|||
// If this is a data-invariant load and there is no EFLAGS
|
||||
// interference, we want to try and sink any hardening as far as
|
||||
// possible.
|
||||
if (isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI)) {
|
||||
if (X86InstrInfo::isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI)) {
|
||||
// Sink the instruction we'll need to harden as far as we can down
|
||||
// the graph.
|
||||
MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
|
||||
|
@ -2141,7 +1773,7 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
|
|||
|
||||
MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
|
||||
MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
|
||||
assert(isDataInvariantLoad(InitialMI) &&
|
||||
assert(X86InstrInfo::isDataInvariantLoad(InitialMI) &&
|
||||
"Cannot get here with a non-invariant load!");
|
||||
assert(!isEFLAGSDefLive(InitialMI) &&
|
||||
"Cannot get here with a data invariant load "
|
||||
|
@ -2160,11 +1792,11 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
|
|||
// If we're already going to harden this use, it is data invariant, it
|
||||
// does not interfere with EFLAGS, and within our block.
|
||||
if (HardenedInstrs.count(&UseMI)) {
|
||||
if (!isDataInvariantLoad(UseMI) || isEFLAGSDefLive(UseMI)) {
|
||||
if (!X86InstrInfo::isDataInvariantLoad(UseMI) || isEFLAGSDefLive(UseMI)) {
|
||||
// If we've already decided to harden a non-load, we must have sunk
|
||||
// some other post-load hardened instruction to it and it must itself
|
||||
// be data-invariant.
|
||||
assert(isDataInvariant(UseMI) &&
|
||||
assert(X86InstrInfo::isDataInvariant(UseMI) &&
|
||||
"Data variant instruction being hardened!");
|
||||
continue;
|
||||
}
|
||||
|
@ -2196,7 +1828,7 @@ MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
|
|||
|
||||
// If this single use isn't data invariant, isn't in this block, or has
|
||||
// interfering EFLAGS, we can't sink the hardening to it.
|
||||
if (!isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
|
||||
if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
|
||||
isEFLAGSDefLive(UseMI))
|
||||
return {};
|
||||
|
||||
|
|
Loading…
Reference in New Issue