llvm-project/lld/ELF/Arch/X86.cpp

//===- X86.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InputFiles.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;

namespace {
class X86 : public TargetInfo {
public:
  X86();
  int getTlsGdRelaxSkip(RelType type) const override;
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPltHeader(uint8_t *buf) const override;
  RelType getDynRel(RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;

  RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
  void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
  void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
                      uint64_t val) const override;
};
} // namespace

X86::X86() {
  copyRel = R_386_COPY;
  gotRel = R_386_GLOB_DAT;
  pltRel = R_386_JUMP_SLOT;
  iRelativeRel = R_386_IRELATIVE;
  relativeRel = R_386_RELATIVE;
  symbolicRel = R_386_32;
  tlsDescRel = R_386_TLS_DESC;
  tlsGotRel = R_386_TLS_TPOFF;
  tlsModuleIndexRel = R_386_TLS_DTPMOD32;
  tlsOffsetRel = R_386_TLS_DTPOFF32;
  gotBaseSymInGotPlt = true;
  pltHeaderSize = 16;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3

  // Align to the non-PAE large page size (known as a superpage or huge page).
  // FreeBSD automatically promotes large, superpage-aligned allocations.
  defaultImageBase = 0x400000;
}

int X86::getTlsGdRelaxSkip(RelType type) const {
  // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
  return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
}

RelExpr X86::getRelExpr(RelType type, const Symbol &s,
                        const uint8_t *loc) const {
  if (type == R_386_TLS_IE || type == R_386_TLS_GOTIE)
    config->hasTlsIe = true;

  switch (type) {
  case R_386_8:
  case R_386_16:
  case R_386_32:
    return R_ABS;
  case R_386_TLS_LDO_32:
    return R_DTPREL;
  case R_386_TLS_GD:
    return R_TLSGD_GOTPLT;
  case R_386_TLS_LDM:
    return R_TLSLD_GOTPLT;
  case R_386_PLT32:
    return R_PLT_PC;
  case R_386_PC8:
  case R_386_PC16:
  case R_386_PC32:
    return R_PC;
  case R_386_GOTPC:
    return R_GOTPLTONLY_PC;
  case R_386_TLS_IE:
    return R_GOT;
  case R_386_GOT32:
  case R_386_GOT32X:
    // These relocations are arguably mis-designed because their calculations
    // depend on the instructions they are applied to. This is bad because we
    // usually don't care about whether the target section contains valid
    // machine instructions or not. But this is part of the documented ABI, so
    // we had to implement as the standard requires.
    //
    // x86 does not support PC-relative data access. Therefore, in order to
    // access GOT contents, a GOT address needs to be known at link-time
    // (which means non-PIC) or compilers have to emit code to get a GOT
    // address at runtime (which means code is position-independent but
    // compilers need to emit extra code for each GOT access.) This decision
    // is made at compile-time. In the latter case, compilers emit code to
    // load a GOT address to a register, which is usually %ebx.
    //
    // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
    // foo@GOT(%ebx).
    //
    // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
    // find such relocation, we should report an error. foo@GOT is resolved to
    // an *absolute* address of foo's GOT entry, because both GOT address and
    // foo's offset are known. In other words, it's G + A.
    //
    // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
    // foo's GOT entry in the table, because GOT address is not known but foo's
    // offset in the table is known. It's G + A - GOT.
    //
    // It's unfortunate that compilers emit the same relocation for these
    // different use cases. In order to distinguish them, we have to read a
    // machine instruction.
    //
    // The following code implements it. We assume that Loc[0] is the first byte
    // of a displacement or an immediate field of a valid machine
    // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
    // the byte, we can determine whether the instruction uses the operand as an
    // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
    return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
  case R_386_TLS_GOTDESC:
    return R_TLSDESC_GOTPLT;
  case R_386_TLS_DESC_CALL:
    return R_TLSDESC_CALL;
  case R_386_TLS_GOTIE:
    return R_GOTPLT;
  case R_386_GOTOFF:
    return R_GOTPLTREL;
  case R_386_TLS_LE:
    return R_TPREL;
  case R_386_TLS_LE_32:
    return R_TPREL_NEG;
  case R_386_NONE:
    return R_NONE;
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}

RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
  switch (expr) {
  default:
    return expr;
  case R_RELAX_TLS_GD_TO_IE:
    return R_RELAX_TLS_GD_TO_IE_GOTPLT;
  case R_RELAX_TLS_GD_TO_LE:
    return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
                                : R_RELAX_TLS_GD_TO_LE;
  }
}

void X86::writeGotPltHeader(uint8_t *buf) const {
  write32le(buf, mainPart->dynamic->getVA());
}

void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  // Entries in .got.plt initially points back to the corresponding
  // PLT entries with a fixed offset to skip the first instruction.
  write32le(buf, s.getPltVA() + 6);
}

void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
  // An x86 entry is the address of the ifunc resolver function.
  write32le(buf, s.getVA());
}

RelType X86::getDynRel(RelType type) const {
  if (type == R_386_TLS_LE)
    return R_386_TLS_TPOFF;
  if (type == R_386_TLS_LE_32)
    return R_386_TLS_TPOFF32;
  return type;
}

void X86::writePltHeader(uint8_t *buf) const {
  if (config->isPic) {
    const uint8_t v[] = {
        0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
        0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
        0x90, 0x90, 0x90, 0x90              // nop
    };
    memcpy(buf, v, sizeof(v));
    return;
  }

  const uint8_t pltData[] = {
      0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
      0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
      0x90, 0x90, 0x90, 0x90, // nop
  };
  memcpy(buf, pltData, sizeof(pltData));
  uint32_t gotPlt = in.gotPlt->getVA();
  write32le(buf + 2, gotPlt + 4);
  write32le(buf + 8, gotPlt + 8);
}

void X86::writePlt(uint8_t *buf, const Symbol &sym,
                   uint64_t pltEntryAddr) const {
  unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
  if (config->isPic) {
    const uint8_t inst[] = {
        0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
        0x68, 0,    0, 0, 0,    // pushl $reloc_offset
        0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
    };
    memcpy(buf, inst, sizeof(inst));
    write32le(buf + 2, sym.getGotPltVA() - in.gotPlt->getVA());
  } else {
    const uint8_t inst[] = {
        0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
        0x68, 0,    0, 0, 0,    // pushl $reloc_offset
        0xe9, 0,    0, 0, 0,    // jmp .PLT0@PC
    };
    memcpy(buf, inst, sizeof(inst));
    write32le(buf + 2, sym.getGotPltVA());
  }

  write32le(buf + 7, relOff);
  write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
}

int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  case R_386_8:
  case R_386_PC8:
    return SignExtend64<8>(*buf);
  case R_386_16:
  case R_386_PC16:
    return SignExtend64<16>(read16le(buf));
  case R_386_32:
  case R_386_GLOB_DAT:
  case R_386_GOT32:
  case R_386_GOT32X:
  case R_386_GOTOFF:
  case R_386_GOTPC:
  case R_386_IRELATIVE:
  case R_386_PC32:
  case R_386_PLT32:
  case R_386_RELATIVE:
  case R_386_TLS_GOTDESC:
  case R_386_TLS_DESC_CALL:
  case R_386_TLS_DTPMOD32:
  case R_386_TLS_DTPOFF32:
  case R_386_TLS_LDO_32:
  case R_386_TLS_LDM:
  case R_386_TLS_IE:
  case R_386_TLS_IE_32:
  case R_386_TLS_LE:
  case R_386_TLS_LE_32:
  case R_386_TLS_GD:
  case R_386_TLS_GD_32:
  case R_386_TLS_GOTIE:
  case R_386_TLS_TPOFF:
  case R_386_TLS_TPOFF32:
    return SignExtend64<32>(read32le(buf));
  case R_386_TLS_DESC:
    return SignExtend64<32>(read32le(buf + 4));
  case R_386_NONE:
  case R_386_JUMP_SLOT:
    // These relocations are defined as not having an implicit addend.
    return 0;
  default:
    internalLinkerError(getErrorLocation(buf),
                        "cannot read addend for relocation " + toString(type));
    return 0;
  }
}

void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
  switch (rel.type) {
  case R_386_8:
    // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
    // being used for some 16-bit programs such as boot loaders, so
    // we want to support them.
    checkIntUInt(loc, val, 8, rel);
    *loc = val;
    break;
  case R_386_PC8:
    checkInt(loc, val, 8, rel);
    *loc = val;
    break;
  case R_386_16:
    checkIntUInt(loc, val, 16, rel);
    write16le(loc, val);
    break;
  case R_386_PC16:
    // R_386_PC16 is normally used with 16 bit code. In that situation
    // the PC is 16 bits, just like the addend. This means that it can
    // point from any 16 bit address to any other if the possibility
    // of wrapping is included.
    // The only restriction we have to check then is that the destination
    // address fits in 16 bits. That is impossible to do here. The problem is
    // that we are passed the final value, which already had the
    // current location subtracted from it.
    // We just check that Val fits in 17 bits. This misses some cases, but
    // should have no false positives.
    checkInt(loc, val, 17, rel);
    write16le(loc, val);
    break;
  case R_386_32:
  case R_386_GOT32:
  case R_386_GOT32X:
  case R_386_GOTOFF:
  case R_386_GOTPC:
  case R_386_PC32:
  case R_386_PLT32:
  case R_386_RELATIVE:
  case R_386_TLS_GOTDESC:
  case R_386_TLS_DESC_CALL:
  case R_386_TLS_DTPMOD32:
  case R_386_TLS_DTPOFF32:
  case R_386_TLS_GD:
  case R_386_TLS_GOTIE:
  case R_386_TLS_IE:
  case R_386_TLS_LDM:
  case R_386_TLS_LDO_32:
  case R_386_TLS_LE:
  case R_386_TLS_LE_32:
  case R_386_TLS_TPOFF:
  case R_386_TLS_TPOFF32:
    checkInt(loc, val, 32, rel);
    write32le(loc, val);
    break;
  case R_386_TLS_DESC:
    // The addend is stored in the second 32-bit word.
    write32le(loc + 4, val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}

void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  if (rel.type == R_386_TLS_GD) {
    // Convert
    //   leal x@tlsgd(, %ebx, 1), %eax
    //   call __tls_get_addr@plt
    // to
    //   movl %gs:0, %eax
    //   subl $x@tpoff, %eax
    const uint8_t inst[] = {
        0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
        0x81, 0xe8, 0,    0,    0,    0,    // subl val(%ebx), %eax
    };
    memcpy(loc - 3, inst, sizeof(inst));
    write32le(loc + 5, val);
  } else if (rel.type == R_386_TLS_GOTDESC) {
    // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
    //
    // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
    if (memcmp(loc - 2, "\x8d\x83", 2)) {
      error(getErrorLocation(loc - 2) +
            "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
      return;
    }
    loc[-1] = 0x05;
    write32le(loc, val);
  } else {
    // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
    assert(rel.type == R_386_TLS_DESC_CALL);
    loc[0] = 0x66;
    loc[1] = 0x90;
  }
}

void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  if (rel.type == R_386_TLS_GD) {
    // Convert
    //   leal x@tlsgd(, %ebx, 1), %eax
    //   call __tls_get_addr@plt
    // to
    //   movl %gs:0, %eax
    //   addl x@gotntpoff(%ebx), %eax
    const uint8_t inst[] = {
        0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
        0x03, 0x83, 0,    0,    0,    0,    // addl val(%ebx), %eax
    };
    memcpy(loc - 3, inst, sizeof(inst));
    write32le(loc + 5, val);
  } else if (rel.type == R_386_TLS_GOTDESC) {
    // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
    if (memcmp(loc - 2, "\x8d\x83", 2)) {
      error(getErrorLocation(loc - 2) +
            "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax");
      return;
    }
    loc[-2] = 0x8b;
    write32le(loc, val);
  } else {
    // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
    assert(rel.type == R_386_TLS_DESC_CALL);
    loc[0] = 0x66;
    loc[1] = 0x90;
  }
}

// In some conditions, relocations can be optimized to avoid using GOT.
// This function does that for Initial Exec to Local Exec case.
void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  // Ulrich's document section 6.2 says that @gotntpoff can
  // be used with MOVL or ADDL instructions.
  // @indntpoff is similar to @gotntpoff, but for use in
  // position dependent code.
  uint8_t reg = (loc[-1] >> 3) & 7;

  if (rel.type == R_386_TLS_IE) {
    if (loc[-1] == 0xa1) {
      // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
      // This case is different from the generic case below because
      // this is a 5 byte instruction while below is 6 bytes.
      loc[-1] = 0xb8;
    } else if (loc[-2] == 0x8b) {
      // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
      loc[-2] = 0xc7;
      loc[-1] = 0xc0 | reg;
    } else {
      // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
      loc[-2] = 0x81;
      loc[-1] = 0xc0 | reg;
    }
  } else {
    assert(rel.type == R_386_TLS_GOTIE);
    if (loc[-2] == 0x8b) {
      // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
      loc[-2] = 0xc7;
      loc[-1] = 0xc0 | reg;
    } else {
      // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
      loc[-2] = 0x8d;
      loc[-1] = 0x80 | (reg << 3) | reg;
    }
  }
  write32le(loc, val);
}

void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  if (rel.type == R_386_TLS_LDO_32) {
    write32le(loc, val);
    return;
  }

  // Convert
  //   leal foo(%reg),%eax
  //   call ___tls_get_addr
  // to
  //   movl %gs:0,%eax
  //   nop
  //   leal 0(%esi,1),%esi
  const uint8_t inst[] = {
      0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
      0x90,                               // nop
      0x8d, 0x74, 0x26, 0x00,             // leal 0(%esi,1),%esi
  };
  memcpy(loc - 2, inst, sizeof(inst));
}

// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
// entries containing endbr32 instructions. A PLT entry will be split into two
// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
namespace {
class IntelIBT : public X86 {
public:
  IntelIBT();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;

  static const unsigned IBTPltHeaderSize = 16;
};
} // namespace

IntelIBT::IntelIBT() { pltHeaderSize = 0; }

void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  uint64_t va =
      in.ibtPlt->getVA() + IBTPltHeaderSize + s.getPltIdx() * pltEntrySize;
  write32le(buf, va);
}

void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
                        uint64_t /*pltEntryAddr*/) const {
  if (config->isPic) {
    const uint8_t inst[] = {
        0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
        0xff, 0xa3, 0,    0,    0, 0, // jmp *name@GOT(%ebx)
        0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
    };
    memcpy(buf, inst, sizeof(inst));
    write32le(buf + 6, sym.getGotPltVA() - in.gotPlt->getVA());
    return;
  }

  const uint8_t inst[] = {
      0xf3, 0x0f, 0x1e, 0xfb,       // endbr32
      0xff, 0x25, 0,    0,    0, 0, // jmp *foo@GOT
      0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
  };
  memcpy(buf, inst, sizeof(inst));
  write32le(buf + 6, sym.getGotPltVA());
}

void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
  writePltHeader(buf);
  buf += IBTPltHeaderSize;

  const uint8_t inst[] = {
      0xf3, 0x0f, 0x1e, 0xfb,    // endbr32
      0x68, 0,    0,    0,    0, // pushl $reloc_offset
      0xe9, 0,    0,    0,    0, // jmpq .PLT0@PC
      0x66, 0x90,                // nop
  };

  for (size_t i = 0; i < numEntries; ++i) {
    memcpy(buf, inst, sizeof(inst));
    write32le(buf + 5, i * sizeof(object::ELF32LE::Rel));
    write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
    buf += sizeof(inst);
  }
}

namespace {
class RetpolinePic : public X86 {
public:
  RetpolinePic();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};

class RetpolineNoPic : public X86 {
public:
  RetpolineNoPic();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};
} // namespace

RetpolinePic::RetpolinePic() {
  pltHeaderSize = 48;
  pltEntrySize = 32;
  ipltEntrySize = 32;
}

void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  write32le(buf, s.getPltVA() + 17);
}

void RetpolinePic::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xff, 0xb3, 4,    0,    0,    0,          // 0:    pushl 4(%ebx)
      0x50,                                     // 6:    pushl %eax
      0x8b, 0x83, 8,    0,    0,    0,          // 7:    mov 8(%ebx), %eax
      0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    call next
      0xf3, 0x90,                               // 12: loop: pause
      0x0f, 0xae, 0xe8,                         // 14:   lfence
      0xeb, 0xf9,                               // 17:   jmp loop
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
      0x89, 0x0c, 0x24,                         // 20: next: mov %ecx, (%esp)
      0x8b, 0x4c, 0x24, 0x04,                   // 23:   mov 0x4(%esp), %ecx
      0x89, 0x44, 0x24, 0x04,                   // 27:   mov %eax ,0x4(%esp)
      0x89, 0xc8,                               // 2b:   mov %ecx, %eax
      0x59,                                     // 2d:   pop %ecx
      0xc3,                                     // 2e:   ret
      0xcc,                                     // 2f:   int3; padding
  };
  memcpy(buf, insn, sizeof(insn));
}

void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
                            uint64_t pltEntryAddr) const {
  unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
  const uint8_t insn[] = {
      0x50,                            // pushl %eax
      0x8b, 0x83, 0,    0,    0,    0, // mov foo@GOT(%ebx), %eax
      0xe8, 0,    0,    0,    0,       // call plt+0x20
      0xe9, 0,    0,    0,    0,       // jmp plt+0x12
      0x68, 0,    0,    0,    0,       // pushl $reloc_offset
      0xe9, 0,    0,    0,    0,       // jmp plt+0
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint32_t ebx = in.gotPlt->getVA();
  unsigned off = pltEntryAddr - in.plt->getVA();
  write32le(buf + 3, sym.getGotPltVA() - ebx);
  write32le(buf + 8, -off - 12 + 32);
  write32le(buf + 13, -off - 17 + 18);
  write32le(buf + 18, relOff);
  write32le(buf + 23, -off - 27);
}

RetpolineNoPic::RetpolineNoPic() {
  pltHeaderSize = 48;
  pltEntrySize = 32;
  ipltEntrySize = 32;
}

void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  write32le(buf, s.getPltVA() + 16);
}

void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xff, 0x35, 0,    0,    0,    0, // 0:    pushl GOTPLT+4
      0x50,                            // 6:    pushl %eax
      0xa1, 0,    0,    0,    0,       // 7:    mov GOTPLT+8, %eax
      0xe8, 0x0f, 0x00, 0x00, 0x00,    // c:    call next
      0xf3, 0x90,                      // 11: loop: pause
      0x0f, 0xae, 0xe8,                // 13:   lfence
      0xeb, 0xf9,                      // 16:   jmp loop
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc,    // 18:   int3
      0xcc, 0xcc, 0xcc,                // 1f:   int3; .align 16
      0x89, 0x0c, 0x24,                // 20: next: mov %ecx, (%esp)
      0x8b, 0x4c, 0x24, 0x04,          // 23:   mov 0x4(%esp), %ecx
      0x89, 0x44, 0x24, 0x04,          // 27:   mov %eax ,0x4(%esp)
      0x89, 0xc8,                      // 2b:   mov %ecx, %eax
      0x59,                            // 2d:   pop %ecx
      0xc3,                            // 2e:   ret
      0xcc,                            // 2f:   int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint32_t gotPlt = in.gotPlt->getVA();
  write32le(buf + 2, gotPlt + 4);
  write32le(buf + 8, gotPlt + 8);
}

void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
                              uint64_t pltEntryAddr) const {
  unsigned relOff = in.relaPlt->entsize * sym.getPltIdx();
  const uint8_t insn[] = {
      0x50,                         // 0:  pushl %eax
      0xa1, 0,    0,    0,    0,    // 1:  mov foo_in_GOT, %eax
      0xe8, 0,    0,    0,    0,    // 6:  call plt+0x20
      0xe9, 0,    0,    0,    0,    // b:  jmp plt+0x11
      0x68, 0,    0,    0,    0,    // 10: pushl $reloc_offset
      0xe9, 0,    0,    0,    0,    // 15: jmp plt+0
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
      0xcc,                         // 1f: int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  unsigned off = pltEntryAddr - in.plt->getVA();
  write32le(buf + 2, sym.getGotPltVA());
  write32le(buf + 7, -off - 11 + 32);
  write32le(buf + 12, -off - 16 + 17);
  write32le(buf + 17, relOff);
  write32le(buf + 22, -off - 26);
}

TargetInfo *elf::getX86TargetInfo() {
  if (config->zRetpolineplt) {
    if (config->isPic) {
      static RetpolinePic t;
      return &t;
    }
    static RetpolineNoPic t;
    return &t;
  }

  if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
    static IntelIBT t;
    return &t;
  }

  static X86 t;
  return &t;
}