[RISCV] Add "lla" pseudo-instruction to assembler

This pseudo-instruction is similar to la but uses PC-relative addressing
unconditionally. That is, la only differs from lla when using -fPIC. This
pseudo-instruction seems to be often forgotten in several specs, but it is
definitely mentioned in binutils opcodes/riscv-opc.c. Its semantics are defined
both on page 37 of the "RISC-V Reader" book and in the function macro found in
gas/config/tc-riscv.c.

This is a very first step towards adding PIC support for Linux in the RISC-V
backend.

The lla pseudo-instruction expands to a sequence of auipc + addi with a couple
of pc-rel relocations where the second points to the first one. This is
described in
https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md#pc-relative-symbol-addresses

For now, this patch only introduces support for that pseudo-instruction in the
assembler parser.

Differential Revision: https://reviews.llvm.org/D49661

llvm-svn: 339314
This commit is contained in:
Roger Ferrer Ibanez 2018-08-09 07:08:20 +00:00
parent 7164b7d347
commit 577a97e2b9
4 changed files with 103 additions and 3 deletions

View File

@ -73,6 +73,9 @@ class RISCVAsmParser : public MCTargetAsmParser {
// synthesize the desired immedate value into the destination register. // synthesize the desired immedate value into the destination register.
void emitLoadImm(unsigned DestReg, int64_t Value, MCStreamer &Out); void emitLoadImm(unsigned DestReg, int64_t Value, MCStreamer &Out);
// Helper to emit pseudo instruction "lla" used in PC-rel addressing.
void emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
/// Helper for processing MC instructions that have been successfully matched /// Helper for processing MC instructions that have been successfully matched
/// by MatchAndEmitInstruction. Modifications to the emitted instructions, /// by MatchAndEmitInstruction. Modifications to the emitted instructions,
/// like the expansion of pseudo instructions (e.g., "li"), can be performed /// like the expansion of pseudo instructions (e.g., "li"), can be performed
@ -964,6 +967,26 @@ bool RISCVAsmParser::parseOperand(OperandVector &Operands,
return true; return true;
} }
/// Decide whether the operand at position \p OperandIdx of the instruction
/// mnemonic \p Name has to be parsed as an immediate rather than tried as a
/// register first. Pseudoinstructions such as tail, call and lla accept a bare
/// symbol in that position, and the symbol may be spelled like a register
/// name.
///
/// \returns true when the operand must be 'forced' to parse as an immediate.
static bool shouldForceImediateOperand(StringRef Name, unsigned OperandIdx) {
  // FIXME: This may not scale so perhaps we want to use a data-driven approach
  // instead.

  // First operand:
  //   call imm
  //   tail imm
  if (OperandIdx == 0)
    return Name == "call" || Name == "tail";

  // Second operand:
  //   lla rdest, imm
  if (OperandIdx == 1)
    return Name == "lla";

  // No mnemonic forces an immediate at any other operand position.
  return false;
}
bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info, bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc, StringRef Name, SMLoc NameLoc,
OperandVector &Operands) { OperandVector &Operands) {
@ -975,18 +998,20 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
return false; return false;
// Parse first operand // Parse first operand
bool ForceImmediate = (Name == "call" || Name == "tail"); if (parseOperand(Operands, shouldForceImediateOperand(Name, 0)))
if (parseOperand(Operands, ForceImmediate))
return true; return true;
// Parse until end of statement, consuming commas between operands // Parse until end of statement, consuming commas between operands
unsigned OperandIdx = 1;
while (getLexer().is(AsmToken::Comma)) { while (getLexer().is(AsmToken::Comma)) {
// Consume comma token // Consume comma token
getLexer().Lex(); getLexer().Lex();
// Parse next operand // Parse next operand
if (parseOperand(Operands, false)) if (parseOperand(Operands, shouldForceImediateOperand(Name, OperandIdx)))
return true; return true;
++OperandIdx;
} }
if (getLexer().isNot(AsmToken::EndOfStatement)) { if (getLexer().isNot(AsmToken::EndOfStatement)) {
@ -1184,6 +1209,39 @@ void RISCVAsmParser::emitLoadImm(unsigned DestReg, int64_t Value,
.addImm(Lo12)); .addImm(Lo12));
} }
/// Expand the "lla" (load local address) pseudo-instruction into the
/// PC-relative AUIPC + ADDI pair and emit both to \p Out.
///
/// \param Inst  The matched pseudo-instruction; operand 0 is the destination
///              register and operand 1 is the symbol expression.
/// \param IDLoc Source location of the instruction (currently unused here).
/// \param Out   Streamer that receives the temporary label and the two
///              expanded instructions.
void RISCVAsmParser::emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc,
                                          MCStreamer &Out) {
  // The local load address pseudo-instruction "lla" is used in PC-relative
  // addressing of symbols:
  //   lla rdest, symbol
  // expands to
  //   TmpLabel: AUIPC rdest, %pcrel_hi(symbol)
  //             ADDI rdest, rdest, %pcrel_lo(TmpLabel)
  MCContext &Ctx = getContext();

  // The %pcrel_lo relocation refers back to the AUIPC, so the AUIPC needs a
  // label of its own.
  MCSymbol *TmpLabel = Ctx.createTempSymbol(
      "pcrel_hi", /* AlwaysAddSuffix */ true, /* CanBeUnnamed */ false);
  Out.EmitLabel(TmpLabel);

  MCOperand DestReg = Inst.getOperand(0);
  const RISCVMCExpr *Symbol = RISCVMCExpr::create(
      Inst.getOperand(1).getExpr(), RISCVMCExpr::VK_RISCV_PCREL_HI, Ctx);

  // Copy the built instruction into a local MCInst. Binding an 'MCInst &' to
  // the MCInstBuilder expression would reference the instruction stored inside
  // the temporary builder, which is destroyed at the end of the declaration,
  // leaving a dangling reference by the time it is emitted.
  MCInst AUIPC =
      MCInstBuilder(RISCV::AUIPC).addOperand(DestReg).addExpr(Symbol);
  emitToStreamer(Out, AUIPC);

  const MCExpr *RefToLinkTmpLabel =
      RISCVMCExpr::create(MCSymbolRefExpr::create(TmpLabel, Ctx),
                          RISCVMCExpr::VK_RISCV_PCREL_LO, Ctx);

  // Same as above: keep the ADDI as a value, not a reference into a temporary.
  MCInst ADDI = MCInstBuilder(RISCV::ADDI)
                    .addOperand(DestReg)
                    .addOperand(DestReg)
                    .addExpr(RefToLinkTmpLabel);
  emitToStreamer(Out, ADDI);
}
bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
MCStreamer &Out) { MCStreamer &Out) {
Inst.setLoc(IDLoc); Inst.setLoc(IDLoc);
@ -1198,6 +1256,9 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
Imm = SignExtend64<32>(Imm); Imm = SignExtend64<32>(Imm);
emitLoadImm(Reg, Imm, Out); emitLoadImm(Reg, Imm, Out);
return false; return false;
} else if (Inst.getOpcode() == RISCV::PseudoLLA) {
emitLoadLocalAddress(Inst, IDLoc, Out);
return false;
} }
emitToStreamer(Out, Inst); emitToStreamer(Out, Inst);

View File

@ -759,6 +759,11 @@ def : Pat<(Tail (iPTR tglobaladdr:$dst)),
def : Pat<(Tail (iPTR texternalsym:$dst)), def : Pat<(Tail (iPTR texternalsym:$dst)),
(PseudoTAIL texternalsym:$dst)>; (PseudoTAIL texternalsym:$dst)>;
// "lla" pseudo-instruction, written in assembly as `lla $dst, $src`.
// $dst is a GPR output and $src is a bare_symbol input. The pattern list is
// empty and the definition is marked isAsmParserOnly with isCodeGenOnly
// cleared; the assembler parser expands PseudoLLA in
// RISCVAsmParser::processInstruction.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"lla", "$dst, $src">;
/// Loads /// Loads
multiclass LdPat<PatFrag LoadOp, RVInst Inst> { multiclass LdPat<PatFrag LoadOp, RVInst Inst> {

View File

@ -0,0 +1,6 @@
# RUN: not llvm-mc -triple=riscv32 < %s 2>&1 | FileCheck %s
# RUN: not llvm-mc -triple=riscv64 < %s 2>&1 | FileCheck %s
# Non-bare symbols must be rejected
lla a2, %lo(a_symbol) # CHECK: :[[@LINE]]:9: error: operand must be a bare symbol name
lla a2, %hi(a_symbol) # CHECK: :[[@LINE]]:9: error: operand must be a bare symbol name

View File

@ -0,0 +1,28 @@
# RUN: llvm-mc %s -triple=riscv32 | FileCheck %s
# RUN: llvm-mc %s -triple=riscv64 | FileCheck %s
# CHECK: .Lpcrel_hi0:
# CHECK: auipc a0, %pcrel_hi(a_symbol)
# CHECK: addi a0, a0, %pcrel_lo(.Lpcrel_hi0)
lla a0, a_symbol
# CHECK: .Lpcrel_hi1:
# CHECK: auipc a1, %pcrel_hi(another_symbol)
# CHECK: addi a1, a1, %pcrel_lo(.Lpcrel_hi1)
lla a1, another_symbol
# Check that we can load the address of symbols that are spelled like a register
# CHECK: .Lpcrel_hi2:
# CHECK: auipc a2, %pcrel_hi(zero)
# CHECK: addi a2, a2, %pcrel_lo(.Lpcrel_hi2)
lla a2, zero
# CHECK: .Lpcrel_hi3:
# CHECK: auipc a3, %pcrel_hi(ra)
# CHECK: addi a3, a3, %pcrel_lo(.Lpcrel_hi3)
lla a3, ra
# CHECK: .Lpcrel_hi4:
# CHECK: auipc a4, %pcrel_hi(f1)
# CHECK: addi a4, a4, %pcrel_lo(.Lpcrel_hi4)
lla a4, f1