[LTO][MC] Discard non-prevailing defined symbols in module-level assembly

This is the alternative approach to D96931.

In LTO, for each module with inlineasm block, prepend directive ".lto_discard <sym>, <sym>*" to the beginning of the inline
asm.  ".lto_discard" is both a module inlineasm block marker and (optionally) provides a list of symbols to be discarded.

In MC while emitting for inlineasm, discard symbol binding & symbol
definitions according to ".lto_disard".

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D98762
This commit is contained in:
Yuanfang Chen 2021-03-18 15:32:29 -07:00
parent 2df65f87c1
commit b4a8c0ebb6
8 changed files with 231 additions and 3 deletions

View File

@ -396,7 +396,6 @@ namespace llvm {
void initInlineSourceManager();
SourceMgr *getInlineSourceManager() {
assert(InlineSrcMgr);
return InlineSrcMgr.get();
}
std::vector<const MDNode *> &getLocInfos() { return LocInfos; }

View File

@ -182,6 +182,8 @@ public:
virtual void setParsingMSInlineAsm(bool V) = 0;
virtual bool isParsingMSInlineAsm() = 0;
virtual bool discardLTOSymbol(StringRef) const { return false; }
virtual bool isParsingMasm() const { return false; }
virtual bool defineMacro(StringRef Name, StringRef Value) { return true; }

View File

@ -11,7 +11,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/LTO/LTO.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@ -752,6 +754,7 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
Skip();
std::set<const Comdat *> NonPrevailingComdats;
SmallSet<StringRef, 2> NonPrevailingAsmSymbols;
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
@ -798,7 +801,14 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::
DefaultStorageClass);
}
} else if (auto *AS = Msym.dyn_cast<ModuleSymbolTable::AsmSymbol *>()) {
// Collect non-prevailing symbols.
if (!Res.Prevailing)
NonPrevailingAsmSymbols.insert(AS->first);
} else {
llvm_unreachable("unknown symbol type");
}
// Common resolution: collect the maximum size/alignment over all commons.
// We also record if we see an instance of a common as prevailing, so that
// if none is prevailing we can ignore it later.
@ -812,11 +822,29 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
CommonRes.Align = max(*SymAlign, CommonRes.Align);
CommonRes.Prevailing |= Res.Prevailing;
}
}
if (!M.getComdatSymbolTable().empty())
for (GlobalValue &GV : M.global_values())
handleNonPrevailingComdat(GV, NonPrevailingComdats);
// Prepend ".lto_discard <sym>, <sym>*" directive to each module inline asm
// block.
if (!M.getModuleInlineAsm().empty()) {
std::string NewIA = ".lto_discard";
if (!NonPrevailingAsmSymbols.empty()) {
// Don't dicard a symbol if there is a live .symver for it.
ModuleSymbolTable::CollectAsmSymvers(
M, [&](StringRef Name, StringRef Alias) {
if (!NonPrevailingAsmSymbols.count(Alias))
NonPrevailingAsmSymbols.erase(Name);
});
NewIA += " " + llvm::join(NonPrevailingAsmSymbols, ", ");
}
NewIA += "\n";
M.setModuleInlineAsm(NewIA + M.getModuleInlineAsm());
}
assert(MsymI == MsymE);
return std::move(Mod);
}

View File

@ -15,6 +15,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@ -168,6 +169,8 @@ private:
/// List of forward directional labels for diagnosis at the end.
SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
SmallSet<StringRef, 2> LTODiscardSymbols;
/// AssemblerDialect. ~OU means unset value and use value provided by MAI.
unsigned AssemblerDialect = ~0U;
@ -235,6 +238,10 @@ public:
}
bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
bool discardLTOSymbol(StringRef Name) const override {
return LTODiscardSymbols.contains(Name);
}
bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
unsigned &NumOutputs, unsigned &NumInputs,
SmallVectorImpl<std::pair<void *,bool>> &OpDecls,
@ -516,6 +523,7 @@ private:
DK_ADDRSIG,
DK_ADDRSIG_SYM,
DK_PSEUDO_PROBE,
DK_LTO_DISCARD,
DK_END
};
@ -682,6 +690,9 @@ private:
// .pseudoprobe
bool parseDirectivePseudoProbe();
// ".lto_discard"
bool parseDirectiveLTODiscard();
// Directives to support address-significance tables.
bool parseDirectiveAddrsig();
bool parseDirectiveAddrsigSym();
@ -892,6 +903,8 @@ bool AsmParser::enabledGenDwarfForAssembly() {
}
bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
LTODiscardSymbols.clear();
// Create the initial section, if requested.
if (!NoInitialTextSection)
Out.InitSections(false);
@ -1770,7 +1783,6 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
StringMap<DirectiveKind>::const_iterator DirKindIt =
DirectiveKindMap.find(IDVal.lower());
DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
? DK_NO_DIRECTIVE
: DirKindIt->getValue();
switch (DirKind) {
@ -1868,6 +1880,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
Lex();
}
if (discardLTOSymbol(IDVal))
return false;
getTargetParser().doBeforeLabelEmit(Sym);
// Emit the label.
@ -2208,6 +2223,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveAddrsigSym();
case DK_PSEUDO_PROBE:
return parseDirectivePseudoProbe();
case DK_LTO_DISCARD:
return parseDirectiveLTODiscard();
}
return Error(IDLoc, "unknown directive");
@ -2852,6 +2869,9 @@ bool AsmParser::parseAssignment(StringRef Name, bool allow_redef,
return false;
}
if (discardLTOSymbol(Name))
return false;
// Do the assignment.
Out.emitAssignment(Sym, Value);
if (NoDeadStrip)
@ -4870,6 +4890,10 @@ bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
SMLoc Loc = getTok().getLoc();
if (parseIdentifier(Name))
return Error(Loc, "expected identifier");
if (discardLTOSymbol(Name))
return false;
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
// Assembler local symbols don't make any sense here. Complain loudly.
@ -5493,6 +5517,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".addrsig"] = DK_ADDRSIG;
DirectiveKindMap[".addrsig_sym"] = DK_ADDRSIG_SYM;
DirectiveKindMap[".pseudoprobe"] = DK_PSEUDO_PROBE;
DirectiveKindMap[".lto_discard"] = DK_LTO_DISCARD;
}
MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
@ -5806,6 +5831,27 @@ bool AsmParser::parseDirectivePseudoProbe() {
return false;
}
/// parseDirectiveLTODiscard
/// ::= ".lto_discard" [ identifier ( , identifier )* ]
/// The LTO library emits this directive to discard non-prevailing symbols.
/// We ignore symbol assignments and attribute changes for the specified
/// symbols.
bool AsmParser::parseDirectiveLTODiscard() {
auto ParseOp = [&]() -> bool {
StringRef Name;
SMLoc Loc = getTok().getLoc();
if (parseIdentifier(Name))
return Error(Loc, "expected identifier");
LTODiscardSymbols.insert(Name);
return false;
};
LTODiscardSymbols.clear();
if (parseMany(ParseOp))
return addErrorSuffix(" in directive");
return false;
}
// We are comparing pointers, but the pointers are relative to a single string.
// Thus, this should always be deterministic.
static int rewritesSort(const AsmRewrite *AsmRewriteA,

View File

@ -182,6 +182,12 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
if (getParser().discardLTOSymbol(Name)) {
if (getLexer().is(AsmToken::EndOfStatement))
break;
continue;
}
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
getStreamer().emitSymbolAttribute(Sym, Attr);

View File

@ -0,0 +1,87 @@
; Check that non-prevailing symbols in module inline assembly are discarded
; during regular LTO otherwise the final symbol binding could be wrong.
; RUN: split-file %s %t
; RUN: opt %t/t1.ll -o %t1
; RUN: opt %t/t2.ll -o %t2
; RUN: opt %t/t3.ll -o %t3
; RUN: opt %t/t4.ll -o %t4
; RUN: llvm-lto2 run -o %to1 -save-temps %t1 %t2 \
; RUN: -r %t1,foo,px \
; RUN: -r %t2,foo, \
; RUN: -r %t2,bar,pl
; RUN: llvm-dis < %to1.0.0.preopt.bc | FileCheck %s --check-prefix=ASM1
; RUN: llvm-nm %to1.0 | FileCheck %s --check-prefix=SYM
; RUN: llvm-objdump -d --disassemble-symbols=foo %to1.0 \
; RUN: | FileCheck %s --check-prefix=DEF
; RUN: llvm-lto2 run -o %to2 -save-temps %t2 %t3 \
; RUN: -r %t2,foo, \
; RUN: -r %t2,bar,pl \
; RUN: -r %t3,foo,px
; RUN: llvm-dis < %to2.0.0.preopt.bc | FileCheck %s --check-prefix=ASM2
; RUN: llvm-nm %to2.0 | FileCheck %s --check-prefix=SYM
; RUN: llvm-objdump -d --disassemble-symbols=foo %to2.0 \
; RUN: | FileCheck %s --check-prefix=DEF
; Check that ".symver" is properly handled.
; RUN: llvm-lto2 run -o %to3 -save-temps %t4 \
; RUN: -r %t4,bar, \
; RUN: -r %t4,foo, \
; RUN: -r %t4,foo@@VER1,px
; RUN: llvm-dis < %to3.0.0.preopt.bc | FileCheck %s --check-prefix=ASM3
; ASM1: module asm ".lto_discard foo"
; ASM1-NEXT: module asm ".weak foo"
; ASM1-NEXT: module asm ".equ foo,bar"
; ASM2: module asm ".lto_discard foo"
; ASM2-NEXT: module asm ".weak foo"
; ASM2-NEXT: module asm ".equ foo,bar"
; ASM2-NEXT: module asm ".lto_discard"
; ASM2-NEXT: module asm " .global foo ; foo: leal 2(%rdi), %eax"
; ASM3-NOT: module asm ".lto_discard foo"
; SYM: T foo
; DEF: leal 2(%rdi), %eax
;--- t1.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define dso_local i32 @foo(i32 %0) {
%2 = add nsw i32 %0, 2
ret i32 %2
}
;--- t2.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
module asm ".weak foo"
module asm ".equ foo,bar"
@llvm.compiler.used = appending global [1 x i8*] [i8* bitcast (i32 (i32)* @bar to i8*)], section "llvm.metadata"
define internal i32 @bar(i32 %0) {
%2 = add nsw i32 %0, 1
ret i32 %2
}
;--- t3.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
module asm " .global foo ; foo: leal 2(%rdi), %eax"
;--- t4.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
module asm ".global foo"
module asm "foo: call bar"
module asm ".symver foo,foo@@@VER1"
module asm ".symver bar,bar@@@VER1"

View File

@ -0,0 +1,29 @@
; Check that
; 1. ".lto_discard" works as module inlineasm marker and its argument symbols
; are discarded.
; 2. there is no reassignment error in the presence of ".lto_discard"
; RUN: llc < %s | FileCheck %s
; CHECK: .data
; CHECK-NOT: .weak foo
; CHECK-NOT: .set foo, bar
; CHECK: .globl foo
; CHECK: foo:
; CHECK: .byte 1
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
module asm ".lto_discard foo"
module asm " .text"
module asm "bar:"
module asm " .data"
module asm ".weak foo"
module asm ".set foo, bar"
module asm ".weak foo"
module asm ".set foo, bar"
module asm ".lto_discard"
module asm ".globl foo"
module asm "foo:"
module asm " .byte 1"

View File

@ -0,0 +1,31 @@
// Check that ".lto_discard" ignores symbol assignments and attribute changes
// for the specified symbols.
// RUN: llvm-mc -triple x86_64 < %s | FileCheck %s
// Check that ".lto_discard" only accepts identifiers.
// RUN: not llvm-mc -filetype=obj -triple x86_64 --defsym ERR=1 %s 2>&1 |\
// RUN: FileCheck %s --check-prefix=ERR
// CHECK: .weak foo
// CHECK: foo:
// CHECK: .byte 1
// CHECK: .weak bar
// CHECK: bar:
// CHECK: .byte 2
.lto_discard foo
.weak foo
foo:
.byte 1
.lto_discard
.weak bar
bar:
.byte 2
.ifdef ERR
.text
# ERR: {{.*}}.s:[[#@LINE+1]]:14: error: expected identifier in directive
.lto_discard 1
.endif