From 1d445a6e7679cc188fd051f7397b7d9ca8ce4f10 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Thu, 22 Jul 2021 14:12:50 -0700 Subject: [PATCH] Reland: "[WebAssembly] Deduplicate imports of the same module name, field name, and type" When two symbols import the same thing, only one import should be emitted in the Wasm file. Fixes https://bugs.llvm.org/show_bug.cgi?id=50938 Reverted in: 16aac493e59519377071e900d119ba2e7e5b525d. Reviewed By: sbc100 Differential Revision: https://reviews.llvm.org/D105519 --- lld/test/wasm/duplicate-function-imports.s | 57 ++++++++++++++++ lld/test/wasm/duplicate-global-imports.s | 69 +++++++++++++++++++ lld/test/wasm/duplicate-table-imports.s | 75 +++++++++++++++++++++ lld/wasm/SyntheticSections.cpp | 44 +++++++++--- lld/wasm/SyntheticSections.h | 65 ++++++++++++++++++ llvm/include/llvm/BinaryFormat/Wasm.h | 10 +++ llvm/include/llvm/BinaryFormat/WasmTraits.h | 43 ++++++++++++ 7 files changed, 355 insertions(+), 8 deletions(-) create mode 100644 lld/test/wasm/duplicate-function-imports.s create mode 100644 lld/test/wasm/duplicate-global-imports.s create mode 100644 lld/test/wasm/duplicate-table-imports.s diff --git a/lld/test/wasm/duplicate-function-imports.s b/lld/test/wasm/duplicate-function-imports.s new file mode 100644 index 000000000000..e2c942f62408 --- /dev/null +++ b/lld/test/wasm/duplicate-function-imports.s @@ -0,0 +1,57 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o +# RUN: wasm-ld -o %t1.wasm %t.o +# RUN: obj2yaml %t1.wasm | FileCheck %s + +.globl f1 +.import_module f1, env +.import_name f1, f +.functype f1 () -> (i32) + +# Same import module/name/type as `f1`, should be de-duped. +.globl f2 +.import_module f2, env +.import_name f2, f +.functype f2 () -> (i32) + +# Same import module/name, but different type. Should not be de-duped. +.globl f3 +.import_module f3, env +.import_name f3, f +.functype f3 () -> (f32) + +.globl _start +_start: + .functype _start () -> () + call f1 + drop + call f2 + drop + call f3 + drop + end_function + + +# CHECK: - Type: TYPE +# CHECK-NEXT: Signatures: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: ParamTypes: [] +# CHECK-NEXT: ReturnTypes: +# CHECK-NEXT: - I32 +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: ParamTypes: [] +# CHECK-NEXT: ReturnTypes: +# CHECK-NEXT: - F32 +# CHECK-NEXT: - Index: 2 +# CHECK-NEXT: ParamTypes: [] +# CHECK-NEXT: ReturnTypes: [] +# CHECK-NEXT: - Type: IMPORT +# CHECK-NEXT: Imports: +# CHECK-NEXT: - Module: env +# CHECK-NEXT: Field: f +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: SigIndex: 0 +# CHECK-NEXT: - Module: env +# CHECK-NEXT: Field: f +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: SigIndex: 1 +# CHECK-NEXT: - Type: diff --git a/lld/test/wasm/duplicate-global-imports.s b/lld/test/wasm/duplicate-global-imports.s new file mode 100644 index 000000000000..8f057cd4f19e --- /dev/null +++ b/lld/test/wasm/duplicate-global-imports.s @@ -0,0 +1,69 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o +# RUN: wasm-ld --no-check-features -o %t1.wasm %t.o +# RUN: obj2yaml %t1.wasm | FileCheck %s + +.global g1 +.import_module g1, env +.import_name g1, g +.globaltype g1, i64, immutable + +# Same import module/name/type as `g1`, should be de-duped. +.global g2 +.import_module g2, env +.import_name g2, g +.globaltype g2, i64, immutable + +# Imported as an i32 instead of i64, so should not be de-duped. +.global g3 +.import_module g3, env +.import_name g3, g +.globaltype g3, i32, immutable + +# Imported as mutable instead of immutable, so should not be de-duped. +.global g4 +.import_module g4, env +.import_name g4, g +.globaltype g4, i64 + +.globl _start +_start: + .functype _start () -> () + global.get g1 + drop + global.get g2 + drop + global.get g3 + drop + global.get g4 + drop + end_function + + +# CHECK: - Type: IMPORT +# CHECK-NEXT: Imports: +# CHECK-NEXT: - Module: env +# CHECK-NEXT: Field: g +# CHECK-NEXT: Kind: GLOBAL +# CHECK-NEXT: GlobalType: I64 +# CHECK-NEXT: GlobalMutable: false +# CHECK-NEXT: - Module: env +# CHECK-NEXT: Field: g +# CHECK-NEXT: Kind: GLOBAL +# CHECK-NEXT: GlobalType: I32 +# CHECK-NEXT: GlobalMutable: false +# CHECK-NEXT: - Module: env +# CHECK-NEXT: Field: g +# CHECK-NEXT: Kind: GLOBAL +# CHECK-NEXT: GlobalType: I64 +# CHECK-NEXT: GlobalMutable: true +# CHECK-NEXT: - Type: + +# CHECK: GlobalNames: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Name: g1 +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Name: g3 +# CHECK-NEXT: - Index: 2 +# CHECK-NEXT: Name: g4 +# CHECK-NEXT: - Index: 3 +# CHECK-NEXT: Name: __stack_pointer diff --git a/lld/test/wasm/duplicate-table-imports.s b/lld/test/wasm/duplicate-table-imports.s new file mode 100644 index 000000000000..aa3307e22182 --- /dev/null +++ b/lld/test/wasm/duplicate-table-imports.s @@ -0,0 +1,75 @@ +# RUN: llvm-mc -mattr=+reference-types -triple=wasm32-unknown-unknown -filetype=obj -o %t.o %s +# RUN: wasm-ld --allow-undefined -o %t1.wasm %t.o +# RUN: obj2yaml %t1.wasm | FileCheck %s + +.tabletype t1, funcref +.import_module t1, env +.import_name t1, t +.globl t1 + +# Same import module/name/type as `t1`, should be de-duped. +.tabletype t2, funcref +.import_module t2, env +.import_name t2, t +.globl t2 + +# Imported as an externref instead of funcref, so should not be de-duped. +.tabletype t3, externref +.import_module t3, env +.import_name t3, t +.globl t3 + +.globl _start +_start: + .functype _start () -> () + + # Read from `t1` + i32.const 0 + table.get t1 + drop + + # Read from `t2` + i32.const 0 + table.get t2 + drop + + # Read from `t3` + i32.const 0 + table.get t3 + drop + + end_function + +## XXX: the second imported table has index 1, not 0. I've verified by hand +## (with `wasm2wat`) that the resulting Wasm file is correct: `t3` does end up +## at index 1 and our `table.get` instructions are using the proper table index +## immediates. This is also asserted (less legibly) in the hexdump of the code +## body below. It looks like there's a bug in how `obj2yaml` disassembles +## multiple table imports. + +# CHECK: - Type: IMPORT +# CHECK-NEXT: Imports: +# CHECK-NEXT: - Module: env +# CHECK-NEXT: Field: t +# CHECK-NEXT: Kind: TABLE +# CHECK-NEXT: Table: +# CHECK-NEXT: Index: 0 +# CHECK-NEXT: ElemType: FUNCREF +# CHECK-NEXT: Limits: +# CHECK-NEXT: Minimum: 0x0 +# CHECK-NEXT: - Module: env +# CHECK-NEXT: Field: t +# CHECK-NEXT: Kind: TABLE +# CHECK-NEXT: Table: +# CHECK-NEXT: Index: 0 +# CHECK-NEXT: ElemType: EXTERNREF +# CHECK-NEXT: Limits: +# CHECK-NEXT: Minimum: 0x0 +# CHECK-NEXT: - Type: + +# CHECK: - Type: CODE +# CHECK-NEXT: Functions: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Locals: [] +# CHECK-NEXT: Body: 41002580808080001A41002580808080001A41002581808080001A0B +# CHECK-NEXT: - Type: diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index dcbf1302cc91..fc37115844b7 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -109,15 +109,43 @@ void ImportSection::addGOTEntry(Symbol *sym) { void ImportSection::addImport(Symbol *sym) { assert(!isSealed); - importedSymbols.emplace_back(sym); - if (auto *f = dyn_cast(sym)) - f->setFunctionIndex(numImportedFunctions++); - else if (auto *g = dyn_cast(sym)) - g->setGlobalIndex(numImportedGlobals++); - else if (auto *t = dyn_cast(sym)) + StringRef module = sym->importModule.getValueOr(defaultModule); + StringRef name = sym->importName.getValueOr(sym->getName()); + if (auto *f = dyn_cast(sym)) { + ImportKey key(*(f->getSignature()), module, name); + auto entry = importedFunctions.try_emplace(key, numImportedFunctions); + if (entry.second) { + importedSymbols.emplace_back(sym); + f->setFunctionIndex(numImportedFunctions++); + } else { + f->setFunctionIndex(entry.first->second); + } + } else if (auto *g = dyn_cast(sym)) { + ImportKey key(*(g->getGlobalType()), module, name); + auto entry = importedGlobals.try_emplace(key, numImportedGlobals); + if (entry.second) { + importedSymbols.emplace_back(sym); + g->setGlobalIndex(numImportedGlobals++); + } else { + g->setGlobalIndex(entry.first->second); + } + } else if (auto *t = dyn_cast(sym)) { + // NB: There's currently only one possible kind of tag, and no + // `UndefinedTag`, so we don't bother de-duplicating tag imports. + importedSymbols.emplace_back(sym); t->setTagIndex(numImportedTags++); - else - cast(sym)->setTableNumber(numImportedTables++); + } else { + assert(TableSymbol::classof(sym)); + auto *table = cast(sym); + ImportKey key(*(table->getTableType()), module, name); + auto entry = importedTables.try_emplace(key, numImportedTables); + if (entry.second) { + importedSymbols.emplace_back(sym); + table->setTableNumber(numImportedTables++); + } else { + table->setTableNumber(entry.first->second); + } + } } void ImportSection::writeBody() { diff --git a/lld/wasm/SyntheticSections.h b/lld/wasm/SyntheticSections.h index f57fc8832dd2..8c678fa0853f 100644 --- a/lld/wasm/SyntheticSections.h +++ b/lld/wasm/SyntheticSections.h @@ -96,6 +96,68 @@ protected: llvm::DenseMap typeIndices; }; +/** + * A key for some kind of imported entity of type `T`. + * + * Used when de-duplicating imports. + */ +template struct ImportKey { +public: + enum class State { Plain, Empty, Tombstone }; + +public: + T type; + llvm::Optional importModule; + llvm::Optional importName; + State state; + +public: + ImportKey(T type) : type(type), state(State::Plain) {} + ImportKey(T type, State state) : type(type), state(state) {} + ImportKey(T type, llvm::Optional importModule, + llvm::Optional importName) + : type(type), importModule(importModule), importName(importName), + state(State::Plain) {} +}; + +template +inline bool operator==(const ImportKey &lhs, const ImportKey &rhs) { + return lhs.state == rhs.state && lhs.importModule == rhs.importModule && + lhs.importName == rhs.importName && lhs.type == rhs.type; +} + +} // namespace wasm +} // namespace lld + +// `ImportKey` can be used as a key in a `DenseMap` if `T` can be used as a +// key in a `DenseMap`. +template struct llvm::DenseMapInfo> { + static lld::wasm::ImportKey getEmptyKey() { + typename lld::wasm::ImportKey key(llvm::DenseMapInfo::getEmptyKey()); + key.state = lld::wasm::ImportKey::State::Empty; + return key; + } + static lld::wasm::ImportKey getTombstoneKey() { + typename lld::wasm::ImportKey key(llvm::DenseMapInfo::getEmptyKey()); + key.state = lld::wasm::ImportKey::State::Tombstone; + return key; + } + static unsigned getHashValue(const lld::wasm::ImportKey &key) { + uintptr_t hash = hash_value(key.importModule); + hash = hash_combine(hash, key.importName); + hash = hash_combine(hash, llvm::DenseMapInfo::getHashValue(key.type)); + hash = hash_combine(hash, key.state); + return hash; + } + static bool isEqual(const lld::wasm::ImportKey &lhs, + const lld::wasm::ImportKey &rhs) { + return lhs == rhs; + } +}; + +namespace lld { +namespace wasm { + class ImportSection : public SyntheticSection { public: ImportSection() : SyntheticSection(llvm::wasm::WASM_SEC_IMPORT) {} @@ -131,6 +193,9 @@ protected: unsigned numImportedFunctions = 0; unsigned numImportedTags = 0; unsigned numImportedTables = 0; + llvm::DenseMap, uint32_t> importedGlobals; + llvm::DenseMap, uint32_t> importedFunctions; + llvm::DenseMap, uint32_t> importedTables; }; class FunctionSection : public SyntheticSection { diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index 61544c364a3b..c38e64928521 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -429,6 +429,16 @@ inline bool operator!=(const WasmGlobalType &LHS, const WasmGlobalType &RHS) { return !(LHS == RHS); } +inline bool operator==(const WasmLimits &LHS, const WasmLimits &RHS) { + return LHS.Flags == RHS.Flags && LHS.Minimum == RHS.Minimum && + (LHS.Flags & WASM_LIMITS_FLAG_HAS_MAX ? LHS.Maximum == RHS.Maximum + : true); +} + +inline bool operator==(const WasmTableType &LHS, const WasmTableType &RHS) { + return LHS.ElemType == RHS.ElemType && LHS.Limits == RHS.Limits; +} + std::string toString(WasmSymbolType type); std::string relocTypetoString(uint32_t type); bool relocTypeHasAddend(uint32_t type); diff --git a/llvm/include/llvm/BinaryFormat/WasmTraits.h b/llvm/include/llvm/BinaryFormat/WasmTraits.h index e34182499187..930ee690bcc0 100644 --- a/llvm/include/llvm/BinaryFormat/WasmTraits.h +++ b/llvm/include/llvm/BinaryFormat/WasmTraits.h @@ -63,6 +63,49 @@ template <> struct DenseMapInfo { } }; +// Traits for using WasmLimits in a DenseMap +template <> struct DenseMapInfo { + static wasm::WasmLimits getEmptyKey() { + return wasm::WasmLimits{0xff, 0xff, 0xff}; + } + static wasm::WasmLimits getTombstoneKey() { + return wasm::WasmLimits{0xee, 0xee, 0xee}; + } + static unsigned getHashValue(const wasm::WasmLimits &Limits) { + unsigned Hash = hash_value(Limits.Flags); + Hash = hash_combine(Hash, Limits.Minimum); + if (Limits.Flags & llvm::wasm::WASM_LIMITS_FLAG_HAS_MAX) { + Hash = hash_combine(Hash, Limits.Maximum); + } + return Hash; + } + static bool isEqual(const wasm::WasmLimits &LHS, + const wasm::WasmLimits &RHS) { + return LHS == RHS; + } +}; + +// Traits for using WasmTableType in a DenseMap +template <> struct DenseMapInfo { + static wasm::WasmTableType getEmptyKey() { + return wasm::WasmTableType{0, + DenseMapInfo::getEmptyKey()}; + } + static wasm::WasmTableType getTombstoneKey() { + return wasm::WasmTableType{ + 1, DenseMapInfo::getTombstoneKey()}; + } + static unsigned getHashValue(const wasm::WasmTableType &TableType) { + return hash_combine( + TableType.ElemType, + DenseMapInfo::getHashValue(TableType.Limits)); + } + static bool isEqual(const wasm::WasmTableType &LHS, + const wasm::WasmTableType &RHS) { + return LHS == RHS; + } +}; + } // end namespace llvm #endif // LLVM_BINARYFORMAT_WASMTRAITS_H