[WebAssembly] Made disassembler only use stack instructions.

Summary:
The disassembler now uses the StackBased bit from the tablegen defs to identify
stack instructions (and to ignore register-based or non-wasm instructions).

Also changed how we store operands, since we now have up to 16 of them
per instruction. To avoid static data bloat, these are compressed
into a tiny table.

+ a few other cleanups.

Tested:
- MCTest
- llvm-lit -v `find test -name WebAssembly`

Reviewers: dschuff, jgravelle-google, sunfish, tlively

Subscribers: sbc100, aheejin, llvm-commits

Differential Revision: https://reviews.llvm.org/D51320

llvm-svn: 341081
This commit is contained in:
Wouter van Oortmerssen 2018-08-30 15:40:53 +00:00
parent 2305c049a3
commit a733d08db2
3 changed files with 55 additions and 27 deletions

View File

@ -140,7 +140,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
MI.setOpcode(WasmInst->Opcode); MI.setOpcode(WasmInst->Opcode);
// Parse any operands. // Parse any operands.
for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
switch (WasmInst->Operands[OPI]) { switch (OperandTable[WasmInst->OperandStart + OPI]) {
// ULEB operands: // ULEB operands:
case WebAssembly::OPERAND_BASIC_BLOCK: case WebAssembly::OPERAND_BASIC_BLOCK:
case WebAssembly::OPERAND_LOCAL: case WebAssembly::OPERAND_LOCAL:
@ -194,15 +194,12 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
return MCDisassembler::Fail; return MCDisassembler::Fail;
break; break;
} }
case MCOI::OPERAND_REGISTER: { case MCOI::OPERAND_REGISTER:
// These are NOT actually in the instruction stream, but MC is going to // The tablegen header currently does not have any register operands since
// expect operands to be present for them! // we use only the stack (_S) instructions.
// FIXME: can MC re-generate register assignments or do we have to // If you hit this that probably means a bad instruction definition in
// do this? Since this function decodes a single instruction, we don't // tablegen.
// have the proper context for tracking an operand stack here. llvm_unreachable("Register operand in WebAssemblyDisassembler");
MI.addOperand(MCOperand::createReg(0));
break;
}
default: default:
llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
} }

View File

@ -31,6 +31,6 @@
# CHECK: i64.trunc_u:sat/f64 # CHECK: i64.trunc_u:sat/f64
0xFC 0x07 0xFC 0x07
# v128.const is arbitrarily disassembled as v2f64 # v128.const is arbitrarily disassembled as v16i8
# CHECK: v128.const 0x1.60504030201p-911, 0x1.e0d0c0b0a0908p-783 # CHECK: v128.const 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
0xFD 0x00 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F 0xFD 0x00 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F

View File

@ -42,15 +42,16 @@ void emitWebAssemblyDisassemblerTables(
auto Prefix = Opc >> 8; auto Prefix = Opc >> 8;
Opc = Opc & 0xFF; Opc = Opc & 0xFF;
auto &CGIP = OpcodeTable[Prefix][Opc]; auto &CGIP = OpcodeTable[Prefix][Opc];
if (!CGIP.second || // All wasm instructions have a StackBased field of type bit, we only want
// Make sure we store the variant with the least amount of operands, // the instructions for which this is 1.
// which is the one without explicit registers. Only few instructions auto Bit = Def.getValue("StackBased")->getValue()->
// have these currently, would be good to have for all of them. getCastTo(BitRecTy::get());
// FIXME: this picks the first of many typed variants, which is auto IsStackBased = Bit && reinterpret_cast<const BitInit *>(Bit)
// currently the except_ref one, though this shouldn't matter for ->getValue();
// disassembly purposes. if (IsStackBased && !CGIP.second) {
CGIP.second->Operands.OperandList.size() > // this picks the first of many typed variants, which is
CGI.Operands.OperandList.size()) { // currently the except_ref one, though this shouldn't matter for
// disassembly purposes.
CGIP = std::make_pair(I, &CGI); CGIP = std::make_pair(I, &CGI);
} }
} }
@ -63,8 +64,9 @@ void emitWebAssemblyDisassemblerTables(
OS << " uint16_t Opcode;\n"; OS << " uint16_t Opcode;\n";
OS << " EntryType ET;\n"; OS << " EntryType ET;\n";
OS << " uint8_t NumOperands;\n"; OS << " uint8_t NumOperands;\n";
OS << " uint8_t Operands[4];\n"; OS << " uint16_t OperandStart;\n";
OS << "};\n\n"; OS << "};\n\n";
std::vector<std::string> OperandTable, CurOperandList;
// Output one table per prefix. // Output one table per prefix.
for (auto &PrefixPair : OpcodeTable) { for (auto &PrefixPair : OpcodeTable) {
if (PrefixPair.second.empty()) if (PrefixPair.second.empty())
@ -81,24 +83,53 @@ void emitWebAssemblyDisassemblerTables(
OS.write_hex(static_cast<unsigned long long>(I)); OS.write_hex(static_cast<unsigned long long>(I));
OS << ": " << CGI.AsmString << "\n"; OS << ": " << CGI.AsmString << "\n";
OS << " { " << InstIt->second.first << ", ET_Instruction, "; OS << " { " << InstIt->second.first << ", ET_Instruction, ";
OS << CGI.Operands.OperandList.size() << ", {\n"; OS << CGI.Operands.OperandList.size() << ", ";
// Collect operand types for storage in a shared list.
CurOperandList.clear();
for (auto &Op : CGI.Operands.OperandList) { for (auto &Op : CGI.Operands.OperandList) {
OS << " " << Op.OperandType << ",\n"; CurOperandList.push_back(Op.OperandType);
} }
OS << " }\n"; // See if we already have stored this sequence before. This is not
// strictly necessary but makes the table really small.
size_t OperandStart = OperandTable.size();
if (CurOperandList.size() <= OperandTable.size()) {
for (size_t J = 0; J <= OperandTable.size() - CurOperandList.size();
++J) {
size_t K = 0;
for (; K < CurOperandList.size(); ++K) {
if (OperandTable[J + K] != CurOperandList[K]) break;
}
if (K == CurOperandList.size()) {
OperandStart = J;
break;
}
}
}
// Store operands if no prior occurrence.
if (OperandStart == OperandTable.size()) {
OperandTable.insert(OperandTable.end(), CurOperandList.begin(),
CurOperandList.end());
}
OS << OperandStart;
} else { } else {
auto PrefixIt = OpcodeTable.find(I); auto PrefixIt = OpcodeTable.find(I);
// If we have a non-empty table for it that's not 0, this is a prefix. // If we have a non-empty table for it that's not 0, this is a prefix.
if (PrefixIt != OpcodeTable.end() && I && !PrefixPair.first) { if (PrefixIt != OpcodeTable.end() && I && !PrefixPair.first) {
OS << " { 0, ET_Prefix, 0, {}"; OS << " { 0, ET_Prefix, 0, 0";
} else { } else {
OS << " { 0, ET_Unused, 0, {}"; OS << " { 0, ET_Unused, 0, 0";
} }
} }
OS << " },\n"; OS << " },\n";
} }
OS << "};\n\n"; OS << "};\n\n";
} }
// Create a table of all operands:
OS << "const uint8_t OperandTable[] = {\n";
for (auto &Op : OperandTable) {
OS << " " << Op << ",\n";
}
OS << "};\n\n";
// Create a table of all extension tables: // Create a table of all extension tables:
OS << "struct { uint8_t Prefix; const WebAssemblyInstruction *Table; }\n"; OS << "struct { uint8_t Prefix; const WebAssemblyInstruction *Table; }\n";
OS << "PrefixTable[] = {\n"; OS << "PrefixTable[] = {\n";