From b99365a7f4a1c348f1237937e9775b3ce8441e08 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <minyihh@uci.edu>
Date: Tue, 14 Dec 2021 20:59:22 +0800
Subject: [PATCH] [TableGen] Add a new `encoder` directive into
 VarLenCodeEmitterGen

The new `encoder` directive can be used to specify a custom encoder for a
single operand or slice. This is different from the EncoderMethod field
within an Operand, which affects every use of that operand in the target.

In addition, this patch also changes the function signature of the
encoder method -- a new argument, InsertPos, is added to both the
default one (i.e. getMachineOpValue) and the custom one. This argument
provides the bit position where the operand will eventually be inserted.

Differential Revision: https://reviews.llvm.org/D119100
---
 llvm/include/llvm/Target/Target.td            |  5 ++
 .../M68k/MCTargetDesc/M68kMCCodeEmitter.cpp   |  5 +-
 llvm/test/TableGen/VarLenEncoder.td           | 34 ++++----
 llvm/utils/TableGen/VarLenCodeEmitterGen.cpp  | 78 +++++++++++++------
 4 files changed, 82 insertions(+), 40 deletions(-)
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index f457e5e84409..d39016c7d2a4 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -778,6 +778,11 @@ def operand;
 /// Both DAG represent bit 6 to 8 (total of 3 bits) in the encoding of operand
 /// `$src`.
 def slice;
+/// You can use `encoder` to specify a custom encoder function for a specific
+/// `operand` or `slice` directive. For example:
+///     (operand "$src", 4, (encoder "encodeMyImm"))
+///     (slice "$src", 8, 6, (encoder "encodeMyReg"))
+def encoder;
 
 /// PointerLikeRegClass - Values that are designed to have pointer width are
 /// derived from this.  TableGen treats the register class as having a symbolic
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
index e4ecd3b41824..c90126e52db0 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
@@ -43,7 +43,8 @@ class M68kMCCodeEmitter : public MCCodeEmitter {
                              APInt &Inst, APInt &Scratch,
                              const MCSubtargetInfo &STI) const;
 
-  void getMachineOpValue(const MCInst &MI, const MCOperand &Op, APInt &Value,
+  void getMachineOpValue(const MCInst &MI, const MCOperand &Op,
+                         unsigned InsertPos, APInt &Value,
                          SmallVectorImpl<MCFixup> &Fixups,
                          const MCSubtargetInfo &STI) const;
 
@@ -83,7 +84,7 @@ public:
 #include "M68kGenMCCodeEmitter.inc"
 
 void M68kMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &Op,
-                                          APInt &Value,
+                                          unsigned InsertPos, APInt &Value,
                                           SmallVectorImpl<MCFixup> &Fixups,
                                           const MCSubtargetInfo &STI) const {
   // Register
diff --git a/llvm/test/TableGen/VarLenEncoder.td b/llvm/test/TableGen/VarLenEncoder.td
index a1ea389ffad2..58dee97aa6ff 100644
--- a/llvm/test/TableGen/VarLenEncoder.td
+++ b/llvm/test/TableGen/VarLenEncoder.td
@@ -35,7 +35,9 @@ class MyVarInst<MyMemOperand memory_op> : Instruction {
     // Testing operand referencing.
     (operand "$dst", 4),
     // Testing operand referencing with a certain bit range.
-    (slice "$dst", 3, 1)
+    (slice "$dst", 3, 1),
+    // Testing custom encoder
+    (operand "$dst", 2, (encoder "myCustomEncoder"))
   );
 }
 
@@ -55,39 +57,45 @@ def FOO16 : MyVarInst<MemOp16<"src">>;
 def FOO32 : MyVarInst<MemOp32<"src">>;
 
 // The fixed bits part
-// CHECK: {/*NumBits*/39,
+// CHECK: {/*NumBits*/41,
 // CHECK-SAME: // FOO16
-// CHECK: {/*NumBits*/55,
+// CHECK: {/*NumBits*/57,
 // CHECK-SAME: // FOO32
 // CHECK: UINT64_C(46848), // FOO16
 // CHECK: UINT64_C(46848), // FOO32
 
 // CHECK-LABEL: case ::FOO16: {
-// CHECK: Scratch = Scratch.zextOrSelf(39);
+// CHECK: Scratch = Scratch.zextOrSelf(41);
 // src.reg
-// CHECK: getMachineOpValue(MI, MI.getOperand(1), Scratch, Fixups, STI);
+// CHECK: getMachineOpValue(MI, MI.getOperand(1), /*Pos=*/0, Scratch, Fixups, STI);
 // CHECK: Inst.insertBits(Scratch.extractBits(8, 0), 0);
 // src.offset
-// CHECK: getMachineOpValue(MI, MI.getOperand(2), Scratch, Fixups, STI);
+// CHECK: getMachineOpValue(MI, MI.getOperand(2), /*Pos=*/16, Scratch, Fixups, STI);
 // CHECK: Inst.insertBits(Scratch.extractBits(16, 0), 16);
 // 1st dst
-// CHECK: getMachineOpValue(MI, MI.getOperand(0), Scratch, Fixups, STI);
+// CHECK: getMachineOpValue(MI, MI.getOperand(0), /*Pos=*/32, Scratch, Fixups, STI);
 // CHECK: Inst.insertBits(Scratch.extractBits(4, 0), 32);
 // 2nd dst
-// CHECK: getMachineOpValue(MI, MI.getOperand(0), Scratch, Fixups, STI);
+// CHECK: getMachineOpValue(MI, MI.getOperand(0), /*Pos=*/36, Scratch, Fixups, STI);
 // CHECK: Inst.insertBits(Scratch.extractBits(3, 1), 36);
+// dst w/ custom encoder
+// CHECK: myCustomEncoder(MI, /*OpIdx=*/0, /*Pos=*/39, Scratch, Fixups, STI);
+// CHECK: Inst.insertBits(Scratch.extractBits(2, 0), 39);
 
 // CHECK-LABEL: case ::FOO32: {
-// CHECK: Scratch = Scratch.zextOrSelf(55);
+// CHECK: Scratch = Scratch.zextOrSelf(57);
 // src.reg
-// CHECK: getMachineOpValue(MI, MI.getOperand(1), Scratch, Fixups, STI);
+// CHECK: getMachineOpValue(MI, MI.getOperand(1), /*Pos=*/0, Scratch, Fixups, STI);
 // CHECK: Inst.insertBits(Scratch.extractBits(8, 0), 0);
 // src.offset
-// CHECK: getMachineOpValue(MI, MI.getOperand(2), Scratch, Fixups, STI);
+// CHECK: getMachineOpValue(MI, MI.getOperand(2), /*Pos=*/16, Scratch, Fixups, STI);
 // CHECK: Inst.insertBits(Scratch.extractBits(32, 0), 16);
 // 1st dst
-// CHECK: getMachineOpValue(MI, MI.getOperand(0), Scratch, Fixups, STI);
+// CHECK: getMachineOpValue(MI, MI.getOperand(0), /*Pos=*/48, Scratch, Fixups, STI);
 // CHECK: Inst.insertBits(Scratch.extractBits(4, 0), 48);
 // 2nd dst
-// CHECK: getMachineOpValue(MI, MI.getOperand(0), Scratch, Fixups, STI);
+// CHECK: getMachineOpValue(MI, MI.getOperand(0), /*Pos=*/52, Scratch, Fixups, STI);
 // CHECK: Inst.insertBits(Scratch.extractBits(3, 1), 52);
+// dst w/ custom encoder
+// CHECK: myCustomEncoder(MI, /*OpIdx=*/0, /*Pos=*/55, Scratch, Fixups, STI);
+// CHECK: Inst.insertBits(Scratch.extractBits(2, 0), 55);
diff --git a/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp b/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp
index 832c9053ffb9..90c97a069d2f 100644
--- a/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp
+++ b/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp
@@ -66,17 +66,30 @@ namespace {
 class VarLenCodeEmitterGen {
   RecordKeeper &Records;
 
+  struct EncodingSegment { // One segment of a variable-length encoding.
+    unsigned BitWidth; // Number of bits this segment occupies.
+    const Init *Value; // Fixed bits, or an operand / slice reference.
+    StringRef CustomEncoder = ""; // Per-segment encoder name; empty if none.
+  };
+
   class VarLenInst {
     size_t NumBits;
 
     // Set if any of the segment is not fixed value.
     bool HasDynamicSegment;
 
-    // {Number of bits, Value}
-    SmallVector<std::pair<unsigned, const Init *>, 4> Segments;
+    SmallVector<EncodingSegment, 4> Segments;
 
     void buildRec(const DagInit *DI);
 
+    StringRef getCustomEncoderName(const Init *EI) const { // Name, or "".
+      if (const auto *DI = dyn_cast<DagInit>(EI)) { // e.g. (encoder "name")
+        if (DI->getNumArgs() && isa<StringInit>(DI->getArg(0)))
+          return cast<StringInit>(DI->getArg(0))->getValue();
+      } // NOTE(review): the DAG operator is not checked to be `encoder`.
+      return ""; // Not a DAG whose first argument is a string.
+    }
+
   public:
     VarLenInst() : NumBits(0U), HasDynamicSegment(false) {}
 
@@ -117,7 +130,7 @@ public:
 VarLenCodeEmitterGen::VarLenInst::VarLenInst(const DagInit *DI) : NumBits(0U) {
   buildRec(DI);
   for (const auto &S : Segments)
-    NumBits += S.first;
+    NumBits += S.BitWidth;
 }
 
 void VarLenCodeEmitterGen::VarLenInst::buildRec(const DagInit *DI) {
@@ -146,9 +159,9 @@ void VarLenCodeEmitterGen::VarLenInst::buildRec(const DagInit *DI) {
       }
     }
   } else if (Op == "operand") {
-    // (operand <operand name>, <# of bits>)
-    if (DI->getNumArgs() != 2)
-      PrintFatalError("Expecting 2 arguments for `operand`");
+    // (operand <operand name>, <# of bits>, [(encoder <custom encoder>)])
+    if (DI->getNumArgs() < 2)
+      PrintFatalError("Expecting at least 2 arguments for `operand`");
     HasDynamicSegment = true;
     const Init *OperandName = DI->getArg(0), *NumBits = DI->getArg(1);
     if (!isa<StringInit>(OperandName) || !isa<IntInit>(NumBits))
@@ -158,11 +171,16 @@ void VarLenCodeEmitterGen::VarLenInst::buildRec(const DagInit *DI) {
     if (NumBitsVal <= 0)
       PrintFatalError("Invalid number of bits for `operand`");
 
-    Segments.push_back({NumBitsVal, OperandName});
+    StringRef CustomEncoder;
+    if (DI->getNumArgs() >= 3)
+      CustomEncoder = getCustomEncoderName(DI->getArg(2));
+    Segments.push_back(
+        {static_cast<unsigned>(NumBitsVal), OperandName, CustomEncoder});
   } else if (Op == "slice") {
-    // (slice <operand name>, <high / low bit>, <low / high bit>)
-    if (DI->getNumArgs() != 3)
-      PrintFatalError("Expecting 3 arguments for `slice`");
+    // (slice <operand name>, <high / low bit>, <low / high bit>,
+    //        [(encoder <custom encoder>)])
+    if (DI->getNumArgs() < 3)
+      PrintFatalError("Expecting at least 3 arguments for `slice`");
     HasDynamicSegment = true;
     Init *OperandName = DI->getArg(0), *HiBit = DI->getArg(1),
          *LoBit = DI->getArg(2);
@@ -183,13 +201,18 @@ void VarLenCodeEmitterGen::VarLenInst::buildRec(const DagInit *DI) {
       NumBits = static_cast<unsigned>(HiBitVal - LoBitVal + 1);
     }
 
+    StringRef CustomEncoder;
+    if (DI->getNumArgs() >= 4)
+      CustomEncoder = getCustomEncoderName(DI->getArg(3));
+
     if (NeedSwap) {
       // Normalization: Hi bit should always be the second argument.
       Init *const NewArgs[] = {OperandName, LoBit, HiBit};
-      Segments.push_back(
-          {NumBits, DagInit::get(DI->getOperator(), nullptr, NewArgs, {})});
+      Segments.push_back({NumBits,
+                          DagInit::get(DI->getOperator(), nullptr, NewArgs, {}),
+                          CustomEncoder});
     } else {
-      Segments.push_back({NumBits, DI});
+      Segments.push_back({NumBits, DI, CustomEncoder});
     }
   }
 }
@@ -372,14 +395,14 @@ void VarLenCodeEmitterGen::emitInstructionBaseValues(
     auto SI = VLI.begin(), SE = VLI.end();
     // Scan through all the segments that have fixed-bits values.
     while (i < BitWidth && SI != SE) {
-      unsigned SegmentNumBits = SI->first;
-      if (const auto *BI = dyn_cast<BitsInit>(SI->second)) {
+      unsigned SegmentNumBits = SI->BitWidth;
+      if (const auto *BI = dyn_cast<BitsInit>(SI->Value)) {
         for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) {
           auto *B = cast<BitInit>(BI->getBit(Idx));
           Value.setBitVal(i + Idx, B->getValue());
         }
       }
-      if (const auto *BI = dyn_cast<BitInit>(SI->second))
+      if (const auto *BI = dyn_cast<BitInit>(SI->Value))
         Value.setBitVal(i, BI->getValue());
 
       i += SegmentNumBits;
@@ -439,9 +462,9 @@ std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
 
   // Process each segment in VLI.
   size_t Offset = 0U;
-  for (const auto &Pair : VLI) {
-    unsigned NumBits = Pair.first;
-    const Init *Val = Pair.second;
+  for (const auto &ES : VLI) {
+    unsigned NumBits = ES.BitWidth;
+    const Init *Val = ES.Value;
     // If it's a StringInit or DagInit, it's a reference to an operand
     // or part of an operand.
     if (isa<StringInit>(Val) || isa<DagInit>(Val)) {
@@ -458,15 +481,20 @@ std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
 
       auto OpIdx = CGI.Operands.ParseOperandName(OperandName);
       unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx);
-      StringRef EncoderMethodName = "getMachineOpValue";
-      auto &CustomEncoder = CGI.Operands[OpIdx.first].EncoderMethodName;
-      if (!CustomEncoder.empty())
-        EncoderMethodName = CustomEncoder;
+      StringRef CustomEncoder = CGI.Operands[OpIdx.first].EncoderMethodName;
+      if (ES.CustomEncoder.size())
+        CustomEncoder = ES.CustomEncoder;
 
       SS.indent(6) << "Scratch.clearAllBits();\n";
       SS.indent(6) << "// op: " << OperandName.drop_front(1) << "\n";
-      SS.indent(6) << EncoderMethodName << "(MI, MI.getOperand("
-                   << utostr(FlatOpIdx) << "), Scratch, Fixups, STI);\n";
+      if (CustomEncoder.empty())
+        SS.indent(6) << "getMachineOpValue(MI, MI.getOperand("
+                     << utostr(FlatOpIdx) << ")";
+      else
+        SS.indent(6) << CustomEncoder << "(MI, /*OpIdx=*/" << utostr(FlatOpIdx);
+
+      SS << ", /*Pos=*/" << utostr(Offset) << ", Scratch, Fixups, STI);\n";
+
       SS.indent(6) << "Inst.insertBits("
                    << "Scratch.extractBits(" << utostr(NumBits) << ", "
                    << utostr(LoBit) << ")"