[TableGen] Add the !substr() bang operator

Update the documentation and add a test. Fix a build failure by changing SIZE_MAX to std::numeric_limits<int64_t>::max(). Differential Revision: https://reviews.llvm.org/D93419
2020-12-16 09:55:16 -05:00 · 2020-12-16 09:55:16 -05:00 · e122a71a0a
parent 9fb074e7bb
commit e122a71a0a
8 changed files with 216 additions and 10 deletions
--- a/llvm/docs/TableGen/ProgRef.rst
+++ b/llvm/docs/TableGen/ProgRef.rst
@ -216,7 +216,8 @@ TableGen provides "bang operators" that have a wide variety of uses:
               : !interleave !isa         !le          !listconcat  !listsplat
               : !lt         !mul         !ne          !not         !or
               : !setdagop   !shl         !size        !sra         !srl
-               : !strconcat  !sub         !subst       !tail        !xor
+               : !strconcat  !sub         !subst       !substr      !tail
+               : !xor

 The ``!cond`` operator has a slightly different
 syntax compared to other bang operators, so it is defined separately:
@ -1723,6 +1724,13 @@ and non-0 as true.
    record if the *target* record name equals the *value* record name; otherwise it
    produces the *value*.

+``!substr(``\ *string*\ ``,`` *start*\ [``,`` *length*]\ ``)``
+    This operator extracts a substring of the given *string*. The substring
+    begins at position *start*, which must be between 0 and the length of the
+    string, inclusive. It is *length* characters long (*length* must be
+    nonnegative) or shorter if the string ends first; if *length* is omitted,
+    the rest of the string is extracted. Both arguments must be integers.
+
 ``!tail(``\ *a*\ ``)``
    This operator produces a new list with all the elements
    of the list *a* except for the zeroth one. (See also ``!head``.)
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@ -829,7 +829,7 @@ public:
 /// !op (X, Y, Z) - Combine two inits.
 class TernOpInit : public OpInit, public FoldingSetNode {
 public:
-  enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG };
+  enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG, SUBSTR };

 private:
  Init *LHS, *MHS, *RHS;
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@ -1325,6 +1325,27 @@ Init *TernOpInit::Fold(Record *CurRec) const {
    }
    break;
  }
+
+  case SUBSTR: {
+    StringInit *LHSs = dyn_cast<StringInit>(LHS);
+    IntInit *MHSi = dyn_cast<IntInit>(MHS);
+    IntInit *RHSi = dyn_cast<IntInit>(RHS);
+    if (LHSs && MHSi && RHSi) {
+      int64_t StringSize = LHSs->getValue().size();
+      int64_t Start = MHSi->getValue();
+      int64_t Length = RHSi->getValue();
+      if (Start < 0 || Start > StringSize)
+        PrintError(CurRec->getLoc(),
+                   Twine("!substr start position is out of range 0...") +
+                       std::to_string(StringSize) + ": " +
+                       std::to_string(Start));
+      if (Length < 0)
+        PrintError(CurRec->getLoc(), "!substr length must be nonnegative");
+      return StringInit::get(LHSs->getValue().substr(Start, Length),
+                             LHSs->getFormat());
+    }
+    break;
+  }
  }

  return const_cast<TernOpInit *>(this);
@ -1364,11 +1385,12 @@ std::string TernOpInit::getAsString() const {
  std::string Result;
  bool UnquotedLHS = false;
  switch (getOpcode()) {
-  case SUBST: Result = "!subst"; break;
-  case FOREACH: Result = "!foreach"; UnquotedLHS = true; break;
-  case FILTER: Result = "!filter"; UnquotedLHS = true; break;
-  case IF: Result = "!if"; break;
  case DAG: Result = "!dag"; break;
+  case FILTER: Result = "!filter"; UnquotedLHS = true; break;
+  case FOREACH: Result = "!foreach"; UnquotedLHS = true; break;
+  case IF: Result = "!if"; break;
+  case SUBST: Result = "!subst"; break;
+  case SUBSTR: Result = "!substr"; break;
  }
  return (Result + "(" +
          (UnquotedLHS ? LHS->getAsUnquotedString() : LHS->getAsString()) +
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@ -589,6 +589,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
    .Case("listsplat", tgtok::XListSplat)
    .Case("strconcat", tgtok::XStrConcat)
    .Case("interleave", tgtok::XInterleave)
+    .Case("substr", tgtok::XSubstr)
    .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
    .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.
    .Default(tgtok::Error);
--- a/llvm/lib/TableGen/TGLexer.h
+++ b/llvm/lib/TableGen/TGLexer.h
@ -53,9 +53,9 @@ namespace tgtok {

    // Bang operators.
    XConcat, XADD, XSUB, XMUL, XNOT, XAND, XOR, XXOR, XSRA, XSRL, XSHL,
-    XListConcat, XListSplat, XStrConcat, XInterleave, XCast, XSubst, XForEach,
-    XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XCond, XEq, XIsA,
-    XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,
+    XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XCast,
+    XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf,
+    XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,

    // Boolean literals.
    TrueVal, FalseVal,
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@ -25,6 +25,7 @@
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
+#include <limits>

 using namespace llvm;

@ -1496,6 +1497,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
    return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
  }

+  case tgtok::XSubstr:
+    return ParseOperationSubstr(CurRec, ItemType);
+
  case tgtok::XCond:
    return ParseOperationCond(CurRec, ItemType);

@ -1655,6 +1659,94 @@ RecTy *TGParser::ParseOperatorType() {
  return Type;
 }

+/// Parse the !substr operation. Return null on error.
+///
+/// Substr ::= !substr(string, start-int [, length-int]) => string
+Init *TGParser::ParseOperationSubstr(Record *CurRec, RecTy *ItemType) {
+  TernOpInit::TernaryOp Code = TernOpInit::SUBSTR;
+  RecTy *Type = StringRecTy::get();
+
+  Lex.Lex(); // eat the operation
+
+  if (!consume(tgtok::l_paren)) {
+    TokError("expected '(' after !substr operator");
+    return nullptr;
+  }
+
+  Init *LHS = ParseValue(CurRec);
+  if (!LHS)
+    return nullptr;
+
+  if (!consume(tgtok::comma)) {
+    TokError("expected ',' in !substr operator");
+    return nullptr;
+  }
+
+  SMLoc MHSLoc = Lex.getLoc();
+  Init *MHS = ParseValue(CurRec);
+  if (!MHS)
+    return nullptr;
+
+  SMLoc RHSLoc = Lex.getLoc();
+  Init *RHS;
+  if (consume(tgtok::comma)) {
+    RHSLoc = Lex.getLoc();
+    RHS = ParseValue(CurRec);
+    if (!RHS)
+      return nullptr;
+  } else {
+    RHS = IntInit::get(std::numeric_limits<int64_t>::max());
+  }
+
+  if (!consume(tgtok::r_paren)) {
+    TokError("expected ')' in !substr operator");
+    return nullptr;
+  }
+
+  if (ItemType && !Type->typeIsConvertibleTo(ItemType)) {
+    Error(RHSLoc, Twine("expected value of type '") +
+                  ItemType->getAsString() + "', got '" +
+                  Type->getAsString() + "'");
+  }
+
+  TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
+  if (!LHSt && !isa<UnsetInit>(LHS)) {
+    TokError("could not determine type of the string in !substr");
+    return nullptr;
+  }
+  if (LHSt && !isa<StringRecTy>(LHSt->getType())) {
+    TokError(Twine("expected string, got type '") +
+             LHSt->getType()->getAsString() + "'");
+    return nullptr;
+  }
+
+  TypedInit *MHSt = dyn_cast<TypedInit>(MHS);
+  if (!MHSt && !isa<UnsetInit>(MHS)) {
+    TokError("could not determine type of the start position in !substr");
+    return nullptr;
+  }
+  if (MHSt && !isa<IntRecTy>(MHSt->getType())) {
+    Error(MHSLoc, Twine("expected int, got type '") +
+                      MHSt->getType()->getAsString() + "'");
+    return nullptr;
+  }
+
+  if (RHS) {
+    TypedInit *RHSt = dyn_cast<TypedInit>(RHS);
+    if (!RHSt && !isa<UnsetInit>(RHS)) {
+      TokError("could not determine type of the length in !substr");
+      return nullptr;
+    }
+    if (RHSt && !isa<IntRecTy>(RHSt->getType())) {
+      TokError(Twine("expected int, got type '") +
+               RHSt->getType()->getAsString() + "'");
+      return nullptr;
+    }
+  }
+
+  return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
+}
+
 /// Parse the !foreach and !filter operations. Return null on error.
 ///
 /// ForEach ::= !foreach(ID, list-or-dag, expr) => list<expr type>
@ -2206,7 +2298,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
  case tgtok::XFoldl:
  case tgtok::XForEach:
  case tgtok::XFilter:
-  case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
+  case tgtok::XSubst:
+  case tgtok::XSubstr: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
    return ParseOperation(CurRec, ItemType);
  }
  }
--- a/llvm/lib/TableGen/TGParser.h
+++ b/llvm/lib/TableGen/TGParser.h
@ -254,6 +254,7 @@ private:  // Parser methods.
                       TypedInit *FirstItem = nullptr);
  RecTy *ParseType();
  Init *ParseOperation(Record *CurRec, RecTy *ItemType);
+  Init *ParseOperationSubstr(Record *CurRec, RecTy *ItemType);
  Init *ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType);
  Init *ParseOperationCond(Record *CurRec, RecTy *ItemType);
  RecTy *ParseOperatorType();
--- a/llvm/test/TableGen/substr.td
+++ b/llvm/test/TableGen/substr.td
@ -0,0 +1,81 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
+
+defvar claim = "This is the end of the world!";
+
+// CHECK: def Rec1
+// CHECK:   fullNoLength = "This is the end of the world!";
+// CHECK:   fullLength = "This is the end of the world!";
+// CHECK:   thisIsTheEnd = "This is the end";
+// CHECK:   DoorsSong = "the end";
+// CHECK:   finalNoLength = "end of the world!";
+// CHECK:   finalLength = "end of the world!";
+
+def Rec1 { // Substrings of the defvar string, with and without a length.
+  string fullNoLength = !substr(claim, 0); // Length omitted: rest of string.
+  string fullLength = !substr(claim, 0, 999); // Length runs past the end.
+  string thisIsTheEnd = !substr(claim, 0, 15); // Prefix of 15 characters.
+  string DoorsSong = !substr(claim, 8, 7); // Interior slice.
+  string finalNoLength = !substr(claim, 12); // Tail, length omitted.
+  string finalLength = !substr(claim, 12, !sub(!size(claim), 12)); // Same tail.
+}
+
+// CHECK: def Rec2 {
+// CHECK:   lastName = "Flintstone";
+
+def Rec2 { // The start position is computed from another field of the record.
+  string firstName = "Fred";
+  string name = firstName # " " # "Flintstone";
+  string lastName = !substr(name, !add(!size(firstName), 1)); // Skip "Fred ".
+}
+
+// CHECK: def Rec3 {
+// CHECK:   test1 = "";
+// CHECK:   test2 = "";
+// CHECK:   test3 = "";
+// CHECK:   test4 = "h";
+// CHECK:   test5 = "hello";
+// CHECK:   test6 = "";
+
+def Rec3 { // Boundary cases: empty string, zero length, start at the end.
+  string test1 = !substr("", 0, 0);
+  string test2 = !substr("", 0, 9); // Length exceeds an empty string.
+  string test3 = !substr("hello", 0, 0); // Zero length gives "".
+  string test4 = !substr("hello", 0, 1);
+  string test5 = !substr("hello", 0, 99); // Length exceeds the string.
+  string test6 = !substr("hello", 5, 99); // Start equals the string length.
+}
+
+// CHECK: def Rec4
+// CHECK:   message = "This is the end of the world!";
+// CHECK:   messagePrefix = "This is th...";
+// CHECK:   warning = "Bad message: 'This is th...'";
+
+class C<string msg> { // !substr applied to a field set from a template argument.
+  string message = msg;
+  string messagePrefix = !substr(message, 0, 10) # "..."; // First 10 chars + "...".
+}
+
+def Rec4 : C<claim> { // Instantiates C with the defvar string.
+  string warning = "Bad message: '" # messagePrefix # "'"; // Uses the inherited prefix.
+}
+
+#ifdef ERROR1
+
+// ERROR1: expected string, got type 'int'
+// ERROR1: expected int, got type 'bits<3>'
+// ERROR1: expected int, got type 'string'
+// ERROR1: !substr start position is out of range 0...29: 30
+// ERROR1: !substr length must be nonnegative
+
+def Rec8 { // Each field has one operand of the wrong type.
+  string claim1 = !substr(42, 0, 3); // First operand is an int, not a string.
+  string claim2 = !substr(claim, 0b101); // Start is bits<3>, not an int.
+  string claim3 = !substr(claim, 0, "oops"); // Length is a string, not an int.
+}
+
+def Rec9 { // Operand types are fine, but the values are out of range.
+  string claim1 = !substr(claim, !add(!size(claim), 1)); // Start is size + 1.
+  string claim2 = !substr(claim, 0, -13); // Negative length.
+}
+#endif