[utils] Refactor utils/update_{,llc_}test_checks.py to share more code

Summary: This revision refactors (1) the parser and (2) the CHECK line adder of utils/update_{,llc_}test_checks.py so that their functionality can be reused by other utility scripts (e.g. D42712). Reviewers: asb, craig.topper, RKSimon, echristo Subscribers: llvm-commits, spatel Differential Revision: https://reviews.llvm.org/D42805 llvm-svn: 324803
2018-02-10 05:01:33 +00:00 · 2018-02-10 05:01:33 +00:00 · 4f0f426d1f
parent 3da7205114
commit 4f0f426d1f
4 changed files with 189 additions and 165 deletions
--- a/llvm/utils/UpdateTestChecks/asm.py
+++ b/llvm/utils/UpdateTestChecks/asm.py
@ -1,10 +1,18 @@
 import re
-import string
+import sys

 from . import common

+if sys.version_info[0] > 2:
+  class string:
+    expandtabs = str.expandtabs
+else:
+  import string
+
 # RegEx: this is where the magic happens.

+##### Assembly parser
+
 ASM_FUNCTION_X86_RE = re.compile(
    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
@ -197,3 +205,29 @@ def build_function_body_dictionary_for_triple(args, raw_tool_output, triple, pre
  common.build_function_body_dictionary(
          function_re, scrubber, [args], raw_tool_output, prefixes,
          func_dict, args.verbose)
+
+##### Generator of assembly CHECK lines
+
+def add_asm_checks(output_lines, comment_marker, run_list, func_dict, func_name):
+  printed_prefixes = []
+  for p in run_list:
+    checkprefixes = p[0]
+    for checkprefix in checkprefixes:
+      if checkprefix in printed_prefixes:
+        break
+      # TODO func_dict[checkprefix] may be None, '' or not exist.
+      # Fix the call sites.
+      if func_name not in func_dict[checkprefix] or not func_dict[checkprefix][func_name]:
+        continue
+      # Add some space between different check prefixes.
+      if len(printed_prefixes) != 0:
+        output_lines.append(comment_marker)
+      printed_prefixes.append(checkprefix)
+      output_lines.append('%s %s-LABEL: %s:' % (comment_marker, checkprefix, func_name))
+      func_body = func_dict[checkprefix][func_name].splitlines()
+      output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
+      for func_line in func_body[1:]:
+        output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, func_line))
+      # Add space between different check prefixes and the first line of code.
+      # output_lines.append(';')
+      break
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@ -1,22 +1,16 @@
 from __future__ import print_function
 import re
+import string
 import subprocess
 import sys

-RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$')
-CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)')
-CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
+if sys.version_info[0] > 2:
+  class string:
+    expandtabs = str.expandtabs
+else:
+  import string

-IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
-TRIPLE_IR_RE = re.compile(r'^target\s+triple\s*=\s*"([^"]+)"$')
-TRIPLE_ARG_RE = re.compile(r'-mtriple=([^ ]+)')
-
-SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
-SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
-SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
-SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
-SCRUB_LOOP_COMMENT_RE = re.compile(
-    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
+##### Common utilities for update_*test_checks.py

 def should_add_line_to_output(input_line, prefix_set):
  # Skip any blank comment lines in the IR.
@ -42,6 +36,38 @@ def invoke_tool(exe, cmd_args, ir):
  # Fix line endings to unix CR style.
  return stdout.replace('\r\n', '\n')

+##### LLVM IR parser
+
+RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$')
+CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)')
+CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
+
+OPT_FUNCTION_RE = re.compile(
+    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
+    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
+    flags=(re.M | re.S))
+
+IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
+TRIPLE_IR_RE = re.compile(r'^target\s+triple\s*=\s*"([^"]+)"$')
+TRIPLE_ARG_RE = re.compile(r'-mtriple=([^ ]+)')
+
+SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
+SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
+SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
+SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
+SCRUB_LOOP_COMMENT_RE = re.compile(
+    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
+
+def scrub_body(body):
+  # Scrub runs of whitespace out of the assembly, but leave the leading
+  # whitespace in place.
+  body = SCRUB_WHITESPACE_RE.sub(r' ', body)
+  # Expand the tabs used for indentation.
+  body = string.expandtabs(body, 2)
+  # Strip trailing whitespace.
+  body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
+  return body
+
 # Build up a dictionary of all the function bodies.
 def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose):
  for m in function_re.finditer(raw_tool_output):
@ -66,3 +92,114 @@ def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_too
          continue

      func_dict[prefix][func] = scrubbed_body
+
+##### Generator of LLVM IR CHECK lines
+
+SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
+
+# Match things that look like identifiers, but only if they are followed by
+# spaces, commas, paren, or end of the string
+IR_VALUE_RE = re.compile(r'(\s+)%([\w\.]+?)([,\s\(\)]|\Z)')
+
+# Create a FileCheck variable name based on an IR name.
+def get_value_name(var):
+  if var.isdigit():
+    var = 'TMP' + var
+  var = var.replace('.', '_')
+  return var.upper()
+
+
+# Create a FileCheck variable from regex.
+def get_value_definition(var):
+  return '[[' + get_value_name(var) + ':%.*]]'
+
+
+# Use a FileCheck variable.
+def get_value_use(var):
+  return '[[' + get_value_name(var) + ']]'
+
+# Replace IR value defs and uses with FileCheck variables.
+def genericize_check_lines(lines):
+  # This gets called for each match that occurs in
+  # a line. We transform variables we haven't seen
+  # into defs, and variables we have seen into uses.
+  def transform_line_vars(match):
+    var = match.group(2)
+    if var in vars_seen:
+      rv = get_value_use(var)
+    else:
+      vars_seen.add(var)
+      rv = get_value_definition(var)
+    # re.sub replaces the entire regex match
+    # with whatever you return, so we have
+    # to make sure to hand it back everything
+    # including the commas and spaces.
+    return match.group(1) + rv + match.group(3)
+
+  vars_seen = set()
+  lines_with_def = []
+
+  for i, line in enumerate(lines):
+    # An IR variable named '%.' matches the FileCheck regex string.
+    line = line.replace('%.', '%dot')
+    # Ignore any comments, since the check lines will too.
+    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
+    lines[i] =  IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
+  return lines
+
+
+def add_ir_checks(output_lines, prefix_list, func_dict, func_name, opt_basename):
+  # Label format is based on IR string.
+  check_label_format = "; %s-LABEL: @%s("
+
+  printed_prefixes = []
+  for checkprefixes, _ in prefix_list:
+    for checkprefix in checkprefixes:
+      if checkprefix in printed_prefixes:
+        break
+      if not func_dict[checkprefix][func_name]:
+        continue
+      # Add some space between different check prefixes, but not after the last
+      # check line (before the test code).
+      #if len(printed_prefixes) != 0:
+      #  output_lines.append(';')
+      printed_prefixes.append(checkprefix)
+      output_lines.append(check_label_format % (checkprefix, func_name))
+      func_body = func_dict[checkprefix][func_name].splitlines()
+
+      # For IR output, change all defs to FileCheck variables, so we're immune
+      # to variable naming fashions.
+      func_body = genericize_check_lines(func_body)
+
+      # This could be selectively enabled with an optional invocation argument.
+      # Disabled for now: better to check everything. Be safe rather than sorry.
+
+      # Handle the first line of the function body as a special case because
+      # it's often just noise (a useless asm comment or entry label).
+      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
+      #  is_blank_line = True
+      #else:
+      #  output_lines.append('; %s:       %s' % (checkprefix, func_body[0]))
+      #  is_blank_line = False
+
+      is_blank_line = False
+
+      for func_line in func_body:
+        if func_line.strip() == '':
+          is_blank_line = True
+          continue
+        # Do not waste time checking IR comments.
+        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)
+
+        # Skip blank lines instead of checking them.
+        if is_blank_line == True:
+          output_lines.append('; %s:       %s' % (checkprefix, func_line))
+        else:
+          output_lines.append('; %s-NEXT:  %s' % (checkprefix, func_line))
+        is_blank_line = False
+
+      # Add space between different check prefixes and also before the first
+      # line of code in the test function.
+      output_lines.append(';')
+      break
+  return output_lines
--- a/llvm/utils/update_llc_test_checks.py
+++ b/llvm/utils/update_llc_test_checks.py
@ -19,30 +19,6 @@ from UpdateTestChecks import asm, common
 ADVERT = '; NOTE: Assertions have been autogenerated by '


-def add_checks(output_lines, run_list, func_dict, func_name):
-  printed_prefixes = []
-  for p in run_list:
-    checkprefixes = p[0]
-    for checkprefix in checkprefixes:
-      if checkprefix in printed_prefixes:
-        break
-      if not func_dict[checkprefix][func_name]:
-        continue
-      # Add some space between different check prefixes.
-      if len(printed_prefixes) != 0:
-        output_lines.append(';')
-      printed_prefixes.append(checkprefix)
-      output_lines.append('; %s-LABEL: %s:' % (checkprefix, func_name))
-      func_body = func_dict[checkprefix][func_name].splitlines()
-      output_lines.append('; %s:       %s' % (checkprefix, func_body[0]))
-      for func_line in func_body[1:]:
-        output_lines.append('; %s-NEXT:  %s' % (checkprefix, func_line))
-      # Add space between different check prefixes and the first line of code.
-      # output_lines.append(';')
-      break
-  return output_lines
-
-
 def main():
  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('-v', '--verbose', action='store_true',
@ -156,7 +132,7 @@ def main():
            continue

        # Print out the various check lines here.
-        output_lines = add_checks(output_lines, run_list, func_dict, func_name)
+        asm.add_asm_checks(output_lines, ';', run_list, func_dict, func_name)
        is_in_function_start = False

      if is_in_function:
--- a/llvm/utils/update_test_checks.py
+++ b/llvm/utils/update_test_checks.py
@ -44,133 +44,10 @@ ADVERT = '; NOTE: Assertions have been autogenerated by '

 # RegEx: this is where the magic happens.

-SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
-
 IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
-OPT_FUNCTION_RE = re.compile(
-    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
-    r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
-    flags=(re.M | re.S))
-# Match things that look at identifiers, but only if they are followed by
-# spaces, commas, paren, or end of the string
-IR_VALUE_RE = re.compile(r'(\s+)%([\w\.]+?)([,\s\(\)]|\Z)')



-def scrub_body(body, opt_basename):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  body = common.SCRUB_WHITESPACE_RE.sub(r' ', body)
-  # Expand the tabs used for indentation.
-  body = string.expandtabs(body, 2)
-  # Strip trailing whitespace.
-  body = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
-  return body
-
-
-
-# Create a FileCheck variable name based on an IR name.
-def get_value_name(var):
-  if var.isdigit():
-    var = 'TMP' + var
-  var = var.replace('.', '_')
-  return var.upper()
-
-
-# Create a FileCheck variable from regex.
-def get_value_definition(var):
-  return '[[' + get_value_name(var) + ':%.*]]'
-
-
-# Use a FileCheck variable.
-def get_value_use(var):
-  return '[[' + get_value_name(var) + ']]'
-
-# Replace IR value defs and uses with FileCheck variables.
-def genericize_check_lines(lines):
-  # This gets called for each match that occurs in
-  # a line. We transform variables we haven't seen
-  # into defs, and variables we have seen into uses.
-  def transform_line_vars(match):
-    var = match.group(2)
-    if var in vars_seen:
-      rv = get_value_use(var)
-    else:
-      vars_seen.add(var)
-      rv = get_value_definition(var)
-    # re.sub replaces the entire regex match
-    # with whatever you return, so we have
-    # to make sure to hand it back everything
-    # including the commas and spaces.
-    return match.group(1) + rv + match.group(3)
-
-  vars_seen = set()
-  lines_with_def = []
-
-  for i, line in enumerate(lines):
-    # An IR variable named '%.' matches the FileCheck regex string.
-    line = line.replace('%.', '%dot')
-    # Ignore any comments, since the check lines will too.
-    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
-    lines[i] =  IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
-  return lines
-
-
-def add_checks(output_lines, prefix_list, func_dict, func_name, opt_basename):
-  # Label format is based on IR string.
-  check_label_format = "; %s-LABEL: @%s("
-
-  printed_prefixes = []
-  for checkprefixes, _ in prefix_list:
-    for checkprefix in checkprefixes:
-      if checkprefix in printed_prefixes:
-        break
-      if not func_dict[checkprefix][func_name]:
-        continue
-      # Add some space between different check prefixes, but not after the last
-      # check line (before the test code).
-      #if len(printed_prefixes) != 0:
-      #  output_lines.append(';')
-      printed_prefixes.append(checkprefix)
-      output_lines.append(check_label_format % (checkprefix, func_name))
-      func_body = func_dict[checkprefix][func_name].splitlines()
-
-      # For IR output, change all defs to FileCheck variables, so we're immune
-      # to variable naming fashions.
-      func_body = genericize_check_lines(func_body)
-
-      # This could be selectively enabled with an optional invocation argument.
-      # Disabled for now: better to check everything. Be safe rather than sorry.
-
-      # Handle the first line of the function body as a special case because
-      # it's often just noise (a useless asm comment or entry label).
-      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
-      #  is_blank_line = True
-      #else:
-      #  output_lines.append('; %s:       %s' % (checkprefix, func_body[0]))
-      #  is_blank_line = False
-
-      is_blank_line = False
-
-      for func_line in func_body:
-        if func_line.strip() == '':
-          is_blank_line = True
-          continue
-        # Do not waste time checking IR comments.
-        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)
-
-        # Skip blank lines instead of checking them.
-        if is_blank_line == True:
-          output_lines.append('; %s:       %s' % (checkprefix, func_line))
-        else:
-          output_lines.append('; %s-NEXT:  %s' % (checkprefix, func_line))
-        is_blank_line = False
-
-      # Add space between different check prefixes and also before the first
-      # line of code in the test function.
-      output_lines.append(';')
-      break
-  return output_lines


 def main():
@ -247,8 +124,8 @@ def main():

      raw_tool_output = common.invoke_tool(args.opt_binary, opt_args, test)
      common.build_function_body_dictionary(
-              OPT_FUNCTION_RE, scrub_body, [opt_basename], raw_tool_output,
-              prefixes, func_dict, args.verbose)
+              common.OPT_FUNCTION_RE, common.scrub_body, [],
+              raw_tool_output, prefixes, func_dict, args.verbose)

    is_in_function = False
    is_in_function_start = False
@ -269,7 +146,7 @@ def main():
            continue

        # Print out the various check lines here.
-        output_lines = add_checks(output_lines, prefix_list, func_dict, func_name, opt_basename)
+        output_lines = common.add_ir_checks(output_lines, prefix_list, func_dict, func_name, opt_basename)
        is_in_function_start = False

      if is_in_function: