[UpdateTestChecks] Prevent rapid onset insanity when forced to write LoopVectorize-driven costmodel tests

As the subject says; in other words, we have a lot of tests that are driven by the LoopVectorizer's debug output, but we don't have any meaningful way to autogenerate check lines in them, which means that an inordinate amount of manual work is required when modifying the corresponding cost models. That is not sustainable, so this patch presents a solution. Reviewed By: RKSimon. Differential Revision: https://reviews.llvm.org/D121133
2022-04-15 15:37:29 +03:00 · 2022-04-15 15:37:29 +03:00 · 8fbed6870b
parent 4dba3d4c53
commit 8fbed6870b
5 changed files with 120 additions and 4 deletions
--- a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-loopvectorize-costmodel.ll
+++ b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-loopvectorize-costmodel.ll
@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, float\* %in0, align 4"
+; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = global [1024 x float] zeroinitializer, align 128
+@B = global [1024 x i8] zeroinitializer, align 128
+
+define void @test() {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+
+  %iv.0 = add nuw nsw i64 %iv, 0
+  %iv.1 = add nuw nsw i64 %iv, 1
+
+  %in0 = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %iv.0
+  %in1 = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %iv.1
+
+  %v0 = load float, float* %in0
+  %v1 = load float, float* %in1
+
+  %reduce.add.0 = fadd float %v0, %v1
+
+  %reduce.add.0.narrow = fptoui float %reduce.add.0 to i8
+
+  %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
+  store i8 %reduce.add.0.narrow, i8* %out
+
+  %iv.next = add nuw nsw i64 %iv.0, 2
+  %cmp = icmp ult i64 %iv.next, 1024
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}
--- a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-loopvectorize-costmodel.ll.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-loopvectorize-costmodel.ll.expected
@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, float\* %in0, align 4"
+; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = global [1024 x float] zeroinitializer, align 128
+@B = global [1024 x i8] zeroinitializer, align 128
+
+define void @test() {
+; CHECK-LABEL: 'test'
+; CHECK:  LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, float* %in0, align 4
+; CHECK:  LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, float* %in0, align 4
+; CHECK:  LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, float* %in0, align 4
+; CHECK:  LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load float, float* %in0, align 4
+; CHECK:  LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load float, float* %in0, align 4
+; CHECK:  LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load float, float* %in0, align 4
+; CHECK:  LV: Found an estimated cost of 92 for VF 64 For instruction: %v0 = load float, float* %in0, align 4
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+
+  %iv.0 = add nuw nsw i64 %iv, 0
+  %iv.1 = add nuw nsw i64 %iv, 1
+
+  %in0 = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %iv.0
+  %in1 = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %iv.1
+
+  %v0 = load float, float* %in0
+  %v1 = load float, float* %in1
+
+  %reduce.add.0 = fadd float %v0, %v1
+
+  %reduce.add.0.narrow = fptoui float %reduce.add.0 to i8
+
+  %out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
+  store i8 %reduce.add.0.narrow, i8* %out
+
+  %iv.next = add nuw nsw i64 %iv.0, 2
+  %cmp = icmp ult i64 %iv.next, 1024
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}
--- a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/loopvectorize-costmodel.test
+++ b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/loopvectorize-costmodel.test
@ -0,0 +1,13 @@
+# REQUIRES: x86-registered-target
+
+## Check that --filter works properly.
+# RUN: cp -f %S/Inputs/x86-loopvectorize-costmodel.ll %t.ll && %update_analyze_test_checks --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, float\* %in0, align 4" %t.ll
+# RUN: diff -u %t.ll %S/Inputs/x86-loopvectorize-costmodel.ll.expected
+
+## Check that running the script again does not change the result:
+# RUN: %update_analyze_test_checks --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, float\* %in0, align 4" %t.ll
+# RUN: diff -u %t.ll %S/Inputs/x86-loopvectorize-costmodel.ll.expected
+
+## Check that running the script again, without arguments, does not change the result:
+# RUN: %update_analyze_test_checks %t.ll
+# RUN: diff -u %t.ll %S/Inputs/x86-loopvectorize-costmodel.ll.expected
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@ -303,6 +303,11 @@ ANALYZE_FUNCTION_RE = re.compile(
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

+LV_DEBUG_RE = re.compile(
+    r'^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*'
+    r'\s*\n(?P<body>.*)$',
+    flags=(re.X | re.S))
+
 IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
 TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
 TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
--- a/llvm/utils/update_analyze_test_checks.py
+++ b/llvm/utils/update_analyze_test_checks.py
@ -110,10 +110,19 @@ def main():

      raw_tool_outputs = common.invoke_tool(ti.args.opt_binary, opt_args, ti.path)

-      # Split analysis outputs by "Printing analysis " declarations.
-      for raw_tool_output in re.split(r'Printing analysis ', raw_tool_outputs):
-        builder.process_run_line(common.ANALYZE_FUNCTION_RE, common.scrub_body,
-                                 raw_tool_output, prefixes, False)
+      if re.search(r'Printing analysis ', raw_tool_outputs) is not None:
+        # Split analysis outputs by "Printing analysis " declarations.
+        for raw_tool_output in re.split(r'Printing analysis ', raw_tool_outputs):
+          builder.process_run_line(common.ANALYZE_FUNCTION_RE, common.scrub_body,
+                                  raw_tool_output, prefixes, False)
+      elif re.search(r'LV: Checking a loop in ', raw_tool_outputs) is not None:
+        # Split debug outputs by "LV: Checking a loop in " declarations.
+        for raw_tool_output in re.split(r'LV: Checking a loop in ', raw_tool_outputs):
+          builder.process_run_line(common.LV_DEBUG_RE, common.scrub_body,
+                                  raw_tool_output, prefixes, False)
+      else:
+        common.warn('Don\'t know how to deal with this output')
+        continue

    func_dict = builder.finish_and_get_func_dict()
    is_in_function = False