[UpdateTestChecks] Prevent rapid onset insanity when forced to write LoopVectorize-driven costmodel tests

Subj, or in other words: we have a lot of tests that are driven by
the LoopVectorizer's debug output, but we don't have
any meaningful way to autogenerate checklines in them,
which means that an insurmountable amount of manual work
is required when modifying the appropriate cost models.

That is not sustainable, so this presents a solution.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D121133
This commit is contained in:
Roman Lebedev 2022-04-15 15:37:29 +03:00
parent 4dba3d4c53
commit 8fbed6870b
No known key found for this signature in database
GPG Key ID: 083C3EBB4A1689E0
5 changed files with 120 additions and 4 deletions

View File

@ -0,0 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, float\* %in0, align 4"
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512
; REQUIRES: asserts
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@A = global [1024 x float] zeroinitializer, align 128
@B = global [1024 x i8] zeroinitializer, align 128
define void @test() {
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%iv.0 = add nuw nsw i64 %iv, 0
%iv.1 = add nuw nsw i64 %iv, 1
%in0 = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %iv.0
%in1 = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %iv.1
%v0 = load float, float* %in0
%v1 = load float, float* %in1
%reduce.add.0 = fadd float %v0, %v1
%reduce.add.0.narrow = fptoui float %reduce.add.0 to i8
%out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
store i8 %reduce.add.0.narrow, i8* %out
%iv.next = add nuw nsw i64 %iv.0, 2
%cmp = icmp ult i64 %iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup:
ret void
}

View File

@ -0,0 +1,49 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, float\* %in0, align 4"
; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx512bw --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,AVX512
; REQUIRES: asserts
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@A = global [1024 x float] zeroinitializer, align 128
@B = global [1024 x i8] zeroinitializer, align 128
define void @test() {
; CHECK-LABEL: 'test'
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, float* %in0, align 4
; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, float* %in0, align 4
; CHECK: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, float* %in0, align 4
; CHECK: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load float, float* %in0, align 4
; CHECK: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load float, float* %in0, align 4
; CHECK: LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load float, float* %in0, align 4
; CHECK: LV: Found an estimated cost of 92 for VF 64 For instruction: %v0 = load float, float* %in0, align 4
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%iv.0 = add nuw nsw i64 %iv, 0
%iv.1 = add nuw nsw i64 %iv, 1
%in0 = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %iv.0
%in1 = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %iv.1
%v0 = load float, float* %in0
%v1 = load float, float* %in1
%reduce.add.0 = fadd float %v0, %v1
%reduce.add.0.narrow = fptoui float %reduce.add.0 to i8
%out = getelementptr inbounds [1024 x i8], [1024 x i8]* @B, i64 0, i64 %iv.0
store i8 %reduce.add.0.narrow, i8* %out
%iv.next = add nuw nsw i64 %iv.0, 2
%cmp = icmp ult i64 %iv.next, 1024
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup:
ret void
}

View File

@ -0,0 +1,13 @@
# REQUIRES: x86-registered-target
## Check that --filter works properly.
# RUN: cp -f %S/Inputs/x86-loopvectorize-costmodel.ll %t.ll && %update_analyze_test_checks --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, float\* %in0, align 4" %t.ll
# RUN: diff -u %t.ll %S/Inputs/x86-loopvectorize-costmodel.ll.expected
## Check that running the script again does not change the result:
# RUN: %update_analyze_test_checks --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, float\* %in0, align 4" %t.ll
# RUN: diff -u %t.ll %S/Inputs/x86-loopvectorize-costmodel.ll.expected
## Check that running the script again, without arguments, does not change the result:
# RUN: %update_analyze_test_checks %t.ll
# RUN: diff -u %t.ll %S/Inputs/x86-loopvectorize-costmodel.ll.expected

View File

@ -303,6 +303,11 @@ ANALYZE_FUNCTION_RE = re.compile(
r'\s*\n(?P<body>.*)$',
flags=(re.X | re.S))
# Matches one chunk of text that opens with a function name in single quotes.
# Group 'func' is the quoted name; group 'body' is everything after the first
# line, up to the end of the chunk.
LV_DEBUG_RE = re.compile(
    # First line: optional leading whitespace, the quoted function name, then
    # whatever else is on that line.
    r'^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*'
    # End of the first line, then the remaining text (DOTALL lets '.' cross
    # newlines, so the body may span many lines).
    r'\s*\n(?P<body>.*)$',
    re.VERBOSE | re.DOTALL)
# Matches the start of an IR function definition line: 'define', an optional
# 'internal', anything up to '@', then the (optionally double-quoted) function
# name followed by '('. Group 1 is the function name without the quotes.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
# Matches a whole 'target triple = "..."' line; group 1 is the text between
# the double quotes.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# Finds '-mtriple=' or '-mtriple ' followed by a run of non-space characters;
# group 1 is that run.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')

View File

@ -110,10 +110,19 @@ def main():
raw_tool_outputs = common.invoke_tool(ti.args.opt_binary, opt_args, ti.path)
# Split analysis outputs by "Printing analysis " declarations.
for raw_tool_output in re.split(r'Printing analysis ', raw_tool_outputs):
builder.process_run_line(common.ANALYZE_FUNCTION_RE, common.scrub_body,
raw_tool_output, prefixes, False)
if re.search(r'Printing analysis ', raw_tool_outputs) is not None:
# Split analysis outputs by "Printing analysis " declarations.
for raw_tool_output in re.split(r'Printing analysis ', raw_tool_outputs):
builder.process_run_line(common.ANALYZE_FUNCTION_RE, common.scrub_body,
raw_tool_output, prefixes, False)
elif re.search(r'LV: Checking a loop in ', raw_tool_outputs) is not None:
# Split debug output by "LV: Checking a loop in " declarations.
for raw_tool_output in re.split(r'LV: Checking a loop in ', raw_tool_outputs):
builder.process_run_line(common.LV_DEBUG_RE, common.scrub_body,
raw_tool_output, prefixes, False)
else:
common.warn('Don\'t know how to deal with this output')
continue
func_dict = builder.finish_and_get_func_dict()
is_in_function = False