From b910fab7054bb27326560c45d325fb48891ac134 Mon Sep 17 00:00:00 2001
From: Max Kazantsev
Date: Wed, 10 Feb 2021 12:28:08 +0700
Subject: [PATCH] [Test] Two more tests on usub

They are analogous to the existing tests, but use different starting
offset which can be important for some transforms.
---
 .../X86/2020_12_02_decrementing_loop.ll       | 65 +++++++++++++++----
 llvm/test/CodeGen/X86/usub_inc_iv.ll          | 49 ++++++++++++++
 2 files changed, 103 insertions(+), 11 deletions(-)

diff --git a/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll b/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
index 5d860ea6e98e..fc0d0ae36720 100644
--- a/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
+++ b/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
@@ -43,24 +43,67 @@ failure:                                          ; preds = %backedge
   unreachable
 }

-define i32 @test_02(i32* %p, i64 %len, i32 %x) {
-; CHECK-LABEL: test_02:
+; Similar to test_01, but the load address is computed from a base pointer that is %p offset by -6 elements.
+define i32 @test_01a(i32* %p, i64 %len, i32 %x) {
+; CHECK-LABEL: test_01a:
 ; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    movq %rsi, %rax
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB1_1: ## %loop
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    testq %rsi, %rsi
-; CHECK-NEXT:    je LBB1_4
+; CHECK-NEXT:    subq $1, %rax
+; CHECK-NEXT:    jb LBB1_4
 ; CHECK-NEXT:  ## %bb.2: ## %backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT:    cmpl %edx, -4(%rdi,%rsi,4)
-; CHECK-NEXT:    leaq -1(%rsi), %rsi
+; CHECK-NEXT:    cmpl %edx, -28(%rdi,%rsi,4)
+; CHECK-NEXT:    movq %rax, %rsi
 ; CHECK-NEXT:    jne LBB1_1
 ; CHECK-NEXT:  ## %bb.3: ## %failure
 ; CHECK-NEXT:    ud2
 ; CHECK-NEXT:  LBB1_4: ## %exit
 ; CHECK-NEXT:    movl $-1, %eax
 ; CHECK-NEXT:    retq
+entry:
+  %base = getelementptr inbounds i32, i32* %p, i64 -6
+  br label %loop
+
+loop:                                             ; preds = %backedge, %entry
+  %iv = phi i64 [ %iv.next, %backedge ], [ %len, %entry ]
+  %iv.next = add nsw i64 %iv, -1
+  %cond_1 = icmp eq i64 %iv, 0
+  br i1 %cond_1, label %exit, label %backedge
+
+backedge:                                         ; preds = %loop
+  %addr = getelementptr inbounds i32, i32* %base, i64 %iv.next
+  %loaded = load atomic i32, i32* %addr unordered, align 4
+  %cond_2 = icmp eq i32 %loaded, %x
+  br i1 %cond_2, label %failure, label %loop
+
+exit:                                             ; preds = %loop
+  ret i32 -1
+
+failure:                                          ; preds = %backedge
+  unreachable
+}
+
+define i32 @test_02(i32* %p, i64 %len, i32 %x) {
+; CHECK-LABEL: test_02:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  LBB2_1: ## %loop
+; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    testq %rsi, %rsi
+; CHECK-NEXT:    je LBB2_4
+; CHECK-NEXT:  ## %bb.2: ## %backedge
+; CHECK-NEXT:    ## in Loop: Header=BB2_1 Depth=1
+; CHECK-NEXT:    cmpl %edx, -4(%rdi,%rsi,4)
+; CHECK-NEXT:    leaq -1(%rsi), %rsi
+; CHECK-NEXT:    jne LBB2_1
+; CHECK-NEXT:  ## %bb.3: ## %failure
+; CHECK-NEXT:    ud2
+; CHECK-NEXT:  LBB2_4: ## %exit
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    retq
 entry:
   %start = add i64 %len, -1
   br label %loop
@@ -90,18 +133,18 @@ define i32 @test_03(i32* %p, i64 %len, i32 %x) {
 ; CHECK-LABEL: test_03:
 ; CHECK:       ## %bb.0: ## %entry
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB2_1: ## %loop
+; CHECK-NEXT:  LBB3_1: ## %loop
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    testq %rsi, %rsi
-; CHECK-NEXT:    je LBB2_4
+; CHECK-NEXT:    je LBB3_4
 ; CHECK-NEXT:  ## %bb.2: ## %backedge
-; CHECK-NEXT:    ## in Loop: Header=BB2_1 Depth=1
+; CHECK-NEXT:    ## in Loop: Header=BB3_1 Depth=1
 ; CHECK-NEXT:    cmpl %edx, -4(%rdi,%rsi,4)
 ; CHECK-NEXT:    leaq -1(%rsi), %rsi
-; CHECK-NEXT:    jne LBB2_1
+; CHECK-NEXT:    jne LBB3_1
 ; CHECK-NEXT:  ## %bb.3: ## %failure
 ; CHECK-NEXT:    ud2
-; CHECK-NEXT:  LBB2_4: ## %exit
+; CHECK-NEXT:  LBB3_4: ## %exit
 ; CHECK-NEXT:    movl $-1, %eax
 ; CHECK-NEXT:    retq
 entry:
diff --git a/llvm/test/CodeGen/X86/usub_inc_iv.ll b/llvm/test/CodeGen/X86/usub_inc_iv.ll
index 6cffce3a9960..4cc0fedab3a3 100644
--- a/llvm/test/CodeGen/X86/usub_inc_iv.ll
+++ b/llvm/test/CodeGen/X86/usub_inc_iv.ll
@@ -48,6 +48,55 @@ failure:                                          ; preds = %backedge
   unreachable
 }

+; Similar to test_01, but the base pointer is %p offset by -7 elements (byte offset -28 after address sinking).
+define i32 @test_01a(i32* %p, i64 %len, i32 %x) {
+; CHECK-LABEL: @test_01a(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[MATH:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[IV]], i64 1)
+; CHECK-NEXT:    [[MATH]] = extractvalue { i64, i1 } [[TMP0]], 0
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT:    br i1 [[OV]], label [[EXIT:%.*]], label [[BACKEDGE]]
+; CHECK:       backedge:
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[SUNKADDR]]
+; CHECK-NEXT:    [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -28
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
+; CHECK-NEXT:    [[LOADED:%.*]] = load atomic i32, i32* [[TMP2]] unordered, align 4
+; CHECK-NEXT:    [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
+; CHECK-NEXT:    br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 -1
+; CHECK:       failure:
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %scevgep = getelementptr i32, i32* %p, i64 -7
+  br label %loop
+
+loop:                                             ; preds = %backedge, %entry
+  %iv = phi i64 [ %iv.next, %backedge ], [ %len, %entry ]
+  %iv.next = add i64 %iv, -1
+  %cond_1 = icmp eq i64 %iv, 0
+  br i1 %cond_1, label %exit, label %backedge
+
+backedge:                                         ; preds = %loop
+  %scevgep1 = getelementptr i32, i32* %scevgep, i64 %iv
+  %loaded = load atomic i32, i32* %scevgep1 unordered, align 4
+  %cond_2 = icmp eq i32 %loaded, %x
+  br i1 %cond_2, label %failure, label %loop
+
+exit:                                             ; preds = %loop
+  ret i32 -1
+
+failure:                                          ; preds = %backedge
+  unreachable
+}
+
+
 ; TODO: We can use trick with usub here.
 define i32 @test_02(i32* %p, i64 %len, i32 %x) {
 ; CHECK-LABEL: @test_02(