; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -codegenprepare < %s | FileCheck %s

; Each function in this file feeds a 64-bit integer division or remainder to
; the CodeGenPrepare pass for the NVPTX target named below and pins the exact
; IR that the pass is expected to produce for it.

; Little-endian layout; 16-, 32- and 64-bit integers are the native widths.
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

; No bypassing should be done in apparently unsuitable cases.
;
; The dividend is computed as -1 - zext(%a). Since zext(%a) lies in
; [0, 2^32 - 1], the dividend lies in [-2^32, -1] and is never a non-negative
; 32-bit value. The expected output is therefore the input unchanged: a single
; 64-bit srem, with no runtime operand test and no added control flow.
define void @Test_no_bypassing(i32 %a, i64 %b, i64* %retptr) {
; CHECK-LABEL: @Test_no_bypassing(
; CHECK-NEXT:    [[A_1:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT:    [[A_2:%.*]] = sub i64 -1, [[A_1]]
; CHECK-NEXT:    [[RES:%.*]] = srem i64 [[A_2]], [[B:%.*]]
; CHECK-NEXT:    store i64 [[RES]], i64* [[RETPTR:%.*]]
; CHECK-NEXT:    ret void
;
  %a.1 = zext i32 %a to i64
  ; %a.2 is always negative so the division cannot be bypassed.
  %a.2 = sub i64 -1, %a.1
  %res = srem i64 %a.2, %b
  store i64 %res, i64* %retptr
  ret void
}

; No OR instruction is needed if one of the operands (divisor) is known
; to fit into 32 bits.
;
; The divisor %b.1 is a zext from i32, so only the dividend %a has to be tested
; at run time. The expected output masks %a with -4294967296
; (0xFFFFFFFF00000000, the upper 32 bits) and compares the result with zero:
;   * upper bits clear -> both operands are truncated to i32, divided with a
;     32-bit udiv, and the quotient is zero-extended back to i64;
;   * otherwise        -> the original 64-bit sdiv is executed.
; A phi merges the two quotients before the store.
define void @Test_check_one_operand(i64 %a, i32 %b, i64* %retptr) {
; CHECK-LABEL: @Test_check_one_operand(
; CHECK-NEXT:    [[B_1:%.*]] = zext i32 [[B:%.*]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[A:%.*]], -4294967296
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP8:%.*]]
; CHECK:         [[TMP4:%.*]] = trunc i64 [[B_1]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[A]] to i32
; CHECK-NEXT:    [[TMP6:%.*]] = udiv i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT:    br label [[TMP10:%.*]]
; CHECK:         [[TMP9:%.*]] = sdiv i64 [[A]], [[B_1]]
; CHECK-NEXT:    br label [[TMP10]]
; CHECK:         [[TMP11:%.*]] = phi i64 [ [[TMP7]], [[TMP3]] ], [ [[TMP9]], [[TMP8]] ]
; CHECK-NEXT:    store i64 [[TMP11]], i64* [[RETPTR:%.*]]
; CHECK-NEXT:    ret void
;
  ; Zero-extension guarantees the divisor's upper 32 bits are clear.
  %b.1 = zext i32 %b to i64
  %res = sdiv i64 %a, %b.1
  store i64 %res, i64* %retptr
  ret void
}

; If both operands are known to fit into 32 bits, then replace the division
; in-place without CFG modification.
;
; The dividend is masked with 4294967295 (0xFFFFFFFF) and the divisor is a zext
; from i32, so neither operand can have any of its upper 32 bits set. The
; expected output contains no compare and no branch: the 64-bit udiv becomes
; trunc + trunc + 32-bit udiv + zext in the same basic block.
define void @Test_check_none(i64 %a, i32 %b, i64* %retptr) {
; CHECK-LABEL: @Test_check_none(
; CHECK-NEXT:    [[A_1:%.*]] = and i64 [[A:%.*]], 4294967295
; CHECK-NEXT:    [[B_1:%.*]] = zext i32 [[B:%.*]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[A_1]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[B_1]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = udiv i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT:    store i64 [[TMP4]], i64* [[RETPTR:%.*]]
; CHECK-NEXT:    ret void
;
  ; Keep only the low 32 bits of the dividend.
  %a.1 = and i64 %a, 4294967295
  ; Zero-extension guarantees the divisor's upper 32 bits are clear.
  %b.1 = zext i32 %b to i64
  %res = udiv i64 %a.1, %b.1
  store i64 %res, i64* %retptr
  ret void
}

; In case of unsigned long division with a short dividend,
; the long division is not needed any more.
;
; The dividend %a.1 is a zext from i32 and the divisor %b is an arbitrary i64.
; The expected output guards on the unsigned comparison %a.1 >= %b:
;   * true  -> %b is no larger than a 32-bit value, so both operands are
;     truncated and a 32-bit udiv/urem pair produces quotient and remainder;
;   * false -> the dividend is smaller than the divisor, so the quotient is 0
;     and the remainder is %a.1; the phis take these values straight from the
;     entry block.
; No 64-bit udiv or urem remains in the expected output.
define void @Test_special_case(i32 %a, i64 %b, i64* %retptr) {
; CHECK-LABEL: @Test_special_case(
; CHECK-NEXT:    [[A_1:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i64 [[A_1]], [[B:%.*]]
; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP9:%.*]]
; CHECK:         [[TMP3:%.*]] = trunc i64 [[B]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[A_1]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = udiv i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = urem i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT:    br label [[TMP9]]
; CHECK:         [[TMP10:%.*]] = phi i64 [ [[TMP7]], [[TMP2]] ], [ 0, [[TMP0:%.*]] ]
; CHECK-NEXT:    [[TMP11:%.*]] = phi i64 [ [[TMP8]], [[TMP2]] ], [ [[A_1]], [[TMP0]] ]
; CHECK-NEXT:    [[RES:%.*]] = add i64 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    store i64 [[RES]], i64* [[RETPTR:%.*]]
; CHECK-NEXT:    ret void
;
  %a.1 = zext i32 %a to i64
  ; Quotient and remainder share the same operands, so the expected output
  ; computes both under one guard.
  %div = udiv i64 %a.1, %b
  %rem = urem i64 %a.1, %b
  %res = add i64 %div, %rem
  store i64 %res, i64* %retptr
  ret void
}