llvm-project/llvm/test/Transforms/InstCombine/abs-1.ll

508 lines
16 KiB
LLVM
Raw Normal View History

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
declare i32 @abs(i32)
declare i64 @labs(i64)
declare i64 @llabs(i64)
; Test that the abs library call simplifier works correctly.
; abs(x) -> x <s 0 ? -x : x.
define i32 @test_abs(i32 %x) {
; CHECK-LABEL: @test_abs(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 0
; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[X]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[NEG]], i32 [[X]]
; CHECK-NEXT: ret i32 [[TMP2]]
;
%ret = call i32 @abs(i32 %x)
ret i32 %ret
}
define i64 @test_labs(i64 %x) {
; CHECK-LABEL: @test_labs(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[X:%.*]], 0
; CHECK-NEXT: [[NEG:%.*]] = sub nsw i64 0, [[X]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[NEG]], i64 [[X]]
; CHECK-NEXT: ret i64 [[TMP2]]
;
%ret = call i64 @labs(i64 %x)
ret i64 %ret
}
define i64 @test_llabs(i64 %x) {
; CHECK-LABEL: @test_llabs(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[X:%.*]], 0
; CHECK-NEXT: [[NEG:%.*]] = sub nsw i64 0, [[X]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[NEG]], i64 [[X]]
; CHECK-NEXT: ret i64 [[TMP2]]
;
%ret = call i64 @llabs(i64 %x)
ret i64 %ret
}
; We have a canonical form of abs to make CSE easier.
define i8 @abs_canonical_1(i8 %x) {
; CHECK-LABEL: @abs_canonical_1(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[NEG:%.*]] = sub i8 0, [[X]]
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i8 [[NEG]], i8 [[X]]
; CHECK-NEXT: ret i8 [[ABS]]
;
%cmp = icmp sgt i8 %x, 0
%neg = sub i8 0, %x
%abs = select i1 %cmp, i8 %x, i8 %neg
ret i8 %abs
}
; Vectors should work too.
define <2 x i8> @abs_canonical_2(<2 x i8> %x) {
; CHECK-LABEL: @abs_canonical_2(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[NEG]], <2 x i8> [[X]]
; CHECK-NEXT: ret <2 x i8> [[ABS]]
;
%cmp = icmp sgt <2 x i8> %x, <i8 -1, i8 -1>
%neg = sub <2 x i8> zeroinitializer, %x
%abs = select <2 x i1> %cmp, <2 x i8> %x, <2 x i8> %neg
ret <2 x i8> %abs
}
; Even if a constant has undef elements.
define <2 x i8> @abs_canonical_2_vec_undef_elts(<2 x i8> %x) {
; CHECK-LABEL: @abs_canonical_2_vec_undef_elts(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[NEG]], <2 x i8> [[X]]
; CHECK-NEXT: ret <2 x i8> [[ABS]]
;
%cmp = icmp sgt <2 x i8> %x, <i8 undef, i8 -1>
%neg = sub <2 x i8> zeroinitializer, %x
%abs = select <2 x i1> %cmp, <2 x i8> %x, <2 x i8> %neg
ret <2 x i8> %abs
}
; NSW should not change.
define i8 @abs_canonical_3(i8 %x) {
; CHECK-LABEL: @abs_canonical_3(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[NEG:%.*]] = sub nsw i8 0, [[X]]
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i8 [[NEG]], i8 [[X]]
; CHECK-NEXT: ret i8 [[ABS]]
;
%cmp = icmp slt i8 %x, 0
%neg = sub nsw i8 0, %x
%abs = select i1 %cmp, i8 %neg, i8 %x
ret i8 %abs
}
define i8 @abs_canonical_4(i8 %x) {
; CHECK-LABEL: @abs_canonical_4(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[NEG:%.*]] = sub i8 0, [[X]]
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i8 [[NEG]], i8 [[X]]
; CHECK-NEXT: ret i8 [[ABS]]
;
%cmp = icmp slt i8 %x, 1
%neg = sub i8 0, %x
%abs = select i1 %cmp, i8 %neg, i8 %x
ret i8 %abs
}
define i32 @abs_canonical_5(i8 %x) {
; CHECK-LABEL: @abs_canonical_5(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[X]] to i32
; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[CONV]]
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[NEG]], i32 [[CONV]]
; CHECK-NEXT: ret i32 [[ABS]]
;
%cmp = icmp sgt i8 %x, 0
%conv = sext i8 %x to i32
%neg = sub i32 0, %conv
%abs = select i1 %cmp, i32 %conv, i32 %neg
ret i32 %abs
}
define i32 @abs_canonical_6(i32 %a, i32 %b) {
; CHECK-LABEL: @abs_canonical_6(
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], -1
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[B]], [[A]]
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[TMP2]]
; CHECK-NEXT: ret i32 [[ABS]]
;
%tmp1 = sub i32 %a, %b
%cmp = icmp sgt i32 %tmp1, -1
%tmp2 = sub i32 %b, %a
%abs = select i1 %cmp, i32 %tmp1, i32 %tmp2
ret i32 %abs
}
define <2 x i8> @abs_canonical_7(<2 x i8> %a, <2 x i8 > %b) {
; CHECK-LABEL: @abs_canonical_7(
; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i8> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[TMP1]], <i8 -1, i8 -1>
; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i8> [[B]], [[A]]
; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]]
; CHECK-NEXT: ret <2 x i8> [[ABS]]
;
%tmp1 = sub <2 x i8> %a, %b
%cmp = icmp sgt <2 x i8> %tmp1, <i8 -1, i8 -1>
%tmp2 = sub <2 x i8> %b, %a
%abs = select <2 x i1> %cmp, <2 x i8> %tmp1, <2 x i8> %tmp2
ret <2 x i8> %abs
}
define i32 @abs_canonical_8(i32 %a) {
; CHECK-LABEL: @abs_canonical_8(
; CHECK-NEXT: [[TMP:%.*]] = sub i32 0, [[A:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP]], 0
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[A]], i32 [[TMP]]
; CHECK-NEXT: ret i32 [[ABS]]
;
%tmp = sub i32 0, %a
%cmp = icmp slt i32 %tmp, 0
%abs = select i1 %cmp, i32 %a, i32 %tmp
ret i32 %abs
}
; We have a canonical form of nabs to make CSE easier.
define i8 @nabs_canonical_1(i8 %x) {
; CHECK-LABEL: @nabs_canonical_1(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[NEG:%.*]] = sub i8 0, [[X]]
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[NEG]]
; CHECK-NEXT: ret i8 [[ABS]]
;
%cmp = icmp sgt i8 %x, 0
%neg = sub i8 0, %x
%abs = select i1 %cmp, i8 %neg, i8 %x
ret i8 %abs
}
; Vectors should work too.
define <2 x i8> @nabs_canonical_2(<2 x i8> %x) {
; CHECK-LABEL: @nabs_canonical_2(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[X]], <2 x i8> [[NEG]]
; CHECK-NEXT: ret <2 x i8> [[ABS]]
;
%cmp = icmp sgt <2 x i8> %x, <i8 -1, i8 -1>
%neg = sub <2 x i8> zeroinitializer, %x
%abs = select <2 x i1> %cmp, <2 x i8> %neg, <2 x i8> %x
ret <2 x i8> %abs
}
; Even if a constant has undef elements.
define <2 x i8> @nabs_canonical_2_vec_undef_elts(<2 x i8> %x) {
; CHECK-LABEL: @nabs_canonical_2_vec_undef_elts(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
; CHECK-NEXT: [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[X]], <2 x i8> [[NEG]]
; CHECK-NEXT: ret <2 x i8> [[ABS]]
;
%cmp = icmp sgt <2 x i8> %x, <i8 -1, i8 undef>
%neg = sub <2 x i8> zeroinitializer, %x
%abs = select <2 x i1> %cmp, <2 x i8> %neg, <2 x i8> %x
ret <2 x i8> %abs
}
; NSW should not change.
define i8 @nabs_canonical_3(i8 %x) {
; CHECK-LABEL: @nabs_canonical_3(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[NEG:%.*]] = sub nsw i8 0, [[X]]
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[NEG]]
; CHECK-NEXT: ret i8 [[ABS]]
;
%cmp = icmp slt i8 %x, 0
%neg = sub nsw i8 0, %x
%abs = select i1 %cmp, i8 %x, i8 %neg
ret i8 %abs
}
define i8 @nabs_canonical_4(i8 %x) {
; CHECK-LABEL: @nabs_canonical_4(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[NEG:%.*]] = sub i8 0, [[X]]
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[NEG]]
; CHECK-NEXT: ret i8 [[ABS]]
;
%cmp = icmp slt i8 %x, 1
%neg = sub i8 0, %x
%abs = select i1 %cmp, i8 %x, i8 %neg
ret i8 %abs
}
define i32 @nabs_canonical_5(i8 %x) {
; CHECK-LABEL: @nabs_canonical_5(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[X]] to i32
; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[CONV]]
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[CONV]], i32 [[NEG]]
; CHECK-NEXT: ret i32 [[ABS]]
;
%cmp = icmp sgt i8 %x, 0
%conv = sext i8 %x to i32
%neg = sub i32 0, %conv
%abs = select i1 %cmp, i32 %neg, i32 %conv
ret i32 %abs
}
define i32 @nabs_canonical_6(i32 %a, i32 %b) {
; CHECK-LABEL: @nabs_canonical_6(
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], -1
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[B]], [[A]]
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[TMP1]]
; CHECK-NEXT: ret i32 [[ABS]]
;
%tmp1 = sub i32 %a, %b
%cmp = icmp sgt i32 %tmp1, -1
%tmp2 = sub i32 %b, %a
%abs = select i1 %cmp, i32 %tmp2, i32 %tmp1
ret i32 %abs
}
define <2 x i8> @nabs_canonical_7(<2 x i8> %a, <2 x i8 > %b) {
; CHECK-LABEL: @nabs_canonical_7(
; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i8> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> [[TMP1]], <i8 -1, i8 -1>
; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i8> [[B]], [[A]]
; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[TMP2]], <2 x i8> [[TMP1]]
; CHECK-NEXT: ret <2 x i8> [[ABS]]
;
%tmp1 = sub <2 x i8> %a, %b
%cmp = icmp sgt <2 x i8> %tmp1, <i8 -1, i8 -1>
%tmp2 = sub <2 x i8> %b, %a
%abs = select <2 x i1> %cmp, <2 x i8> %tmp2, <2 x i8> %tmp1
ret <2 x i8> %abs
}
define i32 @nabs_canonical_8(i32 %a) {
; CHECK-LABEL: @nabs_canonical_8(
; CHECK-NEXT: [[TMP:%.*]] = sub i32 0, [[A:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP]], 0
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP]], i32 [[A]]
; CHECK-NEXT: ret i32 [[ABS]]
;
%tmp = sub i32 0, %a
%cmp = icmp slt i32 %tmp, 0
%abs = select i1 %cmp, i32 %tmp, i32 %a
ret i32 %abs
}
; The following 5 tests use a shift+add+xor to implement abs():
; B = ashr i8 A, 7 -- smear the sign bit.
; xor (add A, B), B -- add -1 and flip bits if negative
define i8 @shifty_abs_commute0(i8 %x) {
; CHECK-LABEL: @shifty_abs_commute0(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[X]]
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[X]]
; CHECK-NEXT: ret i8 [[ABS]]
;
%signbit = ashr i8 %x, 7
%add = add i8 %signbit, %x
%abs = xor i8 %add, %signbit
ret i8 %abs
}
define i8 @shifty_abs_commute0_nsw(i8 %x) {
; CHECK-LABEL: @shifty_abs_commute0_nsw(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i8 0, [[X]]
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[X]]
; CHECK-NEXT: ret i8 [[ABS]]
;
%signbit = ashr i8 %x, 7
%add = add nsw i8 %signbit, %x
%abs = xor i8 %add, %signbit
ret i8 %abs
}
; The nuw flag creates a contradiction. If the shift produces all 1s, the only
; way for the add to not wrap is for %x to be 0, but then the shift couldn't
; have produced all 1s. We partially optimize this.
define i8 @shifty_abs_commute0_nuw(i8 %x) {
; CHECK-LABEL: @shifty_abs_commute0_nuw(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], 0
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 0
; CHECK-NEXT: ret i8 [[ABS]]
;
%signbit = ashr i8 %x, 7
%add = add nuw i8 %signbit, %x
%abs = xor i8 %add, %signbit
ret i8 %abs
}
define <2 x i8> @shifty_abs_commute1(<2 x i8> %x) {
; CHECK-LABEL: @shifty_abs_commute1(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> [[TMP2]], <2 x i8> [[X]]
; CHECK-NEXT: ret <2 x i8> [[ABS]]
;
%signbit = ashr <2 x i8> %x, <i8 7, i8 7>
%add = add <2 x i8> %signbit, %x
%abs = xor <2 x i8> %signbit, %add
ret <2 x i8> %abs
}
define <2 x i8> @shifty_abs_commute2(<2 x i8> %x) {
; CHECK-LABEL: @shifty_abs_commute2(
; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[X:%.*]], <i8 3, i8 3>
[InstCombine] canonicalize shifty abs(): ashr+add+xor --> cmp+neg+sel We want to do this for 2 reasons: 1. Value tracking does not recognize the ashr variant, so it would fail to match for cases like D39766. 2. DAGCombiner does better at producing optimal codegen when we have the cmp+sel pattern. More detail about what happens in the backend: 1. DAGCombiner has a generic transform for all targets to convert the scalar cmp+sel variant of abs into the shift variant. That is the opposite of this IR canonicalization. 2. DAGCombiner has a generic transform for all targets to convert the vector cmp+sel variant of abs into either an ABS node or the shift variant. That is again the opposite of this IR canonicalization. 3. DAGCombiner has a generic transform for all targets to convert the exact shift variants produced by #1 or #2 into an ISD::ABS node. Note: It would be an efficiency improvement if we had #1 go directly to an ABS node when that's legal/custom. 4. The pattern matching above is incomplete, so it is possible to escape the intended/optimal codegen in a variety of ways. a. For #2, the vector path is missing the case for setlt with a '1' constant. b. For #3, we are missing a match for commuted versions of the shift variants. 5. Therefore, this IR canonicalization can only help get us to the optimal codegen. The version of cmp+sel produced by this patch will be recognized in the DAG and converted to an ABS node when possible or the shift sequence when not. 6. In the following examples with this patch applied, we may get conditional moves rather than the shift produced by the generic DAGCombiner transforms. The conditional move is created using a target-specific decision for any given target. Whether it is optimal or not for a particular subtarget may be up for debate. define i32 @abs_shifty(i32 %x) { %signbit = ashr i32 %x, 31 %add = add i32 %signbit, %x %abs = xor i32 %signbit, %add ret i32 %abs } define i32 @abs_cmpsubsel(i32 %x) { %cmp = icmp slt i32 %x, zeroinitializer %sub = sub i32 zeroinitializer, %x %abs = select i1 %cmp, i32 %sub, i32 %x ret i32 %abs } define <4 x i32> @abs_shifty_vec(<4 x i32> %x) { %signbit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %add = add <4 x i32> %signbit, %x %abs = xor <4 x i32> %signbit, %add ret <4 x i32> %abs } define <4 x i32> @abs_cmpsubsel_vec(<4 x i32> %x) { %cmp = icmp slt <4 x i32> %x, zeroinitializer %sub = sub <4 x i32> zeroinitializer, %x %abs = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> %x ret <4 x i32> %abs } > $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=x86_64 -mattr=avx > abs_shifty: > movl %edi, %eax > negl %eax > cmovll %edi, %eax > retq > > abs_cmpsubsel: > movl %edi, %eax > negl %eax > cmovll %edi, %eax > retq > > abs_shifty_vec: > vpabsd %xmm0, %xmm0 > retq > > abs_cmpsubsel_vec: > vpabsd %xmm0, %xmm0 > retq > > $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=aarch64 > abs_shifty: > cmp w0, #0 // =0 > cneg w0, w0, mi > ret > > abs_cmpsubsel: > cmp w0, #0 // =0 > cneg w0, w0, mi > ret > > abs_shifty_vec: > abs v0.4s, v0.4s > ret > > abs_cmpsubsel_vec: > abs v0.4s, v0.4s > ret > > $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=powerpc64le > abs_shifty: > srawi 4, 3, 31 > add 3, 3, 4 > xor 3, 3, 4 > blr > > abs_cmpsubsel: > srawi 4, 3, 31 > add 3, 3, 4 > xor 3, 3, 4 > blr > > abs_shifty_vec: > vspltisw 3, -16 > vspltisw 4, 15 > vsubuwm 3, 4, 3 > vsraw 3, 2, 3 > vadduwm 2, 2, 3 > xxlxor 34, 34, 35 > blr > > abs_cmpsubsel_vec: > vspltisw 3, -16 > vspltisw 4, 15 > vsubuwm 3, 4, 3 > vsraw 3, 2, 3 > vadduwm 2, 2, 3 > xxlxor 34, 34, 35 > blr > Differential Revision: https://reviews.llvm.org/D40984 llvm-svn: 320921
2017-12-17 00:41:17 +08:00
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i8> [[Y]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, [[Y]]
; CHECK-NEXT: [[ABS:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> [[TMP2]], <2 x i8> [[Y]]
; CHECK-NEXT: ret <2 x i8> [[ABS]]
;
%y = mul <2 x i8> %x, <i8 3, i8 3> ; extra op to thwart complexity-based canonicalization
%signbit = ashr <2 x i8> %y, <i8 7, i8 7>
%add = add <2 x i8> %y, %signbit
%abs = xor <2 x i8> %signbit, %add
ret <2 x i8> %abs
}
define i8 @shifty_abs_commute3(i8 %x) {
; CHECK-LABEL: @shifty_abs_commute3(
; CHECK-NEXT: [[Y:%.*]] = mul i8 [[X:%.*]], 3
[InstCombine] canonicalize shifty abs(): ashr+add+xor --> cmp+neg+sel We want to do this for 2 reasons: 1. Value tracking does not recognize the ashr variant, so it would fail to match for cases like D39766. 2. DAGCombiner does better at producing optimal codegen when we have the cmp+sel pattern. More detail about what happens in the backend: 1. DAGCombiner has a generic transform for all targets to convert the scalar cmp+sel variant of abs into the shift variant. That is the opposite of this IR canonicalization. 2. DAGCombiner has a generic transform for all targets to convert the vector cmp+sel variant of abs into either an ABS node or the shift variant. That is again the opposite of this IR canonicalization. 3. DAGCombiner has a generic transform for all targets to convert the exact shift variants produced by #1 or #2 into an ISD::ABS node. Note: It would be an efficiency improvement if we had #1 go directly to an ABS node when that's legal/custom. 4. The pattern matching above is incomplete, so it is possible to escape the intended/optimal codegen in a variety of ways. a. For #2, the vector path is missing the case for setlt with a '1' constant. b. For #3, we are missing a match for commuted versions of the shift variants. 5. Therefore, this IR canonicalization can only help get us to the optimal codegen. The version of cmp+sel produced by this patch will be recognized in the DAG and converted to an ABS node when possible or the shift sequence when not. 6. In the following examples with this patch applied, we may get conditional moves rather than the shift produced by the generic DAGCombiner transforms. The conditional move is created using a target-specific decision for any given target. Whether it is optimal or not for a particular subtarget may be up for debate. define i32 @abs_shifty(i32 %x) { %signbit = ashr i32 %x, 31 %add = add i32 %signbit, %x %abs = xor i32 %signbit, %add ret i32 %abs } define i32 @abs_cmpsubsel(i32 %x) { %cmp = icmp slt i32 %x, zeroinitializer %sub = sub i32 zeroinitializer, %x %abs = select i1 %cmp, i32 %sub, i32 %x ret i32 %abs } define <4 x i32> @abs_shifty_vec(<4 x i32> %x) { %signbit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31> %add = add <4 x i32> %signbit, %x %abs = xor <4 x i32> %signbit, %add ret <4 x i32> %abs } define <4 x i32> @abs_cmpsubsel_vec(<4 x i32> %x) { %cmp = icmp slt <4 x i32> %x, zeroinitializer %sub = sub <4 x i32> zeroinitializer, %x %abs = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> %x ret <4 x i32> %abs } > $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=x86_64 -mattr=avx > abs_shifty: > movl %edi, %eax > negl %eax > cmovll %edi, %eax > retq > > abs_cmpsubsel: > movl %edi, %eax > negl %eax > cmovll %edi, %eax > retq > > abs_shifty_vec: > vpabsd %xmm0, %xmm0 > retq > > abs_cmpsubsel_vec: > vpabsd %xmm0, %xmm0 > retq > > $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=aarch64 > abs_shifty: > cmp w0, #0 // =0 > cneg w0, w0, mi > ret > > abs_cmpsubsel: > cmp w0, #0 // =0 > cneg w0, w0, mi > ret > > abs_shifty_vec: > abs v0.4s, v0.4s > ret > > abs_cmpsubsel_vec: > abs v0.4s, v0.4s > ret > > $ ./opt -instcombine shiftyabs.ll -S | ./llc -o - -mtriple=powerpc64le > abs_shifty: > srawi 4, 3, 31 > add 3, 3, 4 > xor 3, 3, 4 > blr > > abs_cmpsubsel: > srawi 4, 3, 31 > add 3, 3, 4 > xor 3, 3, 4 > blr > > abs_shifty_vec: > vspltisw 3, -16 > vspltisw 4, 15 > vsubuwm 3, 4, 3 > vsraw 3, 2, 3 > vadduwm 2, 2, 3 > xxlxor 34, 34, 35 > blr > > abs_cmpsubsel_vec: > vspltisw 3, -16 > vspltisw 4, 15 > vsubuwm 3, 4, 3 > vsraw 3, 2, 3 > vadduwm 2, 2, 3 > xxlxor 34, 34, 35 > blr > Differential Revision: https://reviews.llvm.org/D40984 llvm-svn: 320921
2017-12-17 00:41:17 +08:00
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[Y]], 0
; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[Y]]
; CHECK-NEXT: [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[Y]]
; CHECK-NEXT: ret i8 [[ABS]]
;
%y = mul i8 %x, 3 ; extra op to thwart complexity-based canonicalization
%signbit = ashr i8 %y, 7
%add = add i8 %y, %signbit
%abs = xor i8 %add, %signbit
ret i8 %abs
}
; Negative test - don't transform if it would increase instruction count.
declare void @extra_use(i8)
define i8 @shifty_abs_too_many_uses(i8 %x) {
; CHECK-LABEL: @shifty_abs_too_many_uses(
; CHECK-NEXT: [[SIGNBIT:%.*]] = ashr i8 [[X:%.*]], 7
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[SIGNBIT]], [[X]]
; CHECK-NEXT: [[ABS:%.*]] = xor i8 [[ADD]], [[SIGNBIT]]
; CHECK-NEXT: call void @extra_use(i8 [[SIGNBIT]])
; CHECK-NEXT: ret i8 [[ABS]]
;
%signbit = ashr i8 %x, 7
%add = add i8 %x, %signbit
%abs = xor i8 %add, %signbit
call void @extra_use(i8 %signbit)
ret i8 %abs
}
; There's another way to make abs() using shift, xor, and subtract.
; PR36036 - https://bugs.llvm.org/show_bug.cgi?id=36036
define i8 @shifty_sub(i8 %x) {
; CHECK-LABEL: @shifty_sub(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[X]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[X]]
; CHECK-NEXT: ret i8 [[R]]
;
%sh = ashr i8 %x, 7
%xor = xor i8 %x, %sh
%r = sub i8 %xor, %sh
ret i8 %r
}
define i8 @shifty_sub_nsw_commute(i8 %x) {
; CHECK-LABEL: @shifty_sub_nsw_commute(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i8 0, [[X]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[X]]
; CHECK-NEXT: ret i8 [[R]]
;
%sh = ashr i8 %x, 7
%xor = xor i8 %sh, %x
%r = sub nsw i8 %xor, %sh
ret i8 %r
}
define <4 x i32> @shifty_sub_nuw_vec_commute(<4 x i32> %x) {
; CHECK-LABEL: @shifty_sub_nuw_vec_commute(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], zeroinitializer
; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%sh = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
%xor = xor <4 x i32> %sh, %x
%r = sub nuw <4 x i32> %xor, %sh
ret <4 x i32> %r
}
define i12 @shifty_sub_nsw_nuw(i12 %x) {
; CHECK-LABEL: @shifty_sub_nsw_nuw(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i12 [[X:%.*]], 0
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i12 [[X]], i12 0
; CHECK-NEXT: ret i12 [[R]]
;
%sh = ashr i12 %x, 11
%xor = xor i12 %x, %sh
%r = sub nsw nuw i12 %xor, %sh
ret i12 %r
}
define i8 @negate_abs(i8 %x) {
; CHECK-LABEL: @negate_abs(
; CHECK-NEXT: [[N:%.*]] = sub i8 0, [[X:%.*]]
; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X]], 0
; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 [[X]], i8 [[N]]
; CHECK-NEXT: ret i8 [[S]]
;
%n = sub i8 0, %x
%c = icmp slt i8 %x, 0
%s = select i1 %c, i8 %n, i8 %x
%r = sub i8 0, %s
ret i8 %r
}
define <2 x i8> @negate_nabs(<2 x i8> %x) {
; CHECK-LABEL: @negate_nabs(
; CHECK-NEXT: [[N:%.*]] = sub <2 x i8> zeroinitializer, [[X:%.*]]
; CHECK-NEXT: [[C:%.*]] = icmp slt <2 x i8> [[X]], zeroinitializer
; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[N]], <2 x i8> [[X]]
; CHECK-NEXT: ret <2 x i8> [[S]]
;
%n = sub <2 x i8> zeroinitializer, %x
%c = icmp slt <2 x i8> %x, zeroinitializer
%s = select <2 x i1> %c, <2 x i8> %x, <2 x i8> %n
%r = sub <2 x i8> zeroinitializer, %s
ret <2 x i8> %r
}
define i1 @abs_must_be_positive(i32 %x) {
; CHECK-LABEL: @abs_must_be_positive(
; CHECK-NEXT: ret i1 true
;
%negx = sub nsw i32 0, %x
%c = icmp sge i32 %x, 0
%sel = select i1 %c, i32 %x, i32 %negx
%c2 = icmp sge i32 %sel, 0
ret i1 %c2
}