; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; Identity mask <0,1,2,3> on %v1: the assertions expect the argument to be
; returned directly.
define <4 x float> @test1(<4 x float> %v1) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: ret <4 x float> [[V1:%.*]]
;
  %v2 = shufflevector <4 x float> %v1, <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %v2
}
; With both operands equal to %v1, mask <0,5,2,7> selects lanes 0-3 in order,
; so the assertions expect the argument itself to be returned.
define <4 x float> @test2(<4 x float> %v1) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: ret <4 x float> [[V1:%.*]]
;
  %v2 = shufflevector <4 x float> %v1, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %v2
}
; Lane 1 of %D comes from lane 0 of %C, which holds the inserted %f; the
; assertions expect the whole chain to collapse to returning %f.
define float @test3(<4 x float> %A, <4 x float> %B, float %f) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: ret float [[F:%.*]]
;
  %C = insertelement <4 x float> %A, float %f, i32 0
  %D = shufflevector <4 x float> %C, <4 x float> %B, <4 x i32> <i32 5, i32 0, i32 2, i32 7>
  %E = extractelement <4 x float> %D, i32 1
  ret float %E
}
; Extracting lane 0 of a lane-0 splat of %X is expected to become a direct
; extract from %X.
define i32 @test4(<4 x i32> %X) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
; CHECK-NEXT: ret i32 [[R]]
;
  %t = shufflevector <4 x i32> %X, <4 x i32> poison, <4 x i32> zeroinitializer
  %r = extractelement <4 x i32> %t, i32 0
  ret i32 %r
}
; Lane 0 of the shuffle is lane 3 of %X, so the extract is expected to read
; lane 3 of %X directly.
define i32 @test5(<4 x i32> %X) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3
; CHECK-NEXT: ret i32 [[R]]
;
  %t = shufflevector <4 x i32> %X, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 undef, i32 undef>
  %r = extractelement <4 x i32> %t, i32 0
  ret i32 %r
}
; The splat is performed on an i32 bitcast view of %X; the assertions expect a
; direct float extract of lane 0 from %X.
define float @test6(<4 x float> %X) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
; CHECK-NEXT: ret float [[R]]
;
  %X1 = bitcast <4 x float> %X to <4 x i32>
  %t = shufflevector <4 x i32> %X1, <4 x i32> poison, <4 x i32> zeroinitializer
  %t2 = bitcast <4 x i32> %t to <4 x float>
  %r = extractelement <4 x float> %t2, i32 0
  ret float %r
}
; Scalable-vector variant of the bitcast/splat/extract pattern: the assertions
; keep a splat shuffle (on the float type) followed by the extract.
define float @testvscale6(<vscale x 4 x float> %X) {
; CHECK-LABEL: @testvscale6(
; CHECK-NEXT: [[T2:%.*]] = shufflevector <vscale x 4 x float> [[X:%.*]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x float> [[T2]], i32 0
; CHECK-NEXT: ret float [[R]]
;
  %X1 = bitcast <vscale x 4 x float> %X to <vscale x 4 x i32>
  %t = shufflevector <vscale x 4 x i32> %X1, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %t2 = bitcast <vscale x 4 x i32> %t to <vscale x 4 x float>
  %r = extractelement <vscale x 4 x float> %t2, i32 0
  ret float %r
}
; Mask elements 6 and 7 select from the poison operand; the assertions expect
; them to be rewritten as undef mask elements.
define <4 x float> @test7(<4 x float> %x) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x float> [[R]]
;
  %r = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %r
}
; This should turn into a single shuffle.
; NOTE(review): the current assertions expect two shufflevector instructions
; (one on %x alone, then one combining the result with %y).
define <4 x float> @test8(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @test8(
; CHECK-NEXT: [[T132:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 3, i32 undef>
; CHECK-NEXT: [[T134:%.*]] = shufflevector <4 x float> [[T132]], <4 x float> [[Y:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 4>
; CHECK-NEXT: ret <4 x float> [[T134]]
;
  %t4 = extractelement <4 x float> %x, i32 1
  %t2 = extractelement <4 x float> %x, i32 3
  %t1 = extractelement <4 x float> %y, i32 0
  %t128 = insertelement <4 x float> poison, float %t4, i32 0
  %t130 = insertelement <4 x float> %t128, float undef, i32 1
  %t132 = insertelement <4 x float> %t130, float %t2, i32 2
  %t134 = insertelement <4 x float> %t132, float %t1, i32 3
  ret <4 x float> %t134
}
; Test fold of two shuffles where the first shuffle's vector inputs are a
; different length than the second's.
define <4 x i8> @test9(<16 x i8> %t6) {
; CHECK-LABEL: @test9(
; CHECK-NEXT: [[T9:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> poison, <4 x i32> <i32 13, i32 9, i32 4, i32 13>
; CHECK-NEXT: ret <4 x i8> [[T9]]
;
  %t7 = shufflevector <16 x i8> %t6, <16 x i8> poison, <4 x i32> <i32 13, i32 9, i32 4, i32 13>
  %t9 = shufflevector <4 x i8> %t7, <4 x i8> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
  ret <4 x i8> %t9
}
; Same as test9, but make sure that "undef" mask values are not confused with
; mask values of 2*N, where N is the mask length. These shuffles should not
; be folded (because [8,9,4,8] may not be a mask supported by the target).
define <4 x i8> @test9a(<16 x i8> %t6) {
; CHECK-LABEL: @test9a(
; CHECK-NEXT: [[T7:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> poison, <4 x i32> <i32 undef, i32 9, i32 4, i32 8>
; CHECK-NEXT: [[T9:%.*]] = shufflevector <4 x i8> [[T7]], <4 x i8> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 undef>
; CHECK-NEXT: ret <4 x i8> [[T9]]
;
  %t7 = shufflevector <16 x i8> %t6, <16 x i8> poison, <4 x i32> <i32 undef, i32 9, i32 4, i32 8>
  %t9 = shufflevector <4 x i8> %t7, <4 x i8> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
  ret <4 x i8> %t9
}
; Test fold of two shuffles where the first shuffle's vector inputs are a
; different length than the second's.
define <4 x i8> @test9b(<4 x i8> %t6, <4 x i8> %t7) {
; CHECK-LABEL: @test9b(
; CHECK-NEXT: [[T9:%.*]] = shufflevector <4 x i8> [[T6:%.*]], <4 x i8> [[T7:%.*]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i8> [[T9]]
;
  %t1 = shufflevector <4 x i8> %t6, <4 x i8> %t7, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>
  %t9 = shufflevector <8 x i8> %t1, <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i8> %t9
}
; Redundant vector splats should be removed. Radar 8597790.
define <4 x i32> @test10(<4 x i32> %t5) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: [[T7:%.*]] = shufflevector <4 x i32> [[T5:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i32> [[T7]]
;
  %t6 = shufflevector <4 x i32> %t5, <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %t7 = shufflevector <4 x i32> %t6, <4 x i32> poison, <4 x i32> zeroinitializer
  ret <4 x i32> %t7
}
; Test fold of two shuffles where the two shufflevector inputs' op1 are the same.
define <8 x i8> @test11(<16 x i8> %t6) {
; CHECK-LABEL: @test11(
; CHECK-NEXT: [[T3:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <8 x i8> [[T3]]
;
  %t1 = shufflevector <16 x i8> %t6, <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %t2 = shufflevector <16 x i8> %t6, <16 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %t3
}
; Test fold of two shuffles where the first shufflevector's inputs are the same as the second.
define <8 x i8> @test12(<8 x i8> %t6, <8 x i8> %t2) {
; CHECK-LABEL: @test12(
; CHECK-NEXT: [[T3:%.*]] = shufflevector <8 x i8> [[T6:%.*]], <8 x i8> [[T2:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9, i32 8, i32 11, i32 12>
; CHECK-NEXT: ret <8 x i8> [[T3]]
;
  %t1 = shufflevector <8 x i8> %t6, <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 undef, i32 7>
  %t3 = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9, i32 8, i32 11, i32 12>
  ret <8 x i8> %t3
}
; Test fold of two shuffles where the first shufflevector's inputs are the same as the second.
define <8 x i8> @test12a(<8 x i8> %t6, <8 x i8> %t2) {
; CHECK-LABEL: @test12a(
; CHECK-NEXT: [[T3:%.*]] = shufflevector <8 x i8> [[T2:%.*]], <8 x i8> [[T6:%.*]], <8 x i32> <i32 0, i32 3, i32 1, i32 4, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: ret <8 x i8> [[T3]]
;
  %t1 = shufflevector <8 x i8> %t6, <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 undef, i32 7>
  %t3 = shufflevector <8 x i8> %t2, <8 x i8> %t1, <8 x i32> <i32 0, i32 3, i32 1, i32 4, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i8> %t3
}
; The mask length of the 1st shuffle can be reduced to eliminate the 2nd shuffle.
define <2 x i8> @extract_subvector_of_shuffle(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @extract_subvector_of_shuffle(
; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: ret <2 x i8> [[EXTRACT_SUBV]]
;
  %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <3 x i32> <i32 0, i32 2, i32 0>
  %extract_subv = shufflevector <3 x i8> %shuf, <3 x i8> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i8> %extract_subv
}
; Undef elements in either mask are ok. Undefs from the 2nd shuffle mask should propagate to the new shuffle.
; The type of the inputs does not have to match the output type.
define <4 x i8> @extract_subvector_of_shuffle_undefs_types(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @extract_subvector_of_shuffle_undefs_types(
; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <4 x i32> <i32 undef, i32 2, i32 0, i32 undef>
; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]]
;
  %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
  %extract_subv = shufflevector <5 x i8> %shuf, <5 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  ret <4 x i8> %extract_subv
}
; Extra uses are not ok - we only do the transform when we can eliminate an instruction.

declare void @use_v5i8(<5 x i8>)

define <4 x i8> @extract_subvector_of_shuffle_extra_use(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @extract_subvector_of_shuffle_extra_use(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
; CHECK-NEXT: call void @use_v5i8(<5 x i8> [[SHUF]])
; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <5 x i8> [[SHUF]], <5 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]]
;
  %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
  call void @use_v5i8(<5 x i8> %shuf)
  %extract_subv = shufflevector <5 x i8> %shuf, <5 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  ret <4 x i8> %extract_subv
}
; The trailing lane swap is expected to be folded into the insertelement
; indices and the add constant (which becomes <7, 5>).
define <2 x i8> @test13a(i8 %x1, i8 %x2) {
; CHECK-LABEL: @test13a(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> undef, i8 [[X1:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[X2:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i8> [[TMP2]], <i8 7, i8 5>
; CHECK-NEXT: ret <2 x i8> [[TMP3]]
;
  %A = insertelement <2 x i8> poison, i8 %x1, i32 0
  %B = insertelement <2 x i8> %A, i8 %x2, i32 1
  %C = add <2 x i8> %B, <i8 5, i8 7>
  %D = shufflevector <2 x i8> %C, <2 x i8> poison, <2 x i32> <i32 1, i32 0>
  ret <2 x i8> %D
}
; Increasing length of vector ops is not a good canonicalization.
define <3 x i32> @add_wider(i32 %y, i32 %z) {
; CHECK-LABEL: @add_wider(
; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i32 1
; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[I1]], <i32 255, i32 255>
; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: ret <3 x i32> [[EXT]]
;
  %i0 = insertelement <2 x i32> poison, i32 %y, i32 0
  %i1 = insertelement <2 x i32> %i0, i32 %z, i32 1
  %a = add <2 x i32> %i1, <i32 255, i32 255>
  %ext = shufflevector <2 x i32> %a, <2 x i32> poison, <3 x i32> <i32 0, i32 1, i32 undef>
  ret <3 x i32> %ext
}
; Increasing length of vector ops must be safe from illegal undef propagation.
define <3 x i32> @div_wider(i32 %y, i32 %z) {
; CHECK-LABEL: @div_wider(
; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i32 1
; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i32> [[I1]], <i32 255, i32 255>
; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: ret <3 x i32> [[EXT]]
;
  %i0 = insertelement <2 x i32> poison, i32 %y, i32 0
  %i1 = insertelement <2 x i32> %i0, i32 %z, i32 1
  %a = sdiv <2 x i32> %i1, <i32 255, i32 255>
  %ext = shufflevector <2 x i32> %a, <2 x i32> poison, <3 x i32> <i32 0, i32 1, i32 undef>
  ret <3 x i32> %ext
}
; Increasing length of insertelements (no math ops) is a good canonicalization.
define <3 x i8> @fold_inselts_with_widening_shuffle(i8 %x, i8 %y) {
; CHECK-LABEL: @fold_inselts_with_widening_shuffle(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i8> undef, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x i8> [[TMP1]], i8 [[Y:%.*]], i32 1
; CHECK-NEXT: ret <3 x i8> [[TMP2]]
;
  %ins0 = insertelement <2 x i8> poison, i8 %x, i32 0
  %ins1 = insertelement <2 x i8> %ins0, i8 %y, i32 1
  %widen = shufflevector <2 x i8> %ins1, <2 x i8> poison, <3 x i32> <i32 0, i32 1, i32 undef>
  ret <3 x i8> %widen
}
; Mask <undef, 0> moves %x to lane 1; the assertions expect a single
; insertelement at index 1.
define <2 x i8> @test13b(i8 %x) {
; CHECK-LABEL: @test13b(
; CHECK-NEXT: [[B:%.*]] = insertelement <2 x i8> poison, i8 [[X:%.*]], i32 1
; CHECK-NEXT: ret <2 x i8> [[B]]
;
  %A = insertelement <2 x i8> poison, i8 %x, i32 0
  %B = shufflevector <2 x i8> %A, <2 x i8> poison, <2 x i32> <i32 undef, i32 0>
  ret <2 x i8> %B
}
; Lanes 0 and 2 of the 4-wide vector are selected; the assertions expect two
; inserts directly into a 2-wide vector.
define <2 x i8> @test13c(i8 %x1, i8 %x2) {
; CHECK-LABEL: @test13c(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> undef, i8 [[X1:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[X2:%.*]], i32 1
; CHECK-NEXT: ret <2 x i8> [[TMP2]]
;
  %A = insertelement <4 x i8> poison, i8 %x1, i32 0
  %B = insertelement <4 x i8> %A, i8 %x2, i32 2
  %C = shufflevector <4 x i8> %B, <4 x i8> poison, <2 x i32> <i32 0, i32 2>
  ret <2 x i8> %C
}
; The assertions expect only the first store (of the constant vector) to
; remain, followed by the return.
define void @test14(i16 %conv10) {
; CHECK-LABEL: @test14(
; CHECK-NEXT: store <4 x i16> <i16 poison, i16 poison, i16 poison, i16 23>, <4 x i16>* undef, align 8
; CHECK-NEXT: ret void
;
  %t = alloca <4 x i16>, align 8
  %vecinit6 = insertelement <4 x i16> poison, i16 23, i32 3
  store <4 x i16> %vecinit6, <4 x i16>* undef
  %t1 = load <4 x i16>, <4 x i16>* undef
  %vecinit11 = insertelement <4 x i16> poison, i16 %conv10, i32 3
  %div = udiv <4 x i16> %t1, %vecinit11
  store <4 x i16> %div, <4 x i16>* %t
  %t4 = load <4 x i16>, <4 x i16>* %t
  %t5 = shufflevector <4 x i16> %t4, <4 x i16> poison, <2 x i32> <i32 2, i32 0>
  %cmp = icmp ule <2 x i16> %t5, undef
  %sext = sext <2 x i1> %cmp to <2 x i16>
  ret void
}
; Chains of extractelement/insertelement should collapse into one valid
; shufflevector whose mask indexes the right lanes of %LHS and %RHS.
define <4 x float> @test15a(<4 x float> %LHS, <4 x float> %RHS) {
; CHECK-LABEL: @test15a(
; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x float> [[LHS:%.*]], <4 x float> [[RHS:%.*]], <4 x i32> <i32 4, i32 0, i32 6, i32 6>
; CHECK-NEXT: ret <4 x float> [[T4]]
;
  %t1 = extractelement <4 x float> %LHS, i32 0
  %t2 = insertelement <4 x float> %RHS, float %t1, i32 1
  %t3 = extractelement <4 x float> %RHS, i32 2
  %t4 = insertelement <4 x float> %t2, float %t3, i32 3
  ret <4 x float> %t4
}
; An insert/extract chain routed through an intermediate vector (%t1); the
; assertions expect a single shufflevector with mask <4, 3, 6, 6>.
define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) {
; CHECK-LABEL: @test15b(
; CHECK-NEXT: [[T5:%.*]] = shufflevector <4 x float> [[LHS:%.*]], <4 x float> [[RHS:%.*]], <4 x i32> <i32 4, i32 3, i32 6, i32 6>
; CHECK-NEXT: ret <4 x float> [[T5]]
;
  %t0 = extractelement <4 x float> %LHS, i32 3
  %t1 = insertelement <4 x float> %RHS, float %t0, i32 0
  %t2 = extractelement <4 x float> %t1, i32 0
  %t3 = insertelement <4 x float> %RHS, float %t2, i32 1
  %t4 = extractelement <4 x float> %RHS, i32 2
  %t5 = insertelement <4 x float> %t3, float %t4, i32 3
  ret <4 x float> %t5
}
; Only lane 0 (constant 1, shifted left by 1) is selected; the assertions
; expect the constant <i32 2>.
define <1 x i32> @test16a(i32 %ele) {
; CHECK-LABEL: @test16a(
; CHECK-NEXT: ret <1 x i32> <i32 2>
;
  %t0 = insertelement <2 x i32> <i32 1, i32 undef>, i32 %ele, i32 1
  %t1 = shl <2 x i32> %t0, <i32 1, i32 1>
  %t2 = shufflevector <2 x i32> %t1, <2 x i32> poison, <1 x i32> <i32 0>
  ret <1 x i32> %t2
}
; Lanes 1-4 are selected and %ele is inserted at lane 6, so it is never read;
; the assertions expect a constant splat of 2.
define <4 x i8> @test16b(i8 %ele) {
; CHECK-LABEL: @test16b(
; CHECK-NEXT: ret <4 x i8> <i8 2, i8 2, i8 2, i8 2>
;
  %t0 = insertelement <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 undef, i8 1>, i8 %ele, i32 6
  %t1 = shl <8 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %t2 = shufflevector <8 x i8> %t1, <8 x i8> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i8> %t2
}
; Two shuffles whose masks compose to the identity should both be removed.
define <4 x i32> @shuffle_17ident(<4 x i32> %v) {
; CHECK-LABEL: @shuffle_17ident(
; CHECK-NEXT: ret <4 x i32> [[V:%.*]]
;
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %shuffle2 = shufflevector <4 x i32> %shuffle, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
  ret <4 x i32> %shuffle2
}
; The same lane permutation applied to both operands can instead be applied
; once, after the operation.
define <4 x i32> @shuffle_17and(<4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: @shuffle_17and(
; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[V1:%.*]], [[V2:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %r = and <4 x i32> %t1, %t2
  ret <4 x i32> %r
}
declare void @use(<2 x float>)

; One extra use is ok to transform.
define <2 x float> @shuffle_fadd_multiuse(<2 x float> %v1, <2 x float> %v2) {
; CHECK-LABEL: @shuffle_fadd_multiuse(
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x float> [[V1:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[V1]], [[V2:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: call void @use(<2 x float> [[T1]])
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %t1 = shufflevector <2 x float> %v1, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  %t2 = shufflevector <2 x float> %v2, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  %r = fadd <2 x float> %t1, %t2
  call void @use(<2 x float> %t1)
  ret <2 x float> %r
}
; A single extra use (of %t2) still allows the shuffle to be moved after the
; fdiv.
define <2 x float> @shuffle_fdiv_multiuse(<2 x float> %v1, <2 x float> %v2) {
; CHECK-LABEL: @shuffle_fdiv_multiuse(
; CHECK-NEXT: [[T2:%.*]] = shufflevector <2 x float> [[V2:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x float> [[V1:%.*]], [[V2]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: call void @use(<2 x float> [[T2]])
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %t1 = shufflevector <2 x float> %v1, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  %t2 = shufflevector <2 x float> %v2, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  %r = fdiv <2 x float> %t1, %t2
  call void @use(<2 x float> %t2)
  ret <2 x float> %r
}
; But 2 extra uses would require an extra instruction.
define <2 x float> @shuffle_fsub_multiuse(<2 x float> %v1, <2 x float> %v2) {
; CHECK-LABEL: @shuffle_fsub_multiuse(
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x float> [[V1:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[T2:%.*]] = shufflevector <2 x float> [[V2:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[R:%.*]] = fsub <2 x float> [[T1]], [[T2]]
; CHECK-NEXT: call void @use(<2 x float> [[T1]])
; CHECK-NEXT: call void @use(<2 x float> [[T2]])
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %t1 = shufflevector <2 x float> %v1, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  %t2 = shufflevector <2 x float> %v2, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  %r = fsub <2 x float> %t1, %t2
  call void @use(<2 x float> %t1)
  call void @use(<2 x float> %t2)
  ret <2 x float> %r
}
; Identically shuffled operands of an add: the assertions expect one add on
; the unshuffled values followed by one shuffle.
define <4 x i32> @shuffle_17add(<4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: @shuffle_17add(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V1:%.*]], [[V2:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %r = add <4 x i32> %t1, %t2
  ret <4 x i32> %r
}
; Identically shuffled operands of an `add nsw`: the assertions expect the nsw
; flag to be kept on the add that now precedes the shuffle.
define <4 x i32> @shuffle_17addnsw(<4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: @shuffle_17addnsw(
; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[V1:%.*]], [[V2:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %r = add nsw <4 x i32> %t1, %t2
  ret <4 x i32> %r
}
; Identically shuffled operands of an `add nuw`: the assertions expect the nuw
; flag to be kept on the add that now precedes the shuffle.
define <4 x i32> @shuffle_17addnuw(<4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: @shuffle_17addnuw(
; CHECK-NEXT: [[TMP1:%.*]] = add nuw <4 x i32> [[V1:%.*]], [[V2:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %r = add nuw <4 x i32> %t1, %t2
  ret <4 x i32> %r
}
; Identically shuffled operands of an `fsub fast`: the assertions expect the
; fast flag to be kept on the fsub that now precedes the shuffle.
define <4 x float> @shuffle_17fsub_fast(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @shuffle_17fsub_fast(
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <4 x float> [[V1:%.*]], [[V2:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: ret <4 x float> [[R]]
;
  %t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %r = fsub fast <4 x float> %t1, %t2
  ret <4 x float> %r
}
; The constant operand is expected to be permuted (<44, 41, 42, 43>) so that
; the shuffle can be applied after the add.
define <4 x i32> @add_const(<4 x i32> %v) {
; CHECK-LABEL: @add_const(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V:%.*]], <i32 44, i32 41, i32 42, i32 43>
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %r = add <4 x i32> %t1, <i32 41, i32 42, i32 43, i32 44>
  ret <4 x i32> %r
}
; The constant is the left operand here; it is expected to be reversed
; (<44, 43, 42, 41>) so that the lane-reversing shuffle can follow the sub.
define <4 x i32> @sub_const(<4 x i32> %v) {
; CHECK-LABEL: @sub_const(
; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> <i32 44, i32 43, i32 42, i32 41>, [[V:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %r = sub <4 x i32> <i32 41, i32 42, i32 43, i32 44>, %t1
  ret <4 x i32> %r
}
; Math before shuffle requires an extra shuffle.
; NOTE: this function calls @use(<2 x float>), which is declared elsewhere in this file.
define <2 x float> @fadd_const_multiuse(<2 x float> %v) {
; CHECK-LABEL: @fadd_const_multiuse(
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x float> [[V:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[R:%.*]] = fadd <2 x float> [[T1]], <float 4.100000e+01, float 4.200000e+01>
; CHECK-NEXT: call void @use(<2 x float> [[T1]])
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %t1 = shufflevector <2 x float> %v, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  %r = fadd <2 x float> %t1, <float 41.0, float 42.0>
  call void @use(<2 x float> %t1)
  ret <2 x float> %r
}
; Math before splat allows replacing constant elements with undef lanes.
; (The current assertions show the unused constant lanes as poison.)
define <4 x i32> @mul_const_splat(<4 x i32> %v) {
; CHECK-LABEL: @mul_const_splat(
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[V:%.*]], <i32 poison, i32 42, i32 poison, i32 poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %r = mul <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %t1
  ret <4 x i32> %r
}
; Take 2 elements of a vector and shift each of those by a different amount.
; Expected: the lshr is applied to the unshuffled operand (the constant lanes the
; mask never reads become undef) and the shuffle is applied to the lshr result.
define <4 x i32> @lshr_const_half_splat(<4 x i32> %v) {
; CHECK-LABEL: @lshr_const_half_splat(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> <i32 undef, i32 8, i32 9, i32 undef>, [[V:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
  %r = lshr <4 x i32> <i32 8, i32 8, i32 9, i32 9>, %t1
  ret <4 x i32> %r
}
; We can't change this because there's no pre-shuffle version of the fmul constant:
; the mask reads lane 0 twice, but the two result lanes are multiplied by different
; constants (41.0 and 42.0). Expected: shuffle and fmul are left in place.
define <2 x float> @fmul_const_invalid_constant(<2 x float> %v) {
; CHECK-LABEL: @fmul_const_invalid_constant(
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x float> [[V:%.*]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = fmul <2 x float> [[T1]], <float 4.100000e+01, float 4.200000e+01>
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %t1 = shufflevector <2 x float> %v, <2 x float> poison, <2 x i32> <i32 0, i32 0>
  %r = fmul <2 x float> %t1, <float 41.0, float 42.0>
  ret <2 x float> %r
}
; Reduce the width of the binop by moving it ahead of a shuffle.
; Expected: a <2 x i8> add using the first two constant lanes (42, 43), followed
; by the widening shuffle.
define <4 x i8> @widening_shuffle_add_1(<2 x i8> %x) {
; CHECK-LABEL: @widening_shuffle_add_1(
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 42, i8 43>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x i8> [[R]]
;
  %widex = shufflevector <2 x i8> %x, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
  ret <4 x i8> %r
}
; Reduce the width of the binop by moving it ahead of a shuffle.
; The mask swaps lanes 0 and 1, so the narrowed constant is expected to be
; swapped as well (<43, 42>).
define <4 x i8> @widening_shuffle_add_2(<2 x i8> %x) {
; CHECK-LABEL: @widening_shuffle_add_2(
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 43, i8 42>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x i8> [[R]]
;
  %widex = shufflevector <2 x i8> %x, <2 x i8> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
  %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
  ret <4 x i8> %r
}
; Negative test - widening shuffles have the same mask/constant constraint as non-size-changing shuffles.
; The mask reads source lane 1 twice while the matching constant lanes differ
; (42 vs 43). Expected: shuffle and add are left in place.
define <4 x i8> @widening_shuffle_add_invalid_constant(<2 x i8> %x) {
; CHECK-LABEL: @widening_shuffle_add_invalid_constant(
; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[R:%.*]] = add <4 x i8> [[WIDEX]], <i8 42, i8 43, i8 44, i8 45>
; CHECK-NEXT: ret <4 x i8> [[R]]
;
  %widex = shufflevector <2 x i8> %x, <2 x i8> poison, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
  %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
  ret <4 x i8> %r
}
; Negative test - widening shuffles have an additional constraint: they must not extend with anything but undefs.
; Here the last mask element is 0 (a real source lane), not undef.
; Expected: shuffle and add are left in place.
define <4 x i8> @widening_shuffle_add_invalid_mask(<2 x i8> %x) {
; CHECK-LABEL: @widening_shuffle_add_invalid_mask(
; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 0>
; CHECK-NEXT: [[R:%.*]] = add <4 x i8> [[WIDEX]], <i8 42, i8 43, i8 44, i8 45>
; CHECK-NEXT: ret <4 x i8> [[R]]
;
  %widex = shufflevector <2 x i8> %x, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 0>
  %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
  ret <4 x i8> %r
}
; A binop that produces undef in the high lanes can be moved before the shuffle.
; This is ok because 'shl C, undef --> undef'.
; Expected: a <2 x i16> shl of the first two constant lanes by %v, then the
; widening shuffle.
define <4 x i16> @widening_shuffle_shl_constant_op0(<2 x i16> %v) {
; CHECK-LABEL: @widening_shuffle_shl_constant_op0(
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> <i16 42, i16 -42>, [[V:%.*]]
; CHECK-NEXT: [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x i16> [[BO]]
;
  %shuf = shufflevector <2 x i16> %v, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %bo = shl <4 x i16> <i16 42, i16 -42, i16 -1, i16 -1>, %shuf
  ret <4 x i16> %bo
}
; A binop that produces undef in the high lanes can be moved before the shuffle.
; This is ok because 'shl undef, 0 --> undef'.
; Expected: a <2 x i16> shl of %v by the first two shift amounts (2, 4), then
; the widening shuffle.
define <4 x i16> @widening_shuffle_shl_constant_op1(<2 x i16> %v) {
; CHECK-LABEL: @widening_shuffle_shl_constant_op1(
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> [[V:%.*]], <i16 2, i16 4>
; CHECK-NEXT: [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x i16> [[BO]]
;
  %shuf = shufflevector <2 x i16> %v, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %bo = shl <4 x i16> %shuf, <i16 2, i16 4, i16 0, i16 0>
  ret <4 x i16> %bo
}
; A binop that does not produce undef in the high lanes can not be moved before the shuffle.
; This is not ok because 'shl undef, 1 (or 2) --> 0' but moving the shuffle results in undef instead.
; Expected: shuffle and shl are left in place.
define <4 x i16> @widening_shuffle_shl_constant_op1_non0(<2 x i16> %v) {
; CHECK-LABEL: @widening_shuffle_shl_constant_op1_non0(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[BO:%.*]] = shl <4 x i16> [[SHUF]], <i16 2, i16 4, i16 1, i16 2>
; CHECK-NEXT: ret <4 x i16> [[BO]]
;
  %shuf = shufflevector <2 x i16> %v, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %bo = shl <4 x i16> %shuf, <i16 2, i16 4, i16 1, i16 2>
  ret <4 x i16> %bo
}
; A binop that does not produce undef in the high lanes can not be moved before the shuffle.
; This is not ok because 'or -1, undef --> -1' but moving the shuffle results in undef instead.
; Expected: shuffle and or are left in place.
define <4 x i16> @widening_shuffle_or(<2 x i16> %v) {
; CHECK-LABEL: @widening_shuffle_or(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[BO:%.*]] = or <4 x i16> [[SHUF]], <i16 42, i16 -42, i16 -1, i16 -1>
; CHECK-NEXT: ret <4 x i16> [[BO]]
;
  %shuf = shufflevector <2 x i16> %v, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %bo = or <4 x i16> %shuf, <i16 42, i16 -42, i16 -1, i16 -1>
  ret <4 x i16> %bo
}
; Reverse %v, add the reversed vector to itself, then reverse the sum again.
; Expected: both shuffles disappear and the add becomes a shl by 1 on %v.
define <4 x i32> @shuffle_17add2(<4 x i32> %v) {
; CHECK-LABEL: @shuffle_17add2(
; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
  %t1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %t2 = add <4 x i32> %t1, %t1
  %r = shufflevector <4 x i32> %t2, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %r
}
; Splat lane 0 of %v, square the splat, then splat lane 1 of the product.
; Expected: one mul of %v by itself followed by a single lane-0 splat.
define <4 x i32> @shuffle_17mulsplat(<4 x i32> %v) {
; CHECK-LABEL: @shuffle_17mulsplat(
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[V:%.*]], [[V]]
; CHECK-NEXT: [[M1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> [[M1]]
;
  %s1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
  %m1 = mul <4 x i32> %s1, %s1
  %s2 = shufflevector <4 x i32> %m1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %s2
}
; The shuffle sources have different widths (<4 x i32> vs <2 x i32>), so the mul
; must not be hoisted above the shuffles.
; Expected: both shuffles remain (their unused second operand becomes undef) and
; the mul still consumes the shuffled values.
define <2 x i32> @pr19717(<4 x i32> %in0, <2 x i32> %in1) {
; CHECK-LABEL: @pr19717(
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[IN0:%.*]], <4 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[SHUFFLE4:%.*]] = shufflevector <2 x i32> [[IN1:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[SHUFFLE]], [[SHUFFLE4]]
; CHECK-NEXT: ret <2 x i32> [[MUL]]
;
  %shuffle = shufflevector <4 x i32> %in0, <4 x i32> %in0, <2 x i32> zeroinitializer
  %shuffle4 = shufflevector <2 x i32> %in1, <2 x i32> %in1, <2 x i32> zeroinitializer
  %mul = mul <2 x i32> %shuffle, %shuffle4
  ret <2 x i32> %mul
}
; Both operands are narrowing shuffles of <8 x i16> sources using the same mask.
; Expected: the mul is performed on the <8 x i16> sources and a single shuffle
; with that mask is applied to the product.
define <4 x i16> @pr19717a(<8 x i16> %in0, <8 x i16> %in1) {
; CHECK-LABEL: @pr19717a(
; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i16> [[IN0:%.*]], [[IN1:%.*]]
; CHECK-NEXT: [[MUL:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
; CHECK-NEXT: ret <4 x i16> [[MUL]]
;
  %shuffle = shufflevector <8 x i16> %in0, <8 x i16> %in0, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
  %shuffle1 = shufflevector <8 x i16> %in1, <8 x i16> %in1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
  %mul = mul <4 x i16> %shuffle, %shuffle1
  ret <4 x i16> %mul
}
; A narrowing, lane-reversing shuffle (<16 x i8> to <8 x i8>) followed by a second
; lane-reversing shuffle. Expected: both shuffles are left in place.
define <8 x i8> @pr19730(<16 x i8> %in0) {
; CHECK-LABEL: @pr19730(
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[IN0:%.*]], <16 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x i8> [[SHUFFLE]], <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x i8> [[SHUFFLE1]]
;
  %shuffle = shufflevector <16 x i8> %in0, <16 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %shuffle1 = shufflevector <8 x i8> %shuffle, <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i8> %shuffle1
}
; Only lane 0 of the result is extracted. Lane 0 of the shuffle comes from the
; zeroinitializer operand, so after the xor with -1 the 'and' mask in that lane
; is all-ones. Expected: a single extractelement of lane 0 of %in0.
define i32 @pr19737(<4 x i32> %in0) {
; CHECK-LABEL: @pr19737(
; CHECK-NEXT: [[RV:%.*]] = extractelement <4 x i32> [[IN0:%.*]], i32 0
; CHECK-NEXT: ret i32 [[RV]]
;
  %shuffle.i = shufflevector <4 x i32> zeroinitializer, <4 x i32> %in0, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %neg.i = xor <4 x i32> %shuffle.i, <i32 -1, i32 -1, i32 -1, i32 -1>
  %and.i = and <4 x i32> %in0, %neg.i
  %rv = extractelement <4 x i32> %and.i, i32 0
  ret i32 %rv
}
; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed
; for an srem operation. This is not a valid optimization because it may cause a trap
; on div-by-zero.
; Expected: both splats and the srem are left in place.
define <4 x i32> @pr20059(<4 x i32> %p1, <4 x i32> %p2) {
; CHECK-LABEL: @pr20059(
; CHECK-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[P1:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[P2:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[RETVAL:%.*]] = srem <4 x i32> [[SPLAT1]], [[SPLAT2]]
; CHECK-NEXT: ret <4 x i32> [[RETVAL]]
;
  %splat1 = shufflevector <4 x i32> %p1, <4 x i32> poison, <4 x i32> zeroinitializer
  %splat2 = shufflevector <4 x i32> %p2, <4 x i32> poison, <4 x i32> zeroinitializer
  %retval = srem <4 x i32> %splat1, %splat2
  ret <4 x i32> %retval
}
; The 'and' constant is a constant expression (a bitcast of ptrtoint of this
; function's own address). Expected: the shuffle stays ahead of the 'and'; the
; only change is that the constant expression moves to the right-hand operand.
define <4 x i32> @pr20114(<4 x i32> %__mask) {
; CHECK-LABEL: @pr20114(
; CHECK-NEXT: [[MASK01_I:%.*]] = shufflevector <4 x i32> [[__MASK:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT: [[MASKED_NEW_I_I_I:%.*]] = and <4 x i32> [[MASK01_I]], bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64)> to <4 x i32>)
; CHECK-NEXT: ret <4 x i32> [[MASKED_NEW_I_I_I]]
;
  %mask01.i = shufflevector <4 x i32> %__mask, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %masked_new.i.i.i = and <4 x i32> bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64)> to <4 x i32>), %mask01.i
  ret <4 x i32> %masked_new.i.i.i
}
; Narrowing shuffle on a vector of pointers that keeps lanes 0 and 1.
; Expected: the shuffle is left in place.
define <2 x i32*> @pr23113(<4 x i32*> %A) {
; CHECK-LABEL: @pr23113(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32*> [[A:%.*]], <4 x i32*> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: ret <2 x i32*> [[TMP1]]
;
  %1 = shufflevector <4 x i32*> %A, <4 x i32*> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i32*> %1
}
; Unused lanes in the new binop should not kill the entire op (although it may simplify anyway as shown here).
; Expected: 'urem X, 1' folds the whole function to a zero vector.
define <2 x i32> @PR37648(<2 x i32> %x) {
; CHECK-LABEL: @PR37648(
; CHECK-NEXT: ret <2 x i32> zeroinitializer
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = urem <2 x i32> %splat, <i32 1, i32 1>
  ret <2 x i32> %r
}
; Test shuffle followed by binop with splat constant for all 18 binop opcodes.
; Test with constant as operand 0 and operand 1 for non-commutative opcodes.

; add: expected to be hoisted above the splat; the constant lane the splat
; never reads becomes poison.
define <2 x i32> @add_splat_constant(<2 x i32> %x) {
; CHECK-LABEL: @add_splat_constant(
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 42, i32 poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = add <2 x i32> %splat, <i32 42, i32 42>
  ret <2 x i32> %r
}
; sub with the constant as operand 0: expected to be hoisted above the splat;
; the constant lane the splat never reads becomes poison.
define <2 x i32> @sub_splat_constant0(<2 x i32> %x) {
; CHECK-LABEL: @sub_splat_constant0(
; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> <i32 42, i32 poison>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = sub <2 x i32> <i32 42, i32 42>, %splat
  ret <2 x i32> %r
}
; sub with the constant as operand 1: expected to become an add of the negated
; constant (-42) hoisted above the splat; the unread constant lane becomes poison.
define <2 x i32> @sub_splat_constant1(<2 x i32> %x) {
; CHECK-LABEL: @sub_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -42, i32 poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = sub <2 x i32> %splat, <i32 42, i32 42>
  ret <2 x i32> %r
}
; mul: expected to be hoisted above the splat; the constant lane the splat
; never reads becomes poison.
define <2 x i32> @mul_splat_constant(<2 x i32> %x) {
; CHECK-LABEL: @mul_splat_constant(
; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[X:%.*]], <i32 42, i32 poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = mul <2 x i32> %splat, <i32 42, i32 42>
  ret <2 x i32> %r
}
; shl with the constant as the shifted value: expected to be hoisted above the
; splat; the constant lane the splat never reads becomes undef.
define <2 x i32> @shl_splat_constant0(<2 x i32> %x) {
; CHECK-LABEL: @shl_splat_constant0(
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> <i32 5, i32 undef>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = shl <2 x i32> <i32 5, i32 5>, %splat
  ret <2 x i32> %r
}
; shl with the constant as the shift amount: expected to be hoisted above the
; splat; the shift-amount lane the splat never reads becomes 0.
define <2 x i32> @shl_splat_constant1(<2 x i32> %x) {
; CHECK-LABEL: @shl_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 0>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = shl <2 x i32> %splat, <i32 5, i32 5>
  ret <2 x i32> %r
}
; ashr with the constant as the shifted value: expected to be hoisted above the
; splat; the constant lane the splat never reads becomes undef.
define <2 x i32> @ashr_splat_constant0(<2 x i32> %x) {
; CHECK-LABEL: @ashr_splat_constant0(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> <i32 5, i32 undef>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = ashr <2 x i32> <i32 5, i32 5>, %splat
  ret <2 x i32> %r
}
; ashr with the constant as the shift amount: expected to be hoisted above the
; splat; the shift-amount lane the splat never reads becomes 0.
define <2 x i32> @ashr_splat_constant1(<2 x i32> %x) {
; CHECK-LABEL: @ashr_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 5, i32 0>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = ashr <2 x i32> %splat, <i32 5, i32 5>
  ret <2 x i32> %r
}
; lshr with the constant as the shifted value: expected to be hoisted above the
; splat; the constant lane the splat never reads becomes undef.
define <2 x i32> @lshr_splat_constant0(<2 x i32> %x) {
; CHECK-LABEL: @lshr_splat_constant0(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> <i32 5, i32 undef>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = lshr <2 x i32> <i32 5, i32 5>, %splat
  ret <2 x i32> %r
}
; lshr with the constant as the shift amount: expected to be hoisted above the
; splat; the shift-amount lane the splat never reads becomes 0.
define <2 x i32> @lshr_splat_constant1(<2 x i32> %x) {
; CHECK-LABEL: @lshr_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 5, i32 0>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = lshr <2 x i32> %splat, <i32 5, i32 5>
  ret <2 x i32> %r
}
; urem with the constant as the dividend and the splat as the divisor.
; Expected: the splat and the urem are left in place.
define <2 x i32> @urem_splat_constant0(<2 x i32> %x) {
; CHECK-LABEL: @urem_splat_constant0(
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = urem <2 x i32> <i32 42, i32 42>, [[SPLAT]]
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = urem <2 x i32> <i32 42, i32 42>, %splat
  ret <2 x i32> %r
}
; urem with the constant as the divisor: expected to be hoisted above the splat;
; the divisor lane the splat never reads becomes 1.
define <2 x i32> @urem_splat_constant1(<2 x i32> %x) {
; CHECK-LABEL: @urem_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = urem <2 x i32> [[X:%.*]], <i32 42, i32 1>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = urem <2 x i32> %splat, <i32 42, i32 42>
  ret <2 x i32> %r
}
; srem with the constant as the dividend and the splat as the divisor.
; Expected: the splat and the srem are left in place.
define <2 x i32> @srem_splat_constant0(<2 x i32> %x) {
; CHECK-LABEL: @srem_splat_constant0(
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = srem <2 x i32> <i32 42, i32 42>, [[SPLAT]]
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = srem <2 x i32> <i32 42, i32 42>, %splat
  ret <2 x i32> %r
}
; srem with the constant as the divisor: expected to be hoisted above the splat;
; the divisor lane the splat never reads becomes 1.
define <2 x i32> @srem_splat_constant1(<2 x i32> %x) {
; CHECK-LABEL: @srem_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = srem <2 x i32> [[X:%.*]], <i32 42, i32 1>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = srem <2 x i32> %splat, <i32 42, i32 42>
  ret <2 x i32> %r
}
; udiv with the constant as the dividend and the splat as the divisor.
; Expected: the splat and the udiv are left in place.
define <2 x i32> @udiv_splat_constant0(<2 x i32> %x) {
; CHECK-LABEL: @udiv_splat_constant0(
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = udiv <2 x i32> <i32 42, i32 42>, [[SPLAT]]
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = udiv <2 x i32> <i32 42, i32 42>, %splat
  ret <2 x i32> %r
}
; udiv with the constant as the divisor: expected to be hoisted above the splat;
; the divisor lane the splat never reads becomes 1.
define <2 x i32> @udiv_splat_constant1(<2 x i32> %x) {
; CHECK-LABEL: @udiv_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i32> [[X:%.*]], <i32 42, i32 1>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = udiv <2 x i32> %splat, <i32 42, i32 42>
  ret <2 x i32> %r
}
; sdiv of a splat constant by a lane-0 splat of %x (the splat is the divisor).
; The expected output is unchanged: the shuffle stays ahead of the sdiv.
define <2 x i32> @sdiv_splat_constant0(<2 x i32> %x) {
; CHECK-LABEL: @sdiv_splat_constant0(
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = sdiv <2 x i32> <i32 42, i32 42>, [[SPLAT]]
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = sdiv <2 x i32> <i32 42, i32 42>, %splat
  ret <2 x i32> %r
}
; sdiv with a lane-0 splat of %x as the dividend and a splat constant as the
; divisor. The expected output performs the sdiv on %x first and splats the
; result afterwards; the lane the splat never reads is divided by 1.
define <2 x i32> @sdiv_splat_constant1(<2 x i32> %x) {
; CHECK-LABEL: @sdiv_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[X:%.*]], <i32 42, i32 1>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = sdiv <2 x i32> %splat, <i32 42, i32 42>
  ret <2 x i32> %r
}
; AND of a lane-0 splat of %x with a splat constant. The expected output ANDs
; %x first and splats the result afterwards; the constant lane the splat never
; reads becomes poison.
define <2 x i32> @and_splat_constant(<2 x i32> %x) {
; CHECK-LABEL: @and_splat_constant(
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 42, i32 poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = and <2 x i32> %splat, <i32 42, i32 42>
  ret <2 x i32> %r
}
; AND does not fold to undef for undef operands, so it cannot be moved across
; a shuffle whose mask has undef elements. Here the undef-mask lanes (0 and 1)
; are ANDed with 0, and the expected output leaves both instructions in place.
define <4 x i16> @and_constant_mask_undef(<4 x i16> %add) {
; CHECK-LABEL: @and_constant_mask_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> poison, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], <i16 0, i16 0, i16 -1, i16 -1>
; CHECK-NEXT: ret <4 x i16> [[AND]]
;
entry:
  %shuffle = shufflevector <4 x i16> %add, <4 x i16> poison, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
  %and = and <4 x i16> %shuffle, <i16 0, i16 0, i16 -1, i16 -1>
  ret <4 x i16> %and
}
; AND does not fold to undef for undef operands, so it cannot be moved across
; a shuffle whose mask has undef elements. Here only the last mask element is
; undef and its lane is ANDed with 0 (written `-0` below); the expected output
; leaves both instructions in place.
define <4 x i16> @and_constant_mask_undef_2(<4 x i16> %add) {
; CHECK-LABEL: @and_constant_mask_undef_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 undef>
; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], <i16 -1, i16 -1, i16 -1, i16 0>
; CHECK-NEXT: ret <4 x i16> [[AND]]
;
entry:
  %shuffle = shufflevector <4 x i16> %add, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 undef>
  %and = and <4 x i16> %shuffle, <i16 -1, i16 -1, i16 -1, i16 -0>
  ret <4 x i16> %and
}
; We can move the AND across the shuffle, as -1 (the AND identity value) is
; used for the undef lane. Every defined lane is ANDed with 0, so the expected
; output is a constant vector with undef in the last lane.
define <4 x i16> @and_constant_mask_undef_3(<4 x i16> %add) {
; CHECK-LABEL: @and_constant_mask_undef_3(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i16> <i16 0, i16 0, i16 0, i16 undef>
;
entry:
  %shuffle = shufflevector <4 x i16> %add, <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 undef>
  %and = and <4 x i16> %shuffle, <i16 0, i16 0, i16 0, i16 -1>
  ret <4 x i16> %and
}
; We can move the AND across the shuffle, as -1 (the AND identity value) is
; used for the undef lane. The expected output ANDs the source vector first
; (source lanes the shuffle never reads get poison) and shuffles afterwards.
define <4 x i16> @and_constant_mask_undef_4(<4 x i16> %add) {
; CHECK-LABEL: @and_constant_mask_undef_4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i16> [[ADD:%.*]], <i16 9, i16 20, i16 poison, i16 poison>
; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 undef>
; CHECK-NEXT: ret <4 x i16> [[AND]]
;
entry:
  %shuffle = shufflevector <4 x i16> %add, <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 undef>
  %and = and <4 x i16> %shuffle, <i16 9, i16 20, i16 20, i16 -1>
  ret <4 x i16> %and
}
; AND after a shuffle whose mask has no undef elements. The expected output
; ANDs the source vector first, with the constant permuted to source-lane
; order (the unread source lane 0 gets poison), and shuffles afterwards.
define <4 x i16> @and_constant_mask_not_undef(<4 x i16> %add) {
; CHECK-LABEL: @and_constant_mask_not_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i16> [[ADD:%.*]], <i16 poison, i16 -1, i16 0, i16 0>
; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i16> [[AND]]
;
entry:
  %shuffle = shufflevector <4 x i16> %add, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 1>
  %and = and <4 x i16> %shuffle, <i16 0, i16 0, i16 -1, i16 -1>
  ret <4 x i16> %and
}
; OR does not fold to undef for undef operands, so it cannot be moved across
; a shuffle whose mask has undef elements. Here the undef-mask lanes (0 and 1)
; are ORed with -1, and the expected output leaves both instructions in place.
define <4 x i16> @or_constant_mask_undef(<4 x i16> %in) {
; CHECK-LABEL: @or_constant_mask_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], <i16 -1, i16 -1, i16 0, i16 0>
; CHECK-NEXT: ret <4 x i16> [[OR]]
;
entry:
  %shuffle = shufflevector <4 x i16> %in, <4 x i16> poison, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
  %or = or <4 x i16> %shuffle, <i16 -1, i16 -1, i16 0, i16 0>
  ret <4 x i16> %or
}
; OR does not fold to undef for undef operands, so it cannot be moved across
; a shuffle whose mask has undef elements. Here the undef-mask lanes (0 and 3)
; are ORed with -1, and the expected output leaves both instructions in place.
define <4 x i16> @or_constant_mask_undef_2(<4 x i16> %in) {
; CHECK-LABEL: @or_constant_mask_undef_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], <i16 -1, i16 0, i16 0, i16 -1>
; CHECK-NEXT: ret <4 x i16> [[OR]]
;
entry:
  %shuffle = shufflevector <4 x i16> %in, <4 x i16> poison, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
  %or = or <4 x i16> %shuffle, <i16 -1, i16 0, i16 0, i16 -1>
  ret <4 x i16> %or
}
; We can move the OR across the shuffle, as 0 (the OR identity value) is used
; for the undef lanes. Every defined lane is ORed with -1, so the expected
; output is a constant vector with undef in lanes 0 and 3.
define <4 x i16> @or_constant_mask_undef_3(<4 x i16> %in) {
; CHECK-LABEL: @or_constant_mask_undef_3(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i16> <i16 undef, i16 -1, i16 -1, i16 undef>
;
entry:
  %shuffle = shufflevector <4 x i16> %in, <4 x i16> poison, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
  %or = or <4 x i16> %shuffle, <i16 0, i16 -1, i16 -1, i16 0>
  ret <4 x i16> %or
}
; We can move the OR across the shuffle, as 0 (the OR identity value) is used
; for the undef lanes. The expected output ORs the source vector first (source
; lanes the shuffle never reads get poison) and shuffles afterwards.
define <4 x i16> @or_constant_mask_undef_4(<4 x i16> %in) {
; CHECK-LABEL: @or_constant_mask_undef_4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i16> [[IN:%.*]], <i16 poison, i16 99, i16 poison, i16 poison>
; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
; CHECK-NEXT: ret <4 x i16> [[OR]]
;
entry:
  %shuffle = shufflevector <4 x i16> %in, <4 x i16> poison, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
  %or = or <4 x i16> %shuffle, <i16 0, i16 99, i16 99, i16 0>
  ret <4 x i16> %or
}
; OR after a shuffle whose mask has no undef elements. The expected output
; ORs the source vector first, with the constant permuted to source-lane
; order (the unread source lane 0 gets poison), and shuffles afterwards.
; The result is named %and even though the operation is an OR; the name is
; kept so that it still corresponds to the [[AND]] capture below.
define <4 x i16> @or_constant_mask_not_undef(<4 x i16> %in) {
; CHECK-LABEL: @or_constant_mask_not_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i16> [[IN:%.*]], <i16 poison, i16 -1, i16 0, i16 0>
; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i16> [[AND]]
;
entry:
  %shuffle = shufflevector <4 x i16> %in, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 1, i32 1>
  %and = or <4 x i16> %shuffle, <i16 0, i16 0, i16 -1, i16 -1>
  ret <4 x i16> %and
}
; shl by a constant vector after a shuffle whose mask has an undef element
; (lane 1, shifted by 3). The expected output is unchanged: the shuffle stays
; ahead of the shl.
define <4 x i16> @shl_constant_mask_undef(<4 x i16> %in) {
; CHECK-LABEL: @shl_constant_mask_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> <i32 0, i32 undef, i32 1, i32 1>
; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i16> [[SHUFFLE]], <i16 10, i16 3, i16 0, i16 0>
; CHECK-NEXT: ret <4 x i16> [[SHL]]
;
entry:
  %shuffle = shufflevector <4 x i16> %in, <4 x i16> poison, <4 x i32> <i32 0, i32 undef, i32 1, i32 1>
  %shl = shl <4 x i16> %shuffle, <i16 10, i16 3, i16 0, i16 0>
  ret <4 x i16> %shl
}
; add of a constant vector after a shuffle with undef mask elements in lanes
; 0 and 1. The defined lanes (2 and 3) add 0, and the expected output drops
; the add entirely, leaving only the shuffle.
define <4 x i16> @add_constant_mask_undef(<4 x i16> %in) {
; CHECK-LABEL: @add_constant_mask_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i16> [[ADD]]
;
entry:
  %shuffle = shufflevector <4 x i16> %in, <4 x i16> poison, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
  %add = add <4 x i16> %shuffle, <i16 10, i16 3, i16 0, i16 0>
  ret <4 x i16> %add
}
; add of a constant vector after a shuffle with one undef mask element
; (lane 0). The expected output adds to the source vector first, with the
; constant permuted to source-lane order (unread source lanes get poison),
; and shuffles afterwards.
define <4 x i16> @add_constant_mask_undef_2(<4 x i16> %in) {
; CHECK-LABEL: @add_constant_mask_undef_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i16> [[IN:%.*]], <i16 poison, i16 0, i16 3, i16 poison>
; CHECK-NEXT: [[ADD:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i16> [[ADD]]
;
entry:
  %shuffle = shufflevector <4 x i16> %in, <4 x i16> poison, <4 x i32> <i32 undef, i32 2, i32 1, i32 1>
  %add = add <4 x i16> %shuffle, <i16 10, i16 3, i16 0, i16 0>
  ret <4 x i16> %add
}
; sub of a constant vector after a shuffle with undef mask elements in lanes
; 0 and 1. The defined lanes (2 and 3) subtract 0, and the expected output
; drops the sub entirely, leaving only the shuffle.
define <4 x i16> @sub_constant_mask_undef(<4 x i16> %in) {
; CHECK-LABEL: @sub_constant_mask_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SUB:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i16> [[SUB]]
;
entry:
  %shuffle = shufflevector <4 x i16> %in, <4 x i16> poison, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
  %sub = sub <4 x i16> %shuffle, <i16 10, i16 3, i16 0, i16 0>
  ret <4 x i16> %sub
}
; sub of a constant vector after a shuffle with one undef mask element
; (lane 3). The expected output rewrites the sub as an add of the negated
; constant on the source vector, permuted to source-lane order (unread source
; lanes get poison), and shuffles afterwards.
define <4 x i16> @sub_constant_mask_undef_2(<4 x i16> %in) {
; CHECK-LABEL: @sub_constant_mask_undef_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i16> [[IN:%.*]], <i16 poison, i16 0, i16 -10, i16 poison>
; CHECK-NEXT: [[SUB:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 undef>
; CHECK-NEXT: ret <4 x i16> [[SUB]]
;
entry:
  %shuffle = shufflevector <4 x i16> %in, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 undef>
  %sub = sub <4 x i16> %shuffle, <i16 0, i16 0, i16 10, i16 99>
  ret <4 x i16> %sub
}
; OR of a lane-0 splat of %x with a splat constant. The expected output ORs
; %x first and splats the result afterwards; the constant lane the splat never
; reads becomes poison.
define <2 x i32> @or_splat_constant(<2 x i32> %x) {
; CHECK-LABEL: @or_splat_constant(
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], <i32 42, i32 poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = or <2 x i32> %splat, <i32 42, i32 42>
  ret <2 x i32> %r
}
; XOR of a lane-0 splat of %x with a splat constant. The expected output XORs
; %x first and splats the result afterwards; the constant lane the splat never
; reads becomes poison.
define <2 x i32> @xor_splat_constant(<2 x i32> %x) {
; CHECK-LABEL: @xor_splat_constant(
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], <i32 42, i32 poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
  %r = xor <2 x i32> %splat, <i32 42, i32 42>
  ret <2 x i32> %r
}
; fadd of a lane-0 splat of %x with a splat constant. The expected output
; applies the fadd to %x first and splats the result; the constant lane that
; is never read becomes poison.
define <2 x float> @fadd_splat_constant(<2 x float> %x) {
; CHECK-LABEL: @fadd_splat_constant(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], <float 4.200000e+01, float poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %splat = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> zeroinitializer
  %r = fadd <2 x float> %splat, <float 42.0, float 42.0>
  ret <2 x float> %r
}
; fsub with the splat constant as the left operand and a lane-0 splat of %x as
; the right operand. The expected output subtracts %x from the constant first
; (unused constant lane is poison) and splats the result.
define <2 x float> @fsub_splat_constant0(<2 x float> %x) {
; CHECK-LABEL: @fsub_splat_constant0(
; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> <float 4.200000e+01, float poison>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %splat = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> zeroinitializer
  %r = fsub <2 x float> <float 42.0, float 42.0>, %splat
  ret <2 x float> %r
}
; fsub with a lane-0 splat of %x as the left operand and the splat constant as
; the right operand. The expected output is an fadd of the negated constant
; (-42.0, unused lane poison) applied to %x, followed by the splat.
define <2 x float> @fsub_splat_constant1(<2 x float> %x) {
; CHECK-LABEL: @fsub_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], <float -4.200000e+01, float poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %splat = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> zeroinitializer
  %r = fsub <2 x float> %splat, <float 42.0, float 42.0>
  ret <2 x float> %r
}
; Negation written as (fsub -0.0, splat %x). The expected output is an fneg of
; %x followed by the splat.
define <2 x float> @fneg(<2 x float> %x) {
; CHECK-LABEL: @fneg(
; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %splat = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> zeroinitializer
  %r = fsub <2 x float> <float -0.0, float -0.0>, %splat
  ret <2 x float> %r
}
; fmul of a lane-0 splat of %x with a splat constant. The expected output
; applies the fmul to %x first (unused constant lane is poison) and splats the
; result.
define <2 x float> @fmul_splat_constant(<2 x float> %x) {
; CHECK-LABEL: @fmul_splat_constant(
; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], <float 4.200000e+01, float poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %splat = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> zeroinitializer
  %r = fmul <2 x float> %splat, <float 42.0, float 42.0>
  ret <2 x float> %r
}
; fdiv with the splat constant as the dividend and a lane-0 splat of %x as the
; divisor. The expected output divides the constant (unused lane poison) by %x
; first and splats the result.
define <2 x float> @fdiv_splat_constant0(<2 x float> %x) {
; CHECK-LABEL: @fdiv_splat_constant0(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x float> <float 4.200000e+01, float poison>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %splat = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> zeroinitializer
  %r = fdiv <2 x float> <float 42.0, float 42.0>, %splat
  ret <2 x float> %r
}
; fdiv with a lane-0 splat of %x as the dividend and the splat constant as the
; divisor. The expected output divides %x by the constant first (unused
; constant lane is poison) and splats the result.
define <2 x float> @fdiv_splat_constant1(<2 x float> %x) {
; CHECK-LABEL: @fdiv_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x float> [[X:%.*]], <float 4.200000e+01, float poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %splat = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> zeroinitializer
  %r = fdiv <2 x float> %splat, <float 42.0, float 42.0>
  ret <2 x float> %r
}
; frem with the splat constant as the left operand and a lane-0 splat of %x as
; the right operand. The expected output performs the frem on %x first (unused
; constant lane is poison) and splats the result.
define <2 x float> @frem_splat_constant0(<2 x float> %x) {
; CHECK-LABEL: @frem_splat_constant0(
; CHECK-NEXT: [[TMP1:%.*]] = frem <2 x float> <float 4.200000e+01, float poison>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %splat = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> zeroinitializer
  %r = frem <2 x float> <float 42.0, float 42.0>, %splat
  ret <2 x float> %r
}
; frem with a lane-0 splat of %x as the left operand and the splat constant as
; the right operand. The expected output performs the frem on %x first (unused
; constant lane is poison) and splats the result.
define <2 x float> @frem_splat_constant1(<2 x float> %x) {
; CHECK-LABEL: @frem_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = frem <2 x float> [[X:%.*]], <float 4.200000e+01, float poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %splat = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> zeroinitializer
  %r = frem <2 x float> %splat, <float 42.0, float 42.0>
  ret <2 x float> %r
}
; Equivalent shuffle masks, but only one is a narrowing op.
; The expected output keeps the widening shuffle, the narrowing shuffle and
; the 'and' exactly as written in the input.
define <2 x i1> @PR40734(<1 x i1> %x, <4 x i1> %y) {
; CHECK-LABEL: @PR40734(
; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <1 x i1> zeroinitializer, <1 x i1> [[X:%.*]], <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[NARROW:%.*]] = shufflevector <4 x i1> [[Y:%.*]], <4 x i1> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[WIDEN]], [[NARROW]]
; CHECK-NEXT: ret <2 x i1> [[R]]
;
  %widen = shufflevector <1 x i1> zeroinitializer, <1 x i1> %x, <2 x i32> <i32 0, i32 1>
  %narrow = shufflevector <4 x i1> %y, <4 x i1> poison, <2 x i32> <i32 0, i32 1>
  %r = and <2 x i1> %widen, %narrow
  ret <2 x i1> %r
}
; Negative test - do not transform non-power-of-2 unless we know the backend handles these sequences identically.
; Both <3 x i8> -> <7 x i8> widening shuffles and the final two-operand shuffle
; are expected to remain in the output.
define <7 x i8> @insert_subvector_shuffles(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: @insert_subvector_shuffles(
; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> poison, <7 x i32> <i32 undef, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <7 x i32> <i32 0, i32 8, i32 1, i32 undef, i32 8, i32 1, i32 9>
; CHECK-NEXT: ret <7 x i8> [[S3]]
;
  %s1 = shufflevector <3 x i8> %x, <3 x i8> poison, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <3 x i8> %y, <3 x i8> poison, <7 x i32> <i32 undef, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
  %s3 = shufflevector <7 x i8> %s1, <7 x i8> %s2, <7 x i32> <i32 0, i32 8, i32 1, i32 undef, i32 8, i32 1, i32 9>
  ret <7 x i8> %s3
}
; Power-of-2 element-count variant: the two <2 x i8> -> <8 x i8> widening
; shuffles are expected to fold away, leaving a single shuffle that reads
; directly from %x and %y.
define <8 x i8> @insert_subvector_shuffles_pow2elts(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @insert_subvector_shuffles_pow2elts(
; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <8 x i32> <i32 0, i32 2, i32 1, i32 undef, i32 2, i32 1, i32 3, i32 0>
; CHECK-NEXT: ret <8 x i8> [[S3]]
;
  %s1 = shufflevector <2 x i8> %x, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <2 x i8> %y, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s3 = shufflevector <8 x i8> %s1, <8 x i8> %s2, <8 x i32> <i32 0, i32 8, i32 1, i32 undef, i32 8, i32 1, i32 9, i32 0>
  ret <8 x i8> %s3
}
; The last shuffle may change the vector type.
; Negative test - do not transform non-power-of-2 unless we know the backend handles these sequences identically.
; All three shuffles remain in the expected output; only the mask elements of
; the widening shuffles that the final shuffle does not read become undef.
define <2 x i8> @insert_subvector_shuffles_narrowing(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: @insert_subvector_shuffles_narrowing(
; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <7 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> poison, <7 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <2 x i32> <i32 0, i32 8>
; CHECK-NEXT: ret <2 x i8> [[S3]]
;
  %s1 = shufflevector <3 x i8> %x, <3 x i8> poison, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <3 x i8> %y, <3 x i8> poison, <7 x i32> <i32 undef, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
  %s3 = shufflevector <7 x i8> %s1, <7 x i8> %s2, <2 x i32> <i32 0, i32 8>
  ret <2 x i8> %s3
}
; Power-of-2 element-count variant of the narrowing case: the widening
; shuffles are expected to fold away, leaving one shuffle that picks lane 0 of
; %x and lane 0 of %y.
define <2 x i8> @insert_subvector_shuffles_narrowing_pow2elts(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @insert_subvector_shuffles_narrowing_pow2elts(
; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 4>
; CHECK-NEXT: ret <2 x i8> [[S3]]
;
  %s1 = shufflevector <4 x i8> %x, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <4 x i8> %y, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %s3 = shufflevector <8 x i8> %s1, <8 x i8> %s2, <2 x i32> <i32 0, i32 8>
  ret <2 x i8> %s3
}
; Similar to above, but this reduces to a widen with undefs of 'x'.
; Both widening shuffles read the same source %x, so the expected output is a
; single <2 x double> -> <4 x double> shuffle of %x.
define <4 x double> @insert_subvector_shuffles_identity(<2 x double> %x) {
; CHECK-LABEL: @insert_subvector_shuffles_identity(
; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x double> [[S3]]
;
  %s1 = shufflevector <2 x double> %x, <2 x double> poison, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
  %s2 = shufflevector <2 x double> %x, <2 x double> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %s3 = shufflevector <4 x double> %s2, <4 x double> %s1, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
  ret <4 x double> %s3
}
; Negative test - not identity with padding (although this could be folded with better analysis).
; %s1 repeats source lane 1 in its last element, and the final shuffle reads
; that element (mask index 7); all three shuffles are expected to remain.
define <4 x double> @not_insert_subvector_shuffle(<2 x double> %x) {
; CHECK-LABEL: @not_insert_subvector_shuffle(
; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <4 x i32> <i32 undef, i32 1, i32 undef, i32 1>
; CHECK-NEXT: [[S2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> <i32 0, i32 5, i32 7, i32 undef>
; CHECK-NEXT: ret <4 x double> [[S3]]
;
  %s1 = shufflevector <2 x double> %x, <2 x double> poison, <4 x i32> <i32 undef, i32 1, i32 undef, i32 1>
  %s2 = shufflevector <2 x double> %x, <2 x double> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %s3 = shufflevector <4 x double> %s2, <4 x double> %s1, <4 x i32> <i32 0, i32 5, i32 7, i32 undef>
  ret <4 x double> %s3
}
; Negative test - operands are not the same size (although this could be partly folded with better analysis).
; %x is <2 x double> while %y is <3 x double>; all three shuffles are expected
; to remain in the output.
define <4 x double> @not_insert_subvector_shuffles_with_same_size(<2 x double> %x, <3 x double> %y) {
; CHECK-LABEL: @not_insert_subvector_shuffles_with_same_size(
; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x double> [[Y:%.*]], <3 x double> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x double> [[S3]]
;
  %s1 = shufflevector <2 x double> %x, <2 x double> poison, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
  %s2 = shufflevector <3 x double> %y, <3 x double> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %s3 = shufflevector <4 x double> %s2, <4 x double> %s1, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
  ret <4 x double> %s3
}
; Demanded vector elements may not be able to simplify a shuffle mask
; before we try to narrow it. This used to crash.
; The widened value is both returned and, after re-inserting lane 0 of %x,
; stored through %p; the expected output expresses the stored value as a
; second widening shuffle of %x.
define <4 x float> @insert_subvector_crash_invalid_mask_elt(<2 x float> %x, <4 x float>* %p) {
; CHECK-LABEL: @insert_subvector_crash_invalid_mask_elt(
; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[I:%.*]] = shufflevector <2 x float> [[X]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: store <4 x float> [[I]], <4 x float>* [[P:%.*]], align 16
; CHECK-NEXT: ret <4 x float> [[WIDEN]]
;
  %widen = shufflevector <2 x float> %x, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %ext2 = extractelement <2 x float> %x, i32 0
  %I = insertelement <4 x float> %widen, float %ext2, i16 0
  store <4 x float> %I, <4 x float>* %p
  ret <4 x float> %widen
}
; Splat of lane 0 of %x added to (%y + splat constant).
; The expected output adds the constant to %x first (only lane 0 of the constant
; is kept, the other lanes are poison), splats that sum, then adds %y.
define <4 x i32> @splat_assoc_add(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add(
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], <i32 317426, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %splatx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
  %a = add <4 x i32> %y, <i32 317426, i32 317426, i32 317426, i32 317426>
  %r = add <4 x i32> %splatx, %a
  ret <4 x i32> %r
}
; Scalable-vector variant of the splat/add reassociation test.
; The splat constant is written as a shufflevector/insertelement constant expression,
; and the expected output adds it to %x before the splat shuffle.
define <vscale x 4 x i32> @vsplat_assoc_add(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: @vsplat_assoc_add(
; CHECK-NEXT:    [[TMP1:%.*]] = add <vscale x 4 x i32> [[X:%.*]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 317426, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT:    [[R:%.*]] = add <vscale x 4 x i32> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
;
  %splatx = shufflevector <vscale x 4 x i32> %x, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %a = add <vscale x 4 x i32> %y, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 317426, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
  %r = add <vscale x 4 x i32> %splatx, %a
  ret <vscale x 4 x i32> %r
}
; Undefs in splat mask are replaced with defined splat index
; Here lanes 2 and 3 of the input mask are undef, and the expected output mask is all zeros.
define <4 x i32> @splat_assoc_add_undef_mask_elts(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add_undef_mask_elts(
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %splatx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
  %a = add <4 x i32> %y, <i32 42, i32 42, i32 42, i32 42>
  %r = add <4 x i32> %splatx, %a
  ret <4 x i32> %r
}
; Undefs in splat mask are replaced with defined splat index
; Here the undef mask element is lane 0, which is also the index being splatted.
define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add_undef_mask_elt_at_splat_index(
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %splatx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
  %a = add <4 x i32> %y, <i32 42, i32 42, i32 42, i32 42>
  %r = add <4 x i32> %splatx, %a
  ret <4 x i32> %r
}
; The constant operand has undef lanes (1 and 2) but is defined at the splat index.
; The expected output leaves the shuffle and both adds as written.
define <4 x i32> @splat_assoc_add_undef_constant_elts(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add_undef_constant_elts(
; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], <i32 42, i32 undef, i32 undef, i32 42>
; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %splatx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
  %a = add <4 x i32> %y, <i32 42, i32 undef, i32 undef, i32 42>
  %r = add <4 x i32> %splatx, %a
  ret <4 x i32> %r
}
; The constant operand is undef at the splat index (lane 0) and at lane 2.
; The expected output leaves the shuffle and both adds as written.
define <4 x i32> @splat_assoc_add_undef_constant_elt_at_splat_index(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add_undef_constant_elt_at_splat_index(
; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], <i32 undef, i32 42, i32 undef, i32 42>
; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %splatx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
  %a = add <4 x i32> %y, <i32 undef, i32 42, i32 undef, i32 42>
  %r = add <4 x i32> %splatx, %a
  ret <4 x i32> %r
}
; Both the splat mask (lanes 1 and 3) and the constant (lanes 1 and 2) contain undef elements.
; The expected output leaves all instructions as written.
define <4 x i32> @splat_assoc_add_undef_mask_elts_undef_constant_elts(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add_undef_mask_elts_undef_constant_elts(
; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 undef, i32 0, i32 undef>
; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], <i32 42, i32 undef, i32 undef, i32 42>
; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %splatx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 undef, i32 0, i32 undef>
  %a = add <4 x i32> %y, <i32 42, i32 undef, i32 undef, i32 42>
  %r = add <4 x i32> %splatx, %a
  ret <4 x i32> %r
}
; The splat mask is undef at the splat index (lane 0) and the constant has undef lanes 1 and 2.
; The expected output leaves all instructions as written.
define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elts(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elts(
; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], <i32 42, i32 undef, i32 undef, i32 42>
; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %splatx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
  %a = add <4 x i32> %y, <i32 42, i32 undef, i32 undef, i32 42>
  %r = add <4 x i32> %splatx, %a
  ret <4 x i32> %r
}
; Both the splat mask and the constant are undef at the splat index (lane 0).
; The expected output leaves all instructions as written.
define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elt_at_splat_index(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elt_at_splat_index(
; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], <i32 undef, i32 42, i32 undef, i32 42>
; CHECK-NEXT:    [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %splatx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
  %a = add <4 x i32> %y, <i32 undef, i32 42, i32 undef, i32 42>
  %r = add <4 x i32> %splatx, %a
  ret <4 x i32> %r
}
; Non-zero splat index; commute operands; FMF intersect
; The outer fmul carries reassoc nsz nnan and the inner one reassoc nsz, and the
; expected output keeps only reassoc nsz on both new fmuls.
define <2 x float> @splat_assoc_fmul(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @splat_assoc_fmul(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], <float poison, float 3.000000e+00>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT:    [[R:%.*]] = fmul reassoc nsz <2 x float> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT:    ret <2 x float> [[R]]
;
  %splatx = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 1>
  %a = fmul reassoc nsz <2 x float> %y, <float 3.0, float 3.0>
  %r = fmul reassoc nsz nnan <2 x float> %a, %splatx
  ret <2 x float> %r
}
; Two splat shuffles; drop poison-generating flags
; The inner mul has nsw, and neither mul in the expected output carries a wrap flag.
define <3 x i8> @splat_assoc_mul(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
; CHECK-LABEL: @splat_assoc_mul(
; CHECK-NEXT:    [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT:    [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT:    ret <3 x i8> [[R]]
;
  %splatx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
  %splatz = shufflevector <3 x i8> %z, <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
  %a = mul nsw <3 x i8> %y, %splatz
  %r = mul <3 x i8> %a, %splatx
  ret <3 x i8> %r
}
; The %splatx mask has an undef element in lane 0, which is not the splat index (2).
; The expected output still combines the two splats into one fully defined splat
; and drops the nsw/nuw flags.
define <3 x i8> @splat_assoc_mul_undef_elt1(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
; CHECK-LABEL: @splat_assoc_mul_undef_elt1(
; CHECK-NEXT:    [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT:    [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT:    ret <3 x i8> [[R]]
;
  %splatx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 undef, i32 2, i32 2>
  %splatz = shufflevector <3 x i8> %z, <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
  %a = mul nsw <3 x i8> %y, %splatz
  %r = mul nsw nuw <3 x i8> %a, %splatx
  ret <3 x i8> %r
}
; The %splatz mask (inner multiply operand) has an undef element in lane 0.
; The expected output keeps both shuffles and both multiplies, with flags retained.
define <3 x i8> @splat_assoc_mul_undef_elt2(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
; CHECK-LABEL: @splat_assoc_mul_undef_elt2(
; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT:    [[SPLATZ:%.*]] = shufflevector <3 x i8> [[Z:%.*]], <3 x i8> poison, <3 x i32> <i32 undef, i32 2, i32 2>
; CHECK-NEXT:    [[A:%.*]] = mul nsw <3 x i8> [[SPLATZ]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = mul nuw nsw <3 x i8> [[A]], [[SPLATX]]
; CHECK-NEXT:    ret <3 x i8> [[R]]
;
  %splatx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
  %splatz = shufflevector <3 x i8> %z, <3 x i8> poison, <3 x i32> <i32 undef, i32 2, i32 2>
  %a = mul nsw <3 x i8> %y, %splatz
  %r = mul nsw nuw <3 x i8> %a, %splatx
  ret <3 x i8> %r
}
; The %splatx mask is undef at the splat index itself (lane 2).
; The expected output still combines the two splats into one fully defined splat
; and drops the nsw/nuw flags.
define <3 x i8> @splat_assoc_mul_undef_elt_at_splat_index1(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
; CHECK-LABEL: @splat_assoc_mul_undef_elt_at_splat_index1(
; CHECK-NEXT:    [[TMP1:%.*]] = mul <3 x i8> [[Z:%.*]], [[X:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT:    [[R:%.*]] = mul <3 x i8> [[TMP2]], [[Y:%.*]]
; CHECK-NEXT:    ret <3 x i8> [[R]]
;
  %splatx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 undef>
  %splatz = shufflevector <3 x i8> %z, <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
  %a = mul nsw <3 x i8> %y, %splatz
  %r = mul nsw nuw <3 x i8> %a, %splatx
  ret <3 x i8> %r
}
; The %splatz mask (inner multiply operand) is undef at the splat index itself (lane 2).
; The expected output keeps both shuffles and both multiplies, with flags retained.
define <3 x i8> @splat_assoc_mul_undef_elt_at_splat_index2(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
; CHECK-LABEL: @splat_assoc_mul_undef_elt_at_splat_index2(
; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT:    [[SPLATZ:%.*]] = shufflevector <3 x i8> [[Z:%.*]], <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 undef>
; CHECK-NEXT:    [[A:%.*]] = mul nsw <3 x i8> [[SPLATZ]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = mul nuw nsw <3 x i8> [[A]], [[SPLATX]]
; CHECK-NEXT:    ret <3 x i8> [[R]]
;
  %splatx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
  %splatz = shufflevector <3 x i8> %z, <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 undef>
  %a = mul nsw <3 x i8> %y, %splatz
  %r = mul nsw nuw <3 x i8> %a, %splatx
  ret <3 x i8> %r
}
; Negative test - mismatched splat elements
; %splatx splats lane 1 while %splatz splats lane 2.
define <3 x i8> @splat_assoc_or(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
; CHECK-LABEL: @splat_assoc_or(
; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[SPLATZ:%.*]] = shufflevector <3 x i8> [[Z:%.*]], <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
; CHECK-NEXT:    [[A:%.*]] = or <3 x i8> [[SPLATZ]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = or <3 x i8> [[A]], [[SPLATX]]
; CHECK-NEXT:    ret <3 x i8> [[R]]
;
  %splatx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 1, i32 1, i32 1>
  %splatz = shufflevector <3 x i8> %z, <3 x i8> poison, <3 x i32> <i32 2, i32 2, i32 2>
  %a = or <3 x i8> %y, %splatz
  %r = or <3 x i8> %a, %splatx
  ret <3 x i8> %r
}
; Negative test - not associative
; Both fdivs carry reassoc nsz, and the expected output leaves them as written.
define <2 x float> @splat_assoc_fdiv(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @splat_assoc_fdiv(
; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[A:%.*]] = fdiv reassoc nsz <2 x float> [[Y:%.*]], <float 3.000000e+00, float 3.000000e+00>
; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc nsz <2 x float> [[A]], [[SPLATX]]
; CHECK-NEXT:    ret <2 x float> [[R]]
;
  %splatx = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> zeroinitializer
  %a = fdiv reassoc nsz <2 x float> %y, <float 3.0, float 3.0>
  %r = fdiv reassoc nsz <2 x float> %a, %splatx
  ret <2 x float> %r
}
; Negative test - extra use
; The inner fadd result %a is also passed to @use.
define <2 x float> @splat_assoc_fadd(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @splat_assoc_fadd(
; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT:    [[A:%.*]] = fadd fast <2 x float> [[Y:%.*]], <float 3.000000e+00, float 3.000000e+00>
; CHECK-NEXT:    call void @use(<2 x float> [[A]])
; CHECK-NEXT:    [[R:%.*]] = fadd fast <2 x float> [[A]], [[SPLATX]]
; CHECK-NEXT:    ret <2 x float> [[R]]
;
  %splatx = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 1>
  %a = fadd fast <2 x float> %y, <float 3.0, float 3.0>
  call void @use(<2 x float> %a)
  %r = fadd fast <2 x float> %a, %splatx
  ret <2 x float> %r
}
; Negative test - narrowing splat
; The splat shuffle takes a <4 x i32> source and produces a <3 x i32> result.
define <3 x i32> @splat_assoc_and(<4 x i32> %x, <3 x i32> %y) {
; CHECK-LABEL: @splat_assoc_and(
; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <3 x i32> zeroinitializer
; CHECK-NEXT:    [[A:%.*]] = and <3 x i32> [[Y:%.*]], <i32 42, i32 42, i32 42>
; CHECK-NEXT:    [[R:%.*]] = and <3 x i32> [[SPLATX]], [[A]]
; CHECK-NEXT:    ret <3 x i32> [[R]]
;
  %splatx = shufflevector <4 x i32> %x, <4 x i32> poison, <3 x i32> zeroinitializer
  %a = and <3 x i32> %y, <i32 42, i32 42, i32 42>
  %r = and <3 x i32> %splatx, %a
  ret <3 x i32> %r
}
; Negative test - widening splat
; The splat shuffle takes a <4 x i32> source and produces a <5 x i32> result.
; The expected output keeps the splat of %x and only regroups the two xors so the
; constant is applied last.
define <5 x i32> @splat_assoc_xor(<4 x i32> %x, <5 x i32> %y) {
; CHECK-LABEL: @splat_assoc_xor(
; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <5 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP1:%.*]] = xor <5 x i32> [[SPLATX]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = xor <5 x i32> [[TMP1]], <i32 42, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT:    ret <5 x i32> [[R]]
;
  %splatx = shufflevector <4 x i32> %x, <4 x i32> poison, <5 x i32> zeroinitializer
  %a = xor <5 x i32> %y, <i32 42, i32 42, i32 42, i32 42, i32 42>
  %r = xor <5 x i32> %splatx, %a
  ret <5 x i32> %r
}
; Negative test - opcode mismatch
; The inner operation is an add while the outer operation is a mul.
define <4 x i32> @splat_assoc_add_mul(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @splat_assoc_add_mul(
; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], <i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT:    [[R:%.*]] = mul <4 x i32> [[SPLATX]], [[A]]
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %splatx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
  %a = add <4 x i32> %y, <i32 42, i32 42, i32 42, i32 42>
  %r = mul <4 x i32> %splatx, %a
  ret <4 x i32> %r
}
; Do not crash on constant expressions.
; The second operand of the 'and' is a bitcast of a vector built from ptrtoint of
; this function's own address. The expected output leaves the IR as written.
define <4 x i32> @PR46872(<4 x i32> %x) {
; CHECK-LABEL: @PR46872(
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 1>
; CHECK-NEXT:    [[A:%.*]] = and <4 x i32> [[S]], bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @PR46872 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @PR46872 to i64)> to <4 x i32>)
; CHECK-NEXT:    ret <4 x i32> [[A]]
;
  %s = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 1>
  %a = and <4 x i32> %s, bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @PR46872 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @PR46872 to i64)> to <4 x i32>)
  ret <4 x i32> %a
}