; 2019-04-17 12:52:47 +08:00
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; The mask <0, 1, 2, 3> picks every lane of %v1 in order, so the checks expect
; the shuffle to vanish and %v1 to be returned directly.
define <4 x float> @test1(<4 x float> %v1) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: ret <4 x float> [[V1:%.*]]
;
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %v2
}
; Both shuffle operands are %v1 and the mask <0, 5, 2, 7> keeps each lane in
; place, so the checks expect %v1 to be returned unchanged.
define <4 x float> @test2(<4 x float> %v1) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: ret <4 x float> [[V1:%.*]]
;
  %v2 = shufflevector <4 x float> %v1, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %v2
}
; %f is inserted into lane 0 of %A, the shuffle moves that lane to result
; lane 1, and lane 1 is then extracted. The checks expect a plain 'ret %f'.
define float @test3(<4 x float> %A, <4 x float> %B, float %f) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: ret float [[F:%.*]]
;
  %C = insertelement <4 x float> %A, float %f, i32 0
  %D = shufflevector <4 x float> %C, <4 x float> %B, <4 x i32> <i32 5, i32 0, i32 2, i32 7>
  %E = extractelement <4 x float> %D, i32 1
  ret float %E
}
; Extracting lane 0 from a splat of lane 0 of %X: the checks expect a direct
; extractelement of lane 0 from %X with no shuffle left.
define i32 @test4(<4 x i32> %X) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
; CHECK-NEXT: ret i32 [[R]]
;
  %t = shufflevector <4 x i32> %X, <4 x i32> undef, <4 x i32> zeroinitializer
  %r = extractelement <4 x i32> %t, i32 0
  ret i32 %r
}
; Result lane 0 of the shuffle comes from lane 3 of %X (mask <3, 2, undef,
; undef>), so the checks expect a direct extract of lane 3 from %X.
define i32 @test5(<4 x i32> %X) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3
; CHECK-NEXT: ret i32 [[R]]
;
  %t = shufflevector <4 x i32> %X, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 undef, i32 undef>
  %r = extractelement <4 x i32> %t, i32 0
  ret i32 %r
}
; Same idea as a splat-then-extract, but the splat is performed on an i32
; bitcast of %X and cast back to float. The checks expect the bitcasts and the
; shuffle to disappear, leaving an extract of lane 0 from %X.
define float @test6(<4 x float> %X) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
; CHECK-NEXT: ret float [[R]]
;
  %X1 = bitcast <4 x float> %X to <4 x i32>
  %t = shufflevector <4 x i32> %X1, <4 x i32> undef, <4 x i32> zeroinitializer
  %t2 = bitcast <4 x i32> %t to <4 x float>
  %r = extractelement <4 x float> %t2, i32 0
  ret float %r
}
; Mask indices 6 and 7 select lanes of the undef second operand, so the checks
; expect those two mask elements to be rewritten as undef.
define <4 x float> @test7(<4 x float> %x) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x float> [[R]]
;
  %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %r
}
; A chain of extractelement/insertelement that builds a vector from lanes of
; %x and %y (plus one undef lane) should collapse into a single shufflevector.
define <4 x float> @test8(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @test8(
; CHECK-NEXT: [[T134:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> <i32 1, i32 undef, i32 3, i32 4>
; CHECK-NEXT: ret <4 x float> [[T134]]
;
  %t4 = extractelement <4 x float> %x, i32 1
  %t2 = extractelement <4 x float> %x, i32 3
  %t1 = extractelement <4 x float> %y, i32 0
  %t128 = insertelement <4 x float> undef, float %t4, i32 0
  %t130 = insertelement <4 x float> %t128, float undef, i32 1
  %t132 = insertelement <4 x float> %t130, float %t2, i32 2
  %t134 = insertelement <4 x float> %t132, float %t1, i32 3
  ret <4 x float> %t134
}
; Fold two shuffles where the first shuffle's vector inputs have a different
; length than the second shuffle's inputs (16 lanes narrowed to 4, then a
; 4-lane permute). The checks expect one shuffle straight from %t6.
define <4 x i8> @test9(<16 x i8> %t6) {
; CHECK-LABEL: @test9(
; CHECK-NEXT: [[T9:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> undef, <4 x i32> <i32 13, i32 9, i32 4, i32 13>
; CHECK-NEXT: ret <4 x i8> [[T9]]
;
  %t7 = shufflevector <16 x i8> %t6, <16 x i8> undef, <4 x i32> <i32 13, i32 9, i32 4, i32 13>
  %t9 = shufflevector <4 x i8> %t7, <4 x i8> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
  ret <4 x i8> %t9
}
; Variant of test9 that guards against confusing "undef" mask values with mask
; values of 2*N, where N is the mask length. These shuffles should not be
; folded (because [8,9,4,8] may not be a mask supported by the target); the
; checks only expect the last element of the second mask to become undef.
define <4 x i8> @test9a(<16 x i8> %t6) {
; CHECK-LABEL: @test9a(
; CHECK-NEXT: [[T7:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> undef, <4 x i32> <i32 undef, i32 9, i32 4, i32 8>
; CHECK-NEXT: [[T9:%.*]] = shufflevector <4 x i8> [[T7]], <4 x i8> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 undef>
; CHECK-NEXT: ret <4 x i8> [[T9]]
;
  %t7 = shufflevector <16 x i8> %t6, <16 x i8> undef, <4 x i32> <i32 undef, i32 9, i32 4, i32 8>
  %t9 = shufflevector <4 x i8> %t7, <4 x i8> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
  ret <4 x i8> %t9
}
; Fold two shuffles where the first shuffle's vector inputs have a different
; length than the second shuffle's inputs: here the first shuffle widens two
; 4-lane inputs to 8 lanes and the second narrows back to 4. The checks expect
; a single 4-lane shuffle of %t6 and %t7.
define <4 x i8> @test9b(<4 x i8> %t6, <4 x i8> %t7) {
; CHECK-LABEL: @test9b(
; CHECK-NEXT: [[T9:%.*]] = shufflevector <4 x i8> [[T6:%.*]], <4 x i8> [[T7:%.*]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i8> [[T9]]
;
  %t1 = shufflevector <4 x i8> %t6, <4 x i8> %t7, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>
  %t9 = shufflevector <8 x i8> %t1, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i8> %t9
}
; Redundant vector splats should be removed (Radar 8597790): moving lane 1 to
; lane 0 and then splatting lane 0 is expected to become one splat of lane 1.
define <4 x i32> @test10(<4 x i32> %t5) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: [[T7:%.*]] = shufflevector <4 x i32> [[T5:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i32> [[T7]]
;
  %t6 = shufflevector <4 x i32> %t5, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %t7 = shufflevector <4 x i32> %t6, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %t7
}
; Fold of shuffles where both inputs of the final shufflevector are themselves
; shuffles of the same source (%t6). The checks expect a single 8-lane
; shuffle taken directly from %t6.
define <8 x i8> @test11(<16 x i8> %t6) {
; CHECK-LABEL: @test11(
; CHECK-NEXT: [[T3:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <8 x i8> [[T3]]
;
  %t1 = shufflevector <16 x i8> %t6, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %t2 = shufflevector <16 x i8> %t6, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %t3
}
; Fold of two shuffles where the first shufflevector's inputs have the same
; type as the second's. The inner shuffle feeds operand 0 of the outer one and
; only its identity lanes (0-3) are used, so the checks expect the outer
; shuffle to read %t6 directly.
define <8 x i8> @test12(<8 x i8> %t6, <8 x i8> %t2) {
; CHECK-LABEL: @test12(
; CHECK-NEXT: [[T3:%.*]] = shufflevector <8 x i8> [[T6:%.*]], <8 x i8> [[T2:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9, i32 8, i32 11, i32 12>
; CHECK-NEXT: ret <8 x i8> [[T3]]
;
  %t1 = shufflevector <8 x i8> %t6, <8 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 undef, i32 7>
  %t3 = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9, i32 8, i32 11, i32 12>
  ret <8 x i8> %t3
}
; Like test12, but the inner shuffle feeds operand 1 of the outer shuffle.
; Only its identity lanes are selected (outer indices 8-11), so the checks
; expect the outer shuffle to read %t6 directly as its second operand.
define <8 x i8> @test12a(<8 x i8> %t6, <8 x i8> %t2) {
; CHECK-LABEL: @test12a(
; CHECK-NEXT: [[T3:%.*]] = shufflevector <8 x i8> [[T2:%.*]], <8 x i8> [[T6:%.*]], <8 x i32> <i32 0, i32 3, i32 1, i32 4, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: ret <8 x i8> [[T3]]
;
  %t1 = shufflevector <8 x i8> %t6, <8 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 undef, i32 7>
  %t3 = shufflevector <8 x i8> %t2, <8 x i8> %t1, <8 x i32> <i32 0, i32 3, i32 1, i32 4, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i8> %t3
}
; The second shuffle only extracts the leading lanes of the first, so the
; first shuffle's mask can be shortened and the second shuffle eliminated.
define <2 x i8> @extract_subvector_of_shuffle(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @extract_subvector_of_shuffle(
; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: ret <2 x i8> [[EXTRACT_SUBV]]
;
  %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <3 x i32> <i32 0, i32 2, i32 0>
  %extract_subv = shufflevector <3 x i8> %shuf, <3 x i8> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i8> %extract_subv
}
; Undef elements are acceptable in either mask; undefs in the 2nd shuffle's
; mask should carry over into the combined shuffle. The input vector type
; (<2 x i8>) is also allowed to differ from the result type (<4 x i8>).
define <4 x i8> @extract_subvector_of_shuffle_undefs_types(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @extract_subvector_of_shuffle_undefs_types(
; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <4 x i32> <i32 undef, i32 2, i32 0, i32 undef>
; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]]
;
  %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
  %extract_subv = shufflevector <5 x i8> %shuf, <5 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  ret <4 x i8> %extract_subv
}
; Negative test: the first shuffle has an additional user (the call), so
; combining would not remove an instruction. The transform is only done when
; an instruction can be eliminated; the checks expect both shuffles to stay.
declare void @use_v5i8(<5 x i8>)
define <4 x i8> @extract_subvector_of_shuffle_extra_use(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @extract_subvector_of_shuffle_extra_use(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
; CHECK-NEXT: call void @use_v5i8(<5 x i8> [[SHUF]])
; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <5 x i8> [[SHUF]], <5 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]]
;
  %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
  call void @use_v5i8(<5 x i8> %shuf)
  %extract_subv = shufflevector <5 x i8> %shuf, <5 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  ret <4 x i8> %extract_subv
}
; A lane-swapping shuffle after insertelements and a constant add. The checks
; expect the shuffle to be gone: the insert indices are swapped and the add
; constant is permuted to <7, 5>.
define <2 x i8> @test13a(i8 %x1, i8 %x2) {
; CHECK-LABEL: @test13a(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> undef, i8 [[X1:%.*]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[X2:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i8> [[TMP2]], <i8 7, i8 5>
; CHECK-NEXT: ret <2 x i8> [[TMP3]]
;
  %A = insertelement <2 x i8> undef, i8 %x1, i32 0
  %B = insertelement <2 x i8> %A, i8 %x2, i32 1
  %C = add <2 x i8> %B, <i8 5, i8 7>
  %D = shufflevector <2 x i8> %C, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x i8> %D
}
; Widening a vector math op is not a good canonicalization: the checks expect
; the <2 x i32> add to stay narrow, followed by the widening shuffle.
define <3 x i32> @add_wider(i32 %y, i32 %z) {
; CHECK-LABEL: @add_wider(
; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i32 1
; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[I1]], <i32 255, i32 255>
; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: ret <3 x i32> [[EXT]]
;
  %i0 = insertelement <2 x i32> undef, i32 %y, i32 0
  %i1 = insertelement <2 x i32> %i0, i32 %z, i32 1
  %a = add <2 x i32> %i1, <i32 255, i32 255>
  %ext = shufflevector <2 x i32> %a, <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
  ret <3 x i32> %ext
}
; Widening a vector op must not introduce illegal undef propagation: the
; checks expect the <2 x i32> sdiv to stay narrow, ahead of the widening
; shuffle.
define <3 x i32> @div_wider(i32 %y, i32 %z) {
; CHECK-LABEL: @div_wider(
; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i32 0
; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i32 1
; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i32> [[I1]], <i32 255, i32 255>
; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
; CHECK-NEXT: ret <3 x i32> [[EXT]]
;
  %i0 = insertelement <2 x i32> undef, i32 %y, i32 0
  %i1 = insertelement <2 x i32> %i0, i32 %z, i32 1
  %a = sdiv <2 x i32> %i1, <i32 255, i32 255>
  %ext = shufflevector <2 x i32> %a, <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 undef>
  ret <3 x i32> %ext
}
; With no math op involved, widening a chain of insertelements is a good
; canonicalization: the checks expect the inserts to be done directly on
; <3 x i8> with the shuffle removed.
define <3 x i8> @fold_inselts_with_widening_shuffle(i8 %x, i8 %y) {
; CHECK-LABEL: @fold_inselts_with_widening_shuffle(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i8> undef, i8 [[X:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x i8> [[TMP1]], i8 [[Y:%.*]], i32 1
; CHECK-NEXT: ret <3 x i8> [[TMP2]]
;
  %ins0 = insertelement <2 x i8> undef, i8 %x, i32 0
  %ins1 = insertelement <2 x i8> %ins0, i8 %y, i32 1
  %widen = shufflevector <2 x i8> %ins1, <2 x i8> undef, <3 x i32> <i32 0, i32 1, i32 undef>
  ret <3 x i8> %widen
}
; %x is inserted at lane 0 and the shuffle moves that lane to lane 1 (lane 0
; of the result is undef). The checks expect one insertelement at index 1.
define <2 x i8> @test13b(i8 %x) {
; CHECK-LABEL: @test13b(
; CHECK-NEXT: [[B:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 1
; CHECK-NEXT: ret <2 x i8> [[B]]
;
  %A = insertelement <2 x i8> undef, i8 %x, i32 0
  %B = shufflevector <2 x i8> %A, <2 x i8> undef, <2 x i32> <i32 undef, i32 0>
  ret <2 x i8> %B
}
; Two scalars are inserted into lanes 0 and 2 of a <4 x i8>, then a narrowing
; shuffle picks exactly those lanes. The checks expect two insertelements on
; <2 x i8> and no shuffle.
define <2 x i8> @test13c(i8 %x1, i8 %x2) {
; CHECK-LABEL: @test13c(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> undef, i8 [[X1:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> [[TMP1]], i8 [[X2:%.*]], i32 1
; CHECK-NEXT: ret <2 x i8> [[TMP2]]
;
  %A = insertelement <4 x i8> undef, i8 %x1, i32 0
  %B = insertelement <4 x i8> %A, i8 %x2, i32 2
  %C = shufflevector <4 x i8> %B, <4 x i8> undef, <2 x i32> <i32 0, i32 2>
  ret <2 x i8> %C
}
; Everything after the first store has no observable effect (the results end
; in an unused sext). The checks expect only the constant store through the
; undef pointer and 'ret void' to remain.
define void @test14(i16 %conv10) {
; CHECK-LABEL: @test14(
; CHECK-NEXT: store <4 x i16> <i16 undef, i16 undef, i16 undef, i16 23>, <4 x i16>* undef, align 8
; CHECK-NEXT: ret void
;
  %t = alloca <4 x i16>, align 8
  %vecinit6 = insertelement <4 x i16> undef, i16 23, i32 3
  store <4 x i16> %vecinit6, <4 x i16>* undef
  %t1 = load <4 x i16>, <4 x i16>* undef
  %vecinit11 = insertelement <4 x i16> undef, i16 %conv10, i32 3
  %div = udiv <4 x i16> %t1, %vecinit11
  store <4 x i16> %div, <4 x i16>* %t
  %t4 = load <4 x i16>, <4 x i16>* %t
  %t5 = shufflevector <4 x i16> %t4, <4 x i16> undef, <2 x i32> <i32 2, i32 0>
  %cmp = icmp ule <2 x i16> %t5, undef
  %sext = sext <2 x i1> %cmp to <2 x i16>
  ret void
}
; Verify that a sequence of insertelement/extractelement instructions is
; collapsed into a valid shufflevector with the correct shuffle indices.
define <4 x float> @test15a(<4 x float> %LHS, <4 x float> %RHS) {
; CHECK-LABEL: @test15a(
; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x float> [[LHS:%.*]], <4 x float> [[RHS:%.*]], <4 x i32> <i32 4, i32 0, i32 6, i32 6>
; CHECK-NEXT: ret <4 x float> [[T4]]
;
  %t1 = extractelement <4 x float> %LHS, i32 0
  %t2 = insertelement <4 x float> %RHS, float %t1, i32 1
  %t3 = extractelement <4 x float> %RHS, i32 2
  %t4 = insertelement <4 x float> %t2, float %t3, i32 3
  ret <4 x float> %t4
}
; Insert/extract chain with an extra round trip: lane 3 of %LHS is inserted
; into %RHS and immediately extracted again before being reinserted at lane 1.
; The checks expect a single shuffle with mask <4, 3, 6, 6>.
define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) {
; CHECK-LABEL: @test15b(
; CHECK-NEXT: [[T5:%.*]] = shufflevector <4 x float> [[LHS:%.*]], <4 x float> [[RHS:%.*]], <4 x i32> <i32 4, i32 3, i32 6, i32 6>
; CHECK-NEXT: ret <4 x float> [[T5]]
;
  %t0 = extractelement <4 x float> %LHS, i32 3
  %t1 = insertelement <4 x float> %RHS, float %t0, i32 0
  %t2 = extractelement <4 x float> %t1, i32 0
  %t3 = insertelement <4 x float> %RHS, float %t2, i32 1
  %t4 = extractelement <4 x float> %RHS, i32 2
  %t5 = insertelement <4 x float> %t3, float %t4, i32 3
  ret <4 x float> %t5
}
; Only lane 0 (constant 1, shifted left by 1) is selected by the shuffle; the
; lane that receives %ele is dropped. The checks expect the constant <i32 2>.
define <1 x i32> @test16a(i32 %ele) {
; CHECK-LABEL: @test16a(
; CHECK-NEXT: ret <1 x i32> <i32 2>
;
  %t0 = insertelement <2 x i32> <i32 1, i32 undef>, i32 %ele, i32 1
  %t1 = shl <2 x i32> %t0, <i32 1, i32 1>
  %t2 = shufflevector <2 x i32> %t1, <2 x i32> undef, <1 x i32> <i32 0>
  ret <1 x i32> %t2
}
; The shuffle selects lanes 1-4, all of which hold the constant 1 shifted left
; by 1; lane 6, where %ele is inserted, is not selected. The checks expect a
; constant splat of 2.
define <4 x i8> @test16b(i8 %ele) {
; CHECK-LABEL: @test16b(
; CHECK-NEXT: ret <4 x i8> <i8 2, i8 2, i8 2, i8 2>
;
  %t0 = insertelement <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 undef, i8 1>, i8 %ele, i32 6
  %t1 = shl <8 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %t2 = shufflevector <8 x i8> %t1, <8 x i8> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i8> %t2
}
; When two shuffles compose to the identity permutation, both can be removed:
; rotating by one lane and then rotating back should yield %v itself.
define <4 x i32> @shuffle_17ident(<4 x i32> %v) {
; CHECK-LABEL: @shuffle_17ident(
; CHECK-NEXT: ret <4 x i32> [[V:%.*]]
;
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %shuffle2 = shufflevector <4 x i32> %shuffle, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
  ret <4 x i32> %shuffle2
}
; The swizzle can be moved after the operation: both 'and' operands use the
; same mask, so the checks expect one 'and' of the originals followed by one
; shuffle.
define <4 x i32> @shuffle_17and(<4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: @shuffle_17and(
; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[V1:%.*]], [[V2:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %r = and <4 x i32> %t1, %t2
  ret <4 x i32> %r
}
; External function taking a <2 x float>; the tests in this file call it to
; give a value an additional use.
declare void @use(<2 x float>)
; A single extra use of one shuffled operand still allows the transform: the
; checks expect the fadd on the unshuffled inputs plus one shuffle of the
; result, with the %t1 shuffle kept only for the call.
define <2 x float> @shuffle_fadd_multiuse(<2 x float> %v1, <2 x float> %v2) {
; CHECK-LABEL: @shuffle_fadd_multiuse(
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x float> [[V1:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[V1]], [[V2:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: call void @use(<2 x float> [[T1]])
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %t1 = shufflevector <2 x float> %v1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
  %t2 = shufflevector <2 x float> %v2, <2 x float> undef, <2 x i32> <i32 1, i32 0>
  %r = fadd <2 x float> %t1, %t2
  call void @use(<2 x float> %t1)
  ret <2 x float> %r
}
; Same pattern as the fadd multi-use case, but with fdiv and the extra use on
; the second shuffled operand (%t2). The checks expect the fdiv to be done on
; the unshuffled inputs and shuffled once afterwards.
define <2 x float> @shuffle_fdiv_multiuse(<2 x float> %v1, <2 x float> %v2) {
; CHECK-LABEL: @shuffle_fdiv_multiuse(
; CHECK-NEXT: [[T2:%.*]] = shufflevector <2 x float> [[V2:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x float> [[V1:%.*]], [[V2]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: call void @use(<2 x float> [[T2]])
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %t1 = shufflevector <2 x float> %v1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
  %t2 = shufflevector <2 x float> %v2, <2 x float> undef, <2 x i32> <i32 1, i32 0>
  %r = fdiv <2 x float> %t1, %t2
  call void @use(<2 x float> %t2)
  ret <2 x float> %r
}
; Negative test: when both shuffled operands have another use, moving the
; shuffle after the fsub would need an additional instruction, so the checks
; expect the code to be left as is.
define <2 x float> @shuffle_fsub_multiuse(<2 x float> %v1, <2 x float> %v2) {
; CHECK-LABEL: @shuffle_fsub_multiuse(
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x float> [[V1:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[T2:%.*]] = shufflevector <2 x float> [[V2:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[R:%.*]] = fsub <2 x float> [[T1]], [[T2]]
; CHECK-NEXT: call void @use(<2 x float> [[T1]])
; CHECK-NEXT: call void @use(<2 x float> [[T2]])
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %t1 = shufflevector <2 x float> %v1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
  %t2 = shufflevector <2 x float> %v2, <2 x float> undef, <2 x i32> <i32 1, i32 0>
  %r = fsub <2 x float> %t1, %t2
  call void @use(<2 x float> %t1)
  call void @use(<2 x float> %t2)
  ret <2 x float> %r
}
; Both add operands are shuffled with the same mask; the checks expect the add
; to be performed on the originals and the shuffle applied once afterwards.
define <4 x i32> @shuffle_17add(<4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: @shuffle_17add(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V1:%.*]], [[V2:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %r = add <4 x i32> %t1, %t2
  ret <4 x i32> %r
}
; Same as the plain add case, with the 'nsw' flag: the checks expect the flag
; to be kept on the add that is moved ahead of the shuffle.
define <4 x i32> @shuffle_17addnsw(<4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: @shuffle_17addnsw(
; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[V1:%.*]], [[V2:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %r = add nsw <4 x i32> %t1, %t2
  ret <4 x i32> %r
}
; Same as the plain add case, with the 'nuw' flag: the checks expect the flag
; to be kept on the add that is moved ahead of the shuffle.
define <4 x i32> @shuffle_17addnuw(<4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: @shuffle_17addnuw(
; CHECK-NEXT: [[TMP1:%.*]] = add nuw <4 x i32> [[V1:%.*]], [[V2:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %t2 = shufflevector <4 x i32> %v2, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %r = add nuw <4 x i32> %t1, %t2
  ret <4 x i32> %r
}
; Floating-point variant with fast-math: the checks expect the 'fast' flag to
; be kept on the fsub that is moved ahead of the shuffle.
define <4 x float> @shuffle_17fsub_fast(<4 x float> %v1, <4 x float> %v2) {
; CHECK-LABEL: @shuffle_17fsub_fast(
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <4 x float> [[V1:%.*]], [[V2:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: ret <4 x float> [[R]]
;
  %t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %r = fsub fast <4 x float> %t1, %t2
  ret <4 x float> %r
}
; Add of a shuffled value and a constant vector: the checks expect the add to
; move ahead of the shuffle with the constant permuted to <44, 41, 42, 43>.
define <4 x i32> @add_const(<4 x i32> %v) {
; CHECK-LABEL: @add_const(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V:%.*]], <i32 44, i32 41, i32 42, i32 43>
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  %r = add <4 x i32> %t1, <i32 41, i32 42, i32 43, i32 44>
  ret <4 x i32> %r
}
; Constant on the left of a sub with a lane-reversed operand: the checks
; expect the sub to move ahead of the shuffle with the constant reversed to
; <44, 43, 42, 41>.
define <4 x i32> @sub_const(<4 x i32> %v) {
; CHECK-LABEL: @sub_const(
; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> <i32 44, i32 43, i32 42, i32 41>, [[V:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %r = sub <4 x i32> <i32 41, i32 42, i32 43, i32 44>, %t1
  ret <4 x i32> %r
}
; Negative test: the shuffle has another user, so doing the math before the
; shuffle would require an extra shuffle. The checks expect no change.
define <2 x float> @fadd_const_multiuse(<2 x float> %v) {
; CHECK-LABEL: @fadd_const_multiuse(
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x float> [[V:%.*]], <2 x float> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[R:%.*]] = fadd <2 x float> [[T1]], <float 4.100000e+01, float 4.200000e+01>
; CHECK-NEXT: call void @use(<2 x float> [[T1]])
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %t1 = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 0>
  %r = fadd <2 x float> %t1, <float 41.0, float 42.0>
  call void @use(<2 x float> %t1)
  ret <2 x float> %r
}
; Doing the math before a splat lets the constant's unused lanes become undef:
; only lane 1 is read by the splat, so the checks expect the mul constant to
; be <undef, 42, undef, undef>.
define <4 x i32> @mul_const_splat(<4 x i32> %v) {
; CHECK-LABEL: @mul_const_splat(
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[V:%.*]], <i32 undef, i32 42, i32 undef, i32 undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %r = mul <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %t1
  ret <4 x i32> %r
}
; Two elements of %v are each duplicated and used as shift amounts for
; different constants (8 and 9). The checks expect the lshr to move ahead of
; the shuffle with the constant <undef, 8, 9, undef>.
define <4 x i32> @lshr_const_half_splat(<4 x i32> %v) {
; CHECK-LABEL: @lshr_const_half_splat(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> <i32 undef, i32 8, i32 9, i32 undef>, [[V:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %t1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
  %r = lshr <4 x i32> <i32 8, i32 8, i32 9, i32 9>, %t1
  ret <4 x i32> %r
}
; Negative test: both result lanes read source lane 0 but are multiplied by
; different constants (41.0 and 42.0), so no pre-shuffle version of the fmul
; constant exists. The checks expect no change.
define <2 x float> @fmul_const_invalid_constant(<2 x float> %v) {
; CHECK-LABEL: @fmul_const_invalid_constant(
; CHECK-NEXT: [[T1:%.*]] = shufflevector <2 x float> [[V:%.*]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = fmul <2 x float> [[T1]], <float 4.100000e+01, float 4.200000e+01>
; CHECK-NEXT: ret <2 x float> [[R]]
;
  %t1 = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 0, i32 0>
  %r = fmul <2 x float> %t1, <float 41.0, float 42.0>
  ret <2 x float> %r
}
; Moving the binop ahead of a widening shuffle narrows it: the checks expect a
; <2 x i8> add using the first two constant lanes, then the widening shuffle.
define <4 x i8> @widening_shuffle_add_1(<2 x i8> %x) {
; CHECK-LABEL: @widening_shuffle_add_1(
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 42, i8 43>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x i8> [[R]]
;
  %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
  ret <4 x i8> %r
}
; Narrowing the binop also works when the widening shuffle swaps the two
; source lanes: the checks expect the narrow add constant to be <43, 42>.
define <4 x i8> @widening_shuffle_add_2(<2 x i8> %x) {
; CHECK-LABEL: @widening_shuffle_add_2(
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 43, i8 42>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x i8> [[R]]
;
  %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
  %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
  ret <4 x i8> %r
}
; Negative test: widening shuffles are subject to the same mask/constant
; constraint as non-size-changing shuffles. Source lane 1 is used twice with
; different constants, so the checks expect no change.
define <4 x i8> @widening_shuffle_add_invalid_constant(<2 x i8> %x) {
; CHECK-LABEL: @widening_shuffle_add_invalid_constant(
; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[R:%.*]] = add <4 x i8> [[WIDEX]], <i8 42, i8 43, i8 44, i8 45>
; CHECK-NEXT: ret <4 x i8> [[R]]
;
  %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
  %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
  ret <4 x i8> %r
}
; Negative test: a widening shuffle has one more constraint - the added lanes
; must all be undef. Here the last lane repeats source lane 0, so the checks
; expect no change.
define <4 x i8> @widening_shuffle_add_invalid_mask(<2 x i8> %x) {
; CHECK-LABEL: @widening_shuffle_add_invalid_mask(
; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 0>
; CHECK-NEXT: [[R:%.*]] = add <4 x i8> [[WIDEX]], <i8 42, i8 43, i8 44, i8 45>
; CHECK-NEXT: ret <4 x i8> [[R]]
;
  %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 0>
  %r = add <4 x i8> %widex, <i8 42, i8 43, i8 44, i8 45>
  ret <4 x i8> %r
}
; If the binop yields undef in the high lanes, it may be moved ahead of the
; widening shuffle. That holds here because 'shl C, undef --> undef'.
define <4 x i16> @widening_shuffle_shl_constant_op0(<2 x i16> %v) {
; CHECK-LABEL: @widening_shuffle_shl_constant_op0(
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> <i16 42, i16 -42>, [[V:%.*]]
; CHECK-NEXT: [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x i16> [[BO]]
;
  %shuf = shufflevector <2 x i16> %v, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %bo = shl <4 x i16> <i16 42, i16 -42, i16 -1, i16 -1>, %shuf
  ret <4 x i16> %bo
}
; If the binop yields undef in the high lanes, it may be moved ahead of the
; widening shuffle. That holds here because 'shl undef, 0 --> undef'.
define <4 x i16> @widening_shuffle_shl_constant_op1(<2 x i16> %v) {
; CHECK-LABEL: @widening_shuffle_shl_constant_op1(
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> [[V:%.*]], <i16 2, i16 4>
; CHECK-NEXT: [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x i16> [[BO]]
;
  %shuf = shufflevector <2 x i16> %v, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %bo = shl <4 x i16> %shuf, <i16 2, i16 4, i16 0, i16 0>
  ret <4 x i16> %bo
}
; Negative test: a binop that does not yield undef in the high lanes cannot be
; moved ahead of the shuffle. 'shl undef, 1 (or 2)' folds to 0, whereas moving
; the shuffle would give undef in those lanes instead.
define <4 x i16> @widening_shuffle_shl_constant_op1_non0(<2 x i16> %v) {
; CHECK-LABEL: @widening_shuffle_shl_constant_op1_non0(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[BO:%.*]] = shl <4 x i16> [[SHUF]], <i16 2, i16 4, i16 1, i16 2>
; CHECK-NEXT: ret <4 x i16> [[BO]]
;
  %shuf = shufflevector <2 x i16> %v, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %bo = shl <4 x i16> %shuf, <i16 2, i16 4, i16 1, i16 2>
  ret <4 x i16> %bo
}
; Negative test: a binop that does not yield undef in the high lanes cannot be
; moved ahead of the shuffle. 'or -1, undef --> -1', whereas moving the
; shuffle would give undef in those lanes instead.
define <4 x i16> @widening_shuffle_or(<2 x i16> %v) {
; CHECK-LABEL: @widening_shuffle_or(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[BO:%.*]] = or <4 x i16> [[SHUF]], <i16 42, i16 -42, i16 -1, i16 -1>
; CHECK-NEXT: ret <4 x i16> [[BO]]
;
  %shuf = shufflevector <2 x i16> %v, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %bo = or <4 x i16> %shuf, <i16 42, i16 -42, i16 -1, i16 -1>
  ret <4 x i16> %bo
}
; Reverse, add the value to itself, reverse again: the two reversals cancel,
; and the checks expect the self-add to become a shift left by 1 of %v.
define <4 x i32> @shuffle_17add2(<4 x i32> %v) {
; CHECK-LABEL: @shuffle_17add2(
; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
  %t1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %t2 = add <4 x i32> %t1, %t1
  %r = shufflevector <4 x i32> %t2, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %r
}
; Splat lane 0, square it, then splat lane 1 of the product. The checks expect
; a mul of %v with itself followed by a single splat of lane 0.
define <4 x i32> @shuffle_17mulsplat(<4 x i32> %v) {
; CHECK-LABEL: @shuffle_17mulsplat(
; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[V:%.*]], [[V]]
; CHECK-NEXT: [[M1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: ret <4 x i32> [[M1]]
;
  %s1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
  %m1 = mul <4 x i32> %s1, %s1
  %s2 = shufflevector <4 x i32> %m1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %s2
}
; Do not reorder shuffle and binop if the LHS operands of the shuffles are of different sizes.
; Here %in0 is <4 x i32> and %in1 is <2 x i32>, so both shuffles are expected
; to stay ahead of the mul (only their second operands become undef).
define < 2 x i32 > @pr19717 ( < 4 x i32 > %in0 , < 2 x i32 > %in1 ) {
; CHECK-LABEL: @pr19717(
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[IN0:%.*]], <4 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[SHUFFLE4:%.*]] = shufflevector <2 x i32> [[IN1:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[SHUFFLE]], [[SHUFFLE4]]
; CHECK-NEXT: ret <2 x i32> [[MUL]]
;
%shuffle = shufflevector < 4 x i32 > %in0 , < 4 x i32 > %in0 , < 2 x i32 > zeroinitializer
%shuffle4 = shufflevector < 2 x i32 > %in1 , < 2 x i32 > %in1 , < 2 x i32 > zeroinitializer
%mul = mul < 2 x i32 > %shuffle , %shuffle4
ret < 2 x i32 > %mul
}
; Both shuffles take <8 x i16> sources and use the same mask, so the mul is
; expected to be done on the unshuffled inputs with one shuffle afterwards.
define < 4 x i16 > @pr19717a ( < 8 x i16 > %in0 , < 8 x i16 > %in1 ) {
; CHECK-LABEL: @pr19717a(
; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i16> [[IN0:%.*]], [[IN1:%.*]]
; CHECK-NEXT: [[MUL:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
; CHECK-NEXT: ret <4 x i16> [[MUL]]
;
%shuffle = shufflevector < 8 x i16 > %in0 , < 8 x i16 > %in0 , < 4 x i32 > < i32 5 , i32 5 , i32 5 , i32 5 >
%shuffle1 = shufflevector < 8 x i16 > %in1 , < 8 x i16 > %in1 , < 4 x i32 > < i32 5 , i32 5 , i32 5 , i32 5 >
%mul = mul < 4 x i16 > %shuffle , %shuffle1
ret < 4 x i16 > %mul
}
; Two back-to-back reversing shuffles, the first of which also narrows from
; 16 to 8 elements. Both shuffles are expected to be left in place.
define < 8 x i8 > @pr19730 ( < 16 x i8 > %in0 ) {
; CHECK-LABEL: @pr19730(
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[IN0:%.*]], <16 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x i8> [[SHUFFLE]], <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <8 x i8> [[SHUFFLE1]]
;
%shuffle = shufflevector < 16 x i8 > %in0 , < 16 x i8 > undef , < 8 x i32 > < i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
%shuffle1 = shufflevector < 8 x i8 > %shuffle , < 8 x i8 > undef , < 8 x i32 > < i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
ret < 8 x i8 > %shuffle1
}
; Only lane 0 of the and is used. In that lane the shuffle selects a zero, the
; xor turns it into -1, and 'and %in0, -1' is %in0, so the whole chain is
; expected to reduce to an extractelement of lane 0 of %in0.
define i32 @pr19737 ( < 4 x i32 > %in0 ) {
; CHECK-LABEL: @pr19737(
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[IN0:%.*]], i32 0
; CHECK-NEXT: ret i32 [[TMP1]]
;
%shuffle.i = shufflevector < 4 x i32 > zeroinitializer , < 4 x i32 > %in0 , < 4 x i32 > < i32 0 , i32 4 , i32 2 , i32 6 >
%neg.i = xor < 4 x i32 > %shuffle.i , < i32 -1 , i32 -1 , i32 -1 , i32 -1 >
%and.i = and < 4 x i32 > %in0 , %neg.i
%rv = extractelement < 4 x i32 > %and.i , i32 0
ret i32 %rv
}
; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed
; for an srem operation. This is not a valid optimization because it may cause a trap
; on div-by-zero.
; Both splats are therefore expected to stay ahead of the srem.
define < 4 x i32 > @pr20059 ( < 4 x i32 > %p1 , < 4 x i32 > %p2 ) {
; CHECK-LABEL: @pr20059(
; CHECK-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[P1:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[P2:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[RETVAL:%.*]] = srem <4 x i32> [[SPLAT1]], [[SPLAT2]]
; CHECK-NEXT: ret <4 x i32> [[RETVAL]]
;
%splat1 = shufflevector < 4 x i32 > %p1 , < 4 x i32 > undef , < 4 x i32 > zeroinitializer
%splat2 = shufflevector < 4 x i32 > %p2 , < 4 x i32 > undef , < 4 x i32 > zeroinitializer
%retval = srem < 4 x i32 > %splat1 , %splat2
ret < 4 x i32 > %retval
}
; The constant operand of the and is a bitcast of a vector of ptrtoint constant
; expressions. The and is expected to stay after the shuffle; only its operands
; are commuted so that the constant ends up on the right-hand side.
define < 4 x i32 > @pr20114 ( < 4 x i32 > %__mask ) {
; CHECK-LABEL: @pr20114(
; CHECK-NEXT: [[MASK01_I:%.*]] = shufflevector <4 x i32> [[__MASK:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT: [[MASKED_NEW_I_I_I:%.*]] = and <4 x i32> [[MASK01_I]], bitcast (<2 x i64> <i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64), i64 ptrtoint (<4 x i32> (<4 x i32>)* @pr20114 to i64)> to <4 x i32>)
; CHECK-NEXT: ret <4 x i32> [[MASKED_NEW_I_I_I]]
;
%mask01.i = shufflevector < 4 x i32 > %__mask , < 4 x i32 > undef , < 4 x i32 > < i32 0 , i32 0 , i32 1 , i32 1 >
%masked_new.i.i.i = and < 4 x i32 > bitcast ( < 2 x i64 > < i64 ptrtoint ( < 4 x i32 > ( < 4 x i32 > ) * @pr20114 to i64 ) , i64 ptrtoint ( < 4 x i32 > ( < 4 x i32 > ) * @pr20114 to i64 ) > to < 4 x i32 > ) , %mask01.i
ret < 4 x i32 > %masked_new.i.i.i
}
; A narrowing shuffle of a vector of pointers (keeping lanes 0 and 1) is
; expected to pass through unchanged.
define < 2 x i32 * > @pr23113 ( < 4 x i32 * > %A ) {
; CHECK-LABEL: @pr23113(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32*> [[A:%.*]], <4 x i32*> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: ret <2 x i32*> [[TMP1]]
;
%1 = shufflevector < 4 x i32 * > %A , < 4 x i32 * > undef , < 2 x i32 > < i32 0 , i32 1 >
ret < 2 x i32 * > %1
}
; Unused lanes in the new binop should not kill the entire op (although it may simplify anyway as shown here).
; The urem is by a splat of 1, so the expected result is simply a zero vector.
define < 2 x i32 > @PR37648 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @PR37648(
; CHECK-NEXT: ret <2 x i32> zeroinitializer
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = urem < 2 x i32 > %splat , < i32 1 , i32 1 >
ret < 2 x i32 > %r
}
; Test shuffle followed by binop with splat constant for all 18 binop opcodes.
; Test with constant as operand 0 and operand 1 for non-commutative opcodes.

; add: expected to be performed on the unshuffled %x (the unused lane 1 of the
; constant becomes undef), followed by the lane-0 splat.
define < 2 x i32 > @add_splat_constant ( < 2 x i32 > %x ) {
; CHECK-LABEL: @add_splat_constant(
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 42, i32 undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = add < 2 x i32 > %splat , < i32 42 , i32 42 >
ret < 2 x i32 > %r
}
; sub with the constant as operand 0: expected to be performed on the
; unshuffled %x (unused constant lane becomes undef), then splatted.
define < 2 x i32 > @sub_splat_constant0 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @sub_splat_constant0(
; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> <i32 42, i32 undef>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = sub < 2 x i32 > < i32 42 , i32 42 > , %splat
ret < 2 x i32 > %r
}
; sub with the constant as operand 1: expected to become an add of the negated
; constant on the unshuffled %x, followed by the splat.
define < 2 x i32 > @sub_splat_constant1 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @sub_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -42, i32 undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = sub < 2 x i32 > %splat , < i32 42 , i32 42 >
ret < 2 x i32 > %r
}
; mul: expected to be performed on the unshuffled %x (unused constant lane
; becomes undef), followed by the splat.
define < 2 x i32 > @mul_splat_constant ( < 2 x i32 > %x ) {
; CHECK-LABEL: @mul_splat_constant(
; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[X:%.*]], <i32 42, i32 undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = mul < 2 x i32 > %splat , < i32 42 , i32 42 >
ret < 2 x i32 > %r
}
; shl with the constant as the shifted value: expected to be performed on the
; unshuffled %x (unused constant lane becomes undef), then splatted.
define < 2 x i32 > @shl_splat_constant0 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @shl_splat_constant0(
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> <i32 5, i32 undef>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = shl < 2 x i32 > < i32 5 , i32 5 > , %splat
ret < 2 x i32 > %r
}
; shl with the constant as the shift amount: expected to be performed on the
; unshuffled %x, then splatted. The unused lane of the shift amount is 0 rather
; than undef (presumably chosen as a safe shift amount).
define < 2 x i32 > @shl_splat_constant1 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @shl_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 0>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = shl < 2 x i32 > %splat , < i32 5 , i32 5 >
ret < 2 x i32 > %r
}
; ashr with the constant as the shifted value: expected to be performed on the
; unshuffled %x (unused constant lane becomes undef), then splatted.
define < 2 x i32 > @ashr_splat_constant0 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @ashr_splat_constant0(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> <i32 5, i32 undef>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = ashr < 2 x i32 > < i32 5 , i32 5 > , %splat
ret < 2 x i32 > %r
}
; ashr with the constant as the shift amount: expected to be performed on the
; unshuffled %x, then splatted. The unused lane of the shift amount is 0 rather
; than undef (presumably chosen as a safe shift amount).
define < 2 x i32 > @ashr_splat_constant1 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @ashr_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 5, i32 0>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = ashr < 2 x i32 > %splat , < i32 5 , i32 5 >
ret < 2 x i32 > %r
}
; lshr with the constant as the shifted value: expected to be performed on the
; unshuffled %x (unused constant lane becomes undef), then splatted.
define < 2 x i32 > @lshr_splat_constant0 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @lshr_splat_constant0(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> <i32 5, i32 undef>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = lshr < 2 x i32 > < i32 5 , i32 5 > , %splat
ret < 2 x i32 > %r
}
; lshr with the constant as the shift amount: expected to be performed on the
; unshuffled %x, then splatted. The unused lane of the shift amount is 0 rather
; than undef (presumably chosen as a safe shift amount).
define < 2 x i32 > @lshr_splat_constant1 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @lshr_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 5, i32 0>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = lshr < 2 x i32 > %splat , < i32 5 , i32 5 >
ret < 2 x i32 > %r
}
; urem with the splat as the divisor: the splat is expected to stay ahead of
; the urem (presumably because an unused lane of %x could be zero, which the
; unshuffled op would then divide by).
define < 2 x i32 > @urem_splat_constant0 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @urem_splat_constant0(
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = urem <2 x i32> <i32 42, i32 42>, [[SPLAT]]
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = urem < 2 x i32 > < i32 42 , i32 42 > , %splat
ret < 2 x i32 > %r
}
; urem with the constant as the divisor: expected to be performed on the
; unshuffled %x, then splatted. The unused lane of the divisor is 1 rather than
; undef (presumably chosen as a safe divisor).
define < 2 x i32 > @urem_splat_constant1 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @urem_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = urem <2 x i32> [[X:%.*]], <i32 42, i32 1>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = urem < 2 x i32 > %splat , < i32 42 , i32 42 >
ret < 2 x i32 > %r
}
; srem with the splat as the divisor: the splat is expected to stay ahead of
; the srem (presumably because an unused lane of %x could be zero, which the
; unshuffled op would then divide by).
define < 2 x i32 > @srem_splat_constant0 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @srem_splat_constant0(
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = srem <2 x i32> <i32 42, i32 42>, [[SPLAT]]
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = srem < 2 x i32 > < i32 42 , i32 42 > , %splat
ret < 2 x i32 > %r
}
; srem with the constant as the divisor: expected to be performed on the
; unshuffled %x, then splatted. The unused lane of the divisor is 1 rather than
; undef (presumably chosen as a safe divisor).
define < 2 x i32 > @srem_splat_constant1 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @srem_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = srem <2 x i32> [[X:%.*]], <i32 42, i32 1>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = srem < 2 x i32 > %splat , < i32 42 , i32 42 >
ret < 2 x i32 > %r
}
; udiv with the splat as the divisor: the splat is expected to stay ahead of
; the udiv (presumably because an unused lane of %x could be zero, which the
; unshuffled op would then divide by).
define < 2 x i32 > @udiv_splat_constant0 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @udiv_splat_constant0(
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = udiv <2 x i32> <i32 42, i32 42>, [[SPLAT]]
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = udiv < 2 x i32 > < i32 42 , i32 42 > , %splat
ret < 2 x i32 > %r
}
; udiv with the constant as the divisor: expected to be performed on the
; unshuffled %x, then splatted. The unused lane of the divisor is 1 rather than
; undef (presumably chosen as a safe divisor).
define < 2 x i32 > @udiv_splat_constant1 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @udiv_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = udiv <2 x i32> [[X:%.*]], <i32 42, i32 1>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = udiv < 2 x i32 > %splat , < i32 42 , i32 42 >
ret < 2 x i32 > %r
}
; sdiv with the splat as the divisor: the splat is expected to stay ahead of
; the sdiv (presumably because an unused lane of %x could be zero, which the
; unshuffled op would then divide by).
define < 2 x i32 > @sdiv_splat_constant0 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @sdiv_splat_constant0(
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = sdiv <2 x i32> <i32 42, i32 42>, [[SPLAT]]
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = sdiv < 2 x i32 > < i32 42 , i32 42 > , %splat
ret < 2 x i32 > %r
}
; sdiv with the constant as the divisor: expected to be performed on the
; unshuffled %x, then splatted. The unused lane of the divisor is 1 rather than
; undef (presumably chosen as a safe divisor).
define < 2 x i32 > @sdiv_splat_constant1 ( < 2 x i32 > %x ) {
; CHECK-LABEL: @sdiv_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[X:%.*]], <i32 42, i32 1>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = sdiv < 2 x i32 > %splat , < i32 42 , i32 42 >
ret < 2 x i32 > %r
}
; and: expected to be performed on the unshuffled %x (unused constant lane
; becomes undef), followed by the splat.
define < 2 x i32 > @and_splat_constant ( < 2 x i32 > %x ) {
; CHECK-LABEL: @and_splat_constant(
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 42, i32 undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = and < 2 x i32 > %splat , < i32 42 , i32 42 >
ret < 2 x i32 > %r
}
; AND does not fold to undef for undef operands, we cannot move it
; across a shuffle with undef masks.
; Background (https://reviews.llvm.org/D70093): hoisting the 'and' above the
; shuffle would leave the two lanes selected by undef mask elements undef, but
; 'and undef, 0' must produce 0. The shuffle and the 'and' are therefore
; expected to stay in their original order.
define < 4 x i16 > @and_constant_mask_undef ( < 4 x i16 > %add ) {
; CHECK-LABEL: @and_constant_mask_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], <i16 0, i16 0, i16 -1, i16 -1>
; CHECK-NEXT: ret <4 x i16> [[AND]]
;
entry:
%shuffle = shufflevector < 4 x i16 > %add , < 4 x i16 > undef , < 4 x i32 > < i32 undef , i32 undef , i32 1 , i32 1 >
%and = and < 4 x i16 > %shuffle , < i16 0 , i16 0 , i16 -1 , i16 -1 >
ret < 4 x i16 > %and
}
; AND does not fold to undef for undef operands, we cannot move it
; across a shuffle with undef masks.
; Here only the last mask element is undef and the matching constant lane is 0,
; so 'and undef, 0' must still produce 0 in that lane (see
; https://reviews.llvm.org/D70093). The shuffle and the 'and' are expected to
; stay in their original order.
define < 4 x i16 > @and_constant_mask_undef_2 ( < 4 x i16 > %add ) {
; CHECK-LABEL: @and_constant_mask_undef_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 undef>
; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], <i16 -1, i16 -1, i16 -1, i16 0>
; CHECK-NEXT: ret <4 x i16> [[AND]]
;
entry:
%shuffle = shufflevector < 4 x i16 > %add , < 4 x i16 > undef , < 4 x i32 > < i32 1 , i32 1 , i32 1 , i32 undef >
%and = and < 4 x i16 > %shuffle , < i16 -1 , i16 -1 , i16 -1 , i16 0 >
ret < 4 x i16 > %and
}
; We can move the AND across the shuffle, as -1 (AND identity value) is used for undef lanes.
; All defined lanes are and'ed with 0, so the whole expression is expected to
; fold to a constant with undef in the lane selected by the undef mask element.
define < 4 x i16 > @and_constant_mask_undef_3 ( < 4 x i16 > %add ) {
; CHECK-LABEL: @and_constant_mask_undef_3(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i16> <i16 0, i16 0, i16 0, i16 undef>
;
entry:
%shuffle = shufflevector < 4 x i16 > %add , < 4 x i16 > undef , < 4 x i32 > < i32 0 , i32 1 , i32 1 , i32 undef >
%and = and < 4 x i16 > %shuffle , < i16 0 , i16 0 , i16 0 , i16 -1 >
ret < 4 x i16 > %and
}
; We can move the AND across the shuffle, as -1 (AND identity value) is used for undef lanes.
; The 'and' is expected to be applied to the unshuffled input (with undef in
; the constant lanes the mask never reads), followed by the shuffle.
define < 4 x i16 > @and_constant_mask_undef_4 ( < 4 x i16 > %add ) {
; CHECK-LABEL: @and_constant_mask_undef_4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i16> [[ADD:%.*]], <i16 9, i16 20, i16 undef, i16 undef>
; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 undef>
; CHECK-NEXT: ret <4 x i16> [[AND]]
;
entry:
%shuffle = shufflevector < 4 x i16 > %add , < 4 x i16 > undef , < 4 x i32 > < i32 0 , i32 1 , i32 1 , i32 undef >
%and = and < 4 x i16 > %shuffle , < i16 9 , i16 20 , i16 20 , i16 -1 >
ret < 4 x i16 > %and
}
; The shuffle mask has no undef elements, so the 'and' is expected to be moved
; ahead of the shuffle, with the constant permuted to match and undef placed in
; the source lane the mask never reads.
define < 4 x i16 > @and_constant_mask_not_undef ( < 4 x i16 > %add ) {
; CHECK-LABEL: @and_constant_mask_not_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i16> [[ADD:%.*]], <i16 undef, i16 -1, i16 0, i16 0>
; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i16> [[AND]]
;
entry:
%shuffle = shufflevector < 4 x i16 > %add , < 4 x i16 > undef , < 4 x i32 > < i32 2 , i32 3 , i32 1 , i32 1 >
%and = and < 4 x i16 > %shuffle , < i16 0 , i16 0 , i16 -1 , i16 -1 >
ret < 4 x i16 > %and
}
; OR does not fold to undef for undef operands, we cannot move it
; across a shuffle with undef masks.
; Background (https://reviews.llvm.org/D70093): hoisting the 'or' above the
; shuffle would leave the two lanes selected by undef mask elements undef, but
; 'or undef, -1' must produce -1. The shuffle and the 'or' are therefore
; expected to stay in their original order.
define < 4 x i16 > @or_constant_mask_undef ( < 4 x i16 > %in ) {
; CHECK-LABEL: @or_constant_mask_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], <i16 -1, i16 -1, i16 0, i16 0>
; CHECK-NEXT: ret <4 x i16> [[OR]]
;
entry:
%shuffle = shufflevector < 4 x i16 > %in , < 4 x i16 > undef , < 4 x i32 > < i32 undef , i32 undef , i32 1 , i32 1 >
%or = or < 4 x i16 > %shuffle , < i16 -1 , i16 -1 , i16 0 , i16 0 >
ret < 4 x i16 > %or
}
; OR does not fold to undef for undef operands, we cannot move it
; across a shuffle with undef masks.
; Here the first and last mask elements are undef and the matching constant
; lanes are -1, so 'or undef, -1' must still produce -1 in those lanes (see
; https://reviews.llvm.org/D70093). The shuffle and the 'or' are expected to
; stay in their original order.
define < 4 x i16 > @or_constant_mask_undef_2 ( < 4 x i16 > %in ) {
; CHECK-LABEL: @or_constant_mask_undef_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], <i16 -1, i16 0, i16 0, i16 -1>
; CHECK-NEXT: ret <4 x i16> [[OR]]
;
entry:
%shuffle = shufflevector < 4 x i16 > %in , < 4 x i16 > undef , < 4 x i32 > < i32 undef , i32 1 , i32 1 , i32 undef >
%or = or < 4 x i16 > %shuffle , < i16 -1 , i16 0 , i16 0 , i16 -1 >
ret < 4 x i16 > %or
}
; We can move the OR across the shuffle, as 0 (OR identity value) is used for undef lanes.
; All defined lanes are or'ed with -1, so the whole expression is expected to
; fold to a constant with undef in the lanes selected by undef mask elements.
define < 4 x i16 > @or_constant_mask_undef_3 ( < 4 x i16 > %in ) {
; CHECK-LABEL: @or_constant_mask_undef_3(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i16> <i16 undef, i16 -1, i16 -1, i16 undef>
;
entry:
%shuffle = shufflevector < 4 x i16 > %in , < 4 x i16 > undef , < 4 x i32 > < i32 undef , i32 1 , i32 1 , i32 undef >
%or = or < 4 x i16 > %shuffle , < i16 0 , i16 -1 , i16 -1 , i16 0 >
ret < 4 x i16 > %or
}
; We can move the OR across the shuffle, as 0 (OR identity value) is used for undef lanes.
; The 'or' is expected to be applied to the unshuffled input (with undef in the
; constant lanes the mask never reads), followed by the shuffle.
define < 4 x i16 > @or_constant_mask_undef_4 ( < 4 x i16 > %in ) {
; CHECK-LABEL: @or_constant_mask_undef_4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i16> [[IN:%.*]], <i16 undef, i16 99, i16 undef, i16 undef>
; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
; CHECK-NEXT: ret <4 x i16> [[OR]]
;
entry:
%shuffle = shufflevector < 4 x i16 > %in , < 4 x i16 > undef , < 4 x i32 > < i32 undef , i32 1 , i32 1 , i32 undef >
%or = or < 4 x i16 > %shuffle , < i16 0 , i16 99 , i16 99 , i16 0 >
ret < 4 x i16 > %or
}
; The shuffle mask has no undef elements, so the 'or' is expected to be moved
; ahead of the shuffle, with the constant permuted to match and undef placed in
; the source lane the mask never reads.
define < 4 x i16 > @or_constant_mask_not_undef ( < 4 x i16 > %in ) {
; CHECK-LABEL: @or_constant_mask_not_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i16> [[IN:%.*]], <i16 undef, i16 -1, i16 0, i16 0>
; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i16> [[OR]]
;
entry:
%shuffle = shufflevector < 4 x i16 > %in , < 4 x i16 > undef , < 4 x i32 > < i32 2 , i32 3 , i32 1 , i32 1 >
%or = or < 4 x i16 > %shuffle , < i16 0 , i16 0 , i16 -1 , i16 -1 >
ret < 4 x i16 > %or
}
; Lane 1 of the shuffle mask is undef and is shifted left by 3. The shuffle and
; the shl are expected to stay in their original order (presumably for the
; reason given in https://reviews.llvm.org/D70093: a shl by a non-zero amount
; does not simply fold to undef for an undef operand).
define < 4 x i16 > @shl_constant_mask_undef ( < 4 x i16 > %in ) {
; CHECK-LABEL: @shl_constant_mask_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 1>
; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i16> [[SHUFFLE]], <i16 10, i16 3, i16 0, i16 0>
; CHECK-NEXT: ret <4 x i16> [[SHL]]
;
entry:
%shuffle = shufflevector < 4 x i16 > %in , < 4 x i16 > undef , < 4 x i32 > < i32 0 , i32 undef , i32 1 , i32 1 >
%shl = shl < 4 x i16 > %shuffle , < i16 10 , i16 3 , i16 0 , i16 0 >
ret < 4 x i16 > %shl
}
; The defined lanes (2 and 3) add 0 and the other two lanes come from undef
; mask elements, so the add is expected to disappear, leaving only the shuffle.
define < 4 x i16 > @add_constant_mask_undef ( < 4 x i16 > %in ) {
; CHECK-LABEL: @add_constant_mask_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i16> [[ADD]]
;
entry:
%shuffle = shufflevector < 4 x i16 > %in , < 4 x i16 > undef , < 4 x i32 > < i32 undef , i32 undef , i32 1 , i32 1 >
%add = add < 4 x i16 > %shuffle , < i16 10 , i16 3 , i16 0 , i16 0 >
ret < 4 x i16 > %add
}
; Only the first mask element is undef. The add is expected to be moved ahead
; of the shuffle, with the constant permuted to the source lanes and undef in
; the lanes the mask never reads.
define < 4 x i16 > @add_constant_mask_undef_2 ( < 4 x i16 > %in ) {
; CHECK-LABEL: @add_constant_mask_undef_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i16> [[IN:%.*]], <i16 undef, i16 0, i16 3, i16 undef>
; CHECK-NEXT: [[ADD:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i16> [[ADD]]
;
entry:
%shuffle = shufflevector < 4 x i16 > %in , < 4 x i16 > undef , < 4 x i32 > < i32 undef , i32 2 , i32 1 , i32 1 >
%add = add < 4 x i16 > %shuffle , < i16 10 , i16 3 , i16 0 , i16 0 >
ret < 4 x i16 > %add
}
; The defined lanes (2 and 3) subtract 0 and the other two lanes come from
; undef mask elements, so the sub is expected to disappear, leaving only the
; shuffle.
define < 4 x i16 > @sub_constant_mask_undef ( < 4 x i16 > %in ) {
; CHECK-LABEL: @sub_constant_mask_undef(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SUB:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 1>
; CHECK-NEXT: ret <4 x i16> [[SUB]]
;
entry:
%shuffle = shufflevector < 4 x i16 > %in , < 4 x i16 > undef , < 4 x i32 > < i32 undef , i32 undef , i32 1 , i32 1 >
%sub = sub < 4 x i16 > %shuffle , < i16 10 , i16 3 , i16 0 , i16 0 >
ret < 4 x i16 > %sub
}
; Only the last mask element is undef. The sub is expected to become an add of
; the negated constant on the unshuffled input (undef in the lanes the mask
; never reads), followed by the shuffle.
define < 4 x i16 > @sub_constant_mask_undef_2 ( < 4 x i16 > %in ) {
; CHECK-LABEL: @sub_constant_mask_undef_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i16> [[IN:%.*]], <i16 undef, i16 0, i16 -10, i16 undef>
; CHECK-NEXT: [[SUB:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 undef>
; CHECK-NEXT: ret <4 x i16> [[SUB]]
;
entry:
%shuffle = shufflevector < 4 x i16 > %in , < 4 x i16 > undef , < 4 x i32 > < i32 1 , i32 1 , i32 2 , i32 undef >
%sub = sub < 4 x i16 > %shuffle , < i16 0 , i16 0 , i16 10 , i16 99 >
ret < 4 x i16 > %sub
}
; or: expected to be performed on the unshuffled %x (unused constant lane
; becomes undef), followed by the splat.
define < 2 x i32 > @or_splat_constant ( < 2 x i32 > %x ) {
; CHECK-LABEL: @or_splat_constant(
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], <i32 42, i32 undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = or < 2 x i32 > %splat , < i32 42 , i32 42 >
ret < 2 x i32 > %r
}
; xor: expected to be performed on the unshuffled %x (unused constant lane
; becomes undef), followed by the splat.
define < 2 x i32 > @xor_splat_constant ( < 2 x i32 > %x ) {
; CHECK-LABEL: @xor_splat_constant(
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], <i32 42, i32 undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%splat = shufflevector < 2 x i32 > %x , < 2 x i32 > undef , < 2 x i32 > zeroinitializer
%r = xor < 2 x i32 > %splat , < i32 42 , i32 42 >
ret < 2 x i32 > %r
}
; 'fadd' of a lane-0 splat with a splat constant: the expected output applies
; the 'fadd' to %x first (only lane 0 of the constant stays defined) and splats
; the result afterwards.
define < 2 x float > @fadd_splat_constant ( < 2 x float > %x ) {
; CHECK-LABEL: @fadd_splat_constant(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], <float 4.200000e+01, float undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
%splat = shufflevector < 2 x float > %x , < 2 x float > undef , < 2 x i32 > zeroinitializer
%r = fadd < 2 x float > %splat , < float 42.0 , float 42.0 >
ret < 2 x float > %r
}
; 'fsub' with the splat constant as operand 0 and the lane-0 splat as operand 1:
; the expected output keeps the operand order, subtracts %x from the constant
; (only lane 0 stays defined) and splats the result afterwards.
define < 2 x float > @fsub_splat_constant0 ( < 2 x float > %x ) {
; CHECK-LABEL: @fsub_splat_constant0(
; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> <float 4.200000e+01, float undef>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
%splat = shufflevector < 2 x float > %x , < 2 x float > undef , < 2 x i32 > zeroinitializer
%r = fsub < 2 x float > < float 42.0 , float 42.0 > , %splat
ret < 2 x float > %r
}
; 'fsub' with the lane-0 splat as operand 0 and the splat constant as operand 1:
; the expected output is an 'fadd' of %x with the negated constant (only lane 0
; stays defined), followed by the splat.
define < 2 x float > @fsub_splat_constant1 ( < 2 x float > %x ) {
; CHECK-LABEL: @fsub_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[X:%.*]], <float -4.200000e+01, float undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
%splat = shufflevector < 2 x float > %x , < 2 x float > undef , < 2 x i32 > zeroinitializer
%r = fsub < 2 x float > %splat , < float 42.0 , float 42.0 >
ret < 2 x float > %r
}
; Negation written as 'fsub' from a -0.0 splat: the expected output performs the
; 'fsub' on %x first (only lane 0 of the -0.0 constant stays defined) and splats
; the result afterwards.
define < 2 x float > @fneg ( < 2 x float > %x ) {
; CHECK-LABEL: @fneg(
; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> <float -0.000000e+00, float undef>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
%splat = shufflevector < 2 x float > %x , < 2 x float > undef , < 2 x i32 > zeroinitializer
%r = fsub < 2 x float > < float -0.0 , float -0.0 > , %splat
ret < 2 x float > %r
}
; 'fmul' of a lane-0 splat with a splat constant: the expected output applies
; the 'fmul' to %x first (only lane 0 of the constant stays defined) and splats
; the result afterwards.
define < 2 x float > @fmul_splat_constant ( < 2 x float > %x ) {
; CHECK-LABEL: @fmul_splat_constant(
; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], <float 4.200000e+01, float undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
%splat = shufflevector < 2 x float > %x , < 2 x float > undef , < 2 x i32 > zeroinitializer
%r = fmul < 2 x float > %splat , < float 42.0 , float 42.0 >
ret < 2 x float > %r
}
; 'fdiv' with the splat constant as the dividend and the lane-0 splat as the
; divisor: the expected output keeps the operand order, divides the constant
; (only lane 0 stays defined) by %x and splats the result afterwards.
define < 2 x float > @fdiv_splat_constant0 ( < 2 x float > %x ) {
; CHECK-LABEL: @fdiv_splat_constant0(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x float> <float 4.200000e+01, float undef>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
%splat = shufflevector < 2 x float > %x , < 2 x float > undef , < 2 x i32 > zeroinitializer
%r = fdiv < 2 x float > < float 42.0 , float 42.0 > , %splat
ret < 2 x float > %r
}
; 'fdiv' with the lane-0 splat as the dividend and the splat constant as the
; divisor: the expected output divides %x by the constant (only lane 0 stays
; defined) and splats the result afterwards.
define < 2 x float > @fdiv_splat_constant1 ( < 2 x float > %x ) {
; CHECK-LABEL: @fdiv_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv <2 x float> [[X:%.*]], <float 4.200000e+01, float undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
%splat = shufflevector < 2 x float > %x , < 2 x float > undef , < 2 x i32 > zeroinitializer
%r = fdiv < 2 x float > %splat , < float 42.0 , float 42.0 >
ret < 2 x float > %r
}
; 'frem' with the splat constant as operand 0 and the lane-0 splat as operand 1:
; the expected output keeps the operand order, applies the 'frem' to %x (only
; lane 0 of the constant stays defined) and splats the result afterwards.
define < 2 x float > @frem_splat_constant0 ( < 2 x float > %x ) {
; CHECK-LABEL: @frem_splat_constant0(
; CHECK-NEXT: [[TMP1:%.*]] = frem <2 x float> <float 4.200000e+01, float undef>, [[X:%.*]]
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
%splat = shufflevector < 2 x float > %x , < 2 x float > undef , < 2 x i32 > zeroinitializer
%r = frem < 2 x float > < float 42.0 , float 42.0 > , %splat
ret < 2 x float > %r
}
; 'frem' with the lane-0 splat as operand 0 and the splat constant as operand 1:
; the expected output applies the 'frem' to %x (only lane 0 of the constant
; stays defined) and splats the result afterwards.
define < 2 x float > @frem_splat_constant1 ( < 2 x float > %x ) {
; CHECK-LABEL: @frem_splat_constant1(
; CHECK-NEXT: [[TMP1:%.*]] = frem <2 x float> [[X:%.*]], <float 4.200000e+01, float undef>
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x float> [[R]]
;
%splat = shufflevector < 2 x float > %x , < 2 x float > undef , < 2 x i32 > zeroinitializer
%r = frem < 2 x float > %splat , < float 42.0 , float 42.0 >
ret < 2 x float > %r
}
; Equivalent shuffle masks, but only one is a narrowing op.
; Both shuffles use the mask <0, 1>: %widen builds 2 elements from two
; 1-element operands, while %narrow extracts the low 2 elements of a 4-element
; operand. The expected output is the input unchanged.
; NOTE(review): the name suggests a regression test for bug report PR40734 -
; confirm against the bug tracker.
define < 2 x i1 > @PR40734 ( < 1 x i1 > %x , < 4 x i1 > %y ) {
; CHECK-LABEL: @PR40734(
; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <1 x i1> zeroinitializer, <1 x i1> [[X:%.*]], <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[NARROW:%.*]] = shufflevector <4 x i1> [[Y:%.*]], <4 x i1> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[WIDEN]], [[NARROW]]
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%widen = shufflevector < 1 x i1 > zeroinitializer , < 1 x i1 > %x , < 2 x i32 > < i32 0 , i32 1 >
%narrow = shufflevector < 4 x i1 > %y , < 4 x i1 > undef , < 2 x i32 > < i32 0 , i32 1 >
%r = and < 2 x i1 > %widen , %narrow
ret < 2 x i1 > %r
}
; Negative test - do not transform non-power-of-2 unless we know the backend handles these sequences identically.
; Two <3 x i8> inputs are each widened to <7 x i8> and then blended by a third
; shuffle. The expected output keeps all three shuffles with their original
; masks.
define < 7 x i8 > @insert_subvector_shuffles ( < 3 x i8 > %x , < 3 x i8 > %y ) {
; CHECK-LABEL: @insert_subvector_shuffles(
; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> undef, <7 x i32> <i32 undef, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <7 x i32> <i32 0, i32 8, i32 1, i32 undef, i32 8, i32 1, i32 9>
; CHECK-NEXT: ret <7 x i8> [[S3]]
;
%s1 = shufflevector < 3 x i8 > %x , < 3 x i8 > undef , < 7 x i32 > < i32 0 , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef >
%s2 = shufflevector < 3 x i8 > %y , < 3 x i8 > undef , < 7 x i32 > < i32 undef , i32 1 , i32 2 , i32 undef , i32 undef , i32 undef , i32 undef >
%s3 = shufflevector < 7 x i8 > %s1 , < 7 x i8 > %s2 , < 7 x i32 > < i32 0 , i32 8 , i32 1 , i32 undef , i32 8 , i32 1 , i32 9 >
ret < 7 x i8 > %s3
}
; Power-of-2 element counts (<2 x i8> widened to <8 x i8>): the two widening
; shuffles and the blend are expected to collapse into a single shuffle of %x
; and %y. Mask indices are renumbered for the 2-element operands, so 8 and 9
; (lanes 0 and 1 of %s2) become 2 and 3.
define < 8 x i8 > @insert_subvector_shuffles_pow2elts ( < 2 x i8 > %x , < 2 x i8 > %y ) {
; CHECK-LABEL: @insert_subvector_shuffles_pow2elts(
; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <8 x i32> <i32 0, i32 2, i32 1, i32 undef, i32 2, i32 1, i32 3, i32 0>
; CHECK-NEXT: ret <8 x i8> [[S3]]
;
%s1 = shufflevector < 2 x i8 > %x , < 2 x i8 > undef , < 8 x i32 > < i32 0 , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef >
%s2 = shufflevector < 2 x i8 > %y , < 2 x i8 > undef , < 8 x i32 > < i32 0 , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef >
%s3 = shufflevector < 8 x i8 > %s1 , < 8 x i8 > %s2 , < 8 x i32 > < i32 0 , i32 8 , i32 1 , i32 undef , i32 8 , i32 1 , i32 9 , i32 0 >
ret < 8 x i8 > %s3
}
; The last shuffle may change the vector type.
; Negative test - do not transform non-power-of-2 unless we know the backend handles these sequences identically.
; Two <3 x i8> inputs are widened to <7 x i8> and the final shuffle narrows the
; blend to <2 x i8>. The expected output does not merge the three shuffles; it
; only turns the mask elements of %s1 and %s2 that %s3 never reads into undef.
define < 2 x i8 > @insert_subvector_shuffles_narrowing ( < 3 x i8 > %x , < 3 x i8 > %y ) {
; CHECK-LABEL: @insert_subvector_shuffles_narrowing(
; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> undef, <7 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <2 x i32> <i32 0, i32 8>
; CHECK-NEXT: ret <2 x i8> [[S3]]
;
%s1 = shufflevector < 3 x i8 > %x , < 3 x i8 > undef , < 7 x i32 > < i32 0 , i32 1 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef >
%s2 = shufflevector < 3 x i8 > %y , < 3 x i8 > undef , < 7 x i32 > < i32 undef , i32 1 , i32 2 , i32 undef , i32 undef , i32 undef , i32 undef >
%s3 = shufflevector < 7 x i8 > %s1 , < 7 x i8 > %s2 , < 2 x i32 > < i32 0 , i32 8 >
ret < 2 x i8 > %s3
}
; Power-of-2 element counts (<4 x i8> widened to <8 x i8>, then narrowed to
; <2 x i8>): the three shuffles are expected to collapse into a single shuffle
; of %x and %y. Mask index 8 (lane 0 of %s2) is renumbered to 4 for the
; 4-element operands.
define < 2 x i8 > @insert_subvector_shuffles_narrowing_pow2elts ( < 4 x i8 > %x , < 4 x i8 > %y ) {
; CHECK-LABEL: @insert_subvector_shuffles_narrowing_pow2elts(
; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 4>
; CHECK-NEXT: ret <2 x i8> [[S3]]
;
%s1 = shufflevector < 4 x i8 > %x , < 4 x i8 > undef , < 8 x i32 > < i32 0 , i32 1 , i32 2 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef >
%s2 = shufflevector < 4 x i8 > %y , < 4 x i8 > undef , < 8 x i32 > < i32 0 , i32 1 , i32 undef , i32 3 , i32 undef , i32 undef , i32 undef , i32 undef >
%s3 = shufflevector < 8 x i8 > %s1 , < 8 x i8 > %s2 , < 2 x i32 > < i32 0 , i32 8 >
ret < 2 x i8 > %s3
}
; Similar to above, but this reduces to a widen with undefs of 'x'.
; %s2 supplies lane 0 of %x and %s1 supplies lane 1 of %x, so the blend %s3 is
; expected to become a single widening shuffle of %x with mask
; <0, 1, undef, undef>.
define < 4 x double > @insert_subvector_shuffles_identity ( < 2 x double > %x ) {
; CHECK-LABEL: @insert_subvector_shuffles_identity(
; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x double> [[S3]]
;
%s1 = shufflevector < 2 x double > %x , < 2 x double > undef , < 4 x i32 > < i32 undef , i32 1 , i32 undef , i32 undef >
%s2 = shufflevector < 2 x double > %x , < 2 x double > undef , < 4 x i32 > < i32 0 , i32 undef , i32 undef , i32 undef >
%s3 = shufflevector < 4 x double > %s2 , < 4 x double > %s1 , < 4 x i32 > < i32 0 , i32 5 , i32 undef , i32 undef >
ret < 4 x double > %s3
}
; Negative test - not identity with padding (although this could be folded with better analysis).
; %s1 repeats lane 1 of %x in its lane 3, and %s3 reads that lane (mask index
; 7). The expected output keeps all three shuffles unchanged.
define < 4 x double > @not_insert_subvector_shuffle ( < 2 x double > %x ) {
; CHECK-LABEL: @not_insert_subvector_shuffle(
; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 1>
; CHECK-NEXT: [[S2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> <i32 0, i32 5, i32 7, i32 undef>
; CHECK-NEXT: ret <4 x double> [[S3]]
;
%s1 = shufflevector < 2 x double > %x , < 2 x double > undef , < 4 x i32 > < i32 undef , i32 1 , i32 undef , i32 1 >
%s2 = shufflevector < 2 x double > %x , < 2 x double > undef , < 4 x i32 > < i32 0 , i32 undef , i32 undef , i32 undef >
%s3 = shufflevector < 4 x double > %s2 , < 4 x double > %s1 , < 4 x i32 > < i32 0 , i32 5 , i32 7 , i32 undef >
ret < 4 x double > %s3
}
; Negative test - operands are not the same size (although this could be partly folded with better analysis).
; %s1 widens a <2 x double> while %s2 widens a <3 x double>. The expected
; output keeps all three shuffles unchanged.
define < 4 x double > @not_insert_subvector_shuffles_with_same_size ( < 2 x double > %x , < 3 x double > %y ) {
; CHECK-LABEL: @not_insert_subvector_shuffles_with_same_size(
; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x double> [[Y:%.*]], <3 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x double> [[S3]]
;
%s1 = shufflevector < 2 x double > %x , < 2 x double > undef , < 4 x i32 > < i32 undef , i32 1 , i32 undef , i32 undef >
%s2 = shufflevector < 3 x double > %y , < 3 x double > undef , < 4 x i32 > < i32 0 , i32 undef , i32 undef , i32 undef >
%s3 = shufflevector < 4 x double > %s2 , < 4 x double > %s1 , < 4 x i32 > < i32 0 , i32 5 , i32 undef , i32 undef >
ret < 4 x double > %s3
}
; Demanded vector elements may not be able to simplify a shuffle mask
; before we try to narrow it. This used to crash.
; %I re-inserts lane 0 of %x into lane 0 of %widen (note the i16 index), which
; already holds that element. The expected output replaces the
; extractelement/insertelement pair with a shuffle of %x that uses the same mask
; as %widen, while %widen itself is kept for the return value.
define < 4 x float > @insert_subvector_crash_invalid_mask_elt ( < 2 x float > %x , < 4 x float > * %p ) {
; CHECK-LABEL: @insert_subvector_crash_invalid_mask_elt(
; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: [[I:%.*]] = shufflevector <2 x float> [[X]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; CHECK-NEXT: store <4 x float> [[I]], <4 x float>* [[P:%.*]], align 16
; CHECK-NEXT: ret <4 x float> [[WIDEN]]
;
%widen = shufflevector < 2 x float > %x , < 2 x float > undef , < 4 x i32 > < i32 0 , i32 1 , i32 undef , i32 undef >
%ext2 = extractelement < 2 x float > %x , i32 0
%I = insertelement < 4 x float > %widen , float %ext2 , i16 0
store < 4 x float > %I , < 4 x float > * %p
ret < 4 x float > %widen
}