forked from OSchip/llvm-project
[DAGCombiner][X86][AArch64] (x - C) + y -> (x + y) - C fold. Try 2
Summary: Only vector tests are affected here, since subtraction of a scalar constant is rewritten as addition of the negated constant. No surprising test changes. https://rise4fun.com/Alive/pbT This is a recommit; the change was originally committed in rL361852 but was reverted to investigate test-suite compile-time hangs. Reviewers: RKSimon, craig.topper, spatel Reviewed By: RKSimon Subscribers: javed.absar, kristof.beyls, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62257 llvm-svn: 362146
This commit is contained in:
parent
57aa36ff91
commit
a4e3b50e26
|
@ -2470,6 +2470,14 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
|
|||
if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
|
||||
return V;
|
||||
|
||||
// Hoist one-use subtraction by constant: (x - C) + y -> (x + y) - C
|
||||
// This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
|
||||
if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
|
||||
isConstantOrConstantVector(N0.getOperand(1))) {
|
||||
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
|
||||
return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
|
||||
}
|
||||
|
||||
// If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
|
||||
// rather than 'add 0/-1' (the zext should get folded).
|
||||
// add (sext i1 Y), X --> sub X, (zext i1 Y)
|
||||
|
|
|
@ -218,8 +218,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x
|
|||
; CHECK-NEXT: adrp x8, .LCPI14_0
|
||||
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI14_0]
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = add <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
|
||||
|
@ -232,8 +232,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x
|
|||
; CHECK-NEXT: adrp x8, .LCPI15_0
|
||||
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI15_0]
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s
|
||||
; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = add <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
|
||||
|
@ -360,8 +360,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4
|
|||
; CHECK-NEXT: adrp x8, .LCPI23_0
|
||||
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI23_0]
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
|
||||
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
|
||||
; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s
|
||||
; CHECK-NEXT: add v0.4s, v2.4s, v0.4s
|
||||
; CHECK-NEXT: ret
|
||||
%t0 = sub <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
|
||||
|
|
|
@ -341,16 +341,16 @@ define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x
|
|||
define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; X32-LABEL: vec_sink_sub_of_const_to_add0:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: paddd %xmm2, %xmm1
|
||||
; X32-NEXT: paddd %xmm1, %xmm0
|
||||
; X32-NEXT: psubd {{\.LCPI.*}}, %xmm0
|
||||
; X32-NEXT: paddd %xmm2, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vec_sink_sub_of_const_to_add0:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: paddd %xmm2, %xmm1
|
||||
; X64-NEXT: paddd %xmm1, %xmm0
|
||||
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: paddd %xmm2, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%t0 = add <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
|
||||
|
@ -360,16 +360,16 @@ define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x
|
|||
define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
|
||||
; X32-LABEL: vec_sink_sub_of_const_to_add1:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: paddd %xmm2, %xmm1
|
||||
; X32-NEXT: paddd %xmm1, %xmm0
|
||||
; X32-NEXT: psubd {{\.LCPI.*}}, %xmm0
|
||||
; X32-NEXT: paddd %xmm2, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vec_sink_sub_of_const_to_add1:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: paddd %xmm2, %xmm1
|
||||
; X64-NEXT: paddd %xmm1, %xmm0
|
||||
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: paddd %xmm2, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%t0 = add <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
|
||||
|
@ -525,15 +525,15 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4
|
|||
; X32-LABEL: vec_sink_sub_from_const_to_sub2:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: psubd %xmm1, %xmm0
|
||||
; X32-NEXT: psubd {{\.LCPI.*}}, %xmm0
|
||||
; X32-NEXT: paddd %xmm2, %xmm0
|
||||
; X32-NEXT: psubd {{\.LCPI.*}}, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: vec_sink_sub_from_const_to_sub2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: psubd %xmm1, %xmm0
|
||||
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: paddd %xmm2, %xmm0
|
||||
; X64-NEXT: psubd {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: retq
|
||||
%t0 = sub <4 x i32> %a, %b
|
||||
%t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
|
||||
|
|
|
@ -186,10 +186,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
|
|||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; SSE2-NEXT: pxor %xmm2, %xmm0
|
||||
; SSE2-NEXT: psubb %xmm2, %xmm0
|
||||
; SSE2-NEXT: psrlw $7, %xmm1
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE2-NEXT: paddb %xmm0, %xmm1
|
||||
; SSE2-NEXT: psubb %xmm2, %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
|
@ -210,10 +210,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
|
|||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; SSE41-NEXT: pxor %xmm2, %xmm0
|
||||
; SSE41-NEXT: psubb %xmm2, %xmm0
|
||||
; SSE41-NEXT: psrlw $7, %xmm1
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE41-NEXT: paddb %xmm0, %xmm1
|
||||
; SSE41-NEXT: psubb %xmm2, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
|
@ -233,10 +233,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
|
|||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
|
||||
; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2NOBW-LABEL: test_div7_16i8:
|
||||
|
@ -251,10 +251,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
|
|||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX2NOBW-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpsrlw $7, %xmm0, %xmm0
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX2NOBW-NEXT: vpaddb %xmm0, %xmm1, %xmm0
|
||||
; AVX2NOBW-NEXT: vpsubb %xmm2, %xmm0, %xmm0
|
||||
; AVX2NOBW-NEXT: vzeroupper
|
||||
; AVX2NOBW-NEXT: retq
|
||||
;
|
||||
|
@ -269,10 +269,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512BW-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpsrlw $7, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vpaddb %xmm0, %xmm1, %xmm0
|
||||
; AVX512BW-NEXT: vpsubb %xmm2, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
%res = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
|
||||
|
@ -657,10 +657,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
|
|||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; SSE2-NEXT: pxor %xmm3, %xmm2
|
||||
; SSE2-NEXT: psubb %xmm3, %xmm2
|
||||
; SSE2-NEXT: psrlw $7, %xmm1
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE2-NEXT: paddb %xmm2, %xmm1
|
||||
; SSE2-NEXT: psubb %xmm3, %xmm1
|
||||
; SSE2-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE2-NEXT: psllw $3, %xmm2
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
|
||||
|
@ -685,10 +685,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
|
|||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; SSE41-NEXT: pxor %xmm3, %xmm2
|
||||
; SSE41-NEXT: psubb %xmm3, %xmm2
|
||||
; SSE41-NEXT: psrlw $7, %xmm1
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE41-NEXT: paddb %xmm2, %xmm1
|
||||
; SSE41-NEXT: psubb %xmm3, %xmm1
|
||||
; SSE41-NEXT: movdqa %xmm1, %xmm2
|
||||
; SSE41-NEXT: psllw $3, %xmm2
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
|
||||
|
@ -712,10 +712,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
|
|||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddb %xmm1, %xmm2, %xmm1
|
||||
; AVX1-NEXT: vpsubb %xmm3, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsllw $3, %xmm1, %xmm2
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
|
@ -734,10 +734,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
|
|||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX2NOBW-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vpsrlw $7, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpaddb %xmm1, %xmm2, %xmm1
|
||||
; AVX2NOBW-NEXT: vpsubb %xmm3, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpsllw $3, %xmm1, %xmm2
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
|
@ -756,10 +756,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512BW-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpsrlw $7, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpaddb %xmm1, %xmm2, %xmm1
|
||||
; AVX512BW-NEXT: vpsubb %xmm3, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpsllw $3, %xmm1, %xmm2
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpsubb %xmm2, %xmm1, %xmm1
|
||||
|
|
|
@ -177,8 +177,8 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
|
|||
; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX1-NEXT: vpxor %xmm6, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsubb %xmm6, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpsubb %xmm6, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm2
|
||||
; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
|
||||
|
@ -193,8 +193,8 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
|
|||
; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand %xmm5, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm6, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsubb %xmm6, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsubb %xmm6, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
|
@ -215,10 +215,10 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
|
|||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX2NOBW-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vpsrlw $7, %ymm0, %ymm0
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2NOBW-NEXT: vpaddb %ymm0, %ymm1, %ymm0
|
||||
; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm0, %ymm0
|
||||
; AVX2NOBW-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_div7_32i8:
|
||||
|
@ -232,10 +232,10 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512BW-NEXT: vpxor %ymm2, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpsrlw $7, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: vpaddb %ymm0, %ymm1, %ymm0
|
||||
; AVX512BW-NEXT: vpsubb %ymm2, %ymm0, %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%res = sdiv <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
|
||||
ret <32 x i8> %res
|
||||
|
@ -588,8 +588,8 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
|
|||
; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX1-NEXT: vpxor %xmm7, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsubb %xmm7, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsubb %xmm7, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsllw $3, %xmm2, %xmm4
|
||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
|
||||
; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
|
||||
|
@ -609,8 +609,8 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
|
|||
; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpxor %xmm7, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsubb %xmm7, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsubb %xmm7, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpsllw $3, %xmm2, %xmm3
|
||||
; AVX1-NEXT: vpand %xmm5, %xmm3, %xmm3
|
||||
; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2
|
||||
|
@ -635,10 +635,10 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
|
|||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX2NOBW-NEXT: vpxor %ymm3, %ymm2, %ymm2
|
||||
; AVX2NOBW-NEXT: vpsubb %ymm3, %ymm2, %ymm2
|
||||
; AVX2NOBW-NEXT: vpsrlw $7, %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vpaddb %ymm1, %ymm2, %ymm1
|
||||
; AVX2NOBW-NEXT: vpsubb %ymm3, %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vpsllw $3, %ymm1, %ymm2
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
|
||||
|
@ -656,10 +656,10 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512BW-NEXT: vpxor %ymm3, %ymm2, %ymm2
|
||||
; AVX512BW-NEXT: vpsubb %ymm3, %ymm2, %ymm2
|
||||
; AVX512BW-NEXT: vpsrlw $7, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpaddb %ymm1, %ymm2, %ymm1
|
||||
; AVX512BW-NEXT: vpsubb %ymm3, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpsllw $3, %ymm1, %ymm2
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
|
||||
; AVX512BW-NEXT: vpsubb %ymm2, %ymm1, %ymm1
|
||||
|
|
|
@ -146,8 +146,8 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512F-NEXT: vpxor %ymm6, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsubb %ymm6, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vpsubb %ymm6, %ymm0, %ymm0
|
||||
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512F-NEXT: vpmovsxbw %xmm2, %ymm2
|
||||
; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
|
||||
|
@ -163,8 +163,8 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpxor %ymm6, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpsubb %ymm6, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpaddb %ymm2, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: vpsubb %ymm6, %ymm1, %ymm1
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_div7_64i8:
|
||||
|
@ -185,10 +185,10 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512BW-NEXT: vpxorq %zmm2, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
%res = sdiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
|
||||
ret <64 x i8> %res
|
||||
|
@ -486,8 +486,8 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512F-NEXT: vpxor %ymm7, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsubb %ymm7, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpaddb %ymm4, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsubb %ymm7, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsllw $3, %ymm2, %ymm4
|
||||
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm8 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
|
||||
; AVX512F-NEXT: vpand %ymm8, %ymm4, %ymm4
|
||||
|
@ -508,8 +508,8 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpxor %ymm7, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsubb %ymm7, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpaddb %ymm3, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsubb %ymm7, %ymm2, %ymm2
|
||||
; AVX512F-NEXT: vpsllw $3, %ymm2, %ymm3
|
||||
; AVX512F-NEXT: vpand %ymm8, %ymm3, %ymm3
|
||||
; AVX512F-NEXT: vpsubb %ymm3, %ymm2, %ymm2
|
||||
|
@ -534,10 +534,10 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512BW-NEXT: vpxorq %zmm3, %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpsubb %zmm3, %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1
|
||||
; AVX512BW-NEXT: vpsubb %zmm3, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpsllw $3, %zmm1, %zmm2
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
|
||||
; AVX512BW-NEXT: vpsubb %zmm2, %zmm1, %zmm1
|
||||
|
|
Loading…
Reference in New Issue