forked from OSchip/llvm-project
[X86][SSE] Add support for PACKSS/PACKUS constant folding
Pulled out of D38472 llvm-svn: 314776
This commit is contained in:
parent
e485b143ea
commit
19d535e75b
|
@ -5350,6 +5350,13 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
|
|||
return false;
|
||||
};
|
||||
|
||||
// Handle UNDEFs.
|
||||
if (Op.isUndef()) {
|
||||
APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
|
||||
SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
|
||||
return CastBitData(UndefSrcElts, SrcEltBits);
|
||||
}
|
||||
|
||||
// Extract constant bits from build vector.
|
||||
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
|
||||
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
|
||||
|
@ -31838,6 +31845,82 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// Fold X86ISD::PACKSS / X86ISD::PACKUS nodes whose inputs are constant
/// build vectors into the equivalent constant vector.
///
/// PACK instructions interleave their sources per 128-bit lane: within each
/// lane, the first NumSrcEltsPerLane destination elements come from N0 and
/// the next NumSrcEltsPerLane from N1, each truncated from 2*DstBitsPerElt
/// to DstBitsPerElt with signed (PACKSS) or unsigned (PACKUS) saturation.
///
/// Returns the folded constant vector, or SDValue() if no fold was possible.
static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget &Subtarget) {
  unsigned Opcode = N->getOpcode();
  assert((X86ISD::PACKSS == Opcode || X86ISD::PACKUS == Opcode) &&
         "Unexpected pack opcode");

  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  unsigned DstBitsPerElt = VT.getScalarSizeInBits();
  // Each source element is twice as wide as a destination element.
  unsigned SrcBitsPerElt = 2 * DstBitsPerElt;
  assert(N0.getScalarValueSizeInBits() == SrcBitsPerElt &&
         N1.getScalarValueSizeInBits() == SrcBitsPerElt &&
         "Unexpected PACKSS/PACKUS input type");

  // Constant Folding.
  // Only fold if this node is the sole user of each non-undef operand, so we
  // don't pessimize other users of the constant by replacing a shared node.
  APInt UndefElts0, UndefElts1;
  SmallVector<APInt, 32> EltBits0, EltBits1;
  if ((N0->isUndef() || N->isOnlyUserOf(N0.getNode())) &&
      (N1->isUndef() || N->isOnlyUserOf(N1.getNode())) &&
      getTargetConstantBitsFromNode(N0, SrcBitsPerElt, UndefElts0, EltBits0) &&
      getTargetConstantBitsFromNode(N1, SrcBitsPerElt, UndefElts1, EltBits1)) {
    unsigned NumLanes = VT.getSizeInBits() / 128;
    unsigned NumDstElts = VT.getVectorNumElements();
    unsigned NumSrcElts = NumDstElts / 2;
    unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
    unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
    bool IsSigned = (X86ISD::PACKSS == Opcode);

    APInt Undefs(NumDstElts, 0);
    SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
      for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
        // First half of each lane reads N0, second half reads N1.
        unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
        auto &UndefElts = (Elt >= NumSrcEltsPerLane ? UndefElts1 : UndefElts0);
        auto &EltBits = (Elt >= NumSrcEltsPerLane ? EltBits1 : EltBits0);

        // Undef source elements produce undef destination elements.
        if (UndefElts[SrcIdx]) {
          Undefs.setBit(Lane * NumDstEltsPerLane + Elt);
          continue;
        }

        APInt &Val = EltBits[SrcIdx];
        if (IsSigned) {
          // PACKSS: Truncate signed value with signed saturation.
          // Source values less than dst minint are saturated to minint.
          // Source values greater than dst maxint are saturated to maxint.
          if (Val.isSignedIntN(DstBitsPerElt))
            Val = Val.trunc(DstBitsPerElt);
          else if (Val.isNegative())
            Val = APInt::getSignedMinValue(DstBitsPerElt);
          else
            Val = APInt::getSignedMaxValue(DstBitsPerElt);
        } else {
          // PACKUS: Truncate signed value with unsigned saturation.
          // Source values less than zero are saturated to zero.
          // Source values greater than dst maxuint are saturated to maxuint.
          if (Val.isIntN(DstBitsPerElt))
            Val = Val.trunc(DstBitsPerElt);
          else if (Val.isNegative())
            Val = APInt::getNullValue(DstBitsPerElt);
          else
            Val = APInt::getAllOnesValue(DstBitsPerElt);
        }
        Bits[Lane * NumDstEltsPerLane + Elt] = Val;
      }
    }

    return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
  }

  return SDValue();
}
|
||||
|
||||
static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const X86Subtarget &Subtarget) {
|
||||
|
@ -36069,6 +36152,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
|
||||
case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
|
||||
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
|
||||
case X86ISD::PACKSS:
|
||||
case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget);
|
||||
case X86ISD::VSHLI:
|
||||
case X86ISD::VSRAI:
|
||||
case X86ISD::VSRLI:
|
||||
|
|
|
@ -21,15 +21,15 @@ declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readno
|
|||
define <16 x i16> @test_x86_avx2_packssdw_fold() {
|
||||
; AVX2-LABEL: test_x86_avx2_packssdw_fold:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; AVX2-NEXT: vpackssdw LCPI1_0, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0x05,A,A,A,A]
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
|
||||
; AVX2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
|
||||
; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI1_0, kind: FK_Data_4
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX512VL-LABEL: test_x86_avx2_packssdw_fold:
|
||||
; AVX512VL: ## BB#0:
|
||||
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; AVX512VL-NEXT: vpackssdw LCPI1_0, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x05,A,A,A,A]
|
||||
; AVX512VL-NEXT: vmovaps LCPI1_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
|
||||
; AVX512VL-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
|
||||
; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI1_0, kind: FK_Data_4
|
||||
; AVX512VL-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
|
||||
|
@ -56,20 +56,16 @@ declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readn
|
|||
define <32 x i8> @test_x86_avx2_packsswb_fold() {
|
||||
; AVX2-LABEL: test_x86_avx2_packsswb_fold:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,255,256,65535,65535,65281,65280,32858,0,255,256,65535,65535,65281,65280,32858]
|
||||
; AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
|
||||
; AVX2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
|
||||
; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI3_0, kind: FK_Data_4
|
||||
; AVX2-NEXT: vpacksswb %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x63,0xc0]
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX512VL-LABEL: test_x86_avx2_packsswb_fold:
|
||||
; AVX512VL: ## BB#0:
|
||||
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; AVX512VL-NEXT: vmovdqa LCPI3_0, %ymm1 ## EVEX TO VEX Compression ymm1 = [0,255,256,65535,65535,65281,65280,32858,0,255,256,65535,65535,65281,65280,32858]
|
||||
; AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
|
||||
; AVX512VL-NEXT: vmovaps LCPI3_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
|
||||
; AVX512VL-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
|
||||
; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI3_0, kind: FK_Data_4
|
||||
; AVX512VL-NEXT: vpacksswb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x63,0xc0]
|
||||
; AVX512VL-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
|
||||
ret <32 x i8> %res
|
||||
|
@ -95,20 +91,16 @@ declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readn
|
|||
define <32 x i8> @test_x86_avx2_packuswb_fold() {
|
||||
; AVX2-LABEL: test_x86_avx2_packuswb_fold:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,255,256,65535,65535,65281,65280,32858,0,255,256,65535,65535,65281,65280,32858]
|
||||
; AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
; AVX2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
|
||||
; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI5_0, kind: FK_Data_4
|
||||
; AVX2-NEXT: vpackuswb %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x67,0xc0]
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX512VL-LABEL: test_x86_avx2_packuswb_fold:
|
||||
; AVX512VL: ## BB#0:
|
||||
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; AVX512VL-NEXT: vmovdqa LCPI5_0, %ymm1 ## EVEX TO VEX Compression ymm1 = [0,255,256,65535,65535,65281,65280,32858,0,255,256,65535,65535,65281,65280,32858]
|
||||
; AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
|
||||
; AVX512VL-NEXT: vmovaps LCPI5_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
; AVX512VL-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
|
||||
; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI5_0, kind: FK_Data_4
|
||||
; AVX512VL-NEXT: vpackuswb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x67,0xc0]
|
||||
; AVX512VL-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
|
||||
ret <32 x i8> %res
|
||||
|
@ -850,16 +842,16 @@ declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readno
|
|||
define <16 x i16> @test_x86_avx2_packusdw_fold() {
|
||||
; AVX2-LABEL: test_x86_avx2_packusdw_fold:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; AVX2-NEXT: vpackusdw LCPI55_0, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x2b,0x05,A,A,A,A]
|
||||
; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI55_0, kind: FK_Data_4
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
|
||||
; AVX2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
|
||||
; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI55_0, kind: FK_Data_4
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
|
||||
; AVX512VL: ## BB#0:
|
||||
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; AVX512VL-NEXT: vpackusdw LCPI55_0, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x05,A,A,A,A]
|
||||
; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI55_0, kind: FK_Data_4
|
||||
; AVX512VL-NEXT: vmovaps LCPI55_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
|
||||
; AVX512VL-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
|
||||
; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI55_0, kind: FK_Data_4
|
||||
; AVX512VL-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
|
||||
ret <16 x i16> %res
|
||||
|
|
|
@ -684,22 +684,22 @@ declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind rea
|
|||
define <8 x i16> @test_x86_sse2_packssdw_128_fold() {
|
||||
; SSE-LABEL: test_x86_sse2_packssdw_128_fold:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: pxor %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xef,0xc0]
|
||||
; SSE-NEXT: packssdw LCPI32_0, %xmm0 ## encoding: [0x66,0x0f,0x6b,0x05,A,A,A,A]
|
||||
; SSE-NEXT: ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4
|
||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
|
||||
; SSE-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A]
|
||||
; SSE-NEXT: ## fixup A - offset: 3, value: LCPI32_0, kind: FK_Data_4
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX2-LABEL: test_x86_sse2_packssdw_128_fold:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; AVX2-NEXT: vpackssdw LCPI32_0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0x05,A,A,A,A]
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
|
||||
; AVX2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
||||
; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: test_x86_sse2_packssdw_128_fold:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; SKX-NEXT: vpackssdw LCPI32_0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x05,A,A,A,A]
|
||||
; SKX-NEXT: vmovaps LCPI32_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768]
|
||||
; SKX-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
||||
; SKX-NEXT: ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
|
||||
|
@ -731,29 +731,23 @@ declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind rea
|
|||
define <16 x i8> @test_x86_sse2_packsswb_128_fold() {
|
||||
; SSE-LABEL: test_x86_sse2_packsswb_128_fold:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: pxor %xmm1, %xmm1 ## encoding: [0x66,0x0f,0xef,0xc9]
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm0 = [0,255,256,65535,65535,65281,65280,32858]
|
||||
; SSE-NEXT: ## encoding: [0x66,0x0f,0x6f,0x05,A,A,A,A]
|
||||
; SSE-NEXT: ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4
|
||||
; SSE-NEXT: packsswb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x63,0xc1]
|
||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
|
||||
; SSE-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A]
|
||||
; SSE-NEXT: ## fixup A - offset: 3, value: LCPI34_0, kind: FK_Data_4
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX2-LABEL: test_x86_sse2_packsswb_128_fold:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,255,256,65535,65535,65281,65280,32858]
|
||||
; AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
|
||||
; AVX2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
||||
; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4
|
||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0x63,0xc0]
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: test_x86_sse2_packsswb_128_fold:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; SKX-NEXT: vmovdqa LCPI34_0, %xmm1 ## EVEX TO VEX Compression xmm1 = [0,255,256,65535,65535,65281,65280,32858]
|
||||
; SKX-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
|
||||
; SKX-NEXT: vmovaps LCPI34_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
|
||||
; SKX-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
||||
; SKX-NEXT: ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4
|
||||
; SKX-NEXT: vpacksswb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x63,0xc0]
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
|
||||
ret <16 x i8> %res
|
||||
|
@ -784,29 +778,23 @@ declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind rea
|
|||
define <16 x i8> @test_x86_sse2_packuswb_128_fold() {
|
||||
; SSE-LABEL: test_x86_sse2_packuswb_128_fold:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: pxor %xmm1, %xmm1 ## encoding: [0x66,0x0f,0xef,0xc9]
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm0 = [0,255,256,65535,65535,65281,65280,32858]
|
||||
; SSE-NEXT: ## encoding: [0x66,0x0f,0x6f,0x05,A,A,A,A]
|
||||
; SSE-NEXT: ## fixup A - offset: 4, value: LCPI36_0, kind: FK_Data_4
|
||||
; SSE-NEXT: packuswb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x67,0xc1]
|
||||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
; SSE-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A]
|
||||
; SSE-NEXT: ## fixup A - offset: 3, value: LCPI36_0, kind: FK_Data_4
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX2-LABEL: test_x86_sse2_packuswb_128_fold:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,255,256,65535,65535,65281,65280,32858]
|
||||
; AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
; AVX2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
||||
; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI36_0, kind: FK_Data_4
|
||||
; AVX2-NEXT: vpackuswb %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0x67,0xc0]
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: test_x86_sse2_packuswb_128_fold:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; SKX-NEXT: vmovdqa LCPI36_0, %xmm1 ## EVEX TO VEX Compression xmm1 = [0,255,256,65535,65535,65281,65280,32858]
|
||||
; SKX-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
|
||||
; SKX-NEXT: vmovaps LCPI36_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
; SKX-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
||||
; SKX-NEXT: ## fixup A - offset: 4, value: LCPI36_0, kind: FK_Data_4
|
||||
; SKX-NEXT: vpackuswb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x67,0xc0]
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
|
||||
ret <16 x i8> %res
|
||||
|
|
|
@ -138,23 +138,23 @@ declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readno
|
|||
define <8 x i16> @test_x86_sse41_packusdw_fold() {
|
||||
; SSE41-LABEL: test_x86_sse41_packusdw_fold:
|
||||
; SSE41: ## BB#0:
|
||||
; SSE41-NEXT: pxor %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xef,0xc0]
|
||||
; SSE41-NEXT: packusdw LCPI7_0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x2b,0x05,A,A,A,A]
|
||||
; SSE41-NEXT: ## fixup A - offset: 5, value: LCPI7_0, kind: FK_Data_4
|
||||
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,65535,65535,0,0]
|
||||
; SSE41-NEXT: ## encoding: [0x0f,0x28,0x05,A,A,A,A]
|
||||
; SSE41-NEXT: ## fixup A - offset: 3, value: LCPI7_0, kind: FK_Data_4
|
||||
; SSE41-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX2-LABEL: test_x86_sse41_packusdw_fold:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; AVX2-NEXT: vpackusdw LCPI7_0, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x2b,0x05,A,A,A,A]
|
||||
; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI7_0, kind: FK_Data_4
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65535,65535,0,0]
|
||||
; AVX2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
||||
; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI7_0, kind: FK_Data_4
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: test_x86_sse41_packusdw_fold:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
|
||||
; SKX-NEXT: vpackusdw LCPI7_0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x05,A,A,A,A]
|
||||
; SKX-NEXT: ## fixup A - offset: 5, value: LCPI7_0, kind: FK_Data_4
|
||||
; SKX-NEXT: vmovaps LCPI7_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,65535,65535,0,0]
|
||||
; SKX-NEXT: ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
|
||||
; SKX-NEXT: ## fixup A - offset: 4, value: LCPI7_0, kind: FK_Data_4
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
|
||||
ret <8 x i16> %res
|
||||
|
|
Loading…
Reference in New Issue