forked from OSchip/llvm-project
[LegalizeVectorOps] Improve the placement of ANDs in the ExpandLoad path for non-byte-sized loads.
When we need to merge two adjacent loads, the AND mask for the low piece was still sized for the full src element size. But we didn't have that many bits. The upper bits are already zero due to the SRL. So we can skip the AND if we're going to combine with the high bits. We do need an AND to clear out any bits from the high part. We were ANDing the high part before combining with the low part, but it looks like ANDing after the OR gets better results. So we can just emit the final AND after the optional concatenation is done. That will handle skipping the AND before the OR and get rid of extra high bits after the OR. llvm-svn: 354655
This commit is contained in:
parent
069cf05e87
commit
fa6187d230
|
@ -654,21 +654,21 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
|
|||
LoadChains.push_back(ScalarLoad.getValue(1));
|
||||
}
|
||||
|
||||
// Extract bits, pack and extend/trunc them into destination type.
|
||||
unsigned SrcEltBits = SrcEltVT.getSizeInBits();
|
||||
SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT);
|
||||
|
||||
unsigned BitOffset = 0;
|
||||
unsigned WideIdx = 0;
|
||||
unsigned WideBits = WideVT.getSizeInBits();
|
||||
|
||||
// Extract bits, pack and extend/trunc them into destination type.
|
||||
unsigned SrcEltBits = SrcEltVT.getSizeInBits();
|
||||
SDValue SrcEltBitMask = DAG.getConstant(
|
||||
APInt::getLowBitsSet(WideBits, SrcEltBits), dl, WideVT);
|
||||
|
||||
for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
|
||||
assert(BitOffset < WideBits && "Unexpected offset!");
|
||||
|
||||
SDValue ShAmt = DAG.getConstant(
|
||||
BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
|
||||
SDValue Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
|
||||
Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
|
||||
|
||||
BitOffset += SrcEltBits;
|
||||
if (BitOffset >= WideBits) {
|
||||
|
@ -680,11 +680,12 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
|
|||
TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
|
||||
SDValue Hi =
|
||||
DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
|
||||
Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
|
||||
Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
|
||||
}
|
||||
}
|
||||
|
||||
Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
|
||||
|
||||
switch (ExtType) {
|
||||
default: llvm_unreachable("Unknown extended-load op!");
|
||||
case ISD::EXTLOAD:
|
||||
|
|
|
@ -2285,58 +2285,49 @@ define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) {
|
|||
define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) {
|
||||
; SSE2-LABEL: zext_4i17_to_4i32:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movl 8(%rdi), %eax
|
||||
; SSE2-NEXT: andl $15, %eax
|
||||
; SSE2-NEXT: shll $13, %eax
|
||||
; SSE2-NEXT: movq (%rdi), %rcx
|
||||
; SSE2-NEXT: movq %rcx, %rdx
|
||||
; SSE2-NEXT: shrq $51, %rdx
|
||||
; SSE2-NEXT: orl %eax, %edx
|
||||
; SSE2-NEXT: movd %edx, %xmm0
|
||||
; SSE2-NEXT: movq %rcx, %rax
|
||||
; SSE2-NEXT: shrq $34, %rax
|
||||
; SSE2-NEXT: andl $131071, %eax # imm = 0x1FFFF
|
||||
; SSE2-NEXT: movd %eax, %xmm1
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; SSE2-NEXT: movl %ecx, %eax
|
||||
; SSE2-NEXT: andl $131071, %eax # imm = 0x1FFFF
|
||||
; SSE2-NEXT: movq (%rdi), %rax
|
||||
; SSE2-NEXT: movd %eax, %xmm0
|
||||
; SSE2-NEXT: movq %rax, %rcx
|
||||
; SSE2-NEXT: shrq $17, %rcx
|
||||
; SSE2-NEXT: andl $131071, %ecx # imm = 0x1FFFF
|
||||
; SSE2-NEXT: movd %ecx, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSE2-NEXT: movd %ecx, %xmm1
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE2-NEXT: movl 8(%rdi), %ecx
|
||||
; SSE2-NEXT: shll $13, %ecx
|
||||
; SSE2-NEXT: movq %rax, %rdx
|
||||
; SSE2-NEXT: shrq $51, %rdx
|
||||
; SSE2-NEXT: orl %ecx, %edx
|
||||
; SSE2-NEXT: movd %edx, %xmm1
|
||||
; SSE2-NEXT: shrq $34, %rax
|
||||
; SSE2-NEXT: movd %eax, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: zext_4i17_to_4i32:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movl 8(%rdi), %eax
|
||||
; SSSE3-NEXT: andl $15, %eax
|
||||
; SSSE3-NEXT: shll $13, %eax
|
||||
; SSSE3-NEXT: movq (%rdi), %rcx
|
||||
; SSSE3-NEXT: movq %rcx, %rdx
|
||||
; SSSE3-NEXT: shrq $51, %rdx
|
||||
; SSSE3-NEXT: orl %eax, %edx
|
||||
; SSSE3-NEXT: movd %edx, %xmm0
|
||||
; SSSE3-NEXT: movq %rcx, %rax
|
||||
; SSSE3-NEXT: shrq $34, %rax
|
||||
; SSSE3-NEXT: andl $131071, %eax # imm = 0x1FFFF
|
||||
; SSSE3-NEXT: movd %eax, %xmm1
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; SSSE3-NEXT: movl %ecx, %eax
|
||||
; SSSE3-NEXT: andl $131071, %eax # imm = 0x1FFFF
|
||||
; SSSE3-NEXT: movq (%rdi), %rax
|
||||
; SSSE3-NEXT: movd %eax, %xmm0
|
||||
; SSSE3-NEXT: movq %rax, %rcx
|
||||
; SSSE3-NEXT: shrq $17, %rcx
|
||||
; SSSE3-NEXT: andl $131071, %ecx # imm = 0x1FFFF
|
||||
; SSSE3-NEXT: movd %ecx, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSSE3-NEXT: movd %ecx, %xmm1
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSSE3-NEXT: movl 8(%rdi), %ecx
|
||||
; SSSE3-NEXT: shll $13, %ecx
|
||||
; SSSE3-NEXT: movq %rax, %rdx
|
||||
; SSSE3-NEXT: shrq $51, %rdx
|
||||
; SSSE3-NEXT: orl %ecx, %edx
|
||||
; SSSE3-NEXT: movd %edx, %xmm1
|
||||
; SSSE3-NEXT: shrq $34, %rax
|
||||
; SSSE3-NEXT: movd %eax, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
||||
; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: zext_4i17_to_4i32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movl 8(%rdi), %eax
|
||||
; SSE41-NEXT: andl $15, %eax
|
||||
; SSE41-NEXT: shll $13, %eax
|
||||
; SSE41-NEXT: movq (%rdi), %rcx
|
||||
; SSE41-NEXT: movq %rcx, %rdx
|
||||
|
@ -2344,38 +2335,69 @@ define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) {
|
|||
; SSE41-NEXT: orl %eax, %edx
|
||||
; SSE41-NEXT: movq %rcx, %rax
|
||||
; SSE41-NEXT: shrq $17, %rax
|
||||
; SSE41-NEXT: andl $131071, %eax # imm = 0x1FFFF
|
||||
; SSE41-NEXT: movl %ecx, %esi
|
||||
; SSE41-NEXT: andl $131071, %esi # imm = 0x1FFFF
|
||||
; SSE41-NEXT: movd %esi, %xmm0
|
||||
; SSE41-NEXT: movd %ecx, %xmm0
|
||||
; SSE41-NEXT: pinsrd $1, %eax, %xmm0
|
||||
; SSE41-NEXT: shrq $34, %rcx
|
||||
; SSE41-NEXT: andl $131071, %ecx # imm = 0x1FFFF
|
||||
; SSE41-NEXT: pinsrd $2, %ecx, %xmm0
|
||||
; SSE41-NEXT: pinsrd $3, %edx, %xmm0
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: zext_4i17_to_4i32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: movl 8(%rdi), %eax
|
||||
; AVX-NEXT: andl $15, %eax
|
||||
; AVX-NEXT: shll $13, %eax
|
||||
; AVX-NEXT: movq (%rdi), %rcx
|
||||
; AVX-NEXT: movq %rcx, %rdx
|
||||
; AVX-NEXT: shrq $51, %rdx
|
||||
; AVX-NEXT: orl %eax, %edx
|
||||
; AVX-NEXT: movq %rcx, %rax
|
||||
; AVX-NEXT: shrq $17, %rax
|
||||
; AVX-NEXT: andl $131071, %eax # imm = 0x1FFFF
|
||||
; AVX-NEXT: movl %ecx, %esi
|
||||
; AVX-NEXT: andl $131071, %esi # imm = 0x1FFFF
|
||||
; AVX-NEXT: vmovd %esi, %xmm0
|
||||
; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: shrq $34, %rcx
|
||||
; AVX-NEXT: andl $131071, %ecx # imm = 0x1FFFF
|
||||
; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
; AVX1-LABEL: zext_4i17_to_4i32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: movl 8(%rdi), %eax
|
||||
; AVX1-NEXT: shll $13, %eax
|
||||
; AVX1-NEXT: movq (%rdi), %rcx
|
||||
; AVX1-NEXT: movq %rcx, %rdx
|
||||
; AVX1-NEXT: shrq $51, %rdx
|
||||
; AVX1-NEXT: orl %eax, %edx
|
||||
; AVX1-NEXT: movq %rcx, %rax
|
||||
; AVX1-NEXT: shrq $17, %rax
|
||||
; AVX1-NEXT: vmovd %ecx, %xmm0
|
||||
; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: shrq $34, %rcx
|
||||
; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: zext_4i17_to_4i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: movl 8(%rdi), %eax
|
||||
; AVX2-NEXT: shll $13, %eax
|
||||
; AVX2-NEXT: movq (%rdi), %rcx
|
||||
; AVX2-NEXT: movq %rcx, %rdx
|
||||
; AVX2-NEXT: shrq $51, %rdx
|
||||
; AVX2-NEXT: orl %eax, %edx
|
||||
; AVX2-NEXT: movq %rcx, %rax
|
||||
; AVX2-NEXT: shrq $17, %rax
|
||||
; AVX2-NEXT: vmovd %ecx, %xmm0
|
||||
; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: shrq $34, %rcx
|
||||
; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
|
||||
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: zext_4i17_to_4i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: movl 8(%rdi), %eax
|
||||
; AVX512-NEXT: shll $13, %eax
|
||||
; AVX512-NEXT: movq (%rdi), %rcx
|
||||
; AVX512-NEXT: movq %rcx, %rdx
|
||||
; AVX512-NEXT: shrq $51, %rdx
|
||||
; AVX512-NEXT: orl %eax, %edx
|
||||
; AVX512-NEXT: movq %rcx, %rax
|
||||
; AVX512-NEXT: shrq $17, %rax
|
||||
; AVX512-NEXT: vmovd %ecx, %xmm0
|
||||
; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; AVX512-NEXT: shrq $34, %rcx
|
||||
; AVX512-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
|
||||
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%a = load <4 x i17>, <4 x i17>* %ptr
|
||||
%b = zext <4 x i17> %a to <4 x i32>
|
||||
ret <4 x i32> %b
|
||||
|
|
|
@ -2299,58 +2299,49 @@ define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) {
|
|||
define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) {
|
||||
; SSE2-LABEL: zext_4i17_to_4i32:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movl 8(%rdi), %eax
|
||||
; SSE2-NEXT: andl $15, %eax
|
||||
; SSE2-NEXT: shll $13, %eax
|
||||
; SSE2-NEXT: movq (%rdi), %rcx
|
||||
; SSE2-NEXT: movq %rcx, %rdx
|
||||
; SSE2-NEXT: shrq $51, %rdx
|
||||
; SSE2-NEXT: orl %eax, %edx
|
||||
; SSE2-NEXT: movd %edx, %xmm0
|
||||
; SSE2-NEXT: movq %rcx, %rax
|
||||
; SSE2-NEXT: shrq $34, %rax
|
||||
; SSE2-NEXT: andl $131071, %eax # imm = 0x1FFFF
|
||||
; SSE2-NEXT: movd %eax, %xmm1
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; SSE2-NEXT: movl %ecx, %eax
|
||||
; SSE2-NEXT: andl $131071, %eax # imm = 0x1FFFF
|
||||
; SSE2-NEXT: movq (%rdi), %rax
|
||||
; SSE2-NEXT: movd %eax, %xmm0
|
||||
; SSE2-NEXT: movq %rax, %rcx
|
||||
; SSE2-NEXT: shrq $17, %rcx
|
||||
; SSE2-NEXT: andl $131071, %ecx # imm = 0x1FFFF
|
||||
; SSE2-NEXT: movd %ecx, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSE2-NEXT: movd %ecx, %xmm1
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSE2-NEXT: movl 8(%rdi), %ecx
|
||||
; SSE2-NEXT: shll $13, %ecx
|
||||
; SSE2-NEXT: movq %rax, %rdx
|
||||
; SSE2-NEXT: shrq $51, %rdx
|
||||
; SSE2-NEXT: orl %ecx, %edx
|
||||
; SSE2-NEXT: movd %edx, %xmm1
|
||||
; SSE2-NEXT: shrq $34, %rax
|
||||
; SSE2-NEXT: movd %eax, %xmm2
|
||||
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
||||
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: zext_4i17_to_4i32:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movl 8(%rdi), %eax
|
||||
; SSSE3-NEXT: andl $15, %eax
|
||||
; SSSE3-NEXT: shll $13, %eax
|
||||
; SSSE3-NEXT: movq (%rdi), %rcx
|
||||
; SSSE3-NEXT: movq %rcx, %rdx
|
||||
; SSSE3-NEXT: shrq $51, %rdx
|
||||
; SSSE3-NEXT: orl %eax, %edx
|
||||
; SSSE3-NEXT: movd %edx, %xmm0
|
||||
; SSSE3-NEXT: movq %rcx, %rax
|
||||
; SSSE3-NEXT: shrq $34, %rax
|
||||
; SSSE3-NEXT: andl $131071, %eax # imm = 0x1FFFF
|
||||
; SSSE3-NEXT: movd %eax, %xmm1
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; SSSE3-NEXT: movl %ecx, %eax
|
||||
; SSSE3-NEXT: andl $131071, %eax # imm = 0x1FFFF
|
||||
; SSSE3-NEXT: movq (%rdi), %rax
|
||||
; SSSE3-NEXT: movd %eax, %xmm0
|
||||
; SSSE3-NEXT: movq %rax, %rcx
|
||||
; SSSE3-NEXT: shrq $17, %rcx
|
||||
; SSSE3-NEXT: andl $131071, %ecx # imm = 0x1FFFF
|
||||
; SSSE3-NEXT: movd %ecx, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; SSSE3-NEXT: movd %ecx, %xmm1
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
|
||||
; SSSE3-NEXT: movl 8(%rdi), %ecx
|
||||
; SSSE3-NEXT: shll $13, %ecx
|
||||
; SSSE3-NEXT: movq %rax, %rdx
|
||||
; SSSE3-NEXT: shrq $51, %rdx
|
||||
; SSSE3-NEXT: orl %ecx, %edx
|
||||
; SSSE3-NEXT: movd %edx, %xmm1
|
||||
; SSSE3-NEXT: shrq $34, %rax
|
||||
; SSSE3-NEXT: movd %eax, %xmm2
|
||||
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
||||
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
|
||||
; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: zext_4i17_to_4i32:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movl 8(%rdi), %eax
|
||||
; SSE41-NEXT: andl $15, %eax
|
||||
; SSE41-NEXT: shll $13, %eax
|
||||
; SSE41-NEXT: movq (%rdi), %rcx
|
||||
; SSE41-NEXT: movq %rcx, %rdx
|
||||
|
@ -2358,38 +2349,69 @@ define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) {
|
|||
; SSE41-NEXT: orl %eax, %edx
|
||||
; SSE41-NEXT: movq %rcx, %rax
|
||||
; SSE41-NEXT: shrq $17, %rax
|
||||
; SSE41-NEXT: andl $131071, %eax # imm = 0x1FFFF
|
||||
; SSE41-NEXT: movl %ecx, %esi
|
||||
; SSE41-NEXT: andl $131071, %esi # imm = 0x1FFFF
|
||||
; SSE41-NEXT: movd %esi, %xmm0
|
||||
; SSE41-NEXT: movd %ecx, %xmm0
|
||||
; SSE41-NEXT: pinsrd $1, %eax, %xmm0
|
||||
; SSE41-NEXT: shrq $34, %rcx
|
||||
; SSE41-NEXT: andl $131071, %ecx # imm = 0x1FFFF
|
||||
; SSE41-NEXT: pinsrd $2, %ecx, %xmm0
|
||||
; SSE41-NEXT: pinsrd $3, %edx, %xmm0
|
||||
; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: zext_4i17_to_4i32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: movl 8(%rdi), %eax
|
||||
; AVX-NEXT: andl $15, %eax
|
||||
; AVX-NEXT: shll $13, %eax
|
||||
; AVX-NEXT: movq (%rdi), %rcx
|
||||
; AVX-NEXT: movq %rcx, %rdx
|
||||
; AVX-NEXT: shrq $51, %rdx
|
||||
; AVX-NEXT: orl %eax, %edx
|
||||
; AVX-NEXT: movq %rcx, %rax
|
||||
; AVX-NEXT: shrq $17, %rax
|
||||
; AVX-NEXT: andl $131071, %eax # imm = 0x1FFFF
|
||||
; AVX-NEXT: movl %ecx, %esi
|
||||
; AVX-NEXT: andl $131071, %esi # imm = 0x1FFFF
|
||||
; AVX-NEXT: vmovd %esi, %xmm0
|
||||
; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; AVX-NEXT: shrq $34, %rcx
|
||||
; AVX-NEXT: andl $131071, %ecx # imm = 0x1FFFF
|
||||
; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
; AVX1-LABEL: zext_4i17_to_4i32:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: movl 8(%rdi), %eax
|
||||
; AVX1-NEXT: shll $13, %eax
|
||||
; AVX1-NEXT: movq (%rdi), %rcx
|
||||
; AVX1-NEXT: movq %rcx, %rdx
|
||||
; AVX1-NEXT: shrq $51, %rdx
|
||||
; AVX1-NEXT: orl %eax, %edx
|
||||
; AVX1-NEXT: movq %rcx, %rax
|
||||
; AVX1-NEXT: shrq $17, %rax
|
||||
; AVX1-NEXT: vmovd %ecx, %xmm0
|
||||
; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; AVX1-NEXT: shrq $34, %rcx
|
||||
; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: zext_4i17_to_4i32:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: movl 8(%rdi), %eax
|
||||
; AVX2-NEXT: shll $13, %eax
|
||||
; AVX2-NEXT: movq (%rdi), %rcx
|
||||
; AVX2-NEXT: movq %rcx, %rdx
|
||||
; AVX2-NEXT: shrq $51, %rdx
|
||||
; AVX2-NEXT: orl %eax, %edx
|
||||
; AVX2-NEXT: movq %rcx, %rax
|
||||
; AVX2-NEXT: shrq $17, %rax
|
||||
; AVX2-NEXT: vmovd %ecx, %xmm0
|
||||
; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; AVX2-NEXT: shrq $34, %rcx
|
||||
; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
|
||||
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: zext_4i17_to_4i32:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: movl 8(%rdi), %eax
|
||||
; AVX512-NEXT: shll $13, %eax
|
||||
; AVX512-NEXT: movq (%rdi), %rcx
|
||||
; AVX512-NEXT: movq %rcx, %rdx
|
||||
; AVX512-NEXT: shrq $51, %rdx
|
||||
; AVX512-NEXT: orl %eax, %edx
|
||||
; AVX512-NEXT: movq %rcx, %rax
|
||||
; AVX512-NEXT: shrq $17, %rax
|
||||
; AVX512-NEXT: vmovd %ecx, %xmm0
|
||||
; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
|
||||
; AVX512-NEXT: shrq $34, %rcx
|
||||
; AVX512-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
|
||||
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%a = load <4 x i17>, <4 x i17>* %ptr
|
||||
%b = zext <4 x i17> %a to <4 x i32>
|
||||
ret <4 x i32> %b
|
||||
|
|
Loading…
Reference in New Issue