Rewrite areNonVolatileConsecutiveLoads to use BaseIndexOffset

Relanding after rewriting undef.ll test to avoid host-dependent
endianness.

As discussed in D34087, rewrite areNonVolatileConsecutiveLoads using
generic checks. Also, propagate missing local handling from there to
BaseIndexOffset checks.

Tests of note:

  * test/CodeGen/X86/build-vector* - Improved.
  * test/CodeGen/BPF/undef.ll - Improved store alignment allows an
    additional store merge

  * test/CodeGen/X86/clear_upper_vector_element_bits.ll - This is a
    case we already do not handle well. Here, the DAG is improved, but
    scheduling causes a code size degradation.

Reviewers: RKSimon, craig.topper, spatel, andreadb, filcab

Subscribers: nemanjai, llvm-commits

Differential Revision: https://reviews.llvm.org/D34472

llvm-svn: 307114
This commit is contained in:
Nirav Dave 2017-07-05 01:21:23 +00:00
parent ed37df7ea3
commit b320ef9fab
10 changed files with 187 additions and 256 deletions

View File

@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/ValueTypes.h"
@ -7630,45 +7631,13 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
SDValue Loc = LD->getOperand(1); SDValue Loc = LD->getOperand(1);
SDValue BaseLoc = Base->getOperand(1); SDValue BaseLoc = Base->getOperand(1);
if (Loc.getOpcode() == ISD::FrameIndex) {
if (BaseLoc.getOpcode() != ISD::FrameIndex)
return false;
const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
int FS = MFI.getObjectSize(FI);
int BFS = MFI.getObjectSize(BFI);
if (FS != BFS || FS != (int)Bytes) return false;
return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
}
// Handle X + C. auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this);
if (isBaseWithConstantOffset(Loc)) { auto LocDecomp = BaseIndexOffset::match(Loc, *this);
int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
if (Loc.getOperand(0) == BaseLoc) { int64_t Offset = 0;
// If the base location is a simple address with no offset itself, then if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
// the second load's first add operand should be the base address. return (Dist * Bytes == Offset);
if (LocOffset == Dist * (int)Bytes)
return true;
} else if (isBaseWithConstantOffset(BaseLoc)) {
// The base location itself has an offset, so subtract that value from the
// second load's offset before comparing to distance * size.
int64_t BOffset =
cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue();
if (Loc.getOperand(0) == BaseLoc.getOperand(0)) {
if ((LocOffset - BOffset) == Dist * (int)Bytes)
return true;
}
}
}
const GlobalValue *GV1 = nullptr;
const GlobalValue *GV2 = nullptr;
int64_t Offset1 = 0;
int64_t Offset2 = 0;
bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1);
bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
if (isGA1 && isGA2 && GV1 == GV2)
return Offset1 == (Offset2 + Dist*Bytes);
return false; return false;
} }

View File

@ -60,12 +60,18 @@ BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
int64_t Offset = 0; int64_t Offset = 0;
bool IsIndexSignExt = false; bool IsIndexSignExt = false;
// Consume constant adds // Consume constant adds & ors with appropriate masking.
while (Base->getOpcode() == ISD::ADD && while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) {
isa<ConstantSDNode>(Base->getOperand(1))) { if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) {
int64_t POffset = cast<ConstantSDNode>(Base->getOperand(1))->getSExtValue(); // Only consider ORs which act as adds.
Offset += POffset; if (Base->getOpcode() == ISD::OR &&
Base = Base->getOperand(0); !DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue()))
break;
Offset += C->getSExtValue();
Base = Base->getOperand(0);
continue;
}
break;
} }
if (Base->getOpcode() == ISD::ADD) { if (Base->getOpcode() == ISD::ADD) {

View File

@ -1,4 +1,5 @@
; RUN: not llc < %s -march=bpf | FileCheck %s ; RUN: not llc < %s -march=bpfel | FileCheck -check-prefixes=CHECK,EL %s
; RUN: not llc < %s -march=bpfeb | FileCheck -check-prefixes=CHECK,EB %s
%struct.bpf_map_def = type { i32, i32, i32, i32 } %struct.bpf_map_def = type { i32, i32, i32, i32 }
%struct.__sk_buff = type opaque %struct.__sk_buff = type opaque
@ -13,36 +14,31 @@
; Function Attrs: nounwind uwtable ; Function Attrs: nounwind uwtable
define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" { define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" {
; CHECK: r2 = r10 ; CHECK: r1 = r10
; CHECK: r2 += -2 ; CHECK: r1 += -2
; CHECK: r1 = 0 ; CHECK: r2 = 0
; CHECK: *(u16 *)(r2 + 6) = r1 ; CHECK: *(u16 *)(r1 + 6) = r2
; CHECK: *(u16 *)(r2 + 4) = r1 ; CHECK: *(u16 *)(r1 + 4) = r2
; CHECK: *(u16 *)(r2 + 2) = r1 ; CHECK: *(u16 *)(r1 + 2) = r2
; CHECK: r2 = 6 ; EL: r1 = 134678021
; CHECK: *(u8 *)(r10 - 7) = r2 ; EB: r1 = 84281096
; CHECK: r2 = 5 ; CHECK: *(u32 *)(r10 - 8) = r1
; CHECK: *(u8 *)(r10 - 8) = r2 ; CHECK: r1 = 9
; CHECK: r2 = 7 ; CHECK: *(u8 *)(r10 - 4) = r1
; CHECK: *(u8 *)(r10 - 6) = r2 ; CHECK: r1 = 10
; CHECK: r2 = 8 ; CHECK: *(u8 *)(r10 - 3) = r1
; CHECK: *(u8 *)(r10 - 5) = r2 ; CHECK: *(u16 *)(r10 + 24) = r2
; CHECK: r2 = 9 ; CHECK: *(u16 *)(r10 + 22) = r2
; CHECK: *(u8 *)(r10 - 4) = r2 ; CHECK: *(u16 *)(r10 + 20) = r2
; CHECK: r2 = 10 ; CHECK: *(u16 *)(r10 + 18) = r2
; CHECK: *(u8 *)(r10 - 3) = r2 ; CHECK: *(u16 *)(r10 + 16) = r2
; CHECK: *(u16 *)(r10 + 24) = r1 ; CHECK: *(u16 *)(r10 + 14) = r2
; CHECK: *(u16 *)(r10 + 22) = r1 ; CHECK: *(u16 *)(r10 + 12) = r2
; CHECK: *(u16 *)(r10 + 20) = r1 ; CHECK: *(u16 *)(r10 + 10) = r2
; CHECK: *(u16 *)(r10 + 18) = r1 ; CHECK: *(u16 *)(r10 + 8) = r2
; CHECK: *(u16 *)(r10 + 16) = r1 ; CHECK: *(u16 *)(r10 + 6) = r2
; CHECK: *(u16 *)(r10 + 14) = r1 ; CHECK: *(u16 *)(r10 - 2) = r2
; CHECK: *(u16 *)(r10 + 12) = r1 ; CHECK: *(u16 *)(r10 + 26) = r2
; CHECK: *(u16 *)(r10 + 10) = r1
; CHECK: *(u16 *)(r10 + 8) = r1
; CHECK: *(u16 *)(r10 + 6) = r1
; CHECK: *(u16 *)(r10 - 2) = r1
; CHECK: *(u16 *)(r10 + 26) = r1
; CHECK: r2 = r10 ; CHECK: r2 = r10
; CHECK: r2 += -8 ; CHECK: r2 += -8
; CHECK: r1 = <MCOperand Expr:(routing)>ll ; CHECK: r1 = <MCOperand Expr:(routing)>ll

View File

@ -64,6 +64,6 @@ entry:
%0 = load i16, i16* %retval ; <i16> [#uses=1] %0 = load i16, i16* %retval ; <i16> [#uses=1]
ret i16 %0 ret i16 %0
; CHECK-LABEL: mov2: ; CHECK-LABEL: mov2:
; CHECK: mov.w 0(r1), 4(r1) ; CHECK-DAG: mov.w 2(r1), 6(r1)
; CHECK: mov.w 2(r1), 6(r1) ; CHECK-DAG: mov.w 0(r1), 4(r1)
} }

View File

@ -71,8 +71,8 @@ define i128 @bswap_i128(i128 %a0) nounwind {
; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-MOVBE-NEXT: movbel %esi, 12(%eax) ; X86-MOVBE-NEXT: movbel %esi, 12(%eax)
; X86-MOVBE-NEXT: movbel %edi, 8(%eax) ; X86-MOVBE-NEXT: movbel %edi, 8(%eax)
; X86-MOVBE-NEXT: movbel %ecx, 4(%eax) ; X86-MOVBE-NEXT: movbel %edx, 4(%eax)
; X86-MOVBE-NEXT: movbel %edx, (%eax) ; X86-MOVBE-NEXT: movbel %ecx, (%eax)
; X86-MOVBE-NEXT: popl %esi ; X86-MOVBE-NEXT: popl %esi
; X86-MOVBE-NEXT: popl %edi ; X86-MOVBE-NEXT: popl %edi
; X86-MOVBE-NEXT: retl $4 ; X86-MOVBE-NEXT: retl $4

View File

@ -72,12 +72,10 @@ define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, floa
} }
define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) { define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) {
; SSE2-32-LABEL: test_buildvector_v2i64: ; SSE-32-LABEL: test_buildvector_v2i64:
; SSE2-32: # BB#0: ; SSE-32: # BB#0:
; SSE2-32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero ; SSE-32-NEXT: movups {{[0-9]+}}(%esp), %xmm0
; SSE2-32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; SSE-32-NEXT: retl
; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-32-NEXT: retl
; ;
; SSE-64-LABEL: test_buildvector_v2i64: ; SSE-64-LABEL: test_buildvector_v2i64:
; SSE-64: # BB#0: ; SSE-64: # BB#0:
@ -86,20 +84,9 @@ define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) {
; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-64-NEXT: retq ; SSE-64-NEXT: retq
; ;
; SSE41-32-LABEL: test_buildvector_v2i64:
; SSE41-32: # BB#0:
; SSE41-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-32-NEXT: pinsrd $1, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT: pinsrd $2, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm0
; SSE41-32-NEXT: retl
;
; AVX-32-LABEL: test_buildvector_v2i64: ; AVX-32-LABEL: test_buildvector_v2i64:
; AVX-32: # BB#0: ; AVX-32: # BB#0:
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: retl ; AVX-32-NEXT: retl
; ;
; AVX-64-LABEL: test_buildvector_v2i64: ; AVX-64-LABEL: test_buildvector_v2i64:

View File

@ -51,18 +51,10 @@ define <8 x float> @test_buildvector_v8f32(float %a0, float %a1, float %a2, floa
} }
define <4 x i64> @test_buildvector_v4i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) { define <4 x i64> @test_buildvector_v4i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
; AVX1-32-LABEL: test_buildvector_v4i64: ; AVX-32-LABEL: test_buildvector_v4i64:
; AVX1-32: # BB#0: ; AVX-32: # BB#0:
; AVX1-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vmovups {{[0-9]+}}(%esp), %ymm0
; AVX1-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-32-NEXT: retl
; AVX1-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX1-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX1-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX1-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX1-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX1-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-32-NEXT: retl
; ;
; AVX1-64-LABEL: test_buildvector_v4i64: ; AVX1-64-LABEL: test_buildvector_v4i64:
; AVX1-64: # BB#0: ; AVX1-64: # BB#0:
@ -75,19 +67,6 @@ define <4 x i64> @test_buildvector_v4i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
; AVX1-64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-64-NEXT: retq ; AVX1-64-NEXT: retq
; ;
; AVX2-32-LABEL: test_buildvector_v4i64:
; AVX2-32: # BB#0:
; AVX2-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX2-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX2-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX2-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX2-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX2-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX2-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX2-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX2-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-32-NEXT: retl
;
; AVX2-64-LABEL: test_buildvector_v4i64: ; AVX2-64-LABEL: test_buildvector_v4i64:
; AVX2-64: # BB#0: ; AVX2-64: # BB#0:
; AVX2-64-NEXT: vmovq %rcx, %xmm0 ; AVX2-64-NEXT: vmovq %rcx, %xmm0

View File

@ -79,25 +79,7 @@ define <16 x float> @test_buildvector_v16f32(float %a0, float %a1, float %a2, fl
define <8 x i64> @test_buildvector_v8i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7) { define <8 x i64> @test_buildvector_v8i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7) {
; AVX-32-LABEL: test_buildvector_v8i64: ; AVX-32-LABEL: test_buildvector_v8i64:
; AVX-32: # BB#0: ; AVX-32: # BB#0:
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vmovups {{[0-9]+}}(%esp), %zmm0
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1
; AVX-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm2, %xmm2
; AVX-32-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX-32-NEXT: retl ; AVX-32-NEXT: retl
; ;
; AVX-64-LABEL: test_buildvector_v8i64: ; AVX-64-LABEL: test_buildvector_v8i64:

View File

@ -1063,87 +1063,89 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; ;
; AVX1-LABEL: _clearupper32xi8b: ; AVX1-LABEL: _clearupper32xi8b:
; AVX1: # BB#0: ; AVX1: # BB#0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: pushq %r15
; AVX1-NEXT: pushq %r14 ; AVX1-NEXT: pushq %r14
; AVX1-NEXT: pushq %r13
; AVX1-NEXT: pushq %r12
; AVX1-NEXT: pushq %rbx ; AVX1-NEXT: pushq %rbx
; AVX1-NEXT: vpextrq $1, %xmm0, -{{[0-9]+}}(%rsp) ; AVX1-NEXT: vmovq %xmm0, %rcx
; AVX1-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp) ; AVX1-NEXT: movq %rcx, %r8
; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %r14 ; AVX1-NEXT: movq %rcx, %r9
; AVX1-NEXT: movq %rcx, %r10
; AVX1-NEXT: movq %rcx, %r11
; AVX1-NEXT: movq %rcx, %r14
; AVX1-NEXT: movq %rcx, %r15
; AVX1-NEXT: vpextrq $1, %xmm0, %rdx ; AVX1-NEXT: vpextrq $1, %xmm0, %rdx
; AVX1-NEXT: movq %rdx, %r8 ; AVX1-NEXT: movq %rdx, %r12
; AVX1-NEXT: movq %rdx, %r9 ; AVX1-NEXT: movq %rdx, %r13
; AVX1-NEXT: movq %rdx, %r11 ; AVX1-NEXT: movq %rdx, %rbx
; AVX1-NEXT: movq %rdx, %rsi
; AVX1-NEXT: movq %rdx, %rdi
; AVX1-NEXT: movq %rdx, %rcx
; AVX1-NEXT: movq %rdx, %rax ; AVX1-NEXT: movq %rdx, %rax
; AVX1-NEXT: movq %rdx, %rdi
; AVX1-NEXT: movq %rdx, %rsi
; AVX1-NEXT: movq %rdx, %rbp
; AVX1-NEXT: andb $15, %dl ; AVX1-NEXT: andb $15, %dl
; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp) ; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $56, %rax ; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: andb $15, %al
; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %r10
; AVX1-NEXT: shrq $48, %rcx
; AVX1-NEXT: andb $15, %cl ; AVX1-NEXT: andb $15, %cl
; AVX1-NEXT: movb %cl, -{{[0-9]+}}(%rsp) ; AVX1-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rdx ; AVX1-NEXT: shrq $56, %rbp
; AVX1-NEXT: andb $15, %bpl
; AVX1-NEXT: movb %bpl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $48, %rsi
; AVX1-NEXT: andb $15, %sil
; AVX1-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $40, %rdi ; AVX1-NEXT: shrq $40, %rdi
; AVX1-NEXT: andb $15, %dil ; AVX1-NEXT: andb $15, %dil
; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rax ; AVX1-NEXT: shrq $32, %rax
; AVX1-NEXT: shrq $32, %rsi
; AVX1-NEXT: andb $15, %sil
; AVX1-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rcx
; AVX1-NEXT: shrq $24, %r11
; AVX1-NEXT: andb $15, %r11b
; AVX1-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rsi
; AVX1-NEXT: shrq $16, %r9
; AVX1-NEXT: andb $15, %r9b
; AVX1-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rdi
; AVX1-NEXT: shrq $8, %r8
; AVX1-NEXT: andb $15, %r8b
; AVX1-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rbx
; AVX1-NEXT: andb $15, %r14b
; AVX1-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $8, %r10
; AVX1-NEXT: shrq $16, %rdx
; AVX1-NEXT: shrq $24, %rax
; AVX1-NEXT: shrq $32, %rcx
; AVX1-NEXT: shrq $40, %rsi
; AVX1-NEXT: shrq $48, %rdi
; AVX1-NEXT: shrq $56, %rbx
; AVX1-NEXT: andb $15, %bl
; AVX1-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %dil
; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %sil
; AVX1-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %cl
; AVX1-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %al ; AVX1-NEXT: andb $15, %al
; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $24, %rbx
; AVX1-NEXT: andb $15, %bl
; AVX1-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $16, %r13
; AVX1-NEXT: andb $15, %r13b
; AVX1-NEXT: movb %r13b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $8, %r12
; AVX1-NEXT: andb $15, %r12b
; AVX1-NEXT: movb %r12b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $8, %r8
; AVX1-NEXT: shrq $16, %r9
; AVX1-NEXT: shrq $24, %r10
; AVX1-NEXT: shrq $32, %r11
; AVX1-NEXT: shrq $40, %r14
; AVX1-NEXT: shrq $48, %r15
; AVX1-NEXT: shrq $56, %rdx
; AVX1-NEXT: andb $15, %dl ; AVX1-NEXT: andb $15, %dl
; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp) ; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %r15b
; AVX1-NEXT: movb %r15b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %r14b
; AVX1-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %r11b
; AVX1-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %r10b ; AVX1-NEXT: andb $15, %r10b
; AVX1-NEXT: movb %r10b, -{{[0-9]+}}(%rsp) ; AVX1-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %r9b
; AVX1-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %r8b
; AVX1-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: movq %rax, %r8 ; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: movq %rax, %rdx ; AVX1-NEXT: movq %rax, %rdx
; AVX1-NEXT: movq %rax, %rsi ; AVX1-NEXT: movq %rax, %rsi
; AVX1-NEXT: movq %rax, %rdi ; AVX1-NEXT: movq %rax, %rdi
; AVX1-NEXT: movl %eax, %ebp
; AVX1-NEXT: movl %eax, %ebx ; AVX1-NEXT: movl %eax, %ebx
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: vmovd %eax, %xmm1 ; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: shrl $8, %eax ; AVX1-NEXT: shrl $8, %eax
; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 ; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX1-NEXT: shrl $16, %ecx ; AVX1-NEXT: shrl $16, %ebx
; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 ; AVX1-NEXT: vpinsrb $2, %ebx, %xmm1, %xmm1
; AVX1-NEXT: shrl $24, %ebx ; AVX1-NEXT: shrl $24, %ebp
; AVX1-NEXT: vpinsrb $3, %ebx, %xmm1, %xmm1 ; AVX1-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1
; AVX1-NEXT: shrq $32, %rdi ; AVX1-NEXT: shrq $32, %rdi
; AVX1-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1 ; AVX1-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
; AVX1-NEXT: shrq $40, %rsi ; AVX1-NEXT: shrq $40, %rsi
@ -1153,8 +1155,8 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; AVX1-NEXT: shrq $48, %rdx ; AVX1-NEXT: shrq $48, %rdx
; AVX1-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1 ; AVX1-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: shrq $56, %r8 ; AVX1-NEXT: shrq $56, %rcx
; AVX1-NEXT: vpinsrb $7, %r8d, %xmm1, %xmm0 ; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0
; AVX1-NEXT: movl %eax, %ecx ; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $8, %ecx ; AVX1-NEXT: shrl $8, %ecx
; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
@ -1222,92 +1224,98 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 ; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: popq %rbx ; AVX1-NEXT: popq %rbx
; AVX1-NEXT: popq %r12
; AVX1-NEXT: popq %r13
; AVX1-NEXT: popq %r14 ; AVX1-NEXT: popq %r14
; AVX1-NEXT: popq %r15
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: retq ; AVX1-NEXT: retq
; ;
; AVX2-LABEL: _clearupper32xi8b: ; AVX2-LABEL: _clearupper32xi8b:
; AVX2: # BB#0: ; AVX2: # BB#0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: pushq %r15
; AVX2-NEXT: pushq %r14 ; AVX2-NEXT: pushq %r14
; AVX2-NEXT: pushq %r13
; AVX2-NEXT: pushq %r12
; AVX2-NEXT: pushq %rbx ; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: vpextrq $1, %xmm0, -{{[0-9]+}}(%rsp) ; AVX2-NEXT: vmovq %xmm0, %rcx
; AVX2-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp) ; AVX2-NEXT: movq %rcx, %r8
; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %r14 ; AVX2-NEXT: movq %rcx, %r9
; AVX2-NEXT: movq %rcx, %r10
; AVX2-NEXT: movq %rcx, %r11
; AVX2-NEXT: movq %rcx, %r14
; AVX2-NEXT: movq %rcx, %r15
; AVX2-NEXT: vpextrq $1, %xmm0, %rdx ; AVX2-NEXT: vpextrq $1, %xmm0, %rdx
; AVX2-NEXT: movq %rdx, %r8 ; AVX2-NEXT: movq %rdx, %r12
; AVX2-NEXT: movq %rdx, %r9 ; AVX2-NEXT: movq %rdx, %r13
; AVX2-NEXT: movq %rdx, %r11 ; AVX2-NEXT: movq %rdx, %rbx
; AVX2-NEXT: movq %rdx, %rsi
; AVX2-NEXT: movq %rdx, %rdi
; AVX2-NEXT: movq %rdx, %rcx
; AVX2-NEXT: movq %rdx, %rax ; AVX2-NEXT: movq %rdx, %rax
; AVX2-NEXT: movq %rdx, %rdi
; AVX2-NEXT: movq %rdx, %rsi
; AVX2-NEXT: movq %rdx, %rbp
; AVX2-NEXT: andb $15, %dl ; AVX2-NEXT: andb $15, %dl
; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp) ; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $56, %rax ; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: andb $15, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %r10
; AVX2-NEXT: shrq $48, %rcx
; AVX2-NEXT: andb $15, %cl ; AVX2-NEXT: andb $15, %cl
; AVX2-NEXT: movb %cl, -{{[0-9]+}}(%rsp) ; AVX2-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rdx ; AVX2-NEXT: shrq $56, %rbp
; AVX2-NEXT: andb $15, %bpl
; AVX2-NEXT: movb %bpl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $48, %rsi
; AVX2-NEXT: andb $15, %sil
; AVX2-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $40, %rdi ; AVX2-NEXT: shrq $40, %rdi
; AVX2-NEXT: andb $15, %dil ; AVX2-NEXT: andb $15, %dil
; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rax ; AVX2-NEXT: shrq $32, %rax
; AVX2-NEXT: shrq $32, %rsi
; AVX2-NEXT: andb $15, %sil
; AVX2-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rcx
; AVX2-NEXT: shrq $24, %r11
; AVX2-NEXT: andb $15, %r11b
; AVX2-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rsi
; AVX2-NEXT: shrq $16, %r9
; AVX2-NEXT: andb $15, %r9b
; AVX2-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rdi
; AVX2-NEXT: shrq $8, %r8
; AVX2-NEXT: andb $15, %r8b
; AVX2-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rbx
; AVX2-NEXT: andb $15, %r14b
; AVX2-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $8, %r10
; AVX2-NEXT: shrq $16, %rdx
; AVX2-NEXT: shrq $24, %rax
; AVX2-NEXT: shrq $32, %rcx
; AVX2-NEXT: shrq $40, %rsi
; AVX2-NEXT: shrq $48, %rdi
; AVX2-NEXT: shrq $56, %rbx
; AVX2-NEXT: andb $15, %bl
; AVX2-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %dil
; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %sil
; AVX2-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %cl
; AVX2-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %al ; AVX2-NEXT: andb $15, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $24, %rbx
; AVX2-NEXT: andb $15, %bl
; AVX2-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $16, %r13
; AVX2-NEXT: andb $15, %r13b
; AVX2-NEXT: movb %r13b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $8, %r12
; AVX2-NEXT: andb $15, %r12b
; AVX2-NEXT: movb %r12b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $8, %r8
; AVX2-NEXT: shrq $16, %r9
; AVX2-NEXT: shrq $24, %r10
; AVX2-NEXT: shrq $32, %r11
; AVX2-NEXT: shrq $40, %r14
; AVX2-NEXT: shrq $48, %r15
; AVX2-NEXT: shrq $56, %rdx
; AVX2-NEXT: andb $15, %dl ; AVX2-NEXT: andb $15, %dl
; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp) ; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %r15b
; AVX2-NEXT: movb %r15b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %r14b
; AVX2-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %r11b
; AVX2-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %r10b ; AVX2-NEXT: andb $15, %r10b
; AVX2-NEXT: movb %r10b, -{{[0-9]+}}(%rsp) ; AVX2-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %r9b
; AVX2-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %r8b
; AVX2-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: movq %rax, %r8 ; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: movq %rax, %rdx ; AVX2-NEXT: movq %rax, %rdx
; AVX2-NEXT: movq %rax, %rsi ; AVX2-NEXT: movq %rax, %rsi
; AVX2-NEXT: movq %rax, %rdi ; AVX2-NEXT: movq %rax, %rdi
; AVX2-NEXT: movl %eax, %ebp
; AVX2-NEXT: movl %eax, %ebx ; AVX2-NEXT: movl %eax, %ebx
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: vmovd %eax, %xmm1 ; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: shrl $8, %eax ; AVX2-NEXT: shrl $8, %eax
; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 ; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX2-NEXT: shrl $16, %ecx ; AVX2-NEXT: shrl $16, %ebx
; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 ; AVX2-NEXT: vpinsrb $2, %ebx, %xmm1, %xmm1
; AVX2-NEXT: shrl $24, %ebx ; AVX2-NEXT: shrl $24, %ebp
; AVX2-NEXT: vpinsrb $3, %ebx, %xmm1, %xmm1 ; AVX2-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1
; AVX2-NEXT: shrq $32, %rdi ; AVX2-NEXT: shrq $32, %rdi
; AVX2-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1 ; AVX2-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
; AVX2-NEXT: shrq $40, %rsi ; AVX2-NEXT: shrq $40, %rsi
@ -1317,8 +1325,8 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; AVX2-NEXT: shrq $48, %rdx ; AVX2-NEXT: shrq $48, %rdx
; AVX2-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1 ; AVX2-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
; AVX2-NEXT: vpextrq $1, %xmm0, %rax ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: shrq $56, %r8 ; AVX2-NEXT: shrq $56, %rcx
; AVX2-NEXT: vpinsrb $7, %r8d, %xmm1, %xmm0 ; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0
; AVX2-NEXT: movl %eax, %ecx ; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $8, %ecx ; AVX2-NEXT: shrl $8, %ecx
; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
@ -1386,7 +1394,11 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 ; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: popq %rbx ; AVX2-NEXT: popq %rbx
; AVX2-NEXT: popq %r12
; AVX2-NEXT: popq %r13
; AVX2-NEXT: popq %r14 ; AVX2-NEXT: popq %r14
; AVX2-NEXT: popq %r15
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq ; AVX2-NEXT: retq
%x4 = bitcast <32 x i8> %0 to <64 x i4> %x4 = bitcast <32 x i8> %0 to <64 x i4>
%r0 = insertelement <64 x i4> %x4, i4 zeroinitializer, i32 1 %r0 = insertelement <64 x i4> %x4, i4 zeroinitializer, i32 1

View File

@ -101,8 +101,8 @@ define i32 @test_wide(i128 %a, i128 %b) {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %esi ; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: jge .LBB4_2 ; CHECK-NEXT: jge .LBB4_2
; CHECK-NEXT: # BB#1: # %bb1 ; CHECK-NEXT: # BB#1: # %bb1
; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: movl $1, %eax