; llvm-project/llvm/test/CodeGen/X86/avoid-sfb.ll
;
; Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.
;
; 1529 lines
; 58 KiB
; LLVM
; Raw Normal View History

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;
; Codegen test for the x86 avoid-SFB transformation applied to memcpy calls
; that llc expands inline. Each function stores into part of a struct and then
; copies the whole struct; the assertions record how that copy is emitted.
; Four llc configurations are exercised by the run lines below, one FileCheck
; prefix each
;   CHECK         -mcpu=x86-64, transformation left enabled
;   DISABLED      -mcpu=x86-64 with --x86-disable-avoid-SFB
;   CHECK-AVX2    -mcpu=core-avx2
;   CHECK-AVX512  -mcpu=skx
; The assertion lines are generated; regenerate them with the script named in
; the note above instead of editing them by hand.
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Struct of four i32 fields, used by several of the tests in this file.
%struct.S = type { i32, i32, i32, i32 }
; test_conditional_block - a store made on only one path, followed by a copy
; of the whole struct.
;   entry    branch on %x > 17
;   if.then  4-byte store of %x to field 1 of %s1 (offset 4)
;   if.end   16-byte memcpy %s4 -> %s3, then 16-byte memcpy %s1 -> %s2
; Expected with avoid-SFB enabled (default, AVX2 and AVX512 runs) - the copy
; out of %s1 is emitted as 4 + 4 + 8 byte load/store pairs, so the piece at
; offset 4 has the same offset and width as the conditional store. The copy
; out of %s4 stays a single 16-byte move.
; Expected with the pass disabled - both copies are single 16-byte moves.
; Function Attrs: nounwind uwtable
define void @test_conditional_block(%struct.S* nocapture noalias %s1 , %struct.S* nocapture noalias %s2, i32 %x, %struct.S* nocapture noalias %s3, %struct.S* nocapture noalias readonly %s4) local_unnamed_addr #0 {
; CHECK-LABEL: test_conditional_block:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB0_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movl %edx, 4(%rdi)
; CHECK-NEXT: .LBB0_2: # %if.end
; CHECK-NEXT: movups (%r8), %xmm0
; CHECK-NEXT: movups %xmm0, (%rcx)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl %eax, (%rsi)
; CHECK-NEXT: movl 4(%rdi), %eax
; CHECK-NEXT: movl %eax, 4(%rsi)
; CHECK-NEXT: movq 8(%rdi), %rax
; CHECK-NEXT: movq %rax, 8(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_conditional_block:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB0_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movl %edx, 4(%rdi)
; DISABLED-NEXT: .LBB0_2: # %if.end
; DISABLED-NEXT: movups (%r8), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_conditional_block:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB0_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movl %edx, 4(%rdi)
; CHECK-AVX2-NEXT: .LBB0_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT: movl (%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, (%rsi)
; CHECK-AVX2-NEXT: movl 4(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX2-NEXT: movq 8(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 8(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_conditional_block:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB0_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movl %edx, 4(%rdi)
; CHECK-AVX512-NEXT: .LBB0_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT: movl (%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, (%rsi)
; CHECK-AVX512-NEXT: movl 4(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX512-NEXT: movq 8(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 8(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
%cmp = icmp sgt i32 %x, 17
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
; Narrow store into %s1 that the later whole-struct copy reads back.
%b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
store i32 %x, i32* %b, align 4
br label %if.end
if.end: ; preds = %if.then, %entry
%0 = bitcast %struct.S* %s3 to i8*
%1 = bitcast %struct.S* %s4 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
%2 = bitcast %struct.S* %s2 to i8*
%3 = bitcast %struct.S* %s1 to i8*
; Copy under test - its source is the struct written in if.then.
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
ret void
}
; test_imm_store - the blocking store writes an immediate and there is no
; control flow.
;   entry  store i32 0 to field 0 of %s1 (offset 0), store i32 1 to field 0
;          of %s3, then 16-byte memcpy %s1 -> %s2
; Expected with avoid-SFB enabled - the copy is emitted as a 4-byte piece at
; offset 0, an 8-byte piece at offset 4 and a 4-byte piece at offset 12.
; Expected with the pass disabled - a single 16-byte move.
; Function Attrs: nounwind uwtable
define void @test_imm_store(%struct.S* nocapture noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3) local_unnamed_addr #0 {
; CHECK-LABEL: test_imm_store:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl $0, (%rdi)
; CHECK-NEXT: movl $1, (%rcx)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl %eax, (%rsi)
; CHECK-NEXT: movq 4(%rdi), %rax
; CHECK-NEXT: movq %rax, 4(%rsi)
; CHECK-NEXT: movl 12(%rdi), %eax
; CHECK-NEXT: movl %eax, 12(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_imm_store:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl $0, (%rdi)
; DISABLED-NEXT: movl $1, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_imm_store:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl $0, (%rdi)
; CHECK-AVX2-NEXT: movl $1, (%rcx)
; CHECK-AVX2-NEXT: movl (%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, (%rsi)
; CHECK-AVX2-NEXT: movq 4(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 4(%rsi)
; CHECK-AVX2-NEXT: movl 12(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_imm_store:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl $0, (%rdi)
; CHECK-AVX512-NEXT: movl $1, (%rcx)
; CHECK-AVX512-NEXT: movl (%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, (%rsi)
; CHECK-AVX512-NEXT: movq 4(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 4(%rsi)
; CHECK-AVX512-NEXT: movl 12(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
; Immediate store into the struct that is copied below.
%a = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 0
store i32 0, i32* %a, align 4
; Unrelated store through %s3 placed between the store above and the copy.
%a1 = getelementptr inbounds %struct.S, %struct.S* %s3, i64 0, i32 0
store i32 1, i32* %a1, align 4
%0 = bitcast %struct.S* %s2 to i8*
%1 = bitcast %struct.S* %s1 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
ret void
}
; test_nondirect_br - two conditional stores, only one of which sits in a
; direct predecessor of the block that does the copy.
;   entry     branch on %x > 17
;   if.then   4-byte store of %x to field 1 of %s1 (offset 4)
;   if.end    branch on %x2 > 13
;   if.then2  4-byte store of %x2 to field 3 of %s1 (offset 12)
;   if.end3   16-byte memcpy %s4 -> %s3, then 16-byte memcpy %s1 -> %s2
; Expected with avoid-SFB enabled - the copy out of %s1 is emitted as 8 bytes
; at offset 0, 4 bytes at offset 8 and 4 bytes at offset 12. Only the store
; at offset 12 (made in if.then2) gets a matching piece; the store at offset 4
; (made in if.then, which is not a predecessor of if.end3) falls inside the
; 8-byte piece.
; Expected with the pass disabled - both copies are single 16-byte moves.
; Function Attrs: nounwind uwtable
define void @test_nondirect_br(%struct.S* nocapture noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
; CHECK-LABEL: test_nondirect_br:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB2_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movl %edx, 4(%rdi)
; CHECK-NEXT: .LBB2_2: # %if.end
; CHECK-NEXT: cmpl $14, %r9d
; CHECK-NEXT: jl .LBB2_4
; CHECK-NEXT: # %bb.3: # %if.then2
; CHECK-NEXT: movl %r9d, 12(%rdi)
; CHECK-NEXT: .LBB2_4: # %if.end3
; CHECK-NEXT: movups (%r8), %xmm0
; CHECK-NEXT: movups %xmm0, (%rcx)
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq %rax, (%rsi)
; CHECK-NEXT: movl 8(%rdi), %eax
; CHECK-NEXT: movl %eax, 8(%rsi)
; CHECK-NEXT: movl 12(%rdi), %eax
; CHECK-NEXT: movl %eax, 12(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_nondirect_br:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB2_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movl %edx, 4(%rdi)
; DISABLED-NEXT: .LBB2_2: # %if.end
; DISABLED-NEXT: cmpl $14, %r9d
; DISABLED-NEXT: jl .LBB2_4
; DISABLED-NEXT: # %bb.3: # %if.then2
; DISABLED-NEXT: movl %r9d, 12(%rdi)
; DISABLED-NEXT: .LBB2_4: # %if.end3
; DISABLED-NEXT: movups (%r8), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_nondirect_br:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB2_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movl %edx, 4(%rdi)
; CHECK-AVX2-NEXT: .LBB2_2: # %if.end
; CHECK-AVX2-NEXT: cmpl $14, %r9d
; CHECK-AVX2-NEXT: jl .LBB2_4
; CHECK-AVX2-NEXT: # %bb.3: # %if.then2
; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX2-NEXT: .LBB2_4: # %if.end3
; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT: movq (%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, (%rsi)
; CHECK-AVX2-NEXT: movl 8(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 8(%rsi)
; CHECK-AVX2-NEXT: movl 12(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_nondirect_br:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB2_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movl %edx, 4(%rdi)
; CHECK-AVX512-NEXT: .LBB2_2: # %if.end
; CHECK-AVX512-NEXT: cmpl $14, %r9d
; CHECK-AVX512-NEXT: jl .LBB2_4
; CHECK-AVX512-NEXT: # %bb.3: # %if.then2
; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX512-NEXT: .LBB2_4: # %if.end3
; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT: movq (%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, (%rsi)
; CHECK-AVX512-NEXT: movl 8(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 8(%rsi)
; CHECK-AVX512-NEXT: movl 12(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
%cmp = icmp sgt i32 %x, 17
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
; Store at offset 4; this block only reaches the copy through if.end.
%b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
store i32 %x, i32* %b, align 4
br label %if.end
if.end: ; preds = %if.then, %entry
%cmp1 = icmp sgt i32 %x2, 13
br i1 %cmp1, label %if.then2, label %if.end3
if.then2: ; preds = %if.end
; Store at offset 12; this block branches straight to the copy.
%d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
store i32 %x2, i32* %d, align 4
br label %if.end3
if.end3: ; preds = %if.then2, %if.end
%0 = bitcast %struct.S* %s3 to i8*
%1 = bitcast %struct.S* %s4 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
%2 = bitcast %struct.S* %s2 to i8*
%3 = bitcast %struct.S* %s1 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
ret void
}
; test_2preds_block - the copying block has two predecessors and each one
; contains a store into the source struct.
;   entry    4-byte store of %x2 to field 3 of %s1 (offset 12), then branch
;            on %x > 17
;   if.then  4-byte store of %x to field 1 of %s1 (offset 4)
;   if.end   16-byte memcpy %s4 -> %s3, then 16-byte memcpy %s1 -> %s2
; Expected with avoid-SFB enabled - the copy out of %s1 is emitted as four
; 4-byte pieces at offsets 0, 4, 8 and 12, so both stores get a piece of
; matching offset and width.
; Expected with the pass disabled - both copies are single 16-byte moves.
; Function Attrs: nounwind uwtable
define void @test_2preds_block(%struct.S* nocapture noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
; CHECK-LABEL: test_2preds_block:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %r9d, 12(%rdi)
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB3_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movl %edx, 4(%rdi)
; CHECK-NEXT: .LBB3_2: # %if.end
; CHECK-NEXT: movups (%r8), %xmm0
; CHECK-NEXT: movups %xmm0, (%rcx)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl %eax, (%rsi)
; CHECK-NEXT: movl 4(%rdi), %eax
; CHECK-NEXT: movl %eax, 4(%rsi)
; CHECK-NEXT: movl 8(%rdi), %eax
; CHECK-NEXT: movl %eax, 8(%rsi)
; CHECK-NEXT: movl 12(%rdi), %eax
; CHECK-NEXT: movl %eax, 12(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_2preds_block:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl %r9d, 12(%rdi)
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB3_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movl %edx, 4(%rdi)
; DISABLED-NEXT: .LBB3_2: # %if.end
; DISABLED-NEXT: movups (%r8), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_2preds_block:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB3_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movl %edx, 4(%rdi)
; CHECK-AVX2-NEXT: .LBB3_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT: movl (%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, (%rsi)
; CHECK-AVX2-NEXT: movl 4(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX2-NEXT: movl 8(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 8(%rsi)
; CHECK-AVX2-NEXT: movl 12(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_2preds_block:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB3_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movl %edx, 4(%rdi)
; CHECK-AVX512-NEXT: .LBB3_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT: movl (%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, (%rsi)
; CHECK-AVX512-NEXT: movl 4(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX512-NEXT: movl 8(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 8(%rsi)
; CHECK-AVX512-NEXT: movl 12(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
; Unconditional store at offset 12, made in the first predecessor of if.end.
%d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
store i32 %x2, i32* %d, align 4
%cmp = icmp sgt i32 %x, 17
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
; Conditional store at offset 4, made in the second predecessor of if.end.
%b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
store i32 %x, i32* %b, align 4
br label %if.end
if.end: ; preds = %if.then, %entry
%0 = bitcast %struct.S* %s3 to i8*
%1 = bitcast %struct.S* %s4 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
%2 = bitcast %struct.S* %s2 to i8*
%3 = bitcast %struct.S* %s1 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
ret void
}
; Struct of two i64 fields, used by test_type64.
%struct.S2 = type { i64, i64 }
; test_type64 - same shape as test_conditional_block but with an 8-byte
; store.
;   entry    branch on %x > 17
;   if.then  sign-extend %x to i64 and store it to field 1 of %s1 (offset 8)
;   if.end   16-byte memcpy %s4 -> %s3, then 16-byte memcpy %s1 -> %s2
; Expected with avoid-SFB enabled - the copy out of %s1 is emitted as two
; 8-byte pieces at offsets 0 and 8.
; Expected with the pass disabled - both copies are single 16-byte moves.
; Function Attrs: nounwind uwtable
define void @test_type64(%struct.S2* nocapture noalias %s1, %struct.S2* nocapture %s2, i32 %x, %struct.S2* nocapture %s3, %struct.S2* nocapture readonly %s4) local_unnamed_addr #0 {
; CHECK-LABEL: test_type64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB4_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movslq %edx, %rax
; CHECK-NEXT: movq %rax, 8(%rdi)
; CHECK-NEXT: .LBB4_2: # %if.end
; CHECK-NEXT: movups (%r8), %xmm0
; CHECK-NEXT: movups %xmm0, (%rcx)
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq %rax, (%rsi)
; CHECK-NEXT: movq 8(%rdi), %rax
; CHECK-NEXT: movq %rax, 8(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_type64:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB4_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movslq %edx, %rax
; DISABLED-NEXT: movq %rax, 8(%rdi)
; DISABLED-NEXT: .LBB4_2: # %if.end
; DISABLED-NEXT: movups (%r8), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_type64:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB4_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movslq %edx, %rax
; CHECK-AVX2-NEXT: movq %rax, 8(%rdi)
; CHECK-AVX2-NEXT: .LBB4_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT: movq (%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, (%rsi)
; CHECK-AVX2-NEXT: movq 8(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 8(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_type64:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB4_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movslq %edx, %rax
; CHECK-AVX512-NEXT: movq %rax, 8(%rdi)
; CHECK-AVX512-NEXT: .LBB4_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT: movq (%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, (%rsi)
; CHECK-AVX512-NEXT: movq 8(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 8(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
%cmp = icmp sgt i32 %x, 17
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
; 8-byte store into the second half of the struct that is copied below.
%conv = sext i32 %x to i64
%b = getelementptr inbounds %struct.S2, %struct.S2* %s1, i64 0, i32 1
store i64 %conv, i64* %b, align 8
br label %if.end
if.end: ; preds = %if.then, %entry
%0 = bitcast %struct.S2* %s3 to i8*
%1 = bitcast %struct.S2* %s4 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false)
%2 = bitcast %struct.S2* %s2 to i8*
%3 = bitcast %struct.S2* %s1 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 8, i1 false)
ret void
}
; Struct mixing field widths (i64, i8, i8, i16, i32), used by test_mixed_type.
%struct.S3 = type { i64, i8, i8, i16, i32 }
; test_mixed_type - two stores of different widths in the same conditional
; block, followed by one copy. %s3 and %s4 are not used by the body.
;   entry    branch on %x > 17
;   if.then  8-byte store of sext(%x) to field 0 of %s1 (offset 0) and
;            1-byte store of trunc(%x) to field 1 of %s1 (offset 8)
;   if.end   16-byte memcpy %s1 -> %s2
; Expected with avoid-SFB enabled - the copy is emitted as 8 bytes at offset
; 0, 1 byte at offset 8, 4 bytes at offset 9, 2 bytes at offset 13 and 1 byte
; at offset 15, so each store has a piece of matching offset and width.
; Expected with the pass disabled - a single 16-byte move.
; Function Attrs: noinline nounwind uwtable
define void @test_mixed_type(%struct.S3* nocapture noalias %s1, %struct.S3* nocapture %s2, i32 %x, %struct.S3* nocapture readnone %s3, %struct.S3* nocapture readnone %s4) local_unnamed_addr #0 {
; CHECK-LABEL: test_mixed_type:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB5_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movslq %edx, %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: movb %dl, 8(%rdi)
; CHECK-NEXT: .LBB5_2: # %if.end
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq %rax, (%rsi)
; CHECK-NEXT: movb 8(%rdi), %al
; CHECK-NEXT: movb %al, 8(%rsi)
; CHECK-NEXT: movl 9(%rdi), %eax
; CHECK-NEXT: movl %eax, 9(%rsi)
; CHECK-NEXT: movzwl 13(%rdi), %eax
; CHECK-NEXT: movw %ax, 13(%rsi)
; CHECK-NEXT: movb 15(%rdi), %al
; CHECK-NEXT: movb %al, 15(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_mixed_type:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB5_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movslq %edx, %rax
; DISABLED-NEXT: movq %rax, (%rdi)
; DISABLED-NEXT: movb %dl, 8(%rdi)
; DISABLED-NEXT: .LBB5_2: # %if.end
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_mixed_type:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB5_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movslq %edx, %rax
; CHECK-AVX2-NEXT: movq %rax, (%rdi)
; CHECK-AVX2-NEXT: movb %dl, 8(%rdi)
; CHECK-AVX2-NEXT: .LBB5_2: # %if.end
; CHECK-AVX2-NEXT: movq (%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, (%rsi)
; CHECK-AVX2-NEXT: movb 8(%rdi), %al
; CHECK-AVX2-NEXT: movb %al, 8(%rsi)
; CHECK-AVX2-NEXT: movl 9(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 9(%rsi)
; CHECK-AVX2-NEXT: movzwl 13(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 13(%rsi)
; CHECK-AVX2-NEXT: movb 15(%rdi), %al
; CHECK-AVX2-NEXT: movb %al, 15(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_mixed_type:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB5_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movslq %edx, %rax
; CHECK-AVX512-NEXT: movq %rax, (%rdi)
; CHECK-AVX512-NEXT: movb %dl, 8(%rdi)
; CHECK-AVX512-NEXT: .LBB5_2: # %if.end
; CHECK-AVX512-NEXT: movq (%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, (%rsi)
; CHECK-AVX512-NEXT: movb 8(%rdi), %al
; CHECK-AVX512-NEXT: movb %al, 8(%rsi)
; CHECK-AVX512-NEXT: movl 9(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 9(%rsi)
; CHECK-AVX512-NEXT: movzwl 13(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 13(%rsi)
; CHECK-AVX512-NEXT: movb 15(%rdi), %al
; CHECK-AVX512-NEXT: movb %al, 15(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
%cmp = icmp sgt i32 %x, 17
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
; 8-byte store to field 0.
%conv = sext i32 %x to i64
%a = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 0
store i64 %conv, i64* %a, align 8
; 1-byte store to field 1.
%conv1 = trunc i32 %x to i8
%b = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 1
store i8 %conv1, i8* %b, align 8
br label %if.end
if.end: ; preds = %if.then, %entry
%0 = bitcast %struct.S3* %s2 to i8*
%1 = bitcast %struct.S3* %s1 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false)
ret void
}
; Struct of twelve i32 fields (48 bytes copied below), used by
; test_multiple_blocks.
%struct.S4 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
; test_multiple_blocks - a copy large enough to need several vector moves,
; with stores landing in two different parts of it.
;   entry  store i32 0 to field 1 of %s1 (offset 4), store i32 0 to field 9
;          of %s1 (offset 36), then 48-byte memcpy %s1 -> %s2
; Expected in the default run - bytes 16..31 stay one 16-byte move; bytes
; 32..47 are emitted as 4 + 4 + 8 byte pieces and bytes 0..15 as 4 + 4 + 8
; byte pieces, so each store has a piece of matching offset and width.
; Expected with the pass disabled - three 16-byte moves.
; The AVX2 and AVX512 expectations match the default run up to the 4-byte
; piece at offset 4, then end with a 16-byte move at offset 8 and an 8-byte
; move at offset 24.
; NOTE(review) - in the AVX2/AVX512 expectations bytes 16..31 are copied
; twice (once by the 16-byte move at offset 16, again by the moves at offsets
; 8 and 24). That is what the generated assertions record; confirm against
; current llc output whether the overlap is intended.
; Function Attrs: nounwind uwtable
define void @test_multiple_blocks(%struct.S4* nocapture noalias %s1, %struct.S4* nocapture %s2) local_unnamed_addr #0 {
; CHECK-LABEL: test_multiple_blocks:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl $0, 4(%rdi)
; CHECK-NEXT: movl $0, 36(%rdi)
; CHECK-NEXT: movups 16(%rdi), %xmm0
; CHECK-NEXT: movups %xmm0, 16(%rsi)
; CHECK-NEXT: movl 32(%rdi), %eax
; CHECK-NEXT: movl %eax, 32(%rsi)
; CHECK-NEXT: movl 36(%rdi), %eax
; CHECK-NEXT: movl %eax, 36(%rsi)
; CHECK-NEXT: movq 40(%rdi), %rax
; CHECK-NEXT: movq %rax, 40(%rsi)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl %eax, (%rsi)
; CHECK-NEXT: movl 4(%rdi), %eax
; CHECK-NEXT: movl %eax, 4(%rsi)
; CHECK-NEXT: movq 8(%rdi), %rax
; CHECK-NEXT: movq %rax, 8(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_multiple_blocks:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl $0, 4(%rdi)
; DISABLED-NEXT: movl $0, 36(%rdi)
; DISABLED-NEXT: movups 16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 16(%rsi)
; DISABLED-NEXT: movups 32(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 32(%rsi)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_multiple_blocks:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl $0, 4(%rdi)
; CHECK-AVX2-NEXT: movl $0, 36(%rdi)
; CHECK-AVX2-NEXT: vmovups 16(%rdi), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, 16(%rsi)
; CHECK-AVX2-NEXT: movl 32(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 32(%rsi)
; CHECK-AVX2-NEXT: movl 36(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 36(%rsi)
; CHECK-AVX2-NEXT: movq 40(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 40(%rsi)
; CHECK-AVX2-NEXT: movl (%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, (%rsi)
; CHECK-AVX2-NEXT: movl 4(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX2-NEXT: vmovups 8(%rdi), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, 8(%rsi)
; CHECK-AVX2-NEXT: movq 24(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 24(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_multiple_blocks:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl $0, 4(%rdi)
; CHECK-AVX512-NEXT: movl $0, 36(%rdi)
; CHECK-AVX512-NEXT: vmovups 16(%rdi), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, 16(%rsi)
; CHECK-AVX512-NEXT: movl 32(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 32(%rsi)
; CHECK-AVX512-NEXT: movl 36(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 36(%rsi)
; CHECK-AVX512-NEXT: movq 40(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 40(%rsi)
; CHECK-AVX512-NEXT: movl (%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, (%rsi)
; CHECK-AVX512-NEXT: movl 4(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX512-NEXT: vmovups 8(%rdi), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, 8(%rsi)
; CHECK-AVX512-NEXT: movq 24(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 24(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
; First store, inside the first 16 bytes of the struct.
%b = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 1
store i32 0, i32* %b, align 4
; Second store, inside the last 16 bytes of the struct.
%b3 = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 9
store i32 0, i32* %b3, align 4
%0 = bitcast %struct.S4* %s2 to i8*
%1 = bitcast %struct.S4* %s1 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 48, i32 4, i1 false)
ret void
}
; Struct of eight i16 fields, used by test_type16.
%struct.S5 = type { i16, i16, i16, i16, i16, i16, i16, i16 }
; test_type16 - same shape as test_conditional_block but with a 2-byte store.
;   entry    branch on %x > 17
;   if.then  truncate %x to i16 and store it to field 1 of %s1 (offset 2)
;   if.end   16-byte memcpy %s4 -> %s3, then 16-byte memcpy %s1 -> %s2
; Expected with avoid-SFB enabled - the copy out of %s1 is emitted as 2 bytes
; at offset 0, 2 bytes at offset 2, 8 bytes at offset 4 and 4 bytes at
; offset 12.
; Expected with the pass disabled - both copies are single 16-byte moves.
; Function Attrs: nounwind uwtable
define void @test_type16(%struct.S5* nocapture noalias %s1, %struct.S5* nocapture %s2, i32 %x, %struct.S5* nocapture %s3, %struct.S5* nocapture readonly %s4) local_unnamed_addr #0 {
; CHECK-LABEL: test_type16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB7_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movw %dx, 2(%rdi)
; CHECK-NEXT: .LBB7_2: # %if.end
; CHECK-NEXT: movups (%r8), %xmm0
; CHECK-NEXT: movups %xmm0, (%rcx)
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: movzwl 2(%rdi), %eax
; CHECK-NEXT: movw %ax, 2(%rsi)
; CHECK-NEXT: movq 4(%rdi), %rax
; CHECK-NEXT: movq %rax, 4(%rsi)
; CHECK-NEXT: movl 12(%rdi), %eax
; CHECK-NEXT: movl %eax, 12(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_type16:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB7_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movw %dx, 2(%rdi)
; DISABLED-NEXT: .LBB7_2: # %if.end
; DISABLED-NEXT: movups (%r8), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_type16:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB7_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movw %dx, 2(%rdi)
; CHECK-AVX2-NEXT: .LBB7_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT: movzwl (%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, (%rsi)
; CHECK-AVX2-NEXT: movzwl 2(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 2(%rsi)
; CHECK-AVX2-NEXT: movq 4(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 4(%rsi)
; CHECK-AVX2-NEXT: movl 12(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_type16:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB7_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movw %dx, 2(%rdi)
; CHECK-AVX512-NEXT: .LBB7_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT: movzwl (%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, (%rsi)
; CHECK-AVX512-NEXT: movzwl 2(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 2(%rsi)
; CHECK-AVX512-NEXT: movq 4(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 4(%rsi)
; CHECK-AVX512-NEXT: movl 12(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 12(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
%cmp = icmp sgt i32 %x, 17
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
; 2-byte store into the struct that is copied below.
%conv = trunc i32 %x to i16
%b = getelementptr inbounds %struct.S5, %struct.S5* %s1, i64 0, i32 1
store i16 %conv, i16* %b, align 2
br label %if.end
if.end: ; preds = %if.then, %entry
%0 = bitcast %struct.S5* %s3 to i8*
%1 = bitcast %struct.S5* %s4 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 2, i1 false)
%2 = bitcast %struct.S5* %s2 to i8*
%3 = bitcast %struct.S5* %s1 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 2, i1 false)
ret void
}
; Struct of a four-element i32 array followed by four i32 fields (32 bytes
; copied below), used by test_stack.
%struct.S6 = type { [4 x i32], i32, i32, i32, i32 }
; test_stack - the stored-to struct lives on the stack (byval argument) and
; is copied twice.
;   entry  4-byte store of %x to field 3 of %s2, then 32-byte memcpy
;          %s2 -> %agg.result, then 32-byte memcpy %s2 -> %s1
; Expected with avoid-SFB enabled - in each copy the first 16 bytes stay one
; 16-byte move and the second 16 bytes are emitted as 8 + 4 + 4 byte pieces
; (destination offsets 16, 24 and 28 are visible for the copy into
; %agg.result). In the default run the second copy does all four loads before
; the four stores; in the AVX2 and AVX512 runs loads and stores alternate.
; Expected with the pass disabled - each copy is two 16-byte moves.
; Stack offsets are matched with a numeric pattern, so the assertions do not
; pin the exact frame layout.
; Function Attrs: nounwind uwtable
define void @test_stack(%struct.S6* noalias nocapture sret %agg.result, %struct.S6* byval nocapture readnone align 8 %s1, %struct.S6* byval nocapture align 8 %s2, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_stack:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: movups %xmm0, (%rdi)
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: movq %rcx, 16(%rdi)
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; CHECK-NEXT: movl %ecx, 24(%rdi)
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; CHECK-NEXT: movl %ecx, 28(%rdi)
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %esi
; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movl %edx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_stack:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movq %rdi, %rax
; DISABLED-NEXT: movl %esi, {{[0-9]+}}(%rsp)
; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rdi)
; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
; DISABLED-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
; DISABLED-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_stack:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movq %rdi, %rax
; CHECK-AVX2-NEXT: movl %esi, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi)
; CHECK-AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-AVX2-NEXT: movq %rcx, 16(%rdi)
; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX2-NEXT: movl %ecx, 24(%rdi)
; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX2-NEXT: movl %ecx, 28(%rdi)
; CHECK-AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-AVX2-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX2-NEXT: movl %ecx, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX2-NEXT: movl %ecx, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_stack:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movq %rdi, %rax
; CHECK-AVX512-NEXT: movl %esi, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi)
; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-AVX512-NEXT: movq %rcx, 16(%rdi)
; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX512-NEXT: movl %ecx, 24(%rdi)
; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX512-NEXT: movl %ecx, 28(%rdi)
; CHECK-AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-AVX512-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX512-NEXT: movl %ecx, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX512-NEXT: movl %ecx, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT: retq
entry:
%s6.sroa.0.0..sroa_cast1 = bitcast %struct.S6* %s2 to i8*
; 4-byte store into the byval struct that both copies below read from.
%s6.sroa.3.0..sroa_idx4 = getelementptr inbounds %struct.S6, %struct.S6* %s2, i64 0, i32 3
store i32 %x, i32* %s6.sroa.3.0..sroa_idx4, align 8
%0 = bitcast %struct.S6* %agg.result to i8*
%s6.sroa.0.0..sroa_cast2 = bitcast %struct.S6* %s1 to i8*
; First copy, %s2 into the sret result.
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* nonnull %s6.sroa.0.0..sroa_cast1, i64 32, i32 4, i1 false)
; Second copy, %s2 into the other byval argument %s1.
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %s6.sroa.0.0..sroa_cast2, i8* nonnull %s6.sroa.0.0..sroa_cast1, i64 32, i32 4, i1 false)
ret void
}
; Function Attrs: nounwind uwtable
define void @test_limit_all(%struct.S* noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
; CHECK-LABEL: test_limit_all:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset %rbx, -48
; CHECK-NEXT: .cfi_offset %r12, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %r8, %r15
; CHECK-NEXT: movq %rcx, %r14
; CHECK-NEXT: movl %edx, %ebp
; CHECK-NEXT: movq %rsi, %r12
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movl %r9d, 12(%rdi)
; CHECK-NEXT: callq bar
; CHECK-NEXT: cmpl $18, %ebp
; CHECK-NEXT: jl .LBB9_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movl %ebp, 4(%rbx)
; CHECK-NEXT: movq %rbx, %rdi
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB9_2: # %if.end
; CHECK-NEXT: movups (%r15), %xmm0
; CHECK-NEXT: movups %xmm0, (%r14)
; CHECK-NEXT: movups (%rbx), %xmm0
; CHECK-NEXT: movups %xmm0, (%r12)
; CHECK-NEXT: popq %rbx
; Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
; 2018-04-24 18:32:08 +08:00
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %r12
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_limit_all:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: pushq %rbp
; DISABLED-NEXT: .cfi_def_cfa_offset 16
; DISABLED-NEXT: pushq %r15
; DISABLED-NEXT: .cfi_def_cfa_offset 24
; DISABLED-NEXT: pushq %r14
; DISABLED-NEXT: .cfi_def_cfa_offset 32
; DISABLED-NEXT: pushq %r12
; DISABLED-NEXT: .cfi_def_cfa_offset 40
; DISABLED-NEXT: pushq %rbx
; DISABLED-NEXT: .cfi_def_cfa_offset 48
; DISABLED-NEXT: .cfi_offset %rbx, -48
; DISABLED-NEXT: .cfi_offset %r12, -40
; DISABLED-NEXT: .cfi_offset %r14, -32
; DISABLED-NEXT: .cfi_offset %r15, -24
; DISABLED-NEXT: .cfi_offset %rbp, -16
; DISABLED-NEXT: movq %r8, %r15
; DISABLED-NEXT: movq %rcx, %r14
; DISABLED-NEXT: movl %edx, %ebp
; DISABLED-NEXT: movq %rsi, %r12
; DISABLED-NEXT: movq %rdi, %rbx
; DISABLED-NEXT: movl %r9d, 12(%rdi)
; DISABLED-NEXT: callq bar
; DISABLED-NEXT: cmpl $18, %ebp
; DISABLED-NEXT: jl .LBB9_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movl %ebp, 4(%rbx)
; DISABLED-NEXT: movq %rbx, %rdi
; DISABLED-NEXT: callq bar
; DISABLED-NEXT: .LBB9_2: # %if.end
; DISABLED-NEXT: movups (%r15), %xmm0
; DISABLED-NEXT: movups %xmm0, (%r14)
; DISABLED-NEXT: movups (%rbx), %xmm0
; DISABLED-NEXT: movups %xmm0, (%r12)
; DISABLED-NEXT: popq %rbx
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; DISABLED-NEXT: .cfi_def_cfa_offset 40
; DISABLED-NEXT: popq %r12
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; DISABLED-NEXT: .cfi_def_cfa_offset 32
; DISABLED-NEXT: popq %r14
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; DISABLED-NEXT: .cfi_def_cfa_offset 24
; DISABLED-NEXT: popq %r15
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; DISABLED-NEXT: .cfi_def_cfa_offset 16
; DISABLED-NEXT: popq %rbp
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; DISABLED-NEXT: .cfi_def_cfa_offset 8
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_limit_all:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: pushq %rbp
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16
; CHECK-AVX2-NEXT: pushq %r15
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 24
; CHECK-AVX2-NEXT: pushq %r14
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX2-NEXT: pushq %r12
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 40
; CHECK-AVX2-NEXT: pushq %rbx
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 48
; CHECK-AVX2-NEXT: .cfi_offset %rbx, -48
; CHECK-AVX2-NEXT: .cfi_offset %r12, -40
; CHECK-AVX2-NEXT: .cfi_offset %r14, -32
; CHECK-AVX2-NEXT: .cfi_offset %r15, -24
; CHECK-AVX2-NEXT: .cfi_offset %rbp, -16
; CHECK-AVX2-NEXT: movq %r8, %r15
; CHECK-AVX2-NEXT: movq %rcx, %r14
; CHECK-AVX2-NEXT: movl %edx, %ebp
; CHECK-AVX2-NEXT: movq %rsi, %r12
; CHECK-AVX2-NEXT: movq %rdi, %rbx
; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX2-NEXT: callq bar
; CHECK-AVX2-NEXT: cmpl $18, %ebp
; CHECK-AVX2-NEXT: jl .LBB9_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movl %ebp, 4(%rbx)
; CHECK-AVX2-NEXT: movq %rbx, %rdi
; CHECK-AVX2-NEXT: callq bar
; CHECK-AVX2-NEXT: .LBB9_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r15), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%r14)
; CHECK-AVX2-NEXT: vmovups (%rbx), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%r12)
; CHECK-AVX2-NEXT: popq %rbx
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 40
; CHECK-AVX2-NEXT: popq %r12
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX2-NEXT: popq %r14
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 24
; CHECK-AVX2-NEXT: popq %r15
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16
; CHECK-AVX2-NEXT: popq %rbp
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_limit_all:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: pushq %rbp
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16
; CHECK-AVX512-NEXT: pushq %r15
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 24
; CHECK-AVX512-NEXT: pushq %r14
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX512-NEXT: pushq %r12
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 40
; CHECK-AVX512-NEXT: pushq %rbx
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 48
; CHECK-AVX512-NEXT: .cfi_offset %rbx, -48
; CHECK-AVX512-NEXT: .cfi_offset %r12, -40
; CHECK-AVX512-NEXT: .cfi_offset %r14, -32
; CHECK-AVX512-NEXT: .cfi_offset %r15, -24
; CHECK-AVX512-NEXT: .cfi_offset %rbp, -16
; CHECK-AVX512-NEXT: movq %r8, %r15
; CHECK-AVX512-NEXT: movq %rcx, %r14
; CHECK-AVX512-NEXT: movl %edx, %ebp
; CHECK-AVX512-NEXT: movq %rsi, %r12
; CHECK-AVX512-NEXT: movq %rdi, %rbx
; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX512-NEXT: callq bar
; CHECK-AVX512-NEXT: cmpl $18, %ebp
; CHECK-AVX512-NEXT: jl .LBB9_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movl %ebp, 4(%rbx)
; CHECK-AVX512-NEXT: movq %rbx, %rdi
; CHECK-AVX512-NEXT: callq bar
; CHECK-AVX512-NEXT: .LBB9_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r15), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%r14)
; CHECK-AVX512-NEXT: vmovups (%rbx), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%r12)
; CHECK-AVX512-NEXT: popq %rbx
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 40
; CHECK-AVX512-NEXT: popq %r12
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX512-NEXT: popq %r14
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 24
; CHECK-AVX512-NEXT: popq %r15
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16
; CHECK-AVX512-NEXT: popq %rbp
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX512-NEXT: retq
entry:
%d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
store i32 %x2, i32* %d, align 4
tail call void @bar(%struct.S* %s1) #3
%cmp = icmp sgt i32 %x, 17
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
%b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
store i32 %x, i32* %b, align 4
tail call void @bar(%struct.S* nonnull %s1) #3
br label %if.end
if.end: ; preds = %if.then, %entry
%0 = bitcast %struct.S* %s3 to i8*
%1 = bitcast %struct.S* %s4 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
%2 = bitcast %struct.S* %s2 to i8*
%3 = bitcast %struct.S* %s1 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
ret void
}
; Function Attrs: nounwind uwtable
define void @test_limit_one_pred(%struct.S* noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
; CHECK-LABEL: test_limit_one_pred:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset %rbx, -40
; CHECK-NEXT: .cfi_offset %r12, -32
; CHECK-NEXT: .cfi_offset %r14, -24
; CHECK-NEXT: .cfi_offset %r15, -16
; CHECK-NEXT: movq %r8, %r12
; CHECK-NEXT: movq %rcx, %r15
; CHECK-NEXT: movq %rsi, %r14
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movl %r9d, 12(%rdi)
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB10_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movl %edx, 4(%rbx)
; CHECK-NEXT: movq %rbx, %rdi
; CHECK-NEXT: callq bar
; CHECK-NEXT: .LBB10_2: # %if.end
; CHECK-NEXT: movups (%r12), %xmm0
; CHECK-NEXT: movups %xmm0, (%r15)
; CHECK-NEXT: movq (%rbx), %rax
; CHECK-NEXT: movq %rax, (%r14)
; CHECK-NEXT: movl 8(%rbx), %eax
; CHECK-NEXT: movl %eax, 8(%r14)
; CHECK-NEXT: movl 12(%rbx), %eax
; CHECK-NEXT: movl %eax, 12(%r14)
; CHECK-NEXT: addq $8, %rsp
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %rbx
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r12
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r14
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %r15
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_limit_one_pred:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: pushq %r15
; DISABLED-NEXT: .cfi_def_cfa_offset 16
; DISABLED-NEXT: pushq %r14
; DISABLED-NEXT: .cfi_def_cfa_offset 24
; DISABLED-NEXT: pushq %r12
; DISABLED-NEXT: .cfi_def_cfa_offset 32
; DISABLED-NEXT: pushq %rbx
; DISABLED-NEXT: .cfi_def_cfa_offset 40
; DISABLED-NEXT: pushq %rax
; DISABLED-NEXT: .cfi_def_cfa_offset 48
; DISABLED-NEXT: .cfi_offset %rbx, -40
; DISABLED-NEXT: .cfi_offset %r12, -32
; DISABLED-NEXT: .cfi_offset %r14, -24
; DISABLED-NEXT: .cfi_offset %r15, -16
; DISABLED-NEXT: movq %r8, %r15
; DISABLED-NEXT: movq %rcx, %r14
; DISABLED-NEXT: movq %rsi, %r12
; DISABLED-NEXT: movq %rdi, %rbx
; DISABLED-NEXT: movl %r9d, 12(%rdi)
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB10_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movl %edx, 4(%rbx)
; DISABLED-NEXT: movq %rbx, %rdi
; DISABLED-NEXT: callq bar
; DISABLED-NEXT: .LBB10_2: # %if.end
; DISABLED-NEXT: movups (%r15), %xmm0
; DISABLED-NEXT: movups %xmm0, (%r14)
; DISABLED-NEXT: movups (%rbx), %xmm0
; DISABLED-NEXT: movups %xmm0, (%r12)
; DISABLED-NEXT: addq $8, %rsp
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; DISABLED-NEXT: .cfi_def_cfa_offset 40
; DISABLED-NEXT: popq %rbx
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; DISABLED-NEXT: .cfi_def_cfa_offset 32
; DISABLED-NEXT: popq %r12
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; DISABLED-NEXT: .cfi_def_cfa_offset 24
; DISABLED-NEXT: popq %r14
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; DISABLED-NEXT: .cfi_def_cfa_offset 16
; DISABLED-NEXT: popq %r15
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; DISABLED-NEXT: .cfi_def_cfa_offset 8
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_limit_one_pred:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: pushq %r15
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16
; CHECK-AVX2-NEXT: pushq %r14
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 24
; CHECK-AVX2-NEXT: pushq %r12
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX2-NEXT: pushq %rbx
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 40
; CHECK-AVX2-NEXT: pushq %rax
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 48
; CHECK-AVX2-NEXT: .cfi_offset %rbx, -40
; CHECK-AVX2-NEXT: .cfi_offset %r12, -32
; CHECK-AVX2-NEXT: .cfi_offset %r14, -24
; CHECK-AVX2-NEXT: .cfi_offset %r15, -16
; CHECK-AVX2-NEXT: movq %r8, %r12
; CHECK-AVX2-NEXT: movq %rcx, %r15
; CHECK-AVX2-NEXT: movq %rsi, %r14
; CHECK-AVX2-NEXT: movq %rdi, %rbx
; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB10_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movl %edx, 4(%rbx)
; CHECK-AVX2-NEXT: movq %rbx, %rdi
; CHECK-AVX2-NEXT: callq bar
; CHECK-AVX2-NEXT: .LBB10_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r12), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%r15)
; CHECK-AVX2-NEXT: movq (%rbx), %rax
; CHECK-AVX2-NEXT: movq %rax, (%r14)
; CHECK-AVX2-NEXT: movl 8(%rbx), %eax
; CHECK-AVX2-NEXT: movl %eax, 8(%r14)
; CHECK-AVX2-NEXT: movl 12(%rbx), %eax
; CHECK-AVX2-NEXT: movl %eax, 12(%r14)
; CHECK-AVX2-NEXT: addq $8, %rsp
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 40
; CHECK-AVX2-NEXT: popq %rbx
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX2-NEXT: popq %r12
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 24
; CHECK-AVX2-NEXT: popq %r14
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16
; CHECK-AVX2-NEXT: popq %r15
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_limit_one_pred:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: pushq %r15
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16
; CHECK-AVX512-NEXT: pushq %r14
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 24
; CHECK-AVX512-NEXT: pushq %r12
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX512-NEXT: pushq %rbx
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 40
; CHECK-AVX512-NEXT: pushq %rax
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 48
; CHECK-AVX512-NEXT: .cfi_offset %rbx, -40
; CHECK-AVX512-NEXT: .cfi_offset %r12, -32
; CHECK-AVX512-NEXT: .cfi_offset %r14, -24
; CHECK-AVX512-NEXT: .cfi_offset %r15, -16
; CHECK-AVX512-NEXT: movq %r8, %r12
; CHECK-AVX512-NEXT: movq %rcx, %r15
; CHECK-AVX512-NEXT: movq %rsi, %r14
; CHECK-AVX512-NEXT: movq %rdi, %rbx
; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi)
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB10_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movl %edx, 4(%rbx)
; CHECK-AVX512-NEXT: movq %rbx, %rdi
; CHECK-AVX512-NEXT: callq bar
; CHECK-AVX512-NEXT: .LBB10_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r12), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%r15)
; CHECK-AVX512-NEXT: movq (%rbx), %rax
; CHECK-AVX512-NEXT: movq %rax, (%r14)
; CHECK-AVX512-NEXT: movl 8(%rbx), %eax
; CHECK-AVX512-NEXT: movl %eax, 8(%r14)
; CHECK-AVX512-NEXT: movl 12(%rbx), %eax
; CHECK-AVX512-NEXT: movl %eax, 12(%r14)
; CHECK-AVX512-NEXT: addq $8, %rsp
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 40
; CHECK-AVX512-NEXT: popq %rbx
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX512-NEXT: popq %r12
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 24
; CHECK-AVX512-NEXT: popq %r14
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16
; CHECK-AVX512-NEXT: popq %r15
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706
2018-04-24 18:32:08 +08:00
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX512-NEXT: retq
entry:
%d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
store i32 %x2, i32* %d, align 4
%cmp = icmp sgt i32 %x, 17
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
%b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
store i32 %x, i32* %b, align 4
tail call void @bar(%struct.S* nonnull %s1) #3
br label %if.end
if.end: ; preds = %if.then, %entry
%0 = bitcast %struct.S* %s3 to i8*
%1 = bitcast %struct.S* %s4 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
%2 = bitcast %struct.S* %s2 to i8*
%3 = bitcast %struct.S* %s1 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
ret void
}
; Declaration only (no body here): the %if.then block of the preceding test
; calls this function with %s1 as its single argument.
declare void @bar(%struct.S*) local_unnamed_addr #1
; Aggregate of eight float fields; the test below copies 32 bytes of it.
%struct.S7 = type { float, float, float , float, float, float, float, float }
; Function Attrs: nounwind uwtable
; Float variant of the conditional-store test: when %x > 17 a float store hits
; field 1 of %s1 (offset 4 in the expected assembly), and the join block then
; copies 32 bytes out of %s1.
; The expectations below show that the runs without --x86-disable-avoid-SFB
; break the copy out of %s1 into narrower pieces with boundaries at offsets 4
; and 8, while the run that passes the flag keeps two full 16-byte vector
; load/store pairs.
; Note: the %y parameter is not referenced anywhere in the body.
define void @test_conditional_block_float(%struct.S7* nocapture noalias %s1, %struct.S7* nocapture %s2, i32 %x, %struct.S7* nocapture %s3, %struct.S7* nocapture readonly %s4, float %y) local_unnamed_addr #0 {
; CHECK-LABEL: test_conditional_block_float:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB11_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movl $1065353216, 4(%rdi) # imm = 0x3F800000
; CHECK-NEXT: .LBB11_2: # %if.end
; CHECK-NEXT: movups (%r8), %xmm0
; CHECK-NEXT: movups 16(%r8), %xmm1
; CHECK-NEXT: movups %xmm1, 16(%rcx)
; CHECK-NEXT: movups %xmm0, (%rcx)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl 4(%rdi), %ecx
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: movups 16(%rdi), %xmm0
; CHECK-NEXT: movups %xmm0, 16(%rsi)
; CHECK-NEXT: movl %eax, (%rsi)
; CHECK-NEXT: movl %ecx, 4(%rsi)
; CHECK-NEXT: movq %rdx, 8(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_conditional_block_float:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB11_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movl $1065353216, 4(%rdi) # imm = 0x3F800000
; DISABLED-NEXT: .LBB11_2: # %if.end
; DISABLED-NEXT: movups (%r8), %xmm0
; DISABLED-NEXT: movups 16(%r8), %xmm1
; DISABLED-NEXT: movups %xmm1, 16(%rcx)
; DISABLED-NEXT: movups %xmm0, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups 16(%rdi), %xmm1
; DISABLED-NEXT: movups %xmm1, 16(%rsi)
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_conditional_block_float:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB11_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movl $1065353216, 4(%rdi) # imm = 0x3F800000
; CHECK-AVX2-NEXT: .LBB11_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r8), %ymm0
; CHECK-AVX2-NEXT: vmovups %ymm0, (%rcx)
; CHECK-AVX2-NEXT: movl (%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, (%rsi)
; CHECK-AVX2-NEXT: movl 4(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX2-NEXT: vmovups 8(%rdi), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, 8(%rsi)
; CHECK-AVX2-NEXT: movq 24(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 24(%rsi)
; CHECK-AVX2-NEXT: vzeroupper
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_conditional_block_float:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB11_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movl $1065353216, 4(%rdi) # imm = 0x3F800000
; CHECK-AVX512-NEXT: .LBB11_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r8), %ymm0
; CHECK-AVX512-NEXT: vmovups %ymm0, (%rcx)
; CHECK-AVX512-NEXT: movl (%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, (%rsi)
; CHECK-AVX512-NEXT: movl 4(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 4(%rsi)
; CHECK-AVX512-NEXT: vmovups 8(%rdi), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, 8(%rsi)
; CHECK-AVX512-NEXT: movq 24(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 24(%rsi)
; CHECK-AVX512-NEXT: vzeroupper
; CHECK-AVX512-NEXT: retq
entry:
; Signed test %x > 17; the expected assembly shows it as `cmpl $18` + `jl`.
%cmp = icmp sgt i32 %x, 17
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
; Conditional 4-byte store of 1.0 (immediate 0x3F800000 in the expected output)
; into field 1 of %s1.
%b = getelementptr inbounds %struct.S7, %struct.S7* %s1, i64 0, i32 1
store float 1.0, float* %b, align 4
br label %if.end
if.end: ; preds = %if.then, %entry
; First copy: 32 bytes from %s4 to %s3 (does not read %s1).
%0 = bitcast %struct.S7* %s3 to i8*
%1 = bitcast %struct.S7* %s4 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 4, i1 false)
; Second copy: 32 bytes from %s1 to %s2; its source range covers the field
; written in %if.then.
%2 = bitcast %struct.S7* %s2 to i8*
%3 = bitcast %struct.S7* %s1 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 32, i32 4, i1 false)
ret void
}
; Aggregate of six i64 fields; the test below copies only 32 bytes of it.
%struct.S8 = type { i64, i64, i64, i64, i64, i64 }
; Function Attrs: nounwind uwtable
; i64 variant of the conditional-store test: when %x > 17 an 8-byte store hits
; field 1 of %s1 (offset 8 in the expected assembly), and the join block then
; copies 32 bytes out of %s1.
; The expectations below show that the runs without --x86-disable-avoid-SFB
; copy %s1 as 8 + 8 + 16 byte pieces, while the run that passes the flag keeps
; two full 16-byte vector load/store pairs. The core-avx2 and skx runs use a
; single ymm load/store pair for the unrelated %s4 -> %s3 copy.
define void @test_conditional_block_ymm(%struct.S8* nocapture noalias %s1, %struct.S8* nocapture %s2, i32 %x, %struct.S8* nocapture %s3, %struct.S8* nocapture readonly %s4) local_unnamed_addr #0 {
; CHECK-LABEL: test_conditional_block_ymm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl $18, %edx
; CHECK-NEXT: jl .LBB12_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: movq $1, 8(%rdi)
; CHECK-NEXT: .LBB12_2: # %if.end
; CHECK-NEXT: movups (%r8), %xmm0
; CHECK-NEXT: movups 16(%r8), %xmm1
; CHECK-NEXT: movups %xmm1, 16(%rcx)
; CHECK-NEXT: movups %xmm0, (%rcx)
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq 8(%rdi), %rcx
; CHECK-NEXT: movups 16(%rdi), %xmm0
; CHECK-NEXT: movups %xmm0, 16(%rsi)
; CHECK-NEXT: movq %rax, (%rsi)
; CHECK-NEXT: movq %rcx, 8(%rsi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_conditional_block_ymm:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: cmpl $18, %edx
; DISABLED-NEXT: jl .LBB12_2
; DISABLED-NEXT: # %bb.1: # %if.then
; DISABLED-NEXT: movq $1, 8(%rdi)
; DISABLED-NEXT: .LBB12_2: # %if.end
; DISABLED-NEXT: movups (%r8), %xmm0
; DISABLED-NEXT: movups 16(%r8), %xmm1
; DISABLED-NEXT: movups %xmm1, 16(%rcx)
; DISABLED-NEXT: movups %xmm0, (%rcx)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups 16(%rdi), %xmm1
; DISABLED-NEXT: movups %xmm1, 16(%rsi)
; DISABLED-NEXT: movups %xmm0, (%rsi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_conditional_block_ymm:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: cmpl $18, %edx
; CHECK-AVX2-NEXT: jl .LBB12_2
; CHECK-AVX2-NEXT: # %bb.1: # %if.then
; CHECK-AVX2-NEXT: movq $1, 8(%rdi)
; CHECK-AVX2-NEXT: .LBB12_2: # %if.end
; CHECK-AVX2-NEXT: vmovups (%r8), %ymm0
; CHECK-AVX2-NEXT: vmovups %ymm0, (%rcx)
; CHECK-AVX2-NEXT: movq (%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, (%rsi)
; CHECK-AVX2-NEXT: movq 8(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 8(%rsi)
; CHECK-AVX2-NEXT: vmovups 16(%rdi), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, 16(%rsi)
; CHECK-AVX2-NEXT: vzeroupper
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_conditional_block_ymm:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: cmpl $18, %edx
; CHECK-AVX512-NEXT: jl .LBB12_2
; CHECK-AVX512-NEXT: # %bb.1: # %if.then
; CHECK-AVX512-NEXT: movq $1, 8(%rdi)
; CHECK-AVX512-NEXT: .LBB12_2: # %if.end
; CHECK-AVX512-NEXT: vmovups (%r8), %ymm0
; CHECK-AVX512-NEXT: vmovups %ymm0, (%rcx)
; CHECK-AVX512-NEXT: movq (%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, (%rsi)
; CHECK-AVX512-NEXT: movq 8(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 8(%rsi)
; CHECK-AVX512-NEXT: vmovups 16(%rdi), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, 16(%rsi)
; CHECK-AVX512-NEXT: vzeroupper
; CHECK-AVX512-NEXT: retq
entry:
; Signed test %x > 17; the expected assembly shows it as `cmpl $18` + `jl`.
%cmp = icmp sgt i32 %x, 17
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
; Conditional 8-byte store of the constant 1 into field 1 of %s1. The store is
; declared with `align 4`, not the natural 8-byte alignment of i64.
%b = getelementptr inbounds %struct.S8, %struct.S8* %s1, i64 0, i32 1
store i64 1, i64* %b, align 4
br label %if.end
if.end: ; preds = %if.then, %entry
; First copy: 32 bytes from %s4 to %s3 (does not read %s1).
%0 = bitcast %struct.S8* %s3 to i8*
%1 = bitcast %struct.S8* %s4 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 4, i1 false)
; Second copy: 32 bytes from %s1 to %s2; its source range covers the field
; written in %if.then.
%2 = bitcast %struct.S8* %s2 to i8*
%3 = bitcast %struct.S8* %s1 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 32, i32 4, i1 false)
ret void
}
; Overlapping-copy case: a 4-byte store to %A is followed by a 16-byte copy from
; %A to %A + 4, so the source bytes [0, 16) and destination bytes [4, 20)
; overlap. All four run configurations expect the same shape of output: the
; 4-byte store followed by one 16-byte vector load and one 16-byte vector
; store, i.e. the copy is not broken into smaller pieces here.
define dso_local void @test_alias(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_alias:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: movups (%rdi), %xmm0
; CHECK-NEXT: movups %xmm0, 4(%rdi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_alias:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl %esi, (%rdi)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 4(%rdi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_alias:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl %esi, (%rdi)
; CHECK-AVX2-NEXT: vmovups (%rdi), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, 4(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_alias:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl %esi, (%rdi)
; CHECK-AVX512-NEXT: vmovups (%rdi), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, 4(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
; 4-byte store of %x to the first four bytes of %A.
%a = bitcast i8* %A to i32*
store i32 %x, i32* %a, align 4
; Destination starts 4 bytes into %A, inside the 16-byte source range.
%add.ptr = getelementptr inbounds i8, i8* %A, i64 4
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr, i8* align 4 %A, i64 16, i32 4, i1 false)
ret void
}
; Function Attrs: nounwind uwtable
; Non-overlapping counterpart of the overlapping-copy test: a 4-byte store to %A
; is followed by a 16-byte copy from %A to %A + 20, so the source bytes [0, 16)
; and destination bytes [20, 36) are disjoint.
; The expectations below show that the runs without --x86-disable-avoid-SFB
; copy the data as 4 + 8 + 4 byte pieces (offsets 0, 4 and 12), while the run
; that passes the flag keeps a single 16-byte vector load/store pair.
define dso_local void @test_noalias(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_noalias:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movl %eax, 20(%rdi)
; CHECK-NEXT: movq 4(%rdi), %rax
; CHECK-NEXT: movq %rax, 24(%rdi)
; CHECK-NEXT: movl 12(%rdi), %eax
; CHECK-NEXT: movl %eax, 32(%rdi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_noalias:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl %esi, (%rdi)
; DISABLED-NEXT: movups (%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 20(%rdi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_noalias:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl %esi, (%rdi)
; CHECK-AVX2-NEXT: movl (%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 20(%rdi)
; CHECK-AVX2-NEXT: movq 4(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 24(%rdi)
; CHECK-AVX2-NEXT: movl 12(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 32(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_noalias:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl %esi, (%rdi)
; CHECK-AVX512-NEXT: movl (%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 20(%rdi)
; CHECK-AVX512-NEXT: movq 4(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 24(%rdi)
; CHECK-AVX512-NEXT: movl 12(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 32(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
; 4-byte store of %x to the first four bytes of %A.
%a = bitcast i8* %A to i32*
store i32 %x, i32* %a, align 4
; Destination starts 20 bytes into %A, past the end of the 16-byte source range.
%add.ptr = getelementptr inbounds i8, i8* %A, i64 20
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr, i8* align 4 %A, i64 16, i32 4, i1 false)
ret void
}
; Function Attrs: argmemonly nounwind
; Five-operand declaration of the intrinsic (i8* dest, i8* src, i64 length, i32,
; i1). The calls visible in the tests above all pass `i32 4` and `i1 false` for
; the last two operands.
; NOTE(review): the i32 operand is presumably the legacy explicit-alignment
; argument and the i1 the volatile flag -- confirm against the LangRef.
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
; Attribute group #0, attached to the test function definitions above.
; NOTE(review): groups #1 and #3 are referenced by declarations and a call in
; this file, but no definition for them is visible in this part of the file --
; confirm whether that is intentional.
attributes #0 = { nounwind uwtable }