llvm-project/llvm/test/CodeGen/X86/pr21792.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s

; This tests a case that was missing from the MI scheduler's constrainLocalCopy,
; exposed by PR21792.

; Externally defined table of 256 doubles. @func below only forms addresses
; into it (base and base + 8 bytes); nothing in this file loads from it.
@stuff = external dso_local constant [256 x double], align 16

; Masks the four 32-bit lanes of %vx, uses each lane as a byte offset into
; @stuff, and passes six resulting addresses to @toto:
;   args 1-4: @stuff + lane0 .. @stuff + lane3
;   args 5-6: (@stuff + 8 bytes) + lane0 and (@stuff + 8 bytes) + lane1
; Lanes 0 and 1 are therefore each used twice, which is why the expected
; output below keeps their extracted values live across several leaq's.
; The assertion lines that follow are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define void @func(<4 x float> %vx) {
; CHECK-LABEL: func:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
; CHECK-NEXT:    movd %xmm0, %r8d
; CHECK-NEXT:    leaq stuff(%r8), %rdi
; CHECK-NEXT:    pextrd $1, %xmm0, %eax
; CHECK-NEXT:    leaq stuff(%rax), %rsi
; CHECK-NEXT:    pextrd $2, %xmm0, %edx
; CHECK-NEXT:    pextrd $3, %xmm0, %ecx
; CHECK-NEXT:    leaq stuff(%rdx), %rdx
; CHECK-NEXT:    leaq stuff(%rcx), %rcx
; CHECK-NEXT:    leaq stuff+8(%r8), %r8
; CHECK-NEXT:    leaq stuff+8(%rax), %r9
; CHECK-NEXT:    callq toto
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  ; Reinterpret the float vector as two i64 lanes and mask it. The constant
  ; 8727373547504 is 0x000007F0000007F0, i.e. 0x7F0 in every 32-bit lane, so
  ; after the bitcast back to <4 x i32> each lane is a multiple of 16 in
  ; [0, 2032].
  %tmp2 = bitcast <4 x float> %vx to <2 x i64>
  %and.i = and <2 x i64> %tmp2, <i64 8727373547504, i64 8727373547504>
  %tmp3 = bitcast <2 x i64> %and.i to <4 x i32>
  ; Lane 0: sign-extend and use as a byte offset from the start of @stuff.
  %index.sroa.0.0.vec.extract = extractelement <4 x i32> %tmp3, i32 0
  %idx.ext = sext i32 %index.sroa.0.0.vec.extract to i64
  %add.ptr = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext
  %tmp4 = bitcast i8* %add.ptr to double*
  ; Lane 1: same pattern.
  %index.sroa.0.4.vec.extract = extractelement <4 x i32> %tmp3, i32 1
  %idx.ext5 = sext i32 %index.sroa.0.4.vec.extract to i64
  %add.ptr6 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext5
  %tmp5 = bitcast i8* %add.ptr6 to double*
  ; Lane 2: same pattern.
  %index.sroa.0.8.vec.extract = extractelement <4 x i32> %tmp3, i32 2
  %idx.ext14 = sext i32 %index.sroa.0.8.vec.extract to i64
  %add.ptr15 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext14
  %tmp6 = bitcast i8* %add.ptr15 to double*
  ; Lane 3: same pattern.
  %index.sroa.0.12.vec.extract = extractelement <4 x i32> %tmp3, i32 3
  %idx.ext19 = sext i32 %index.sroa.0.12.vec.extract to i64
  %add.ptr20 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext19
  %tmp7 = bitcast i8* %add.ptr20 to double*
  ; Reuse the lane 0 and lane 1 offsets against element 1 of @stuff
  ; (8 bytes past the base), which shows up as "stuff+8" in the expected asm.
  %add.ptr46 = getelementptr inbounds i8, i8* bitcast (double* getelementptr inbounds ([256 x double], [256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext
  %tmp16 = bitcast i8* %add.ptr46 to double*
  %add.ptr51 = getelementptr inbounds i8, i8* bitcast (double* getelementptr inbounds ([256 x double], [256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext5
  %tmp17 = bitcast i8* %add.ptr51 to double*
  ; Six pointer arguments: one per integer argument register in the expected
  ; output above (rdi, rsi, rdx, rcx, r8, r9).
  call void @toto(double* %tmp4, double* %tmp5, double* %tmp6, double* %tmp7, double* %tmp16, double* %tmp17)
  ret void
}

; External callee; @func passes it the six double* addresses it computes.
declare void @toto(double*, double*, double*, double*, double*, double*)
[X86] Regenerate test llvm-svn: 289279 2016-12-10 05:53:12 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 \| FileCheck %s`

Fix several accidental DOS line endings in source files Summary: There are a number of files in the tree which have been accidentally checked in with DOS line endings. Convert these to native line endings. There are also a few files which have DOS line endings on purpose, and I have set the svn:eol-style property to 'CRLF' on those. Reviewers: joerg, aaron.ballman Subscribers: aaron.ballman, sanjoy, dsanders, llvm-commits Differential Revision: http://reviews.llvm.org/D15848 llvm-svn: 256707 2016-01-04 01:22:03 +08:00			`; This fixes a missing cases in the MI scheduler's constrainLocalCopy exposed by`
			`; PR21792`

[test] Add explicit dso_local to constant/global variable declarations They are currently implicit because TargetMachine::shouldAssumeDSOLocal implies dso_local. For external data, clang -fno-pic emits the dso_local specifier for ELF and non-MinGW COFF. Adding explicit dso_local makes these tests in align with the clang behavior and helps implementing an option to use GOT indirection for external data access in -fno-pic mode (to avoid copy relocations). 2020-12-05 05:51:01 +08:00			`@stuff = external dso_local constant [256 x double], align 16`
Fix several accidental DOS line endings in source files Summary: There are a number of files in the tree which have been accidentally checked in with DOS line endings. Convert these to native line endings. There are also a few files which have DOS line endings on purpose, and I have set the svn:eol-style property to 'CRLF' on those. Reviewers: joerg, aaron.ballman Subscribers: aaron.ballman, sanjoy, dsanders, llvm-commits Differential Revision: http://reviews.llvm.org/D15848 llvm-svn: 256707 2016-01-04 01:22:03 +08:00
			`define void @func(<4 x float> %vx) {`
[X86] Regenerate test llvm-svn: 289279 2016-12-10 05:53:12 +08:00			`; CHECK-LABEL: func:`
[CodeGen] Unify MBB reference format in both MIR and debug output As part of the unification of the debug format and the MIR format, print MBB references as '%bb.5'. The MIR printer prints the IR name of a MBB only for block definitions. * find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)->getNumber\(\)/" << printMBBReference(\1)/g' find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)\.getNumber\(\)/" << printMBBReference(\1)/g' * find . \( -name ".txt" -o -name ".s" -o -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#([0-9]+)/%bb.\1/g' * grep -nr 'BB#' and fix Differential Revision: https://reviews.llvm.org/D40422 llvm-svn: 319665 2017-12-05 01:18:51 +08:00			`; CHECK: # %bb.0: # %entry`
[X86] Regenerate test llvm-svn: 289279 2016-12-10 05:53:12 +08:00			`; CHECK-NEXT: pushq %rax`
			`; CHECK-NEXT: .cfi_def_cfa_offset 16`
			`; CHECK-NEXT: pand {{.*}}(%rip), %xmm0`
[X86][SSE] Don't colaesce v4i32 extracts We currently coalesce v4i32 extracts from all 4 elements to 2 v2i64 extracts + shifts/sign-extends. This seems to have been added back in the days when we tended to spill vectors and reload scalars, or ended up with repeated shuffles moving everything down to 0'th index. I don't think either of these are likely these days as we have better EXTRACT_VECTOR_ELT and VECTOR_SHUFFLE handling, and the existing code tends to make it very difficult for various vector and load combines. Differential Revision: https://reviews.llvm.org/D42308 llvm-svn: 323541 2018-01-27 01:11:34 +08:00			`; CHECK-NEXT: movd %xmm0, %r8d`
[X86] Add an override of targetShrinkDemandedConstant to limit the damage that shrinkdemandedbits can do to zext_in_reg operations Summary: This patch adds an implementation of targetShrinkDemandedConstant that tries to keep shrinkdemandedbits from removing bits that would otherwise have been recognized as a movzx. We still need a follow patch to stop moving ands across srl if the and could be represented as a movzx before the shift but not after. I think this should help with some of the cases that D42088 ended up removing during isel. Reviewers: spatel, RKSimon Reviewed By: spatel Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D42265 llvm-svn: 323048 2018-01-21 02:50:09 +08:00			`; CHECK-NEXT: leaq stuff(%r8), %rdi`
[X86][SSE] Don't colaesce v4i32 extracts We currently coalesce v4i32 extracts from all 4 elements to 2 v2i64 extracts + shifts/sign-extends. This seems to have been added back in the days when we tended to spill vectors and reload scalars, or ended up with repeated shuffles moving everything down to 0'th index. I don't think either of these are likely these days as we have better EXTRACT_VECTOR_ELT and VECTOR_SHUFFLE handling, and the existing code tends to make it very difficult for various vector and load combines. Differential Revision: https://reviews.llvm.org/D42308 llvm-svn: 323541 2018-01-27 01:11:34 +08:00			`; CHECK-NEXT: pextrd $1, %xmm0, %eax`
			`; CHECK-NEXT: leaq stuff(%rax), %rsi`
			`; CHECK-NEXT: pextrd $2, %xmm0, %edx`
			`; CHECK-NEXT: pextrd $3, %xmm0, %ecx`
			`; CHECK-NEXT: leaq stuff(%rdx), %rdx`
			`; CHECK-NEXT: leaq stuff(%rcx), %rcx`
[X86] Add an override of targetShrinkDemandedConstant to limit the damage that shrinkdemandedbits can do to zext_in_reg operations Summary: This patch adds an implementation of targetShrinkDemandedConstant that tries to keep shrinkdemandedbits from removing bits that would otherwise have been recognized as a movzx. We still need a follow patch to stop moving ands across srl if the and could be represented as a movzx before the shift but not after. I think this should help with some of the cases that D42088 ended up removing during isel. Reviewers: spatel, RKSimon Reviewed By: spatel Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D42265 llvm-svn: 323048 2018-01-21 02:50:09 +08:00			`; CHECK-NEXT: leaq stuff+8(%r8), %r8`
[X86][SSE] Don't colaesce v4i32 extracts We currently coalesce v4i32 extracts from all 4 elements to 2 v2i64 extracts + shifts/sign-extends. This seems to have been added back in the days when we tended to spill vectors and reload scalars, or ended up with repeated shuffles moving everything down to 0'th index. I don't think either of these are likely these days as we have better EXTRACT_VECTOR_ELT and VECTOR_SHUFFLE handling, and the existing code tends to make it very difficult for various vector and load combines. Differential Revision: https://reviews.llvm.org/D42308 llvm-svn: 323541 2018-01-27 01:11:34 +08:00			`; CHECK-NEXT: leaq stuff+8(%rax), %r9`
[X86] Regenerate test llvm-svn: 289279 2016-12-10 05:53:12 +08:00			`; CHECK-NEXT: callq toto`
			`; CHECK-NEXT: popq %rax`
Correct dwarf unwind information in function epilogue This patch aims to provide correct dwarf unwind information in function epilogue for X86. It consists of two parts. The first part inserts CFI instructions that set appropriate cfa offset and cfa register in emitEpilogue() in X86FrameLowering. This part is X86 specific. The second part is platform independent and ensures that: * CFI instructions do not affect code generation (they are not counted as instructions when tail duplicating or tail merging) * Unwind information remains correct when a function is modified by different passes. This is done in a late pass by analyzing information about cfa offset and cfa register in BBs and inserting additional CFI directives where necessary. Added CFIInstrInserter pass: * analyzes each basic block to determine cfa offset and register are valid at its entry and exit * verifies that outgoing cfa offset and register of predecessor blocks match incoming values of their successors * inserts additional CFI directives at basic block beginning to correct the rule for calculating CFA Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. CFIInstrInserter is currently run only on X86, but can be used by any target that implements support for adding CFI instructions in epilogue. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D42848 llvm-svn: 330706 2018-04-24 18:32:08 +08:00			`; CHECK-NEXT: .cfi_def_cfa_offset 8`
[X86] Regenerate test llvm-svn: 289279 2016-12-10 05:53:12 +08:00			`; CHECK-NEXT: retq`
Fix several accidental DOS line endings in source files Summary: There are a number of files in the tree which have been accidentally checked in with DOS line endings. Convert these to native line endings. There are also a few files which have DOS line endings on purpose, and I have set the svn:eol-style property to 'CRLF' on those. Reviewers: joerg, aaron.ballman Subscribers: aaron.ballman, sanjoy, dsanders, llvm-commits Differential Revision: http://reviews.llvm.org/D15848 llvm-svn: 256707 2016-01-04 01:22:03 +08:00			`entry:`
			`%tmp2 = bitcast <4 x float> %vx to <2 x i64>`
			`%and.i = and <2 x i64> %tmp2, <i64 8727373547504, i64 8727373547504>`
			`%tmp3 = bitcast <2 x i64> %and.i to <4 x i32>`
			`%index.sroa.0.0.vec.extract = extractelement <4 x i32> %tmp3, i32 0`
			`%idx.ext = sext i32 %index.sroa.0.0.vec.extract to i64`
			`%add.ptr = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext`
			`%tmp4 = bitcast i8* %add.ptr to double*`
			`%index.sroa.0.4.vec.extract = extractelement <4 x i32> %tmp3, i32 1`
			`%idx.ext5 = sext i32 %index.sroa.0.4.vec.extract to i64`
			`%add.ptr6 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext5`
			`%tmp5 = bitcast i8* %add.ptr6 to double*`
			`%index.sroa.0.8.vec.extract = extractelement <4 x i32> %tmp3, i32 2`
			`%idx.ext14 = sext i32 %index.sroa.0.8.vec.extract to i64`
			`%add.ptr15 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext14`
			`%tmp6 = bitcast i8* %add.ptr15 to double*`
			`%index.sroa.0.12.vec.extract = extractelement <4 x i32> %tmp3, i32 3`
			`%idx.ext19 = sext i32 %index.sroa.0.12.vec.extract to i64`
			`%add.ptr20 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext19`
			`%tmp7 = bitcast i8* %add.ptr20 to double*`
			`%add.ptr46 = getelementptr inbounds i8, i8* bitcast (double* getelementptr inbounds ([256 x double], [256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext`
			`%tmp16 = bitcast i8* %add.ptr46 to double*`
			`%add.ptr51 = getelementptr inbounds i8, i8* bitcast (double* getelementptr inbounds ([256 x double], [256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext5`
			`%tmp17 = bitcast i8* %add.ptr51 to double*`
			`call void @toto(double* %tmp4, double* %tmp5, double* %tmp6, double* %tmp7, double* %tmp16, double* %tmp17)`
			`ret void`
			`}`

			`declare void @toto(double*, double*, double*, double*, double*, double*)`