; RUN: llc < %s -mcpu=corei7-avx -mtriple=x86_64-linux | FileCheck %s
Merge consecutive loads that are offset from a base address.

SelectionDAG::isConsecutiveLoad() was not detecting consecutive loads
when the first load was offset from a base address. This patch
recognizes that pattern and subtracts the offset before comparing the
second load to see if it is consecutive.

The codegen in the new test case improves from:

  vmovsd  32(%rdi), %xmm0
  vmovsd  48(%rdi), %xmm1
  vmovhpd 56(%rdi), %xmm1, %xmm1
  vmovhpd 40(%rdi), %xmm0, %xmm0
  vinsertf128 $1, %xmm1, %ymm0, %ymm0

To:

  vmovups 32(%rdi), %ymm0

An existing test case is also improved from:

  vmovsd  (%rdi), %xmm0
  vmovsd  16(%rdi), %xmm1
  vmovsd  24(%rdi), %xmm2
  vunpcklpd %xmm2, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm2[0]
  vmovhpd 8(%rdi), %xmm1, %xmm3

To:

  vmovsd  (%rdi), %xmm0
  vmovsd  16(%rdi), %xmm1
  vmovhpd 24(%rdi), %xmm0, %xmm0
  vmovhpd 8(%rdi), %xmm1, %xmm1

This patch fixes PR21771 (http://llvm.org/bugs/show_bug.cgi?id=21771).

Differential Revision: http://reviews.llvm.org/D6642

llvm-svn: 224379
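A minimal standalone C++ sketch of the offset-normalization idea described above; the Addr struct and isConsecutive() helper are invented for illustration and are not LLVM's actual SelectionDAG interfaces. Each load address is split into a (base, constant offset) pair, and the comparison subtracts the first load's offset instead of requiring that load to sit exactly at the base address.

  // Hypothetical sketch, not the real SelectionDAG code: decompose each
  // load address into (base, constant byte offset) and compare after
  // normalizing away the first load's offset, so a load at base+32
  // followed by one at base+40 is still recognized as consecutive.
  #include <cstdint>

  struct Addr {
    const void *Base; // shared base pointer (e.g. %rdi)
    int64_t Offset;   // constant byte offset from Base
  };

  bool isConsecutive(const Addr &First, unsigned FirstWidthBytes,
                     const Addr &Second) {
    if (First.Base != Second.Base)
      return false;
    // Subtract the first load's offset rather than requiring it to be 0,
    // so offset pairs like (32, 40) compare the same as (0, 8).
    return Second.Offset - First.Offset == (int64_t)FirstWidthBytes;
  }

Under that check, an 8-byte load at base+32 followed by one at base+40 is treated as consecutive, which is consistent with the merged vmovups 32(%rdi), %ymm0 codegen shown above.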
; CHECK-LABEL: cftx020:
; CHECK: vmovsd (%rdi), %xmm{{.*}}
; CHECK-NEXT: vmovsd 16(%rdi), %xmm{{.*}}
; CHECK-NEXT: vmovhpd 24(%rdi), %xmm{{.*}}
; CHECK-NEXT: vmovhpd 8(%rdi), %xmm{{.*}}
; CHECK: vmovupd %xmm{{.*}}, (%rdi)
; CHECK-NEXT: vmovupd %xmm{{.*}}, 16(%rdi)
; CHECK: ret

; A test from pifft (after SLP-vectorization) that fails when we drop the chain on newly merged loads.
define void @cftx020(double* nocapture %a) {
entry:
  ; Scalar loads of a[0], a[2], a[1] and a[3].
  %0 = load double* %a, align 8
  %arrayidx1 = getelementptr inbounds double* %a, i64 2
  %1 = load double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double* %a, i64 1
  %2 = load double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double* %a, i64 3
  %3 = load double* %arrayidx3, align 8
  ; a[0..1] = <a[0], a[3]> + <a[2], a[1]>
  %4 = insertelement <2 x double> undef, double %0, i32 0
  %5 = insertelement <2 x double> %4, double %3, i32 1
  %6 = insertelement <2 x double> undef, double %1, i32 0
  %7 = insertelement <2 x double> %6, double %2, i32 1
  %8 = fadd <2 x double> %5, %7
  %9 = bitcast double* %a to <2 x double>*
  store <2 x double> %8, <2 x double>* %9, align 8
  ; a[2..3] = <a[0], a[1]> - <a[2], a[3]>
  %10 = insertelement <2 x double> undef, double %0, i32 0
  %11 = insertelement <2 x double> %10, double %2, i32 1
  %12 = insertelement <2 x double> undef, double %1, i32 0
  %13 = insertelement <2 x double> %12, double %3, i32 1
  %14 = fsub <2 x double> %11, %13
  %15 = bitcast double* %arrayidx1 to <2 x double>*
  store <2 x double> %14, <2 x double>* %15, align 8
  ret void
}