; RUN: llc < %s -mcpu=corei7-avx -mtriple=x86_64-linux | FileCheck %s
Merge consecutive loads that are offset from a base address.

SelectionDAG::isConsecutiveLoad() was not detecting consecutive loads
when the first load was offset from a base address. This patch
recognizes that pattern and subtracts the offset before comparing the
second load to see if it is consecutive.

The codegen in the new test case improves from:

  vmovsd  32(%rdi), %xmm0
  vmovsd  48(%rdi), %xmm1
  vmovhpd 56(%rdi), %xmm1, %xmm1
  vmovhpd 40(%rdi), %xmm0, %xmm0
  vinsertf128 $1, %xmm1, %ymm0, %ymm0

To:

  vmovups 32(%rdi), %ymm0

An existing test case is also improved from:

  vmovsd  (%rdi), %xmm0
  vmovsd  16(%rdi), %xmm1
  vmovsd  24(%rdi), %xmm2
  vunpcklpd %xmm2, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm2[0]
  vmovhpd 8(%rdi), %xmm1, %xmm3

To:

  vmovsd  (%rdi), %xmm0
  vmovsd  16(%rdi), %xmm1
  vmovhpd 24(%rdi), %xmm0, %xmm0
  vmovhpd 8(%rdi), %xmm1, %xmm1

This patch fixes PR21771 (http://llvm.org/bugs/show_bug.cgi?id=21771).

Differential Revision: http://reviews.llvm.org/D6642

llvm-svn: 224379
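A minimal standalone C++ sketch of the offset-normalization idea described above; the Addr struct and isConsecutive() helper are invented for illustration and are not LLVM's actual SelectionDAG interfaces. Each load address is split into a (base, constant offset) pair, and the comparison subtracts the first load's offset instead of requiring that load to sit exactly at the base address.

  // Hypothetical sketch, not the real SelectionDAG code: decompose each
  // load address into (base, constant byte offset) and compare after
  // normalizing away the first load's offset, so a load at base+32
  // followed by one at base+40 is still recognized as consecutive.
  #include <cstdint>

  struct Addr {
    const void *Base; // shared base pointer (e.g. %rdi)
    int64_t Offset;   // constant byte offset from Base
  };

  bool isConsecutive(const Addr &First, unsigned FirstWidthBytes,
                     const Addr &Second) {
    if (First.Base != Second.Base)
      return false;
    // Subtract the first load's offset rather than requiring it to be 0,
    // so offset pairs like (32, 40) compare the same as (0, 8).
    return Second.Offset - First.Offset == (int64_t)FirstWidthBytes;
  }

Under that check, an 8-byte load at base+32 followed by one at base+40 is treated as consecutive, which is consistent with the merged vmovups 32(%rdi), %ymm0 codegen shown above.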
; CHECK-LABEL: cftx020:
; CHECK: vmovsd (%rdi), %xmm{{.*}}
; CHECK-NEXT: vmovsd 16(%rdi), %xmm{{.*}}
; CHECK-NEXT: vmovhpd 24(%rdi), %xmm{{.*}}
; CHECK-NEXT: vmovhpd 8(%rdi), %xmm{{.*}}
; CHECK: vmovupd %xmm{{.*}}, (%rdi)
; CHECK-NEXT: vmovupd %xmm{{.*}}, 16(%rdi)
; CHECK: ret

; A test from pifft (after SLP-vectorization) that fails when we drop the chain on newly merged loads.
define void @cftx020(double* nocapture %a) {
entry:
  ; Scalar loads of a[0], a[2], a[1] and a[3].
  %0 = load double* %a, align 8
  %arrayidx1 = getelementptr inbounds double* %a, i64 2
  %1 = load double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double* %a, i64 1
  %2 = load double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double* %a, i64 3
  %3 = load double* %arrayidx3, align 8
  ; a[0..1] = <a[0], a[3]> + <a[2], a[1]>
  %4 = insertelement <2 x double> undef, double %0, i32 0
  %5 = insertelement <2 x double> %4, double %3, i32 1
  %6 = insertelement <2 x double> undef, double %1, i32 0
  %7 = insertelement <2 x double> %6, double %2, i32 1
  %8 = fadd <2 x double> %5, %7
  %9 = bitcast double* %a to <2 x double>*
  store <2 x double> %8, <2 x double>* %9, align 8
  ; a[2..3] = <a[0], a[1]> - <a[2], a[3]>
  %10 = insertelement <2 x double> undef, double %0, i32 0
  %11 = insertelement <2 x double> %10, double %2, i32 1
  %12 = insertelement <2 x double> undef, double %1, i32 0
  %13 = insertelement <2 x double> %12, double %3, i32 1
  %14 = fsub <2 x double> %11, %13
  %15 = bitcast double* %arrayidx1 to <2 x double>*
  store <2 x double> %14, <2 x double>* %15, align 8
  ret void
}