[X86] Add a test case for r208252.
Prior to r208252, the FMA 231 family was marked as isCommutable. However, the
memory variants of this family are not commutable. Therefore, we did not
implement findCommutedOpIndices for those variants and missed that
the default implementation (more or less: commute indices 1 and 2) was
firing behind our back.
As a result, as demonstrated in the test case before the fix, we were
transforming a = b * c + a into a = a * c + b.
I.e., before r208252 we were generating for this test case:
vmovaps %xmm0, %xmm1
vmovss (%rsi), %xmm0
vfmadd231ss (%rdi), %xmm1, %xmm0
Instead of:
vmovss (%rsi), %xmm1
vfmadd231ss (%rdi), %xmm1, %xmm0
<rdar://problem/16800495>
llvm-svn: 208260
2014-05-08 06:52:58 +08:00
|
|
|
; RUN: llc -fp-contract=fast -mattr=+fma -disable-cgp < %s -o - | FileCheck %s

; Check that the 2nd and 3rd arguments of fmaXXX231 reg1, reg2, mem3 are not commuted.
; <rdar://problem/16800495>
;
; Notes on the llc flags used in the run line above:
;   -fp-contract=fast  allows separate fmul/fadd IR instructions to be fused
;                      into a single FMA.
;   -mattr=+fma        enables the FMA instruction set for the target.
;   -disable-cgp       turns off the CodeGenPrepare pass.
;                      NOTE(review): presumably so the test's control flow
;                      reaches instruction selection unmodified -- confirm.

; Mach-O name mangling (m:o), 64-bit x86, macOS: the test expects the
; argument-register assignment of this target (see the per-test comments).
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx"
|
|
|
|
|
|
|
|
; test1: accumulate (*%addr * *%addr2) into %arg and store the result to %addr.
;
; The fmul/fadd pair below is expected to be selected as a memory-operand
; vfmadd231ss whose accumulator is the register holding %arg. The FileCheck
; directives verify that the accumulator register is left untouched up to the
; FMA and that the loaded multiplicand goes to some other xmm register, i.e.
; that the register operands of the FMA were not swapped.

; CHECK-LABEL: test1:
; %arg lives in xmm0 and it shouldn't be redefined until it is used in the FMA.
; CHECK-NOT: {{.*}}, %xmm0
; %addr lives in rdi.
; %addr2 lives in rsi.
; CHECK: vmovss (%rsi), [[ADDR2:%xmm[0-9]+]]
; The assembly syntax is in the reverse order.
; (AT&T operand order, destination last: xmm0 = ADDR2 * mem[rdi] + xmm0.)
; CHECK: vfmadd231ss (%rdi), [[ADDR2]], %xmm0
define void @test1(float* %addr, float* %addr2, float %arg) {
entry:
  br label %loop

loop:
  ; Accumulator: %arg when coming from %entry, the previous sum on the back edge.
  %sum0 = phi float [ %fma, %loop ], [ %arg, %entry ]
  ; The two multiplicands are both read from memory.
  %addrVal = load float, float* %addr, align 4
  %addr2Val = load float, float* %addr2, align 4
  ; %fma = %sum0 + %addrVal * %addr2Val, written as separate fmul/fadd.
  %fmul = fmul float %addrVal, %addr2Val
  %fma = fadd float %sum0, %fmul
  ; The condition is the constant true, so the back edge to %loop is never
  ; taken at run time; it still exists in the CFG, which is what makes %sum0
  ; a phi.
  br i1 true, label %exit, label %loop

exit:
  ; Write the accumulated value back through %addr.
  store float %fma, float* %addr, align 4
  ret void
}
|