llvm-project/llvm/test/CodeGen/PowerPC/f128-fma.ll

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

560 lines
19 KiB
LLVM
Raw Normal View History

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \
; RUN: -check-prefix=CHECK-P8
; qpFmadd: loads fp128 values from %a, %b and %c, combines them with the
; llvm.fmuladd.f128 intrinsic and stores the result through %res.
; The pwr9 expectations below require a single xsmaddqp between the vector
; loads and the store; the pwr8 expectations require a call to __mulkf3
; followed by a call to __addkf3, with each fp128 value handled as two
; 64-bit doublewords in general-purpose registers.
define void @qpFmadd(fp128* nocapture readonly %a, fp128* nocapture %b,
; CHECK-LABEL: qpFmadd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: lxv v4, 0(r5)
; CHECK-NEXT: xsmaddqp v4, v2, v3
; CHECK-NEXT: stxv v4, 0(r6)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFmadd:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r28, -32
; CHECK-P8-NEXT: .cfi_offset r29, -24
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -64(r1)
; CHECK-P8-NEXT: ld r7, 0(r3)
; CHECK-P8-NEXT: ld r8, 8(r3)
; CHECK-P8-NEXT: ld r9, 0(r4)
; CHECK-P8-NEXT: ld r10, 8(r4)
; CHECK-P8-NEXT: mr r28, r6
; CHECK-P8-NEXT: ld r30, 0(r5)
; CHECK-P8-NEXT: ld r29, 8(r5)
; CHECK-P8-NEXT: mr r3, r7
; CHECK-P8-NEXT: mr r4, r8
; CHECK-P8-NEXT: mr r5, r9
; CHECK-P8-NEXT: mr r6, r10
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: mr r5, r30
; CHECK-P8-NEXT: mr r6, r29
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: std r3, 0(r28)
; CHECK-P8-NEXT: std r4, 8(r28)
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
fp128* nocapture readonly %c, fp128* nocapture %res) {
entry:
%0 = load fp128, fp128* %a, align 16
%1 = load fp128, fp128* %b, align 16
%2 = load fp128, fp128* %c, align 16
; The intrinsic receives the operands in the order (a, b, c).
%madd = tail call fp128 @llvm.fmuladd.f128(fp128 %0, fp128 %1, fp128 %2)
store fp128 %madd, fp128* %res, align 16
ret void
}
; Declaration of the fp128 multiply-add intrinsic called by @qpFmadd.
declare fp128 @llvm.fmuladd.f128(fp128, fp128, fp128)
; Function Attrs: norecurse nounwind
; qpFmadd_02: computes a + (b * c) on fp128 values loaded from %a, %b and %c,
; using separate fmul/fadd instructions that both carry the 'contract' flag,
; and stores the sum through %res.
; The pwr9 expectations require the pair to become one xsmaddqp whose
; target register is the one loaded from %a; the pwr8 expectations require
; a call to __mulkf3 followed by a call to __addkf3.
define void @qpFmadd_02(fp128* nocapture readonly %a,
; CHECK-LABEL: qpFmadd_02:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: lxv v4, 0(r5)
; CHECK-NEXT: xsmaddqp v2, v3, v4
; CHECK-NEXT: stxv v2, 0(r6)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFmadd_02:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r28, -32
; CHECK-P8-NEXT: .cfi_offset r29, -24
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -64(r1)
; CHECK-P8-NEXT: ld r7, 0(r5)
; CHECK-P8-NEXT: ld r8, 8(r5)
; CHECK-P8-NEXT: ld r30, 0(r3)
; CHECK-P8-NEXT: ld r29, 8(r3)
; CHECK-P8-NEXT: mr r28, r6
; CHECK-P8-NEXT: ld r3, 0(r4)
; CHECK-P8-NEXT: ld r4, 8(r4)
; CHECK-P8-NEXT: mr r5, r7
; CHECK-P8-NEXT: mr r6, r8
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: mr r5, r3
; CHECK-P8-NEXT: mr r6, r4
; CHECK-P8-NEXT: mr r3, r30
; CHECK-P8-NEXT: mr r4, r29
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: std r3, 0(r28)
; CHECK-P8-NEXT: std r4, 8(r28)
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
fp128* nocapture readonly %b,
fp128* nocapture readonly %c, fp128* nocapture %res) {
entry:
%0 = load fp128, fp128* %a, align 16
%1 = load fp128, fp128* %b, align 16
%2 = load fp128, fp128* %c, align 16
; The product is the second operand of the add: a + (b * c).
%mul = fmul contract fp128 %1, %2
%add = fadd contract fp128 %0, %mul
store fp128 %add, fp128* %res, align 16
ret void
}
; Function Attrs: norecurse nounwind
; qpFmadd_03: computes (a * b) + c on fp128 values, with the load of %c
; placed after the multiply in the IR, and stores the sum through %res.
; Both fmul and fadd carry the 'contract' flag.
; The pwr9 expectations require one xsmaddqp whose target register is the
; one loaded from %c; the pwr8 expectations require __mulkf3 then __addkf3,
; with the %c doublewords loaded between the two calls.
define void @qpFmadd_03(fp128* nocapture readonly %a,
; CHECK-LABEL: qpFmadd_03:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: lxv v4, 0(r5)
; CHECK-NEXT: xsmaddqp v4, v2, v3
; CHECK-NEXT: stxv v4, 0(r6)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFmadd_03:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r29, -24
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -64(r1)
; CHECK-P8-NEXT: ld r9, 0(r3)
; CHECK-P8-NEXT: ld r7, 8(r3)
; CHECK-P8-NEXT: ld r8, 0(r4)
; CHECK-P8-NEXT: mr r30, r6
; CHECK-P8-NEXT: ld r6, 8(r4)
; CHECK-P8-NEXT: mr r29, r5
; CHECK-P8-NEXT: mr r3, r9
; CHECK-P8-NEXT: mr r4, r7
; CHECK-P8-NEXT: mr r5, r8
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: ld r5, 0(r29)
; CHECK-P8-NEXT: ld r6, 8(r29)
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: std r3, 0(r30)
; CHECK-P8-NEXT: std r4, 8(r30)
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
fp128* nocapture readonly %b,
fp128* nocapture readonly %c, fp128* nocapture %res) {
entry:
%0 = load fp128, fp128* %a, align 16
%1 = load fp128, fp128* %b, align 16
; The product is the first operand of the add: (a * b) + c.
%mul = fmul contract fp128 %0, %1
%2 = load fp128, fp128* %c, align 16
%add = fadd contract fp128 %mul, %2
store fp128 %add, fp128* %res, align 16
ret void
}
; Function Attrs: norecurse nounwind
; qpFnmadd: computes -(a + (b * c)) on fp128 values loaded from %a, %b and
; %c and stores the result through %res. The fmul/fadd carry 'contract';
; the negation is written as a subtraction from the fp128 constant -0.0.
; The pwr9 expectations require a single xsnmaddqp. The pwr8 expectations
; require __mulkf3 then __addkf3, after which the value 1 << 63 is built
; (li + rldic) and XORed into the doubleword stored at offset 8 of the
; result, i.e. the negation is done with integer instructions.
define void @qpFnmadd(fp128* nocapture readonly %a,
; CHECK-LABEL: qpFnmadd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: lxv v4, 0(r5)
; CHECK-NEXT: xsnmaddqp v2, v3, v4
; CHECK-NEXT: stxv v2, 0(r6)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFnmadd:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r28, -32
; CHECK-P8-NEXT: .cfi_offset r29, -24
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -64(r1)
; CHECK-P8-NEXT: ld r7, 0(r5)
; CHECK-P8-NEXT: ld r8, 8(r5)
; CHECK-P8-NEXT: ld r30, 0(r3)
; CHECK-P8-NEXT: ld r29, 8(r3)
; CHECK-P8-NEXT: mr r28, r6
; CHECK-P8-NEXT: ld r3, 0(r4)
; CHECK-P8-NEXT: ld r4, 8(r4)
; CHECK-P8-NEXT: mr r5, r7
; CHECK-P8-NEXT: mr r6, r8
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: mr r5, r3
; CHECK-P8-NEXT: mr r6, r4
; CHECK-P8-NEXT: mr r3, r30
; CHECK-P8-NEXT: mr r4, r29
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: li r5, 1
; CHECK-P8-NEXT: std r3, 0(r28)
; CHECK-P8-NEXT: rldic r5, r5, 63, 0
; CHECK-P8-NEXT: xor r4, r4, r5
; CHECK-P8-NEXT: std r4, 8(r28)
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
fp128* nocapture readonly %b,
fp128* nocapture readonly %c, fp128* nocapture %res) {
entry:
%0 = load fp128, fp128* %a, align 16
%1 = load fp128, fp128* %b, align 16
%2 = load fp128, fp128* %c, align 16
%mul = fmul contract fp128 %1, %2
%add = fadd contract fp128 %0, %mul
; Subtracting %add from the fp128 constant -0.0 negates it; note that this
; fsub carries no fast-math flags.
%sub = fsub fp128 0xL00000000000000008000000000000000, %add
store fp128 %sub, fp128* %res, align 16
ret void
}
; Function Attrs: norecurse nounwind
; qpFnmadd_02: computes -((a * b) + c) on fp128 values, with the load of %c
; placed after the multiply in the IR, and stores the result through %res.
; The fmul/fadd carry 'contract'; the negation is a subtraction from -0.0.
; The pwr9 expectations require a single xsnmaddqp whose target register is
; the one loaded from %c. The pwr8 expectations require __mulkf3 then
; __addkf3, followed by an XOR of 1 << 63 into the doubleword stored at
; offset 8 of the result.
define void @qpFnmadd_02(fp128* nocapture readonly %a,
; CHECK-LABEL: qpFnmadd_02:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: lxv v4, 0(r5)
; CHECK-NEXT: xsnmaddqp v4, v2, v3
; CHECK-NEXT: stxv v4, 0(r6)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFnmadd_02:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r29, -24
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -64(r1)
; CHECK-P8-NEXT: ld r9, 0(r3)
; CHECK-P8-NEXT: ld r7, 8(r3)
; CHECK-P8-NEXT: ld r8, 0(r4)
; CHECK-P8-NEXT: mr r30, r6
; CHECK-P8-NEXT: ld r6, 8(r4)
; CHECK-P8-NEXT: mr r29, r5
; CHECK-P8-NEXT: mr r3, r9
; CHECK-P8-NEXT: mr r4, r7
; CHECK-P8-NEXT: mr r5, r8
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: ld r5, 0(r29)
; CHECK-P8-NEXT: ld r6, 8(r29)
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: li r5, 1
; CHECK-P8-NEXT: std r3, 0(r30)
; CHECK-P8-NEXT: rldic r5, r5, 63, 0
; CHECK-P8-NEXT: xor r4, r4, r5
; CHECK-P8-NEXT: std r4, 8(r30)
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
fp128* nocapture readonly %b,
fp128* nocapture readonly %c, fp128* nocapture %res) {
entry:
%0 = load fp128, fp128* %a, align 16
%1 = load fp128, fp128* %b, align 16
%mul = fmul contract fp128 %0, %1
%2 = load fp128, fp128* %c, align 16
%add = fadd contract fp128 %mul, %2
; Subtracting %add from the fp128 constant -0.0 negates it; this fsub
; carries no fast-math flags.
%sub = fsub fp128 0xL00000000000000008000000000000000, %add
store fp128 %sub, fp128* %res, align 16
ret void
}
; Function Attrs: norecurse nounwind
; qpFmsub: computes a - (b * c) on fp128 values loaded from %a, %b and %c
; and stores the result through %res. The fmul carries 'contract' and the
; fsub carries 'contract nsz'.
; The pwr9 expectations require a single xsnmsubqp whose target register is
; the one loaded from %a; the pwr8 expectations require a call to __mulkf3
; followed by a call to __subkf3.
define void @qpFmsub(fp128* nocapture readonly %a,
; CHECK-LABEL: qpFmsub:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: lxv v4, 0(r5)
; CHECK-NEXT: xsnmsubqp v2, v3, v4
; CHECK-NEXT: stxv v2, 0(r6)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFmsub:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r28, -32
; CHECK-P8-NEXT: .cfi_offset r29, -24
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -64(r1)
; CHECK-P8-NEXT: ld r7, 0(r5)
; CHECK-P8-NEXT: ld r8, 8(r5)
; CHECK-P8-NEXT: ld r30, 0(r3)
; CHECK-P8-NEXT: ld r29, 8(r3)
; CHECK-P8-NEXT: mr r28, r6
; CHECK-P8-NEXT: ld r3, 0(r4)
; CHECK-P8-NEXT: ld r4, 8(r4)
; CHECK-P8-NEXT: mr r5, r7
; CHECK-P8-NEXT: mr r6, r8
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: mr r5, r3
; CHECK-P8-NEXT: mr r6, r4
; CHECK-P8-NEXT: mr r3, r30
; CHECK-P8-NEXT: mr r4, r29
; CHECK-P8-NEXT: bl __subkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: std r3, 0(r28)
; CHECK-P8-NEXT: std r4, 8(r28)
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
fp128* nocapture readonly %b,
fp128* nocapture readonly %c, fp128* nocapture %res) {
entry:
%0 = load fp128, fp128* %a, align 16
%1 = load fp128, fp128* %b, align 16
%2 = load fp128, fp128* %c, align 16
%mul = fmul contract fp128 %1, %2
; The product is the subtrahend: a - (b * c). This is the only fsub in the
; file that carries the 'nsz' flag.
%sub = fsub contract nsz fp128 %0, %mul
store fp128 %sub, fp128* %res, align 16
ret void
}
; Function Attrs: norecurse nounwind
; qpFmsub_02: computes (a * b) - c on fp128 values, with the load of %c
; placed after the multiply in the IR, and stores the result through %res.
; Both fmul and fsub carry the 'contract' flag.
; The pwr9 expectations require a single xsmsubqp whose target register is
; the one loaded from %c; the pwr8 expectations require __mulkf3 then
; __subkf3, with the %c doublewords loaded between the two calls.
define void @qpFmsub_02(fp128* nocapture readonly %a,
; CHECK-LABEL: qpFmsub_02:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: lxv v4, 0(r5)
; CHECK-NEXT: xsmsubqp v4, v2, v3
; CHECK-NEXT: stxv v4, 0(r6)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFmsub_02:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r29, -24
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -64(r1)
; CHECK-P8-NEXT: ld r9, 0(r3)
; CHECK-P8-NEXT: ld r7, 8(r3)
; CHECK-P8-NEXT: ld r8, 0(r4)
; CHECK-P8-NEXT: mr r30, r6
; CHECK-P8-NEXT: ld r6, 8(r4)
; CHECK-P8-NEXT: mr r29, r5
; CHECK-P8-NEXT: mr r3, r9
; CHECK-P8-NEXT: mr r4, r7
; CHECK-P8-NEXT: mr r5, r8
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: ld r5, 0(r29)
; CHECK-P8-NEXT: ld r6, 8(r29)
; CHECK-P8-NEXT: bl __subkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: std r3, 0(r30)
; CHECK-P8-NEXT: std r4, 8(r30)
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
fp128* nocapture readonly %b,
fp128* nocapture readonly %c, fp128* nocapture %res) {
entry:
%0 = load fp128, fp128* %a, align 16
%1 = load fp128, fp128* %b, align 16
; The product is the minuend: (a * b) - c.
%mul = fmul contract fp128 %0, %1
%2 = load fp128, fp128* %c, align 16
%sub = fsub contract fp128 %mul, %2
store fp128 %sub, fp128* %res, align 16
ret void
}
; Function Attrs: norecurse nounwind
; qpFnmsub: computes -(a - (b * c)) on fp128 values loaded from %a, %b and
; %c and stores the result through %res. The fmul and the inner fsub carry
; 'contract' only (no 'nsz'); the outer negation is a subtraction from -0.0
; with no fast-math flags.
; The pwr9 expectations require two instructions here: xsnegqp on the
; register loaded from %b, then xsnmaddqp. The pwr8 expectations require
; __mulkf3 then __subkf3, followed by an XOR of 1 << 63 into the doubleword
; stored at offset 8 of the result.
define void @qpFnmsub(fp128* nocapture readonly %a,
; CHECK-LABEL: qpFnmsub:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v4, 0(r5)
; CHECK-NEXT: xsnegqp v3, v3
; CHECK-NEXT: xsnmaddqp v2, v3, v4
; CHECK-NEXT: stxv v2, 0(r6)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFnmsub:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r28, -32
; CHECK-P8-NEXT: .cfi_offset r29, -24
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -64(r1)
; CHECK-P8-NEXT: ld r7, 0(r5)
; CHECK-P8-NEXT: ld r8, 8(r5)
; CHECK-P8-NEXT: ld r30, 0(r3)
; CHECK-P8-NEXT: ld r29, 8(r3)
; CHECK-P8-NEXT: mr r28, r6
; CHECK-P8-NEXT: ld r3, 0(r4)
; CHECK-P8-NEXT: ld r4, 8(r4)
; CHECK-P8-NEXT: mr r5, r7
; CHECK-P8-NEXT: mr r6, r8
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: mr r5, r3
; CHECK-P8-NEXT: mr r6, r4
; CHECK-P8-NEXT: mr r3, r30
; CHECK-P8-NEXT: mr r4, r29
; CHECK-P8-NEXT: bl __subkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: li r5, 1
; CHECK-P8-NEXT: std r3, 0(r28)
; CHECK-P8-NEXT: rldic r5, r5, 63, 0
; CHECK-P8-NEXT: xor r4, r4, r5
; CHECK-P8-NEXT: std r4, 8(r28)
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
fp128* nocapture readonly %b,
fp128* nocapture readonly %c, fp128* nocapture %res) {
entry:
%0 = load fp128, fp128* %a, align 16
%1 = load fp128, fp128* %b, align 16
%2 = load fp128, fp128* %c, align 16
%mul = fmul contract fp128 %1, %2
; Unlike @qpFmsub, this inner subtraction does not carry 'nsz'.
%sub = fsub contract fp128 %0, %mul
; Subtracting %sub from the fp128 constant -0.0 negates it.
%sub1 = fsub fp128 0xL00000000000000008000000000000000, %sub
store fp128 %sub1, fp128* %res, align 16
ret void
}
; Function Attrs: norecurse nounwind
; qpFnmsub_02: computes -((a * b) - c) on fp128 values, with the load of %c
; placed after the multiply in the IR, and stores the result through %res.
; The fmul and the inner fsub carry 'contract'; the outer negation is a
; subtraction from -0.0 with no fast-math flags.
; The pwr9 expectations require a single xsnmsubqp whose target register is
; the one loaded from %c. The pwr8 expectations require __mulkf3 then
; __subkf3, followed by an XOR of 1 << 63 into the doubleword stored at
; offset 8 of the result.
define void @qpFnmsub_02(fp128* nocapture readonly %a,
; CHECK-LABEL: qpFnmsub_02:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: lxv v4, 0(r5)
; CHECK-NEXT: xsnmsubqp v4, v2, v3
; CHECK-NEXT: stxv v4, 0(r6)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFnmsub_02:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r29, -24
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -64(r1)
; CHECK-P8-NEXT: ld r9, 0(r3)
; CHECK-P8-NEXT: ld r7, 8(r3)
; CHECK-P8-NEXT: ld r8, 0(r4)
; CHECK-P8-NEXT: mr r30, r6
; CHECK-P8-NEXT: ld r6, 8(r4)
; CHECK-P8-NEXT: mr r29, r5
; CHECK-P8-NEXT: mr r3, r9
; CHECK-P8-NEXT: mr r4, r7
; CHECK-P8-NEXT: mr r5, r8
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: ld r5, 0(r29)
; CHECK-P8-NEXT: ld r6, 8(r29)
; CHECK-P8-NEXT: bl __subkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: li r5, 1
; CHECK-P8-NEXT: std r3, 0(r30)
; CHECK-P8-NEXT: rldic r5, r5, 63, 0
; CHECK-P8-NEXT: xor r4, r4, r5
; CHECK-P8-NEXT: std r4, 8(r30)
; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
fp128* nocapture readonly %b,
fp128* nocapture readonly %c, fp128* nocapture %res) {
entry:
%0 = load fp128, fp128* %a, align 16
%1 = load fp128, fp128* %b, align 16
%mul = fmul contract fp128 %0, %1
%2 = load fp128, fp128* %c, align 16
; The product is the minuend of the inner subtraction: (a * b) - c.
%sub = fsub contract fp128 %mul, %2
; Subtracting %sub from the fp128 constant -0.0 negates it.
%sub1 = fsub fp128 0xL00000000000000008000000000000000, %sub
store fp128 %sub1, fp128* %res, align 16
ret void
}