From 1a493b0fa556a07c728862c3c3f70bfd8683bef0 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Fri, 22 May 2020 07:48:09 -0500 Subject: [PATCH] [PowerPC] Add missing handling for half precision The fix for PR39865 took care of some of the handling for half precision but it missed a number of issues that still exist. This patch fixes the remaining issues that cause crashes in the PPC back end. Fixes: https://bugs.llvm.org/show_bug.cgi?id=45776 Differential revision: https://reviews.llvm.org/D79283 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6 + llvm/lib/Target/PowerPC/PPCISelLowering.h | 2 +- llvm/lib/Target/PowerPC/PPCInstrVSX.td | 7 + .../PowerPC/handle-f16-storage-type.ll | 1081 +++++++++++++++++ .../CodeGen/PowerPC/scalar_vector_test_2.ll | 88 +- .../vector-constrained-fp-intrinsics.ll | 146 +-- 6 files changed, 1151 insertions(+), 179 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 7f6fc73dee75..8b1ebba596a0 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10755,6 +10755,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::FP_EXTEND && "Should only be called for ISD::FP_EXTEND"); + // FIXME: handle extends from half precision float vectors on P9. // We only want to custom lower an extend from v2f32 to v2f64. if (Op.getValueType() != MVT::v2f64 || Op.getOperand(0).getValueType() != MVT::v2f32) @@ -10968,6 +10969,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::BITCAST: // Don't handle bitcast here. return; + case ISD::FP_EXTEND: + SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG); + if (Lowered) + Results.push_back(Lowered); + return; } } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 41898eef3713..c34fd6aa78be 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -640,7 +640,7 @@ namespace llvm { /// then the VPERM for the shuffle. All in all a very slow sequence. 
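The change below keeps single-element vectors out of that widening path, so v1 types fall back to the default action (scalarization). For reference, a minimal sketch of the kind of IR this affects — not part of the patch, but mirroring the <1 x float> tests updated in scalar_vector_test_2.ll further down:

; Sketch only: with v1f32 scalarized, this now selects plain lfs/xsaddsp/stfs
; rather than the old lfiwzx/xxswapd/xvaddsp/xxsldwi/stfiwx widened sequence
; (compare the updated test_liwzx1 checks later in this patch).
define void @v1f32_add(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) {
  %a = load <1 x float>, <1 x float>* %A
  %b = load <1 x float>, <1 x float>* %B
  %sum = fadd <1 x float> %a, %b
  store <1 x float> %sum, <1 x float>* %C
  ret void
}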
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override { - if (VT.getScalarSizeInBits() % 8 == 0) + if (VT.getVectorNumElements() != 1 && VT.getScalarSizeInBits() % 8 == 0) return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 93fbdd69ea3f..acb8e3563ffb 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -3658,6 +3658,13 @@ def : Pat<(f32 (extloadf16 xoaddr:$src)), (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>; def : Pat<(truncstoref16 f32:$src, xoaddr:$dst), (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>; +def : Pat<(f64 (f16_to_fp i32:$A)), + (f64 (XSCVHPDP (MTVSRWZ $A)))>; +def : Pat<(f32 (f16_to_fp i32:$A)), + (f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>; +def : Pat<(i32 (fp_to_f16 f32:$A)), + (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>; +def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>; // Vector sign extensions def : Pat<(f64 (PPCVexts f64:$A, 1)), diff --git a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll index f7ce464c5b61..c5968758cc4a 100644 --- a/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll +++ b/llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll @@ -8,6 +8,8 @@ ; RUN: -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \ ; RUN: --check-prefix=SOFT +; Tests for various operations on half precision float. Much of the test is +; copied from test/CodeGen/X86/half.ll. define dso_local double @loadd(i16* nocapture readonly %a) local_unnamed_addr #0 { ; P8-LABEL: loadd: ; P8: # %bb.0: # %entry @@ -197,4 +199,1083 @@ entry: } declare i16 @llvm.convert.to.fp16.f32(float) +define void @test_load_store(half* %in, half* %out) #0 { +; P8-LABEL: test_load_store: +; P8: # %bb.0: +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: sth r3, 0(r4) +; P8-NEXT: blr +; +; CHECK-LABEL: test_load_store: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_load_store: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr + %val = load half, half* %in + store half %val, half* %out + ret void +} +define i16 @test_bitcast_from_half(half* %addr) #0 { +; P8-LABEL: test_bitcast_from_half: +; P8: # %bb.0: +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: blr +; +; CHECK-LABEL: test_bitcast_from_half: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_bitcast_from_half: +; SOFT: # %bb.0: +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: blr + %val = load half, half* %addr + %val_int = bitcast half %val to i16 + ret i16 %val_int +} +define void @test_bitcast_to_half(half* %addr, i16 %in) #0 { +; P8-LABEL: test_bitcast_to_half: +; P8: # %bb.0: +; P8-NEXT: sth r4, 0(r3) +; P8-NEXT: blr +; +; CHECK-LABEL: test_bitcast_to_half: +; CHECK: # %bb.0: +; CHECK-NEXT: sth r4, 0(r3) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_bitcast_to_half: +; SOFT: # %bb.0: +;
SOFT-NEXT: sth r4, 0(r3) +; SOFT-NEXT: blr + %val_fp = bitcast i16 %in to half + store half %val_fp, half* %addr + ret void +} +define float @test_extend32(half* %addr) #0 { +; P8-LABEL: test_extend32: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_extend32: +; CHECK: # %bb.0: +; CHECK-NEXT: lxsihzx f0, 0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_extend32: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr + %val16 = load half, half* %addr + %val32 = fpext half %val16 to float + ret float %val32 +} +define double @test_extend64(half* %addr) #0 { +; P8-LABEL: test_extend64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_extend64: +; CHECK: # %bb.0: +; CHECK-NEXT: lxsihzx f0, 0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_extend64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr + %val16 = load half, half* %addr + %val32 = fpext half %val16 to double + ret double %val32 +} +define void @test_trunc32(float %in, half* %addr) #0 { +; P8-LABEL: test_trunc32: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: mr r30, r4 +; P8-NEXT: bl __gnu_f2h_ieee +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_trunc32: +; CHECK: # %bb.0: +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: stxsihx f0, 0, r4 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_trunc32: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: clrldi r3, r3, 32 +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr + %val16 = fptrunc float %in to half + store half %val16, half* %addr + ret void +} +define void @test_trunc64(double %in, half* %addr) #0 { +; P8-LABEL: test_trunc64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: mr r30, r4 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; 
P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_trunc64: +; CHECK: # %bb.0: +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: stxsihx f0, 0, r4 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_trunc64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr + %val16 = fptrunc double %in to half + store half %val16, half* %addr + ret void +} +define i64 @test_fptosi_i64(half* %p) #0 { +; P8-LABEL: test_fptosi_i64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: xscvdpsxds f0, f1 +; P8-NEXT: mffprd r3, f0 +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_fptosi_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvdpsxds f0, f0 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_fptosi_i64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: bl __fixsfdi +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr + %a = load half, half* %p, align 2 + %r = fptosi half %a to i64 + ret i64 %r +} +define void @test_sitofp_i64(i64 %a, half* %p) #0 { +; P8-LABEL: test_sitofp_i64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: mtfprd f0, r3 +; P8-NEXT: mr r30, r4 +; P8-NEXT: xscvsxdsp f1, f0 +; P8-NEXT: bl __gnu_f2h_ieee +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_sitofp_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xscvsxdsp f0, f0 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_sitofp_i64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __floatdisf +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 32 +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr + %r = sitofp i64 %a to half + store half %r, half* %p + ret void +} +define i64 @test_fptoui_i64(half* %p) #0 { +; P8-LABEL: test_fptoui_i64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; 
P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: lhz r3, 0(r3) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: xscvdpuxds f0, f1 +; P8-NEXT: mffprd r3, f0 +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_fptoui_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvdpuxds f0, f0 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_fptoui_i64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: bl __fixunssfdi +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr + %a = load half, half* %p, align 2 + %r = fptoui half %a to i64 + ret i64 %r +} +define void @test_uitofp_i64(i64 %a, half* %p) #0 { +; P8-LABEL: test_uitofp_i64: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -48(r1) +; P8-NEXT: mtfprd f0, r3 +; P8-NEXT: mr r30, r4 +; P8-NEXT: xscvuxdsp f1, f0 +; P8-NEXT: bl __gnu_f2h_ieee +; P8-NEXT: nop +; P8-NEXT: sth r3, 0(r30) +; P8-NEXT: addi r1, r1, 48 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_uitofp_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xscvuxdsp f0, f0 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_uitofp_i64: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -48(r1) +; SOFT-NEXT: mr r30, r4 +; SOFT-NEXT: bl __floatundisf +; SOFT-NEXT: nop +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 48 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr + %r = uitofp i64 %a to half + store half %r, half* %p + ret void +} +define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 { +; P8-LABEL: test_extend32_vec4: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: mr r30, r3 +; P8-NEXT: lhz r3, 6(r3) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: li r3, 80 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: lhz r3, 2(r30) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: li r3, 64 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: lhz r3, 4(r30) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: li r3, 48 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: lhz r3, 0(r30) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: li r3, 80 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 64 +; P8-NEXT: lxvd2x vs2, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 48 +; P8-NEXT: xxmrghd vs0, vs0, vs2 +; P8-NEXT: lxvd2x 
vs2, r1, r3 # 16-byte Folded Reload +; P8-NEXT: xxmrghd vs1, vs2, vs1 +; P8-NEXT: xvcvdpsp vs34, vs0 +; P8-NEXT: xvcvdpsp vs35, vs1 +; P8-NEXT: vmrgew v2, v2, v3 +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_extend32_vec4: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r4, 6(r3) +; CHECK-NEXT: mtfprwz f0, r4 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: lhz r4, 2(r3) +; CHECK-NEXT: mtfprwz f1, r4 +; CHECK-NEXT: xscvhpdp f1, f1 +; CHECK-NEXT: lhz r4, 4(r3) +; CHECK-NEXT: mtfprwz f2, r4 +; CHECK-NEXT: xscvhpdp f2, f2 +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: xxmrghd vs0, vs0, vs1 +; CHECK-NEXT: mtfprwz f3, r3 +; CHECK-NEXT: xscvhpdp f3, f3 +; CHECK-NEXT: xxmrghd vs2, vs2, vs3 +; CHECK-NEXT: xvcvdpsp vs34, vs2 +; CHECK-NEXT: xvcvdpsp vs35, vs0 +; CHECK-NEXT: vmrgew v2, v3, v2 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_extend32_vec4: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -80(r1) +; SOFT-NEXT: mr r30, r3 +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: lhz r3, 2(r30) +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: lhz r3, 4(r30) +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: lhz r3, 6(r30) +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r6, r3 +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: mr r4, r28 +; SOFT-NEXT: mr r5, r27 +; SOFT-NEXT: addi r1, r1, 80 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; SOFT-NEXT: blr + %a = load <4 x half>, <4 x half>* %p, align 8 + %b = fpext <4 x half> %a to <4 x float> + ret <4 x float> %b +} +define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 { +; P8-LABEL: test_extend64_vec4: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: mr r30, r3 +; P8-NEXT: lhz r3, 6(r3) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: li r3, 80 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: lhz r3, 4(r30) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: li r3, 64 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: lhz r3, 2(r30) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: li r3, 48 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: stxvd2x vs1, r1, r3 # 16-byte Folded Spill +; P8-NEXT: lhz r3, 0(r30) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: li r3, 80 +; P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 64 +; P8-NEXT: lxvd2x vs2, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 48 +; P8-NEXT: xxmrghd vs35, vs0, vs2 +; P8-NEXT: lxvd2x vs0, r1, r3 # 16-byte Folded Reload +; P8-NEXT: xxmrghd vs34, vs0, vs1 +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: ld 
r30, -16(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_extend64_vec4: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r4, 6(r3) +; CHECK-NEXT: lhz r5, 4(r3) +; CHECK-NEXT: lhz r6, 2(r3) +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: mtfprwz f1, r6 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvhpdp f1, f1 +; CHECK-NEXT: xxmrghd vs34, vs1, vs0 +; CHECK-NEXT: mtfprwz f0, r5 +; CHECK-NEXT: mtfprwz f1, r4 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvhpdp f1, f1 +; CHECK-NEXT: xxmrghd vs35, vs1, vs0 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_extend64_vec4: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -80(r1) +; SOFT-NEXT: mr r30, r3 +; SOFT-NEXT: lhz r3, 0(r3) +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: lhz r3, 2(r30) +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: lhz r3, 4(r30) +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: lhz r3, 6(r30) +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: bl __extendsfdf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r6, r3 +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: mr r4, r28 +; SOFT-NEXT: mr r5, r27 +; SOFT-NEXT: addi r1, r1, 80 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; SOFT-NEXT: blr + %a = load <4 x half>, <4 x half>* %p, align 8 + %b = fpext <4 x half> %a to <4 x double> + ret <4 x double> %b +} +define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 { +; P8-LABEL: test_trunc32_vec4: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: xxsldwi vs0, vs34, vs34, 3 +; P8-NEXT: li r3, 48 +; P8-NEXT: std r27, 72(r1) # 8-byte Folded Spill +; P8-NEXT: std r28, 80(r1) # 8-byte Folded Spill +; P8-NEXT: std r29, 88(r1) # 8-byte Folded Spill +; P8-NEXT: std r30, 96(r1) # 8-byte Folded Spill +; P8-NEXT: mr r30, r5 +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: stxvd2x vs63, r1, r3 # 16-byte Folded Spill +; P8-NEXT: vmr v31, v2 +; P8-NEXT: bl __gnu_f2h_ieee +; P8-NEXT: nop +; P8-NEXT: xxswapd vs0, vs63 +; P8-NEXT: mr r29, r3 +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: bl __gnu_f2h_ieee +; P8-NEXT: nop +; P8-NEXT: xxsldwi vs0, vs63, vs63, 1 +; P8-NEXT: mr r28, r3 +; P8-NEXT: xscvspdpn f1, vs0 +; P8-NEXT: bl __gnu_f2h_ieee +; P8-NEXT: nop +; P8-NEXT: xscvspdpn f1, vs63 +; P8-NEXT: mr r27, r3 +; P8-NEXT: bl __gnu_f2h_ieee +; P8-NEXT: nop +; P8-NEXT: sth r3, 6(r30) +; P8-NEXT: li r3, 48 +; P8-NEXT: sth r27, 4(r30) +; P8-NEXT: ld r27, 72(r1) # 8-byte Folded Reload +; P8-NEXT: sth r28, 2(r30) +; P8-NEXT: sth r29, 0(r30) +; P8-NEXT: ld r30, 96(r1) # 8-byte Folded Reload +; P8-NEXT: ld r29, 88(r1) # 8-byte Folded Reload +; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload +; P8-NEXT: ld r28, 80(r1) # 8-byte Folded Reload +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; 
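In the CHECK (Power9) block that follows, the four __gnu_f2h_ieee libcalls of the P8 sequence above become inline xscvdphp conversions, selected through the new fp_to_f16/f16_to_fp patterns added to PPCInstrVSX.td. A minimal scalar reproducer, offered as a sketch (the function name is illustrative; the declare is repeated from the top of this file so the sketch stands alone):

define i16 @to_fp16_sketch(float %f) #0 {
  ; On Power9 this should select xscvdphp plus an mffprwz move,
  ; with no __gnu_f2h_ieee call.
  %h = call i16 @llvm.convert.to.fp16.f32(float %f)
  ret i16 %h
}
declare i16 @llvm.convert.to.fp16.f32(float)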
+; CHECK-LABEL: test_trunc32_vec4: +; CHECK: # %bb.0: +; CHECK-NEXT: xxsldwi vs0, vs34, vs34, 3 +; CHECK-NEXT: xscvspdpn f0, vs0 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: xxsldwi vs1, vs34, vs34, 1 +; CHECK-NEXT: xscvspdpn f1, vs1 +; CHECK-NEXT: xscvspdpn f0, vs0 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: xscvdphp f1, f1 +; CHECK-NEXT: mffprwz r4, f1 +; CHECK-NEXT: xscvspdpn f1, vs34 +; CHECK-NEXT: xscvdphp f1, f1 +; CHECK-NEXT: sth r4, 4(r5) +; CHECK-NEXT: mffprwz r4, f0 +; CHECK-NEXT: sth r4, 2(r5) +; CHECK-NEXT: sth r3, 0(r5) +; CHECK-NEXT: mffprwz r6, f1 +; CHECK-NEXT: sth r6, 6(r5) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_trunc32_vec4: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r26, -48(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -80(r1) +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: clrldi r3, r6, 32 +; SOFT-NEXT: mr r30, r7 +; SOFT-NEXT: mr r29, r5 +; SOFT-NEXT: mr r28, r4 +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r26, r3 +; SOFT-NEXT: clrldi r3, r29, 32 +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: clrldi r3, r28, 32 +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: clrldi r3, r27, 32 +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: clrldi r3, r28, 48 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: clrldi r3, r29, 48 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: clrldi r3, r26, 48 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 6(r30) +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 4(r30) +; SOFT-NEXT: mr r3, r28 +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 2(r30) +; SOFT-NEXT: mr r3, r27 +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 80 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r26, -48(r1) # 8-byte Folded Reload +; SOFT-NEXT: blr + %v = fptrunc <4 x float> %a to <4 x half> + store <4 x half> %v, <4 x half>* %p + ret void +} +define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) #0 { +; P8-LABEL: test_trunc64_vec4: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -128(r1) +; P8-NEXT: li r3, 48 +; P8-NEXT: xxswapd vs1, vs34 +; P8-NEXT: std r27, 88(r1) # 8-byte Folded Spill +; P8-NEXT: std r28, 96(r1) # 8-byte Folded Spill +; P8-NEXT: std r29, 104(r1) # 8-byte Folded Spill +; P8-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; P8-NEXT: mr r30, r7 +; P8-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; P8-NEXT: stxvd2x vs62, r1, r3 # 16-byte Folded Spill +; P8-NEXT: li r3, 64 +; P8-NEXT: vmr v30, v2 +; P8-NEXT: stxvd2x vs63, r1, r3 # 16-byte Folded Spill +; P8-NEXT: vmr v31, v3 
+; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: xxswapd vs1, vs63 +; P8-NEXT: mr r29, r3 +; P8-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: xxlor f1, vs62, vs62 +; P8-NEXT: mr r28, r3 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: xxlor f1, vs63, vs63 +; P8-NEXT: mr r27, r3 +; P8-NEXT: bl __truncdfhf2 +; P8-NEXT: nop +; P8-NEXT: sth r3, 6(r30) +; P8-NEXT: li r3, 64 +; P8-NEXT: sth r27, 2(r30) +; P8-NEXT: ld r27, 88(r1) # 8-byte Folded Reload +; P8-NEXT: sth r28, 4(r30) +; P8-NEXT: sth r29, 0(r30) +; P8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; P8-NEXT: ld r29, 104(r1) # 8-byte Folded Reload +; P8-NEXT: lxvd2x vs63, r1, r3 # 16-byte Folded Reload +; P8-NEXT: li r3, 48 +; P8-NEXT: ld r28, 96(r1) # 8-byte Folded Reload +; P8-NEXT: lxvd2x vs62, r1, r3 # 16-byte Folded Reload +; P8-NEXT: addi r1, r1, 128 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_trunc64_vec4: +; CHECK: # %bb.0: +; CHECK-NEXT: xxswapd vs0, vs34 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: xxswapd vs0, vs35 +; CHECK-NEXT: xscvdphp f0, f0 +; CHECK-NEXT: xscvdphp f1, vs34 +; CHECK-NEXT: mffprwz r4, f1 +; CHECK-NEXT: xscvdphp f1, vs35 +; CHECK-NEXT: sth r4, 2(r7) +; CHECK-NEXT: mffprwz r4, f0 +; CHECK-NEXT: sth r4, 4(r7) +; CHECK-NEXT: sth r3, 0(r7) +; CHECK-NEXT: mffprwz r5, f1 +; CHECK-NEXT: sth r5, 6(r7) +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_trunc64_vec4: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r26, -48(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -80(r1) +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: mr r3, r6 +; SOFT-NEXT: mr r30, r7 +; SOFT-NEXT: mr r29, r5 +; SOFT-NEXT: mr r28, r4 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r26, r3 +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: mr r3, r28 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: mr r3, r27 +; SOFT-NEXT: bl __truncdfhf2 +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r27, r3 +; SOFT-NEXT: clrldi r3, r28, 48 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r28, r3 +; SOFT-NEXT: clrldi r3, r29, 48 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: clrldi r3, r26, 48 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 6(r30) +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 4(r30) +; SOFT-NEXT: mr r3, r28 +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 2(r30) +; SOFT-NEXT: mr r3, r27 +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: sth r3, 0(r30) +; SOFT-NEXT: addi r1, r1, 80 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r26, -48(r1) # 8-byte Folded Reload +; SOFT-NEXT: blr + %v = fptrunc <4 x double> %a to <4 x half> + store <4 x half> %v, 
<4 x half>* %p + ret void +} +define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 { +; P8-LABEL: test_sitofp_fadd_i32: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r30, -24(r1) # 8-byte Folded Spill +; P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -64(r1) +; P8-NEXT: mr r30, r3 +; P8-NEXT: lhz r3, 0(r4) +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: mtfprwa f0, r30 +; P8-NEXT: fmr f31, f1 +; P8-NEXT: xscvsxdsp f1, f0 +; P8-NEXT: bl __gnu_f2h_ieee +; P8-NEXT: nop +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: xsaddsp f1, f31, f1 +; P8-NEXT: addi r1, r1, 64 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; P8-NEXT: ld r30, -24(r1) # 8-byte Folded Reload +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: test_sitofp_fadd_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: mtfprwa f1, r3 +; CHECK-NEXT: xscvsxdsp f1, f1 +; CHECK-NEXT: lhz r4, 0(r4) +; CHECK-NEXT: mtfprwz f0, r4 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: xscvdphp f1, f1 +; CHECK-NEXT: mffprwz r3, f1 +; CHECK-NEXT: mtfprwz f1, r3 +; CHECK-NEXT: xscvhpdp f1, f1 +; CHECK-NEXT: xsaddsp f1, f0, f1 +; CHECK-NEXT: blr +; +; SOFT-LABEL: test_sitofp_fadd_i32: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -64(r1) +; SOFT-NEXT: mr r30, r3 +; SOFT-NEXT: lhz r3, 0(r4) +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r29, r3 +; SOFT-NEXT: extsw r3, r30 +; SOFT-NEXT: bl __floatsisf +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 32 +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: mr r4, r3 +; SOFT-NEXT: mr r3, r29 +; SOFT-NEXT: bl __addsf3 +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 64 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; SOFT-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr + %tmp0 = load half, half* %b + %tmp1 = sitofp i32 %a to half + %tmp2 = fadd half %tmp0, %tmp1 + %tmp3 = fpext half %tmp2 to float + ret float %tmp3 +} +define half @PR40273(half) #0 { +; P8-LABEL: PR40273: +; P8: # %bb.0: +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -32(r1) +; P8-NEXT: bl __gnu_f2h_ieee +; P8-NEXT: nop +; P8-NEXT: bl __gnu_h2f_ieee +; P8-NEXT: nop +; P8-NEXT: xxlxor f0, f0, f0 +; P8-NEXT: fcmpu cr0, f1, f0 +; P8-NEXT: beq cr0, .LBB20_2 +; P8-NEXT: # %bb.1: +; P8-NEXT: addis r3, r2, .LCPI20_0@toc@ha +; P8-NEXT: lfs f0, .LCPI20_0@toc@l(r3) +; P8-NEXT: .LBB20_2: +; P8-NEXT: fmr f1, f0 +; P8-NEXT: addi r1, r1, 32 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; CHECK-LABEL: PR40273: +; CHECK: # %bb.0: +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: xxlxor f1, f1, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f0, f0 +; CHECK-NEXT: fcmpu cr0, f0, f1 +; CHECK-NEXT: beqlr cr0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: addis r3, r2, .LCPI20_0@toc@ha +; CHECK-NEXT: lfs f1, .LCPI20_0@toc@l(r3) +; CHECK-NEXT: blr +; +; SOFT-LABEL: PR40273: +; SOFT: # %bb.0: +; SOFT-NEXT: mflr r0 +; SOFT-NEXT: std r0, 16(r1) +; SOFT-NEXT: stdu r1, -32(r1) +; SOFT-NEXT: clrldi r3, r3, 48 +; SOFT-NEXT: bl __gnu_h2f_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: li r4, 0 +; SOFT-NEXT: bl __nesf2 +; SOFT-NEXT: nop +; SOFT-NEXT: cmplwi r3, 0 +; SOFT-NEXT: lis r3, 16256 +; 
SOFT-NEXT: isel r3, 0, r3, eq +; SOFT-NEXT: bl __gnu_f2h_ieee +; SOFT-NEXT: nop +; SOFT-NEXT: addi r1, r1, 32 +; SOFT-NEXT: ld r0, 16(r1) +; SOFT-NEXT: mtlr r0 +; SOFT-NEXT: blr + %2 = fcmp une half %0, 0xH0000 + %3 = uitofp i1 %2 to half + ret half %3 +} attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll index 6dcd14d06c62..783ea3a11cce 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll @@ -11,46 +11,34 @@ define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { ; P9LE-LABEL: test_liwzx1: ; P9LE: # %bb.0: -; P9LE-NEXT: lfiwzx f0, 0, r3 -; P9LE-NEXT: lfiwzx f1, 0, r4 -; P9LE-NEXT: xxswapd vs0, f0 -; P9LE-NEXT: xxswapd vs1, f1 -; P9LE-NEXT: xvaddsp vs0, vs0, vs1 -; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2 -; P9LE-NEXT: stfiwx f0, 0, r5 +; P9LE-NEXT: lfs f0, 0(r3) +; P9LE-NEXT: lfs f1, 0(r4) +; P9LE-NEXT: xsaddsp f0, f0, f1 +; P9LE-NEXT: stfs f0, 0(r5) ; P9LE-NEXT: blr ; ; P9BE-LABEL: test_liwzx1: ; P9BE: # %bb.0: -; P9BE-NEXT: lfiwzx f0, 0, r3 -; P9BE-NEXT: lfiwzx f1, 0, r4 -; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 -; P9BE-NEXT: xvaddsp vs0, vs0, vs1 -; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P9BE-NEXT: stfiwx f0, 0, r5 +; P9BE-NEXT: lfs f0, 0(r3) +; P9BE-NEXT: lfs f1, 0(r4) +; P9BE-NEXT: xsaddsp f0, f0, f1 +; P9BE-NEXT: stfs f0, 0(r5) ; P9BE-NEXT: blr ; ; P8LE-LABEL: test_liwzx1: ; P8LE: # %bb.0: -; P8LE-NEXT: lfiwzx f0, 0, r3 -; P8LE-NEXT: lfiwzx f1, 0, r4 -; P8LE-NEXT: xxswapd vs0, f0 -; P8LE-NEXT: xxswapd vs1, f1 -; P8LE-NEXT: xvaddsp vs0, vs0, vs1 -; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2 -; P8LE-NEXT: stfiwx f0, 0, r5 +; P8LE-NEXT: lfsx f0, 0, r3 +; P8LE-NEXT: lfsx f1, 0, r4 +; P8LE-NEXT: xsaddsp f0, f0, f1 +; P8LE-NEXT: stfsx f0, 0, r5 ; P8LE-NEXT: blr ; ; P8BE-LABEL: test_liwzx1: ; P8BE: # %bb.0: -; P8BE-NEXT: lfiwzx f0, 0, r3 -; P8BE-NEXT: lfiwzx f1, 0, r4 -; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 -; P8BE-NEXT: xvaddsp vs0, vs0, vs1 -; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P8BE-NEXT: stfiwx f0, 0, r5 +; P8BE-NEXT: lfsx f0, 0, r3 +; P8BE-NEXT: lfsx f1, 0, r4 +; P8BE-NEXT: xsaddsp f0, f0, f1 +; P8BE-NEXT: stfsx f0, 0, r5 ; P8BE-NEXT: blr @@ -65,50 +53,38 @@ define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { ; P9LE-LABEL: test_liwzx2: ; P9LE: # %bb.0: -; P9LE-NEXT: lfiwzx f0, 0, r3 -; P9LE-NEXT: lfiwzx f1, 0, r4 -; P9LE-NEXT: xxswapd vs0, f0 -; P9LE-NEXT: xxswapd vs1, f1 -; P9LE-NEXT: xvsubsp vs0, vs0, vs1 -; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 2 +; P9LE-NEXT: lfs f0, 0(r3) ; P9LE-NEXT: mr r3, r5 -; P9LE-NEXT: stfiwx f0, 0, r5 +; P9LE-NEXT: lfs f1, 0(r4) +; P9LE-NEXT: xssubsp f0, f0, f1 +; P9LE-NEXT: stfs f0, 0(r5) ; P9LE-NEXT: blr ; ; P9BE-LABEL: test_liwzx2: ; P9BE: # %bb.0: -; P9BE-NEXT: lfiwzx f0, 0, r3 -; P9BE-NEXT: lfiwzx f1, 0, r4 -; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 -; P9BE-NEXT: xvsubsp vs0, vs0, vs1 -; P9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P9BE-NEXT: lfs f0, 0(r3) ; P9BE-NEXT: mr r3, r5 -; P9BE-NEXT: stfiwx f0, 0, r5 +; P9BE-NEXT: lfs f1, 0(r4) +; P9BE-NEXT: xssubsp f0, f0, f1 +; P9BE-NEXT: stfs f0, 0(r5) ; P9BE-NEXT: blr ; ; P8LE-LABEL: test_liwzx2: ; P8LE: # %bb.0: -; P8LE-NEXT: lfiwzx f0, 0, r3 -; P8LE-NEXT: lfiwzx f1, 0, r4 +; P8LE-NEXT: lfsx f0, 0, r3 +; P8LE-NEXT: lfsx f1, 0, r4 ; 
P8LE-NEXT: mr r3, r5 -; P8LE-NEXT: xxswapd vs0, f0 -; P8LE-NEXT: xxswapd vs1, f1 -; P8LE-NEXT: xvsubsp vs0, vs0, vs1 -; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 2 -; P8LE-NEXT: stfiwx f0, 0, r5 +; P8LE-NEXT: xssubsp f0, f0, f1 +; P8LE-NEXT: stfsx f0, 0, r5 ; P8LE-NEXT: blr ; ; P8BE-LABEL: test_liwzx2: ; P8BE: # %bb.0: -; P8BE-NEXT: lfiwzx f0, 0, r3 -; P8BE-NEXT: lfiwzx f1, 0, r4 +; P8BE-NEXT: lfsx f0, 0, r3 +; P8BE-NEXT: lfsx f1, 0, r4 ; P8BE-NEXT: mr r3, r5 -; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 -; P8BE-NEXT: xvsubsp vs0, vs0, vs1 -; P8BE-NEXT: xxsldwi vs0, vs0, vs0, 3 -; P8BE-NEXT: stfiwx f0, 0, r5 +; P8BE-NEXT: xssubsp f0, f0, f1 +; P8BE-NEXT: stfsx f0, 0, r5 ; P8BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll index 4117f279eb16..c188cd6bf14a 100644 --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -9,9 +9,7 @@ define <1 x float> @constrained_vector_fdiv_v1f32() #0 { ; PC64LE-NEXT: addis 4, 2, .LCPI0_1@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI0_0@toc@l(3) ; PC64LE-NEXT: lfs 1, .LCPI0_1@toc@l(4) -; PC64LE-NEXT: xsdivsp 0, 1, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xsdivsp 1, 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fdiv_v1f32: @@ -20,9 +18,7 @@ define <1 x float> @constrained_vector_fdiv_v1f32() #0 { ; PC64LE9-NEXT: lfs 0, .LCPI0_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI0_1@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI0_1@toc@l(3) -; PC64LE9-NEXT: xsdivsp 0, 1, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xsdivsp 1, 1, 0 ; PC64LE9-NEXT: blr entry: %div = call <1 x float> @llvm.experimental.constrained.fdiv.v1f32( @@ -232,8 +228,6 @@ define <1 x float> @constrained_vector_frem_v1f32() #0 { ; PC64LE-NEXT: lfs 2, .LCPI5_1@toc@l(4) ; PC64LE-NEXT: bl fmodf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -250,8 +244,6 @@ define <1 x float> @constrained_vector_frem_v1f32() #0 { ; PC64LE9-NEXT: lfs 2, .LCPI5_1@toc@l(3) ; PC64LE9-NEXT: bl fmodf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -645,9 +637,7 @@ define <1 x float> @constrained_vector_fmul_v1f32() #0 { ; PC64LE-NEXT: addis 4, 2, .LCPI10_1@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI10_0@toc@l(3) ; PC64LE-NEXT: lfs 1, .LCPI10_1@toc@l(4) -; PC64LE-NEXT: xsmulsp 0, 1, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xsmulsp 1, 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fmul_v1f32: @@ -656,9 +646,7 @@ define <1 x float> @constrained_vector_fmul_v1f32() #0 { ; PC64LE9-NEXT: lfs 0, .LCPI10_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI10_1@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI10_1@toc@l(3) -; PC64LE9-NEXT: xsmulsp 0, 1, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xsmulsp 1, 1, 0 ; PC64LE9-NEXT: blr entry: %mul = call <1 x float> @llvm.experimental.constrained.fmul.v1f32( @@ -865,9 +853,7 @@ define <1 x float> @constrained_vector_fadd_v1f32() #0 { ; PC64LE-NEXT: addis 4, 2, .LCPI15_1@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI15_0@toc@l(3) ; PC64LE-NEXT: lfs 1, .LCPI15_1@toc@l(4) -; PC64LE-NEXT: xsaddsp 0, 1, 0 -; 
PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xsaddsp 1, 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fadd_v1f32: @@ -876,9 +862,7 @@ define <1 x float> @constrained_vector_fadd_v1f32() #0 { ; PC64LE9-NEXT: lfs 0, .LCPI15_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI15_1@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI15_1@toc@l(3) -; PC64LE9-NEXT: xsaddsp 0, 1, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xsaddsp 1, 1, 0 ; PC64LE9-NEXT: blr entry: %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32( @@ -1081,9 +1065,7 @@ define <1 x float> @constrained_vector_fsub_v1f32() #0 { ; PC64LE-NEXT: addis 4, 2, .LCPI20_1@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI20_0@toc@l(3) ; PC64LE-NEXT: lfs 1, .LCPI20_1@toc@l(4) -; PC64LE-NEXT: xssubsp 0, 1, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xssubsp 1, 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fsub_v1f32: @@ -1092,9 +1074,7 @@ define <1 x float> @constrained_vector_fsub_v1f32() #0 { ; PC64LE9-NEXT: lfs 0, .LCPI20_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI20_1@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI20_1@toc@l(3) -; PC64LE9-NEXT: xssubsp 0, 1, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xssubsp 1, 1, 0 ; PC64LE9-NEXT: blr entry: %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32( @@ -1295,18 +1275,14 @@ define <1 x float> @constrained_vector_sqrt_v1f32() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI25_0@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI25_0@toc@l(3) -; PC64LE-NEXT: xssqrtsp 0, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xssqrtsp 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_sqrt_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI25_0@toc@ha ; PC64LE9-NEXT: lfs 0, .LCPI25_0@toc@l(3) -; PC64LE9-NEXT: xssqrtsp 0, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xssqrtsp 1, 0 ; PC64LE9-NEXT: blr entry: %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32( @@ -1481,8 +1457,6 @@ define <1 x float> @constrained_vector_pow_v1f32() #0 { ; PC64LE-NEXT: lfs 2, .LCPI30_1@toc@l(4) ; PC64LE-NEXT: bl powf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -1499,8 +1473,6 @@ define <1 x float> @constrained_vector_pow_v1f32() #0 { ; PC64LE9-NEXT: lfs 2, .LCPI30_1@toc@l(3) ; PC64LE9-NEXT: bl powf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -1899,8 +1871,6 @@ define <1 x float> @constrained_vector_powi_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI35_0@toc@l(3) ; PC64LE-NEXT: bl __powisf2 ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -1916,8 +1886,6 @@ define <1 x float> @constrained_vector_powi_v1f32() #0 { ; PC64LE9-NEXT: li 4, 3 ; PC64LE9-NEXT: bl __powisf2 ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2284,8 +2252,6 @@ define <1 x float> @constrained_vector_sin_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI40_0@toc@l(3) ; PC64LE-NEXT: bl sinf ; 
PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2300,8 +2266,6 @@ define <1 x float> @constrained_vector_sin_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI40_0@toc@l(3) ; PC64LE9-NEXT: bl sinf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2637,8 +2601,6 @@ define <1 x float> @constrained_vector_cos_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI45_0@toc@l(3) ; PC64LE-NEXT: bl cosf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -2653,8 +2615,6 @@ define <1 x float> @constrained_vector_cos_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI45_0@toc@l(3) ; PC64LE9-NEXT: bl cosf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2990,8 +2950,6 @@ define <1 x float> @constrained_vector_exp_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI50_0@toc@l(3) ; PC64LE-NEXT: bl expf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3006,8 +2964,6 @@ define <1 x float> @constrained_vector_exp_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI50_0@toc@l(3) ; PC64LE9-NEXT: bl expf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3343,8 +3299,6 @@ define <1 x float> @constrained_vector_exp2_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI55_0@toc@l(3) ; PC64LE-NEXT: bl exp2f ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3359,8 +3313,6 @@ define <1 x float> @constrained_vector_exp2_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI55_0@toc@l(3) ; PC64LE9-NEXT: bl exp2f ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3696,8 +3648,6 @@ define <1 x float> @constrained_vector_log_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI60_0@toc@l(3) ; PC64LE-NEXT: bl logf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -3712,8 +3662,6 @@ define <1 x float> @constrained_vector_log_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI60_0@toc@l(3) ; PC64LE9-NEXT: bl logf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4049,8 +3997,6 @@ define <1 x float> @constrained_vector_log10_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI65_0@toc@l(3) ; PC64LE-NEXT: bl log10f ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4065,8 +4011,6 @@ define <1 x float> @constrained_vector_log10_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI65_0@toc@l(3) ; PC64LE9-NEXT: bl log10f ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: 
ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4402,8 +4346,6 @@ define <1 x float> @constrained_vector_log2_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI70_0@toc@l(3) ; PC64LE-NEXT: bl log2f ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4418,8 +4360,6 @@ define <1 x float> @constrained_vector_log2_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI70_0@toc@l(3) ; PC64LE9-NEXT: bl log2f ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4750,18 +4690,14 @@ define <1 x float> @constrained_vector_rint_v1f32() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI75_0@toc@ha ; PC64LE-NEXT: lfs 0, .LCPI75_0@toc@l(3) -; PC64LE-NEXT: xsrdpic 0, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: xsrdpic 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_rint_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI75_0@toc@ha ; PC64LE9-NEXT: lfs 0, .LCPI75_0@toc@l(3) -; PC64LE9-NEXT: xsrdpic 0, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: xsrdpic 1, 0 ; PC64LE9-NEXT: blr entry: %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32( @@ -4934,8 +4870,6 @@ define <1 x float> @constrained_vector_nearbyint_v1f32() #0 { ; PC64LE-NEXT: lfs 1, .LCPI80_0@toc@l(3) ; PC64LE-NEXT: bl nearbyintf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -4950,8 +4884,6 @@ define <1 x float> @constrained_vector_nearbyint_v1f32() #0 { ; PC64LE9-NEXT: lfs 1, .LCPI80_0@toc@l(3) ; PC64LE9-NEXT: bl nearbyintf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5289,8 +5221,6 @@ define <1 x float> @constrained_vector_maxnum_v1f32() #0 { ; PC64LE-NEXT: lfs 2, .LCPI85_1@toc@l(4) ; PC64LE-NEXT: bl fmaxf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -5307,8 +5237,6 @@ define <1 x float> @constrained_vector_maxnum_v1f32() #0 { ; PC64LE9-NEXT: lfs 2, .LCPI85_1@toc@l(3) ; PC64LE9-NEXT: bl fmaxf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5580,8 +5508,6 @@ define <1 x float> @constrained_vector_minnum_v1f32() #0 { ; PC64LE-NEXT: lfs 2, .LCPI90_1@toc@l(4) ; PC64LE-NEXT: bl fminf ; PC64LE-NEXT: nop -; PC64LE-NEXT: xscvdpspn 0, 1 -; PC64LE-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 @@ -5598,8 +5524,6 @@ define <1 x float> @constrained_vector_minnum_v1f32() #0 { ; PC64LE9-NEXT: lfs 2, .LCPI90_1@toc@l(3) ; PC64LE9-NEXT: bl fminf ; PC64LE9-NEXT: nop -; PC64LE9-NEXT: xscvdpspn 0, 1 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 ; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5864,18 +5788,14 @@ define <1 x float> @constrained_vector_fptrunc_v1f64() #0 { ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI95_0@toc@ha ; PC64LE-NEXT: lfd 0, .LCPI95_0@toc@l(3) -; PC64LE-NEXT: frsp 0, 0 -; PC64LE-NEXT: xscvdpspn 0, 0 -; 
PC64LE-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE-NEXT: frsp 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fptrunc_v1f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI95_0@toc@ha ; PC64LE9-NEXT: lfd 0, .LCPI95_0@toc@l(3) -; PC64LE9-NEXT: frsp 0, 0 -; PC64LE9-NEXT: xscvdpspn 0, 0 -; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1 +; PC64LE9-NEXT: frsp 1, 0 ; PC64LE9-NEXT: blr entry: %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64( @@ -6027,15 +5947,13 @@ define <1 x double> @constrained_vector_fpext_v1f32() #0 { ; PC64LE-LABEL: constrained_vector_fpext_v1f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI99_0@toc@ha -; PC64LE-NEXT: lfs 0, .LCPI99_0@toc@l(3) -; PC64LE-NEXT: xxspltd 34, 0, 0 +; PC64LE-NEXT: lfs 1, .LCPI99_0@toc@l(3) ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_fpext_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI99_0@toc@ha -; PC64LE9-NEXT: lfs 0, .LCPI99_0@toc@l(3) -; PC64LE9-NEXT: xxspltd 34, 0, 0 +; PC64LE9-NEXT: lfs 1, .LCPI99_0@toc@l(3) ; PC64LE9-NEXT: blr entry: %result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32( @@ -6137,17 +6055,13 @@ define <1 x float> @constrained_vector_ceil_v1f32() #0 { ; PC64LE-LABEL: constrained_vector_ceil_v1f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI103_0@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI103_0@toc@l -; PC64LE-NEXT: lfiwzx 0, 0, 3 -; PC64LE-NEXT: xxswapd 34, 0 +; PC64LE-NEXT: lfs 1, .LCPI103_0@toc@l(3) ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_ceil_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI103_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI103_0@toc@l -; PC64LE9-NEXT: lfiwzx 0, 0, 3 -; PC64LE9-NEXT: xxswapd 34, 0 +; PC64LE9-NEXT: lfs 1, .LCPI103_0@toc@l(3) ; PC64LE9-NEXT: blr entry: %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32( @@ -6226,17 +6140,13 @@ define <1 x float> @constrained_vector_floor_v1f32() #0 { ; PC64LE-LABEL: constrained_vector_floor_v1f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI107_0@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI107_0@toc@l -; PC64LE-NEXT: lfiwzx 0, 0, 3 -; PC64LE-NEXT: xxswapd 34, 0 +; PC64LE-NEXT: lfs 1, .LCPI107_0@toc@l(3) ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_floor_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI107_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI107_0@toc@l -; PC64LE9-NEXT: lfiwzx 0, 0, 3 -; PC64LE9-NEXT: xxswapd 34, 0 +; PC64LE9-NEXT: lfs 1, .LCPI107_0@toc@l(3) ; PC64LE9-NEXT: blr entry: %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32( @@ -6316,17 +6226,13 @@ define <1 x float> @constrained_vector_round_v1f32() #0 { ; PC64LE-LABEL: constrained_vector_round_v1f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: addis 3, 2, .LCPI111_0@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI111_0@toc@l -; PC64LE-NEXT: lfiwzx 0, 0, 3 -; PC64LE-NEXT: xxswapd 34, 0 +; PC64LE-NEXT: lfs 1, .LCPI111_0@toc@l(3) ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_round_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI111_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI111_0@toc@l -; PC64LE9-NEXT: lfiwzx 0, 0, 3 -; PC64LE9-NEXT: xxswapd 34, 0 +; PC64LE9-NEXT: lfs 1, .LCPI111_0@toc@l(3) ; PC64LE9-NEXT: blr entry: %round = call <1 x float> @llvm.experimental.constrained.round.v1f32( @@ -6408,17 +6314,13 @@ define <1 x float> @constrained_vector_trunc_v1f32() #0 { ; PC64LE-LABEL: constrained_vector_trunc_v1f32: ; PC64LE: # %bb.0: # %entry ; 
PC64LE-NEXT: addis 3, 2, .LCPI115_0@toc@ha -; PC64LE-NEXT: addi 3, 3, .LCPI115_0@toc@l -; PC64LE-NEXT: lfiwzx 0, 0, 3 -; PC64LE-NEXT: xxswapd 34, 0 +; PC64LE-NEXT: lfs 1, .LCPI115_0@toc@l(3) ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_trunc_v1f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI115_0@toc@ha -; PC64LE9-NEXT: addi 3, 3, .LCPI115_0@toc@l -; PC64LE9-NEXT: lfiwzx 0, 0, 3 -; PC64LE9-NEXT: xxswapd 34, 0 +; PC64LE9-NEXT: lfs 1, .LCPI115_0@toc@l(3) ; PC64LE9-NEXT: blr entry: %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
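Across the v1f32 cases in this file the change is uniform: with <1 x float> scalarized, the scalar result is produced directly in f1, so the old xscvdpspn/xxsldwi splat into vs34 drops out. A sketch of one such case, following the call shape used throughout this file (the function name is illustrative, not part of the patch):

define <1 x float> @v1f32_sqrt_sketch(<1 x float> %x) #0 {
entry:
  ; Expected to lower to a single xssqrtsp with the result left in f1.
  %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32(
            <1 x float> %x,
            metadata !"round.dynamic",
            metadata !"fpexcept.strict") #0
  ret <1 x float> %sqrt
}
declare <1 x float> @llvm.experimental.constrained.sqrt.v1f32(<1 x float>, metadata, metadata)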