From 3ed12232b03721dd70ff6e88b10a016c7f2a915e Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa"
Date: Tue, 4 Feb 2020 16:55:20 +0100
Subject: [PATCH] [VE] half fptrunc+store&load+fpext

Summary:
fp16 (half) load+fpext and fptrunc+store isel legalization and tests.
Also, ExternalSymbolSDNode operand printing (tested by fp16 lowering).

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D73899
---
 llvm/lib/Target/VE/VEISelLowering.cpp         |  27 +++-
 llvm/lib/Target/VE/VEInstrInfo.td             |   7 +
 llvm/lib/Target/VE/VEMCInstLower.cpp          |   3 +
 llvm/test/CodeGen/VE/fp_extload_truncstore.ll | 144 ++++++++++++++++++
 4 files changed, 174 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index a82701ac4c07..92d0fc2419d4 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -523,10 +523,14 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
   addRegisterClass(MVT::f64, &VE::I64RegClass);
 
   /// Load & Store {
-  // Turn FP extload into load/fpextend
-  for (MVT VT : MVT::fp_valuetypes()) {
-    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
-    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
+  for (MVT FPVT : MVT::fp_valuetypes()) {
+    for (MVT OtherFPVT : MVT::fp_valuetypes()) {
+      // Turn FP extload into load/fpextend
+      setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
+
+      // Turn FP truncstore into trunc + store.
+      setTruncStoreAction(FPVT, OtherFPVT, Expand);
+    }
   }
 
   // VE doesn't have i1 sign extending load
@@ -536,9 +540,6 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
     setTruncStoreAction(VT, MVT::i1, Expand);
   }
-
-  // Turn FP truncstore into trunc + store.
-  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
   /// } Load & Store
 
   // Custom legalize address nodes into LO/HI parts.
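Note: for the scalar FP types VE cares about here, the nested loop above amounts to the explicit calls sketched below. This is an illustrative unrolling, not code from the patch; MVT::fp_valuetypes() also yields f80, f128 and same-type pairs, which are recorded harmlessly. The f16 rows are the new coverage that the tests in this patch exercise:

  // Illustrative unrolling, restricted to f16/f32/f64 (an assumption; the
  // loop covers every FP value type pair).
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); // f16 -> f32 extload
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); // f16 -> f64 extload
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); // f32 -> f64 extload
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);            // f32 -> f16 truncstore
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);            // f64 -> f16 truncstore
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);            // f64 -> f32 truncstore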
@@ -563,12 +564,20 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::UDIVREM, IntVT, Expand);
   }
 
+  /// Conversion {
   // VE doesn't have instructions for fp<->uint, so expand them by llvm
   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
 
+  // fp16 not supported
+  for (MVT FPVT : MVT::fp_valuetypes()) {
+    setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
+    setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
+  }
+  /// } Conversion
+
   setStackPointerRegisterToSaveRestore(VE::SX11);
 
   // Set function alignment to 16 bytes
@@ -612,6 +621,10 @@ SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
     return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
                                      0, TF);
 
+  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
+    return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
+                                       TF);
+
   llvm_unreachable("Unhandled address SDNode");
 }
 
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 1cee47d87d73..a64fa5f54547 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -1129,6 +1129,13 @@ def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)),
           (LEASLrzi (ANDrm0 (LEAzzi tglobaladdr:$in2), 32),
                     (tglobaladdr:$in1))>;
 
+// Address calculation and its optimization
+def : Pat<(VEhi texternalsym:$in), (LEASLzzi texternalsym:$in)>;
+def : Pat<(VElo texternalsym:$in), (ANDrm0 (LEAzzi texternalsym:$in), 32)>;
+def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)),
+          (LEASLrzi (ANDrm0 (LEAzzi texternalsym:$in2), 32),
+                    (texternalsym:$in1))>;
+
 // Calls
 def : Pat<(call tglobaladdr:$dst),
           (CALL tglobaladdr:$dst)>;
diff --git a/llvm/lib/Target/VE/VEMCInstLower.cpp b/llvm/lib/Target/VE/VEMCInstLower.cpp
index 10bfb88ad009..389e77f913fe 100644
--- a/llvm/lib/Target/VE/VEMCInstLower.cpp
+++ b/llvm/lib/Target/VE/VEMCInstLower.cpp
@@ -46,6 +46,9 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO,
       break;
     return MCOperand::createReg(MO.getReg());
 
+  case MachineOperand::MO_ExternalSymbol:
+    return LowerSymbolOperand(
+        MI, MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP);
   case MachineOperand::MO_GlobalAddress:
     return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP);
   case MachineOperand::MO_Immediate:
diff --git a/llvm/test/CodeGen/VE/fp_extload_truncstore.ll b/llvm/test/CodeGen/VE/fp_extload_truncstore.ll
index cc6a6597b8ee..bedccef86bf7 100644
--- a/llvm/test/CodeGen/VE/fp_extload_truncstore.ll
+++ b/llvm/test/CodeGen/VE/fp_extload_truncstore.ll
@@ -1,5 +1,113 @@
 ; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
 
+declare i16 @llvm.convert.to.fp16.f32(float %a)
+declare i16 @llvm.convert.to.fp16.f64(double %a)
+
+declare float @llvm.convert.from.fp16.f32(i16 %a)
+declare double @llvm.convert.from.fp16.f64(i16 %a)
+
+define float @func_i16fp32(i16* %a) {
+; CHECK-LABEL: func_i16fp32:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ld2b.zx %s0, (,%s0)
+; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s1)
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: or %s11, 0, %s9
+  %a.val = load i16, i16* %a, align 4
+  %a.asd = call float @llvm.convert.from.fp16.f32(i16 %a.val)
+  ret float %a.asd
+}
+
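The __gnu_h2f_ieee routine called by func_i16fp32 above is compiler-rt's fp16-to-fp32 helper; since VE has no fp16 instructions, the legalized DAG ends in such a libcall, and the callee name is what reaches the backend as an ExternalSymbolSDNode. As a rough sketch of the job such a helper performs (this is not compiler-rt's code; it handles normal numbers, zeros, infinities and NaNs only, and flushes denormal inputs to zero for brevity):

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  // Simplified fp16 -> fp32 conversion sketch.
  static float halfToFloat(uint16_t h) {
    uint32_t sign = static_cast<uint32_t>(h & 0x8000u) << 16;
    uint32_t exp = (h >> 10) & 0x1Fu;
    uint32_t mant = h & 0x3FFu;
    uint32_t bits;
    if (exp == 0x1Fu)
      bits = sign | 0x7F800000u | (mant << 13); // Inf or NaN
    else if (exp == 0)
      bits = sign; // +/-0 (denormals flushed in this sketch)
    else
      bits = sign | ((exp + 112u) << 23) | (mant << 13); // rebias 15 -> 127
    float f;
    std::memcpy(&f, &bits, sizeof f);
    return f;
  }

  int main() {
    std::printf("%f\n", halfToFloat(0x3C00)); // 0x3C00 is 1.0 in IEEE fp16
    return 0;
  }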
+define double @func_i16fp64(i16* %a) {
+; CHECK-LABEL: func_i16fp64:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ld2b.zx %s0, (,%s0)
+; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s1)
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: cvt.d.s %s0, %s0
+; CHECK-NEXT: or %s11, 0, %s9
+  %a.val = load i16, i16* %a, align 4
+  %a.asd = call double @llvm.convert.from.fp16.f64(i16 %a.val)
+  ret double %a.asd
+}
+
+define float @func_fp16fp32(half* %a) {
+; CHECK-LABEL: func_fp16fp32:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ld2b.zx %s0, (,%s0)
+; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s1)
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: or %s11, 0, %s9
+  %a.val = load half, half* %a, align 4
+  %a.asd = fpext half %a.val to float
+  ret float %a.asd
+}
+
+define double @func_fp16fp64(half* %a) {
+; CHECK-LABEL: func_fp16fp64:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ld2b.zx %s0, (,%s0)
+; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s1)
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: cvt.d.s %s0, %s0
+; CHECK-NEXT: or %s11, 0, %s9
+  %a.val = load half, half* %a, align 4
+  %a.asd = fpext half %a.val to double
+  ret double %a.asd
+}
+
+define void @func_fp32i16(i16* %fl.ptr, float %val) {
+; CHECK-LABEL: func_fp32i16:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
+; CHECK-NEXT: or %s18, 0, %s0
+; CHECK-NEXT: lea %s0, __gnu_f2h_ieee@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s12, __gnu_f2h_ieee@hi(%s0)
+; CHECK-NEXT: or %s0, 0, %s1
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: st2b %s0, (,%s18)
+; CHECK-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
+; CHECK-NEXT: or %s11, 0, %s9
+  %val.asf = call i16 @llvm.convert.to.fp16.f32(float %val)
+  store i16 %val.asf, i16* %fl.ptr
+  ret void
+}
+
+define half @func_fp32fp16(half* %fl.ptr, float %a) {
+; CHECK-LABEL: func_fp32fp16:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
+; CHECK-NEXT: st %s19, 56(,%s9) # 8-byte Folded Spill
+; CHECK-NEXT: or %s18, 0, %s0
+; CHECK-NEXT: lea %s0, __gnu_f2h_ieee@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s12, __gnu_f2h_ieee@hi(%s0)
+; CHECK-NEXT: or %s0, 0, %s1
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: or %s19, 0, %s0
+; CHECK-NEXT: lea %s0, __gnu_h2f_ieee@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s0)
+; CHECK-NEXT: or %s0, 0, %s19
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: st2b %s19, (,%s18)
+; CHECK-NEXT: ld %s19, 56(,%s9) # 8-byte Folded Reload
+; CHECK-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
+; CHECK-NEXT: or %s11, 0, %s9
+  %a.asd = fptrunc float %a to half
+  store half %a.asd, half* %fl.ptr
+  ret half %a.asd
+}
+
 define double @func_fp32fp64(float* %a) {
 ; CHECK-LABEL: func_fp32fp64:
 ; CHECK: .LBB{{[0-9]+}}_2:
@@ -11,6 +119,42 @@ define double @func_fp32fp64(float* %a) {
   ret double %a.asd
 }
 
+define void @func_fp64i16(i16* %fl.ptr, double %val) {
+; CHECK-LABEL: func_fp64i16:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
+; CHECK-NEXT: or %s18, 0, %s0
+; CHECK-NEXT: lea %s0, __truncdfhf2@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s12, __truncdfhf2@hi(%s0)
+; CHECK-NEXT: or %s0, 0, %s1
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: st2b %s0, (,%s18)
+; CHECK-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
+; CHECK-NEXT: or %s11, 0, %s9
+  %val.asf = call i16 @llvm.convert.to.fp16.f64(double %val)
+  store i16 %val.asf, i16* %fl.ptr
+  ret void
+}
+
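The lea/and/lea.sl sequences checked throughout these tests are VE's idiom for materializing a 64-bit symbol address before the indirect call (bsic): lea produces the sign-extended low 32 bits, and with (32)0 (VE notation for 32 zeros followed by 32 ones, i.e. 0x00000000FFFFFFFF) clears the sign extension, and lea.sl adds the high 32 bits shifted left. A small sketch of that arithmetic, with illustrative constants (the exact relocation split is an assumption here):

  #include <cstdint>
  #include <cstdio>

  // lea %s1, sym@lo          -> sign-extended low 32 bits
  // and %s1, %s1, (32)0      -> keep bits 0..31 only
  // lea.sl %s12, sym@hi(%s1) -> add the high 32 bits shifted left by 32
  static uint64_t materializeAddress(uint32_t hi, uint32_t lo) {
    uint64_t s1 = static_cast<uint64_t>(
        static_cast<int64_t>(static_cast<int32_t>(lo)));
    s1 &= 0x00000000FFFFFFFFULL; // (32)0 mask
    return (static_cast<uint64_t>(hi) << 32) + s1;
  }

  int main() {
    // Hypothetical address 0x000000780ABCDEF0 split into @hi/@lo halves;
    // the low half has its sign bit set, which is why the masking matters.
    std::printf("%llx\n", static_cast<unsigned long long>(
                              materializeAddress(0x00000078u, 0x0ABCDEF0u)));
    return 0;
  }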
+define void @func_fp64fp16(half* %fl.ptr, double %val) {
+; CHECK-LABEL: func_fp64fp16:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
+; CHECK-NEXT: or %s18, 0, %s0
+; CHECK-NEXT: lea %s0, __truncdfhf2@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s12, __truncdfhf2@hi(%s0)
+; CHECK-NEXT: or %s0, 0, %s1
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: st2b %s0, (,%s18)
+; CHECK-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
+; CHECK-NEXT: or %s11, 0, %s9
+  %val.asf = fptrunc double %val to half
+  store half %val.asf, half* %fl.ptr
+  ret void
+}
+
 define void @func_fp64fp32(float* %fl.ptr, double %val) {
 ; CHECK-LABEL: func_fp64fp32:
 ; CHECK: .LBB{{[0-9]+}}_2: