[VE] half fptrunc+store&load+fpext

Summary:
Add isel legalization and tests for fp16 (half) load+fpext and fptrunc+store.
Also add ExternalSymbolSDNode operand printing (exercised by the fp16 lowering tests).

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D73899
This commit is contained in:
Kazushi (Jam) Marukawa 2020-02-04 16:55:20 +01:00 committed by Simon Moll
parent e943329ba0
commit 3ed12232b0
4 changed files with 174 additions and 7 deletions

View File

@ -523,10 +523,14 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::f64, &VE::I64RegClass);
/// Load & Store {
// Turn FP extload into load/fpextend
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
for (MVT FPVT : MVT::fp_valuetypes()) {
for (MVT OtherFPVT : MVT::fp_valuetypes()) {
// Turn FP extload into load/fpextend
setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
// Turn FP truncstore into trunc + store.
setTruncStoreAction(FPVT, OtherFPVT, Expand);
}
}
// VE doesn't have i1 sign extending load
@ -536,9 +540,6 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
setTruncStoreAction(VT, MVT::i1, Expand);
}
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
/// } Load & Store
// Custom legalize address nodes into LO/HI parts.
@ -563,12 +564,20 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UDIVREM, IntVT, Expand);
}
/// Conversion {
// VE doesn't have instructions for fp<->uint, so expand them by llvm
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
// fp16 not supported
for (MVT FPVT : MVT::fp_valuetypes()) {
setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
}
/// } Conversion
setStackPointerRegisterToSaveRestore(VE::SX11);
// Set function alignment to 16 bytes
@ -612,6 +621,10 @@ SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
0, TF);
if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
TF);
llvm_unreachable("Unhandled address SDNode");
}

View File

@ -1129,6 +1129,13 @@ def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)),
(LEASLrzi (ANDrm0 (LEAzzi tglobaladdr:$in2), 32),
(tglobaladdr:$in1))>;
// Address calculation and its optimization
// Selection patterns for the high and low parts (VEhi / VElo) of an external
// symbol address. They have the same shape as the tglobaladdr pattern that
// precedes them.
// VEhi alone selects LEASLzzi on the symbol.
def : Pat<(VEhi texternalsym:$in), (LEASLzzi texternalsym:$in)>;
// VElo alone selects LEAzzi on the symbol followed by ANDrm0 with operand 32
// (presumably masking the result down to its low 32 bits -- confirm against
// the ANDrm0 definition).
def : Pat<(VElo texternalsym:$in), (ANDrm0 (LEAzzi texternalsym:$in), 32)>;
// Optimization: an add of the hi and lo parts is selected as a single LEASLrzi
// that takes the masked low part as its register operand and the symbol as
// its other operand, instead of selecting the three nodes separately.
def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)),
(LEASLrzi (ANDrm0 (LEAzzi texternalsym:$in2), 32),
(texternalsym:$in1))>;
// Calls
def : Pat<(call tglobaladdr:$dst),
(CALL tglobaladdr:$dst)>;

View File

@ -46,6 +46,9 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO,
break;
return MCOperand::createReg(MO.getReg());
case MachineOperand::MO_ExternalSymbol:
return LowerSymbolOperand(
MI, MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP);
case MachineOperand::MO_GlobalAddress:
return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP);
case MachineOperand::MO_Immediate:

View File

@ -1,5 +1,113 @@
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
declare i16 @llvm.convert.to.fp16.f32(float %a)
declare i16 @llvm.convert.to.fp16.f64(double %a)
declare float @llvm.convert.from.fp16.f32(i16 %a)
declare double @llvm.convert.from.fp16.f64(i16 %a)
; Loads an i16 through %a and converts it to float with the
; llvm.convert.from.fp16.f32 intrinsic. The expected output is a 2-byte
; zero-extending load followed by a call to __gnu_h2f_ieee, whose address is
; assembled from its @lo and @hi parts (lea / and / lea.sl).
; NOTE(review): the load is annotated "align 4" although i16 is 2 bytes wide;
; confirm this over-alignment is intended.
define float @func_i16fp32(i16* %a) {
; CHECK-LABEL: func_i16fp32:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: ld2b.zx %s0, (,%s0)
; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s1)
; CHECK-NEXT: bsic %lr, (,%s12)
; CHECK-NEXT: or %s11, 0, %s9
%a.val = load i16, i16* %a, align 4
%a.asd = call float @llvm.convert.from.fp16.f32(i16 %a.val)
ret float %a.asd
}
; Loads an i16 through %a and converts it to double with the
; llvm.convert.from.fp16.f64 intrinsic. The expected output calls
; __gnu_h2f_ieee and then applies cvt.d.s to the returned value, i.e. the
; conversion is done in two steps (presumably half -> float -> double).
; NOTE(review): the load is annotated "align 4" although i16 is 2 bytes wide;
; confirm this over-alignment is intended.
define double @func_i16fp64(i16* %a) {
; CHECK-LABEL: func_i16fp64:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: ld2b.zx %s0, (,%s0)
; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s1)
; CHECK-NEXT: bsic %lr, (,%s12)
; CHECK-NEXT: cvt.d.s %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
%a.val = load i16, i16* %a, align 4
%a.asd = call double @llvm.convert.from.fp16.f64(i16 %a.val)
ret double %a.asd
}
; Loads a half through %a and extends it to float with fpext. The expected
; output is the same instruction sequence as for the intrinsic-based
; func_i16fp32 variant: a 2-byte zero-extending load followed by a call to
; __gnu_h2f_ieee.
; NOTE(review): the load is annotated "align 4" although half is 2 bytes wide;
; confirm this over-alignment is intended.
define float @func_fp16fp32(half* %a) {
; CHECK-LABEL: func_fp16fp32:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: ld2b.zx %s0, (,%s0)
; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s1)
; CHECK-NEXT: bsic %lr, (,%s12)
; CHECK-NEXT: or %s11, 0, %s9
%a.val = load half, half* %a, align 4
%a.asd = fpext half %a.val to float
ret float %a.asd
}
; Loads a half through %a and extends it to double with fpext. The expected
; output calls __gnu_h2f_ieee and then applies cvt.d.s to the returned value,
; i.e. the extension is done in two steps (presumably half -> float -> double).
; NOTE(review): the load is annotated "align 4" although half is 2 bytes wide;
; confirm this over-alignment is intended.
define double @func_fp16fp64(half* %a) {
; CHECK-LABEL: func_fp16fp64:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: ld2b.zx %s0, (,%s0)
; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s1)
; CHECK-NEXT: bsic %lr, (,%s12)
; CHECK-NEXT: cvt.d.s %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
%a.val = load half, half* %a, align 4
%a.asd = fpext half %a.val to double
ret double %a.asd
}
; Converts the float %val to an i16 with the llvm.convert.to.fp16.f32
; intrinsic and stores it through %fl.ptr. The expected output calls
; __gnu_f2h_ieee and stores the result with st2b. %s18 is spilled and reloaded
; around the call and receives %s0 beforehand (presumably to keep the
; destination pointer live across the call).
define void @func_fp32i16(i16* %fl.ptr, float %val) {
; CHECK-LABEL: func_fp32i16:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
; CHECK-NEXT: or %s18, 0, %s0
; CHECK-NEXT: lea %s0, __gnu_f2h_ieee@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __gnu_f2h_ieee@hi(%s0)
; CHECK-NEXT: or %s0, 0, %s1
; CHECK-NEXT: bsic %lr, (,%s12)
; CHECK-NEXT: st2b %s0, (,%s18)
; CHECK-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
; CHECK-NEXT: or %s11, 0, %s9
%val.asf = call i16 @llvm.convert.to.fp16.f32(float %val)
store i16 %val.asf, i16* %fl.ptr
ret void
}
; Truncates the float %a to half with fptrunc, stores the half through
; %fl.ptr, and also returns it. The expected output calls __gnu_f2h_ieee for
; the truncation, saves that result in %s19, calls __gnu_h2f_ieee on it, and
; stores %s19 with st2b.
; NOTE(review): the second call presumably converts the half return value
; back to the form in which it is returned -- confirm against the VE calling
; convention for half.
define half @func_fp32fp16(half* %fl.ptr, float %a) {
; CHECK-LABEL: func_fp32fp16:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
; CHECK-NEXT: st %s19, 56(,%s9) # 8-byte Folded Spill
; CHECK-NEXT: or %s18, 0, %s0
; CHECK-NEXT: lea %s0, __gnu_f2h_ieee@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __gnu_f2h_ieee@hi(%s0)
; CHECK-NEXT: or %s0, 0, %s1
; CHECK-NEXT: bsic %lr, (,%s12)
; CHECK-NEXT: or %s19, 0, %s0
; CHECK-NEXT: lea %s0, __gnu_h2f_ieee@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s0)
; CHECK-NEXT: or %s0, 0, %s19
; CHECK-NEXT: bsic %lr, (,%s12)
; CHECK-NEXT: st2b %s19, (,%s18)
; CHECK-NEXT: ld %s19, 56(,%s9) # 8-byte Folded Reload
; CHECK-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
; CHECK-NEXT: or %s11, 0, %s9
%a.asd = fptrunc float %a to half
store half %a.asd, half* %fl.ptr
ret half %a.asd
}
define double @func_fp32fp64(float* %a) {
; CHECK-LABEL: func_fp32fp64:
; CHECK: .LBB{{[0-9]+}}_2:
@ -11,6 +119,42 @@ define double @func_fp32fp64(float* %a) {
ret double %a.asd
}
; Converts the double %val to an i16 with the llvm.convert.to.fp16.f64
; intrinsic and stores it through %fl.ptr. The expected output calls
; __truncdfhf2 (not __gnu_f2h_ieee as in the float variants) and stores the
; result with st2b. %s18 is spilled and reloaded around the call and receives
; %s0 beforehand (presumably to keep the destination pointer live across the
; call).
define void @func_fp64i16(i16* %fl.ptr, double %val) {
; CHECK-LABEL: func_fp64i16:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
; CHECK-NEXT: or %s18, 0, %s0
; CHECK-NEXT: lea %s0, __truncdfhf2@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __truncdfhf2@hi(%s0)
; CHECK-NEXT: or %s0, 0, %s1
; CHECK-NEXT: bsic %lr, (,%s12)
; CHECK-NEXT: st2b %s0, (,%s18)
; CHECK-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
; CHECK-NEXT: or %s11, 0, %s9
%val.asf = call i16 @llvm.convert.to.fp16.f64(double %val)
store i16 %val.asf, i16* %fl.ptr
ret void
}
; Truncates the double %val to half with fptrunc and stores it through
; %fl.ptr. The expected output is the same instruction sequence as for the
; intrinsic-based func_fp64i16 variant: a call to __truncdfhf2 followed by a
; st2b of the result, with %s18 spilled and reloaded around the call.
define void @func_fp64fp16(half* %fl.ptr, double %val) {
; CHECK-LABEL: func_fp64fp16:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
; CHECK-NEXT: or %s18, 0, %s0
; CHECK-NEXT: lea %s0, __truncdfhf2@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, __truncdfhf2@hi(%s0)
; CHECK-NEXT: or %s0, 0, %s1
; CHECK-NEXT: bsic %lr, (,%s12)
; CHECK-NEXT: st2b %s0, (,%s18)
; CHECK-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
; CHECK-NEXT: or %s11, 0, %s9
%val.asf = fptrunc double %val to half
store half %val.asf, half* %fl.ptr
ret void
}
define void @func_fp64fp32(float* %fl.ptr, double %val) {
; CHECK-LABEL: func_fp64fp32:
; CHECK: .LBB{{[0-9]+}}_2: