forked from OSchip/llvm-project
[VE] half fptrunc+store&load+fpext
Summary: Add isel legalization and tests for fp16 (half) load+fpext and fptrunc+store. Also add operand printing for ExternalSymbolSDNode (tested by the fp16 lowering). Reviewed By: arsenm. Differential Revision: https://reviews.llvm.org/D73899
This commit is contained in:
parent
e943329ba0
commit
3ed12232b0
|
@ -523,10 +523,14 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
|
|||
addRegisterClass(MVT::f64, &VE::I64RegClass);
|
||||
|
||||
/// Load & Store {
|
||||
// Turn FP extload into load/fpextend
|
||||
for (MVT VT : MVT::fp_valuetypes()) {
|
||||
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
|
||||
for (MVT FPVT : MVT::fp_valuetypes()) {
|
||||
for (MVT OtherFPVT : MVT::fp_valuetypes()) {
|
||||
// Turn FP extload into load/fpextend
|
||||
setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
|
||||
|
||||
// Turn FP truncstore into trunc + store.
|
||||
setTruncStoreAction(FPVT, OtherFPVT, Expand);
|
||||
}
|
||||
}
|
||||
|
||||
// VE doesn't have an i1 sign-extending load
|
||||
|
@ -536,9 +540,6 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
|
|||
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
|
||||
setTruncStoreAction(VT, MVT::i1, Expand);
|
||||
}
|
||||
|
||||
// Turn FP truncstore into trunc + store.
|
||||
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
|
||||
/// } Load & Store
|
||||
|
||||
// Custom legalize address nodes into LO/HI parts.
|
||||
|
@ -563,12 +564,20 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
|
|||
setOperationAction(ISD::UDIVREM, IntVT, Expand);
|
||||
}
|
||||
|
||||
/// Conversion {
|
||||
// VE doesn't have instructions for fp<->uint conversions, so let LLVM expand them
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
|
||||
|
||||
// fp16 is not supported, so expand the fp16 conversions
|
||||
for (MVT FPVT : MVT::fp_valuetypes()) {
|
||||
setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
|
||||
setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
|
||||
}
|
||||
/// } Conversion
|
||||
|
||||
setStackPointerRegisterToSaveRestore(VE::SX11);
|
||||
|
||||
// Set function alignment to 16 bytes
|
||||
|
@ -612,6 +621,10 @@ SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
|
|||
return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
|
||||
0, TF);
|
||||
|
||||
if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
|
||||
return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
|
||||
TF);
|
||||
|
||||
llvm_unreachable("Unhandled address SDNode");
|
||||
}
|
||||
|
||||
|
|
|
@ -1129,6 +1129,13 @@ def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)),
|
|||
(LEASLrzi (ANDrm0 (LEAzzi tglobaladdr:$in2), 32),
|
||||
(tglobaladdr:$in1))>;
|
||||
|
||||
// Address calculation and its optimization for external symbols
|
||||
def : Pat<(VEhi texternalsym:$in), (LEASLzzi texternalsym:$in)>;
|
||||
def : Pat<(VElo texternalsym:$in), (ANDrm0 (LEAzzi texternalsym:$in), 32)>;
|
||||
def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)),
|
||||
(LEASLrzi (ANDrm0 (LEAzzi texternalsym:$in2), 32),
|
||||
(texternalsym:$in1))>;
|
||||
|
||||
// Calls
|
||||
def : Pat<(call tglobaladdr:$dst),
|
||||
(CALL tglobaladdr:$dst)>;
|
||||
|
|
|
@ -46,6 +46,9 @@ static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO,
|
|||
break;
|
||||
return MCOperand::createReg(MO.getReg());
|
||||
|
||||
case MachineOperand::MO_ExternalSymbol:
|
||||
return LowerSymbolOperand(
|
||||
MI, MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP);
|
||||
case MachineOperand::MO_GlobalAddress:
|
||||
return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP);
|
||||
case MachineOperand::MO_Immediate:
|
||||
|
|
|
@ -1,5 +1,113 @@
|
|||
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
|
||||
|
||||
declare i16 @llvm.convert.to.fp16.f32(float %a)
|
||||
declare i16 @llvm.convert.to.fp16.f64(double %a)
|
||||
|
||||
declare float @llvm.convert.from.fp16.f32(i16 %a)
|
||||
declare double @llvm.convert.from.fp16.f64(i16 %a)
|
||||
|
||||
define float @func_i16fp32(i16* %a) {
|
||||
; CHECK-LABEL: func_i16fp32:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: ld2b.zx %s0, (,%s0)
|
||||
; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s1)
|
||||
; CHECK-NEXT: bsic %lr, (,%s12)
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
%a.val = load i16, i16* %a, align 4
|
||||
%a.asd = call float @llvm.convert.from.fp16.f32(i16 %a.val)
|
||||
ret float %a.asd
|
||||
}
|
||||
|
||||
define double @func_i16fp64(i16* %a) {
|
||||
; CHECK-LABEL: func_i16fp64:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: ld2b.zx %s0, (,%s0)
|
||||
; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s1)
|
||||
; CHECK-NEXT: bsic %lr, (,%s12)
|
||||
; CHECK-NEXT: cvt.d.s %s0, %s0
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
%a.val = load i16, i16* %a, align 4
|
||||
%a.asd = call double @llvm.convert.from.fp16.f64(i16 %a.val)
|
||||
ret double %a.asd
|
||||
}
|
||||
|
||||
define float @func_fp16fp32(half* %a) {
|
||||
; CHECK-LABEL: func_fp16fp32:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: ld2b.zx %s0, (,%s0)
|
||||
; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s1)
|
||||
; CHECK-NEXT: bsic %lr, (,%s12)
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
%a.val = load half, half* %a, align 4
|
||||
%a.asd = fpext half %a.val to float
|
||||
ret float %a.asd
|
||||
}
|
||||
|
||||
define double @func_fp16fp64(half* %a) {
|
||||
; CHECK-LABEL: func_fp16fp64:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: ld2b.zx %s0, (,%s0)
|
||||
; CHECK-NEXT: lea %s1, __gnu_h2f_ieee@lo
|
||||
; CHECK-NEXT: and %s1, %s1, (32)0
|
||||
; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s1)
|
||||
; CHECK-NEXT: bsic %lr, (,%s12)
|
||||
; CHECK-NEXT: cvt.d.s %s0, %s0
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
%a.val = load half, half* %a, align 4
|
||||
%a.asd = fpext half %a.val to double
|
||||
ret double %a.asd
|
||||
}
|
||||
|
||||
define void @func_fp32i16(i16* %fl.ptr, float %val) {
|
||||
; CHECK-LABEL: func_fp32i16:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: or %s18, 0, %s0
|
||||
; CHECK-NEXT: lea %s0, __gnu_f2h_ieee@lo
|
||||
; CHECK-NEXT: and %s0, %s0, (32)0
|
||||
; CHECK-NEXT: lea.sl %s12, __gnu_f2h_ieee@hi(%s0)
|
||||
; CHECK-NEXT: or %s0, 0, %s1
|
||||
; CHECK-NEXT: bsic %lr, (,%s12)
|
||||
; CHECK-NEXT: st2b %s0, (,%s18)
|
||||
; CHECK-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
%val.asf = call i16 @llvm.convert.to.fp16.f32(float %val)
|
||||
store i16 %val.asf, i16* %fl.ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define half @func_fp32fp16(half* %fl.ptr, float %a) {
|
||||
; CHECK-LABEL: func_fp32fp16:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: st %s19, 56(,%s9) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: or %s18, 0, %s0
|
||||
; CHECK-NEXT: lea %s0, __gnu_f2h_ieee@lo
|
||||
; CHECK-NEXT: and %s0, %s0, (32)0
|
||||
; CHECK-NEXT: lea.sl %s12, __gnu_f2h_ieee@hi(%s0)
|
||||
; CHECK-NEXT: or %s0, 0, %s1
|
||||
; CHECK-NEXT: bsic %lr, (,%s12)
|
||||
; CHECK-NEXT: or %s19, 0, %s0
|
||||
; CHECK-NEXT: lea %s0, __gnu_h2f_ieee@lo
|
||||
; CHECK-NEXT: and %s0, %s0, (32)0
|
||||
; CHECK-NEXT: lea.sl %s12, __gnu_h2f_ieee@hi(%s0)
|
||||
; CHECK-NEXT: or %s0, 0, %s19
|
||||
; CHECK-NEXT: bsic %lr, (,%s12)
|
||||
; CHECK-NEXT: st2b %s19, (,%s18)
|
||||
; CHECK-NEXT: ld %s19, 56(,%s9) # 8-byte Folded Reload
|
||||
; CHECK-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
%a.asd = fptrunc float %a to half
|
||||
store half %a.asd, half* %fl.ptr
|
||||
ret half %a.asd
|
||||
}
|
||||
|
||||
define double @func_fp32fp64(float* %a) {
|
||||
; CHECK-LABEL: func_fp32fp64:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
|
@ -11,6 +119,42 @@ define double @func_fp32fp64(float* %a) {
|
|||
ret double %a.asd
|
||||
}
|
||||
|
||||
define void @func_fp64i16(i16* %fl.ptr, double %val) {
|
||||
; CHECK-LABEL: func_fp64i16:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: or %s18, 0, %s0
|
||||
; CHECK-NEXT: lea %s0, __truncdfhf2@lo
|
||||
; CHECK-NEXT: and %s0, %s0, (32)0
|
||||
; CHECK-NEXT: lea.sl %s12, __truncdfhf2@hi(%s0)
|
||||
; CHECK-NEXT: or %s0, 0, %s1
|
||||
; CHECK-NEXT: bsic %lr, (,%s12)
|
||||
; CHECK-NEXT: st2b %s0, (,%s18)
|
||||
; CHECK-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
%val.asf = call i16 @llvm.convert.to.fp16.f64(double %val)
|
||||
store i16 %val.asf, i16* %fl.ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_fp64fp16(half* %fl.ptr, double %val) {
|
||||
; CHECK-LABEL: func_fp64fp16:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
; CHECK-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: or %s18, 0, %s0
|
||||
; CHECK-NEXT: lea %s0, __truncdfhf2@lo
|
||||
; CHECK-NEXT: and %s0, %s0, (32)0
|
||||
; CHECK-NEXT: lea.sl %s12, __truncdfhf2@hi(%s0)
|
||||
; CHECK-NEXT: or %s0, 0, %s1
|
||||
; CHECK-NEXT: bsic %lr, (,%s12)
|
||||
; CHECK-NEXT: st2b %s0, (,%s18)
|
||||
; CHECK-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
|
||||
; CHECK-NEXT: or %s11, 0, %s9
|
||||
%val.asf = fptrunc double %val to half
|
||||
store half %val.asf, half* %fl.ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_fp64fp32(float* %fl.ptr, double %val) {
|
||||
; CHECK-LABEL: func_fp64fp32:
|
||||
; CHECK: .LBB{{[0-9]+}}_2:
|
||||
|
|
Loading…
Reference in New Issue