2012-02-18 20:03:15 +08:00
|
|
|
//===-- ARMInstrVFP.td - VFP support for ARM ---------------*- tablegen -*-===//
|
2007-01-19 15:51:42 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2007-01-19 15:51:42 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2008-09-12 05:41:29 +08:00
|
|
|
// This file describes the ARM VFP instruction set.
|
2007-01-19 15:51:42 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2019-10-08 16:25:42 +08:00
|
|
|
// Type profile for the compare-with-zero nodes below: no results and a single
// operand, which must be a floating-point type.
def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
|
2010-10-16 05:50:45 +08:00
|
|
|
// Type profile for VMOVDRR: one f64 result and two operands. The first operand
// is i32 and the second is constrained to have the same type as the first.
def SDT_VMOVDRR : SDTypeProfile<1, 2,
                                [SDTCisVT<0, f64>,
                                 SDTCisVT<1, i32>,
                                 SDTCisSameAs<1, 2>]>;
|
2017-03-15 02:43:37 +08:00
|
|
|
// Type profile for VMOVRRD: two results and one operand. The first result is
// i32, the second result has the same type as the first, and the operand is
// f64.
def SDT_VMOVRRD : SDTypeProfile<2, 1,
                                [SDTCisVT<0, i32>,
                                 SDTCisSameAs<0, 1>,
                                 SDTCisVT<2, f64>]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2018-03-23 21:02:03 +08:00
|
|
|
// Type profile for VMOVSR: one f32 result produced from one i32 operand.
def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>;
|
|
|
|
|
2010-12-24 02:28:41 +08:00
|
|
|
// DAG node for ARMISD::FMSTAT. It has no typed operands or results (SDTNone)
// and both consumes and produces glue.
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
|
2019-10-08 16:25:42 +08:00
|
|
|
// DAG node for ARMISD::CMPFP, typed by SDT_ARMCmp; produces glue.
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
|
2010-12-24 02:28:41 +08:00
|
|
|
// DAG node for ARMISD::CMPFPw0, typed by SDT_CMPFP0 (a single floating-point
// operand); produces glue.
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
|
2020-01-22 22:04:12 +08:00
|
|
|
// DAG node for ARMISD::CMPFPE, typed by SDT_ARMCmp; produces glue.
// NOTE(review): presumably the exception-signalling counterpart of arm_cmpfp
// -- confirm against the ARMISD definitions.
def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_ARMCmp, [SDNPOutGlue]>;
|
|
|
|
// DAG node for ARMISD::CMPFPEw0, typed by SDT_CMPFP0 (a single floating-point
// operand); produces glue.
// NOTE(review): presumably the exception-signalling counterpart of arm_cmpfp0
// -- confirm against the ARMISD definitions.
def arm_cmpfpe0: SDNode<"ARMISD::CMPFPEw0",SDT_CMPFP0, [SDNPOutGlue]>;
|
2010-10-16 05:50:45 +08:00
|
|
|
// DAG node for ARMISD::VMOVDRR, typed by SDT_VMOVDRR (f64 result from two i32
// operands).
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
|
2017-03-15 02:43:37 +08:00
|
|
|
// DAG node for ARMISD::VMOVRRD, typed by SDT_VMOVRRD (two i32 results from one
// f64 operand).
def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
|
2018-03-23 21:02:03 +08:00
|
|
|
// DAG node for ARMISD::VMOVSR, typed by SDT_VMOVSR (f32 result from one i32
// operand).
def arm_vmovsr : SDNode<"ARMISD::VMOVSR", SDT_VMOVSR>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2018-01-31 18:18:29 +08:00
|
|
|
// Type profile for VMOVhr: one floating-point result from one i32 operand.
def SDT_VMOVhr : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, i32>] >;
|
|
|
|
// Type profile for VMOVrh: one i32 result from one floating-point operand.
def SDT_VMOVrh : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisFP<1>] >;
|
|
|
|
// DAG node for ARMISD::VMOVhr, typed by SDT_VMOVhr (i32 -> floating point).
def arm_vmovhr : SDNode<"ARMISD::VMOVhr", SDT_VMOVhr>;
|
|
|
|
// DAG node for ARMISD::VMOVrh, typed by SDT_VMOVrh (floating point -> i32).
def arm_vmovrh : SDNode<"ARMISD::VMOVrh", SDT_VMOVrh>;
|
|
|
|
|
2009-10-28 09:44:26 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Operand Definitions.
|
|
|
|
//
|
|
|
|
|
2011-10-04 07:38:36 +08:00
|
|
|
// 8-bit floating-point immediate encodings.
|
|
|
|
// Assembler operand class shared by the VFP floating-point immediate operands
// defined below. Operands of this class are parsed by the custom parser method
// "parseFPImm".
def FPImmOperand : AsmOperandClass {
  let Name         = "FPImm";
  let ParserMethod = "parseFPImm";
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// f16 immediate operand. The pattern leaf matches only those f16 constants for
// which ARM_AM::getFP16Imm() yields an encoding (i.e. does not return -1); the
// attached transform replaces the constant with that encoding as an i32 target
// constant.
def vfp_f16imm : Operand<f16>,
                 PatLeaf<(f16 fpimm), [{
                   return ARM_AM::getFP16Imm(N->getValueAPF()) != -1;
                 }], SDNodeXForm<fpimm, [{
                   uint32_t Encoding = ARM_AM::getFP16Imm(N->getValueAPF());
                   return CurDAG->getTargetConstant(Encoding, SDLoc(N),
                                                    MVT::i32);
                 }]>> {
  let PrintMethod      = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
|
|
|
|
|
2019-04-10 17:14:32 +08:00
|
|
|
// Transform an f32 constant node into an i32 target constant holding the
// encoding computed by ARM_AM::getFP32Imm().
def vfp_f32imm_xform : SDNodeXForm<fpimm, [{
  uint32_t Encoding = ARM_AM::getFP32Imm(N->getValueAPF());
  return CurDAG->getTargetConstant(Encoding, SDLoc(N), MVT::i32);
}]>;
|
|
|
|
|
|
|
|
// GlobalISel counterpart of vfp_f32imm_xform: the operand is rendered by the
// custom renderer "renderVFPF32Imm".
def gi_vfp_f32imm
    : GICustomOperandRenderer<"renderVFPF32Imm">,
      GISDNodeXFormEquiv<vfp_f32imm_xform>;
|
|
|
|
|
|
|
|
// f32 immediate operand. The pattern leaf matches only those f32 constants for
// which ARM_AM::getFP32Imm() yields an encoding (i.e. does not return -1), and
// vfp_f32imm_xform converts a matched constant into that encoding.
def vfp_f32imm : Operand<f32>,
                 PatLeaf<(f32 fpimm), [{
                   return ARM_AM::getFP32Imm(N->getValueAPF()) != -1;
                 }], vfp_f32imm_xform> {
  let PrintMethod      = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;

  // GlobalISel version of the predicate above: operand 1 of the candidate
  // instruction must be an FP immediate that getFP32Imm() can encode.
  let GISelPredicateCode = [{
    const auto &Src = MI.getOperand(1);
    return Src.isFPImm() &&
           ARM_AM::getFP32Imm(Src.getFPImm()->getValueAPF()) != -1;
  }];
}
|
|
|
|
|
2019-04-10 17:14:32 +08:00
|
|
|
// Transform an f64 constant node into an i32 target constant holding the
// encoding computed by ARM_AM::getFP64Imm().
def vfp_f64imm_xform : SDNodeXForm<fpimm, [{
  uint32_t Encoding = ARM_AM::getFP64Imm(N->getValueAPF());
  return CurDAG->getTargetConstant(Encoding, SDLoc(N), MVT::i32);
}]>;
|
|
|
|
|
|
|
|
// GlobalISel counterpart of vfp_f64imm_xform: the operand is rendered by the
// custom renderer "renderVFPF64Imm".
def gi_vfp_f64imm
    : GICustomOperandRenderer<"renderVFPF64Imm">,
      GISDNodeXFormEquiv<vfp_f64imm_xform>;
|
|
|
|
|
|
|
|
// f64 immediate operand. The pattern leaf matches only those f64 constants for
// which ARM_AM::getFP64Imm() yields an encoding (i.e. does not return -1), and
// vfp_f64imm_xform converts a matched constant into that encoding.
def vfp_f64imm : Operand<f64>,
                 PatLeaf<(f64 fpimm), [{
                   return ARM_AM::getFP64Imm(N->getValueAPF()) != -1;
                 }], vfp_f64imm_xform> {
  let PrintMethod      = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;

  // GlobalISel version of the predicate above: operand 1 of the candidate
  // instruction must be an FP immediate that getFP64Imm() can encode.
  let GISelPredicateCode = [{
    const auto &Src = MI.getOperand(1);
    return Src.isFPImm() &&
           ARM_AM::getFP64Imm(Src.getFPImm()->getValueAPF()) != -1;
  }];
}
|
|
|
|
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation.
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issue before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH descriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not to be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 3/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
// Matches a load whose recorded alignment is at least 2 bytes.
def alignedload16 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  const auto *Ld = cast<LoadSDNode>(N);
  return Ld->getAlignment() >= 2;
}]>;
|
|
|
|
|
2012-08-16 01:44:53 +08:00
|
|
|
// Matches a load whose recorded alignment is at least 4 bytes.
def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  const auto *Ld = cast<LoadSDNode>(N);
  return Ld->getAlignment() >= 4;
}]>;
|
|
|
|
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation.
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issue before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH descriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not to be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 3/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
// Matches a store whose recorded alignment is at least 2 bytes.
def alignedstore16
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  const auto *St = cast<StoreSDNode>(N);
  return St->getAlignment() >= 2;
}]>;
|
|
|
|
|
2012-08-16 01:44:53 +08:00
|
|
|
// Matches a store whose recorded alignment is at least 4 bytes.
def alignedstore32
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  const auto *St = cast<StoreSDNode>(N);
  return St->getAlignment() >= 4;
}]>;
|
|
|
|
|
2011-12-23 06:19:05 +08:00
|
|
|
// The VCVT to/from fixed-point instructions encode the 'fbits' operand
|
|
|
|
// (the number of fixed bits) differently than it appears in the assembly
|
|
|
|
// source. It's encoded as "Size - fbits" where Size is the size of the
|
|
|
|
// fixed-point representation (32 or 16) and fbits is the value appearing
|
|
|
|
// in the assembly source, an integer in [0,16] or (0,32], depending on size.
|
|
|
|
// Assembler operand class named "FBits32"; used as the parser match class of
// the fbits32 operand.
def fbits32_asm_operand : AsmOperandClass { let Name = "FBits32"; }
|
|
|
|
// 'fbits' operand for the 32-bit fixed-point form: an i32 operand printed by
// "printFBits32" and matched by the FBits32 assembler operand class.
def fbits32 : Operand<i32> {
  let PrintMethod      = "printFBits32";
  let ParserMatchClass = fbits32_asm_operand;
}
|
|
|
|
|
|
|
|
// Assembler operand class named "FBits16"; used as the parser match class of
// the fbits16 operand.
def fbits16_asm_operand : AsmOperandClass { let Name = "FBits16"; }
|
|
|
|
// 'fbits' operand for the 16-bit fixed-point form: an i32 operand printed by
// "printFBits16" and matched by the FBits16 assembler operand class.
def fbits16 : Operand<i32> {
  let PrintMethod      = "printFBits16";
  let ParserMatchClass = fbits16_asm_operand;
}
|
2009-10-28 09:44:26 +08:00
|
|
|
|
2007-01-19 15:51:42 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Load / store Instructions.
|
|
|
|
//
|
|
|
|
|
2010-02-28 07:47:46 +08:00
|
|
|
let canFoldAsLoad = 1, isReMaterializable = 1 in {
|
2010-11-03 09:49:29 +08:00
|
|
|
|
2010-10-21 06:44:54 +08:00
|
|
|
// VLDR (double): load a DPR register from an addrmode5 address. Selected for
// f64 loads that satisfy alignedload32 (alignment of at least 4 bytes).
def VLDRD : ADI5<0b1101, 0b01,
                 (outs DPR:$Dd), (ins addrmode5:$addr),
                 IIC_fpLoad64, "vldr", "\t$Dd, $addr",
                 [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>,
            Requires<[HasFPRegs]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-11-03 09:49:29 +08:00
|
|
|
// VLDR (single): load an SPR register from an addrmode5 address. Selected for
// loads that satisfy alignedload32 (alignment of at least 4 bytes).
def VLDRS : ASI5<0b1101, 0b01,
                 (outs SPR:$Sd), (ins addrmode5:$addr),
                 IIC_fpLoad32, "vldr", "\t$Sd, $addr",
                 [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]>,
            Requires<[HasFPRegs]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines.
  let D = VFPNeonDomain;
}
|
2010-11-03 09:49:29 +08:00
|
|
|
|
[ARM] Make fullfp16 instructions not conditionalisable.
More or less all the instructions defined in the v8.2a full-fp16
extension are defined as UNPREDICTABLE if you put them in an IT block
(Thumb) or use with any condition other than AL (ARM). LLVM didn't
know that, and was happy to conditionalise them.
In order to force these instructions to count as not predicable, I had
to make a small Tablegen change. The code generation back end mostly
decides if an instruction was predicable by looking for something it
can identify as a predicate operand; there's an isPredicable bit flag
that overrides that check in the positive direction, but nothing that
overrides it in the negative direction.
(I considered the alternative approach of actually removing the
predicate operand from those instructions, but thought that it would
be more painful overall for instructions differing only in data type
to have different shapes of operand list. This way, the only code that
has to notice the difference is the if-converter.)
So I've added an isUnpredicable bit alongside isPredicable, and set
that bit on the right subset of FP16 instructions, and also on the
VSEL, VMAXNM/VMINNM and VRINT[ANPM] families which should be
unpredicable for all data types.
I've included a couple of representative regression tests, both of
which previously caused an fp16 instruction to be conditionalised in
ARM state and (with -arm-no-restrict-it) to be put in an IT block in
Thumb.
Reviewers: SjoerdMeijer, t.p.northover, efriedma
Reviewed By: efriedma
Subscribers: jdoerfert, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57823
llvm-svn: 354768
2019-02-25 18:39:53 +08:00
|
|
|
// VLDR.16: load an HPR register from an addrmode5fp16 address. Selected for
// loads that satisfy alignedload16 (alignment of at least 2 bytes). The
// instruction is marked unpredicable.
let isUnpredicable = 1 in
def VLDRH : AHI5<0b1101, 0b01,
                 (outs HPR:$Sd), (ins addrmode5fp16:$addr),
                 IIC_fpLoad16, "vldr", ".16\t$Sd, $addr",
                 [(set HPR:$Sd, (alignedload16 addrmode5fp16:$addr))]>,
            Requires<[HasFPRegs16]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2010-11-03 09:49:29 +08:00
|
|
|
} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-11-04 08:59:42 +08:00
|
|
|
// VSTR (double): store a DPR register to an addrmode5 address. Selected for
// f64 stores that satisfy alignedstore32 (alignment of at least 4 bytes).
def VSTRD : ADI5<0b1101, 0b00,
                 (outs), (ins DPR:$Dd, addrmode5:$addr),
                 IIC_fpStore64, "vstr", "\t$Dd, $addr",
                 [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>,
            Requires<[HasFPRegs]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-11-04 08:59:42 +08:00
|
|
|
// VSTR (single): store an SPR register to an addrmode5 address. Selected for
// stores that satisfy alignedstore32 (alignment of at least 4 bytes).
def VSTRS : ASI5<0b1101, 0b00,
                 (outs), (ins SPR:$Sd, addrmode5:$addr),
                 IIC_fpStore32, "vstr", "\t$Sd, $addr",
                 [(alignedstore32 SPR:$Sd, addrmode5:$addr)]>,
            Requires<[HasFPRegs]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines.
  let D = VFPNeonDomain;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
[ARM] Make fullfp16 instructions not conditionalisable.
More or less all the instructions defined in the v8.2a full-fp16
extension are defined as UNPREDICTABLE if you put them in an IT block
(Thumb) or use with any condition other than AL (ARM). LLVM didn't
know that, and was happy to conditionalise them.
In order to force these instructions to count as not predicable, I had
to make a small Tablegen change. The code generation back end mostly
decides if an instruction was predicable by looking for something it
can identify as a predicate operand; there's an isPredicable bit flag
that overrides that check in the positive direction, but nothing that
overrides it in the negative direction.
(I considered the alternative approach of actually removing the
predicate operand from those instructions, but thought that it would
be more painful overall for instructions differing only in data type
to have different shapes of operand list. This way, the only code that
has to notice the difference is the if-converter.)
So I've added an isUnpredicable bit alongside isPredicable, and set
that bit on the right subset of FP16 instructions, and also on the
VSEL, VMAXNM/VMINNM and VRINT[ANPM] families which should be
unpredicable for all data types.
I've included a couple of representative regression tests, both of
which previously caused an fp16 instruction to be conditionalised in
ARM state and (with -arm-no-restrict-it) to be put in an IT block in
Thumb.
Reviewers: SjoerdMeijer, t.p.northover, efriedma
Reviewed By: efriedma
Subscribers: jdoerfert, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57823
llvm-svn: 354768
2019-02-25 18:39:53 +08:00
|
|
|
// VSTR.16: store an HPR register to an addrmode5fp16 address. Selected for
// stores that satisfy alignedstore16 (alignment of at least 2 bytes). The
// instruction is marked unpredicable.
let isUnpredicable = 1 in
def VSTRH : AHI5<0b1101, 0b00,
                 (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
                 IIC_fpStore16, "vstr", ".16\t$Sd, $addr",
                 [(alignedstore16 HPR:$Sd, addrmode5fp16:$addr)]>,
            Requires<[HasFPRegs16]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2007-01-19 15:51:42 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Load / store multiple Instructions.
|
|
|
|
//
|
|
|
|
|
2010-11-16 09:16:36 +08:00
|
|
|
// Load/store-multiple family. Each instantiation produces six records:
// double- and single-precision variants of increment-after (IA),
// increment-after with writeback (IA_UPD) and decrement-before with writeback
// (DB_UPD).
//   asm      - mnemonic stem; "ia"/"db" and the predicate are appended here.
//   L_bit    - value placed in Inst{20}.
//   itin     - itinerary for the non-writeback forms.
//   itin_upd - itinerary for the writeback forms.
// No selection patterns are attached to any of the records.
multiclass vfp_ldst_mult<string asm, bit L_bit,
                         InstrItinClass itin, InstrItinClass itin_upd> {
  let Predicates = [HasFPRegs] in {

    // ---- Double Precision ----
    def DIA :
      AXDI4<(outs),
            (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
            IndexModeNone, itin,
            !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
      let Inst{24-23} = 0b01;       // Increment After
      let Inst{21}    = 0;          // No writeback
      let Inst{20}    = L_bit;
    }

    def DIA_UPD :
      AXDI4<(outs GPR:$wb),
            (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
            IndexModeUpd, itin_upd,
            !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
      let Inst{24-23} = 0b01;       // Increment After
      let Inst{21}    = 1;          // Writeback
      let Inst{20}    = L_bit;
    }

    def DDB_UPD :
      AXDI4<(outs GPR:$wb),
            (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
            IndexModeUpd, itin_upd,
            !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
      let Inst{24-23} = 0b10;       // Decrement Before
      let Inst{21}    = 1;          // Writeback
      let Inst{20}    = L_bit;
    }

    // ---- Single Precision ----
    // Some single precision VFP instructions may be executed on both NEON and
    // VFP pipelines, so all three single-precision records share this domain.
    let D = VFPNeonDomain in {
      def SIA :
        AXSI4<(outs),
              (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
              IndexModeNone, itin,
              !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
        let Inst{24-23} = 0b01;     // Increment After
        let Inst{21}    = 0;        // No writeback
        let Inst{20}    = L_bit;
      }

      def SIA_UPD :
        AXSI4<(outs GPR:$wb),
              (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
              IndexModeUpd, itin_upd,
              !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
        let Inst{24-23} = 0b01;     // Increment After
        let Inst{21}    = 1;        // Writeback
        let Inst{20}    = L_bit;
      }

      def SDB_UPD :
        AXSI4<(outs GPR:$wb),
              (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
              IndexModeUpd, itin_upd,
              !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
        let Inst{24-23} = 0b10;     // Decrement Before
        let Inst{21}    = 1;        // Writeback
        let Inst{20}    = L_bit;
      }
    } // D = VFPNeonDomain

  } // Predicates = [HasFPRegs]
}
|
|
|
|
|
2014-11-26 08:46:26 +08:00
|
|
|
// Instantiate the load-multiple (L bit = 1) and store-multiple (L bit = 0)
// families. Neither family has unmodelled side effects.
let hasSideEffects = 0 in {

  let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
  defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>;

  let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
  defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpStore_m, IIC_fpStore_mu>;

} // hasSideEffects
|
2010-11-13 18:57:02 +08:00
|
|
|
|
2010-11-16 10:00:24 +08:00
|
|
|
// Accept the bare "vldm" mnemonic as an alias for "vldmia".
def : MnemonicAlias<"vldm", "vldmia">;
|
|
|
|
// Accept the bare "vstm" mnemonic as an alias for "vstmia".
def : MnemonicAlias<"vstm", "vstmia">;
|
|
|
|
|
2016-01-25 19:24:47 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Lazy load / store multiple Instructions
|
|
|
|
//
|
|
|
|
// VLLDM: lazy load multiple. Takes only a base register (not PC) and a
// predicate, has no selection pattern, and requires v8-M Mainline with the
// Security Extension.
//
// mayLoad is set once, in the record body; the former enclosing
// 'let mayLoad = 1 in' set the same field to the same value and was redundant.
def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
                  NoItinerary, "vlldm${p}\t$Rn", "", []>,
            Requires<[HasV8MMainline, Has8MSecExt]> {
  let Inst{24-23} = 0b00;
  let Inst{22}    = 0;
  let Inst{21}    = 1;
  let Inst{20}    = 1;   // The only encoding bit that differs from VLSTM.
  let Inst{15-12} = 0;
  let Inst{7-0}   = 0;
  let mayLoad     = 1;
}
|
|
|
|
|
|
|
|
// VLSTM: lazy store multiple. Takes only a base register (not PC) and a
// predicate, has no selection pattern, and requires v8-M Mainline with the
// Security Extension.
//
// mayStore is set once, in the record body; the former enclosing
// 'let mayStore = 1 in' set the same field to the same value and was
// redundant.
def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
                  NoItinerary, "vlstm${p}\t$Rn", "", []>,
            Requires<[HasV8MMainline, Has8MSecExt]> {
  let Inst{24-23} = 0b00;
  let Inst{22}    = 0;
  let Inst{21}    = 1;
  let Inst{20}    = 0;   // The only encoding bit that differs from VLLDM.
  let Inst{15-12} = 0;
  let Inst{7-0}   = 0;
  let mayStore    = 1;
}
|
|
|
|
|
2016-06-03 21:19:43 +08:00
|
|
|
// vpush / vpop are spelled as aliases of the SP-based, base-updating
// store-multiple (decrement-before) and load-multiple (increment-after)
// instructions, once for D-register lists and once for S-register lists.
def : InstAlias<"vpush${p} $r",
                (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r), 0>,
      Requires<[HasFPRegs]>;
def : InstAlias<"vpush${p} $r",
                (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r), 0>,
      Requires<[HasFPRegs]>;
def : InstAlias<"vpop${p} $r",
                (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r), 0>,
      Requires<[HasFPRegs]>;
def : InstAlias<"vpop${p} $r",
                (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r), 0>,
      Requires<[HasFPRegs]>;

// The same four mappings, expanded through VFPDTAnyInstAlias.
// NOTE(review): presumably this accepts the mnemonic with a data-type
// suffix -- confirm against the VFPDTAnyInstAlias definition.
defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
                         (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>;
defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
                         (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>;
defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
                         (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>;
defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
                         (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>;
|
2011-06-28 04:00:07 +08:00
|
|
|
|
2013-05-31 23:55:51 +08:00
|
|
|
// FLDMX, FSTMX - Load and store multiple unknown precision registers for
|
|
|
|
// pre-armv6 cores.
|
|
|
|
// These instructions are deprecated so we don't want them to get selected.
|
2017-11-22 00:20:25 +08:00
|
|
|
// However, there is no UAL syntax for them, so we keep them around for
|
|
|
|
// (dis)assembly only.
|
2013-05-31 23:55:51 +08:00
|
|
|
// Defines the three "unknown precision" load/store-multiple variants for a
// mnemonic stem `asm`:
//   XIA      - "<asm>iax", no writeback
//   XIA_UPD  - "<asm>iax" with "$Rn!", writeback ($Rn tied to $wb)
//   XDB_UPD  - "<asm>dbx" with "$Rn!", writeback ($Rn tied to $wb)
// `L_bit` is placed in Inst{20}. All pattern lists are empty, so these are
// never selected. Every variant requires HasFPRegs and carries no
// scheduling info.
multiclass vfp_ldstx_mult<string asm, bit L_bit> {
  let Predicates = [HasFPRegs], hasNoSchedulingInfo = 1 in {
    // Unknown precision
    def XIA
        : AXXI4<(outs),
                (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
                IndexModeNone, !strconcat(asm, "iax${p}\t$Rn, $regs"),
                "", []> {
      let Inst{24-23} = 0b01;       // Increment After
      let Inst{21}    = 0;          // No writeback
      let Inst{20}    = L_bit;
    }

    def XIA_UPD
        : AXXI4<(outs GPR:$wb),
                (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
                IndexModeUpd, !strconcat(asm, "iax${p}\t$Rn!, $regs"),
                "$Rn = $wb", []> {
      let Inst{24-23} = 0b01;       // Increment After
      let Inst{21}    = 1;          // Writeback
      let Inst{20}    = L_bit;
    }

    def XDB_UPD
        : AXXI4<(outs GPR:$wb),
                (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
                IndexModeUpd, !strconcat(asm, "dbx${p}\t$Rn!, $regs"),
                "$Rn = $wb", []> {
      let Inst{24-23} = 0b10;       // Decrement Before
      let Inst{21}    = 1;          // Writeback
      let Inst{20}    = L_bit;
    }
  }
}
|
|
|
|
|
|
|
|
// Instantiate the unknown-precision forms: L_bit = 1 for "fldm",
// L_bit = 0 for "fstm".
defm FLDM : vfp_ldstx_mult<"fldm", 1>;
defm FSTM : vfp_ldstx_mult<"fstm", 0>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2013-12-30 01:58:35 +08:00
|
|
|
// "ea" / "fd" spellings mapped onto the "db" / "ia" mnemonics. Note that
// the mapping is mirrored between loads and stores.
//   loads:  ea -> db, fd -> ia
def : VFP2MnemonicAlias<"fldmeax", "fldmdbx">;
def : VFP2MnemonicAlias<"fldmfdx", "fldmiax">;

//   stores: ea -> ia, fd -> db
def : VFP2MnemonicAlias<"fstmeax", "fstmiax">;
def : VFP2MnemonicAlias<"fstmfdx", "fstmdbx">;
|
|
|
|
|
2010-10-13 07:06:54 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// FP Binary Operations.
|
|
|
|
//
|
2010-02-09 03:41:48 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
// vadd.f64 Dd, Dn, Dm -- selected for an f64 fadd of two DPR values.
// NOTE(review): TwoOperandAliasConstraint presumably enables the
// two-operand assembly spelling with $Dn tied to $Dd -- confirm.
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VADDD
    : ADbI<0b11100, 0b11, 0, 0,
           (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
           IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
           [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>,
      Sched<[WriteFPALU64]>;
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
// vadd.f32 Sd, Sn, Sm -- selected for an fadd of two SPR values.
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VADDS
    : ASbIn<0b11100, 0b11, 0, 0,
            (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
            IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
            [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]>,
      Sched<[WriteFPALU32]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// vadd.f16 Sd, Sn, Sm -- selected for an fadd of two HPR values.
// Uses the IIC_fpALU16 itinerary together with the WriteFPALU32 sched class.
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VADDH
    : AHbI<0b11100, 0b11, 0, 0,
           (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
           IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
           [(set HPR:$Sd, (fadd HPR:$Sn, HPR:$Sm))]>,
      Sched<[WriteFPALU32]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
// vsub.f64 Dd, Dn, Dm -- selected for an f64 fsub of two DPR values.
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VSUBD
    : ADbI<0b11100, 0b11, 1, 0,
           (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
           IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
           [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>,
      Sched<[WriteFPALU64]>;
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
// vsub.f32 Sd, Sn, Sm -- selected for an fsub of two SPR values.
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VSUBS
    : ASbIn<0b11100, 0b11, 1, 0,
            (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
            IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
            [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>,
      Sched<[WriteFPALU32]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// vsub.f16 Sd, Sn, Sm -- selected for an fsub of two HPR values.
// Uses the IIC_fpALU16 itinerary together with the WriteFPALU32 sched class.
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VSUBH
    : AHbI<0b11100, 0b11, 1, 0,
           (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
           IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
           [(set HPR:$Sd, (fsub HPR:$Sn, HPR:$Sm))]>,
      Sched<[WriteFPALU32]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
// vdiv.f64 Dd, Dn, Dm -- selected for an f64 fdiv of two DPR values.
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VDIVD
    : ADbI<0b11101, 0b00, 0, 0,
           (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
           IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
           [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>,
      Sched<[WriteFPDIV64]>;
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
// vdiv.f32 Sd, Sn, Sm -- selected for an fdiv of two SPR values.
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VDIVS
    : ASbI<0b11101, 0b00, 0, 0,
           (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
           IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
           [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>,
      Sched<[WriteFPDIV32]>;
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// vdiv.f16 Sd, Sn, Sm -- selected for an fdiv of two HPR values.
// Uses the IIC_fpDIV16 itinerary together with the WriteFPDIV32 sched class.
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VDIVH
    : AHbI<0b11101, 0b00, 0, 0,
           (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
           IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm",
           [(set HPR:$Sd, (fdiv HPR:$Sn, HPR:$Sm))]>,
      Sched<[WriteFPDIV32]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
// vmul.f64 Dd, Dn, Dm -- selected for an f64 fmul of two DPR values.
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VMULD
    : ADbI<0b11100, 0b10, 0, 0,
           (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
           IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
           [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>,
      Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
// vmul.f32 Sd, Sn, Sm -- selected for an fmul of two SPR values.
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VMULS
    : ASbIn<0b11100, 0b10, 0, 0,
            (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
            IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
            [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>,
      Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// vmul.f16 Sd, Sn, Sm -- selected for an fmul of two HPR values.
// Uses the IIC_fpMUL16 itinerary together with the WriteFPMUL32 sched class.
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VMULH
    : AHbI<0b11100, 0b10, 0, 0,
           (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
           IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm",
           [(set HPR:$Sd, (fmul HPR:$Sn, HPR:$Sm))]>,
      Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
// vnmul.f64 Dd, Dn, Dm -- selected for fneg(fmul Dn, Dm) on f64.
def VNMULD
    : ADbI<0b11100, 0b10, 1, 0,
           (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
           IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
           [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>,
      Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
|
2010-11-01 14:00:39 +08:00
|
|
|
|
|
|
|
// vnmul.f32 Sd, Sn, Sm -- selected for fneg(fmul Sn, Sm) on SPR values.
def VNMULS
    : ASbI<0b11100, 0b10, 1, 0,
           (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
           IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
           [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>,
      Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// vnmul.f16 Sd, Sn, Sm -- selected for fneg(fmul Sn, Sm) on HPR values.
// Uses the IIC_fpMUL16 itinerary together with the WriteFPMUL32 sched class.
def VNMULH
    : AHbI<0b11100, 0b10, 1, 0,
           (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
           IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm",
           [(set HPR:$Sd, (fneg (fmul HPR:$Sn, HPR:$Sm)))]>,
      Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2013-08-22 23:29:11 +08:00
|
|
|
// Defines the H/S/D variants of "vsel<op>" for one condition.
//   op  - condition suffix appended to "vsel" in the assembly string
//   opc - 2-bit value passed through to the instruction format class
//   CC  - condition-code operand used in the ARMcmov selection pattern
// Each variant is selected for (ARMcmov $m, $n, CC); note the pattern lists
// the m operand first. All variants read CPSR, are marked isUnpredicable,
// and decode in the "VFPV8" namespace with no post-encoder method.
multiclass vsel_inst<string op, bits<2> opc, int CC> {
  let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
      Uses = [CPSR], AddedComplexity = 4, isUnpredicable = 1 in {
    // Half precision: requires HasFullFP16.
    def H
        : AHbInp<0b11100, opc, 0,
                 (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
                 NoItinerary, !strconcat("vsel", op, ".f16\t$Sd, $Sn, $Sm"),
                 [(set HPR:$Sd, (ARMcmov HPR:$Sm, HPR:$Sn, CC))]>,
          Requires<[HasFullFP16]>;

    // Single precision: requires HasFPARMv8.
    def S
        : ASbInp<0b11100, opc, 0,
                 (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                 NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"),
                 [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>,
          Requires<[HasFPARMv8]>;

    // Double precision: requires HasFPARMv8 and HasDPVFP.
    def D
        : ADbInp<0b11100, opc, 0,
                 (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
                 NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"),
                 [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>,
          Requires<[HasFPARMv8, HasDPVFP]>;
  }
}
|
|
|
|
|
2013-08-22 23:29:11 +08:00
|
|
|
// One vsel family per supported condition. The trailing integer is the CC
// value handed to the ARMcmov pattern; the CC constants here match
// ARMCC::CondCodes.
defm VSELGT : vsel_inst<"gt", 0b11, 12>;
defm VSELGE : vsel_inst<"ge", 0b10, 10>;
defm VSELEQ : vsel_inst<"eq", 0b00, 0>;
defm VSELVS : vsel_inst<"vs", 0b01, 6>;
|
2013-07-04 22:57:20 +08:00
|
|
|
|
2013-08-23 20:01:13 +08:00
|
|
|
// Defines the H/S/D variants of a two-operand max/min style instruction.
//   op  - full mnemonic (the type suffix is appended here)
//   opc - single bit passed through to the instruction format class
//   SD  - selection DAG node matched by each variant's pattern
// All variants are marked isUnpredicable and decode in the "VFPV8"
// namespace with no post-encoder method.
multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
  let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
      isUnpredicable = 1 in {
    // Half precision: requires HasFullFP16.
    def H
        : AHbInp<0b11101, 0b00, opc,
                 (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
                 NoItinerary, !strconcat(op, ".f16\t$Sd, $Sn, $Sm"),
                 [(set HPR:$Sd, (SD HPR:$Sn, HPR:$Sm))]>,
          Requires<[HasFullFP16]>;

    // Single precision: requires HasFPARMv8.
    def S
        : ASbInp<0b11101, 0b00, opc,
                 (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                 NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"),
                 [(set SPR:$Sd, (SD SPR:$Sn, SPR:$Sm))]>,
          Requires<[HasFPARMv8]>;

    // Double precision: requires HasFPARMv8 and HasDPVFP.
    def D
        : ADbInp<0b11101, 0b00, opc,
                 (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
                 NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"),
                 [(set DPR:$Dd, (f64 (SD (f64 DPR:$Dn), (f64 DPR:$Dm))))]>,
          Requires<[HasFPARMv8, HasDPVFP]>;
  }
}
|
|
|
|
|
2019-05-30 22:34:29 +08:00
|
|
|
// vmaxnm is selected for fmaxnum (opc = 0); vminnm for fminnum (opc = 1).
defm VFP_VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>;
defm VFP_VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>;
|
2013-07-07 04:50:18 +08:00
|
|
|
|
2007-05-03 08:32:00 +08:00
|
|
|
// Match reassociated forms only if not sign dependent rounding:
// (fmul (fneg a), b) is also selected as VNMUL when
// NoHonorSignDependentRounding holds. The f64 form additionally needs
// HasDPVFP.
def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
          (VNMULD DPR:$a, DPR:$b)>,
      Requires<[NoHonorSignDependentRounding,HasDPVFP]>;

def : Pat<(fmul (fneg SPR:$a), SPR:$b),
          (VNMULS SPR:$a, SPR:$b)>,
      Requires<[NoHonorSignDependentRounding]>;
|
2007-05-03 08:32:00 +08:00
|
|
|
|
2010-10-13 06:55:35 +08:00
|
|
|
// Register-register FP compares. These are encoded as unary instructions.
// Every def in this group has no outputs and lists FPSCR_NZCV as an
// implicit def. The "vcmpe" forms are selected for arm_cmpfpe, the "vcmp"
// forms for arm_cmpfp.
let Defs = [FPSCR_NZCV] in {

  // --- vcmpe -------------------------------------------------------------
  def VCMPED
      : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
             (outs), (ins DPR:$Dd, DPR:$Dm),
             IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
             [(arm_cmpfpe DPR:$Dd, (f64 DPR:$Dm))]>;

  def VCMPES
      : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
             (outs), (ins SPR:$Sd, SPR:$Sm),
             IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
             [(arm_cmpfpe SPR:$Sd, SPR:$Sm)]> {
    // Some single precision VFP instructions may be executed on both NEON
    // and VFP pipelines on A8.
    let D = VFPNeonA8Domain;
  }

  def VCMPEH
      : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
             (outs), (ins HPR:$Sd, HPR:$Sm),
             IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm",
             [(arm_cmpfpe HPR:$Sd, HPR:$Sm)]>;

  // --- vcmp --------------------------------------------------------------
  def VCMPD
      : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
             (outs), (ins DPR:$Dd, DPR:$Dm),
             IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
             [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;

  def VCMPS
      : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
             (outs), (ins SPR:$Sd, SPR:$Sm),
             IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
             [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
    // Some single precision VFP instructions may be executed on both NEON
    // and VFP pipelines on A8.
    let D = VFPNeonA8Domain;
  }

  def VCMPH
      : AHuI<0b11101, 0b11, 0b0100, 0b01, 0,
             (outs), (ins HPR:$Sd, HPR:$Sm),
             IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm",
             [(arm_cmpfp HPR:$Sd, HPR:$Sm)]>;

} // Defs = [FPSCR_NZCV]
|
2007-01-19 15:51:42 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// FP Unary Operations.
|
|
|
|
//
|
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
// vabs.f64 Dd, Dm -- selected for fabs of an f64 DPR value.
def VABSD
    : ADuI<0b11101, 0b11, 0b0000, 0b11, 0,
           (outs DPR:$Dd), (ins DPR:$Dm),
           IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm",
           [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
// vabs.f32 Sd, Sm -- selected for fabs of an SPR value.
def VABSS
    : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
            (outs SPR:$Sd), (ins SPR:$Sm),
            IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
            [(set SPR:$Sd, (fabs SPR:$Sm))]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// vabs.f16 Sd, Sm -- selected for fabs of an f16 HPR value.
def VABSH
    : AHuI<0b11101, 0b11, 0b0000, 0b11, 0,
           (outs HPR:$Sd), (ins HPR:$Sm),
           IIC_fpUNA16, "vabs", ".f16\t$Sd, $Sm",
           [(set HPR:$Sd, (fabs (f16 HPR:$Sm)))]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-03-06 08:19:55 +08:00
|
|
|
let Defs = [FPSCR_NZCV] in {
|
2010-11-01 14:00:39 +08:00
|
|
|
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
|
|
|
|
(outs), (ins DPR:$Dd),
|
|
|
|
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
|
2020-01-22 22:04:12 +08:00
|
|
|
[(arm_cmpfpe0 (f64 DPR:$Dd))]> {
|
2010-11-01 14:00:39 +08:00
|
|
|
let Inst{3-0} = 0b0000;
|
|
|
|
let Inst{5} = 0;
|
2010-10-13 08:38:07 +08:00
|
|
|
}
|
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
|
|
|
|
(outs), (ins SPR:$Sd),
|
|
|
|
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
|
2020-01-22 22:04:12 +08:00
|
|
|
[(arm_cmpfpe0 SPR:$Sd)]> {
|
2010-11-01 14:00:39 +08:00
|
|
|
let Inst{3-0} = 0b0000;
|
|
|
|
let Inst{5} = 0;
|
2011-02-16 08:35:02 +08:00
|
|
|
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2010-10-13 08:38:07 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
|
2018-02-15 18:33:07 +08:00
|
|
|
(outs), (ins HPR:$Sd),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0",
|
2020-01-22 22:04:12 +08:00
|
|
|
[(arm_cmpfpe0 HPR:$Sd)]> {
|
2016-01-25 18:26:26 +08:00
|
|
|
let Inst{3-0} = 0b0000;
|
|
|
|
let Inst{5} = 0;
|
|
|
|
}
|
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
|
|
|
|
(outs), (ins DPR:$Dd),
|
|
|
|
IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
|
2019-10-08 16:25:42 +08:00
|
|
|
[(arm_cmpfp0 (f64 DPR:$Dd))]> {
|
2010-11-01 14:00:39 +08:00
|
|
|
let Inst{3-0} = 0b0000;
|
|
|
|
let Inst{5} = 0;
|
2010-10-14 04:58:46 +08:00
|
|
|
}
|
2010-02-09 03:41:48 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
// VCMPZS: "vcmp.f32 $Sd, #0". Takes one SPR input and has no register
// outputs; selected for arm_cmpfp0. Inst{3-0} and Inst{5} are fixed to zero,
// and the execution domain is set to VFPNeonA8Domain.
def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
|
|
|
|
(outs), (ins SPR:$Sd),
|
|
|
|
IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
|
2019-10-08 16:25:42 +08:00
|
|
|
[(arm_cmpfp0 SPR:$Sd)]> {
|
2010-11-01 14:00:39 +08:00
|
|
|
let Inst{3-0} = 0b0000;
|
|
|
|
let Inst{5} = 0;
|
2011-02-16 08:35:02 +08:00
|
|
|
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2010-10-14 04:58:46 +08:00
|
|
|
}
|
2016-01-25 18:26:26 +08:00
|
|
|
|
|
|
|
// VCMPZH: "vcmp.f16 $Sd, #0". Takes one HPR input and has no register
// outputs; selected for arm_cmpfp0. Inst{3-0} and Inst{5} are fixed to zero.
def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
|
2018-02-15 18:33:07 +08:00
|
|
|
(outs), (ins HPR:$Sd),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0",
|
2019-10-08 16:25:42 +08:00
|
|
|
[(arm_cmpfp0 HPR:$Sd)]> {
|
2016-01-25 18:26:26 +08:00
|
|
|
let Inst{3-0} = 0b0000;
|
|
|
|
let Inst{5} = 0;
|
|
|
|
}
|
2012-03-06 08:19:55 +08:00
|
|
|
} // Defs = [FPSCR_NZCV]
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-10-13 08:56:35 +08:00
|
|
|
// VCVTDS: "vcvt.f64.f32 $Dd, $Sm", selected for fpextend from an SPR source to
// a DPR result. The operand fields are encoded by hand below because source
// and destination come from different register classes: the S register's low
// bit goes to Inst{5} and its upper four bits to Inst{3-0}; the D register's
// low four bits go to Inst{15-12} and its top bit to Inst{22}.
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
|
|
|
|
(outs DPR:$Dd), (ins SPR:$Sm),
|
|
|
|
IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set DPR:$Dd, (fpextend SPR:$Sm))]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2010-10-13 08:56:35 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Dd;
|
|
|
|
bits<5> Sm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Sm{4-1};
|
|
|
|
let Inst{5} = Sm{0};
|
|
|
|
let Inst{15-12} = Dd{3-0};
|
|
|
|
let Inst{22} = Dd{4};
|
2014-08-21 20:50:31 +08:00
|
|
|
|
|
|
|
// Needs both VFP2 and double-precision support.
let Predicates = [HasVFP2, HasDPVFP];
|
2010-10-13 08:56:35 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2008-11-11 10:11:05 +08:00
|
|
|
// Special case encoding: bits 11-8 are 0b1011.
|
2010-10-13 08:56:35 +08:00
|
|
|
// VCVTSD: "vcvt.f32.f64 $Sd, $Dm", selected for fpround from a DPR source to
// an SPR result. Built directly on VFPAI, so every fixed opcode field
// (Inst{27-23}, Inst{21-16}, Inst{11-8}, Inst{7-6}, Inst{4}) is set explicitly
// in the body alongside the operand fields.
def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
|
|
|
|
IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set SPR:$Sd, (fpround DPR:$Dm))]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2010-10-13 08:56:35 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sd;
|
|
|
|
bits<5> Dm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Dm{3-0};
|
|
|
|
let Inst{5} = Dm{4};
|
|
|
|
let Inst{15-12} = Sd{4-1};
|
|
|
|
let Inst{22} = Sd{0};
|
|
|
|
|
2008-11-11 10:11:05 +08:00
|
|
|
// Fixed opcode bits.
let Inst{27-23} = 0b11101;
|
|
|
|
let Inst{21-16} = 0b110111;
|
|
|
|
let Inst{11-8} = 0b1011;
|
2010-01-30 07:21:10 +08:00
|
|
|
let Inst{7-6} = 0b11;
|
|
|
|
let Inst{4} = 0;
|
2013-10-24 23:49:39 +08:00
|
|
|
|
|
|
|
// Needs both VFP2 and double-precision support.
let Predicates = [HasVFP2, HasDPVFP];
|
2008-11-11 10:11:05 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation.
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issue before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH descriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not to be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 3/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
// Between half, single and double-precision.
|
2012-08-15 07:36:01 +08:00
|
|
|
// VCVTBHS: "vcvtb.f32.f16 $Sd, $Sm". Both operands are SPR; the instruction
// carries no selection pattern of its own (separate Pat records reference it).
// Requires HasFP16.
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
2011-08-23 05:34:00 +08:00
|
|
|
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
|
2018-02-07 00:28:43 +08:00
|
|
|
[/* Intentionally left blank, see patterns below */]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-03-19 06:35:37 +08:00
|
|
|
|
[ARM] Explicit lowering of half <-> double conversions.
If an FP_EXTEND or FP_ROUND isel dag node converts directly between
f16 and f64 when the target CPU has no instruction to do it in one go,
it has to be done in two steps instead, going via f32.
Previously, this was done implicitly, because all such CPUs had the
storage-only implementation of f16 (i.e. the only thing you can do
with one at all is to convert it to/from f32). So isel would legalize
the f16 into an f32 as soon as it saw it, by inserting an fp16_to_fp
node (or vice versa), and then the fp_extend would already be f32->f64
rather than f16->f64.
But that technique can't support a target CPU which has full f16
support but _not_ f64, such as some variants of Arm v8.1-M. So now we
provide custom lowering for FP_EXTEND and FP_ROUND, which checks
support for f16 and f64 and decides on the best thing to do given the
combination of flags it gets back.
Reviewers: dmgreen, samparker, SjoerdMeijer
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60692
llvm-svn: 364294
2019-06-25 19:24:50 +08:00
|
|
|
// Select f16 -> f32 fpextend as VCVTBHS; the HPR source is first copied into
// the SPR register class that VCVTBHS takes as input.
def : FP16Pat<(f32 (fpextend HPR:$Sm)),
|
|
|
|
(VCVTBHS (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
|
2018-02-07 00:28:43 +08:00
|
|
|
// Select f16_to_fp on a GPR value as VCVTBHS; the GPR source is first copied
// into the SPR register class.
def : FP16Pat<(f16_to_fp GPR:$a),
|
|
|
|
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation.
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issue before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH descriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not to be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 3/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
|
2012-08-15 07:36:01 +08:00
|
|
|
// VCVTBSH: "vcvtb.f16.f32 $Sd, $Sm". Both operands are SPR; the instruction
// carries no selection pattern of its own (separate Pat records reference it).
// Requires HasFP16.
def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
2011-08-23 05:34:00 +08:00
|
|
|
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
|
2018-02-07 00:28:43 +08:00
|
|
|
[/* Intentionally left blank, see patterns below */]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-03-19 06:35:37 +08:00
|
|
|
|
[ARM] Explicit lowering of half <-> double conversions.
If an FP_EXTEND or FP_ROUND isel dag node converts directly between
f16 and f64 when the target CPU has no instruction to do it in one go,
it has to be done in two steps instead, going via f32.
Previously, this was done implicitly, because all such CPUs had the
storage-only implementation of f16 (i.e. the only thing you can do
with one at all is to convert it to/from f32). So isel would legalize
the f16 into an f32 as soon as it saw it, by inserting an fp16_to_fp
node (or vice versa), and then the fp_extend would already be f32->f64
rather than f16->f64.
But that technique can't support a target CPU which has full f16
support but _not_ f64, such as some variants of Arm v8.1-M. So now we
provide custom lowering for FP_EXTEND and FP_ROUND, which checks
support for f16 and f64 and decides on the best thing to do given the
combination of flags it gets back.
Reviewers: dmgreen, samparker, SjoerdMeijer
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60692
llvm-svn: 364294
2019-06-25 19:24:50 +08:00
|
|
|
// Select f32 -> f16 fpround as VCVTBSH; its SPR result is copied into the HPR
// register class.
def : FP16Pat<(f16 (fpround SPR:$Sm)),
|
|
|
|
(COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>;
|
2018-02-07 00:28:43 +08:00
|
|
|
// Select fp_to_f16 on an SPR value as VCVTBSH; its SPR result is copied into
// the GPR register class and typed as i32.
def : FP16Pat<(fp_to_f16 SPR:$a),
|
|
|
|
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
|
|
|
|
|
2012-08-15 07:36:01 +08:00
|
|
|
// VCVTTHS: "vcvtt.f32.f16 $Sd, $Sm". Both operands are SPR. No selection
// pattern is attached (the record is marked as for disassembly only).
// Requires HasFP16.
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
2011-08-23 05:34:00 +08:00
|
|
|
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
|
2015-12-07 18:54:36 +08:00
|
|
|
[/* For disassembly only; pattern left blank */]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-02-10 01:21:56 +08:00
|
|
|
|
2012-08-15 07:36:01 +08:00
|
|
|
// VCVTTSH: "vcvtt.f16.f32 $Sd, $Sm". Both operands are SPR. No selection
// pattern is attached (the record is marked as for disassembly only).
// Requires HasFP16.
def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
2011-08-23 05:34:00 +08:00
|
|
|
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
|
2015-12-07 18:54:36 +08:00
|
|
|
[/* For disassembly only; pattern left blank */]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-02-10 01:21:56 +08:00
|
|
|
|
2013-07-04 18:04:08 +08:00
|
|
|
// VCVTBHD: "vcvtb.f64.f16 $Dd, $Sm". SPR source, DPR result. The instruction
// has no selection pattern of its own (separate Pat records reference it).
// Only the S-register source field is re-encoded here: its low bit goes to
// Inst{5} and its upper four bits to Inst{3-0}.
// Requires HasFPARMv8 and HasDPVFP.
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
|
|
|
|
(outs DPR:$Dd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
|
2018-02-07 00:28:43 +08:00
|
|
|
[/* Intentionally left blank, see patterns below */]>,
|
|
|
|
Requires<[HasFPARMv8, HasDPVFP]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Sched<[WriteFPCVT]> {
|
2013-07-04 18:04:08 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Sm{4-1};
|
|
|
|
let Inst{5} = Sm{0};
|
|
|
|
}
|
|
|
|
|
2018-02-07 00:28:43 +08:00
|
|
|
// Select f16 -> f64 fpextend as VCVTBHD; the HPR source is first copied into
// the SPR register class. Guarded by HasFPARMv8 and HasDPVFP.
def : FullFP16Pat<(f64 (fpextend HPR:$Sm)),
|
2018-09-13 00:24:43 +08:00
|
|
|
(VCVTBHD (COPY_TO_REGCLASS HPR:$Sm, SPR))>,
|
|
|
|
Requires<[HasFPARMv8, HasDPVFP]>;
|
2018-02-07 00:28:43 +08:00
|
|
|
// Select f16_to_fp producing f64 from a GPR value as VCVTBHD; the GPR source
// is first copied into the SPR register class. Guarded by HasFPARMv8 and
// HasDPVFP.
def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
|
2018-09-13 00:24:43 +08:00
|
|
|
(VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>,
|
|
|
|
Requires<[HasFPARMv8, HasDPVFP]>;
|
2018-02-07 00:28:43 +08:00
|
|
|
|
2013-07-04 18:04:08 +08:00
|
|
|
// VCVTBDH: "vcvtb.f16.f64 $Sd, $Dm". DPR source, SPR result. The instruction
// has no selection pattern of its own (separate Pat records reference it).
// Both operand fields are encoded by hand: the D register's low four bits go
// to Inst{3-0} and its top bit to Inst{5}; the S register's upper four bits go
// to Inst{15-12} and its low bit to Inst{22}.
// Requires HasFPARMv8 and HasDPVFP.
def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins DPR:$Dm),
|
|
|
|
NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm",
|
2018-02-07 00:28:43 +08:00
|
|
|
[/* Intentionally left blank, see patterns below */]>,
|
|
|
|
Requires<[HasFPARMv8, HasDPVFP]> {
|
2013-07-04 18:04:08 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sd;
|
|
|
|
bits<5> Dm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Dm{3-0};
|
|
|
|
let Inst{5} = Dm{4};
|
|
|
|
let Inst{15-12} = Sd{4-1};
|
|
|
|
let Inst{22} = Sd{0};
|
|
|
|
}
|
|
|
|
|
2018-02-07 00:28:43 +08:00
|
|
|
// Select f64 -> f16 fpround as VCVTBDH; its SPR result is copied into the HPR
// register class. Guarded by HasFPARMv8 and HasDPVFP.
def : FullFP16Pat<(f16 (fpround DPR:$Dm)),
|
2018-09-13 00:24:43 +08:00
|
|
|
(COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>,
|
|
|
|
Requires<[HasFPARMv8, HasDPVFP]>;
|
2018-02-07 00:28:43 +08:00
|
|
|
// Select fp_to_f16 on an f64 DPR value as VCVTBDH; its SPR result is copied
// into the GPR register class and typed as i32. Guarded by HasFPARMv8 and
// HasDPVFP.
def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
|
2018-09-13 00:24:43 +08:00
|
|
|
(i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>,
|
|
|
|
Requires<[HasFPARMv8, HasDPVFP]>;
|
2018-02-07 00:28:43 +08:00
|
|
|
|
2013-07-04 18:04:08 +08:00
|
|
|
// VCVTTHD: "vcvtt.f64.f16 $Dd, $Sm". SPR source, DPR result, with an empty
// selection pattern. Only the S-register source field is re-encoded here: its
// low bit goes to Inst{5} and its upper four bits to Inst{3-0}.
// Requires HasFPARMv8 and HasDPVFP.
def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
|
|
|
|
(outs DPR:$Dd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm",
|
2013-10-24 23:49:39 +08:00
|
|
|
[]>, Requires<[HasFPARMv8, HasDPVFP]> {
|
2013-07-04 18:04:08 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Sm{4-1};
|
|
|
|
let Inst{5} = Sm{0};
|
|
|
|
}
|
|
|
|
|
|
|
|
// VCVTTDH: "vcvtt.f16.f64 $Sd, $Dm". DPR source, SPR result, with an empty
// selection pattern. The S register's upper four bits go to Inst{15-12} and
// its low bit to Inst{22}; the D register's low four bits go to Inst{3-0} and
// its top bit to Inst{5}.
// Requires HasFPARMv8 and HasDPVFP.
def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins DPR:$Dm),
|
|
|
|
NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm",
|
2013-10-24 23:49:39 +08:00
|
|
|
[]>, Requires<[HasFPARMv8, HasDPVFP]> {
|
2013-07-04 18:04:08 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sd;
|
|
|
|
bits<5> Dm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{15-12} = Sd{4-1};
|
|
|
|
let Inst{22} = Sd{0};
|
|
|
|
let Inst{3-0} = Dm{3-0};
|
|
|
|
let Inst{5} = Dm{4};
|
|
|
|
}
|
|
|
|
|
2014-08-26 00:56:33 +08:00
|
|
|
// vcvt_inst: defines one family of "vcvt<opc>" float-to-integer conversions.
//   opc  - suffix appended to "vcvt" in the assembly string.
//   rm   - two-bit value written to Inst{17-16} of every instruction.
//   node - DAG operator wrapped around the source inside fp_to_sint/fp_to_uint
//          in the selection patterns; defaults to null_frag.
//          NOTE(review): presumably null_frag makes the patterns unmatchable
//          (as used for VCVTN) - confirm.
// Six instructions are produced, named NAME#{SH,UH,SS,US,SD,UD}: signed (S) or
// unsigned (U) 32-bit result from an f16 (H), f32 (S) or f64 (D) source. All
// results are written to an SPR, and the patterns copy them to a GPR.
multiclass vcvt_inst<string opc, bits<2> rm,
|
|
|
|
SDPatternOperator node = null_frag> {
|
2013-07-18 18:20:25 +08:00
|
|
|
let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
|
2016-01-25 18:26:26 +08:00
|
|
|
def SH : AHuInp<0b11101, 0b11, 0b1100, 0b11, 0,
|
2018-02-06 16:43:56 +08:00
|
|
|
(outs SPR:$Sd), (ins HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
NoItinerary, !strconcat("vcvt", opc, ".s32.f16\t$Sd, $Sm"),
|
|
|
|
[]>,
|
|
|
|
Requires<[HasFullFP16]> {
|
|
|
|
let Inst{17-16} = rm;
|
|
|
|
}
|
|
|
|
|
|
|
|
def UH : AHuInp<0b11101, 0b11, 0b1100, 0b01, 0,
|
2018-02-06 16:43:56 +08:00
|
|
|
(outs SPR:$Sd), (ins HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
NoItinerary, !strconcat("vcvt", opc, ".u32.f16\t$Sd, $Sm"),
|
|
|
|
[]>,
|
|
|
|
Requires<[HasFullFP16]> {
|
|
|
|
let Inst{17-16} = rm;
|
|
|
|
}
|
|
|
|
|
2013-07-09 17:59:04 +08:00
|
|
|
def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"),
|
2015-03-24 00:15:16 +08:00
|
|
|
[]>,
|
2014-08-26 00:56:33 +08:00
|
|
|
Requires<[HasFPARMv8]> {
|
2013-07-09 17:59:04 +08:00
|
|
|
let Inst{17-16} = rm;
|
|
|
|
}
|
|
|
|
|
|
|
|
def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"),
|
2015-03-24 00:15:16 +08:00
|
|
|
[]>,
|
2014-08-26 00:56:33 +08:00
|
|
|
Requires<[HasFPARMv8]> {
|
2013-07-09 17:59:04 +08:00
|
|
|
let Inst{17-16} = rm;
|
|
|
|
}
|
|
|
|
|
|
|
|
def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins DPR:$Dm),
|
|
|
|
NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"),
|
2015-03-24 00:15:16 +08:00
|
|
|
[]>,
|
2014-08-26 00:56:33 +08:00
|
|
|
Requires<[HasFPARMv8, HasDPVFP]> {
|
2013-07-09 17:59:04 +08:00
|
|
|
bits<5> Dm;
|
|
|
|
|
|
|
|
let Inst{17-16} = rm;
|
|
|
|
|
2019-07-16 17:15:01 +08:00
|
|
|
// Encode instruction operands.
|
2013-07-09 17:59:04 +08:00
|
|
|
let Inst{3-0} = Dm{3-0};
|
|
|
|
let Inst{5} = Dm{4};
|
|
|
|
let Inst{8} = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins DPR:$Dm),
|
|
|
|
NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"),
|
2015-03-24 00:15:16 +08:00
|
|
|
[]>,
|
2014-08-26 00:56:33 +08:00
|
|
|
Requires<[HasFPARMv8, HasDPVFP]> {
|
2013-07-09 17:59:04 +08:00
|
|
|
bits<5> Dm;
|
|
|
|
|
|
|
|
let Inst{17-16} = rm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Dm{3-0};
|
|
|
|
let Inst{5} = Dm{4};
|
|
|
|
let Inst{8} = 1;
|
|
|
|
}
|
|
|
|
}
|
2015-03-24 00:15:16 +08:00
|
|
|
|
|
|
|
// Selection patterns: fp_to_sint/fp_to_uint of (node x) becomes the matching
// instruction above, with the SPR result copied to the GPR register class.
let Predicates = [HasFPARMv8] in {
|
2018-02-06 16:43:56 +08:00
|
|
|
let Predicates = [HasFullFP16] in {
|
|
|
|
def : Pat<(i32 (fp_to_sint (node HPR:$a))),
|
|
|
|
(COPY_TO_REGCLASS
|
|
|
|
(!cast<Instruction>(NAME#"SH") HPR:$a),
|
|
|
|
GPR)>;
|
|
|
|
|
|
|
|
def : Pat<(i32 (fp_to_uint (node HPR:$a))),
|
|
|
|
(COPY_TO_REGCLASS
|
|
|
|
(!cast<Instruction>(NAME#"UH") HPR:$a),
|
|
|
|
GPR)>;
|
|
|
|
}
|
2015-03-24 00:15:16 +08:00
|
|
|
def : Pat<(i32 (fp_to_sint (node SPR:$a))),
|
|
|
|
(COPY_TO_REGCLASS
|
|
|
|
(!cast<Instruction>(NAME#"SS") SPR:$a),
|
|
|
|
GPR)>;
|
|
|
|
def : Pat<(i32 (fp_to_uint (node SPR:$a))),
|
|
|
|
(COPY_TO_REGCLASS
|
|
|
|
(!cast<Instruction>(NAME#"US") SPR:$a),
|
|
|
|
GPR)>;
|
|
|
|
}
|
|
|
|
let Predicates = [HasFPARMv8, HasDPVFP] in {
|
|
|
|
def : Pat<(i32 (fp_to_sint (node (f64 DPR:$a)))),
|
|
|
|
(COPY_TO_REGCLASS
|
|
|
|
(!cast<Instruction>(NAME#"SD") DPR:$a),
|
|
|
|
GPR)>;
|
|
|
|
def : Pat<(i32 (fp_to_uint (node (f64 DPR:$a)))),
|
|
|
|
(COPY_TO_REGCLASS
|
|
|
|
(!cast<Instruction>(NAME#"UD") DPR:$a),
|
|
|
|
GPR)>;
|
|
|
|
}
|
2013-07-09 17:59:04 +08:00
|
|
|
}
|
|
|
|
|
2016-08-19 04:08:15 +08:00
|
|
|
// Instantiate vcvt_inst for the "a", "n", "p" and "m" variants. The second
// argument is the value placed in Inst{17-16}; the third is the DAG node the
// patterns match inside fp_to_sint/fp_to_uint (fround, fceil, ffloor). VCVTN
// passes no node and therefore uses the multiclass default.
defm VCVTA : vcvt_inst<"a", 0b00, fround>;
|
2013-07-09 17:59:04 +08:00
|
|
|
defm VCVTN : vcvt_inst<"n", 0b01>;
|
2014-08-26 00:56:33 +08:00
|
|
|
defm VCVTP : vcvt_inst<"p", 0b10, fceil>;
|
|
|
|
defm VCVTM : vcvt_inst<"m", 0b11, ffloor>;
|
2013-07-09 17:59:04 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
// VNEGD: "vneg.f64 $Dd, $Dm", selected for fneg on an f64 DPR operand.
def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dm),
|
|
|
|
IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
|
|
|
|
[(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
// VNEGS: "vneg.f32 $Sd, $Sm", selected for fneg on an SPR operand. The
// execution domain is set to VFPNeonA8Domain.
def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
|
2011-02-16 08:35:02 +08:00
|
|
|
[(set SPR:$Sd, (fneg SPR:$Sm))]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// VNEGH: "vneg.f16 $Sd, $Sm", selected for fneg on an HPR operand.
def VNEGH : AHuI<0b11101, 0b11, 0b0001, 0b01, 0,
|
2018-02-06 16:43:56 +08:00
|
|
|
(outs HPR:$Sd), (ins HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpUNA16, "vneg", ".f16\t$Sd, $Sm",
|
2018-02-06 16:43:56 +08:00
|
|
|
[(set HPR:$Sd, (fneg HPR:$Sm))]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2014-08-16 05:38:16 +08:00
|
|
|
// vrint_inst_zrx: defines the H (f16), S (f32) and D (f64) forms of one
// "vrint<opc>" instruction.
//   opc  - suffix appended to "vrint" in the mnemonic.
//   op   - bit written to Inst{16}.
//   op2  - bit written to Inst{7}.
//   node - DAG operator each form is selected for.
// It also adds assembler aliases that accept the doubled type suffix
// (".f16.f16", ".f32.f32", ".f64.f64") together with a predicate operand.
multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
|
2016-01-25 18:26:26 +08:00
|
|
|
def H : AHuI<0b11101, 0b11, 0b0110, 0b11, 0,
|
2019-05-26 19:13:00 +08:00
|
|
|
(outs HPR:$Sd), (ins HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
NoItinerary, !strconcat("vrint", opc), ".f16\t$Sd, $Sm",
|
2019-05-26 19:13:00 +08:00
|
|
|
[(set (f16 HPR:$Sd), (node (f16 HPR:$Sm)))]>,
|
2016-01-25 18:26:26 +08:00
|
|
|
Requires<[HasFullFP16]> {
|
|
|
|
let Inst{7} = op2;
|
|
|
|
let Inst{16} = op;
|
|
|
|
}
|
|
|
|
|
2013-07-09 19:03:21 +08:00
|
|
|
def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm",
|
2014-08-16 05:38:16 +08:00
|
|
|
[(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>,
|
|
|
|
Requires<[HasFPARMv8]> {
|
2013-07-09 19:03:21 +08:00
|
|
|
let Inst{7} = op2;
|
|
|
|
let Inst{16} = op;
|
|
|
|
}
|
|
|
|
def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dm),
|
|
|
|
NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm",
|
2014-08-16 05:38:16 +08:00
|
|
|
[(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>,
|
|
|
|
Requires<[HasFPARMv8, HasDPVFP]> {
|
2013-07-09 19:03:21 +08:00
|
|
|
let Inst{7} = op2;
|
|
|
|
let Inst{16} = op;
|
|
|
|
}
|
2013-08-27 19:24:16 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// Assembler aliases for the doubled type-suffix spellings.
// NOTE(review): the f16 alias names its operands as SPR while the H
// instruction itself uses HPR - confirm this is intended.
def : InstAlias<!strconcat("vrint", opc, "$p.f16.f16\t$Sd, $Sm"),
|
2016-06-03 21:19:43 +08:00
|
|
|
(!cast<Instruction>(NAME#"H") SPR:$Sd, SPR:$Sm, pred:$p), 0>,
|
2016-01-25 18:26:26 +08:00
|
|
|
Requires<[HasFullFP16]>;
|
2013-08-27 19:24:16 +08:00
|
|
|
def : InstAlias<!strconcat("vrint", opc, "$p.f32.f32\t$Sd, $Sm"),
|
2016-06-03 21:19:43 +08:00
|
|
|
(!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p), 0>,
|
2013-10-24 20:22:58 +08:00
|
|
|
Requires<[HasFPARMv8]>;
|
2013-08-27 19:24:16 +08:00
|
|
|
def : InstAlias<!strconcat("vrint", opc, "$p.f64.f64\t$Dd, $Dm"),
|
2016-06-03 21:19:43 +08:00
|
|
|
(!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm, pred:$p), 0>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasFPARMv8,HasDPVFP]>;
|
2013-07-09 19:03:21 +08:00
|
|
|
}
|
|
|
|
|
2014-08-16 05:38:16 +08:00
|
|
|
// Instantiate vrint_inst_zrx for the "z", "r" and "x" variants. Arguments are
// the mnemonic suffix, the Inst{16} bit, the Inst{7} bit, and the DAG node the
// instructions are selected for (ftrunc, fnearbyint, frint).
defm VRINTZ : vrint_inst_zrx<"z", 0, 1, ftrunc>;
|
|
|
|
defm VRINTR : vrint_inst_zrx<"r", 0, 0, fnearbyint>;
|
|
|
|
defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>;
|
2013-07-09 19:03:21 +08:00
|
|
|
|
2014-08-16 05:38:16 +08:00
|
|
|
// vrint_inst_anpm: defines the H (f16), S (f32) and D (f64) forms of one
// "vrint<opc>" instruction.
//   opc  - suffix appended to "vrint" in the assembly string.
//   rm   - two-bit value written to Inst{17-16}.
//   node - DAG operator each form is selected for; defaults to null_frag.
// All three instructions are marked isUnpredicable and use the "VFPV8" decoder
// namespace with no post-encoder method. Aliases for the doubled
// ".f32.f32"/".f64.f64" suffix spellings are added for the S and D forms only.
multiclass vrint_inst_anpm<string opc, bits<2> rm,
|
|
|
|
SDPatternOperator node = null_frag> {
|
[ARM] Make fullfp16 instructions not conditionalisable.
More or less all the instructions defined in the v8.2a full-fp16
extension are defined as UNPREDICTABLE if you put them in an IT block
(Thumb) or use with any condition other than AL (ARM). LLVM didn't
know that, and was happy to conditionalise them.
In order to force these instructions to count as not predicable, I had
to make a small Tablegen change. The code generation back end mostly
decides if an instruction was predicable by looking for something it
can identify as a predicate operand; there's an isPredicable bit flag
that overrides that check in the positive direction, but nothing that
overrides it in the negative direction.
(I considered the alternative approach of actually removing the
predicate operand from those instructions, but thought that it would
be more painful overall for instructions differing only in data type
to have different shapes of operand list. This way, the only code that
has to notice the difference is the if-converter.)
So I've added an isUnpredicable bit alongside isPredicable, and set
that bit on the right subset of FP16 instructions, and also on the
VSEL, VMAXNM/VMINNM and VRINT[ANPM] families which should be
unpredicable for all data types.
I've included a couple of representative regression tests, both of
which previously caused an fp16 instruction to be conditionalised in
ARM state and (with -arm-no-restrict-it) to be put in an IT block in
Thumb.
Reviewers: SjoerdMeijer, t.p.northover, efriedma
Reviewed By: efriedma
Subscribers: jdoerfert, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57823
llvm-svn: 354768
2019-02-25 18:39:53 +08:00
|
|
|
let PostEncoderMethod = "", DecoderNamespace = "VFPV8",
|
|
|
|
isUnpredicable = 1 in {
|
2016-01-25 18:26:26 +08:00
|
|
|
def H : AHuInp<0b11101, 0b11, 0b1000, 0b01, 0,
|
2019-05-26 19:13:00 +08:00
|
|
|
(outs HPR:$Sd), (ins HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
NoItinerary, !strconcat("vrint", opc, ".f16\t$Sd, $Sm"),
|
2019-05-26 19:13:00 +08:00
|
|
|
[(set (f16 HPR:$Sd), (node (f16 HPR:$Sm)))]>,
|
2016-01-25 18:26:26 +08:00
|
|
|
Requires<[HasFullFP16]> {
|
|
|
|
let Inst{17-16} = rm;
|
|
|
|
}
|
2013-07-09 19:26:18 +08:00
|
|
|
def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"),
|
2014-08-16 05:38:16 +08:00
|
|
|
[(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>,
|
|
|
|
Requires<[HasFPARMv8]> {
|
2013-07-09 19:26:18 +08:00
|
|
|
let Inst{17-16} = rm;
|
|
|
|
}
|
|
|
|
def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dm),
|
|
|
|
NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"),
|
2014-08-16 05:38:16 +08:00
|
|
|
[(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>,
|
|
|
|
Requires<[HasFPARMv8, HasDPVFP]> {
|
2013-07-09 19:26:18 +08:00
|
|
|
let Inst{17-16} = rm;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Assembler aliases for the doubled type-suffix spellings (no predicate
// operand, unlike the aliases for the z/r/x variants).
def : InstAlias<!strconcat("vrint", opc, ".f32.f32\t$Sd, $Sm"),
|
2016-06-03 21:19:43 +08:00
|
|
|
(!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm), 0>,
|
2013-10-24 20:22:58 +08:00
|
|
|
Requires<[HasFPARMv8]>;
|
2013-07-09 19:26:18 +08:00
|
|
|
def : InstAlias<!strconcat("vrint", opc, ".f64.f64\t$Dd, $Dm"),
|
2016-06-03 21:19:43 +08:00
|
|
|
(!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm), 0>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasFPARMv8,HasDPVFP]>;
|
2013-07-09 19:26:18 +08:00
|
|
|
}
|
|
|
|
|
2016-08-19 04:08:15 +08:00
|
|
|
// Instantiate vrint_inst_anpm for the "a", "n", "p" and "m" variants. The
// second argument is the value placed in Inst{17-16}; the third is the DAG
// node (or, for VRINTN, the int_arm_neon_vrintn intrinsic) the instructions
// are selected for.
defm VRINTA : vrint_inst_anpm<"a", 0b00, fround>;
|
2018-04-13 20:45:12 +08:00
|
|
|
defm VRINTN : vrint_inst_anpm<"n", 0b01, int_arm_neon_vrintn>;
|
2014-08-16 05:38:16 +08:00
|
|
|
defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>;
|
|
|
|
defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
|
2013-07-09 19:26:18 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
// VSQRTD: "vsqrt.f64 $Dd, $Dm", selected for fsqrt on an f64 DPR operand and
// scheduled as WriteFPSQRT64.
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dm),
|
|
|
|
IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>,
|
|
|
|
Sched<[WriteFPSQRT64]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
// VSQRTS: "vsqrt.f32 $Sd, $Sm", selected for fsqrt on an SPR operand and
// scheduled as WriteFPSQRT32.
def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set SPR:$Sd, (fsqrt SPR:$Sm))]>,
|
|
|
|
Sched<[WriteFPSQRT32]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// VSQRTH: "vsqrt.f16 $Sd, $Sm", selected for fsqrt on an f16 HPR operand.
def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0,
|
2019-05-26 18:42:24 +08:00
|
|
|
(outs HPR:$Sd), (ins HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpSQRT16, "vsqrt", ".f16\t$Sd, $Sm",
|
2019-05-26 18:42:24 +08:00
|
|
|
[(set HPR:$Sd, (fsqrt (f16 HPR:$Sm)))]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2014-11-26 08:46:26 +08:00
|
|
|
let hasSideEffects = 0 in {
|
2018-05-23 23:28:28 +08:00
|
|
|
let isMoveReg = 1 in {
|
2010-11-01 14:00:39 +08:00
|
|
|
// VMOVD: "vmov.f64 $Dd, $Dm", a DPR-to-DPR register move with no selection
// pattern. Requires HasFPRegs64.
def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dm),
|
2019-05-30 20:37:05 +08:00
|
|
|
IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>,
|
|
|
|
Requires<[HasFPRegs64]>;
|
2010-10-14 04:58:46 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
// VMOVS: "vmov.f32 $Sd, $Sm", an SPR-to-SPR register move with no selection
// pattern. Requires HasFPRegs.
def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
2019-05-30 20:37:05 +08:00
|
|
|
IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>,
|
|
|
|
Requires<[HasFPRegs]>;
|
2018-05-23 23:28:28 +08:00
|
|
|
} // isMoveReg
|
2016-01-25 18:26:26 +08:00
|
|
|
|
[ARM] Make fullfp16 instructions not conditionalisable.
More or less all the instructions defined in the v8.2a full-fp16
extension are defined as UNPREDICTABLE if you put them in an IT block
(Thumb) or use with any condition other than AL (ARM). LLVM didn't
know that, and was happy to conditionalise them.
In order to force these instructions to count as not predicable, I had
to make a small Tablegen change. The code generation back end mostly
decides if an instruction was predicable by looking for something it
can identify as a predicate operand; there's an isPredicable bit flag
that overrides that check in the positive direction, but nothing that
overrides it in the negative direction.
(I considered the alternative approach of actually removing the
predicate operand from those instructions, but thought that it would
be more painful overall for instructions differing only in data type
to have different shapes of operand list. This way, the only code that
has to notice the difference is the if-converter.)
So I've added an isUnpredicable bit alongside isPredicable, and set
that bit on the right subset of FP16 instructions, and also on the
VSEL, VMAXNM/VMINNM and VRINT[ANPM] families which should be
unpredicable for all data types.
I've included a couple of representative regression tests, both of
which previously caused an fp16 instruction to be conditionalised in
ARM state and (with -arm-no-restrict-it) to be put in an IT block in
Thumb.
Reviewers: SjoerdMeijer, t.p.northover, efriedma
Reviewed By: efriedma
Subscribers: jdoerfert, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57823
llvm-svn: 354768
2019-02-25 18:39:53 +08:00
|
|
|
let PostEncoderMethod = "", DecoderNamespace = "VFPV8", isUnpredicable = 1 in {
|
2016-01-25 18:26:26 +08:00
|
|
|
// VMOVH: assembles as "vmovx.f16 $Sd, $Sm" (note the record name differs from
// the mnemonic). SPR operands, no selection pattern. Requires HasFullFP16.
def VMOVH : ASuInp<0b11101, 0b11, 0b0000, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpUNA16, "vmovx.f16\t$Sd, $Sm", []>,
|
|
|
|
Requires<[HasFullFP16]>;
|
|
|
|
|
|
|
|
// VINSH: "vins.f16 $Sd, $Sm". SPR operands, no selection pattern.
// Requires HasFullFP16.
def VINSH : ASuInp<0b11101, 0b11, 0b0000, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpUNA16, "vins.f16\t$Sd, $Sm", []>,
|
|
|
|
Requires<[HasFullFP16]>;
|
|
|
|
} // PostEncoderMethod
|
2014-11-26 08:46:26 +08:00
|
|
|
} // hasSideEffects
|
2010-10-14 04:58:46 +08:00
|
|
|
|
2007-01-19 15:51:42 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// FP <-> GPR Copies. Int <-> FP Conversions.
|
|
|
|
//
|
|
|
|
|
2018-05-23 23:28:28 +08:00
|
|
|
let isMoveReg = 1 in {
|
2010-10-21 06:44:54 +08:00
|
|
|
// VMOVRS: "vmov $Rt, $Sn", selected for a bitconvert from an SPR value to a
// GPR. The S register's upper four bits go to Inst{19-16} and its low bit to
// Inst{7}; Rt goes to Inst{15-12}; Inst{6-5} and Inst{3-0} are fixed to zero.
// Requires HasFPRegs.
def VMOVRS : AVConv2I<0b11100001, 0b1010,
|
|
|
|
(outs GPR:$Rt), (ins SPR:$Sn),
|
|
|
|
IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set GPR:$Rt, (bitconvert SPR:$Sn))]>,
|
2019-05-30 20:37:05 +08:00
|
|
|
Requires<[HasFPRegs]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Sched<[WriteFPMOV]> {
|
2010-10-21 06:44:54 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<4> Rt;
|
|
|
|
bits<5> Sn;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{19-16} = Sn{4-1};
|
|
|
|
let Inst{7} = Sn{0};
|
|
|
|
let Inst{15-12} = Rt;
|
|
|
|
|
|
|
|
let Inst{6-5} = 0b00;
|
|
|
|
let Inst{3-0} = 0b0000;
|
2011-04-20 02:11:38 +08:00
|
|
|
|
|
|
|
// Some single precision VFP instructions may be executed on both NEON and VFP
|
|
|
|
// pipelines.
|
|
|
|
let D = VFPNeonDomain;
|
2010-10-21 06:44:54 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2012-09-30 05:43:49 +08:00
|
|
|
// Bitcast i32 -> f32. NEON prefers to use VMOVDRR.
|
2010-10-21 06:44:54 +08:00
|
|
|
// VMOVSR: "vmov $Sn, $Rt", selected for a bitconvert from a GPR value to an
// SPR. Operand encoding mirrors VMOVRS: the S register's upper four bits go to
// Inst{19-16} and its low bit to Inst{7}; Rt goes to Inst{15-12}; Inst{6-5}
// and Inst{3-0} are fixed to zero. Requires HasFPRegs and UseVMOVSR.
def VMOVSR : AVConv4I<0b11100000, 0b1010,
|
|
|
|
(outs SPR:$Sn), (ins GPR:$Rt),
|
|
|
|
IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
|
2012-09-30 05:43:49 +08:00
|
|
|
[(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
|
2019-05-30 20:37:05 +08:00
|
|
|
Requires<[HasFPRegs, UseVMOVSR]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Sched<[WriteFPMOV]> {
|
2010-10-21 06:44:54 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sn;
|
|
|
|
bits<4> Rt;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{19-16} = Sn{4-1};
|
|
|
|
let Inst{7} = Sn{0};
|
|
|
|
let Inst{15-12} = Rt;
|
|
|
|
|
|
|
|
let Inst{6-5} = 0b00;
|
|
|
|
let Inst{3-0} = 0b0000;
|
2011-04-20 02:11:38 +08:00
|
|
|
|
|
|
|
// Some single precision VFP instructions may be executed on both NEON and VFP
|
|
|
|
// pipelines.
|
|
|
|
let D = VFPNeonDomain;
|
2010-10-21 06:44:54 +08:00
|
|
|
}
|
2018-05-23 23:28:28 +08:00
|
|
|
} // isMoveReg
|
2018-03-28 18:02:26 +08:00
|
|
|
// Select an arm_vmovsr node on a GPR operand to the VMOVSR instruction.
def : Pat<(arm_vmovsr GPR:$Rt),
          (VMOVSR GPR:$Rt)>,
      Requires<[HasVFP2, UseVMOVSR]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2014-11-26 08:46:26 +08:00
|
|
|
let hasSideEffects = 0 in {
|
2009-11-09 08:11:35 +08:00
|
|
|
// VMOVRRD: copy a DPR into a pair of GPRs.  Selected for arm_fmrrd.
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
                       (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
                       IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
                       [(set GPR:$Rt, GPR:$Rt2, (arm_fmrrd DPR:$Dm))]>,
              Requires<[HasFPRegs]>,
              Sched<[WriteFPMOV]> {
  // Operand fields.
  bits<5> Dm;
  bits<4> Rt;
  bits<4> Rt2;

  // Operand encoding: the low four bits of Dm go to Inst{3-0} and its top bit
  // goes to Inst{5}; Rt and Rt2 occupy Inst{15-12} and Inst{19-16}.
  let Inst{19-16} = Rt2;
  let Inst{15-12} = Rt;
  let Inst{5}     = Dm{4};
  let Inst{3-0}   = Dm{3-0};

  // Fixed bits.
  let Inst{7-6}   = 0b00;

  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines.
  let D = VFPNeonDomain;

  // This instruction is equivalent to
  //   $Rt  = EXTRACT_SUBREG $Dm, ssub_0
  //   $Rt2 = EXTRACT_SUBREG $Dm, ssub_1
  let isExtractSubreg = 1;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-02-09 01:26:09 +08:00
|
|
|
// VMOVRRS: copy two SPRs into two GPRs.  There is no selection pattern; the
// record exists for disassembly only.
def VMOVRRS : AVConv3I<0b11000101, 0b1010,
                       (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
                       IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
                       [/* For disassembly only; pattern left blank */]>,
              Requires<[HasFPRegs]>,
              Sched<[WriteFPMOV]> {
  // Operand fields.
  // NOTE(review): $src2 has no encoding field in this record; presumably it
  // is implied by $src1 and checked by the custom decoder -- confirm.
  bits<5> src1;
  bits<4> Rt;
  bits<4> Rt2;

  // Operand encoding: the upper four bits of src1 go to Inst{3-0} and its
  // lowest bit goes to Inst{5}; Rt and Rt2 occupy Inst{15-12} and Inst{19-16}.
  let Inst{19-16} = Rt2;
  let Inst{15-12} = Rt;
  let Inst{5}     = src1{0};
  let Inst{3-0}   = src1{4-1};

  // Fixed bits.
  let Inst{7-6}   = 0b00;

  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines.
  let D = VFPNeonDomain;

  let DecoderMethod = "DecodeVMOVRRS";
}
|
2014-11-26 08:46:26 +08:00
|
|
|
} // hasSideEffects
|
2010-02-09 01:26:09 +08:00
|
|
|
|
2007-01-19 15:51:42 +08:00
|
|
|
// FMDHR: GPR -> SPR
|
|
|
|
// FMDLR: GPR -> SPR
|
|
|
|
|
2009-11-09 08:11:35 +08:00
|
|
|
// VMOVDRR: build a DPR from a pair of GPRs.  Selected for arm_fmdrr.
def VMOVDRR : AVConv5I<0b11000100, 0b1011,
                       (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
                       IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
                       [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>,
              Requires<[HasFPRegs]>,
              Sched<[WriteFPMOV]> {
  // Operand fields.
  bits<5> Dm;
  bits<4> Rt;
  bits<4> Rt2;

  // Operand encoding: the low four bits of Dm go to Inst{3-0} and its top bit
  // goes to Inst{5}; Rt and Rt2 occupy Inst{15-12} and Inst{19-16}.
  let Inst{19-16} = Rt2;
  let Inst{15-12} = Rt;
  let Inst{5}     = Dm{4};
  let Inst{3-0}   = Dm{3-0};

  // Fixed bits.
  let Inst{7-6}   = 0b00;

  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines.
  let D = VFPNeonDomain;

  // This instruction is equivalent to
  //   $Dm = REG_SEQUENCE $Rt, ssub_0, $Rt2, ssub_1
  let isRegSequence = 1;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-08-29 18:49:11 +08:00
|
|
|
// Hoist an fabs or an fneg of a value coming from integer registers and
// perform the operation on the integer value ($Rh) instead.  This is never a
// loss and could enable the conversion to float to be removed completely.

// fabs: BFC on $Rh with the 0x7FFFFFFF mask operand (ARM and Thumb2 forms).
def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
          (VMOVDRR GPR:$Rl, (BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
      Requires<[IsARM, HasV6T2]>;
def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
          (VMOVDRR GPR:$Rl, (t2BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
      Requires<[IsThumb2, HasV6T2]>;

// fneg: exclusive-or $Rh with 0x80000000 (ARM and Thumb2 forms).
def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
          (VMOVDRR GPR:$Rl, (EORri GPR:$Rh, (i32 0x80000000)))>,
      Requires<[IsARM]>;
def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
          (VMOVDRR GPR:$Rl, (t2EORri GPR:$Rh, (i32 0x80000000)))>,
      Requires<[IsThumb2]>;
|
|
|
|
|
2014-11-26 08:46:26 +08:00
|
|
|
// VMOVSRR: copy two GPRs into two SPRs.  There is no selection pattern; the
// record exists for disassembly only.
let hasSideEffects = 0 in
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
                       (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
                       IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
                       [/* For disassembly only; pattern left blank */]>,
              Requires<[HasFPRegs]>,
              Sched<[WriteFPMOV]> {
  // Operand fields.
  // NOTE(review): $dst2 has no encoding field in this record; presumably it
  // is implied by $dst1 and checked by the custom decoder -- confirm.
  bits<5> dst1;
  bits<4> src1;
  bits<4> src2;

  // Operand encoding: the upper four bits of dst1 go to Inst{3-0} and its
  // lowest bit goes to Inst{5}; src1 and src2 occupy Inst{15-12} and
  // Inst{19-16}.
  let Inst{19-16} = src2;
  let Inst{15-12} = src1;
  let Inst{5}     = dst1{0};
  let Inst{3-0}   = dst1{4-1};

  // Fixed bits.
  let Inst{7-6}   = 0b00;

  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines.
  let D = VFPNeonDomain;

  let DecoderMethod = "DecodeVMOVSRR";
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// Move H->R, clearing top 16 bits.
// The GPR operand uses rGPR rather than GPR so that PC is rejected
// everywhere, and SP is rejected except with the HasV8Ops feature (D60704).
def VMOVRH : AVConv2I<0b11100001, 0b1001,
                      (outs rGPR:$Rt), (ins HPR:$Sn),
                      IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
                      [(set rGPR:$Rt, (arm_vmovrh HPR:$Sn))]>,
             Requires<[HasFPRegs16]>,
             Sched<[WriteFPMOV]> {
  // Operand fields.
  bits<4> Rt;
  bits<5> Sn;

  // Operand encoding: the upper four bits of Sn go to Inst{19-16} and its
  // lowest bit goes to Inst{7}; Rt occupies Inst{15-12}.
  let Inst{19-16} = Sn{4-1};
  let Inst{15-12} = Rt;
  let Inst{7}     = Sn{0};

  // Fixed bits.
  let Inst{6-5}   = 0b00;
  let Inst{3-0}   = 0b0000;

  // fullfp16 instructions are UNPREDICTABLE inside an IT block or with any
  // condition other than AL, so mark this as not predicable (D57823).
  let isUnpredicable = 1;
}
|
|
|
|
|
|
|
|
// Move R->H, clearing top 16 bits.
// The GPR operand uses rGPR rather than GPR so that PC is rejected
// everywhere, and SP is rejected except with the HasV8Ops feature (D60704).
def VMOVHR : AVConv4I<0b11100000, 0b1001,
                      (outs HPR:$Sn), (ins rGPR:$Rt),
                      IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
                      [(set HPR:$Sn, (arm_vmovhr rGPR:$Rt))]>,
             Requires<[HasFPRegs16]>,
             Sched<[WriteFPMOV]> {
  // Operand fields.
  bits<5> Sn;
  bits<4> Rt;

  // Operand encoding: the upper four bits of Sn go to Inst{19-16} and its
  // lowest bit goes to Inst{7}; Rt occupies Inst{15-12}.
  let Inst{19-16} = Sn{4-1};
  let Inst{15-12} = Rt;
  let Inst{7}     = Sn{0};

  // Fixed bits.
  let Inst{6-5}   = 0b00;
  let Inst{3-0}   = 0b0000;

  // fullfp16 instructions are UNPREDICTABLE inside an IT block or with any
  // condition other than AL, so mark this as not predicable (D57823).
  let isUnpredicable = 1;
}
|
|
|
|
|
2007-01-19 15:51:42 +08:00
|
|
|
// FMRDH: SPR -> GPR
|
|
|
|
// FMRDL: SPR -> GPR
|
|
|
|
// FMRRS: SPR -> GPR
|
2010-10-14 04:58:46 +08:00
|
|
|
// FMRX: SPR system reg -> GPR
|
2007-01-19 15:51:42 +08:00
|
|
|
// FMSRR: GPR -> SPR
|
2010-10-14 04:58:46 +08:00
|
|
|
// FMXR: GPR -> VFP system reg
|
2007-01-19 15:51:42 +08:00
|
|
|
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// Int -> FP:
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// Encoding helper for Int -> FP conversions with an SPR source ($Sm) and a
// DPR destination ($Dd).  Forwards every template argument to AVConv1I and
// adds the operand bit layout plus the [HasVFP2, HasDPVFP] predicates.
class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
                        bits<4> opcod4, dag oops, dag iops,
                        InstrItinClass itin, string opc, string asm,
                        list<dag> pattern>
  : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
             pattern> {
  // Operand fields.
  bits<5> Dd;
  bits<5> Sm;

  // Destination: low four bits in Inst{15-12}, top bit in Inst{22}.
  let Inst{22}    = Dd{4};
  let Inst{15-12} = Dd{3-0};

  // Source: upper four bits in Inst{3-0}, lowest bit in Inst{5}.
  let Inst{5}     = Sm{0};
  let Inst{3-0}   = Sm{4-1};

  let Predicates = [HasVFP2, HasDPVFP];
}
|
|
|
|
|
|
|
|
// Encoding helper for Int -> FP conversions with SPR source ($Sm) and SPR
// destination ($Sd).  Forwards every template argument to AVConv1In and adds
// the operand bit layout.
class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
                         bits<4> opcod4, dag oops, dag iops,
                         InstrItinClass itin, string opc, string asm,
                         list<dag> pattern>
  : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
              pattern> {
  // Operand fields.
  bits<5> Sd;
  bits<5> Sm;

  // Destination: upper four bits in Inst{15-12}, lowest bit in Inst{22}.
  let Inst{22}    = Sd{0};
  let Inst{15-12} = Sd{4-1};

  // Source: upper four bits in Inst{3-0}, lowest bit in Inst{5}.
  let Inst{5}     = Sm{0};
  let Inst{3-0}   = Sm{4-1};
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// Encoding helper for the half-precision Int -> FP conversions.  Forwards
// every template argument to AVConv1I, adds the $Sd/$Sm bit layout and
// requires HasFullFP16.
class AVConv1IHs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
                        bits<4> opcod4, dag oops, dag iops,
                        InstrItinClass itin, string opc, string asm,
                        list<dag> pattern>
  : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
             pattern> {
  // Operand fields.
  bits<5> Sd;
  bits<5> Sm;

  // Destination: upper four bits in Inst{15-12}, lowest bit in Inst{22}.
  let Inst{22}    = Sd{0};
  let Inst{15-12} = Sd{4-1};

  // Source: upper four bits in Inst{3-0}, lowest bit in Inst{5}.
  let Inst{5}     = Sm{0};
  let Inst{3-0}   = Sm{4-1};

  let Predicates = [HasFullFP16];
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// vcvt.f64.s32: SPR source, DPR destination.  No pattern is attached to the
// instruction itself.
def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
                               (outs DPR:$Dd), (ins SPR:$Sm),
                               IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
                               []>,
             Sched<[WriteFPCVT]> {
  // Inst{7} set selects the s32 form.
  let Inst{7} = 1;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
// Selection patterns for i32 -> f64 signed conversion via VSITOD.
let Predicates = [HasVFP2, HasDPVFP] in {
  // Source in a GPR: copy it into the SPR class first.
  def : VFPPat<(f64 (sint_to_fp GPR:$a)),
               (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;

  // Source is an alignedload32: load it with VLDRS and convert that.
  def : VFPPat<(f64 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
               (VSITOD (VLDRS addrmode5:$a))>;
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// vcvt.f32.s32: SPR source, SPR destination.  No pattern is attached to the
// instruction itself.
def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
                                (outs SPR:$Sd), (ins SPR:$Sm),
                                IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
                                []>,
             Sched<[WriteFPCVT]> {
  // Inst{7} set selects the s32 form.
  let Inst{7} = 1;

  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
// Selection patterns for i32 -> f32 signed conversion via VSITOS.
// Source in a GPR: copy it into the SPR class first.
def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)),
                   (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;

// Source is an alignedload32: load it with VLDRS and convert that.
def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
                   (VSITOS (VLDRS addrmode5:$a))>;
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// vcvt.f16.s32: SPR source, HPR destination.  No pattern is attached to the
// instruction itself.
def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
                               (outs HPR:$Sd), (ins SPR:$Sm),
                               IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm",
                               []>,
             Sched<[WriteFPCVT]> {
  // Inst{7} set selects the s32 form.
  let Inst{7} = 1;

  // fullfp16 instructions are UNPREDICTABLE inside an IT block or with any
  // condition other than AL, so mark this as not predicable (D57823).
  let isUnpredicable = 1;
}
|
|
|
|
|
2018-02-06 16:43:56 +08:00
|
|
|
// i32 -> f16 signed conversion: copy the GPR into the SPR class, then VSITOH.
def : VFPNoNEONPat<(f16 (sint_to_fp GPR:$a)),
                   (VSITOH (COPY_TO_REGCLASS GPR:$a, SPR))>;
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// vcvt.f64.u32: SPR source, DPR destination.  No pattern is attached to the
// instruction itself.
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
                               (outs DPR:$Dd), (ins SPR:$Sm),
                               IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
                               []>,
             Sched<[WriteFPCVT]> {
  // Inst{7} clear selects the u32 form.
  let Inst{7} = 0;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
// Selection patterns for i32 -> f64 unsigned conversion via VUITOD.
let Predicates = [HasVFP2, HasDPVFP] in {
  // Source in a GPR: copy it into the SPR class first.
  def : VFPPat<(f64 (uint_to_fp GPR:$a)),
               (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;

  // Source is an alignedload32: load it with VLDRS and convert that.
  def : VFPPat<(f64 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
               (VUITOD (VLDRS addrmode5:$a))>;
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// vcvt.f32.u32: SPR source, SPR destination.  No pattern is attached to the
// instruction itself.
def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
                                (outs SPR:$Sd), (ins SPR:$Sm),
                                IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
                                []>,
             Sched<[WriteFPCVT]> {
  // Inst{7} clear selects the u32 form.
  let Inst{7} = 0;

  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
// Selection patterns for i32 -> f32 unsigned conversion via VUITOS.
// Source in a GPR: copy it into the SPR class first.
def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)),
                   (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;

// Source is an alignedload32: load it with VLDRS and convert that.
def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
                   (VUITOS (VLDRS addrmode5:$a))>;
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// vcvt.f16.u32: SPR source, HPR destination.  No pattern is attached to the
// instruction itself.
def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
                               (outs HPR:$Sd), (ins SPR:$Sm),
                               IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm",
                               []>,
             Sched<[WriteFPCVT]> {
  // Inst{7} clear selects the u32 form.
  let Inst{7} = 0;

  // fullfp16 instructions are UNPREDICTABLE inside an IT block or with any
  // condition other than AL, so mark this as not predicable (D57823).
  let isUnpredicable = 1;
}
|
|
|
|
|
2018-02-06 16:43:56 +08:00
|
|
|
// i32 -> f16 unsigned conversion: copy the GPR into the SPR class, then
// VUITOH.
def : VFPNoNEONPat<(f16 (uint_to_fp GPR:$a)),
                   (VUITOH (COPY_TO_REGCLASS GPR:$a, SPR))>;
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// FP -> Int:
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// Encoding helper for FP -> Int conversions with a DPR source ($Dm) and an
// SPR destination ($Sd).  Forwards every template argument to AVConv1I and
// adds the operand bit layout plus the [HasVFP2, HasDPVFP] predicates.
class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
                        bits<4> opcod4, dag oops, dag iops,
                        InstrItinClass itin, string opc, string asm,
                        list<dag> pattern>
  : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
             pattern> {
  // Operand fields.
  bits<5> Sd;
  bits<5> Dm;

  // Destination: upper four bits in Inst{15-12}, lowest bit in Inst{22}.
  let Inst{22}    = Sd{0};
  let Inst{15-12} = Sd{4-1};

  // Source: low four bits in Inst{3-0}, top bit in Inst{5}.
  let Inst{5}     = Dm{4};
  let Inst{3-0}   = Dm{3-0};

  let Predicates = [HasVFP2, HasDPVFP];
}
|
|
|
|
|
|
|
|
// Encoding helper for FP -> Int conversions with SPR source ($Sm) and SPR
// destination ($Sd).  Forwards every template argument to AVConv1In and adds
// the operand bit layout.
class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
                         bits<4> opcod4, dag oops, dag iops,
                         InstrItinClass itin, string opc, string asm,
                         list<dag> pattern>
  : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
              pattern> {
  // Operand fields.
  bits<5> Sd;
  bits<5> Sm;

  // Destination: upper four bits in Inst{15-12}, lowest bit in Inst{22}.
  let Inst{22}    = Sd{0};
  let Inst{15-12} = Sd{4-1};

  // Source: upper four bits in Inst{3-0}, lowest bit in Inst{5}.
  let Inst{5}     = Sm{0};
  let Inst{3-0}   = Sm{4-1};
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// Encoding helper for the half-precision FP -> Int conversions.  Forwards
// every template argument to AVConv1I, adds the $Sd/$Sm bit layout and
// requires HasFullFP16.
class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
                        bits<4> opcod4, dag oops, dag iops,
                        InstrItinClass itin, string opc, string asm,
                        list<dag> pattern>
  : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
             pattern> {
  // Operand fields.
  bits<5> Sd;
  bits<5> Sm;

  // Destination: upper four bits in Inst{15-12}, lowest bit in Inst{22}.
  let Inst{22}    = Sd{0};
  let Inst{15-12} = Sd{4-1};

  // Source: upper four bits in Inst{3-0}, lowest bit in Inst{5}.
  let Inst{5}     = Sm{0};
  let Inst{3-0}   = Sm{4-1};

  let Predicates = [HasFullFP16];
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// Always set Z bit in the instruction, i.e. "round towards zero" variants.

// vcvt.s32.f64: DPR source, SPR destination.  No pattern is attached to the
// instruction itself.
def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
                                (outs SPR:$Sd), (ins DPR:$Dm),
                                IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
                                []>,
              Sched<[WriteFPCVT]> {
  // Z bit.
  let Inst{7} = 1;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
// Selection patterns for f64 -> i32 signed conversion via VTOSIZD.
let Predicates = [HasVFP2, HasDPVFP] in {
  // Result wanted in a GPR: convert, then copy out of the SPR class.
  def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
               (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;

  // Result goes straight to an alignedstore32: store the converted value
  // with VSTRS.
  def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))),
                               addrmode5:$ptr),
               (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// vcvt.s32.f32: SPR source, SPR destination.  No pattern is attached to the
// instruction itself.
def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
                                 (outs SPR:$Sd), (ins SPR:$Sm),
                                 IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
                                 []>,
              Sched<[WriteFPCVT]> {
  // Z bit.
  let Inst{7} = 1;

  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
// Selection patterns for f32 -> i32 signed conversion via VTOSIZS.
// Result wanted in a GPR: convert, then copy out of the SPR class.
def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
                   (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;

// Result goes straight to an alignedstore32: store the converted value with
// VSTRS.
def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
                                   addrmode5:$ptr),
                   (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// vcvt.s32.f16: HPR source, SPR destination.  No pattern is attached to the
// instruction itself.
def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
                                (outs SPR:$Sd), (ins HPR:$Sm),
                                IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm",
                                []>,
              Sched<[WriteFPCVT]> {
  // Z bit.
  let Inst{7} = 1;

  // fullfp16 instructions are UNPREDICTABLE inside an IT block or with any
  // condition other than AL, so mark this as not predicable (D57823).
  let isUnpredicable = 1;
}
|
|
|
|
|
2018-02-06 16:43:56 +08:00
|
|
|
// f16 -> i32 signed conversion: VTOSIZH, then copy out of the SPR class into
// a GPR.
def : VFPNoNEONPat<(i32 (fp_to_sint HPR:$a)),
                   (COPY_TO_REGCLASS (VTOSIZH HPR:$a), GPR)>;
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// vcvt.u32.f64: DPR source, SPR destination.  No pattern is attached to the
// instruction itself.
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
                                (outs SPR:$Sd), (ins DPR:$Dm),
                                IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
                                []>,
              Sched<[WriteFPCVT]> {
  // Z bit.
  let Inst{7} = 1;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
// Selection patterns for f64 -> i32 unsigned conversion via VTOUIZD.
let Predicates = [HasVFP2, HasDPVFP] in {
  // Result wanted in a GPR: convert, then copy out of the SPR class.
  def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
               (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;

  // Result goes straight to an alignedstore32: store the converted value
  // with VSTRS.
  def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))),
                               addrmode5:$ptr),
               (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// vcvt.u32.f32: SPR source, SPR destination.  No pattern is attached to the
// instruction itself.
def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
                                 (outs SPR:$Sd), (ins SPR:$Sm),
                                 IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
                                 []>,
              Sched<[WriteFPCVT]> {
  // Z bit.
  let Inst{7} = 1;

  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
// Select an f32 -> unsigned i32 conversion onto VTOUIZS and re-class the
// instruction's result as a GPR value.
def : VFPNoNEONPat<
    (i32 (fp_to_uint SPR:$a)),
    (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
|
|
|
|
|
2017-09-26 06:07:33 +08:00
|
|
|
// f32 -> unsigned i32 conversion whose result feeds an aligned 32-bit store:
// store the VTOUIZS result directly with VSTRS.
def : VFPNoNEONPat<
    (alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
                    addrmode5:$ptr),
    (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// vcvt.u32.f16 Sd, Sm -- f16 to unsigned 32-bit integer, Z-bit-set encoding.
// The inline pattern list is empty; an anonymous pattern in this file selects
// fp_to_uint on HPR onto this instruction.
def VTOUIZH
    : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
                        (outs SPR:$Sd), (ins HPR:$Sm),
                        IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm",
                        []>,
      Sched<[WriteFPCVT]> {
  let Inst{7} = 1; // Z bit

  // The v8.2a full-fp16 instructions are UNPREDICTABLE inside an IT block
  // (Thumb) or with any condition other than AL (ARM), so this must never be
  // conditionalised even though it carries a predicate operand.
  let isUnpredicable = 1;
}
|
|
|
|
|
2018-02-06 16:43:56 +08:00
|
|
|
// Select an f16 -> unsigned i32 conversion onto VTOUIZH and re-class the
// instruction's result as a GPR value.
def : VFPNoNEONPat<
    (i32 (fp_to_uint HPR:$a)),
    (COPY_TO_REGCLASS (VTOUIZH HPR:$a), GPR)>;
|
|
|
|
|
2010-02-09 06:02:41 +08:00
|
|
|
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
|
2010-08-04 05:31:55 +08:00
|
|
|
// "vcvtr" forms: the Z bit (Inst{7}) is 0 for every instruction in this group,
// and each one is declared as reading FPSCR.
let Uses = [FPSCR] in {

// vcvtr.s32.f64 Sd, Dm -- selected from the int_arm_vcvtr intrinsic on f64.
def VTOSIRD
    : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
                        (outs SPR:$Sd), (ins DPR:$Dm),
                        IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
                        [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>,
      Sched<[WriteFPCVT]> {
  let Inst{7} = 0; // Z bit
}

// vcvtr.s32.f32 Sd, Sm -- selected from the int_arm_vcvtr intrinsic on SPR.
def VTOSIRS
    : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
                         (outs SPR:$Sd), (ins SPR:$Sm),
                         IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
                         [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]>,
      Sched<[WriteFPCVT]> {
  let Inst{7} = 0; // Z bit
}

// vcvtr.s32.f16 Sd, Sm -- no selection pattern attached.
def VTOSIRH
    : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
                        (outs SPR:$Sd), (ins SPR:$Sm),
                        IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm",
                        []>,
      Sched<[WriteFPCVT]> {
  let Inst{7} = 0; // Z bit

  // Full-fp16 instructions are UNPREDICTABLE when conditionalised (IT block
  // in Thumb, non-AL condition in ARM), so never predicate this one.
  let isUnpredicable = 1;
}

// vcvtr.u32.f64 Sd, Dm -- selected from the int_arm_vcvtru intrinsic on f64.
def VTOUIRD
    : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
                        (outs SPR:$Sd), (ins DPR:$Dm),
                        IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
                        [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>,
      Sched<[WriteFPCVT]> {
  let Inst{7} = 0; // Z bit
}

// vcvtr.u32.f32 Sd, Sm -- selected from the int_arm_vcvtru intrinsic on SPR.
def VTOUIRS
    : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
                         (outs SPR:$Sd), (ins SPR:$Sm),
                         IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
                         [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]>,
      Sched<[WriteFPCVT]> {
  let Inst{7} = 0; // Z bit
}

// vcvtr.u32.f16 Sd, Sm -- no selection pattern attached.
def VTOUIRH
    : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
                        (outs SPR:$Sd), (ins SPR:$Sm),
                        IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm",
                        []>,
      Sched<[WriteFPCVT]> {
  let Inst{7} = 0; // Z bit

  // Full-fp16 instructions are UNPREDICTABLE when conditionalised (IT block
  // in Thumb, non-AL condition in ARM), so never predicate this one.
  let isUnpredicable = 1;
}

}
|
2010-02-09 06:02:41 +08:00
|
|
|
|
2017-08-22 19:08:21 +08:00
|
|
|
// v8.3-a JavaScript Convert to Signed fixed-point
// vjcvt.s32.f64 Sd, Dm. Gated on HasFPARMv8 and HasV8_3a; no selection
// pattern is attached.
def VJCVT
    : AVConv1IsD_Encode<0b11101, 0b11, 0b1001, 0b1011,
                        (outs SPR:$Sd), (ins DPR:$Dm),
                        IIC_fpCVTDI, "vjcvt", ".s32.f64\t$Sd, $Dm",
                        []>,
      Requires<[HasFPARMv8, HasV8_3a]> {
  let Inst{7} = 1; // Z bit
}
|
|
|
|
|
2010-02-12 02:17:16 +08:00
|
|
|
// Convert between floating-point and fixed-point
|
|
|
|
// Data type for fixed-point naming convention:
|
|
|
|
// S16 (U=0, sx=0) -> SH
|
|
|
|
// U16 (U=1, sx=0) -> UH
|
|
|
|
// S32 (U=0, sx=1) -> SL
|
|
|
|
// U32 (U=1, sx=1) -> UL
|
|
|
|
|
2011-12-23 03:45:01 +08:00
|
|
|
let Constraints = "$a = $dst" in {
|
2010-02-12 02:17:16 +08:00
|
|
|
|
|
|
|
// FP to Fixed-Point:
|
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// Single Precision register
|
2012-04-24 06:04:10 +08:00
|
|
|
// Encoding helper for the fixed-point conversions that operate on a
// single-precision register. Forwards every template argument unchanged to
// AVConv1XI and adds the placement of the 5-bit $dst register number:
// its lowest bit goes to Inst{22}, its upper four bits to Inst{15-12}.
class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
                          bit op5, dag oops, dag iops, InstrItinClass itin,
                          string opc, string asm, list<dag> pattern>
    : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
  bits<5> dst;

  // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
  let Inst{22}    = dst{0};
  let Inst{15-12} = dst{4-1};
}
|
|
|
|
|
|
|
|
// Double Precision register
|
2012-04-24 06:04:10 +08:00
|
|
|
// Encoding helper for the fixed-point conversions that operate on a
// double-precision register. Forwards every template argument unchanged to
// AVConv1XI and adds the placement of the 5-bit $dst register number:
// its highest bit goes to Inst{22}, its lower four bits to Inst{15-12}.
// Every instruction built from this class requires HasVFP2 and HasDPVFP.
class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
                          bit op5, dag oops, dag iops, InstrItinClass itin,
                          string opc, string asm, list<dag> pattern>
    : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
  bits<5> dst;

  // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
  let Inst{22}    = dst{4};
  let Inst{15-12} = dst{3-0};

  let Predicates = [HasVFP2, HasDPVFP];
}
|
|
|
|
|
[ARM] Make fullfp16 instructions not conditionalisable.
More or less all the instructions defined in the v8.2a full-fp16
extension are defined as UNPREDICTABLE if you put them in an IT block
(Thumb) or use with any condition other than AL (ARM). LLVM didn't
know that, and was happy to conditionalise them.
In order to force these instructions to count as not predicable, I had
to make a small Tablegen change. The code generation back end mostly
decides if an instruction was predicable by looking for something it
can identify as a predicate operand; there's an isPredicable bit flag
that overrides that check in the positive direction, but nothing that
overrides it in the negative direction.
(I considered the alternative approach of actually removing the
predicate operand from those instructions, but thought that it would
be more painful overall for instructions differing only in data type
to have different shapes of operand list. This way, the only code that
has to notice the difference is the if-converter.)
So I've added an isUnpredicable bit alongside isPredicable, and set
that bit on the right subset of FP16 instructions, and also on the
VSEL, VMAXNM/VMINNM and VRINT[ANPM] families which should be
unpredicable for all data types.
I've included a couple of representative regression tests, both of
which previously caused an fp16 instruction to be conditionalised in
ARM state and (with -arm-no-restrict-it) to be put in an IT block in
Thumb.
Reviewers: SjoerdMeijer, t.p.northover, efriedma
Reviewed By: efriedma
Subscribers: jdoerfert, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57823
llvm-svn: 354768
2019-02-25 18:39:53 +08:00
|
|
|
// Half-precision FP -> fixed-point conversions (require HasFullFP16).
// The v8.2a full-fp16 instructions are UNPREDICTABLE inside an IT block
// (Thumb) or with any condition other than AL (ARM), so the whole group is
// marked as never conditionalisable.
let isUnpredicable = 1 in {

// vcvt.s16.f16 $dst, $a, $fbits
def VTOSHH
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0,
                          (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                          IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>,
      Requires<[HasFullFP16]>,
      Sched<[WriteFPCVT]>;

// vcvt.u16.f16 $dst, $a, $fbits
def VTOUHH
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0,
                          (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                          IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>,
      Requires<[HasFullFP16]>,
      Sched<[WriteFPCVT]>;

// vcvt.s32.f16 $dst, $a, $fbits
def VTOSLH
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1,
                          (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
                          IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>,
      Requires<[HasFullFP16]>,
      Sched<[WriteFPCVT]>;

// vcvt.u32.f16 $dst, $a, $fbits
def VTOULH
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1,
                          (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
                          IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>,
      Requires<[HasFullFP16]>,
      Sched<[WriteFPCVT]>;

} // End of 'let isUnpredicable = 1 in'
|
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.s16.f32 $dst, $a, $fbits -- f32 to signed 16-bit fixed-point.
// No selection pattern is attached.
def VTOSHS
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
                          (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                          IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.u16.f32 $dst, $a, $fbits -- f32 to unsigned 16-bit fixed-point.
// No selection pattern is attached.
def VTOUHS
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0,
                          (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                          IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.s32.f32 $dst, $a, $fbits -- f32 to signed 32-bit fixed-point.
// No selection pattern is attached.
def VTOSLS
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1,
                          (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
                          IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.u32.f32 $dst, $a, $fbits -- f32 to unsigned 32-bit fixed-point.
// No selection pattern is attached.
def VTOULS
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
                          (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
                          IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.s16.f64 $dst, $a, $fbits -- f64 to signed 16-bit fixed-point.
// No selection pattern is attached.
def VTOSHD
    : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
                          (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
                          IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.u16.f64 $dst, $a, $fbits -- f64 to unsigned 16-bit fixed-point.
// No selection pattern is attached.
def VTOUHD
    : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
                          (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
                          IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.s32.f64 $dst, $a, $fbits -- f64 to signed 32-bit fixed-point.
// No selection pattern is attached.
def VTOSLD
    : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
                          (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
                          IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.u32.f64 $dst, $a, $fbits -- f64 to unsigned 32-bit fixed-point.
// No selection pattern is attached.
def VTOULD
    : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
                          (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
                          IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
|
|
|
// Fixed-Point to FP:
|
|
|
|
|
[ARM] Make fullfp16 instructions not conditionalisable.
More or less all the instructions defined in the v8.2a full-fp16
extension are defined as UNPREDICTABLE if you put them in an IT block
(Thumb) or use with any condition other than AL (ARM). LLVM didn't
know that, and was happy to conditionalise them.
In order to force these instructions to count as not predicable, I had
to make a small Tablegen change. The code generation back end mostly
decides if an instruction was predicable by looking for something it
can identify as a predicate operand; there's an isPredicable bit flag
that overrides that check in the positive direction, but nothing that
overrides it in the negative direction.
(I considered the alternative approach of actually removing the
predicate operand from those instructions, but thought that it would
be more painful overall for instructions differing only in data type
to have different shapes of operand list. This way, the only code that
has to notice the difference is the if-converter.)
So I've added an isUnpredicable bit alongside isPredicable, and set
that bit on the right subset of FP16 instructions, and also on the
VSEL, VMAXNM/VMINNM and VRINT[ANPM] families which should be
unpredicable for all data types.
I've included a couple of representative regression tests, both of
which previously caused an fp16 instruction to be conditionalised in
ARM state and (with -arm-no-restrict-it) to be put in an IT block in
Thumb.
Reviewers: SjoerdMeijer, t.p.northover, efriedma
Reviewed By: efriedma
Subscribers: jdoerfert, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57823
llvm-svn: 354768
2019-02-25 18:39:53 +08:00
|
|
|
// Fixed-point -> half-precision FP conversions (require HasFullFP16).
// The v8.2a full-fp16 instructions are UNPREDICTABLE inside an IT block
// (Thumb) or with any condition other than AL (ARM), so the whole group is
// marked as never conditionalisable.
let isUnpredicable = 1 in {

// vcvt.f16.s16 $dst, $a, $fbits
def VSHTOH
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0,
                          (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                          IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>,
      Requires<[HasFullFP16]>,
      Sched<[WriteFPCVT]>;

// vcvt.f16.u16 $dst, $a, $fbits
def VUHTOH
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0,
                          (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                          IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>,
      Requires<[HasFullFP16]>,
      Sched<[WriteFPCVT]>;

// vcvt.f16.s32 $dst, $a, $fbits
def VSLTOH
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1,
                          (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
                          IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>,
      Requires<[HasFullFP16]>,
      Sched<[WriteFPCVT]>;

// vcvt.f16.u32 $dst, $a, $fbits
def VULTOH
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1,
                          (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
                          IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>,
      Requires<[HasFullFP16]>,
      Sched<[WriteFPCVT]>;

} // End of 'let isUnpredicable = 1 in'
|
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.f32.s16 $dst, $a, $fbits -- signed 16-bit fixed-point to f32.
// No selection pattern is attached.
def VSHTOS
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
                          (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                          IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.f32.u16 $dst, $a, $fbits -- unsigned 16-bit fixed-point to f32.
// No selection pattern is attached.
def VUHTOS
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
                          (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                          IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.f32.s32 $dst, $a, $fbits -- signed 32-bit fixed-point to f32.
// No selection pattern is attached.
def VSLTOS
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
                          (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
                          IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.f32.u32 $dst, $a, $fbits -- unsigned 32-bit fixed-point to f32.
// No selection pattern is attached.
def VULTOS
    : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
                          (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
                          IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.f64.s16 $dst, $a, $fbits -- signed 16-bit fixed-point to f64.
// No selection pattern is attached.
def VSHTOD
    : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
                          (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
                          IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.f64.u16 $dst, $a, $fbits -- unsigned 16-bit fixed-point to f64.
// No selection pattern is attached.
def VUHTOD
    : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
                          (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
                          IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.f64.s32 $dst, $a, $fbits -- signed 32-bit fixed-point to f64.
// No selection pattern is attached.
def VSLTOD
    : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
                          (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
                          IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// vcvt.f64.u32 $dst, $a, $fbits -- unsigned 32-bit fixed-point to f64.
// No selection pattern is attached.
def VULTOD
    : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
                          (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
                          IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>,
      Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2011-12-23 03:45:01 +08:00
|
|
|
} // End of 'let Constraints = "$a = $dst" in'
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2007-01-19 15:51:42 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
2011-07-07 16:28:52 +08:00
|
|
|
// FP Multiply-Accumulate Operations.
|
2007-01-19 15:51:42 +08:00
|
|
|
//
|
|
|
|
|
2010-11-13 04:32:20 +08:00
|
|
|
// vmla.f64 Dd, Dn, Dm -- double-precision multiply-accumulate.
// The accumulator input $Ddin is tied to the result register $Dd. The inline
// pattern matches the product-first operand order: fadd_mlx(fmul_su, acc).
def VMLAD
    : ADbI<0b11100, 0b00, 0, 0,
           (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
           IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
           [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
                                    (f64 DPR:$Ddin)))]>,
      RegConstraint<"$Ddin = $Dd">,
      Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
      Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
// vmla.f32 Sd, Sn, Sm -- single-precision multiply-accumulate.
// The accumulator input $Sdin is tied to the result register $Sd. The inline
// pattern matches the product-first operand order: fadd_mlx(fmul_su, acc).
def VMLAS
    : ASbIn<0b11100, 0b00, 0, 0,
            (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
            IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
            [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
                                     SPR:$Sdin))]>,
      RegConstraint<"$Sdin = $Sd">,
      Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
      Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// vmla.f16 Sd, Sn, Sm -- half-precision multiply-accumulate.
// The accumulator input $Sdin is tied to the result register $Sd. The inline
// pattern matches the product-first operand order: fadd_mlx(fmul_su, acc).
def VMLAH
    : AHbI<0b11100, 0b00, 0, 0,
           (outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
           IIC_fpMAC16, "vmla", ".f16\t$Sd, $Sn, $Sm",
           [(set HPR:$Sd, (fadd_mlx (fmul_su HPR:$Sn, HPR:$Sm),
                                    HPR:$Sdin))]>,
      RegConstraint<"$Sdin = $Sd">,
      Requires<[HasFullFP16,UseFPVMLx]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2010-12-06 06:04:16 +08:00
|
|
|
// Accumulator-first operand order of the f64 multiply-add, also selected
// onto VMLAD.
def : Pat<
    (fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
    (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
  Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
|
2010-12-06 06:04:16 +08:00
|
|
|
// Accumulator-first operand order of the f32 multiply-add, also selected
// onto VMLAS.
def : Pat<
    (fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
    (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
  Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
|
2018-02-06 16:43:56 +08:00
|
|
|
// Accumulator-first operand order of the f16 multiply-add, also selected
// onto VMLAH.
def : Pat<
    (fadd_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)),
    (VMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
  Requires<[HasFullFP16,DontUseNEONForFP, UseFPVMLx]>;
|
2018-02-06 16:43:56 +08:00
|
|
|
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2010-11-13 04:32:20 +08:00
|
|
|
// vmls.f64 Dd, Dn, Dm -- double-precision multiply-subtract.
// The accumulator input $Ddin is tied to the result register $Dd. The inline
// pattern matches the negated product added to the accumulator.
def VMLSD
    : ADbI<0b11100, 0b00, 1, 0,
           (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
           IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
           [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
                                    (f64 DPR:$Ddin)))]>,
      RegConstraint<"$Ddin = $Dd">,
      Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
      Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
// vmls.f32 Sd, Sn, Sm -- single-precision multiply-subtract.
// The accumulator input $Sdin is tied to the result register $Sd. The inline
// pattern matches the negated product added to the accumulator.
def VMLSS
    : ASbIn<0b11100, 0b00, 1, 0,
            (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
            IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
            [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
                                     SPR:$Sdin))]>,
      RegConstraint<"$Sdin = $Sd">,
      Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
      Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VMLSH : AHbI<0b11100, 0b00, 1, 0,
|
2018-02-06 16:43:56 +08:00
|
|
|
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpMAC16, "vmls", ".f16\t$Sd, $Sn, $Sm",
|
2018-02-06 16:43:56 +08:00
|
|
|
[(set HPR:$Sd, (fadd_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)),
|
|
|
|
HPR:$Sdin))]>,
|
2016-01-25 18:26:26 +08:00
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasFullFP16,UseFPVMLx]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2010-12-06 06:04:16 +08:00
|
|
|
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
2010-11-13 04:32:20 +08:00
|
|
|
(VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
|
2010-12-06 06:04:16 +08:00
|
|
|
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
|
2010-11-13 04:32:20 +08:00
|
|
|
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
|
2018-02-06 16:43:56 +08:00
|
|
|
def : Pat<(fsub_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)),
|
|
|
|
(VMLSH HPR:$dstin, HPR:$a, HPR:$b)>,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
|
2009-08-05 02:44:29 +08:00
|
|
|
|
2010-11-13 04:32:20 +08:00
|
|
|
def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
|
2010-12-06 06:04:16 +08:00
|
|
|
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
|
|
|
|
(f64 DPR:$Ddin)))]>,
|
2010-11-13 04:32:20 +08:00
|
|
|
RegConstraint<"$Ddin = $Dd">,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
|
2010-12-06 06:04:16 +08:00
|
|
|
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
|
|
|
|
SPR:$Sdin))]>,
|
2010-11-13 04:32:20 +08:00
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VNMLAH : AHbI<0b11100, 0b01, 1, 0,
|
2018-02-06 16:43:56 +08:00
|
|
|
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpMAC16, "vnmla", ".f16\t$Sd, $Sn, $Sm",
|
2018-02-06 16:43:56 +08:00
|
|
|
[(set HPR:$Sd, (fsub_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)),
|
|
|
|
HPR:$Sdin))]>,
|
2016-01-25 18:26:26 +08:00
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasFullFP16,UseFPVMLx]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
[ARM] Add missing selection patterns for vnmla
For the following function:
double fn1(double d0, double d1, double d2) {
double a = -d0 - d1 * d2;
return a;
}
on ARM, LLVM generates code along the lines of
vneg.f64 d0, d0
vmls.f64 d0, d1, d2
i.e., a negate and a multiply-subtract.
The attached patch adds instruction selection patterns to allow it to generate the single instruction
vnmla.f64 d0, d1, d2
(multiply-add with negation) instead, like GCC does.
Committed on behalf of @gergo- (Gergö Barany)
Differential Revision: https://reviews.llvm.org/D35911
llvm-svn: 313972
2017-09-22 17:50:52 +08:00
|
|
|
// (-(a * b) - dst) -> -(dst + (a * b))
|
2010-12-06 06:04:16 +08:00
|
|
|
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
|
2010-11-13 04:32:20 +08:00
|
|
|
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
|
2010-12-06 06:04:16 +08:00
|
|
|
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
|
2010-11-13 04:32:20 +08:00
|
|
|
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
|
2018-02-06 16:43:56 +08:00
|
|
|
def : Pat<(fsub_mlx (fneg (fmul_su HPR:$a, HPR:$b)), HPR:$dstin),
|
|
|
|
(VNMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
|
2010-10-14 09:02:08 +08:00
|
|
|
|
[ARM] Add missing selection patterns for vnmla
For the following function:
double fn1(double d0, double d1, double d2) {
double a = -d0 - d1 * d2;
return a;
}
on ARM, LLVM generates code along the lines of
vneg.f64 d0, d0
vmls.f64 d0, d1, d2
i.e., a negate and a multiply-subtract.
The attached patch adds instruction selection patterns to allow it to generate the single instruction
vnmla.f64 d0, d1, d2
(multiply-add with negation) instead, like GCC does.
Committed on behalf of @gergo- (Gergö Barany)
Differential Revision: https://reviews.llvm.org/D35911
llvm-svn: 313972
2017-09-22 17:50:52 +08:00
|
|
|
// (-dst - (a * b)) -> -(dst + (a * b))
|
|
|
|
def : Pat<(fsub_mlx (fneg DPR:$dstin), (fmul_su DPR:$a, (f64 DPR:$b))),
|
|
|
|
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
|
[ARM] Add missing selection patterns for vnmla
For the following function:
double fn1(double d0, double d1, double d2) {
double a = -d0 - d1 * d2;
return a;
}
on ARM, LLVM generates code along the lines of
vneg.f64 d0, d0
vmls.f64 d0, d1, d2
i.e., a negate and a multiply-subtract.
The attached patch adds instruction selection patterns to allow it to generate the single instruction
vnmla.f64 d0, d1, d2
(multiply-add with negation) instead, like GCC does.
Committed on behalf of @gergo- (Gergö Barany)
Differential Revision: https://reviews.llvm.org/D35911
llvm-svn: 313972
2017-09-22 17:50:52 +08:00
|
|
|
def : Pat<(fsub_mlx (fneg SPR:$dstin), (fmul_su SPR:$a, SPR:$b)),
|
|
|
|
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
|
2018-02-06 16:43:56 +08:00
|
|
|
def : Pat<(fsub_mlx (fneg HPR:$dstin), (fmul_su HPR:$a, HPR:$b)),
|
|
|
|
(VNMLAH HPR:$dstin, HPR:$a, HPR:$b)>,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
|
[ARM] Add missing selection patterns for vnmla
For the following function:
double fn1(double d0, double d1, double d2) {
double a = -d0 - d1 * d2;
return a;
}
on ARM, LLVM generates code along the lines of
vneg.f64 d0, d0
vmls.f64 d0, d1, d2
i.e., a negate and a multiply-subtract.
The attached patch adds instruction selection patterns to allow it to generate the single instruction
vnmla.f64 d0, d1, d2
(multiply-add with negation) instead, like GCC does.
Committed on behalf of @gergo- (Gergö Barany)
Differential Revision: https://reviews.llvm.org/D35911
llvm-svn: 313972
2017-09-22 17:50:52 +08:00
|
|
|
|
2010-11-13 04:32:20 +08:00
|
|
|
def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
|
2010-12-06 06:04:16 +08:00
|
|
|
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
|
|
|
|
(f64 DPR:$Ddin)))]>,
|
2010-11-13 04:32:20 +08:00
|
|
|
RegConstraint<"$Ddin = $Dd">,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
|
2010-12-06 06:04:16 +08:00
|
|
|
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
|
2010-11-13 04:32:20 +08:00
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VNMLSH : AHbI<0b11100, 0b01, 0, 0,
|
2018-02-06 16:43:56 +08:00
|
|
|
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpMAC16, "vnmls", ".f16\t$Sd, $Sn, $Sm",
|
2018-02-06 16:43:56 +08:00
|
|
|
[(set HPR:$Sd, (fsub_mlx (fmul_su HPR:$Sn, HPR:$Sm), HPR:$Sdin))]>,
|
2016-01-25 18:26:26 +08:00
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasFullFP16,UseFPVMLx]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2010-12-06 06:04:16 +08:00
|
|
|
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
|
2010-11-13 04:32:20 +08:00
|
|
|
(VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>;
|
2010-12-06 06:04:16 +08:00
|
|
|
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
|
2010-11-13 04:32:20 +08:00
|
|
|
(VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
|
2018-02-06 16:43:56 +08:00
|
|
|
def : Pat<(fsub_mlx (fmul_su HPR:$a, HPR:$b), HPR:$dstin),
|
|
|
|
(VNMLSH HPR:$dstin, HPR:$a, HPR:$b)>,
|
2018-10-17 15:26:35 +08:00
|
|
|
Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Fused FP Multiply-Accumulate Operations.
|
|
|
|
//
|
|
|
|
def VFMAD : ADbI<0b11101, 0b10, 0, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm",
|
|
|
|
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
|
|
|
|
(f64 DPR:$Ddin)))]>,
|
|
|
|
RegConstraint<"$Ddin = $Dd">,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
|
|
|
|
def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm",
|
|
|
|
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
|
|
|
|
SPR:$Sdin))]>,
|
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
2012-01-22 20:07:33 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines.
|
|
|
|
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VFMAH : AHbI<0b11101, 0b10, 0, 0,
|
2018-02-06 16:43:56 +08:00
|
|
|
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm",
|
2018-02-06 16:43:56 +08:00
|
|
|
[(set HPR:$Sd, (fadd_mlx (fmul_su HPR:$Sn, HPR:$Sm),
|
|
|
|
HPR:$Sdin))]>,
|
2016-01-25 18:26:26 +08:00
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFullFP16,UseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
|
|
|
(VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
|
|
|
|
(VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
|
2012-04-11 13:33:07 +08:00
|
|
|
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
|
2018-02-06 16:43:56 +08:00
|
|
|
def : Pat<(fadd_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)),
|
|
|
|
(VFMAH HPR:$dstin, HPR:$a, HPR:$b)>,
|
|
|
|
Requires<[HasFullFP16,DontUseNEONForFP,UseFusedMAC]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
|
2012-04-11 05:40:28 +08:00
|
|
|
// Match @llvm.fma.* intrinsics
|
2012-04-28 02:51:24 +08:00
|
|
|
// (fma x, y, z) -> (vfma z, x, y)
|
|
|
|
def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
|
2012-04-11 05:40:28 +08:00
|
|
|
(VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP]>;
|
2012-04-28 02:51:24 +08:00
|
|
|
def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
|
2012-04-11 05:40:28 +08:00
|
|
|
(VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
|
|
|
Requires<[HasVFP4]>;
|
2019-05-26 19:34:30 +08:00
|
|
|
def : Pat<(f16 (fma HPR:$Sn, HPR:$Sm, HPR:$Sdin)),
|
|
|
|
(VFMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
|
|
|
|
Requires<[HasFullFP16]>;
|
2012-04-11 05:40:28 +08:00
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
def VFMSD : ADbI<0b11101, 0b10, 1, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm",
|
|
|
|
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
|
|
|
|
(f64 DPR:$Ddin)))]>,
|
|
|
|
RegConstraint<"$Ddin = $Dd">,
|
2017-06-09 17:19:09 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
|
|
|
|
def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm",
|
|
|
|
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
|
|
|
|
SPR:$Sdin))]>,
|
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2017-06-09 17:19:09 +08:00
|
|
|
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
2012-01-22 20:07:33 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines.
|
|
|
|
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VFMSH : AHbI<0b11101, 0b10, 1, 0,
|
2018-02-06 16:43:56 +08:00
|
|
|
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm",
|
2018-02-06 16:43:56 +08:00
|
|
|
[(set HPR:$Sd, (fadd_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)),
|
|
|
|
HPR:$Sdin))]>,
|
2016-01-25 18:26:26 +08:00
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2017-06-09 17:19:09 +08:00
|
|
|
Requires<[HasFullFP16,UseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
|
|
|
(VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
|
|
|
|
(VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
|
2012-04-11 13:33:07 +08:00
|
|
|
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
|
2018-02-06 16:43:56 +08:00
|
|
|
def : Pat<(fsub_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)),
|
|
|
|
(VFMSH HPR:$dstin, HPR:$a, HPR:$b)>,
|
|
|
|
Requires<[HasFullFP16,DontUseNEONForFP,UseFusedMAC]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
|
2012-04-11 14:59:47 +08:00
|
|
|
// Match @llvm.fma.* intrinsics
|
2012-04-28 02:51:24 +08:00
|
|
|
// (fma (fneg x), y, z) -> (vfms z, x, y)
|
|
|
|
def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
|
2012-04-11 14:59:47 +08:00
|
|
|
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP]>;
|
2012-04-28 02:51:24 +08:00
|
|
|
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
|
2012-04-11 14:59:47 +08:00
|
|
|
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
|
|
|
Requires<[HasVFP4]>;
|
2020-01-05 18:54:49 +08:00
|
|
|
def : Pat<(f16 (fma (fneg HPR:$Sn), HPR:$Sm, HPR:$Sdin)),
|
|
|
|
(VFMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
|
|
|
|
Requires<[HasFullFP16]>;
|
2012-04-28 02:51:24 +08:00
|
|
|
// (fma x, (fneg y), z) -> (vfms z, x, y)
|
|
|
|
def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
|
2012-04-11 14:59:47 +08:00
|
|
|
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP]>;
|
2012-04-28 02:51:24 +08:00
|
|
|
def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
|
2012-04-11 14:59:47 +08:00
|
|
|
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
|
|
|
Requires<[HasVFP4]>;
|
2020-01-05 18:54:49 +08:00
|
|
|
def : Pat<(f16 (fma HPR:$Sn, (fneg HPR:$Sm), HPR:$Sdin)),
|
|
|
|
(VFMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
|
|
|
|
Requires<[HasFullFP16]>;
|
2012-04-11 14:59:47 +08:00
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
|
|
|
|
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
|
|
|
|
(f64 DPR:$Ddin)))]>,
|
|
|
|
RegConstraint<"$Ddin = $Dd">,
|
2017-06-13 21:04:32 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
|
|
|
|
def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm",
|
|
|
|
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
|
|
|
|
SPR:$Sdin))]>,
|
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2017-06-13 21:04:32 +08:00
|
|
|
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
2012-01-22 20:07:33 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines.
|
|
|
|
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VFNMAH : AHbI<0b11101, 0b01, 1, 0,
|
2018-02-06 16:43:56 +08:00
|
|
|
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm",
|
2018-02-06 16:43:56 +08:00
|
|
|
[(set HPR:$Sd, (fsub_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)),
|
|
|
|
HPR:$Sdin))]>,
|
2016-01-25 18:26:26 +08:00
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2017-06-13 21:04:32 +08:00
|
|
|
Requires<[HasFullFP16,UseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
|
|
|
|
(VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
|
|
|
|
(VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
|
2012-04-11 13:33:07 +08:00
|
|
|
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
|
2012-04-11 09:21:25 +08:00
|
|
|
// Match @llvm.fma.* intrinsics
|
2012-04-28 02:51:24 +08:00
|
|
|
// (fneg (fma x, y, z)) -> (vfnma z, x, y)
|
|
|
|
def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
|
2012-04-11 09:21:25 +08:00
|
|
|
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP]>;
|
2012-04-28 02:51:24 +08:00
|
|
|
def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
|
2012-04-11 09:21:25 +08:00
|
|
|
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
|
|
|
Requires<[HasVFP4]>;
|
2020-01-05 18:54:49 +08:00
|
|
|
def : Pat<(fneg (fma (f16 HPR:$Sn), (f16 HPR:$Sm), (f16 HPR:$Sdin))),
|
|
|
|
(VFNMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
|
|
|
|
Requires<[HasFullFP16]>;
|
2012-04-28 02:51:24 +08:00
|
|
|
// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
|
|
|
|
def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
|
2012-04-11 14:59:47 +08:00
|
|
|
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP]>;
|
2012-04-28 02:51:24 +08:00
|
|
|
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
|
2012-04-11 14:59:47 +08:00
|
|
|
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
|
|
|
Requires<[HasVFP4]>;
|
2020-01-05 18:54:49 +08:00
|
|
|
def : Pat<(f16 (fma (fneg HPR:$Sn), HPR:$Sm, (fneg HPR:$Sdin))),
|
|
|
|
(VFNMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
|
|
|
|
Requires<[HasFullFP16]>;
|
2012-04-11 09:21:25 +08:00
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm",
|
|
|
|
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
|
|
|
|
(f64 DPR:$Ddin)))]>,
|
|
|
|
RegConstraint<"$Ddin = $Dd">,
|
2017-06-13 21:04:32 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
|
|
|
|
def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
|
|
|
|
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
|
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2017-06-13 21:04:32 +08:00
|
|
|
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
2012-01-22 20:07:33 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines.
|
|
|
|
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VFNMSH : AHbI<0b11101, 0b01, 0, 0,
|
2018-02-06 16:43:56 +08:00
|
|
|
(outs HPR:$Sd), (ins HPR:$Sdin, HPR:$Sn, HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm",
|
2018-02-06 16:43:56 +08:00
|
|
|
[(set HPR:$Sd, (fsub_mlx (fmul_su HPR:$Sn, HPR:$Sm), HPR:$Sdin))]>,
|
2016-01-25 18:26:26 +08:00
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2017-06-13 21:04:32 +08:00
|
|
|
Requires<[HasFullFP16,UseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
|
|
|
|
(VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
|
|
|
|
(VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
|
2012-04-11 13:33:07 +08:00
|
|
|
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2012-04-11 14:59:47 +08:00
|
|
|
// Match @llvm.fma.* intrinsics
|
2012-06-21 14:10:00 +08:00
|
|
|
|
|
|
|
// (fma x, y, (fneg z)) -> (vfnms z, x, y)
|
|
|
|
def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
|
|
|
|
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP]>;
|
2012-06-21 14:10:00 +08:00
|
|
|
def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
|
|
|
|
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
|
|
|
Requires<[HasVFP4]>;
|
2020-01-05 18:54:49 +08:00
|
|
|
def : Pat<(f16 (fma HPR:$Sn, HPR:$Sm, (fneg HPR:$Sdin))),
|
|
|
|
(VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
|
|
|
|
Requires<[HasFullFP16]>;
|
2012-04-28 02:51:24 +08:00
|
|
|
// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
|
|
|
|
def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
|
2012-04-11 14:59:47 +08:00
|
|
|
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP]>;
|
2012-04-28 02:51:24 +08:00
|
|
|
def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
|
2012-04-11 14:59:47 +08:00
|
|
|
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
|
|
|
Requires<[HasVFP4]>;
|
2020-01-05 18:54:49 +08:00
|
|
|
def : Pat<(fneg (f16 (fma (fneg HPR:$Sn), HPR:$Sm, HPR:$Sdin))),
|
|
|
|
(VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
|
|
|
|
Requires<[HasFullFP16]>;
|
2012-04-28 02:51:24 +08:00
|
|
|
// (fneg (fma x, (fneg y), z)) -> (vfnms z, x, y)
|
|
|
|
def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
|
2012-04-11 14:59:47 +08:00
|
|
|
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP]>;
|
2012-04-28 02:51:24 +08:00
|
|
|
def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
|
2012-04-11 14:59:47 +08:00
|
|
|
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
|
|
|
|
Requires<[HasVFP4]>;
|
2020-01-05 18:54:49 +08:00
|
|
|
def : Pat<(fneg (f16 (fma HPR:$Sn, (fneg HPR:$Sm), HPR:$Sdin))),
|
|
|
|
(VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
|
|
|
|
Requires<[HasFullFP16]>;
|
2012-04-11 14:59:47 +08:00
|
|
|
|
2007-01-19 15:51:42 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// FP Conditional moves.
|
|
|
|
//
|
|
|
|
|
2014-11-26 08:46:26 +08:00
|
|
|
let hasSideEffects = 0 in {
|
2013-08-22 17:57:11 +08:00
|
|
|
def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p),
|
|
|
|
IIC_fpUNA64,
|
|
|
|
[(set (f64 DPR:$Dd),
|
|
|
|
(ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>,
|
[ARM] Stop using scalar FP instructions in integer-only MVE mode.
If you compile with `-mattr=+mve` (enabling integer MVE instructions
but not floating-point ones), then the scalar FP //registers// exist
and it's legal to move things in and out of them, load and store them,
but it's not legal to do arithmetic on them.
In D60708, the calls to `addRegisterClass` in ARMISelLowering that
enable use of the scalar FP registers became conditionalised on
`Subtarget->hasFPRegs()` instead of `Subtarget->hasVFP2Base()`, so
that loads, stores and moves of those registers would work. But I
didn't realise that that would also enable all the operations on those
types by default.
Now, if the target doesn't have basic VFP, we follow up those
`addRegisterClass` calls by turning back off all the nontrivial
operations you can perform on f32 and f64. That causes several
knock-on failures, which are fixed by allowing the `VMOVDcc` and
`VMOVScc` instructions to be selected even if all you have is
`HasFPRegs`, and adjusting several checks for 'is this a double in a
single-precision-only world?' to the more general 'is this any FP type
we can't do arithmetic on?'. Between those, the whole of the
`float-ops.ll` and `fp16-instructions.ll` tests can now run in
MVE-without-FP mode and generate correct-looking code.
One odd side effect is that I had to relax the check lines in that
test so that they permit test functions like `add_f` to be generated
as tailcalls to software FP library functions, instead of ordinary
calls. Doing that is entirely legal, but the mystery is why this is
the first RUN line that's needed the relaxation: on the usual kind of
non-FP target, no tailcalls ever seem to be generated. Going by the
llc messages, I think `SoftenFloatResult` must be perturbing the code
generation in some way, but that's as much as I can guess.
Reviewers: dmgreen, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63938
llvm-svn: 364909
2019-07-02 19:26:00 +08:00
|
|
|
RegConstraint<"$Dn = $Dd">, Requires<[HasFPRegs64]>;
|
2013-08-22 17:57:11 +08:00
|
|
|
|
|
|
|
def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
|
|
|
|
IIC_fpUNA32,
|
|
|
|
[(set (f32 SPR:$Sd),
|
|
|
|
(ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
|
[ARM] Stop using scalar FP instructions in integer-only MVE mode.
If you compile with `-mattr=+mve` (enabling integer MVE instructions
but not floating-point ones), then the scalar FP //registers// exist
and it's legal to move things in and out of them, load and store them,
but it's not legal to do arithmetic on them.
In D60708, the calls to `addRegisterClass` in ARMISelLowering that
enable use of the scalar FP registers became conditionalised on
`Subtarget->hasFPRegs()` instead of `Subtarget->hasVFP2Base()`, so
that loads, stores and moves of those registers would work. But I
didn't realise that that would also enable all the operations on those
types by default.
Now, if the target doesn't have basic VFP, we follow up those
`addRegisterClass` calls by turning back off all the nontrivial
operations you can perform on f32 and f64. That causes several
knock-on failures, which are fixed by allowing the `VMOVDcc` and
`VMOVScc` instructions to be selected even if all you have is
`HasFPRegs`, and adjusting several checks for 'is this a double in a
single-precision-only world?' to the more general 'is this any FP type
we can't do arithmetic on?'. Between those, the whole of the
`float-ops.ll` and `fp16-instructions.ll` tests can now run in
MVE-without-FP mode and generate correct-looking code.
One odd side effect is that I had to relax the check lines in that
test so that they permit test functions like `add_f` to be generated
as tailcalls to software FP library functions, instead of ordinary
calls. Doing that is entirely legal, but the mystery is why this is
the first RUN line that's needed the relaxation: on the usual kind of
non-FP target, no tailcalls ever seem to be generated. Going by the
llc messages, I think `SoftenFloatResult` must be perturbing the code
generation in some way, but that's as much as I can guess.
Reviewers: dmgreen, ostannard
Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D63938
llvm-svn: 364909
2019-07-02 19:26:00 +08:00
|
|
|
RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>;
|
2019-11-19 17:55:16 +08:00
|
|
|
|
|
|
|
def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p),
|
|
|
|
IIC_fpUNA16,
|
|
|
|
[(set (f16 HPR:$Sd),
|
|
|
|
(ARMcmov HPR:$Sn, HPR:$Sm, cmovpred:$p))]>,
|
|
|
|
RegConstraint<"$Sd = $Sn">, Requires<[HasFPRegs]>;
|
2014-11-26 08:46:26 +08:00
|
|
|
} // hasSideEffects
|
2008-11-12 03:40:26 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
2011-01-19 05:58:20 +08:00
|
|
|
// Move from VFP System Register to ARM core register.
|
2008-11-12 03:40:26 +08:00
|
|
|
//
|
|
|
|
|
2011-01-19 05:58:20 +08:00
|
|
|
class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
|
|
|
|
list<dag> pattern>:
|
|
|
|
VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
|
2009-10-28 09:44:26 +08:00
|
|
|
|
2010-10-14 09:02:08 +08:00
|
|
|
// Instruction operand.
|
|
|
|
bits<4> Rt;
|
|
|
|
|
2010-02-10 06:35:38 +08:00
|
|
|
let Inst{27-20} = 0b11101111;
|
2011-01-19 05:58:20 +08:00
|
|
|
let Inst{19-16} = opc19_16;
|
|
|
|
let Inst{15-12} = Rt;
|
2010-02-10 06:35:38 +08:00
|
|
|
let Inst{11-8} = 0b1010;
|
|
|
|
let Inst{7} = 0;
|
2010-10-14 09:02:08 +08:00
|
|
|
let Inst{6-5} = 0b00;
|
2010-02-10 06:35:38 +08:00
|
|
|
let Inst{4} = 1;
|
2010-10-14 09:02:08 +08:00
|
|
|
let Inst{3-0} = 0b0000;
|
2019-09-03 17:55:30 +08:00
|
|
|
let Unpredictable{7-5} = 0b111;
|
|
|
|
let Unpredictable{3-0} = 0b1111;
|
2010-02-10 06:35:38 +08:00
|
|
|
}
|
|
|
|
|
2019-06-10 23:58:19 +08:00
|
|
|
let DecoderMethod = "DecodeForVMRSandVMSR" in {
|
[ARM] Add the non-MVE instructions in Arm v8.1-M.
This adds support for the new family of conditional selection /
increment / negation instructions; the low-overhead branch
instructions (e.g. BF, WLS, DLS); the CLRM instruction to zero a whole
list of registers at once; the new VMRS/VMSR and VLDR/VSTR
instructions to get data in and out of 8.1-M system registers,
particularly including the new VPR register used by MVE vector
predication.
To support this, we also add a register name 'zr' (used by the CSEL
family to force one of the inputs to the constant 0), and operand
types for lists of registers that are also allowed to include APSR or
VPR (used by CLRM). The VLDR/VSTR instructions also need a new
addressing mode.
The low-overhead branch instructions exist in their own separate
architecture extension, which we treat as enabled by default, but you
can say -mattr=-lob or equivalent to turn it off.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Reviewed By: samparker
Subscribers: miyuki, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62667
llvm-svn: 363039
2019-06-11 17:29:18 +08:00
|
|
|
// APSR is the application level alias of CPSR. Moves FPSCR N, Z, C, V flags
|
|
|
|
// to APSR.
|
|
|
|
let Defs = [CPSR], Uses = [FPSCR_NZCV], Predicates = [HasFPRegs],
|
|
|
|
Rt = 0b1111 /* apsr_nzcv */ in
|
|
|
|
def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
|
|
|
|
"vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
|
|
|
|
|
2017-09-22 20:17:42 +08:00
|
|
|
// Application level FPSCR -> GPR
|
2019-05-30 20:37:05 +08:00
|
|
|
let hasSideEffects = 1, Uses = [FPSCR], Predicates = [HasFPRegs] in
|
2017-09-22 20:17:42 +08:00
|
|
|
def VMRS : MovFromVFP<0b0001 /* fpscr */, (outs GPRnopc:$Rt), (ins),
|
|
|
|
"vmrs", "\t$Rt, fpscr",
|
|
|
|
[(set GPRnopc:$Rt, (int_arm_get_fpscr))]>;
|
|
|
|
|
|
|
|
// System level FPEXC, FPSID -> GPR
|
|
|
|
let Uses = [FPSCR] in {
|
|
|
|
def VMRS_FPEXC : MovFromVFP<0b1000 /* fpexc */, (outs GPRnopc:$Rt), (ins),
|
|
|
|
"vmrs", "\t$Rt, fpexc", []>;
|
|
|
|
def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPRnopc:$Rt), (ins),
|
|
|
|
"vmrs", "\t$Rt, fpsid", []>;
|
|
|
|
def VMRS_MVFR0 : MovFromVFP<0b0111 /* mvfr0 */, (outs GPRnopc:$Rt), (ins),
|
2017-08-09 01:16:46 +08:00
|
|
|
"vmrs", "\t$Rt, mvfr0", []>;
|
2017-09-22 20:17:42 +08:00
|
|
|
def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPRnopc:$Rt), (ins),
|
|
|
|
"vmrs", "\t$Rt, mvfr1", []>;
|
|
|
|
let Predicates = [HasFPARMv8] in {
|
|
|
|
def VMRS_MVFR2 : MovFromVFP<0b0101 /* mvfr2 */, (outs GPRnopc:$Rt), (ins),
|
|
|
|
"vmrs", "\t$Rt, mvfr2", []>;
|
|
|
|
}
|
|
|
|
def VMRS_FPINST : MovFromVFP<0b1001 /* fpinst */, (outs GPRnopc:$Rt), (ins),
|
|
|
|
"vmrs", "\t$Rt, fpinst", []>;
|
|
|
|
def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPRnopc:$Rt),
|
|
|
|
(ins), "vmrs", "\t$Rt, fpinst2", []>;
|
[ARM] Add the non-MVE instructions in Arm v8.1-M.
This adds support for the new family of conditional selection /
increment / negation instructions; the low-overhead branch
instructions (e.g. BF, WLS, DLS); the CLRM instruction to zero a whole
list of registers at once; the new VMRS/VMSR and VLDR/VSTR
instructions to get data in and out of 8.1-M system registers,
particularly including the new VPR register used by MVE vector
predication.
To support this, we also add a register name 'zr' (used by the CSEL
family to force one of the inputs to the constant 0), and operand
types for lists of registers that are also allowed to include APSR or
VPR (used by CLRM). The VLDR/VSTR instructions also need a new
addressing mode.
The low-overhead branch instructions exist in their own separate
architecture extension, which we treat as enabled by default, but you
can say -mattr=-lob or equivalent to turn it off.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Reviewed By: samparker
Subscribers: miyuki, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62667
llvm-svn: 363039
2019-06-11 17:29:18 +08:00
|
|
|
let Predicates = [HasV8_1MMainline, HasFPRegs] in {
|
|
|
|
// System level FPSCR_NZCVQC -> GPR
|
|
|
|
def VMRS_FPSCR_NZCVQC
|
|
|
|
: MovFromVFP<0b0010 /* fpscr_nzcvqc */,
|
|
|
|
(outs GPR:$Rt), (ins cl_FPSCR_NZCV:$fpscr_in),
|
|
|
|
"vmrs", "\t$Rt, fpscr_nzcvqc", []>;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
|
|
|
|
// System level FPSCR -> GPR, with context saving for security extensions
|
|
|
|
def VMRS_FPCXTNS : MovFromVFP<0b1110 /* fpcxtns */, (outs GPR:$Rt), (ins),
|
|
|
|
"vmrs", "\t$Rt, fpcxtns", []>;
|
|
|
|
}
|
|
|
|
let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
|
|
|
|
// System level FPSCR -> GPR, with context saving for security extensions
|
|
|
|
def VMRS_FPCXTS : MovFromVFP<0b1111 /* fpcxts */, (outs GPR:$Rt), (ins),
|
|
|
|
"vmrs", "\t$Rt, fpcxts", []>;
|
|
|
|
}
|
|
|
|
|
|
|
|
let Predicates = [HasV8_1MMainline, HasMVEInt] in {
|
|
|
|
// System level VPR/P0 -> GPR
|
|
|
|
let Uses = [VPR] in
|
|
|
|
def VMRS_VPR : MovFromVFP<0b1100 /* vpr */, (outs GPR:$Rt), (ins),
|
|
|
|
"vmrs", "\t$Rt, vpr", []>;
|
|
|
|
|
|
|
|
def VMRS_P0 : MovFromVFP<0b1101 /* p0 */, (outs GPR:$Rt), (ins VCCR:$cond),
|
|
|
|
"vmrs", "\t$Rt, p0", []>;
|
2017-09-22 20:17:42 +08:00
|
|
|
}
|
2011-01-19 05:58:20 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Move from ARM core register to VFP System Register.
|
|
|
|
//
|
|
|
|
|
|
|
|
class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
|
|
|
|
list<dag> pattern>:
|
|
|
|
VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {
|
|
|
|
|
2010-10-14 09:02:08 +08:00
|
|
|
// Instruction operand.
|
2019-09-03 17:55:30 +08:00
|
|
|
bits<4> Rt;
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2010-02-10 06:35:38 +08:00
|
|
|
let Inst{27-20} = 0b11101110;
|
2011-01-19 05:58:20 +08:00
|
|
|
let Inst{19-16} = opc19_16;
|
2019-09-03 17:55:30 +08:00
|
|
|
let Inst{15-12} = Rt;
|
2010-02-10 06:35:38 +08:00
|
|
|
let Inst{11-8} = 0b1010;
|
|
|
|
let Inst{7} = 0;
|
2019-09-03 17:55:30 +08:00
|
|
|
let Inst{6-5} = 0b00;
|
2010-02-10 06:35:38 +08:00
|
|
|
let Inst{4} = 1;
|
2019-09-03 17:55:30 +08:00
|
|
|
let Inst{3-0} = 0b0000;
|
[ARM] Add the non-MVE instructions in Arm v8.1-M.
This adds support for the new family of conditional selection /
increment / negation instructions; the low-overhead branch
instructions (e.g. BF, WLS, DLS); the CLRM instruction to zero a whole
list of registers at once; the new VMRS/VMSR and VLDR/VSTR
instructions to get data in and out of 8.1-M system registers,
particularly including the new VPR register used by MVE vector
predication.
To support this, we also add a register name 'zr' (used by the CSEL
family to force one of the inputs to the constant 0), and operand
types for lists of registers that are also allowed to include APSR or
VPR (used by CLRM). The VLDR/VSTR instructions also need a new
addressing mode.
The low-overhead branch instructions exist in their own separate
architecture extension, which we treat as enabled by default, but you
can say -mattr=-lob or equivalent to turn it off.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Reviewed By: samparker
Subscribers: miyuki, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62667
llvm-svn: 363039
2019-06-11 17:29:18 +08:00
|
|
|
let Predicates = [HasVFP2];
|
2019-09-03 17:55:30 +08:00
|
|
|
let Unpredictable{7-5} = 0b111;
|
|
|
|
let Unpredictable{3-0} = 0b1111;
|
2010-02-10 06:35:38 +08:00
|
|
|
}
|
2009-10-28 09:44:26 +08:00
|
|
|
|
2017-09-22 20:17:42 +08:00
|
|
|
let DecoderMethod = "DecodeForVMRSandVMSR" in {
|
|
|
|
let Defs = [FPSCR] in {
|
2019-05-30 20:37:05 +08:00
|
|
|
let Predicates = [HasFPRegs] in
|
2017-09-22 20:17:42 +08:00
|
|
|
// Application level GPR -> FPSCR
|
2019-09-03 17:55:30 +08:00
|
|
|
def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPRnopc:$Rt),
|
|
|
|
"vmsr", "\tfpscr, $Rt",
|
|
|
|
[(int_arm_set_fpscr GPRnopc:$Rt)]>;
|
2017-09-22 20:17:42 +08:00
|
|
|
// System level GPR -> FPEXC
|
2019-09-03 17:55:30 +08:00
|
|
|
def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPRnopc:$Rt),
|
|
|
|
"vmsr", "\tfpexc, $Rt", []>;
|
2017-09-22 20:17:42 +08:00
|
|
|
// System level GPR -> FPSID
|
2019-09-03 17:55:30 +08:00
|
|
|
def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPRnopc:$Rt),
|
|
|
|
"vmsr", "\tfpsid, $Rt", []>;
|
|
|
|
def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPRnopc:$Rt),
|
|
|
|
"vmsr", "\tfpinst, $Rt", []>;
|
|
|
|
def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPRnopc:$Rt),
|
|
|
|
"vmsr", "\tfpinst2, $Rt", []>;
|
2017-09-22 20:17:42 +08:00
|
|
|
}
|
[ARM] Add the non-MVE instructions in Arm v8.1-M.
This adds support for the new family of conditional selection /
increment / negation instructions; the low-overhead branch
instructions (e.g. BF, WLS, DLS); the CLRM instruction to zero a whole
list of registers at once; the new VMRS/VMSR and VLDR/VSTR
instructions to get data in and out of 8.1-M system registers,
particularly including the new VPR register used by MVE vector
predication.
To support this, we also add a register name 'zr' (used by the CSEL
family to force one of the inputs to the constant 0), and operand
types for lists of registers that are also allowed to include APSR or
VPR (used by CLRM). The VLDR/VSTR instructions also need a new
addressing mode.
The low-overhead branch instructions exist in their own separate
architecture extension, which we treat as enabled by default, but you
can say -mattr=-lob or equivalent to turn it off.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Reviewed By: samparker
Subscribers: miyuki, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62667
llvm-svn: 363039
2019-06-11 17:29:18 +08:00
|
|
|
let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
|
|
|
|
// System level GPR -> FPSCR with context saving for security extensions
|
2019-09-03 17:55:30 +08:00
|
|
|
def VMSR_FPCXTNS : MovToVFP<0b1110 /* fpcxtns */, (outs), (ins GPR:$Rt),
|
|
|
|
"vmsr", "\tfpcxtns, $Rt", []>;
|
[ARM] Add the non-MVE instructions in Arm v8.1-M.
This adds support for the new family of conditional selection /
increment / negation instructions; the low-overhead branch
instructions (e.g. BF, WLS, DLS); the CLRM instruction to zero a whole
list of registers at once; the new VMRS/VMSR and VLDR/VSTR
instructions to get data in and out of 8.1-M system registers,
particularly including the new VPR register used by MVE vector
predication.
To support this, we also add a register name 'zr' (used by the CSEL
family to force one of the inputs to the constant 0), and operand
types for lists of registers that are also allowed to include APSR or
VPR (used by CLRM). The VLDR/VSTR instructions also need a new
addressing mode.
The low-overhead branch instructions exist in their own separate
architecture extension, which we treat as enabled by default, but you
can say -mattr=-lob or equivalent to turn it off.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Reviewed By: samparker
Subscribers: miyuki, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62667
llvm-svn: 363039
2019-06-11 17:29:18 +08:00
|
|
|
}
|
|
|
|
let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
|
|
|
|
// System level GPR -> FPSCR with context saving for security extensions
|
2019-09-03 17:55:30 +08:00
|
|
|
def VMSR_FPCXTS : MovToVFP<0b1111 /* fpcxts */, (outs), (ins GPR:$Rt),
|
|
|
|
"vmsr", "\tfpcxts, $Rt", []>;
|
[ARM] Add the non-MVE instructions in Arm v8.1-M.
This adds support for the new family of conditional selection /
increment / negation instructions; the low-overhead branch
instructions (e.g. BF, WLS, DLS); the CLRM instruction to zero a whole
list of registers at once; the new VMRS/VMSR and VLDR/VSTR
instructions to get data in and out of 8.1-M system registers,
particularly including the new VPR register used by MVE vector
predication.
To support this, we also add a register name 'zr' (used by the CSEL
family to force one of the inputs to the constant 0), and operand
types for lists of registers that are also allowed to include APSR or
VPR (used by CLRM). The VLDR/VSTR instructions also need a new
addressing mode.
The low-overhead branch instructions exist in their own separate
architecture extension, which we treat as enabled by default, but you
can say -mattr=-lob or equivalent to turn it off.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Reviewed By: samparker
Subscribers: miyuki, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62667
llvm-svn: 363039
2019-06-11 17:29:18 +08:00
|
|
|
}
|
|
|
|
let Predicates = [HasV8_1MMainline, HasFPRegs] in {
|
|
|
|
// System level GPR -> FPSCR_NZCVQC
|
|
|
|
def VMSR_FPSCR_NZCVQC
|
|
|
|
: MovToVFP<0b0010 /* fpscr_nzcvqc */,
|
2019-09-03 17:55:30 +08:00
|
|
|
(outs cl_FPSCR_NZCV:$fpscr_out), (ins GPR:$Rt),
|
|
|
|
"vmsr", "\tfpscr_nzcvqc, $Rt", []>;
|
[ARM] Add the non-MVE instructions in Arm v8.1-M.
This adds support for the new family of conditional selection /
increment / negation instructions; the low-overhead branch
instructions (e.g. BF, WLS, DLS); the CLRM instruction to zero a whole
list of registers at once; the new VMRS/VMSR and VLDR/VSTR
instructions to get data in and out of 8.1-M system registers,
particularly including the new VPR register used by MVE vector
predication.
To support this, we also add a register name 'zr' (used by the CSEL
family to force one of the inputs to the constant 0), and operand
types for lists of registers that are also allowed to include APSR or
VPR (used by CLRM). The VLDR/VSTR instructions also need a new
addressing mode.
The low-overhead branch instructions exist in their own separate
architecture extension, which we treat as enabled by default, but you
can say -mattr=-lob or equivalent to turn it off.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Reviewed By: samparker
Subscribers: miyuki, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62667
llvm-svn: 363039
2019-06-11 17:29:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
let Predicates = [HasV8_1MMainline, HasMVEInt] in {
|
|
|
|
// System level GPR -> VPR/P0
|
|
|
|
let Defs = [VPR] in
|
2019-09-03 17:55:30 +08:00
|
|
|
def VMSR_VPR : MovToVFP<0b1100 /* vpr */, (outs), (ins GPR:$Rt),
|
|
|
|
"vmsr", "\tvpr, $Rt", []>;
|
[ARM] Add the non-MVE instructions in Arm v8.1-M.
This adds support for the new family of conditional selection /
increment / negation instructions; the low-overhead branch
instructions (e.g. BF, WLS, DLS); the CLRM instruction to zero a whole
list of registers at once; the new VMRS/VMSR and VLDR/VSTR
instructions to get data in and out of 8.1-M system registers,
particularly including the new VPR register used by MVE vector
predication.
To support this, we also add a register name 'zr' (used by the CSEL
family to force one of the inputs to the constant 0), and operand
types for lists of registers that are also allowed to include APSR or
VPR (used by CLRM). The VLDR/VSTR instructions also need a new
addressing mode.
The low-overhead branch instructions exist in their own separate
architecture extension, which we treat as enabled by default, but you
can say -mattr=-lob or equivalent to turn it off.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Reviewed By: samparker
Subscribers: miyuki, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62667
llvm-svn: 363039
2019-06-11 17:29:18 +08:00
|
|
|
|
2019-09-03 17:55:30 +08:00
|
|
|
def VMSR_P0 : MovToVFP<0b1101 /* p0 */, (outs VCCR:$cond), (ins GPR:$Rt),
|
|
|
|
"vmsr", "\tp0, $Rt", []>;
|
[ARM] Add the non-MVE instructions in Arm v8.1-M.
This adds support for the new family of conditional selection /
increment / negation instructions; the low-overhead branch
instructions (e.g. BF, WLS, DLS); the CLRM instruction to zero a whole
list of registers at once; the new VMRS/VMSR and VLDR/VSTR
instructions to get data in and out of 8.1-M system registers,
particularly including the new VPR register used by MVE vector
predication.
To support this, we also add a register name 'zr' (used by the CSEL
family to force one of the inputs to the constant 0), and operand
types for lists of registers that are also allowed to include APSR or
VPR (used by CLRM). The VLDR/VSTR instructions also need a new
addressing mode.
The low-overhead branch instructions exist in their own separate
architecture extension, which we treat as enabled by default, but you
can say -mattr=-lob or equivalent to turn it off.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Reviewed By: samparker
Subscribers: miyuki, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62667
llvm-svn: 363039
2019-06-11 17:29:18 +08:00
|
|
|
}
|
2011-01-19 05:58:20 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Misc.
|
|
|
|
//
|
|
|
|
|
2009-10-28 09:44:26 +08:00
|
|
|
// Materialize FP immediates. VFP3 only.
|
2009-11-09 08:11:35 +08:00
|
|
|
let isReMaterializable = 1 in {
|
2010-10-14 10:33:26 +08:00
|
|
|
def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
|
2010-04-08 02:19:56 +08:00
|
|
|
VFPMiscFrm, IIC_fpUNA64,
|
2010-10-14 10:33:26 +08:00
|
|
|
"vmov", ".f64\t$Dd, $imm",
|
2013-10-24 23:49:39 +08:00
|
|
|
[(set DPR:$Dd, vfp_f64imm:$imm)]>,
|
|
|
|
Requires<[HasVFP3,HasDPVFP]> {
|
2011-09-30 08:50:06 +08:00
|
|
|
bits<5> Dd;
|
|
|
|
bits<8> imm;
|
2010-10-14 10:33:26 +08:00
|
|
|
|
2009-10-28 09:44:26 +08:00
|
|
|
let Inst{27-23} = 0b11101;
|
2011-09-30 08:50:06 +08:00
|
|
|
let Inst{22} = Dd{4};
|
2009-10-28 09:44:26 +08:00
|
|
|
let Inst{21-20} = 0b11;
|
2011-09-30 08:50:06 +08:00
|
|
|
let Inst{19-16} = imm{7-4};
|
|
|
|
let Inst{15-12} = Dd{3-0};
|
2009-10-28 09:44:26 +08:00
|
|
|
let Inst{11-9} = 0b101;
|
2010-10-14 10:33:26 +08:00
|
|
|
let Inst{8} = 1; // Double precision.
|
2009-10-28 09:44:26 +08:00
|
|
|
let Inst{7-4} = 0b0000;
|
2011-09-30 08:50:06 +08:00
|
|
|
let Inst{3-0} = imm{3-0};
|
2009-10-28 09:44:26 +08:00
|
|
|
}
|
|
|
|
|
2010-10-14 10:33:26 +08:00
|
|
|
def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
|
|
|
|
VFPMiscFrm, IIC_fpUNA32,
|
|
|
|
"vmov", ".f32\t$Sd, $imm",
|
|
|
|
[(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
|
2011-09-30 08:50:06 +08:00
|
|
|
bits<5> Sd;
|
|
|
|
bits<8> imm;
|
2010-10-14 10:33:26 +08:00
|
|
|
|
2009-10-28 09:44:26 +08:00
|
|
|
let Inst{27-23} = 0b11101;
|
2011-09-30 08:50:06 +08:00
|
|
|
let Inst{22} = Sd{0};
|
2009-10-28 09:44:26 +08:00
|
|
|
let Inst{21-20} = 0b11;
|
2011-09-30 08:50:06 +08:00
|
|
|
let Inst{19-16} = imm{7-4};
|
|
|
|
let Inst{15-12} = Sd{4-1};
|
2009-10-28 09:44:26 +08:00
|
|
|
let Inst{11-9} = 0b101;
|
2010-10-14 10:33:26 +08:00
|
|
|
let Inst{8} = 0; // Single precision.
|
2009-10-28 09:44:26 +08:00
|
|
|
let Inst{7-4} = 0b0000;
|
2011-09-30 08:50:06 +08:00
|
|
|
let Inst{3-0} = imm{3-0};
|
2009-10-28 09:44:26 +08:00
|
|
|
}
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2018-02-07 16:37:17 +08:00
|
|
|
def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm),
|
2016-01-25 18:26:26 +08:00
|
|
|
VFPMiscFrm, IIC_fpUNA16,
|
|
|
|
"vmov", ".f16\t$Sd, $imm",
|
2018-02-07 16:37:17 +08:00
|
|
|
[(set HPR:$Sd, vfp_f16imm:$imm)]>,
|
|
|
|
Requires<[HasFullFP16]> {
|
2016-01-25 18:26:26 +08:00
|
|
|
bits<5> Sd;
|
|
|
|
bits<8> imm;
|
|
|
|
|
|
|
|
let Inst{27-23} = 0b11101;
|
|
|
|
let Inst{22} = Sd{0};
|
|
|
|
let Inst{21-20} = 0b11;
|
|
|
|
let Inst{19-16} = imm{7-4};
|
|
|
|
let Inst{15-12} = Sd{4-1};
|
|
|
|
let Inst{11-8} = 0b1001; // Half precision
|
|
|
|
let Inst{7-4} = 0b0000;
|
|
|
|
let Inst{3-0} = imm{3-0};
|
[ARM] Make fullfp16 instructions not conditionalisable.
More or less all the instructions defined in the v8.2a full-fp16
extension are defined as UNPREDICTABLE if you put them in an IT block
(Thumb) or use with any condition other than AL (ARM). LLVM didn't
know that, and was happy to conditionalise them.
In order to force these instructions to count as not predicable, I had
to make a small Tablegen change. The code generation back end mostly
decides if an instruction was predicable by looking for something it
can identify as a predicate operand; there's an isPredicable bit flag
that overrides that check in the positive direction, but nothing that
overrides it in the negative direction.
(I considered the alternative approach of actually removing the
predicate operand from those instructions, but thought that it would
be more painful overall for instructions differing only in data type
to have different shapes of operand list. This way, the only code that
has to notice the difference is the if-converter.)
So I've added an isUnpredicable bit alongside isPredicable, and set
that bit on the right subset of FP16 instructions, and also on the
VSEL, VMAXNM/VMINNM and VRINT[ANPM] families which should be
unpredicable for all data types.
I've included a couple of representative regression tests, both of
which previously caused an fp16 instruction to be conditionalised in
ARM state and (with -arm-no-restrict-it) to be put in an IT block in
Thumb.
Reviewers: SjoerdMeijer, t.p.northover, efriedma
Reviewed By: efriedma
Subscribers: jdoerfert, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57823
llvm-svn: 354768
2019-02-25 18:39:53 +08:00
|
|
|
|
|
|
|
let isUnpredicable = 1;
|
2016-01-25 18:26:26 +08:00
|
|
|
}
|
2009-11-09 08:11:35 +08:00
|
|
|
}
|
2011-10-04 05:12:43 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Assembler aliases.
|
|
|
|
//
|
2013-12-30 01:58:31 +08:00
|
|
|
// A few mnemonic aliases for pre-unified syntax. We don't guarantee to
|
2011-12-08 08:49:29 +08:00
|
|
|
// support them all, but supporting at least some of the basics is
|
|
|
|
// good to be friendly.
|
2011-12-10 07:34:09 +08:00
|
|
|
def : VFP2MnemonicAlias<"flds", "vldr">;
|
|
|
|
def : VFP2MnemonicAlias<"fldd", "vldr">;
|
|
|
|
def : VFP2MnemonicAlias<"fmrs", "vmov">;
|
|
|
|
def : VFP2MnemonicAlias<"fmsr", "vmov">;
|
|
|
|
def : VFP2MnemonicAlias<"fsqrts", "vsqrt">;
|
|
|
|
def : VFP2MnemonicAlias<"fsqrtd", "vsqrt">;
|
|
|
|
def : VFP2MnemonicAlias<"fadds", "vadd.f32">;
|
|
|
|
def : VFP2MnemonicAlias<"faddd", "vadd.f64">;
|
|
|
|
def : VFP2MnemonicAlias<"fmrdd", "vmov">;
|
|
|
|
def : VFP2MnemonicAlias<"fmrds", "vmov">;
|
|
|
|
def : VFP2MnemonicAlias<"fmrrd", "vmov">;
|
|
|
|
def : VFP2MnemonicAlias<"fmdrr", "vmov">;
|
2011-12-20 03:43:50 +08:00
|
|
|
def : VFP2MnemonicAlias<"fmuls", "vmul.f32">;
|
2011-12-10 07:34:09 +08:00
|
|
|
def : VFP2MnemonicAlias<"fmuld", "vmul.f64">;
|
|
|
|
def : VFP2MnemonicAlias<"fnegs", "vneg.f32">;
|
|
|
|
def : VFP2MnemonicAlias<"fnegd", "vneg.f64">;
|
2011-12-10 08:01:02 +08:00
|
|
|
def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">;
|
|
|
|
def : VFP2MnemonicAlias<"ftosid", "vcvtr.s32.f64">;
|
|
|
|
def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">;
|
|
|
|
def : VFP2MnemonicAlias<"ftosis", "vcvtr.s32.f32">;
|
|
|
|
def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">;
|
|
|
|
def : VFP2MnemonicAlias<"ftouid", "vcvtr.u32.f64">;
|
|
|
|
def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">;
|
|
|
|
def : VFP2MnemonicAlias<"ftouis", "vcvtr.u32.f32">;
|
|
|
|
def : VFP2MnemonicAlias<"fsitod", "vcvt.f64.s32">;
|
|
|
|
def : VFP2MnemonicAlias<"fsitos", "vcvt.f32.s32">;
|
|
|
|
def : VFP2MnemonicAlias<"fuitod", "vcvt.f64.u32">;
|
|
|
|
def : VFP2MnemonicAlias<"fuitos", "vcvt.f32.u32">;
|
2011-12-14 04:13:48 +08:00
|
|
|
def : VFP2MnemonicAlias<"fsts", "vstr">;
|
|
|
|
def : VFP2MnemonicAlias<"fstd", "vstr">;
|
2011-12-14 04:40:37 +08:00
|
|
|
def : VFP2MnemonicAlias<"fmacd", "vmla.f64">;
|
|
|
|
def : VFP2MnemonicAlias<"fmacs", "vmla.f32">;
|
2011-12-20 03:02:41 +08:00
|
|
|
def : VFP2MnemonicAlias<"fcpys", "vmov.f32">;
|
|
|
|
def : VFP2MnemonicAlias<"fcpyd", "vmov.f64">;
|
2011-12-23 03:20:45 +08:00
|
|
|
def : VFP2MnemonicAlias<"fcmps", "vcmp.f32">;
|
|
|
|
def : VFP2MnemonicAlias<"fcmpd", "vcmp.f64">;
|
2011-12-20 03:02:41 +08:00
|
|
|
def : VFP2MnemonicAlias<"fdivs", "vdiv.f32">;
|
|
|
|
def : VFP2MnemonicAlias<"fdivd", "vdiv.f64">;
|
2012-03-17 05:06:13 +08:00
|
|
|
def : VFP2MnemonicAlias<"fmrx", "vmrs">;
|
|
|
|
def : VFP2MnemonicAlias<"fmxr", "vmsr">;
|
2011-12-08 08:49:29 +08:00
|
|
|
|
2012-03-16 04:48:18 +08:00
|
|
|
// Be friendly and accept the old form of zero-compare
|
2013-10-24 23:49:39 +08:00
|
|
|
def : VFP2DPInstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
|
2012-03-16 04:48:18 +08:00
|
|
|
def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
|
|
|
|
|
|
|
|
|
2019-05-30 20:37:05 +08:00
|
|
|
def : InstAlias<"fmstat${p}", (FMSTAT pred:$p), 0>, Requires<[HasFPRegs]>;
|
2011-12-10 08:01:02 +08:00
|
|
|
def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
|
|
|
|
(VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
|
2013-10-24 23:49:39 +08:00
|
|
|
def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm",
|
|
|
|
(VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
|
2011-12-10 08:01:02 +08:00
|
|
|
def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm",
|
|
|
|
(VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
|
2013-10-24 23:49:39 +08:00
|
|
|
def : VFP2DPInstAlias<"fsubd${p} $Dd, $Dn, $Dm",
|
|
|
|
(VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
|
2011-10-04 05:12:43 +08:00
|
|
|
|
2011-12-09 06:51:25 +08:00
|
|
|
// No need for the size suffix on VSQRT. It's implied by the register classes.
|
|
|
|
def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>;
|
2013-10-24 23:49:39 +08:00
|
|
|
def : VFP2DPInstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
|
2011-12-09 06:51:25 +08:00
|
|
|
|
2011-11-15 07:03:21 +08:00
|
|
|
// VLDR/VSTR accept an optional type suffix.
|
2011-12-07 09:50:36 +08:00
|
|
|
def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr",
|
|
|
|
(VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
|
|
|
|
def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr",
|
|
|
|
(VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
|
|
|
|
def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr",
|
|
|
|
(VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
|
|
|
|
def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr",
|
|
|
|
(VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
|
2011-11-16 04:14:51 +08:00
|
|
|
|
2011-12-22 07:24:15 +08:00
|
|
|
// VMOV can accept an optional 32-bit or smaller data type suffix.
|
|
|
|
def : VFP2InstAlias<"vmov${p}.8 $Rt, $Sn",
|
2011-11-16 04:29:42 +08:00
|
|
|
(VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
|
2011-12-22 07:24:15 +08:00
|
|
|
def : VFP2InstAlias<"vmov${p}.16 $Rt, $Sn",
|
|
|
|
(VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
|
|
|
|
def : VFP2InstAlias<"vmov${p}.32 $Rt, $Sn",
|
|
|
|
(VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
|
|
|
|
def : VFP2InstAlias<"vmov${p}.8 $Sn, $Rt",
|
|
|
|
(VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
|
|
|
|
def : VFP2InstAlias<"vmov${p}.16 $Sn, $Rt",
|
|
|
|
(VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
|
|
|
|
def : VFP2InstAlias<"vmov${p}.32 $Sn, $Rt",
|
2011-11-16 04:29:42 +08:00
|
|
|
(VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
|
|
|
|
|
|
|
|
def : VFP2InstAlias<"vmov${p}.f64 $Rt, $Rt2, $Dn",
|
|
|
|
(VMOVRRD GPR:$Rt, GPR:$Rt2, DPR:$Dn, pred:$p)>;
|
|
|
|
def : VFP2InstAlias<"vmov${p}.f64 $Dn, $Rt, $Rt2",
|
|
|
|
(VMOVDRR DPR:$Dn, GPR:$Rt, GPR:$Rt2, pred:$p)>;
|
2011-11-16 05:18:35 +08:00
|
|
|
|
|
|
|
// VMOVS doesn't need the .f32 to disambiguate from the NEON encoding the way
|
|
|
|
// VMOVD does.
|
|
|
|
def : VFP2InstAlias<"vmov${p} $Sd, $Sm",
|
|
|
|
(VMOVS SPR:$Sd, SPR:$Sm, pred:$p)>;
|
2014-01-08 02:19:23 +08:00
|
|
|
|
|
|
|
// FCONSTD/FCONSTS alias for vmov.f64/vmov.f32
|
|
|
|
// These aliases provide added functionality over vmov.f instructions by
|
|
|
|
// allowing users to write assembly containing encoded floating point constants
|
|
|
|
// (e.g. #0x70 vs #1.0). Without these aliases there is no way for the
|
|
|
|
// assembler to accept encoded fp constants (but the equivalent fp-literal is
|
|
|
|
// accepted directly by vmov.f).
|
|
|
|
def : VFP3InstAlias<"fconstd${p} $Dd, $val",
|
|
|
|
(FCONSTD DPR:$Dd, vfp_f64imm:$val, pred:$p)>;
|
|
|
|
def : VFP3InstAlias<"fconsts${p} $Sd, $val",
|
|
|
|
(FCONSTS SPR:$Sd, vfp_f32imm:$val, pred:$p)>;
|
[ARM] Add the non-MVE instructions in Arm v8.1-M.
This adds support for the new family of conditional selection /
increment / negation instructions; the low-overhead branch
instructions (e.g. BF, WLS, DLS); the CLRM instruction to zero a whole
list of registers at once; the new VMRS/VMSR and VLDR/VSTR
instructions to get data in and out of 8.1-M system registers,
particularly including the new VPR register used by MVE vector
predication.
To support this, we also add a register name 'zr' (used by the CSEL
family to force one of the inputs to the constant 0), and operand
types for lists of registers that are also allowed to include APSR or
VPR (used by CLRM). The VLDR/VSTR instructions also need a new
addressing mode.
The low-overhead branch instructions exist in their own separate
architecture extension, which we treat as enabled by default, but you
can say -mattr=-lob or equivalent to turn it off.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Reviewed By: samparker
Subscribers: miyuki, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62667
llvm-svn: 363039
2019-06-11 17:29:18 +08:00
|
|
|
|
|
|
|
def VSCCLRMD : VFPXI<(outs), (ins pred:$p, fp_dreglist_with_vpr:$regs, variable_ops),
|
|
|
|
AddrModeNone, 4, IndexModeNone, VFPMiscFrm, NoItinerary,
|
|
|
|
"vscclrm{$p}\t$regs", "", []>, Sched<[]> {
|
|
|
|
bits<13> regs;
|
|
|
|
let Inst{31-23} = 0b111011001;
|
|
|
|
let Inst{22} = regs{12};
|
|
|
|
let Inst{21-16} = 0b011111;
|
|
|
|
let Inst{15-12} = regs{11-8};
|
|
|
|
let Inst{11-8} = 0b1011;
|
[MC][ARM] vscclrm disassembles as vldmia
Happens only when the mve.fp subtarget feature is enabled:
$ llvm-mc -triple thumbv8.1m.main -mattr=+mve.fp,+8msecext -disassemble <<< "0x9f,0xec,0x08,0x0b"
.text
vldmia pc, {d0, d1, d2, d3}
$ llvm-mc -triple thumbv8.1m.main -mattr=+8msecext -disassemble <<< "0x9f,0xec,0x08,0x0b"
.text
vscclrm {d0, d1, d2, d3, vpr}
Assembling returns the correct encoding with or without mve.fp:
$ llvm-mc -triple thumbv8.1m.main -mattr=+mve.fp,+8msecext -show-encoding <<< "vscclrm {d0-d3, vpr}"
.text
vscclrm {d0, d1, d2, d3, vpr} @ encoding: [0x9f,0xec,0x08,0x0b]
$ llvm-mc -triple thumbv8.1m.main -mattr=+8msecext -show-encoding <<< "vscclrm {d0-d3, vpr}"
.text
vscclrm {d0, d1, d2, d3, vpr} @ encoding: [0x9f,0xec,0x08,0x0b]
The problem seems to be in the TableGen description of VSCCLRMD.
The least significant bit should be set to zero.
Differential Revision: https://reviews.llvm.org/D68025
llvm-svn: 373052
2019-09-27 16:22:24 +08:00
|
|
|
let Inst{7-1} = regs{7-1};
|
|
|
|
let Inst{0} = 0;
|
[ARM] Add the non-MVE instructions in Arm v8.1-M.
This adds support for the new family of conditional selection /
increment / negation instructions; the low-overhead branch
instructions (e.g. BF, WLS, DLS); the CLRM instruction to zero a whole
list of registers at once; the new VMRS/VMSR and VLDR/VSTR
instructions to get data in and out of 8.1-M system registers,
particularly including the new VPR register used by MVE vector
predication.
To support this, we also add a register name 'zr' (used by the CSEL
family to force one of the inputs to the constant 0), and operand
types for lists of registers that are also allowed to include APSR or
VPR (used by CLRM). The VLDR/VSTR instructions also need a new
addressing mode.
The low-overhead branch instructions exist in their own separate
architecture extension, which we treat as enabled by default, but you
can say -mattr=-lob or equivalent to turn it off.
Reviewers: dmgreen, samparker, SjoerdMeijer, t.p.northover
Reviewed By: samparker
Subscribers: miyuki, javed.absar, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62667
llvm-svn: 363039
2019-06-11 17:29:18 +08:00
|
|
|
|
|
|
|
let DecoderMethod = "DecodeVSCCLRM";
|
|
|
|
|
|
|
|
list<Predicate> Predicates = [HasV8_1MMainline, Has8MSecExt];
|
|
|
|
}
|
|
|
|
|
|
|
|
// VSCCLRM taking an fp_sreglist_with_vpr register list. The 13-bit list
// operand is scattered over Inst{22}, Inst{15-12} and Inst{7-0}; all other
// bits set here are fixed opcode bits. Decoding is delegated to the custom
// DecodeVSCCLRM hook.
def VSCCLRMS
    : VFPXI<(outs),
            (ins pred:$p, fp_sreglist_with_vpr:$regs, variable_ops),
            AddrModeNone, 4, IndexModeNone, VFPMiscFrm, NoItinerary,
            "vscclrm{$p}\t$regs", "", []>,
      Sched<[]> {
  // Encoded register-list operand.
  bits<13> regs;

  // Fixed opcode bits.
  let Inst{31-23} = 0b111011001;
  let Inst{21-16} = 0b011111;
  let Inst{11-8}  = 0b1010;

  // Register-list fields.
  let Inst{22}    = regs{8};
  let Inst{15-12} = regs{12-9};
  let Inst{7-0}   = regs{7-0};

  let DecoderMethod = "DecodeVSCCLRM";

  list<Predicate> Predicates = [HasV8_1MMainline, Has8MSecExt];
}
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Load/store VFP system registers from/to memory (VLDR/VSTR).
|
|
|
|
//
|
|
|
|
|
|
|
|
// Common base for the system-register VLDR/VSTR instructions.
//   opc    - 1 selects the "vldr" mnemonic (and mayLoad), 0 selects "vstr"
//            (and mayStore).
//   P, W   - copied verbatim into Inst{24} and Inst{21}.
//   SysReg - 4-bit system register selector, split over Inst{22} and
//            Inst{15-13}.
//   sysreg - register name printed as the first assembly operand.
//   oops/iops/im/cstr - forwarded unchanged to VFPI.
//   Dest   - assembly text for the address operand.
class vfp_vstrldr<bit opc, bit P, bit W, bits<4> SysReg, string sysreg,
                  dag oops, dag iops, IndexMode im, string Dest, string cstr>
    : VFPI<oops, iops, AddrModeT2_i7s4, 4, im, VFPLdStFrm, IIC_fpSTAT,
           !if(opc,"vldr","vstr"), !strconcat("\t", sysreg, ", ", Dest), cstr, []>,
      Sched<[]> {
  // Encoded address operand; its bits are distributed below.
  bits<12> addr;

  // Fixed opcode bits.
  let Inst{27-25} = 0b110;
  let Inst{12-7}  = 0b011111;

  // Bits taken directly from the class parameters.
  let Inst{24}    = P;
  let Inst{21}    = W;
  let Inst{20}    = opc;
  let Inst{22}    = SysReg{3};
  let Inst{15-13} = SysReg{2-0};

  // Address operand fields.
  let Inst{23}    = addr{7};
  let Inst{19-16} = addr{11-8};
  let Inst{6-0}   = addr{6-0};

  list<Predicate> Predicates = [HasFPRegs, HasV8_1MMainline];

  // Exactly one of mayLoad/mayStore is set, chosen by opc.
  let mayLoad = opc;
  let mayStore = !if(opc, 0b0, 0b1);
  let hasSideEffects = 1;
}
|
|
|
|
|
|
|
|
// Instantiates the three addressing variants of a system-register VLDR/VSTR:
//   _off  - P=1, W=0: plain offset addressing, no write-back result.
//   _pre  - P=1, W=1: "$addr!" form, base tied to the $wb result.
//   _post - P=0, W=1: "$Rn$addr" form, base tied to the $wb result.
// opc/SysReg/sysreg are forwarded to vfp_vstrldr; oops/iops let a caller
// prepend extra explicit operands (they default to empty lists).
multiclass vfp_vstrldr_sysreg<bit opc, bits<4> SysReg, string sysreg,
                              dag oops=(outs), dag iops=(ins)> {
  // The offset form has no $wb output and no tied-operand constraint, so it
  // is tagged IndexModeNone (it was previously mislabelled IndexModePost).
  def _off :
    vfp_vstrldr<opc, 1, 0, SysReg, sysreg,
                oops, !con(iops, (ins t2addrmode_imm7s4:$addr)),
                IndexModeNone, "$addr", "" > {
    let DecoderMethod = "DecodeVSTRVLDR_SYSREG<false>";
  }

  def _pre :
    vfp_vstrldr<opc, 1, 1, SysReg, sysreg,
                !con(oops, (outs GPRnopc:$wb)),
                !con(iops, (ins t2addrmode_imm7s4_pre:$addr)),
                IndexModePre, "$addr!", "$addr.base = $wb"> {
    let DecoderMethod = "DecodeVSTRVLDR_SYSREG<true>";
  }

  def _post :
    vfp_vstrldr<opc, 0, 1, SysReg, sysreg,
                !con(oops, (outs GPRnopc:$wb)),
                !con(iops, (ins t2_addr_offset_none:$Rn,
                                t2am_imm7s4_offset:$addr)),
                IndexModePost, "$Rn$addr", "$Rn.base = $wb"> {
    // The base register is a separate operand here, so it replaces the
    // addr{11-8} field that the base class places in Inst{19-16}.
    bits<4> Rn;
    let Inst{19-16} = Rn{3-0};
    let DecoderMethod = "DecodeVSTRVLDR_SYSREG<true>";
  }
}
|
|
|
|
|
|
|
|
// Stores of FPSCR-related system registers to memory. A store reads the
// register being saved, so these are modelled as using FPSCR (matching
// VSTR_VPR, which is declared with Uses = [VPR]). They were previously
// declared with Defs = [FPSCR] only.
let Uses = [FPSCR] in {
  defm VSTR_FPSCR          : vfp_vstrldr_sysreg<0b0,0b0001, "fpscr">;
  defm VSTR_FPSCR_NZCVQC   : vfp_vstrldr_sysreg<0b0,0b0010, "fpscr_nzcvqc">;

  // NOTE(review): the original Defs = [FPSCR] is retained for the FPCXT
  // stores in case saving the context also reinitialises FPSCR - confirm
  // against the architecture manual before dropping it.
  let Defs = [FPSCR], Predicates = [HasV8_1MMainline, Has8MSecExt] in {
    defm VSTR_FPCXTNS      : vfp_vstrldr_sysreg<0b0,0b1110, "fpcxtns">;
    defm VSTR_FPCXTS       : vfp_vstrldr_sysreg<0b0,0b1111, "fpcxts">;
  }
}
|
|
|
|
|
|
|
|
// Stores of the MVE predication registers; both require MVE integer support.
let Predicates = [HasV8_1MMainline, HasMVEInt] in {
  // VPR is an implicit source of the store.
  let Uses = [VPR] in
  defm VSTR_VPR : vfp_vstrldr_sysreg<0b0,0b1100, "vpr">;

  // P0 is passed as an explicit VCCR input operand instead.
  defm VSTR_P0 : vfp_vstrldr_sysreg<0b0,0b1101, "p0",
                                    (outs), (ins VCCR:$P0)>;
}
|
|
|
|
|
|
|
|
// Loads of FPSCR-related system registers from memory. A load writes the
// destination register, so these are modelled as defining FPSCR (matching
// VLDR_VPR, which is declared with Defs = [VPR]). They were previously
// declared with Uses = [FPSCR] only.
let Defs = [FPSCR] in {
  defm VLDR_FPSCR          : vfp_vstrldr_sysreg<0b1,0b0001, "fpscr">;

  // NOTE(review): fpscr_nzcvqc presumably updates only part of FPSCR, so the
  // original Uses = [FPSCR] is kept on this form alongside the def - confirm.
  let Uses = [FPSCR] in
  defm VLDR_FPSCR_NZCVQC   : vfp_vstrldr_sysreg<0b1,0b0010, "fpscr_nzcvqc">;

  let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
    defm VLDR_FPCXTNS      : vfp_vstrldr_sysreg<0b1,0b1110, "fpcxtns">;
    defm VLDR_FPCXTS       : vfp_vstrldr_sysreg<0b1,0b1111, "fpcxts">;
  }
}
|
|
|
|
|
|
|
|
// Loads of the MVE predication registers; both require MVE integer support.
let Predicates = [HasV8_1MMainline, HasMVEInt] in {
  // VPR is an implicit destination of the load.
  let Defs = [VPR] in
  defm VLDR_VPR : vfp_vstrldr_sysreg<0b1,0b1100, "vpr">;

  // P0 is produced as an explicit VCCR output operand instead.
  defm VLDR_P0 : vfp_vstrldr_sysreg<0b1,0b1101, "p0",
                                    (outs VCCR:$P0), (ins)>;
}
|