2012-02-18 20:03:15 +08:00
|
|
|
//===-- ARMInstrVFP.td - VFP support for ARM ---------------*- tablegen -*-===//
|
2007-01-19 15:51:42 +08:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
2007-12-30 04:36:04 +08:00
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
2007-01-19 15:51:42 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2008-09-12 05:41:29 +08:00
|
|
|
// This file describes the ARM VFP instruction set.
|
2007-01-19 15:51:42 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2017-02-13 20:32:47 +08:00
|
|
|
// Type profile used by arm_cmpfp0: no results and two operands, where
// operand 0 is a floating-point value and operand 1 is an i32.
def SDT_CMPFP0 : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisVT<1, i32>]>;
|
2010-10-16 05:50:45 +08:00
|
|
|
// Type profile used by arm_fmdrr: one f64 result built from two operands
// that are both i32 (operand 2 is constrained to match operand 1).
def SDT_VMOVDRR : SDTypeProfile<1, 2, [
  SDTCisVT<0, f64>,
  SDTCisVT<1, i32>,
  SDTCisSameAs<1, 2>
]>;
|
2017-03-15 02:43:37 +08:00
|
|
|
// Type profile used by arm_fmrrd: two i32 results (result 1 is constrained
// to match result 0) produced from a single f64 operand.
def SDT_VMOVRRD : SDTypeProfile<2, 1, [
  SDTCisVT<0, i32>,
  SDTCisSameAs<0, 1>,
  SDTCisVT<2, f64>
]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-12-24 02:28:41 +08:00
|
|
|
// SelectionDAG nodes for the VFP-specific ARMISD opcodes.

// FMSTAT: no typed operands/results (SDTNone); consumes and produces glue.
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone,
                        [SDNPInGlue, SDNPOutGlue]>;

// Floating-point compares; both produce glue.
def arm_cmpfp  : SDNode<"ARMISD::CMPFP",   SDT_ARMFCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0,  [SDNPOutGlue]>;

// Moves between an f64 value and a pair of i32 values (see SDT_VMOVDRR and
// SDT_VMOVRRD for the operand/result types).
def arm_fmdrr  : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
def arm_fmrrd  : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2018-01-31 18:18:29 +08:00
|
|
|
// VMOVhr: one floating-point result from one i32 operand.
def SDT_VMOVhr : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, i32>]>;

// VMOVrh: one i32 result from one floating-point operand.
def SDT_VMOVrh : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisFP<1>]>;

// SelectionDAG nodes bound to the two profiles above.
def arm_vmovhr : SDNode<"ARMISD::VMOVhr", SDT_VMOVhr>;
def arm_vmovrh : SDNode<"ARMISD::VMOVrh", SDT_VMOVrh>;
|
|
|
|
|
2009-10-28 09:44:26 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Operand Definitions.
|
|
|
|
//
|
|
|
|
|
2011-10-04 07:38:36 +08:00
|
|
|
// 8-bit floating-point immediate encodings.
|
|
|
|
// Assembly-parser operand class shared by the vfp_f16imm / vfp_f32imm /
// vfp_f64imm operands; operands of this class are parsed by parseFPImm.
def FPImmOperand : AsmOperandClass {
  let ParserMethod = "parseFPImm";
  let Name         = "FPImm";
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// Half-precision floating-point immediate operand.
//
// The PatLeaf predicate accepts an f16 constant only when
// ARM_AM::getFP16Imm does not return -1 for it. The SDNodeXForm then
// materialises the value returned by getFP16Imm as an i32 target constant.
def vfp_f16imm : Operand<f16>,
                 PatLeaf<(f16 fpimm), [{
      return ARM_AM::getFP16Imm(N->getValueAPF()) != -1;
    }], SDNodeXForm<fpimm, [{
      APFloat InVal = N->getValueAPF();
      uint32_t enc = ARM_AM::getFP16Imm(InVal);
      // getTargetConstant takes the node's debug location as its second
      // argument, exactly as the f32 and f64 immediate operands pass it.
      return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
    }]>> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
|
|
|
|
|
2009-10-28 09:44:26 +08:00
|
|
|
// Single-precision floating-point immediate operand.
//
// Accepted only when ARM_AM::getFP32Imm does not return -1 for the constant;
// the transform emits the getFP32Imm result as an i32 target constant.
def vfp_f32imm : Operand<f32>,
                 PatLeaf<(f32 fpimm), [{
      return ARM_AM::getFP32Imm(N->getValueAPF()) != -1;
    }], SDNodeXForm<fpimm, [{
      uint32_t Encoding = ARM_AM::getFP32Imm(N->getValueAPF());
      return CurDAG->getTargetConstant(Encoding, SDLoc(N), MVT::i32);
    }]>> {
  let ParserMatchClass = FPImmOperand;
  let PrintMethod = "printFPImmOperand";
}
|
|
|
|
|
|
|
|
// Double-precision floating-point immediate operand.
//
// Accepted only when ARM_AM::getFP64Imm does not return -1 for the constant;
// the transform emits the getFP64Imm result as an i32 target constant.
def vfp_f64imm : Operand<f64>,
                 PatLeaf<(f64 fpimm), [{
      return ARM_AM::getFP64Imm(N->getValueAPF()) != -1;
    }], SDNodeXForm<fpimm, [{
      uint32_t Encoding = ARM_AM::getFP64Imm(N->getValueAPF());
      return CurDAG->getTargetConstant(Encoding, SDLoc(N), MVT::i32);
    }]>> {
  let ParserMatchClass = FPImmOperand;
  let PrintMethod = "printFPImmOperand";
}
|
|
|
|
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation .
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issue before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH desriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 2/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
// Matches a load whose reported alignment is at least 2.
def alignedload16 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  const auto *Ld = cast<LoadSDNode>(N);
  return Ld->getAlignment() >= 2;
}]>;
|
|
|
|
|
2012-08-16 01:44:53 +08:00
|
|
|
// Matches a load whose reported alignment is at least 4.
def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  const auto *Ld = cast<LoadSDNode>(N);
  return Ld->getAlignment() >= 4;
}]>;
|
|
|
|
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation .
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issue before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH desriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 2/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
// Matches a store whose reported alignment is at least 2.
def alignedstore16 : PatFrag<(ops node:$val, node:$ptr),
                             (store node:$val, node:$ptr), [{
  const auto *St = cast<StoreSDNode>(N);
  return St->getAlignment() >= 2;
}]>;
|
|
|
|
|
2012-08-16 01:44:53 +08:00
|
|
|
// Matches a store whose reported alignment is at least 4.
def alignedstore32 : PatFrag<(ops node:$val, node:$ptr),
                             (store node:$val, node:$ptr), [{
  const auto *St = cast<StoreSDNode>(N);
  return St->getAlignment() >= 4;
}]>;
|
|
|
|
|
2011-12-23 06:19:05 +08:00
|
|
|
// The VCVT to/from fixed-point instructions encode the 'fbits' operand
|
|
|
|
// (the number of fixed bits) differently than it appears in the assembly
|
|
|
|
// source. It's encoded as "Size - fbits" where Size is the size of the
|
|
|
|
// fixed-point representation (32 or 16) and fbits is the value appearing
|
|
|
|
// in the assembly source, an integer in [0,16] or (0,32], depending on size.
|
|
|
|
// 'fbits' operand for the 32-bit fixed-point form: an i32 operand matched by
// the FBits32 asm operand class and printed with printFBits32.
def fbits32_asm_operand : AsmOperandClass {
  let Name = "FBits32";
}
def fbits32 : Operand<i32> {
  let ParserMatchClass = fbits32_asm_operand;
  let PrintMethod = "printFBits32";
}
|
|
|
|
|
|
|
|
// 'fbits' operand for the 16-bit fixed-point form: an i32 operand matched by
// the FBits16 asm operand class and printed with printFBits16.
def fbits16_asm_operand : AsmOperandClass {
  let Name = "FBits16";
}
def fbits16 : Operand<i32> {
  let ParserMatchClass = fbits16_asm_operand;
  let PrintMethod = "printFBits16";
}
|
2009-10-28 09:44:26 +08:00
|
|
|
|
2007-01-19 15:51:42 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Load / store Instructions.
|
|
|
|
//
|
|
|
|
|
2010-02-28 07:47:46 +08:00
|
|
|
let canFoldAsLoad = 1, isReMaterializable = 1 in {
|
2010-11-03 09:49:29 +08:00
|
|
|
|
2010-10-21 06:44:54 +08:00
|
|
|
// vldr Dd, [addr] -- selected for f64 loads that satisfy alignedload32.
def VLDRD : ADI5<0b1101, 0b01,
                 (outs DPR:$Dd),
                 (ins addrmode5:$addr),
                 IIC_fpLoad64,
                 "vldr", "\t$Dd, $addr",
                 [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-11-03 09:49:29 +08:00
|
|
|
// vldr Sd, [addr] -- selected for SPR loads that satisfy alignedload32.
def VLDRS : ASI5<0b1101, 0b01,
                 (outs SPR:$Sd),
                 (ins addrmode5:$addr),
                 IIC_fpLoad32,
                 "vldr", "\t$Sd, $addr",
                 [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]> {
  // Some single precision VFP instructions may be executed on both the NEON
  // and the VFP pipelines.
  let D = VFPNeonDomain;
}
|
2010-11-03 09:49:29 +08:00
|
|
|
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation .
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issue before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH desriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 2/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
// vldr.16 Sd, [addr] -- selected for HPR loads that satisfy alignedload16.
// Uses the FP16 addressing mode operand and requires HasFullFP16.
def VLDRH : AHI5<0b1101, 0b01,
                 (outs HPR:$Sd),
                 (ins addrmode5fp16:$addr),
                 IIC_fpLoad16,
                 "vldr", ".16\t$Sd, $addr",
                 [(set HPR:$Sd, (alignedload16 addrmode5fp16:$addr))]>,
            Requires<[HasFullFP16]>;
|
|
|
|
|
2010-11-03 09:49:29 +08:00
|
|
|
} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-11-04 08:59:42 +08:00
|
|
|
// vstr Dd, [addr] -- selected for f64 stores that satisfy alignedstore32.
def VSTRD : ADI5<0b1101, 0b00,
                 (outs),
                 (ins DPR:$Dd, addrmode5:$addr),
                 IIC_fpStore64,
                 "vstr", "\t$Dd, $addr",
                 [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-11-04 08:59:42 +08:00
|
|
|
// vstr Sd, [addr] -- selected for SPR stores that satisfy alignedstore32.
def VSTRS : ASI5<0b1101, 0b00,
                 (outs),
                 (ins SPR:$Sd, addrmode5:$addr),
                 IIC_fpStore32,
                 "vstr", "\t$Sd, $addr",
                 [(alignedstore32 SPR:$Sd, addrmode5:$addr)]> {
  // Some single precision VFP instructions may be executed on both the NEON
  // and the VFP pipelines.
  let D = VFPNeonDomain;
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation .
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issue before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH desriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 2/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
// vstr.16 Sd, [addr] -- selected for HPR stores that satisfy alignedstore16.
// Uses the FP16 addressing mode operand and requires HasFullFP16.
def VSTRH : AHI5<0b1101, 0b00,
                 (outs),
                 (ins HPR:$Sd, addrmode5fp16:$addr),
                 IIC_fpStore16,
                 "vstr", ".16\t$Sd, $addr",
                 [(alignedstore16 HPR:$Sd, addrmode5fp16:$addr)]>,
            Requires<[HasFullFP16]>;
|
|
|
|
|
2007-01-19 15:51:42 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Load / store multiple Instructions.
|
|
|
|
//
|
|
|
|
|
2010-11-16 09:16:36 +08:00
|
|
|
// Generates the load/store-multiple variants for one mnemonic.
//
//   asm      - mnemonic stem; "ia"/"db" and the predicate are appended.
//   L_bit    - value placed in Inst{20} (the instantiations in this file
//              pass 1 for "vldm" and 0 for "vstm").
//   itin     - itinerary for the non-writeback form.
//   itin_upd - itinerary for the writeback ("!") forms.
//
// For each register-list kind (double, single) three records are produced:
// IA, IA with writeback, and DB with writeback.
multiclass vfp_ldst_mult<string asm, bit L_bit,
                         InstrItinClass itin, InstrItinClass itin_upd> {
  //===--- Double Precision ---------------------------------------------===//
  def DIA :
    AXDI4<(outs),
          (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
          IndexModeNone, itin,
          !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
    let Inst{24-23} = 0b01;       // Increment After
    let Inst{21}    = 0;          // No writeback
    let Inst{20}    = L_bit;
  }

  def DIA_UPD :
    AXDI4<(outs GPR:$wb),
          (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
          IndexModeUpd, itin_upd,
          !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
    let Inst{24-23} = 0b01;       // Increment After
    let Inst{21}    = 1;          // Writeback
    let Inst{20}    = L_bit;
  }

  def DDB_UPD :
    AXDI4<(outs GPR:$wb),
          (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
          IndexModeUpd, itin_upd,
          !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
    let Inst{24-23} = 0b10;       // Decrement Before
    let Inst{21}    = 1;          // Writeback
    let Inst{20}    = L_bit;
  }

  //===--- Single Precision ---------------------------------------------===//
  // Some single precision VFP instructions may be executed on both the NEON
  // and the VFP pipelines, hence D = VFPNeonDomain on each record below.
  def SIA :
    AXSI4<(outs),
          (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
          IndexModeNone, itin,
          !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
    let Inst{24-23} = 0b01;       // Increment After
    let Inst{21}    = 0;          // No writeback
    let Inst{20}    = L_bit;

    let D = VFPNeonDomain;
  }

  def SIA_UPD :
    AXSI4<(outs GPR:$wb),
          (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
          IndexModeUpd, itin_upd,
          !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
    let Inst{24-23} = 0b01;       // Increment After
    let Inst{21}    = 1;          // Writeback
    let Inst{20}    = L_bit;

    let D = VFPNeonDomain;
  }

  def SDB_UPD :
    AXSI4<(outs GPR:$wb),
          (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
          IndexModeUpd, itin_upd,
          !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
    let Inst{24-23} = 0b10;       // Decrement Before
    let Inst{21}    = 1;          // Writeback
    let Inst{20}    = L_bit;

    let D = VFPNeonDomain;
  }
}
|
|
|
|
|
2014-11-26 08:46:26 +08:00
|
|
|
// Instantiate the load/store-multiple families. Both are marked as having no
// unmodelled side effects; VLDM is a load (L bit 1), VSTM a store (L bit 0).
let hasSideEffects = 0, mayLoad = 1, hasExtraDefRegAllocReq = 1 in
defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>;

let hasSideEffects = 0, mayStore = 1, hasExtraSrcRegAllocReq = 1 in
defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpStore_m, IIC_fpStore_mu>;
|
2010-11-13 18:57:02 +08:00
|
|
|
|
2010-11-16 10:00:24 +08:00
|
|
|
// The suffix-less mnemonics are accepted as spellings of the "ia" forms.
def : MnemonicAlias<"vldm", "vldmia">;
def : MnemonicAlias<"vstm", "vstmia">;
|
|
|
|
|
2016-01-25 19:24:47 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Lazy load / store multiple Instructions
|
|
|
|
//
|
|
|
|
// vlldm Rn -- lazy load multiple. Takes only a base register and predicate,
// has no selection pattern, and requires HasV8MMainline and Has8MSecExt.
let mayLoad = 1 in
def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
                  IIC_fpLoad_m, "vlldm${p}\t$Rn", "", []>,
            Requires<[HasV8MMainline, Has8MSecExt]> {
  // Fixed encoding bits.
  let Inst{24-23} = 0b00;
  let Inst{22}    = 0;
  let Inst{21}    = 1;
  let Inst{20}    = 1;
  let Inst{15-12} = 0;
  let Inst{7-0}   = 0;
}
|
|
|
|
|
|
|
|
// vlstm Rn -- lazy store multiple. Takes only a base register and predicate,
// has no selection pattern, and requires HasV8MMainline and Has8MSecExt.
let mayStore = 1 in
def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
                  IIC_fpStore_m, "vlstm${p}\t$Rn", "", []>,
            Requires<[HasV8MMainline, Has8MSecExt]> {
  // Fixed encoding bits.
  let Inst{24-23} = 0b00;
  let Inst{22}    = 0;
  let Inst{21}    = 1;
  let Inst{20}    = 0;
  let Inst{15-12} = 0;
  let Inst{7-0}   = 0;
}
|
|
|
|
|
2016-06-03 21:19:43 +08:00
|
|
|
// vpush / vpop are spelled as aliases of the SP-based writeback forms:
//   vpush -> VSTM..DB_UPD on SP,  vpop -> VLDM..IA_UPD on SP,
// once for a D-register list and once for an S-register list. All four
// require HasVFP2 and pass 0 as the final InstAlias argument.
def : InstAlias<"vpush${p} $r",
                (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r), 0>,
      Requires<[HasVFP2]>;
def : InstAlias<"vpush${p} $r",
                (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r), 0>,
      Requires<[HasVFP2]>;
def : InstAlias<"vpop${p} $r",
                (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r), 0>,
      Requires<[HasVFP2]>;
def : InstAlias<"vpop${p} $r",
                (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r), 0>,
      Requires<[HasVFP2]>;
|
2012-03-06 07:16:31 +08:00
|
|
|
// The same vpush / vpop mappings, instantiated through VFPDTAnyInstAlias
// for both S-register and D-register lists.
defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
                         (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>;
defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
                         (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>;
defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
                         (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>;
defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
                         (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>;
|
2011-06-28 04:00:07 +08:00
|
|
|
|
2013-05-31 23:55:51 +08:00
|
|
|
// FLDMX, FSTMX - Load and store multiple unknown precision registers for
|
|
|
|
// pre-armv6 cores.
|
|
|
|
// These instructions are deprecated so we don't want them to get selected.
|
2017-11-22 00:20:25 +08:00
|
|
|
// However, there is no UAL syntax for them, so we keep them around for
|
|
|
|
// (dis)assembly only.
|
2013-05-31 23:55:51 +08:00
|
|
|
// Generates the "unknown precision" load/store-multiple variants.
//
//   asm   - mnemonic stem; "iax"/"dbx" and the predicate are appended.
//   L_bit - value placed in Inst{20} (the instantiations in this file pass
//           1 for "fldm" and 0 for "fstm").
//
// Three records are produced: IA, IA with writeback, and DB with writeback.
// None of them carries a selection pattern.
multiclass vfp_ldstx_mult<string asm, bit L_bit> {
  // Unknown precision
  def XIA :
    AXXI4<(outs),
          (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
          IndexModeNone,
          !strconcat(asm, "iax${p}\t$Rn, $regs"), "", []> {
    let Inst{24-23} = 0b01;       // Increment After
    let Inst{21}    = 0;          // No writeback
    let Inst{20}    = L_bit;
  }

  def XIA_UPD :
    AXXI4<(outs GPR:$wb),
          (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
          IndexModeUpd,
          !strconcat(asm, "iax${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
    let Inst{24-23} = 0b01;       // Increment After
    let Inst{21}    = 1;          // Writeback
    let Inst{20}    = L_bit;
  }

  def XDB_UPD :
    AXXI4<(outs GPR:$wb),
          (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
          IndexModeUpd,
          !strconcat(asm, "dbx${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
    let Inst{24-23} = 0b10;       // Decrement Before
    let Inst{21}    = 1;          // Writeback
    let Inst{20}    = L_bit;
  }
}
|
|
|
|
|
|
|
|
// Instantiate the unknown-precision families: FLDM with L bit 1, FSTM with
// L bit 0.
defm FLDM : vfp_ldstx_mult<"fldm", 1>;
defm FSTM : vfp_ldstx_mult<"fstm", 0>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2013-12-30 01:58:35 +08:00
|
|
|
// Stack-style (ea/fd) spellings mapped onto the ia/db forms.
// Loads:  ea -> db, fd -> ia.
def : VFP2MnemonicAlias<"fldmeax", "fldmdbx">;
def : VFP2MnemonicAlias<"fldmfdx", "fldmiax">;

// Stores: ea -> ia, fd -> db.
def : VFP2MnemonicAlias<"fstmeax", "fstmiax">;
def : VFP2MnemonicAlias<"fstmfdx", "fstmdbx">;
|
|
|
|
|
2010-10-13 07:06:54 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// FP Binary Operations.
|
|
|
|
//
|
2010-02-09 03:41:48 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
// vadd.f64 Dd, Dn, Dm -- selected for fadd on f64 values in DPR.
// The two-operand assembly alias ties $Dn to $Dd.
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VADDD : ADbI<0b11100, 0b11, 0, 0,
                 (outs DPR:$Dd),
                 (ins DPR:$Dn, DPR:$Dm),
                 IIC_fpALU64,
                 "vadd", ".f64\t$Dd, $Dn, $Dm",
                 [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>,
            Sched<[WriteFPALU64]>;
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
// vadd.f32 Sd, Sn, Sm -- selected for fadd on values in SPR.
// The two-operand assembly alias ties $Sn to $Sd.
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VADDS : ASbIn<0b11100, 0b11, 0, 0,
                  (outs SPR:$Sd),
                  (ins SPR:$Sn, SPR:$Sm),
                  IIC_fpALU32,
                  "vadd", ".f32\t$Sd, $Sn, $Sm",
                  [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]>,
            Sched<[WriteFPALU32]> {
  // Some single precision VFP instructions may be executed on both the NEON
  // and the VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
|
|
|
def VADDH : AHbI<0b11100, 0b11, 0, 0,
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation .
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issues before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH descriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not to be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 3/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation .
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issues before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH descriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not to be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 3/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
[(set HPR:$Sd, (fadd HPR:$Sn, HPR:$Sm))]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Sched<[WriteFPALU32]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
// VSUBD: double-precision subtract, printed as "vsub.f64 $Dd, $Dn, $Dm".
// The selection pattern matches an f64 fsub of $Dn and $Dm into $Dd, and the
// instruction is scheduled as WriteFPALU64.
// NOTE(review): TwoOperandAliasConstraint presumably lets the assembler accept
// the two-operand form with $Dn tied to $Dd - confirm against the class definition.
let TwoOperandAliasConstraint = "$Dn = $Dd" in
|
2010-11-01 14:00:39 +08:00
|
|
|
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>,
|
|
|
|
Sched<[WriteFPALU64]>;
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
2010-11-01 14:00:39 +08:00
|
|
|
def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>,
|
|
|
|
Sched<[WriteFPALU32]>{
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
|
|
|
def VSUBH : AHbI<0b11100, 0b11, 1, 0,
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation .
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issues before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH descriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not to be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 3/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation .
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issues before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH descriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not to be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 3/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
[(set HPR:$Sd, (fsub HPR:$Sn, HPR:$Sm))]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Sched<[WriteFPALU32]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
// VDIVD: double-precision divide, printed as "vdiv.f64 $Dd, $Dn, $Dm".
// The selection pattern matches an f64 fdiv of $Dn by $Dm into $Dd, and the
// instruction is scheduled as WriteFPDIV64.
// NOTE(review): TwoOperandAliasConstraint presumably lets the assembler accept
// the two-operand form with $Dn tied to $Dd - confirm against the class definition.
let TwoOperandAliasConstraint = "$Dn = $Dd" in
|
2010-11-01 14:00:39 +08:00
|
|
|
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>,
|
|
|
|
Sched<[WriteFPDIV64]>;
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
2010-11-01 14:00:39 +08:00
|
|
|
def VDIVS : ASbI<0b11101, 0b00, 0, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>,
|
|
|
|
Sched<[WriteFPDIV32]>;
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
|
|
|
def VDIVH : AHbI<0b11101, 0b00, 0, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPDIV32]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
// VMULD: double-precision multiply, printed as "vmul.f64 $Dd, $Dn, $Dm".
// The selection pattern matches an f64 fmul of $Dn and $Dm into $Dd; the
// scheduling info is WriteFPMUL64 plus a ReadFPMUL entry for each source.
// NOTE(review): TwoOperandAliasConstraint presumably lets the assembler accept
// the two-operand form with $Dn tied to $Dd - confirm against the class definition.
let TwoOperandAliasConstraint = "$Dn = $Dd" in
|
2010-11-01 14:00:39 +08:00
|
|
|
def VMULD : ADbI<0b11100, 0b10, 0, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>,
|
|
|
|
Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2012-04-20 08:15:00 +08:00
|
|
|
let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
2010-11-01 14:00:39 +08:00
|
|
|
def VMULS : ASbIn<0b11100, 0b10, 0, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>,
|
|
|
|
Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2010-11-01 14:00:39 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
let TwoOperandAliasConstraint = "$Sn = $Sd" in
|
|
|
|
def VMULH : AHbI<0b11100, 0b10, 0, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
// VNMULD: double-precision negated multiply, printed as
// "vnmul.f64 $Dd, $Dn, $Dm". The selection pattern matches fneg applied to an
// f64 fmul of $Dn and $Dm, i.e. $Dd = -($Dn * $Dm). Unlike VMULD, no
// TwoOperandAliasConstraint is attached to this definition.
def VNMULD : ADbI<0b11100, 0b10, 1, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>,
|
|
|
|
Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
|
2010-11-01 14:00:39 +08:00
|
|
|
|
|
|
|
def VNMULS : ASbI<0b11100, 0b10, 1, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>,
|
|
|
|
Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VNMULH : AHbI<0b11100, 0b10, 1, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2013-08-22 23:29:11 +08:00
|
|
|
multiclass vsel_inst<string op, bits<2> opc, int CC> {
|
|
|
|
let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
|
|
|
|
Uses = [CPSR], AddedComplexity = 4 in {
|
2016-01-25 18:26:26 +08:00
|
|
|
def H : AHbInp<0b11100, opc, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat("vsel", op, ".f16\t$Sd, $Sn, $Sm"),
|
|
|
|
[]>,
|
|
|
|
Requires<[HasFullFP16]>;
|
|
|
|
|
2013-07-07 04:50:18 +08:00
|
|
|
def S : ASbInp<0b11100, opc, 0,
|
2013-07-04 22:57:20 +08:00
|
|
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"),
|
2013-08-22 23:29:11 +08:00
|
|
|
[(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>,
|
2013-09-13 21:46:57 +08:00
|
|
|
Requires<[HasFPARMv8]>;
|
2013-07-04 22:57:20 +08:00
|
|
|
|
2013-07-07 04:50:18 +08:00
|
|
|
def D : ADbInp<0b11100, opc, 0,
|
2013-07-04 22:57:20 +08:00
|
|
|
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
|
|
|
|
NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"),
|
2013-08-22 23:29:11 +08:00
|
|
|
[(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasFPARMv8, HasDPVFP]>;
|
2013-07-04 22:57:20 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-08-22 23:29:11 +08:00
|
|
|
// The CC constants here match ARMCC::CondCodes.
|
|
|
|
// vselgt.{f16,f32,f64}: opc = 0b11; the f32/f64 ARMcmov patterns use CC = 12.
defm VSELGT : vsel_inst<"gt", 0b11, 12>;
|
|
|
|
// vselge.{f16,f32,f64}: opc = 0b10; the f32/f64 ARMcmov patterns use CC = 10.
defm VSELGE : vsel_inst<"ge", 0b10, 10>;
|
|
|
|
// vseleq.{f16,f32,f64}: opc = 0b00; the f32/f64 ARMcmov patterns use CC = 0.
defm VSELEQ : vsel_inst<"eq", 0b00, 0>;
|
|
|
|
// vselvs.{f16,f32,f64}: opc = 0b01; the f32/f64 ARMcmov patterns use CC = 6.
defm VSELVS : vsel_inst<"vs", 0b01, 6>;
|
2013-07-04 22:57:20 +08:00
|
|
|
|
2013-08-23 20:01:13 +08:00
|
|
|
multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
|
2013-07-07 04:50:18 +08:00
|
|
|
let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
|
2016-01-25 18:26:26 +08:00
|
|
|
def H : AHbInp<0b11101, 0b00, opc,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat(op, ".f16\t$Sd, $Sn, $Sm"),
|
|
|
|
[]>,
|
|
|
|
Requires<[HasFullFP16]>;
|
|
|
|
|
2013-07-07 04:50:18 +08:00
|
|
|
def S : ASbInp<0b11101, 0b00, opc,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"),
|
2013-08-23 20:01:13 +08:00
|
|
|
[(set SPR:$Sd, (SD SPR:$Sn, SPR:$Sm))]>,
|
2013-09-13 21:46:57 +08:00
|
|
|
Requires<[HasFPARMv8]>;
|
2013-07-07 04:50:18 +08:00
|
|
|
|
|
|
|
def D : ADbInp<0b11101, 0b00, opc,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
|
|
|
|
NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"),
|
2013-08-23 20:01:13 +08:00
|
|
|
[(set DPR:$Dd, (f64 (SD (f64 DPR:$Dn), (f64 DPR:$Dm))))]>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasFPARMv8, HasDPVFP]>;
|
2013-07-07 04:50:18 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-11 20:06:22 +08:00
|
|
|
// vmaxnm.{f16,f32,f64}: opc bit = 0; the f32/f64 forms are selected for fmaxnum
// (the f16 form has an empty pattern list in vmaxmin_inst).
defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>;
|
|
|
|
// vminnm.{f16,f32,f64}: opc bit = 1; the f32/f64 forms are selected for fminnum
// (the f16 form has an empty pattern list in vmaxmin_inst).
defm VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>;
|
2013-07-07 04:50:18 +08:00
|
|
|
|
2007-05-03 08:32:00 +08:00
|
|
|
// Match reassociated forms only if not sign dependent rounding.
|
2010-03-09 02:51:21 +08:00
|
|
|
// f64 reassociated form: (-a) * b is selected as VNMULD a, b, whose own pattern
// is fneg(fmul a, b). Only enabled under NoHonorSignDependentRounding and
// HasDPVFP.
def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
|
2013-10-24 23:49:39 +08:00
|
|
|
(VNMULD DPR:$a, DPR:$b)>,
|
|
|
|
Requires<[NoHonorSignDependentRounding,HasDPVFP]>;
|
2007-05-03 08:32:00 +08:00
|
|
|
// f32 reassociated form: (-a) * b is selected as VNMULS a, b, whose own pattern
// is fneg(fmul a, b). Only enabled under NoHonorSignDependentRounding.
def : Pat<(fmul (fneg SPR:$a), SPR:$b),
|
2009-11-09 08:11:35 +08:00
|
|
|
(VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
|
2007-05-03 08:32:00 +08:00
|
|
|
|
2010-10-13 06:55:35 +08:00
|
|
|
// These are encoded as unary instructions.
|
2012-03-06 08:19:55 +08:00
|
|
|
let Defs = [FPSCR_NZCV] in {
|
2010-11-01 14:00:39 +08:00
|
|
|
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
|
|
|
|
(outs), (ins DPR:$Dd, DPR:$Dm),
|
|
|
|
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
|
2017-02-13 20:32:47 +08:00
|
|
|
[(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 1))]>;
|
2010-10-13 08:04:29 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
|
|
|
|
(outs), (ins SPR:$Sd, SPR:$Sm),
|
|
|
|
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
|
2017-02-13 20:32:47 +08:00
|
|
|
[(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 1))]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2010-10-13 06:55:35 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
|
|
|
|
(outs), (ins SPR:$Sd, SPR:$Sm),
|
|
|
|
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm",
|
|
|
|
[]>;
|
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
|
|
|
|
(outs), (ins DPR:$Dd, DPR:$Dm),
|
|
|
|
IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
|
2017-02-13 20:32:47 +08:00
|
|
|
[(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 0))]>;
|
2010-10-13 06:55:35 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
|
|
|
|
(outs), (ins SPR:$Sd, SPR:$Sm),
|
|
|
|
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
|
2017-02-13 20:32:47 +08:00
|
|
|
[(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 0))]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2010-10-13 06:55:35 +08:00
|
|
|
}
|
2016-01-25 18:26:26 +08:00
|
|
|
|
|
|
|
def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0,
|
|
|
|
(outs), (ins SPR:$Sd, SPR:$Sm),
|
|
|
|
IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm",
|
|
|
|
[]>;
|
2012-03-06 08:19:55 +08:00
|
|
|
} // Defs = [FPSCR_NZCV]
|
2007-01-19 15:51:42 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// FP Unary Operations.
|
|
|
|
//
|
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
// VABSD: double-precision absolute value, printed as "vabs.f64 $Dd, $Dm".
// The selection pattern matches fabs of an f64 $Dm into $Dd.
def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dm),
|
|
|
|
IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm",
|
|
|
|
[(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
|
2011-02-16 08:35:02 +08:00
|
|
|
[(set SPR:$Sd, (fabs SPR:$Sm))]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VABSH : AHuI<0b11101, 0b11, 0b0000, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpUNA16, "vabs", ".f16\t$Sd, $Sm",
|
|
|
|
[]>;
|
|
|
|
|
2012-03-06 08:19:55 +08:00
|
|
|
let Defs = [FPSCR_NZCV] in {
|
2010-11-01 14:00:39 +08:00
|
|
|
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
|
|
|
|
(outs), (ins DPR:$Dd),
|
|
|
|
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
|
2017-02-13 20:32:47 +08:00
|
|
|
[(arm_cmpfp0 (f64 DPR:$Dd), (i32 1))]> {
|
2010-11-01 14:00:39 +08:00
|
|
|
let Inst{3-0} = 0b0000;
|
|
|
|
let Inst{5} = 0;
|
2010-10-13 08:38:07 +08:00
|
|
|
}
|
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
|
|
|
|
(outs), (ins SPR:$Sd),
|
|
|
|
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
|
2017-02-13 20:32:47 +08:00
|
|
|
[(arm_cmpfp0 SPR:$Sd, (i32 1))]> {
|
2010-11-01 14:00:39 +08:00
|
|
|
let Inst{3-0} = 0b0000;
|
|
|
|
let Inst{5} = 0;
|
2011-02-16 08:35:02 +08:00
|
|
|
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2010-10-13 08:38:07 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
|
|
|
|
(outs), (ins SPR:$Sd),
|
|
|
|
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0",
|
|
|
|
[]> {
|
|
|
|
let Inst{3-0} = 0b0000;
|
|
|
|
let Inst{5} = 0;
|
|
|
|
}
|
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
|
|
|
|
(outs), (ins DPR:$Dd),
|
|
|
|
IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
|
2017-02-13 20:32:47 +08:00
|
|
|
[(arm_cmpfp0 (f64 DPR:$Dd), (i32 0))]> {
|
2010-11-01 14:00:39 +08:00
|
|
|
let Inst{3-0} = 0b0000;
|
|
|
|
let Inst{5} = 0;
|
2010-10-14 04:58:46 +08:00
|
|
|
}
|
2010-02-09 03:41:48 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
|
|
|
|
(outs), (ins SPR:$Sd),
|
|
|
|
IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
|
2017-02-13 20:32:47 +08:00
|
|
|
[(arm_cmpfp0 SPR:$Sd, (i32 0))]> {
|
2010-11-01 14:00:39 +08:00
|
|
|
let Inst{3-0} = 0b0000;
|
|
|
|
let Inst{5} = 0;
|
2011-02-16 08:35:02 +08:00
|
|
|
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2010-10-14 04:58:46 +08:00
|
|
|
}
|
2016-01-25 18:26:26 +08:00
|
|
|
|
|
|
|
def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
|
|
|
|
(outs), (ins SPR:$Sd),
|
|
|
|
IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0",
|
|
|
|
[]> {
|
|
|
|
let Inst{3-0} = 0b0000;
|
|
|
|
let Inst{5} = 0;
|
|
|
|
}
|
2012-03-06 08:19:55 +08:00
|
|
|
} // Defs = [FPSCR_NZCV]
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-10-13 08:56:35 +08:00
|
|
|
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
|
|
|
|
(outs DPR:$Dd), (ins SPR:$Sm),
|
|
|
|
IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set DPR:$Dd, (fpextend SPR:$Sm))]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2010-10-13 08:56:35 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Dd;
|
|
|
|
bits<5> Sm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Sm{4-1};
|
|
|
|
let Inst{5} = Sm{0};
|
|
|
|
let Inst{15-12} = Dd{3-0};
|
|
|
|
let Inst{22} = Dd{4};
|
2014-08-21 20:50:31 +08:00
|
|
|
|
|
|
|
let Predicates = [HasVFP2, HasDPVFP];
|
2010-10-13 08:56:35 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2008-11-11 10:11:05 +08:00
|
|
|
// Special case encoding: bits 11-8 is 0b1011.
|
2010-10-13 08:56:35 +08:00
|
|
|
def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
|
|
|
|
IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set SPR:$Sd, (fpround DPR:$Dm))]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2010-10-13 08:56:35 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sd;
|
|
|
|
bits<5> Dm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Dm{3-0};
|
|
|
|
let Inst{5} = Dm{4};
|
|
|
|
let Inst{15-12} = Sd{4-1};
|
|
|
|
let Inst{22} = Sd{0};
|
|
|
|
|
2008-11-11 10:11:05 +08:00
|
|
|
let Inst{27-23} = 0b11101;
|
|
|
|
let Inst{21-16} = 0b110111;
|
|
|
|
let Inst{11-8} = 0b1011;
|
2010-01-30 07:21:10 +08:00
|
|
|
let Inst{7-6} = 0b11;
|
|
|
|
let Inst{4} = 0;
|
2013-10-24 23:49:39 +08:00
|
|
|
|
|
|
|
let Predicates = [HasVFP2, HasDPVFP];
|
2008-11-11 10:11:05 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation .
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issues before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH descriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not to be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 3/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
// Between half, single and double-precision.
|
2012-08-15 07:36:01 +08:00
|
|
|
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
2011-08-23 05:34:00 +08:00
|
|
|
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation .
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issues before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH descriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not to be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 3/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
[ /* intentionally left blank, see rule below */ ]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-03-19 06:35:37 +08:00
|
|
|
|
2018-01-29 19:28:06 +08:00
|
|
|
// Selects an f16 -> f32 fpextend of an HPR value as VCVTBHS. VCVTBHS takes an
// SPR input and its own pattern list is left empty, so the HPR source is first
// moved into the SPR register class with COPY_TO_REGCLASS.
def : FullFP16Pat<(f32 (fpextend HPR:$Sm)),
|
|
|
|
(VCVTBHS (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation .
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issues before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH descriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not to be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 3/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
|
2012-08-15 07:36:01 +08:00
|
|
|
def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
2011-08-23 05:34:00 +08:00
|
|
|
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
|
[ARM] Armv8.2-A FP16 code generation (part 1/3)
This is the groundwork for Armv8.2-A FP16 code generation.
Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:
_Float16 sub(_Float16 a, _Float16 b) {
return a + b;
}
gets lowered to this:
define float @sub(float %a.coerce, float %b.coerce) {
entry:
%0 = bitcast float %a.coerce to i32
%tmp.0.extract.trunc = trunc i32 %0 to i16
%1 = bitcast i16 %tmp.0.extract.trunc to half
<SNIP>
%add = fadd half %1, %3
<SNIP>
}
When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.
When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.
As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issues before in
the FP16 and FullFP16 cases.
I've also added match rules to the VLDRH and VSTRH descriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not to be implemented at all, so that
has also been added.
This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 3/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.
Thanks to Sam Parker and Oliver Stannard for their help and reviews!
Differential Revision: https://reviews.llvm.org/D38315
llvm-svn: 323512
2018-01-26 17:26:40 +08:00
|
|
|
[]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-03-19 06:35:37 +08:00
|
|
|
|
2012-08-15 07:36:01 +08:00
|
|
|
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
2011-08-23 05:34:00 +08:00
|
|
|
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
|
2015-12-07 18:54:36 +08:00
|
|
|
[/* For disassembly only; pattern left blank */]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-02-10 01:21:56 +08:00
|
|
|
|
2012-08-15 07:36:01 +08:00
|
|
|
def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
|
2011-08-23 05:34:00 +08:00
|
|
|
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
|
2015-12-07 18:54:36 +08:00
|
|
|
[/* For disassembly only; pattern left blank */]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-02-10 01:21:56 +08:00
|
|
|
|
2013-07-04 18:04:08 +08:00
|
|
|
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
|
|
|
|
(outs DPR:$Dd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>, Requires<[HasFPARMv8, HasDPVFP]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2013-07-04 18:04:08 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Sm{4-1};
|
|
|
|
let Inst{5} = Sm{0};
|
|
|
|
}
|
|
|
|
|
|
|
|
def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins DPR:$Dm),
|
|
|
|
NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm",
|
2013-10-24 23:49:39 +08:00
|
|
|
[]>, Requires<[HasFPARMv8, HasDPVFP]> {
|
2013-07-04 18:04:08 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sd;
|
|
|
|
bits<5> Dm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Dm{3-0};
|
|
|
|
let Inst{5} = Dm{4};
|
|
|
|
let Inst{15-12} = Sd{4-1};
|
|
|
|
let Inst{22} = Sd{0};
|
|
|
|
}
|
|
|
|
|
|
|
|
def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
|
|
|
|
(outs DPR:$Dd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm",
|
2013-10-24 23:49:39 +08:00
|
|
|
[]>, Requires<[HasFPARMv8, HasDPVFP]> {
|
2013-07-04 18:04:08 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Sm{4-1};
|
|
|
|
let Inst{5} = Sm{0};
|
|
|
|
}
|
|
|
|
|
|
|
|
def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins DPR:$Dm),
|
|
|
|
NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm",
|
2013-10-24 23:49:39 +08:00
|
|
|
[]>, Requires<[HasFPARMv8, HasDPVFP]> {
|
2013-07-04 18:04:08 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sd;
|
|
|
|
bits<5> Dm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{15-12} = Sd{4-1};
|
|
|
|
let Inst{22} = Sd{0};
|
|
|
|
let Inst{3-0} = Dm{3-0};
|
|
|
|
let Inst{5} = Dm{4};
|
|
|
|
}
|
|
|
|
|
2018-01-29 19:28:06 +08:00
|
|
|
def : FP16Pat<(fp_to_f16 SPR:$a),
|
|
|
|
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
|
2014-07-17 19:27:04 +08:00
|
|
|
|
2018-01-29 19:28:06 +08:00
|
|
|
def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
|
|
|
|
(i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>;
|
2014-07-17 19:27:04 +08:00
|
|
|
|
2018-01-29 19:28:06 +08:00
|
|
|
def : FP16Pat<(f16_to_fp GPR:$a),
|
|
|
|
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
|
2014-07-17 19:27:04 +08:00
|
|
|
|
2018-01-29 19:28:06 +08:00
|
|
|
def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
|
|
|
|
(VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>;
|
2014-07-17 19:27:04 +08:00
|
|
|
|
2014-08-26 00:56:33 +08:00
|
|
|
multiclass vcvt_inst<string opc, bits<2> rm,
|
|
|
|
SDPatternOperator node = null_frag> {
|
2013-07-18 18:20:25 +08:00
|
|
|
let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
|
2016-01-25 18:26:26 +08:00
|
|
|
def SH : AHuInp<0b11101, 0b11, 0b1100, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat("vcvt", opc, ".s32.f16\t$Sd, $Sm"),
|
|
|
|
[]>,
|
|
|
|
Requires<[HasFullFP16]> {
|
|
|
|
let Inst{17-16} = rm;
|
|
|
|
}
|
|
|
|
|
|
|
|
def UH : AHuInp<0b11101, 0b11, 0b1100, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat("vcvt", opc, ".u32.f16\t$Sd, $Sm"),
|
|
|
|
[]>,
|
|
|
|
Requires<[HasFullFP16]> {
|
|
|
|
let Inst{17-16} = rm;
|
|
|
|
}
|
|
|
|
|
2013-07-09 17:59:04 +08:00
|
|
|
def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"),
|
2015-03-24 00:15:16 +08:00
|
|
|
[]>,
|
2014-08-26 00:56:33 +08:00
|
|
|
Requires<[HasFPARMv8]> {
|
2013-07-09 17:59:04 +08:00
|
|
|
let Inst{17-16} = rm;
|
|
|
|
}
|
|
|
|
|
|
|
|
def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"),
|
2015-03-24 00:15:16 +08:00
|
|
|
[]>,
|
2014-08-26 00:56:33 +08:00
|
|
|
Requires<[HasFPARMv8]> {
|
2013-07-09 17:59:04 +08:00
|
|
|
let Inst{17-16} = rm;
|
|
|
|
}
|
|
|
|
|
|
|
|
def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins DPR:$Dm),
|
|
|
|
NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"),
|
2015-03-24 00:15:16 +08:00
|
|
|
[]>,
|
2014-08-26 00:56:33 +08:00
|
|
|
Requires<[HasFPARMv8, HasDPVFP]> {
|
2013-07-09 17:59:04 +08:00
|
|
|
bits<5> Dm;
|
|
|
|
|
|
|
|
let Inst{17-16} = rm;
|
|
|
|
|
|
|
|
// Encode instruction operands
|
|
|
|
let Inst{3-0} = Dm{3-0};
|
|
|
|
let Inst{5} = Dm{4};
|
|
|
|
let Inst{8} = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins DPR:$Dm),
|
|
|
|
NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"),
|
2015-03-24 00:15:16 +08:00
|
|
|
[]>,
|
2014-08-26 00:56:33 +08:00
|
|
|
Requires<[HasFPARMv8, HasDPVFP]> {
|
2013-07-09 17:59:04 +08:00
|
|
|
bits<5> Dm;
|
|
|
|
|
|
|
|
let Inst{17-16} = rm;
|
|
|
|
|
|
|
|
// Encode instruction operands
|
|
|
|
let Inst{3-0} = Dm{3-0};
|
|
|
|
let Inst{5} = Dm{4};
|
|
|
|
let Inst{8} = 1;
|
|
|
|
}
|
|
|
|
}
|
2015-03-24 00:15:16 +08:00
|
|
|
|
|
|
|
let Predicates = [HasFPARMv8] in {
|
|
|
|
def : Pat<(i32 (fp_to_sint (node SPR:$a))),
|
|
|
|
(COPY_TO_REGCLASS
|
|
|
|
(!cast<Instruction>(NAME#"SS") SPR:$a),
|
|
|
|
GPR)>;
|
|
|
|
def : Pat<(i32 (fp_to_uint (node SPR:$a))),
|
|
|
|
(COPY_TO_REGCLASS
|
|
|
|
(!cast<Instruction>(NAME#"US") SPR:$a),
|
|
|
|
GPR)>;
|
|
|
|
}
|
|
|
|
let Predicates = [HasFPARMv8, HasDPVFP] in {
|
|
|
|
def : Pat<(i32 (fp_to_sint (node (f64 DPR:$a)))),
|
|
|
|
(COPY_TO_REGCLASS
|
|
|
|
(!cast<Instruction>(NAME#"SD") DPR:$a),
|
|
|
|
GPR)>;
|
|
|
|
def : Pat<(i32 (fp_to_uint (node (f64 DPR:$a)))),
|
|
|
|
(COPY_TO_REGCLASS
|
|
|
|
(!cast<Instruction>(NAME#"UD") DPR:$a),
|
|
|
|
GPR)>;
|
|
|
|
}
|
2013-07-09 17:59:04 +08:00
|
|
|
}
|
|
|
|
|
2016-08-19 04:08:15 +08:00
|
|
|
defm VCVTA : vcvt_inst<"a", 0b00, fround>;
|
2013-07-09 17:59:04 +08:00
|
|
|
defm VCVTN : vcvt_inst<"n", 0b01>;
|
2014-08-26 00:56:33 +08:00
|
|
|
defm VCVTP : vcvt_inst<"p", 0b10, fceil>;
|
|
|
|
defm VCVTM : vcvt_inst<"m", 0b11, ffloor>;
|
2013-07-09 17:59:04 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dm),
|
|
|
|
IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
|
|
|
|
[(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
|
2011-02-16 08:35:02 +08:00
|
|
|
[(set SPR:$Sd, (fneg SPR:$Sm))]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// VNEGH: half-precision counterpart of VNEGD/VNEGS, printed as
// "vneg.f16 $Sd, $Sm". It reads and writes an SPR register and uses the
// IIC_fpUNA16 itinerary. The pattern list is empty, so this definition does
// not by itself select the instruction from an fneg DAG node.
def VNEGH : AHuI<0b11101, 0b11, 0b0001, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpUNA16, "vneg", ".f16\t$Sd, $Sm",
|
|
|
|
[]>;
|
|
|
|
|
2014-08-16 05:38:16 +08:00
|
|
|
multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
|
2016-01-25 18:26:26 +08:00
|
|
|
def H : AHuI<0b11101, 0b11, 0b0110, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat("vrint", opc), ".f16\t$Sd, $Sm",
|
|
|
|
[]>,
|
|
|
|
Requires<[HasFullFP16]> {
|
|
|
|
let Inst{7} = op2;
|
|
|
|
let Inst{16} = op;
|
|
|
|
}
|
|
|
|
|
2013-07-09 19:03:21 +08:00
|
|
|
def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm",
|
2014-08-16 05:38:16 +08:00
|
|
|
[(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>,
|
|
|
|
Requires<[HasFPARMv8]> {
|
2013-07-09 19:03:21 +08:00
|
|
|
let Inst{7} = op2;
|
|
|
|
let Inst{16} = op;
|
|
|
|
}
|
|
|
|
def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dm),
|
|
|
|
NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm",
|
2014-08-16 05:38:16 +08:00
|
|
|
[(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>,
|
|
|
|
Requires<[HasFPARMv8, HasDPVFP]> {
|
2013-07-09 19:03:21 +08:00
|
|
|
let Inst{7} = op2;
|
|
|
|
let Inst{16} = op;
|
|
|
|
}
|
2013-08-27 19:24:16 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def : InstAlias<!strconcat("vrint", opc, "$p.f16.f16\t$Sd, $Sm"),
|
2016-06-03 21:19:43 +08:00
|
|
|
(!cast<Instruction>(NAME#"H") SPR:$Sd, SPR:$Sm, pred:$p), 0>,
|
2016-01-25 18:26:26 +08:00
|
|
|
Requires<[HasFullFP16]>;
|
2013-08-27 19:24:16 +08:00
|
|
|
def : InstAlias<!strconcat("vrint", opc, "$p.f32.f32\t$Sd, $Sm"),
|
2016-06-03 21:19:43 +08:00
|
|
|
(!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p), 0>,
|
2013-10-24 20:22:58 +08:00
|
|
|
Requires<[HasFPARMv8]>;
|
2013-08-27 19:24:16 +08:00
|
|
|
def : InstAlias<!strconcat("vrint", opc, "$p.f64.f64\t$Dd, $Dm"),
|
2016-06-03 21:19:43 +08:00
|
|
|
(!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm, pred:$p), 0>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasFPARMv8,HasDPVFP]>;
|
2013-07-09 19:03:21 +08:00
|
|
|
}
|
|
|
|
|
2014-08-16 05:38:16 +08:00
|
|
|
defm VRINTZ : vrint_inst_zrx<"z", 0, 1, ftrunc>;
|
|
|
|
defm VRINTR : vrint_inst_zrx<"r", 0, 0, fnearbyint>;
|
|
|
|
defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>;
|
2013-07-09 19:03:21 +08:00
|
|
|
|
2014-08-16 05:38:16 +08:00
|
|
|
multiclass vrint_inst_anpm<string opc, bits<2> rm,
|
|
|
|
SDPatternOperator node = null_frag> {
|
2013-07-18 18:20:25 +08:00
|
|
|
let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
|
2016-01-25 18:26:26 +08:00
|
|
|
def H : AHuInp<0b11101, 0b11, 0b1000, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat("vrint", opc, ".f16\t$Sd, $Sm"),
|
|
|
|
[]>,
|
|
|
|
Requires<[HasFullFP16]> {
|
|
|
|
let Inst{17-16} = rm;
|
|
|
|
}
|
2013-07-09 19:26:18 +08:00
|
|
|
def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"),
|
2014-08-16 05:38:16 +08:00
|
|
|
[(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>,
|
|
|
|
Requires<[HasFPARMv8]> {
|
2013-07-09 19:26:18 +08:00
|
|
|
let Inst{17-16} = rm;
|
|
|
|
}
|
|
|
|
def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dm),
|
|
|
|
NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"),
|
2014-08-16 05:38:16 +08:00
|
|
|
[(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>,
|
|
|
|
Requires<[HasFPARMv8, HasDPVFP]> {
|
2013-07-09 19:26:18 +08:00
|
|
|
let Inst{17-16} = rm;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
def : InstAlias<!strconcat("vrint", opc, ".f32.f32\t$Sd, $Sm"),
|
2016-06-03 21:19:43 +08:00
|
|
|
(!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm), 0>,
|
2013-10-24 20:22:58 +08:00
|
|
|
Requires<[HasFPARMv8]>;
|
2013-07-09 19:26:18 +08:00
|
|
|
def : InstAlias<!strconcat("vrint", opc, ".f64.f64\t$Dd, $Dm"),
|
2016-06-03 21:19:43 +08:00
|
|
|
(!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm), 0>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasFPARMv8,HasDPVFP]>;
|
2013-07-09 19:26:18 +08:00
|
|
|
}
|
|
|
|
|
2016-08-19 04:08:15 +08:00
|
|
|
defm VRINTA : vrint_inst_anpm<"a", 0b00, fround>;
|
2013-07-09 19:26:18 +08:00
|
|
|
defm VRINTN : vrint_inst_anpm<"n", 0b01>;
|
2014-08-16 05:38:16 +08:00
|
|
|
defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>;
|
|
|
|
defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
|
2013-07-09 19:26:18 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dm),
|
|
|
|
IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>,
|
|
|
|
Sched<[WriteFPSQRT64]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set SPR:$Sd, (fsqrt SPR:$Sm))]>,
|
|
|
|
Sched<[WriteFPSQRT32]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// VSQRTH: half-precision counterpart of VSQRTD/VSQRTS, printed as
// "vsqrt.f16 $Sd, $Sm". It reads and writes an SPR register and uses the
// IIC_fpSQRT16 itinerary. The pattern list is empty, so this definition does
// not by itself select the instruction from an fsqrt DAG node.
// NOTE(review): unlike VSQRTD/VSQRTS, no Sched<[...]> clause is attached
// here -- confirm whether that is intentional.
def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpSQRT16, "vsqrt", ".f16\t$Sd, $Sm",
|
|
|
|
[]>;
|
|
|
|
|
2014-11-26 08:46:26 +08:00
|
|
|
let hasSideEffects = 0 in {
|
2010-11-01 14:00:39 +08:00
|
|
|
def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Dm),
|
|
|
|
IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
|
2010-10-14 04:58:46 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
|
|
|
let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
|
|
|
|
def VMOVH : ASuInp<0b11101, 0b11, 0b0000, 0b01, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpUNA16, "vmovx.f16\t$Sd, $Sm", []>,
|
|
|
|
Requires<[HasFullFP16]>;
|
|
|
|
|
|
|
|
def VINSH : ASuInp<0b11101, 0b11, 0b0000, 0b11, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpUNA16, "vins.f16\t$Sd, $Sm", []>,
|
|
|
|
Requires<[HasFullFP16]>;
|
|
|
|
} // PostEncoderMethod
|
2014-11-26 08:46:26 +08:00
|
|
|
} // hasSideEffects
|
2010-10-14 04:58:46 +08:00
|
|
|
|
2007-01-19 15:51:42 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// FP <-> GPR Copies. Int <-> FP Conversions.
|
|
|
|
//
|
|
|
|
|
2010-10-21 06:44:54 +08:00
|
|
|
def VMOVRS : AVConv2I<0b11100001, 0b1010,
|
|
|
|
(outs GPR:$Rt), (ins SPR:$Sn),
|
|
|
|
IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set GPR:$Rt, (bitconvert SPR:$Sn))]>,
|
|
|
|
Sched<[WriteFPMOV]> {
|
2010-10-21 06:44:54 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<4> Rt;
|
|
|
|
bits<5> Sn;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{19-16} = Sn{4-1};
|
|
|
|
let Inst{7} = Sn{0};
|
|
|
|
let Inst{15-12} = Rt;
|
|
|
|
|
|
|
|
let Inst{6-5} = 0b00;
|
|
|
|
let Inst{3-0} = 0b0000;
|
2011-04-20 02:11:38 +08:00
|
|
|
|
|
|
|
// Some single precision VFP instructions may be executed on both NEON and VFP
|
|
|
|
// pipelines.
|
|
|
|
let D = VFPNeonDomain;
|
2010-10-21 06:44:54 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2012-09-30 05:43:49 +08:00
|
|
|
// Bitcast i32 -> f32. NEON prefers to use VMOVDRR.
|
2010-10-21 06:44:54 +08:00
|
|
|
def VMOVSR : AVConv4I<0b11100000, 0b1010,
|
|
|
|
(outs SPR:$Sn), (ins GPR:$Rt),
|
|
|
|
IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
|
2012-09-30 05:43:49 +08:00
|
|
|
[(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasVFP2, UseVMOVSR]>,
|
|
|
|
Sched<[WriteFPMOV]> {
|
2010-10-21 06:44:54 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sn;
|
|
|
|
bits<4> Rt;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{19-16} = Sn{4-1};
|
|
|
|
let Inst{7} = Sn{0};
|
|
|
|
let Inst{15-12} = Rt;
|
|
|
|
|
|
|
|
let Inst{6-5} = 0b00;
|
|
|
|
let Inst{3-0} = 0b0000;
|
2011-04-20 02:11:38 +08:00
|
|
|
|
|
|
|
// Some single precision VFP instructions may be executed on both NEON and VFP
|
|
|
|
// pipelines.
|
|
|
|
let D = VFPNeonDomain;
|
2010-10-21 06:44:54 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2014-11-26 08:46:26 +08:00
|
|
|
let hasSideEffects = 0 in {
|
2009-11-09 08:11:35 +08:00
|
|
|
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
|
2010-10-21 07:37:40 +08:00
|
|
|
(outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
|
|
|
|
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
|
2017-03-15 02:43:37 +08:00
|
|
|
[(set GPR:$Rt, GPR:$Rt2, (arm_fmrrd DPR:$Dm))]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Sched<[WriteFPMOV]> {
|
2010-10-21 07:37:40 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Dm;
|
|
|
|
bits<4> Rt;
|
|
|
|
bits<4> Rt2;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Dm{3-0};
|
|
|
|
let Inst{5} = Dm{4};
|
|
|
|
let Inst{15-12} = Rt;
|
|
|
|
let Inst{19-16} = Rt2;
|
|
|
|
|
2010-02-06 02:04:58 +08:00
|
|
|
let Inst{7-6} = 0b00;
|
2011-04-20 02:11:38 +08:00
|
|
|
|
|
|
|
// Some single precision VFP instructions may be executed on both NEON and VFP
|
|
|
|
// pipelines.
|
|
|
|
let D = VFPNeonDomain;
|
2014-08-21 06:16:19 +08:00
|
|
|
|
|
|
|
// This instruction is equivalent to
|
|
|
|
// $Rt = EXTRACT_SUBREG $Dm, ssub_0
|
|
|
|
// $Rt2 = EXTRACT_SUBREG $Dm, ssub_1
|
|
|
|
let isExtractSubreg = 1;
|
2010-02-06 02:04:58 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-02-09 01:26:09 +08:00
|
|
|
def VMOVRRS : AVConv3I<0b11000101, 0b1010,
|
2011-08-30 07:15:25 +08:00
|
|
|
(outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
|
|
|
|
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
|
2017-01-24 04:20:39 +08:00
|
|
|
[/* For disassembly only; pattern left blank */]>,
|
|
|
|
Sched<[WriteFPMOV]> {
|
2011-08-30 07:15:25 +08:00
|
|
|
bits<5> src1;
|
|
|
|
bits<4> Rt;
|
|
|
|
bits<4> Rt2;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
2012-07-10 20:51:09 +08:00
|
|
|
let Inst{3-0} = src1{4-1};
|
|
|
|
let Inst{5} = src1{0};
|
2011-08-30 07:15:25 +08:00
|
|
|
let Inst{15-12} = Rt;
|
|
|
|
let Inst{19-16} = Rt2;
|
|
|
|
|
2010-02-09 01:26:09 +08:00
|
|
|
let Inst{7-6} = 0b00;
|
2011-04-20 02:11:38 +08:00
|
|
|
|
|
|
|
// Some single precision VFP instructions may be executed on both NEON and VFP
|
|
|
|
// pipelines.
|
|
|
|
let D = VFPNeonDomain;
|
2011-08-23 04:27:12 +08:00
|
|
|
let DecoderMethod = "DecodeVMOVRRS";
|
2010-02-09 01:26:09 +08:00
|
|
|
}
|
2014-11-26 08:46:26 +08:00
|
|
|
} // hasSideEffects
|
2010-02-09 01:26:09 +08:00
|
|
|
|
2007-01-19 15:51:42 +08:00
|
|
|
// FMDHR: GPR -> SPR
|
|
|
|
// FMDLR: GPR -> SPR
|
|
|
|
|
2009-11-09 08:11:35 +08:00
|
|
|
def VMOVDRR : AVConv5I<0b11000100, 0b1011,
|
2010-10-21 07:37:40 +08:00
|
|
|
(outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
|
|
|
|
IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>,
|
|
|
|
Sched<[WriteFPMOV]> {
|
2010-10-21 07:37:40 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Dm;
|
|
|
|
bits<4> Rt;
|
|
|
|
bits<4> Rt2;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Dm{3-0};
|
|
|
|
let Inst{5} = Dm{4};
|
|
|
|
let Inst{15-12} = Rt;
|
|
|
|
let Inst{19-16} = Rt2;
|
|
|
|
|
|
|
|
let Inst{7-6} = 0b00;
|
2011-04-20 02:11:38 +08:00
|
|
|
|
|
|
|
// Some single precision VFP instructions may be executed on both NEON and VFP
|
|
|
|
// pipelines.
|
|
|
|
let D = VFPNeonDomain;
|
2014-08-12 06:56:22 +08:00
|
|
|
|
|
|
|
// This instruction is equivalent to
|
|
|
|
// $Dm = REG_SEQUENCE $Rt, ssub_0, $Rt2, ssub_1
|
|
|
|
let isRegSequence = 1;
|
2010-02-06 02:04:58 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-08-29 18:49:11 +08:00
|
|
|
// Hoist an fabs or a fneg of a value coming from integer registers
|
|
|
|
// and do the fabs/fneg on the integer value. This is never a loss
|
|
|
|
// and could enable the conversion to float to be removed completely.
|
|
|
|
def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
|
|
|
|
(VMOVDRR GPR:$Rl, (BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
|
2016-01-13 08:03:35 +08:00
|
|
|
Requires<[IsARM, HasV6T2]>;
|
2015-08-29 18:49:11 +08:00
|
|
|
def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
|
|
|
|
(VMOVDRR GPR:$Rl, (t2BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
|
2016-01-13 08:03:35 +08:00
|
|
|
Requires<[IsThumb2, HasV6T2]>;
|
2015-08-29 18:49:11 +08:00
|
|
|
def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
|
|
|
|
(VMOVDRR GPR:$Rl, (EORri GPR:$Rh, (i32 0x80000000)))>,
|
|
|
|
Requires<[IsARM]>;
|
|
|
|
def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
|
|
|
|
(VMOVDRR GPR:$Rl, (t2EORri GPR:$Rh, (i32 0x80000000)))>,
|
|
|
|
Requires<[IsThumb2]>;
|
|
|
|
|
2014-11-26 08:46:26 +08:00
|
|
|
let hasSideEffects = 0 in
|
2010-02-09 01:26:09 +08:00
|
|
|
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
|
|
|
|
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
|
2010-04-08 02:20:02 +08:00
|
|
|
IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
|
2017-01-24 04:20:39 +08:00
|
|
|
[/* For disassembly only; pattern left blank */]>,
|
|
|
|
Sched<[WriteFPMOV]> {
|
2011-08-30 07:15:25 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> dst1;
|
|
|
|
bits<4> src1;
|
|
|
|
bits<4> src2;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
2012-07-10 20:51:09 +08:00
|
|
|
let Inst{3-0} = dst1{4-1};
|
|
|
|
let Inst{5} = dst1{0};
|
2011-08-30 07:15:25 +08:00
|
|
|
let Inst{15-12} = src1;
|
|
|
|
let Inst{19-16} = src2;
|
|
|
|
|
2010-02-09 01:26:09 +08:00
|
|
|
let Inst{7-6} = 0b00;
|
2011-04-20 02:11:38 +08:00
|
|
|
|
|
|
|
// Some single precision VFP instructions may be executed on both NEON and VFP
|
|
|
|
// pipelines.
|
|
|
|
let D = VFPNeonDomain;
|
2011-08-23 04:27:12 +08:00
|
|
|
|
|
|
|
let DecoderMethod = "DecodeVMOVSRR";
|
2010-02-09 01:26:09 +08:00
|
|
|
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// Move H->R, clearing top 16 bits
|
|
|
|
def VMOVRH : AVConv2I<0b11100001, 0b1001,
|
2018-01-31 18:18:29 +08:00
|
|
|
(outs GPR:$Rt), (ins HPR:$Sn),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
|
2018-01-31 18:18:29 +08:00
|
|
|
[(set GPR:$Rt, (arm_vmovrh HPR:$Sn))]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFullFP16]>,
|
|
|
|
Sched<[WriteFPMOV]> {
|
2016-01-25 18:26:26 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<4> Rt;
|
|
|
|
bits<5> Sn;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{19-16} = Sn{4-1};
|
|
|
|
let Inst{7} = Sn{0};
|
|
|
|
let Inst{15-12} = Rt;
|
|
|
|
|
|
|
|
let Inst{6-5} = 0b00;
|
|
|
|
let Inst{3-0} = 0b0000;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Move R->H, clearing top 16 bits
|
|
|
|
def VMOVHR : AVConv4I<0b11100000, 0b1001,
|
2018-01-31 18:18:29 +08:00
|
|
|
(outs HPR:$Sn), (ins GPR:$Rt),
|
2016-01-25 18:26:26 +08:00
|
|
|
IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
|
2018-01-31 18:18:29 +08:00
|
|
|
[(set HPR:$Sn, (arm_vmovhr GPR:$Rt))]>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFullFP16]>,
|
|
|
|
Sched<[WriteFPMOV]> {
|
2016-01-25 18:26:26 +08:00
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sn;
|
|
|
|
bits<4> Rt;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{19-16} = Sn{4-1};
|
|
|
|
let Inst{7} = Sn{0};
|
|
|
|
let Inst{15-12} = Rt;
|
|
|
|
|
|
|
|
let Inst{6-5} = 0b00;
|
|
|
|
let Inst{3-0} = 0b0000;
|
|
|
|
}
|
|
|
|
|
2007-01-19 15:51:42 +08:00
|
|
|
// FMRDH: SPR -> GPR
|
|
|
|
// FMRDL: SPR -> GPR
|
|
|
|
// FMRRS: SPR -> GPR
|
2010-10-14 04:58:46 +08:00
|
|
|
// FMRX: SPR system reg -> GPR
|
2007-01-19 15:51:42 +08:00
|
|
|
// FMSRR: GPR -> SPR
|
2010-10-14 04:58:46 +08:00
|
|
|
// FMXR: GPR -> VFP system reg
|
2007-01-19 15:51:42 +08:00
|
|
|
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// Int -> FP:
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
|
|
|
|
bits<4> opcod4, dag oops, dag iops,
|
|
|
|
InstrItinClass itin, string opc, string asm,
|
|
|
|
list<dag> pattern>
|
|
|
|
: AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
|
|
|
|
pattern> {
|
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Dd;
|
|
|
|
bits<5> Sm;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Sm{4-1};
|
|
|
|
let Inst{5} = Sm{0};
|
|
|
|
let Inst{15-12} = Dd{3-0};
|
|
|
|
let Inst{22} = Dd{4};
|
2013-10-24 23:49:39 +08:00
|
|
|
|
|
|
|
let Predicates = [HasVFP2, HasDPVFP];
|
2010-10-14 04:58:46 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
|
|
|
|
bits<4> opcod4, dag oops, dag iops,InstrItinClass itin,
|
|
|
|
string opc, string asm, list<dag> pattern>
|
|
|
|
: AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
|
|
|
|
pattern> {
|
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sd;
|
|
|
|
bits<5> Sm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Sm{4-1};
|
|
|
|
let Inst{5} = Sm{0};
|
|
|
|
let Inst{15-12} = Sd{4-1};
|
|
|
|
let Inst{22} = Sd{0};
|
|
|
|
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
class AVConv1IHs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
|
|
|
|
bits<4> opcod4, dag oops, dag iops,
|
|
|
|
InstrItinClass itin, string opc, string asm,
|
|
|
|
list<dag> pattern>
|
|
|
|
: AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
|
|
|
|
pattern> {
|
|
|
|
// Instruction operands.
|
|
|
|
bits<5> Sd;
|
|
|
|
bits<5> Sm;
|
|
|
|
|
|
|
|
// Encode instruction operands.
|
|
|
|
let Inst{3-0} = Sm{4-1};
|
|
|
|
let Inst{5} = Sm{0};
|
|
|
|
let Inst{15-12} = Sd{4-1};
|
|
|
|
let Inst{22} = Sd{0};
|
|
|
|
|
|
|
|
let Predicates = [HasFullFP16];
|
|
|
|
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
|
|
|
|
(outs DPR:$Dd), (ins SPR:$Sm),
|
|
|
|
IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2010-01-30 07:21:10 +08:00
|
|
|
let Inst{7} = 1; // s32
|
2008-11-12 03:40:26 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
let Predicates=[HasVFP2, HasDPVFP] in {
|
|
|
|
def : VFPPat<(f64 (sint_to_fp GPR:$a)),
|
|
|
|
(VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
|
|
|
|
|
2015-10-27 05:32:53 +08:00
|
|
|
def : VFPPat<(f64 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
|
2015-03-24 00:15:16 +08:00
|
|
|
(VSITOD (VLDRS addrmode5:$a))>;
|
|
|
|
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
|
|
|
|
(outs SPR:$Sd),(ins SPR:$Sm),
|
|
|
|
IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2010-01-30 07:21:10 +08:00
|
|
|
let Inst{7} = 1; // s32
|
2011-02-16 08:35:02 +08:00
|
|
|
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2008-11-12 03:40:26 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)),
|
|
|
|
(VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
|
|
|
|
|
2015-10-27 05:32:53 +08:00
|
|
|
def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
|
2015-03-24 00:15:16 +08:00
|
|
|
(VSITOS (VLDRS addrmode5:$a))>;
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2016-01-25 18:26:26 +08:00
|
|
|
let Inst{7} = 1; // s32
|
|
|
|
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
|
|
|
|
(outs DPR:$Dd), (ins SPR:$Sm),
|
|
|
|
IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2010-01-30 07:21:10 +08:00
|
|
|
let Inst{7} = 0; // u32
|
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
let Predicates=[HasVFP2, HasDPVFP] in {
|
|
|
|
def : VFPPat<(f64 (uint_to_fp GPR:$a)),
|
|
|
|
(VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
|
|
|
|
|
2015-10-27 05:32:53 +08:00
|
|
|
def : VFPPat<(f64 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
|
2015-03-24 00:15:16 +08:00
|
|
|
(VUITOD (VLDRS addrmode5:$a))>;
|
|
|
|
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2010-01-30 07:21:10 +08:00
|
|
|
let Inst{7} = 0; // u32
|
2011-02-16 08:35:02 +08:00
|
|
|
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2010-01-30 07:21:10 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)),
|
|
|
|
(VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
|
|
|
|
|
2015-10-27 05:32:53 +08:00
|
|
|
def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
|
2015-03-24 00:15:16 +08:00
|
|
|
(VUITOS (VLDRS addrmode5:$a))>;
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// VUITOH - "vcvt.f16.u32 $Sd, $Sm": integer-to-FP conversion producing a
// half-precision result; both operands are SPR registers.
def VUITOH
    : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
                        (outs SPR:$Sd), (ins SPR:$Sm),
                        IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm", []>,
      Sched<[WriteFPCVT]> {
  // Bit 7 clear selects the u32 source form.
  let Inst{7} = 0; // u32
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// FP -> Int:
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// AVConv1IsD_Encode - Encoding helper for FP -> integer conversions that read
// a double-precision source ($Dm) and write a single-precision register ($Sd).
// All template arguments are forwarded unchanged to AVConv1I; this class only
// adds the operand fields, their bit placement, and the predicate list.
class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
                        bits<4> opcod4, dag oops, dag iops,
                        InstrItinClass itin, string opc, string asm,
                        list<dag> pattern>
    : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
               pattern> {
  // Instruction operands (5-bit register numbers).
  bits<5> Sd;
  bits<5> Dm;

  // Destination: upper four bits go to Inst{15-12}, the low bit to Inst{22}.
  let Inst{15-12} = Sd{4-1};
  let Inst{22}    = Sd{0};

  // Source: lower four bits go to Inst{3-0}, the top bit to Inst{5}.
  let Inst{3-0}   = Dm{3-0};
  let Inst{5}     = Dm{4};

  // Double-precision source, so double-precision VFP support is required.
  let Predicates = [HasVFP2, HasDPVFP];
}
|
|
|
|
|
|
|
|
// AVConv1InsS_Encode - Encoding helper for FP -> integer conversions whose
// source ($Sm) and destination ($Sd) are both single-precision registers.
// All template arguments are forwarded unchanged to AVConv1In; this class
// only adds the operand fields and their bit placement.
class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
                         bits<4> opcod4, dag oops, dag iops,
                         InstrItinClass itin, string opc, string asm,
                         list<dag> pattern>
    : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
                pattern> {
  // Instruction operands (5-bit register numbers).
  bits<5> Sd;
  bits<5> Sm;

  // Destination: upper four bits go to Inst{15-12}, the low bit to Inst{22}.
  let Inst{15-12} = Sd{4-1};
  let Inst{22}    = Sd{0};

  // Source: upper four bits go to Inst{3-0}, the low bit to Inst{5}.
  let Inst{3-0}   = Sm{4-1};
  let Inst{5}     = Sm{0};
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// AVConv1IsH_Encode - Encoding helper for the half-precision FP -> integer
// conversions. Operands $Sd and $Sm are encoded exactly as in the
// single-precision helper; the record is additionally gated on HasFullFP16.
// All template arguments are forwarded unchanged to AVConv1I.
class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
                        bits<4> opcod4, dag oops, dag iops,
                        InstrItinClass itin, string opc, string asm,
                        list<dag> pattern>
    : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
               pattern> {
  // Instruction operands (5-bit register numbers).
  bits<5> Sd;
  bits<5> Sm;

  // Destination: upper four bits go to Inst{15-12}, the low bit to Inst{22}.
  let Inst{15-12} = Sd{4-1};
  let Inst{22}    = Sd{0};

  // Source: upper four bits go to Inst{3-0}, the low bit to Inst{5}.
  let Inst{3-0}   = Sm{4-1};
  let Inst{5}     = Sm{0};

  let Predicates = [HasFullFP16];
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
// Always set Z bit in the instruction, i.e. "round towards zero" variants.
|
|
|
|
def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
|
|
|
|
(outs SPR:$Sd), (ins DPR:$Dm),
|
|
|
|
IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2008-11-12 03:40:26 +08:00
|
|
|
let Inst{7} = 1; // Z bit
|
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
let Predicates=[HasVFP2, HasDPVFP] in {
|
|
|
|
def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
|
|
|
|
(COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
|
2017-09-26 06:07:33 +08:00
|
|
|
|
|
|
|
def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
|
|
|
|
(VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
|
2015-03-24 00:15:16 +08:00
|
|
|
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2008-11-12 03:40:26 +08:00
|
|
|
let Inst{7} = 1; // Z bit
|
2011-02-16 08:35:02 +08:00
|
|
|
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2008-11-12 03:40:26 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
|
|
|
|
(COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
|
|
|
|
|
2017-09-26 06:07:33 +08:00
|
|
|
def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
|
|
|
|
addrmode5:$ptr),
|
|
|
|
(VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2016-01-25 18:26:26 +08:00
|
|
|
let Inst{7} = 1; // Z bit
|
|
|
|
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
|
|
|
|
(outs SPR:$Sd), (ins DPR:$Dm),
|
|
|
|
IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2008-11-12 03:40:26 +08:00
|
|
|
let Inst{7} = 1; // Z bit
|
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
let Predicates=[HasVFP2, HasDPVFP] in {
|
|
|
|
def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
|
|
|
|
(COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
|
2017-09-26 06:07:33 +08:00
|
|
|
|
|
|
|
def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
|
|
|
|
(VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
|
2015-03-24 00:15:16 +08:00
|
|
|
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2008-11-12 03:40:26 +08:00
|
|
|
let Inst{7} = 1; // Z bit
|
2011-02-16 08:35:02 +08:00
|
|
|
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2008-11-12 03:40:26 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2015-03-24 00:15:16 +08:00
|
|
|
def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
|
|
|
|
(COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
|
|
|
|
|
2017-09-26 06:07:33 +08:00
|
|
|
def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
|
|
|
|
addrmode5:$ptr),
|
|
|
|
(VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2016-01-25 18:26:26 +08:00
|
|
|
let Inst{7} = 1; // Z bit
|
|
|
|
}
|
|
|
|
|
2010-02-09 06:02:41 +08:00
|
|
|
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
|
2010-08-04 05:31:55 +08:00
|
|
|
let Uses = [FPSCR] in {
|
2010-10-14 04:58:46 +08:00
|
|
|
def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
|
|
|
|
(outs SPR:$Sd), (ins DPR:$Dm),
|
|
|
|
IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2010-02-09 06:02:41 +08:00
|
|
|
let Inst{7} = 0; // Z bit
|
|
|
|
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2010-02-09 06:02:41 +08:00
|
|
|
let Inst{7} = 0; // Z bit
|
|
|
|
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2016-01-25 18:26:26 +08:00
|
|
|
let Inst{7} = 0; // Z bit
|
|
|
|
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
|
|
|
|
(outs SPR:$Sd), (ins DPR:$Dm),
|
|
|
|
IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2010-02-09 06:02:41 +08:00
|
|
|
let Inst{7} = 0; // Z bit
|
|
|
|
}
|
|
|
|
|
2010-10-14 04:58:46 +08:00
|
|
|
def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2010-02-09 06:02:41 +08:00
|
|
|
let Inst{7} = 0; // Z bit
|
|
|
|
}
|
2016-01-25 18:26:26 +08:00
|
|
|
|
|
|
|
def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sm),
|
|
|
|
IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm",
|
2017-01-24 04:20:39 +08:00
|
|
|
[]>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2016-01-25 18:26:26 +08:00
|
|
|
let Inst{7} = 0; // Z bit
|
|
|
|
}
|
2010-08-04 05:31:55 +08:00
|
|
|
}
|
2010-02-09 06:02:41 +08:00
|
|
|
|
2017-08-22 19:08:21 +08:00
|
|
|
// v8.3-a Javascript Convert to Signed fixed-point
|
|
|
|
// VJCVT - "vjcvt.s32.f64 $Sd, $Dm". Reuses the double-source / single-dest
// conversion encoding and is only available with FP-ARMv8 plus v8.3-a.
def VJCVT
    : AVConv1IsD_Encode<0b11101, 0b11, 0b1001, 0b1011,
                        (outs SPR:$Sd), (ins DPR:$Dm),
                        IIC_fpCVTDI, "vjcvt", ".s32.f64\t$Sd, $Dm", []>,
      Requires<[HasFPARMv8, HasV8_3a]> {
  let Inst{7} = 1; // Z bit
}
|
|
|
|
|
2010-02-12 02:17:16 +08:00
|
|
|
// Convert between floating-point and fixed-point
|
|
|
|
// Data type for fixed-point naming convention:
|
|
|
|
// S16 (U=0, sx=0) -> SH
|
|
|
|
// U16 (U=1, sx=0) -> UH
|
|
|
|
// S32 (U=0, sx=1) -> SL
|
|
|
|
// U32 (U=1, sx=1) -> UL
|
|
|
|
|
2011-12-23 03:45:01 +08:00
|
|
|
let Constraints = "$a = $dst" in {
|
2010-02-12 02:17:16 +08:00
|
|
|
|
|
|
|
// FP to Fixed-Point:
|
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// Single Precision register
|
2012-04-24 06:04:10 +08:00
|
|
|
// AVConv1XInsS_Encode - Encoding helper for the fixed-point <-> FP conversions
// that operate on a single-precision register. All template arguments are
// forwarded unchanged to AVConv1XI; this class only places the $dst register
// number into the instruction word.
class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
                          bit op5, dag oops, dag iops, InstrItinClass itin,
                          string opc, string asm, list<dag> pattern>
    : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
  bits<5> dst;

  // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
  // Single-precision case (Vd:D): the low bit of the register number is the D
  // bit (Inst{22}) and the upper four bits form Vd (Inst{15-12}).
  let Inst{15-12} = dst{4-1};
  let Inst{22}    = dst{0};
}
|
|
|
|
|
|
|
|
// Double Precision register
|
2012-04-24 06:04:10 +08:00
|
|
|
// AVConv1XInsD_Encode - Encoding helper for the fixed-point <-> FP conversions
// that operate on a double-precision register. All template arguments are
// forwarded unchanged to AVConv1XI; this class places the $dst register number
// into the instruction word and requires double-precision VFP support.
class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
                          bit op5, dag oops, dag iops, InstrItinClass itin,
                          string opc, string asm, list<dag> pattern>
    : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
  bits<5> dst;

  // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
  // Double-precision case (D:Vd): the top bit of the register number is the D
  // bit (Inst{22}) and the lower four bits form Vd (Inst{15-12}).
  let Inst{15-12} = dst{3-0};
  let Inst{22}    = dst{4};

  let Predicates = [HasVFP2, HasDPVFP];
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0,
|
|
|
|
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
|
|
|
|
IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFullFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
|
|
|
def VTOUHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0,
|
|
|
|
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
|
|
|
|
IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFullFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
|
|
|
def VTOSLH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1,
|
|
|
|
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
|
|
|
|
IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFullFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
|
|
|
def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1,
|
|
|
|
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
|
|
|
|
IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFullFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
|
2011-12-23 06:19:05 +08:00
|
|
|
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
|
2017-01-24 04:20:39 +08:00
|
|
|
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// VTOUHS - "vcvt.u16.f32 $dst, $a, $fbits": FP to fixed-point conversion on a
// single-precision register (UH = U16 in the naming convention used in this
// section). $a is tied to $dst by the enclosing 'let Constraints' block.
//
// Carries Sched<[WriteFPCVT]> like the sibling VTOSHS and the other
// fixed-point conversions in this section, so the instruction is covered by
// the same scheduling write resource rather than by its itinerary alone.
def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0,
                       (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                 IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []>,
             Sched<[WriteFPCVT]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// VTOSLS - "vcvt.s32.f32 $dst, $a, $fbits": FP to fixed-point conversion on a
// single-precision register (SL = S32 in the naming convention used in this
// section). $a is tied to $dst by the enclosing 'let Constraints' block.
//
// Carries Sched<[WriteFPCVT]> like the sibling VTOSHS and the other
// fixed-point conversions in this section, so the instruction is covered by
// the same scheduling write resource rather than by its itinerary alone.
def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1,
                       (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
                 IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []>,
             Sched<[WriteFPCVT]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
// VTOULS - "vcvt.u32.f32 $dst, $a, $fbits": FP to fixed-point conversion on a
// single-precision register (UL = U32 in the naming convention used in this
// section). $a is tied to $dst by the enclosing 'let Constraints' block.
//
// Carries Sched<[WriteFPCVT]> like the sibling VTOSHS and the other
// fixed-point conversions in this section, so the instruction is covered by
// the same scheduling write resource rather than by its itinerary alone.
def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
                       (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
                 IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []>,
             Sched<[WriteFPCVT]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines on A8.
  let D = VFPNeonA8Domain;
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
|
2011-12-23 06:19:05 +08:00
|
|
|
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
|
2017-01-24 04:20:39 +08:00
|
|
|
IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
|
2011-12-23 06:19:05 +08:00
|
|
|
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
|
2017-01-24 04:20:39 +08:00
|
|
|
IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
|
2011-12-23 06:19:05 +08:00
|
|
|
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
|
2017-01-24 04:20:39 +08:00
|
|
|
IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
|
2011-12-23 06:19:05 +08:00
|
|
|
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
|
2017-01-24 04:20:39 +08:00
|
|
|
IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
|
|
|
// Fixed-Point to FP:
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0,
|
|
|
|
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
|
|
|
|
IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFullFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
|
|
|
def VUHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0,
|
|
|
|
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
|
|
|
|
IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFullFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
|
|
|
def VSLTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1,
|
|
|
|
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
|
|
|
|
IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFullFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
|
|
|
def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1,
|
|
|
|
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
|
|
|
|
IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasFullFP16]>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
|
2011-12-23 06:19:05 +08:00
|
|
|
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
|
2017-01-24 04:20:39 +08:00
|
|
|
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
|
2011-12-23 06:19:05 +08:00
|
|
|
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
|
2017-01-24 04:20:39 +08:00
|
|
|
IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
|
2011-12-23 06:19:05 +08:00
|
|
|
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
|
2017-01-24 04:20:39 +08:00
|
|
|
IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
|
2011-12-23 06:19:05 +08:00
|
|
|
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
|
2017-01-24 04:20:39 +08:00
|
|
|
IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []>,
|
|
|
|
Sched<[WriteFPCVT]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
|
2011-12-23 06:19:05 +08:00
|
|
|
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
|
2017-01-24 04:20:39 +08:00
|
|
|
IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
|
2011-12-23 06:19:05 +08:00
|
|
|
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
|
2017-01-24 04:20:39 +08:00
|
|
|
IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
|
2011-12-23 06:19:05 +08:00
|
|
|
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
|
2017-01-24 04:20:39 +08:00
|
|
|
IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2012-03-16 01:50:29 +08:00
|
|
|
def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
|
2011-12-23 06:19:05 +08:00
|
|
|
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
|
2017-01-24 04:20:39 +08:00
|
|
|
IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>,
|
|
|
|
Sched<[WriteFPCVT]>;
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2011-12-23 03:45:01 +08:00
|
|
|
} // End of 'let Constraints = "$a = $dst" in'
|
2010-02-12 02:17:16 +08:00
|
|
|
|
2007-01-19 15:51:42 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
2011-07-07 16:28:52 +08:00
|
|
|
// FP Multiply-Accumulate Operations.
|
2007-01-19 15:51:42 +08:00
|
|
|
//
|
|
|
|
|
2010-11-13 04:32:20 +08:00
|
|
|
def VMLAD : ADbI<0b11100, 0b00, 0, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
|
2010-12-06 06:04:16 +08:00
|
|
|
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
|
|
|
|
(f64 DPR:$Ddin)))]>,
|
2010-11-13 04:32:20 +08:00
|
|
|
RegConstraint<"$Ddin = $Dd">,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
|
2010-12-06 06:04:16 +08:00
|
|
|
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
|
|
|
|
SPR:$Sdin))]>,
|
2010-11-13 04:32:20 +08:00
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VMLAH : AHbI<0b11100, 0b00, 0, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpMAC16, "vmla", ".f16\t$Sd, $Sn, $Sm",
|
|
|
|
[]>,
|
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
|
|
|
Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
|
|
|
|
|
2010-12-06 06:04:16 +08:00
|
|
|
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
2010-11-13 04:32:20 +08:00
|
|
|
(VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
|
2010-12-06 06:04:16 +08:00
|
|
|
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
|
2010-11-13 04:32:20 +08:00
|
|
|
(VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
|
2012-04-11 13:33:07 +08:00
|
|
|
Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2010-11-13 04:32:20 +08:00
|
|
|
def VMLSD : ADbI<0b11100, 0b00, 1, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
|
2010-12-06 06:04:16 +08:00
|
|
|
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
|
|
|
|
(f64 DPR:$Ddin)))]>,
|
2010-11-13 04:32:20 +08:00
|
|
|
RegConstraint<"$Ddin = $Dd">,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
|
2010-12-06 06:04:16 +08:00
|
|
|
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
|
|
|
|
SPR:$Sdin))]>,
|
2010-11-13 04:32:20 +08:00
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VMLSH : AHbI<0b11100, 0b00, 1, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpMAC16, "vmls", ".f16\t$Sd, $Sn, $Sm",
|
|
|
|
[]>,
|
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
|
|
|
Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
|
|
|
|
|
2010-12-06 06:04:16 +08:00
|
|
|
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
|
2010-11-13 04:32:20 +08:00
|
|
|
(VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
|
2010-12-06 06:04:16 +08:00
|
|
|
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
|
2010-11-13 04:32:20 +08:00
|
|
|
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
|
2012-04-11 13:33:07 +08:00
|
|
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
|
2009-08-05 02:44:29 +08:00
|
|
|
|
2010-11-13 04:32:20 +08:00
|
|
|
def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
|
2010-12-06 06:04:16 +08:00
|
|
|
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
|
|
|
|
(f64 DPR:$Ddin)))]>,
|
2010-11-13 04:32:20 +08:00
|
|
|
RegConstraint<"$Ddin = $Dd">,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
|
2010-12-06 06:04:16 +08:00
|
|
|
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
|
|
|
|
SPR:$Sdin))]>,
|
2010-11-13 04:32:20 +08:00
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VNMLAH : AHbI<0b11100, 0b01, 1, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpMAC16, "vnmla", ".f16\t$Sd, $Sn, $Sm",
|
|
|
|
[]>,
|
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
|
|
|
Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
|
|
|
|
|
[ARM] Add missing selection patterns for vnmla
For the following function:
double fn1(double d0, double d1, double d2) {
double a = -d0 - d1 * d2;
return a;
}
on ARM, LLVM generates code along the lines of
vneg.f64 d0, d0
vmls.f64 d0, d1, d2
i.e., a negate and a multiply-subtract.
The attached patch adds instruction selection patterns to allow it to generate the single instruction
vnmla.f64 d0, d1, d2
(multiply-add with negation) instead, like GCC does.
Committed on behalf of @gergo- (Gergö Barany)
Differential Revision: https://reviews.llvm.org/D35911
llvm-svn: 313972
2017-09-22 17:50:52 +08:00
|
|
|
// (-(a * b) - dst) -> -(dst + (a * b))
|
2010-12-06 06:04:16 +08:00
|
|
|
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
|
2010-11-13 04:32:20 +08:00
|
|
|
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
|
2010-12-06 06:04:16 +08:00
|
|
|
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
|
2010-11-13 04:32:20 +08:00
|
|
|
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
|
2012-04-11 13:33:07 +08:00
|
|
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
|
2010-10-14 09:02:08 +08:00
|
|
|
|
[ARM] Add missing selection patterns for vnmla
For the following function:
double fn1(double d0, double d1, double d2) {
double a = -d0 - d1 * d2;
return a;
}
on ARM, LLVM generates code along the lines of
vneg.f64 d0, d0
vmls.f64 d0, d1, d2
i.e., a negate and a multiply-subtract.
The attached patch adds instruction selection patterns to allow it to generate the single instruction
vnmla.f64 d0, d1, d2
(multiply-add with negation) instead, like GCC does.
Committed on behalf of @gergo- (Gergö Barany)
Differential Revision: https://reviews.llvm.org/D35911
llvm-svn: 313972
2017-09-22 17:50:52 +08:00
|
|
|
// (-dst - (a * b)) -> -(dst + (a * b))
|
|
|
|
def : Pat<(fsub_mlx (fneg DPR:$dstin), (fmul_su DPR:$a, (f64 DPR:$b))),
|
|
|
|
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
|
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
|
|
|
|
def : Pat<(fsub_mlx (fneg SPR:$dstin), (fmul_su SPR:$a, SPR:$b)),
|
|
|
|
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
|
|
|
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
|
|
|
|
|
2010-11-13 04:32:20 +08:00
|
|
|
def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
|
2010-12-06 06:04:16 +08:00
|
|
|
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
|
|
|
|
(f64 DPR:$Ddin)))]>,
|
2010-11-13 04:32:20 +08:00
|
|
|
RegConstraint<"$Ddin = $Dd">,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2010-11-01 14:00:39 +08:00
|
|
|
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
|
2010-12-06 06:04:16 +08:00
|
|
|
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
|
2010-11-13 04:32:20 +08:00
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
|
2011-02-23 03:53:14 +08:00
|
|
|
// Some single precision VFP instructions may be executed on both NEON and
|
|
|
|
// VFP pipelines on A8.
|
|
|
|
let D = VFPNeonA8Domain;
|
2011-02-16 08:35:02 +08:00
|
|
|
}
|
2010-10-14 09:02:08 +08:00
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
def VNMLSH : AHbI<0b11100, 0b01, 0, 0,
|
|
|
|
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
|
|
|
|
IIC_fpMAC16, "vnmls", ".f16\t$Sd, $Sn, $Sm",
|
|
|
|
[]>,
|
|
|
|
RegConstraint<"$Sdin = $Sd">,
|
|
|
|
Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
|
|
|
|
|
2010-12-06 06:04:16 +08:00
|
|
|
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
|
2010-11-13 04:32:20 +08:00
|
|
|
(VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
|
2013-10-24 23:49:39 +08:00
|
|
|
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
|
2010-12-06 06:04:16 +08:00
|
|
|
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
|
2010-11-13 04:32:20 +08:00
|
|
|
(VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
|
2012-04-11 13:33:07 +08:00
|
|
|
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Fused FP Multiply-Accumulate Operations.
|
|
|
|
//
|
|
|
|
def VFMAD : ADbI<0b11101, 0b10, 0, 0,
|
|
|
|
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
|
|
|
|
IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm",
|
|
|
|
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
|
|
|
|
(f64 DPR:$Ddin)))]>,
|
|
|
|
RegConstraint<"$Ddin = $Dd">,
|
2017-01-24 04:20:39 +08:00
|
|
|
Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
|
|
|
|
Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
|
|
|
|
// VFMAS: "vfma.f32". Selected for fadd_mlx of the fmul_su product ($Sn, $Sm)
// and the accumulator $Sdin; $Sdin is tied to the result register $Sd.
def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
                  (outs SPR:$Sd),
                  (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
                  IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm",
                  [(set SPR:$Sd,
                        (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
                                  SPR:$Sdin))]>,
            RegConstraint<"$Sdin = $Sd">,
            Requires<[HasVFP4, DontUseNEONForFP, UseFusedMAC]>,
            Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines.
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// VFMAH: the "vfma.f16" encoding. The pattern list is empty, so no selection
// pattern is attached by this definition. $Sdin is tied to $Sd.
def VFMAH : AHbI<0b11101, 0b10, 0, 0,
                 (outs SPR:$Sd),
                 (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
                 IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm",
                 []>,
            RegConstraint<"$Sdin = $Sd">,
            Requires<[HasFullFP16, UseFusedMAC]>,
            Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
// Select VFMAD/VFMAS when the accumulator is the left operand of fadd_mlx and
// the fmul_su product is the right operand.
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
          (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
      Requires<[HasVFP4, HasDPVFP, UseFusedMAC]>;

def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
          (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
      Requires<[HasVFP4, DontUseNEONForFP, UseFusedMAC]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
|
2012-04-11 05:40:28 +08:00
|
|
|
// Match @llvm.fma.* intrinsics
|
2012-04-28 02:51:24 +08:00
|
|
|
// (fma x, y, z) -> (vfma z, x, y)
|
|
|
|
// The third fma operand becomes the tied accumulator operand of VFMAD/VFMAS.
// Predicated on HasVFP4 (plus HasDPVFP for the f64 form).
def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
          (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
      Requires<[HasVFP4, HasDPVFP]>;

def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
          (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
      Requires<[HasVFP4]>;
|
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
// VFMSD: "vfms.f64". Selected for fadd_mlx of the negated fmul_su product
// ($Dn, $Dm) and the accumulator $Ddin; $Ddin is tied to $Dd.
def VFMSD : ADbI<0b11101, 0b10, 1, 0,
                 (outs DPR:$Dd),
                 (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
                 IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm",
                 [(set DPR:$Dd,
                       (fadd_mlx (fneg (fmul_su DPR:$Dn, DPR:$Dm)),
                                 (f64 DPR:$Ddin)))]>,
            RegConstraint<"$Ddin = $Dd">,
            Requires<[HasVFP4, HasDPVFP, UseFusedMAC]>,
            Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
|
|
|
|
// VFMSS: "vfms.f32". Selected for fadd_mlx of the negated fmul_su product
// ($Sn, $Sm) and the accumulator $Sdin; $Sdin is tied to $Sd.
def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
                  (outs SPR:$Sd),
                  (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
                  IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm",
                  [(set SPR:$Sd,
                        (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
                                  SPR:$Sdin))]>,
            RegConstraint<"$Sdin = $Sd">,
            Requires<[HasVFP4, DontUseNEONForFP, UseFusedMAC]>,
            Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines.
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// VFMSH: the "vfms.f16" encoding. The pattern list is empty, so no selection
// pattern is attached by this definition. $Sdin is tied to $Sd.
def VFMSH : AHbI<0b11101, 0b10, 1, 0,
                 (outs SPR:$Sd),
                 (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
                 IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm",
                 []>,
            RegConstraint<"$Sdin = $Sd">,
            Requires<[HasFullFP16, UseFusedMAC]>,
            Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
// Select VFMSD/VFMSS for an fsub_mlx whose left operand is the accumulator and
// whose right operand is the fmul_su product.
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
          (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
      Requires<[HasVFP4, HasDPVFP, UseFusedMAC]>;

def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
          (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
      Requires<[HasVFP4, DontUseNEONForFP, UseFusedMAC]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
|
2012-04-11 14:59:47 +08:00
|
|
|
// Match @llvm.fma.* intrinsics
|
2012-04-28 02:51:24 +08:00
|
|
|
// (fma (fneg x), y, z) -> (vfms z, x, y)
|
|
|
|
// Negated first multiplicand, f64 then f32.
def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
          (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
      Requires<[HasVFP4, HasDPVFP]>;

def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
          (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
      Requires<[HasVFP4]>;

// (fma x, (fneg y), z) -> (vfms z, x, y)
def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
          (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
      Requires<[HasVFP4, HasDPVFP]>;

def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
          (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
      Requires<[HasVFP4]>;
|
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
// VFNMAD: "vfnma.f64". Selected for fsub_mlx of the negated fmul_su product
// ($Dn, $Dm) and the accumulator $Ddin; $Ddin is tied to $Dd.
def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
                  (outs DPR:$Dd),
                  (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
                  IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
                  [(set DPR:$Dd,
                        (fsub_mlx (fneg (fmul_su DPR:$Dn, DPR:$Dm)),
                                  (f64 DPR:$Ddin)))]>,
             RegConstraint<"$Ddin = $Dd">,
             Requires<[HasVFP4, HasDPVFP, UseFusedMAC]>,
             Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
|
|
|
|
// VFNMAS: "vfnma.f32". Selected for fsub_mlx of the negated fmul_su product
// ($Sn, $Sm) and the accumulator $Sdin; $Sdin is tied to $Sd.
def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
                  (outs SPR:$Sd),
                  (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
                  IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm",
                  [(set SPR:$Sd,
                        (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
                                  SPR:$Sdin))]>,
             RegConstraint<"$Sdin = $Sd">,
             Requires<[HasVFP4, DontUseNEONForFP, UseFusedMAC]>,
             Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines.
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// VFNMAH: the "vfnma.f16" encoding. The pattern list is empty, so no selection
// pattern is attached by this definition. $Sdin is tied to $Sd.
def VFNMAH : AHbI<0b11101, 0b01, 1, 0,
                  (outs SPR:$Sd),
                  (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
                  IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm",
                  []>,
             RegConstraint<"$Sdin = $Sd">,
             Requires<[HasFullFP16, UseFusedMAC]>,
             Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
// Select VFNMAD/VFNMAS for an fsub_mlx whose left operand is the negated
// fmul_su product and whose right operand is the accumulator.
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
          (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
      Requires<[HasVFP4, HasDPVFP, UseFusedMAC]>;

def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
          (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
      Requires<[HasVFP4, DontUseNEONForFP, UseFusedMAC]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
|
2012-04-11 09:21:25 +08:00
|
|
|
// Match @llvm.fma.* intrinsics
|
2012-04-28 02:51:24 +08:00
|
|
|
// (fneg (fma x, y, z)) -> (vfnma z, x, y)
|
|
|
|
// Negation of the whole fma result, f64 then f32.
def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
          (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
      Requires<[HasVFP4, HasDPVFP]>;

def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
          (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
      Requires<[HasVFP4]>;

// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
          (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
      Requires<[HasVFP4, HasDPVFP]>;

def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
          (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
      Requires<[HasVFP4]>;
|
2012-04-11 09:21:25 +08:00
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
// VFNMSD: "vfnms.f64". Selected for fsub_mlx of the fmul_su product ($Dn, $Dm)
// and the accumulator $Ddin; $Ddin is tied to $Dd.
def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
                  (outs DPR:$Dd),
                  (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
                  IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm",
                  [(set DPR:$Dd,
                        (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
                                  (f64 DPR:$Ddin)))]>,
             RegConstraint<"$Ddin = $Dd">,
             Requires<[HasVFP4, HasDPVFP, UseFusedMAC]>,
             Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2012-01-22 20:07:33 +08:00
|
|
|
|
|
|
|
// VFNMSS: "vfnms.f32". Selected for fsub_mlx of the fmul_su product ($Sn, $Sm)
// and the accumulator $Sdin; $Sdin is tied to $Sd.
def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
                  (outs SPR:$Sd),
                  (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
                  IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
                  [(set SPR:$Sd,
                        (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
             RegConstraint<"$Sdin = $Sd">,
             Requires<[HasVFP4, DontUseNEONForFP, UseFusedMAC]>,
             Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
  // Some single precision VFP instructions may be executed on both NEON and
  // VFP pipelines.
}
|
|
|
|
|
2016-01-25 18:26:26 +08:00
|
|
|
// VFNMSH: the "vfnms.f16" encoding. The pattern list is empty, so no selection
// pattern is attached by this definition. $Sdin is tied to $Sd.
def VFNMSH : AHbI<0b11101, 0b01, 0, 0,
                  (outs SPR:$Sd),
                  (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
                  IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm",
                  []>,
             RegConstraint<"$Sdin = $Sd">,
             Requires<[HasFullFP16, UseFusedMAC]>,
             Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
|
2016-01-25 18:26:26 +08:00
|
|
|
|
2012-01-22 20:07:33 +08:00
|
|
|
// Select VFNMSD/VFNMSS for an fsub_mlx whose left operand is the fmul_su
// product and whose right operand is the accumulator.
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
          (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
      Requires<[HasVFP4, HasDPVFP, UseFusedMAC]>;

def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
          (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
      Requires<[HasVFP4, DontUseNEONForFP, UseFusedMAC]>;
|
2007-01-19 15:51:42 +08:00
|
|
|
|
2012-04-11 14:59:47 +08:00
|
|
|
// Match @llvm.fma.* intrinsics
|
2012-06-21 14:10:00 +08:00
|
|
|
|
|
|
|
// (fma x, y, (fneg z)) -> (vfnms z, x, y)
|
|
|
|
// Negated accumulator operand, f64 then f32.
def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
          (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
      Requires<[HasVFP4, HasDPVFP]>;

def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
          (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
      Requires<[HasVFP4]>;

// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
          (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
      Requires<[HasVFP4, HasDPVFP]>;

def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
          (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
      Requires<[HasVFP4]>;
|
2012-04-28 02:51:24 +08:00
|
|
|
// (fneg (fma x, (fneg y), z)) -> (vfnms z, x, y)
|
|
|
|
// Negated second multiplicand inside a negated fma, f64 then f32.
def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
          (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
      Requires<[HasVFP4, HasDPVFP]>;

def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
          (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
      Requires<[HasVFP4]>;
|
|
|
|
|
2007-01-19 15:51:42 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// FP Conditional moves.
|
|
|
|
//
|
|
|
|
|
2014-11-26 08:46:26 +08:00
|
|
|
// Conditional-move pseudo instructions for f64 and f32. Each is selected from
// an ARMcmov node, and the first input register is tied to the result.
let hasSideEffects = 0 in {
  def VMOVDcc : PseudoInst<(outs DPR:$Dd),
                           (ins DPR:$Dn, DPR:$Dm, cmovpred:$p),
                           IIC_fpUNA64,
                           [(set (f64 DPR:$Dd),
                                 (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>,
                RegConstraint<"$Dn = $Dd">,
                Requires<[HasVFP2, HasDPVFP]>;

  def VMOVScc : PseudoInst<(outs SPR:$Sd),
                           (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
                           IIC_fpUNA32,
                           [(set (f32 SPR:$Sd),
                                 (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
                RegConstraint<"$Sn = $Sd">,
                Requires<[HasVFP2]>;
} // hasSideEffects
|
2008-11-12 03:40:26 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
2011-01-19 05:58:20 +08:00
|
|
|
// Move from VFP System Register to ARM core register.
|
2008-11-12 03:40:26 +08:00
|
|
|
//
|
|
|
|
|
2011-01-19 05:58:20 +08:00
|
|
|
// Common encoding for moves from a VFP system register to a core register.
// opc19_16 is placed in Inst{19-16}; the users of this class annotate it with
// the system register it names (fpscr, fpexc, ...). Rt goes in Inst{15-12}.
class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
                 list<dag> pattern>
  : VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {

  // Instruction operand.
  bits<4> Rt;

  let Inst{27-20} = 0b11101111;
  let Inst{19-16} = opc19_16;
  let Inst{15-12} = Rt;
  let Inst{11-8}  = 0b1010;
  let Inst{7}     = 0;
  let Inst{6-5}   = 0b00;
  let Inst{4}     = 1;
  let Inst{3-0}   = 0b0000;
}
|
|
|
|
|
2011-01-19 05:58:20 +08:00
|
|
|
// APSR is the application level alias of CPSR. This moves the FPSCR N, Z, C,
// V flags to APSR.
|
2012-03-06 08:19:55 +08:00
|
|
|
// FMSTAT is printed as "vmrs APSR_nzcv, fpscr". Rt is fixed to 0b1111; the
// record is marked as using FPSCR_NZCV and defining CPSR.
let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in
def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
                        "vmrs", "\tAPSR_nzcv, fpscr",
                        [(arm_fmstat)]>;
|
2011-01-19 05:58:20 +08:00
|
|
|
|
2017-09-22 20:17:42 +08:00
|
|
|
// Every VMRS form below is decoded through the same DecoderMethod hook.
let DecoderMethod = "DecodeForVMRSandVMSR" in {
  // Application level FPSCR -> GPR
  let hasSideEffects = 1, Uses = [FPSCR] in
  def VMRS : MovFromVFP<0b0001 /* fpscr */, (outs GPRnopc:$Rt), (ins),
                        "vmrs", "\t$Rt, fpscr",
                        [(set GPRnopc:$Rt, (int_arm_get_fpscr))]>;

  // System level FPEXC, FPSID -> GPR
  let Uses = [FPSCR] in {
    def VMRS_FPEXC   : MovFromVFP<0b1000 /* fpexc */,
                                  (outs GPRnopc:$Rt), (ins),
                                  "vmrs", "\t$Rt, fpexc", []>;
    def VMRS_FPSID   : MovFromVFP<0b0000 /* fpsid */,
                                  (outs GPRnopc:$Rt), (ins),
                                  "vmrs", "\t$Rt, fpsid", []>;
    def VMRS_MVFR0   : MovFromVFP<0b0111 /* mvfr0 */,
                                  (outs GPRnopc:$Rt), (ins),
                                  "vmrs", "\t$Rt, mvfr0", []>;
    def VMRS_MVFR1   : MovFromVFP<0b0110 /* mvfr1 */,
                                  (outs GPRnopc:$Rt), (ins),
                                  "vmrs", "\t$Rt, mvfr1", []>;
    // The MVFR2 form is additionally predicated on HasFPARMv8.
    let Predicates = [HasFPARMv8] in {
      def VMRS_MVFR2 : MovFromVFP<0b0101 /* mvfr2 */,
                                  (outs GPRnopc:$Rt), (ins),
                                  "vmrs", "\t$Rt, mvfr2", []>;
    }
    def VMRS_FPINST  : MovFromVFP<0b1001 /* fpinst */,
                                  (outs GPRnopc:$Rt), (ins),
                                  "vmrs", "\t$Rt, fpinst", []>;
    def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */,
                                  (outs GPRnopc:$Rt), (ins),
                                  "vmrs", "\t$Rt, fpinst2", []>;
  }
}
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Move from ARM core register to VFP System Register.
|
|
|
|
//
|
|
|
|
|
|
|
|
// Common encoding for moves from a core register to a VFP system register.
// opc19_16 is placed in Inst{19-16}; the users of this class annotate it with
// the system register it names. The source register goes in Inst{15-12}.
class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
               list<dag> pattern>
  : VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> {

  // Instruction operand.
  bits<4> src;

  let Inst{27-20} = 0b11101110;
  let Inst{19-16} = opc19_16;
  // Encode instruction operand.
  let Inst{15-12} = src;
  let Inst{11-8}  = 0b1010;
  let Inst{7}     = 0;
  let Inst{4}     = 1;
}
|
2009-10-28 09:44:26 +08:00
|
|
|
|
2017-09-22 20:17:42 +08:00
|
|
|
// Every VMSR form below is decoded through the same DecoderMethod hook and is
// marked as defining FPSCR.
let DecoderMethod = "DecodeForVMRSandVMSR" in {
  let Defs = [FPSCR] in {
    // Application level GPR -> FPSCR
    def VMSR         : MovToVFP<0b0001 /* fpscr */,
                                (outs), (ins GPRnopc:$src),
                                "vmsr", "\tfpscr, $src",
                                [(int_arm_set_fpscr GPRnopc:$src)]>;
    // System level GPR -> FPEXC
    def VMSR_FPEXC   : MovToVFP<0b1000 /* fpexc */,
                                (outs), (ins GPRnopc:$src),
                                "vmsr", "\tfpexc, $src", []>;
    // System level GPR -> FPSID
    def VMSR_FPSID   : MovToVFP<0b0000 /* fpsid */,
                                (outs), (ins GPRnopc:$src),
                                "vmsr", "\tfpsid, $src", []>;
    def VMSR_FPINST  : MovToVFP<0b1001 /* fpinst */,
                                (outs), (ins GPRnopc:$src),
                                "vmsr", "\tfpinst, $src", []>;
    def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */,
                                (outs), (ins GPRnopc:$src),
                                "vmsr", "\tfpinst2, $src", []>;
  }
}
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Misc.
|
|
|
|
//
|
|
|
|
|
2009-10-28 09:44:26 +08:00
|
|
|
// Materialize FP immediates. VFP3 only.
|
2009-11-09 08:11:35 +08:00
|
|
|
// "vmov" with an 8-bit encoded FP immediate, in f64, f32 and f16 forms. In all
// three the immediate is split across Inst{19-16} (high nibble) and Inst{3-0}
// (low nibble).
let isReMaterializable = 1 in {
  def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
                      VFPMiscFrm, IIC_fpUNA64,
                      "vmov", ".f64\t$Dd, $imm",
                      [(set DPR:$Dd, vfp_f64imm:$imm)]>,
                Requires<[HasVFP3, HasDPVFP]> {
    bits<5> Dd;
    bits<8> imm;

    let Inst{27-23} = 0b11101;
    let Inst{22}    = Dd{4};
    let Inst{21-20} = 0b11;
    let Inst{19-16} = imm{7-4};
    let Inst{15-12} = Dd{3-0};
    let Inst{11-9}  = 0b101;
    let Inst{8}     = 1;          // Double precision.
    let Inst{7-4}   = 0b0000;
    let Inst{3-0}   = imm{3-0};
  }

  def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
                      VFPMiscFrm, IIC_fpUNA32,
                      "vmov", ".f32\t$Sd, $imm",
                      [(set SPR:$Sd, vfp_f32imm:$imm)]>,
                Requires<[HasVFP3]> {
    bits<5> Sd;
    bits<8> imm;

    let Inst{27-23} = 0b11101;
    let Inst{22}    = Sd{0};
    let Inst{21-20} = 0b11;
    let Inst{19-16} = imm{7-4};
    let Inst{15-12} = Sd{4-1};
    let Inst{11-9}  = 0b101;
    let Inst{8}     = 0;          // Single precision.
    let Inst{7-4}   = 0b0000;
    let Inst{3-0}   = imm{3-0};
  }

  // No selection pattern is attached to the half-precision form.
  def FCONSTH : VFPAI<(outs SPR:$Sd), (ins vfp_f16imm:$imm),
                      VFPMiscFrm, IIC_fpUNA16,
                      "vmov", ".f16\t$Sd, $imm",
                      []>,
                Requires<[HasFullFP16]> {
    bits<5> Sd;
    bits<8> imm;

    let Inst{27-23} = 0b11101;
    let Inst{22}    = Sd{0};
    let Inst{21-20} = 0b11;
    let Inst{19-16} = imm{7-4};
    let Inst{15-12} = Sd{4-1};
    let Inst{11-8}  = 0b1001;     // Half precision
    let Inst{7-4}   = 0b0000;
    let Inst{3-0}   = imm{3-0};
  }
}
|
2011-10-04 05:12:43 +08:00
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Assembler aliases.
|
|
|
|
//
|
2013-12-30 01:58:31 +08:00
|
|
|
// A few mnemonic aliases for pre-unified syntax. We don't guarantee to
|
2011-12-08 08:49:29 +08:00
|
|
|
// support them all, but supporting at least some of the basics is
|
|
|
|
// good to be friendly.
|
2011-12-10 07:34:09 +08:00
|
|
|
// Each entry maps an old mnemonic (first string) to the mnemonic it is
// rewritten to (second string).
def : VFP2MnemonicAlias<"flds",    "vldr">;
def : VFP2MnemonicAlias<"fldd",    "vldr">;
def : VFP2MnemonicAlias<"fmrs",    "vmov">;
def : VFP2MnemonicAlias<"fmsr",    "vmov">;
def : VFP2MnemonicAlias<"fsqrts",  "vsqrt">;
def : VFP2MnemonicAlias<"fsqrtd",  "vsqrt">;
def : VFP2MnemonicAlias<"fadds",   "vadd.f32">;
def : VFP2MnemonicAlias<"faddd",   "vadd.f64">;
def : VFP2MnemonicAlias<"fmrdd",   "vmov">;
def : VFP2MnemonicAlias<"fmrds",   "vmov">;
def : VFP2MnemonicAlias<"fmrrd",   "vmov">;
def : VFP2MnemonicAlias<"fmdrr",   "vmov">;
def : VFP2MnemonicAlias<"fmuls",   "vmul.f32">;
def : VFP2MnemonicAlias<"fmuld",   "vmul.f64">;
def : VFP2MnemonicAlias<"fnegs",   "vneg.f32">;
def : VFP2MnemonicAlias<"fnegd",   "vneg.f64">;
// Conversions between floating point and 32-bit integers.
def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">;
def : VFP2MnemonicAlias<"ftosid",  "vcvtr.s32.f64">;
def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">;
def : VFP2MnemonicAlias<"ftosis",  "vcvtr.s32.f32">;
def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">;
def : VFP2MnemonicAlias<"ftouid",  "vcvtr.u32.f64">;
def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">;
def : VFP2MnemonicAlias<"ftouis",  "vcvtr.u32.f32">;
def : VFP2MnemonicAlias<"fsitod",  "vcvt.f64.s32">;
def : VFP2MnemonicAlias<"fsitos",  "vcvt.f32.s32">;
def : VFP2MnemonicAlias<"fuitod",  "vcvt.f64.u32">;
def : VFP2MnemonicAlias<"fuitos",  "vcvt.f32.u32">;
def : VFP2MnemonicAlias<"fsts",    "vstr">;
def : VFP2MnemonicAlias<"fstd",    "vstr">;
def : VFP2MnemonicAlias<"fmacd",   "vmla.f64">;
def : VFP2MnemonicAlias<"fmacs",   "vmla.f32">;
def : VFP2MnemonicAlias<"fcpys",   "vmov.f32">;
def : VFP2MnemonicAlias<"fcpyd",   "vmov.f64">;
def : VFP2MnemonicAlias<"fcmps",   "vcmp.f32">;
def : VFP2MnemonicAlias<"fcmpd",   "vcmp.f64">;
def : VFP2MnemonicAlias<"fdivs",   "vdiv.f32">;
def : VFP2MnemonicAlias<"fdivd",   "vdiv.f64">;
// System register moves.
def : VFP2MnemonicAlias<"fmrx",    "vmrs">;
def : VFP2MnemonicAlias<"fmxr",    "vmsr">;
|
2011-12-08 08:49:29 +08:00
|
|
|
|
2012-03-16 04:48:18 +08:00
|
|
|
// Be friendly and accept the old form of zero-compare
|
2013-10-24 23:49:39 +08:00
|
|
|
def : VFP2DPInstAlias<"fcmpzd${p} $val",
                      (VCMPZD DPR:$val, pred:$p)>;
def : VFP2InstAlias<"fcmpzs${p} $val",
                    (VCMPZS SPR:$val, pred:$p)>;

// Old spelling of the flag transfer instruction.
def : VFP2InstAlias<"fmstat${p}",
                    (FMSTAT pred:$p)>;

// Three-operand forms of the old add/subtract mnemonics. The double-precision
// variants use the VFP2DPInstAlias class.
def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
                    (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm",
                      (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm",
                    (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
def : VFP2DPInstAlias<"fsubd${p} $Dd, $Dn, $Dm",
                      (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
|
2011-10-04 05:12:43 +08:00
|
|
|
|
2011-12-09 06:51:25 +08:00
|
|
|
// No need for the size suffix on VSQRT. It's implied by the register classes.
|
|
|
|
// Suffix-less "vsqrt": the SPR operands pick VSQRTS, the DPR operands VSQRTD.
def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm",
                    (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>;
def : VFP2DPInstAlias<"vsqrt${p} $Dd, $Dm",
                      (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
|
2011-12-09 06:51:25 +08:00
|
|
|
|
2011-11-15 07:03:21 +08:00
|
|
|
// VLDR/VSTR accept an optional type suffix.
|
2011-12-07 09:50:36 +08:00
|
|
|
// ".32" forms map to the SPR load/store, ".64" forms to the DPR load/store.
def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr",
                    (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr",
                    (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr",
                    (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr",
                    (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
|
2011-11-16 04:14:51 +08:00
|
|
|
|
2011-12-22 07:24:15 +08:00
|
|
|
// VMOV can accept optional 32-bit or less data type suffix.
|
|
|
|
// SPR -> GPR: the .8, .16 and .32 suffixes all map to VMOVRS.
def : VFP2InstAlias<"vmov${p}.8 $Rt, $Sn",
                    (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
def : VFP2InstAlias<"vmov${p}.16 $Rt, $Sn",
                    (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
def : VFP2InstAlias<"vmov${p}.32 $Rt, $Sn",
                    (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
// GPR -> SPR: the .8, .16 and .32 suffixes all map to VMOVSR.
def : VFP2InstAlias<"vmov${p}.8 $Sn, $Rt",
                    (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
def : VFP2InstAlias<"vmov${p}.16 $Sn, $Rt",
                    (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
def : VFP2InstAlias<"vmov${p}.32 $Sn, $Rt",
                    (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;

// ".f64" forms of the moves between a GPR pair and a DPR.
def : VFP2InstAlias<"vmov${p}.f64 $Rt, $Rt2, $Dn",
                    (VMOVRRD GPR:$Rt, GPR:$Rt2, DPR:$Dn, pred:$p)>;
def : VFP2InstAlias<"vmov${p}.f64 $Dn, $Rt, $Rt2",
                    (VMOVDRR DPR:$Dn, GPR:$Rt, GPR:$Rt2, pred:$p)>;
|
2011-11-16 05:18:35 +08:00
|
|
|
|
|
|
|
// VMOVS doesn't need the .f32 to disambiguate from the NEON encoding the way
|
|
|
|
// VMOVD does.
|
|
|
|
// Suffix-less "vmov" between two SPRs maps to VMOVS.
def : VFP2InstAlias<"vmov${p} $Sd, $Sm",
                    (VMOVS SPR:$Sd, SPR:$Sm, pred:$p)>;
|
2014-01-08 02:19:23 +08:00
|
|
|
|
|
|
|
// FCONSTD/FCONSTS alias for vmov.f64/vmov.f32
|
|
|
|
// These aliases provide added functionality over vmov.f instructions by
|
|
|
|
// allowing users to write assembly containing encoded floating point constants
|
|
|
|
// (e.g. #0x70 vs #1.0). Without these aliases there is no way for the
|
|
|
|
// assembler to accept encoded fp constants (but the equivalent fp-literal is
|
|
|
|
// accepted directly by vmov.f32/vmov.f64).
|
|
|
|
// "fconstd"/"fconsts" spellings of FCONSTD/FCONSTS.
def : VFP3InstAlias<"fconstd${p} $Dd, $val",
                    (FCONSTD DPR:$Dd, vfp_f64imm:$val, pred:$p)>;
def : VFP3InstAlias<"fconsts${p} $Sd, $val",
                    (FCONSTS SPR:$Sd, vfp_f32imm:$val, pred:$p)>;
|