[ARM] Add missing selection patterns for vnmla
For the following function:
double fn1(double d0, double d1, double d2) {
double a = -d0 - d1 * d2;
return a;
}
on ARM, LLVM generates code along the lines of
vneg.f64 d0, d0
vmls.f64 d0, d1, d2
i.e., a negate and a multiply-subtract.
The attached patch adds instruction selection patterns to allow it to generate the single instruction
vnmla.f64 d0, d1, d2
(multiply-add with negation) instead, like GCC does.
Committed on behalf of @gergo- (Gergö Barany)
Differential Revision: https://reviews.llvm.org/D35911
llvm-svn: 313972
2017-09-22 17:50:52 +08:00
|
|
|
; Each invocation below compiles this file with llc and pipes the assembly
; into FileCheck under one check prefix:
;   VFP2  - arm-eabihf with +vfp2
;   VFP3  - arm-eabihf with +vfp3
;   NEON  - arm-eabihf with +neon
;   A8    - arm-eabi, cortex-a8 (default register allocator and -regalloc=basic)
;   A8U   - arm-eabi, cortex-a8 with --enable-unsafe-fp-math, and arm-darwin,
;           cortex-a8

; RUN: llc -mtriple=arm-eabihf -mattr=+vfp2 %s -o - \
; RUN:  | FileCheck %s -check-prefix=VFP2

; RUN: llc -mtriple=arm-eabihf -mattr=+vfp3 %s -o - \
; RUN:  | FileCheck %s -check-prefix=VFP3

; RUN: llc -mtriple=arm-eabihf -mattr=+neon %s -o - \
; RUN:  | FileCheck %s -check-prefix=NEON

; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
; RUN:  | FileCheck %s -check-prefix=A8

; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -regalloc=basic %s -o - \
; RUN:  | FileCheck %s -check-prefix=A8

; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
; RUN:  | FileCheck %s -check-prefix=A8U

; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
; RUN:  | FileCheck %s -check-prefix=A8U
|
2009-08-05 01:53:06 +08:00
|
|
|
|
2010-11-13 04:32:20 +08:00
|
|
|
; t1 - single precision, computes (-(a * b)) - acc.
; The product is negated by subtracting it from -0.0, then %acc is subtracted.
; The VFP2, VFP3 and NEON runs expect one fused vnmla.f32. Both Cortex-A8
; runs expect a vnmul.f32 followed by a separate vsub.f32; the unsafe-math
; run expects that vsub on d registers, the plain run on s registers.
define float @t1(float %acc, float %a, float %b) nounwind {
entry:
; VFP2-LABEL: t1:
; VFP2: vnmla.f32

; VFP3-LABEL: t1:
; VFP3: vnmla.f32

; NEON-LABEL: t1:
; NEON: vnmla.f32

; A8U-LABEL: t1:
; A8U: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}

; A8-LABEL: t1:
; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
  ; %0 = a * b
  %0 = fmul float %a, %b
  ; %1 = -(a * b), written as -0.0 - %0
  %1 = fsub float -0.0, %0
  ; %2 = -(a * b) - acc
  %2 = fsub float %1, %acc
  ret float %2
}
|
|
|
|
|
2010-11-13 04:32:20 +08:00
|
|
|
; t2 - single precision, computes (-1.0 * (a * b)) - acc.
; Same shape as a negated multiply-accumulate, but the negation is written as
; a multiplication by -1.0 rather than a subtraction from -0.0.
; The VFP2, VFP3 and NEON runs expect one fused vnmla.f32. Both Cortex-A8
; runs expect a vnmul.f32 followed by a separate vsub.f32; the unsafe-math
; run expects that vsub on d registers, the plain run on s registers.
define float @t2(float %acc, float %a, float %b) nounwind {
entry:
; VFP2-LABEL: t2:
; VFP2: vnmla.f32

; VFP3-LABEL: t2:
; VFP3: vnmla.f32

; NEON-LABEL: t2:
; NEON: vnmla.f32

; A8U-LABEL: t2:
; A8U: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}

; A8-LABEL: t2:
; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
  ; %0 = a * b
  %0 = fmul float %a, %b
  ; %1 = -1.0 * (a * b)
  %1 = fmul float -1.0, %0
  ; %2 = %1 - acc
  %2 = fsub float %1, %acc
  ret float %2
}
|
|
|
|
|
2010-11-13 04:32:20 +08:00
|
|
|
; t3 - double precision, computes (-(a * b)) - acc.
; The product is negated by subtracting it from -0.0, then %acc is subtracted.
; The VFP2, VFP3 and NEON runs expect one fused vnmla.f64. Both Cortex-A8
; runs expect a vnmul.f64 followed by a separate vsub.f64 on d registers.
define double @t3(double %acc, double %a, double %b) nounwind {
entry:
; VFP2-LABEL: t3:
; VFP2: vnmla.f64

; VFP3-LABEL: t3:
; VFP3: vnmla.f64

; NEON-LABEL: t3:
; NEON: vnmla.f64

; A8U-LABEL: t3:
; A8U: vnmul.f64 d
; A8U: vsub.f64 d

; A8-LABEL: t3:
; A8: vnmul.f64 d
; A8: vsub.f64 d
  ; %0 = a * b
  %0 = fmul double %a, %b
  ; %1 = -(a * b), written as -0.0 - %0
  %1 = fsub double -0.0, %0
  ; %2 = -(a * b) - acc
  %2 = fsub double %1, %acc
  ret double %2
}
|
|
|
|
|
|
|
|
; t4 - double precision, computes (-1.0 * (a * b)) - acc.
; Same as the -0.0 subtraction form, but the negation is written as a
; multiplication by -1.0.
; The VFP2, VFP3 and NEON runs expect one fused vnmla.f64. Both Cortex-A8
; runs expect a vnmul.f64 followed by a separate vsub.f64 on d registers.
define double @t4(double %acc, double %a, double %b) nounwind {
entry:
; VFP2-LABEL: t4:
; VFP2: vnmla.f64

; VFP3-LABEL: t4:
; VFP3: vnmla.f64

; NEON-LABEL: t4:
; NEON: vnmla.f64

; A8U-LABEL: t4:
; A8U: vnmul.f64 d
; A8U: vsub.f64 d

; A8-LABEL: t4:
; A8: vnmul.f64 d
; A8: vsub.f64 d
  ; %0 = a * b
  %0 = fmul double %a, %b
  ; %1 = -1.0 * (a * b)
  %1 = fmul double -1.0, %0
  ; %2 = %1 - acc
  %2 = fsub double %1, %acc
  ret double %2
}
|
[ARM] Add missing selection patterns for vnmla
For the following function:
double fn1(double d0, double d1, double d2) {
double a = -d0 - d1 * d2;
return a;
}
on ARM, LLVM generates code along the lines of
vneg.f64 d0, d0
vmls.f64 d0, d1, d2
i.e., a negate and a multiply-subtract.
The attached patch adds instruction selection patterns to allow it to generate the single instruction
vnmla.f64 d0, d1, d2
(multiply-add with negation) instead, like GCC does.
Committed on behalf of @gergo- (Gergö Barany)
Differential Revision: https://reviews.llvm.org/D35911
llvm-svn: 313972
2017-09-22 17:50:52 +08:00
|
|
|
|
|
|
|
; t5 - double precision, computes (-acc) - (a * b).
; Here the accumulator is negated (by subtracting it from -0.0) rather than
; the product, and the product is then subtracted from it.
; The VFP2, VFP3 and NEON runs expect one fused vnmla.f64. Both Cortex-A8
; runs expect a plain vmul.f64 followed by a vsub.f64 on d registers.
define double @t5(double %acc, double %a, double %b) nounwind {
entry:
; VFP2-LABEL: t5:
; VFP2: vnmla.f64

; VFP3-LABEL: t5:
; VFP3: vnmla.f64

; NEON-LABEL: t5:
; NEON: vnmla.f64

; A8U-LABEL: t5:
; A8U: vmul.f64 d
; A8U: vsub.f64 d

; A8-LABEL: t5:
; A8: vmul.f64 d
; A8: vsub.f64 d
  ; %0 = -acc, written as -0.0 - %acc
  %0 = fsub double -0.0, %acc
  ; %1 = a * b
  %1 = fmul double %a, %b
  ; %2 = -acc - (a * b)
  %2 = fsub double %0, %1
  ret double %2
}
|
|
|
|
|
|
|
|
; t6 - single precision, computes (-acc) - (a * b).
; Here the accumulator is negated (by subtracting it from -0.0) rather than
; the product, and the product is then subtracted from it.
; The VFP2, VFP3 and NEON runs expect one fused vnmla.f32. Both Cortex-A8
; runs expect a plain vmul.f32 followed by a vsub.f32; the unsafe-math run
; expects both on d registers, the plain run on s registers.
define float @t6(float %acc, float %a, float %b) nounwind {
entry:
; VFP2-LABEL: t6:
; VFP2: vnmla.f32

; VFP3-LABEL: t6:
; VFP3: vnmla.f32

; NEON-LABEL: t6:
; NEON: vnmla.f32

; A8U-LABEL: t6:
; A8U: vmul.f32 d
; A8U: vsub.f32 d

; A8-LABEL: t6:
; A8: vmul.f32 s
; A8: vsub.f32 s
  ; %0 = -acc, written as -0.0 - %acc
  %0 = fsub float -0.0, %acc
  ; %1 = a * b
  %1 = fmul float %a, %b
  ; %2 = -acc - (a * b)
  %2 = fsub float %0, %1
  ret float %2
}
|