2015-11-29 03:20:49 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
|
2016-01-06 17:08:49 +08:00
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
|
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512
|
[X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions.
This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions
from a sequence of "vadd + vsub + blend".
Example:
///
typedef float float4 __attribute__((ext_vector_type(4)));
float4 foo(float4 A, float4 B) {
float4 X = A - B;
float4 Y = A + B;
return (float4){X[0], Y[1], X[2], Y[3]};
}
///
Before this patch, (with flag -mcpu=corei7) llc produced the following
assembly sequence:
movaps %xmm0, %xmm2
addps %xmm1, %xmm2
subps %xmm1, %xmm0
blendps $10, %xmm2, %xmm0
With this patch, we now get a single
addsubps %xmm1, %xmm0
llvm-svn: 211427
2014-06-21 09:31:15 +08:00
|
|
|
|
|
|
|
; Test ADDSUB ISel patterns.
|
|
|
|
|
[X86] Improve the selection of SSE3/AVX addsub instructions.
This patch teaches the backend how to canonicalize shuffle vectors
according to the rule:
- (shuffle (FADD A, B), (FSUB A, B), Mask) ->
(shuffle (FSUB A, -B), (FADD A, -B), Mask)
Where 'Mask' is:
<0,5,2,7> ;; for v4f32 and v4f64 shuffles.
<0,3> ;; for v2f64 shuffles.
<0,9,2,11,4,13,6,15> ;; for v8f32 shuffles.
In general, ISel only knows how to pattern-match a canonical
'fadd + fsub + blendi' dag node sequence into an ADDSUB instruction.
This new rule allows converting a non-canonical dag sequence into a
canonical one that will be matched by a single ADDSUB at ISel stage.
The idea of converting a non-canonical ADDSUB into a canonical one by
swapping the first two operands of the shuffle, and then negating the
second operand of the FADD and FSUB, was originally proposed by Hal Finkel.
llvm-svn: 211771
2014-06-26 18:45:21 +08:00
|
|
|
; Functions below are obtained from the following source:
|
[X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions.
This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions
from a sequence of "vadd + vsub + blend".
Example:
///
typedef float float4 __attribute__((ext_vector_type(4)));
float4 foo(float4 A, float4 B) {
float4 X = A - B;
float4 Y = A + B;
return (float4){X[0], Y[1], X[2], Y[3]};
}
///
Before this patch, (with flag -mcpu=corei7) llc produced the following
assembly sequence:
movaps %xmm0, %xmm2
addps %xmm1, %xmm2
subps %xmm1, %xmm0
blendps $10, %xmm2, %xmm0
With this patch, we now get a single
addsubps %xmm1, %xmm0
llvm-svn: 211427
2014-06-21 09:31:15 +08:00
|
|
|
;
|
|
|
|
; typedef double double2 __attribute__((ext_vector_type(2)));
|
|
|
|
; typedef double double4 __attribute__((ext_vector_type(4)));
|
|
|
|
; typedef float float4 __attribute__((ext_vector_type(4)));
|
|
|
|
; typedef float float8 __attribute__((ext_vector_type(8)));
|
|
|
|
;
|
|
|
|
; float4 test1(float4 A, float4 B) {
|
|
|
|
; float4 X = A - B;
|
|
|
|
; float4 Y = A + B;
|
|
|
|
; return (float4){X[0], Y[1], X[2], Y[3]};
|
|
|
|
; }
|
|
|
|
;
|
|
|
|
; float8 test2(float8 A, float8 B) {
|
|
|
|
; float8 X = A - B;
|
|
|
|
; float8 Y = A + B;
|
[X86] Improve the selection of SSE3/AVX addsub instructions.
This patch teaches the backend how to canonicalize shuffle vectors
according to the rule:
- (shuffle (FADD A, B), (FSUB A, B), Mask) ->
(shuffle (FSUB A, -B), (FADD A, -B), Mask)
Where 'Mask' is:
<0,5,2,7> ;; for v4f32 and v4f64 shuffles.
<0,3> ;; for v2f64 shuffles.
<0,9,2,11,4,13,6,15> ;; for v8f32 shuffles.
In general, ISel only knows how to pattern-match a canonical
'fadd + fsub + blendi' dag node sequence into an ADDSUB instruction.
This new rule allows converting a non-canonical dag sequence into a
canonical one that will be matched by a single ADDSUB at ISel stage.
The idea of converting a non-canonical ADDSUB into a canonical one by
swapping the first two operands of the shuffle, and then negating the
second operand of the FADD and FSUB, was originally proposed by Hal Finkel.
llvm-svn: 211771
2014-06-26 18:45:21 +08:00
|
|
|
; return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
|
[X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions.
This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions
from a sequence of "vadd + vsub + blend".
Example:
///
typedef float float4 __attribute__((ext_vector_type(4)));
float4 foo(float4 A, float4 B) {
float4 X = A - B;
float4 Y = A + B;
return (float4){X[0], Y[1], X[2], Y[3]};
}
///
Before this patch, (with flag -mcpu=corei7) llc produced the following
assembly sequence:
movaps %xmm0, %xmm2
addps %xmm1, %xmm2
subps %xmm1, %xmm0
blendps $10, %xmm2, %xmm0
With this patch, we now get a single
addsubps %xmm1, %xmm0
llvm-svn: 211427
2014-06-21 09:31:15 +08:00
|
|
|
; }
|
|
|
|
;
|
|
|
|
; double4 test3(double4 A, double4 B) {
|
|
|
|
; double4 X = A - B;
|
|
|
|
; double4 Y = A + B;
|
|
|
|
; return (double4){X[0], Y[1], X[2], Y[3]};
|
|
|
|
; }
|
|
|
|
;
|
|
|
|
; double2 test4(double2 A, double2 B) {
|
|
|
|
; double2 X = A - B;
|
|
|
|
; double2 Y = A + B;
|
|
|
|
; return (double2){X[0], Y[1]};
|
|
|
|
; }
|
|
|
|
|
|
|
|
define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-LABEL: test1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-NEXT: addsubps %xmm1, %xmm0
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: test1:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; AVX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions.
This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions
from a sequence of "vadd + vsub + blend".
Example:
///
typedef float float4 __attribute__((ext_vector_type(4)));
float4 foo(float4 A, float4 B) {
float4 X = A - B;
float4 Y = A + B;
return (float4){X[0], Y[1], X[2], Y[3]};
}
///
Before this patch, (with flag -mcpu=corei7) llc produced the following
assembly sequence:
movaps %xmm0, %xmm2
addps %xmm1, %xmm2
subps %xmm1, %xmm0
blendps $10, %xmm2, %xmm0
With this patch, we now get a single
addsubps %xmm1, %xmm0
llvm-svn: 211427
2014-06-21 09:31:15 +08:00
|
|
|
%sub = fsub <4 x float> %A, %B
|
|
|
|
%add = fadd <4 x float> %A, %B
|
|
|
|
%vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
|
|
|
ret <4 x float> %vecinit6
|
|
|
|
}
|
|
|
|
|
|
|
|
define <8 x float> @test2(<8 x float> %A, <8 x float> %B) {
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-LABEL: test2:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-NEXT: addsubps %xmm2, %xmm0
|
|
|
|
; SSE-NEXT: addsubps %xmm3, %xmm1
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: test2:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; AVX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions.
This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions
from a sequence of "vadd + vsub + blend".
Example:
///
typedef float float4 __attribute__((ext_vector_type(4)));
float4 foo(float4 A, float4 B) {
float4 X = A - B;
float4 Y = A + B;
return (float4){X[0], Y[1], X[2], Y[3]};
}
///
Before this patch, (with flag -mcpu=corei7) llc produced the following
assembly sequence:
movaps %xmm0, %xmm2
addps %xmm1, %xmm2
subps %xmm1, %xmm0
blendps $10, %xmm2, %xmm0
With this patch, we now get a single
addsubps %xmm1, %xmm0
llvm-svn: 211427
2014-06-21 09:31:15 +08:00
|
|
|
%sub = fsub <8 x float> %A, %B
|
|
|
|
%add = fadd <8 x float> %A, %B
|
|
|
|
%vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
|
|
|
|
ret <8 x float> %vecinit14
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-LABEL: test3:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-NEXT: addsubpd %xmm2, %xmm0
|
|
|
|
; SSE-NEXT: addsubpd %xmm3, %xmm1
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: test3:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; AVX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions.
This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions
from a sequence of "vadd + vsub + blend".
Example:
///
typedef float float4 __attribute__((ext_vector_type(4)));
float4 foo(float4 A, float4 B) {
float4 X = A - B;
float4 Y = A + B;
return (float4){X[0], Y[1], X[2], Y[3]};
}
///
Before this patch, (with flag -mcpu=corei7) llc produced the following
assembly sequence:
movaps %xmm0, %xmm2
addps %xmm1, %xmm2
subps %xmm1, %xmm0
blendps $10, %xmm2, %xmm0
With this patch, we now get a single
addsubps %xmm1, %xmm0
llvm-svn: 211427
2014-06-21 09:31:15 +08:00
|
|
|
%sub = fsub <4 x double> %A, %B
|
|
|
|
%add = fadd <4 x double> %A, %B
|
|
|
|
%vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
|
|
|
ret <4 x double> %vecinit6
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 {
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-LABEL: test4:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-NEXT: addsubpd %xmm1, %xmm0
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: test4:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; AVX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
[X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions.
This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions
from a sequence of "vadd + vsub + blend".
Example:
///
typedef float float4 __attribute__((ext_vector_type(4)));
float4 foo(float4 A, float4 B) {
float4 X = A - B;
float4 Y = A + B;
return (float4){X[0], Y[1], X[2], Y[3]};
}
///
Before this patch, (with flag -mcpu=corei7) llc produced the following
assembly sequence:
movaps %xmm0, %xmm2
addps %xmm1, %xmm2
subps %xmm1, %xmm0
blendps $10, %xmm2, %xmm0
With this patch, we now get a single
addsubps %xmm1, %xmm0
llvm-svn: 211427
2014-06-21 09:31:15 +08:00
|
|
|
%add = fadd <2 x double> %A, %B
|
|
|
|
%sub = fsub <2 x double> %A, %B
|
|
|
|
%vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
|
|
|
|
ret <2 x double> %vecinit2
|
|
|
|
}
|
|
|
|
|
2016-01-06 17:08:49 +08:00
|
|
|
define <16 x float> @test5(<16 x float> %A, <16 x float> %B) {
|
|
|
|
; SSE-LABEL: test5:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2016-01-06 17:08:49 +08:00
|
|
|
; SSE-NEXT: addsubps %xmm4, %xmm0
|
|
|
|
; SSE-NEXT: addsubps %xmm5, %xmm1
|
|
|
|
; SSE-NEXT: addsubps %xmm6, %xmm2
|
|
|
|
; SSE-NEXT: addsubps %xmm7, %xmm3
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: test5:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX1: # %bb.0:
|
2016-01-06 17:08:49 +08:00
|
|
|
; AVX1-NEXT: vaddsubps %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vaddsubps %ymm3, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: test5:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512: # %bb.0:
|
2017-01-16 00:43:14 +08:00
|
|
|
; AVX512-NEXT: vsubps %zmm1, %zmm0, %zmm2
|
2017-01-14 01:44:28 +08:00
|
|
|
; AVX512-NEXT: movw $-21846, %ax # imm = 0xAAAA
|
|
|
|
; AVX512-NEXT: kmovw %eax, %k1
|
2017-01-16 00:43:14 +08:00
|
|
|
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1}
|
2017-01-14 01:44:28 +08:00
|
|
|
; AVX512-NEXT: vmovaps %zmm2, %zmm0
|
2016-01-06 17:08:49 +08:00
|
|
|
; AVX512-NEXT: retq
|
|
|
|
%add = fadd <16 x float> %A, %B
|
|
|
|
%sub = fsub <16 x float> %A, %B
|
|
|
|
%vecinit2 = shufflevector <16 x float> %sub, <16 x float> %add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
|
|
|
|
ret <16 x float> %vecinit2
|
|
|
|
}
|
|
|
|
|
|
|
|
define <8 x double> @test6(<8 x double> %A, <8 x double> %B) {
|
|
|
|
; SSE-LABEL: test6:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2016-01-06 17:08:49 +08:00
|
|
|
; SSE-NEXT: addsubpd %xmm4, %xmm0
|
|
|
|
; SSE-NEXT: addsubpd %xmm5, %xmm1
|
|
|
|
; SSE-NEXT: addsubpd %xmm6, %xmm2
|
|
|
|
; SSE-NEXT: addsubpd %xmm7, %xmm3
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX1-LABEL: test6:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX1: # %bb.0:
|
2016-01-06 17:08:49 +08:00
|
|
|
; AVX1-NEXT: vaddsubpd %ymm2, %ymm0, %ymm0
|
|
|
|
; AVX1-NEXT: vaddsubpd %ymm3, %ymm1, %ymm1
|
|
|
|
; AVX1-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX512-LABEL: test6:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX512: # %bb.0:
|
2016-01-06 17:08:49 +08:00
|
|
|
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm2
|
|
|
|
; AVX512-NEXT: vsubpd %zmm1, %zmm0, %zmm0
|
2016-07-05 04:41:24 +08:00
|
|
|
; AVX512-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm2[1],zmm0[2],zmm2[3],zmm0[4],zmm2[5],zmm0[6],zmm2[7]
|
2016-01-06 17:08:49 +08:00
|
|
|
; AVX512-NEXT: retq
|
|
|
|
%add = fadd <8 x double> %A, %B
|
|
|
|
%sub = fsub <8 x double> %A, %B
|
|
|
|
%vecinit2 = shufflevector <8 x double> %sub, <8 x double> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
|
|
|
|
ret <8 x double> %vecinit2
|
|
|
|
}
|
|
|
|
|
[X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions.
This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions
from a sequence of "vadd + vsub + blend".
Example:
///
typedef float float4 __attribute__((ext_vector_type(4)));
float4 foo(float4 A, float4 B) {
float4 X = A - B;
float4 Y = A + B;
return (float4){X[0], Y[1], X[2], Y[3]};
}
///
Before this patch, (with flag -mcpu=corei7) llc produced the following
assembly sequence:
movaps %xmm0, %xmm2
addps %xmm1, %xmm2
subps %xmm1, %xmm0
blendps $10, %xmm2, %xmm0
With this patch, we now get a single
addsubps %xmm1, %xmm0
llvm-svn: 211427
2014-06-21 09:31:15 +08:00
|
|
|
define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-LABEL: test1b:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-NEXT: addsubps (%rdi), %xmm0
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: test1b:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; AVX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
2015-02-28 05:17:42 +08:00
|
|
|
%1 = load <4 x float>, <4 x float>* %B
|
[X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions.
This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions
from a sequence of "vadd + vsub + blend".
Example:
///
typedef float float4 __attribute__((ext_vector_type(4)));
float4 foo(float4 A, float4 B) {
float4 X = A - B;
float4 Y = A + B;
return (float4){X[0], Y[1], X[2], Y[3]};
}
///
Before this patch, (with flag -mcpu=corei7) llc produced the following
assembly sequence:
movaps %xmm0, %xmm2
addps %xmm1, %xmm2
subps %xmm1, %xmm0
blendps $10, %xmm2, %xmm0
With this patch, we now get a single
addsubps %xmm1, %xmm0
llvm-svn: 211427
2014-06-21 09:31:15 +08:00
|
|
|
%add = fadd <4 x float> %A, %1
|
|
|
|
%sub = fsub <4 x float> %A, %1
|
|
|
|
%vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
|
|
|
ret <4 x float> %vecinit6
|
|
|
|
}
|
|
|
|
|
|
|
|
define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-LABEL: test2b:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-NEXT: addsubps (%rdi), %xmm0
|
|
|
|
; SSE-NEXT: addsubps 16(%rdi), %xmm1
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: test2b:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; AVX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0
|
|
|
|
; AVX-NEXT: retq
|
2015-02-28 05:17:42 +08:00
|
|
|
%1 = load <8 x float>, <8 x float>* %B
|
[X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions.
This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions
from a sequence of "vadd + vsub + blend".
Example:
///
typedef float float4 __attribute__((ext_vector_type(4)));
float4 foo(float4 A, float4 B) {
float4 X = A - B;
float4 Y = A + B;
return (float4){X[0], Y[1], X[2], Y[3]};
}
///
Before this patch, (with flag -mcpu=corei7) llc produced the following
assembly sequence:
movaps %xmm0, %xmm2
addps %xmm1, %xmm2
subps %xmm1, %xmm0
blendps $10, %xmm2, %xmm0
With this patch, we now get a single
addsubps %xmm1, %xmm0
llvm-svn: 211427
2014-06-21 09:31:15 +08:00
|
|
|
%add = fadd <8 x float> %A, %1
|
|
|
|
%sub = fsub <8 x float> %A, %1
|
|
|
|
%vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
|
|
|
|
ret <8 x float> %vecinit14
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-LABEL: test3b:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-NEXT: addsubpd (%rdi), %xmm0
|
|
|
|
; SSE-NEXT: addsubpd 16(%rdi), %xmm1
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: test3b:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; AVX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0
|
|
|
|
; AVX-NEXT: retq
|
2015-02-28 05:17:42 +08:00
|
|
|
%1 = load <4 x double>, <4 x double>* %B
|
[X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions.
This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions
from a sequence of "vadd + vsub + blend".
Example:
///
typedef float float4 __attribute__((ext_vector_type(4)));
float4 foo(float4 A, float4 B) {
float4 X = A - B;
float4 Y = A + B;
return (float4){X[0], Y[1], X[2], Y[3]};
}
///
Before this patch, (with flag -mcpu=corei7) llc produced the following
assembly sequence:
movaps %xmm0, %xmm2
addps %xmm1, %xmm2
subps %xmm1, %xmm0
blendps $10, %xmm2, %xmm0
With this patch, we now get a single
addsubps %xmm1, %xmm0
llvm-svn: 211427
2014-06-21 09:31:15 +08:00
|
|
|
%add = fadd <4 x double> %A, %1
|
|
|
|
%sub = fsub <4 x double> %A, %1
|
|
|
|
%vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
|
|
|
ret <4 x double> %vecinit6
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-LABEL: test4b:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; SSE-NEXT: addsubpd (%rdi), %xmm0
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: test4b:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2015-11-29 03:20:49 +08:00
|
|
|
; AVX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
2015-02-28 05:17:42 +08:00
|
|
|
%1 = load <2 x double>, <2 x double>* %B
|
[X86] Add ISel patterns to select SSE3/AVX ADDSUB instructions.
This patch adds ISel patterns to select SSE3/AVX ADDSUB instructions
from a sequence of "vadd + vsub + blend".
Example:
///
typedef float float4 __attribute__((ext_vector_type(4)));
float4 foo(float4 A, float4 B) {
float4 X = A - B;
float4 Y = A + B;
return (float4){X[0], Y[1], X[2], Y[3]};
}
///
Before this patch, (with flag -mcpu=corei7) llc produced the following
assembly sequence:
movaps %xmm0, %xmm2
addps %xmm1, %xmm2
subps %xmm1, %xmm0
blendps $10, %xmm2, %xmm0
With this patch, we now get a single
addsubps %xmm1, %xmm0
llvm-svn: 211427
2014-06-21 09:31:15 +08:00
|
|
|
%sub = fsub <2 x double> %A, %1
|
|
|
|
%add = fadd <2 x double> %A, %1
|
|
|
|
%vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
|
|
|
|
ret <2 x double> %vecinit2
|
|
|
|
}
|
2015-11-30 00:41:04 +08:00
|
|
|
|
|
|
|
define <4 x float> @test1c(<4 x float> %A, <4 x float>* %B) {
|
|
|
|
; SSE-LABEL: test1c:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-11-30 00:41:04 +08:00
|
|
|
; SSE-NEXT: addsubps (%rdi), %xmm0
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: test1c:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2015-11-30 00:41:04 +08:00
|
|
|
; AVX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
%1 = load <4 x float>, <4 x float>* %B
|
|
|
|
%add = fadd <4 x float> %A, %1
|
|
|
|
%sub = fsub <4 x float> %A, %1
|
|
|
|
%vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
|
|
|
|
ret <4 x float> %vecinit6
|
|
|
|
}
|
|
|
|
|
|
|
|
define <8 x float> @test2c(<8 x float> %A, <8 x float>* %B) {
|
|
|
|
; SSE-LABEL: test2c:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-11-30 00:41:04 +08:00
|
|
|
; SSE-NEXT: addsubps (%rdi), %xmm0
|
|
|
|
; SSE-NEXT: addsubps 16(%rdi), %xmm1
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: test2c:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2015-11-30 00:41:04 +08:00
|
|
|
; AVX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
%1 = load <8 x float>, <8 x float>* %B
|
|
|
|
%add = fadd <8 x float> %A, %1
|
|
|
|
%sub = fsub <8 x float> %A, %1
|
|
|
|
%vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
|
|
|
|
ret <8 x float> %vecinit14
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x double> @test3c(<4 x double> %A, <4 x double>* %B) {
|
|
|
|
; SSE-LABEL: test3c:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-11-30 00:41:04 +08:00
|
|
|
; SSE-NEXT: addsubpd (%rdi), %xmm0
|
|
|
|
; SSE-NEXT: addsubpd 16(%rdi), %xmm1
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: test3c:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2015-11-30 00:41:04 +08:00
|
|
|
; AVX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
%1 = load <4 x double>, <4 x double>* %B
|
|
|
|
%add = fadd <4 x double> %A, %1
|
|
|
|
%sub = fsub <4 x double> %A, %1
|
|
|
|
%vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
|
|
|
|
ret <4 x double> %vecinit6
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x double> @test4c(<2 x double> %A, <2 x double>* %B) {
|
|
|
|
; SSE-LABEL: test4c:
|
2017-12-05 01:18:51 +08:00
|
|
|
; SSE: # %bb.0:
|
2015-11-30 00:41:04 +08:00
|
|
|
; SSE-NEXT: addsubpd (%rdi), %xmm0
|
|
|
|
; SSE-NEXT: retq
|
|
|
|
;
|
|
|
|
; AVX-LABEL: test4c:
|
2017-12-05 01:18:51 +08:00
|
|
|
; AVX: # %bb.0:
|
2015-11-30 00:41:04 +08:00
|
|
|
; AVX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0
|
|
|
|
; AVX-NEXT: retq
|
|
|
|
%1 = load <2 x double>, <2 x double>* %B
|
|
|
|
%sub = fsub <2 x double> %A, %1
|
|
|
|
%add = fadd <2 x double> %A, %1
|
|
|
|
%vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 2, i32 1>
|
|
|
|
ret <2 x double> %vecinit2
|
|
|
|
}
|