[DAG] Further improve the logic in DAGCombiner that folds a pair of shuffles into a single shuffle if the resulting mask is legal.
This patch teaches the DAGCombiner how to fold shuffles according to the
following new rules:
1. shuffle(shuffle(x, y), undef) -> x
2. shuffle(shuffle(x, y), undef) -> y
3. shuffle(shuffle(x, y), undef) -> shuffle(x, undef)
4. shuffle(shuffle(x, y), undef) -> shuffle(y, undef)
The backend avoids combining shuffles according to rules 3. and 4. if
the resulting shuffle does not have a legal mask. This is to avoid introducing
illegal shuffles that are potentially expanded into a sub-optimal sequence of
target specific dag nodes during vector legalization.
Added test case combine-vec-shuffle-2.ll to verify that we correctly trigger
the new rules when combining shuffles.
llvm-svn: 212748
2014-07-11 02:04:55 +08:00
|
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
|
|
|
|
|
|
|
|
; Check that DAGCombiner correctly folds the following pairs of shuffles
|
|
|
|
; using the following rules:
|
|
|
|
; 1. shuffle(shuffle(x, y), undef) -> x
|
|
|
|
; 2. shuffle(shuffle(x, y), undef) -> y
|
|
|
|
; 3. shuffle(shuffle(x, y), undef) -> shuffle(x, undef)
|
|
|
|
; 4. shuffle(shuffle(x, y), undef) -> shuffle(y, undef)
|
|
|
|
;
|
|
|
|
; Rules 3. and 4. are used only if the resulting shuffle mask is legal.
|
|
|
|
|
|
|
|
; Lanes: %1 = [A0, B0, A3, A1]; %2 = [A3, undef, A0, A1].
; Every defined lane of %2 comes from %A (outer index 4 selects the undef operand).
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test1
|
|
|
|
; Mask: [3,0,0,1]
|
|
|
|
; CHECK: pshufd $67
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [A0, B1, A2, A3]; %2 = [A2, undef, A0, A3].
; The only %B lane of %1 (lane 1) is never selected by the outer shuffle.
define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test2
|
|
|
|
; Mask: [2,0,0,3]
|
|
|
|
; CHECK: pshufd $-62
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [A0, B2, A2, A3]; %2 = [A2, undef, A0, A3].
; The only %B lane of %1 (lane 1) is never selected by the outer shuffle.
define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test3
|
|
|
|
; Mask: [2,0,0,3]
|
|
|
|
; CHECK: pshufd $-62
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [A0, B0, B3, A1]; %2 = [undef, undef, A0, A1].
; Every defined lane of %2 comes from %A.
define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 7, i32 1>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 4, i32 0, i32 3>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test4
|
|
|
|
; Mask: [0,0,0,1]
|
|
|
|
; CHECK: pshufd $64
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [B1, B1, A2, A3]; %2 = [A2, undef, undef, A3].
; Every defined lane of %2 comes from the upper half of %A.
define <4 x i32> @test5(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 4, i32 3>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test5
|
|
|
|
; Mask: [1,1]
|
|
|
|
; CHECK: movhlps
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [A0, B2, A2, B0]; %2 = [A2, undef, A0, undef].
; Every defined lane of %2 comes from %A.
define <4 x i32> @test6(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 4>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test6
|
|
|
|
; Mask: [2,0,0,0]
|
|
|
|
; CHECK: pshufd $2
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [A0, B1, A2, B3]; %2 = [A0, A2, A0, A2].
; The outer shuffle selects only the even lanes of %1, which all hold %A elements.
define <4 x i32> @test7(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test7
|
|
|
|
; Mask: [0,2,0,2]
|
|
|
|
; CHECK: pshufd $-120
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [B0, A1, B2, A3]; %2 = [A1, undef, A3, undef].
; Every defined lane of %2 comes from %A.
define <4 x i32> @test8(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test8
|
|
|
|
; Mask: [1,0,3,0]
|
|
|
|
; CHECK: pshufd $49
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [A1, A3, A2, B1]; %2 = [A1, A3, undef, A2].
; The only %B lane of %1 (lane 3) is never selected by the outer shuffle.
define <4 x i32> @test9(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 2, i32 5>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test9
|
|
|
|
; Mask: [1,3,0,2]
|
|
|
|
; CHECK: pshufd $-115
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [A1, A1, B1, B1]; %2 = [A1, undef, A1, undef].
; Every defined lane of %2 comes from %A.
define <4 x i32> @test10(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test10
|
|
|
|
; Mask: [1,0,1,0]
|
|
|
|
; CHECK: pshufd $17
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [A1, A2, B1, B0]; %2 = [A1, undef, A2, A1].
; Every defined lane of %2 comes from %A.
define <4 x i32> @test11(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 2, i32 5, i32 4>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 0>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test11
|
|
|
|
; Mask: [1,0,2,1]
|
|
|
|
; CHECK: pshufd $97
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [A0, A0, A2, B0]; %2 = [A0, undef, A0, undef].
; Every defined lane of %2 is element 0 of %A.
define <4 x i32> @test12(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 0, i32 2, i32 4>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 0, i32 4>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test12
|
|
|
|
; Mask: [0,0,0,0]
|
|
|
|
; CHECK: pshufd $0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; The following pair of shuffles is folded into vector %A.
|
|
|
|
; Lanes: %1 = [A1, B0, A2, B2]; %2 = [undef, A1, A2, undef].
; The defined lanes of %2 (1 and 2) hold A1 and A2 at their original positions,
; so %2 matches %A wherever it is defined.
define <4 x i32> @test13(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 4, i32 2, i32 6>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 0, i32 2, i32 4>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test13
|
|
|
|
; CHECK-NOT: pshufd
|
|
|
|
; CHECK: ret
|
|
|
|
|
|
|
|
|
|
|
|
; The following pair of shuffles is folded into vector %B.
|
|
|
|
; Lanes: %1 = [A0, B2, A2, B0]; %2 = [B0, undef, B2, undef].
; The defined lanes of %2 (0 and 2) hold B0 and B2 at their original positions,
; so %2 matches %B wherever it is defined.
define <4 x i32> @test14(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 4, i32 1, i32 4>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test14
|
|
|
|
; CHECK-NOT: pshufd
|
|
|
|
; CHECK: ret
|
|
|
|
|
2014-07-11 02:59:41 +08:00
|
|
|
|
|
|
|
; Verify that we don't optimize the following cases. We expect more than one shuffle.
|
|
|
|
|
|
|
|
; Lanes: %1 = [A0, B0, A3, A1]; %2 = [A3, B0, A0, A1].
; %2 mixes elements of both %A and %B, so it is not a shuffle of a single input.
define <4 x i32> @test15(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test15
|
|
|
|
; CHECK: shufps $114
|
|
|
|
; CHECK-NEXT: pshufd $-58
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [A0, B1, A2, B3]; %2 = [A2, B1, A0, B3].
; %2 mixes elements of both %A and %B, so it is not a shuffle of a single input.
define <4 x i32> @test16(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test16
|
|
|
|
; CHECK: blendps $10
|
|
|
|
; CHECK-NEXT: pshufd $-58
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [B0, A1, A3, A1]; %2 = [A3, A1, B0, A1].
; %2 mixes elements of both %A and %B, so it is not a shuffle of a single input.
define <4 x i32> @test17(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test17
|
|
|
|
; CHECK: shufps $120
|
|
|
|
; CHECK-NEXT: pshufd $-58
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [B0, B1, A2, B3]; %2 = [B1, B1, B0, B3].
; The only %A lane of %1 (lane 2) is never selected, so %2 depends on %B alone.
define <4 x i32> @test18(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test18
|
[DAGCombiner] Improved target independent vector shuffle combine rule.
This patch improves the existing algorithm in DAGCombiner that
attempts to fold shuffles according to rule:
shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(y, undef, M3)
Before this change, there were cases where the DAGCombiner conservatively
avoided folding shuffles even if the resulting mask would have been legal.
That is because the algorithm wrongly assumed that commuting
an illegal shuffle mask would always produce an illegal mask.
With this change, we now correctly compute the commuted shuffle mask before
calling method 'isShuffleMaskLegal' on it.
On X86, this improves for example the codegen for the following function:
define <4 x i32> @test(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 6, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
ret <4 x i32> %2
}
Before this change the X86 backend (-mcpu=corei7) generated
the following assembly code for function @test:
shufps $-23, %xmm0, %xmm1 # xmm1 = xmm1[1,2],xmm0[2,3]
movhlps %xmm1, %xmm1 # xmm1 = xmm1[1,1]
movaps %xmm1, %xmm0
Now we produce:
movhlps %xmm0, %xmm0 # xmm0 = xmm0[1,1]
Added extra test cases in combine-vec-shuffle-2.ll to verify that we correctly
fold according to the above-mentioned rule.
llvm-svn: 215555
2014-08-14 00:09:40 +08:00
|
|
|
; CHECK-NOT: blendps
|
|
|
|
; CHECK: pshufd {{.*}} # xmm0 = xmm1[1,1,0,3]
|
2014-07-11 02:59:41 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
; Lanes: %1 = [A0, B0, B1, B2]; %2 = [B1, A0, A0, A0].
; %2 mixes elements of both %A and %B, so it is not a shuffle of a single input.
define <4 x i32> @test19(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test19
|
|
|
|
; CHECK: shufps $-104
|
|
|
|
; CHECK-NEXT: pshufd $2
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [A3, A2, B0, B0]; %2 = [B0, A2, A3, B0].
; %2 mixes elements of both %A and %B, so it is not a shuffle of a single input.
define <4 x i32> @test20(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 3, i32 2, i32 4, i32 4>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test20
|
|
|
|
; CHECK: shufps $11
|
|
|
|
; CHECK-NEXT: pshufd $-58
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
|
|
|
|
; Lanes: %1 = [B0, A1, A3, A1]; %2 = [B0, A1, B0, A1].
; %2 mixes elements of both %A and %B, so it is not a shuffle of a single input.
define <4 x i32> @test21(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test21
|
|
|
|
; CHECK: shufps $120
|
|
|
|
; CHECK-NEXT: pshufd $-60
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
[DAGCombiner] Improved target independent vector shuffle combine rule.
This patch improves the existing algorithm in DAGCombiner that
attempts to fold shuffles according to rule:
shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(y, undef, M3)
Before this change, there were cases where the DAGCombiner conservatively
avoided folding shuffles even if the resulting mask would have been legal.
That is because the algorithm wrongly assumed that commuting
an illegal shuffle mask would always produce an illegal mask.
With this change, we now correctly compute the commuted shuffle mask before
calling method 'isShuffleMaskLegal' on it.
On X86, this improves for example the codegen for the following function:
define <4 x i32> @test(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 6, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
ret <4 x i32> %2
}
Before this change the X86 backend (-mcpu=corei7) generated
the following assembly code for function @test:
shufps $-23, %xmm0, %xmm1 # xmm1 = xmm1[1,2],xmm0[2,3]
movhlps %xmm1, %xmm1 # xmm1 = xmm1[1,1]
movaps %xmm1, %xmm0
Now we produce:
movhlps %xmm0, %xmm0 # xmm0 = xmm0[1,1]
Added extra test cases in combine-vec-shuffle-2.ll to verify that we correctly
fold according to the above-mentioned rule.
llvm-svn: 215555
2014-08-14 00:09:40 +08:00
|
|
|
; Test that we correctly combine shuffles according to rule
|
|
|
|
; shuffle(shuffle(x, y), undef) -> shuffle(y, undef)
|
2014-07-11 02:59:41 +08:00
|
|
|
|
|
|
|
; Lanes: %1 = [B0, B1, A2, B3]; %2 = [B1, B1, B1, B3].
; The only %A lane of %1 (lane 2) is never selected, so %2 depends on %B alone.
define <4 x i32> @test22(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 3>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test22
|
[DAGCombiner] Improved target independent vector shuffle combine rule.
This patch improves the existing algorithm in DAGCombiner that
attempts to fold shuffles according to rule:
shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(y, undef, M3)
Before this change, there were cases where the DAGCombiner conservatively
avoided folding shuffles even if the resulting mask would have been legal.
That is because the algorithm wrongly assumed that commuting
an illegal shuffle mask would always produce an illegal mask.
With this change, we now correctly compute the commuted shuffle mask before
calling method 'isShuffleMaskLegal' on it.
On X86, this improves for example the codegen for the following function:
define <4 x i32> @test(<4 x i32> %A, <4 x i32> %B) {
%1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 6, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
ret <4 x i32> %2
}
Before this change the X86 backend (-mcpu=corei7) generated
the following assembly code for function @test:
shufps $-23, %xmm0, %xmm1 # xmm1 = xmm1[1,2],xmm0[2,3]
movhlps %xmm1, %xmm1 # xmm1 = xmm1[1,1]
movaps %xmm1, %xmm0
Now we produce:
movhlps %xmm0, %xmm0 # xmm0 = xmm0[1,1]
Added extra test cases in combine-vec-shuffle-2.ll to verify that we correctly
fold according to the above-mentioned rule.
llvm-svn: 215555
2014-08-14 00:09:40 +08:00
|
|
|
; CHECK-NOT: blendps
|
|
|
|
; CHECK: pshufd {{.*}} # xmm0 = xmm1[1,1,1,3]
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
; Lanes: %1 = [B0, B1, A2, B3]; %2 = [B0, B1, B0, B3].
; The only %A lane of %1 (lane 2) is never selected, so %2 depends on %B alone.
define <4 x i32> @test23(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test23
|
|
|
|
; CHECK-NOT: blendps
|
|
|
|
; CHECK: pshufd {{.*}} # xmm0 = xmm1[0,1,0,3]
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
; Lanes: %1 = [B0, A1, B2, B3]; %2 = [B0, B3, B2, undef].
; The only %A lane of %1 (lane 1) is never selected, so %2 depends on %B alone.
define <4 x i32> @test24(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 4>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test24
|
|
|
|
; CHECK-NOT: blendps
|
|
|
|
; CHECK: pshufd {{.*}} # xmm0 = xmm1[0,3,2,0]
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
; Note the inner shuffle takes %B as its first operand and %A as its second.
; Lanes: %1 = [B1, A1, B2, A0]; %2 = [A0, A1, A0, A1].
; Only the odd lanes of %1 are selected, and those hold %A elements.
define <4 x i32> @test25(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 5, i32 2, i32 4>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 3, i32 1>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test25
|
|
|
|
; CHECK-NOT: shufps
|
|
|
|
; CHECK: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
; Note the inner shuffle takes %B as its first operand and %A as its second.
; Lanes: %1 = [B1, B2, A2, A3]; %2 = [A2, A3, A2, A3].
; Only the upper half of %1 is selected, and it holds the upper half of %A.
define <4 x i32> @test26(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 6, i32 7>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test26
|
|
|
|
; CHECK-NOT: shufps
|
|
|
|
; CHECK: movhlps {{.*}} # xmm0 = xmm0[1,1]
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
; Note the inner shuffle takes %B as its first operand and %A as its second.
; Lanes: %1 = [B2, B1, A1, A0]; %2 = [A0, A1, A0, A1].
; Only the upper half of %1 is selected, and it holds the lower half of %A reversed.
define <4 x i32> @test27(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 2, i32 1, i32 5, i32 4>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test27
|
|
|
|
; CHECK-NOT: shufps
|
|
|
|
; CHECK-NOT: movhlps
|
|
|
|
; CHECK: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|
|
|
|
; Note the inner shuffle takes %B as its first operand and %A as its second.
; Lanes: %1 = [B1, B2, A0, A1]; %2 = [A0, A1, A1, A0].
; Only the upper half of %1 is selected, and it holds the lower half of %A.
define <4 x i32> @test28(<4 x i32> %A, <4 x i32> %B) {
|
|
|
|
%1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
|
|
|
|
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 2>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
; CHECK-LABEL: test28
|
|
|
|
; CHECK-NOT: shufps
|
|
|
|
; CHECK-NOT: movhlps
|
|
|
|
; CHECK: pshufd {{.*}} # xmm0 = xmm0[0,1,1,0]
|
2014-07-11 02:59:41 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
|