forked from OSchip/llvm-project
Add AVX versions of blend vector operations and fix some issues noticed
in Nadav's r139285 and r139287 commits.
1) Rename vsel.ll to a more descriptive name.
2) Change the order of BLEND operands to "Op1, Op2, Cond". This is necessary because PBLENDVB is already used in different places with this order, and it was being emitted incorrectly for vselect.
3) Add AVX patterns and tests for the same SSE41 instructions.
llvm-svn: 139305
This commit is contained in:
parent
ea8d803bb0
commit
fb113a0051
|
@ -8697,7 +8697,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||||
SDValue Op2 = Op.getOperand(2);
|
SDValue Op2 = Op.getOperand(2);
|
||||||
DebugLoc DL = Op.getDebugLoc();
|
DebugLoc DL = Op.getDebugLoc();
|
||||||
|
|
||||||
SDValue Ops[] = {Cond, Op1, Op2};
|
SDValue Ops[] = {Op1, Op2, Cond};
|
||||||
|
|
||||||
assert(Op1.getValueType().isVector() && "Op1 must be a vector");
|
assert(Op1.getValueType().isVector() && "Op1 must be a vector");
|
||||||
assert(Op2.getValueType().isVector() && "Op2 must be a vector");
|
assert(Op2.getValueType().isVector() && "Op2 must be a vector");
|
||||||
|
|
|
@ -5853,9 +5853,14 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
|
||||||
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
|
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
|
||||||
memopv32i8, int_x86_avx_blendv_ps_256>;
|
memopv32i8, int_x86_avx_blendv_ps_256>;
|
||||||
|
|
||||||
def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$src3),
|
let Predicates = [HasAVX] in {
|
||||||
(VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$src3)>,
|
def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, VR128:$mask),
|
||||||
Requires<[HasAVX]>;
|
(VPBLENDVBrr VR128:$src1, VR128:$src2, VR128:$mask)>;
|
||||||
|
def : Pat<(X86blendvpd VR128:$src1, VR128:$src2, VR128:$mask),
|
||||||
|
(VBLENDVPDrr VR128:$src1, VR128:$src2, VR128:$mask)>;
|
||||||
|
def : Pat<(X86blendvps VR128:$src1, VR128:$src2, VR128:$mask),
|
||||||
|
(VBLENDVPSrr VR128:$src1, VR128:$src2, VR128:$mask)>;
|
||||||
|
}
|
||||||
|
|
||||||
/// SS41I_ternary_int - SSE 4.1 ternary operator
|
/// SS41I_ternary_int - SSE 4.1 ternary operator
|
||||||
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
|
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
|
||||||
|
@ -5877,16 +5882,18 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
|
defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
|
||||||
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
|
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
|
||||||
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
|
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
|
||||||
|
|
||||||
def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0),
|
let Predicates = [HasSSE41] in {
|
||||||
(PBLENDVBrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
|
def : Pat<(X86pblendvb VR128:$src1, VR128:$src2, XMM0),
|
||||||
def : Pat<(X86blendvpd XMM0, VR128:$src1, VR128:$src2),
|
(PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
|
||||||
(BLENDVPDrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
|
def : Pat<(X86blendvpd VR128:$src1, VR128:$src2, XMM0),
|
||||||
def : Pat<(X86blendvps XMM0, VR128:$src1, VR128:$src2),
|
(BLENDVPDrr0 VR128:$src1, VR128:$src2)>;
|
||||||
(BLENDVPSrr0 VR128:$src1, VR128:$src2)>, Requires<[HasSSE41]>;
|
def : Pat<(X86blendvps VR128:$src1, VR128:$src2, XMM0),
|
||||||
|
(BLENDVPSrr0 VR128:$src1, VR128:$src2)>;
|
||||||
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX] in
|
let Predicates = [HasAVX] in
|
||||||
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||||
|
|
|
@ -0,0 +1,47 @@
|
||||||
|
; RUN: llc < %s -mattr=+avx -march=x86 | FileCheck %s
|
||||||
|
|
||||||
|
;CHECK: vsel_float
|
||||||
|
;CHECK: vblendvps
|
||||||
|
;CHECK: ret
|
||||||
|
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
|
||||||
|
%vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
|
||||||
|
ret <4 x float> %vsel
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
;CHECK: vsel_i32
|
||||||
|
;CHECK: vblendvps
|
||||||
|
;CHECK: ret
|
||||||
|
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
|
||||||
|
%vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %v1, <4 x i32> %v2
|
||||||
|
ret <4 x i32> %vsel
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
;CHECK: vsel_double
|
||||||
|
;CHECK: vblendvpd
|
||||||
|
;CHECK: ret
|
||||||
|
define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
|
||||||
|
%vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2
|
||||||
|
ret <2 x double> %vsel
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
;CHECK: vsel_i64
|
||||||
|
;CHECK: vblendvpd
|
||||||
|
;CHECK: ret
|
||||||
|
define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
|
||||||
|
%vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2
|
||||||
|
ret <2 x i64> %vsel
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
;CHECK: vsel_i8
|
||||||
|
;CHECK: vpblendvb
|
||||||
|
;CHECK: ret
|
||||||
|
define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
|
||||||
|
%vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
|
||||||
|
ret <16 x i8> %vsel
|
||||||
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue