forked from OSchip/llvm-project
[AVX-512] Add more patterns for masked and broadcast logical operations where the select or broadcast has a floating-point type.
These are needed in order to remove the masked floating-point logical operation intrinsics and use native IR. llvm-svn: 280465
This commit is contained in:
parent
00aecd97bf
commit
45d6503089
|
@ -4238,215 +4238,72 @@ defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, HasDQI,
|
|||
SSE_ALU_ITINS_P, 1>;
|
||||
|
||||
// Patterns catch floating point selects with bitcasted integer logic ops.
|
||||
let Predicates = [HasVLX] in {
|
||||
def : Pat<(v4f32 (vselect VK4WM:$mask,
|
||||
(bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
|
||||
VR128X:$src0)),
|
||||
(VPANDDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
def : Pat<(v4f32 (vselect VK4WM:$mask,
|
||||
(bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
|
||||
VR128X:$src0)),
|
||||
(VPORDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
def : Pat<(v4f32 (vselect VK4WM:$mask,
|
||||
(bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
|
||||
VR128X:$src0)),
|
||||
(VPXORDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
def : Pat<(v4f32 (vselect VK4WM:$mask,
|
||||
(bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
|
||||
VR128X:$src0)),
|
||||
(VPANDNDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1,
|
||||
VR128X:$src2)>;
|
||||
|
||||
def : Pat<(v4f32 (vselect VK4WM:$mask,
|
||||
(bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
|
||||
(bitconvert (v4i32 immAllZerosV)))),
|
||||
(VPANDDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
def : Pat<(v4f32 (vselect VK4WM:$mask,
|
||||
(bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
|
||||
(bitconvert (v4i32 immAllZerosV)))),
|
||||
(VPORDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
def : Pat<(v4f32 (vselect VK4WM:$mask,
|
||||
(bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
|
||||
(bitconvert (v4i32 immAllZerosV)))),
|
||||
(VPXORDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
def : Pat<(v4f32 (vselect VK4WM:$mask,
|
||||
(bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
|
||||
(bitconvert (v4i32 immAllZerosV)))),
|
||||
(VPANDNDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
|
||||
def : Pat<(v2f64 (vselect VK2WM:$mask,
|
||||
(bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
|
||||
VR128X:$src0)),
|
||||
(VPANDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
def : Pat<(v2f64 (vselect VK2WM:$mask,
|
||||
(bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
|
||||
VR128X:$src0)),
|
||||
(VPORQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
def : Pat<(v2f64 (vselect VK2WM:$mask,
|
||||
(bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
|
||||
VR128X:$src0)),
|
||||
(VPXORQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
def : Pat<(v2f64 (vselect VK2WM:$mask,
|
||||
(bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
|
||||
VR128X:$src0)),
|
||||
(VPANDNQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
|
||||
def : Pat<(v2f64 (vselect VK2WM:$mask,
|
||||
(bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
|
||||
(bitconvert (v4i32 immAllZerosV)))),
|
||||
(VPANDQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
def : Pat<(v2f64 (vselect VK2WM:$mask,
|
||||
(bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
|
||||
(bitconvert (v4i32 immAllZerosV)))),
|
||||
(VPORQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
def : Pat<(v2f64 (vselect VK2WM:$mask,
|
||||
(bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
|
||||
(bitconvert (v4i32 immAllZerosV)))),
|
||||
(VPXORQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
def : Pat<(v2f64 (vselect VK2WM:$mask,
|
||||
(bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
|
||||
(bitconvert (v4i32 immAllZerosV)))),
|
||||
(VPANDNQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
|
||||
|
||||
def : Pat<(v8f32 (vselect VK8WM:$mask,
|
||||
(bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
|
||||
VR256X:$src0)),
|
||||
(VPANDDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
def : Pat<(v8f32 (vselect VK8WM:$mask,
|
||||
(bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
|
||||
VR256X:$src0)),
|
||||
(VPORDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
def : Pat<(v8f32 (vselect VK8WM:$mask,
|
||||
(bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
|
||||
VR256X:$src0)),
|
||||
(VPXORDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
def : Pat<(v8f32 (vselect VK8WM:$mask,
|
||||
(bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
|
||||
VR256X:$src0)),
|
||||
(VPANDNDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
|
||||
def : Pat<(v8f32 (vselect VK8WM:$mask,
|
||||
(bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
|
||||
(bitconvert (v8i32 immAllZerosV)))),
|
||||
(VPANDDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
def : Pat<(v8f32 (vselect VK8WM:$mask,
|
||||
(bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
|
||||
(bitconvert (v8i32 immAllZerosV)))),
|
||||
(VPORDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
def : Pat<(v8f32 (vselect VK8WM:$mask,
|
||||
(bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
|
||||
(bitconvert (v8i32 immAllZerosV)))),
|
||||
(VPXORDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
def : Pat<(v8f32 (vselect VK8WM:$mask,
|
||||
(bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
|
||||
(bitconvert (v8i32 immAllZerosV)))),
|
||||
(VPANDNDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
|
||||
def : Pat<(v4f64 (vselect VK4WM:$mask,
|
||||
(bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
|
||||
VR256X:$src0)),
|
||||
(VPANDQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
def : Pat<(v4f64 (vselect VK4WM:$mask,
|
||||
(bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
|
||||
VR256X:$src0)),
|
||||
(VPORQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
def : Pat<(v4f64 (vselect VK4WM:$mask,
|
||||
(bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
|
||||
VR256X:$src0)),
|
||||
(VPXORQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
def : Pat<(v4f64 (vselect VK4WM:$mask,
|
||||
(bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
|
||||
VR256X:$src0)),
|
||||
(VPANDNQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
|
||||
def : Pat<(v4f64 (vselect VK4WM:$mask,
|
||||
(bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
|
||||
(bitconvert (v8i32 immAllZerosV)))),
|
||||
(VPANDQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
def : Pat<(v4f64 (vselect VK4WM:$mask,
|
||||
(bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
|
||||
(bitconvert (v8i32 immAllZerosV)))),
|
||||
(VPORQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
def : Pat<(v4f64 (vselect VK4WM:$mask,
|
||||
(bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
|
||||
(bitconvert (v8i32 immAllZerosV)))),
|
||||
(VPXORQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
def : Pat<(v4f64 (vselect VK4WM:$mask,
|
||||
(bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
|
||||
(bitconvert (v8i32 immAllZerosV)))),
|
||||
(VPANDNQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
|
||||
// Selection patterns for a logical operation that is computed on the integer
// type _.i64VT but consumed as _.VT: either through a bitconvert feeding a
// vselect whose result type is _.VT, or with a broadcast operand of type _.VT.
//
// Parameters:
//   InstrStr - instruction name prefix. The suffixes rrk, rrkz, rmk, rmkz,
//              rmb, rmbk and rmbkz are pasted onto it and the result is
//              resolved with !cast<Instruction>.
//   OpNode   - the logical DAG node to match.
//   _        - vector type info supplying VT, i64VT, RC, KRCWM, ImmAllZerosV
//              and ScalarLdFrag.
//   prd      - the single predicate guarding every pattern defined here.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
|
||||
X86VectorVTInfo _, Predicate prd> {
|
||||
let Predicates = [prd] in {
|
||||
// Masked register-register logical operations.
|
||||
// vselect falling back to $src0 selects the rrk form, which takes $src0 as
// its first operand.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
|
||||
_.RC:$src0)),
|
||||
(!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
|
||||
_.RC:$src1, _.RC:$src2)>;
|
||||
// vselect falling back to all zeros selects the rrkz form, which has no
// $src0 operand.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
|
||||
_.ImmAllZerosV)),
|
||||
(!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
|
||||
_.RC:$src2)>;
|
||||
// Masked register-memory logical operations.
|
||||
// Same two shapes as above, with the second operand being a load from
// addr:$src2; they select the rmk and rmkz forms respectively.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(bitconvert (_.i64VT (OpNode _.RC:$src1,
|
||||
(load addr:$src2)))),
|
||||
_.RC:$src0)),
|
||||
(!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
|
||||
_.RC:$src1, addr:$src2)>;
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
|
||||
_.ImmAllZerosV)),
|
||||
(!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
|
||||
addr:$src2)>;
|
||||
// Register-broadcast logical operations.
|
||||
// Unmasked: the second operand is an X86VBroadcast of a scalar load, typed
// _.VT and bitconverted to _.i64VT; selects the rmb form.
def : Pat<(_.i64VT (OpNode _.RC:$src1,
|
||||
(bitconvert (_.VT (X86VBroadcast
|
||||
(_.ScalarLdFrag addr:$src2)))))),
|
||||
(!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
|
||||
// Masked broadcast with $src0 as the vselect fallback; selects rmbk.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(bitconvert
|
||||
(_.i64VT (OpNode _.RC:$src1,
|
||||
(bitconvert (_.VT
|
||||
(X86VBroadcast
|
||||
(_.ScalarLdFrag addr:$src2))))))),
|
||||
_.RC:$src0)),
|
||||
(!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
|
||||
_.RC:$src1, addr:$src2)>;
|
||||
// Masked broadcast with all zeros as the vselect fallback; selects rmbkz.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(bitconvert
|
||||
(_.i64VT (OpNode _.RC:$src1,
|
||||
(bitconvert (_.VT
|
||||
(X86VBroadcast
|
||||
(_.ScalarLdFrag addr:$src2))))))),
|
||||
_.ImmAllZerosV)),
|
||||
(!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
|
||||
_.RC:$src1, addr:$src2)>;
|
||||
}
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
def : Pat<(v16f32 (vselect VK16WM:$mask,
|
||||
(bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
|
||||
VR512:$src0)),
|
||||
(VPANDDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
def : Pat<(v16f32 (vselect VK16WM:$mask,
|
||||
(bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
|
||||
VR512:$src0)),
|
||||
(VPORDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
def : Pat<(v16f32 (vselect VK16WM:$mask,
|
||||
(bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
|
||||
VR512:$src0)),
|
||||
(VPXORDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
def : Pat<(v16f32 (vselect VK16WM:$mask,
|
||||
(bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
|
||||
VR512:$src0)),
|
||||
(VPANDNDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
|
||||
def : Pat<(v16f32 (vselect VK16WM:$mask,
|
||||
(bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
|
||||
(bitconvert (v16i32 immAllZerosV)))),
|
||||
(VPANDDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
def : Pat<(v16f32 (vselect VK16WM:$mask,
|
||||
(bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
|
||||
(bitconvert (v16i32 immAllZerosV)))),
|
||||
(VPORDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
def : Pat<(v16f32 (vselect VK16WM:$mask,
|
||||
(bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
|
||||
(bitconvert (v16i32 immAllZerosV)))),
|
||||
(VPXORDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
def : Pat<(v16f32 (vselect VK16WM:$mask,
|
||||
(bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
|
||||
(bitconvert (v16i32 immAllZerosV)))),
|
||||
(VPANDNDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
|
||||
def : Pat<(v8f64 (vselect VK8WM:$mask,
|
||||
(bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
|
||||
VR512:$src0)),
|
||||
(VPANDQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
def : Pat<(v8f64 (vselect VK8WM:$mask,
|
||||
(bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
|
||||
VR512:$src0)),
|
||||
(VPORQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
def : Pat<(v8f64 (vselect VK8WM:$mask,
|
||||
(bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
|
||||
VR512:$src0)),
|
||||
(VPXORQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
def : Pat<(v8f64 (vselect VK8WM:$mask,
|
||||
(bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
|
||||
VR512:$src0)),
|
||||
(VPANDNQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
|
||||
def : Pat<(v8f64 (vselect VK8WM:$mask,
|
||||
(bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
|
||||
(bitconvert (v16i32 immAllZerosV)))),
|
||||
(VPANDQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
def : Pat<(v8f64 (vselect VK8WM:$mask,
|
||||
(bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
|
||||
(bitconvert (v16i32 immAllZerosV)))),
|
||||
(VPORQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
def : Pat<(v8f64 (vselect VK8WM:$mask,
|
||||
(bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
|
||||
(bitconvert (v16i32 immAllZerosV)))),
|
||||
(VPXORQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
def : Pat<(v8f64 (vselect VK8WM:$mask,
|
||||
(bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
|
||||
(bitconvert (v16i32 immAllZerosV)))),
|
||||
(VPANDNQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
|
||||
// Instantiates avx512_fp_logical_lowering once per vector type info for the
// logical operation OpNode.
//
// InstrStr is the instruction name prefix; a size suffix is pasted onto it
// before it is handed down. The f32 type infos use the D-suffixed names and
// the f64 type infos use the Q-suffixed names. The Z128 and Z256 variants are
// guarded by HasVLX, the plain Z variants by HasAVX512.
multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
|
||||
defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
|
||||
defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
|
||||
defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
|
||||
defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
|
||||
defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
|
||||
defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
|
||||
}
|
||||
|
||||
// Emit the lowering patterns for each of the four logical DAG nodes, pairing
// every node with the instruction name prefix that the suffixes are pasted
// onto (X86andnp pairs with the VPANDN prefix).
defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
|
||||
defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
|
||||
defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
|
||||
defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
|
||||
|
||||
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
X86VectorVTInfo _> {
|
||||
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
|
|
|
@ -985,20 +985,17 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
|
|||
define <16 x float> @test_fxor(<16 x float> %a) {
|
||||
; AVX512F-LABEL: test_fxor:
|
||||
; AVX512F: ## BB#0:
|
||||
; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
|
||||
; AVX512F-NEXT: vpxorq %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: test_fxor:
|
||||
; AVX512VL: ## BB#0:
|
||||
; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
|
||||
; AVX512VL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_fxor:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
|
||||
; AVX512BW-NEXT: vpxorq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: test_fxor:
|
||||
|
@ -1051,20 +1048,17 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) {
|
|||
define <8 x double> @fabs_v8f64(<8 x double> %p)
|
||||
; AVX512F-LABEL: fabs_v8f64:
|
||||
; AVX512F: ## BB#0:
|
||||
; AVX512F-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
|
||||
; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fabs_v8f64:
|
||||
; AVX512VL: ## BB#0:
|
||||
; AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
|
||||
; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: fabs_v8f64:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
|
||||
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fabs_v8f64:
|
||||
|
@ -1085,20 +1079,17 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
|
|||
define <16 x float> @fabs_v16f32(<16 x float> %p)
|
||||
; AVX512F-LABEL: fabs_v16f32:
|
||||
; AVX512F: ## BB#0:
|
||||
; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
|
||||
; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: fabs_v16f32:
|
||||
; AVX512VL: ## BB#0:
|
||||
; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
|
||||
; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: fabs_v16f32:
|
||||
; AVX512BW: ## BB#0:
|
||||
; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
|
||||
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: fabs_v16f32:
|
||||
|
|
|
@ -145,8 +145,7 @@ define <8 x double> @fabs_v8f64(<8 x double> %p) {
|
|||
;
|
||||
; X32_AVX512VL-LABEL: fabs_v8f64:
|
||||
; X32_AVX512VL: # BB#0:
|
||||
; X32_AVX512VL-NEXT: vbroadcastsd {{\.LCPI.*}}, %zmm1
|
||||
; X32_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||
; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
|
||||
; X32_AVX512VL-NEXT: retl
|
||||
;
|
||||
; X32_AVX512VLDQ-LABEL: fabs_v8f64:
|
||||
|
@ -163,8 +162,7 @@ define <8 x double> @fabs_v8f64(<8 x double> %p) {
|
|||
;
|
||||
; X64_AVX512VL-LABEL: fabs_v8f64:
|
||||
; X64_AVX512VL: # BB#0:
|
||||
; X64_AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
|
||||
; X64_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||
; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; X64_AVX512VL-NEXT: retq
|
||||
;
|
||||
; X64_AVX512VLDQ-LABEL: fabs_v8f64:
|
||||
|
@ -186,8 +184,7 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) {
|
|||
;
|
||||
; X32_AVX512VL-LABEL: fabs_v16f32:
|
||||
; X32_AVX512VL: # BB#0:
|
||||
; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %zmm1
|
||||
; X32_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||
; X32_AVX512VL-NEXT: vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
|
||||
; X32_AVX512VL-NEXT: retl
|
||||
;
|
||||
; X32_AVX512VLDQ-LABEL: fabs_v16f32:
|
||||
|
@ -204,8 +201,7 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) {
|
|||
;
|
||||
; X64_AVX512VL-LABEL: fabs_v16f32:
|
||||
; X64_AVX512VL: # BB#0:
|
||||
; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
|
||||
; X64_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||
; X64_AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; X64_AVX512VL-NEXT: retq
|
||||
;
|
||||
; X64_AVX512VLDQ-LABEL: fabs_v16f32:
|
||||
|
|
Loading…
Reference in New Issue