[AVX-512] Add more patterns for masked and broadcasted logical operations where the select or broadcast has a floating point type.

These are needed in order to remove the masked floating point logical operation intrinsics and use native IR.
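
For illustration (a hand-written sketch, not taken from the commit's tests), the native-IR form of a masked single-precision AND that the new vselect patterns are meant to match looks like this:

; Sketch only: function and value names are made up for the example.
define <4 x float> @mask_and_ps(<4 x float> %a, <4 x float> %b,
                                <4 x float> %src0, <4 x i1> %mask) {
  %ai   = bitcast <4 x float> %a to <2 x i64>
  %bi   = bitcast <4 x float> %b to <2 x i64>
  %andi = and <2 x i64> %ai, %bi
  %andf = bitcast <2 x i64> %andi to <4 x float>
  ; The select of the bitcasted AND against the passthru operand is what
  ; the new v4f32 patterns fold into a single masked instruction
  ; (VPANDDZ128rrk below).
  %res  = select <4 x i1> %mask, <4 x float> %andf, <4 x float> %src0
  ret <4 x float> %res
}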

llvm-svn: 280465
Craig Topper 2016-09-02 05:29:13 +00:00
parent 00aecd97bf
commit 45d6503089
3 changed files with 75 additions and 231 deletions

@@ -4238,215 +4238,72 @@ defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, HasDQI,
SSE_ALU_ITINS_P, 1>;
// Patterns catch floating point selects with bitcasted integer logic ops.
let Predicates = [HasVLX] in {
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPANDDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPORDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPXORDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPANDNDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1,
VR128X:$src2)>;
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPANDDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPORDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPXORDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v4f32 (vselect VK4WM:$mask,
(bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPANDNDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPANDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPORQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPXORQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
VR128X:$src0)),
(VPANDNQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPANDQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPORQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPXORQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v2f64 (vselect VK2WM:$mask,
(bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
(bitconvert (v4i32 immAllZerosV)))),
(VPANDNQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPANDDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPORDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPXORDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPANDNDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPANDDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPORDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPXORDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v8f32 (vselect VK8WM:$mask,
(bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPANDNDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPANDQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPORQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPXORQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
VR256X:$src0)),
(VPANDNQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPANDQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPORQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPXORQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
def : Pat<(v4f64 (vselect VK4WM:$mask,
(bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
(bitconvert (v8i32 immAllZerosV)))),
(VPANDNQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
X86VectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
// Masked register-register logical operations.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
_.RC:$src2)>;
// Masked register-memory logical operations.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert (_.i64VT (OpNode _.RC:$src1,
(load addr:$src2)))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
addr:$src2)>;
// Register-broadcast logical operations.
def : Pat<(_.i64VT (OpNode _.RC:$src1,
(bitconvert (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))),
(!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert
(_.i64VT (OpNode _.RC:$src1,
(bitconvert (_.VT
(X86VBroadcast
(_.ScalarLdFrag addr:$src2))))))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(bitconvert
(_.i64VT (OpNode _.RC:$src1,
(bitconvert (_.VT
(X86VBroadcast
(_.ScalarLdFrag addr:$src2))))))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
_.RC:$src1, addr:$src2)>;
}
}
let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPANDDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPORDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPXORDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPANDNDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPANDDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPORDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPXORDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v16f32 (vselect VK16WM:$mask,
(bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPANDNDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPANDQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPORQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPXORQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
VR512:$src0)),
(VPANDNQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPANDQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPORQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPXORQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
def : Pat<(v8f64 (vselect VK8WM:$mask,
(bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
(bitconvert (v16i32 immAllZerosV)))),
(VPANDNQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
}
defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
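
As an illustration of the new register-broadcast patterns (again a hand-written sketch, not code from this commit): when a scalar load is splatted and bitcast into the integer logic op, isel can now fold the splat into the instruction's embedded broadcast instead of emitting a separate vbroadcast:

; Sketch only, assuming the splat reaches isel as X86VBroadcast of the load.
define <8 x double> @and_splat_mem(<8 x double> %a, double* %p) {
  %s     = load double, double* %p
  %ins   = insertelement <8 x double> undef, double %s, i32 0
  %splat = shufflevector <8 x double> %ins, <8 x double> undef, <8 x i32> zeroinitializer
  %ai    = bitcast <8 x double> %a to <8 x i64>
  %si    = bitcast <8 x double> %splat to <8 x i64>
  ; Selects to vpandq (%rdi){1to8}, %zmm0, %zmm0 rather than
  ; vbroadcastsd + vpandq.
  %andi  = and <8 x i64> %ai, %si
  %res   = bitcast <8 x i64> %andi to <8 x double>
  ret <8 x double> %res
}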

@@ -985,20 +985,17 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
define <16 x float> @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512F-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512VL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512BW-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_fxor:
@@ -1051,20 +1048,17 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) {
define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v8f64:
@@ -1085,20 +1079,17 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
define <16 x float> @fabs_v16f32(<16 x float> %p)
; AVX512F-LABEL: fabs_v16f32:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512F-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v16f32:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v16f32:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v16f32:

@@ -145,8 +145,7 @@ define <8 x double> @fabs_v8f64(<8 x double> %p) {
;
; X32_AVX512VL-LABEL: fabs_v8f64:
; X32_AVX512VL: # BB#0:
; X32_AVX512VL-NEXT: vbroadcastsd {{\.LCPI.*}}, %zmm1
; X32_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v8f64:
@@ -163,8 +162,7 @@ define <8 x double> @fabs_v8f64(<8 x double> %p) {
;
; X64_AVX512VL-LABEL: fabs_v8f64:
; X64_AVX512VL: # BB#0:
; X64_AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %zmm1
; X64_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v8f64:
@@ -186,8 +184,7 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) {
;
; X32_AVX512VL-LABEL: fabs_v16f32:
; X32_AVX512VL: # BB#0:
; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %zmm1
; X32_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; X32_AVX512VL-NEXT: vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v16f32:
@@ -204,8 +201,7 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) {
;
; X64_AVX512VL-LABEL: fabs_v16f32:
; X64_AVX512VL: # BB#0:
; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
; X64_AVX512VL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; X64_AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v16f32:
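
The fabs updates above fall out of the same broadcast patterns: the backend expands @llvm.fabs.* into an AND with a splatted sign-clearing constant, roughly equivalent to the hand-written IR below, and that splatted constant-pool load now folds into the embedded-broadcast form of the logic instruction:

; Sketch only: an explicit sign-bit-clearing AND equivalent to fabs.
define <8 x double> @fabs_manual(<8 x double> %p) {
  %bits = bitcast <8 x double> %p to <8 x i64>
  ; 0x7FFFFFFFFFFFFFFF clears the sign bit of each lane; the splatted
  ; constant is now read as vpandq mem{1to8} instead of being
  ; materialized with a separate vbroadcastsd.
  %clr  = and <8 x i64> %bits, <i64 9223372036854775807, i64 9223372036854775807, i64 9223372036854775807, i64 9223372036854775807, i64 9223372036854775807, i64 9223372036854775807, i64 9223372036854775807, i64 9223372036854775807>
  %res  = bitcast <8 x i64> %clr to <8 x double>
  ret <8 x double> %res
}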