[X86][AVX512] add reduce/range/scalef/rndScale

include encoding and intrinsics

Differential Revision: http://reviews.llvm.org/D11222

llvm-svn: 242896
This commit is contained in:
Asaf Badouh 2015-07-22 12:00:43 +00:00
parent e9ea5a66f2
commit a5b2e5e2a7
14 changed files with 2109 additions and 95 deletions

View File

@ -4221,12 +4221,60 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_rndscale_ps_512: GCCBuiltin<"__builtin_ia32_rndscaleps_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_rndscale_pd_512: GCCBuiltin<"__builtin_ia32_rndscalepd_mask">,
def int_x86_avx512_mask_rndscale_pd_128 : GCCBuiltin<"__builtin_ia32_rndscalepd_128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_rndscale_pd_256 : GCCBuiltin<"__builtin_ia32_rndscalepd_256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_rndscale_ps_128 : GCCBuiltin<"__builtin_ia32_rndscaleps_128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_rndscale_ps_256 : GCCBuiltin<"__builtin_ia32_rndscaleps_256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_rndscale_ps_512 : GCCBuiltin<"__builtin_ia32_rndscaleps_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
// Masked packed "reduce" intrinsics for 128-, 256- and 512-bit vectors.
// Operand types, in order: a vector of the result type, an i32, a second
// vector of the result type and an integer mask (i8, or i16 for the
// 16-element form). Only the 512-bit variants take an extra trailing i32.
// NOTE(review): presumably (source, imm8, pass-through, mask[, rounding/SAE])
// - confirm against the INTR_TYPE_2OP_MASK_RM lowering.
def int_x86_avx512_mask_reduce_pd_128 : GCCBuiltin<"__builtin_ia32_reducepd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_reduce_pd_256 : GCCBuiltin<"__builtin_ia32_reducepd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_reduce_pd_512 : GCCBuiltin<"__builtin_ia32_reducepd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_reduce_ps_128 : GCCBuiltin<"__builtin_ia32_reduceps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_reduce_ps_256 : GCCBuiltin<"__builtin_ia32_reduceps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_reduce_ps_512 : GCCBuiltin<"__builtin_ia32_reduceps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
// Masked packed "range" intrinsics for 128-, 256- and 512-bit vectors.
// Operand order (as consumed by the INTR_TYPE_3OP_MASK_RM lowering):
// src1, src2, i32 immediate, pass-through vector, integer mask. Only the
// 512-bit variants carry a trailing i32 rounding operand; when it is absent
// the lowering substitutes the "current direction" rounding mode.
def int_x86_avx512_mask_range_pd_128 : GCCBuiltin<"__builtin_ia32_rangepd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_range_pd_256 : GCCBuiltin<"__builtin_ia32_rangepd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty,
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_range_pd_512 : GCCBuiltin<"__builtin_ia32_rangepd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty,
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_range_ps_128 : GCCBuiltin<"__builtin_ia32_rangeps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_range_ps_256 : GCCBuiltin<"__builtin_ia32_rangeps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty,
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty,
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
}
// Vector load with broadcast
@ -4508,7 +4556,28 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_scalef_sd : GCCBuiltin<"__builtin_ia32_scalefsd_round">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_scalef_ss : GCCBuiltin<"__builtin_ia32_scalefss_round">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_scalef_pd_128 : GCCBuiltin<"__builtin_ia32_scalefpd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;

View File

@ -15460,6 +15460,24 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1, Src2, Rnd),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_MASK_RM: {
  // Lower a masked two-source operation that also takes an immediate.
  // Intrinsic operands: 1 and 2 are the sources, 3 is the immediate, 4 is
  // the pass-through value, 5 is the mask and 6, if present, is the
  // rounding mode.
  SDValue PassThru = Op.getOperand(4);
  SDValue Mask = Op.getOperand(5);
  // The intrinsic exists in two flavours. A 7-operand call supplies its own
  // rounding mode; any other form falls back to the "current" rounding
  // direction.
  const bool HasRoundingOperand = Op.getNumOperands() == 7;
  SDValue Rnd =
      HasRoundingOperand
          ? Op.getOperand(6)
          : DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
  SDValue Unmasked = DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
                                 Op.getOperand(2), Op.getOperand(3), Rnd);
  return getVectorMaskingNode(Unmasked, Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
@ -19039,7 +19057,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND";
case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND";
case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND";
case X86ISD::RNDSCALE: return "X86ISD::RNDSCALE";
case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
case X86ISD::VREDUCE: return "X86ISD::VREDUCE";
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
case X86ISD::XTEST: return "X86ISD::XTEST";

View File

@ -386,6 +386,10 @@ namespace llvm {
VFIXUPIMM,
//Range Restriction Calculation For Packed Pairs of Float32/64 values
VRANGE,
// Reduce - Perform Reduction Transformation on scalar\packed FP
VREDUCE,
// RndScale - Round FP Values To Include A Given Number Of Fraction Bits
VRNDSCALE,
// Broadcast scalar to vector
VBROADCAST,
// Broadcast subvector to vector
@ -419,7 +423,6 @@ namespace llvm {
FNMSUB_RND,
FMADDSUB_RND,
FMSUBADD_RND,
RNDSCALE,
// Compress and expand
COMPRESS,

View File

@ -3394,7 +3394,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
}
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode VecNode, OpndItins itins, bit IsCommutable> {
SDNode VecNode, OpndItins itins, bit IsCommutable = 0> {
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
@ -3569,13 +3569,34 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
}//let mayLoad = 1
}
multiclass avx512_fp_scalef_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
// Scalar scalef forms without an explicit rounding operand:
//   rr - register/register
//   rm - register/memory
// Both are masked scalar instructions whose pattern passes FROUND_CURRENT to
// OpNode. The mnemonic is OpcodeStr with the type suffix of "_" appended.
// NOTE(review): the rm form uses the full-vector _.MemOp/_.LdFrag rather than
// a scalar load wrapped in scalar_to_vector, unlike avx512_rndscale_scalar -
// confirm this is intended.
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>;
let mayLoad = 1 in {
defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
(OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>;
}//let mayLoad = 1
}
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode> {
defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNode, f32x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNode, SSE_ALU_ITINS_S.s>,
EVEX_4V,EVEX_CD8<32, CD8VT1>;
defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNode, f64x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNode, SSE_ALU_ITINS_S.d>,
EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f32x_info>,
@ -3588,7 +3609,7 @@ multiclass avx512_fp_scalef_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
}
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD;
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD;
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
@ -5481,47 +5502,6 @@ let Predicates = [HasAVX512] in {
(VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
}
multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
PatFrag mem_frag, Domain d> {
let ExeDomain = d in {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
def r : AVX512AIi8<opc, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX;
// Vector intrinsic operation, mem
def m : AVX512AIi8<opc, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, EVEX;
} // ExeDomain
}
defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
loadv16f32, SSEPackedSingle>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
imm:$src2, (v16f32 VR512:$src1), (i16 -1),
FROUND_CURRENT)),
(VRNDSCALEPSZr VR512:$src1, imm:$src2)>;
defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
loadv8f64, SSEPackedDouble>, EVEX_V512,
VEX_W, EVEX_CD8<64, CD8VF>;
def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
imm:$src2, (v8f64 VR512:$src1), (i8 -1),
FROUND_CURRENT)),
(VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
multiclass
avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
@ -5529,20 +5509,20 @@ avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"{sae}, $src3, $src2, $src1", "$src1, $src2, $src3, {sae}",
(_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT _.RC:$src2),
"$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B;
let mayLoad = 1 in
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScale (_.VT _.RC:$src1),
(_.VT (X86RndScales (_.VT _.RC:$src1),
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
}
@ -5587,29 +5567,6 @@ defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>,
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (ffloor VR512:$src)),
(VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
(VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)),
(VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
def : Pat<(v16f32 (frint VR512:$src)),
(VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),
(VRNDSCALEPSZr VR512:$src, (i32 0x3))>;
def : Pat<(v8f64 (ffloor VR512:$src)),
(VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
(VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)),
(VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
def : Pat<(v8f64 (frint VR512:$src)),
(VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
(VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
}
//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------
@ -6321,6 +6278,62 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>,
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
EVEX, VEX_W;
// Unary packed FP operation taking an 8-bit immediate:
//   rri  : reg_vec1 = op(reg_vec, imm)
//   rmi  : reg_vec1 = op(mem_vec, imm)
//   rmbi : reg_vec1 = op(broadcast(eltVt), imm)
// Every form is created with FROUND_CURRENT. The two operand strings passed
// to AVX512_maskable are the AT&T order followed by the Intel order.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      X86VectorVTInfo _>{
  // Intel operand string must name $src1 then $src2; it previously repeated
  // $src2, so the source register was never printed or parsed.
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2),
                              (i32 FROUND_CURRENT))>;
  let mayLoad = 1 in {
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.MemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                              (i32 imm:$src2),
                              (i32 FROUND_CURRENT))>;
    // Broadcast form: a single element is loaded and splatted; EVEX_B marks
    // the embedded-broadcast encoding.
    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                      "${src1}"##_.BroadcastStr##", $src2",
                      (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
                              (i32 imm:$src2),
                              (i32 FROUND_CURRENT))>, EVEX_B;
  }
}
// Unary packed FP operation with an immediate and {sae}:
//   rrib : reg_vec1 = op(reg_vec, imm), {sae}
// Only a register form is defined; its pattern passes FROUND_NO_EXC to
// OpNode and the instruction is tagged EVEX_B.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2,{sae}, $src1",
"$src1, {sae}, $src2",
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
// Instantiates the unary packed-immediate forms for every vector width in the
// AVX512VLVectorVTInfo "_":
//   Z    - 512-bit, guarded by "prd"; the only width that also gets the
//          {sae} register form
//   Z128 - 128-bit, guarded by "prd" and HasVLX
//   Z256 - 256-bit, guarded by "prd" and HasVLX
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
let Predicates = [prd] in {
defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
EVEX_V128;
defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
EVEX_V256;
}
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
// op(reg_vec2,mem_vec,imm)
// op(reg_vec2,broadcast(eltVt),imm)
@ -6328,27 +6341,27 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>{
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i8 imm:$src3),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>;
let mayLoad = 1 in {
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
(ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
(i8 imm:$src3),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>, EVEX_B;
}
}
@ -6388,20 +6401,20 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i8 imm:$src3),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>;
let mayLoad = 1 in {
defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
(ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>;
let isAsmParserOnly = 1 in {
@ -6417,18 +6430,25 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3,{sae}, $src2, $src1",
"$src1, $src2,{sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i8 imm:$src3),
(i32 imm:$src3),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _> {
defm NAME: avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _>;
defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3,{sae}, $src2, $src1",
"$src1, $src2,{sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
@ -6466,6 +6486,14 @@ multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
}
}
// Instantiates the unary packed-immediate instructions for both element
// types: PS (f32 vectors, opcode opcPs) and PD (f64 vectors, opcode opcPd,
// additionally tagged VEX_W). Both share OpNode and the predicate "prd".
multiclass avx512_common_fp_sae_packed_imm_all<string OpcodeStr, bits<8> opcPs,
bits<8> opcPd, SDNode OpNode, Predicate prd>{
defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info, opcPs,
OpNode, prd>, EVEX_CD8<32, CD8VF>;
defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info, opcPd,
OpNode, prd>,EVEX_CD8<64, CD8VF> , VEX_W;
}
defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd",
avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
@ -6480,6 +6508,9 @@ defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info,
0x55, X86VFixupimm, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
// Packed reduce and rndscale, PS and PD variants at every vector width.
// VREDUCE uses opcode 0x56 for both element types and requires HasDQI;
// VRNDSCALE uses 0x08 (PS) and 0x09 (PD) and requires HasAVX512.
defm VREDUCE : avx512_common_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, X86VReduce, HasDQI>,AVX512AIi8Base,EVEX;
defm VRNDSCALE : avx512_common_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, X86VRndScale, HasAVX512>,AVX512AIi8Base, EVEX;
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
0x50, X86VRange, HasDQI>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
@ -6494,6 +6525,12 @@ defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
0x51, X86VRange, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
// Scalar reduce (double and single precision). Both share opcode 0x57 and
// the X86Reduces node and require HasDQI; the double-precision variant is
// additionally tagged VEX_W.
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
0x57, X86Reduces, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
0x57, X86Reduces, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
bits<8> opc, SDNode OpNode = X86Shuf128>{
@ -6505,6 +6542,29 @@ multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
}
}
// Select the generic FP rounding nodes on 512-bit vectors to the
// register-immediate VRNDSCALE forms. The immediate used for each node is:
//   ffloor -> 0x1, fceil -> 0x2, ftrunc -> 0x3, frint -> 0x4,
//   fnearbyint -> 0xC
let Predicates = [HasAVX512] in {
// v16f32
def : Pat<(v16f32 (ffloor VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0x1))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0x2))>;
def : Pat<(v16f32 (frint VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),
(VRNDSCALEPSZrri VR512:$src, (i32 0x3))>;
// v8f64
def : Pat<(v8f64 (ffloor VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0x1))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0x2))>;
def : Pat<(v8f64 (frint VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
(VRNDSCALEPDZrri VR512:$src, (i32 0x3))>;
}
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

View File

@ -232,6 +232,8 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>]>;
def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisInt<2>, SDTCisInt<3>]>;
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
@ -302,6 +304,8 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>;
def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>;
def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImmRound>;
def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImmRound>;
def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
@ -346,7 +350,8 @@ def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>;
def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>;
def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>;
def X86RndScales : SDNode<"X86ISD::VRNDSCALE", STDFp3SrcRm>;
def X86Reduces : SDNode<"X86ISD::VREDUCE", STDFp3SrcRm>;
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,

View File

@ -22,7 +22,7 @@ enum IntrinsicType {
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
VPERM_3OP_MASKZ,
INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
EXPAND_FROM_MEM, BLEND
@ -903,10 +903,32 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pxor_q_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_q_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_ps_128, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_ps_256, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_ps_512, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_range_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGE, 0),
X86_INTRINSIC_DATA(avx512_mask_reduce_pd_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
X86_INTRINSIC_DATA(avx512_mask_reduce_pd_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
X86_INTRINSIC_DATA(avx512_mask_reduce_pd_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
X86_INTRINSIC_DATA(avx512_mask_reduce_ps_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
X86_INTRINSIC_DATA(avx512_mask_reduce_ps_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
X86_INTRINSIC_DATA(avx512_mask_reduce_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
X86_INTRINSIC_DATA(avx512_mask_reduce_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VREDUCE, 0),
X86_INTRINSIC_DATA(avx512_mask_reduce_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VREDUCE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::RNDSCALE, 0),
X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::RNDSCALE, 0),
X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM,
X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM,
@ -919,6 +941,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM,
X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,

View File

@ -3352,3 +3352,29 @@ define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16
ret <16 x i32> %res2
}
; Scalar single-precision scalef, called twice and summed so both calls stay
; live. The first call is masked by %x4 and passes 4 as the trailing i32; it
; is expected to emit a k1-masked vscalefss. The second call uses an all-ones
; mask and passes 8; it is expected to print an explicit {rn-sae}.
declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ss
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vscalefss {{.*}}{%k1}
; CHECK: vscalefss {rn-sae}
define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
%res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}
; Scalar double-precision scalef, called twice and summed so both calls stay
; live. The first call is masked by %x4 and passes 4 as the trailing i32; it
; is expected to emit a k1-masked vscalefsd. The second call uses an all-ones
; mask and passes 8; it is expected to print an explicit {rn-sae}.
declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_sd
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vscalefsd {{.*}}{%k1}
; CHECK: vscalefsd {rn-sae}
define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
%res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

View File

@ -1,3 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s
declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
@ -192,3 +193,125 @@ define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x f
ret <8 x float> %res2
}
; Lowering test for llvm.x86.avx512.mask.reduce.pd.512.
; The first call is masked by the i8 argument (immediate 8, trailing operand 4);
; the second uses an all-ones mask (immediate 4, trailing operand 8). The
; directives expect a {%k1}-masked vreducepd, then a vreducepd with {sae}.
declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreducepd {{.*}}{%k1}
; CHECK: vreducepd
; CHECK: {sae}
define <8 x double> @test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %a, <8 x double> %b, i8 %mask) {
  %masked = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %a, i32 8, <8 x double> %b, i8 %mask, i32 4)
  %unmasked = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %a, i32 4, <8 x double> %b, i8 -1, i32 8)
  %sum = fadd <8 x double> %masked, %unmasked
  ret <8 x double> %sum
}
; Lowering test for llvm.x86.avx512.mask.reduce.ps.512.
; %res is masked by %x3 and passes trailing operand 8, so the first vreduceps
; is expected to carry {sae} and then the {%k1} write-mask; %res1 uses an
; all-ones mask (i16 -1) with trailing operand 4, so only the mnemonic is matched.
declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreduceps
; CHECK: {sae}
; CHECK: {%k1}
; CHECK: vreduceps
define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
%res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8)
%res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
}
; Lowering test for llvm.x86.avx512.mask.range.pd.512.
; %res is masked by %x4 (trailing operand 4), so the first vrangepd is expected
; to carry the {%k1} write-mask; %res1 uses an all-ones mask (i8 -1) with
; trailing operand 8, so the second vrangepd is expected to print {sae}.
declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangepd
; CHECK: {%k1}
; CHECK: vrangepd
; CHECK: {sae}
define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
%res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x double> %x3, i8 %x4, i32 4)
%res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
}
; Lowering test for llvm.x86.avx512.mask.range.ps.512.
; %res is masked by %x4 (trailing operand 4), so the first vrangeps is expected
; to carry the {%k1} write-mask; %res1 uses an all-ones mask (i16 -1) with
; trailing operand 8, so the second vrangeps is expected to print {sae}.
declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangeps
; CHECK: {%k1}
; CHECK: vrangeps
; CHECK: {sae}
define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
%res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4)
%res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
}
; Lowering test for llvm.x86.avx512.mask.reduce.ss.
; %res is masked by %x4 (last operand 4), so the first vreducess is expected to
; carry the {%k1} write-mask; %res1 uses an all-ones mask (i8 -1) with last
; operand 8, so the second vreducess is expected to print {sae}.
declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ss
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreducess
; CHECK: {%k1}
; CHECK: vreducess
; CHECK: {sae}
define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
%res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4)
%res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}
; Lowering test for llvm.x86.avx512.mask.range.ss.
; Both calls pass last operand 8, so both vrangess instructions are expected to
; print {sae}. %res is additionally masked by %x4, so the first instruction is
; also expected to carry the {%k1} write-mask; %res1 uses an all-ones mask.
declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_ss
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangess
; CHECK: {sae}
; CHECK: {%k1}
; CHECK: vrangess
; CHECK: {sae}
define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
%res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
%res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}
; Lowering test for llvm.x86.avx512.mask.reduce.sd.
; %res is masked by %x4 (last operand 4), so the first vreducesd is expected to
; carry the {%k1} write-mask; %res1 uses an all-ones mask (i8 -1) with last
; operand 8, so the second vreducesd is expected to print {sae}.
declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_sd
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreducesd
; CHECK: {%k1}
; CHECK: vreducesd
; CHECK: {sae}
define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
%res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}
; Lowering test for llvm.x86.avx512.mask.range.sd.
; %res is masked by %x4 (last operand 4), so the first vrangesd is expected to
; carry the {%k1} write-mask; %res1 uses an all-ones mask (i8 -1) with last
; operand 8, so the second vrangesd is expected to print {sae}.
declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_sd
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangesd
; CHECK: {%k1}
; CHECK: vrangesd
; CHECK: {sae}
define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
%res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
%res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}

View File

@ -1537,3 +1537,114 @@ define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}
; Lowering test for llvm.x86.avx512.mask.reduce.pd.128.
; One call is masked by the i8 argument (immediate 4), the other uses an
; all-ones mask (immediate 8); the directives expect a {%k1}-masked vreducepd
; followed by a second vreducepd.
declare <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double>, i32, <2 x double>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_128
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreducepd {{.*}}{%k1}
; CHECK: vreducepd
define <2 x double> @test_int_x86_avx512_mask_reduce_pd_128(<2 x double> %a, <2 x double> %b, i8 %mask) {
  %masked = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %a, i32 4, <2 x double> %b, i8 %mask)
  %unmasked = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %a, i32 8, <2 x double> %b, i8 -1)
  %sum = fadd <2 x double> %masked, %unmasked
  ret <2 x double> %sum
}
; Lowering test for llvm.x86.avx512.mask.reduce.pd.256.
; One call is masked by the i8 argument (immediate 4), the other uses an
; all-ones mask (immediate 0); the directives expect a {%k1}-masked vreducepd
; followed by a second vreducepd.
declare <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double>, i32, <4 x double>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_256
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreducepd {{.*}}{%k1}
; CHECK: vreducepd
define <4 x double> @test_int_x86_avx512_mask_reduce_pd_256(<4 x double> %a, <4 x double> %b, i8 %mask) {
  %masked = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %a, i32 4, <4 x double> %b, i8 %mask)
  %unmasked = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %a, i32 0, <4 x double> %b, i8 -1)
  %sum = fadd <4 x double> %masked, %unmasked
  ret <4 x double> %sum
}
; Lowering test for llvm.x86.avx512.mask.reduce.ps.128.
; One call is masked by the i8 argument (immediate 4), the other uses an
; all-ones mask (immediate 88); the directives expect a {%k1}-masked vreduceps
; followed by a second vreduceps.
declare <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float>, i32, <4 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_128
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreduceps {{.*}}{%k1}
; CHECK: vreduceps
define <4 x float> @test_int_x86_avx512_mask_reduce_ps_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
  %masked = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %a, i32 4, <4 x float> %b, i8 %mask)
  %unmasked = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %a, i32 88, <4 x float> %b, i8 -1)
  %sum = fadd <4 x float> %masked, %unmasked
  ret <4 x float> %sum
}
; Lowering test for llvm.x86.avx512.mask.reduce.ps.256.
; Both calls use immediate 11; they differ only in the mask (the i8 argument
; versus all-ones). The directives expect a {%k1}-masked vreduceps followed by
; a second vreduceps.
declare <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float>, i32, <8 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_256
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreduceps {{.*}}{%k1}
; CHECK: vreduceps
define <8 x float> @test_int_x86_avx512_mask_reduce_ps_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
  %masked = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %a, i32 11, <8 x float> %b, i8 %mask)
  %unmasked = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %a, i32 11, <8 x float> %b, i8 -1)
  %sum = fadd <8 x float> %masked, %unmasked
  ret <8 x float> %sum
}
; Lowering test for llvm.x86.avx512.mask.range.pd.128.
; One call is masked by the i8 argument (immediate 4), the other uses an
; all-ones mask (immediate 8); the directives expect a {%k1}-masked vrangepd
; followed by a second vrangepd.
declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_128
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangepd {{.*}}{%k1}
; CHECK: vrangepd
define <2 x double> @test_int_x86_avx512_mask_range_pd_128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
  %masked = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %a, <2 x double> %b, i32 4, <2 x double> %c, i8 %mask)
  %unmasked = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %a, <2 x double> %b, i32 8, <2 x double> %c, i8 -1)
  %sum = fadd <2 x double> %masked, %unmasked
  ret <2 x double> %sum
}
; Lowering test for llvm.x86.avx512.mask.range.pd.256.
; One call is masked by the i8 argument (immediate 4), the other uses an
; all-ones mask (immediate 88); the directives expect a {%k1}-masked vrangepd
; followed by a second vrangepd.
declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_256
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangepd {{.*}}{%k1}
; CHECK: vrangepd
define <4 x double> @test_int_x86_avx512_mask_range_pd_256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
  %masked = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %a, <4 x double> %b, i32 4, <4 x double> %c, i8 %mask)
  %unmasked = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %a, <4 x double> %b, i32 88, <4 x double> %c, i8 -1)
  %sum = fadd <4 x double> %masked, %unmasked
  ret <4 x double> %sum
}
; Lowering test for llvm.x86.avx512.mask.range.ps.128.
; One call is masked by the i8 argument (immediate 4), the other uses an
; all-ones mask (immediate 88); the directives expect a {%k1}-masked vrangeps
; followed by a second vrangeps.
declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_128
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangeps {{.*}}{%k1}
; CHECK: vrangeps
define <4 x float> @test_int_x86_avx512_mask_range_ps_128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
  %masked = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %a, <4 x float> %b, i32 4, <4 x float> %c, i8 %mask)
  %unmasked = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %a, <4 x float> %b, i32 88, <4 x float> %c, i8 -1)
  %sum = fadd <4 x float> %masked, %unmasked
  ret <4 x float> %sum
}
; Lowering test for llvm.x86.avx512.mask.range.ps.256.
; One call is masked by the i8 argument (immediate 4), the other uses an
; all-ones mask (immediate 88); the directives expect a {%k1}-masked vrangeps
; followed by a second vrangeps.
declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_256
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangeps {{.*}}{%k1}
; CHECK: vrangeps
define <8 x float> @test_int_x86_avx512_mask_range_ps_256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
  %masked = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %a, <8 x float> %b, i32 4, <8 x float> %c, i8 %mask)
  %unmasked = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %a, <8 x float> %b, i32 88, <8 x float> %c, i8 -1)
  %sum = fadd <8 x float> %masked, %unmasked
  ret <8 x float> %sum
}

View File

@ -3481,3 +3481,55 @@ define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x f
ret <8 x float> %res2
}
; Lowering test for llvm.x86.avx512.mask.rndscale.pd.128.
; One call is masked by the i8 argument (immediate 4), the other uses an
; all-ones mask (immediate 88); the directives expect a {%k1}-masked
; vrndscalepd followed by a second vrndscalepd.
declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_128
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrndscalepd {{.*}}{%k1}
; CHECK: vrndscalepd
define <2 x double> @test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %a, <2 x double> %b, i8 %mask) {
  %masked = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %a, i32 4, <2 x double> %b, i8 %mask)
  %unmasked = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %a, i32 88, <2 x double> %b, i8 -1)
  %sum = fadd <2 x double> %masked, %unmasked
  ret <2 x double> %sum
}
; Lowering test for llvm.x86.avx512.mask.rndscale.pd.256.
; One call is masked by the i8 argument (immediate 4), the other uses an
; all-ones mask (immediate 88); the directives expect a {%k1}-masked
; vrndscalepd followed by a second vrndscalepd.
declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_256
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrndscalepd {{.*}}{%k1}
; CHECK: vrndscalepd
define <4 x double> @test_int_x86_avx512_mask_rndscale_pd_256(<4 x double> %a, <4 x double> %b, i8 %mask) {
  %masked = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %a, i32 4, <4 x double> %b, i8 %mask)
  %unmasked = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %a, i32 88, <4 x double> %b, i8 -1)
  %sum = fadd <4 x double> %masked, %unmasked
  ret <4 x double> %sum
}
; Lowering test for llvm.x86.avx512.mask.rndscale.ps.128.
; One call is masked by the i8 argument (immediate 88), the other uses an
; all-ones mask (immediate 4); the directives expect a {%k1}-masked
; vrndscaleps followed by a second vrndscaleps.
declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_128
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrndscaleps {{.*}}{%k1}
; CHECK: vrndscaleps
define <4 x float> @test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
  %masked = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %a, i32 88, <4 x float> %b, i8 %mask)
  %unmasked = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %a, i32 4, <4 x float> %b, i8 -1)
  %sum = fadd <4 x float> %masked, %unmasked
  ret <4 x float> %sum
}
; Lowering test for llvm.x86.avx512.mask.rndscale.ps.256.
; One call is masked by the i8 argument (immediate 5), the other uses an
; all-ones mask (immediate 66); the directives expect a {%k1}-masked
; vrndscaleps followed by a second vrndscaleps.
declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_256
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrndscaleps {{.*}}{%k1}
; CHECK: vrndscaleps
define <8 x float> @test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
  %masked = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %a, i32 5, <8 x float> %b, i8 %mask)
  %unmasked = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %a, i32 66, <8 x float> %b, i8 -1)
  %sum = fadd <8 x float> %masked, %unmasked
  ret <8 x float> %sum
}

View File

@ -12846,6 +12846,342 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xf1,0xfd,0x58,0x5a,0xaa,0xf8,0xfb,0xff,0xff]
vcvtpd2ps -1032(%rdx){1to8}, %ymm5
// vscalefsd encoding tests: register form, write-masked ({%k2}) and
// zero-masked ({z}) forms, the four embedded-rounding forms, and memory
// operands. The displacement cases straddle the one-byte limit: 1016 and
// -1024 encode as a single byte (0x7f / 0x80), while 1024 and -1032 need a
// four-byte displacement.
// CHECK: vscalefsd %xmm21, %xmm22, %xmm21
// CHECK: encoding: [0x62,0xa2,0xcd,0x00,0x2d,0xed]
vscalefsd %xmm21, %xmm22, %xmm21
// CHECK: vscalefsd %xmm21, %xmm22, %xmm21 {%k2}
// CHECK: encoding: [0x62,0xa2,0xcd,0x02,0x2d,0xed]
vscalefsd %xmm21, %xmm22, %xmm21 {%k2}
// CHECK: vscalefsd %xmm21, %xmm22, %xmm21 {%k2} {z}
// CHECK: encoding: [0x62,0xa2,0xcd,0x82,0x2d,0xed]
vscalefsd %xmm21, %xmm22, %xmm21 {%k2} {z}
// CHECK: vscalefsd {rn-sae}, %xmm21, %xmm22, %xmm21
// CHECK: encoding: [0x62,0xa2,0xcd,0x10,0x2d,0xed]
vscalefsd {rn-sae}, %xmm21, %xmm22, %xmm21
// CHECK: vscalefsd {ru-sae}, %xmm21, %xmm22, %xmm21
// CHECK: encoding: [0x62,0xa2,0xcd,0x50,0x2d,0xed]
vscalefsd {ru-sae}, %xmm21, %xmm22, %xmm21
// CHECK: vscalefsd {rd-sae}, %xmm21, %xmm22, %xmm21
// CHECK: encoding: [0x62,0xa2,0xcd,0x30,0x2d,0xed]
vscalefsd {rd-sae}, %xmm21, %xmm22, %xmm21
// CHECK: vscalefsd {rz-sae}, %xmm21, %xmm22, %xmm21
// CHECK: encoding: [0x62,0xa2,0xcd,0x70,0x2d,0xed]
vscalefsd {rz-sae}, %xmm21, %xmm22, %xmm21
// CHECK: vscalefsd (%rcx), %xmm22, %xmm21
// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0x29]
vscalefsd (%rcx), %xmm22, %xmm21
// CHECK: vscalefsd 291(%rax,%r14,8), %xmm22, %xmm21
// CHECK: encoding: [0x62,0xa2,0xcd,0x00,0x2d,0xac,0xf0,0x23,0x01,0x00,0x00]
vscalefsd 291(%rax,%r14,8), %xmm22, %xmm21
// CHECK: vscalefsd 1016(%rdx), %xmm22, %xmm21
// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0x6a,0x7f]
vscalefsd 1016(%rdx), %xmm22, %xmm21
// CHECK: vscalefsd 1024(%rdx), %xmm22, %xmm21
// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0xaa,0x00,0x04,0x00,0x00]
vscalefsd 1024(%rdx), %xmm22, %xmm21
// CHECK: vscalefsd -1024(%rdx), %xmm22, %xmm21
// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0x6a,0x80]
vscalefsd -1024(%rdx), %xmm22, %xmm21
// CHECK: vscalefsd -1032(%rdx), %xmm22, %xmm21
// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0xaa,0xf8,0xfb,0xff,0xff]
vscalefsd -1032(%rdx), %xmm22, %xmm21
// vscalefss encoding tests: register form, write-masked ({%k3}) and
// zero-masked ({z}) forms, the four embedded-rounding forms, and memory
// operands. The displacement cases straddle the one-byte limit: 508 and
// -512 encode as a single byte (0x7f / 0x80), while 512 and -516 need a
// four-byte displacement.
// CHECK: vscalefss %xmm23, %xmm15, %xmm13
// CHECK: encoding: [0x62,0x32,0x05,0x08,0x2d,0xef]
vscalefss %xmm23, %xmm15, %xmm13
// CHECK: vscalefss %xmm23, %xmm15, %xmm13 {%k3}
// CHECK: encoding: [0x62,0x32,0x05,0x0b,0x2d,0xef]
vscalefss %xmm23, %xmm15, %xmm13 {%k3}
// CHECK: vscalefss %xmm23, %xmm15, %xmm13 {%k3} {z}
// CHECK: encoding: [0x62,0x32,0x05,0x8b,0x2d,0xef]
vscalefss %xmm23, %xmm15, %xmm13 {%k3} {z}
// CHECK: vscalefss {rn-sae}, %xmm23, %xmm15, %xmm13
// CHECK: encoding: [0x62,0x32,0x05,0x18,0x2d,0xef]
vscalefss {rn-sae}, %xmm23, %xmm15, %xmm13
// CHECK: vscalefss {ru-sae}, %xmm23, %xmm15, %xmm13
// CHECK: encoding: [0x62,0x32,0x05,0x58,0x2d,0xef]
vscalefss {ru-sae}, %xmm23, %xmm15, %xmm13
// CHECK: vscalefss {rd-sae}, %xmm23, %xmm15, %xmm13
// CHECK: encoding: [0x62,0x32,0x05,0x38,0x2d,0xef]
vscalefss {rd-sae}, %xmm23, %xmm15, %xmm13
// CHECK: vscalefss {rz-sae}, %xmm23, %xmm15, %xmm13
// CHECK: encoding: [0x62,0x32,0x05,0x78,0x2d,0xef]
vscalefss {rz-sae}, %xmm23, %xmm15, %xmm13
// CHECK: vscalefss (%rcx), %xmm15, %xmm13
// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0x29]
vscalefss (%rcx), %xmm15, %xmm13
// CHECK: vscalefss 291(%rax,%r14,8), %xmm15, %xmm13
// CHECK: encoding: [0x62,0x32,0x05,0x08,0x2d,0xac,0xf0,0x23,0x01,0x00,0x00]
vscalefss 291(%rax,%r14,8), %xmm15, %xmm13
// CHECK: vscalefss 508(%rdx), %xmm15, %xmm13
// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0x6a,0x7f]
vscalefss 508(%rdx), %xmm15, %xmm13
// CHECK: vscalefss 512(%rdx), %xmm15, %xmm13
// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0xaa,0x00,0x02,0x00,0x00]
vscalefss 512(%rdx), %xmm15, %xmm13
// CHECK: vscalefss -512(%rdx), %xmm15, %xmm13
// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0x6a,0x80]
vscalefss -512(%rdx), %xmm15, %xmm13
// CHECK: vscalefss -516(%rdx), %xmm15, %xmm13
// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0xaa,0xfc,0xfd,0xff,0xff]
vscalefss -516(%rdx), %xmm15, %xmm13
// vrndscalepd (512-bit) encoding tests: register form with immediates 0xab
// and 0x7b, write-masked ({%k1}) and zero-masked ({z}) forms, {sae} forms,
// full-vector memory operands and {1to8} broadcast memory operands.
// Displacement cases straddle the one-byte limit: 8128/-8192 (full vector)
// and 1016/-1024 (broadcast) encode as a single byte (0x7f / 0x80); the next
// step in each direction needs a four-byte displacement.
// CHECK: vrndscalepd $171, %zmm7, %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0xf7,0xab]
vrndscalepd $0xab, %zmm7, %zmm22
// CHECK: vrndscalepd $171, %zmm7, %zmm22 {%k1}
// CHECK: encoding: [0x62,0xe3,0xfd,0x49,0x09,0xf7,0xab]
vrndscalepd $0xab, %zmm7, %zmm22 {%k1}
// CHECK: vrndscalepd $171, %zmm7, %zmm22 {%k1} {z}
// CHECK: encoding: [0x62,0xe3,0xfd,0xc9,0x09,0xf7,0xab]
vrndscalepd $0xab, %zmm7, %zmm22 {%k1} {z}
// CHECK: vrndscalepd $171,{sae}, %zmm7, %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x09,0xf7,0xab]
vrndscalepd $0xab,{sae}, %zmm7, %zmm22
// CHECK: vrndscalepd $123, %zmm7, %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0xf7,0x7b]
vrndscalepd $0x7b, %zmm7, %zmm22
// CHECK: vrndscalepd $123,{sae}, %zmm7, %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x09,0xf7,0x7b]
vrndscalepd $0x7b,{sae}, %zmm7, %zmm22
// CHECK: vrndscalepd $123, (%rcx), %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0x31,0x7b]
vrndscalepd $0x7b, (%rcx), %zmm22
// CHECK: vrndscalepd $123, 291(%rax,%r14,8), %zmm22
// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x09,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
vrndscalepd $0x7b, 291(%rax,%r14,8), %zmm22
// CHECK: vrndscalepd $123, (%rcx){1to8}, %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0x31,0x7b]
vrndscalepd $0x7b, (%rcx){1to8}, %zmm22
// CHECK: vrndscalepd $123, 8128(%rdx), %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0x72,0x7f,0x7b]
vrndscalepd $0x7b, 8128(%rdx), %zmm22
// CHECK: vrndscalepd $123, 8192(%rdx), %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0xb2,0x00,0x20,0x00,0x00,0x7b]
vrndscalepd $0x7b, 8192(%rdx), %zmm22
// CHECK: vrndscalepd $123, -8192(%rdx), %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0x72,0x80,0x7b]
vrndscalepd $0x7b, -8192(%rdx), %zmm22
// CHECK: vrndscalepd $123, -8256(%rdx), %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0xb2,0xc0,0xdf,0xff,0xff,0x7b]
vrndscalepd $0x7b, -8256(%rdx), %zmm22
// CHECK: vrndscalepd $123, 1016(%rdx){1to8}, %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0x72,0x7f,0x7b]
vrndscalepd $0x7b, 1016(%rdx){1to8}, %zmm22
// CHECK: vrndscalepd $123, 1024(%rdx){1to8}, %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0xb2,0x00,0x04,0x00,0x00,0x7b]
vrndscalepd $0x7b, 1024(%rdx){1to8}, %zmm22
// CHECK: vrndscalepd $123, -1024(%rdx){1to8}, %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0x72,0x80,0x7b]
vrndscalepd $0x7b, -1024(%rdx){1to8}, %zmm22
// CHECK: vrndscalepd $123, -1032(%rdx){1to8}, %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0xb2,0xf8,0xfb,0xff,0xff,0x7b]
vrndscalepd $0x7b, -1032(%rdx){1to8}, %zmm22
// vrndscaleps (512-bit) encoding tests: register form with immediates 0xab
// and 0x7b, write-masked ({%k1}) and zero-masked ({z}) forms, {sae} forms,
// full-vector memory operands and {1to16} broadcast memory operands.
// Displacement cases straddle the one-byte limit: 8128/-8192 (full vector)
// and 508/-512 (broadcast) encode as a single byte (0x7f / 0x80); the next
// step in each direction needs a four-byte displacement.
// CHECK: vrndscaleps $171, %zmm7, %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0xef,0xab]
vrndscaleps $0xab, %zmm7, %zmm13
// CHECK: vrndscaleps $171, %zmm7, %zmm13 {%k1}
// CHECK: encoding: [0x62,0x73,0x7d,0x49,0x08,0xef,0xab]
vrndscaleps $0xab, %zmm7, %zmm13 {%k1}
// CHECK: vrndscaleps $171, %zmm7, %zmm13 {%k1} {z}
// CHECK: encoding: [0x62,0x73,0x7d,0xc9,0x08,0xef,0xab]
vrndscaleps $0xab, %zmm7, %zmm13 {%k1} {z}
// CHECK: vrndscaleps $171,{sae}, %zmm7, %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x18,0x08,0xef,0xab]
vrndscaleps $0xab,{sae}, %zmm7, %zmm13
// CHECK: vrndscaleps $123, %zmm7, %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0xef,0x7b]
vrndscaleps $0x7b, %zmm7, %zmm13
// CHECK: vrndscaleps $123,{sae}, %zmm7, %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x18,0x08,0xef,0x7b]
vrndscaleps $0x7b,{sae}, %zmm7, %zmm13
// CHECK: vrndscaleps $123, (%rcx), %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0x29,0x7b]
vrndscaleps $0x7b, (%rcx), %zmm13
// CHECK: vrndscaleps $123, 291(%rax,%r14,8), %zmm13
// CHECK: encoding: [0x62,0x33,0x7d,0x48,0x08,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
vrndscaleps $0x7b, 291(%rax,%r14,8), %zmm13
// CHECK: vrndscaleps $123, (%rcx){1to16}, %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0x29,0x7b]
vrndscaleps $0x7b, (%rcx){1to16}, %zmm13
// CHECK: vrndscaleps $123, 8128(%rdx), %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0x6a,0x7f,0x7b]
vrndscaleps $0x7b, 8128(%rdx), %zmm13
// CHECK: vrndscaleps $123, 8192(%rdx), %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0xaa,0x00,0x20,0x00,0x00,0x7b]
vrndscaleps $0x7b, 8192(%rdx), %zmm13
// CHECK: vrndscaleps $123, -8192(%rdx), %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0x6a,0x80,0x7b]
vrndscaleps $0x7b, -8192(%rdx), %zmm13
// CHECK: vrndscaleps $123, -8256(%rdx), %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0xaa,0xc0,0xdf,0xff,0xff,0x7b]
vrndscaleps $0x7b, -8256(%rdx), %zmm13
// CHECK: vrndscaleps $123, 508(%rdx){1to16}, %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0x6a,0x7f,0x7b]
vrndscaleps $0x7b, 508(%rdx){1to16}, %zmm13
// CHECK: vrndscaleps $123, 512(%rdx){1to16}, %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0xaa,0x00,0x02,0x00,0x00,0x7b]
vrndscaleps $0x7b, 512(%rdx){1to16}, %zmm13
// CHECK: vrndscaleps $123, -512(%rdx){1to16}, %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0x6a,0x80,0x7b]
vrndscaleps $0x7b, -512(%rdx){1to16}, %zmm13
// CHECK: vrndscaleps $123, -516(%rdx){1to16}, %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
vrndscaleps $0x7b, -516(%rdx){1to16}, %zmm13
// vrndscalesd encoding tests: register form with immediates 0xab and 0x7b,
// write-masked ({%k6}) and zero-masked ({z}) forms, {sae} forms, and memory
// operands. Note the expected printed form has a space after the immediate's
// comma before {sae}, while the input omits it. Displacements 1016/-1024
// encode as a single byte (0x7f / 0x80); 1024/-1032 need four bytes.
// CHECK: vrndscalesd $171, %xmm15, %xmm12, %xmm25
// CHECK: encoding: [0x62,0x43,0x9d,0x08,0x0b,0xcf,0xab]
vrndscalesd $0xab, %xmm15, %xmm12, %xmm25
// CHECK: vrndscalesd $171, %xmm15, %xmm12, %xmm25 {%k6}
// CHECK: encoding: [0x62,0x43,0x9d,0x0e,0x0b,0xcf,0xab]
vrndscalesd $0xab, %xmm15, %xmm12, %xmm25 {%k6}
// CHECK: vrndscalesd $171, %xmm15, %xmm12, %xmm25 {%k6} {z}
// CHECK: encoding: [0x62,0x43,0x9d,0x8e,0x0b,0xcf,0xab]
vrndscalesd $0xab, %xmm15, %xmm12, %xmm25 {%k6} {z}
// CHECK: vrndscalesd $171, {sae}, %xmm15, %xmm12, %xmm25
// CHECK: encoding: [0x62,0x43,0x9d,0x18,0x0b,0xcf,0xab]
vrndscalesd $0xab,{sae}, %xmm15, %xmm12, %xmm25
// CHECK: vrndscalesd $123, %xmm15, %xmm12, %xmm25
// CHECK: encoding: [0x62,0x43,0x9d,0x08,0x0b,0xcf,0x7b]
vrndscalesd $0x7b, %xmm15, %xmm12, %xmm25
// CHECK: vrndscalesd $123, {sae}, %xmm15, %xmm12, %xmm25
// CHECK: encoding: [0x62,0x43,0x9d,0x18,0x0b,0xcf,0x7b]
vrndscalesd $0x7b,{sae}, %xmm15, %xmm12, %xmm25
// CHECK: vrndscalesd $123, (%rcx), %xmm12, %xmm25
// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x09,0x7b]
vrndscalesd $0x7b, (%rcx), %xmm12, %xmm25
// CHECK: vrndscalesd $123, 291(%rax,%r14,8), %xmm12, %xmm25
// CHECK: encoding: [0x62,0x23,0x9d,0x08,0x0b,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vrndscalesd $0x7b, 291(%rax,%r14,8), %xmm12, %xmm25
// CHECK: vrndscalesd $123, 1016(%rdx), %xmm12, %xmm25
// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x4a,0x7f,0x7b]
vrndscalesd $0x7b, 1016(%rdx), %xmm12, %xmm25
// CHECK: vrndscalesd $123, 1024(%rdx), %xmm12, %xmm25
// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x8a,0x00,0x04,0x00,0x00,0x7b]
vrndscalesd $0x7b, 1024(%rdx), %xmm12, %xmm25
// CHECK: vrndscalesd $123, -1024(%rdx), %xmm12, %xmm25
// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x4a,0x80,0x7b]
vrndscalesd $0x7b, -1024(%rdx), %xmm12, %xmm25
// CHECK: vrndscalesd $123, -1032(%rdx), %xmm12, %xmm25
// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
vrndscalesd $0x7b, -1032(%rdx), %xmm12, %xmm25
// vrndscaless encoding tests: register form with immediates 0xab and 0x7b,
// write-masked ({%k3}) and zero-masked ({z}) forms, {sae} forms, and memory
// operands. Note the expected printed form has a space after the immediate's
// comma before {sae}, while the input omits it. Displacements 508/-512
// encode as a single byte (0x7f / 0x80); 512/-516 need four bytes.
// CHECK: vrndscaless $171, %xmm17, %xmm11, %xmm11
// CHECK: encoding: [0x62,0x33,0x25,0x08,0x0a,0xd9,0xab]
vrndscaless $0xab, %xmm17, %xmm11, %xmm11
// CHECK: vrndscaless $171, %xmm17, %xmm11, %xmm11 {%k3}
// CHECK: encoding: [0x62,0x33,0x25,0x0b,0x0a,0xd9,0xab]
vrndscaless $0xab, %xmm17, %xmm11, %xmm11 {%k3}
// CHECK: vrndscaless $171, %xmm17, %xmm11, %xmm11 {%k3} {z}
// CHECK: encoding: [0x62,0x33,0x25,0x8b,0x0a,0xd9,0xab]
vrndscaless $0xab, %xmm17, %xmm11, %xmm11 {%k3} {z}
// CHECK: vrndscaless $171, {sae}, %xmm17, %xmm11, %xmm11
// CHECK: encoding: [0x62,0x33,0x25,0x18,0x0a,0xd9,0xab]
vrndscaless $0xab,{sae}, %xmm17, %xmm11, %xmm11
// CHECK: vrndscaless $123, %xmm17, %xmm11, %xmm11
// CHECK: encoding: [0x62,0x33,0x25,0x08,0x0a,0xd9,0x7b]
vrndscaless $0x7b, %xmm17, %xmm11, %xmm11
// CHECK: vrndscaless $123, {sae}, %xmm17, %xmm11, %xmm11
// CHECK: encoding: [0x62,0x33,0x25,0x18,0x0a,0xd9,0x7b]
vrndscaless $0x7b,{sae}, %xmm17, %xmm11, %xmm11
// CHECK: vrndscaless $123, (%rcx), %xmm11, %xmm11
// CHECK: encoding: [0x62,0x73,0x25,0x08,0x0a,0x19,0x7b]
vrndscaless $0x7b, (%rcx), %xmm11, %xmm11
// CHECK: vrndscaless $123, 291(%rax,%r14,8), %xmm11, %xmm11
// CHECK: encoding: [0x62,0x33,0x25,0x08,0x0a,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vrndscaless $0x7b, 291(%rax,%r14,8), %xmm11, %xmm11
// CHECK: vrndscaless $123, 508(%rdx), %xmm11, %xmm11
// CHECK: encoding: [0x62,0x73,0x25,0x08,0x0a,0x5a,0x7f,0x7b]
vrndscaless $0x7b, 508(%rdx), %xmm11, %xmm11
// CHECK: vrndscaless $123, 512(%rdx), %xmm11, %xmm11
// CHECK: encoding: [0x62,0x73,0x25,0x08,0x0a,0x9a,0x00,0x02,0x00,0x00,0x7b]
vrndscaless $0x7b, 512(%rdx), %xmm11, %xmm11
// CHECK: vrndscaless $123, -512(%rdx), %xmm11, %xmm11
// CHECK: encoding: [0x62,0x73,0x25,0x08,0x0a,0x5a,0x80,0x7b]
vrndscaless $0x7b, -512(%rdx), %xmm11, %xmm11
// CHECK: vrndscaless $123, -516(%rdx), %xmm11, %xmm11
// CHECK: encoding: [0x62,0x73,0x25,0x08,0x0a,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
vrndscaless $0x7b, -516(%rdx), %xmm11, %xmm11
// CHECK: vfmadd132ss %xmm22, %xmm17, %xmm30
// CHECK: encoding: [0x62,0x22,0x75,0x00,0x99,0xf6]
vfmadd132ss %xmm22, %xmm17, %xmm30

View File

@ -1391,6 +1391,470 @@
// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x51,0x8a,0xfc,0xfd,0xff,0xff,0x7b]
vrangess $0x7b,-516(%rdx), %xmm24, %xmm25
// vreducepd (512-bit) encoding tests: register form with immediates 0xab
// and 0x7b, write-masked ({%k6}) and zero-masked ({z}) forms, {sae} forms,
// full-vector memory operands and {1to8} broadcast memory operands.
// Displacement cases straddle the one-byte limit: 8128/-8192 (full vector)
// and 1016/-1024 (broadcast) encode as a single byte (0x7f / 0x80); the next
// step in each direction needs a four-byte displacement.
// CHECK: vreducepd $171, %zmm19, %zmm19
// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x56,0xdb,0xab]
vreducepd $0xab, %zmm19, %zmm19
// CHECK: vreducepd $171, %zmm19, %zmm19 {%k6}
// CHECK: encoding: [0x62,0xa3,0xfd,0x4e,0x56,0xdb,0xab]
vreducepd $0xab, %zmm19, %zmm19 {%k6}
// CHECK: vreducepd $171, %zmm19, %zmm19 {%k6} {z}
// CHECK: encoding: [0x62,0xa3,0xfd,0xce,0x56,0xdb,0xab]
vreducepd $0xab, %zmm19, %zmm19 {%k6} {z}
// CHECK: vreducepd $171,{sae}, %zmm19, %zmm19
// CHECK: encoding: [0x62,0xa3,0xfd,0x18,0x56,0xdb,0xab]
vreducepd $0xab,{sae}, %zmm19, %zmm19
// CHECK: vreducepd $123, %zmm19, %zmm19
// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x56,0xdb,0x7b]
vreducepd $0x7b, %zmm19, %zmm19
// CHECK: vreducepd $123,{sae}, %zmm19, %zmm19
// CHECK: encoding: [0x62,0xa3,0xfd,0x18,0x56,0xdb,0x7b]
vreducepd $0x7b,{sae}, %zmm19, %zmm19
// CHECK: vreducepd $123, (%rcx), %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x19,0x7b]
vreducepd $0x7b,(%rcx), %zmm19
// CHECK: vreducepd $123, 291(%rax,%r14,8), %zmm19
// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x56,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vreducepd $0x7b,291(%rax,%r14,8), %zmm19
// CHECK: vreducepd $123, (%rcx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x19,0x7b]
vreducepd $0x7b,(%rcx){1to8}, %zmm19
// CHECK: vreducepd $123, 8128(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x5a,0x7f,0x7b]
vreducepd $0x7b,8128(%rdx), %zmm19
// CHECK: vreducepd $123, 8192(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x9a,0x00,0x20,0x00,0x00,0x7b]
vreducepd $0x7b,8192(%rdx), %zmm19
// CHECK: vreducepd $123, -8192(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x5a,0x80,0x7b]
vreducepd $0x7b,-8192(%rdx), %zmm19
// CHECK: vreducepd $123, -8256(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
vreducepd $0x7b,-8256(%rdx), %zmm19
// CHECK: vreducepd $123, 1016(%rdx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x5a,0x7f,0x7b]
vreducepd $0x7b,1016(%rdx){1to8}, %zmm19
// CHECK: vreducepd $123, 1024(%rdx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x9a,0x00,0x04,0x00,0x00,0x7b]
vreducepd $0x7b,1024(%rdx){1to8}, %zmm19
// CHECK: vreducepd $123, -1024(%rdx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x5a,0x80,0x7b]
vreducepd $0x7b,-1024(%rdx){1to8}, %zmm19
// CHECK: vreducepd $123, -1032(%rdx){1to8}, %zmm19
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
vreducepd $0x7b,-1032(%rdx){1to8}, %zmm19
// vreduceps (512-bit) encoding tests: register form with immediates 0xab
// and 0x7b, write-masked ({%k3}) and zero-masked ({z}) forms, {sae} forms,
// full-vector memory operands and {1to16} broadcast memory operands.
// Displacement cases straddle the one-byte limit: 8128/-8192 (full vector)
// and 508/-512 (broadcast) encode as a single byte (0x7f / 0x80); the next
// step in each direction needs a four-byte displacement.
// CHECK: vreduceps $171, %zmm29, %zmm19
// CHECK: encoding: [0x62,0x83,0x7d,0x48,0x56,0xdd,0xab]
vreduceps $0xab, %zmm29, %zmm19
// CHECK: vreduceps $171, %zmm29, %zmm19 {%k3}
// CHECK: encoding: [0x62,0x83,0x7d,0x4b,0x56,0xdd,0xab]
vreduceps $0xab, %zmm29, %zmm19 {%k3}
// CHECK: vreduceps $171, %zmm29, %zmm19 {%k3} {z}
// CHECK: encoding: [0x62,0x83,0x7d,0xcb,0x56,0xdd,0xab]
vreduceps $0xab, %zmm29, %zmm19 {%k3} {z}
// CHECK: vreduceps $171,{sae}, %zmm29, %zmm19
// CHECK: encoding: [0x62,0x83,0x7d,0x18,0x56,0xdd,0xab]
vreduceps $0xab,{sae}, %zmm29, %zmm19
// CHECK: vreduceps $123, %zmm29, %zmm19
// CHECK: encoding: [0x62,0x83,0x7d,0x48,0x56,0xdd,0x7b]
vreduceps $0x7b, %zmm29, %zmm19
// CHECK: vreduceps $123,{sae}, %zmm29, %zmm19
// CHECK: encoding: [0x62,0x83,0x7d,0x18,0x56,0xdd,0x7b]
vreduceps $0x7b,{sae}, %zmm29, %zmm19
// CHECK: vreduceps $123, (%rcx), %zmm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x19,0x7b]
vreduceps $0x7b,(%rcx), %zmm19
// CHECK: vreduceps $123, 291(%rax,%r14,8), %zmm19
// CHECK: encoding: [0x62,0xa3,0x7d,0x48,0x56,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vreduceps $0x7b,291(%rax,%r14,8), %zmm19
// CHECK: vreduceps $123, (%rcx){1to16}, %zmm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x19,0x7b]
vreduceps $0x7b,(%rcx){1to16}, %zmm19
// CHECK: vreduceps $123, 8128(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x5a,0x7f,0x7b]
vreduceps $0x7b,8128(%rdx), %zmm19
// CHECK: vreduceps $123, 8192(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x9a,0x00,0x20,0x00,0x00,0x7b]
vreduceps $0x7b,8192(%rdx), %zmm19
// CHECK: vreduceps $123, -8192(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x5a,0x80,0x7b]
vreduceps $0x7b,-8192(%rdx), %zmm19
// CHECK: vreduceps $123, -8256(%rdx), %zmm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
vreduceps $0x7b,-8256(%rdx), %zmm19
// CHECK: vreduceps $123, 508(%rdx){1to16}, %zmm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x5a,0x7f,0x7b]
vreduceps $0x7b,508(%rdx){1to16}, %zmm19
// CHECK: vreduceps $123, 512(%rdx){1to16}, %zmm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x9a,0x00,0x02,0x00,0x00,0x7b]
vreduceps $0x7b,512(%rdx){1to16}, %zmm19
// CHECK: vreduceps $123, -512(%rdx){1to16}, %zmm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x5a,0x80,0x7b]
vreduceps $0x7b,-512(%rdx){1to16}, %zmm19
// CHECK: vreduceps $123, -516(%rdx){1to16}, %zmm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
vreduceps $0x7b,-516(%rdx){1to16}, %zmm19
// vreducesd with register operands %xmm25, %xmm17, %xmm17: plain, masked
// ({%k6}), zero-masked ({%k6} {z}) and {sae} register forms with immediates
// 0xab (171) and 0x7b (123), followed by memory-operand forms.
// CHECK: vreducesd $171, %xmm25, %xmm17, %xmm17
// CHECK: encoding: [0x62,0x83,0xf5,0x00,0x57,0xc9,0xab]
vreducesd $0xab, %xmm25, %xmm17, %xmm17
// CHECK: vreducesd $171, %xmm25, %xmm17, %xmm17 {%k6}
// CHECK: encoding: [0x62,0x83,0xf5,0x06,0x57,0xc9,0xab]
vreducesd $0xab, %xmm25, %xmm17, %xmm17 {%k6}
// CHECK: vreducesd $171, %xmm25, %xmm17, %xmm17 {%k6} {z}
// CHECK: encoding: [0x62,0x83,0xf5,0x86,0x57,0xc9,0xab]
vreducesd $0xab, %xmm25, %xmm17, %xmm17 {%k6} {z}
// CHECK: vreducesd $171,{sae}, %xmm25, %xmm17, %xmm17
// CHECK: encoding: [0x62,0x83,0xf5,0x10,0x57,0xc9,0xab]
vreducesd $0xab,{sae}, %xmm25, %xmm17, %xmm17
// CHECK: vreducesd $123, %xmm25, %xmm17, %xmm17
// CHECK: encoding: [0x62,0x83,0xf5,0x00,0x57,0xc9,0x7b]
vreducesd $0x7b, %xmm25, %xmm17, %xmm17
// CHECK: vreducesd $123,{sae}, %xmm25, %xmm17, %xmm17
// CHECK: encoding: [0x62,0x83,0xf5,0x10,0x57,0xc9,0x7b]
vreducesd $0x7b,{sae}, %xmm25, %xmm17, %xmm17
// CHECK: vreducesd $123, (%rcx), %xmm17, %xmm17
// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x09,0x7b]
vreducesd $0x7b,(%rcx), %xmm17, %xmm17
// CHECK: vreducesd $123, 291(%rax,%r14,8), %xmm17, %xmm17
// CHECK: encoding: [0x62,0xa3,0xf5,0x00,0x57,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vreducesd $0x7b,291(%rax,%r14,8), %xmm17, %xmm17
// Displacement boundaries for the scalar-double memory form: 1016 and -1024
// are emitted as one displacement byte (0x7f / 0x80); 1024 and -1032 take a
// four-byte displacement.
// CHECK: vreducesd $123, 1016(%rdx), %xmm17, %xmm17
// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x4a,0x7f,0x7b]
vreducesd $0x7b,1016(%rdx), %xmm17, %xmm17
// CHECK: vreducesd $123, 1024(%rdx), %xmm17, %xmm17
// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x8a,0x00,0x04,0x00,0x00,0x7b]
vreducesd $0x7b,1024(%rdx), %xmm17, %xmm17
// CHECK: vreducesd $123, -1024(%rdx), %xmm17, %xmm17
// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x4a,0x80,0x7b]
vreducesd $0x7b,-1024(%rdx), %xmm17, %xmm17
// CHECK: vreducesd $123, -1032(%rdx), %xmm17, %xmm17
// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
vreducesd $0x7b,-1032(%rdx), %xmm17, %xmm17
// vreducess with register operands %xmm21, %xmm29, %xmm30: plain, masked
// ({%k1}), zero-masked ({%k1} {z}) and {sae} register forms with immediates
// 0xab (171) and 0x7b (123), followed by memory-operand forms.
// CHECK: vreducess $171, %xmm21, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x23,0x15,0x00,0x57,0xf5,0xab]
vreducess $0xab, %xmm21, %xmm29, %xmm30
// CHECK: vreducess $171, %xmm21, %xmm29, %xmm30 {%k1}
// CHECK: encoding: [0x62,0x23,0x15,0x01,0x57,0xf5,0xab]
vreducess $0xab, %xmm21, %xmm29, %xmm30 {%k1}
// CHECK: vreducess $171, %xmm21, %xmm29, %xmm30 {%k1} {z}
// CHECK: encoding: [0x62,0x23,0x15,0x81,0x57,0xf5,0xab]
vreducess $0xab, %xmm21, %xmm29, %xmm30 {%k1} {z}
// CHECK: vreducess $171,{sae}, %xmm21, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x23,0x15,0x10,0x57,0xf5,0xab]
vreducess $0xab,{sae}, %xmm21, %xmm29, %xmm30
// CHECK: vreducess $123, %xmm21, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x23,0x15,0x00,0x57,0xf5,0x7b]
vreducess $0x7b, %xmm21, %xmm29, %xmm30
// CHECK: vreducess $123,{sae}, %xmm21, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x23,0x15,0x10,0x57,0xf5,0x7b]
vreducess $0x7b,{sae}, %xmm21, %xmm29, %xmm30
// CHECK: vreducess $123, (%rcx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0x31,0x7b]
vreducess $0x7b,(%rcx), %xmm29, %xmm30
// CHECK: vreducess $123, 291(%rax,%r14,8), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x23,0x15,0x00,0x57,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
vreducess $0x7b,291(%rax,%r14,8), %xmm29, %xmm30
// Displacement boundaries for the scalar-single memory form: 508 and -512
// are emitted as one displacement byte (0x7f / 0x80); 512 and -516 take a
// four-byte displacement.
// CHECK: vreducess $123, 508(%rdx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0x72,0x7f,0x7b]
vreducess $0x7b,508(%rdx), %xmm29, %xmm30
// CHECK: vreducess $123, 512(%rdx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0xb2,0x00,0x02,0x00,0x00,0x7b]
vreducess $0x7b,512(%rdx), %xmm29, %xmm30
// CHECK: vreducess $123, -512(%rdx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0x72,0x80,0x7b]
vreducess $0x7b,-512(%rdx), %xmm29, %xmm30
// CHECK: vreducess $123, -516(%rdx), %xmm29, %xmm30
// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
vreducess $0x7b,-516(%rdx), %xmm29, %xmm30
// vreducepd writing %zmm18 from %zmm28: plain, masked ({%k5}), zero-masked
// ({%k5} {z}) and {sae} register forms with immediates 0xab (171) and
// 0x7b (123), followed by full-vector and {1to8} broadcast memory forms.
// CHECK: vreducepd $171, %zmm28, %zmm18
// CHECK: encoding: [0x62,0x83,0xfd,0x48,0x56,0xd4,0xab]
vreducepd $0xab, %zmm28, %zmm18
// CHECK: vreducepd $171, %zmm28, %zmm18 {%k5}
// CHECK: encoding: [0x62,0x83,0xfd,0x4d,0x56,0xd4,0xab]
vreducepd $0xab, %zmm28, %zmm18 {%k5}
// CHECK: vreducepd $171, %zmm28, %zmm18 {%k5} {z}
// CHECK: encoding: [0x62,0x83,0xfd,0xcd,0x56,0xd4,0xab]
vreducepd $0xab, %zmm28, %zmm18 {%k5} {z}
// CHECK: vreducepd $171,{sae}, %zmm28, %zmm18
// CHECK: encoding: [0x62,0x83,0xfd,0x18,0x56,0xd4,0xab]
vreducepd $0xab,{sae}, %zmm28, %zmm18
// CHECK: vreducepd $123, %zmm28, %zmm18
// CHECK: encoding: [0x62,0x83,0xfd,0x48,0x56,0xd4,0x7b]
vreducepd $0x7b, %zmm28, %zmm18
// CHECK: vreducepd $123,{sae}, %zmm28, %zmm18
// CHECK: encoding: [0x62,0x83,0xfd,0x18,0x56,0xd4,0x7b]
vreducepd $0x7b,{sae}, %zmm28, %zmm18
// CHECK: vreducepd $123, (%rcx), %zmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x11,0x7b]
vreducepd $0x7b,(%rcx), %zmm18
// CHECK: vreducepd $123, 4660(%rax,%r14,8), %zmm18
// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x56,0x94,0xf0,0x34,0x12,0x00,0x00,0x7b]
vreducepd $0x7b,4660(%rax,%r14,8), %zmm18
// CHECK: vreducepd $123, (%rcx){1to8}, %zmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x11,0x7b]
vreducepd $0x7b,(%rcx){1to8}, %zmm18
// Displacement boundaries for the full-vector memory form: 8128 and -8192 are
// emitted as one displacement byte (0x7f / 0x80); 8192 and -8256 take four.
// CHECK: vreducepd $123, 8128(%rdx), %zmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x52,0x7f,0x7b]
vreducepd $0x7b,8128(%rdx), %zmm18
// CHECK: vreducepd $123, 8192(%rdx), %zmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x92,0x00,0x20,0x00,0x00,0x7b]
vreducepd $0x7b,8192(%rdx), %zmm18
// CHECK: vreducepd $123, -8192(%rdx), %zmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x52,0x80,0x7b]
vreducepd $0x7b,-8192(%rdx), %zmm18
// CHECK: vreducepd $123, -8256(%rdx), %zmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x92,0xc0,0xdf,0xff,0xff,0x7b]
vreducepd $0x7b,-8256(%rdx), %zmm18
// Same boundaries for the {1to8} broadcast form: 1016 / -1024 take one
// displacement byte, 1024 / -1032 take four.
// CHECK: vreducepd $123, 1016(%rdx){1to8}, %zmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x52,0x7f,0x7b]
vreducepd $0x7b,1016(%rdx){1to8}, %zmm18
// CHECK: vreducepd $123, 1024(%rdx){1to8}, %zmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x92,0x00,0x04,0x00,0x00,0x7b]
vreducepd $0x7b,1024(%rdx){1to8}, %zmm18
// CHECK: vreducepd $123, -1024(%rdx){1to8}, %zmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x52,0x80,0x7b]
vreducepd $0x7b,-1024(%rdx){1to8}, %zmm18
// CHECK: vreducepd $123, -1032(%rdx){1to8}, %zmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x92,0xf8,0xfb,0xff,0xff,0x7b]
vreducepd $0x7b,-1032(%rdx){1to8}, %zmm18
// vreduceps writing %zmm26 from %zmm25: plain, masked ({%k3}), zero-masked
// ({%k3} {z}) and {sae} register forms with immediates 0xab (171) and
// 0x7b (123), followed by full-vector and {1to16} broadcast memory forms.
// CHECK: vreduceps $171, %zmm25, %zmm26
// CHECK: encoding: [0x62,0x03,0x7d,0x48,0x56,0xd1,0xab]
vreduceps $0xab, %zmm25, %zmm26
// CHECK: vreduceps $171, %zmm25, %zmm26 {%k3}
// CHECK: encoding: [0x62,0x03,0x7d,0x4b,0x56,0xd1,0xab]
vreduceps $0xab, %zmm25, %zmm26 {%k3}
// CHECK: vreduceps $171, %zmm25, %zmm26 {%k3} {z}
// CHECK: encoding: [0x62,0x03,0x7d,0xcb,0x56,0xd1,0xab]
vreduceps $0xab, %zmm25, %zmm26 {%k3} {z}
// CHECK: vreduceps $171,{sae}, %zmm25, %zmm26
// CHECK: encoding: [0x62,0x03,0x7d,0x18,0x56,0xd1,0xab]
vreduceps $0xab,{sae}, %zmm25, %zmm26
// CHECK: vreduceps $123, %zmm25, %zmm26
// CHECK: encoding: [0x62,0x03,0x7d,0x48,0x56,0xd1,0x7b]
vreduceps $0x7b, %zmm25, %zmm26
// CHECK: vreduceps $123,{sae}, %zmm25, %zmm26
// CHECK: encoding: [0x62,0x03,0x7d,0x18,0x56,0xd1,0x7b]
vreduceps $0x7b,{sae}, %zmm25, %zmm26
// CHECK: vreduceps $123, (%rcx), %zmm26
// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x11,0x7b]
vreduceps $0x7b,(%rcx), %zmm26
// CHECK: vreduceps $123, 4660(%rax,%r14,8), %zmm26
// CHECK: encoding: [0x62,0x23,0x7d,0x48,0x56,0x94,0xf0,0x34,0x12,0x00,0x00,0x7b]
vreduceps $0x7b,4660(%rax,%r14,8), %zmm26
// CHECK: vreduceps $123, (%rcx){1to16}, %zmm26
// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x11,0x7b]
vreduceps $0x7b,(%rcx){1to16}, %zmm26
// Displacement boundaries for the full-vector memory form: 8128 and -8192 are
// emitted as one displacement byte (0x7f / 0x80); 8192 and -8256 take four.
// CHECK: vreduceps $123, 8128(%rdx), %zmm26
// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x52,0x7f,0x7b]
vreduceps $0x7b,8128(%rdx), %zmm26
// CHECK: vreduceps $123, 8192(%rdx), %zmm26
// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x92,0x00,0x20,0x00,0x00,0x7b]
vreduceps $0x7b,8192(%rdx), %zmm26
// CHECK: vreduceps $123, -8192(%rdx), %zmm26
// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x52,0x80,0x7b]
vreduceps $0x7b,-8192(%rdx), %zmm26
// CHECK: vreduceps $123, -8256(%rdx), %zmm26
// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x92,0xc0,0xdf,0xff,0xff,0x7b]
vreduceps $0x7b,-8256(%rdx), %zmm26
// Same boundaries for the {1to16} broadcast form: 508 / -512 take one
// displacement byte, 512 / -516 take four.
// CHECK: vreduceps $123, 508(%rdx){1to16}, %zmm26
// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x52,0x7f,0x7b]
vreduceps $0x7b,508(%rdx){1to16}, %zmm26
// CHECK: vreduceps $123, 512(%rdx){1to16}, %zmm26
// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x92,0x00,0x02,0x00,0x00,0x7b]
vreduceps $0x7b,512(%rdx){1to16}, %zmm26
// CHECK: vreduceps $123, -512(%rdx){1to16}, %zmm26
// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x52,0x80,0x7b]
vreduceps $0x7b,-512(%rdx){1to16}, %zmm26
// CHECK: vreduceps $123, -516(%rdx){1to16}, %zmm26
// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x92,0xfc,0xfd,0xff,0xff,0x7b]
vreduceps $0x7b,-516(%rdx){1to16}, %zmm26
// vreducesd with register operands %xmm24, %xmm19, %xmm25: plain, masked
// ({%k3}), zero-masked ({%k3} {z}) and {sae} register forms with immediates
// 0xab (171) and 0x7b (123), followed by memory-operand forms.
// CHECK: vreducesd $171, %xmm24, %xmm19, %xmm25
// CHECK: encoding: [0x62,0x03,0xe5,0x00,0x57,0xc8,0xab]
vreducesd $0xab, %xmm24, %xmm19, %xmm25
// CHECK: vreducesd $171, %xmm24, %xmm19, %xmm25 {%k3}
// CHECK: encoding: [0x62,0x03,0xe5,0x03,0x57,0xc8,0xab]
vreducesd $0xab, %xmm24, %xmm19, %xmm25 {%k3}
// CHECK: vreducesd $171, %xmm24, %xmm19, %xmm25 {%k3} {z}
// CHECK: encoding: [0x62,0x03,0xe5,0x83,0x57,0xc8,0xab]
vreducesd $0xab, %xmm24, %xmm19, %xmm25 {%k3} {z}
// CHECK: vreducesd $171,{sae}, %xmm24, %xmm19, %xmm25
// CHECK: encoding: [0x62,0x03,0xe5,0x10,0x57,0xc8,0xab]
vreducesd $0xab,{sae}, %xmm24, %xmm19, %xmm25
// CHECK: vreducesd $123, %xmm24, %xmm19, %xmm25
// CHECK: encoding: [0x62,0x03,0xe5,0x00,0x57,0xc8,0x7b]
vreducesd $0x7b, %xmm24, %xmm19, %xmm25
// CHECK: vreducesd $123,{sae}, %xmm24, %xmm19, %xmm25
// CHECK: encoding: [0x62,0x03,0xe5,0x10,0x57,0xc8,0x7b]
vreducesd $0x7b,{sae}, %xmm24, %xmm19, %xmm25
// CHECK: vreducesd $123, (%rcx), %xmm19, %xmm25
// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x09,0x7b]
vreducesd $0x7b,(%rcx), %xmm19, %xmm25
// CHECK: vreducesd $123, 4660(%rax,%r14,8), %xmm19, %xmm25
// CHECK: encoding: [0x62,0x23,0xe5,0x00,0x57,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b]
vreducesd $0x7b,4660(%rax,%r14,8), %xmm19, %xmm25
// Displacement boundaries for the scalar-double memory form: 1016 and -1024
// are emitted as one displacement byte (0x7f / 0x80); 1024 and -1032 take a
// four-byte displacement.
// CHECK: vreducesd $123, 1016(%rdx), %xmm19, %xmm25
// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x4a,0x7f,0x7b]
vreducesd $0x7b,1016(%rdx), %xmm19, %xmm25
// CHECK: vreducesd $123, 1024(%rdx), %xmm19, %xmm25
// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x8a,0x00,0x04,0x00,0x00,0x7b]
vreducesd $0x7b,1024(%rdx), %xmm19, %xmm25
// CHECK: vreducesd $123, -1024(%rdx), %xmm19, %xmm25
// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x4a,0x80,0x7b]
vreducesd $0x7b,-1024(%rdx), %xmm19, %xmm25
// CHECK: vreducesd $123, -1032(%rdx), %xmm19, %xmm25
// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
vreducesd $0x7b,-1032(%rdx), %xmm19, %xmm25
// vreducess with register operands %xmm21, %xmm24, %xmm30: plain, masked
// ({%k2}), zero-masked ({%k2} {z}) and {sae} register forms with immediates
// 0xab (171) and 0x7b (123), followed by memory-operand forms.
// CHECK: vreducess $171, %xmm21, %xmm24, %xmm30
// CHECK: encoding: [0x62,0x23,0x3d,0x00,0x57,0xf5,0xab]
vreducess $0xab, %xmm21, %xmm24, %xmm30
// CHECK: vreducess $171, %xmm21, %xmm24, %xmm30 {%k2}
// CHECK: encoding: [0x62,0x23,0x3d,0x02,0x57,0xf5,0xab]
vreducess $0xab, %xmm21, %xmm24, %xmm30 {%k2}
// CHECK: vreducess $171, %xmm21, %xmm24, %xmm30 {%k2} {z}
// CHECK: encoding: [0x62,0x23,0x3d,0x82,0x57,0xf5,0xab]
vreducess $0xab, %xmm21, %xmm24, %xmm30 {%k2} {z}
// CHECK: vreducess $171,{sae}, %xmm21, %xmm24, %xmm30
// CHECK: encoding: [0x62,0x23,0x3d,0x10,0x57,0xf5,0xab]
vreducess $0xab,{sae}, %xmm21, %xmm24, %xmm30
// CHECK: vreducess $123, %xmm21, %xmm24, %xmm30
// CHECK: encoding: [0x62,0x23,0x3d,0x00,0x57,0xf5,0x7b]
vreducess $0x7b, %xmm21, %xmm24, %xmm30
// CHECK: vreducess $123,{sae}, %xmm21, %xmm24, %xmm30
// CHECK: encoding: [0x62,0x23,0x3d,0x10,0x57,0xf5,0x7b]
vreducess $0x7b,{sae}, %xmm21, %xmm24, %xmm30
// CHECK: vreducess $123, (%rcx), %xmm24, %xmm30
// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0x31,0x7b]
vreducess $0x7b,(%rcx), %xmm24, %xmm30
// CHECK: vreducess $123, 4660(%rax,%r14,8), %xmm24, %xmm30
// CHECK: encoding: [0x62,0x23,0x3d,0x00,0x57,0xb4,0xf0,0x34,0x12,0x00,0x00,0x7b]
vreducess $0x7b,4660(%rax,%r14,8), %xmm24, %xmm30
// Displacement boundaries for the scalar-single memory form: 508 and -512
// are emitted as one displacement byte (0x7f / 0x80); 512 and -516 take a
// four-byte displacement.
// CHECK: vreducess $123, 508(%rdx), %xmm24, %xmm30
// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0x72,0x7f,0x7b]
vreducess $0x7b,508(%rdx), %xmm24, %xmm30
// CHECK: vreducess $123, 512(%rdx), %xmm24, %xmm30
// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0xb2,0x00,0x02,0x00,0x00,0x7b]
vreducess $0x7b,512(%rdx), %xmm24, %xmm30
// CHECK: vreducess $123, -512(%rdx), %xmm24, %xmm30
// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0x72,0x80,0x7b]
vreducess $0x7b,-512(%rdx), %xmm24, %xmm30
// CHECK: vreducess $123, -516(%rdx), %xmm24, %xmm30
// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
vreducess $0x7b,-516(%rdx), %xmm24, %xmm30
// CHECK: vcvtpd2qq %zmm29, %zmm18
// CHECK: encoding: [0x62,0x81,0xfd,0x48,0x7b,0xd5]
vcvtpd2qq %zmm29, %zmm18

View File

@ -2208,6 +2208,486 @@
// CHECK: encoding: [0x62,0x63,0x45,0x30,0x50,0x82,0xfc,0xfd,0xff,0xff,0x7b]
vrangeps $0x7b,-516(%rdx){1to8}, %ymm23, %ymm24
// vreducepd, 128-bit form writing %xmm18 from %xmm17: plain, masked ({%k3})
// and zero-masked ({%k3} {z}) register forms with immediate 0xab (171), the
// plain register form with 0x7b (123), and two memory forms. The broadcast
// and displacement-boundary cases for %xmm18 appear later in this file.
// CHECK: vreducepd $171, %xmm17, %xmm18
// CHECK: encoding: [0x62,0xa3,0xfd,0x08,0x56,0xd1,0xab]
vreducepd $0xab, %xmm17, %xmm18
// CHECK: vreducepd $171, %xmm17, %xmm18 {%k3}
// CHECK: encoding: [0x62,0xa3,0xfd,0x0b,0x56,0xd1,0xab]
vreducepd $0xab, %xmm17, %xmm18 {%k3}
// CHECK: vreducepd $171, %xmm17, %xmm18 {%k3} {z}
// CHECK: encoding: [0x62,0xa3,0xfd,0x8b,0x56,0xd1,0xab]
vreducepd $0xab, %xmm17, %xmm18 {%k3} {z}
// CHECK: vreducepd $123, %xmm17, %xmm18
// CHECK: encoding: [0x62,0xa3,0xfd,0x08,0x56,0xd1,0x7b]
vreducepd $0x7b, %xmm17, %xmm18
// CHECK: vreducepd $123, (%rcx), %xmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x11,0x7b]
vreducepd $0x7b,(%rcx), %xmm18
// CHECK: vreducepd $123, 291(%rax,%r14,8), %xmm18
// CHECK: encoding: [0x62,0xa3,0xfd,0x08,0x56,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
vreducepd $0x7b,291(%rax,%r14,8), %xmm18
// vreducepd, 128-bit form writing %xmm25 from %xmm28: plain, masked ({%k4})
// and zero-masked ({%k4} {z}) register forms, then full-vector and {1to2}
// broadcast memory forms. Immediates are 0xab (171) and 0x7b (123).
// CHECK: vreducepd $171, %xmm28, %xmm25
// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x56,0xcc,0xab]
vreducepd $0xab, %xmm28, %xmm25
// CHECK: vreducepd $171, %xmm28, %xmm25 {%k4}
// CHECK: encoding: [0x62,0x03,0xfd,0x0c,0x56,0xcc,0xab]
vreducepd $0xab, %xmm28, %xmm25 {%k4}
// CHECK: vreducepd $171, %xmm28, %xmm25 {%k4} {z}
// CHECK: encoding: [0x62,0x03,0xfd,0x8c,0x56,0xcc,0xab]
vreducepd $0xab, %xmm28, %xmm25 {%k4} {z}
// CHECK: vreducepd $123, %xmm28, %xmm25
// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x56,0xcc,0x7b]
vreducepd $0x7b, %xmm28, %xmm25
// CHECK: vreducepd $123, (%rcx), %xmm25
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x09,0x7b]
vreducepd $0x7b,(%rcx), %xmm25
// CHECK: vreducepd $123, 4660(%rax,%r14,8), %xmm25
// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x56,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b]
vreducepd $0x7b,4660(%rax,%r14,8), %xmm25
// CHECK: vreducepd $123, (%rcx){1to2}, %xmm25
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x09,0x7b]
vreducepd $0x7b,(%rcx){1to2}, %xmm25
// Displacement boundaries for the 128-bit memory form: 2032 and -2048 are
// emitted as one displacement byte (0x7f / 0x80); 2048 and -2064 take four.
// CHECK: vreducepd $123, 2032(%rdx), %xmm25
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x4a,0x7f,0x7b]
vreducepd $0x7b,2032(%rdx), %xmm25
// CHECK: vreducepd $123, 2048(%rdx), %xmm25
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x8a,0x00,0x08,0x00,0x00,0x7b]
vreducepd $0x7b,2048(%rdx), %xmm25
// CHECK: vreducepd $123, -2048(%rdx), %xmm25
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x4a,0x80,0x7b]
vreducepd $0x7b,-2048(%rdx), %xmm25
// CHECK: vreducepd $123, -2064(%rdx), %xmm25
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
vreducepd $0x7b,-2064(%rdx), %xmm25
// Same boundaries for the {1to2} broadcast form: 1016 / -1024 take one
// displacement byte, 1024 / -1032 take four.
// CHECK: vreducepd $123, 1016(%rdx){1to2}, %xmm25
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x4a,0x7f,0x7b]
vreducepd $0x7b,1016(%rdx){1to2}, %xmm25
// CHECK: vreducepd $123, 1024(%rdx){1to2}, %xmm25
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x8a,0x00,0x04,0x00,0x00,0x7b]
vreducepd $0x7b,1024(%rdx){1to2}, %xmm25
// CHECK: vreducepd $123, -1024(%rdx){1to2}, %xmm25
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x4a,0x80,0x7b]
vreducepd $0x7b,-1024(%rdx){1to2}, %xmm25
// CHECK: vreducepd $123, -1032(%rdx){1to2}, %xmm25
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
vreducepd $0x7b,-1032(%rdx){1to2}, %xmm25
// vreducepd, 256-bit form writing %ymm28 from %ymm17: plain, masked ({%k4})
// and zero-masked ({%k4} {z}) register forms, then full-vector and {1to4}
// broadcast memory forms. Immediates are 0xab (171) and 0x7b (123).
// CHECK: vreducepd $171, %ymm17, %ymm28
// CHECK: encoding: [0x62,0x23,0xfd,0x28,0x56,0xe1,0xab]
vreducepd $0xab, %ymm17, %ymm28
// CHECK: vreducepd $171, %ymm17, %ymm28 {%k4}
// CHECK: encoding: [0x62,0x23,0xfd,0x2c,0x56,0xe1,0xab]
vreducepd $0xab, %ymm17, %ymm28 {%k4}
// CHECK: vreducepd $171, %ymm17, %ymm28 {%k4} {z}
// CHECK: encoding: [0x62,0x23,0xfd,0xac,0x56,0xe1,0xab]
vreducepd $0xab, %ymm17, %ymm28 {%k4} {z}
// CHECK: vreducepd $123, %ymm17, %ymm28
// CHECK: encoding: [0x62,0x23,0xfd,0x28,0x56,0xe1,0x7b]
vreducepd $0x7b, %ymm17, %ymm28
// CHECK: vreducepd $123, (%rcx), %ymm28
// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x21,0x7b]
vreducepd $0x7b,(%rcx), %ymm28
// CHECK: vreducepd $123, 4660(%rax,%r14,8), %ymm28
// CHECK: encoding: [0x62,0x23,0xfd,0x28,0x56,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
vreducepd $0x7b,4660(%rax,%r14,8), %ymm28
// CHECK: vreducepd $123, (%rcx){1to4}, %ymm28
// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x21,0x7b]
vreducepd $0x7b,(%rcx){1to4}, %ymm28
// Displacement boundaries for the 256-bit memory form: 4064 and -4096 are
// emitted as one displacement byte (0x7f / 0x80); 4096 and -4128 take four.
// CHECK: vreducepd $123, 4064(%rdx), %ymm28
// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x62,0x7f,0x7b]
vreducepd $0x7b,4064(%rdx), %ymm28
// CHECK: vreducepd $123, 4096(%rdx), %ymm28
// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0xa2,0x00,0x10,0x00,0x00,0x7b]
vreducepd $0x7b,4096(%rdx), %ymm28
// CHECK: vreducepd $123, -4096(%rdx), %ymm28
// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x62,0x80,0x7b]
vreducepd $0x7b,-4096(%rdx), %ymm28
// CHECK: vreducepd $123, -4128(%rdx), %ymm28
// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0xa2,0xe0,0xef,0xff,0xff,0x7b]
vreducepd $0x7b,-4128(%rdx), %ymm28
// Same boundaries for the {1to4} broadcast form: 1016 / -1024 take one
// displacement byte, 1024 / -1032 take four.
// CHECK: vreducepd $123, 1016(%rdx){1to4}, %ymm28
// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x62,0x7f,0x7b]
vreducepd $0x7b,1016(%rdx){1to4}, %ymm28
// CHECK: vreducepd $123, 1024(%rdx){1to4}, %ymm28
// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0xa2,0x00,0x04,0x00,0x00,0x7b]
vreducepd $0x7b,1024(%rdx){1to4}, %ymm28
// CHECK: vreducepd $123, -1024(%rdx){1to4}, %ymm28
// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x62,0x80,0x7b]
vreducepd $0x7b,-1024(%rdx){1to4}, %ymm28
// CHECK: vreducepd $123, -1032(%rdx){1to4}, %ymm28
// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
vreducepd $0x7b,-1032(%rdx){1to4}, %ymm28
// vreduceps, 128-bit form writing %xmm29 from %xmm21: plain, masked ({%k7})
// and zero-masked ({%k7} {z}) register forms, then full-vector and {1to4}
// broadcast memory forms. Immediates are 0xab (171) and 0x7b (123).
// CHECK: vreduceps $171, %xmm21, %xmm29
// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x56,0xed,0xab]
vreduceps $0xab, %xmm21, %xmm29
// CHECK: vreduceps $171, %xmm21, %xmm29 {%k7}
// CHECK: encoding: [0x62,0x23,0x7d,0x0f,0x56,0xed,0xab]
vreduceps $0xab, %xmm21, %xmm29 {%k7}
// CHECK: vreduceps $171, %xmm21, %xmm29 {%k7} {z}
// CHECK: encoding: [0x62,0x23,0x7d,0x8f,0x56,0xed,0xab]
vreduceps $0xab, %xmm21, %xmm29 {%k7} {z}
// CHECK: vreduceps $123, %xmm21, %xmm29
// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x56,0xed,0x7b]
vreduceps $0x7b, %xmm21, %xmm29
// CHECK: vreduceps $123, (%rcx), %xmm29
// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0x29,0x7b]
vreduceps $0x7b,(%rcx), %xmm29
// CHECK: vreduceps $123, 4660(%rax,%r14,8), %xmm29
// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x56,0xac,0xf0,0x34,0x12,0x00,0x00,0x7b]
vreduceps $0x7b,4660(%rax,%r14,8), %xmm29
// CHECK: vreduceps $123, (%rcx){1to4}, %xmm29
// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0x29,0x7b]
vreduceps $0x7b,(%rcx){1to4}, %xmm29
// Displacement boundaries for the 128-bit memory form: 2032 and -2048 are
// emitted as one displacement byte (0x7f / 0x80); 2048 and -2064 take four.
// CHECK: vreduceps $123, 2032(%rdx), %xmm29
// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0x6a,0x7f,0x7b]
vreduceps $0x7b,2032(%rdx), %xmm29
// CHECK: vreduceps $123, 2048(%rdx), %xmm29
// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0xaa,0x00,0x08,0x00,0x00,0x7b]
vreduceps $0x7b,2048(%rdx), %xmm29
// CHECK: vreduceps $123, -2048(%rdx), %xmm29
// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0x6a,0x80,0x7b]
vreduceps $0x7b,-2048(%rdx), %xmm29
// CHECK: vreduceps $123, -2064(%rdx), %xmm29
// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
vreduceps $0x7b,-2064(%rdx), %xmm29
// Same boundaries for the {1to4} broadcast form: 508 / -512 take one
// displacement byte, 512 / -516 take four.
// CHECK: vreduceps $123, 508(%rdx){1to4}, %xmm29
// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0x6a,0x7f,0x7b]
vreduceps $0x7b,508(%rdx){1to4}, %xmm29
// CHECK: vreduceps $123, 512(%rdx){1to4}, %xmm29
// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0xaa,0x00,0x02,0x00,0x00,0x7b]
vreduceps $0x7b,512(%rdx){1to4}, %xmm29
// CHECK: vreduceps $123, -512(%rdx){1to4}, %xmm29
// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0x6a,0x80,0x7b]
vreduceps $0x7b,-512(%rdx){1to4}, %xmm29
// CHECK: vreduceps $123, -516(%rdx){1to4}, %xmm29
// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
vreduceps $0x7b,-516(%rdx){1to4}, %xmm29
// vreduceps, 256-bit form writing %ymm25 from %ymm23: plain, masked ({%k3})
// and zero-masked ({%k3} {z}) register forms, then full-vector and {1to8}
// broadcast memory forms. Immediates are 0xab (171) and 0x7b (123).
// CHECK: vreduceps $171, %ymm23, %ymm25
// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0xcf,0xab]
vreduceps $0xab, %ymm23, %ymm25
// CHECK: vreduceps $171, %ymm23, %ymm25 {%k3}
// CHECK: encoding: [0x62,0x23,0x7d,0x2b,0x56,0xcf,0xab]
vreduceps $0xab, %ymm23, %ymm25 {%k3}
// CHECK: vreduceps $171, %ymm23, %ymm25 {%k3} {z}
// CHECK: encoding: [0x62,0x23,0x7d,0xab,0x56,0xcf,0xab]
vreduceps $0xab, %ymm23, %ymm25 {%k3} {z}
// CHECK: vreduceps $123, %ymm23, %ymm25
// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0xcf,0x7b]
vreduceps $0x7b, %ymm23, %ymm25
// CHECK: vreduceps $123, (%rcx), %ymm25
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x09,0x7b]
vreduceps $0x7b,(%rcx), %ymm25
// CHECK: vreduceps $123, 4660(%rax,%r14,8), %ymm25
// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b]
vreduceps $0x7b,4660(%rax,%r14,8), %ymm25
// CHECK: vreduceps $123, (%rcx){1to8}, %ymm25
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x09,0x7b]
vreduceps $0x7b,(%rcx){1to8}, %ymm25
// Displacement boundaries for the 256-bit memory form: 4064 and -4096 are
// emitted as one displacement byte (0x7f / 0x80); 4096 and -4128 take four.
// CHECK: vreduceps $123, 4064(%rdx), %ymm25
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x4a,0x7f,0x7b]
vreduceps $0x7b,4064(%rdx), %ymm25
// CHECK: vreduceps $123, 4096(%rdx), %ymm25
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x8a,0x00,0x10,0x00,0x00,0x7b]
vreduceps $0x7b,4096(%rdx), %ymm25
// CHECK: vreduceps $123, -4096(%rdx), %ymm25
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x4a,0x80,0x7b]
vreduceps $0x7b,-4096(%rdx), %ymm25
// CHECK: vreduceps $123, -4128(%rdx), %ymm25
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x8a,0xe0,0xef,0xff,0xff,0x7b]
vreduceps $0x7b,-4128(%rdx), %ymm25
// Same boundaries for the {1to8} broadcast form: 508 / -512 take one
// displacement byte, 512 / -516 take four.
// CHECK: vreduceps $123, 508(%rdx){1to8}, %ymm25
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x4a,0x7f,0x7b]
vreduceps $0x7b,508(%rdx){1to8}, %ymm25
// CHECK: vreduceps $123, 512(%rdx){1to8}, %ymm25
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x8a,0x00,0x02,0x00,0x00,0x7b]
vreduceps $0x7b,512(%rdx){1to8}, %ymm25
// CHECK: vreduceps $123, -512(%rdx){1to8}, %ymm25
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x4a,0x80,0x7b]
vreduceps $0x7b,-512(%rdx){1to8}, %ymm25
// CHECK: vreduceps $123, -516(%rdx){1to8}, %ymm25
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x8a,0xfc,0xfd,0xff,0xff,0x7b]
vreduceps $0x7b,-516(%rdx){1to8}, %ymm25
// vreducepd, 128-bit form writing %xmm18: {1to2} broadcast and
// displacement-boundary memory cases (the register forms for %xmm18 appear
// earlier in this file). 2032 / -2048 are emitted as one displacement byte
// (0x7f / 0x80); 2048 / -2064 take a four-byte displacement.
// CHECK: vreducepd $123, (%rcx){1to2}, %xmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x11,0x7b]
vreducepd $0x7b,(%rcx){1to2}, %xmm18
// CHECK: vreducepd $123, 2032(%rdx), %xmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x52,0x7f,0x7b]
vreducepd $0x7b,2032(%rdx), %xmm18
// CHECK: vreducepd $123, 2048(%rdx), %xmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x92,0x00,0x08,0x00,0x00,0x7b]
vreducepd $0x7b,2048(%rdx), %xmm18
// CHECK: vreducepd $123, -2048(%rdx), %xmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x52,0x80,0x7b]
vreducepd $0x7b,-2048(%rdx), %xmm18
// CHECK: vreducepd $123, -2064(%rdx), %xmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x92,0xf0,0xf7,0xff,0xff,0x7b]
vreducepd $0x7b,-2064(%rdx), %xmm18
// Same boundaries for the {1to2} broadcast form: 1016 / -1024 take one
// displacement byte, 1024 / -1032 take four.
// CHECK: vreducepd $123, 1016(%rdx){1to2}, %xmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x52,0x7f,0x7b]
vreducepd $0x7b,1016(%rdx){1to2}, %xmm18
// CHECK: vreducepd $123, 1024(%rdx){1to2}, %xmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x92,0x00,0x04,0x00,0x00,0x7b]
vreducepd $0x7b,1024(%rdx){1to2}, %xmm18
// CHECK: vreducepd $123, -1024(%rdx){1to2}, %xmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x52,0x80,0x7b]
vreducepd $0x7b,-1024(%rdx){1to2}, %xmm18
// CHECK: vreducepd $123, -1032(%rdx){1to2}, %xmm18
// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x92,0xf8,0xfb,0xff,0xff,0x7b]
vreducepd $0x7b,-1032(%rdx){1to2}, %xmm18
// vreducepd, 256-bit form writing %ymm25 from %ymm29: plain, masked ({%k1})
// and zero-masked ({%k1} {z}) register forms, then full-vector and {1to4}
// broadcast memory forms. Immediates are 0xab (171) and 0x7b (123).
// CHECK: vreducepd $171, %ymm29, %ymm25
// CHECK: encoding: [0x62,0x03,0xfd,0x28,0x56,0xcd,0xab]
vreducepd $0xab, %ymm29, %ymm25
// CHECK: vreducepd $171, %ymm29, %ymm25 {%k1}
// CHECK: encoding: [0x62,0x03,0xfd,0x29,0x56,0xcd,0xab]
vreducepd $0xab, %ymm29, %ymm25 {%k1}
// CHECK: vreducepd $171, %ymm29, %ymm25 {%k1} {z}
// CHECK: encoding: [0x62,0x03,0xfd,0xa9,0x56,0xcd,0xab]
vreducepd $0xab, %ymm29, %ymm25 {%k1} {z}
// CHECK: vreducepd $123, %ymm29, %ymm25
// CHECK: encoding: [0x62,0x03,0xfd,0x28,0x56,0xcd,0x7b]
vreducepd $0x7b, %ymm29, %ymm25
// CHECK: vreducepd $123, (%rcx), %ymm25
// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x09,0x7b]
vreducepd $0x7b,(%rcx), %ymm25
// CHECK: vreducepd $123, 291(%rax,%r14,8), %ymm25
// CHECK: encoding: [0x62,0x23,0xfd,0x28,0x56,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vreducepd $0x7b,291(%rax,%r14,8), %ymm25
// CHECK: vreducepd $123, (%rcx){1to4}, %ymm25
// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x09,0x7b]
vreducepd $0x7b,(%rcx){1to4}, %ymm25
// Displacement boundaries for the 256-bit memory form: 4064 and -4096 are
// emitted as one displacement byte (0x7f / 0x80); 4096 and -4128 take four.
// CHECK: vreducepd $123, 4064(%rdx), %ymm25
// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x4a,0x7f,0x7b]
vreducepd $0x7b,4064(%rdx), %ymm25
// CHECK: vreducepd $123, 4096(%rdx), %ymm25
// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x8a,0x00,0x10,0x00,0x00,0x7b]
vreducepd $0x7b,4096(%rdx), %ymm25
// CHECK: vreducepd $123, -4096(%rdx), %ymm25
// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x4a,0x80,0x7b]
vreducepd $0x7b,-4096(%rdx), %ymm25
// CHECK: vreducepd $123, -4128(%rdx), %ymm25
// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x8a,0xe0,0xef,0xff,0xff,0x7b]
vreducepd $0x7b,-4128(%rdx), %ymm25
// Same boundaries for the {1to4} broadcast form: 1016 / -1024 take one
// displacement byte, 1024 / -1032 take four.
// CHECK: vreducepd $123, 1016(%rdx){1to4}, %ymm25
// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x4a,0x7f,0x7b]
vreducepd $0x7b,1016(%rdx){1to4}, %ymm25
// CHECK: vreducepd $123, 1024(%rdx){1to4}, %ymm25
// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x8a,0x00,0x04,0x00,0x00,0x7b]
vreducepd $0x7b,1024(%rdx){1to4}, %ymm25
// CHECK: vreducepd $123, -1024(%rdx){1to4}, %ymm25
// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x4a,0x80,0x7b]
vreducepd $0x7b,-1024(%rdx){1to4}, %ymm25
// CHECK: vreducepd $123, -1032(%rdx){1to4}, %ymm25
// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
vreducepd $0x7b,-1032(%rdx){1to4}, %ymm25
// vreduceps, 128-bit form writing %xmm20 from %xmm23: plain, masked ({%k7})
// and zero-masked ({%k7} {z}) register forms, then full-vector and {1to4}
// broadcast memory forms. Immediates are 0xab (171) and 0x7b (123).
// CHECK: vreduceps $171, %xmm23, %xmm20
// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x56,0xe7,0xab]
vreduceps $0xab, %xmm23, %xmm20
// CHECK: vreduceps $171, %xmm23, %xmm20 {%k7}
// CHECK: encoding: [0x62,0xa3,0x7d,0x0f,0x56,0xe7,0xab]
vreduceps $0xab, %xmm23, %xmm20 {%k7}
// CHECK: vreduceps $171, %xmm23, %xmm20 {%k7} {z}
// CHECK: encoding: [0x62,0xa3,0x7d,0x8f,0x56,0xe7,0xab]
vreduceps $0xab, %xmm23, %xmm20 {%k7} {z}
// CHECK: vreduceps $123, %xmm23, %xmm20
// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x56,0xe7,0x7b]
vreduceps $0x7b, %xmm23, %xmm20
// CHECK: vreduceps $123, (%rcx), %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0x21,0x7b]
vreduceps $0x7b,(%rcx), %xmm20
// CHECK: vreduceps $123, 291(%rax,%r14,8), %xmm20
// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x56,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
vreduceps $0x7b,291(%rax,%r14,8), %xmm20
// CHECK: vreduceps $123, (%rcx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0x21,0x7b]
vreduceps $0x7b,(%rcx){1to4}, %xmm20
// Displacement boundaries for the 128-bit memory form: 2032 and -2048 are
// emitted as one displacement byte (0x7f / 0x80); 2048 and -2064 take four.
// CHECK: vreduceps $123, 2032(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0x62,0x7f,0x7b]
vreduceps $0x7b,2032(%rdx), %xmm20
// CHECK: vreduceps $123, 2048(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0xa2,0x00,0x08,0x00,0x00,0x7b]
vreduceps $0x7b,2048(%rdx), %xmm20
// CHECK: vreduceps $123, -2048(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0x62,0x80,0x7b]
vreduceps $0x7b,-2048(%rdx), %xmm20
// CHECK: vreduceps $123, -2064(%rdx), %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
vreduceps $0x7b,-2064(%rdx), %xmm20
// Same boundaries for the {1to4} broadcast form: 508 / -512 take one
// displacement byte, 512 / -516 take four.
// CHECK: vreduceps $123, 508(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0x62,0x7f,0x7b]
vreduceps $0x7b,508(%rdx){1to4}, %xmm20
// CHECK: vreduceps $123, 512(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0xa2,0x00,0x02,0x00,0x00,0x7b]
vreduceps $0x7b,512(%rdx){1to4}, %xmm20
// CHECK: vreduceps $123, -512(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0x62,0x80,0x7b]
vreduceps $0x7b,-512(%rdx){1to4}, %xmm20
// CHECK: vreduceps $123, -516(%rdx){1to4}, %xmm20
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
vreduceps $0x7b,-516(%rdx){1to4}, %xmm20
// vreduceps, 256-bit form writing %ymm26 from %ymm22: plain, masked ({%k6})
// and zero-masked ({%k6} {z}) register forms, then full-vector and {1to8}
// broadcast memory forms. Immediates are 0xab (171) and 0x7b (123).
// CHECK: vreduceps $171, %ymm22, %ymm26
// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0xd6,0xab]
vreduceps $0xab, %ymm22, %ymm26
// CHECK: vreduceps $171, %ymm22, %ymm26 {%k6}
// CHECK: encoding: [0x62,0x23,0x7d,0x2e,0x56,0xd6,0xab]
vreduceps $0xab, %ymm22, %ymm26 {%k6}
// CHECK: vreduceps $171, %ymm22, %ymm26 {%k6} {z}
// CHECK: encoding: [0x62,0x23,0x7d,0xae,0x56,0xd6,0xab]
vreduceps $0xab, %ymm22, %ymm26 {%k6} {z}
// CHECK: vreduceps $123, %ymm22, %ymm26
// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0xd6,0x7b]
vreduceps $0x7b, %ymm22, %ymm26
// CHECK: vreduceps $123, (%rcx), %ymm26
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x11,0x7b]
vreduceps $0x7b,(%rcx), %ymm26
// CHECK: vreduceps $123, 291(%rax,%r14,8), %ymm26
// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
vreduceps $0x7b,291(%rax,%r14,8), %ymm26
// CHECK: vreduceps $123, (%rcx){1to8}, %ymm26
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x11,0x7b]
vreduceps $0x7b,(%rcx){1to8}, %ymm26
// Displacement boundaries for the 256-bit memory form: 4064 and -4096 are
// emitted as one displacement byte (0x7f / 0x80); 4096 and -4128 take four.
// CHECK: vreduceps $123, 4064(%rdx), %ymm26
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x52,0x7f,0x7b]
vreduceps $0x7b,4064(%rdx), %ymm26
// CHECK: vreduceps $123, 4096(%rdx), %ymm26
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x92,0x00,0x10,0x00,0x00,0x7b]
vreduceps $0x7b,4096(%rdx), %ymm26
// CHECK: vreduceps $123, -4096(%rdx), %ymm26
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x52,0x80,0x7b]
vreduceps $0x7b,-4096(%rdx), %ymm26
// CHECK: vreduceps $123, -4128(%rdx), %ymm26
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x92,0xe0,0xef,0xff,0xff,0x7b]
vreduceps $0x7b,-4128(%rdx), %ymm26
// Same boundaries for the {1to8} broadcast form: 508 / -512 take one
// displacement byte, 512 / -516 take four.
// CHECK: vreduceps $123, 508(%rdx){1to8}, %ymm26
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x52,0x7f,0x7b]
vreduceps $0x7b,508(%rdx){1to8}, %ymm26
// CHECK: vreduceps $123, 512(%rdx){1to8}, %ymm26
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x92,0x00,0x02,0x00,0x00,0x7b]
vreduceps $0x7b,512(%rdx){1to8}, %ymm26
// CHECK: vreduceps $123, -512(%rdx){1to8}, %ymm26
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x52,0x80,0x7b]
vreduceps $0x7b,-512(%rdx){1to8}, %ymm26
// CHECK: vreduceps $123, -516(%rdx){1to8}, %ymm26
// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x92,0xfc,0xfd,0xff,0xff,0x7b]
vreduceps $0x7b,-516(%rdx){1to8}, %ymm26
// CHECK: vcvtpd2qq %xmm22, %xmm24
// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x7b,0xc6]
vcvtpd2qq %xmm22, %xmm24

View File

@ -16285,6 +16285,246 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// CHECK: encoding: [0x62,0x62,0x4d,0x30,0x2c,0x8a,0xfc,0xfd,0xff,0xff]
vscalefps -516(%rdx){1to8}, %ymm22, %ymm25
// vrndscalepd, 128-bit form writing %xmm29 from %xmm28: plain, masked
// ({%k4}) and zero-masked ({%k4} {z}) register forms, then full-vector and
// {1to2} broadcast memory forms. Immediates are 0xab (171) and 0x7b (123).
// CHECK: vrndscalepd $171, %xmm28, %xmm29
// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x09,0xec,0xab]
vrndscalepd $0xab, %xmm28, %xmm29
// CHECK: vrndscalepd $171, %xmm28, %xmm29 {%k4}
// CHECK: encoding: [0x62,0x03,0xfd,0x0c,0x09,0xec,0xab]
vrndscalepd $0xab, %xmm28, %xmm29 {%k4}
// CHECK: vrndscalepd $171, %xmm28, %xmm29 {%k4} {z}
// CHECK: encoding: [0x62,0x03,0xfd,0x8c,0x09,0xec,0xab]
vrndscalepd $0xab, %xmm28, %xmm29 {%k4} {z}
// CHECK: vrndscalepd $123, %xmm28, %xmm29
// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x09,0xec,0x7b]
vrndscalepd $0x7b, %xmm28, %xmm29
// CHECK: vrndscalepd $123, (%rcx), %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x09,0x29,0x7b]
vrndscalepd $0x7b, (%rcx), %xmm29
// CHECK: vrndscalepd $123, 291(%rax,%r14,8), %xmm29
// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x09,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
vrndscalepd $0x7b, 291(%rax,%r14,8), %xmm29
// CHECK: vrndscalepd $123, (%rcx){1to2}, %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0x29,0x7b]
vrndscalepd $0x7b, (%rcx){1to2}, %xmm29
// Displacement boundaries for the 128-bit memory form: 2032 and -2048 are
// emitted as one displacement byte (0x7f / 0x80); 2048 and -2064 take four.
// CHECK: vrndscalepd $123, 2032(%rdx), %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x09,0x6a,0x7f,0x7b]
vrndscalepd $0x7b, 2032(%rdx), %xmm29
// CHECK: vrndscalepd $123, 2048(%rdx), %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x09,0xaa,0x00,0x08,0x00,0x00,0x7b]
vrndscalepd $0x7b, 2048(%rdx), %xmm29
// CHECK: vrndscalepd $123, -2048(%rdx), %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x09,0x6a,0x80,0x7b]
vrndscalepd $0x7b, -2048(%rdx), %xmm29
// CHECK: vrndscalepd $123, -2064(%rdx), %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x09,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
vrndscalepd $0x7b, -2064(%rdx), %xmm29
// Same boundaries for the {1to2} broadcast form: 1016 / -1024 take one
// displacement byte, 1024 / -1032 take four.
// CHECK: vrndscalepd $123, 1016(%rdx){1to2}, %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0x6a,0x7f,0x7b]
vrndscalepd $0x7b, 1016(%rdx){1to2}, %xmm29
// CHECK: vrndscalepd $123, 1024(%rdx){1to2}, %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0xaa,0x00,0x04,0x00,0x00,0x7b]
vrndscalepd $0x7b, 1024(%rdx){1to2}, %xmm29
// CHECK: vrndscalepd $123, -1024(%rdx){1to2}, %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0x6a,0x80,0x7b]
vrndscalepd $0x7b, -1024(%rdx){1to2}, %xmm29
// CHECK: vrndscalepd $123, -1032(%rdx){1to2}, %xmm29
// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
vrndscalepd $0x7b, -1032(%rdx){1to2}, %xmm29
// vrndscalepd, 256-bit forms with destination %ymm17.
// Register source %ymm22: unmasked, with write mask {%k7}, and with
// {%k7} {z}. The hexadecimal immediate in the input ($0xab) is expected to
// be printed back in decimal ($171).
// CHECK: vrndscalepd $171, %ymm22, %ymm17
// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x09,0xce,0xab]
vrndscalepd $0xab, %ymm22, %ymm17
// CHECK: vrndscalepd $171, %ymm22, %ymm17 {%k7}
// CHECK: encoding: [0x62,0xa3,0xfd,0x2f,0x09,0xce,0xab]
vrndscalepd $0xab, %ymm22, %ymm17 {%k7}
// CHECK: vrndscalepd $171, %ymm22, %ymm17 {%k7} {z}
// CHECK: encoding: [0x62,0xa3,0xfd,0xaf,0x09,0xce,0xab]
vrndscalepd $0xab, %ymm22, %ymm17 {%k7} {z}
// Second immediate value, 0x7b (printed as $123), used for the register
// form and for every memory form that follows.
// CHECK: vrndscalepd $123, %ymm22, %ymm17
// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x09,0xce,0x7b]
vrndscalepd $0x7b, %ymm22, %ymm17
// Memory sources: plain base register, base + scaled index + displacement
// (with extended index register %r14), and a {1to4} broadcast.
// CHECK: vrndscalepd $123, (%rcx), %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x09,0x7b]
vrndscalepd $0x7b, (%rcx), %ymm17
// CHECK: vrndscalepd $123, 291(%rax,%r14,8), %ymm17
// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x09,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vrndscalepd $0x7b, 291(%rax,%r14,8), %ymm17
// CHECK: vrndscalepd $123, (%rcx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x09,0x7b]
vrndscalepd $0x7b, (%rcx){1to4}, %ymm17
// Displacement boundaries, non-broadcast operand: 4064 and -4096 are expected
// as one displacement byte (0x7f / 0x80); 4096 and -4128 take four bytes.
// CHECK: vrndscalepd $123, 4064(%rdx), %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x4a,0x7f,0x7b]
vrndscalepd $0x7b, 4064(%rdx), %ymm17
// CHECK: vrndscalepd $123, 4096(%rdx), %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x8a,0x00,0x10,0x00,0x00,0x7b]
vrndscalepd $0x7b, 4096(%rdx), %ymm17
// CHECK: vrndscalepd $123, -4096(%rdx), %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x4a,0x80,0x7b]
vrndscalepd $0x7b, -4096(%rdx), %ymm17
// CHECK: vrndscalepd $123, -4128(%rdx), %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x8a,0xe0,0xef,0xff,0xff,0x7b]
vrndscalepd $0x7b, -4128(%rdx), %ymm17
// Displacement boundaries, {1to4} broadcast operand: the one-byte range is
// 1016 / -1024 here; 1024 and -1032 take four bytes.
// CHECK: vrndscalepd $123, 1016(%rdx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x4a,0x7f,0x7b]
vrndscalepd $0x7b, 1016(%rdx){1to4}, %ymm17
// CHECK: vrndscalepd $123, 1024(%rdx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x8a,0x00,0x04,0x00,0x00,0x7b]
vrndscalepd $0x7b, 1024(%rdx){1to4}, %ymm17
// CHECK: vrndscalepd $123, -1024(%rdx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x4a,0x80,0x7b]
vrndscalepd $0x7b, -1024(%rdx){1to4}, %ymm17
// CHECK: vrndscalepd $123, -1032(%rdx){1to4}, %ymm17
// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
vrndscalepd $0x7b, -1032(%rdx){1to4}, %ymm17
// vrndscaleps, 128-bit forms with destination %xmm22.
// Register source %xmm26: unmasked, with write mask {%k4}, and with
// {%k4} {z}. The hexadecimal immediate in the input ($0xab) is expected to
// be printed back in decimal ($171).
// CHECK: vrndscaleps $171, %xmm26, %xmm22
// CHECK: encoding: [0x62,0x83,0x7d,0x08,0x08,0xf2,0xab]
vrndscaleps $0xab, %xmm26, %xmm22
// CHECK: vrndscaleps $171, %xmm26, %xmm22 {%k4}
// CHECK: encoding: [0x62,0x83,0x7d,0x0c,0x08,0xf2,0xab]
vrndscaleps $0xab, %xmm26, %xmm22 {%k4}
// CHECK: vrndscaleps $171, %xmm26, %xmm22 {%k4} {z}
// CHECK: encoding: [0x62,0x83,0x7d,0x8c,0x08,0xf2,0xab]
vrndscaleps $0xab, %xmm26, %xmm22 {%k4} {z}
// Second immediate value, 0x7b (printed as $123), used for the register
// form and for every memory form that follows.
// CHECK: vrndscaleps $123, %xmm26, %xmm22
// CHECK: encoding: [0x62,0x83,0x7d,0x08,0x08,0xf2,0x7b]
vrndscaleps $0x7b, %xmm26, %xmm22
// Memory sources: plain base register, base + scaled index + displacement
// (with extended index register %r14), and a {1to4} broadcast.
// CHECK: vrndscaleps $123, (%rcx), %xmm22
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0x31,0x7b]
vrndscaleps $0x7b, (%rcx), %xmm22
// CHECK: vrndscaleps $123, 291(%rax,%r14,8), %xmm22
// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x08,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
vrndscaleps $0x7b, 291(%rax,%r14,8), %xmm22
// CHECK: vrndscaleps $123, (%rcx){1to4}, %xmm22
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x08,0x31,0x7b]
vrndscaleps $0x7b, (%rcx){1to4}, %xmm22
// Displacement boundaries, non-broadcast operand: 2032 and -2048 are expected
// as one displacement byte (0x7f / 0x80); 2048 and -2064 take four bytes.
// CHECK: vrndscaleps $123, 2032(%rdx), %xmm22
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0x72,0x7f,0x7b]
vrndscaleps $0x7b, 2032(%rdx), %xmm22
// CHECK: vrndscaleps $123, 2048(%rdx), %xmm22
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0xb2,0x00,0x08,0x00,0x00,0x7b]
vrndscaleps $0x7b, 2048(%rdx), %xmm22
// CHECK: vrndscaleps $123, -2048(%rdx), %xmm22
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0x72,0x80,0x7b]
vrndscaleps $0x7b, -2048(%rdx), %xmm22
// CHECK: vrndscaleps $123, -2064(%rdx), %xmm22
// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0xb2,0xf0,0xf7,0xff,0xff,0x7b]
vrndscaleps $0x7b, -2064(%rdx), %xmm22
// Displacement boundaries, {1to4} broadcast operand: the one-byte range is
// 508 / -512 here; 512 and -516 take four bytes.
// CHECK: vrndscaleps $123, 508(%rdx){1to4}, %xmm22
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x08,0x72,0x7f,0x7b]
vrndscaleps $0x7b, 508(%rdx){1to4}, %xmm22
// CHECK: vrndscaleps $123, 512(%rdx){1to4}, %xmm22
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x08,0xb2,0x00,0x02,0x00,0x00,0x7b]
vrndscaleps $0x7b, 512(%rdx){1to4}, %xmm22
// CHECK: vrndscaleps $123, -512(%rdx){1to4}, %xmm22
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x08,0x72,0x80,0x7b]
vrndscaleps $0x7b, -512(%rdx){1to4}, %xmm22
// CHECK: vrndscaleps $123, -516(%rdx){1to4}, %xmm22
// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x08,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
vrndscaleps $0x7b, -516(%rdx){1to4}, %xmm22
// vrndscaleps, 256-bit forms with destination %ymm19.
// Register source %ymm17: unmasked, with write mask {%k7}, and with
// {%k7} {z}. The hexadecimal immediate in the input ($0xab) is expected to
// be printed back in decimal ($171).
// CHECK: vrndscaleps $171, %ymm17, %ymm19
// CHECK: encoding: [0x62,0xa3,0x7d,0x28,0x08,0xd9,0xab]
vrndscaleps $0xab, %ymm17, %ymm19
// CHECK: vrndscaleps $171, %ymm17, %ymm19 {%k7}
// CHECK: encoding: [0x62,0xa3,0x7d,0x2f,0x08,0xd9,0xab]
vrndscaleps $0xab, %ymm17, %ymm19 {%k7}
// CHECK: vrndscaleps $171, %ymm17, %ymm19 {%k7} {z}
// CHECK: encoding: [0x62,0xa3,0x7d,0xaf,0x08,0xd9,0xab]
vrndscaleps $0xab, %ymm17, %ymm19 {%k7} {z}
// Second immediate value, 0x7b (printed as $123), used for the register
// form and for every memory form that follows.
// CHECK: vrndscaleps $123, %ymm17, %ymm19
// CHECK: encoding: [0x62,0xa3,0x7d,0x28,0x08,0xd9,0x7b]
vrndscaleps $0x7b, %ymm17, %ymm19
// Memory sources: plain base register, base + scaled index + displacement
// (with extended index register %r14), and a {1to8} broadcast.
// CHECK: vrndscaleps $123, (%rcx), %ymm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x19,0x7b]
vrndscaleps $0x7b, (%rcx), %ymm19
// CHECK: vrndscaleps $123, 291(%rax,%r14,8), %ymm19
// CHECK: encoding: [0x62,0xa3,0x7d,0x28,0x08,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vrndscaleps $0x7b, 291(%rax,%r14,8), %ymm19
// CHECK: vrndscaleps $123, (%rcx){1to8}, %ymm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x19,0x7b]
vrndscaleps $0x7b, (%rcx){1to8}, %ymm19
// Displacement boundaries, non-broadcast operand: 4064 and -4096 are expected
// as one displacement byte (0x7f / 0x80); 4096 and -4128 take four bytes.
// CHECK: vrndscaleps $123, 4064(%rdx), %ymm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x5a,0x7f,0x7b]
vrndscaleps $0x7b, 4064(%rdx), %ymm19
// CHECK: vrndscaleps $123, 4096(%rdx), %ymm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x9a,0x00,0x10,0x00,0x00,0x7b]
vrndscaleps $0x7b, 4096(%rdx), %ymm19
// CHECK: vrndscaleps $123, -4096(%rdx), %ymm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x5a,0x80,0x7b]
vrndscaleps $0x7b, -4096(%rdx), %ymm19
// CHECK: vrndscaleps $123, -4128(%rdx), %ymm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x9a,0xe0,0xef,0xff,0xff,0x7b]
vrndscaleps $0x7b, -4128(%rdx), %ymm19
// Displacement boundaries, {1to8} broadcast operand: the one-byte range is
// 508 / -512 here; 512 and -516 take four bytes.
// CHECK: vrndscaleps $123, 508(%rdx){1to8}, %ymm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x5a,0x7f,0x7b]
vrndscaleps $0x7b, 508(%rdx){1to8}, %ymm19
// CHECK: vrndscaleps $123, 512(%rdx){1to8}, %ymm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x9a,0x00,0x02,0x00,0x00,0x7b]
vrndscaleps $0x7b, 512(%rdx){1to8}, %ymm19
// CHECK: vrndscaleps $123, -512(%rdx){1to8}, %ymm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x5a,0x80,0x7b]
vrndscaleps $0x7b, -512(%rdx){1to8}, %ymm19
// CHECK: vrndscaleps $123, -516(%rdx){1to8}, %ymm19
// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
vrndscaleps $0x7b, -516(%rdx){1to8}, %ymm19
// CHECK: vcvtps2pd %xmm27, %xmm20
// CHECK: encoding: [0x62,0x81,0x7c,0x08,0x5a,0xe3]
vcvtps2pd %xmm27, %xmm20