AVX-512: Added intrinsics for vcvt, vcvtt, vrndscale, vcmp
Printing rounding control. Encoding for EVEX_RC (rounding control).
llvm-svn: 198277
parent 623b0d64b3
commit de3f751baf
@@ -2712,10 +2712,42 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".

// Vector convert
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  def int_x86_avx512_cvt_ps2dq_512 : GCCBuiltin<"__builtin_ia32_cvtps2dq512">,
              Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty], [IntrNoMem]>;
  def int_x86_avx512_cvtdq2_ps_512 : GCCBuiltin<"__builtin_ia32_cvtdq2ps512">,
              Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_cvttps2dq_512: GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">,
              Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty,
                         llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_cvttps2udq_512: GCCBuiltin<"__builtin_ia32_cvttps2udq512_mask">,
              Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty,
                         llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_cvttpd2dq_512: GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">,
              Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty,
                         llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_cvttpd2udq_512: GCCBuiltin<"__builtin_ia32_cvttpd2udq512_mask">,
              Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty,
                         llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_rndscale_ps_512: GCCBuiltin<"__builtin_ia32_rndscaleps_mask">,
              Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty,
                         llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_rndscale_pd_512: GCCBuiltin<"__builtin_ia32_rndscalepd_mask">,
              Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty,
                         llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_cvtps2dq_512: GCCBuiltin<"__builtin_ia32_cvtps2dq512_mask">,
              Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty,
                         llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_cvtpd2dq_512: GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">,
              Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty,
                         llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_cvtps2udq_512: GCCBuiltin<"__builtin_ia32_cvtps2udq512_mask">,
              Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty,
                         llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_cvtpd2udq_512: GCCBuiltin<"__builtin_ia32_cvtpd2udq512_mask">,
              Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty,
                         llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_cvtdq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtdq2ps512_mask">,
              Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty,
                         llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_cvtdq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtdq2pd512_mask">,
              Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_v8f64_ty,
                         llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
}

// Vector load with broadcast
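Note: each masked convert intrinsic above takes (source, result vector for masked-off lanes, write-mask, rounding-mode immediate). A minimal IR sketch of a call, modeled on the tests added later in this commit (the function name is illustrative):

  declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)

  define <16 x i32> @cvt_sketch(<16 x float> %a) {
    ; i16 -1 selects all 16 lanes; i32 2 is the ru rounding immediate used in the tests below
    %r = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %a,
                  <16 x i32> zeroinitializer, i16 -1, i32 2)
    ret <16 x i32> %r
  }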
@@ -2820,13 +2852,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
              Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
                        [IntrNoMem]>;

  def int_x86_avx512_rndscale_ps_512 : GCCBuiltin<"__builtin_ia32_rndscaleps512">,
              Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty,
                         llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd512">,
              Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty,
                         llvm_i32_ty], [IntrNoMem]>;

  def int_x86_avx512_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512">,
              Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty], [IntrNoMem]>;
  def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512">,
@@ -3075,12 +3100,27 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".

// Misc.
let TargetPrefix = "x86" in {
  def int_x86_avx512_cmpeq_pi_512 : GCCBuiltin<"__builtin_ia32_cmpeqpi512">,
              Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
  def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">,
              Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty,
                         llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
  def int_x86_avx512_mask_cmp_pd_512 : GCCBuiltin<"__builtin_ia32_cmppd512_mask">,
              Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty,
                         llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;

  def int_x86_avx512_mask_pcmpeq_d_512 : GCCBuiltin<"__builtin_ia32_pcmpeqd512_mask">,
              Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
                        [IntrNoMem]>;
  def int_x86_avx512_mask_pcmpeq_q_512 : GCCBuiltin<"__builtin_ia32_pcmpeqq512_mask">,
              Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
                        [IntrNoMem]>;
  def int_x86_avx512_mask_pand_d_512 : GCCBuiltin<"__builtin_ia32_pandd512_mask">,
              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
                         llvm_v16i32_ty, llvm_i16_ty],
                        [IntrNoMem]>;
  def int_x86_avx512_mask_pand_q_512 : GCCBuiltin<"__builtin_ia32_pandq512_mask">,
              Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
                         llvm_v8i64_ty, llvm_i8_ty],
                        [IntrNoMem]>;
  def int_x86_avx512_and_pi : GCCBuiltin<"__builtin_ia32_andpi512">,
              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
                        [IntrNoMem]>;
}

//===----------------------------------------------------------------------===//
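Note: the mask compare intrinsics return the per-lane comparison result directly as a bit-mask integer. A minimal sketch of a call (signature taken from the definition above; the function name is illustrative):

  declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)

  define i16 @pcmpeq_sketch(<16 x i32> %a, <16 x i32> %b) {
    ; i16 -1 = all-ones write-mask, so all 16 lanes are compared
    %r = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
    ret i16 %r
  }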
@@ -123,6 +123,18 @@ void X86ATTInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,
  }
}

void X86ATTInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op,
                                             raw_ostream &O) {
  int64_t Imm = MI->getOperand(Op).getImm() & 0x1f;
  switch (Imm) {
  case 0: O << "{rn-sae}"; break;
  case 1: O << "{rd-sae}"; break;
  case 2: O << "{ru-sae}"; break;
  case 3: O << "{rz-sae}"; break;
  default: llvm_unreachable("Invalid AVX-512 rounding control argument!");
  }
}
/// printPCRelImm - This is used to print an immediate value that ends up
/// being encoded as a pc-relative value (e.g. for jumps and calls). These
/// print slightly differently than normal immediates. For example, a $ is not
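Note: through printRoundingControl above, the immediate 1 prints as {rd-sae}; a test-style sketch mirroring the avx512-intrinsics test added below:

  define <16 x i32> @rc_print_sketch(<16 x float> %a) {
  ; CHECK: vcvtps2udq {rd-sae}
    %r = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a,
                  <16 x i32> zeroinitializer, i16 -1, i32 1)
    ret <16 x i32> %r
  }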
@@ -43,6 +43,7 @@ public:
  void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS);
  void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
  void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
  void printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &OS);

  void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
    printMemReference(MI, OpNo, O);
@@ -113,6 +113,19 @@ void X86IntelInstPrinter::printAVXCC(const MCInst *MI, unsigned Op,
  }
}

void X86IntelInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op,
                                               raw_ostream &O) {
  int64_t Imm = MI->getOperand(Op).getImm() & 0x1f;
  switch (Imm) {
  case 0: O << "{rn-sae}"; break;
  case 1: O << "{rd-sae}"; break;
  case 2: O << "{ru-sae}"; break;
  case 3: O << "{rz-sae}"; break;
  default: llvm_unreachable("Invalid AVX-512 rounding control argument!");
  }
}

/// printPCRelImm - This is used to print an immediate value that ends up
/// being encoded as a pc-relative value.
void X86IntelInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
@@ -40,6 +40,7 @@ public:
  void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &O);
  void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
  void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
  void printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &OS);

  void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
    O << "opaque ptr ";
@@ -535,6 +535,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
  bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
  bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
  bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
  bool HasEVEX_RC = false;

  // VEX_R: opcode extension equivalent to REX.R in
  // 1's complement (inverted) form
@@ -610,6 +611,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
  // EVEX_b
  unsigned char EVEX_b = 0;

  // EVEX_rc
  unsigned char EVEX_rc = 0;

  // EVEX_aaa
  unsigned char EVEX_aaa = 0;
@@ -676,6 +680,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,

  // Classify VEX_B, VEX_4V, VEX_R, VEX_X
  unsigned NumOps = Desc.getNumOperands();
  unsigned RcOperand = NumOps-1;
  unsigned CurOp = 0;
  if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
    ++CurOp;
@@ -834,7 +839,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
      VEX_X = 0x0;
    CurOp++;
    if (HasVEX_4VOp3)
      VEX_4V = getVEXRegisterEncoding(MI, CurOp);
      VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
    if (EVEX_b) {
      assert(RcOperand >= CurOp);
      EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3;
      HasEVEX_RC = true;
    }
    break;
  case X86II::MRMDestReg:
    // MRMDestReg instructions forms:
@@ -934,12 +944,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
             (VEX_4V << 3) |
             (EVEX_U << 2) |
             VEX_PP, CurByte, OS);
    EmitByte((EVEX_z << 7) |
             (EVEX_L2 << 6) |
             (VEX_L << 5) |
             (EVEX_b << 4) |
             (EVEX_V2 << 3) |
             EVEX_aaa, CurByte, OS);
    if (HasEVEX_RC)
      EmitByte((EVEX_z << 7) |
               (EVEX_rc << 5) |
               (EVEX_b << 4) |
               (EVEX_V2 << 3) |
               EVEX_aaa, CurByte, OS);
    else
      EmitByte((EVEX_z << 7) |
               (EVEX_L2 << 6) |
               (VEX_L << 5) |
               (EVEX_b << 4) |
               (EVEX_V2 << 3) |
               EVEX_aaa, CurByte, OS);
  }
}
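Note: with rounding control present, EVEX_rc occupies the bits otherwise carrying EVEX_L2 and VEX_L. As a worked check against the vcvtps2udq {rd-sae} test added below (encoding 0x62,0xf1,0x7c,0x38,0x79,0xc0), the fourth prefix byte is

  0x38 = (EVEX_z=0 << 7) | (EVEX_rc=1 << 5) | (EVEX_b=1 << 4) | (EVEX_V2=1 << 3) | (EVEX_aaa=0)

which matches the HasEVEX_RC path above.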
@@ -1206,7 +1223,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
  // It uses the EVEX.aaa field?
  bool HasEVEX = (TSFlags >> X86II::VEXShift) & X86II::EVEX;
  bool HasEVEX_K = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K);
  bool HasEVEX_B = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_B);

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
  if (MemoryOperand != -1) MemoryOperand += CurOp;
@@ -1302,6 +1320,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
    CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1;
    if (HasVEX_4VOp3)
      ++CurOp;
    // do not count the rounding control operand
    if (HasEVEX_B)
      NumOps--;
    break;

  case X86II::MRMSrcMem: {
@@ -802,36 +802,42 @@ defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64,

// avx512_cmp_packed - sse 1 & 2 compare packed instructions
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
                             X86MemOperand x86memop, Operand CC,
                             SDNode OpNode, ValueType vt, string asm,
                             string asm_alt, Domain d> {
                             X86MemOperand x86memop, ValueType vt,
                             string suffix, Domain d> {
  def rri : AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
             [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
             !strconcat("vcmp${cc}", suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
  def rrib: AVX512PIi8<0xC2, MRMSrcReg,
             (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc, i32imm:$sae),
             !strconcat("vcmp${cc}", suffix,
                        "\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"),
             [], d>, EVEX_B;
  def rmi : AVX512PIi8<0xC2, MRMSrcMem,
             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
             (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
             !strconcat("vcmp", suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set KRC:$dst,
                   (OpNode (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;
                   (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;

  // Accept explicit immediate argument form instead of comparison code.
  let neverHasSideEffects = 1 in {
    def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
               (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
               asm_alt, [], d>;
               !strconcat("vcmp", suffix,
                          "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
    def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
               (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
               asm_alt, [], d>;
               !strconcat("vcmp", suffix,
                          "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
  }
}

defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, AVXCC, X86cmpm, v16f32,
               "vcmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "vcmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedSingle>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, AVXCC, X86cmpm, v8f64,
               "vcmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "vcmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedDouble>, OpSize, EVEX_4V, VEX_W, EVEX_V512,
defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, v16f32,
               "ps", SSEPackedSingle>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, v8f64,
               "pd", SSEPackedDouble>, OpSize, EVEX_4V, VEX_W, EVEX_V512,
               EVEX_CD8<64, CD8VF>;

def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
@@ -849,7 +855,31 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
            imm:$cc), VK8)>;

def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
                (v16f32 VR512:$src2), imm:$cc, (i16 -1),
                FROUND_NO_EXC)),
          (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
                             (I8Imm imm:$cc), (i32 0)), GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
               (v8f64 VR512:$src2), imm:$cc, (i8 -1),
               FROUND_NO_EXC)),
          (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2,
                             (I8Imm imm:$cc), (i32 0)), GR8)>;

def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1),
                (v16f32 VR512:$src2), imm:$cc, (i16 -1),
                FROUND_CURRENT)),
          (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2,
                             (I8Imm imm:$cc)), GR16)>;

def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
               (v8f64 VR512:$src2), imm:$cc, (i8 -1),
               FROUND_CURRENT)),
          (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2,
                             (I8Imm imm:$cc)), GR8)>;

// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
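Note: the compare patterns above select the rrib ({sae}) form when the intrinsic's rounding argument is FROUND_NO_EXC (i32 8) and the plain rri form for FROUND_CURRENT (i32 4); e.g., mirroring the cmp test added below:

  define i16 @cmp_sae_sketch(<16 x float> %a, <16 x float> %b) {
  ; CHECK: vcmpleps {sae}
    %r = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b,
                  i32 2, i16 -1, i32 8)
    ret i16 %r
  }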
@@ -2704,6 +2734,9 @@ let neverHasSideEffects = 1 in {
          !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
          [(set DstRC:$dst,
                (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
  def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
          !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
          [], d>, EVEX, EVEX_B;
  let mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
          !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
@@ -2712,6 +2745,24 @@ let neverHasSideEffects = 1 in {
  } // neverHasSideEffects = 1
}

multiclass avx512_vcvtt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
                           RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
                           X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
                           Domain d> {
  let neverHasSideEffects = 1 in {
    def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
            [(set DstRC:$dst,
                  (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
    let mayLoad = 1 in
    def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
            [(set DstRC:$dst,
                  (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
  } // neverHasSideEffects = 1
}

defm VCVTPD2PSZ : avx512_vcvt_fp<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
                  memopv8f64, f512mem, v8f32, v8f64,
                  SSEPackedSingle>, EVEX_V512, VEX_W, OpSize,
@@ -2736,26 +2787,36 @@ defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
                   SSEPackedDouble>, EVEX_V512, XS,
                   EVEX_CD8<32, CD8VH>;

defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
defm VCVTTPS2DQZ : avx512_vcvtt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
                   memopv16f32, f512mem, v16i32, v16f32,
                   SSEPackedSingle>, EVEX_V512, XS,
                   EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
defm VCVTTPD2DQZ : avx512_vcvtt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
                   memopv8f64, f512mem, v8i32, v8f64,
                   SSEPackedDouble>, EVEX_V512, OpSize, VEX_W,
                   EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
defm VCVTTPS2UDQZ : avx512_vcvtt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
                    memopv16f32, f512mem, v16i32, v16f32,
                    SSEPackedSingle>, EVEX_V512,
                    EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
// cvttps2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src),
                   (v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)),
          (VCVTTPS2UDQZrr VR512:$src)>;

defm VCVTTPD2UDQZ : avx512_vcvtt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
                    memopv8f64, f512mem, v8i32, v8f64,
                    SSEPackedDouble>, EVEX_V512, VEX_W,
                    EVEX_CD8<64, CD8VF>;

// cvttpd2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src),
                  (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)),
          (VCVTTPD2UDQZrr VR512:$src)>;

defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
                   memopv4i64, f256mem, v8f64, v8i32,
                   SSEPackedDouble>, EVEX_V512, XS,
@@ -2771,22 +2832,57 @@ def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
          (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(int_x86_avx512_cvtdq2_ps_512 VR512:$src),
          (VCVTDQ2PSZrr VR512:$src)>;
def : Pat<(int_x86_avx512_cvtdq2_ps_512 (bitconvert (memopv8i64 addr:$src))),
          (VCVTDQ2PSZrm addr:$src)>;
def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src),
                   (v16f32 immAllZerosV), (i16 -1), imm:$rc)),
          (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;

def VCVTPS2DQZrr : AVX512BI<0x5B, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
                    "vcvtps2dq\t{$src, $dst|$dst, $src}",
                    [(set VR512:$dst,
                          (int_x86_avx512_cvt_ps2dq_512 VR512:$src))],
                    IIC_SSE_CVT_PS_RR>, EVEX, EVEX_V512;
def VCVTPS2DQZrm : AVX512BI<0x5B, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
                    "vcvtps2dq\t{$src, $dst|$dst, $src}",
                    [(set VR512:$dst,
                          (int_x86_avx512_cvt_ps2dq_512 (memopv16f32 addr:$src)))],
                    IIC_SSE_CVT_PS_RM>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;

multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
                              RegisterClass DstRC, PatFrag mem_frag,
                              X86MemOperand x86memop, Domain d> {
  let neverHasSideEffects = 1 in {
    def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
            [], d>, EVEX;
    def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
            !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
            [], d>, EVEX, EVEX_B;
    let mayLoad = 1 in
    def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
            [], d>, EVEX;
  } // neverHasSideEffects = 1
}

defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
                  memopv16f32, f512mem, SSEPackedSingle>, OpSize,
                  EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
                  memopv8f64, f512mem, SSEPackedDouble>, XD, VEX_W,
                  EVEX_V512, EVEX_CD8<64, CD8VF>;

def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src),
                    (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
           (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;

def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src),
                   (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
           (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;

defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
                   memopv16f32, f512mem, SSEPackedSingle>,
                   EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
                   memopv8f64, f512mem, SSEPackedDouble>, VEX_W,
                   EVEX_V512, EVEX_CD8<64, CD8VF>;

def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src),
                    (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
           (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;

def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src),
                   (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
           (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;

let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
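Note: through the patterns above, a rounding immediate on the masked convert intrinsics selects the rrb (embedded-rounding) instruction form. A hedged sketch, assuming the 0-3 rn/rd/ru/rz convention that printRoundingControl expects (the function name is illustrative):

  declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)

  define <16 x float> @cvtdq2ps_rz_sketch(<16 x i32> %a) {
    ; expected to print along the lines of: vcvtdq2ps {rz-sae}, %zmm0, %zmm0
    %r = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %a,
                  <16 x float> zeroinitializer, i16 -1, i32 3)
    ret <16 x float> %r
  }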
@@ -3251,18 +3347,70 @@ let ExeDomain = GenericDomain in {
  } // ExeDomain = GenericDomain
}

let Predicates = [HasAVX512] in {
defm VRNDSCALE : avx512_fp_binop_rm<0x0A, 0x0B, "vrndscale",
                 int_x86_avx512_rndscale_ss,
                 int_x86_avx512_rndscale_sd>, EVEX_4V;
multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
                           X86MemOperand x86memop, RegisterClass RC,
                           PatFrag mem_frag, Domain d> {
let ExeDomain = d in {
  // Intrinsic operation, reg.
  // Vector intrinsic operation, reg
  def r : AVX512AIi8<opc, MRMSrcReg,
          (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
          !strconcat(OpcodeStr,
                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
          []>, EVEX;

defm VRNDSCALEZ : avx512_fp_unop_rm<0x08, 0x09, "vrndscale", f256mem, VR512,
                  memopv16f32, memopv8f64,
                  int_x86_avx512_rndscale_ps_512,
                  int_x86_avx512_rndscale_pd_512, CD8VF>,
                  EVEX, EVEX_V512;
  // Vector intrinsic operation, mem
  def m : AVX512AIi8<opc, MRMSrcMem,
          (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
          !strconcat(OpcodeStr,
                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
          []>, EVEX;
} // ExeDomain
}

defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
                    memopv16f32, SSEPackedSingle>, EVEX_V512,
                    EVEX_CD8<32, CD8VF>;

def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
                   imm:$src2, (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1),
                   FROUND_CURRENT)),
          (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;

defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
                    memopv8f64, SSEPackedDouble>, EVEX_V512,
                    VEX_W, EVEX_CD8<64, CD8VF>;

def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
                  imm:$src2, (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1),
                  FROUND_CURRENT)),
          (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;

multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  Operand x86memop, RegisterClass RC, Domain d> {
let ExeDomain = d in {
  def r : AVX512AIi8<opc, MRMSrcReg,
          (outs RC:$dst), (ins RC:$src1, RC:$src2, i32i8imm:$src3),
          !strconcat(OpcodeStr,
                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
          []>, EVEX_4V;

  def m : AVX512AIi8<opc, MRMSrcMem,
          (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
          !strconcat(OpcodeStr,
                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
          []>, EVEX_4V;
} // ExeDomain
}

defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
                   SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
                   SSEPackedDouble>, EVEX_CD8<64, CD8VT1>;

def : Pat<(ffloor FR32X:$src),
          (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
def : Pat<(f64 (ffloor FR64X:$src)),
@@ -3285,26 +3433,26 @@ def : Pat<(f64 (ftrunc FR64X:$src)),
          (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;

def : Pat<(v16f32 (ffloor VR512:$src)),
          (VRNDSCALEZPSr VR512:$src, (i32 0x1))>;
          (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
def : Pat<(v16f32 (fnearbyint VR512:$src)),
          (VRNDSCALEZPSr VR512:$src, (i32 0xC))>;
          (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
def : Pat<(v16f32 (fceil VR512:$src)),
          (VRNDSCALEZPSr VR512:$src, (i32 0x2))>;
          (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
def : Pat<(v16f32 (frint VR512:$src)),
          (VRNDSCALEZPSr VR512:$src, (i32 0x4))>;
          (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
def : Pat<(v16f32 (ftrunc VR512:$src)),
          (VRNDSCALEZPSr VR512:$src, (i32 0x3))>;
          (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;

def : Pat<(v8f64 (ffloor VR512:$src)),
          (VRNDSCALEZPDr VR512:$src, (i32 0x1))>;
          (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
def : Pat<(v8f64 (fnearbyint VR512:$src)),
          (VRNDSCALEZPDr VR512:$src, (i32 0xC))>;
          (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
def : Pat<(v8f64 (fceil VR512:$src)),
          (VRNDSCALEZPDr VR512:$src, (i32 0x2))>;
          (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
def : Pat<(v8f64 (frint VR512:$src)),
          (VRNDSCALEZPDr VR512:$src, (i32 0x4))>;
          (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
def : Pat<(v8f64 (ftrunc VR512:$src)),
          (VRNDSCALEZPDr VR512:$src, (i32 0x3))>;
          (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;

//-------------------------------------------------
// Integer truncate and extend operations
@@ -470,6 +470,8 @@ def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
// 512-bit bitconvert pattern fragments
def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>;
def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>;

def vzmovl_v2i64 : PatFrag<(ops node:$src),
                           (bitconvert (v2i64 (X86vzmovl
@@ -486,6 +488,14 @@ def fp32imm0 : PatLeaf<(f32 fpimm), [{
  return N->isExactlyValue(+0.0);
}]>;

def I8Imm : SDNodeXForm<imm, [{
  // Transformation function: get the low 8 bits.
  return getI8Imm((uint8_t)N->getZExtValue());
}]>;

def FROUND_NO_EXC : ImmLeaf<i32, [{ return Imm == 8; }]>;
def FROUND_CURRENT : ImmLeaf<i32, [{ return Imm == 4; }]>;

// BYTE_imm - Transform bit immediates into byte immediates.
def BYTE_imm  : SDNodeXForm<imm, [{
  // Transformation function: imm >> 3
@@ -510,6 +510,10 @@ def GR32orGR64 : RegisterOperand<GR32> {
  let ParserMatchClass = X86GR32orGR64AsmOperand;
}

def AVX512RC : Operand<i32> {
  let PrintMethod = "printRoundingControl";
  let OperandType = "OPERAND_IMMEDIATE";
}

// Sign-extended immediate classes. We don't need to define the full lattice
// here because there is no instruction with an ambiguity between ImmSExti64i32
// and ImmSExti32i8.
@@ -74,21 +74,21 @@ define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
}
declare <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double>) nounwind readnone

define <8 x double> @test_rndscale_pd_512(<8 x double> %a0) {
  ; CHECK: vrndscale
  %res = call <8 x double> @llvm.x86.avx512.rndscale.pd.512(<8 x double> %a0, i32 7) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

define <8 x double> @test7(<8 x double> %a) {
; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
  %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.rndscale.pd.512(<8 x double>, i32) nounwind readnone

declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <16 x float> @test_rndscale_ps_512(<16 x float> %a0) {
  ; CHECK: vrndscale
  %res = call <16 x float> @llvm.x86.avx512.rndscale.ps.512(<16 x float> %a0, i32 7) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
define <16 x float> @test8(<16 x float> %a) {
; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
  %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rndscale.ps.512(<16 x float>, i32) nounwind readnone

define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
  ; CHECK: vrsqrt14ps
@@ -420,3 +420,31 @@ define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i1> %a0, <8 x i64> %a1, <8 x i64> %a2) nounwind readonly

define <8 x i32> @test_cvtpd2udq(<8 x double> %a) {
;CHECK: vcvtpd2udq {ru-sae}{{.*}}encoding: [0x62,0xf1,0xfc,0x58,0x79,0xc0]
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %a, <8 x i32> zeroinitializer, i8 -1, i32 2)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)

define <16 x i32> @test_cvtps2udq(<16 x float> %a) {
;CHECK: vcvtps2udq {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x79,0xc0]
  %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a, <16 x i32> zeroinitializer, i16 -1, i32 1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)

define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
  %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
  ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i16, i32)

define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
  %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i8, i32)
@@ -25,6 +25,49 @@
using namespace llvm;
using namespace X86Disassembler;

/// stringForContext - Returns a string containing the name of a particular
/// InstructionContext, usually for diagnostic purposes.
///
/// @param insnContext  - The instruction class to transform to a string.
/// @return             - A statically-allocated string constant that contains the
///                       name of the instruction class.
static inline const char* stringForContext(InstructionContext insnContext) {
  switch (insnContext) {
  default:
    llvm_unreachable("Unhandled instruction class");
#define ENUM_ENTRY(n, r, d)   case n: return #n; break;
#define ENUM_ENTRY_K_B(n, r, d) ENUM_ENTRY(n, r, d) ENUM_ENTRY(n##_K_B, r, d)\
        ENUM_ENTRY(n##_KZ, r, d) ENUM_ENTRY(n##_K, r, d) ENUM_ENTRY(n##_B, r, d)\
        ENUM_ENTRY(n##_KZ_B, r, d)
  INSTRUCTION_CONTEXTS
#undef ENUM_ENTRY
#undef ENUM_ENTRY_K_B
  }
}

/// stringForOperandType - Like stringForContext, but for OperandTypes.
static inline const char* stringForOperandType(OperandType type) {
  switch (type) {
  default:
    llvm_unreachable("Unhandled type");
#define ENUM_ENTRY(i, d) case i: return #i;
  TYPES
#undef ENUM_ENTRY
  }
}

/// stringForOperandEncoding - like stringForContext, but for
/// OperandEncodings.
static inline const char* stringForOperandEncoding(OperandEncoding encoding) {
  switch (encoding) {
  default:
    llvm_unreachable("Unhandled encoding");
#define ENUM_ENTRY(i, d) case i: return #i;
  ENCODINGS
#undef ENUM_ENTRY
  }
}

/// inheritsFrom - Indicates whether all instructions in one class also belong
/// to another class.
///
@@ -198,6 +241,8 @@ static inline bool inheritsFrom(InstructionContext child,
  case IC_EVEX_L2_K:
  case IC_EVEX_L2_B:
  case IC_EVEX_L2_XS_K:
  case IC_EVEX_L2_XS_B:
  case IC_EVEX_L2_XD_B:
  case IC_EVEX_L2_XD_K:
  case IC_EVEX_L2_OPSIZE_K:
  case IC_EVEX_L2_OPSIZE_B:
@@ -212,6 +257,7 @@ static inline bool inheritsFrom(InstructionContext child,
  case IC_EVEX_L2_W_B:
  case IC_EVEX_L2_W_XS_K:
  case IC_EVEX_L2_W_XD_K:
  case IC_EVEX_L2_W_XD_B:
  case IC_EVEX_L2_W_OPSIZE_K:
  case IC_EVEX_L2_W_OPSIZE_B:
  case IC_EVEX_L2_W_OPSIZE_K_B:
@@ -222,6 +268,8 @@ static inline bool inheritsFrom(InstructionContext child,
  case IC_EVEX_L2_W_OPSIZE_KZ_B:
    return false;
  default:
    errs() << "Unknown instruction class: " <<
      stringForContext((InstructionContext)parent) << "\n";
    llvm_unreachable("Unknown instruction class");
  }
}
@@ -251,49 +299,6 @@ static inline bool outranks(InstructionContext upper,
  return (ranks[upper] > ranks[lower]);
}

/// stringForContext - Returns a string containing the name of a particular
/// InstructionContext, usually for diagnostic purposes.
///
/// @param insnContext  - The instruction class to transform to a string.
/// @return             - A statically-allocated string constant that contains the
///                       name of the instruction class.
static inline const char* stringForContext(InstructionContext insnContext) {
  switch (insnContext) {
  default:
    llvm_unreachable("Unhandled instruction class");
#define ENUM_ENTRY(n, r, d)   case n: return #n; break;
#define ENUM_ENTRY_K_B(n, r, d) ENUM_ENTRY(n, r, d) ENUM_ENTRY(n##_K_B, r, d)\
        ENUM_ENTRY(n##_KZ, r, d) ENUM_ENTRY(n##_K, r, d) ENUM_ENTRY(n##_B, r, d)\
        ENUM_ENTRY(n##_KZ_B, r, d)
  INSTRUCTION_CONTEXTS
#undef ENUM_ENTRY
#undef ENUM_ENTRY_K_B
  }
}

/// stringForOperandType - Like stringForContext, but for OperandTypes.
static inline const char* stringForOperandType(OperandType type) {
  switch (type) {
  default:
    llvm_unreachable("Unhandled type");
#define ENUM_ENTRY(i, d) case i: return #i;
  TYPES
#undef ENUM_ENTRY
  }
}

/// stringForOperandEncoding - like stringForContext, but for
/// OperandEncodings.
static inline const char* stringForOperandEncoding(OperandEncoding encoding) {
  switch (encoding) {
  default:
    llvm_unreachable("Unhandled encoding");
#define ENUM_ENTRY(i, d) case i: return #i;
  ENCODINGS
#undef ENUM_ENTRY
  }
}

/// getDecisionType - Determines whether a ModRM decision with 255 entries can
/// be compacted by eliminating redundant information.
///
@@ -1256,6 +1256,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
  TYPE("i32imm_pcrel",        TYPE_REL32)
  TYPE("SSECC",               TYPE_IMM3)
  TYPE("AVXCC",               TYPE_IMM5)
  TYPE("AVX512RC",            TYPE_IMM32)
  TYPE("brtarget",            TYPE_RELv)
  TYPE("uncondbrtarget",      TYPE_RELv)
  TYPE("brtarget8",           TYPE_REL8)
@@ -1313,6 +1314,7 @@ OperandEncoding RecognizableInstr::immediateEncodingFromString
  ENCODING("u32u8imm",        ENCODING_IB)
  ENCODING("SSECC",           ENCODING_IB)
  ENCODING("AVXCC",           ENCODING_IB)
  ENCODING("AVX512RC",        ENCODING_IB)
  ENCODING("i16imm",          ENCODING_Iv)
  ENCODING("i16i8imm",        ENCODING_IB)
  ENCODING("i32imm",          ENCODING_Iv)