forked from OSchip/llvm-project
[AVX512] Implement missing patterns for any_extend load lowering.
Differential Revision: http://reviews.llvm.org/D20513

llvm-svn: 270357
This commit is contained in:
parent
5f3fef884f
commit
2ba64ab9ae
|
@ -1132,19 +1132,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
for (MVT VT : MVT::fp_vector_valuetypes())
|
||||
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
|
||||
|
||||
setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
|
||||
|
||||
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
|
||||
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
|
||||
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
|
||||
setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
|
||||
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
|
||||
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
|
||||
setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
|
||||
}
|
||||
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
|
||||
setOperationAction(ISD::SETCC, MVT::i1, Custom);
|
||||
setOperationAction(ISD::SETCCE, MVT::i1, Custom);
|
||||
|
@ -1246,7 +1241,20 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom);
|
||||
|
||||
// FIXME: These commands are available on SSE/AVX2; add the relevant patterns.
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
|
||||
}
|
||||
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
|
||||
|
@ -1433,6 +1441,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
|
||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
|
||||
setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
|
||||
|
@ -1493,6 +1502,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
|
||||
setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
|
||||
}
|
||||
|
||||
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
|
||||
setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
|
||||
if (Subtarget.hasVLX()) {
|
||||
// FIXME: These commands are available on SSE/AVX2; add the relevant patterns.
|
||||
setLoadExtAction(ExtType, MVT::v16i16, MVT::v16i8, Legal);
|
||||
setLoadExtAction(ExtType, MVT::v8i16, MVT::v8i8, Legal);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
|
||||
|
|
|
@ -6380,9 +6380,10 @@ def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))),
|
|||
}
|
||||
|
||||
multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
|
||||
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
|
||||
X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
|
||||
|
||||
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
|
||||
X86MemOperand x86memop, PatFrag LdFrag, SDPatternOperator OpNode,
|
||||
bit IsCodeGenOnly>{
|
||||
let isCodeGenOnly = IsCodeGenOnly in {
|
||||
defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
|
||||
(ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
|
||||
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
|
||||
|
@ -6394,145 +6395,159 @@ multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
|
|||
(DestInfo.VT (LdFrag addr:$src))>,
|
||||
EVEX;
|
||||
}
|
||||
}//isCodeGenOnly
|
||||
}
|
||||
|
||||
// Support full-register inputs (like the SSE patterns).
|
||||
multiclass avx512_extend_lowering<SDNode OpNode, X86VectorVTInfo To,
|
||||
multiclass avx512_extend_lowering<SDPatternOperator OpNode, X86VectorVTInfo To,
|
||||
X86VectorVTInfo From, SubRegIndex SubRegIdx> {
|
||||
def : Pat<(To.VT (OpNode (From.VT From.RC:$src))),
|
||||
(!cast<Instruction>(NAME#To.ZSuffix#"rr")
|
||||
(EXTRACT_SUBREG From.RC:$src, SubRegIdx))>;
|
||||
}
|
||||
|
||||
multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator OpNode, bit IsCodeGenOnly,
|
||||
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
|
||||
let Predicates = [HasVLX, HasBWI] in {
|
||||
defm Z128: avx512_extend_common<opc, OpcodeStr, v8i16x_info,
|
||||
v16i8x_info, i64mem, LdFrag, OpNode>,
|
||||
v16i8x_info, i64mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128;
|
||||
|
||||
defm Z256: avx512_extend_common<opc, OpcodeStr, v16i16x_info,
|
||||
v16i8x_info, i128mem, LdFrag, OpNode>,
|
||||
v16i8x_info, i128mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
avx512_extend_lowering<OpNode, v16i16x_info, v32i8x_info, sub_xmm>,
|
||||
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256;
|
||||
}
|
||||
let Predicates = [HasBWI] in {
|
||||
defm Z : avx512_extend_common<opc, OpcodeStr, v32i16_info,
|
||||
v32i8x_info, i256mem, LdFrag, OpNode>,
|
||||
v32i8x_info, i256mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator OpNode, bit IsCodeGenOnly,
|
||||
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
|
||||
v16i8x_info, i32mem, LdFrag, OpNode>,
|
||||
v16i8x_info, i32mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128;
|
||||
|
||||
defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
|
||||
v16i8x_info, i64mem, LdFrag, OpNode>,
|
||||
v16i8x_info, i64mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
avx512_extend_lowering<OpNode, v8i32x_info, v32i8x_info, sub_xmm>,
|
||||
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256;
|
||||
}
|
||||
let Predicates = [HasAVX512] in {
|
||||
defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
|
||||
v16i8x_info, i128mem, LdFrag, OpNode>,
|
||||
v16i8x_info, i128mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator OpNode, bit IsCodeGenOnly,
|
||||
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
|
||||
v16i8x_info, i16mem, LdFrag, OpNode>,
|
||||
v16i8x_info, i16mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128;
|
||||
|
||||
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
|
||||
v16i8x_info, i32mem, LdFrag, OpNode>,
|
||||
v16i8x_info, i32mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
avx512_extend_lowering<OpNode, v4i64x_info, v32i8x_info, sub_xmm>,
|
||||
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256;
|
||||
}
|
||||
let Predicates = [HasAVX512] in {
|
||||
defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
|
||||
v16i8x_info, i64mem, LdFrag, OpNode>,
|
||||
v16i8x_info, i64mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator OpNode, bit IsCodeGenOnly,
|
||||
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
|
||||
v8i16x_info, i64mem, LdFrag, OpNode>,
|
||||
v8i16x_info, i64mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128;
|
||||
|
||||
defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
|
||||
v8i16x_info, i128mem, LdFrag, OpNode>,
|
||||
v8i16x_info, i128mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
avx512_extend_lowering<OpNode, v8i32x_info, v16i16x_info, sub_xmm>,
|
||||
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256;
|
||||
}
|
||||
let Predicates = [HasAVX512] in {
|
||||
defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
|
||||
v16i16x_info, i256mem, LdFrag, OpNode>,
|
||||
v16i16x_info, i256mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator OpNode, bit IsCodeGenOnly,
|
||||
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
|
||||
v8i16x_info, i32mem, LdFrag, OpNode>,
|
||||
v8i16x_info, i32mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128;
|
||||
|
||||
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
|
||||
v8i16x_info, i64mem, LdFrag, OpNode>,
|
||||
v8i16x_info, i64mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
avx512_extend_lowering<OpNode, v4i64x_info, v16i16x_info, sub_xmm>,
|
||||
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256;
|
||||
}
|
||||
let Predicates = [HasAVX512] in {
|
||||
defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
|
||||
v8i16x_info, i128mem, LdFrag, OpNode>,
|
||||
v8i16x_info, i128mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator OpNode, bit IsCodeGenOnly,
|
||||
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
|
||||
|
||||
let Predicates = [HasVLX, HasAVX512] in {
|
||||
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
|
||||
v4i32x_info, i64mem, LdFrag, OpNode>,
|
||||
v4i32x_info, i64mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
|
||||
|
||||
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
|
||||
v4i32x_info, i128mem, LdFrag, OpNode>,
|
||||
v4i32x_info, i128mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
avx512_extend_lowering<OpNode, v4i64x_info, v8i32x_info, sub_xmm>,
|
||||
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
|
||||
}
|
||||
let Predicates = [HasAVX512] in {
|
||||
defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
|
||||
v8i32x_info, i256mem, LdFrag, OpNode>,
|
||||
v8i32x_info, i256mem, LdFrag, OpNode, IsCodeGenOnly>,
|
||||
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
|
||||
}
|
||||
}
|
||||
|
||||
defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, "z">;
|
||||
defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, "z">;
|
||||
defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, "z">;
|
||||
defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, "z">;
|
||||
defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, "z">;
|
||||
defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, "z">;
|
||||
defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, 0, "z">;
|
||||
defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, 0, "z">;
|
||||
defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, 0, "z">;
|
||||
defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, 0, "z">;
|
||||
defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, 0, "z">;
|
||||
defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, 0, "z">;
|
||||
|
||||
defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, 0, "s">;
|
||||
defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, 0, "s">;
|
||||
defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, 0, "s">;
|
||||
defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, 0, "s">;
|
||||
defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, 0, "s">;
|
||||
defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, 0, "s">;
|
||||
|
||||
defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, "s">;
|
||||
defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, "s">;
|
||||
defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, "s">;
|
||||
defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, "s">;
|
||||
defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, "s">;
|
||||
defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
|
||||
// EXTLOAD patterns, implemented using vpmovz
|
||||
defm VPMOVAXBW : avx512_extend_BW<0x30, "vpmovzxbw", null_frag, 1, "">;
|
||||
defm VPMOVAXBD : avx512_extend_BD<0x31, "vpmovzxbd", null_frag, 1, "">;
|
||||
defm VPMOVAXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", null_frag, 1, "">;
|
||||
defm VPMOVAXWD : avx512_extend_WD<0x33, "vpmovzxwd", null_frag, 1, "">;
|
||||
defm VPMOVAXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", null_frag, 1, "">;
|
||||
defm VPMOVAXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", null_frag, 1, "">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GATHER - SCATTER Operations
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
|
||||
|
||||
|
||||
define void @any_extend_load_v8i64(<8 x i8> * %ptr) {
|
||||
; ALL-LABEL: any_extend_load_v8i64:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vpmovzxbq (%rdi), %zmm0
|
||||
; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; ALL-NEXT: vpmovqb %zmm0, (%rdi)
|
||||
; ALL-NEXT: retq
|
||||
%wide.load = load <8 x i8>, <8 x i8>* %ptr, align 1
|
||||
%1 = zext <8 x i8> %wide.load to <8 x i64>
|
||||
%2 = add nuw nsw <8 x i64> %1, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
|
||||
%3 = xor <8 x i64> %2, zeroinitializer
|
||||
%4 = trunc <8 x i64> %3 to <8 x i8>
|
||||
store <8 x i8> %4, <8 x i8>* %ptr, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @any_extend_load_v8i32(<8 x i8> * %ptr) {
|
||||
; KNL-LABEL: any_extend_load_v8i32:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
|
||||
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
|
||||
; KNL-NEXT: vpaddd %ymm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
|
||||
; KNL-NEXT: vmovq %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: any_extend_load_v8i32:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vpmovzxbd (%rdi), %ymm0
|
||||
; SKX-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
|
||||
; SKX-NEXT: vpmovdb %ymm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
%wide.load = load <8 x i8>, <8 x i8>* %ptr, align 1
|
||||
%1 = zext <8 x i8> %wide.load to <8 x i32>
|
||||
%2 = add nuw nsw <8 x i32> %1, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
||||
%3 = xor <8 x i32> %2, zeroinitializer
|
||||
%4 = trunc <8 x i32> %3 to <8 x i8>
|
||||
store <8 x i8> %4, <8 x i8>* %ptr, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @any_extend_load_v8i16(<8 x i8> * %ptr) {
|
||||
; KNL-LABEL: any_extend_load_v8i16:
|
||||
; KNL: # BB#0:
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
|
||||
; KNL-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
|
||||
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
|
||||
; KNL-NEXT: vmovq %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: any_extend_load_v8i16:
|
||||
; SKX: # BB#0:
|
||||
; SKX-NEXT: vpmovzxbw (%rdi), %xmm0
|
||||
; SKX-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; SKX-NEXT: vpmovwb %xmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
%wide.load = load <8 x i8>, <8 x i8>* %ptr, align 1
|
||||
%1 = zext <8 x i8> %wide.load to <8 x i16>
|
||||
%2 = add nuw nsw <8 x i16> %1, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
|
||||
%3 = xor <8 x i16> %2, zeroinitializer
|
||||
%4 = trunc <8 x i16> %3 to <8 x i8>
|
||||
store <8 x i8> %4, <8 x i8>* %ptr, align 1
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue