diff --git a/llvm/include/llvm/IntrinsicsX86.td b/llvm/include/llvm/IntrinsicsX86.td index 70a7fce3112e..ca154edcbf21 100644 --- a/llvm/include/llvm/IntrinsicsX86.td +++ b/llvm/include/llvm/IntrinsicsX86.td @@ -141,10 +141,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse_prefetch : GCCBuiltin<"__builtin_ia32_prefetch">, Intrinsic<[llvm_ptr_ty, llvm_int_ty], [IntrWriteMem]>; - def int_x86_sse_movntq : GCCBuiltin<"__builtin_ia32_movntq">, - Intrinsic<[llvm_ptr_ty, llvm_v2i32_ty], [IntrWriteMem]>; - def int_x86_sse_movntps : GCCBuiltin<"__builtin_ia32_movntps">, - Intrinsic<[llvm_ptr_ty, llvm_v4f32_ty], [IntrWriteMem]>; + def int_x86_sse_movnt_ps : GCCBuiltin<"__builtin_ia32_movntps">, + Intrinsic<[llvm_void_ty, llvm_ptr_ty, + llvm_v4f32_ty], [IntrWriteMem]>; def int_x86_sse_sfence : GCCBuiltin<"__builtin_ia32_sfence">, Intrinsic<[llvm_void_ty], [IntrWriteMem]>; } @@ -281,6 +280,19 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v2f64_ty], [IntrWriteMem]>; } +// Cacheability support ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse2_movnt_dq : GCCBuiltin<"__builtin_ia32_movntdq">, + Intrinsic<[llvm_void_ty, llvm_ptr_ty, + llvm_v2i64_ty], [IntrWriteMem]>; + def int_x86_sse2_movnt_pd : GCCBuiltin<"__builtin_ia32_movntpd">, + Intrinsic<[llvm_void_ty, llvm_ptr_ty, + llvm_v2f64_ty], [IntrWriteMem]>; + def int_x86_sse2_movnt_i : GCCBuiltin<"__builtin_ia32_movnti">, + Intrinsic<[llvm_void_ty, llvm_ptr_ty, + llvm_int_ty], [IntrWriteMem]>; +} + // Misc. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse2_packsswb_128 : GCCBuiltin<"__builtin_ia32_packsswb128">, @@ -296,6 +308,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_int_ty, llvm_v2f64_ty], [IntrNoMem]>; def int_x86_sse2_pmovmskb_128 : GCCBuiltin<"__builtin_ia32_pmovmskb128">, Intrinsic<[llvm_int_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_sse2_maskmov_dqu : GCCBuiltin<"__builtin_ia32_maskmovdqu">, + Intrinsic<[llvm_void_ty, llvm_v16i8_ty, + llvm_v16i8_ty, llvm_ptr_ty], [IntrWriteMem]>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index f44526d664b6..79440990fb9d 100644 --- a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -13,6 +13,13 @@ // //===----------------------------------------------------------------------===// +// Instruction templates +// MMXi8 - MMX instructions with ImmT == Imm8 and TB prefix. +class MMXIi8 o, Format F, dag ops, string asm, list pattern> + : X86Inst, TB, Requires<[HasMMX]> { + let Pattern = pattern; +} + // Some 'special' instructions def IMPLICIT_DEF_VR64 : I<0, Pseudo, (ops VR64:$dst), "#IMPLICIT_DEF $dst", @@ -50,3 +57,21 @@ def CVTTPS2PIrr: I<0x2C, MRMSrcReg, (ops VR64:$dst, VR128:$src), def CVTTPS2PIrm: I<0x2C, MRMSrcMem, (ops VR64:$dst, f64mem:$src), "cvttps2pi {$src, $dst|$dst, $src}", []>, TB, Requires<[HasMMX]>; + +// Shuffle and unpack instructions +def PSHUFWri : MMXIi8<0x70, MRMSrcReg, + (ops VR64:$dst, VR64:$src1, i8imm:$src2), + "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>; +def PSHUFWmi : MMXIi8<0x70, MRMSrcMem, + (ops VR64:$dst, i64mem:$src1, i8imm:$src2), + "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>; + +// Misc. +def MOVNTQ : I<0xE7, MRMDestMem, (ops i64mem:$dst, VR64:$src), + "movntq {$src, $dst|$dst, $src}", []>, TB, + Requires<[HasMMX]>; + +def MASKMOVQ : I<0xF7, MRMDestMem, (ops VR64:$src, VR64:$mask), + "maskmovq {$mask, $src|$src, $mask}", []>, TB, + Requires<[HasMMX]>; + diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 8a0f567d6a06..1ff7562a0296 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1321,13 +1321,6 @@ def PACKUSWBrm : PDI<0x67, MRMSrcReg, (ops VR128:$dst, VR128:$src1, } // Shuffle and unpack instructions -def PSHUFWri : PSIi8<0x70, MRMSrcReg, - (ops VR64:$dst, VR64:$src1, i8imm:$src2), - "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>; -def PSHUFWmi : PSIi8<0x70, MRMSrcMem, - (ops VR64:$dst, i64mem:$src1, i8imm:$src2), - "pshufw {$src2, $src1, $dst|$dst, $src1, $src2}", []>; - def PSHUFDri : PDIi8<0x70, MRMSrcReg, (ops VR128:$dst, VR128:$src1, i8imm:$src2), "pshufd {$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1516,6 +1509,12 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (ops R32:$dst, VR128:$src), "pmovmskb {$src, $dst|$dst, $src}", [(set R32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>; +// Conditional store +def MASKMOVDQU : PDI<0xF7, RawFrm, (ops VR128:$src, VR128:$mask), + "maskmovdqu {$mask, $src|$src, $mask}", + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, + Imp<[EDI],[]>; + // Prefetching loads def PREFETCHT0 : I<0x18, MRM1m, (ops i8mem:$src), "prefetcht0 $src", []>, TB, @@ -1531,15 +1530,19 @@ def PREFETCHTNTA : I<0x18, MRM0m, (ops i8mem:$src), Requires<[HasSSE1]>; // Non-temporal stores -def MOVNTQ : I<0xE7, MRMDestMem, (ops i64mem:$dst, VR64:$src), - "movntq {$src, $dst|$dst, $src}", []>, TB, - Requires<[HasSSE1]>; -def MOVNTPS : I<0x2B, MRMDestMem, (ops i128mem:$dst, VR128:$src), - "movntps {$src, $dst|$dst, $src}", []>, TB, - Requires<[HasSSE1]>; -def MASKMOVQ : I<0xF7, MRMDestMem, (ops i64mem:$dst, VR64:$src), - "maskmovq {$src, $dst|$dst, $src}", []>, TB, - Requires<[HasSSE1]>; +def MOVNTPSmr : PSI<0x2B, MRMDestMem, (ops i128mem:$dst, VR128:$src), + "movntps {$src, $dst|$dst, $src}", + [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>; +def MOVNTPDmr : PDI<0x2B, MRMDestMem, (ops i128mem:$dst, VR128:$src), + "movntpd {$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>; +def MOVNTDQmr : PDI<0xE7, MRMDestMem, (ops f128mem:$dst, VR128:$src), + "movntdq {$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; +def MOVNTImr : I<0xC3, MRMDestMem, (ops i32mem:$dst, R32:$src), + "movnti {$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_i addr:$dst, R32:$src)]>, + TB, Requires<[HasSSE2]>; // Store fence def SFENCE : I<0xAE, MRM7m, (ops),