Add separate intrinsics for MMX / SSE shifts with i32 integer operands. This allows us to simplify the horribly complicated matching code.

llvm-svn: 50601
This commit is contained in:
Evan Cheng 2008-05-03 00:52:09 +00:00
parent a1ec89fbf1
commit cdf22f2953
5 changed files with 105 additions and 79 deletions

View File

@ -324,9 +324,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_psll_q : GCCBuiltin<"__builtin_ia32_psllq128">, def int_x86_sse2_psll_q : GCCBuiltin<"__builtin_ia32_psllq128">,
Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
llvm_v2i64_ty], [IntrNoMem]>; llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_sse2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi128">,
Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw128">, def int_x86_sse2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw128">,
Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>; llvm_v8i16_ty], [IntrNoMem]>;
@ -336,15 +333,44 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq128">, def int_x86_sse2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq128">,
Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty, Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
llvm_v2i64_ty], [IntrNoMem]>; llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_sse2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi128">,
Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psra_w : GCCBuiltin<"__builtin_ia32_psraw128">, def int_x86_sse2_psra_w : GCCBuiltin<"__builtin_ia32_psraw128">,
Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty, Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem]>; llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_sse2_psra_d : GCCBuiltin<"__builtin_ia32_psrad128">, def int_x86_sse2_psra_d : GCCBuiltin<"__builtin_ia32_psrad128">,
Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty, Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>; llvm_v4i32_ty], [IntrNoMem]>;
// SSE2 shift intrinsics whose last operand type is llvm_i32_ty, unlike the
// psll/psrl/psra definitions, which list a vector type for the last operand.
// Each one is tied to the corresponding "...i128" GCC builtin.
def int_x86_sse2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi128">,
Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi128">,
Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi128">,
Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi128">,
Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi128">,
Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi128">,
Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi128">,
Intrinsic<[llvm_v8i16_ty, llvm_v8i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi128">,
Intrinsic<[llvm_v4i32_ty, llvm_v4i32_ty,
llvm_i32_ty], [IntrNoMem]>;
// The "dq" shift intrinsics (v2i64 with an i32 last operand). These
// definitions are unchanged by this commit; they are only moved here, after
// the i32-operand shift intrinsics.
def int_x86_sse2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi128">,
Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_sse2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi128">,
Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
} }
// Integer comparison ops // Integer comparison ops
@ -939,6 +965,33 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">, def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">,
Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty, Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
llvm_v1i64_ty], [IntrNoMem]>; llvm_v1i64_ty], [IntrNoMem]>;
// MMX shift intrinsics whose last operand type is llvm_i32_ty, unlike the
// psll/psrl/psra definitions, which list llvm_v1i64_ty for the last operand.
// Each one is tied to the corresponding "...i" GCC builtin.
def int_x86_mmx_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi">,
Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_mmx_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi">,
Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_mmx_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi">,
Intrinsic<[llvm_v1i64_ty, llvm_v1i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_mmx_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi">,
Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_mmx_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi">,
Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_mmx_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi">,
Intrinsic<[llvm_v1i64_ty, llvm_v1i64_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_mmx_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi">,
Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
llvm_i32_ty], [IntrNoMem]>;
def int_x86_mmx_psrai_d : GCCBuiltin<"__builtin_ia32_psradi">,
Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
llvm_i32_ty], [IntrNoMem]>;
} }
// Pack ops. // Pack ops.

View File

@ -118,7 +118,8 @@ let isTwoAddress = 1 in {
} }
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr, Intrinsic IntId> { string OpcodeStr, Intrinsic IntId,
Intrinsic IntId2> {
def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2), (ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@ -131,11 +132,7 @@ let isTwoAddress = 1 in {
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst), def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32i8imm:$src2), (ins VR64:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))]>;
(v1i64 (bitconvert
(v2i32 (vector_shuffle immAllZerosV,
(v2i32 (scalar_to_vector (i32 imm:$src2))),
MMX_MOVL_shuffle_mask))))))]>;
} }
} }
@ -283,23 +280,23 @@ let isTwoAddress = 1 in {
// Shift Instructions // Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
int_x86_mmx_psrl_w>; int_x86_mmx_psrl_w, int_x86_mmx_psrli_w>;
defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
int_x86_mmx_psrl_d>; int_x86_mmx_psrl_d, int_x86_mmx_psrli_d>;
defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
int_x86_mmx_psrl_q>; int_x86_mmx_psrl_q, int_x86_mmx_psrli_q>;
defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_mmx_psll_w>; int_x86_mmx_psll_w, int_x86_mmx_pslli_w>;
defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
int_x86_mmx_psll_d>; int_x86_mmx_psll_d, int_x86_mmx_pslli_d>;
defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
int_x86_mmx_psll_q>; int_x86_mmx_psll_q, int_x86_mmx_pslli_q>;
defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
int_x86_mmx_psra_w>; int_x86_mmx_psra_w, int_x86_mmx_psrai_w>;
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_mmx_psra_d>; int_x86_mmx_psra_d, int_x86_mmx_psrai_d>;
// Comparison Instructions // Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>; defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;

View File

@ -1780,6 +1780,21 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
(bitconvert (memopv2i64 addr:$src2))))]>; (bitconvert (memopv2i64 addr:$src2))))]>;
} }
/// PDI_binop_rmi_int - SSE2 intrinsic binary operator with three forms:
///   rr - opcode `opc`, both operands in VR128 registers, matches IntId.
///   rm - opcode `opc`, second operand loaded from memory (i128mem) and
///        bitconverted from v2i64, matches IntId.
///   ri - opcode `opc2` with format `ImmForm`, second operand an immediate
///        (i32i8imm), matches IntId2 applied to an i32 immediate.
/// IntId is the vector-operand intrinsic; IntId2 is the i32-operand one.
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr,
Intrinsic IntId, Intrinsic IntId2> {
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1,
(bitconvert (memopv2i64 addr:$src2))))]>;
// Immediate form: the pattern is a plain i32 immediate, so no
// scalar_to_vector / bitconvert matching is needed.
def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
}
/// PDI_binop_rm - Simple SSE2 binary operator. /// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> { ValueType OpVT, bit Commutable = 0> {
@ -1854,64 +1869,24 @@ defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
defm PSADBW : PDI_binop_rm_int<0xE0, "psadbw", int_x86_sse2_psad_bw, 1>; defm PSADBW : PDI_binop_rm_int<0xE0, "psadbw", int_x86_sse2_psad_bw, 1>;
defm PSLLW : PDI_binop_rm_int<0xF1, "psllw", int_x86_sse2_psll_w>; defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
defm PSLLD : PDI_binop_rm_int<0xF2, "pslld", int_x86_sse2_psll_d>; int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
defm PSLLQ : PDI_binop_rm_int<0xF3, "psllq", int_x86_sse2_psll_q>; defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
int_x86_sse2_psll_d, int_x86_sse2_pslli_d>;
defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
int_x86_sse2_psll_q, int_x86_sse2_pslli_q>;
defm PSRLW : PDI_binop_rm_int<0xD1, "psrlw", int_x86_sse2_psrl_w>; defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
defm PSRLD : PDI_binop_rm_int<0xD2, "psrld", int_x86_sse2_psrl_d>; int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>;
defm PSRLQ : PDI_binop_rm_int<0xD3, "psrlq", int_x86_sse2_psrl_q>; defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>;
// The immediate form of psrlq is opcode 0x73 /2; 0x72 /2 is psrld. This
// matches the removed PSRLQri definition and MMX_PSRLQ.
defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>;
defm PSRAW : PDI_binop_rm_int<0xE1, "psraw", int_x86_sse2_psra_w>; defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
defm PSRAD : PDI_binop_rm_int<0xE2, "psrad", int_x86_sse2_psra_d>; int_x86_sse2_psra_w, int_x86_sse2_psrai_w>;
// The immediate form of psrad is opcode 0x72 /4; 0x71 /4 is psraw. This
// matches the removed PSRADri definition and MMX_PSRAD.
defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
// Some immediate variants need to match a bit_convert. int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
let Constraints = "$src1 = $dst" in {
def PSLLWri : PDIi8<0x71, MRM6r, (outs VR128:$dst),
(ins VR128:$src1, i32i8imm:$src2),
"psllw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_psll_w VR128:$src1,
(bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
def PSLLDri : PDIi8<0x72, MRM6r, (outs VR128:$dst),
(ins VR128:$src1, i32i8imm:$src2),
"pslld\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_psll_d VR128:$src1,
(scalar_to_vector (i32 imm:$src2))))]>;
def PSLLQri : PDIi8<0x73, MRM6r, (outs VR128:$dst),
(ins VR128:$src1, i32i8imm:$src2),
"psllq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_psll_q VR128:$src1,
(bc_v2i64 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
def PSRLWri : PDIi8<0x71, MRM2r, (outs VR128:$dst),
(ins VR128:$src1, i32i8imm:$src2),
"psrlw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_psrl_w VR128:$src1,
(bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
def PSRLDri : PDIi8<0x72, MRM2r, (outs VR128:$dst),
(ins VR128:$src1, i32i8imm:$src2),
"psrld\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_psrl_d VR128:$src1,
(scalar_to_vector (i32 imm:$src2))))]>;
def PSRLQri : PDIi8<0x73, MRM2r, (outs VR128:$dst),
(ins VR128:$src1, i32i8imm:$src2),
"psrlq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_psrl_q VR128:$src1,
(bc_v2i64 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
def PSRAWri : PDIi8<0x71, MRM4r, (outs VR128:$dst),
(ins VR128:$src1, i32i8imm:$src2),
"psraw\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_psra_w VR128:$src1,
(bc_v8i16 (v4i32 (scalar_to_vector (i32 imm:$src2))))))]>;
def PSRADri : PDIi8<0x72, MRM4r, (outs VR128:$dst),
(ins VR128:$src1, i32i8imm:$src2),
"psrad\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_psra_d VR128:$src1,
(scalar_to_vector (i32 imm:$src2))))]>;
}
// PSRAQ doesn't exist in SSE[1-3].
// 128-bit logical shifts. // 128-bit logical shifts.
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {

View File

@ -122,7 +122,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
if (Name.compare(5,10,"x86.mmx.ps",10) == 0 && if (Name.compare(5,10,"x86.mmx.ps",10) == 0 &&
(Name.compare(13,4,"psll", 4) == 0 || (Name.compare(13,4,"psll", 4) == 0 ||
Name.compare(13,4,"psra", 4) == 0 || Name.compare(13,4,"psra", 4) == 0 ||
Name.compare(13,4,"psrl", 4) == 0)) { Name.compare(13,4,"psrl", 4) == 0) && Name[17] != 'i') {
const llvm::Type *VT = VectorType::get(IntegerType::get(64), 1); const llvm::Type *VT = VectorType::get(IntegerType::get(64), 1);

View File

@ -1,14 +1,15 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psllq | grep 32 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psllq | grep 32
; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psllq | grep 32
; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psrad ; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psrad
define i64 @t1(<1 x i64> %mm1) nounwind { define i64 @t1(<1 x i64> %mm1) nounwind {
entry: entry:
%tmp6 = tail call <1 x i64> @llvm.x86.mmx.psll.q( <1 x i64> %mm1, <1 x i64> <i64 32> ) ; <<1 x i64>> [#uses=1] %tmp6 = tail call <1 x i64> @llvm.x86.mmx.pslli.q( <1 x i64> %mm1, i32 32 ) ; <<1 x i64>> [#uses=1]
%retval1112 = bitcast <1 x i64> %tmp6 to i64 ; <i64> [#uses=1] %retval1112 = bitcast <1 x i64> %tmp6 to i64 ; <i64> [#uses=1]
ret i64 %retval1112 ret i64 %retval1112
} }
declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone
define i64 @t2(<2 x i32> %mm1, <2 x i32> %mm2) nounwind { define i64 @t2(<2 x i32> %mm1, <2 x i32> %mm2) nounwind {
entry: entry: