forked from OSchip/llvm-project
Implement sse4.2 string/text processing instructions:
Add patterns and instruction encoding information. Add custom lowering to deal with hardwired return register of uncertain type (xmm0). llvm-svn: 79377
This commit is contained in:
parent
7fb1616d60
commit
9fe912de5f
|
@ -7595,6 +7595,43 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
|
|||
return nextMBB;
|
||||
}
|
||||
|
||||
/// Expand a PCMPISTRM128/PCMPESTRM128 pseudo instruction into the real
/// instruction followed by a copy of XMM0 into the pseudo's destination
/// register.
///
/// \param MI      the pseudo instruction to expand; it is deleted before
///                returning.
/// \param BB      the basic block the replacement instructions are appended to.
/// \param numArgs 3 selects the PCMPISTRM opcode, any other value selects
///                PCMPESTRM (callers pass 5).
/// \param memArg  true to select the "rm" opcode, false for "rr".
/// \returns BB.
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
                            unsigned numArgs, bool memArg) const {
  MachineFunction *F = BB->getParent();
  DebugLoc dl = MI->getDebugLoc();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  // Pick the real opcode from the (implicit/explicit length, reg/mem) pair.
  const bool implicitLength = numArgs == 3;
  unsigned Opc;
  if (memArg)
    Opc = implicitLength ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
  else
    Opc = implicitLength ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;

  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));

  // Forward every explicit source operand of the pseudo (operand 0 is its
  // def). Walk the full operand list rather than a fixed numArgs: for the
  // memory forms the i128mem source takes more than one operand slot, so a
  // count of 3/5 would stop before the rest of the address and the immediate.
  // Implicit register operands are skipped, as before.
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &Op = MI->getOperand(i);
    if (Op.isReg() && Op.isImplicit())
      continue;
    MIB.addOperand(Op);
  }

  // The real instruction leaves its result in XMM0; move it into the
  // register the pseudo was defined to write.
  BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
    .addReg(X86::XMM0);

  F->DeleteMachineInstr(MI);

  return BB;
}
|
||||
|
||||
MachineBasicBlock *
|
||||
X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
|
||||
MachineInstr *MI,
|
||||
|
@ -7804,6 +7841,17 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
|||
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
|
||||
return BB;
|
||||
}
|
||||
// String/text processing lowering.
|
||||
case X86::PCMPISTRM128REG:
|
||||
return EmitPCMP(MI, BB, 3, false /* in-mem */);
|
||||
case X86::PCMPISTRM128MEM:
|
||||
return EmitPCMP(MI, BB, 3, true /* in-mem */);
|
||||
case X86::PCMPESTRM128REG:
|
||||
return EmitPCMP(MI, BB, 5, false /* in mem */);
|
||||
case X86::PCMPESTRM128MEM:
|
||||
return EmitPCMP(MI, BB, 5, true /* in mem */);
|
||||
|
||||
// Atomic Lowering.
|
||||
case X86::ATOMAND32:
|
||||
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
|
||||
X86::AND32ri, X86::MOV32rm,
|
||||
|
|
|
@ -693,6 +693,14 @@ namespace llvm {
|
|||
const Value *DstSV, uint64_t DstSVOff,
|
||||
const Value *SrcSV, uint64_t SrcSVOff);
|
||||
|
||||
/// Utility function to emit string processing sse4.2 instructions
|
||||
/// that return in xmm0.
|
||||
// This takes the instruction to expand, the associated machine basic
|
||||
// block, the number of args, and whether or not the second arg is
|
||||
// in memory or not.
|
||||
MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
|
||||
unsigned argNum, bool inMem) const;
|
||||
|
||||
/// Utility function to emit atomic bitwise operations (and, or, xor).
|
||||
// It takes the bitwise instruction to expand, the associated machine basic
|
||||
// block, and the associated X86 opcodes for reg/reg and reg/imm.
|
||||
|
|
|
@ -235,6 +235,11 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
|||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>;
|
||||
|
||||
// SS42AI = SSE 4.2 instructions with TA prefix and an 8-bit immediate.
// Built on Ii8 rather than I: every instruction defined with this template
// takes an i8imm operand, so the format must record the immediate.
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
      : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>;
|
||||
|
||||
// X86-64 Instruction templates...
|
||||
//
|
||||
|
||||
|
@ -288,4 +293,3 @@ class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> patter
|
|||
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
|
||||
// MMXIS - MMX instruction template built on Ii8, tagged with XS and
// predicated on HasMMX.
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
|
||||
|
||||
|
|
|
@ -3657,6 +3657,11 @@ def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
|||
"movntdqa\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE4.2 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
|
||||
|
@ -3739,3 +3744,115 @@ let Constraints = "$src1 = $dst" in {
|
|||
(int_x86_sse42_crc32_64 GR64:$src1, GR64:$src2))]>,
|
||||
OpSize, REX_W;
|
||||
}
|
||||
|
||||
// String/text processing instructions.
//
// PCMPISTRM128REG / PCMPISTRM128MEM are pseudo instructions (opcode 0,
// Pseudo format) selected from int_x86_sse42_pcmpistrm128 with a register or
// a loaded second source. They set usesCustomDAGSchedInserter so that they
// are expanded by the custom inserter; their asm strings are placeholders.
|
||||
let Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
|
||||
def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
||||
"#PCMPISTRM128rr PSEUDO!",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
|
||||
imm:$src3))]>, OpSize;
|
||||
def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
||||
"#PCMPISTRM128rm PSEUDO!",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse42_pcmpistrm128 VR128:$src1,
|
||||
(load addr:$src2),
|
||||
imm:$src3))]>, OpSize;
|
||||
}
|
||||
|
||||
// Real PCMPISTRM encodings (opcode 0x62), register and memory second source.
// They declare no outs and carry no selection pattern; the result register
// is expressed only through Defs = [XMM0, EFLAGS].
let Defs = [XMM0, EFLAGS] in {
|
||||
def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
|
||||
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
||||
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}",
|
||||
[]>, OpSize;
|
||||
def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
|
||||
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
||||
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}",
|
||||
[]>, OpSize;
|
||||
}
|
||||
|
||||
// Pseudo forms of PCMPESTRM (opcode 0, Pseudo format) selected from
// int_x86_sse42_pcmpestrm128. The intrinsic's second and fourth arguments
// are matched against the physical registers EAX and EDX (listed in Uses),
// so only $src1, $src3 and $src5 appear as explicit inputs.
let Defs = [EFLAGS], Uses = [EAX, EDX],
|
||||
usesCustomDAGSchedInserter = 1 in {
|
||||
def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
|
||||
"#PCMPESTRM128rr PSEUDO!",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
|
||||
VR128:$src3,
|
||||
EDX, imm:$src5))]>, OpSize;
|
||||
def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
|
||||
"#PCMPESTRM128rm PSEUDO!",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
|
||||
(load addr:$src3),
|
||||
EDX, imm:$src5))]>, OpSize;
|
||||
}
|
||||
|
||||
// Real PCMPESTRM encodings, register and memory second source. They declare
// no outs and carry no selection pattern; the result register is expressed
// through Defs, and the explicit string lengths through Uses = [EAX, EDX].
// The opcode byte is 0x60 (66 0F 3A 60 /r ib); 0x62 is PCMPISTRM.
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
  def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
        (ins VR128:$src1, VR128:$src3, i8imm:$src5),
        "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}",
        []>, OpSize;
  def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
        (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
        "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}",
        []>, OpSize;
}
|
||||
|
||||
// SS42AI_pcmpistri - defines the "rr" and "rm" forms of pcmpistri (opcode
// 0x63) for the intrinsic passed as IntId128. Both forms have no outs: the
// pattern sets the physical register ECX and marks EFLAGS as implicitly
// defined, matching the enclosing Defs list.
let Defs = [ECX, EFLAGS] in {
|
||||
multiclass SS42AI_pcmpistri<Intrinsic IntId128> {
|
||||
def rr : SS42AI<0x63, MRMSrcReg, (outs),
|
||||
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
||||
"pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
|
||||
[(set ECX,
|
||||
(IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
|
||||
(implicit EFLAGS)]>,
|
||||
OpSize;
|
||||
def rm : SS42AI<0x63, MRMSrcMem, (outs),
|
||||
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
|
||||
"pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
|
||||
[(set ECX,
|
||||
(IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
|
||||
(implicit EFLAGS)]>,
|
||||
OpSize;
|
||||
}
|
||||
}
|
||||
|
||||
// Instantiate SS42AI_pcmpistri once per pcmpistri intrinsic; each defm yields
// an "rr" and an "rm" record sharing the same pcmpistri encoding.
defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
|
||||
defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
|
||||
defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
|
||||
defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
|
||||
defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
|
||||
defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
|
||||
|
||||
// SS42AI_pcmpestri - defines the "rr" and "rm" forms of pcmpestri (opcode
// 0x61) for the intrinsic passed as IntId128. The intrinsic's second and
// fourth arguments are matched against the physical registers EAX and EDX
// (listed in Uses); the pattern sets ECX and marks EFLAGS as implicitly
// defined, matching the enclosing Defs list.
let Defs = [ECX, EFLAGS] in {
|
||||
let Uses = [EAX, EDX] in {
|
||||
multiclass SS42AI_pcmpestri<Intrinsic IntId128> {
|
||||
def rr : SS42AI<0x61, MRMSrcReg, (outs),
|
||||
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
|
||||
"pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
|
||||
[(set ECX,
|
||||
(IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
|
||||
(implicit EFLAGS)]>,
|
||||
OpSize;
|
||||
def rm : SS42AI<0x61, MRMSrcMem, (outs),
|
||||
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
|
||||
"pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
|
||||
[(set ECX,
|
||||
(IntId128 VR128:$src1, EAX, (load addr:$src3),
|
||||
EDX, imm:$src5)),
|
||||
(implicit EFLAGS)]>,
|
||||
OpSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Instantiate SS42AI_pcmpestri once per pcmpestri intrinsic; each defm yields
// an "rr" and an "rm" record sharing the same pcmpestri encoding.
defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
|
||||
defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
|
||||
defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
|
||||
defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
|
||||
defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
|
||||
defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
|
||||
|
|
Loading…
Reference in New Issue