ptx: support setp's 4-operand format

llvm-svn: 128767
This commit is contained in:
Che-Liang Chiou 2011-04-02 08:51:39 +00:00
parent ca26e0acbb
commit e34b271718
3 changed files with 111 additions and 35 deletions

View File

@ -130,42 +130,37 @@ PredicateInstruction(MachineInstr *MI,
/// Returns true if predicate \p Pred1 subsumes predicate \p Pred2.
///
/// Each predicate is read as a two-operand sequence: element 0 is the
/// predicate register and element 1 is an immediate describing the predicate
/// kind (the same layout DefinesPredicate appends to its output vector).
/// Two predicates are treated as subsuming one another only when both the
/// register and the immediate are identical.
///
/// Both vectors are indexed at [0] and [1] without a size check, so callers
/// must pass at least two operands in each.
bool PTXInstrInfo::
SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                  const SmallVectorImpl<MachineOperand> &Pred2) const {
  // A previous stub returned false unconditionally ahead of this logic, which
  // left the comparison below unreachable; the stub has been dropped.

  // Different predicate registers can never subsume each other.
  const MachineOperand &PredReg1 = Pred1[0];
  const MachineOperand &PredReg2 = Pred2[0];
  if (PredReg1.getReg() != PredReg2.getReg())
    return false;

  // Same register: the predicates match only if the kind immediates agree.
  const MachineOperand &PredOp1 = Pred1[1];
  const MachineOperand &PredOp2 = Pred2[1];
  if (PredOp1.getImm() != PredOp2.getImm())
    return false;

  return true;
}
/// Reports whether \p MI defines a predicate register.
///
/// If operand 0 of \p MI is a register in PTX::PredsRegClass, that operand
/// followed by an immediate PTX::PRED_NORMAL is appended to \p Pred and the
/// function returns true. Otherwise \p Pred is left untouched and the
/// function returns false.
///
/// The check is opcode-independent, so it covers every setp variant (and any
/// other instruction) whose first operand is a predicate register, instead of
/// a hand-maintained list of SETP opcodes.
bool PTXInstrInfo::
DefinesPredicate(MachineInstr *MI,
                 std::vector<MachineOperand> &Pred) const {
  // If an instruction sets a predicate register, it defines a predicate.

  // TODO support 5-operand format of setp instruction

  // An instruction without operands cannot define anything.
  if (MI->getNumOperands() < 1)
    return false;

  const MachineOperand &MO = MI->getOperand(0);

  // Only a register operand belonging to the predicate class qualifies.
  // NOTE(review): this does not verify that operand 0 is a def; it relies on
  // operand 0 being the output operand of the instruction - confirm.
  if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::PredsRegClass)
    return false;

  Pred.push_back(MO);
  Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL));
  return true;
}
// branch support

View File

@ -325,14 +325,66 @@ multiclass INT3ntnc<string opcstr, SDNode opnode> {
// Integer "setp" comparison instructions that write a predicate register $p.
//
// Parameters:
//   RC         - register class of the compared operands.
//   regclsname - type suffix spliced into the mnemonic.
//   immcls     - operand class used for the immediate right-hand side.
//   cmp        - condition code matched by the setcc pattern.
//   cmpstr     - comparison suffix spliced into the mnemonic.
//
// Naming of the generated records:
//   rr / ri          - 3-operand form: $p = ($a cmp $b), with $b a register
//                      or an immediate.
//   *_and_r, *_or_r, *_xor_r
//                    - 4-operand form: the comparison result is combined with
//                      predicate $c using and / or / xor.
//   *_not_r          - as above, but $c is negated first; the assembly string
//                      prints the operand as "!$c".
multiclass PTX_SETP<RegisterClass RC, string regclsname, Operand immcls,
                    CondCode cmp, string cmpstr> {
  // TODO 1. support floating-point 2. support 5-operand format: p|q, a, b, c

  // 3-operand format.
  def rr
    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
              !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
              [(set Preds:$p, (setcc RC:$a, RC:$b, cmp))]>;
  def ri
    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b),
              !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
              [(set Preds:$p, (setcc RC:$a, imm:$b, cmp))]>;

  // 4-operand format, combined with predicate $c.
  def rr_and_r
    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
              !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
              [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
  def ri_and_r
    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
              !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
              [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
  def rr_or_r
    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
              !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
              [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
  def ri_or_r
    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
              !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
              [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
  def rr_xor_r
    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
              !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
              [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
  def ri_xor_r
    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
              !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
              [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;

  // 4-operand format, combined with the negation of predicate $c.
  def rr_and_not_r
    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
              !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
              [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
  def ri_and_not_r
    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
              !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
              [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
  def rr_or_not_r
    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
              !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
              [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
  def ri_or_not_r
    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
              !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
              [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
  def rr_xor_not_r
    : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
              !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
              [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
  def ri_xor_not_r
    : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
              !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
              [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
}
multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
@ -602,6 +654,10 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>;
// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
// TODO: Do something with st.param if/when it is needed.
// cvt.pred.u32: selected for a `trunc` of an RRegu32 value into a predicate
// register.
def CVT_pred_u32
  : InstPTX<(outs Preds:$d),
            (ins RRegu32:$a),
            "cvt.pred.u32\t$d, $a",
            [(set Preds:$d, (trunc RRegu32:$a))]>;
// cvt.u32.pred: selected for a `zext` of a predicate register into an RRegu32
// value.
def CVT_u32_pred
  : InstPTX<(outs RRegu32:$d),
            (ins Preds:$a),
            "cvt.u32.pred\t$d, $a",
            [(set RRegu32:$d, (zext Preds:$a))]>;

View File

@ -107,3 +107,28 @@ define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) {
%z = zext i1 %p to i32
ret i32 %z
}
; 4-operand setp, plain predicate operand.
; Computes zext((x == y) & (u >u v)). The expectations below require the
; unsigned greater-than to be materialised into p0 first, and the equality
; compare to be emitted as a single "setp.eq.and" that takes p0 as its
; fourth operand, rather than as a separate compare followed by an and.
define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) {
; CHECK: setp.gt.u32 p0, r3, r4;
; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, p0;
; CHECK-NEXT: cvt.u32.pred r0, p0;
; CHECK-NEXT: ret;
%c = icmp eq i32 %x, %y
%d = icmp ugt i32 %u, %v
%e = and i1 %c, %d
%z = zext i1 %e to i32
ret i32 %z
}
; 4-operand setp, negated predicate operand.
; Computes zext((x == y) & !trunc(w)). The i1 negation is written as
; "xor i1 %c, 1"; the expectations below require it to be folded into the
; setp as a "!p0" fourth operand, after w has been converted to a predicate
; with cvt.pred.u32.
define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) {
; CHECK: cvt.pred.u32 p0, r3;
; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, !p0;
; CHECK-NEXT: cvt.u32.pred r0, p0;
; CHECK-NEXT: ret;
%c = trunc i32 %w to i1
%d = icmp eq i32 %x, %y
%e = xor i1 %c, 1
%f = and i1 %d, %e
%z = zext i1 %f to i32
ret i32 %z
}