ptx: add floating-point comparison to setp

Patched by Dan Bailey

llvm-svn: 129847
This commit is contained in:
Che-Liang Chiou 2011-04-20 09:28:20 +00:00
parent 49160f9a71
commit 5a952b3c67
1 changed files with 234 additions and 14 deletions

View File

@ -323,9 +323,9 @@ multiclass INT3ntnc<string opcstr, SDNode opnode> {
[(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>; [(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>;
} }
multiclass PTX_SETP<RegisterClass RC, string regclsname, Operand immcls, multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
CondCode cmp, string cmpstr> { CondCode cmp, string cmpstr> {
// TODO 1. support floating-point 2. support 5-operand format: p|q, a, b, c // TODO support 5-operand format: p|q, a, b, c
def rr def rr
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
@ -387,6 +387,74 @@ multiclass PTX_SETP<RegisterClass RC, string regclsname, Operand immcls,
[(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>; [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
} }
// PTX_SETP_FP - Floating-point "setp" instructions for a single comparison.
//
// Parameters:
//   RC         - register class of both compared operands ($a and $b).
//   regclsname - type suffix spliced into the mnemonic after the comparison.
//   ucmp       - condition code matched by the defs whose name ends in "_u".
//   ocmp       - condition code matched by the defs whose name ends in "_o".
//   cmpstr     - comparison mnemonic. The "_u" defs print it with an extra
//                "u" appended; the "_o" defs print it unchanged.
//
// Every def writes its result to the predicate register $p. Besides the plain
// two-operand compare (rr_*), each flavour is also provided fused with a
// boolean combine (and / or / xor) against a third predicate operand $c,
// either taken as-is (rr_<op>_r_*) or negated (rr_<op>_not_r_*). The negated
// forms match (not Preds:$c) and print the operand as "!$c".
//
// Only register-register forms are defined here; there are no immediate
// variants in this multiclass.
multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
CondCode ucmp, CondCode ocmp, string cmpstr> {
// TODO support 5-operand format: p|q, a, b, c
// Plain compare: $p = ($a <cmp> $b).
def rr_u
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
!strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
[(set Preds:$p, (setcc RC:$a, RC:$b, ucmp))]>;
def rr_o
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
!strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
[(set Preds:$p, (setcc RC:$a, RC:$b, ocmp))]>;
// Compare combined with predicate $c: $p = ($a <cmp> $b) and $c.
def rr_and_r_u
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
!strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"),
[(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
def rr_and_r_o
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
!strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
[(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
// $p = ($a <cmp> $b) or $c.
def rr_or_r_u
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
!strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"),
[(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
def rr_or_r_o
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
!strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
[(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
// $p = ($a <cmp> $b) xor $c.
def rr_xor_r_u
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
!strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"),
[(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
def rr_xor_r_o
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
!strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
[(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
// Same three combines, but against the negated predicate (printed "!$c").
// $p = ($a <cmp> $b) and (not $c).
def rr_and_not_r_u
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
!strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"),
[(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
def rr_and_not_r_o
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
!strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
[(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
// $p = ($a <cmp> $b) or (not $c).
def rr_or_not_r_u
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
!strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"),
[(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
def rr_or_not_r_o
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
!strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
[(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
// $p = ($a <cmp> $b) xor (not $c).
def rr_xor_not_r_u
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
!strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"),
[(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
def rr_xor_not_r_o
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
!strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
[(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
}
multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> { multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
def rr32 : InstPTX<(outs RC:$d), def rr32 : InstPTX<(outs RC:$d),
(ins MEMri32:$a), (ins MEMri32:$a),
@ -557,18 +625,50 @@ def FCOS64 : InstPTX<(outs RRegf64:$d),
///===- Comparison and Selection Instructions -----------------------------===// ///===- Comparison and Selection Instructions -----------------------------===//
defm SETPEQu32 : PTX_SETP<RRegu32, "u32", i32imm, SETEQ, "eq">; // Compare u16
defm SETPNEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETNE, "ne">;
defm SETPLTu32 : PTX_SETP<RRegu32, "u32", i32imm, SETULT, "lt">; defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ, "eq">;
defm SETPLEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETULE, "le">; defm SETPNEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETNE, "ne">;
defm SETPGTu32 : PTX_SETP<RRegu32, "u32", i32imm, SETUGT, "gt">; defm SETPLTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULT, "lt">;
defm SETPGEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETUGE, "ge">; defm SETPLEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULE, "le">;
defm SETPEQu64 : PTX_SETP<RRegu64, "u64", i64imm, SETEQ, "eq">; defm SETPGTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGT, "gt">;
defm SETPNEu64 : PTX_SETP<RRegu64, "u64", i64imm, SETNE, "ne">; defm SETPGEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGE, "ge">;
defm SETPLTu64 : PTX_SETP<RRegu64, "u64", i64imm, SETULT, "lt">;
defm SETPLEu64 : PTX_SETP<RRegu64, "u64", i64imm, SETULE, "le">; // Compare u32
defm SETPGTu64 : PTX_SETP<RRegu64, "u64", i64imm, SETUGT, "gt">;
defm SETPGEu64 : PTX_SETP<RRegu64, "u64", i64imm, SETUGE, "ge">; defm SETPEQu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETEQ, "eq">;
// Remaining u32 comparisons. Each defm expands PTX_SETP_I for the RRegu32
// register class with i32imm immediates; the last two arguments are the
// condition code to match and the comparison mnemonic to print.
defm SETPNEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETNE, "ne">;
defm SETPLTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULT, "lt">;
defm SETPLEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULE, "le">;
defm SETPGTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGT, "gt">;
defm SETPGEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGE, "ge">;
// Compare u64
// Same six comparisons for the RRegu64 register class with i64imm immediates.
defm SETPEQu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETEQ, "eq">;
defm SETPNEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETNE, "ne">;
defm SETPLTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULT, "lt">;
defm SETPLEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULE, "le">;
defm SETPGTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGT, "gt">;
defm SETPGEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGE, "ge">;
// Compare f32
// PTX_SETP_FP takes two condition codes per comparison: the third argument
// (SETU*) is matched by the "_u" defs, which print the mnemonic with a "u"
// suffix, and the fourth (SETO*) is matched by the "_o" defs, which print the
// mnemonic unchanged.
defm SETPEQf32 : PTX_SETP_FP<RRegf32, "f32", SETUEQ, SETOEQ, "eq">;
defm SETPNEf32 : PTX_SETP_FP<RRegf32, "f32", SETUNE, SETONE, "ne">;
defm SETPLTf32 : PTX_SETP_FP<RRegf32, "f32", SETULT, SETOLT, "lt">;
defm SETPLEf32 : PTX_SETP_FP<RRegf32, "f32", SETULE, SETOLE, "le">;
defm SETPGTf32 : PTX_SETP_FP<RRegf32, "f32", SETUGT, SETOGT, "gt">;
defm SETPGEf32 : PTX_SETP_FP<RRegf32, "f32", SETUGE, SETOGE, "ge">;
// Compare f64
// Same six comparison pairs as f32, for the RRegf64 register class.
defm SETPEQf64 : PTX_SETP_FP<RRegf64, "f64", SETUEQ, SETOEQ, "eq">;
defm SETPNEf64 : PTX_SETP_FP<RRegf64, "f64", SETUNE, SETONE, "ne">;
defm SETPLTf64 : PTX_SETP_FP<RRegf64, "f64", SETULT, SETOLT, "lt">;
defm SETPLEf64 : PTX_SETP_FP<RRegf64, "f64", SETULE, SETOLE, "le">;
defm SETPGTf64 : PTX_SETP_FP<RRegf64, "f64", SETUGT, SETOGT, "gt">;
defm SETPGEf64 : PTX_SETP_FP<RRegf64, "f64", SETUGE, SETOGE, "ge">;
///===- Logic and Shift Instructions --------------------------------------===// ///===- Logic and Shift Instructions --------------------------------------===//
@ -654,18 +754,138 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>;
// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>; // defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
// TODO: Do something with st.param if/when it is needed. // TODO: Do something with st.param if/when it is needed.
// Conversion to pred
// Matches a (trunc) of a u16 register into a predicate register and prints it
// as "cvt.pred.u16".
// NOTE(review): confirm that the PTX ISA accepts .pred as a cvt operand type;
// if it does not, this needs a different instruction sequence.
def CVT_pred_u16
: InstPTX<(outs Preds:$d), (ins RRegu16:$a), "cvt.pred.u16\t$d, $a",
[(set Preds:$d, (trunc RRegu16:$a))]>;
def CVT_pred_u32 def CVT_pred_u32
: InstPTX<(outs Preds:$d), (ins RRegu32:$a), "cvt.pred.u32\t$d, $a", : InstPTX<(outs Preds:$d), (ins RRegu32:$a), "cvt.pred.u32\t$d, $a",
[(set Preds:$d, (trunc RRegu32:$a))]>; [(set Preds:$d, (trunc RRegu32:$a))]>;
// u64 -> pred: matches (trunc) of a 64-bit integer register.
// NOTE(review): as with the other cvt.pred.* forms, confirm that the PTX ISA
// accepts .pred as a cvt operand type.
def CVT_pred_u64
: InstPTX<(outs Preds:$d), (ins RRegu64:$a), "cvt.pred.u64\t$d, $a",
[(set Preds:$d, (trunc RRegu64:$a))]>;
// f32 -> pred: matches (fp_to_uint) with a predicate result.
def CVT_pred_f32
: InstPTX<(outs Preds:$d), (ins RRegf32:$a), "cvt.pred.f32\t$d, $a",
[(set Preds:$d, (fp_to_uint RRegf32:$a))]>;
// f64 -> pred: matches (fp_to_uint) with a predicate result.
def CVT_pred_f64
: InstPTX<(outs Preds:$d), (ins RRegf64:$a), "cvt.pred.f64\t$d, $a",
[(set Preds:$d, (fp_to_uint RRegf64:$a))]>;
// Conversion to u16
//
// Integer sources use zext (from pred) or trunc (from wider integers).
// Floating-point sources match fp_to_uint; PTX requires an integer rounding
// modifier on every float-to-integer cvt, and fp_to_uint truncates toward
// zero, so those forms are printed with ".rzi".
// NOTE(review): confirm that the PTX ISA accepts .pred as a cvt operand type
// for CVT_u16_pred.
def CVT_u16_pred
  : InstPTX<(outs RRegu16:$d), (ins Preds:$a), "cvt.u16.pred\t$d, $a",
            [(set RRegu16:$d, (zext Preds:$a))]>;
def CVT_u16_u32
  : InstPTX<(outs RRegu16:$d), (ins RRegu32:$a), "cvt.u16.u32\t$d, $a",
            [(set RRegu16:$d, (trunc RRegu32:$a))]>;
def CVT_u16_u64
  : InstPTX<(outs RRegu16:$d), (ins RRegu64:$a), "cvt.u16.u64\t$d, $a",
            [(set RRegu16:$d, (trunc RRegu64:$a))]>;
def CVT_u16_f32
  : InstPTX<(outs RRegu16:$d), (ins RRegf32:$a), "cvt.rzi.u16.f32\t$d, $a",
            [(set RRegu16:$d, (fp_to_uint RRegf32:$a))]>;
def CVT_u16_f64
  : InstPTX<(outs RRegu16:$d), (ins RRegf64:$a), "cvt.rzi.u16.f64\t$d, $a",
            [(set RRegu16:$d, (fp_to_uint RRegf64:$a))]>;
// Conversion to u32
def CVT_u32_pred def CVT_u32_pred
: InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a", : InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a",
[(set RRegu32:$d, (zext Preds:$a))]>; [(set RRegu32:$d, (zext Preds:$a))]>;
// u16 -> u32 widens with zext; u64 -> u32 narrows with trunc.
def CVT_u32_u16
  : InstPTX<(outs RRegu32:$d), (ins RRegu16:$a), "cvt.u32.u16\t$d, $a",
            [(set RRegu32:$d, (zext RRegu16:$a))]>;
def CVT_u32_u64
  : InstPTX<(outs RRegu32:$d), (ins RRegu64:$a), "cvt.u32.u64\t$d, $a",
            [(set RRegu32:$d, (trunc RRegu64:$a))]>;
// Float -> u32 matches fp_to_uint. PTX requires an integer rounding modifier
// on every float-to-integer cvt; ".rzi" (round toward zero) matches the
// truncating semantics of fp_to_uint.
def CVT_u32_f32
  : InstPTX<(outs RRegu32:$d), (ins RRegf32:$a), "cvt.rzi.u32.f32\t$d, $a",
            [(set RRegu32:$d, (fp_to_uint RRegf32:$a))]>;
def CVT_u32_f64
  : InstPTX<(outs RRegu32:$d), (ins RRegf64:$a), "cvt.rzi.u32.f64\t$d, $a",
            [(set RRegu32:$d, (fp_to_uint RRegf64:$a))]>;
// Conversion to u64
// Both defs below widen their source with zext into a 64-bit integer register.
// NOTE(review): confirm that the PTX ISA accepts .pred as a cvt operand type
// for CVT_u64_pred.
def CVT_u64_pred
: InstPTX<(outs RRegu64:$d), (ins Preds:$a), "cvt.u64.pred\t$d, $a",
[(set RRegu64:$d, (zext Preds:$a))]>;
def CVT_u64_u16
: InstPTX<(outs RRegu64:$d), (ins RRegu16:$a), "cvt.u64.u16\t$d, $a",
[(set RRegu64:$d, (zext RRegu16:$a))]>;
def CVT_u64_u32 def CVT_u64_u32
: InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a", : InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a",
[(set RRegu64:$d, (zext RRegu32:$a))]>; [(set RRegu64:$d, (zext RRegu32:$a))]>;
// Float -> u64 matches fp_to_uint. PTX requires an integer rounding modifier
// on every float-to-integer cvt; ".rzi" (round toward zero) matches the
// truncating semantics of fp_to_uint.
def CVT_u64_f32
  : InstPTX<(outs RRegu64:$d), (ins RRegf32:$a), "cvt.rzi.u64.f32\t$d, $a",
            [(set RRegu64:$d, (fp_to_uint RRegf32:$a))]>;
// The source operand is an f64 register, so the source type suffix must be
// ".f64" (it was previously printed as ".f32").
def CVT_u64_f64
  : InstPTX<(outs RRegu64:$d), (ins RRegf64:$a), "cvt.rzi.u64.f64\t$d, $a",
            [(set RRegu64:$d, (fp_to_uint RRegf64:$a))]>;
// Conversion to f32
//
// Integer sources match uint_to_fp and the f64 source matches fround. PTX
// requires a floating-point rounding modifier on every integer-to-float cvt
// and on float-to-float cvt that narrows, so those forms are printed with
// ".rn" (round to nearest even).
// NOTE(review): confirm that the PTX ISA accepts .pred as a cvt operand type
// for CVT_f32_pred; its assembly string is left unchanged here.
def CVT_f32_pred
  : InstPTX<(outs RRegf32:$d), (ins Preds:$a), "cvt.f32.pred\t$d, $a",
            [(set RRegf32:$d, (uint_to_fp Preds:$a))]>;
def CVT_f32_u16
  : InstPTX<(outs RRegf32:$d), (ins RRegu16:$a), "cvt.rn.f32.u16\t$d, $a",
            [(set RRegf32:$d, (uint_to_fp RRegu16:$a))]>;
def CVT_f32_u32
  : InstPTX<(outs RRegf32:$d), (ins RRegu32:$a), "cvt.rn.f32.u32\t$d, $a",
            [(set RRegf32:$d, (uint_to_fp RRegu32:$a))]>;
def CVT_f32_u64
  : InstPTX<(outs RRegf32:$d), (ins RRegu64:$a), "cvt.rn.f32.u64\t$d, $a",
            [(set RRegf32:$d, (uint_to_fp RRegu64:$a))]>;
def CVT_f32_f64
  : InstPTX<(outs RRegf32:$d), (ins RRegf64:$a), "cvt.rn.f32.f64\t$d, $a",
            [(set RRegf32:$d, (fround RRegf64:$a))]>;
// Conversion to f64
//
// Integer sources match uint_to_fp; PTX requires a floating-point rounding
// modifier on every integer-to-float cvt, so those forms are printed with
// ".rn" (round to nearest even). The f32 source matches fextend, a widening
// conversion, and is printed without a rounding modifier.
// NOTE(review): confirm that the PTX ISA accepts .pred as a cvt operand type
// for CVT_f64_pred; its assembly string is left unchanged here.
def CVT_f64_pred
  : InstPTX<(outs RRegf64:$d), (ins Preds:$a), "cvt.f64.pred\t$d, $a",
            [(set RRegf64:$d, (uint_to_fp Preds:$a))]>;
def CVT_f64_u16
  : InstPTX<(outs RRegf64:$d), (ins RRegu16:$a), "cvt.rn.f64.u16\t$d, $a",
            [(set RRegf64:$d, (uint_to_fp RRegu16:$a))]>;
def CVT_f64_u32
  : InstPTX<(outs RRegf64:$d), (ins RRegu32:$a), "cvt.rn.f64.u32\t$d, $a",
            [(set RRegf64:$d, (uint_to_fp RRegu32:$a))]>;
def CVT_f64_u64
  : InstPTX<(outs RRegf64:$d), (ins RRegu64:$a), "cvt.rn.f64.u64\t$d, $a",
            [(set RRegf64:$d, (uint_to_fp RRegu64:$a))]>;
def CVT_f64_f32
  : InstPTX<(outs RRegf64:$d), (ins RRegf32:$a), "cvt.f64.f32\t$d, $a",
            [(set RRegf64:$d, (fextend RRegf32:$a))]>;
///===- Control Flow Instructions -----------------------------------------===// ///===- Control Flow Instructions -----------------------------------------===//
let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isBranch = 1, isTerminator = 1, isBarrier = 1 in {