forked from OSchip/llvm-project
ptx: add floating-point comparison to setp
Patched by Dan Bailey llvm-svn: 129847
This commit is contained in:
parent
49160f9a71
commit
5a952b3c67
|
@ -323,9 +323,9 @@ multiclass INT3ntnc<string opcstr, SDNode opnode> {
|
|||
[(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>;
|
||||
}
|
||||
|
||||
multiclass PTX_SETP<RegisterClass RC, string regclsname, Operand immcls,
|
||||
multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
|
||||
CondCode cmp, string cmpstr> {
|
||||
// TODO 1. support floating-point 2. support 5-operand format: p|q, a, b, c
|
||||
// TODO support 5-operand format: p|q, a, b, c
|
||||
|
||||
def rr
|
||||
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
|
||||
|
@ -387,6 +387,74 @@ multiclass PTX_SETP<RegisterClass RC, string regclsname, Operand immcls,
|
|||
[(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
|
||||
}
|
||||
|
||||
// Register/register setp forms for a floating-point register class.
//
// Parameters:
//   RC          - register class of both compared operands ($a and $b).
//   regclsname  - type suffix placed in the mnemonic (e.g. "f32").
//   ucmp, ocmp  - condition codes matched by the `_u` and `_o` records; the
//                 instantiations in this file pass SETU* and SETO* codes.
//   cmpstr      - comparison mnemonic (e.g. "eq"); `_u` records append "u".
//
// Each form below is defined twice (`_u` / `_o`). Forms with a third operand
// combine the comparison result with predicate $c through and/or/xor; the
// `_not_` forms match (not $c) and print the operand as "!$c".
multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
                       CondCode ucmp, CondCode ocmp, string cmpstr> {
  // TODO support 5-operand format: p|q, a, b, c

  // p = (a cmp b)
  def rr_u : InstPTX<
      (outs Preds:$p), (ins RC:$a, RC:$b),
      !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
      [(set Preds:$p, (setcc RC:$a, RC:$b, ucmp))]>;
  def rr_o : InstPTX<
      (outs Preds:$p), (ins RC:$a, RC:$b),
      !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
      [(set Preds:$p, (setcc RC:$a, RC:$b, ocmp))]>;

  // p = (a cmp b) & c
  def rr_and_r_u : InstPTX<
      (outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
      !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"),
      [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
  def rr_and_r_o : InstPTX<
      (outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
      !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
      [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;

  // p = (a cmp b) | c
  def rr_or_r_u : InstPTX<
      (outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
      !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"),
      [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
  def rr_or_r_o : InstPTX<
      (outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
      !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
      [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;

  // p = (a cmp b) ^ c
  def rr_xor_r_u : InstPTX<
      (outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
      !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"),
      [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
  def rr_xor_r_o : InstPTX<
      (outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
      !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
      [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;

  // p = (a cmp b) & !c
  def rr_and_not_r_u : InstPTX<
      (outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
      !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"),
      [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
  def rr_and_not_r_o : InstPTX<
      (outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
      !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
      [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;

  // p = (a cmp b) | !c
  def rr_or_not_r_u : InstPTX<
      (outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
      !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"),
      [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
  def rr_or_not_r_o : InstPTX<
      (outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
      !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
      [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;

  // p = (a cmp b) ^ !c
  def rr_xor_not_r_u : InstPTX<
      (outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
      !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"),
      [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
  def rr_xor_not_r_o : InstPTX<
      (outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
      !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
      [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
}
|
||||
|
||||
multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
|
||||
def rr32 : InstPTX<(outs RC:$d),
|
||||
(ins MEMri32:$a),
|
||||
|
@ -557,18 +625,50 @@ def FCOS64 : InstPTX<(outs RRegf64:$d),
|
|||
|
||||
///===- Comparison and Selection Instructions -----------------------------===//
|
||||
|
||||
// Unsigned 32- and 64-bit setp instantiations written against the PTX_SETP
// multiclass name.
// NOTE(review): the same record names (SETPEQu32 ... SETPGEu64) are
// instantiated again further down with PTX_SETP_I, so these lines look like
// the removed side of the diff hunk rather than live definitions - confirm
// before keeping both.
defm SETPEQu32 : PTX_SETP<RRegu32, "u32", i32imm, SETEQ, "eq">;
|
||||
defm SETPNEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETNE, "ne">;
|
||||
defm SETPLTu32 : PTX_SETP<RRegu32, "u32", i32imm, SETULT, "lt">;
|
||||
defm SETPLEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETULE, "le">;
|
||||
defm SETPGTu32 : PTX_SETP<RRegu32, "u32", i32imm, SETUGT, "gt">;
|
||||
defm SETPGEu32 : PTX_SETP<RRegu32, "u32", i32imm, SETUGE, "ge">;
|
||||
defm SETPEQu64 : PTX_SETP<RRegu64, "u64", i64imm, SETEQ, "eq">;
|
||||
defm SETPNEu64 : PTX_SETP<RRegu64, "u64", i64imm, SETNE, "ne">;
|
||||
defm SETPLTu64 : PTX_SETP<RRegu64, "u64", i64imm, SETULT, "lt">;
|
||||
defm SETPLEu64 : PTX_SETP<RRegu64, "u64", i64imm, SETULE, "le">;
|
||||
defm SETPGTu64 : PTX_SETP<RRegu64, "u64", i64imm, SETUGT, "gt">;
|
||||
defm SETPGEu64 : PTX_SETP<RRegu64, "u64", i64imm, SETUGE, "ge">;
|
||||
// Compare u16
//
// One PTX_SETP_I instantiation per comparison on RRegu16 with i16imm
// immediates; the ordering comparisons use the unsigned SETU* codes.
defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ,  "eq">;
defm SETPNEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETNE,  "ne">;
defm SETPLTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULT, "lt">;
defm SETPLEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULE, "le">;
defm SETPGTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGT, "gt">;
defm SETPGEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGE, "ge">;
|
||||
|
||||
// Compare u32
//
// One PTX_SETP_I instantiation per comparison on RRegu32 with i32imm
// immediates; the ordering comparisons use the unsigned SETU* codes.
defm SETPEQu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETEQ,  "eq">;
defm SETPNEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETNE,  "ne">;
defm SETPLTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULT, "lt">;
defm SETPLEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULE, "le">;
defm SETPGTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGT, "gt">;
defm SETPGEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGE, "ge">;
|
||||
|
||||
// Compare u64
//
// One PTX_SETP_I instantiation per comparison on RRegu64 with i64imm
// immediates; the ordering comparisons use the unsigned SETU* codes.
defm SETPEQu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETEQ,  "eq">;
defm SETPNEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETNE,  "ne">;
defm SETPLTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULT, "lt">;
defm SETPLEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULE, "le">;
defm SETPGTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGT, "gt">;
defm SETPGEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGE, "ge">;
|
||||
|
||||
// Compare f32
//
// One PTX_SETP_FP instantiation per comparison on RRegf32. Each passes a
// SETU* code (matched by the `_u` records) and the corresponding SETO* code
// (matched by the `_o` records).
defm SETPEQf32 : PTX_SETP_FP<RRegf32, "f32", SETUEQ, SETOEQ, "eq">;
defm SETPNEf32 : PTX_SETP_FP<RRegf32, "f32", SETUNE, SETONE, "ne">;
defm SETPLTf32 : PTX_SETP_FP<RRegf32, "f32", SETULT, SETOLT, "lt">;
defm SETPLEf32 : PTX_SETP_FP<RRegf32, "f32", SETULE, SETOLE, "le">;
defm SETPGTf32 : PTX_SETP_FP<RRegf32, "f32", SETUGT, SETOGT, "gt">;
defm SETPGEf32 : PTX_SETP_FP<RRegf32, "f32", SETUGE, SETOGE, "ge">;
|
||||
|
||||
// Compare f64
//
// One PTX_SETP_FP instantiation per comparison on RRegf64. Each passes a
// SETU* code (matched by the `_u` records) and the corresponding SETO* code
// (matched by the `_o` records).
defm SETPEQf64 : PTX_SETP_FP<RRegf64, "f64", SETUEQ, SETOEQ, "eq">;
defm SETPNEf64 : PTX_SETP_FP<RRegf64, "f64", SETUNE, SETONE, "ne">;
defm SETPLTf64 : PTX_SETP_FP<RRegf64, "f64", SETULT, SETOLT, "lt">;
defm SETPLEf64 : PTX_SETP_FP<RRegf64, "f64", SETULE, SETOLE, "le">;
defm SETPGTf64 : PTX_SETP_FP<RRegf64, "f64", SETUGT, SETOGT, "gt">;
defm SETPGEf64 : PTX_SETP_FP<RRegf64, "f64", SETUGE, SETOGE, "ge">;
|
||||
|
||||
///===- Logic and Shift Instructions --------------------------------------===//
|
||||
|
||||
|
@ -654,18 +754,138 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>;
|
|||
// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
|
||||
// TODO: Do something with st.param if/when it is needed.
|
||||
|
||||
// Conversion to pred
//
// Each record writes a Preds register from a single source operand. Integer
// sources are selected for `trunc`; floating-point sources for `fp_to_uint`.

def CVT_pred_u16 : InstPTX<
    (outs Preds:$d), (ins RRegu16:$a),
    "cvt.pred.u16\t$d, $a",
    [(set Preds:$d, (trunc RRegu16:$a))]>;

def CVT_pred_u32 : InstPTX<
    (outs Preds:$d), (ins RRegu32:$a),
    "cvt.pred.u32\t$d, $a",
    [(set Preds:$d, (trunc RRegu32:$a))]>;

def CVT_pred_u64 : InstPTX<
    (outs Preds:$d), (ins RRegu64:$a),
    "cvt.pred.u64\t$d, $a",
    [(set Preds:$d, (trunc RRegu64:$a))]>;

def CVT_pred_f32 : InstPTX<
    (outs Preds:$d), (ins RRegf32:$a),
    "cvt.pred.f32\t$d, $a",
    [(set Preds:$d, (fp_to_uint RRegf32:$a))]>;

def CVT_pred_f64 : InstPTX<
    (outs Preds:$d), (ins RRegf64:$a),
    "cvt.pred.f64\t$d, $a",
    [(set Preds:$d, (fp_to_uint RRegf64:$a))]>;
|
||||
|
||||
// Conversion to u16
//
// Each record writes an RRegu16 register. The predicate source is selected
// for `zext`, wider integer sources for `trunc`, and floating-point sources
// for `fp_to_uint`.

def CVT_u16_pred : InstPTX<
    (outs RRegu16:$d), (ins Preds:$a),
    "cvt.u16.pred\t$d, $a",
    [(set RRegu16:$d, (zext Preds:$a))]>;

def CVT_u16_u32 : InstPTX<
    (outs RRegu16:$d), (ins RRegu32:$a),
    "cvt.u16.u32\t$d, $a",
    [(set RRegu16:$d, (trunc RRegu32:$a))]>;

def CVT_u16_u64 : InstPTX<
    (outs RRegu16:$d), (ins RRegu64:$a),
    "cvt.u16.u64\t$d, $a",
    [(set RRegu16:$d, (trunc RRegu64:$a))]>;

def CVT_u16_f32 : InstPTX<
    (outs RRegu16:$d), (ins RRegf32:$a),
    "cvt.u16.f32\t$d, $a",
    [(set RRegu16:$d, (fp_to_uint RRegf32:$a))]>;

def CVT_u16_f64 : InstPTX<
    (outs RRegu16:$d), (ins RRegf64:$a),
    "cvt.u16.f64\t$d, $a",
    [(set RRegu16:$d, (fp_to_uint RRegf64:$a))]>;
|
||||
|
||||
// Conversion to u32
//
// Each record writes an RRegu32 register. Predicate and u16 sources are
// selected for `zext`, the u64 source for `trunc`, and floating-point sources
// for `fp_to_uint`.

def CVT_u32_pred : InstPTX<
    (outs RRegu32:$d), (ins Preds:$a),
    "cvt.u32.pred\t$d, $a",
    [(set RRegu32:$d, (zext Preds:$a))]>;

def CVT_u32_u16 : InstPTX<
    (outs RRegu32:$d), (ins RRegu16:$a),
    "cvt.u32.u16\t$d, $a",
    [(set RRegu32:$d, (zext RRegu16:$a))]>;

def CVT_u32_u64 : InstPTX<
    (outs RRegu32:$d), (ins RRegu64:$a),
    "cvt.u32.u64\t$d, $a",
    [(set RRegu32:$d, (trunc RRegu64:$a))]>;

def CVT_u32_f32 : InstPTX<
    (outs RRegu32:$d), (ins RRegf32:$a),
    "cvt.u32.f32\t$d, $a",
    [(set RRegu32:$d, (fp_to_uint RRegf32:$a))]>;

def CVT_u32_f64 : InstPTX<
    (outs RRegu32:$d), (ins RRegf64:$a),
    "cvt.u32.f64\t$d, $a",
    [(set RRegu32:$d, (fp_to_uint RRegf64:$a))]>;
|
||||
|
||||
// Conversion to u64
//
// Each record writes an RRegu64 register. Predicate and narrower integer
// sources are selected for `zext`; floating-point sources for `fp_to_uint`.
// The mnemonic follows the pattern cvt.<dst type>.<src type>.
// NOTE(review): the PTX ISA describes rounding modifiers for float-to-integer
// cvt; none is emitted by the f32/f64 records here - confirm whether one is
// required.

def CVT_u64_pred
  : InstPTX<(outs RRegu64:$d), (ins Preds:$a), "cvt.u64.pred\t$d, $a",
            [(set RRegu64:$d, (zext Preds:$a))]>;

def CVT_u64_u16
  : InstPTX<(outs RRegu64:$d), (ins RRegu16:$a), "cvt.u64.u16\t$d, $a",
            [(set RRegu64:$d, (zext RRegu16:$a))]>;

def CVT_u64_u32
  : InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a",
            [(set RRegu64:$d, (zext RRegu32:$a))]>;

def CVT_u64_f32
  : InstPTX<(outs RRegu64:$d), (ins RRegf32:$a), "cvt.u64.f32\t$d, $a",
            [(set RRegu64:$d, (fp_to_uint RRegf32:$a))]>;

// The source operand is RRegf64, so the source type suffix must be .f64
// (it was previously printed as .f32, mislabelling the 64-bit source).
def CVT_u64_f64
  : InstPTX<(outs RRegu64:$d), (ins RRegf64:$a), "cvt.u64.f64\t$d, $a",
            [(set RRegu64:$d, (fp_to_uint RRegf64:$a))]>;
|
||||
|
||||
// Conversion to f32
//
// Each record writes an RRegf32 register. Predicate and integer sources are
// selected for `uint_to_fp`; the f64 source is selected for `fround`.

def CVT_f32_pred : InstPTX<
    (outs RRegf32:$d), (ins Preds:$a),
    "cvt.f32.pred\t$d, $a",
    [(set RRegf32:$d, (uint_to_fp Preds:$a))]>;

def CVT_f32_u16 : InstPTX<
    (outs RRegf32:$d), (ins RRegu16:$a),
    "cvt.f32.u16\t$d, $a",
    [(set RRegf32:$d, (uint_to_fp RRegu16:$a))]>;

def CVT_f32_u32 : InstPTX<
    (outs RRegf32:$d), (ins RRegu32:$a),
    "cvt.f32.u32\t$d, $a",
    [(set RRegf32:$d, (uint_to_fp RRegu32:$a))]>;

def CVT_f32_u64 : InstPTX<
    (outs RRegf32:$d), (ins RRegu64:$a),
    "cvt.f32.u64\t$d, $a",
    [(set RRegf32:$d, (uint_to_fp RRegu64:$a))]>;

def CVT_f32_f64 : InstPTX<
    (outs RRegf32:$d), (ins RRegf64:$a),
    "cvt.f32.f64\t$d, $a",
    [(set RRegf32:$d, (fround RRegf64:$a))]>;
|
||||
|
||||
// Conversion to f64
//
// Each record writes an RRegf64 register. Predicate and integer sources are
// selected for `uint_to_fp`; the f32 source is selected for `fextend`.

def CVT_f64_pred : InstPTX<
    (outs RRegf64:$d), (ins Preds:$a),
    "cvt.f64.pred\t$d, $a",
    [(set RRegf64:$d, (uint_to_fp Preds:$a))]>;

def CVT_f64_u16 : InstPTX<
    (outs RRegf64:$d), (ins RRegu16:$a),
    "cvt.f64.u16\t$d, $a",
    [(set RRegf64:$d, (uint_to_fp RRegu16:$a))]>;

def CVT_f64_u32 : InstPTX<
    (outs RRegf64:$d), (ins RRegu32:$a),
    "cvt.f64.u32\t$d, $a",
    [(set RRegf64:$d, (uint_to_fp RRegu32:$a))]>;

def CVT_f64_u64 : InstPTX<
    (outs RRegf64:$d), (ins RRegu64:$a),
    "cvt.f64.u64\t$d, $a",
    [(set RRegf64:$d, (uint_to_fp RRegu64:$a))]>;

def CVT_f64_f32 : InstPTX<
    (outs RRegf64:$d), (ins RRegf32:$a),
    "cvt.f64.f32\t$d, $a",
    [(set RRegf64:$d, (fextend RRegf32:$a))]>;
|
||||
|
||||
///===- Control Flow Instructions -----------------------------------------===//
|
||||
|
||||
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
|
||||
|
|
Loading…
Reference in New Issue