[GlobalISel] Add G_VECREDUCE_* opcodes for vector reductions.

These mirror the IR and SelectionDAG intrinsics & nodes.

Opcodes added:
G_VECREDUCE_SEQ_FADD
G_VECREDUCE_SEQ_FMUL
G_VECREDUCE_FADD
G_VECREDUCE_FMUL
G_VECREDUCE_FMAX
G_VECREDUCE_FMIN
G_VECREDUCE_ADD
G_VECREDUCE_MUL
G_VECREDUCE_AND
G_VECREDUCE_OR
G_VECREDUCE_XOR
G_VECREDUCE_SMAX
G_VECREDUCE_SMIN
G_VECREDUCE_UMAX
G_VECREDUCE_UMIN

Differential Revision: https://reviews.llvm.org/D88750
This commit is contained in:
Amara Emerson 2020-10-02 11:56:53 -07:00
parent 64c0792946
commit 283b4d6ba3
7 changed files with 314 additions and 1 deletions

View File

@ -545,6 +545,45 @@ Concatenate two vectors and shuffle the elements according to the mask operand.
The mask operand should be an IR Constant which exactly matches the
corresponding mask for the IR shufflevector instruction.
Vector Reduction Operations
---------------------------
These operations represent horizontal vector reduction, producing a scalar result.
G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The SEQ variants perform reductions in sequential order. The first operand is
an initial scalar accumulator value, and the second operand is the vector to reduce.
G_VECREDUCE_FADD, G_VECREDUCE_FMUL
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
These reductions are relaxed variants which may reduce the elements in any order.
G_VECREDUCE_FMAX, G_VECREDUCE_FMIN
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
FMIN/FMAX nodes can carry instruction flags, which select between the NaN-propagating and no-NaN variants of the reduction.
Integer/bitwise reductions
^^^^^^^^^^^^^^^^^^^^^^^^^^
* G_VECREDUCE_ADD
* G_VECREDUCE_MUL
* G_VECREDUCE_AND
* G_VECREDUCE_OR
* G_VECREDUCE_XOR
* G_VECREDUCE_SMAX
* G_VECREDUCE_SMIN
* G_VECREDUCE_UMAX
* G_VECREDUCE_UMIN
Integer reductions may have a result type larger than the vector element type.
However, the reduction is performed using the vector element type and the value
in the top bits is unspecified.
Memory Operations
-----------------

View File

@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Module.h"
@ -1679,6 +1680,101 @@ public:
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildJumpTable(const LLT PtrTy, unsigned JTI);
/// Build and insert \p Res = G_VECREDUCE_SEQ_FADD \p ScalarIn, \p VecIn
///
/// \p ScalarIn is the scalar accumulator input to start the sequential
/// reduction operation of \p VecIn; the elements are folded in strict
/// element order.
MachineInstrBuilder buildVecReduceSeqFAdd(const DstOp &Dst,
                                          const SrcOp &ScalarIn,
                                          const SrcOp &VecIn) {
  return buildInstr(TargetOpcode::G_VECREDUCE_SEQ_FADD, {Dst},
                    {ScalarIn, VecIn});
}
/// Build and insert \p Res = G_VECREDUCE_SEQ_FMUL \p ScalarIn, \p VecIn
///
/// \p ScalarIn is the scalar accumulator input to start the sequential
/// reduction operation of \p VecIn; the elements are folded in strict
/// element order.
MachineInstrBuilder buildVecReduceSeqFMul(const DstOp &Dst,
                                          const SrcOp &ScalarIn,
                                          const SrcOp &VecIn) {
  return buildInstr(TargetOpcode::G_VECREDUCE_SEQ_FMUL, {Dst},
                    {ScalarIn, VecIn});
}
/// Build and insert \p Res = G_VECREDUCE_FADD \p ScalarIn, \p VecIn
///
/// \p ScalarIn is the scalar accumulator input to the reduction operation of
/// \p VecIn. Unlike the SEQ variant, the elements may be reduced in any
/// order.
MachineInstrBuilder buildVecReduceFAdd(const DstOp &Dst,
const SrcOp &ScalarIn,
const SrcOp &VecIn) {
return buildInstr(TargetOpcode::G_VECREDUCE_FADD, {Dst}, {ScalarIn, VecIn});
}
/// Build and insert \p Res = G_VECREDUCE_FMUL \p ScalarIn, \p VecIn
///
/// \p ScalarIn is the scalar accumulator input to the reduction operation of
/// \p VecIn. Unlike the SEQ variant, the elements may be reduced in any
/// order.
MachineInstrBuilder buildVecReduceFMul(const DstOp &Dst,
const SrcOp &ScalarIn,
const SrcOp &VecIn) {
return buildInstr(TargetOpcode::G_VECREDUCE_FMUL, {Dst}, {ScalarIn, VecIn});
}
/// Build and insert \p Res = G_VECREDUCE_FMAX \p Src
///
/// Floating-point maximum reduction of the vector \p Src into a scalar.
/// NaN behaviour is selected via instruction flags (NaN/NoNaN variants).
MachineInstrBuilder buildVecReduceFMax(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_VECREDUCE_FMAX, {Dst}, {Src});
}
/// Build and insert \p Res = G_VECREDUCE_FMIN \p Src
///
/// Floating-point minimum reduction of the vector \p Src into a scalar.
/// NaN behaviour is selected via instruction flags (NaN/NoNaN variants).
MachineInstrBuilder buildVecReduceFMin(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_VECREDUCE_FMIN, {Dst}, {Src});
}
/// Build and insert \p Res = G_VECREDUCE_ADD \p Src
///
/// Integer sum reduction of the elements of \p Src. The scalar result may be
/// wider than the vector element type; the high bits are then unspecified.
MachineInstrBuilder buildVecReduceAdd(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_VECREDUCE_ADD, {Dst}, {Src});
}
/// Build and insert \p Res = G_VECREDUCE_MUL \p Src
///
/// Integer product reduction of the elements of \p Src. The scalar result may
/// be wider than the vector element type; the high bits are then unspecified.
MachineInstrBuilder buildVecReduceMul(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_VECREDUCE_MUL, {Dst}, {Src});
}
/// Build and insert \p Res = G_VECREDUCE_AND \p Src
///
/// Bitwise AND reduction of the elements of \p Src into a scalar.
MachineInstrBuilder buildVecReduceAnd(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_VECREDUCE_AND, {Dst}, {Src});
}
/// Build and insert \p Res = G_VECREDUCE_OR \p Src
///
/// Bitwise OR reduction of the elements of \p Src into a scalar.
MachineInstrBuilder buildVecReduceOr(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_VECREDUCE_OR, {Dst}, {Src});
}
/// Build and insert \p Res = G_VECREDUCE_XOR \p Src
///
/// Bitwise XOR reduction of the elements of \p Src into a scalar.
MachineInstrBuilder buildVecReduceXor(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_VECREDUCE_XOR, {Dst}, {Src});
}
/// Build and insert \p Res = G_VECREDUCE_SMAX \p Src
///
/// Signed integer maximum reduction of the elements of \p Src into a scalar.
MachineInstrBuilder buildVecReduceSMax(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_VECREDUCE_SMAX, {Dst}, {Src});
}
/// Build and insert \p Res = G_VECREDUCE_SMIN \p Src
///
/// Signed integer minimum reduction of the elements of \p Src into a scalar.
MachineInstrBuilder buildVecReduceSMin(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_VECREDUCE_SMIN, {Dst}, {Src});
}
/// Build and insert \p Res = G_VECREDUCE_UMAX \p Src
///
/// Unsigned integer maximum reduction of the elements of \p Src into a scalar.
MachineInstrBuilder buildVecReduceUMax(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_VECREDUCE_UMAX, {Dst}, {Src});
}
/// Build and insert \p Res = G_VECREDUCE_UMIN \p Src
///
/// Unsigned integer minimum reduction of the elements of \p Src into a scalar.
MachineInstrBuilder buildVecReduceUMin(const DstOp &Dst, const SrcOp &Src) {
return buildInstr(TargetOpcode::G_VECREDUCE_UMIN, {Dst}, {Src});
}
virtual MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
ArrayRef<SrcOp> SrcOps,
Optional<unsigned> Flags = None);

View File

@ -710,10 +710,27 @@ HANDLE_TARGET_OPCODE(G_MEMMOVE)
/// llvm.memset intrinsic
HANDLE_TARGET_OPCODE(G_MEMSET)
/// Vector reductions
HANDLE_TARGET_OPCODE(G_VECREDUCE_SEQ_FADD)
HANDLE_TARGET_OPCODE(G_VECREDUCE_SEQ_FMUL)
HANDLE_TARGET_OPCODE(G_VECREDUCE_FADD)
HANDLE_TARGET_OPCODE(G_VECREDUCE_FMUL)
HANDLE_TARGET_OPCODE(G_VECREDUCE_FMAX)
HANDLE_TARGET_OPCODE(G_VECREDUCE_FMIN)
HANDLE_TARGET_OPCODE(G_VECREDUCE_ADD)
HANDLE_TARGET_OPCODE(G_VECREDUCE_MUL)
HANDLE_TARGET_OPCODE(G_VECREDUCE_AND)
HANDLE_TARGET_OPCODE(G_VECREDUCE_OR)
HANDLE_TARGET_OPCODE(G_VECREDUCE_XOR)
HANDLE_TARGET_OPCODE(G_VECREDUCE_SMAX)
HANDLE_TARGET_OPCODE(G_VECREDUCE_SMIN)
HANDLE_TARGET_OPCODE(G_VECREDUCE_UMAX)
HANDLE_TARGET_OPCODE(G_VECREDUCE_UMIN)
/// Marker for the end of the generic opcode.
/// This is used to check if an opcode is in the range of the
/// generic opcodes.
/// There must be exactly one end marker, and it must name the last generic
/// opcode above (the stale marker that still pointed at G_MEMSET is removed).
HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_VECREDUCE_UMIN)
/// BUILTIN_OP_END - This must be the last enum value in this list.
/// The target-specific post-isel opcode values start here.

View File

@ -1261,6 +1261,53 @@ def G_SHUFFLE_VECTOR: GenericInstruction {
let hasSideEffects = 0;
}
//------------------------------------------------------------------------------
// Vector reductions
//------------------------------------------------------------------------------
// In-order (sequential) floating-point addition reduction.
// $acc is the initial scalar accumulator value; $v is the vector to reduce.
def G_VECREDUCE_SEQ_FADD : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$acc, type2:$v);
let hasSideEffects = 0;
}
// In-order (sequential) floating-point multiplication reduction.
// $acc is the initial scalar accumulator value; $v is the vector to reduce.
def G_VECREDUCE_SEQ_FMUL : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$acc, type2:$v);
let hasSideEffects = 0;
}
// Relaxed floating-point addition reduction: elements may be combined in any
// order. Takes a scalar accumulator ($acc) and the vector to reduce ($v).
def G_VECREDUCE_FADD : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$acc, type2:$v);
let hasSideEffects = 0;
}
// Relaxed floating-point multiplication reduction: elements may be combined
// in any order. Takes a scalar accumulator ($acc) and the vector to reduce
// ($v).
def G_VECREDUCE_FMUL : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$acc, type2:$v);
let hasSideEffects = 0;
}
// Common profile for the reductions that take a single vector source and
// produce a scalar result.
class VectorReduction : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$v);
let hasSideEffects = 0;
}
// Floating-point min/max reductions; NaN behaviour is selected via
// instruction flags (NaN/NoNaN variants).
def G_VECREDUCE_FMAX : VectorReduction;
def G_VECREDUCE_FMIN : VectorReduction;
// Integer and bitwise reductions. The scalar result type may be wider than
// the vector element type; in that case the value of the high bits is
// unspecified.
def G_VECREDUCE_ADD : VectorReduction;
def G_VECREDUCE_MUL : VectorReduction;
def G_VECREDUCE_AND : VectorReduction;
def G_VECREDUCE_OR : VectorReduction;
def G_VECREDUCE_XOR : VectorReduction;
def G_VECREDUCE_SMAX : VectorReduction;
def G_VECREDUCE_SMIN : VectorReduction;
def G_VECREDUCE_UMAX : VectorReduction;
def G_VECREDUCE_UMIN : VectorReduction;
//------------------------------------------------------------------------------
// Constrained floating point ops
//------------------------------------------------------------------------------

View File

@ -1488,6 +1488,40 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
case TargetOpcode::G_VECREDUCE_SEQ_FADD:
case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
case TargetOpcode::G_VECREDUCE_FADD:
case TargetOpcode::G_VECREDUCE_FMUL: {
  // These reductions take a scalar accumulator (operand 1) and the vector to
  // reduce (operand 2), and produce a scalar result.
  LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
  LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg());
  LLT Src2Ty = MRI->getType(MI->getOperand(2).getReg());
  if (!DstTy.isScalar())
    report("Vector reduction requires a scalar destination type", MI);
  if (!Src1Ty.isScalar())
    report("FADD/FMUL vector reduction requires a scalar 1st operand", MI);
  if (!Src2Ty.isVector())
    report("FADD/FMUL vector reduction must have a vector 2nd operand", MI);
  break;
}
case TargetOpcode::G_VECREDUCE_FMAX:
case TargetOpcode::G_VECREDUCE_FMIN:
case TargetOpcode::G_VECREDUCE_ADD:
case TargetOpcode::G_VECREDUCE_MUL:
case TargetOpcode::G_VECREDUCE_AND:
case TargetOpcode::G_VECREDUCE_OR:
case TargetOpcode::G_VECREDUCE_XOR:
case TargetOpcode::G_VECREDUCE_SMAX:
case TargetOpcode::G_VECREDUCE_SMIN:
case TargetOpcode::G_VECREDUCE_UMAX:
case TargetOpcode::G_VECREDUCE_UMIN: {
  // Single-source reductions: one vector operand, scalar result.
  LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
  LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
  if (!DstTy.isScalar())
    report("Vector reduction requires a scalar destination type", MI);
  if (!SrcTy.isVector())
    report("Vector reduction requires vector source", MI);
  break;
}
default:
break;
}

View File

@ -613,6 +613,51 @@
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_VECREDUCE_SEQ_FADD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_VECREDUCE_SEQ_FMUL (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_VECREDUCE_FADD (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_VECREDUCE_FMUL (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_VECREDUCE_FMAX (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_VECREDUCE_FMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_VECREDUCE_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_VECREDUCE_MUL (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_VECREDUCE_AND (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_VECREDUCE_OR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_VECREDUCE_XOR (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_VECREDUCE_SMAX (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_VECREDUCE_SMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_VECREDUCE_UMAX (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_VECREDUCE_UMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# CHECK-NOT: ill-defined

View File

@ -0,0 +1,35 @@
# RUN: not --crash llc -o - -global-isel -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
# REQUIRES: aarch64-registered-target
--- |
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-unknown"
define i32 @vector_reductions() {
ret i32 0
}
...
---
name: vector_reductions
legalized: true
regBankSelected: false
selected: false
tracksRegLiveness: true
body: |
bb.0:
%vec_v2s64:_(<2 x s64>) = IMPLICIT_DEF
%scalar_s64:_(s64) = IMPLICIT_DEF
%seq_fadd:_(<2 x s64>) = G_VECREDUCE_SEQ_FADD %scalar_s64, %vec_v2s64
; CHECK: Bad machine code: Vector reduction requires a scalar destination type
%dst:_(s64) = G_VECREDUCE_SEQ_FADD %vec_v2s64, %vec_v2s64
; CHECK: Bad machine code: FADD/FMUL vector reduction requires a scalar 1st operand
%dst:_(s64) = G_VECREDUCE_SEQ_FADD %scalar_s64, %scalar_s64
; CHECK: Bad machine code: FADD/FMUL vector reduction must have a vector 2nd operand
%dst2:_(s64) = G_VECREDUCE_MUL %scalar_s64
; CHECK: Bad machine code: Vector reduction requires vector source
...