llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp

//===- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the SelectionDAG::dump method and friends.
//
//===----------------------------------------------------------------------===//
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdint>
#include <iterator>
using namespace llvm;
static cl::opt<bool>
VerboseDAGDumping("dag-dump-verbose", cl::Hidden,
cl::desc("Display more information when dumping selection "
"DAG nodes."));
std::string SDNode::getOperationName(const SelectionDAG *G) const {
switch (getOpcode()) {
default:
if (getOpcode() < ISD::BUILTIN_OP_END)
return "<<Unknown DAG Node>>";
if (isMachineOpcode()) {
if (G)
if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo())
if (getMachineOpcode() < TII->getNumOpcodes())
return std::string(TII->getName(getMachineOpcode()));
return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
}
if (G) {
const TargetLowering &TLI = G->getTargetLoweringInfo();
const char *Name = TLI.getTargetNodeName(getOpcode());
if (Name) return Name;
return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
}
return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
#ifndef NDEBUG
case ISD::DELETED_NODE: return "<<Deleted Node!>>";
#endif
case ISD::PREFETCH: return "Prefetch";
case ISD::ATOMIC_FENCE: return "AtomicFence";
case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess";
case ISD::ATOMIC_SWAP: return "AtomicSwap";
case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
case ISD::ATOMIC_LOAD_CLR: return "AtomicLoadClr";
case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd";
case ISD::ATOMIC_LOAD: return "AtomicLoad";
case ISD::ATOMIC_STORE: return "AtomicStore";
case ISD::PCMARKER: return "PCMarker";
case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
case ISD::SRCVALUE: return "SrcValue";
case ISD::MDNODE_SDNODE: return "MDNode";
case ISD::EntryToken: return "EntryToken";
case ISD::TokenFactor: return "TokenFactor";
case ISD::AssertSext: return "AssertSext";
case ISD::AssertZext: return "AssertZext";
case ISD::AssertAlign: return "AssertAlign";
case ISD::BasicBlock: return "BasicBlock";
case ISD::VALUETYPE: return "ValueType";
case ISD::Register: return "Register";
case ISD::RegisterMask: return "RegisterMask";
case ISD::Constant:
if (cast<ConstantSDNode>(this)->isOpaque())
return "OpaqueConstant";
return "Constant";
case ISD::ConstantFP: return "ConstantFP";
case ISD::GlobalAddress: return "GlobalAddress";
case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
case ISD::FrameIndex: return "FrameIndex";
case ISD::JumpTable: return "JumpTable";
case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
case ISD::RETURNADDR: return "RETURNADDR";
case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR";
case ISD::FRAMEADDR: return "FRAMEADDR";
case ISD::SPONENTRY: return "SPONENTRY";
case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER";
case ISD::READ_REGISTER: return "READ_REGISTER";
case ISD::WRITE_REGISTER: return "WRITE_REGISTER";
case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
case ISD::EH_DWARF_CFA: return "EH_DWARF_CFA";
case ISD::EH_RETURN: return "EH_RETURN";
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH";
case ISD::ConstantPool: return "ConstantPool";
case ISD::TargetIndex: return "TargetIndex";
case ISD::ExternalSymbol: return "ExternalSymbol";
case ISD::BlockAddress: return "BlockAddress";
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
return Intrinsic::getBaseName((Intrinsic::ID)IID).str();
if (!G)
return "Unknown intrinsic";
if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
return TII->getName(IID);
llvm_unreachable("Invalid intrinsic ID");
}
case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
case ISD::TargetConstant:
if (cast<ConstantSDNode>(this)->isOpaque())
return "OpaqueTargetConstant";
return "TargetConstant";
case ISD::TargetConstantFP: return "TargetConstantFP";
case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
case ISD::TargetFrameIndex: return "TargetFrameIndex";
case ISD::TargetJumpTable: return "TargetJumpTable";
case ISD::TargetConstantPool: return "TargetConstantPool";
case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
case ISD::MCSymbol: return "MCSymbol";
case ISD::TargetBlockAddress: return "TargetBlockAddress";
case ISD::CopyToReg: return "CopyToReg";
case ISD::CopyFromReg: return "CopyFromReg";
case ISD::UNDEF: return "undef";
case ISD::VSCALE: return "vscale";
case ISD::MERGE_VALUES: return "merge_values";
case ISD::INLINEASM: return "inlineasm";
case ISD::INLINEASM_BR: return "inlineasm_br";
case ISD::EH_LABEL: return "eh_label";
case ISD::ANNOTATION_LABEL: return "annotation_label";
case ISD::HANDLENODE: return "handlenode";
// Unary operators
case ISD::FABS: return "fabs";
case ISD::FMINNUM: return "fminnum";
case ISD::STRICT_FMINNUM: return "strict_fminnum";
case ISD::FMAXNUM: return "fmaxnum";
case ISD::STRICT_FMAXNUM: return "strict_fmaxnum";
case ISD::FMINNUM_IEEE: return "fminnum_ieee";
case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee";
case ISD::FMINIMUM: return "fminimum";
case ISD::STRICT_FMINIMUM: return "strict_fminimum";
case ISD::FMAXIMUM: return "fmaximum";
case ISD::STRICT_FMAXIMUM: return "strict_fmaximum";
case ISD::FNEG: return "fneg";
case ISD::FSQRT: return "fsqrt";
case ISD::STRICT_FSQRT: return "strict_fsqrt";
case ISD::FCBRT: return "fcbrt";
case ISD::FSIN: return "fsin";
case ISD::STRICT_FSIN: return "strict_fsin";
case ISD::FCOS: return "fcos";
case ISD::STRICT_FCOS: return "strict_fcos";
case ISD::FSINCOS: return "fsincos";
case ISD::FTRUNC: return "ftrunc";
case ISD::STRICT_FTRUNC: return "strict_ftrunc";
case ISD::FFLOOR: return "ffloor";
case ISD::STRICT_FFLOOR: return "strict_ffloor";
case ISD::FCEIL: return "fceil";
case ISD::STRICT_FCEIL: return "strict_fceil";
case ISD::FRINT: return "frint";
case ISD::STRICT_FRINT: return "strict_frint";
case ISD::FNEARBYINT: return "fnearbyint";
case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint";
case ISD::FROUND: return "fround";
case ISD::STRICT_FROUND: return "strict_fround";
case ISD::FROUNDEVEN: return "froundeven";
case ISD::STRICT_FROUNDEVEN: return "strict_froundeven";
case ISD::FEXP: return "fexp";
case ISD::STRICT_FEXP: return "strict_fexp";
case ISD::FEXP2: return "fexp2";
case ISD::STRICT_FEXP2: return "strict_fexp2";
case ISD::FLOG: return "flog";
case ISD::STRICT_FLOG: return "strict_flog";
case ISD::FLOG2: return "flog2";
case ISD::STRICT_FLOG2: return "strict_flog2";
case ISD::FLOG10: return "flog10";
case ISD::STRICT_FLOG10: return "strict_flog10";
// Binary operators
case ISD::ADD: return "add";
case ISD::SUB: return "sub";
case ISD::MUL: return "mul";
case ISD::MULHU: return "mulhu";
case ISD::MULHS: return "mulhs";
case ISD::AVGFLOORU: return "avgflooru";
case ISD::AVGFLOORS: return "avgfloors";
case ISD::AVGCEILU: return "avgceilu";
case ISD::AVGCEILS: return "avgceils";
case ISD::ABDS: return "abds";
case ISD::ABDU: return "abdu";
case ISD::SDIV: return "sdiv";
case ISD::UDIV: return "udiv";
case ISD::SREM: return "srem";
case ISD::UREM: return "urem";
case ISD::SMUL_LOHI: return "smul_lohi";
case ISD::UMUL_LOHI: return "umul_lohi";
case ISD::SDIVREM: return "sdivrem";
case ISD::UDIVREM: return "udivrem";
case ISD::AND: return "and";
case ISD::OR: return "or";
case ISD::XOR: return "xor";
case ISD::SHL: return "shl";
case ISD::SRA: return "sra";
case ISD::SRL: return "srl";
case ISD::ROTL: return "rotl";
case ISD::ROTR: return "rotr";
case ISD::FSHL: return "fshl";
case ISD::FSHR: return "fshr";
case ISD::FADD: return "fadd";
case ISD::STRICT_FADD: return "strict_fadd";
case ISD::FSUB: return "fsub";
case ISD::STRICT_FSUB: return "strict_fsub";
case ISD::FMUL: return "fmul";
case ISD::STRICT_FMUL: return "strict_fmul";
case ISD::FDIV: return "fdiv";
case ISD::STRICT_FDIV: return "strict_fdiv";
case ISD::FMA: return "fma";
case ISD::STRICT_FMA: return "strict_fma";
case ISD::FMAD: return "fmad";
case ISD::FREM: return "frem";
case ISD::STRICT_FREM: return "strict_frem";
case ISD::FCOPYSIGN: return "fcopysign";
case ISD::FGETSIGN: return "fgetsign";
case ISD::FCANONICALIZE: return "fcanonicalize";
case ISD::IS_FPCLASS: return "is_fpclass";
case ISD::FPOW: return "fpow";
case ISD::STRICT_FPOW: return "strict_fpow";
case ISD::SMIN: return "smin";
case ISD::SMAX: return "smax";
case ISD::UMIN: return "umin";
case ISD::UMAX: return "umax";
case ISD::FPOWI: return "fpowi";
case ISD::STRICT_FPOWI: return "strict_fpowi";
case ISD::SETCC: return "setcc";
case ISD::SETCCCARRY: return "setcccarry";
case ISD::STRICT_FSETCC: return "strict_fsetcc";
case ISD::STRICT_FSETCCS: return "strict_fsetccs";
case ISD::SELECT: return "select";
case ISD::VSELECT: return "vselect";
case ISD::SELECT_CC: return "select_cc";
case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
case ISD::CONCAT_VECTORS: return "concat_vectors";
case ISD::INSERT_SUBVECTOR: return "insert_subvector";
case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
case ISD::VECTOR_SPLICE: return "vector_splice";
case ISD::SPLAT_VECTOR: return "splat_vector";
case ISD::SPLAT_VECTOR_PARTS: return "splat_vector_parts";
case ISD::VECTOR_REVERSE: return "vector_reverse";
case ISD::STEP_VECTOR: return "step_vector";
case ISD::CARRY_FALSE: return "carry_false";
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";
case ISD::ADDCARRY: return "addcarry";
case ISD::SADDO_CARRY: return "saddo_carry";
case ISD::SADDO: return "saddo";
case ISD::UADDO: return "uaddo";
case ISD::SSUBO: return "ssubo";
case ISD::USUBO: return "usubo";
case ISD::SMULO: return "smulo";
case ISD::UMULO: return "umulo";
case ISD::SUBC: return "subc";
case ISD::SUBE: return "sube";
case ISD::SUBCARRY: return "subcarry";
case ISD::SSUBO_CARRY: return "ssubo_carry";
case ISD::SHL_PARTS: return "shl_parts";
case ISD::SRA_PARTS: return "sra_parts";
case ISD::SRL_PARTS: return "srl_parts";
case ISD::SADDSAT: return "saddsat";
case ISD::UADDSAT: return "uaddsat";
case ISD::SSUBSAT: return "ssubsat";
case ISD::USUBSAT: return "usubsat";
case ISD::SSHLSAT: return "sshlsat";
case ISD::USHLSAT: return "ushlsat";
case ISD::SMULFIX: return "smulfix";
case ISD::SMULFIXSAT: return "smulfixsat";
case ISD::UMULFIX: return "umulfix";
case ISD::UMULFIXSAT: return "umulfixsat";
case ISD::SDIVFIX: return "sdivfix";
case ISD::SDIVFIXSAT: return "sdivfixsat";
case ISD::UDIVFIX: return "udivfix";
case ISD::UDIVFIXSAT: return "udivfixsat";
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
case ISD::ZERO_EXTEND: return "zero_extend";
case ISD::ANY_EXTEND: return "any_extend";
case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg";
case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg";
case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg";
case ISD::TRUNCATE: return "truncate";
case ISD::FP_ROUND: return "fp_round";
case ISD::STRICT_FP_ROUND: return "strict_fp_round";
case ISD::FP_EXTEND: return "fp_extend";
case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
case ISD::SINT_TO_FP: return "sint_to_fp";
case ISD::STRICT_SINT_TO_FP: return "strict_sint_to_fp";
case ISD::UINT_TO_FP: return "uint_to_fp";
case ISD::STRICT_UINT_TO_FP: return "strict_uint_to_fp";
case ISD::FP_TO_SINT: return "fp_to_sint";
case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint";
case ISD::FP_TO_SINT_SAT: return "fp_to_sint_sat";
case ISD::FP_TO_UINT_SAT: return "fp_to_uint_sat";
case ISD::BITCAST: return "bitcast";
case ISD::ADDRSPACECAST: return "addrspacecast";
case ISD::FP16_TO_FP: return "fp16_to_fp";
case ISD::STRICT_FP16_TO_FP: return "strict_fp16_to_fp";
case ISD::FP_TO_FP16: return "fp_to_fp16";
case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16";
case ISD::BF16_TO_FP: return "bf16_to_fp";
case ISD::FP_TO_BF16: return "fp_to_bf16";
case ISD::LROUND: return "lround";
case ISD::STRICT_LROUND: return "strict_lround";
case ISD::LLROUND: return "llround";
case ISD::STRICT_LLROUND: return "strict_llround";
case ISD::LRINT: return "lrint";
case ISD::STRICT_LRINT: return "strict_lrint";
case ISD::LLRINT: return "llrint";
case ISD::STRICT_LLRINT: return "strict_llrint";
// Control flow instructions
case ISD::BR: return "br";
case ISD::BRIND: return "brind";
case ISD::BR_JT: return "br_jt";
case ISD::BRCOND: return "brcond";
case ISD::BR_CC: return "br_cc";
case ISD::CALLSEQ_START: return "callseq_start";
case ISD::CALLSEQ_END: return "callseq_end";
// EH instructions
case ISD::CATCHRET: return "catchret";
case ISD::CLEANUPRET: return "cleanupret";
// Other operators
case ISD::LOAD: return "load";
case ISD::STORE: return "store";
case ISD::MLOAD: return "masked_load";
case ISD::MSTORE: return "masked_store";
case ISD::MGATHER: return "masked_gather";
case ISD::MSCATTER: return "masked_scatter";
case ISD::VAARG: return "vaarg";
case ISD::VACOPY: return "vacopy";
case ISD::VAEND: return "vaend";
case ISD::VASTART: return "vastart";
case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
case ISD::EXTRACT_ELEMENT: return "extract_element";
case ISD::BUILD_PAIR: return "build_pair";
case ISD::STACKSAVE: return "stacksave";
case ISD::STACKRESTORE: return "stackrestore";
case ISD::TRAP: return "trap";
case ISD::DEBUGTRAP: return "debugtrap";
case ISD::UBSANTRAP: return "ubsantrap";
case ISD::LIFETIME_START: return "lifetime.start";
case ISD::LIFETIME_END: return "lifetime.end";
case ISD::PSEUDO_PROBE:
return "pseudoprobe";
case ISD::GC_TRANSITION_START: return "gc_transition.start";
case ISD::GC_TRANSITION_END: return "gc_transition.end";
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
case ISD::FREEZE: return "freeze";
case ISD::PREALLOCATED_SETUP:
return "call_setup";
case ISD::PREALLOCATED_ARG:
return "call_alloc";
// Floating point environment manipulation
case ISD::FLT_ROUNDS_: return "flt_rounds";
case ISD::SET_ROUNDING: return "set_rounding";
// Bit manipulation
case ISD::ABS: return "abs";
case ISD::BITREVERSE: return "bitreverse";
case ISD::BSWAP: return "bswap";
case ISD::CTPOP: return "ctpop";
case ISD::CTTZ: return "cttz";
case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
case ISD::CTLZ: return "ctlz";
case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
case ISD::PARITY: return "parity";
// Trampolines
case ISD::INIT_TRAMPOLINE: return "init_trampoline";
case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
case ISD::CONDCODE:
switch (cast<CondCodeSDNode>(this)->get()) {
default: llvm_unreachable("Unknown setcc condition!");
case ISD::SETOEQ: return "setoeq";
case ISD::SETOGT: return "setogt";
case ISD::SETOGE: return "setoge";
case ISD::SETOLT: return "setolt";
case ISD::SETOLE: return "setole";
case ISD::SETONE: return "setone";
case ISD::SETO: return "seto";
case ISD::SETUO: return "setuo";
case ISD::SETUEQ: return "setueq";
case ISD::SETUGT: return "setugt";
case ISD::SETUGE: return "setuge";
case ISD::SETULT: return "setult";
case ISD::SETULE: return "setule";
case ISD::SETUNE: return "setune";
case ISD::SETEQ: return "seteq";
case ISD::SETGT: return "setgt";
case ISD::SETGE: return "setge";
case ISD::SETLT: return "setlt";
case ISD::SETLE: return "setle";
case ISD::SETNE: return "setne";
case ISD::SETTRUE: return "settrue";
case ISD::SETTRUE2: return "settrue2";
case ISD::SETFALSE: return "setfalse";
case ISD::SETFALSE2: return "setfalse2";
}
case ISD::VECREDUCE_FADD: return "vecreduce_fadd";
case ISD::VECREDUCE_SEQ_FADD: return "vecreduce_seq_fadd";
case ISD::VECREDUCE_FMUL: return "vecreduce_fmul";
case ISD::VECREDUCE_SEQ_FMUL: return "vecreduce_seq_fmul";
case ISD::VECREDUCE_ADD: return "vecreduce_add";
case ISD::VECREDUCE_MUL: return "vecreduce_mul";
case ISD::VECREDUCE_AND: return "vecreduce_and";
case ISD::VECREDUCE_OR: return "vecreduce_or";
case ISD::VECREDUCE_XOR: return "vecreduce_xor";
case ISD::VECREDUCE_SMAX: return "vecreduce_smax";
case ISD::VECREDUCE_SMIN: return "vecreduce_smin";
case ISD::VECREDUCE_UMAX: return "vecreduce_umax";
case ISD::VECREDUCE_UMIN: return "vecreduce_umin";
case ISD::VECREDUCE_FMAX: return "vecreduce_fmax";
case ISD::VECREDUCE_FMIN: return "vecreduce_fmin";
// Vector Predication
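// Expand to one case per vector-predication opcode via the VPIntrinsics.def
// X-macro, so newly registered VP nodes are named without edits here.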
#define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \
case ISD::SDID: \
return #NAME;
#include "llvm/IR/VPIntrinsics.def"
}
}
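/// Return a string such as "<pre-inc>" for an indexed load/store addressing
/// mode, or an empty string for unindexed accesses.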
const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
switch (AM) {
default: return "";
case ISD::PRE_INC: return "<pre-inc>";
case ISD::PRE_DEC: return "<pre-dec>";
case ISD::POST_INC: return "<post-inc>";
case ISD::POST_DEC: return "<post-dec>";
}
}
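// Print a compact node identifier: the PersistentId in asserts builds,
// otherwise the node's address.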
static Printable PrintNodeId(const SDNode &Node) {
return Printable([&Node](raw_ostream &OS) {
#ifndef NDEBUG
OS << 't' << Node.PersistentId;
#else
OS << (const void*)&Node;
#endif
});
}
// Print the MMO with more information from the SelectionDAG.
static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
const MachineFunction *MF, const Module *M,
const MachineFrameInfo *MFI,
const TargetInstrInfo *TII, LLVMContext &Ctx) {
ModuleSlotTracker MST(M);
if (MF)
MST.incorporateFunction(MF->getFunction());
SmallVector<StringRef, 0> SSNs;
MMO.print(OS, MST, SSNs, Ctx, MFI, TII);
}
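// Convenience overload: when a SelectionDAG is available, borrow its
// function, module, frame info, and instruction info for a richer dump;
// otherwise fall back to a fresh LLVMContext with no extra detail.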
static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
const SelectionDAG *G) {
if (G) {
const MachineFunction *MF = &G->getMachineFunction();
return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(),
&MF->getFrameInfo(),
G->getSubtarget().getInstrInfo(), *G->getContext());
}
LLVMContext Ctx;
return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,
/*MFI=*/nullptr, /*TII=*/nullptr, Ctx);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); }
LLVM_DUMP_METHOD void SDNode::dump(const SelectionDAG *G) const {
print(dbgs(), G);
dbgs() << '\n';
}
#endif
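// Print the comma-separated list of result types, using "ch" for the chain
// type (MVT::Other).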
void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
if (i) OS << ",";
if (getValueType(i) == MVT::Other)
OS << "ch";
else
OS << getValueType(i).getEVTString();
}
}
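// Print node flags, memory operands, and any subclass-specific payload such
// as constant values, global symbols, frame indexes, or shuffle masks.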
void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getFlags().hasNoUnsignedWrap())
OS << " nuw";
if (getFlags().hasNoSignedWrap())
OS << " nsw";
if (getFlags().hasExact())
OS << " exact";
if (getFlags().hasNoNaNs())
OS << " nnan";
if (getFlags().hasNoInfs())
OS << " ninf";
if (getFlags().hasNoSignedZeros())
OS << " nsz";
if (getFlags().hasAllowReciprocal())
OS << " arcp";
if (getFlags().hasAllowContract())
OS << " contract";
if (getFlags().hasApproximateFuncs())
OS << " afn";
if (getFlags().hasAllowReassociation())
OS << " reassoc";
if (getFlags().hasNoFPExcept())
OS << " nofpexcept";
if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
if (!MN->memoperands_empty()) {
OS << "<";
OS << "Mem:";
for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
e = MN->memoperands_end(); i != e; ++i) {
printMemOperand(OS, **i, G);
if (std::next(i) != e)
OS << " ";
}
OS << ">";
}
} else if (const ShuffleVectorSDNode *SVN =
dyn_cast<ShuffleVectorSDNode>(this)) {
OS << "<";
for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
int Idx = SVN->getMaskElt(i);
if (i) OS << ",";
if (Idx < 0)
OS << "u";
else
OS << Idx;
}
OS << ">";
} else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
OS << '<' << CSDN->getAPIntValue() << '>';
} else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEsingle())
OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
else if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEdouble())
OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
else {
OS << "<APFloat(";
CSDN->getValueAPF().bitcastToAPInt().print(OS, false);
OS << ")>";
}
} else if (const GlobalAddressSDNode *GADN =
dyn_cast<GlobalAddressSDNode>(this)) {
int64_t offset = GADN->getOffset();
OS << '<';
GADN->getGlobal()->printAsOperand(OS);
OS << '>';
if (offset > 0)
OS << " + " << offset;
else
OS << " " << offset;
if (unsigned int TF = GADN->getTargetFlags())
OS << " [TF=" << TF << ']';
} else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
OS << "<" << FIDN->getIndex() << ">";
} else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
OS << "<" << JTDN->getIndex() << ">";
if (unsigned int TF = JTDN->getTargetFlags())
OS << " [TF=" << TF << ']';
} else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
int offset = CP->getOffset();
if (CP->isMachineConstantPoolEntry())
OS << "<" << *CP->getMachineCPVal() << ">";
else
OS << "<" << *CP->getConstVal() << ">";
if (offset > 0)
OS << " + " << offset;
else
OS << " " << offset;
if (unsigned int TF = CP->getTargetFlags())
OS << " [TF=" << TF << ']';
} else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) {
OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">";
if (unsigned TF = TI->getTargetFlags())
OS << " [TF=" << TF << ']';
} else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
OS << "<";
const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
if (LBB)
OS << LBB->getName() << " ";
OS << (const void*)BBDN->getBasicBlock() << ">";
} else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
OS << ' ' << printReg(R->getReg(),
G ? G->getSubtarget().getRegisterInfo() : nullptr);
} else if (const ExternalSymbolSDNode *ES =
dyn_cast<ExternalSymbolSDNode>(this)) {
OS << "'" << ES->getSymbol() << "'";
if (unsigned int TF = ES->getTargetFlags())
OS << " [TF=" << TF << ']';
} else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
if (M->getValue())
OS << "<" << M->getValue() << ">";
else
OS << "<null>";
} else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
if (MD->getMD())
OS << "<" << MD->getMD() << ">";
else
OS << "<null>";
} else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
OS << ":" << N->getVT().getEVTString();
}
else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
OS << "<";
printMemOperand(OS, *LD->getMemOperand(), G);
bool doExt = true;
switch (LD->getExtensionType()) {
default: doExt = false; break;
case ISD::EXTLOAD: OS << ", anyext"; break;
case ISD::SEXTLOAD: OS << ", sext"; break;
case ISD::ZEXTLOAD: OS << ", zext"; break;
}
if (doExt)
OS << " from " << LD->getMemoryVT().getEVTString();
const char *AM = getIndexedModeName(LD->getAddressingMode());
if (*AM)
OS << ", " << AM;
OS << ">";
} else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
OS << "<";
printMemOperand(OS, *ST->getMemOperand(), G);
if (ST->isTruncatingStore())
OS << ", trunc to " << ST->getMemoryVT().getEVTString();
const char *AM = getIndexedModeName(ST->getAddressingMode());
if (*AM)
OS << ", " << AM;
OS << ">";
} else if (const MaskedLoadSDNode *MLd = dyn_cast<MaskedLoadSDNode>(this)) {
OS << "<";
printMemOperand(OS, *MLd->getMemOperand(), G);
bool doExt = true;
switch (MLd->getExtensionType()) {
default: doExt = false; break;
case ISD::EXTLOAD: OS << ", anyext"; break;
case ISD::SEXTLOAD: OS << ", sext"; break;
case ISD::ZEXTLOAD: OS << ", zext"; break;
}
if (doExt)
OS << " from " << MLd->getMemoryVT().getEVTString();
const char *AM = getIndexedModeName(MLd->getAddressingMode());
if (*AM)
OS << ", " << AM;
if (MLd->isExpandingLoad())
OS << ", expanding";
OS << ">";
} else if (const MaskedStoreSDNode *MSt = dyn_cast<MaskedStoreSDNode>(this)) {
OS << "<";
printMemOperand(OS, *MSt->getMemOperand(), G);
if (MSt->isTruncatingStore())
OS << ", trunc to " << MSt->getMemoryVT().getEVTString();
const char *AM = getIndexedModeName(MSt->getAddressingMode());
if (*AM)
OS << ", " << AM;
if (MSt->isCompressingStore())
OS << ", compressing";
OS << ">";
} else if (const auto *MGather = dyn_cast<MaskedGatherSDNode>(this)) {
OS << "<";
printMemOperand(OS, *MGather->getMemOperand(), G);
bool doExt = true;
switch (MGather->getExtensionType()) {
default: doExt = false; break;
case ISD::EXTLOAD: OS << ", anyext"; break;
case ISD::SEXTLOAD: OS << ", sext"; break;
case ISD::ZEXTLOAD: OS << ", zext"; break;
}
if (doExt)
OS << " from " << MGather->getMemoryVT().getEVTString();
auto Signed = MGather->isIndexSigned() ? "signed" : "unsigned";
auto Scaled = MGather->isIndexScaled() ? "scaled" : "unscaled";
OS << ", " << Signed << " " << Scaled << " offset";
OS << ">";
} else if (const auto *MScatter = dyn_cast<MaskedScatterSDNode>(this)) {
OS << "<";
printMemOperand(OS, *MScatter->getMemOperand(), G);
if (MScatter->isTruncatingStore())
OS << ", trunc to " << MScatter->getMemoryVT().getEVTString();
auto Signed = MScatter->isIndexSigned() ? "signed" : "unsigned";
auto Scaled = MScatter->isIndexScaled() ? "scaled" : "unscaled";
OS << ", " << Signed << " " << Scaled << " offset";
OS << ">";
} else if (const MemSDNode *M = dyn_cast<MemSDNode>(this)) {
OS << "<";
printMemOperand(OS, *M->getMemOperand(), G);
OS << ">";
} else if (const BlockAddressSDNode *BA =
dyn_cast<BlockAddressSDNode>(this)) {
int64_t offset = BA->getOffset();
OS << "<";
BA->getBlockAddress()->getFunction()->printAsOperand(OS, false);
OS << ", ";
BA->getBlockAddress()->getBasicBlock()->printAsOperand(OS, false);
OS << ">";
if (offset > 0)
OS << " + " << offset;
else
OS << " " << offset;
if (unsigned int TF = BA->getTargetFlags())
OS << " [TF=" << TF << ']';
} else if (const AddrSpaceCastSDNode *ASC =
dyn_cast<AddrSpaceCastSDNode>(this)) {
OS << '['
<< ASC->getSrcAddressSpace()
<< " -> "
<< ASC->getDestAddressSpace()
<< ']';
} else if (const LifetimeSDNode *LN = dyn_cast<LifetimeSDNode>(this)) {
if (LN->hasOffset())
OS << "<" << LN->getOffset() << " to " << LN->getOffset() + LN->getSize() << ">";
} else if (const auto *AA = dyn_cast<AssertAlignSDNode>(this)) {
OS << '<' << AA->getAlign().value() << '>';
}
if (VerboseDAGDumping) {
if (unsigned Order = getIROrder())
OS << " [ORD=" << Order << ']';
if (getNodeId() != -1)
OS << " [ID=" << getNodeId() << ']';
if (!(isa<ConstantSDNode>(this) || (isa<ConstantFPSDNode>(this))))
OS << " # D:" << isDivergent();
if (G && !G->GetDbgValues(this).empty()) {
OS << " [NoOfDbgValues=" << G->GetDbgValues(this).size() << ']';
for (SDDbgValue *Dbg : G->GetDbgValues(this))
if (!Dbg->isInvalidated())
Dbg->print(OS);
} else if (getHasDebugValue())
OS << " [NoOfDbgValues>0]";
}
}
LLVM_DUMP_METHOD void SDDbgValue::print(raw_ostream &OS) const {
OS << " DbgVal(Order=" << getOrder() << ')';
if (isInvalidated())
OS << "(Invalidated)";
if (isEmitted())
OS << "(Emitted)";
OS << "(";
bool Comma = false;
for (const SDDbgOperand &Op : getLocationOps()) {
if (Comma)
OS << ", ";
switch (Op.getKind()) {
case SDDbgOperand::SDNODE:
if (Op.getSDNode())
OS << "SDNODE=" << PrintNodeId(*Op.getSDNode()) << ':' << Op.getResNo();
else
OS << "SDNODE";
break;
case SDDbgOperand::CONST:
OS << "CONST";
break;
case SDDbgOperand::FRAMEIX:
OS << "FRAMEIX=" << Op.getFrameIx();
break;
case SDDbgOperand::VREG:
OS << "VREG=" << Op.getVReg();
break;
}
Comma = true;
}
OS << ")";
if (isIndirect()) OS << "(Indirect)";
if (isVariadic())
OS << "(Variadic)";
OS << ":\"" << Var->getName() << '"';
#ifndef NDEBUG
if (Expr->getNumElements())
Expr->dump();
#endif
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SDDbgValue::dump() const {
if (isInvalidated())
return;
print(dbgs());
dbgs() << "\n";
}
#endif
/// Return true if this node is so simple that we should just print it inline
/// if it appears as an operand.
static bool shouldPrintInline(const SDNode &Node, const SelectionDAG *G) {
// Avoid lots of cluttering when inline printing nodes with associated
// DbgValues in verbose mode.
if (VerboseDAGDumping && G && !G->GetDbgValues(&Node).empty())
return false;
if (Node.getOpcode() == ISD::EntryToken)
return false;
return Node.getNumOperands() == 0;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
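// Recursively dump each single-use, non-inline operand before the node
// itself, so a chain of one-use nodes reads top-down.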
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
for (const SDValue &Op : N->op_values()) {
if (shouldPrintInline(*Op.getNode(), G))
continue;
if (Op.getNode()->hasOneUse())
DumpNodes(Op.getNode(), indent+2, G);
}
dbgs().indent(indent);
N->dump(G);
}
LLVM_DUMP_METHOD void SelectionDAG::dump() const {
dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n";
for (const SDNode &N : allnodes()) {
if (!N.hasOneUse() && &N != getRoot().getNode() &&
(!shouldPrintInline(N, this) || N.use_empty()))
DumpNodes(&N, 2, this);
}
if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
dbgs() << "\n";
if (VerboseDAGDumping) {
if (DbgBegin() != DbgEnd())
dbgs() << "SDDbgValues:\n";
for (auto *Dbg : make_range(DbgBegin(), DbgEnd()))
Dbg->dump();
if (ByvalParmDbgBegin() != ByvalParmDbgEnd())
dbgs() << "Byval SDDbgValues:\n";
for (auto *Dbg : make_range(ByvalParmDbgBegin(), ByvalParmDbgEnd()))
Dbg->dump();
}
dbgs() << "\n";
}
#endif
void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
OS << PrintNodeId(*this) << ": ";
print_types(OS, G);
OS << " = " << getOperationName(G);
print_details(OS, G);
}
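// Print an operand inline when it is a simple leaf node, or as a reference
// to its node id otherwise. Returns true if it was printed inline.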
static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
const SDValue Value) {
if (!Value.getNode()) {
OS << "<null>";
return false;
}
if (shouldPrintInline(*Value.getNode(), G)) {
OS << Value->getOperationName(G) << ':';
Value->print_types(OS, G);
Value->print_details(OS, G);
return true;
}
OS << PrintNodeId(*Value.getNode());
if (unsigned RN = Value.getResNo())
OS << ':' << RN;
return false;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
using VisitedSDNodeSet = SmallPtrSet<const SDNode *, 32>;
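// Print N followed by its operands, recursing into each operand and using
// \p once to emit shared subtrees only one time.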
static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
const SelectionDAG *G, VisitedSDNodeSet &once) {
if (!once.insert(N).second) // If we've been here before, return now.
return;
// Dump the current SDNode, but don't end the line yet.
OS.indent(indent);
N->printr(OS, G);
// Having printed this SDNode, walk the children:
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (i) OS << ",";
OS << " ";
const SDValue Op = N->getOperand(i);
bool printedInline = printOperand(OS, G, Op);
if (printedInline)
once.insert(Op.getNode());
}
OS << "\n";
// Dump children that have grandchildren on their own line(s).
for (const SDValue &Op : N->op_values())
DumpNodesr(OS, Op.getNode(), indent+2, G, once);
}
LLVM_DUMP_METHOD void SDNode::dumpr() const {
VisitedSDNodeSet once;
DumpNodesr(dbgs(), this, 0, nullptr, once);
}
LLVM_DUMP_METHOD void SDNode::dumpr(const SelectionDAG *G) const {
VisitedSDNodeSet once;
DumpNodesr(dbgs(), this, 0, G, once);
}
#endif
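// Depth-limited recursive print used by printrWithDepth; chain operands
// (MVT::Other) are skipped to keep the output focused on data dependencies.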
static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
const SelectionDAG *G, unsigned depth,
unsigned indent) {
if (depth == 0)
return;
OS.indent(indent);
N->print(OS, G);
for (const SDValue &Op : N->op_values()) {
// Don't follow chain operands.
if (Op.getValueType() == MVT::Other)
continue;
OS << '\n';
printrWithDepthHelper(OS, Op.getNode(), G, depth - 1, indent + 2);
}
}
void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
unsigned depth) const {
printrWithDepthHelper(OS, this, G, depth, 0);
}
void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
// Don't print impossibly deep things.
printrWithDepth(OS, G, 10);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
printrWithDepth(dbgs(), G, depth);
}
LLVM_DUMP_METHOD void SDNode::dumprFull(const SelectionDAG *G) const {
// Don't print impossibly deep things.
dumprWithDepth(G, 10);
}
#endif
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
printr(OS, G);
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
if (i) OS << ", "; else OS << " ";
printOperand(OS, G, getOperand(i));
}
if (DebugLoc DL = getDebugLoc()) {
OS << ", ";
DL.print(OS);
}
}