forked from OSchip/llvm-project
Add pass to support VX/VF instruction generation
Summary: Add pass to support VX/VF instruction generation Test Plan: none Reviewers: hualin Reviewed By: hualin Subscribers: yanming Differential Revision: http://www.tpt.com/D583
This commit is contained in:
parent
bbe7987a03
commit
47fc50bb92
10
.arcconfig
10
.arcconfig
|
@ -1,8 +1,4 @@
|
|||
{
|
||||
"phabricator.uri" : "https://reviews.llvm.org/",
|
||||
"repository.callsign" : "G",
|
||||
"conduit_uri" : "https://reviews.llvm.org/",
|
||||
"base": "git:HEAD^",
|
||||
"arc.land.onto.default": "main",
|
||||
"arc.land.onto": ["main"]
|
||||
}
|
||||
"phabricator.uri" : "http://www.tpt.com",
|
||||
"notification" : "The code review's uri is Terapines' private network uri"
|
||||
}
|
|
@ -40,6 +40,7 @@ add_llvm_target(RISCVCodeGen
|
|||
RISCVTargetObjectFile.cpp
|
||||
RISCVTargetTransformInfo.cpp
|
||||
VentusRegextInsertion.cpp
|
||||
VentusVVInstrConversion.cpp
|
||||
GISel/RISCVCallLowering.cpp
|
||||
GISel/RISCVInstructionSelector.cpp
|
||||
GISel/RISCVLegalizerInfo.cpp
|
||||
|
|
|
@ -95,6 +95,10 @@ enum {
|
|||
// compiler has free to select either one.
|
||||
UsesMaskPolicyShift = IsRVVWideningReductionShift + 1,
|
||||
UsesMaskPolicyMask = 1 << UsesMaskPolicyShift,
|
||||
|
||||
// Check if this instruction meets the format of RVInstVV
|
||||
IsVVALUInstrShift = UsesMaskPolicyShift + 1,
|
||||
IsVVALUInstrMask = 1 << IsVVALUInstrShift,
|
||||
};
|
||||
|
||||
// Match with the definitions in RISCVInstrFormats.td
|
||||
|
@ -132,6 +136,12 @@ static inline VConstraintType getConstraint(uint64_t TSFlags) {
|
|||
static inline bool hasDummyMaskOp(uint64_t TSFlags) {
|
||||
return TSFlags & HasDummyMaskOpMask;
|
||||
}
|
||||
|
||||
/// \returns true if the instruction meets the format of RVInstVV
|
||||
static inline bool isVVALUInstr(uint64_t TSFlags) {
|
||||
return TSFlags & IsVVALUInstrMask;
|
||||
}
|
||||
|
||||
/// \returns true if tail agnostic is enforced for the instruction.
|
||||
static inline bool doesForceTailAgnostic(uint64_t TSFlags) {
|
||||
return TSFlags & ForceTailAgnosticMask;
|
||||
|
|
|
@ -69,6 +69,9 @@ void initializeRISCVRedundantCopyEliminationPass(PassRegistry &);
|
|||
FunctionPass *createVentusRegextInsertionPass();
|
||||
void initializeVentusRegextInsertionPass(PassRegistry &);
|
||||
|
||||
FunctionPass *createVentusVVInstrConversionPass();
|
||||
void initializeVentusVVInstrConversionPass(PassRegistry &);
|
||||
|
||||
InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
|
||||
RISCVSubtarget &,
|
||||
RISCVRegisterBankInfo &);
|
||||
|
|
|
@ -205,6 +205,9 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
|
|||
|
||||
bit UsesMaskPolicy = 0;
|
||||
let TSFlags{18} = UsesMaskPolicy;
|
||||
|
||||
bit IsVVALUInstr = 0;
|
||||
let TSFlags{19} = IsVVALUInstr;
|
||||
}
|
||||
|
||||
// Pseudo instructions
|
||||
|
|
|
@ -290,6 +290,7 @@ void RISCVPassConfig::addPreRegAlloc() {
|
|||
addPass(createRISCVPreRAExpandPseudoPass());
|
||||
if (TM->getOptLevel() != CodeGenOpt::None)
|
||||
addPass(createRISCVMergeBaseOffsetOptPass());
|
||||
addPass(createVentusVVInstrConversionPass());
|
||||
}
|
||||
|
||||
void RISCVPassConfig::addPostRegAlloc() {
|
||||
|
|
|
@ -77,6 +77,7 @@ class RVInstVV<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
|
|||
let Inst{14-12} = opv.Value;
|
||||
let Inst{11-7} = vd;
|
||||
let Opcode = OPC_OP_V.Value;
|
||||
let IsVVALUInstr = 1;
|
||||
}
|
||||
|
||||
// vALU branch
|
||||
|
|
|
@ -0,0 +1,263 @@
|
|||
//===-- VentusVVInstrConversion.cpp - VV instruction conversion -----------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains a pass that convert vop.vv instructions to vop.vx/vf
|
||||
// instructions because currently, the objects stored in sGPR and sGPRF32 will
|
||||
// be moved to VGPR in divergent nodes, so the patterns which match VX/VF
|
||||
// instructions will not be matched
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "MCTargetDesc/RISCVBaseInfo.h"
|
||||
#include "MCTargetDesc/RISCVMCTargetDesc.h"
|
||||
#include "RISCV.h"
|
||||
#include "RISCVInstrInfo.h"
|
||||
#include "RISCVRegisterInfo.h"
|
||||
#include "RISCVTargetMachine.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/CodeGen/ISDOpcodes.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineOperand.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/Register.h"
|
||||
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include "llvm/TableGen/Record.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
|
||||
#define VENTUS_VV_INSTRUCTION_CONVRSION "Ventus VV instruction conversion pass"
|
||||
#define DEBUG_TYPE "Ventus VV instruction conversion"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
/// This map is a reflection of VV instruction to VX/VF instruction
|
||||
/// currently, we use enum to represent all the reflections
|
||||
DenseMap<unsigned, unsigned> VV2VXOpcodeMap = {
|
||||
{RISCV::VADD_VV , RISCV::VADD_VX},
|
||||
{RISCV::VSUB_VV , RISCV::VSUB_VX},
|
||||
{RISCV::VMINU_VV , RISCV::VMINU_VX},
|
||||
{RISCV::VMIN_VV , RISCV::VMINU_VX},
|
||||
{RISCV::VMAX_VV , RISCV::VMAX_VX},
|
||||
{RISCV::VMAXU_VV , RISCV::VMAXU_VX},
|
||||
{RISCV::VAND_VV , RISCV::VAND_VX},
|
||||
{RISCV::VOR_VV , RISCV::VOR_VX},
|
||||
{RISCV::VXOR_VV , RISCV::VXOR_VX},
|
||||
{RISCV::VMSEQ_VV , RISCV::VMSEQ_VX},
|
||||
{RISCV::VMSNE_VV , RISCV::VMSNE_VX},
|
||||
{RISCV::VMSLTU_VV , RISCV::VMSLTU_VX},
|
||||
{RISCV::VMSLT_VV , RISCV::VMSLT_VX},
|
||||
{RISCV::VMSLEU_VV , RISCV::VMSLEU_VX},
|
||||
{RISCV::VMSLE_VV , RISCV::VMSLE_VX},
|
||||
{RISCV::VSLL_VV , RISCV::VSLL_VX},
|
||||
{RISCV::VSRL_VV , RISCV::VSRL_VX},
|
||||
{RISCV::VSRA_VV , RISCV::VSRA_VX},
|
||||
{RISCV::VSSRL_VV , RISCV::VSSRL_VX},
|
||||
{RISCV::VSSRA_VV , RISCV::VSSRA_VX},
|
||||
{RISCV::VDIVU_VV , RISCV::VDIVU_VX},
|
||||
{RISCV::VDIV_VV , RISCV::VDIV_VX},
|
||||
{RISCV::VREMU_VV , RISCV::VREMU_VX},
|
||||
{RISCV::VFSUB_VV , RISCV::VFSUB_VF},
|
||||
{RISCV::VREM_VV , RISCV::VREM_VX},
|
||||
{RISCV::VMULHU_VV , RISCV::VMULHU_VX},
|
||||
{RISCV::VMUL_VV , RISCV::VMUL_VX},
|
||||
{RISCV::VMULHSU_VV , RISCV::VMULHSU_VX},
|
||||
{RISCV::VMULH_VV , RISCV::VMULH_VX},
|
||||
{RISCV::VMADD_VV , RISCV::VMADD_VX},
|
||||
{RISCV::VNMSUB_VV , RISCV::VNMSUB_VX},
|
||||
{RISCV::VMACC_VV , RISCV::VMACC_VX},
|
||||
{RISCV::VNMSAC_VV , RISCV::VNMSAC_VX},
|
||||
{RISCV::VFADD_VV , RISCV::VFADD_VF},
|
||||
{RISCV::VFMSUB_VV , RISCV::VFMSUB_VF},
|
||||
{RISCV::VFMIN_VV , RISCV::VFMIN_VF},
|
||||
{RISCV::VFMAX_VV , RISCV::VFMAX_VF},
|
||||
{RISCV::VFSGNJ_VV , RISCV::VFSGNJ_VF},
|
||||
{RISCV::VFSGNJN_VV , RISCV::VFSGNJN_VF},
|
||||
{RISCV::VFSGNJX_VV , RISCV::VFSGNJX_VF},
|
||||
{RISCV::VMFEQ_VV , RISCV::VMFEQ_VF},
|
||||
{RISCV::VMFLE_VV , RISCV::VMFLE_VF},
|
||||
{RISCV::VMFLT_VV , RISCV::VMFLT_VF},
|
||||
{RISCV::VMFNE_VV , RISCV::VMFNE_VF},
|
||||
{RISCV::VFDIV_VV , RISCV::VFDIV_VF},
|
||||
{RISCV::VFMUL_VV , RISCV::VFMUL_VF},
|
||||
{RISCV::VFMADD_VV , RISCV::VFMADD_VF},
|
||||
{RISCV::VFNMADD_VV , RISCV::VFNMADD_VF},
|
||||
{RISCV::VFMACC_VV , RISCV::VFMACC_VF},
|
||||
{RISCV::VFNMACC_VV , RISCV::VFNMACC_VF},
|
||||
{RISCV::VFNMSUB_VV , RISCV::VFNMSUB_VF},
|
||||
{RISCV::VFMSAC_VV , RISCV::VFMSAC_VF},
|
||||
{RISCV::VFNMSAC_VV , RISCV::VFNMSAC_VF}};
|
||||
|
||||
class VentusVVInstrConversion : public MachineFunctionPass {
|
||||
public:
|
||||
const RISCVInstrInfo *TII;
|
||||
static char ID;
|
||||
const RISCVRegisterInfo *MRI;
|
||||
const MachineRegisterInfo *MR;
|
||||
|
||||
VentusVVInstrConversion() : MachineFunctionPass(ID) {
|
||||
initializeVentusVVInstrConversionPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
StringRef getPassName() const override {
|
||||
return VENTUS_VV_INSTRUCTION_CONVRSION;
|
||||
}
|
||||
|
||||
private:
|
||||
bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
|
||||
|
||||
/// Check if the instruction is VV ALU instruction or not
|
||||
bool isVVALUInstruction(MachineInstr &MI) {
|
||||
return RISCVII::isVVALUInstr(MI.getDesc().TSFlags);
|
||||
};
|
||||
|
||||
bool isVALUCommutableInstr(MachineInstr &MI);
|
||||
|
||||
bool convertInstr(MachineBasicBlock &MBB, MachineInstr &CopyMI,
|
||||
MachineInstr &VVMI);
|
||||
|
||||
bool swapRegOperands(MachineInstr &MI);
|
||||
|
||||
bool isGPR2VGPRCopy(MachineInstr &MI);
|
||||
};
|
||||
|
||||
char VentusVVInstrConversion::ID = 0;
|
||||
|
||||
/// Swap register operands of instruction such as
|
||||
/// vadd.vv v0, v2, v1
|
||||
/// into
|
||||
/// vadd.vv v0, v1, v2
|
||||
bool VentusVVInstrConversion::swapRegOperands(MachineInstr &MI) {
|
||||
MachineOperand &MO1 = MI.getOperand(1);
|
||||
MachineOperand &MO2 = MI.getOperand(2);
|
||||
assert((MO1.isReg() && MO2.isReg()) && "Operand is not register");
|
||||
Register Reg1 = MO1.getReg();
|
||||
Register Reg2 = MO2.getReg();
|
||||
MO1.setReg(Reg2);
|
||||
MO2.setReg(Reg1);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool VentusVVInstrConversion::runOnMachineFunction(MachineFunction &MF) {
  // Cache the per-function target/register info used by the helpers below.
  TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
  MRI = MF.getSubtarget<RISCVSubtarget>().getRegisterInfo();
  MR = &MF.getRegInfo();

  bool Changed = false;
  for (MachineBasicBlock &MBB : MF)
    Changed |= runOnMachineBasicBlock(MBB);
  return Changed;
}
|
||||
|
||||
bool VentusVVInstrConversion::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
  bool Changed = false;
  for (MachineInstr &MI : MBB) {
    // A fold candidate is a GPR->VGPR COPY immediately followed by a VV ALU
    // instruction; anything else is skipped.
    MachineInstr *Succ = MI.getNextNode();
    if (!Succ || !isGPR2VGPRCopy(MI) || !isVVALUInstruction(*Succ))
      continue;
    // When met here, we can ensure the coding logic goes to the conversion.
    // convertInstr may erase *Succ, which is safe while iterating over MI.
    Changed |= convertInstr(MBB, MI, *Succ);
  }
  return Changed;
}
|
||||
|
||||
/// This function tries to convert
|
||||
/// vmv.s.x v2, a0
|
||||
/// vadd.vv v0, v0, v2
|
||||
/// into
|
||||
/// vadd.vx v0, v0, a0
|
||||
/// *****************************************************
|
||||
/// vmv.s.x v2, a0
|
||||
/// vmadd.vv v0, v2, v1
|
||||
/// into
|
||||
/// vmadd.vx v0, a0, v1
|
||||
/// VV to VF conversion follows the same routine
|
||||
/// TODO: vrsub has VX and VI version, need to deal with this specifically?
|
||||
bool VentusVVInstrConversion::convertInstr(MachineBasicBlock &MBB,
|
||||
MachineInstr &CopyMI,
|
||||
MachineInstr &VVMI) {
|
||||
bool isMBBChanged = false;
|
||||
if (isVALUCommutableInstr(VVMI) &&
|
||||
CopyMI.getOperand(0).getReg() != VVMI.getOperand(2).getReg())
|
||||
isMBBChanged |= swapRegOperands(VVMI);
|
||||
// Other incommutable instructions check
|
||||
if (CopyMI.getOperand(0).getReg() != VVMI.getOperand(2).getReg())
|
||||
return isMBBChanged;
|
||||
|
||||
unsigned NewOpcode = VV2VXOpcodeMap[VVMI.getOpcode()];
|
||||
assert(NewOpcode && "No VV instruction reflection to VX/VF "
|
||||
"instruction, please check the mapping");
|
||||
Register Dst = VVMI.getOperand(0).getReg();
|
||||
DebugLoc DL = VVMI.getDebugLoc();
|
||||
if (VVMI.getNumExplicitOperands() == 3) {
|
||||
BuildMI(MBB, VVMI, DL, TII->get(NewOpcode), Dst)
|
||||
.addReg(VVMI.getOperand(1).getReg())
|
||||
.addReg(CopyMI.getOperand(1).getReg());
|
||||
VVMI.eraseFromParent();
|
||||
}
|
||||
// Three-operands VV ALU instruction conversion
|
||||
else if (VVMI.getNumExplicitOperands() == 4 &&
|
||||
CopyMI.getOperand(0).getReg() != VVMI.getOperand(3).getReg()) {
|
||||
BuildMI(MBB, VVMI, DL, TII->get(NewOpcode), VVMI.getOperand(0).getReg())
|
||||
.addReg(VVMI.getOperand(1).getReg())
|
||||
.addReg(CopyMI.getOperand(1).getReg())
|
||||
.addReg(VVMI.getOperand(3).getReg());
|
||||
VVMI.eraseFromParent();
|
||||
}
|
||||
// FIXME: maybe we need to take other unsupported instructions into
|
||||
// consideration, so we add an else statement here and return false directly
|
||||
else
|
||||
return isMBBChanged;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// FIXME: we also can add attribute in VentusInstrInfoV.td file, but changes
|
||||
/// are very trivial which can happen in many separated places, for now we use
|
||||
/// enum to accomplish our purpose
|
||||
/// In ventus : V+X = X+V, V*X=X*V
|
||||
bool VentusVVInstrConversion::isVALUCommutableInstr(MachineInstr &MI) {
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
case RISCV::VADD_VV:
|
||||
case RISCV::VMUL_VV:
|
||||
case RISCV::VFADD_VV:
|
||||
case RISCV::VFMUL_VV:
|
||||
case RISCV::VMADD_VV:
|
||||
case RISCV::VFMADD_VV:
|
||||
case RISCV::VMULH_VV:
|
||||
case RISCV::VMULHSU_VV:
|
||||
case RISCV::VMULHU_VV:
|
||||
return true;
|
||||
};
|
||||
}
|
||||
|
||||
/// Instruction shall be like this: %1:vgpr = COPY %2:gpr
|
||||
bool VentusVVInstrConversion::isGPR2VGPRCopy(MachineInstr &MI) {
|
||||
return MI.getOpcode() == RISCV::COPY &&
|
||||
MRI->isSGPRReg(*MR, MI.getOperand(1).getReg()) &&
|
||||
!MRI->isSGPRReg(*MR, MI.getOperand(0).getReg());
|
||||
}
|
||||
} // end of anonymous namespace
|
||||
|
||||
// Register the pass with the PassRegistry under the command-line name
// "ventus-VV-instructions-conversion".
INITIALIZE_PASS(VentusVVInstrConversion, "ventus-VV-instructions-conversion",
                VENTUS_VV_INSTRUCTION_CONVRSION, false, false)
|
||||
|
||||
namespace llvm {
|
||||
FunctionPass *createVentusVVInstrConversionPass() {
|
||||
return new VentusVVInstrConversion();
|
||||
}
|
||||
} // end of namespace llvm
|
|
@ -25,13 +25,11 @@ define dso_local spir_kernel void @func(ptr addrspace(1) nocapture noundef align
|
|||
; VENTUS-NEXT: call _Z12get_local_idj
|
||||
; VENTUS-NEXT: vmv.s.x v1, zero
|
||||
; VENTUS-NEXT: vsll.vi v0, v0, 2
|
||||
; VENTUS-NEXT: vmv.s.x v2, s1
|
||||
; VENTUS-NEXT: vadd.vv v0, v2, v0
|
||||
; VENTUS-NEXT: vadd.vx v0, v0, s1
|
||||
; VENTUS-NEXT: vmv.x.s a0, v0
|
||||
; VENTUS-NEXT: vluxei32.v v0, (a0), v1
|
||||
; VENTUS-NEXT: vsll.vi v2, v32, 2
|
||||
; VENTUS-NEXT: vmv.s.x v3, s0
|
||||
; VENTUS-NEXT: vadd.vv v2, v3, v2
|
||||
; VENTUS-NEXT: vadd.vx v2, v2, s0
|
||||
; VENTUS-NEXT: vmv.x.s a0, v2
|
||||
; VENTUS-NEXT: vluxei32.v v2, (a0), v1
|
||||
; VENTUS-NEXT: vadd.vv v0, v2, v0
|
||||
|
|
|
@ -40,16 +40,16 @@ define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
|||
; VENTUS-NEXT: vlw v1, zero(s2)
|
||||
; VENTUS-NEXT: add s1, s1, a0
|
||||
; VENTUS-NEXT: lw a1, 0(s1)
|
||||
; VENTUS-NEXT: add a0, s0, a0
|
||||
; VENTUS-NEXT: add a0, a0, s0
|
||||
; VENTUS-NEXT: lw a2, 0(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v2, a1
|
||||
; VENTUS-NEXT: vmv.s.x v3, a2
|
||||
; VENTUS-NEXT: vmadd.vv v2, v1, v3
|
||||
; VENTUS-NEXT: vsuxei32.v v2, (a0), v0
|
||||
; VENTUS-NEXT: vmadd.vv v1, v2, v3
|
||||
; VENTUS-NEXT: vsuxei32.v v1, (a0), v0
|
||||
; VENTUS-NEXT: j .LBB0_3
|
||||
; VENTUS-NEXT: .LBB0_2: # %if.else
|
||||
; VENTUS-NEXT: slli a0, a0, 2
|
||||
; VENTUS-NEXT: add a0, s0, a0
|
||||
; VENTUS-NEXT: add a0, a0, s0
|
||||
; VENTUS-NEXT: sw zero, 0(a0)
|
||||
; VENTUS-NEXT: .LBB0_3: # %if.end
|
||||
; VENTUS-NEXT: lw ra, -36(sp) # 4-byte Folded Reload
|
||||
|
|
|
@ -20,12 +20,10 @@ define spir_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A,
|
|||
; VENTUS-NEXT: call _Z13get_global_idj
|
||||
; VENTUS-NEXT: vmv.s.x v1, zero
|
||||
; VENTUS-NEXT: vsll.vi v0, v0, 2
|
||||
; VENTUS-NEXT: vmv.s.x v2, s1
|
||||
; VENTUS-NEXT: vadd.vv v2, v2, v0
|
||||
; VENTUS-NEXT: vadd.vx v2, v0, s1
|
||||
; VENTUS-NEXT: vmv.x.s a0, v2
|
||||
; VENTUS-NEXT: vluxei32.v v2, (a0), v1
|
||||
; VENTUS-NEXT: vmv.s.x v3, s0
|
||||
; VENTUS-NEXT: vadd.vv v0, v3, v0
|
||||
; VENTUS-NEXT: vadd.vx v0, v0, s0
|
||||
; VENTUS-NEXT: vmv.x.s a0, v0
|
||||
; VENTUS-NEXT: vluxei32.v v0, (a0), v1
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
|
|
|
@ -17,8 +17,7 @@ define float @fadd_f(float noundef %a) {
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lui a0, %hi(global_val)
|
||||
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vfadd.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vfadd.vf v0, v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%val = load float, ptr @global_val, align 4
|
||||
|
@ -41,8 +40,7 @@ define float @fsub_f(float noundef %a) {
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lui a0, %hi(global_val)
|
||||
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vfsub.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vfsub.vf v0, v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%val = load float, ptr @global_val, align 4
|
||||
|
@ -65,8 +63,7 @@ define float @fmul_f(float noundef %a) {
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lui a0, %hi(global_val)
|
||||
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vfmul.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vfmul.vf v0, v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%val = load float, ptr @global_val, align 4
|
||||
|
@ -89,8 +86,7 @@ define float @fdiv_f(float noundef %a, float noundef %b) {
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lui a0, %hi(global_val)
|
||||
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vfdiv.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vfdiv.vf v0, v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%val = load float, ptr @global_val, align 4
|
||||
|
@ -103,8 +99,7 @@ define float @foo_constant(float noundef %a) {
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lui a0, %hi(.LCPI8_0)
|
||||
; VENTUS-NEXT: lw a0, %lo(.LCPI8_0)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vfmul.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vfmul.vf v0, v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%mul = fmul float %a, 1.25
|
||||
|
@ -391,8 +386,7 @@ define dso_local float @fmadd_f(float noundef %a, float noundef %b, float nounde
|
|||
; VENTUS-NEXT: lui a0, %hi(.LCPI24_0)
|
||||
; VENTUS-NEXT: lw a0, %lo(.LCPI24_0)(a0)
|
||||
; VENTUS-NEXT: vadd.vx v0, v1, zero
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vfmadd.vv v0, v1, v2
|
||||
; VENTUS-NEXT: vfmadd.vf v0, a0, v2
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%0 = tail call float @llvm.fmuladd.f32(float %b, float 0x3FF3333340000000, float %c)
|
||||
|
@ -419,8 +413,7 @@ define dso_local float @fnmadd_f(float noundef %a, float noundef %b, float nound
|
|||
; VENTUS-NEXT: lui a0, %hi(.LCPI26_0)
|
||||
; VENTUS-NEXT: lw a0, %lo(.LCPI26_0)(a0)
|
||||
; VENTUS-NEXT: vadd.vx v0, v1, zero
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vfmsub.vv v0, v1, v2
|
||||
; VENTUS-NEXT: vfmsub.vf v0, a0, v2
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%fneg = fmul float %b, 0xBFF3333340000000
|
||||
|
@ -446,8 +439,7 @@ define dso_local float @fmsub_f(float noundef %a, float noundef %b) local_unname
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lui a0, %hi(.LCPI28_0)
|
||||
; VENTUS-NEXT: lw a0, %lo(.LCPI28_0)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v2, a0
|
||||
; VENTUS-NEXT: vfmsub.vv v0, v2, v1
|
||||
; VENTUS-NEXT: vfmsub.vf v0, a0, v1
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%mul = fmul float %a, 0x3FF3333340000000
|
||||
|
@ -474,8 +466,7 @@ define dso_local float @fnmsub_f(float noundef %a, float noundef %b, float nound
|
|||
; VENTUS-NEXT: lui a0, %hi(.LCPI30_0)
|
||||
; VENTUS-NEXT: lw a0, %lo(.LCPI30_0)(a0)
|
||||
; VENTUS-NEXT: vadd.vx v0, v1, zero
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vfmadd.vv v0, v1, v2
|
||||
; VENTUS-NEXT: vfmadd.vf v0, a0, v2
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%fneg = fmul float %b, 0x3FF3333340000000
|
||||
|
|
|
@ -16,8 +16,7 @@ define i32 @vdivu_x(i32 %a) {
|
|||
; VENTUS: # %bb.0:
|
||||
; VENTUS-NEXT: lui a0, %hi(global_val)
|
||||
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vdivu.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vdivu.vx v0, v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
%val = load i32, ptr @global_val
|
||||
%udiv = udiv i32 %a, %val
|
||||
|
@ -38,8 +37,7 @@ define i32 @vdiv_x(i32 %a) {
|
|||
; VENTUS: # %bb.0:
|
||||
; VENTUS-NEXT: lui a0, %hi(global_val)
|
||||
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vdiv.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vdiv.vx v0, v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
%val = load i32, ptr @global_val
|
||||
%sdiv = sdiv i32 %a, %val
|
||||
|
@ -82,8 +80,7 @@ define i32 @srem_pow2(i32 %a) nounwind {
|
|||
; VENTUS-NEXT: vsrl.vi v1, v1, 29
|
||||
; VENTUS-NEXT: vadd.vv v1, v0, v1
|
||||
; VENTUS-NEXT: li a0, -8
|
||||
; VENTUS-NEXT: vmv.s.x v2, a0
|
||||
; VENTUS-NEXT: vand.vv v1, v1, v2
|
||||
; VENTUS-NEXT: vand.vx v1, v1, a0
|
||||
; VENTUS-NEXT: vsub.vv v0, v0, v1
|
||||
; VENTUS-NEXT: ret
|
||||
%1 = srem i32 %a, 8
|
||||
|
@ -97,8 +94,7 @@ define i32 @srem_pow2_2(i32 %a) nounwind {
|
|||
; VENTUS-NEXT: vsrl.vi v1, v1, 16
|
||||
; VENTUS-NEXT: vadd.vv v1, v0, v1
|
||||
; VENTUS-NEXT: lui a0, 1048560
|
||||
; VENTUS-NEXT: vmv.s.x v2, a0
|
||||
; VENTUS-NEXT: vand.vv v1, v1, v2
|
||||
; VENTUS-NEXT: vand.vx v1, v1, a0
|
||||
; VENTUS-NEXT: vsub.vv v0, v0, v1
|
||||
; VENTUS-NEXT: ret
|
||||
%1 = srem i32 %a, 65536
|
||||
|
@ -130,8 +126,7 @@ define i32 @vadd_x(i32 %a) nounwind {
|
|||
; VENTUS: # %bb.0:
|
||||
; VENTUS-NEXT: lui a0, %hi(global_val)
|
||||
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vadd.vx v0, v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
%val = load i32, ptr @global_val
|
||||
%add = add i32 %a, %val
|
||||
|
@ -161,8 +156,7 @@ define i32 @vsub_x(i32 %a) nounwind {
|
|||
; VENTUS: # %bb.0:
|
||||
; VENTUS-NEXT: lui a0, %hi(global_val)
|
||||
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vsub.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vsub.vx v0, v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
%val = load i32, ptr @global_val
|
||||
%sub = sub i32 %a, %val
|
||||
|
@ -196,8 +190,7 @@ define i32 @vmul_x(i32 %a) nounwind {
|
|||
; VENTUS: # %bb.0:
|
||||
; VENTUS-NEXT: lui a0, %hi(global_val)
|
||||
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vmul.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vmul.vx v0, v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
%val = load i32, ptr @global_val
|
||||
%mul = mul i32 %a, %val
|
||||
|
@ -222,8 +215,7 @@ define i32 @vmulh_x(i32 %a) nounwind {
|
|||
; VENTUS: # %bb.0:
|
||||
; VENTUS-NEXT: lui a0, %hi(global_val)
|
||||
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vmulh.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vmulh.vx v0, v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
%val = load i32, ptr @global_val
|
||||
%1 = sext i32 %a to i64
|
||||
|
@ -252,8 +244,7 @@ define i32 @vmulhu_x(i32 %a) nounwind {
|
|||
; VENTUS: # %bb.0:
|
||||
; VENTUS-NEXT: lui a0, %hi(global_val)
|
||||
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vmulhu.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vmulhu.vx v0, v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
%val = load i32, ptr @global_val
|
||||
%1 = zext i32 %a to i64
|
||||
|
@ -281,8 +272,7 @@ define i32 @vmulhsu_x(i32 %a) nounwind {
|
|||
; VENTUS: # %bb.0:
|
||||
; VENTUS-NEXT: lui a0, %hi(global_val)
|
||||
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v1, a0
|
||||
; VENTUS-NEXT: vmulhsu.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vmulhsu.vx v0, v0, a0
|
||||
; VENTUS-NEXT: ret
|
||||
%val = load i32, ptr @global_val
|
||||
%1 = sext i32 %a to i64
|
||||
|
@ -331,8 +321,7 @@ define dso_local i32 @nmsub_x(i32 noundef %a, i32 noundef %b) local_unnamed_addr
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lui a0, %hi(global_val)
|
||||
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v2, a0
|
||||
; VENTUS-NEXT: vmadd.vv v0, v2, v1
|
||||
; VENTUS-NEXT: vmadd.vx v0, a0, v1
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%val = load i32, ptr @global_val
|
||||
|
@ -360,8 +349,7 @@ define dso_local i32 @madd_x(i32 noundef %a, i32 noundef %b) local_unnamed_addr
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: lui a0, %hi(global_val)
|
||||
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
|
||||
; VENTUS-NEXT: vmv.s.x v2, a0
|
||||
; VENTUS-NEXT: vmadd.vv v0, v2, v1
|
||||
; VENTUS-NEXT: vmadd.vx v0, a0, v1
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%val = load i32, ptr @global_val
|
||||
|
|
|
@ -15,7 +15,7 @@ define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrs
|
|||
; VENTUS-NEXT: lw a1, 0(a0)
|
||||
; VENTUS-NEXT: lw a2, 4(a0)
|
||||
; VENTUS-NEXT: lw a0, 8(a0)
|
||||
; VENTUS-NEXT: add a1, a2, a1
|
||||
; VENTUS-NEXT: add a1, a1, a2
|
||||
; VENTUS-NEXT: sw a1, 0(a0)
|
||||
; VENTUS-NEXT: ret
|
||||
%add = add nsw i32 %b, %a
|
||||
|
|
Loading…
Reference in New Issue