Add pass to support VX/VF instruction generation

Summary: Add pass to support VX/VF instruction generation

Test Plan: none

Reviewers: hualin

Reviewed By: hualin

Subscribers: yanming

Differential Revision: http://www.tpt.com/D583
This commit is contained in:
zhoujing 2023-01-30 13:13:31 +08:00
parent bbe7987a03
commit 47fc50bb92
14 changed files with 315 additions and 62 deletions

View File

@ -1,8 +1,4 @@
{
"phabricator.uri" : "https://reviews.llvm.org/",
"repository.callsign" : "G",
"conduit_uri" : "https://reviews.llvm.org/",
"base": "git:HEAD^",
"arc.land.onto.default": "main",
"arc.land.onto": ["main"]
}
"phabricator.uri" : "http://www.tpt.com",
"notification" : "The code review's uri is Terapines' private network uri"
}

View File

@ -40,6 +40,7 @@ add_llvm_target(RISCVCodeGen
RISCVTargetObjectFile.cpp
RISCVTargetTransformInfo.cpp
VentusRegextInsertion.cpp
VentusVVInstrConversion.cpp
GISel/RISCVCallLowering.cpp
GISel/RISCVInstructionSelector.cpp
GISel/RISCVLegalizerInfo.cpp

View File

@ -95,6 +95,10 @@ enum {
// compiler is free to select either one.
UsesMaskPolicyShift = IsRVVWideningReductionShift + 1,
UsesMaskPolicyMask = 1 << UsesMaskPolicyShift,
// Check if this instruction meets the format of RVInstVV
IsVVALUInstrShift = UsesMaskPolicyShift + 1,
IsVVALUInstrMask = 1 << IsVVALUInstrShift,
};
// Match with the definitions in RISCVInstrFormats.td
@ -132,6 +136,12 @@ static inline VConstraintType getConstraint(uint64_t TSFlags) {
static inline bool hasDummyMaskOp(uint64_t TSFlags) {
return TSFlags & HasDummyMaskOpMask;
}
/// \returns true if the instruction meets the format of RVInstVV
static inline bool isVVALUInstr(uint64_t TSFlags) {
return TSFlags & IsVVALUInstrMask;
}
/// \returns true if tail agnostic is enforced for the instruction.
static inline bool doesForceTailAgnostic(uint64_t TSFlags) {
return TSFlags & ForceTailAgnosticMask;

View File

@ -69,6 +69,9 @@ void initializeRISCVRedundantCopyEliminationPass(PassRegistry &);
FunctionPass *createVentusRegextInsertionPass();
void initializeVentusRegextInsertionPass(PassRegistry &);
FunctionPass *createVentusVVInstrConversionPass();
void initializeVentusVVInstrConversionPass(PassRegistry &);
InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
RISCVSubtarget &,
RISCVRegisterBankInfo &);

View File

@ -205,6 +205,9 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
bit UsesMaskPolicy = 0;
let TSFlags{18} = UsesMaskPolicy;
bit IsVVALUInstr = 0;
let TSFlags{19} = IsVVALUInstr;
}
// Pseudo instructions

View File

@ -290,6 +290,7 @@ void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVPreRAExpandPseudoPass());
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createRISCVMergeBaseOffsetOptPass());
addPass(createVentusVVInstrConversionPass());
}
void RISCVPassConfig::addPostRegAlloc() {

View File

@ -77,6 +77,7 @@ class RVInstVV<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
let Inst{14-12} = opv.Value;
let Inst{11-7} = vd;
let Opcode = OPC_OP_V.Value;
let IsVVALUInstr = 1;
}
// vALU branch

View File

@ -0,0 +1,263 @@
//===-- VentusVVInstrConversion.cpp - VV instruction conversion -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that converts vop.vv instructions into vop.vx/vf
// instructions. Currently, the objects stored in sGPR and sGPRF32 will
// be moved to VGPR in divergent nodes, so the patterns that would match VX/VF
// instructions are never matched during instruction selection.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#define VENTUS_VV_INSTRUCTION_CONVRSION "Ventus VV instruction conversion pass"
#define DEBUG_TYPE "Ventus VV instruction conversion"
using namespace llvm;
namespace {
/// Maps each supported VV instruction opcode to its VX/VF counterpart.
/// convertInstr() consults this table after it has proven that one source of
/// the VV instruction comes from a scalar register.
/// NOTE: look entries up with lookup(), not operator[], so that unmapped
/// opcodes do not default-insert a zero entry.
DenseMap<unsigned, unsigned> VV2VXOpcodeMap = {
    {RISCV::VADD_VV, RISCV::VADD_VX},
    {RISCV::VSUB_VV, RISCV::VSUB_VX},
    {RISCV::VMINU_VV, RISCV::VMINU_VX},
    // Signed min must map to the signed VX form (was VMINU_VX, which would
    // silently turn a signed min into an unsigned min).
    {RISCV::VMIN_VV, RISCV::VMIN_VX},
    {RISCV::VMAX_VV, RISCV::VMAX_VX},
    {RISCV::VMAXU_VV, RISCV::VMAXU_VX},
    {RISCV::VAND_VV, RISCV::VAND_VX},
    {RISCV::VOR_VV, RISCV::VOR_VX},
    {RISCV::VXOR_VV, RISCV::VXOR_VX},
    {RISCV::VMSEQ_VV, RISCV::VMSEQ_VX},
    {RISCV::VMSNE_VV, RISCV::VMSNE_VX},
    {RISCV::VMSLTU_VV, RISCV::VMSLTU_VX},
    {RISCV::VMSLT_VV, RISCV::VMSLT_VX},
    {RISCV::VMSLEU_VV, RISCV::VMSLEU_VX},
    {RISCV::VMSLE_VV, RISCV::VMSLE_VX},
    {RISCV::VSLL_VV, RISCV::VSLL_VX},
    {RISCV::VSRL_VV, RISCV::VSRL_VX},
    {RISCV::VSRA_VV, RISCV::VSRA_VX},
    {RISCV::VSSRL_VV, RISCV::VSSRL_VX},
    {RISCV::VSSRA_VV, RISCV::VSSRA_VX},
    {RISCV::VDIVU_VV, RISCV::VDIVU_VX},
    {RISCV::VDIV_VV, RISCV::VDIV_VX},
    {RISCV::VREMU_VV, RISCV::VREMU_VX},
    {RISCV::VFSUB_VV, RISCV::VFSUB_VF},
    {RISCV::VREM_VV, RISCV::VREM_VX},
    {RISCV::VMULHU_VV, RISCV::VMULHU_VX},
    {RISCV::VMUL_VV, RISCV::VMUL_VX},
    {RISCV::VMULHSU_VV, RISCV::VMULHSU_VX},
    {RISCV::VMULH_VV, RISCV::VMULH_VX},
    {RISCV::VMADD_VV, RISCV::VMADD_VX},
    {RISCV::VNMSUB_VV, RISCV::VNMSUB_VX},
    {RISCV::VMACC_VV, RISCV::VMACC_VX},
    {RISCV::VNMSAC_VV, RISCV::VNMSAC_VX},
    {RISCV::VFADD_VV, RISCV::VFADD_VF},
    {RISCV::VFMSUB_VV, RISCV::VFMSUB_VF},
    {RISCV::VFMIN_VV, RISCV::VFMIN_VF},
    {RISCV::VFMAX_VV, RISCV::VFMAX_VF},
    {RISCV::VFSGNJ_VV, RISCV::VFSGNJ_VF},
    {RISCV::VFSGNJN_VV, RISCV::VFSGNJN_VF},
    {RISCV::VFSGNJX_VV, RISCV::VFSGNJX_VF},
    {RISCV::VMFEQ_VV, RISCV::VMFEQ_VF},
    {RISCV::VMFLE_VV, RISCV::VMFLE_VF},
    {RISCV::VMFLT_VV, RISCV::VMFLT_VF},
    {RISCV::VMFNE_VV, RISCV::VMFNE_VF},
    {RISCV::VFDIV_VV, RISCV::VFDIV_VF},
    {RISCV::VFMUL_VV, RISCV::VFMUL_VF},
    {RISCV::VFMADD_VV, RISCV::VFMADD_VF},
    {RISCV::VFNMADD_VV, RISCV::VFNMADD_VF},
    {RISCV::VFMACC_VV, RISCV::VFMACC_VF},
    {RISCV::VFNMACC_VV, RISCV::VFNMACC_VF},
    {RISCV::VFNMSUB_VV, RISCV::VFNMSUB_VF},
    {RISCV::VFMSAC_VV, RISCV::VFMSAC_VF},
    {RISCV::VFNMSAC_VV, RISCV::VFNMSAC_VF}};
/// Machine pass that folds a scalar-to-vector COPY into an immediately
/// following VV ALU instruction, producing the VX/VF form instead.
class VentusVVInstrConversion : public MachineFunctionPass {
public:
  // Target instruction info; initialized in runOnMachineFunction.
  const RISCVInstrInfo *TII;
  static char ID;
  // NOTE(review): despite the names, MRI holds the *target* register info
  // (RISCVRegisterInfo) and MR holds the MachineRegisterInfo — consider
  // renaming (TRI/MRI) to match LLVM convention.
  const RISCVRegisterInfo *MRI;
  const MachineRegisterInfo *MR;

  VentusVVInstrConversion() : MachineFunctionPass(ID) {
    initializeVentusVVInstrConversionPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return VENTUS_VV_INSTRUCTION_CONVRSION;
  }

private:
  /// Run the conversion over a single basic block; \returns true if changed.
  bool runOnMachineBasicBlock(MachineBasicBlock &MBB);

  /// Check if the instruction is VV ALU instruction or not
  /// (tests the IsVVALUInstr TSFlags bit set on RVInstVV).
  bool isVVALUInstruction(MachineInstr &MI) {
    return RISCVII::isVVALUInstr(MI.getDesc().TSFlags);
  };

  /// \returns true if operands 1 and 2 of \p MI may be swapped.
  bool isVALUCommutableInstr(MachineInstr &MI);

  /// Try to rewrite \p VVMI using the scalar source of \p CopyMI;
  /// \returns true if the block was modified.
  bool convertInstr(MachineBasicBlock &MBB, MachineInstr &CopyMI,
                    MachineInstr &VVMI);

  /// Swap the two source register operands of \p MI.
  bool swapRegOperands(MachineInstr &MI);

  /// \returns true if \p MI is a COPY from a scalar GPR to a VGPR.
  bool isGPR2VGPRCopy(MachineInstr &MI);
};

char VentusVVInstrConversion::ID = 0;
/// Swap register operands of instruction such as
///   vadd.vv v0, v2, v1
/// into
///   vadd.vv v0, v1, v2
bool VentusVVInstrConversion::swapRegOperands(MachineInstr &MI) {
  MachineOperand &Src1 = MI.getOperand(1);
  MachineOperand &Src2 = MI.getOperand(2);
  assert((Src1.isReg() && Src2.isReg()) && "Operand is not register");
  // Exchange the two source registers in place.
  const Register Tmp = Src1.getReg();
  Src1.setReg(Src2.getReg());
  Src2.setReg(Tmp);
  return true;
}
/// Cache the target hooks, then visit every basic block of \p MF.
bool VentusVVInstrConversion::runOnMachineFunction(MachineFunction &MF) {
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  TII = ST.getInstrInfo();
  MRI = ST.getRegisterInfo();
  MR = &MF.getRegInfo();

  bool Changed = false;
  for (MachineBasicBlock &MBB : MF)
    Changed |= runOnMachineBasicBlock(MBB);
  return Changed;
}
/// Scan \p MBB for a GPR-to-VGPR COPY immediately followed by a VV ALU
/// instruction and try to fold the pair into a single VX/VF instruction.
/// \returns true if the block was modified.
bool VentusVVInstrConversion::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
  bool isMBBChanged = false;
  // Use an explicit iterator: convertInstr may erase the *next* instruction
  // (the VV instruction), so a plain range-for would advance through the
  // erased node and invalidate the traversal.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  while (MBBI != MBB.end()) {
    MachineInstr &MI = *MBBI;
    ++MBBI; // Now at MI's successor (the candidate VV instruction), or end.
    MachineInstr *NextMI = MI.getNextNode();
    // Check RISCV::COPY instructions' format and its next instruction's format
    if (isGPR2VGPRCopy(MI) && NextMI && isVVALUInstruction(*NextMI)) {
      // Step past NextMI *before* convertInstr possibly erases it.
      ++MBBI;
      // When met here, we can ensure the coding logic goes to the conversion
      isMBBChanged |= convertInstr(MBB, MI, *NextMI);
    }
  }
  return isMBBChanged;
}
/// This function tries to convert
///   vmv.s.x v2, a0
///   vadd.vv v0, v0, v2
/// into
///   vadd.vx v0, v0, a0
/// *****************************************************
///   vmv.s.x v2, a0
///   vmadd.vv v0, v2, v1
/// into
///   vmadd.vx v0, a0, v1
/// VV to VF conversion follows the same routine.
/// The COPY itself is left in place; presumably it is removed later when it
/// becomes dead — TODO confirm a dead-code pass runs after this one.
/// TODO: vrsub has VX and VI version, need to deal with this specifically?
bool VentusVVInstrConversion::convertInstr(MachineBasicBlock &MBB,
                                           MachineInstr &CopyMI,
                                           MachineInstr &VVMI) {
  bool isMBBChanged = false;
  const Register CopyDst = CopyMI.getOperand(0).getReg();
  // For commutable instructions, move the copied register into operand 2 so
  // the single rewrite pattern below applies.
  if (isVALUCommutableInstr(VVMI) && CopyDst != VVMI.getOperand(2).getReg())
    isMBBChanged |= swapRegOperands(VVMI);
  // Other incommutable instructions check.
  if (CopyDst != VVMI.getOperand(2).getReg())
    return isMBBChanged;
  // Use lookup() rather than operator[]: operator[] default-inserts a zero
  // entry for unmapped opcodes, and in release builds (where the assert
  // below compiles away) we would then build an instruction with opcode 0.
  unsigned NewOpcode = VV2VXOpcodeMap.lookup(VVMI.getOpcode());
  assert(NewOpcode && "No VV instruction reflection to VX/VF "
                      "instruction, please check the mapping");
  if (!NewOpcode)
    return isMBBChanged;
  Register Dst = VVMI.getOperand(0).getReg();
  DebugLoc DL = VVMI.getDebugLoc();
  if (VVMI.getNumExplicitOperands() == 3) {
    // Two-source VV ALU instruction: vop.vx vd, vs, rs.
    BuildMI(MBB, VVMI, DL, TII->get(NewOpcode), Dst)
        .addReg(VVMI.getOperand(1).getReg())
        .addReg(CopyMI.getOperand(1).getReg())
    ;
    VVMI.eraseFromParent();
  }
  // Three-operands VV ALU instruction conversion
  else if (VVMI.getNumExplicitOperands() == 4 &&
           CopyDst != VVMI.getOperand(3).getReg()) {
    BuildMI(MBB, VVMI, DL, TII->get(NewOpcode), Dst)
        .addReg(VVMI.getOperand(1).getReg())
        .addReg(CopyMI.getOperand(1).getReg())
        .addReg(VVMI.getOperand(3).getReg());
    VVMI.eraseFromParent();
  }
  // FIXME: maybe we need to take other unsupported instructions into
  // consideration, so we add an else statement here and return false directly
  else
    return isMBBChanged;
  return true;
}
/// FIXME: we also can add attribute in VentusInstrInfoV.td file, but changes
/// are very trivial which can happen in many separated places, for now we
/// list the opcodes here to accomplish our purpose.
/// In ventus : V+X = X+V, V*X = X*V
/// NOTE(review): standard RVV vmulhsu treats its two sources asymmetrically
/// (signed x unsigned) — confirm it is safe to treat as commutable here.
bool VentusVVInstrConversion::isVALUCommutableInstr(MachineInstr &MI) {
  const unsigned Opc = MI.getOpcode();
  return Opc == RISCV::VADD_VV || Opc == RISCV::VMUL_VV ||
         Opc == RISCV::VFADD_VV || Opc == RISCV::VFMUL_VV ||
         Opc == RISCV::VMADD_VV || Opc == RISCV::VFMADD_VV ||
         Opc == RISCV::VMULH_VV || Opc == RISCV::VMULHSU_VV ||
         Opc == RISCV::VMULHU_VV;
}
/// Instruction shall be like this: %1:vgpr = COPY %2:gpr
bool VentusVVInstrConversion::isGPR2VGPRCopy(MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::COPY)
    return false;
  // Source must be a scalar GPR, destination must not be.
  const Register DstReg = MI.getOperand(0).getReg();
  const Register SrcReg = MI.getOperand(1).getReg();
  return MRI->isSGPRReg(*MR, SrcReg) && !MRI->isSGPRReg(*MR, DstReg);
}
} // end of anonymous namespace
// Register the pass so it appears under this name in -print-after/-debug-pass.
INITIALIZE_PASS(VentusVVInstrConversion, "ventus-VV-instructions-conversion",
                VENTUS_VV_INSTRUCTION_CONVRSION, false, false)

namespace llvm {
/// Factory used by the RISC-V pass pipeline (added in addPreRegAlloc).
FunctionPass *createVentusVVInstrConversionPass() {
  return new VentusVVInstrConversion();
}
} // end of namespace llvm

View File

@ -25,13 +25,11 @@ define dso_local spir_kernel void @func(ptr addrspace(1) nocapture noundef align
; VENTUS-NEXT: call _Z12get_local_idj
; VENTUS-NEXT: vmv.s.x v1, zero
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: vmv.s.x v2, s1
; VENTUS-NEXT: vadd.vv v0, v2, v0
; VENTUS-NEXT: vadd.vx v0, v0, s1
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vluxei32.v v0, (a0), v1
; VENTUS-NEXT: vsll.vi v2, v32, 2
; VENTUS-NEXT: vmv.s.x v3, s0
; VENTUS-NEXT: vadd.vv v2, v3, v2
; VENTUS-NEXT: vadd.vx v2, v2, s0
; VENTUS-NEXT: vmv.x.s a0, v2
; VENTUS-NEXT: vluxei32.v v2, (a0), v1
; VENTUS-NEXT: vadd.vv v0, v2, v0

View File

@ -40,16 +40,16 @@ define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-NEXT: vlw v1, zero(s2)
; VENTUS-NEXT: add s1, s1, a0
; VENTUS-NEXT: lw a1, 0(s1)
; VENTUS-NEXT: add a0, s0, a0
; VENTUS-NEXT: add a0, a0, s0
; VENTUS-NEXT: lw a2, 0(a0)
; VENTUS-NEXT: vmv.s.x v2, a1
; VENTUS-NEXT: vmv.s.x v3, a2
; VENTUS-NEXT: vmadd.vv v2, v1, v3
; VENTUS-NEXT: vsuxei32.v v2, (a0), v0
; VENTUS-NEXT: vmadd.vv v1, v2, v3
; VENTUS-NEXT: vsuxei32.v v1, (a0), v0
; VENTUS-NEXT: j .LBB0_3
; VENTUS-NEXT: .LBB0_2: # %if.else
; VENTUS-NEXT: slli a0, a0, 2
; VENTUS-NEXT: add a0, s0, a0
; VENTUS-NEXT: add a0, a0, s0
; VENTUS-NEXT: sw zero, 0(a0)
; VENTUS-NEXT: .LBB0_3: # %if.end
; VENTUS-NEXT: lw ra, -36(sp) # 4-byte Folded Reload

View File

@ -20,12 +20,10 @@ define spir_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A,
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vmv.s.x v1, zero
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: vmv.s.x v2, s1
; VENTUS-NEXT: vadd.vv v2, v2, v0
; VENTUS-NEXT: vadd.vx v2, v0, s1
; VENTUS-NEXT: vmv.x.s a0, v2
; VENTUS-NEXT: vluxei32.v v2, (a0), v1
; VENTUS-NEXT: vmv.s.x v3, s0
; VENTUS-NEXT: vadd.vv v0, v3, v0
; VENTUS-NEXT: vadd.vx v0, v0, s0
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vluxei32.v v0, (a0), v1
; VENTUS-NEXT: vadd.vv v0, v0, v2

View File

@ -17,8 +17,7 @@ define float @fadd_f(float noundef %a) {
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui a0, %hi(global_val)
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vfadd.vv v0, v0, v1
; VENTUS-NEXT: vfadd.vf v0, v0, a0
; VENTUS-NEXT: ret
entry:
%val = load float, ptr @global_val, align 4
@ -41,8 +40,7 @@ define float @fsub_f(float noundef %a) {
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui a0, %hi(global_val)
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vfsub.vv v0, v0, v1
; VENTUS-NEXT: vfsub.vf v0, v0, a0
; VENTUS-NEXT: ret
entry:
%val = load float, ptr @global_val, align 4
@ -65,8 +63,7 @@ define float @fmul_f(float noundef %a) {
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui a0, %hi(global_val)
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vfmul.vv v0, v0, v1
; VENTUS-NEXT: vfmul.vf v0, v0, a0
; VENTUS-NEXT: ret
entry:
%val = load float, ptr @global_val, align 4
@ -89,8 +86,7 @@ define float @fdiv_f(float noundef %a, float noundef %b) {
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui a0, %hi(global_val)
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vfdiv.vv v0, v0, v1
; VENTUS-NEXT: vfdiv.vf v0, v0, a0
; VENTUS-NEXT: ret
entry:
%val = load float, ptr @global_val, align 4
@ -103,8 +99,7 @@ define float @foo_constant(float noundef %a) {
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui a0, %hi(.LCPI8_0)
; VENTUS-NEXT: lw a0, %lo(.LCPI8_0)(a0)
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vfmul.vv v0, v0, v1
; VENTUS-NEXT: vfmul.vf v0, v0, a0
; VENTUS-NEXT: ret
entry:
%mul = fmul float %a, 1.25
@ -391,8 +386,7 @@ define dso_local float @fmadd_f(float noundef %a, float noundef %b, float nounde
; VENTUS-NEXT: lui a0, %hi(.LCPI24_0)
; VENTUS-NEXT: lw a0, %lo(.LCPI24_0)(a0)
; VENTUS-NEXT: vadd.vx v0, v1, zero
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vfmadd.vv v0, v1, v2
; VENTUS-NEXT: vfmadd.vf v0, a0, v2
; VENTUS-NEXT: ret
entry:
%0 = tail call float @llvm.fmuladd.f32(float %b, float 0x3FF3333340000000, float %c)
@ -419,8 +413,7 @@ define dso_local float @fnmadd_f(float noundef %a, float noundef %b, float nound
; VENTUS-NEXT: lui a0, %hi(.LCPI26_0)
; VENTUS-NEXT: lw a0, %lo(.LCPI26_0)(a0)
; VENTUS-NEXT: vadd.vx v0, v1, zero
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vfmsub.vv v0, v1, v2
; VENTUS-NEXT: vfmsub.vf v0, a0, v2
; VENTUS-NEXT: ret
entry:
%fneg = fmul float %b, 0xBFF3333340000000
@ -446,8 +439,7 @@ define dso_local float @fmsub_f(float noundef %a, float noundef %b) local_unname
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui a0, %hi(.LCPI28_0)
; VENTUS-NEXT: lw a0, %lo(.LCPI28_0)(a0)
; VENTUS-NEXT: vmv.s.x v2, a0
; VENTUS-NEXT: vfmsub.vv v0, v2, v1
; VENTUS-NEXT: vfmsub.vf v0, a0, v1
; VENTUS-NEXT: ret
entry:
%mul = fmul float %a, 0x3FF3333340000000
@ -474,8 +466,7 @@ define dso_local float @fnmsub_f(float noundef %a, float noundef %b, float nound
; VENTUS-NEXT: lui a0, %hi(.LCPI30_0)
; VENTUS-NEXT: lw a0, %lo(.LCPI30_0)(a0)
; VENTUS-NEXT: vadd.vx v0, v1, zero
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vfmadd.vv v0, v1, v2
; VENTUS-NEXT: vfmadd.vf v0, a0, v2
; VENTUS-NEXT: ret
entry:
%fneg = fmul float %b, 0x3FF3333340000000

View File

@ -16,8 +16,7 @@ define i32 @vdivu_x(i32 %a) {
; VENTUS: # %bb.0:
; VENTUS-NEXT: lui a0, %hi(global_val)
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vdivu.vv v0, v0, v1
; VENTUS-NEXT: vdivu.vx v0, v0, a0
; VENTUS-NEXT: ret
%val = load i32, ptr @global_val
%udiv = udiv i32 %a, %val
@ -38,8 +37,7 @@ define i32 @vdiv_x(i32 %a) {
; VENTUS: # %bb.0:
; VENTUS-NEXT: lui a0, %hi(global_val)
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vdiv.vv v0, v0, v1
; VENTUS-NEXT: vdiv.vx v0, v0, a0
; VENTUS-NEXT: ret
%val = load i32, ptr @global_val
%sdiv = sdiv i32 %a, %val
@ -82,8 +80,7 @@ define i32 @srem_pow2(i32 %a) nounwind {
; VENTUS-NEXT: vsrl.vi v1, v1, 29
; VENTUS-NEXT: vadd.vv v1, v0, v1
; VENTUS-NEXT: li a0, -8
; VENTUS-NEXT: vmv.s.x v2, a0
; VENTUS-NEXT: vand.vv v1, v1, v2
; VENTUS-NEXT: vand.vx v1, v1, a0
; VENTUS-NEXT: vsub.vv v0, v0, v1
; VENTUS-NEXT: ret
%1 = srem i32 %a, 8
@ -97,8 +94,7 @@ define i32 @srem_pow2_2(i32 %a) nounwind {
; VENTUS-NEXT: vsrl.vi v1, v1, 16
; VENTUS-NEXT: vadd.vv v1, v0, v1
; VENTUS-NEXT: lui a0, 1048560
; VENTUS-NEXT: vmv.s.x v2, a0
; VENTUS-NEXT: vand.vv v1, v1, v2
; VENTUS-NEXT: vand.vx v1, v1, a0
; VENTUS-NEXT: vsub.vv v0, v0, v1
; VENTUS-NEXT: ret
%1 = srem i32 %a, 65536
@ -130,8 +126,7 @@ define i32 @vadd_x(i32 %a) nounwind {
; VENTUS: # %bb.0:
; VENTUS-NEXT: lui a0, %hi(global_val)
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vadd.vv v0, v0, v1
; VENTUS-NEXT: vadd.vx v0, v0, a0
; VENTUS-NEXT: ret
%val = load i32, ptr @global_val
%add = add i32 %a, %val
@ -161,8 +156,7 @@ define i32 @vsub_x(i32 %a) nounwind {
; VENTUS: # %bb.0:
; VENTUS-NEXT: lui a0, %hi(global_val)
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vsub.vv v0, v0, v1
; VENTUS-NEXT: vsub.vx v0, v0, a0
; VENTUS-NEXT: ret
%val = load i32, ptr @global_val
%sub = sub i32 %a, %val
@ -196,8 +190,7 @@ define i32 @vmul_x(i32 %a) nounwind {
; VENTUS: # %bb.0:
; VENTUS-NEXT: lui a0, %hi(global_val)
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vmul.vv v0, v0, v1
; VENTUS-NEXT: vmul.vx v0, v0, a0
; VENTUS-NEXT: ret
%val = load i32, ptr @global_val
%mul = mul i32 %a, %val
@ -222,8 +215,7 @@ define i32 @vmulh_x(i32 %a) nounwind {
; VENTUS: # %bb.0:
; VENTUS-NEXT: lui a0, %hi(global_val)
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vmulh.vv v0, v0, v1
; VENTUS-NEXT: vmulh.vx v0, v0, a0
; VENTUS-NEXT: ret
%val = load i32, ptr @global_val
%1 = sext i32 %a to i64
@ -252,8 +244,7 @@ define i32 @vmulhu_x(i32 %a) nounwind {
; VENTUS: # %bb.0:
; VENTUS-NEXT: lui a0, %hi(global_val)
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vmulhu.vv v0, v0, v1
; VENTUS-NEXT: vmulhu.vx v0, v0, a0
; VENTUS-NEXT: ret
%val = load i32, ptr @global_val
%1 = zext i32 %a to i64
@ -281,8 +272,7 @@ define i32 @vmulhsu_x(i32 %a) nounwind {
; VENTUS: # %bb.0:
; VENTUS-NEXT: lui a0, %hi(global_val)
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
; VENTUS-NEXT: vmv.s.x v1, a0
; VENTUS-NEXT: vmulhsu.vv v0, v0, v1
; VENTUS-NEXT: vmulhsu.vx v0, v0, a0
; VENTUS-NEXT: ret
%val = load i32, ptr @global_val
%1 = sext i32 %a to i64
@ -331,8 +321,7 @@ define dso_local i32 @nmsub_x(i32 noundef %a, i32 noundef %b) local_unnamed_addr
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui a0, %hi(global_val)
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
; VENTUS-NEXT: vmv.s.x v2, a0
; VENTUS-NEXT: vmadd.vv v0, v2, v1
; VENTUS-NEXT: vmadd.vx v0, a0, v1
; VENTUS-NEXT: ret
entry:
%val = load i32, ptr @global_val
@ -360,8 +349,7 @@ define dso_local i32 @madd_x(i32 noundef %a, i32 noundef %b) local_unnamed_addr
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui a0, %hi(global_val)
; VENTUS-NEXT: lw a0, %lo(global_val)(a0)
; VENTUS-NEXT: vmv.s.x v2, a0
; VENTUS-NEXT: vmadd.vv v0, v2, v1
; VENTUS-NEXT: vmadd.vx v0, a0, v1
; VENTUS-NEXT: ret
entry:
%val = load i32, ptr @global_val

View File

@ -15,7 +15,7 @@ define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrs
; VENTUS-NEXT: lw a1, 0(a0)
; VENTUS-NEXT: lw a2, 4(a0)
; VENTUS-NEXT: lw a0, 8(a0)
; VENTUS-NEXT: add a1, a2, a1
; VENTUS-NEXT: add a1, a1, a2
; VENTUS-NEXT: sw a1, 0(a0)
; VENTUS-NEXT: ret
%add = add nsw i32 %b, %a