forked from OSchip/llvm-project
AMDGPU/SI: Enable the post-ra scheduler
Summary: This includes a hazard recognizer implementation to replace some of the hazard handling we had during frame index elimination. Reviewers: arsenm Subscribers: qcolombet, arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18602 llvm-svn: 268143
This commit is contained in:
parent
52c68bb0f5
commit
cb6ba62d6f
|
@ -384,6 +384,17 @@ void GCNPassConfig::addPreSched2() {
|
|||
}
|
||||
|
||||
void GCNPassConfig::addPreEmitPass() {
|
||||
|
||||
// The hazard recognizer that runs as part of the post-ra scheduler does not
|
||||
// guarantee to be able to handle all hazards correctly. This is because
|
||||
// if there are multiple scheduling regions in a basic block, the regions
|
||||
// are scheduled bottom up, so when we begin to schedule a region we don't
|
||||
// know what instructions were emitted directly before it.
|
||||
//
|
||||
// Here we add a stand-alone hazard recognizer pass which can handle all
|
||||
// cases.
|
||||
addPass(&PostRAHazardRecognizerID);
|
||||
|
||||
addPass(createSIInsertWaitsPass(), false);
|
||||
addPass(createSIShrinkInstructionsPass());
|
||||
addPass(createSILowerControlFlowPass(), false);
|
||||
|
|
|
@ -47,6 +47,7 @@ add_llvm_target(AMDGPUCodeGen
|
|||
AMDGPUInstrInfo.cpp
|
||||
AMDGPUPromoteAlloca.cpp
|
||||
AMDGPURegisterInfo.cpp
|
||||
GCNHazardRecognizer.cpp
|
||||
R600ClauseMergePass.cpp
|
||||
R600ControlFlowFinalizer.cpp
|
||||
R600EmitClauseMarkers.cpp
|
||||
|
|
|
@ -0,0 +1,182 @@
|
|||
//===-- GCNHazardRecognizer.cpp - GCN Hazard Recognizer Impls -------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements hazard recognizers for scheduling on GCN processors.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "GCNHazardRecognizer.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "llvm/CodeGen/ScheduleDAG.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Hazard Recognizer Implementation
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Create a hazard recognizer for \p MF. No instruction is pending for the
/// current cycle (CurrCycleInstr starts out null).
GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
|
||||
CurrCycleInstr(nullptr),
|
||||
MF(MF) {
|
||||
// 5 equals the largest wait-state requirement checked in this file
// (VmemSgprWaitStates in checkVMEMHazards()); AdvanceCycle() uses
// getMaxLookAhead() to bound the EmittedInstrs history.
MaxLookAhead = 5;
|
||||
}
|
||||
|
||||
/// SUnit overload: forwards the MachineInstr wrapped by \p SU to the
/// MachineInstr overload of EmitInstruction().
void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
|
||||
EmitInstruction(SU->getInstr());
|
||||
}
|
||||
|
||||
/// Remember \p MI as the instruction issued in the current cycle. It is only
/// moved into the EmittedInstrs history by the next call to AdvanceCycle().
void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
|
||||
CurrCycleInstr = MI;
|
||||
}
|
||||
|
||||
/// Report whether issuing the instruction of \p SU right now would violate a
/// wait-state requirement.
///
/// \returns NoopHazard if the instruction is an SMRD or VMEM instruction for
/// which the matching check still requires at least one wait state, and
/// NoHazard otherwise. \p Stalls is not used by this implementation.
ScheduleHazardRecognizer::HazardType
|
||||
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
|
||||
const SIInstrInfo *TII =
|
||||
static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
|
||||
MachineInstr *MI = SU->getInstr();
|
||||
|
||||
if (TII->isSMRD(*MI) && checkSMRDHazards(MI) > 0)
|
||||
return NoopHazard;
|
||||
|
||||
if (TII->isVMEM(*MI) && checkVMEMHazards(MI) > 0)
|
||||
return NoopHazard;
|
||||
|
||||
return NoHazard;
|
||||
}
|
||||
|
||||
/// SUnit overload: forwards the MachineInstr wrapped by \p SU to the
/// MachineInstr overload of PreEmitNoops().
unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
|
||||
return PreEmitNoops(SU->getInstr());
|
||||
}
|
||||
|
||||
/// Compute how many wait states must be inserted before \p MI.
///
/// \returns the result of checkSMRDHazards() for SMRD instructions, of
/// checkVMEMHazards() for VMEM instructions, and 0 for everything else.
/// SMRD is tested first, so an instruction matching both predicates only
/// gets the SMRD check.
unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
|
||||
const SIInstrInfo *TII =
|
||||
static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
|
||||
|
||||
// The check functions in this file start from 0 and only take maxima, so the
// std::max(0, ...) clamp is purely defensive before converting to unsigned.
if (TII->isSMRD(*MI))
|
||||
return std::max(0, checkSMRDHazards(MI));
|
||||
|
||||
if (TII->isVMEM(*MI))
|
||||
return std::max(0, checkVMEMHazards(MI));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Record one wait state that carries no instruction: a nullptr entry is
/// pushed on the front (most recent end) of EmittedInstrs. The list is not
/// truncated here; that only happens in AdvanceCycle().
void GCNHazardRecognizer::EmitNoop() {
|
||||
EmittedInstrs.push_front(nullptr);
|
||||
}
|
||||
|
||||
/// Commit the instruction issued in the current cycle (if any) to the
/// EmittedInstrs history, newest entry first, then clear CurrCycleInstr.
/// Each list entry stands for one wait state; nullptr entries are wait states
/// that carry no instruction of their own.
void GCNHazardRecognizer::AdvanceCycle() {
|
||||
|
||||
// When the scheduler detects a stall, it will call AdvanceCycle() without
|
||||
// emitting any instructions.
|
||||
// NOTE(review): in that case nothing is appended to EmittedInstrs, i.e. the
// stalled cycle is not counted as a wait state -- confirm this is intended.
if (!CurrCycleInstr)
|
||||
return;
|
||||
|
||||
const SIInstrInfo *TII =
|
||||
static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
|
||||
unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr);
|
||||
|
||||
// Keep track of emitted instructions
|
||||
EmittedInstrs.push_front(CurrCycleInstr);
|
||||
|
||||
// Add a nullptr for each additional wait state after the first. Make sure
|
||||
// not to add more than getMaxLookAhead() items to the list, since we
|
||||
// truncate the list to that size right after this loop.
|
||||
for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
|
||||
i < e; ++i) {
|
||||
EmittedInstrs.push_front(nullptr);
|
||||
}
|
||||
|
||||
// getMaxLookAhead() is the largest number of wait states we will ever need
|
||||
// to insert, so there is no point in keeping track of more than that many
|
||||
// wait states.
|
||||
// Note that std::list::resize() also grows a shorter list, padding the old
// end with value-initialized (nullptr) entries.
EmittedInstrs.resize(getMaxLookAhead());
|
||||
|
||||
CurrCycleInstr = nullptr;
|
||||
}
|
||||
|
||||
/// Bottom-up scheduling hook. This recognizer only tracks instructions in
/// top-down order, so reaching this function is treated as a program error.
void GCNHazardRecognizer::RecedeCycle() {
|
||||
llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Helper Functions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Count the wait states that have elapsed since \p Reg was last written by
/// an instruction accepted by \p IsHazardDef.
///
/// EmittedInstrs is walked from its front (most recent entry) to its back.
/// Every entry counts as one wait state, including nullptr entries.
///
/// \returns the zero-based position of the first entry that is non-null,
/// satisfies \p IsHazardDef and modifies \p Reg (0 means the most recently
/// recorded entry), or std::numeric_limits<int>::max() if the tracked history
/// contains no such entry.
int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
|
||||
std::function<bool(MachineInstr*)> IsHazardDef ) {
|
||||
const TargetRegisterInfo *TRI =
|
||||
MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo();
|
||||
|
||||
// Start at -1 so that the pre-increment below yields 0 for the first entry.
int WaitStates = -1;
|
||||
for (MachineInstr *MI : EmittedInstrs) {
|
||||
++WaitStates;
|
||||
if (!MI || !IsHazardDef(MI))
|
||||
continue;
|
||||
if (MI->modifiesRegister(Reg, TRI))
|
||||
return WaitStates;
|
||||
}
|
||||
return std::numeric_limits<int>::max();
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// No-op Hazard Detection
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Compute how many wait states are still required before \p SMRD may issue.
///
/// \returns 0 on every generation other than SOUTHERN_ISLANDS. Otherwise the
/// maximum, over all register use operands of \p SMRD, of
/// (4 - wait states since a VALU instruction last modified that register),
/// never less than 0.
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
|
||||
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
|
||||
const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
|
||||
|
||||
// This SMRD hazard only affects SI.
|
||||
if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
|
||||
return 0;
|
||||
|
||||
// A read of an SGPR by SMRD instruction requires 4 wait states when the
|
||||
// SGPR was written by a VALU instruction.
|
||||
int SmrdSgprWaitStates = 4;
|
||||
int WaitStatesNeeded = 0;
|
||||
// Only VALU instructions count as hazardous definitions.
auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
|
||||
|
||||
for (const MachineOperand &Use : SMRD->uses()) {
|
||||
if (!Use.isReg())
|
||||
continue;
|
||||
// When no hazardous def is in the tracked history, getWaitStatesSinceDef()
// returns INT_MAX; the difference is then negative and loses the max below.
int WaitStatesNeededForUse =
|
||||
SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
|
||||
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
|
||||
}
|
||||
return WaitStatesNeeded;
|
||||
}
|
||||
|
||||
/// Compute how many wait states are still required before \p VMEM may issue.
///
/// \returns 0 on generations older than VOLCANIC_ISLANDS. Otherwise the
/// maximum, over all non-VGPR register use operands of \p VMEM, of
/// (5 - wait states since a VALU instruction last modified that register),
/// never less than 0.
int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
|
||||
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
|
||||
const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
|
||||
|
||||
if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
return 0;
|
||||
|
||||
const SIRegisterInfo &TRI = TII->getRegisterInfo();
|
||||
|
||||
// A read of an SGPR by a VMEM instruction requires 5 wait states when the
|
||||
// SGPR was written by a VALU Instruction.
|
||||
int VmemSgprWaitStates = 5;
|
||||
int WaitStatesNeeded = 0;
|
||||
// Only VALU instructions count as hazardous definitions.
auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
|
||||
|
||||
for (const MachineOperand &Use : VMEM->uses()) {
|
||||
// The hazard is about SGPR reads, so VGPR operands are skipped.
if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
|
||||
continue;
|
||||
|
||||
// When no hazardous def is in the tracked history, getWaitStatesSinceDef()
// returns INT_MAX; the difference is then negative and loses the max below.
int WaitStatesNeededForUse =
|
||||
VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
|
||||
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
|
||||
}
|
||||
return WaitStatesNeeded;
|
||||
}
|
|
@ -0,0 +1,59 @@
|
|||
//===-- GCNHazardRecognizer.h - GCN Hazard Recognizers ----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines hazard recognizers for scheduling on GCN processors.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
|
||||
#define LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
|
||||
|
||||
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
|
||||
#include <functional>
|
||||
#include <list>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class MachineFunction;
|
||||
class MachineInstr;
|
||||
class ScheduleDAG;
|
||||
class SIInstrInfo;
|
||||
|
||||
/// Hazard recognizer for GCN targets. It keeps a short history of issued
/// wait states and exposes, through the ScheduleHazardRecognizer interface,
/// how many wait states an SMRD or VMEM instruction still needs before it
/// may be issued.
class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
|
||||
|
||||
// This variable stores the instruction that has been emitted this cycle.
|
||||
// It will be added to EmittedInstrs, when AdvanceCycle() or RecedeCycle() is
|
||||
// called.
|
||||
MachineInstr *CurrCycleInstr;
|
||||
// History of issued wait states, most recent first. A nullptr entry is a
// wait state without an instruction of its own.
std::list<MachineInstr*> EmittedInstrs;
|
||||
// Function being processed; used to reach the subtarget and register info.
const MachineFunction &MF;
|
||||
|
||||
// Returns the number of wait states since Reg was last modified by an
// instruction accepted by IsHazardDef. The default predicate accepts every
// instruction.
int getWaitStatesSinceDef(unsigned Reg,
|
||||
std::function<bool(MachineInstr*)> IsHazardDef =
|
||||
[](MachineInstr*) {return true;});
|
||||
|
||||
// Each returns the number of wait states still needed before the given
// instruction may issue.
int checkSMRDHazards(MachineInstr *SMRD);
|
||||
int checkVMEMHazards(MachineInstr* VMEM);
|
||||
public:
|
||||
GCNHazardRecognizer(const MachineFunction &MF);
|
||||
// We can only issue one instruction per cycle.
|
||||
bool atIssueLimit() const override { return true; }
|
||||
void EmitInstruction(SUnit *SU) override;
|
||||
void EmitInstruction(MachineInstr *MI) override;
|
||||
HazardType getHazardType(SUnit *SU, int Stalls) override;
|
||||
void EmitNoop() override;
|
||||
unsigned PreEmitNoops(SUnit *SU) override;
|
||||
unsigned PreEmitNoops(MachineInstr *) override;
|
||||
void AdvanceCycle() override;
|
||||
void RecedeCycle() override;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif //LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
|
|
@ -15,11 +15,13 @@
|
|||
|
||||
#include "SIInstrInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "GCNHazardRecognizer.h"
|
||||
#include "SIDefines.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/ScheduleDAG.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/CodeGen/RegisterScavenging.h"
|
||||
#include "llvm/MC/MCInstrDesc.h"
|
||||
|
@ -816,6 +818,20 @@ void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
|
|||
}
|
||||
}
|
||||
|
||||
/// Insert a single wait state at position \p MI in \p MBB by delegating to
/// insertWaitStates() with a count of 1.
void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI) const {
|
||||
insertWaitStates(MBB, MI, 1);
|
||||
}
|
||||
|
||||
/// Return the number of wait states that result from executing \p MI:
/// the immediate operand plus one for S_NOP, and 1 for every other opcode.
unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
|
||||
switch (MI.getOpcode()) {
|
||||
default: return 1; // FIXME: Do wait states equal cycles?
|
||||
|
||||
case AMDGPU::S_NOP:
|
||||
// Operand 0 is the S_NOP immediate N; the instruction provides N + 1 wait
// states.
return MI.getOperand(0).getImm() + 1;
|
||||
}
|
||||
}
|
||||
|
||||
bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
|
||||
MachineBasicBlock &MBB = *MI->getParent();
|
||||
DebugLoc DL = MBB.findDebugLoc(MI);
|
||||
|
@ -1188,8 +1204,11 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
|
|||
|
||||
if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
|
||||
getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
|
||||
assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
|
||||
"read2 / write2 not expected here yet");
|
||||
|
||||
if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) {
|
||||
// FIXME: Handle ds_read2 / ds_write2.
|
||||
return false;
|
||||
}
|
||||
unsigned Width0 = (*MIa->memoperands_begin())->getSize();
|
||||
unsigned Width1 = (*MIb->memoperands_begin())->getSize();
|
||||
if (BaseReg0 == BaseReg1 &&
|
||||
|
@ -2964,3 +2983,18 @@ SIInstrInfo::getSerializableTargetIndices() const {
|
|||
{AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
|
||||
return makeArrayRef(TargetIndices);
|
||||
}
|
||||
|
||||
/// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The
|
||||
/// post-RA version of misched uses CreateTargetMIHazardRecognizer.
|
||||
///
/// \returns a newly allocated GCNHazardRecognizer for the function of
/// \p DAG. \p II is not used.
/// NOTE(review): the object is created with a raw `new`; presumably the
/// caller takes ownership -- confirm against the callers.
ScheduleHazardRecognizer *
|
||||
SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
|
||||
const ScheduleDAG *DAG) const {
|
||||
return new GCNHazardRecognizer(DAG->MF);
|
||||
}
|
||||
|
||||
/// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
|
||||
/// pass.
|
||||
///
/// \returns a newly allocated GCNHazardRecognizer for \p MF.
/// NOTE(review): the object is created with a raw `new`; presumably the
/// caller takes ownership -- confirm against the callers.
ScheduleHazardRecognizer *
|
||||
SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
|
||||
return new GCNHazardRecognizer(MF);
|
||||
}
|
||||
|
|
|
@ -169,6 +169,14 @@ public:
|
|||
return get(Opcode).TSFlags & SIInstrFlags::VALU;
|
||||
}
|
||||
|
||||
/// \returns true if \p MI is a MUBUF, MTBUF or MIMG instruction.
static bool isVMEM(const MachineInstr &MI) {
|
||||
return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
|
||||
}
|
||||
|
||||
/// \returns true if \p Opcode is a MUBUF, MTBUF or MIMG opcode.
bool isVMEM(uint16_t Opcode) const {
|
||||
return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
|
||||
}
|
||||
|
||||
static bool isSOP1(const MachineInstr &MI) {
|
||||
return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
|
||||
}
|
||||
|
@ -440,6 +448,12 @@ public:
|
|||
void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
|
||||
int Count) const;
|
||||
|
||||
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const;
|
||||
|
||||
/// \brief Return the number of wait states that result from executing this
|
||||
/// instruction.
|
||||
unsigned getNumWaitStates(const MachineInstr &MI) const;
|
||||
|
||||
/// \brief Returns the operand named \p Op. If \p MI does not have an
|
||||
/// operand named \c Op, this function returns nullptr.
|
||||
LLVM_READONLY
|
||||
|
@ -472,6 +486,13 @@ public:
|
|||
ArrayRef<std::pair<int, const char *>>
|
||||
getSerializableTargetIndices() const override;
|
||||
|
||||
ScheduleHazardRecognizer *
|
||||
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
|
||||
const ScheduleDAG *DAG) const override;
|
||||
|
||||
ScheduleHazardRecognizer *
|
||||
CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
|
||||
|
||||
};
|
||||
|
||||
namespace AMDGPU {
|
||||
|
|
|
@ -596,22 +596,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: only do this when it is needed
|
||||
switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
|
||||
case AMDGPUSubtarget::SOUTHERN_ISLANDS:
|
||||
// "VALU writes SGPR" -> "SMRD reads that SGPR" needs 4 wait states
|
||||
// ("S_NOP 3") on SI
|
||||
TII->insertWaitStates(*MBB, MI, 4);
|
||||
break;
|
||||
case AMDGPUSubtarget::SEA_ISLANDS:
|
||||
break;
|
||||
default: // VOLCANIC_ISLANDS and later
|
||||
// "VALU writes SGPR -> VMEM reads that SGPR" needs 5 wait states
|
||||
// ("S_NOP 4") on VI and later. This also applies to VALUs which write
|
||||
// VCC, but we're unlikely to see VMEM use VCC.
|
||||
TII->insertWaitStates(*MBB, MI, 5);
|
||||
}
|
||||
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
|
@ -991,3 +975,14 @@ unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// \returns the result of hasVGPRs() on the register class of \p Reg.
/// The class is looked up in \p MRI for virtual registers and via
/// getPhysRegClass() for physical registers.
bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
|
||||
unsigned Reg) const {
|
||||
const TargetRegisterClass *RC;
|
||||
if (TargetRegisterInfo::isVirtualRegister(Reg))
|
||||
RC = MRI.getRegClass(Reg);
|
||||
else
|
||||
RC = getPhysRegClass(Reg);
|
||||
|
||||
return hasVGPRs(RC);
|
||||
}
|
||||
|
|
|
@ -188,6 +188,8 @@ public:
|
|||
unsigned getSGPR32PressureSet() const { return SGPR32SetID; };
|
||||
unsigned getVGPR32PressureSet() const { return VGPR32SetID; };
|
||||
|
||||
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
|
||||
|
||||
private:
|
||||
void buildScratchLoadStore(MachineBasicBlock::iterator MI,
|
||||
unsigned LoadStoreOp, unsigned Value,
|
||||
|
|
|
@ -42,6 +42,7 @@ def Write64Bit : SchedWrite;
|
|||
class SISchedMachineModel : SchedMachineModel {
|
||||
let CompleteModel = 0;
|
||||
let IssueWidth = 1;
|
||||
let PostRAScheduler = 1;
|
||||
}
|
||||
|
||||
def SIFullSpeedModel : SISchedMachineModel;
|
||||
|
|
|
@ -155,8 +155,8 @@ define void @cast_0_flat_to_group_addrspacecast() #0 {
|
|||
}
|
||||
|
||||
; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast:
|
||||
; HSA: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
|
||||
; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
|
||||
; HSA: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
|
||||
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
|
||||
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
|
||||
define void @cast_neg1_group_to_flat_addrspacecast() #0 {
|
||||
|
@ -226,8 +226,8 @@ end:
|
|||
|
||||
; Check for prologue initializing special SGPRs pointing to scratch.
|
||||
; HSA-LABEL: {{^}}store_flat_scratch:
|
||||
; HSA: s_mov_b32 flat_scratch_lo, s9
|
||||
; HSA: s_add_u32 [[ADD:s[0-9]+]], s8, s11
|
||||
; HSA-DAG: s_mov_b32 flat_scratch_lo, s9
|
||||
; HSA-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
|
||||
; HSA: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
|
||||
; HSA: flat_store_dword
|
||||
; HSA: s_barrier
|
||||
|
|
|
@ -212,10 +212,10 @@ define void @s_and_32_bit_constant_i64(i64 addrspace(1)* %out, i64 %a) {
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_and_multi_use_inline_imm_i64:
|
||||
; SI: s_load_dwordx2
|
||||
; SI: s_load_dword [[A:s[0-9]+]]
|
||||
; SI: s_load_dword [[B:s[0-9]+]]
|
||||
; SI: s_load_dwordx2
|
||||
; SI: s_load_dwordx2
|
||||
; SI-NOT: and
|
||||
; SI: s_lshl_b32 [[A]], [[A]], 1
|
||||
; SI: s_lshl_b32 [[B]], [[B]], 1
|
||||
|
|
|
@ -2,9 +2,9 @@
|
|||
|
||||
; GCN-LABEL: {{^}}stored_fi_to_lds:
|
||||
; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
|
||||
; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO1]]
|
||||
; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
|
||||
; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]]
|
||||
define void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {
|
||||
|
@ -140,16 +140,16 @@ define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}stored_fi_to_global_huge_frame_offset:
|
||||
; GCN: s_add_i32 [[BASE_1_OFF_0:s[0-9]+]], 0, 0x3ffc
|
||||
; GCN: v_mov_b32_e32 [[BASE_0:v[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
|
||||
; GCN: buffer_store_dword [[BASE_0]], v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
|
||||
|
||||
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
|
||||
; GCN-DAG: s_add_i32 [[BASE_1_OFF_0:s[0-9]+]], 0, 0x3ffc
|
||||
; GCN-DAG: v_mov_b32_e32 [[V_BASE_1_OFF_0:v[0-9]+]], [[BASE_1_OFF_0]]
|
||||
; GCN: v_mov_b32_e32 [[V_BASE_1_OFF_0:v[0-9]+]], [[BASE_1_OFF_0]]
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
|
||||
; GCN: s_add_i32 [[BASE_1_OFF_1:s[0-9]+]], 0, 56
|
||||
; GCN: buffer_store_dword [[K]], [[V_BASE_1_OFF_0]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
|
||||
|
||||
; GCN-DAG: s_add_i32 [[BASE_1_OFF_1:s[0-9]+]], 0, 56
|
||||
; GCN-DAG: v_mov_b32_e32 [[V_BASE_1_OFF_1:v[0-9]+]], [[BASE_1_OFF_1]]
|
||||
; GCN: v_mov_b32_e32 [[V_BASE_1_OFF_1:v[0-9]+]], [[BASE_1_OFF_1]]
|
||||
; GCN: buffer_store_dword [[V_BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
|
||||
%tmp0 = alloca [4096 x i32]
|
||||
|
|
|
@ -6,11 +6,11 @@ declare double @llvm.fabs.f64(double) #0
|
|||
declare double @llvm.floor.f64(double) #0
|
||||
|
||||
; FUNC-LABEL: {{^}}fract_f64:
|
||||
; GCN: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
|
||||
; SI: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
|
||||
; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
|
||||
; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
|
||||
; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
|
||||
; GCN-DAG: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
|
||||
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
|
||||
; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
|
||||
; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
|
||||
; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
|
||||
; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
|
||||
|
@ -24,11 +24,11 @@ define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) #1
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}fract_f64_neg:
|
||||
; GCN: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
|
||||
; SI: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
|
||||
; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
|
||||
; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
|
||||
; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
|
||||
; GCN-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
|
||||
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
|
||||
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
|
||||
; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
|
||||
; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
|
||||
; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
|
||||
; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
|
||||
|
@ -43,11 +43,11 @@ define void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src)
|
|||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}fract_f64_neg_abs:
|
||||
; GCN: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]|
|
||||
; SI: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
|
||||
; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
|
||||
; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
|
||||
; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
|
||||
; GCN-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]|
|
||||
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
|
||||
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
|
||||
; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
|
||||
; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
|
||||
; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
|
||||
; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
|
||||
; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
|
||||
|
|
|
@ -396,10 +396,10 @@ define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x
|
|||
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
|
||||
|
||||
; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
|
||||
; GCN: v_cvt_f32_f16_e32
|
||||
; GCN: v_cvt_f32_f16_e32
|
||||
; GCN-DAG: v_cvt_f32_f16_e32
|
||||
; GCN-DAG: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
|
||||
; GCN: v_cvt_f32_f16_e32
|
||||
; GCN: v_cvt_f32_f16_e32
|
||||
; GCN-NOT: v_cvt_f32_f16
|
||||
|
||||
; GCN: v_cvt_f64_f32_e32
|
||||
|
|
|
@ -208,10 +208,10 @@ endif:
|
|||
; SI-DAG: s_lshl_b32 [[SCALEDIDX:s[0-9]+]], [[IDX]], 1{{$}}
|
||||
; SI-DAG: v_mov_b32_e32 [[ELT0:v[0-9]+]], 0{{$}}
|
||||
|
||||
; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
|
||||
; SI: s_mov_b32 m0, [[SCALEDIDX]]
|
||||
; SI: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT0]]
|
||||
|
|
|
@ -7,8 +7,8 @@
|
|||
; from constant/invariant memory.
|
||||
|
||||
; GCN-LABEL: {{^}}test_merge_store_constant_i16_invariant_global_pointer_load:
|
||||
; GCN: buffer_load_dwordx2 [[PTR:v\[[0-9]+:[0-9]+\]]],
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b
|
||||
; GCN-DAG: buffer_load_dwordx2 [[PTR:v\[[0-9]+:[0-9]+\]]],
|
||||
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b
|
||||
; GCN: buffer_store_dword [[K]], [[PTR]]
|
||||
define void @test_merge_store_constant_i16_invariant_global_pointer_load(i16 addrspace(1)* addrspace(1)* dereferenceable(4096) nonnull %in) #0 {
|
||||
%ptr = load i16 addrspace(1)*, i16 addrspace(1)* addrspace(1)* %in, !invariant.load !0
|
||||
|
|
|
@ -12,8 +12,8 @@ declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
|
|||
; VI-DAG: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff
|
||||
; VI-DAG: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
|
||||
; VI-DAG: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]]
|
||||
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
|
||||
; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]]
|
||||
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
|
||||
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
|
||||
|
||||
define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
|
||||
|
|
|
@ -10,10 +10,10 @@ declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone
|
|||
; FUNC-LABEL: {{^}}rsq_clamped_f32:
|
||||
; SI: v_rsq_clamp_f32_e32
|
||||
|
||||
; VI: v_rsq_f32_e32 [[RSQ:v[0-9]+]], {{s[0-9]+}}
|
||||
; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
|
||||
; VI-DAG: v_rsq_f32_e32 [[RSQ:v[0-9]+]], {{s[0-9]+}}
|
||||
; VI-DAG: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
|
||||
; TODO: this constant should be folded:
|
||||
; VI: v_mov_b32_e32 [[MINFLT:v[0-9]+]], 0xff7fffff
|
||||
; VI-DAG: v_mov_b32_e32 [[MINFLT:v[0-9]+]], 0xff7fffff
|
||||
; VI: v_max_f32_e32 {{v[0-9]+}}, [[MIN]], [[MINFLT]]
|
||||
|
||||
; EG: RECIPSQRT_CLAMPED
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
;CHECK-LABEL: {{^}}test1:
|
||||
;CHECK: buffer_atomic_swap v0, off, s[0:3], 0 glc
|
||||
;CHECK: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: buffer_atomic_swap v0, v1, s[0:3], 0 idxen glc
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
|
@ -12,7 +13,6 @@
|
|||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: buffer_atomic_swap v0, v2, s[0:3], 0 offen offset:42 glc
|
||||
;CHECK-DAG: s_waitcnt vmcnt(0)
|
||||
;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff
|
||||
;CHECK: buffer_atomic_swap v0, off, s[0:3], [[SOFS]] offset:1 glc
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: buffer_atomic_swap v0, off, s[0:3], 0{{$}}
|
||||
|
@ -70,6 +70,7 @@ main_body:
|
|||
;CHECK-LABEL: {{^}}test3:
|
||||
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 glc
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff
|
||||
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v2, s[0:3], 0 idxen glc
|
||||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v3, s[0:3], 0 offen glc
|
||||
|
@ -78,7 +79,6 @@ main_body:
|
|||
;CHECK: s_waitcnt vmcnt(0)
|
||||
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v3, s[0:3], 0 offen offset:42 glc
|
||||
;CHECK-DAG: s_waitcnt vmcnt(0)
|
||||
;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff
|
||||
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[SOFS]] offset:1 glc
|
||||
define amdgpu_ps float @test3(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) {
|
||||
main_body:
|
||||
|
|
|
@ -8,10 +8,10 @@ declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
|
|||
; SI: v_rsq_clamp_f32_e32
|
||||
|
||||
; VI: s_load_dword [[SRC:s[0-9]+]]
|
||||
; VI: v_rsq_f32_e32 [[RSQ:v[0-9]+]], [[SRC]]
|
||||
; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
|
||||
; VI-DAG: v_rsq_f32_e32 [[RSQ:v[0-9]+]], [[SRC]]
|
||||
; VI-DAG: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
|
||||
; TODO: this constant should be folded:
|
||||
; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xff7fffff
|
||||
; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff7fffff
|
||||
; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], [[MIN]], [[K]]
|
||||
; VI: buffer_store_dword [[RESULT]]
|
||||
define void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #0 {
|
||||
|
@ -30,8 +30,8 @@ define void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #0 {
|
|||
; VI-DAG: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
|
||||
; VI-DAG: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]]
|
||||
; VI-DAG: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}
|
||||
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
|
||||
; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]]
|
||||
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
|
||||
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
|
||||
define void @rsq_clamp_f64(double addrspace(1)* %out, double %src) #0 {
|
||||
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
|
||||
|
|
|
@ -13,7 +13,7 @@ define void @round_f64(double addrspace(1)* %out, double %x) #0 {
|
|||
|
||||
; FUNC-LABEL: {{^}}v_round_f64:
|
||||
; SI: buffer_load_dwordx2
|
||||
; SI: v_bfe_u32 [[EXP:v[0-9]+]], v{{[0-9]+}}, 20, 11
|
||||
; SI-DAG: v_bfe_u32 [[EXP:v[0-9]+]], v{{[0-9]+}}, 20, 11
|
||||
|
||||
; SI-DAG: v_not_b32_e32
|
||||
; SI-DAG: v_not_b32_e32
|
||||
|
|
|
@ -5,9 +5,9 @@
|
|||
; FUNC-LABEL: {{^}}round_f32:
|
||||
; SI-DAG: s_load_dword [[SX:s[0-9]+]]
|
||||
; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x7fffffff
|
||||
; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[SX]]
|
||||
; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]
|
||||
; SI: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]]
|
||||
; SI-DAG: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[SX]]
|
||||
; SI-DAG: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]
|
||||
; SI-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]]
|
||||
; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]]
|
||||
; SI: v_cmp_le_f32_e64 vcc, 0.5, |[[SUB]]|
|
||||
; SI: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, [[VX]]
|
||||
|
|
|
@ -491,8 +491,8 @@ define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %o
|
|||
; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
|
||||
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[B]], [[A]]
|
||||
; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
|
||||
; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[B]], [[A]]
|
||||
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
|
||||
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP2]]
|
||||
|
||||
|
@ -538,8 +538,8 @@ define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %o
|
|||
; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
|
||||
; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
|
||||
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
|
||||
; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[C]], [[B]]
|
||||
; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
|
||||
; SI-DENORM-SLOWFMAF-DAG: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[C]], [[B]]
|
||||
; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
|
||||
; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP2]], [[A]]
|
||||
|
||||
|
|
|
@ -7,10 +7,10 @@
|
|||
|
||||
; GCN-LABEL: {{^}}clobber_vgpr_pair_pointer_add:
|
||||
; GCN: s_load_dwordx2 s{{\[}}[[ARG1LO:[0-9]+]]:[[ARG1HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
|
||||
; GCN: buffer_load_dwordx2 v{{\[}}[[LDPTRLO:[0-9]+]]:[[LDPTRHI:[0-9]+]]{{\]}}
|
||||
; GCN-DAG: v_mov_b32_e32 v[[VARG1HI:[0-9]+]], s[[ARG1HI]]
|
||||
; GCN-DAG: buffer_load_dwordx2 v{{\[}}[[LDPTRLO:[0-9]+]]:[[LDPTRHI:[0-9]+]]{{\]}}
|
||||
|
||||
; GCN-NOT: v_mov_b32
|
||||
; GCN: v_mov_b32_e32 v[[VARG1HI:[0-9]+]], s[[ARG1HI]]
|
||||
; GCN-NEXT: v_mov_b32_e32 v[[VARG1LO:[0-9]+]], s[[ARG1LO]]
|
||||
; GCN-NOT: v_mov_b32
|
||||
|
||||
|
|
|
@ -199,10 +199,10 @@ define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
|
|||
; SI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||
; VI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; GCN: s_movk_i32 [[K255:s[0-9]+]], 0xff
|
||||
; GCN: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]]
|
||||
; GCN: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]]
|
||||
; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]]
|
||||
; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[VK255]]
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN: buffer_store_byte [[RESULT]]
|
||||
; GCN: s_endpgm
|
||||
define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
|
||||
|
@ -247,10 +247,10 @@ define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) n
|
|||
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
|
||||
; VI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
|
||||
; GCN: s_movk_i32 [[K:s[0-9]+]], 0xff
|
||||
; GCN: s_and_b32 [[B:s[0-9]+]], [[VAL]], [[K]]
|
||||
; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
|
||||
; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VAL]], [[K]]
|
||||
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
|
||||
; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[VK]]{{$}}
|
||||
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
|
||||
; GCN: buffer_store_byte [[RESULT]]
|
||||
; GCN: s_endpgm
|
||||
define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
; GCN-LABEL: {{^}}v_uextract_bit_31_i128:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
|
||||
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
|
||||
|
||||
|
@ -27,9 +27,9 @@ define void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)*
|
|||
; GCN-LABEL: {{^}}v_uextract_bit_63_i128:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
|
||||
|
||||
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
|
||||
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
|
@ -50,9 +50,9 @@ define void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)*
|
|||
; GCN-LABEL: {{^}}v_uextract_bit_95_i128:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
|
||||
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
|
||||
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
|
@ -73,9 +73,9 @@ define void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)*
|
|||
; GCN-LABEL: {{^}}v_uextract_bit_127_i128:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
|
||||
|
||||
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], 0{{$}}
|
||||
; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
|
||||
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ZERO1]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
|
||||
|
|
|
@ -300,9 +300,9 @@ define void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)* %out, i64 addr
|
|||
|
||||
; GCN-LABEL: {{^}}and_not_mask_i64:
|
||||
; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
|
||||
; GCN: v_mov_b32_e32 v[[SHRHI]], 0{{$}}
|
||||
; GCN: v_lshrrev_b32_e32 [[SHR:v[0-9]+]], 20, v[[VALLO]]
|
||||
; GCN-DAG: v_and_b32_e32 v[[SHRLO]], 4, [[SHR]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[SHRHI]], 0{{$}}
|
||||
; GCN-NOT: v[[SHRLO]]
|
||||
; GCN-NOT: v[[SHRHI]]
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
|
||||
|
|
|
@ -4,9 +4,9 @@
|
|||
|
||||
; lshr (i64 x), c: c > 32 => reg_sequence lshr (i32 hi_32(x)), (c - 32), 0
|
||||
; GCN-LABEL: {{^}}lshr_i64_35:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; GCN: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 3, [[VAL]]
|
||||
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 3, [[VAL]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @lshr_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
%val = load i64, i64 addrspace(1)* %in
|
||||
|
@ -16,9 +16,9 @@ define void @lshr_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lshr_i64_63:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; GCN: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 31, [[VAL]]
|
||||
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 31, [[VAL]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @lshr_i64_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
%val = load i64, i64 addrspace(1)* %in
|
||||
|
@ -28,9 +28,9 @@ define void @lshr_i64_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lshr_i64_33:
|
||||
; GCN: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; GCN: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 1, [[VAL]]
|
||||
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
|
||||
; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]]
|
||||
; GCN-DAG: v_lshrrev_b32_e32 v[[LO:[0-9]+]], 1, [[VAL]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
|
||||
define void @lshr_i64_33(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
|
||||
%val = load i64, i64 addrspace(1)* %in
|
||||
|
|
|
@ -2,6 +2,12 @@
|
|||
|
||||
; Make sure this doesn't crash.
|
||||
; CHECK: {{^}}test:
|
||||
; Make sure we are handling hazards correctly.
|
||||
; CHECK: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:12
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_readlane_b32 s[[HI:[0-9]+]], [[VHI]]
|
||||
; CHECK-NEXT: s_nop 4
|
||||
; CHECK-NEXT: buffer_store_dword v0, off, s[0:[[HI]]{{\]}}, 0
|
||||
; CHECK: s_endpgm
|
||||
define void @test(i32 addrspace(1)* %out, i32 %in) {
|
||||
call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
|
||||
|
|
|
@ -156,11 +156,9 @@ define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out,
|
|||
}
|
||||
|
||||
; FUNC-LABEL: @reorder_local_offsets
|
||||
; FIXME: The scheduler doesn't think it's profitable to re-order the
|
||||
; loads and stores, and I'm not sure that it really is.
|
||||
; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
|
||||
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
|
||||
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
|
||||
; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
|
||||
; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
|
||||
; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
|
||||
; CI: buffer_store_dword
|
||||
|
@ -185,8 +183,8 @@ define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspa
|
|||
|
||||
; FUNC-LABEL: @reorder_global_offsets
|
||||
; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
|
||||
; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
|
||||
; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
|
||||
; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
|
||||
; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
|
||||
; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
|
||||
; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
|
||||
|
|
|
@ -7,9 +7,9 @@ target triple="amdgcn--"
|
|||
; CHECK: s_load_dword s2, s[0:1], 0x9
|
||||
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
|
||||
; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64
|
||||
; CHECK-NEXT: v_cmp_eq_i32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK: v_cmp_eq_i32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; CHECK: s_and_saveexec_b64 s[2:3], vcc
|
||||
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||
; BB0_1:
|
||||
; CHECK: s_load_dword s0, s[0:1], 0xa
|
||||
|
|
|
@ -37,8 +37,8 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) {
|
|||
; SI: s_add_u32 s[[LO_SREG2:[0-9]+]], s[[LO_SHL]],
|
||||
; SI: s_addc_u32
|
||||
; SI: v_mov_b32_e32
|
||||
; SI: v_mov_b32_e32
|
||||
; SI: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
|
||||
; SI: v_mov_b32_e32
|
||||
; SI: buffer_store_dword v[[LO_VREG]],
|
||||
define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
|
||||
%aa = add i64 %a, 234 ; Prevent shrinking store.
|
||||
|
|
|
@ -31,9 +31,9 @@ done:
|
|||
; SI-LABEL: {{^}}uniform_if_vcc:
|
||||
; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
|
||||
; also scheduled the write first.
|
||||
; SI: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
|
||||
; SI: s_and_b64 vcc, exec, [[COND]]
|
||||
; SI: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
|
||||
; SI-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
|
||||
; SI-DAG: s_and_b64 vcc, exec, [[COND]]
|
||||
; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
|
||||
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
; Fall-through to the else
|
||||
|
@ -88,9 +88,9 @@ done:
|
|||
; SI-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
|
||||
; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
|
||||
; also scheduled the write first.
|
||||
; SI: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
|
||||
; SI: s_and_b64 vcc, exec, [[COND]]
|
||||
; SI: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
|
||||
; SI-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
|
||||
; SI-DAG: s_and_b64 vcc, exec, [[COND]]
|
||||
; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
|
||||
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
|
||||
|
||||
; Fall-through to the else
|
||||
|
|
|
@ -19,12 +19,12 @@
|
|||
|
||||
; GCN-NOT: flat_scr
|
||||
|
||||
; GCNMESA: s_mov_b32 s16, s3
|
||||
; GCNMESA: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GCNMESA-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GCNMESA-NEXT: s_mov_b32 s14, -1
|
||||
; SIMESA-NEXT: s_mov_b32 s15, 0x98f000
|
||||
; VIMESA-NEXT: s_mov_b32 s15, 0x980000
|
||||
; GCNMESA-DAG: s_mov_b32 s16, s3
|
||||
; GCNMESA-DAG: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GCNMESA-DAG: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GCNMESA-DAG: s_mov_b32 s14, -1
|
||||
; SIMESA-DAG: s_mov_b32 s15, 0x98f000
|
||||
; VIMESA-DAG: s_mov_b32 s15, 0x980000
|
||||
|
||||
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
|
||||
|
|
|
@ -11,12 +11,11 @@
|
|||
|
||||
; GCN-LABEL: {{^}}main:
|
||||
|
||||
; GCN: s_mov_b32 s11, s12
|
||||
; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GCN-NEXT: s_mov_b32 s14, -1
|
||||
; SI-NEXT: s_mov_b32 s15, 0x98f000
|
||||
; VI-NEXT: s_mov_b32 s15, 0x980000
|
||||
; GCN-DAG: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; GCN-DAG: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; GCN-DAG: s_mov_b32 s14, -1
|
||||
; SI-DAG: s_mov_b32 s15, 0x98f000
|
||||
; VI-DAG: s_mov_b32 s15, 0x980000
|
||||
|
||||
; s12 is offset user SGPR
|
||||
; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Spill
|
||||
|
|
Loading…
Reference in New Issue