forked from OSchip/llvm-project
AMDGPU: really don't commute REV opcodes if the target variant doesn't exist
If pseudoToMCOpcode failed, commuteOpcode would return the original opcode, so the operands would be swapped but the instruction itself would remain the same. This resulted in LSHLREV a, b ---> LSHLREV b, a.
This fixes Glamor text rendering and piglit/arb_sample_shading-builtin-gl-sample-mask on VI.
This is a candidate for stable branches.
v2: the test was simplified by Tom Stellard
llvm-svn: 240824
This commit is contained in:
parent
f2bbd9cf54
commit
cfbdba2d0b
|
@ -440,22 +440,22 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
}
|
||||
}
|
||||
|
||||
unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
|
||||
int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
|
||||
const unsigned Opcode = MI.getOpcode();
|
||||
|
||||
int NewOpc;
|
||||
|
||||
// Try to map original to commuted opcode
|
||||
NewOpc = AMDGPU::getCommuteRev(Opcode);
|
||||
// Check if the commuted (REV) opcode exists on the target.
|
||||
if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
|
||||
return NewOpc;
|
||||
if (NewOpc != -1)
|
||||
// Check if the commuted (REV) opcode exists on the target.
|
||||
return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
|
||||
|
||||
// Try to map commuted to original opcode
|
||||
NewOpc = AMDGPU::getCommuteOrig(Opcode);
|
||||
// Check if the original (non-REV) opcode exists on the target.
|
||||
if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
|
||||
return NewOpc;
|
||||
if (NewOpc != -1)
|
||||
// Check if the original (non-REV) opcode exists on the target.
|
||||
return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
|
||||
|
||||
return Opcode;
|
||||
}
|
||||
|
@ -771,6 +771,10 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
|
|||
if (MI->getNumOperands() < 3)
|
||||
return nullptr;
|
||||
|
||||
int CommutedOpcode = commuteOpcode(*MI);
|
||||
if (CommutedOpcode == -1)
|
||||
return nullptr;
|
||||
|
||||
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
|
||||
AMDGPU::OpName::src0);
|
||||
assert(Src0Idx != -1 && "Should always have src0 operand");
|
||||
|
@ -833,7 +837,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
|
|||
}
|
||||
|
||||
if (MI)
|
||||
MI->setDesc(get(commuteOpcode(*MI)));
|
||||
MI->setDesc(get(CommutedOpcode));
|
||||
|
||||
return MI;
|
||||
}
|
||||
|
|
|
@ -117,7 +117,7 @@ public:
|
|||
// register. If there is no hardware instruction that can store to \p
|
||||
// DstRC, then AMDGPU::COPY is returned.
|
||||
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
|
||||
unsigned commuteOpcode(const MachineInstr &MI) const;
|
||||
int commuteOpcode(const MachineInstr &MI) const;
|
||||
|
||||
MachineInstr *commuteInstruction(MachineInstr *MI,
|
||||
bool NewMI = false) const override;
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; GCN-LABEL: {{^}}main:
|
||||
; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
|
||||
; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
|
||||
|
||||
define void @main() #0 {
|
||||
main_body:
|
||||
%0 = fptosi float undef to i32
|
||||
%1 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> undef, <32 x i8> undef, i32 2)
|
||||
%2 = extractelement <4 x i32> %1, i32 0
|
||||
%3 = and i32 %0, 7
|
||||
%4 = shl i32 1, %3
|
||||
%5 = and i32 %2, %4
|
||||
%6 = icmp eq i32 %5, 0
|
||||
%.10 = select i1 %6, float 0.000000e+00, float undef
|
||||
%7 = call i32 @llvm.SI.packf16(float undef, float %.10)
|
||||
%8 = bitcast i32 %7 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %8, float undef, float %8)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.SI.packf16(float, float) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
|
||||
attributes #1 = { nounwind readnone }
|
Loading…
Reference in New Issue