forked from OSchip/llvm-project
R600/SI: use patterns for clamp, fabs, fneg
Using patterns instead of custom inserters is simpler and should make DAG folding easier.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
llvm-svn: 175755
This commit is contained in:
parent
bf114b42a8
commit
8dbe6f617c
|
@ -62,7 +62,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||||
|
|
||||||
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||||
MachineInstr * MI, MachineBasicBlock * BB) const {
|
MachineInstr * MI, MachineBasicBlock * BB) const {
|
||||||
const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
|
|
||||||
MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
|
MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
|
||||||
MachineBasicBlock::iterator I = MI;
|
MachineBasicBlock::iterator I = MI;
|
||||||
|
|
||||||
|
@ -70,41 +69,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||||
default:
|
default:
|
||||||
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
||||||
case AMDGPU::BRANCH: return BB;
|
case AMDGPU::BRANCH: return BB;
|
||||||
case AMDGPU::CLAMP_SI:
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addImm(0x80) // SRC1
|
|
||||||
.addImm(0) // ABS
|
|
||||||
.addImm(1) // CLAMP
|
|
||||||
.addImm(0) // OMOD
|
|
||||||
.addImm(0); // NEG
|
|
||||||
MI->eraseFromParent();
|
|
||||||
break;
|
|
||||||
|
|
||||||
case AMDGPU::FABS_SI:
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addImm(0x80) // SRC1
|
|
||||||
.addImm(1) // ABS
|
|
||||||
.addImm(0) // CLAMP
|
|
||||||
.addImm(0) // OMOD
|
|
||||||
.addImm(0); // NEG
|
|
||||||
MI->eraseFromParent();
|
|
||||||
break;
|
|
||||||
|
|
||||||
case AMDGPU::FNEG_SI:
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addImm(0x80) // SRC1
|
|
||||||
.addImm(0) // ABS
|
|
||||||
.addImm(0) // CLAMP
|
|
||||||
.addImm(0) // OMOD
|
|
||||||
.addImm(1); // NEG
|
|
||||||
MI->eraseFromParent();
|
|
||||||
break;
|
|
||||||
case AMDGPU::SHADER_TYPE:
|
case AMDGPU::SHADER_TYPE:
|
||||||
BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
|
BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
|
||||||
MI->getOperand(0).getImm();
|
MI->getOperand(0).getImm();
|
||||||
|
|
|
@ -1187,10 +1187,6 @@ defm : SamplePatterns<VReg_128, v4i32>;
|
||||||
defm : SamplePatterns<VReg_256, v8i32>;
|
defm : SamplePatterns<VReg_256, v8i32>;
|
||||||
defm : SamplePatterns<VReg_512, v16i32>;
|
defm : SamplePatterns<VReg_512, v16i32>;
|
||||||
|
|
||||||
def CLAMP_SI : CLAMP<VReg_32>;
|
|
||||||
def FABS_SI : FABS<VReg_32>;
|
|
||||||
def FNEG_SI : FNEG<VReg_32>;
|
|
||||||
|
|
||||||
def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>;
|
def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>;
|
||||||
def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>;
|
def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>;
|
||||||
def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>;
|
def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>;
|
||||||
|
@ -1214,6 +1210,28 @@ def : BitConvert <i32, f32, VReg_32>;
|
||||||
def : BitConvert <f32, i32, SReg_32>;
|
def : BitConvert <f32, i32, SReg_32>;
|
||||||
def : BitConvert <f32, i32, VReg_32>;
|
def : BitConvert <f32, i32, VReg_32>;
|
||||||
|
|
||||||
|
/********** =================== **********/
|
||||||
|
/********** Src & Dst modifiers **********/
|
||||||
|
/********** =================== **********/
|
||||||
|
|
||||||
|
def : Pat <
|
||||||
|
(int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
|
||||||
|
(V_ADD_F32_e64 VReg_32:$src, (i32 0x80 /* SRC1 */),
|
||||||
|
0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
|
||||||
|
>;
|
||||||
|
|
||||||
|
def : Pat <
|
||||||
|
(fabs VReg_32:$src),
|
||||||
|
(V_ADD_F32_e64 VReg_32:$src, (i32 0x80 /* SRC1 */),
|
||||||
|
1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
|
||||||
|
>;
|
||||||
|
|
||||||
|
def : Pat <
|
||||||
|
(fneg VReg_32:$src),
|
||||||
|
(V_ADD_F32_e64 VReg_32:$src, (i32 0x80 /* SRC1 */),
|
||||||
|
0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
|
||||||
|
>;
|
||||||
|
|
||||||
/********** ================== **********/
|
/********** ================== **********/
|
||||||
/********** Immediate Patterns **********/
|
/********** Immediate Patterns **********/
|
||||||
/********** ================== **********/
|
/********** ================== **********/
|
||||||
|
|
Loading…
Reference in New Issue