forked from OSchip/llvm-project
parent 13ccc8f1bc
commit 689f325099
@@ -655,6 +655,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
   case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
   case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
   case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
+  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
   case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
   case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
   case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
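Note on the new mapping: there is no 64-bit VALU NOT, so S_NOT_B64 maps to the 32-bit V_NOT_B32_e32 and relies on moveToVALU (below) splitting the 64-bit instruction into two 32-bit halves first. A minimal sketch of the identity this depends on, in plain C++ rather than the LLVM API (the function name is illustrative only):

#include <cstdint>

// Bitwise NOT has no cross-bit interaction, so a 64-bit NOT is exactly
// one 32-bit NOT applied to each half of the value.
uint64_t not64(uint64_t x) {
  uint32_t lo = ~static_cast<uint32_t>(x);        // low half  (sub0)
  uint32_t hi = ~static_cast<uint32_t>(x >> 32);  // high half (sub1)
  return (static_cast<uint64_t>(hi) << 32) | lo;
}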
@@ -1157,22 +1158,22 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
       continue;
     }
     case AMDGPU::S_AND_B64:
-      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_AND_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
       Inst->eraseFromParent();
       continue;

     case AMDGPU::S_OR_B64:
-      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_OR_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
       Inst->eraseFromParent();
       continue;

     case AMDGPU::S_XOR_B64:
-      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
       Inst->eraseFromParent();
       continue;

     case AMDGPU::S_NOT_B64:
-      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+      splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
       Inst->eraseFromParent();
       continue;

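The AND/OR/XOR cases go through the renamed splitScalar64BitBinaryOp because these operations are carry-free: bit i of the result depends only on bit i of each input, so the two 32-bit halves are fully independent. A hedged sketch of that property (plain C++, illustrative name):

#include <cstdint>

// Carry-free binary ops split cleanly into per-half computations.
// Additions do not; they need a carry chain (the S_ADD_I32 / S_ADDC_U32
// pair the updated sub test below checks for).
uint64_t xor64(uint64_t a, uint64_t b) {
  uint32_t lo = static_cast<uint32_t>(a) ^ static_cast<uint32_t>(b);
  uint32_t hi = static_cast<uint32_t>(a >> 32) ^ static_cast<uint32_t>(b >> 32);
  return (static_cast<uint64_t>(hi) << 32) | lo;
}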
@@ -1297,9 +1298,62 @@ const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
   return &AMDGPU::VReg_32RegClass;
 }

-void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
-                                     MachineInstr *Inst,
-                                     unsigned Opcode) const {
+void SIInstrInfo::splitScalar64BitUnaryOp(
+  SmallVectorImpl<MachineInstr *> &Worklist,
+  MachineInstr *Inst,
+  unsigned Opcode) const {
+  MachineBasicBlock &MBB = *Inst->getParent();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+  MachineOperand &Dest = Inst->getOperand(0);
+  MachineOperand &Src0 = Inst->getOperand(1);
+  DebugLoc DL = Inst->getDebugLoc();
+
+  MachineBasicBlock::iterator MII = Inst;
+
+  const MCInstrDesc &InstDesc = get(Opcode);
+  const TargetRegisterClass *Src0RC = Src0.isReg() ?
+                                      MRI.getRegClass(Src0.getReg()) :
+                                      &AMDGPU::SGPR_32RegClass;
+
+  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
+
+  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+                                                       AMDGPU::sub0, Src0SubRC);
+
+  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
+  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+
+  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
+  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+    .addOperand(SrcReg0Sub0);
+
+  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+                                                       AMDGPU::sub1, Src0SubRC);
+
+  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
+  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+    .addOperand(SrcReg0Sub1);
+
+  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+    .addReg(DestSub0)
+    .addImm(AMDGPU::sub0)
+    .addReg(DestSub1)
+    .addImm(AMDGPU::sub1);
+
+  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+
+  // Try to legalize the operands in case we need to swap the order to keep it
+  // valid.
+  Worklist.push_back(LoHalf);
+  Worklist.push_back(HiHalf);
+}
+
+void SIInstrInfo::splitScalar64BitBinaryOp(
+  SmallVectorImpl<MachineInstr *> &Worklist,
+  MachineInstr *Inst,
+  unsigned Opcode) const {
   MachineBasicBlock &MBB = *Inst->getParent();
   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

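In short, the new unary helper extracts the sub0 and sub1 halves of the single source, emits the 32-bit opcode once per half (LoHalf, HiHalf), stitches the results back together with a REG_SEQUENCE, and pushes both new instructions onto the worklist so their operands get legalized later. A value-level model of that flow, assuming nothing beyond standard C++ (the names mirror the locals above, but this is not the LLVM API):

#include <cstdint>

// Value-level model of splitScalar64BitUnaryOp; Op stands in for the
// 32-bit instruction emitted once per half.
template <typename Op32>
uint64_t splitUnary64(uint64_t Src0, Op32 Op) {
  uint32_t SrcSub0 = static_cast<uint32_t>(Src0);        // extract sub0
  uint32_t SrcSub1 = static_cast<uint32_t>(Src0 >> 32);  // extract sub1
  uint32_t DestSub0 = Op(SrcSub0);                       // LoHalf
  uint32_t DestSub1 = Op(SrcSub1);                       // HiHalf
  // REG_SEQUENCE: DestSub0 becomes sub0, DestSub1 becomes sub1.
  return (static_cast<uint64_t>(DestSub1) << 32) | DestSub0;
}

// Example: splitUnary64(x, [](uint32_t v) { return ~v; }) models S_NOT_B64.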
@@ -44,8 +44,11 @@ private:
                                    const TargetRegisterClass *RC,
                                    const MachineOperand &Op) const;

-  void splitScalar64BitOp(SmallVectorImpl<MachineInstr *> & Worklist,
-                          MachineInstr *Inst, unsigned Opcode) const;
+  void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+                               MachineInstr *Inst, unsigned Opcode) const;
+
+  void splitScalar64BitBinaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+                                MachineInstr *Inst, unsigned Opcode) const;

   void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const;

@@ -96,7 +96,9 @@ def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32",
   [(set i32:$dst, (not i32:$src0))]
 >;

-def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
+def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64",
+  [(set i64:$dst, (not i64:$src0))]
+>;
 def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
 def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
 def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
@@ -45,7 +45,7 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
 ;EG-DAG: SUB_INT
 ;EG-DAG: SUB_INT

-;SI: S_XOR_B64
+;SI: S_NOT_B64
 ;SI-DAG: S_ADD_I32
 ;SI-DAG: S_ADDC_U32
 ;SI-DAG: S_ADD_I32
@@ -90,3 +90,43 @@ define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32
   store i32 %result, i32 addrspace(1)* %out
   ret void
 }
+
+; SI-CHECK-LABEL: @vector_xor_i64
+; SI-CHECK: V_XOR_B32_e32
+; SI-CHECK: V_XOR_B32_e32
+; SI-CHECK: S_ENDPGM
+define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
+  %a = load i64 addrspace(1)* %in0
+  %b = load i64 addrspace(1)* %in1
+  %result = xor i64 %a, %b
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @scalar_xor_i64
+; SI-CHECK: S_XOR_B64
+; SI-CHECK: S_ENDPGM
+define void @scalar_xor_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %result = xor i64 %a, %b
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @scalar_not_i64
+; SI-CHECK: S_NOT_B64
+define void @scalar_not_i64(i64 addrspace(1)* %out, i64 %a) {
+  %result = xor i64 %a, -1
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @vector_not_i64
+; SI-CHECK: V_NOT_B32
+; SI-CHECK: V_NOT_B32
+define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
+  %a = load i64 addrspace(1)* %in0
+  %b = load i64 addrspace(1)* %in1
+  %result = xor i64 %a, -1
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}