forked from OSchip/llvm-project
GlobalISel: Implement bitcast action for G_INSERT_VECTOR_ELT
This mirrors the support for the equivalent extracts. This also creates a huge mess that would be greatly improved if we had any bit operation combines.
This commit is contained in:
parent
b1600d8b89
commit
e2f1b48f86
|
@ -318,6 +318,10 @@ public:
|
|||
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
|
||||
LLT CastTy);
|
||||
|
||||
/// Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
|
||||
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
|
||||
LLT CastTy);
|
||||
|
||||
LegalizeResult lowerBitcast(MachineInstr &MI);
|
||||
LegalizeResult lowerLoad(MachineInstr &MI);
|
||||
LegalizeResult lowerStore(MachineInstr &MI);
|
||||
|
|
|
@ -2369,6 +2369,28 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) {
|
|||
return UnableToLegalize;
|
||||
}
|
||||
|
||||
/// Figure out the bit offset into a register when coercing a vector index for
|
||||
/// the wide element type. This is only for the case when promoting a vector to
|
||||
/// one with larger elements.
|
||||
///
|
||||
///
|
||||
/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
|
||||
/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
|
||||
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
                                                   Register Idx,
                                                   unsigned NewEltSize,
                                                   unsigned OldEltSize) {
  // Log2 of how many old (narrow) elements fit into one new (wide) element.
  const unsigned RatioLog2 = Log2_32(NewEltSize / OldEltSize);
  const LLT IndexTy = B.getMRI()->getType(Idx);

  // Keep only the low RatioLog2 bits of the index; they select the narrow
  // element's position inside the wide element.
  const APInt AllOnes = APInt::getAllOnesValue(IndexTy.getSizeInBits());
  auto SubIdxMask = B.buildConstant(IndexTy, ~(AllOnes << RatioLog2));
  auto SubIdx = B.buildAnd(IndexTy, Idx, SubIdxMask);

  // Shift the sub-index left by Log2(OldEltSize) to turn it into a bit offset.
  auto EltSizeLog2 = B.buildConstant(IndexTy, Log2_32(OldEltSize));
  auto BitOffset = B.buildShl(IndexTy, SubIdx, EltSizeLog2);
  return BitOffset.getReg(0);
}
|
||||
|
||||
/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
|
||||
/// is casting to a vector with a smaller element size, perform multiple element
|
||||
/// extracts and merge the results. If this is coercing to a vector with larger
|
||||
|
@ -2467,13 +2489,9 @@ LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
|
|||
ScaledIdx).getReg(0);
|
||||
}
|
||||
|
||||
// Now figure out the amount we need to shift to get the target bits.
|
||||
auto OffsetMask = MIRBuilder.buildConstant(
|
||||
IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
|
||||
auto OffsetIdx = MIRBuilder.buildAnd(IdxTy, Idx, OffsetMask);
|
||||
auto OffsetBits = MIRBuilder.buildShl(
|
||||
IdxTy, OffsetIdx,
|
||||
MIRBuilder.buildConstant(IdxTy, Log2_32(OldEltSize)));
|
||||
// Compute the bit offset into the register of the target element.
|
||||
Register OffsetBits = getBitcastWiderVectorElementOffset(
|
||||
MIRBuilder, Idx, NewEltSize, OldEltSize);
|
||||
|
||||
// Shift the wide element to get the target element.
|
||||
auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
|
||||
|
@ -2485,6 +2503,104 @@ LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
|
|||
return UnableToLegalize;
|
||||
}
|
||||
|
||||
/// Emit code to insert \p InsertReg into \p TargetReg at bit offset \p
|
||||
/// OffsetBits, while preserving the other bits in \p TargetReg.
|
||||
///
|
||||
/// (zext(InsertReg) << Offset) | (TargetReg & ~(LowBitsMask(InsertReg.size()) << Offset))
|
||||
static Register buildBitFieldInsert(MachineIRBuilder &B,
                                    Register TargetReg, Register InsertReg,
                                    Register OffsetBits) {
  const LLT WideTy = B.getMRI()->getType(TargetReg);
  const LLT NarrowTy = B.getMRI()->getType(InsertReg);

  // Zero-extend the new value to the target width and move it to its final
  // bit position.
  auto WideVal = B.buildZExt(WideTy, InsertReg);
  auto PositionedVal = B.buildShl(WideTy, WideVal, OffsetBits);

  // Build a mask with one bit set for every bit of the inserted value, move it
  // to the same position, and invert it so it selects the bits to keep.
  const APInt LowBits = APInt::getLowBitsSet(WideTy.getSizeInBits(),
                                             NarrowTy.getSizeInBits());
  auto FieldMask = B.buildConstant(WideTy, LowBits);
  auto PositionedMask = B.buildShl(WideTy, FieldMask, OffsetBits);
  auto KeepMask = B.buildNot(WideTy, PositionedMask);

  // Zero the destination field in the target register.
  auto ClearedTarget = B.buildAnd(WideTy, TargetReg, KeepMask);

  // The zero-extended value has no bits set outside the field, so a plain OR
  // merges it into the cleared target.
  auto Combined = B.buildOr(WideTy, ClearedTarget, PositionedVal);
  return Combined.getReg(0);
}
|
||||
|
||||
/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
|
||||
/// is increasing the element size, perform the indexing in the target element
|
||||
/// type, and use bit operations to insert at the element position. This is
|
||||
/// intended for architectures that can dynamically index the register file and
|
||||
/// want to force indexing in the native register size.
|
||||
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  // Only the vector type (type index 0) can be bitcast here.
  if (TypeIdx != 0)
    return UnableToLegalize;

  // Operands of G_INSERT_VECTOR_ELT: result, source vector, value, index.
  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Val = MI.getOperand(2).getReg();
  Register Idx = MI.getOperand(3).getReg();

  LLT VecTy = MRI.getType(Dst);
  LLT IdxTy = MRI.getType(Idx);

  LLT VecEltTy = VecTy.getElementType();
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  // Run every legality check before emitting anything, so that returning
  // UnableToLegalize never leaves stray instructions behind.

  // Only casting to fewer, wider elements is implemented.
  if (NewNumElts >= OldNumElts)
    return UnableToLegalize;

  if (NewEltSize % OldEltSize != 0)
    return UnableToLegalize;

  // This only depends on powers of 2 because we use bit tricks to figure out
  // the bit offset we need to shift to get the target element. A general
  // expansion could emit division/multiply.
  if (!isPowerOf2_32(NewEltSize / OldEltSize))
    return UnableToLegalize;

  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

  // Divide to get the index in the wider element type.
  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

  // When the cast type is a scalar, the whole cast value is the wide element;
  // otherwise pull out the wide element that contains the target position.
  Register ExtractedElt = CastVec;
  if (CastTy.isVector()) {
    ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                        ScaledIdx).getReg(0);
  }

  // Compute the bit offset into the register of the target element.
  Register OffsetBits = getBitcastWiderVectorElementOffset(
    MIRBuilder, Idx, NewEltSize, OldEltSize);

  // Splice the new value into the wide element with bit operations.
  Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                             Val, OffsetBits);

  // Put the updated wide element back into the cast vector.
  if (CastTy.isVector()) {
    InsertedElt = MIRBuilder.buildInsertVectorElement(
      CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
  }

  // Cast back to the original vector type and drop the original instruction.
  MIRBuilder.buildBitcast(Dst, InsertedElt);
  MI.eraseFromParent();
  return Legalized;
}
|
||||
|
||||
LegalizerHelper::LegalizeResult
|
||||
LegalizerHelper::lowerLoad(MachineInstr &MI) {
|
||||
// Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
|
||||
|
@ -2674,6 +2790,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
|
|||
}
|
||||
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
|
||||
return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
|
||||
case TargetOpcode::G_INSERT_VECTOR_ELT:
|
||||
return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
|
||||
default:
|
||||
return UnableToLegalize;
|
||||
}
|
||||
|
|
|
@ -1338,11 +1338,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
|||
VecTy.getSizeInBits() <= MaxRegisterSize &&
|
||||
IdxTy.getSizeInBits() == 32;
|
||||
})
|
||||
.bitcastIf(all(sizeIsMultipleOf32(1), scalarOrEltNarrowerThan(1, 32)),
|
||||
bitcastToVectorElement32(1))
|
||||
.bitcastIf(all(sizeIsMultipleOf32(VecTypeIdx), scalarOrEltNarrowerThan(VecTypeIdx, 32)),
|
||||
bitcastToVectorElement32(VecTypeIdx))
|
||||
//.bitcastIf(vectorSmallerThan(1, 32), bitcastToScalar(1))
|
||||
.bitcastIf(
|
||||
all(sizeIsMultipleOf32(1), scalarOrEltWiderThan(1, 64)),
|
||||
all(sizeIsMultipleOf32(VecTypeIdx), scalarOrEltWiderThan(VecTypeIdx, 64)),
|
||||
[=](const LegalityQuery &Query) {
|
||||
// For > 64-bit element types, try to turn this into a 64-bit
|
||||
// element vector since we may be able to do better indexing
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1738,3 +1738,195 @@ body: |
|
|||
%5:_(p1) = COPY $vgpr0_vgpr1
|
||||
G_STORE %4, %5 :: (store 256, align 4, addrspace 1)
|
||||
...
|
||||
|
||||
---
|
||||
name: insert_vector_elt_varidx_v4s8
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $vgpr2
|
||||
; CHECK-LABEL: name: insert_vector_elt_varidx_v4s8
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
|
||||
; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
|
||||
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
|
||||
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
|
||||
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
|
||||
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
|
||||
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
|
||||
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
|
||||
; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
|
||||
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
|
||||
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
|
||||
; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
|
||||
; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
|
||||
; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
|
||||
; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
|
||||
; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
|
||||
; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
|
||||
; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
|
||||
; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
|
||||
; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[SHL3]](s32)
|
||||
; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[SHL3]](s32)
|
||||
; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
|
||||
; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL5]], [[C5]]
|
||||
; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[XOR]]
|
||||
; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]]
|
||||
; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[OR3]], [[C]](s32)
|
||||
; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[OR3]], [[C1]](s32)
|
||||
; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[OR3]], [[C2]](s32)
|
||||
; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR3]](s32)
|
||||
; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
|
||||
; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
|
||||
; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
|
||||
; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
|
||||
; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL6]]
|
||||
; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32)
|
||||
; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]]
|
||||
; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32)
|
||||
; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL7]]
|
||||
; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32)
|
||||
; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
|
||||
; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32)
|
||||
; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL8]]
|
||||
; CHECK: $vgpr0 = COPY [[OR6]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = COPY $vgpr2
|
||||
%3:_(<4 x s8>) = G_BITCAST %0
|
||||
%4:_(s8) = G_TRUNC %1
|
||||
%5:_(<4 x s8>) = G_INSERT_VECTOR_ELT %3, %4, %2
|
||||
%6:_(s32) = G_BITCAST %5
|
||||
$vgpr0 = COPY %6
|
||||
...
|
||||
|
||||
---
|
||||
name: insert_vector_elt_varidx_v8s8
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
|
||||
; CHECK-LABEL: name: insert_vector_elt_varidx_v8s8
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
|
||||
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
|
||||
; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
|
||||
; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
|
||||
; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
|
||||
; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16)
|
||||
; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
|
||||
; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
|
||||
; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16)
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
|
||||
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
|
||||
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
|
||||
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
|
||||
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
|
||||
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
|
||||
; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
|
||||
; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
|
||||
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
|
||||
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
|
||||
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
|
||||
; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
|
||||
; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
|
||||
; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
|
||||
; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
|
||||
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
|
||||
; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
|
||||
; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16)
|
||||
; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
|
||||
; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C3]](s32)
|
||||
; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
|
||||
; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
|
||||
; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]]
|
||||
; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
|
||||
; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
|
||||
; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
|
||||
; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]]
|
||||
; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
|
||||
; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
|
||||
; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C5]](s32)
|
||||
; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[LSHR6]](s32)
|
||||
; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
|
||||
; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
|
||||
; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
|
||||
; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C2]]
|
||||
; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[SHL6]](s32)
|
||||
; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[SHL6]](s32)
|
||||
; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
|
||||
; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL8]], [[C7]]
|
||||
; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[EVEC]], [[XOR]]
|
||||
; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL7]]
|
||||
; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[OR6]](s32), [[LSHR6]](s32)
|
||||
; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC]](<2 x s32>)
|
||||
; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
|
||||
; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
|
||||
; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C4]](s32)
|
||||
; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
|
||||
; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
|
||||
; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C4]](s32)
|
||||
; CHECK: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
|
||||
; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
|
||||
; CHECK: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C8]]
|
||||
; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32)
|
||||
; CHECK: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C8]]
|
||||
; CHECK: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND12]], [[C1]](s16)
|
||||
; CHECK: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND11]], [[SHL9]]
|
||||
; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32)
|
||||
; CHECK: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C8]]
|
||||
; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32)
|
||||
; CHECK: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C8]]
|
||||
; CHECK: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND14]], [[C1]](s16)
|
||||
; CHECK: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND13]], [[SHL10]]
|
||||
; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
|
||||
; CHECK: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C8]]
|
||||
; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32)
|
||||
; CHECK: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C8]]
|
||||
; CHECK: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND16]], [[C1]](s16)
|
||||
; CHECK: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND15]], [[SHL11]]
|
||||
; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32)
|
||||
; CHECK: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C8]]
|
||||
; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32)
|
||||
; CHECK: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C8]]
|
||||
; CHECK: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND18]], [[C1]](s16)
|
||||
; CHECK: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND17]], [[SHL12]]
|
||||
; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
|
||||
; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
|
||||
; CHECK: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
|
||||
; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL13]]
|
||||
; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
|
||||
; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16)
|
||||
; CHECK: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32)
|
||||
; CHECK: [[OR12:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL14]]
|
||||
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR11]](s32), [[OR12]](s32)
|
||||
; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s32) = COPY $vgpr2
|
||||
%2:_(s32) = COPY $vgpr3
|
||||
%3:_(<8 x s8>) = G_BITCAST %0
|
||||
%4:_(s8) = G_TRUNC %1
|
||||
%5:_(<8 x s8>) = G_INSERT_VECTOR_ELT %3, %4, %2
|
||||
%6:_(s64) = G_BITCAST %5
|
||||
$vgpr0_vgpr1 = COPY %6
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue