forked from OSchip/llvm-project
[LegalizeTypes] Prevent an assertion from PromoteIntRes_BSWAP and PromoteIntRes_BITREVERSE if the shift amount is too large for the VT returned by getShiftAmountTy
Summary: getShiftAmountTy for X86 returns MVT::i8. If a BSWAP or BITREVERSE is created that requires promotion and the difference between the original VT and the promoted VT is more than 255 then we won't be able to create the constant. This patch adds a check to replace the result from getShiftAmountTy with MVT::i32 if the difference won't fit. This should get legalized later when the shift is ultimately expanded since it's clearly an illegal type that we're only promoting to make it a power of 2 bit width. Alternatively we could base the decision completely on the largest shift amount the promoted VT could use. Vectors should be immune here because getShiftAmountTy always returns the incoming VT for vectors. Only the scalar shift amount can be changed by the targets. Reviewers: eli.friedman, RKSimon, spatel Reviewed By: RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D53232 llvm-svn: 344460
This commit is contained in:
parent
ffde98de21
commit
189e5b4ab6
|
@ -311,6 +311,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
|
|||
CreateStackStoreLoad(InOp, OutVT));
|
||||
}
|
||||
|
||||
// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount
|
||||
// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
|
||||
static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT,
|
||||
const TargetLowering &TLI,
|
||||
SelectionDAG &DAG) {
|
||||
EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
|
||||
// If the value won't fit in the prefered type, just use something safe. It
|
||||
// will be legalized when the shift is expanded.
|
||||
if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits())
|
||||
ShiftVT = MVT::i32;
|
||||
return ShiftVT;
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
|
||||
SDValue Op = GetPromotedInteger(N->getOperand(0));
|
||||
EVT OVT = N->getValueType(0);
|
||||
|
@ -318,10 +331,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
|
|||
SDLoc dl(N);
|
||||
|
||||
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
|
||||
return DAG.getNode(
|
||||
ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
|
||||
DAG.getConstant(DiffBits, dl,
|
||||
TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
|
||||
EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
|
||||
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
|
||||
DAG.getConstant(DiffBits, dl, ShiftVT));
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
|
||||
|
@ -331,10 +343,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
|
|||
SDLoc dl(N);
|
||||
|
||||
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
|
||||
return DAG.getNode(
|
||||
ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
|
||||
DAG.getConstant(DiffBits, dl,
|
||||
TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
|
||||
EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
|
||||
return DAG.getNode(ISD::SRL, dl, NVT,
|
||||
DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
|
||||
DAG.getConstant(DiffBits, dl, ShiftVT));
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
|
||||
|
|
|
@ -523,3 +523,621 @@ define <2 x i16> @undef_v2i16() {
|
|||
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
|
||||
ret <2 x i16> %b
|
||||
}
|
||||
|
||||
; Make sure we don't assert during type legalization promoting a large
|
||||
; bitreverse due to the need for a large shift that won't fit in the i8 returned
|
||||
; from getShiftAmountTy.
|
||||
define i528 @large_promotion(i528 %A) nounwind {
|
||||
; X86-LABEL: large_promotion:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: pushl %ebx
|
||||
; X86-NEXT: pushl %edi
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: subl $56, %esp
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; X86-NEXT: bswapl %ebx
|
||||
; X86-NEXT: movl %ebx, %ebp
|
||||
; X86-NEXT: andl $252645135, %ebp # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %ebp
|
||||
; X86-NEXT: andl $-252645136, %ebx # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %ebx
|
||||
; X86-NEXT: orl %ebp, %ebx
|
||||
; X86-NEXT: movl %ebx, %ebp
|
||||
; X86-NEXT: andl $858993459, %ebp # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %ebx # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %ebx
|
||||
; X86-NEXT: leal (%ebx,%ebp,4), %ebx
|
||||
; X86-NEXT: movl %ebx, %ebp
|
||||
; X86-NEXT: andl $1431633920, %ebp # imm = 0x55550000
|
||||
; X86-NEXT: andl $-1431699456, %ebx # imm = 0xAAAA0000
|
||||
; X86-NEXT: shrl %ebx
|
||||
; X86-NEXT: leal (%ebx,%ebp,2), %ebx
|
||||
; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill
|
||||
; X86-NEXT: bswapl %edi
|
||||
; X86-NEXT: movl %edi, %ebx
|
||||
; X86-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %ebx
|
||||
; X86-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %edi
|
||||
; X86-NEXT: orl %ebx, %edi
|
||||
; X86-NEXT: movl %edi, %ebx
|
||||
; X86-NEXT: andl $858993459, %ebx # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %edi
|
||||
; X86-NEXT: leal (%edi,%ebx,4), %edi
|
||||
; X86-NEXT: movl %edi, %ebx
|
||||
; X86-NEXT: andl $1431655765, %ebx # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %edi # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %edi
|
||||
; X86-NEXT: leal (%edi,%ebx,2), %edi
|
||||
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: bswapl %esi
|
||||
; X86-NEXT: movl %esi, %edi
|
||||
; X86-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %edi
|
||||
; X86-NEXT: andl $-252645136, %esi # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %esi
|
||||
; X86-NEXT: orl %edi, %esi
|
||||
; X86-NEXT: movl %esi, %edi
|
||||
; X86-NEXT: andl $858993459, %edi # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %esi # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %esi
|
||||
; X86-NEXT: leal (%esi,%edi,4), %esi
|
||||
; X86-NEXT: movl %esi, %edi
|
||||
; X86-NEXT: andl $1431655765, %edi # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %esi # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %esi
|
||||
; X86-NEXT: leal (%esi,%edi,2), %ebx
|
||||
; X86-NEXT: bswapl %edx
|
||||
; X86-NEXT: movl %edx, %esi
|
||||
; X86-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %esi
|
||||
; X86-NEXT: andl $-252645136, %edx # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %edx
|
||||
; X86-NEXT: orl %esi, %edx
|
||||
; X86-NEXT: movl %edx, %esi
|
||||
; X86-NEXT: andl $858993459, %esi # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %edx # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %edx
|
||||
; X86-NEXT: leal (%edx,%esi,4), %edx
|
||||
; X86-NEXT: movl %edx, %esi
|
||||
; X86-NEXT: andl $1431655765, %esi # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %edx # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %edx
|
||||
; X86-NEXT: leal (%edx,%esi,2), %edx
|
||||
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: bswapl %ecx
|
||||
; X86-NEXT: movl %ecx, %edx
|
||||
; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %edx
|
||||
; X86-NEXT: andl $-252645136, %ecx # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %ecx
|
||||
; X86-NEXT: orl %edx, %ecx
|
||||
; X86-NEXT: movl %ecx, %edx
|
||||
; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %ecx # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %ecx
|
||||
; X86-NEXT: leal (%ecx,%edx,4), %ecx
|
||||
; X86-NEXT: movl %ecx, %edx
|
||||
; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %ecx # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %ecx
|
||||
; X86-NEXT: leal (%ecx,%edx,2), %ecx
|
||||
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: bswapl %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %ecx
|
||||
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %eax
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,4), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,2), %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: bswapl %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %ecx
|
||||
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %eax
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,4), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,2), %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: bswapl %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %ecx
|
||||
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %eax
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,4), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,2), %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: bswapl %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %ecx
|
||||
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %eax
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,4), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,2), %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: bswapl %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %ecx
|
||||
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %eax
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,4), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,2), %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: bswapl %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %ecx
|
||||
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %eax
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,4), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,2), %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: bswapl %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %ecx
|
||||
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %eax
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,4), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,2), %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: bswapl %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %ecx
|
||||
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %eax
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,4), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,2), %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: bswapl %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %ecx
|
||||
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %eax
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,4), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,2), %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: bswapl %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %ecx
|
||||
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %eax
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,4), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,2), %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: bswapl %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %ecx
|
||||
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %eax
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,4), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,2), %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: bswapl %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
|
||||
; X86-NEXT: shll $4, %ecx
|
||||
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
|
||||
; X86-NEXT: shrl $4, %eax
|
||||
; X86-NEXT: orl %ecx, %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
|
||||
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
|
||||
; X86-NEXT: shrl $2, %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,4), %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
|
||||
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
|
||||
; X86-NEXT: shrl %eax
|
||||
; X86-NEXT: leal (%eax,%ecx,2), %edx
|
||||
; X86-NEXT: movl (%esp), %esi # 4-byte Reload
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl $16, %eax, %esi
|
||||
; X86-NEXT: shrdl $16, %ebx, %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: shrdl $16, %ecx, %ebx
|
||||
; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl $16, %eax, %ecx
|
||||
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: shrdl $16, %ecx, %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl $16, %eax, %ecx
|
||||
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: shrdl $16, %ecx, %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl $16, %eax, %ecx
|
||||
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: shrdl $16, %ecx, %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl $16, %eax, %ecx
|
||||
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: shrdl $16, %ecx, %eax
|
||||
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
|
||||
; X86-NEXT: shrdl $16, %ebp, %ecx
|
||||
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
|
||||
; X86-NEXT: shrdl $16, %ebx, %ebp
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
|
||||
; X86-NEXT: shrdl $16, %eax, %ebx
|
||||
; X86-NEXT: shrdl $16, %edi, %eax
|
||||
; X86-NEXT: movl %eax, %ecx
|
||||
; X86-NEXT: shrdl $16, %edx, %edi
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl %edi, 60(%eax)
|
||||
; X86-NEXT: movl %ecx, 56(%eax)
|
||||
; X86-NEXT: movl %ebx, 52(%eax)
|
||||
; X86-NEXT: movl %ebp, 48(%eax)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: movl %ecx, 44(%eax)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: movl %ecx, 40(%eax)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: movl %ecx, 36(%eax)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: movl %ecx, 32(%eax)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: movl %ecx, 28(%eax)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: movl %ecx, 24(%eax)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: movl %ecx, 20(%eax)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: movl %ecx, 16(%eax)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: movl %ecx, 12(%eax)
|
||||
; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
|
||||
; X86-NEXT: movl %ecx, 8(%eax)
|
||||
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; X86-NEXT: movl %ecx, 4(%eax)
|
||||
; X86-NEXT: movl %esi, (%eax)
|
||||
; X86-NEXT: shrl $16, %edx
|
||||
; X86-NEXT: movw %dx, 64(%eax)
|
||||
; X86-NEXT: addl $56, %esp
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: popl %edi
|
||||
; X86-NEXT: popl %ebx
|
||||
; X86-NEXT: popl %ebp
|
||||
; X86-NEXT: retl $4
|
||||
;
|
||||
; X64-LABEL: large_promotion:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pushq %rbp
|
||||
; X64-NEXT: pushq %r15
|
||||
; X64-NEXT: pushq %r14
|
||||
; X64-NEXT: pushq %r13
|
||||
; X64-NEXT: pushq %r12
|
||||
; X64-NEXT: pushq %rbx
|
||||
; X64-NEXT: movq %rdi, %r12
|
||||
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp
|
||||
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
|
||||
; X64-NEXT: bswapq %rbx
|
||||
; X64-NEXT: movabsq $1085102592571150095, %r13 # imm = 0xF0F0F0F0F0F0F0F
|
||||
; X64-NEXT: movq %rbx, %r10
|
||||
; X64-NEXT: andq %r13, %r10
|
||||
; X64-NEXT: shlq $4, %r10
|
||||
; X64-NEXT: movabsq $-1085102592571150096, %rax # imm = 0xF0F0F0F0F0F0F0F0
|
||||
; X64-NEXT: andq %rax, %rbx
|
||||
; X64-NEXT: shrq $4, %rbx
|
||||
; X64-NEXT: orq %r10, %rbx
|
||||
; X64-NEXT: movabsq $3689348814741910323, %r11 # imm = 0x3333333333333333
|
||||
; X64-NEXT: movq %rbx, %r10
|
||||
; X64-NEXT: andq %r11, %r10
|
||||
; X64-NEXT: movabsq $-3689348814741910324, %r14 # imm = 0xCCCCCCCCCCCCCCCC
|
||||
; X64-NEXT: andq %r14, %rbx
|
||||
; X64-NEXT: shrq $2, %rbx
|
||||
; X64-NEXT: leaq (%rbx,%r10,4), %r10
|
||||
; X64-NEXT: movabsq $6148820866244280320, %rbx # imm = 0x5555000000000000
|
||||
; X64-NEXT: andq %r10, %rbx
|
||||
; X64-NEXT: movabsq $-6149102341220990976, %rdi # imm = 0xAAAA000000000000
|
||||
; X64-NEXT: andq %r10, %rdi
|
||||
; X64-NEXT: shrq %rdi
|
||||
; X64-NEXT: leaq (%rdi,%rbx,2), %rdi
|
||||
; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; X64-NEXT: bswapq %rbp
|
||||
; X64-NEXT: movq %rbp, %rdi
|
||||
; X64-NEXT: andq %r13, %rdi
|
||||
; X64-NEXT: shlq $4, %rdi
|
||||
; X64-NEXT: andq %rax, %rbp
|
||||
; X64-NEXT: shrq $4, %rbp
|
||||
; X64-NEXT: orq %rdi, %rbp
|
||||
; X64-NEXT: movq %rbp, %rdi
|
||||
; X64-NEXT: andq %r11, %rdi
|
||||
; X64-NEXT: andq %r14, %rbp
|
||||
; X64-NEXT: shrq $2, %rbp
|
||||
; X64-NEXT: leaq (%rbp,%rdi,4), %rbp
|
||||
; X64-NEXT: movabsq $6148914691236517205, %rbx # imm = 0x5555555555555555
|
||||
; X64-NEXT: movq %rbp, %r10
|
||||
; X64-NEXT: andq %rbx, %r10
|
||||
; X64-NEXT: movabsq $-6148914691236517206, %rdi # imm = 0xAAAAAAAAAAAAAAAA
|
||||
; X64-NEXT: andq %rdi, %rbp
|
||||
; X64-NEXT: shrq %rbp
|
||||
; X64-NEXT: leaq (%rbp,%r10,2), %rbp
|
||||
; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
|
||||
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp
|
||||
; X64-NEXT: bswapq %rbp
|
||||
; X64-NEXT: movq %rbp, %r10
|
||||
; X64-NEXT: andq %r13, %r10
|
||||
; X64-NEXT: shlq $4, %r10
|
||||
; X64-NEXT: andq %rax, %rbp
|
||||
; X64-NEXT: movq %rax, %r15
|
||||
; X64-NEXT: shrq $4, %rbp
|
||||
; X64-NEXT: orq %r10, %rbp
|
||||
; X64-NEXT: movq %rbp, %r10
|
||||
; X64-NEXT: andq %r11, %r10
|
||||
; X64-NEXT: andq %r14, %rbp
|
||||
; X64-NEXT: shrq $2, %rbp
|
||||
; X64-NEXT: leaq (%rbp,%r10,4), %rbp
|
||||
; X64-NEXT: movq %rbp, %r10
|
||||
; X64-NEXT: andq %rbx, %r10
|
||||
; X64-NEXT: andq %rdi, %rbp
|
||||
; X64-NEXT: shrq %rbp
|
||||
; X64-NEXT: leaq (%rbp,%r10,2), %rbp
|
||||
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; X64-NEXT: bswapq %r10
|
||||
; X64-NEXT: movq %r10, %rax
|
||||
; X64-NEXT: andq %r13, %rax
|
||||
; X64-NEXT: shlq $4, %rax
|
||||
; X64-NEXT: movq %r15, %rdi
|
||||
; X64-NEXT: andq %r15, %r10
|
||||
; X64-NEXT: shrq $4, %r10
|
||||
; X64-NEXT: orq %rax, %r10
|
||||
; X64-NEXT: movq %r10, %rax
|
||||
; X64-NEXT: andq %r11, %rax
|
||||
; X64-NEXT: andq %r14, %r10
|
||||
; X64-NEXT: shrq $2, %r10
|
||||
; X64-NEXT: leaq (%r10,%rax,4), %rax
|
||||
; X64-NEXT: movq %rax, %r10
|
||||
; X64-NEXT: andq %rbx, %r10
|
||||
; X64-NEXT: movabsq $-6148914691236517206, %r15 # imm = 0xAAAAAAAAAAAAAAAA
|
||||
; X64-NEXT: andq %r15, %rax
|
||||
; X64-NEXT: shrq %rax
|
||||
; X64-NEXT: leaq (%rax,%r10,2), %r10
|
||||
; X64-NEXT: bswapq %r9
|
||||
; X64-NEXT: movq %r9, %rax
|
||||
; X64-NEXT: andq %r13, %rax
|
||||
; X64-NEXT: shlq $4, %rax
|
||||
; X64-NEXT: andq %rdi, %r9
|
||||
; X64-NEXT: shrq $4, %r9
|
||||
; X64-NEXT: orq %rax, %r9
|
||||
; X64-NEXT: movq %r9, %rax
|
||||
; X64-NEXT: andq %r11, %rax
|
||||
; X64-NEXT: andq %r14, %r9
|
||||
; X64-NEXT: shrq $2, %r9
|
||||
; X64-NEXT: leaq (%r9,%rax,4), %rax
|
||||
; X64-NEXT: movq %rax, %r9
|
||||
; X64-NEXT: andq %rbx, %r9
|
||||
; X64-NEXT: andq %r15, %rax
|
||||
; X64-NEXT: shrq %rax
|
||||
; X64-NEXT: leaq (%rax,%r9,2), %r9
|
||||
; X64-NEXT: bswapq %r8
|
||||
; X64-NEXT: movq %r8, %rax
|
||||
; X64-NEXT: andq %r13, %rax
|
||||
; X64-NEXT: shlq $4, %rax
|
||||
; X64-NEXT: andq %rdi, %r8
|
||||
; X64-NEXT: shrq $4, %r8
|
||||
; X64-NEXT: orq %rax, %r8
|
||||
; X64-NEXT: movq %r8, %rax
|
||||
; X64-NEXT: andq %r11, %rax
|
||||
; X64-NEXT: andq %r14, %r8
|
||||
; X64-NEXT: shrq $2, %r8
|
||||
; X64-NEXT: leaq (%r8,%rax,4), %rax
|
||||
; X64-NEXT: movq %rax, %r8
|
||||
; X64-NEXT: andq %rbx, %r8
|
||||
; X64-NEXT: andq %r15, %rax
|
||||
; X64-NEXT: shrq %rax
|
||||
; X64-NEXT: leaq (%rax,%r8,2), %r8
|
||||
; X64-NEXT: bswapq %rcx
|
||||
; X64-NEXT: movq %rcx, %rax
|
||||
; X64-NEXT: andq %r13, %rax
|
||||
; X64-NEXT: shlq $4, %rax
|
||||
; X64-NEXT: andq %rdi, %rcx
|
||||
; X64-NEXT: shrq $4, %rcx
|
||||
; X64-NEXT: orq %rax, %rcx
|
||||
; X64-NEXT: movq %rcx, %rax
|
||||
; X64-NEXT: andq %r11, %rax
|
||||
; X64-NEXT: andq %r14, %rcx
|
||||
; X64-NEXT: shrq $2, %rcx
|
||||
; X64-NEXT: leaq (%rcx,%rax,4), %rax
|
||||
; X64-NEXT: movq %rax, %rcx
|
||||
; X64-NEXT: andq %rbx, %rcx
|
||||
; X64-NEXT: andq %r15, %rax
|
||||
; X64-NEXT: shrq %rax
|
||||
; X64-NEXT: leaq (%rax,%rcx,2), %rcx
|
||||
; X64-NEXT: bswapq %rdx
|
||||
; X64-NEXT: movq %rdx, %rax
|
||||
; X64-NEXT: andq %r13, %rax
|
||||
; X64-NEXT: shlq $4, %rax
|
||||
; X64-NEXT: andq %rdi, %rdx
|
||||
; X64-NEXT: shrq $4, %rdx
|
||||
; X64-NEXT: orq %rax, %rdx
|
||||
; X64-NEXT: movq %rdx, %rax
|
||||
; X64-NEXT: andq %r11, %rax
|
||||
; X64-NEXT: andq %r14, %rdx
|
||||
; X64-NEXT: shrq $2, %rdx
|
||||
; X64-NEXT: leaq (%rdx,%rax,4), %rax
|
||||
; X64-NEXT: movq %rax, %rdx
|
||||
; X64-NEXT: andq %rbx, %rdx
|
||||
; X64-NEXT: andq %r15, %rax
|
||||
; X64-NEXT: shrq %rax
|
||||
; X64-NEXT: leaq (%rax,%rdx,2), %rax
|
||||
; X64-NEXT: bswapq %rsi
|
||||
; X64-NEXT: andq %rsi, %r13
|
||||
; X64-NEXT: andq %rdi, %rsi
|
||||
; X64-NEXT: shlq $4, %r13
|
||||
; X64-NEXT: shrq $4, %rsi
|
||||
; X64-NEXT: orq %r13, %rsi
|
||||
; X64-NEXT: andq %rsi, %r11
|
||||
; X64-NEXT: andq %r14, %rsi
|
||||
; X64-NEXT: shrq $2, %rsi
|
||||
; X64-NEXT: leaq (%rsi,%r11,4), %rdx
|
||||
; X64-NEXT: andq %rdx, %rbx
|
||||
; X64-NEXT: andq %r15, %rdx
|
||||
; X64-NEXT: shrq %rdx
|
||||
; X64-NEXT: leaq (%rdx,%rbx,2), %rdx
|
||||
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
|
||||
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
|
||||
; X64-NEXT: shrdq $48, %rdi, %rsi
|
||||
; X64-NEXT: shrdq $48, %rbp, %rdi
|
||||
; X64-NEXT: shrdq $48, %r10, %rbp
|
||||
; X64-NEXT: shrdq $48, %r9, %r10
|
||||
; X64-NEXT: shrdq $48, %r8, %r9
|
||||
; X64-NEXT: shrdq $48, %rcx, %r8
|
||||
; X64-NEXT: shrdq $48, %rax, %rcx
|
||||
; X64-NEXT: shrdq $48, %rdx, %rax
|
||||
; X64-NEXT: movq %rax, 56(%r12)
|
||||
; X64-NEXT: movq %rcx, 48(%r12)
|
||||
; X64-NEXT: movq %r8, 40(%r12)
|
||||
; X64-NEXT: movq %r9, 32(%r12)
|
||||
; X64-NEXT: movq %r10, 24(%r12)
|
||||
; X64-NEXT: movq %rbp, 16(%r12)
|
||||
; X64-NEXT: movq %rdi, 8(%r12)
|
||||
; X64-NEXT: movq %rsi, (%r12)
|
||||
; X64-NEXT: shrq $48, %rdx
|
||||
; X64-NEXT: movw %dx, 64(%r12)
|
||||
; X64-NEXT: movq %r12, %rax
|
||||
; X64-NEXT: popq %rbx
|
||||
; X64-NEXT: popq %r12
|
||||
; X64-NEXT: popq %r13
|
||||
; X64-NEXT: popq %r14
|
||||
; X64-NEXT: popq %r15
|
||||
; X64-NEXT: popq %rbp
|
||||
; X64-NEXT: retq
|
||||
%Z = call i528 @llvm.bitreverse.i528(i528 %A)
|
||||
ret i528 %Z
|
||||
}
|
||||
declare i528 @llvm.bitreverse.i528(i528)
|
||||
|
|
|
@ -206,3 +206,153 @@ define i64 @finally_useful_bswap() {
|
|||
ret i64 %swapped
|
||||
}
|
||||
|
||||
; Make sure we don't assert during type legalization promoting a large
|
||||
; bswap due to the need for a large shift that won't fit in the i8 returned
|
||||
; from getShiftAmountTy.
|
||||
define i528 @large_promotion(i528 %A) nounwind {
|
||||
; CHECK-LABEL: large_promotion:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pushl %ebp
|
||||
; CHECK-NEXT: pushl %ebx
|
||||
; CHECK-NEXT: pushl %edi
|
||||
; CHECK-NEXT: pushl %esi
|
||||
; CHECK-NEXT: subl $44, %esp
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: bswapl %eax
|
||||
; CHECK-NEXT: bswapl %ecx
|
||||
; CHECK-NEXT: shrdl $16, %ecx, %eax
|
||||
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-NEXT: bswapl %edx
|
||||
; CHECK-NEXT: shrdl $16, %edx, %ecx
|
||||
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-NEXT: bswapl %esi
|
||||
; CHECK-NEXT: shrdl $16, %esi, %edx
|
||||
; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-NEXT: bswapl %edi
|
||||
; CHECK-NEXT: shrdl $16, %edi, %esi
|
||||
; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-NEXT: bswapl %ebx
|
||||
; CHECK-NEXT: shrdl $16, %ebx, %edi
|
||||
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-NEXT: bswapl %ebp
|
||||
; CHECK-NEXT: shrdl $16, %ebp, %ebx
|
||||
; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: bswapl %ecx
|
||||
; CHECK-NEXT: shrdl $16, %ecx, %ebp
|
||||
; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: bswapl %eax
|
||||
; CHECK-NEXT: shrdl $16, %eax, %ecx
|
||||
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: bswapl %ecx
|
||||
; CHECK-NEXT: shrdl $16, %ecx, %eax
|
||||
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: bswapl %eax
|
||||
; CHECK-NEXT: shrdl $16, %eax, %ecx
|
||||
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
|
||||
; CHECK-NEXT: bswapl %ebp
|
||||
; CHECK-NEXT: shrdl $16, %ebp, %eax
|
||||
; CHECK-NEXT: movl %eax, (%esp) # 4-byte Spill
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; CHECK-NEXT: bswapl %ebx
|
||||
; CHECK-NEXT: shrdl $16, %ebx, %ebp
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; CHECK-NEXT: bswapl %esi
|
||||
; CHECK-NEXT: shrdl $16, %esi, %ebx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: bswapl %edx
|
||||
; CHECK-NEXT: shrdl $16, %edx, %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: bswapl %ecx
|
||||
; CHECK-NEXT: shrdl $16, %ecx, %edx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; CHECK-NEXT: bswapl %edi
|
||||
; CHECK-NEXT: shrdl $16, %edi, %ecx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl %ecx, 60(%eax)
|
||||
; CHECK-NEXT: movl %edx, 56(%eax)
|
||||
; CHECK-NEXT: movl %esi, 52(%eax)
|
||||
; CHECK-NEXT: movl %ebx, 48(%eax)
|
||||
; CHECK-NEXT: movl %ebp, 44(%eax)
|
||||
; CHECK-NEXT: movl (%esp), %ecx # 4-byte Reload
|
||||
; CHECK-NEXT: movl %ecx, 40(%eax)
|
||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; CHECK-NEXT: movl %ecx, 36(%eax)
|
||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; CHECK-NEXT: movl %ecx, 32(%eax)
|
||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; CHECK-NEXT: movl %ecx, 28(%eax)
|
||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; CHECK-NEXT: movl %ecx, 24(%eax)
|
||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; CHECK-NEXT: movl %ecx, 20(%eax)
|
||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; CHECK-NEXT: movl %ecx, 16(%eax)
|
||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; CHECK-NEXT: movl %ecx, 12(%eax)
|
||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; CHECK-NEXT: movl %ecx, 8(%eax)
|
||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; CHECK-NEXT: movl %ecx, 4(%eax)
|
||||
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
|
||||
; CHECK-NEXT: movl %ecx, (%eax)
|
||||
; CHECK-NEXT: shrl $16, %edi
|
||||
; CHECK-NEXT: movw %di, 64(%eax)
|
||||
; CHECK-NEXT: addl $44, %esp
|
||||
; CHECK-NEXT: popl %esi
|
||||
; CHECK-NEXT: popl %edi
|
||||
; CHECK-NEXT: popl %ebx
|
||||
; CHECK-NEXT: popl %ebp
|
||||
; CHECK-NEXT: retl $4
|
||||
;
|
||||
; CHECK64-LABEL: large_promotion:
|
||||
; CHECK64: # %bb.0:
|
||||
; CHECK64-NEXT: pushq %rbx
|
||||
; CHECK64-NEXT: movq %rdi, %rax
|
||||
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
|
||||
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %rdi
|
||||
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; CHECK64-NEXT: bswapq %r10
|
||||
; CHECK64-NEXT: bswapq %rdi
|
||||
; CHECK64-NEXT: shrdq $48, %rdi, %r10
|
||||
; CHECK64-NEXT: bswapq %r11
|
||||
; CHECK64-NEXT: shrdq $48, %r11, %rdi
|
||||
; CHECK64-NEXT: bswapq %rbx
|
||||
; CHECK64-NEXT: shrdq $48, %rbx, %r11
|
||||
; CHECK64-NEXT: bswapq %r9
|
||||
; CHECK64-NEXT: shrdq $48, %r9, %rbx
|
||||
; CHECK64-NEXT: bswapq %r8
|
||||
; CHECK64-NEXT: shrdq $48, %r8, %r9
|
||||
; CHECK64-NEXT: bswapq %rcx
|
||||
; CHECK64-NEXT: shrdq $48, %rcx, %r8
|
||||
; CHECK64-NEXT: bswapq %rdx
|
||||
; CHECK64-NEXT: shrdq $48, %rdx, %rcx
|
||||
; CHECK64-NEXT: bswapq %rsi
|
||||
; CHECK64-NEXT: shrdq $48, %rsi, %rdx
|
||||
; CHECK64-NEXT: shrq $48, %rsi
|
||||
; CHECK64-NEXT: movq %rdx, 56(%rax)
|
||||
; CHECK64-NEXT: movq %rcx, 48(%rax)
|
||||
; CHECK64-NEXT: movq %r8, 40(%rax)
|
||||
; CHECK64-NEXT: movq %r9, 32(%rax)
|
||||
; CHECK64-NEXT: movq %rbx, 24(%rax)
|
||||
; CHECK64-NEXT: movq %r11, 16(%rax)
|
||||
; CHECK64-NEXT: movq %rdi, 8(%rax)
|
||||
; CHECK64-NEXT: movq %r10, (%rax)
|
||||
; CHECK64-NEXT: movw %si, 64(%rax)
|
||||
; CHECK64-NEXT: popq %rbx
|
||||
; CHECK64-NEXT: retq
|
||||
%Z = call i528 @llvm.bswap.i528(i528 %A)
|
||||
ret i528 %Z
|
||||
}
|
||||
declare i528 @llvm.bswap.i528(i528)
|
||||
|
|
Loading…
Reference in New Issue