[LegalizeTypes] Prevent an assertion from PromoteIntRes_BSWAP and PromoteIntRes_BITREVERSE if the shift amount is too large for the VT returned by getShiftAmountTy

Summary:
getShiftAmountTy for X86 returns MVT::i8. If a BSWAP or BITREVERSE is created that requires promotion and the difference between the original VT and the promoted VT is more than 255, then we won't be able to create the constant.

This patch adds a check to replace the result from getShiftAmountTy with MVT::i32 if the difference won't fit. This should get legalized later when the shift is ultimately expanded, since it's clearly an illegal type that we're only promoting to make it a power of 2 bit width. Alternatively, we could base the decision completely on the largest shift amount the promoted VT could use.

Vectors should be immune here because getShiftAmountTy always returns the incoming VT for vectors. Only the scalar shift amount can be changed by the targets.

Reviewers: eli.friedman, RKSimon, spatel

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D53232

llvm-svn: 344460
This commit is contained in:
Craig Topper 2018-10-13 17:47:20 +00:00
parent ffde98de21
commit 189e5b4ab6
3 changed files with 788 additions and 8 deletions

View File

@ -311,6 +311,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
CreateStackStoreLoad(InOp, OutVT)); CreateStackStoreLoad(InOp, OutVT));
} }
// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount
// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
//
// Val is the constant shift amount that is going to be materialized, and VT is
// the type handed to getShiftAmountTy (the callers here pass the promoted
// type). Returns the target's shift amount type for VT when Val fits in it,
// and MVT::i32 otherwise.
static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT,
const TargetLowering &TLI,
SelectionDAG &DAG) {
// Start from the shift amount type the target prefers for VT.
EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
// If the value won't fit in the preferred type, just use something safe. It
// will be legalized when the shift is expanded.
// Log2_32(Val) + 1 is the number of bits needed to represent a nonzero Val.
if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits())
ShiftVT = MVT::i32;
return ShiftVT;
}
SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0)); SDValue Op = GetPromotedInteger(N->getOperand(0));
EVT OVT = N->getValueType(0); EVT OVT = N->getValueType(0);
@ -318,10 +331,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDLoc dl(N); SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
return DAG.getNode( EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, DAG.getConstant(DiffBits, dl, ShiftVT));
TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
} }
SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
@ -331,10 +343,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
SDLoc dl(N); SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
return DAG.getNode( EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), return DAG.getNode(ISD::SRL, dl, NVT,
DAG.getConstant(DiffBits, dl, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); DAG.getConstant(DiffBits, dl, ShiftVT));
} }
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {

View File

@ -523,3 +523,621 @@ define <2 x i16> @undef_v2i16() {
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef) %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
ret <2 x i16> %b ret <2 x i16> %b
} }
; Make sure we don't assert during type legalization promoting a large
; bitreverse due to the need for a large shift that won't fit in the i8 returned
; from getShiftAmountTy.
define i528 @large_promotion(i528 %A) nounwind {
; X86-LABEL: large_promotion:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $56, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: bswapl %ebx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: andl $252645135, %ebp # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ebp
; X86-NEXT: andl $-252645136, %ebx # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %ebx
; X86-NEXT: orl %ebp, %ebx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: andl $858993459, %ebp # imm = 0x33333333
; X86-NEXT: andl $-858993460, %ebx # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %ebx
; X86-NEXT: leal (%ebx,%ebp,4), %ebx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: andl $1431633920, %ebp # imm = 0x55550000
; X86-NEXT: andl $-1431699456, %ebx # imm = 0xAAAA0000
; X86-NEXT: shrl %ebx
; X86-NEXT: leal (%ebx,%ebp,2), %ebx
; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X86-NEXT: bswapl %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ebx
; X86-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %edi
; X86-NEXT: orl %ebx, %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: andl $858993459, %ebx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %edi
; X86-NEXT: leal (%edi,%ebx,4), %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: andl $1431655765, %ebx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %edi # imm = 0xAAAAAAAA
; X86-NEXT: shrl %edi
; X86-NEXT: leal (%edi,%ebx,2), %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: bswapl %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %edi
; X86-NEXT: andl $-252645136, %esi # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %esi
; X86-NEXT: orl %edi, %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: andl $858993459, %edi # imm = 0x33333333
; X86-NEXT: andl $-858993460, %esi # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %esi
; X86-NEXT: leal (%esi,%edi,4), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: andl $1431655765, %edi # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %esi # imm = 0xAAAAAAAA
; X86-NEXT: shrl %esi
; X86-NEXT: leal (%esi,%edi,2), %ebx
; X86-NEXT: bswapl %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %esi
; X86-NEXT: andl $-252645136, %edx # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %edx
; X86-NEXT: orl %esi, %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $858993459, %esi # imm = 0x33333333
; X86-NEXT: andl $-858993460, %edx # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %edx
; X86-NEXT: leal (%edx,%esi,4), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $1431655765, %esi # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %edx # imm = 0xAAAAAAAA
; X86-NEXT: shrl %edx
; X86-NEXT: leal (%edx,%esi,2), %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: bswapl %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: andl $-252645136, %ecx # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %ecx # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %ecx
; X86-NEXT: leal (%ecx,%edx,4), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %ecx # imm = 0xAAAAAAAA
; X86-NEXT: shrl %ecx
; X86-NEXT: leal (%ecx,%edx,2), %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %edx
; X86-NEXT: movl (%esp), %esi # 4-byte Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %esi
; X86-NEXT: shrdl $16, %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %ebx
; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: shrdl $16, %ebp, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: shrdl $16, %ebx, %ebp
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ebx
; X86-NEXT: shrdl $16, %edi, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shrdl $16, %edx, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edi, 60(%eax)
; X86-NEXT: movl %ecx, 56(%eax)
; X86-NEXT: movl %ebx, 52(%eax)
; X86-NEXT: movl %ebp, 48(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 44(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 40(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 36(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 32(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 28(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 24(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 20(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 16(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: shrl $16, %edx
; X86-NEXT: movw %dx, 64(%eax)
; X86-NEXT: addl $56, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: large_promotion:
; X64: # %bb.0:
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %r15
; X64-NEXT: pushq %r14
; X64-NEXT: pushq %r13
; X64-NEXT: pushq %r12
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %rdi, %r12
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; X64-NEXT: bswapq %rbx
; X64-NEXT: movabsq $1085102592571150095, %r13 # imm = 0xF0F0F0F0F0F0F0F
; X64-NEXT: movq %rbx, %r10
; X64-NEXT: andq %r13, %r10
; X64-NEXT: shlq $4, %r10
; X64-NEXT: movabsq $-1085102592571150096, %rax # imm = 0xF0F0F0F0F0F0F0F0
; X64-NEXT: andq %rax, %rbx
; X64-NEXT: shrq $4, %rbx
; X64-NEXT: orq %r10, %rbx
; X64-NEXT: movabsq $3689348814741910323, %r11 # imm = 0x3333333333333333
; X64-NEXT: movq %rbx, %r10
; X64-NEXT: andq %r11, %r10
; X64-NEXT: movabsq $-3689348814741910324, %r14 # imm = 0xCCCCCCCCCCCCCCCC
; X64-NEXT: andq %r14, %rbx
; X64-NEXT: shrq $2, %rbx
; X64-NEXT: leaq (%rbx,%r10,4), %r10
; X64-NEXT: movabsq $6148820866244280320, %rbx # imm = 0x5555000000000000
; X64-NEXT: andq %r10, %rbx
; X64-NEXT: movabsq $-6149102341220990976, %rdi # imm = 0xAAAA000000000000
; X64-NEXT: andq %r10, %rdi
; X64-NEXT: shrq %rdi
; X64-NEXT: leaq (%rdi,%rbx,2), %rdi
; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: bswapq %rbp
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: andq %r13, %rdi
; X64-NEXT: shlq $4, %rdi
; X64-NEXT: andq %rax, %rbp
; X64-NEXT: shrq $4, %rbp
; X64-NEXT: orq %rdi, %rbp
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: andq %r11, %rdi
; X64-NEXT: andq %r14, %rbp
; X64-NEXT: shrq $2, %rbp
; X64-NEXT: leaq (%rbp,%rdi,4), %rbp
; X64-NEXT: movabsq $6148914691236517205, %rbx # imm = 0x5555555555555555
; X64-NEXT: movq %rbp, %r10
; X64-NEXT: andq %rbx, %r10
; X64-NEXT: movabsq $-6148914691236517206, %rdi # imm = 0xAAAAAAAAAAAAAAAA
; X64-NEXT: andq %rdi, %rbp
; X64-NEXT: shrq %rbp
; X64-NEXT: leaq (%rbp,%r10,2), %rbp
; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; X64-NEXT: bswapq %rbp
; X64-NEXT: movq %rbp, %r10
; X64-NEXT: andq %r13, %r10
; X64-NEXT: shlq $4, %r10
; X64-NEXT: andq %rax, %rbp
; X64-NEXT: movq %rax, %r15
; X64-NEXT: shrq $4, %rbp
; X64-NEXT: orq %r10, %rbp
; X64-NEXT: movq %rbp, %r10
; X64-NEXT: andq %r11, %r10
; X64-NEXT: andq %r14, %rbp
; X64-NEXT: shrq $2, %rbp
; X64-NEXT: leaq (%rbp,%r10,4), %rbp
; X64-NEXT: movq %rbp, %r10
; X64-NEXT: andq %rbx, %r10
; X64-NEXT: andq %rdi, %rbp
; X64-NEXT: shrq %rbp
; X64-NEXT: leaq (%rbp,%r10,2), %rbp
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: bswapq %r10
; X64-NEXT: movq %r10, %rax
; X64-NEXT: andq %r13, %rax
; X64-NEXT: shlq $4, %rax
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: andq %r15, %r10
; X64-NEXT: shrq $4, %r10
; X64-NEXT: orq %rax, %r10
; X64-NEXT: movq %r10, %rax
; X64-NEXT: andq %r11, %rax
; X64-NEXT: andq %r14, %r10
; X64-NEXT: shrq $2, %r10
; X64-NEXT: leaq (%r10,%rax,4), %rax
; X64-NEXT: movq %rax, %r10
; X64-NEXT: andq %rbx, %r10
; X64-NEXT: movabsq $-6148914691236517206, %r15 # imm = 0xAAAAAAAAAAAAAAAA
; X64-NEXT: andq %r15, %rax
; X64-NEXT: shrq %rax
; X64-NEXT: leaq (%rax,%r10,2), %r10
; X64-NEXT: bswapq %r9
; X64-NEXT: movq %r9, %rax
; X64-NEXT: andq %r13, %rax
; X64-NEXT: shlq $4, %rax
; X64-NEXT: andq %rdi, %r9
; X64-NEXT: shrq $4, %r9
; X64-NEXT: orq %rax, %r9
; X64-NEXT: movq %r9, %rax
; X64-NEXT: andq %r11, %rax
; X64-NEXT: andq %r14, %r9
; X64-NEXT: shrq $2, %r9
; X64-NEXT: leaq (%r9,%rax,4), %rax
; X64-NEXT: movq %rax, %r9
; X64-NEXT: andq %rbx, %r9
; X64-NEXT: andq %r15, %rax
; X64-NEXT: shrq %rax
; X64-NEXT: leaq (%rax,%r9,2), %r9
; X64-NEXT: bswapq %r8
; X64-NEXT: movq %r8, %rax
; X64-NEXT: andq %r13, %rax
; X64-NEXT: shlq $4, %rax
; X64-NEXT: andq %rdi, %r8
; X64-NEXT: shrq $4, %r8
; X64-NEXT: orq %rax, %r8
; X64-NEXT: movq %r8, %rax
; X64-NEXT: andq %r11, %rax
; X64-NEXT: andq %r14, %r8
; X64-NEXT: shrq $2, %r8
; X64-NEXT: leaq (%r8,%rax,4), %rax
; X64-NEXT: movq %rax, %r8
; X64-NEXT: andq %rbx, %r8
; X64-NEXT: andq %r15, %rax
; X64-NEXT: shrq %rax
; X64-NEXT: leaq (%rax,%r8,2), %r8
; X64-NEXT: bswapq %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: andq %r13, %rax
; X64-NEXT: shlq $4, %rax
; X64-NEXT: andq %rdi, %rcx
; X64-NEXT: shrq $4, %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: andq %r11, %rax
; X64-NEXT: andq %r14, %rcx
; X64-NEXT: shrq $2, %rcx
; X64-NEXT: leaq (%rcx,%rax,4), %rax
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: andq %rbx, %rcx
; X64-NEXT: andq %r15, %rax
; X64-NEXT: shrq %rax
; X64-NEXT: leaq (%rax,%rcx,2), %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: andq %r13, %rax
; X64-NEXT: shlq $4, %rax
; X64-NEXT: andq %rdi, %rdx
; X64-NEXT: shrq $4, %rdx
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: andq %r11, %rax
; X64-NEXT: andq %r14, %rdx
; X64-NEXT: shrq $2, %rdx
; X64-NEXT: leaq (%rdx,%rax,4), %rax
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: andq %rbx, %rdx
; X64-NEXT: andq %r15, %rax
; X64-NEXT: shrq %rax
; X64-NEXT: leaq (%rax,%rdx,2), %rax
; X64-NEXT: bswapq %rsi
; X64-NEXT: andq %rsi, %r13
; X64-NEXT: andq %rdi, %rsi
; X64-NEXT: shlq $4, %r13
; X64-NEXT: shrq $4, %rsi
; X64-NEXT: orq %r13, %rsi
; X64-NEXT: andq %rsi, %r11
; X64-NEXT: andq %r14, %rsi
; X64-NEXT: shrq $2, %rsi
; X64-NEXT: leaq (%rsi,%r11,4), %rdx
; X64-NEXT: andq %rdx, %rbx
; X64-NEXT: andq %r15, %rdx
; X64-NEXT: shrq %rdx
; X64-NEXT: leaq (%rdx,%rbx,2), %rdx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
; X64-NEXT: shrdq $48, %rdi, %rsi
; X64-NEXT: shrdq $48, %rbp, %rdi
; X64-NEXT: shrdq $48, %r10, %rbp
; X64-NEXT: shrdq $48, %r9, %r10
; X64-NEXT: shrdq $48, %r8, %r9
; X64-NEXT: shrdq $48, %rcx, %r8
; X64-NEXT: shrdq $48, %rax, %rcx
; X64-NEXT: shrdq $48, %rdx, %rax
; X64-NEXT: movq %rax, 56(%r12)
; X64-NEXT: movq %rcx, 48(%r12)
; X64-NEXT: movq %r8, 40(%r12)
; X64-NEXT: movq %r9, 32(%r12)
; X64-NEXT: movq %r10, 24(%r12)
; X64-NEXT: movq %rbp, 16(%r12)
; X64-NEXT: movq %rdi, 8(%r12)
; X64-NEXT: movq %rsi, (%r12)
; X64-NEXT: shrq $48, %rdx
; X64-NEXT: movw %dx, 64(%r12)
; X64-NEXT: movq %r12, %rax
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
; X64-NEXT: popq %r14
; X64-NEXT: popq %r15
; X64-NEXT: popq %rbp
; X64-NEXT: retq
%Z = call i528 @llvm.bitreverse.i528(i528 %A)
ret i528 %Z
}
declare i528 @llvm.bitreverse.i528(i528)

View File

@ -206,3 +206,153 @@ define i64 @finally_useful_bswap() {
ret i64 %swapped ret i64 %swapped
} }
; Make sure we don't assert during type legalization promoting a large
; bswap due to the need for a large shift that won't fit in the i8 returned
; from getShiftAmountTy.
define i528 @large_promotion(i528 %A) nounwind {
; CHECK-LABEL: large_promotion:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: subl $44, %esp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %edx
; CHECK-NEXT: shrdl $16, %edx, %ecx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %esi
; CHECK-NEXT: shrdl $16, %esi, %edx
; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %edi
; CHECK-NEXT: shrdl $16, %edi, %esi
; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %ebx
; CHECK-NEXT: shrdl $16, %ebx, %edi
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %ebp
; CHECK-NEXT: shrdl $16, %ebp, %ebx
; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %ebp
; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: shrdl $16, %eax, %ecx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: shrdl $16, %eax, %ecx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: bswapl %ebp
; CHECK-NEXT: shrdl $16, %ebp, %eax
; CHECK-NEXT: movl %eax, (%esp) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: bswapl %ebx
; CHECK-NEXT: shrdl $16, %ebx, %ebp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: bswapl %esi
; CHECK-NEXT: shrdl $16, %esi, %ebx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: bswapl %edx
; CHECK-NEXT: shrdl $16, %edx, %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: bswapl %edi
; CHECK-NEXT: shrdl $16, %edi, %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %ecx, 60(%eax)
; CHECK-NEXT: movl %edx, 56(%eax)
; CHECK-NEXT: movl %esi, 52(%eax)
; CHECK-NEXT: movl %ebx, 48(%eax)
; CHECK-NEXT: movl %ebp, 44(%eax)
; CHECK-NEXT: movl (%esp), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 40(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 36(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 32(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 28(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 24(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 20(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 16(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 12(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 8(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 4(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, (%eax)
; CHECK-NEXT: shrl $16, %edi
; CHECK-NEXT: movw %di, 64(%eax)
; CHECK-NEXT: addl $44, %esp
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl $4
;
; CHECK64-LABEL: large_promotion:
; CHECK64: # %bb.0:
; CHECK64-NEXT: pushq %rbx
; CHECK64-NEXT: movq %rdi, %rax
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %r11
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %rdi
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; CHECK64-NEXT: bswapq %r10
; CHECK64-NEXT: bswapq %rdi
; CHECK64-NEXT: shrdq $48, %rdi, %r10
; CHECK64-NEXT: bswapq %r11
; CHECK64-NEXT: shrdq $48, %r11, %rdi
; CHECK64-NEXT: bswapq %rbx
; CHECK64-NEXT: shrdq $48, %rbx, %r11
; CHECK64-NEXT: bswapq %r9
; CHECK64-NEXT: shrdq $48, %r9, %rbx
; CHECK64-NEXT: bswapq %r8
; CHECK64-NEXT: shrdq $48, %r8, %r9
; CHECK64-NEXT: bswapq %rcx
; CHECK64-NEXT: shrdq $48, %rcx, %r8
; CHECK64-NEXT: bswapq %rdx
; CHECK64-NEXT: shrdq $48, %rdx, %rcx
; CHECK64-NEXT: bswapq %rsi
; CHECK64-NEXT: shrdq $48, %rsi, %rdx
; CHECK64-NEXT: shrq $48, %rsi
; CHECK64-NEXT: movq %rdx, 56(%rax)
; CHECK64-NEXT: movq %rcx, 48(%rax)
; CHECK64-NEXT: movq %r8, 40(%rax)
; CHECK64-NEXT: movq %r9, 32(%rax)
; CHECK64-NEXT: movq %rbx, 24(%rax)
; CHECK64-NEXT: movq %r11, 16(%rax)
; CHECK64-NEXT: movq %rdi, 8(%rax)
; CHECK64-NEXT: movq %r10, (%rax)
; CHECK64-NEXT: movw %si, 64(%rax)
; CHECK64-NEXT: popq %rbx
; CHECK64-NEXT: retq
%Z = call i528 @llvm.bswap.i528(i528 %A)
ret i528 %Z
}
declare i528 @llvm.bswap.i528(i528)