[LegalizeTypes] Prevent an assertion from PromoteIntRes_BSWAP and PromoteIntRes_BITREVERSE if the shift amount is too large for the VT returned by getShiftAmountTy

Summary:
getShiftAmountTy for X86 returns MVT::i8. If a BSWAP or BITREVERSE is created that requires promotion and the difference between the bit widths of the original VT and the promoted VT is more than 255, then we won't be able to create the shift amount constant.

This patch adds a check to replace the result from getShiftAmountTy with MVT::i32 if the difference won't fit. This should get legalized later when the shift is ultimately expanded, since it's clearly an illegal type that we're only promoting to make it a power-of-2 bit width. Alternatively, we could base the decision completely on the largest shift amount the promoted VT could use.

Vectors should be immune here because getShiftAmountTy always returns the incoming VT for vectors. Only the scalar shift amount can be changed by the targets.

Reviewers: eli.friedman, RKSimon, spatel

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D53232

llvm-svn: 344460
This commit is contained in:
Craig Topper 2018-10-13 17:47:20 +00:00
parent ffde98de21
commit 189e5b4ab6
3 changed files with 788 additions and 8 deletions

View File

@ -311,6 +311,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
CreateStackStoreLoad(InOp, OutVT));
}
// Picks the value type used for the constant shift amount that the
// BSWAP/BITREVERSE promotion code emits.
//
// Val is the shift amount that has to be representable, and VT is the type
// handed to TLI.getShiftAmountTy(). The target's answer is returned whenever
// its scalar width has enough bits for Val; otherwise MVT::i32 is returned as
// a safe stand-in, on the expectation that it gets legalized when the shift
// itself is expanded.
static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT,
                                       const TargetLowering &TLI,
                                       SelectionDAG &DAG) {
  const EVT PreferredVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

  // Bits required for Val, computed as Log2_32(Val) + 1.
  const unsigned BitsNeeded = Log2_32(Val) + 1;

  // The target's preferred type is wide enough: use it as-is.
  if (BitsNeeded <= PreferredVT.getScalarSizeInBits())
    return PreferredVT;

  // Too narrow for this amount; fall back to something safe.
  return MVT::i32;
}
SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
EVT OVT = N->getValueType(0);
@ -318,10 +331,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
return DAG.getNode(
ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
DAG.getConstant(DiffBits, dl,
TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
@ -331,10 +343,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
return DAG.getNode(
ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
DAG.getConstant(DiffBits, dl,
TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
return DAG.getNode(ISD::SRL, dl, NVT,
DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {

View File

@ -523,3 +523,621 @@ define <2 x i16> @undef_v2i16() {
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
ret <2 x i16> %b
}
; Make sure we don't assert during type legalization promoting a large
; bitreverse due to the need for a large shift that won't fit in the i8 returned
; from getShiftAmountTy.
define i528 @large_promotion(i528 %A) nounwind {
; X86-LABEL: large_promotion:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $56, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: bswapl %ebx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: andl $252645135, %ebp # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ebp
; X86-NEXT: andl $-252645136, %ebx # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %ebx
; X86-NEXT: orl %ebp, %ebx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: andl $858993459, %ebp # imm = 0x33333333
; X86-NEXT: andl $-858993460, %ebx # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %ebx
; X86-NEXT: leal (%ebx,%ebp,4), %ebx
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: andl $1431633920, %ebp # imm = 0x55550000
; X86-NEXT: andl $-1431699456, %ebx # imm = 0xAAAA0000
; X86-NEXT: shrl %ebx
; X86-NEXT: leal (%ebx,%ebp,2), %ebx
; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X86-NEXT: bswapl %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ebx
; X86-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %edi
; X86-NEXT: orl %ebx, %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: andl $858993459, %ebx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %edi
; X86-NEXT: leal (%edi,%ebx,4), %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: andl $1431655765, %ebx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %edi # imm = 0xAAAAAAAA
; X86-NEXT: shrl %edi
; X86-NEXT: leal (%edi,%ebx,2), %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: bswapl %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %edi
; X86-NEXT: andl $-252645136, %esi # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %esi
; X86-NEXT: orl %edi, %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: andl $858993459, %edi # imm = 0x33333333
; X86-NEXT: andl $-858993460, %esi # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %esi
; X86-NEXT: leal (%esi,%edi,4), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: andl $1431655765, %edi # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %esi # imm = 0xAAAAAAAA
; X86-NEXT: shrl %esi
; X86-NEXT: leal (%esi,%edi,2), %ebx
; X86-NEXT: bswapl %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %esi
; X86-NEXT: andl $-252645136, %edx # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %edx
; X86-NEXT: orl %esi, %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $858993459, %esi # imm = 0x33333333
; X86-NEXT: andl $-858993460, %edx # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %edx
; X86-NEXT: leal (%edx,%esi,4), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $1431655765, %esi # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %edx # imm = 0xAAAAAAAA
; X86-NEXT: shrl %edx
; X86-NEXT: leal (%edx,%esi,2), %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: bswapl %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: andl $-252645136, %ecx # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %ecx
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %ecx # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %ecx
; X86-NEXT: leal (%ecx,%edx,4), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %ecx # imm = 0xAAAAAAAA
; X86-NEXT: shrl %ecx
; X86-NEXT: leal (%ecx,%edx,2), %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0
; X86-NEXT: shrl $4, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC
; X86-NEXT: shrl $2, %eax
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%ecx,2), %edx
; X86-NEXT: movl (%esp), %esi # 4-byte Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %esi
; X86-NEXT: shrdl $16, %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %ebx
; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: shrdl $16, %ebp, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: shrdl $16, %ebx, %ebp
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ebx
; X86-NEXT: shrdl $16, %edi, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shrdl $16, %edx, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edi, 60(%eax)
; X86-NEXT: movl %ecx, 56(%eax)
; X86-NEXT: movl %ebx, 52(%eax)
; X86-NEXT: movl %ebp, 48(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 44(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 40(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 36(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 32(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 28(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 24(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 20(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 16(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: shrl $16, %edx
; X86-NEXT: movw %dx, 64(%eax)
; X86-NEXT: addl $56, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; X64-LABEL: large_promotion:
; X64: # %bb.0:
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %r15
; X64-NEXT: pushq %r14
; X64-NEXT: pushq %r13
; X64-NEXT: pushq %r12
; X64-NEXT: pushq %rbx
; X64-NEXT: movq %rdi, %r12
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; X64-NEXT: bswapq %rbx
; X64-NEXT: movabsq $1085102592571150095, %r13 # imm = 0xF0F0F0F0F0F0F0F
; X64-NEXT: movq %rbx, %r10
; X64-NEXT: andq %r13, %r10
; X64-NEXT: shlq $4, %r10
; X64-NEXT: movabsq $-1085102592571150096, %rax # imm = 0xF0F0F0F0F0F0F0F0
; X64-NEXT: andq %rax, %rbx
; X64-NEXT: shrq $4, %rbx
; X64-NEXT: orq %r10, %rbx
; X64-NEXT: movabsq $3689348814741910323, %r11 # imm = 0x3333333333333333
; X64-NEXT: movq %rbx, %r10
; X64-NEXT: andq %r11, %r10
; X64-NEXT: movabsq $-3689348814741910324, %r14 # imm = 0xCCCCCCCCCCCCCCCC
; X64-NEXT: andq %r14, %rbx
; X64-NEXT: shrq $2, %rbx
; X64-NEXT: leaq (%rbx,%r10,4), %r10
; X64-NEXT: movabsq $6148820866244280320, %rbx # imm = 0x5555000000000000
; X64-NEXT: andq %r10, %rbx
; X64-NEXT: movabsq $-6149102341220990976, %rdi # imm = 0xAAAA000000000000
; X64-NEXT: andq %r10, %rdi
; X64-NEXT: shrq %rdi
; X64-NEXT: leaq (%rdi,%rbx,2), %rdi
; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: bswapq %rbp
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: andq %r13, %rdi
; X64-NEXT: shlq $4, %rdi
; X64-NEXT: andq %rax, %rbp
; X64-NEXT: shrq $4, %rbp
; X64-NEXT: orq %rdi, %rbp
; X64-NEXT: movq %rbp, %rdi
; X64-NEXT: andq %r11, %rdi
; X64-NEXT: andq %r14, %rbp
; X64-NEXT: shrq $2, %rbp
; X64-NEXT: leaq (%rbp,%rdi,4), %rbp
; X64-NEXT: movabsq $6148914691236517205, %rbx # imm = 0x5555555555555555
; X64-NEXT: movq %rbp, %r10
; X64-NEXT: andq %rbx, %r10
; X64-NEXT: movabsq $-6148914691236517206, %rdi # imm = 0xAAAAAAAAAAAAAAAA
; X64-NEXT: andq %rdi, %rbp
; X64-NEXT: shrq %rbp
; X64-NEXT: leaq (%rbp,%r10,2), %rbp
; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp
; X64-NEXT: bswapq %rbp
; X64-NEXT: movq %rbp, %r10
; X64-NEXT: andq %r13, %r10
; X64-NEXT: shlq $4, %r10
; X64-NEXT: andq %rax, %rbp
; X64-NEXT: movq %rax, %r15
; X64-NEXT: shrq $4, %rbp
; X64-NEXT: orq %r10, %rbp
; X64-NEXT: movq %rbp, %r10
; X64-NEXT: andq %r11, %r10
; X64-NEXT: andq %r14, %rbp
; X64-NEXT: shrq $2, %rbp
; X64-NEXT: leaq (%rbp,%r10,4), %rbp
; X64-NEXT: movq %rbp, %r10
; X64-NEXT: andq %rbx, %r10
; X64-NEXT: andq %rdi, %rbp
; X64-NEXT: shrq %rbp
; X64-NEXT: leaq (%rbp,%r10,2), %rbp
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: bswapq %r10
; X64-NEXT: movq %r10, %rax
; X64-NEXT: andq %r13, %rax
; X64-NEXT: shlq $4, %rax
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: andq %r15, %r10
; X64-NEXT: shrq $4, %r10
; X64-NEXT: orq %rax, %r10
; X64-NEXT: movq %r10, %rax
; X64-NEXT: andq %r11, %rax
; X64-NEXT: andq %r14, %r10
; X64-NEXT: shrq $2, %r10
; X64-NEXT: leaq (%r10,%rax,4), %rax
; X64-NEXT: movq %rax, %r10
; X64-NEXT: andq %rbx, %r10
; X64-NEXT: movabsq $-6148914691236517206, %r15 # imm = 0xAAAAAAAAAAAAAAAA
; X64-NEXT: andq %r15, %rax
; X64-NEXT: shrq %rax
; X64-NEXT: leaq (%rax,%r10,2), %r10
; X64-NEXT: bswapq %r9
; X64-NEXT: movq %r9, %rax
; X64-NEXT: andq %r13, %rax
; X64-NEXT: shlq $4, %rax
; X64-NEXT: andq %rdi, %r9
; X64-NEXT: shrq $4, %r9
; X64-NEXT: orq %rax, %r9
; X64-NEXT: movq %r9, %rax
; X64-NEXT: andq %r11, %rax
; X64-NEXT: andq %r14, %r9
; X64-NEXT: shrq $2, %r9
; X64-NEXT: leaq (%r9,%rax,4), %rax
; X64-NEXT: movq %rax, %r9
; X64-NEXT: andq %rbx, %r9
; X64-NEXT: andq %r15, %rax
; X64-NEXT: shrq %rax
; X64-NEXT: leaq (%rax,%r9,2), %r9
; X64-NEXT: bswapq %r8
; X64-NEXT: movq %r8, %rax
; X64-NEXT: andq %r13, %rax
; X64-NEXT: shlq $4, %rax
; X64-NEXT: andq %rdi, %r8
; X64-NEXT: shrq $4, %r8
; X64-NEXT: orq %rax, %r8
; X64-NEXT: movq %r8, %rax
; X64-NEXT: andq %r11, %rax
; X64-NEXT: andq %r14, %r8
; X64-NEXT: shrq $2, %r8
; X64-NEXT: leaq (%r8,%rax,4), %rax
; X64-NEXT: movq %rax, %r8
; X64-NEXT: andq %rbx, %r8
; X64-NEXT: andq %r15, %rax
; X64-NEXT: shrq %rax
; X64-NEXT: leaq (%rax,%r8,2), %r8
; X64-NEXT: bswapq %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: andq %r13, %rax
; X64-NEXT: shlq $4, %rax
; X64-NEXT: andq %rdi, %rcx
; X64-NEXT: shrq $4, %rcx
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: andq %r11, %rax
; X64-NEXT: andq %r14, %rcx
; X64-NEXT: shrq $2, %rcx
; X64-NEXT: leaq (%rcx,%rax,4), %rax
; X64-NEXT: movq %rax, %rcx
; X64-NEXT: andq %rbx, %rcx
; X64-NEXT: andq %r15, %rax
; X64-NEXT: shrq %rax
; X64-NEXT: leaq (%rax,%rcx,2), %rcx
; X64-NEXT: bswapq %rdx
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: andq %r13, %rax
; X64-NEXT: shlq $4, %rax
; X64-NEXT: andq %rdi, %rdx
; X64-NEXT: shrq $4, %rdx
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: movq %rdx, %rax
; X64-NEXT: andq %r11, %rax
; X64-NEXT: andq %r14, %rdx
; X64-NEXT: shrq $2, %rdx
; X64-NEXT: leaq (%rdx,%rax,4), %rax
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: andq %rbx, %rdx
; X64-NEXT: andq %r15, %rax
; X64-NEXT: shrq %rax
; X64-NEXT: leaq (%rax,%rdx,2), %rax
; X64-NEXT: bswapq %rsi
; X64-NEXT: andq %rsi, %r13
; X64-NEXT: andq %rdi, %rsi
; X64-NEXT: shlq $4, %r13
; X64-NEXT: shrq $4, %rsi
; X64-NEXT: orq %r13, %rsi
; X64-NEXT: andq %rsi, %r11
; X64-NEXT: andq %r14, %rsi
; X64-NEXT: shrq $2, %rsi
; X64-NEXT: leaq (%rsi,%r11,4), %rdx
; X64-NEXT: andq %rdx, %rbx
; X64-NEXT: andq %r15, %rdx
; X64-NEXT: shrq %rdx
; X64-NEXT: leaq (%rdx,%rbx,2), %rdx
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
; X64-NEXT: shrdq $48, %rdi, %rsi
; X64-NEXT: shrdq $48, %rbp, %rdi
; X64-NEXT: shrdq $48, %r10, %rbp
; X64-NEXT: shrdq $48, %r9, %r10
; X64-NEXT: shrdq $48, %r8, %r9
; X64-NEXT: shrdq $48, %rcx, %r8
; X64-NEXT: shrdq $48, %rax, %rcx
; X64-NEXT: shrdq $48, %rdx, %rax
; X64-NEXT: movq %rax, 56(%r12)
; X64-NEXT: movq %rcx, 48(%r12)
; X64-NEXT: movq %r8, 40(%r12)
; X64-NEXT: movq %r9, 32(%r12)
; X64-NEXT: movq %r10, 24(%r12)
; X64-NEXT: movq %rbp, 16(%r12)
; X64-NEXT: movq %rdi, 8(%r12)
; X64-NEXT: movq %rsi, (%r12)
; X64-NEXT: shrq $48, %rdx
; X64-NEXT: movw %dx, 64(%r12)
; X64-NEXT: movq %r12, %rax
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
; X64-NEXT: popq %r14
; X64-NEXT: popq %r15
; X64-NEXT: popq %rbp
; X64-NEXT: retq
%Z = call i528 @llvm.bitreverse.i528(i528 %A)
ret i528 %Z
}
declare i528 @llvm.bitreverse.i528(i528)

View File

@ -206,3 +206,153 @@ define i64 @finally_useful_bswap() {
ret i64 %swapped
}
; Make sure we don't assert during type legalization promoting a large
; bswap due to the need for a large shift that won't fit in the i8 returned
; from getShiftAmountTy.
define i528 @large_promotion(i528 %A) nounwind {
; CHECK-LABEL: large_promotion:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: subl $44, %esp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %edx
; CHECK-NEXT: shrdl $16, %edx, %ecx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %esi
; CHECK-NEXT: shrdl $16, %esi, %edx
; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %edi
; CHECK-NEXT: shrdl $16, %edi, %esi
; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %ebx
; CHECK-NEXT: shrdl $16, %ebx, %edi
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: bswapl %ebp
; CHECK-NEXT: shrdl $16, %ebp, %ebx
; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %ebp
; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: shrdl $16, %eax, %ecx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: shrdl $16, %eax, %ecx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: bswapl %ebp
; CHECK-NEXT: shrdl $16, %ebp, %eax
; CHECK-NEXT: movl %eax, (%esp) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: bswapl %ebx
; CHECK-NEXT: shrdl $16, %ebx, %ebp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: bswapl %esi
; CHECK-NEXT: shrdl $16, %esi, %ebx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: bswapl %edx
; CHECK-NEXT: shrdl $16, %edx, %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: bswapl %edi
; CHECK-NEXT: shrdl $16, %edi, %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %ecx, 60(%eax)
; CHECK-NEXT: movl %edx, 56(%eax)
; CHECK-NEXT: movl %esi, 52(%eax)
; CHECK-NEXT: movl %ebx, 48(%eax)
; CHECK-NEXT: movl %ebp, 44(%eax)
; CHECK-NEXT: movl (%esp), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 40(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 36(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 32(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 28(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 24(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 20(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 16(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 12(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 8(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 4(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, (%eax)
; CHECK-NEXT: shrl $16, %edi
; CHECK-NEXT: movw %di, 64(%eax)
; CHECK-NEXT: addl $44, %esp
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl $4
;
; CHECK64-LABEL: large_promotion:
; CHECK64: # %bb.0:
; CHECK64-NEXT: pushq %rbx
; CHECK64-NEXT: movq %rdi, %rax
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %r11
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %rdi
; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; CHECK64-NEXT: bswapq %r10
; CHECK64-NEXT: bswapq %rdi
; CHECK64-NEXT: shrdq $48, %rdi, %r10
; CHECK64-NEXT: bswapq %r11
; CHECK64-NEXT: shrdq $48, %r11, %rdi
; CHECK64-NEXT: bswapq %rbx
; CHECK64-NEXT: shrdq $48, %rbx, %r11
; CHECK64-NEXT: bswapq %r9
; CHECK64-NEXT: shrdq $48, %r9, %rbx
; CHECK64-NEXT: bswapq %r8
; CHECK64-NEXT: shrdq $48, %r8, %r9
; CHECK64-NEXT: bswapq %rcx
; CHECK64-NEXT: shrdq $48, %rcx, %r8
; CHECK64-NEXT: bswapq %rdx
; CHECK64-NEXT: shrdq $48, %rdx, %rcx
; CHECK64-NEXT: bswapq %rsi
; CHECK64-NEXT: shrdq $48, %rsi, %rdx
; CHECK64-NEXT: shrq $48, %rsi
; CHECK64-NEXT: movq %rdx, 56(%rax)
; CHECK64-NEXT: movq %rcx, 48(%rax)
; CHECK64-NEXT: movq %r8, 40(%rax)
; CHECK64-NEXT: movq %r9, 32(%rax)
; CHECK64-NEXT: movq %rbx, 24(%rax)
; CHECK64-NEXT: movq %r11, 16(%rax)
; CHECK64-NEXT: movq %rdi, 8(%rax)
; CHECK64-NEXT: movq %r10, (%rax)
; CHECK64-NEXT: movw %si, 64(%rax)
; CHECK64-NEXT: popq %rbx
; CHECK64-NEXT: retq
%Z = call i528 @llvm.bswap.i528(i528 %A)
ret i528 %Z
}
declare i528 @llvm.bswap.i528(i528)