forked from OSchip/llvm-project
[DAGCombiner] Allow zextended load combines.
Summary: or(zext(load8(base)), zext(load8(base+1)) -> zext(load16 base) Reviewers: apilipenko, RKSimon Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70487
This commit is contained in:
parent
b30dabfe90
commit
036790f988
|
@ -6750,12 +6750,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
|
||||||
return SDValue();
|
return SDValue();
|
||||||
unsigned ByteWidth = VT.getSizeInBits() / 8;
|
unsigned ByteWidth = VT.getSizeInBits() / 8;
|
||||||
|
|
||||||
// Before legalize we can introduce too wide illegal loads which will be later
|
|
||||||
// split into legal sized loads. This enables us to combine i64 load by i8
|
|
||||||
// patterns to a couple of i32 loads on 32 bit targets.
|
|
||||||
if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
|
|
||||||
return SDValue();
|
|
||||||
|
|
||||||
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
|
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
|
||||||
auto MemoryByteOffset = [&] (ByteProvider P) {
|
auto MemoryByteOffset = [&] (ByteProvider P) {
|
||||||
assert(P.isMemory() && "Must be a memory byte provider");
|
assert(P.isMemory() && "Must be a memory byte provider");
|
||||||
|
@ -6778,11 +6772,21 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
|
||||||
// Check if all the bytes of the OR we are looking at are loaded from the same
|
// Check if all the bytes of the OR we are looking at are loaded from the same
|
||||||
// base address. Collect bytes offsets from Base address in ByteOffsets.
|
// base address. Collect bytes offsets from Base address in ByteOffsets.
|
||||||
SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
|
SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
|
||||||
for (unsigned i = 0; i < ByteWidth; i++) {
|
unsigned ZeroExtendedBytes = 0;
|
||||||
|
for (int i = ByteWidth - 1; i >= 0; --i) {
|
||||||
auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
|
auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
|
||||||
if (!P || !P->isMemory()) // All the bytes must be loaded from memory
|
if (!P)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
|
if (P->isConstantZero()) {
|
||||||
|
// It's OK for the N most significant bytes to be 0, we can just
|
||||||
|
// zero-extend the load.
|
||||||
|
if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
|
||||||
|
return SDValue();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
assert(P->isMemory() && "provenance should either be memory or zero");
|
||||||
|
|
||||||
LoadSDNode *L = P->Load;
|
LoadSDNode *L = P->Load;
|
||||||
assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
|
assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
|
||||||
!L->isIndexed() &&
|
!L->isIndexed() &&
|
||||||
|
@ -6821,9 +6825,23 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
|
||||||
assert(Base && "Base address of the accessed memory location must be set");
|
assert(Base && "Base address of the accessed memory location must be set");
|
||||||
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
|
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
|
||||||
|
|
||||||
|
bool NeedsZext = ZeroExtendedBytes > 0;
|
||||||
|
|
||||||
|
EVT MemVT =
|
||||||
|
EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
|
||||||
|
|
||||||
|
// Before legalize we can introduce too wide illegal loads which will be later
|
||||||
|
// split into legal sized loads. This enables us to combine i64 load by i8
|
||||||
|
// patterns to a couple of i32 loads on 32 bit targets.
|
||||||
|
if (LegalOperations &&
|
||||||
|
!TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
|
||||||
|
MemVT))
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
// Check if the bytes of the OR we are looking at match with either big or
|
// Check if the bytes of the OR we are looking at match with either big or
|
||||||
// little endian value load
|
// little endian value load
|
||||||
Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
|
Optional<bool> IsBigEndian = isBigEndian(
|
||||||
|
makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
|
||||||
if (!IsBigEndian.hasValue())
|
if (!IsBigEndian.hasValue())
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
|
@ -6836,7 +6854,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
|
||||||
LoadSDNode *FirstLoad = FirstByteProvider->Load;
|
LoadSDNode *FirstLoad = FirstByteProvider->Load;
|
||||||
|
|
||||||
// The node we are looking at matches with the pattern, check if we can
|
// The node we are looking at matches with the pattern, check if we can
|
||||||
// replace it with a single load and bswap if needed.
|
// replace it with a single (possibly zero-extended) load and bswap + shift if
|
||||||
|
// needed.
|
||||||
|
|
||||||
// If the load needs byte swap check if the target supports it
|
// If the load needs byte swap check if the target supports it
|
||||||
bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
|
bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
|
||||||
|
@ -6844,25 +6863,45 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
|
||||||
// Before legalize we can introduce illegal bswaps which will be later
|
// Before legalize we can introduce illegal bswaps which will be later
|
||||||
// converted to an explicit bswap sequence. This way we end up with a single
|
// converted to an explicit bswap sequence. This way we end up with a single
|
||||||
// load and byte shuffling instead of several loads and byte shuffling.
|
// load and byte shuffling instead of several loads and byte shuffling.
|
||||||
if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
|
// We do not introduce illegal bswaps when zero-extending as this tends to
|
||||||
|
// introduce too many arithmetic instructions.
|
||||||
|
if (NeedsBswap && (LegalOperations || NeedsZext) &&
|
||||||
|
!TLI.isOperationLegal(ISD::BSWAP, VT))
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
// If we need to bswap and zero extend, we have to insert a shift. Check that
|
||||||
|
// it is legal.
|
||||||
|
if (NeedsBswap && NeedsZext && LegalOperations &&
|
||||||
|
!TLI.isOperationLegal(ISD::SHL, VT))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
// Check that a load of the wide type is both allowed and fast on the target
|
// Check that a load of the wide type is both allowed and fast on the target
|
||||||
bool Fast = false;
|
bool Fast = false;
|
||||||
bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
|
bool Allowed =
|
||||||
VT, *FirstLoad->getMemOperand(), &Fast);
|
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
|
||||||
|
*FirstLoad->getMemOperand(), &Fast);
|
||||||
if (!Allowed || !Fast)
|
if (!Allowed || !Fast)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
SDValue NewLoad =
|
SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
|
||||||
DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
|
SDLoc(N), VT, Chain, FirstLoad->getBasePtr(),
|
||||||
FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
|
FirstLoad->getPointerInfo(), MemVT,
|
||||||
|
FirstLoad->getAlignment());
|
||||||
|
|
||||||
// Transfer chain users from old loads to the new load.
|
// Transfer chain users from old loads to the new load.
|
||||||
for (LoadSDNode *L : Loads)
|
for (LoadSDNode *L : Loads)
|
||||||
DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
|
DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
|
||||||
|
|
||||||
return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
|
if (!NeedsBswap)
|
||||||
|
return NewLoad;
|
||||||
|
|
||||||
|
SDValue ShiftedLoad =
|
||||||
|
NeedsZext
|
||||||
|
? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
|
||||||
|
DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
|
||||||
|
SDLoc(N), LegalOperations))
|
||||||
|
: NewLoad;
|
||||||
|
return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the target has andn, bsl, or a similar bit-select instruction,
|
// If the target has andn, bsl, or a similar bit-select instruction,
|
||||||
|
|
|
@ -445,10 +445,9 @@ define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
|
||||||
define i32 @zext_load_i32_by_i8(i32* %arg) {
|
define i32 @zext_load_i32_by_i8(i32* %arg) {
|
||||||
; CHECK-LABEL: zext_load_i32_by_i8:
|
; CHECK-LABEL: zext_load_i32_by_i8:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldrb w8, [x0]
|
; CHECK-NEXT: ldrh w8, [x0]
|
||||||
; CHECK-NEXT: ldrb w9, [x0, #1]
|
; CHECK-NEXT: lsl w8, w8, #16
|
||||||
; CHECK-NEXT: bfi w8, w9, #8, #8
|
; CHECK-NEXT: rev w0, w8
|
||||||
; CHECK-NEXT: mov w0, w8
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
|
|
||||||
%tmp = bitcast i32* %arg to i8*
|
%tmp = bitcast i32* %arg to i8*
|
||||||
|
@ -515,10 +514,7 @@ define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
|
||||||
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
|
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
|
||||||
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
|
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldrb w8, [x0, #1]
|
; CHECK-NEXT: ldrh w0, [x0]
|
||||||
; CHECK-NEXT: ldrb w9, [x0]
|
|
||||||
; CHECK-NEXT: bfi w8, w9, #8, #8
|
|
||||||
; CHECK-NEXT: mov w0, w8
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
|
|
||||||
%tmp = bitcast i32* %arg to i8*
|
%tmp = bitcast i32* %arg to i8*
|
||||||
|
|
|
@ -431,10 +431,7 @@ define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
|
||||||
define i32 @zext_load_i32_by_i8(i32* %arg) {
|
define i32 @zext_load_i32_by_i8(i32* %arg) {
|
||||||
; CHECK-LABEL: zext_load_i32_by_i8:
|
; CHECK-LABEL: zext_load_i32_by_i8:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldrb w8, [x0]
|
; CHECK-NEXT: ldrh w0, [x0]
|
||||||
; CHECK-NEXT: ldrb w9, [x0, #1]
|
|
||||||
; CHECK-NEXT: bfi w8, w9, #8, #8
|
|
||||||
; CHECK-NEXT: mov w0, w8
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
|
|
||||||
%tmp = bitcast i32* %arg to i8*
|
%tmp = bitcast i32* %arg to i8*
|
||||||
|
@ -501,10 +498,9 @@ define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
|
||||||
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
|
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
|
||||||
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
|
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: ldrb w8, [x0, #1]
|
; CHECK-NEXT: ldrh w8, [x0]
|
||||||
; CHECK-NEXT: ldrb w9, [x0]
|
; CHECK-NEXT: lsl w8, w8, #16
|
||||||
; CHECK-NEXT: bfi w8, w9, #8, #8
|
; CHECK-NEXT: rev w0, w8
|
||||||
; CHECK-NEXT: mov w0, w8
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
|
|
||||||
%tmp = bitcast i32* %arg to i8*
|
%tmp = bitcast i32* %arg to i8*
|
||||||
|
|
|
@ -824,25 +824,23 @@ define i32 @zext_load_i32_by_i8(i32* %arg) {
|
||||||
;
|
;
|
||||||
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
|
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
|
||||||
; CHECK-ARMv6: @ %bb.0:
|
; CHECK-ARMv6: @ %bb.0:
|
||||||
; CHECK-ARMv6-NEXT: ldrb r1, [r0]
|
; CHECK-ARMv6-NEXT: ldrh r0, [r0]
|
||||||
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
|
; CHECK-ARMv6-NEXT: lsl r0, r0, #16
|
||||||
; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8
|
; CHECK-ARMv6-NEXT: rev r0, r0
|
||||||
; CHECK-ARMv6-NEXT: bx lr
|
; CHECK-ARMv6-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:
|
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:
|
||||||
; CHECK-THUMBv6: @ %bb.0:
|
; CHECK-THUMBv6: @ %bb.0:
|
||||||
; CHECK-THUMBv6-NEXT: ldrb r1, [r0]
|
; CHECK-THUMBv6-NEXT: ldrh r0, [r0]
|
||||||
; CHECK-THUMBv6-NEXT: ldrb r0, [r0, #1]
|
; CHECK-THUMBv6-NEXT: lsls r0, r0, #16
|
||||||
; CHECK-THUMBv6-NEXT: lsls r0, r0, #8
|
; CHECK-THUMBv6-NEXT: rev r0, r0
|
||||||
; CHECK-THUMBv6-NEXT: adds r0, r0, r1
|
|
||||||
; CHECK-THUMBv6-NEXT: bx lr
|
; CHECK-THUMBv6-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:
|
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:
|
||||||
; CHECK-THUMBv7: @ %bb.0:
|
; CHECK-THUMBv7: @ %bb.0:
|
||||||
; CHECK-THUMBv7-NEXT: ldrb r1, [r0]
|
; CHECK-THUMBv7-NEXT: ldrh r0, [r0]
|
||||||
; CHECK-THUMBv7-NEXT: ldrb r0, [r0, #1]
|
; CHECK-THUMBv7-NEXT: lsls r0, r0, #16
|
||||||
; CHECK-THUMBv7-NEXT: lsls r0, r0, #8
|
; CHECK-THUMBv7-NEXT: rev r0, r0
|
||||||
; CHECK-THUMBv7-NEXT: adds r0, r0, r1
|
|
||||||
; CHECK-THUMBv7-NEXT: bx lr
|
; CHECK-THUMBv7-NEXT: bx lr
|
||||||
|
|
||||||
%tmp = bitcast i32* %arg to i8*
|
%tmp = bitcast i32* %arg to i8*
|
||||||
|
@ -962,32 +960,22 @@ define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
|
||||||
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
|
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
|
||||||
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
|
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
|
||||||
; CHECK: @ %bb.0:
|
; CHECK: @ %bb.0:
|
||||||
; CHECK-NEXT: ldrb r1, [r0]
|
; CHECK-NEXT: ldrh r0, [r0]
|
||||||
; CHECK-NEXT: ldrb r0, [r0, #1]
|
|
||||||
; CHECK-NEXT: orr r0, r0, r1, lsl #8
|
|
||||||
; CHECK-NEXT: mov pc, lr
|
; CHECK-NEXT: mov pc, lr
|
||||||
;
|
;
|
||||||
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
|
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
|
||||||
; CHECK-ARMv6: @ %bb.0:
|
; CHECK-ARMv6: @ %bb.0:
|
||||||
; CHECK-ARMv6-NEXT: ldrb r1, [r0]
|
; CHECK-ARMv6-NEXT: ldrh r0, [r0]
|
||||||
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
|
|
||||||
; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
|
|
||||||
; CHECK-ARMv6-NEXT: bx lr
|
; CHECK-ARMv6-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:
|
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:
|
||||||
; CHECK-THUMBv6: @ %bb.0:
|
; CHECK-THUMBv6: @ %bb.0:
|
||||||
; CHECK-THUMBv6-NEXT: ldrb r1, [r0, #1]
|
; CHECK-THUMBv6-NEXT: ldrh r0, [r0]
|
||||||
; CHECK-THUMBv6-NEXT: ldrb r0, [r0]
|
|
||||||
; CHECK-THUMBv6-NEXT: lsls r0, r0, #8
|
|
||||||
; CHECK-THUMBv6-NEXT: adds r0, r0, r1
|
|
||||||
; CHECK-THUMBv6-NEXT: bx lr
|
; CHECK-THUMBv6-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
|
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
|
||||||
; CHECK-THUMBv7: @ %bb.0:
|
; CHECK-THUMBv7: @ %bb.0:
|
||||||
; CHECK-THUMBv7-NEXT: ldrb r1, [r0, #1]
|
; CHECK-THUMBv7-NEXT: ldrh r0, [r0]
|
||||||
; CHECK-THUMBv7-NEXT: ldrb r0, [r0]
|
|
||||||
; CHECK-THUMBv7-NEXT: lsls r0, r0, #8
|
|
||||||
; CHECK-THUMBv7-NEXT: adds r0, r0, r1
|
|
||||||
; CHECK-THUMBv7-NEXT: bx lr
|
; CHECK-THUMBv7-NEXT: bx lr
|
||||||
|
|
||||||
%tmp = bitcast i32* %arg to i8*
|
%tmp = bitcast i32* %arg to i8*
|
||||||
|
|
|
@ -734,31 +734,22 @@ define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
|
||||||
define i32 @zext_load_i32_by_i8(i32* %arg) {
|
define i32 @zext_load_i32_by_i8(i32* %arg) {
|
||||||
; CHECK-LABEL: zext_load_i32_by_i8:
|
; CHECK-LABEL: zext_load_i32_by_i8:
|
||||||
; CHECK: @ %bb.0:
|
; CHECK: @ %bb.0:
|
||||||
; CHECK-NEXT: ldrb r1, [r0]
|
; CHECK-NEXT: ldrh r0, [r0]
|
||||||
; CHECK-NEXT: ldrb r0, [r0, #1]
|
|
||||||
; CHECK-NEXT: orr r0, r1, r0, lsl #8
|
|
||||||
; CHECK-NEXT: mov pc, lr
|
; CHECK-NEXT: mov pc, lr
|
||||||
;
|
;
|
||||||
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
|
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
|
||||||
; CHECK-ARMv6: @ %bb.0:
|
; CHECK-ARMv6: @ %bb.0:
|
||||||
; CHECK-ARMv6-NEXT: ldrb r1, [r0]
|
; CHECK-ARMv6-NEXT: ldrh r0, [r0]
|
||||||
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
|
|
||||||
; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #8
|
|
||||||
; CHECK-ARMv6-NEXT: bx lr
|
; CHECK-ARMv6-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:
|
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:
|
||||||
; CHECK-THUMBv6: @ %bb.0:
|
; CHECK-THUMBv6: @ %bb.0:
|
||||||
; CHECK-THUMBv6-NEXT: ldrb r1, [r0]
|
; CHECK-THUMBv6-NEXT: ldrh r0, [r0]
|
||||||
; CHECK-THUMBv6-NEXT: ldrb r0, [r0, #1]
|
|
||||||
; CHECK-THUMBv6-NEXT: lsls r0, r0, #8
|
|
||||||
; CHECK-THUMBv6-NEXT: adds r0, r0, r1
|
|
||||||
; CHECK-THUMBv6-NEXT: bx lr
|
; CHECK-THUMBv6-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:
|
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:
|
||||||
; CHECK-THUMBv7: @ %bb.0:
|
; CHECK-THUMBv7: @ %bb.0:
|
||||||
; CHECK-THUMBv7-NEXT: ldrb r1, [r0]
|
; CHECK-THUMBv7-NEXT: ldrh r0, [r0]
|
||||||
; CHECK-THUMBv7-NEXT: ldrb r0, [r0, #1]
|
|
||||||
; CHECK-THUMBv7-NEXT: orr.w r0, r1, r0, lsl #8
|
|
||||||
; CHECK-THUMBv7-NEXT: bx lr
|
; CHECK-THUMBv7-NEXT: bx lr
|
||||||
|
|
||||||
%tmp = bitcast i32* %arg to i8*
|
%tmp = bitcast i32* %arg to i8*
|
||||||
|
@ -883,24 +874,23 @@ define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
|
||||||
;
|
;
|
||||||
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
|
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
|
||||||
; CHECK-ARMv6: @ %bb.0:
|
; CHECK-ARMv6: @ %bb.0:
|
||||||
; CHECK-ARMv6-NEXT: ldrb r1, [r0]
|
; CHECK-ARMv6-NEXT: ldrh r0, [r0]
|
||||||
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
|
; CHECK-ARMv6-NEXT: lsl r0, r0, #16
|
||||||
; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
|
; CHECK-ARMv6-NEXT: rev r0, r0
|
||||||
; CHECK-ARMv6-NEXT: bx lr
|
; CHECK-ARMv6-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:
|
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:
|
||||||
; CHECK-THUMBv6: @ %bb.0:
|
; CHECK-THUMBv6: @ %bb.0:
|
||||||
; CHECK-THUMBv6-NEXT: ldrb r1, [r0, #1]
|
; CHECK-THUMBv6-NEXT: ldrh r0, [r0]
|
||||||
; CHECK-THUMBv6-NEXT: ldrb r0, [r0]
|
; CHECK-THUMBv6-NEXT: lsls r0, r0, #16
|
||||||
; CHECK-THUMBv6-NEXT: lsls r0, r0, #8
|
; CHECK-THUMBv6-NEXT: rev r0, r0
|
||||||
; CHECK-THUMBv6-NEXT: adds r0, r0, r1
|
|
||||||
; CHECK-THUMBv6-NEXT: bx lr
|
; CHECK-THUMBv6-NEXT: bx lr
|
||||||
;
|
;
|
||||||
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
|
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
|
||||||
; CHECK-THUMBv7: @ %bb.0:
|
; CHECK-THUMBv7: @ %bb.0:
|
||||||
; CHECK-THUMBv7-NEXT: ldrb r1, [r0]
|
; CHECK-THUMBv7-NEXT: ldrh r0, [r0]
|
||||||
; CHECK-THUMBv7-NEXT: ldrb r0, [r0, #1]
|
; CHECK-THUMBv7-NEXT: lsls r0, r0, #16
|
||||||
; CHECK-THUMBv7-NEXT: orr.w r0, r0, r1, lsl #8
|
; CHECK-THUMBv7-NEXT: rev r0, r0
|
||||||
; CHECK-THUMBv7-NEXT: bx lr
|
; CHECK-THUMBv7-NEXT: bx lr
|
||||||
|
|
||||||
%tmp = bitcast i32* %arg to i8*
|
%tmp = bitcast i32* %arg to i8*
|
||||||
|
|
|
@ -1119,18 +1119,12 @@ define i32 @zext_load_i32_by_i8(i32* %arg) {
|
||||||
; CHECK-LABEL: zext_load_i32_by_i8:
|
; CHECK-LABEL: zext_load_i32_by_i8:
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; CHECK-NEXT: movzbl (%eax), %ecx
|
; CHECK-NEXT: movzwl (%eax), %eax
|
||||||
; CHECK-NEXT: movzbl 1(%eax), %eax
|
|
||||||
; CHECK-NEXT: shll $8, %eax
|
|
||||||
; CHECK-NEXT: orl %ecx, %eax
|
|
||||||
; CHECK-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
;
|
;
|
||||||
; CHECK64-LABEL: zext_load_i32_by_i8:
|
; CHECK64-LABEL: zext_load_i32_by_i8:
|
||||||
; CHECK64: # %bb.0:
|
; CHECK64: # %bb.0:
|
||||||
; CHECK64-NEXT: movzbl (%rdi), %ecx
|
; CHECK64-NEXT: movzwl (%rdi), %eax
|
||||||
; CHECK64-NEXT: movzbl 1(%rdi), %eax
|
|
||||||
; CHECK64-NEXT: shll $8, %eax
|
|
||||||
; CHECK64-NEXT: orl %ecx, %eax
|
|
||||||
; CHECK64-NEXT: retq
|
; CHECK64-NEXT: retq
|
||||||
%tmp = bitcast i32* %arg to i8*
|
%tmp = bitcast i32* %arg to i8*
|
||||||
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
|
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
|
||||||
|
@ -1218,18 +1212,16 @@ define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
|
||||||
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
|
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
|
||||||
; CHECK: # %bb.0:
|
; CHECK: # %bb.0:
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; CHECK-NEXT: movzbl 1(%eax), %ecx
|
; CHECK-NEXT: movzwl (%eax), %eax
|
||||||
; CHECK-NEXT: movzbl (%eax), %eax
|
; CHECK-NEXT: shll $16, %eax
|
||||||
; CHECK-NEXT: shll $8, %eax
|
; CHECK-NEXT: bswapl %eax
|
||||||
; CHECK-NEXT: orl %ecx, %eax
|
|
||||||
; CHECK-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
;
|
;
|
||||||
; CHECK64-LABEL: zext_load_i32_by_i8_bswap:
|
; CHECK64-LABEL: zext_load_i32_by_i8_bswap:
|
||||||
; CHECK64: # %bb.0:
|
; CHECK64: # %bb.0:
|
||||||
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
|
; CHECK64-NEXT: movzwl (%rdi), %eax
|
||||||
; CHECK64-NEXT: movzbl (%rdi), %eax
|
; CHECK64-NEXT: shll $16, %eax
|
||||||
; CHECK64-NEXT: shll $8, %eax
|
; CHECK64-NEXT: bswapl %eax
|
||||||
; CHECK64-NEXT: orl %ecx, %eax
|
|
||||||
; CHECK64-NEXT: retq
|
; CHECK64-NEXT: retq
|
||||||
%tmp = bitcast i32* %arg to i8*
|
%tmp = bitcast i32* %arg to i8*
|
||||||
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
|
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
|
||||||
|
|
Loading…
Reference in New Issue