forked from OSchip/llvm-project
[DAGCombine] Fix for a load combine bug with non-zero offset patterns on BE targets
This pattern is essentially an i16 load from the p+1 address:
  %p1.i16 = bitcast i8* %p to i16*
  %p2.i8 = getelementptr i8, i8* %p, i64 2
  %v1 = load i16, i16* %p1.i16
  %v2.i8 = load i8, i8* %p2.i8
  %v2 = zext i8 %v2.i8 to i16
  %v1.shl = shl i16 %v1, 8
  %res = or i16 %v1.shl, %v2
The current implementation would identify the %v1 load as the first byte load and would mistakenly emit an i16 load from the %p1.i16 address. This patch adds a check that the first byte is loaded from a zero offset of the first load address. This way this address can be used as the base address for the combined value. Otherwise just give up combining. llvm-svn: 296336
This commit is contained in:
parent
4b2b6bfb97
commit
f7196c8d9e
|
@ -4593,6 +4593,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
|
||||||
assert((BigEndian != LittleEndian) && "should be either or");
|
assert((BigEndian != LittleEndian) && "should be either or");
|
||||||
assert(FirstByteProvider && "must be set");
|
assert(FirstByteProvider && "must be set");
|
||||||
|
|
||||||
|
// Ensure that the first byte is loaded from zero offset of the first load.
|
||||||
|
// So the combined value can be loaded from the first load address.
|
||||||
|
if (MemoryByteOffset(*FirstByteProvider) != 0)
|
||||||
|
return SDValue();
|
||||||
LoadSDNode *FirstLoad = FirstByteProvider->Load;
|
LoadSDNode *FirstLoad = FirstByteProvider->Load;
|
||||||
|
|
||||||
// The node we are looking at matches with the pattern, check if we can
|
// The node we are looking at matches with the pattern, check if we can
|
||||||
|
|
|
@ -563,3 +563,26 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
|
||||||
%tmp8 = or i32 %tmp7, %tmp30
|
%tmp8 = or i32 %tmp7, %tmp30
|
||||||
ret i32 %tmp8
|
ret i32 %tmp8
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; i8* p;
|
||||||
|
; i16* p1.i16 = (i16*) p;
|
||||||
|
; (p1.i16[0] << 8) | ((i16) p[2])
|
||||||
|
;
|
||||||
|
; This is essentially an i16 load from p[1], but we don't fold the pattern now
|
||||||
|
; because in the original DAG we don't have p[1] address available
|
||||||
|
define i16 @load_i16_from_nonzero_offset(i8* %p) {
|
||||||
|
; CHECK-LABEL: load_i16_from_nonzero_offset:
|
||||||
|
; CHECK: ldrh w8, [x0]
|
||||||
|
; CHECK-NEXT: ldrb w0, [x0, #2]
|
||||||
|
; CHECK-NEXT: bfi w0, w8, #8, #24
|
||||||
|
; CHECK-NEXT: ret
|
||||||
|
|
||||||
|
%p1.i16 = bitcast i8* %p to i16*
|
||||||
|
%p2.i8 = getelementptr i8, i8* %p, i64 2
|
||||||
|
%v1 = load i16, i16* %p1.i16
|
||||||
|
%v2.i8 = load i8, i8* %p2.i8
|
||||||
|
%v2 = zext i8 %v2.i8 to i16
|
||||||
|
%v1.shl = shl i16 %v1, 8
|
||||||
|
%res = or i16 %v1.shl, %v2
|
||||||
|
ret i16 %res
|
||||||
|
}
|
||||||
|
|
|
@ -753,3 +753,32 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
|
||||||
%tmp8 = or i32 %tmp7, %tmp30
|
%tmp8 = or i32 %tmp7, %tmp30
|
||||||
ret i32 %tmp8
|
ret i32 %tmp8
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; i8* p;
|
||||||
|
; i16* p1.i16 = (i16*) p;
|
||||||
|
; (p1.i16[0] << 8) | ((i16) p[2])
|
||||||
|
;
|
||||||
|
; This is essentially an i16 load from p[1], but we don't fold the pattern now
|
||||||
|
; because in the original DAG we don't have p[1] address available
|
||||||
|
define i16 @load_i16_from_nonzero_offset(i8* %p) {
|
||||||
|
; CHECK-LABEL: load_i16_from_nonzero_offset:
|
||||||
|
; CHECK: ldrh r1, [r0]
|
||||||
|
; CHECK-NEXT: ldrb r0, [r0, #2]
|
||||||
|
; CHECK-NEXT: orr r0, r0, r1, lsl #8
|
||||||
|
; CHECK-NEXT: mov pc, lr
|
||||||
|
;
|
||||||
|
; CHECK-ARMv6-LABEL: load_i16_from_nonzero_offset:
|
||||||
|
; CHECK-ARMv6: ldrh r1, [r0]
|
||||||
|
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #2]
|
||||||
|
; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
|
||||||
|
; CHECK-ARMv6-NEXT: bx lr
|
||||||
|
|
||||||
|
%p1.i16 = bitcast i8* %p to i16*
|
||||||
|
%p2.i8 = getelementptr i8, i8* %p, i64 2
|
||||||
|
%v1 = load i16, i16* %p1.i16
|
||||||
|
%v2.i8 = load i8, i8* %p2.i8
|
||||||
|
%v2 = zext i8 %v2.i8 to i16
|
||||||
|
%v1.shl = shl i16 %v1, 8
|
||||||
|
%res = or i16 %v1.shl, %v2
|
||||||
|
ret i16 %res
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue