From f7196c8d9ecf17f0e2834aba94bc9dfc7c46b718 Mon Sep 17 00:00:00 2001 From: Artur Pilipenko Date: Mon, 27 Feb 2017 13:04:23 +0000 Subject: [PATCH] [DAGCombine] Fix for a load combine bug with non-zero offset patterns on BE targets This pattern is essentially an i16 load from p+1 address: %p1.i16 = bitcast i8* %p to i16* %p2.i8 = getelementptr i8, i8* %p, i64 2 %v1 = load i16, i16* %p1.i16 %v2.i8 = load i8, i8* %p2.i8 %v2 = zext i8 %v2.i8 to i16 %v1.shl = shl i16 %v1, 8 %res = or i16 %v1.shl, %v2 Current implementation would identify %v1 load as the first byte load and would mistakenly emit an i16 load from %p1.i16 address. This patch adds a check that the first byte is loaded from zero offset of the first load address. This way this address can be used as the base address for the combined value. Otherwise just give up combining. llvm-svn: 296336 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++ .../AArch64/load-combine-big-endian.ll | 23 +++++++++++++++ .../CodeGen/ARM/load-combine-big-endian.ll | 29 +++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 26cb3a69cf6e..af3d17059f0a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4593,6 +4593,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { assert((BigEndian != LittleEndian) && "should be either or"); assert(FirstByteProvider && "must be set"); + // Ensure that the first byte is loaded from zero offset of the first load. + // So the combined value can be loaded from the first load address. 
+ if (MemoryByteOffset(*FirstByteProvider) != 0) + return SDValue(); LoadSDNode *FirstLoad = FirstByteProvider->Load; // The node we are looking at matches with the pattern, check if we can diff --git a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll index 8e533b1fbea5..e60e86a4052e 100644 --- a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll +++ b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll @@ -563,3 +563,26 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { %tmp8 = or i32 %tmp7, %tmp30 ret i32 %tmp8 } + +; i8* p; +; i16* p1.i16 = (i16*) p; +; (p1.i16[0] << 8) | ((i16) p[2]) +; +; This is essentially an i16 load from p[1], but we don't fold the pattern now +; because in the original DAG we don't have p[1] address available +define i16 @load_i16_from_nonzero_offset(i8* %p) { +; CHECK-LABEL: load_i16_from_nonzero_offset: +; CHECK: ldrh w8, [x0] +; CHECK-NEXT: ldrb w0, [x0, #2] +; CHECK-NEXT: bfi w0, w8, #8, #24 +; CHECK-NEXT: ret + + %p1.i16 = bitcast i8* %p to i16* + %p2.i8 = getelementptr i8, i8* %p, i64 2 + %v1 = load i16, i16* %p1.i16 + %v2.i8 = load i8, i8* %p2.i8 + %v2 = zext i8 %v2.i8 to i16 + %v1.shl = shl i16 %v1, 8 + %res = or i16 %v1.shl, %v2 + ret i16 %res +} diff --git a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll index 047c732183e9..4068be9527bd 100644 --- a/llvm/test/CodeGen/ARM/load-combine-big-endian.ll +++ b/llvm/test/CodeGen/ARM/load-combine-big-endian.ll @@ -753,3 +753,32 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { %tmp8 = or i32 %tmp7, %tmp30 ret i32 %tmp8 } + +; i8* p; +; i16* p1.i16 = (i16*) p; +; (p1.i16[0] << 8) | ((i16) p[2]) +; +; This is essentially an i16 load from p[1], but we don't fold the pattern now +; because in the original DAG we don't have p[1] address available +define i16 @load_i16_from_nonzero_offset(i8* %p) { +; CHECK-LABEL: load_i16_from_nonzero_offset: +; CHECK: ldrh r1, 
[r0] +; CHECK-NEXT: ldrb r0, [r0, #2] +; CHECK-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-NEXT: mov pc, lr +; +; CHECK-ARMv6-LABEL: load_i16_from_nonzero_offset: +; CHECK-ARMv6: ldrh r1, [r0] +; CHECK-ARMv6-NEXT: ldrb r0, [r0, #2] +; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8 +; CHECK-ARMv6-NEXT: bx lr + + %p1.i16 = bitcast i8* %p to i16* + %p2.i8 = getelementptr i8, i8* %p, i64 2 + %v1 = load i16, i16* %p1.i16 + %v2.i8 = load i8, i8* %p2.i8 + %v2 = zext i8 %v2.i8 to i16 + %v1.shl = shl i16 %v1, 8 + %res = or i16 %v1.shl, %v2 + ret i16 %res +}