2015-09-24 15:22:38 +08:00
|
|
|
; REQUIRES: asserts
|
|
|
|
; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck -check-prefix=X86 %s
|
|
|
|
; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -debug-only=isel < %s 2>&1 | FileCheck -check-prefix=DBGDAG %s
|
|
|
|
|
|
|
|
; It's OK to merge the load / store of the first 2 components, but
|
|
|
|
; they must not be placed on the same chain after merging.
|
|
|
|
|
|
|
|
; X86-LABEL: {{^}}merge_store_partial_overlap_load:
|
2016-04-09 02:58:29 +08:00
|
|
|
; X86-DAG: movzwl ([[BASEREG:%[a-z]+]]), %e[[LO2:[a-z]+]]
|
2015-09-24 15:22:38 +08:00
|
|
|
; X86-DAG: movb 2([[BASEREG]]), [[HI1:%[a-z]+]]
|
|
|
|
|
2016-04-09 02:58:29 +08:00
|
|
|
; X86-NEXT: movw %[[LO2]], 1([[BASEREG]])
|
2015-09-24 15:22:38 +08:00
|
|
|
; X86-NEXT: movb [[HI1]], 3([[BASEREG]])
|
|
|
|
; X86-NEXT: retq
|
|
|
|
|
|
|
|
; DBGDAG-LABEL: Optimized lowered selection DAG: BB#0 'merge_store_partial_overlap_load:'
|
|
|
|
; DBGDAG: [[ENTRYTOKEN:t[0-9]+]]: ch = EntryToken
|
|
|
|
; DBGDAG-DAG: [[BASEPTR:t[0-9]+]]: i64,ch = CopyFromReg [[ENTRYTOKEN]],
|
SelectionDAGDumper: Print simple operands inline.
Print simple operands inline instead of their pointer/value number.
Simple operands are SDNodes without predecessors like Constant(FP), Register,
UNDEF. This unifies the behaviour with dumpr() which was already doing this.
Previously:
t0: ch = EntryToken
t1: i64 = Register %vreg0
t2: i64,ch = CopyFromReg t0, t1
t3: i64 = Constant<1>
t4: i64 = add t2, t3
t5: i64 = Constant<2>
t6: i64 = add t2, t5
t10: i64 = undef
t11: i8,ch = load t0, t2, t10<LD1[%tmp81]>
t12: i8,ch = load t0, t4, t10<LD1[%tmp10]>
t13: i8,ch = load t0, t6, t10<LD1[%tmp12]>
Now:
t0: ch = EntryToken
t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0
t4: i64 = add t2, Constant:i64<1>
t6: i64 = add t2, Constant:i64<2>
t11: i8,ch = load<LD1[%tmp81]> t0, t2, undef:i64
t12: i8,ch = load<LD1[%tmp10]> t0, t4, undef:i64
t13: i8,ch = load<LD1[%tmp12]> t0, t6, undef:i64
Differential Revision: http://reviews.llvm.org/D12567
llvm-svn: 248628
2015-09-26 06:27:02 +08:00
|
|
|
; DBGDAG-DAG: [[ADDPTR:t[0-9]+]]: i64 = add [[BASEPTR]], Constant:i64<2>
|
2015-09-24 15:22:38 +08:00
|
|
|
|
SelectionDAGDumper: Print simple operands inline.
Print simple operands inline instead of their pointer/value number.
Simple operands are SDNodes without predecessors like Constant(FP), Register,
UNDEF. This unifies the behaviour with dumpr() which was already doing this.
Previously:
t0: ch = EntryToken
t1: i64 = Register %vreg0
t2: i64,ch = CopyFromReg t0, t1
t3: i64 = Constant<1>
t4: i64 = add t2, t3
t5: i64 = Constant<2>
t6: i64 = add t2, t5
t10: i64 = undef
t11: i8,ch = load t0, t2, t10<LD1[%tmp81]>
t12: i8,ch = load t0, t4, t10<LD1[%tmp10]>
t13: i8,ch = load t0, t6, t10<LD1[%tmp12]>
Now:
t0: ch = EntryToken
t2: i64,ch = CopyFromReg t0, Register:i64 %vreg0
t4: i64 = add t2, Constant:i64<1>
t6: i64 = add t2, Constant:i64<2>
t11: i8,ch = load<LD1[%tmp81]> t0, t2, undef:i64
t12: i8,ch = load<LD1[%tmp10]> t0, t4, undef:i64
t13: i8,ch = load<LD1[%tmp12]> t0, t6, undef:i64
Differential Revision: http://reviews.llvm.org/D12567
llvm-svn: 248628
2015-09-26 06:27:02 +08:00
|
|
|
; DBGDAG-DAG: [[LD2:t[0-9]+]]: i16,ch = load<LD2[%tmp81](align=1)> [[ENTRYTOKEN]], [[BASEPTR]], undef:i64
|
|
|
|
; DBGDAG-DAG: [[LD1:t[0-9]+]]: i8,ch = load<LD1[%tmp12]> [[ENTRYTOKEN]], [[ADDPTR]], undef:i64
|
2015-09-24 15:22:38 +08:00
|
|
|
|
2016-10-14 04:23:25 +08:00
|
|
|
; DBGDAG: [[LOADTOKEN:t[0-9]+]]: ch = TokenFactor [[LD2]]:1, [[LD1]]:1
|
2015-09-24 15:22:38 +08:00
|
|
|
|
2016-10-14 04:23:25 +08:00
|
|
|
; DBGDAG-DAG: [[ST2:t[0-9]+]]: ch = store<ST2[%tmp10](align=1)> [[LOADTOKEN]], [[LD2]], t{{[0-9]+}}, undef:i64
|
|
|
|
; DBGDAG-DAG: [[ST1:t[0-9]+]]: ch = store<ST1[%tmp14]> [[ST2]], [[LD1]], t{{[0-9]+}}, undef:i64
|
|
|
|
; DBGDAG: X86ISD::RET_FLAG [[ST1]],
|
2015-09-24 15:22:38 +08:00
|
|
|
|
|
|
|
; DBGDAG: Type-legalized selection DAG: BB#0 'merge_store_partial_overlap_load:'
|
|
|
|
; Test input: read bytes 0..2 of the [4 x i8] object behind %tmp, then write
; those three values back to bytes 1..3 of the same object. The source and
; destination ranges overlap at base + 1 and base + 2, and all three loads are
; issued before any of the stores.
;
; The value names (%tmp8 .. %tmp14) are matched by the check lines at the top
; of this file, so they must not be renamed.
define void @merge_store_partial_overlap_load([4 x i8]* %tmp) {
  ; Addresses of the four bytes: base + 0 .. base + 3.
  %tmp8 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 0
  %tmp10 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 1
  %tmp12 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 2
  %tmp14 = getelementptr [4 x i8], [4 x i8]* %tmp, i32 0, i8 3

  ; Read the three source bytes first.
  %tmp9 = load i8, i8* %tmp8, align 1         ; base + 0
  %tmp11 = load i8, i8* %tmp10, align 1       ; base + 1
  %tmp13 = load i8, i8* %tmp12, align 1       ; base + 2

  ; Write each byte one position higher than where it was read.
  store i8 %tmp9, i8* %tmp10, align 1         ; base + 1
  store i8 %tmp11, i8* %tmp12, align 1        ; base + 2
  store i8 %tmp13, i8* %tmp14, align 1        ; base + 3

; Should emit
; load base + 0, base + 1
; store base + 1, base + 2
; load base + 2
; store base + 3

  ret void
}
|