Allow code motion (and thus folding) for atomic (but unordered) memory operands

Building on the work done in D57601, now that we can distinguish between atomic and volatile memory accesses, go ahead and allow code motion of unordered atomics. As seen in the diffs, this allows much better folding of memory operations into using instructions. (Mostly done by the PeepholeOpt pass.)

Note: I have not reviewed all callers of hasOrderedMemoryRef since one of them - isSafeToMove - is very widely used. I'm relying on the documented semantics of each method to judge correctness.

Differential Revision: https://reviews.llvm.org/D59345

llvm-svn: 356170
This commit is contained in:
Philip Reames 2019-03-14 17:20:59 +00:00
parent 6f8dddf169
commit 70d156991c
3 changed files with 31 additions and 71 deletions

View File

@ -1291,10 +1291,8 @@ bool MachineInstr::hasOrderedMemoryRef() const {
return true;
// Check if any of our memory operands are ordered.
// TODO: This should probably be isUnordered (see D57601), but the callers
// need to be audited and test cases written to be sure.
return llvm::any_of(memoperands(), [](const MachineMemOperand *MMO) {
return MMO->isVolatile() || MMO->isAtomic();
return !MMO->isUnordered();
});
}

View File

@ -62,8 +62,7 @@ define half @load_half(half* %fptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: movzwl %ax, %edi
; CHECK-NEXT: movzwl (%rdi), %edi
; CHECK-NEXT: callq __gnu_h2f_ieee
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
@ -75,8 +74,7 @@ define half @load_half(half* %fptr) {
define float @load_float(float* %fptr) {
; CHECK-LABEL: load_float:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movd (%rdi), %xmm0
; CHECK-NEXT: retq
%v = load atomic float, float* %fptr unordered, align 4
ret float %v
@ -85,8 +83,7 @@ define float @load_float(float* %fptr) {
define double @load_double(double* %fptr) {
; CHECK-LABEL: load_double:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq %rax, %xmm0
; CHECK-NEXT: movq (%rdi), %xmm0
; CHECK-NEXT: retq
%v = load atomic double, double* %fptr unordered, align 8
ret double %v

View File

@ -437,7 +437,6 @@ define i64 @load_fold_add1(i64* %p) {
ret i64 %ret
}
; Legal, as expected
define i64 @load_fold_add2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_add2:
; CHECK-O0: # %bb.0:
@ -447,15 +446,14 @@ define i64 @load_fold_add2(i64* %p, i64 %v2) {
;
; CHECK-O3-LABEL: load_fold_add2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: addq %rsi, %rax
; CHECK-O3-NEXT: movq %rsi, %rax
; CHECK-O3-NEXT: addq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%ret = add i64 %v, %v2
ret i64 %ret
}
; Legal to fold (TODO)
define i64 @load_fold_add3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_add3:
; CHECK-O0: # %bb.0:
@ -466,9 +464,8 @@ define i64 @load_fold_add3(i64* %p1, i64* %p2) {
;
; CHECK-O3-LABEL: load_fold_add3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: addq %rcx, %rax
; CHECK-O3-NEXT: addq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p1 unordered, align 8
%v2 = load atomic i64, i64* %p2 unordered, align 8
@ -495,7 +492,6 @@ define i64 @load_fold_sub1(i64* %p) {
ret i64 %ret
}
; Legal, as expected
define i64 @load_fold_sub2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_sub2:
; CHECK-O0: # %bb.0:
@ -514,7 +510,6 @@ define i64 @load_fold_sub2(i64* %p, i64 %v2) {
ret i64 %ret
}
; Legal to fold (TODO)
define i64 @load_fold_sub3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_sub3:
; CHECK-O0: # %bb.0:
@ -526,8 +521,7 @@ define i64 @load_fold_sub3(i64* %p1, i64* %p2) {
; CHECK-O3-LABEL: load_fold_sub3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rsi), %rcx
; CHECK-O3-NEXT: subq %rcx, %rax
; CHECK-O3-NEXT: subq (%rsi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p1 unordered, align 8
%v2 = load atomic i64, i64* %p2 unordered, align 8
@ -553,7 +547,6 @@ define i64 @load_fold_mul1(i64* %p) {
ret i64 %ret
}
; Legal, O0 is better than O3 codegen (TODO)
define i64 @load_fold_mul2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_mul2:
; CHECK-O0: # %bb.0:
@ -563,15 +556,14 @@ define i64 @load_fold_mul2(i64* %p, i64 %v2) {
;
; CHECK-O3-LABEL: load_fold_mul2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: imulq %rsi, %rax
; CHECK-O3-NEXT: movq %rsi, %rax
; CHECK-O3-NEXT: imulq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%ret = mul i64 %v, %v2
ret i64 %ret
}
; Legal to fold (TODO)
define i64 @load_fold_mul3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_mul3:
; CHECK-O0: # %bb.0:
@ -582,9 +574,8 @@ define i64 @load_fold_mul3(i64* %p1, i64* %p2) {
;
; CHECK-O3-LABEL: load_fold_mul3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: imulq %rcx, %rax
; CHECK-O3-NEXT: imulq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p1 unordered, align 8
%v2 = load atomic i64, i64* %p2 unordered, align 8
@ -639,7 +630,6 @@ define i64 @load_fold_sdiv2(i64* %p, i64 %v2) {
ret i64 %ret
}
; Legal to fold (TODO)
define i64 @load_fold_sdiv3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_sdiv3:
; CHECK-O0: # %bb.0:
@ -651,9 +641,8 @@ define i64 @load_fold_sdiv3(i64* %p1, i64* %p2) {
; CHECK-O3-LABEL: load_fold_sdiv3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rsi), %rcx
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rcx
; CHECK-O3-NEXT: idivq (%rsi)
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p1 unordered, align 8
%v2 = load atomic i64, i64* %p2 unordered, align 8
@ -685,7 +674,6 @@ define i64 @load_fold_udiv1(i64* %p) {
ret i64 %ret
}
; Legal, as expected
define i64 @load_fold_udiv2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_udiv2:
; CHECK-O0: # %bb.0:
@ -706,7 +694,6 @@ define i64 @load_fold_udiv2(i64* %p, i64 %v2) {
ret i64 %ret
}
; Legal to fold (TODO)
define i64 @load_fold_udiv3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_udiv3:
; CHECK-O0: # %bb.0:
@ -719,9 +706,8 @@ define i64 @load_fold_udiv3(i64* %p1, i64* %p2) {
; CHECK-O3-LABEL: load_fold_udiv3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rsi), %rcx
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rcx
; CHECK-O3-NEXT: divq (%rsi)
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p1 unordered, align 8
%v2 = load atomic i64, i64* %p2 unordered, align 8
@ -783,7 +769,6 @@ define i64 @load_fold_srem2(i64* %p, i64 %v2) {
ret i64 %ret
}
; Legal to fold (TODO)
define i64 @load_fold_srem3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_srem3:
; CHECK-O0: # %bb.0:
@ -796,9 +781,8 @@ define i64 @load_fold_srem3(i64* %p1, i64* %p2) {
; CHECK-O3-LABEL: load_fold_srem3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rsi), %rcx
; CHECK-O3-NEXT: cqto
; CHECK-O3-NEXT: idivq %rcx
; CHECK-O3-NEXT: idivq (%rsi)
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p1 unordered, align 8
@ -859,7 +843,6 @@ define i64 @load_fold_urem2(i64* %p, i64 %v2) {
ret i64 %ret
}
; Legal to fold (TODO)
define i64 @load_fold_urem3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_urem3:
; CHECK-O0: # %bb.0:
@ -873,9 +856,8 @@ define i64 @load_fold_urem3(i64* %p1, i64* %p2) {
; CHECK-O3-LABEL: load_fold_urem3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rsi), %rcx
; CHECK-O3-NEXT: xorl %edx, %edx
; CHECK-O3-NEXT: divq %rcx
; CHECK-O3-NEXT: divq (%rsi)
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p1 unordered, align 8
@ -1101,7 +1083,6 @@ define i64 @load_fold_and1(i64* %p) {
ret i64 %ret
}
; Legal, as expected
define i64 @load_fold_and2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_and2:
; CHECK-O0: # %bb.0:
@ -1111,15 +1092,14 @@ define i64 @load_fold_and2(i64* %p, i64 %v2) {
;
; CHECK-O3-LABEL: load_fold_and2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: andq %rsi, %rax
; CHECK-O3-NEXT: movq %rsi, %rax
; CHECK-O3-NEXT: andq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%ret = and i64 %v, %v2
ret i64 %ret
}
; Legal to fold (TODO)
define i64 @load_fold_and3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_and3:
; CHECK-O0: # %bb.0:
@ -1130,9 +1110,8 @@ define i64 @load_fold_and3(i64* %p1, i64* %p2) {
;
; CHECK-O3-LABEL: load_fold_and3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: andq %rcx, %rax
; CHECK-O3-NEXT: andq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p1 unordered, align 8
%v2 = load atomic i64, i64* %p2 unordered, align 8
@ -1159,7 +1138,6 @@ define i64 @load_fold_or1(i64* %p) {
ret i64 %ret
}
; Legal, as expected
define i64 @load_fold_or2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_or2:
; CHECK-O0: # %bb.0:
@ -1169,15 +1147,14 @@ define i64 @load_fold_or2(i64* %p, i64 %v2) {
;
; CHECK-O3-LABEL: load_fold_or2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: orq %rsi, %rax
; CHECK-O3-NEXT: movq %rsi, %rax
; CHECK-O3-NEXT: orq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%ret = or i64 %v, %v2
ret i64 %ret
}
; Legal to fold (TODO)
define i64 @load_fold_or3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_or3:
; CHECK-O0: # %bb.0:
@ -1188,9 +1165,8 @@ define i64 @load_fold_or3(i64* %p1, i64* %p2) {
;
; CHECK-O3-LABEL: load_fold_or3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: orq %rcx, %rax
; CHECK-O3-NEXT: orq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p1 unordered, align 8
%v2 = load atomic i64, i64* %p2 unordered, align 8
@ -1217,7 +1193,6 @@ define i64 @load_fold_xor1(i64* %p) {
ret i64 %ret
}
; Legal, as expected
define i64 @load_fold_xor2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_xor2:
; CHECK-O0: # %bb.0:
@ -1227,15 +1202,14 @@ define i64 @load_fold_xor2(i64* %p, i64 %v2) {
;
; CHECK-O3-LABEL: load_fold_xor2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: xorq %rsi, %rax
; CHECK-O3-NEXT: movq %rsi, %rax
; CHECK-O3-NEXT: xorq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%ret = xor i64 %v, %v2
ret i64 %ret
}
; Legal to fold (TODO)
define i64 @load_fold_xor3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_xor3:
; CHECK-O0: # %bb.0:
@ -1246,9 +1220,8 @@ define i64 @load_fold_xor3(i64* %p1, i64* %p2) {
;
; CHECK-O3-LABEL: load_fold_xor3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: xorq %rcx, %rax
; CHECK-O3-NEXT: xorq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p1 unordered, align 8
%v2 = load atomic i64, i64* %p2 unordered, align 8
@ -1256,7 +1229,6 @@ define i64 @load_fold_xor3(i64* %p1, i64* %p2) {
ret i64 %ret
}
; Legal to fold (TODO)
define i1 @load_fold_icmp1(i64* %p) {
; CHECK-O0-LABEL: load_fold_icmp1:
; CHECK-O0: # %bb.0:
@ -1268,8 +1240,7 @@ define i1 @load_fold_icmp1(i64* %p) {
;
; CHECK-O3-LABEL: load_fold_icmp1:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: cmpq $15, %rax
; CHECK-O3-NEXT: cmpq $15, (%rdi)
; CHECK-O3-NEXT: sete %al
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
@ -1277,7 +1248,6 @@ define i1 @load_fold_icmp1(i64* %p) {
ret i1 %ret
}
; Legal to fold (TODO)
define i1 @load_fold_icmp2(i64* %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_icmp2:
; CHECK-O0: # %bb.0:
@ -1289,8 +1259,7 @@ define i1 @load_fold_icmp2(i64* %p, i64 %v2) {
;
; CHECK-O3-LABEL: load_fold_icmp2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: cmpq %rsi, %rax
; CHECK-O3-NEXT: cmpq %rsi, (%rdi)
; CHECK-O3-NEXT: sete %al
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
@ -1298,7 +1267,6 @@ define i1 @load_fold_icmp2(i64* %p, i64 %v2) {
ret i1 %ret
}
; Legal to fold (TODO)
define i1 @load_fold_icmp3(i64* %p1, i64* %p2) {
; CHECK-O0-LABEL: load_fold_icmp3:
; CHECK-O0: # %bb.0:
@ -1311,9 +1279,8 @@ define i1 @load_fold_icmp3(i64* %p1, i64* %p2) {
;
; CHECK-O3-LABEL: load_fold_icmp3:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: movq (%rsi), %rcx
; CHECK-O3-NEXT: cmpq %rcx, %rax
; CHECK-O3-NEXT: movq (%rsi), %rax
; CHECK-O3-NEXT: cmpq %rax, (%rdi)
; CHECK-O3-NEXT: sete %al
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p1 unordered, align 8
@ -1441,9 +1408,8 @@ define void @rmw_fold_mul2(i64* %p, i64 %v) {
;
; CHECK-O3-LABEL: rmw_fold_mul2:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: imulq %rsi, %rax
; CHECK-O3-NEXT: movq %rax, (%rdi)
; CHECK-O3-NEXT: imulq (%rdi), %rsi
; CHECK-O3-NEXT: movq %rsi, (%rdi)
; CHECK-O3-NEXT: retq
%prev = load atomic i64, i64* %p unordered, align 8
%val = mul i64 %prev, %v
@ -2070,9 +2036,8 @@ define i64 @load_forwarding(i64* %p) {
;
; CHECK-O3-LABEL: load_forwarding:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movq (%rdi), %rcx
; CHECK-O3-NEXT: movq (%rdi), %rax
; CHECK-O3-NEXT: orq %rcx, %rax
; CHECK-O3-NEXT: orq (%rdi), %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%v2 = load atomic i64, i64* %p unordered, align 8