llvm-project/llvm/test/CodeGen/X86/flags-copy-lowering.mir

485 lines
17 KiB
Plaintext
Raw Normal View History

[x86] Introduce a pass to begin more systematically fixing PR36028 and similar issues. The key idea is to lower COPY nodes populating EFLAGS by scanning the uses of EFLAGS and introducing dedicated code to preserve the necessary state in a GPR. In the vast majority of cases, these uses are cmovCC and jCC instructions. For such cases, we can very easily save and restore the necessary information by simply inserting a setCC into a GPR where the original flags are live, and then testing that GPR directly to feed the cmov or conditional branch. However, things are a bit more tricky if arithmetic is using the flags. This patch handles the vast majority of cases that seem to come up in practice: adc, adcx, adox, rcl, and rcr; all without taking advantage of partially preserved EFLAGS as LLVM doesn't currently model that at all. There are a large number of operations that technically observe EFLAGS currently but shouldn't in this case -- they typically are using DF. Currently, they will not be handled by this approach. However, I have never seen this issue come up in practice. It is already pretty rare to have these patterns come up in practical code with LLVM. I had to resort to writing MIR tests to cover most of the logic in this pass already. I suspect even with its current amount of coverage of arithmetic users of EFLAGS it will be a significant improvement over the current use of pushf/popf. It will also produce substantially faster code in most of the common patterns. This patch also removes all of the old lowering for EFLAGS copies, and the hack that forced us to use a frame pointer when EFLAGS copies were found anywhere in a function so that the dynamic stack adjustment wasn't a problem. None of this is needed as we now lower all of these copies directly in MI and without requiring stack adjustments. Lots of thanks to Reid who came up with several aspects of this approach, and Craig who helped me work out a couple of things tripping me up while working on this.
Differential Revision: https://reviews.llvm.org/D45146 llvm-svn: 329657
2018-04-10 09:41:17 +08:00
# RUN: llc -run-pass x86-flags-copy-lowering -verify-machineinstrs -o - %s | FileCheck %s
#
# Lower various interesting copy patterns of EFLAGS without using LAHF/SAHF.
--- |
  ; Embedded LLVM IR module. Each definition below is a stub with the same
  ; trivial body (a call to @foo followed by a return); the stub names match
  ; the `name:` fields of the MIR documents that follow in this file.
  target triple = "x86_64-unknown-unknown"

  declare void @foo()

  define i32 @test_branch(i64 %a, i64 %b) {
  entry:
    call void @foo()
    ret i32 0
  }

  define i32 @test_branch_fallthrough(i64 %a, i64 %b) {
  entry:
    call void @foo()
    ret i32 0
  }

  define void @test_setcc(i64 %a, i64 %b) {
  entry:
    call void @foo()
    ret void
  }

  define void @test_cmov(i64 %a, i64 %b) {
  entry:
    call void @foo()
    ret void
  }

  define void @test_adc(i64 %a, i64 %b) {
  entry:
    call void @foo()
    ret void
  }

  define void @test_sbb(i64 %a, i64 %b) {
  entry:
    call void @foo()
    ret void
  }

  define void @test_adcx(i64 %a, i64 %b) {
  entry:
    call void @foo()
    ret void
  }

  define void @test_adox(i64 %a, i64 %b) {
  entry:
    call void @foo()
    ret void
  }

  define void @test_rcl(i64 %a, i64 %b) {
  entry:
    call void @foo()
    ret void
  }

  define void @test_rcr(i64 %a, i64 %b) {
  entry:
    call void @foo()
    ret void
  }

...
---
# Flags produced by a compare are copied out of $eflags, held across a call,
# copied back, and then read by two conditional branches (JA, JB) followed by
# an unconditional jump.
name: test_branch
# CHECK-LABEL: name: test_branch
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
body: |
  bb.0:
    successors: %bb.1, %bb.2, %bb.3
    liveins: $rdi, $rsi

    %0:gr64 = COPY $rdi
    %1:gr64 = COPY $rsi
    CMP64rr %0, %1, implicit-def $eflags
    %2:gr64 = COPY $eflags
  ; Expected: the copy out of $eflags disappears and each condition that is
  ; read later (A, B) is saved into its own gr8 while the compare's flags are
  ; still available.
  ; CHECK-NOT: COPY{{( killed)?}} $eflags
  ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags
  ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags
  ; CHECK-NOT: COPY{{( killed)?}} $eflags

    ; The call sequence redefines $eflags (implicit-def dead on the stack
    ; adjustments), which is why the flags are routed through %2.
    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp

    $eflags = COPY %2
    JA_1 %bb.1, implicit $eflags
    JB_1 %bb.2, implicit $eflags
    JMP_1 %bb.3
  ; Expected: no copy back into $eflags. Each branch instead tests its saved
  ; byte and jumps on NE; the second test/branch pair lives in a new block
  ; (bb.4) that keeps the trailing unconditional jump.
  ; CHECK-NOT: $eflags =
  ;
  ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags
  ; CHECK-NEXT: JNE_1 %bb.1, implicit killed $eflags
  ; CHECK-SAME: {{$[[:space:]]}}
  ; CHECK-NEXT: bb.4:
  ; CHECK-NEXT: successors: {{.*$}}
  ; CHECK-SAME: {{$[[:space:]]}}
  ; CHECK-NEXT: TEST8rr %[[B_REG]], %[[B_REG]], implicit-def $eflags
  ; CHECK-NEXT: JNE_1 %bb.2, implicit killed $eflags
  ; CHECK-NEXT: JMP_1 %bb.3

  bb.1:
    %3:gr32 = MOV32ri64 42
    $eax = COPY %3
    RET 0, $eax

  bb.2:
    %4:gr32 = MOV32ri64 43
    $eax = COPY %4
    RET 0, $eax

  bb.3:
    %5:gr32 = MOV32r0 implicit-def dead $eflags
    $eax = COPY %5
    RET 0, $eax

...
---
# Same shape as a saved-flags two-way branch, but bb.0 ends with the second
# conditional branch and has no trailing unconditional jump, so the remaining
# successor (bb.1) is reached by falling through.
name: test_branch_fallthrough
# CHECK-LABEL: name: test_branch_fallthrough
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
body: |
  bb.0:
    successors: %bb.1, %bb.2, %bb.3
    liveins: $rdi, $rsi

    %0:gr64 = COPY $rdi
    %1:gr64 = COPY $rsi
    CMP64rr %0, %1, implicit-def $eflags
    %2:gr64 = COPY $eflags
  ; Expected: the copy out of $eflags disappears and the A and B conditions
  ; are each saved into a gr8 right after the compare.
  ; CHECK-NOT: COPY{{( killed)?}} $eflags
  ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags
  ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags
  ; CHECK-NOT: COPY{{( killed)?}} $eflags

    ; The call sequence redefines $eflags (implicit-def dead on the stack
    ; adjustments), which is why the flags are routed through %2.
    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp

    $eflags = COPY %2
    JA_1 %bb.2, implicit $eflags
    JB_1 %bb.3, implicit $eflags
  ; Expected: no copy back into $eflags. Each branch tests its saved byte and
  ; jumps on NE; the second pair sits in a new block (bb.4) and is followed
  ; directly by bb.1, i.e. the fallthrough is kept.
  ; CHECK-NOT: $eflags =
  ;
  ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags
  ; CHECK-NEXT: JNE_1 %bb.2, implicit killed $eflags
  ; CHECK-SAME: {{$[[:space:]]}}
  ; CHECK-NEXT: bb.4:
  ; CHECK-NEXT: successors: {{.*$}}
  ; CHECK-SAME: {{$[[:space:]]}}
  ; CHECK-NEXT: TEST8rr %[[B_REG]], %[[B_REG]], implicit-def $eflags
  ; CHECK-NEXT: JNE_1 %bb.3, implicit killed $eflags
  ; CHECK-SAME: {{$[[:space:]]}}
  ; CHECK-NEXT: bb.1:

  bb.1:
    %5:gr32 = MOV32r0 implicit-def dead $eflags
    $eax = COPY %5
    RET 0, $eax

  bb.2:
    %3:gr32 = MOV32ri64 42
    $eax = COPY %3
    RET 0, $eax

  bb.3:
    %4:gr32 = MOV32ri64 43
    $eax = COPY %4
    RET 0, $eax

...
---
# Saved flags are read by SETcc users after the call: three register-form
# SETcc instructions (A, B, E) whose results are stored afterwards, and one
# memory-form SETNE that writes its result directly to the stack slot.
name: test_setcc
# CHECK-LABEL: name: test_setcc
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $rdi, $rsi

    %0:gr64 = COPY $rdi
    %1:gr64 = COPY $rsi
    CMP64rr %0, %1, implicit-def $eflags
    %2:gr64 = COPY $eflags
  ; Expected: the copy out of $eflags disappears and all four conditions read
  ; later (A, B, E, NE) are saved into gr8 registers right after the compare.
  ; CHECK-NOT: COPY{{( killed)?}} $eflags
  ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags
  ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags
  ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags
  ; CHECK-NEXT: %[[NE_REG:[^:]*]]:gr8 = SETNEr implicit $eflags
  ; CHECK-NOT: COPY{{( killed)?}} $eflags

    ; The call sequence redefines $eflags (implicit-def dead on the stack
    ; adjustments), which is why the flags are routed through %2.
    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp

    $eflags = COPY %2
    %3:gr8 = SETAr implicit $eflags
    %4:gr8 = SETBr implicit $eflags
    %5:gr8 = SETEr implicit $eflags
    SETNEm $rsp, 1, $noreg, -16, $noreg, implicit killed $eflags
    MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %3
    MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %4
    MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %5
  ; Expected: no copy back into $eflags and no SETcc re-materialised after
  ; the call; every store takes its value from the byte saved before the
  ; call, including a store of the saved NE byte in place of the memory-form
  ; SETNE.
  ; NOTE(review): the four stores are matched order-insensitively and the NE
  ; store's kill flag is optional, because neither is pinned down by this
  ; file -- confirm against llc output and tighten if desired.
  ; CHECK-NOT: $eflags =
  ; CHECK-NOT: = SET{{.*}}
  ; CHECK-DAG: MOV8mr {{.*}}, killed %[[A_REG]]
  ; CHECK-DAG: MOV8mr {{.*}}, killed %[[B_REG]]
  ; CHECK-DAG: MOV8mr {{.*}}, killed %[[E_REG]]
  ; CHECK-DAG: MOV8mr {{.*}}, {{(killed )?}}%[[NE_REG]]

    RET 0

...
---
# Saved flags are read by four conditional moves (A, B, E, NE) after the call.
name: test_cmov
# CHECK-LABEL: name: test_cmov
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $rdi, $rsi

    %0:gr64 = COPY $rdi
    %1:gr64 = COPY $rsi
    CMP64rr %0, %1, implicit-def $eflags
    %2:gr64 = COPY $eflags
  ; Expected: the copy out of $eflags disappears and only three conditions
  ; are saved (A, B, E); the NE user below is expected to reuse the E byte.
  ; CHECK-NOT: COPY{{( killed)?}} $eflags
  ; CHECK: %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags
  ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags
  ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags
  ; CHECK-NOT: COPY{{( killed)?}} $eflags

    ; The call sequence redefines $eflags (implicit-def dead on the stack
    ; adjustments), which is why the flags are routed through %2.
    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp

    $eflags = COPY %2
    %3:gr64 = CMOVA64rr %0, %1, implicit $eflags
    %4:gr64 = CMOVB64rr %0, %1, implicit $eflags
    %5:gr64 = CMOVE64rr %0, %1, implicit $eflags
    %6:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
  ; Expected: no copy back into $eflags. Each cmov is preceded by a test of
  ; its saved byte and becomes a CMOVNE on that test; the original NE cmov
  ; tests the saved E byte and becomes a CMOVE instead.
  ; CHECK-NOT: $eflags =
  ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags
  ; CHECK-NEXT: %3:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
  ; CHECK-NEXT: TEST8rr %[[B_REG]], %[[B_REG]], implicit-def $eflags
  ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
  ; CHECK-NEXT: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags
  ; CHECK-NEXT: %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
  ; CHECK-NEXT: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags
  ; CHECK-NEXT: %6:gr64 = CMOVE64rr %0, %1, implicit killed $eflags
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %3
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %6

    RET 0

...
---
# Flags produced by an add are saved across a call and then consumed by a
# chain of two add-with-carry instructions.
name: test_adc
# CHECK-LABEL: name: test_adc
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $rdi, $rsi

    %0:gr64 = COPY $rdi
    %1:gr64 = COPY $rsi
    %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
    %3:gr64 = COPY $eflags
  ; Expected: the copy out of $eflags disappears and only the carry (B)
  ; condition is saved into a gr8.
  ; CHECK-NOT: COPY{{( killed)?}} $eflags
  ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
  ; CHECK-NOT: COPY{{( killed)?}} $eflags

    ; The call sequence redefines $eflags (implicit-def dead on the stack
    ; adjustments), which is why the flags are routed through %3.
    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp

    $eflags = COPY %3
    %4:gr64 = ADC64ri32 %2:gr64, 42, implicit-def $eflags, implicit $eflags
    %5:gr64 = ADC64ri32 %4:gr64, 42, implicit-def $eflags, implicit $eflags
  ; Expected: no copy back into $eflags. The carry is recreated by adding 255
  ; to the saved byte (an 8-bit add that carries out only for a non-zero
  ; byte); the result register is dead. The second ADC then consumes the
  ; flags produced by the first one.
  ; CHECK-NOT: $eflags =
  ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
  ; CHECK-NEXT: %4:gr64 = ADC64ri32 %2, 42, implicit-def $eflags, implicit killed $eflags
  ; CHECK-NEXT: %5:gr64 = ADC64ri32 %4, 42, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5

    RET 0

...
---
# Flags produced by a subtract are saved across a call and then consumed by a
# chain of two subtract-with-borrow instructions. Unlike the add-with-carry
# variant, the input already carries kill/dead markers on $eflags.
name: test_sbb
# CHECK-LABEL: name: test_sbb
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $rdi, $rsi

    %0:gr64 = COPY $rdi
    %1:gr64 = COPY $rsi
    %2:gr64 = SUB64rr %0, %1, implicit-def $eflags
    %3:gr64 = COPY killed $eflags
  ; Expected: the copy out of $eflags disappears and only the carry (B)
  ; condition is saved into a gr8.
  ; CHECK-NOT: COPY{{( killed)?}} $eflags
  ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
  ; CHECK-NOT: COPY{{( killed)?}} $eflags

    ; The call sequence redefines $eflags (implicit-def dead on the stack
    ; adjustments), which is why the flags are routed through %3.
    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp

    $eflags = COPY %3
    %4:gr64 = SBB64ri32 %2:gr64, 42, implicit-def $eflags, implicit killed $eflags
    %5:gr64 = SBB64ri32 %4:gr64, 42, implicit-def dead $eflags, implicit killed $eflags
  ; Expected: no copy back into $eflags. The carry is recreated by adding 255
  ; to the saved byte (an 8-bit add that carries out only for a non-zero
  ; byte); the result register is dead. The second SBB then consumes the
  ; flags produced by the first one.
  ; CHECK-NOT: $eflags =
  ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
  ; CHECK-NEXT: %4:gr64 = SBB64ri32 %2, 42, implicit-def $eflags, implicit killed $eflags
  ; CHECK-NEXT: %5:gr64 = SBB64ri32 %4, 42, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5

    RET 0

...
---
# EFLAGS produced by the ADD is copied into a virtual register, kept across a
# call sequence whose stack adjustments redefine EFLAGS, and then copied back
# for two different consumers: a CMOVE and an ADCX. The expectations below
# require that no COPY to or from $eflags survives; instead each needed
# condition is captured into its own gr8 with a SETcc right after the ADD and
# re-materialized immediately before the instruction that consumes it.
name: test_adcx
# CHECK-LABEL: name: test_adcx
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $rdi, $rsi
    %0:gr64 = COPY $rdi
    %1:gr64 = COPY $rsi
    %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
    %3:gr64 = COPY $eflags
  ; The "equal" condition (for the CMOVE) and the "below" condition (for the
  ; ADCX) are each saved into a separate byte register.
  ; CHECK-NOT: COPY{{( killed)?}} $eflags
  ; CHECK: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags
  ; CHECK-NEXT: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
  ; CHECK-NOT: COPY{{( killed)?}} $eflags
    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
    $eflags = COPY %3
    %4:gr64 = CMOVE64rr %0, %1, implicit $eflags
    %5:gr64 = MOV64ri32 42
    %6:gr64 = ADCX64rr %2, %5, implicit-def $eflags, implicit $eflags
  ; The saved "equal" byte is 1 when the original condition held, so testing
  ; it against itself yields a non-zero result in that case; the cmov is
  ; therefore expected with the inverted (NE) condition.
  ; Adding 255 to the saved 0/1 carry byte produces an 8-bit carry-out exactly
  ; when the byte is 1, which restores the carry for the ADCX.
  ; CHECK-NOT: $eflags =
  ; CHECK: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags
  ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
  ; CHECK-NEXT: %5:gr64 = MOV64ri32 42
  ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
  ; CHECK-NEXT: %6:gr64 = ADCX64rr %2, %5, implicit-def{{( dead)?}} $eflags, implicit killed $eflags
  ; Both results are stored so that %4 and %6 each have a use.
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %6
    RET 0
...
---
# EFLAGS produced by the ADD is copied into a virtual register, kept across a
# call sequence whose stack adjustments redefine EFLAGS, and then copied back
# for two different consumers: a CMOVE and an ADOX. The expectations below
# require that no COPY to or from $eflags survives; instead each needed
# condition is captured into its own gr8 with a SETcc right after the ADD and
# re-materialized immediately before the instruction that consumes it.
name: test_adox
# CHECK-LABEL: name: test_adox
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $rdi, $rsi
    %0:gr64 = COPY $rdi
    %1:gr64 = COPY $rsi
    %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
    %3:gr64 = COPY $eflags
  ; The "equal" condition (for the CMOVE) and the "overflow" condition (for
  ; the ADOX) are each saved into a separate byte register.
  ; CHECK-NOT: COPY{{( killed)?}} $eflags
  ; CHECK: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags
  ; CHECK-NEXT: %[[OF_REG:[^:]*]]:gr8 = SETOr implicit $eflags
  ; CHECK-NOT: COPY{{( killed)?}} $eflags
    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
    $eflags = COPY %3
    %4:gr64 = CMOVE64rr %0, %1, implicit $eflags
    %5:gr64 = MOV64ri32 42
    %6:gr64 = ADOX64rr %2, %5, implicit-def $eflags, implicit $eflags
  ; The saved "equal" byte is 1 when the original condition held, so testing
  ; it against itself yields a non-zero result in that case; the cmov is
  ; therefore expected with the inverted (NE) condition.
  ; Adding 127 to the saved 0/1 overflow byte overflows the signed 8-bit range
  ; exactly when the byte is 1 (1 + 127 = 128), which restores the overflow
  ; flag for the ADOX.
  ; CHECK-NOT: $eflags =
  ; CHECK: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags
  ; CHECK-NEXT: %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags
  ; CHECK-NEXT: %5:gr64 = MOV64ri32 42
  ; CHECK-NEXT: dead %{{[^:]*}}:gr8 = ADD8ri %[[OF_REG]], 127, implicit-def $eflags
  ; CHECK-NEXT: %6:gr64 = ADOX64rr %2, %5, implicit-def{{( dead)?}} $eflags, implicit killed $eflags
  ; Both results are stored so that %4 and %6 each have a use.
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %6
    RET 0
...
---
# EFLAGS produced by the ADD is copied into a virtual register, kept across a
# call sequence whose stack adjustments redefine EFLAGS, and then copied back
# to feed a chain of two RCL instructions. The expectations below require that
# no COPY to or from $eflags survives: the carry is captured with a SETB right
# after the ADD and re-materialized once before the first RCL; the second RCL
# then consumes the flags defined by the first.
name: test_rcl
# CHECK-LABEL: name: test_rcl
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $rdi, $rsi
    %0:gr64 = COPY $rdi
    %1:gr64 = COPY $rsi
    %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
    %3:gr64 = COPY $eflags
  ; Only the "below" (carry) condition is needed, so a single byte is saved.
  ; CHECK-NOT: COPY{{( killed)?}} $eflags
  ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
  ; CHECK-NOT: COPY{{( killed)?}} $eflags
    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
    $eflags = COPY %3
    %4:gr64 = RCL64r1 %2:gr64, implicit-def $eflags, implicit $eflags
    %5:gr64 = RCL64r1 %4:gr64, implicit-def $eflags, implicit $eflags
  ; Adding 255 to the saved 0/1 carry byte produces an 8-bit carry-out exactly
  ; when the byte is 1, which restores the carry for the first RCL.
  ; CHECK-NOT: $eflags =
  ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
  ; CHECK-NEXT: %4:gr64 = RCL64r1 %2, implicit-def $eflags, implicit killed $eflags
  ; CHECK-NEXT: %5:gr64 = RCL64r1 %4, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
  ; The final result is stored so that %5 has a use.
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
    RET 0
...
---
# EFLAGS produced by the ADD is copied into a virtual register, kept across a
# call sequence whose stack adjustments redefine EFLAGS, and then copied back
# to feed a chain of two RCR instructions. The expectations below require that
# no COPY to or from $eflags survives: the carry is captured with a SETB right
# after the ADD and re-materialized once before the first RCR; the second RCR
# then consumes the flags defined by the first.
name: test_rcr
# CHECK-LABEL: name: test_rcr
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
body: |
  bb.0:
    liveins: $rdi, $rsi
    %0:gr64 = COPY $rdi
    %1:gr64 = COPY $rsi
    %2:gr64 = ADD64rr %0, %1, implicit-def $eflags
    %3:gr64 = COPY $eflags
  ; Only the "below" (carry) condition is needed, so a single byte is saved.
  ; CHECK-NOT: COPY{{( killed)?}} $eflags
  ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
  ; CHECK-NOT: COPY{{( killed)?}} $eflags
    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
    $eflags = COPY %3
    %4:gr64 = RCR64r1 %2:gr64, implicit-def $eflags, implicit $eflags
    %5:gr64 = RCR64r1 %4:gr64, implicit-def $eflags, implicit $eflags
  ; Adding 255 to the saved 0/1 carry byte produces an 8-bit carry-out exactly
  ; when the byte is 1, which restores the carry for the first RCR.
  ; CHECK-NOT: $eflags =
  ; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
  ; CHECK-NEXT: %4:gr64 = RCR64r1 %2, implicit-def $eflags, implicit killed $eflags
  ; CHECK-NEXT: %5:gr64 = RCR64r1 %4, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
  ; The final result is stored so that %5 has a use.
    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5
    RET 0
...