forked from OSchip/llvm-project
[AArch64] Fix i64 nontemporal high-half extraction.
Since we only have pair — not single — nontemporal store instructions, we have to extract the high half of the i64 value into a separate register to be able to use them. When the initial nontemporal codegen support was added, I wrote the extract using a nonsensical UBFX of bits [0, 32), which just re-extracts the low half. Use the correct LSR #32 form instead. llvm-svn: 259134
This commit is contained in:
parent
bcf27523f5
commit
53010a0d5b
|
@ -5982,7 +5982,7 @@ def : NTStore64Pat<v8i8>;
|
|||
def : Pat<(nontemporalstore GPR64:$Rt,
|
||||
(am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
|
||||
(STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
|
||||
(EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 0, 31), sub_32),
|
||||
(EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
|
||||
GPR64sp:$Rn, simm7s4:$offset)>;
|
||||
} // AddedComplexity=10
|
||||
} // Predicates = [IsLE]
|
||||
|
|
|
@ -112,7 +112,7 @@ define void @test_stnp_v1i64(<1 x i64>* %p, <1 x i64> %v) #0 {
|
|||
|
||||
define void @test_stnp_i64(i64* %p, i64 %v) #0 {
|
||||
; CHECK-LABEL: test_stnp_i64:
|
||||
; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32
|
||||
; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
|
||||
; CHECK-NEXT: stnp w1, w[[HI]], [x0]
|
||||
; CHECK-NEXT: ret
|
||||
store i64 %v, i64* %p, align 1, !nontemporal !0
|
||||
|
@ -162,7 +162,7 @@ define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 {
|
|||
|
||||
define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 {
|
||||
; CHECK-LABEL: test_stnp_i64_offset:
|
||||
; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32
|
||||
; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
|
||||
; CHECK-NEXT: stnp w1, w[[HI]], [x0, #8]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp0 = getelementptr i64, i64* %p, i32 1
|
||||
|
@ -172,7 +172,7 @@ define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 {
|
|||
|
||||
define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 {
|
||||
; CHECK-LABEL: test_stnp_i64_offset_neg:
|
||||
; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32
|
||||
; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
|
||||
; CHECK-NEXT: stnp w1, w[[HI]], [x0, #-8]
|
||||
; CHECK-NEXT: ret
|
||||
%tmp0 = getelementptr i64, i64* %p, i32 -1
|
||||
|
|
Loading…
Reference in New Issue