Revert "[GlobalISel][Localizer] Enable intra-block localization of already-local uses."

This reverts commit e91e1df6ab.
This commit is contained in:
Muhammad Omair Javaid 2020-03-05 03:12:28 +05:00
parent 50b8088ba2
commit 5583c2f2fb
12 changed files with 190 additions and 220 deletions

View File

@ -86,13 +86,8 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
dbgs() << "Checking use: " << MIUse
<< " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
if (isLocalUse(MOUse, MI, InsertMBB)) {
// Even if we're in the same block, if the block is very large we could
// still have many long live ranges. Try to do intra-block localization
// too.
LocalizedInstrs.insert(&MI);
if (isLocalUse(MOUse, MI, InsertMBB))
continue;
}
LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
Changed = true;
auto MBBAndReg = std::make_pair(InsertMBB, Reg);

View File

@ -19,14 +19,14 @@ define i32 @foo() {
; CHECK: bb.1.entry:
; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; CHECK: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
; CHECK: [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
; CHECK: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @var1)
; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(ne), [[LOAD]](s32), [[C]]
; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32)
; CHECK: G_BRCOND [[TRUNC]](s1), %bb.3
; CHECK: bb.2.if.then:

View File

@ -39,7 +39,6 @@
}
define void @test_inttoptr() { ret void }
define void @many_local_use_intra_block() { ret void }
...
@ -336,15 +335,15 @@ body: |
; CHECK-LABEL: name: intrablock_with_globalvalue
; CHECK: bb.0.entry:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
; CHECK: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
; CHECK: [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
; CHECK: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV2]](p0) :: (load 4 from @var1)
; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]]
; CHECK: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1
; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
; CHECK: [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2
; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
; CHECK: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3
; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (load 4 from @var1)
; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C]]
; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32)
; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1
; CHECK: G_BR %bb.2
@ -404,12 +403,12 @@ body: |
; CHECK: liveins: $w0, $x1
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
; CHECK: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $x1
; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 0
; CHECK: [[INTTOPTR:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C]](s64)
; CHECK: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 128
; CHECK: [[INTTOPTR1:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C1]](s64)
; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C2]]
; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 128
; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
; CHECK: [[C2:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 0
; CHECK: [[INTTOPTR:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C2]](s64)
; CHECK: [[INTTOPTR1:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C]](s64)
; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]]
; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32)
; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1
; CHECK: G_BR %bb.2
@ -451,28 +450,3 @@ body: |
RET_ReallyLR implicit $x0
...
---
name: many_local_use_intra_block
legalized: true
regBankSelected: true
body: |
bb.0:
; CHECK-LABEL: name: many_local_use_intra_block
; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
; CHECK: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]]
; CHECK: [[ADD1:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]]
; CHECK: [[ADD2:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]]
; CHECK: [[ADD3:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]]
; CHECK: [[ADD4:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]]
; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2
; CHECK: [[ADD5:%[0-9]+]]:gpr(s32) = G_ADD [[C1]], [[C1]]
%0:gpr(s32) = G_CONSTANT i32 1
%1:gpr(s32) = G_CONSTANT i32 2
%2:gpr(s32) = G_ADD %0, %0
%3:gpr(s32) = G_ADD %0, %0
%4:gpr(s32) = G_ADD %0, %0
%5:gpr(s32) = G_ADD %0, %0
%6:gpr(s32) = G_ADD %0, %0
%7:gpr(s32) = G_ADD %1, %1
...

View File

@ -8,9 +8,9 @@ declare void @free(i8*)
; that takes a swifterror parameter and "caller" is the caller of "foo".
define float @foo(%swift_error** swifterror %error_ptr_ref) {
; CHECK-LABEL: foo:
; CHECK: mov [[ID:w[0-9]+]], #1
; CHECK: mov w0, #16
; CHECK: malloc
; CHECK: mov [[ID:w[0-9]+]], #1
; CHECK: strb [[ID]], [x0, #8]
; CHECK: mov x21, x0
; CHECK-NOT: x21
@ -164,9 +164,9 @@ bb_end:
define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swifterror %error_ptr_ref) {
; CHECK-LABEL: foo_sret:
; CHECK: mov [[SRET:x[0-9]+]], x8
; CHECK: mov [[ID:w[0-9]+]], #1
; CHECK: mov w0, #16
; CHECK: malloc
; CHECK: mov [[ID:w[0-9]+]], #1
; CHECK: strb [[ID]], [x0, #8]
; CHECK: str w{{.*}}, [{{.*}}[[SRET]], #4]
; CHECK: mov x21, x0
@ -220,9 +220,9 @@ handler:
declare void @llvm.va_start(i8*) nounwind
define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
; CHECK-LABEL: foo_vararg:
; CHECK-DAG: mov [[ID:w[0-9]+]], #1
; CHECK: mov w0, #16
; CHECK: malloc
; CHECK-DAG: mov [[ID:w[0-9]+]], #1
; CHECK-DAG: strb [[ID]], [x0, #8]
; First vararg

View File

@ -9,12 +9,12 @@ define void @test_const(%dag* %dst) {
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 10
; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 0 :: (store 1 into %ir.dst)
; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 20
; CHECK: [[MOVi32imm2:%[0-9]+]]:gpr32 = MOVi32imm 50
; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 0 :: (store 1 into %ir.dst)
; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 1 :: (store 1 into %ir.dst + 1)
; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 2 :: (store 1 into %ir.dst + 2)
; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 3 :: (store 1 into %ir.dst + 3)
; CHECK: [[MOVi32imm2:%[0-9]+]]:gpr32 = MOVi32imm 50
; CHECK: STRBBui [[MOVi32imm2]], [[COPY]], 4 :: (store 1 into %ir.dst + 4)
; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 5 :: (store 1 into %ir.dst + 5)
; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 6 :: (store 1 into %ir.dst + 6)

View File

@ -121,10 +121,11 @@ define { [3 x float] } @test_add_elem() {
; SDAG: fmov s2, #1.0
; SDAG: ret
; GISEL-LABEL: test_add_elem:
; GISEL: str x30, [sp, #-16]!
; GISEL: fmov s8, #1.00000000
; GISEL: bl get_vec2
; GISEL: fmov s2, #1.0
; GISEL: ldr x30, [sp], #16
; GISEL: ldr x30, [sp, #8]
; GISEL: mov v2.16b, v8.16b
; GISEL: ldr d8, [sp], #16
; GISEL: ret
%call = tail call { [2 x float] } @get_vec2()

View File

@ -60,9 +60,9 @@ define void @foo2() {
;
; CHECK-GLOBISEL-LABEL: foo2:
; CHECK-GLOBISEL: // %bb.0: // %entry
; CHECK-GLOBISEL-NEXT: adr x8, ptr
; CHECK-GLOBISEL-NEXT: adr x9, dst
; CHECK-GLOBISEL-NEXT: str x9, [x8]
; CHECK-GLOBISEL-NEXT: adr x8, dst
; CHECK-GLOBISEL-NEXT: adr x9, ptr
; CHECK-GLOBISEL-NEXT: str x8, [x9]
; CHECK-GLOBISEL-NEXT: ret
;
; CHECK-PIC-LABEL: foo2:
@ -74,9 +74,9 @@ define void @foo2() {
;
; CHECK-PIC-GLOBISEL-LABEL: foo2:
; CHECK-PIC-GLOBISEL: // %bb.0: // %entry
; CHECK-PIC-GLOBISEL-NEXT: ldr x8, :got:ptr
; CHECK-PIC-GLOBISEL-NEXT: ldr x9, :got:dst
; CHECK-PIC-GLOBISEL-NEXT: str x9, [x8]
; CHECK-PIC-GLOBISEL-NEXT: ldr x8, :got:dst
; CHECK-PIC-GLOBISEL-NEXT: ldr x9, :got:ptr
; CHECK-PIC-GLOBISEL-NEXT: str x8, [x9]
; CHECK-PIC-GLOBISEL-NEXT: ret
entry:
store i8* getelementptr inbounds ([65536 x i8], [65536 x i8]* @dst, i64 0, i64 0), i8** @ptr, align 8
@ -179,9 +179,9 @@ define void @bar2() {
;
; CHECK-GLOBISEL-LABEL: bar2:
; CHECK-GLOBISEL: // %bb.0: // %entry
; CHECK-GLOBISEL-NEXT: adr x8, lptr
; CHECK-GLOBISEL-NEXT: adr x9, ldst
; CHECK-GLOBISEL-NEXT: str x9, [x8]
; CHECK-GLOBISEL-NEXT: adr x8, ldst
; CHECK-GLOBISEL-NEXT: adr x9, lptr
; CHECK-GLOBISEL-NEXT: str x8, [x9]
; CHECK-GLOBISEL-NEXT: ret
;
; CHECK-PIC-LABEL: bar2:
@ -193,9 +193,9 @@ define void @bar2() {
;
; CHECK-PIC-GLOBISEL-LABEL: bar2:
; CHECK-PIC-GLOBISEL: // %bb.0: // %entry
; CHECK-PIC-GLOBISEL-NEXT: adr x8, lptr
; CHECK-PIC-GLOBISEL-NEXT: adr x9, ldst
; CHECK-PIC-GLOBISEL-NEXT: str x9, [x8]
; CHECK-PIC-GLOBISEL-NEXT: adr x8, ldst
; CHECK-PIC-GLOBISEL-NEXT: adr x9, lptr
; CHECK-PIC-GLOBISEL-NEXT: str x8, [x9]
; CHECK-PIC-GLOBISEL-NEXT: ret
entry:
store i8* @ldst, i8** @lptr, align 8
@ -297,9 +297,9 @@ define void @baz2() {
;
; CHECK-GLOBISEL-LABEL: baz2:
; CHECK-GLOBISEL: // %bb.0: // %entry
; CHECK-GLOBISEL-NEXT: adr x8, lptr
; CHECK-GLOBISEL-NEXT: adr x9, lbdst
; CHECK-GLOBISEL-NEXT: str x9, [x8]
; CHECK-GLOBISEL-NEXT: adr x8, lbdst
; CHECK-GLOBISEL-NEXT: adr x9, lptr
; CHECK-GLOBISEL-NEXT: str x8, [x9]
; CHECK-GLOBISEL-NEXT: ret
;
; CHECK-PIC-LABEL: baz2:
@ -311,9 +311,9 @@ define void @baz2() {
;
; CHECK-PIC-GLOBISEL-LABEL: baz2:
; CHECK-PIC-GLOBISEL: // %bb.0: // %entry
; CHECK-PIC-GLOBISEL-NEXT: adr x8, lptr
; CHECK-PIC-GLOBISEL-NEXT: adr x9, lbdst
; CHECK-PIC-GLOBISEL-NEXT: str x9, [x8]
; CHECK-PIC-GLOBISEL-NEXT: adr x8, lbdst
; CHECK-PIC-GLOBISEL-NEXT: adr x9, lptr
; CHECK-PIC-GLOBISEL-NEXT: str x8, [x9]
; CHECK-PIC-GLOBISEL-NEXT: ret
entry:
store i8* getelementptr inbounds ([65536 x i8], [65536 x i8]* @lbdst, i64 0, i64 0), i8** @lptr, align 8

View File

@ -200,12 +200,12 @@ define amdgpu_ps float @fcmp_s_s(float inreg %src0, float inreg %src1) {
; GFX9: liveins: $sgpr2, $sgpr3
; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY2]]
; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY3]], [[COPY4]]
; GFX9: $vgpr0 = COPY [[SELECT]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
@ -214,12 +214,12 @@ define amdgpu_ps float @fcmp_s_s(float inreg %src0, float inreg %src1) {
; GFX10: liveins: $sgpr2, $sgpr3
; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY2]]
; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY3]], [[COPY4]]
; GFX10: $vgpr0 = COPY [[SELECT]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
@ -336,13 +336,13 @@ define amdgpu_ps float @class_s_s(float inreg %src0, i32 inreg %src1) {
; GFX9: liveins: $sgpr2, $sgpr3
; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; GFX9: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s32), [[COPY3]](s32)
; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[INT]](s1), [[COPY4]], [[COPY5]]
; GFX9: $vgpr0 = COPY [[SELECT]](s32)
; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
@ -351,13 +351,13 @@ define amdgpu_ps float @class_s_s(float inreg %src0, i32 inreg %src1) {
; GFX10: liveins: $sgpr2, $sgpr3
; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; GFX10: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s32), [[COPY3]](s32)
; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00
; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00
; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[INT]](s1), [[COPY4]], [[COPY5]]
; GFX10: $vgpr0 = COPY [[SELECT]](s32)
; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0

View File

@ -6,14 +6,14 @@ define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8f32_const_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000
; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000
; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000
; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000
; GPRIDX-NEXT: s_mov_b32 s7, 4.0
; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000
; GPRIDX-NEXT: s_mov_b32 s5, 2.0
; GPRIDX-NEXT: s_mov_b32 s4, 1.0
; GPRIDX-NEXT: s_mov_b32 s5, 2.0
; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000
; GPRIDX-NEXT: s_mov_b32 s7, 4.0
; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000
; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000
; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000
; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000
; GPRIDX-NEXT: s_mov_b64 s[12:13], exec
; GPRIDX-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s14, v0
@ -32,14 +32,14 @@ define float @dyn_extract_v8f32_const_s_v(i32 %sel) {
; MOVREL-LABEL: dyn_extract_v8f32_const_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT: s_mov_b32 s7, 4.0
; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
; MOVREL-NEXT: s_mov_b32 s5, 2.0
; MOVREL-NEXT: s_mov_b32 s4, 1.0
; MOVREL-NEXT: s_mov_b32 s5, 2.0
; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
; MOVREL-NEXT: s_mov_b32 s7, 4.0
; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
; MOVREL-NEXT: s_mov_b64 s[12:13], exec
; MOVREL-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s14, v0
@ -64,13 +64,13 @@ define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b32 s4, 1.0
; GPRIDX-NEXT: s_mov_b32 m0, s2
; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000
; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000
; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000
; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000
; GPRIDX-NEXT: s_mov_b32 s7, 4.0
; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000
; GPRIDX-NEXT: s_mov_b32 s5, 2.0
; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000
; GPRIDX-NEXT: s_mov_b32 s7, 4.0
; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000
; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000
; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000
; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000
; GPRIDX-NEXT: s_movrels_b32 s0, s4
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: ; return to shader part epilog
@ -79,13 +79,13 @@ define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) {
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b32 s4, 1.0
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT: s_mov_b32 s7, 4.0
; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
; MOVREL-NEXT: s_mov_b32 s5, 2.0
; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
; MOVREL-NEXT: s_mov_b32 s7, 4.0
; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
; MOVREL-NEXT: s_movrels_b32 s0, s4
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: ; return to shader part epilog
@ -246,14 +246,14 @@ define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
; GPRIDX-LABEL: dyn_extract_v8i64_const_s_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b64 s[18:19], 8
; GPRIDX-NEXT: s_mov_b64 s[16:17], 7
; GPRIDX-NEXT: s_mov_b64 s[14:15], 6
; GPRIDX-NEXT: s_mov_b64 s[12:13], 5
; GPRIDX-NEXT: s_mov_b64 s[10:11], 4
; GPRIDX-NEXT: s_mov_b64 s[8:9], 3
; GPRIDX-NEXT: s_mov_b64 s[6:7], 2
; GPRIDX-NEXT: s_mov_b64 s[4:5], 1
; GPRIDX-NEXT: s_mov_b64 s[6:7], 2
; GPRIDX-NEXT: s_mov_b64 s[8:9], 3
; GPRIDX-NEXT: s_mov_b64 s[10:11], 4
; GPRIDX-NEXT: s_mov_b64 s[12:13], 5
; GPRIDX-NEXT: s_mov_b64 s[14:15], 6
; GPRIDX-NEXT: s_mov_b64 s[16:17], 7
; GPRIDX-NEXT: s_mov_b64 s[18:19], 8
; GPRIDX-NEXT: s_mov_b64 s[20:21], exec
; GPRIDX-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT: v_readfirstlane_b32 s22, v0
@ -273,14 +273,14 @@ define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) {
; MOVREL-LABEL: dyn_extract_v8i64_const_s_v:
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_mov_b64 s[18:19], 8
; MOVREL-NEXT: s_mov_b64 s[16:17], 7
; MOVREL-NEXT: s_mov_b64 s[14:15], 6
; MOVREL-NEXT: s_mov_b64 s[12:13], 5
; MOVREL-NEXT: s_mov_b64 s[10:11], 4
; MOVREL-NEXT: s_mov_b64 s[8:9], 3
; MOVREL-NEXT: s_mov_b64 s[6:7], 2
; MOVREL-NEXT: s_mov_b64 s[4:5], 1
; MOVREL-NEXT: s_mov_b64 s[6:7], 2
; MOVREL-NEXT: s_mov_b64 s[8:9], 3
; MOVREL-NEXT: s_mov_b64 s[10:11], 4
; MOVREL-NEXT: s_mov_b64 s[12:13], 5
; MOVREL-NEXT: s_mov_b64 s[14:15], 6
; MOVREL-NEXT: s_mov_b64 s[16:17], 7
; MOVREL-NEXT: s_mov_b64 s[18:19], 8
; MOVREL-NEXT: s_mov_b64 s[20:21], exec
; MOVREL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT: v_readfirstlane_b32 s22, v0
@ -306,13 +306,13 @@ define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_mov_b64 s[4:5], 1
; GPRIDX-NEXT: s_mov_b32 m0, s2
; GPRIDX-NEXT: s_mov_b64 s[18:19], 8
; GPRIDX-NEXT: s_mov_b64 s[16:17], 7
; GPRIDX-NEXT: s_mov_b64 s[14:15], 6
; GPRIDX-NEXT: s_mov_b64 s[12:13], 5
; GPRIDX-NEXT: s_mov_b64 s[10:11], 4
; GPRIDX-NEXT: s_mov_b64 s[8:9], 3
; GPRIDX-NEXT: s_mov_b64 s[6:7], 2
; GPRIDX-NEXT: s_mov_b64 s[8:9], 3
; GPRIDX-NEXT: s_mov_b64 s[10:11], 4
; GPRIDX-NEXT: s_mov_b64 s[12:13], 5
; GPRIDX-NEXT: s_mov_b64 s[14:15], 6
; GPRIDX-NEXT: s_mov_b64 s[16:17], 7
; GPRIDX-NEXT: s_mov_b64 s[18:19], 8
; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5]
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
@ -323,13 +323,13 @@ define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) {
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_mov_b64 s[4:5], 1
; MOVREL-NEXT: s_mov_b32 m0, s2
; MOVREL-NEXT: s_mov_b64 s[18:19], 8
; MOVREL-NEXT: s_mov_b64 s[16:17], 7
; MOVREL-NEXT: s_mov_b64 s[14:15], 6
; MOVREL-NEXT: s_mov_b64 s[12:13], 5
; MOVREL-NEXT: s_mov_b64 s[10:11], 4
; MOVREL-NEXT: s_mov_b64 s[8:9], 3
; MOVREL-NEXT: s_mov_b64 s[6:7], 2
; MOVREL-NEXT: s_mov_b64 s[8:9], 3
; MOVREL-NEXT: s_mov_b64 s[10:11], 4
; MOVREL-NEXT: s_mov_b64 s[12:13], 5
; MOVREL-NEXT: s_mov_b64 s[14:15], 6
; MOVREL-NEXT: s_mov_b64 s[16:17], 7
; MOVREL-NEXT: s_mov_b64 s[18:19], 8
; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5]
; MOVREL-NEXT: v_mov_b32_e32 v0, s0
; MOVREL-NEXT: v_mov_b32_e32 v1, s1

View File

@ -81,13 +81,13 @@ define <8 x float> @dyn_insertelement_v8f32_const_s_v_v(float %val, i32 %idx) {
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000
; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000
; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000
; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000
; GPRIDX-NEXT: s_mov_b32 s7, 4.0
; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000
; GPRIDX-NEXT: s_mov_b32 s5, 2.0
; GPRIDX-NEXT: s_mov_b32 s4, 1.0
; GPRIDX-NEXT: s_mov_b32 s5, 2.0
; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000
; GPRIDX-NEXT: s_mov_b32 s7, 4.0
; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000
; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000
; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000
; GPRIDX-NEXT: v_mov_b32_e32 v17, s11
; GPRIDX-NEXT: v_mov_b32_e32 v16, s10
; GPRIDX-NEXT: v_mov_b32_e32 v15, s9
@ -131,18 +131,18 @@ define <8 x float> @dyn_insertelement_v8f32_const_s_v_v(float %val, i32 %idx) {
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_waitcnt_vscnt null, 0x0
; MOVREL-NEXT: s_mov_b32 s11, 0x41000000
; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT: s_mov_b32 s7, 4.0
; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
; MOVREL-NEXT: s_mov_b32 s5, 2.0
; MOVREL-NEXT: s_mov_b32 s4, 1.0
; MOVREL-NEXT: s_mov_b32 s5, 2.0
; MOVREL-NEXT: s_mov_b32 s6, 0x40400000
; MOVREL-NEXT: s_mov_b32 s7, 4.0
; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000
; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000
; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000
; MOVREL-NEXT: v_mov_b32_e32 v17, s11
; MOVREL-NEXT: v_mov_b32_e32 v16, s10
; MOVREL-NEXT: v_mov_b32_e32 v15, s9
; MOVREL-NEXT: v_mov_b32_e32 v14, s8
; MOVREL-NEXT: v_mov_b32_e32 v13, s7
; MOVREL-NEXT: v_mov_b32_e32 v14, s8
; MOVREL-NEXT: v_mov_b32_e32 v15, s9
; MOVREL-NEXT: v_mov_b32_e32 v16, s10
; MOVREL-NEXT: v_mov_b32_e32 v12, s6
; MOVREL-NEXT: v_mov_b32_e32 v11, s5
; MOVREL-NEXT: v_mov_b32_e32 v10, s4
@ -725,22 +725,22 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) {
; GPRIDX-LABEL: dyn_insertelement_v8f64_const_s_v_v:
; GPRIDX: ; %bb.0: ; %entry
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GPRIDX-NEXT: s_mov_b32 s18, 0
; GPRIDX-NEXT: s_mov_b32 s8, 0
; GPRIDX-NEXT: s_mov_b32 s19, 0x40200000
; GPRIDX-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GPRIDX-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GPRIDX-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
; GPRIDX-NEXT: s_mov_b32 s17, 0x401c0000
; GPRIDX-NEXT: s_mov_b32 s16, s18
; GPRIDX-NEXT: s_mov_b32 s15, 0x40180000
; GPRIDX-NEXT: s_mov_b32 s14, s18
; GPRIDX-NEXT: s_mov_b32 s13, 0x40140000
; GPRIDX-NEXT: s_mov_b32 s12, s18
; GPRIDX-NEXT: s_mov_b64 s[10:11], 4.0
; GPRIDX-NEXT: s_mov_b32 s9, 0x40080000
; GPRIDX-NEXT: s_mov_b32 s8, s18
; GPRIDX-NEXT: s_mov_b64 s[6:7], 2.0
; GPRIDX-NEXT: s_mov_b64 s[4:5], 1.0
; GPRIDX-NEXT: s_mov_b64 s[6:7], 2.0
; GPRIDX-NEXT: s_mov_b32 s9, 0x40080000
; GPRIDX-NEXT: s_mov_b64 s[10:11], 4.0
; GPRIDX-NEXT: s_mov_b32 s13, 0x40140000
; GPRIDX-NEXT: s_mov_b32 s12, s8
; GPRIDX-NEXT: s_mov_b32 s15, 0x40180000
; GPRIDX-NEXT: s_mov_b32 s14, s8
; GPRIDX-NEXT: s_mov_b32 s17, 0x401c0000
; GPRIDX-NEXT: s_mov_b32 s16, s8
; GPRIDX-NEXT: s_mov_b32 s18, s8
; GPRIDX-NEXT: v_mov_b32_e32 v34, s19
; GPRIDX-NEXT: v_mov_b32_e32 v33, s18
; GPRIDX-NEXT: v_mov_b32_e32 v32, s17
@ -803,26 +803,26 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) {
; MOVREL: ; %bb.0: ; %entry
; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MOVREL-NEXT: s_waitcnt_vscnt null, 0x0
; MOVREL-NEXT: s_mov_b32 s18, 0
; MOVREL-NEXT: s_mov_b32 s8, 0
; MOVREL-NEXT: s_mov_b32 s19, 0x40200000
; MOVREL-NEXT: s_mov_b32 s17, 0x401c0000
; MOVREL-NEXT: s_mov_b32 s15, 0x40180000
; MOVREL-NEXT: s_mov_b32 s13, 0x40140000
; MOVREL-NEXT: s_mov_b32 s16, s18
; MOVREL-NEXT: s_mov_b32 s14, s18
; MOVREL-NEXT: s_mov_b32 s12, s18
; MOVREL-NEXT: s_mov_b64 s[10:11], 4.0
; MOVREL-NEXT: s_mov_b32 s9, 0x40080000
; MOVREL-NEXT: s_mov_b32 s8, s18
; MOVREL-NEXT: s_mov_b64 s[6:7], 2.0
; MOVREL-NEXT: s_mov_b64 s[4:5], 1.0
; MOVREL-NEXT: s_mov_b64 s[6:7], 2.0
; MOVREL-NEXT: s_mov_b32 s9, 0x40080000
; MOVREL-NEXT: s_mov_b64 s[10:11], 4.0
; MOVREL-NEXT: s_mov_b32 s13, 0x40140000
; MOVREL-NEXT: s_mov_b32 s12, s8
; MOVREL-NEXT: s_mov_b32 s15, 0x40180000
; MOVREL-NEXT: s_mov_b32 s14, s8
; MOVREL-NEXT: s_mov_b32 s17, 0x401c0000
; MOVREL-NEXT: s_mov_b32 s16, s8
; MOVREL-NEXT: s_mov_b32 s18, s8
; MOVREL-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; MOVREL-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; MOVREL-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
; MOVREL-NEXT: v_mov_b32_e32 v34, s19
; MOVREL-NEXT: v_mov_b32_e32 v33, s18
; MOVREL-NEXT: v_mov_b32_e32 v32, s17
; MOVREL-NEXT: v_mov_b32_e32 v31, s16
; MOVREL-NEXT: v_mov_b32_e32 v33, s18
; MOVREL-NEXT: v_mov_b32_e32 v30, s15
; MOVREL-NEXT: v_mov_b32_e32 v29, s14
; MOVREL-NEXT: v_mov_b32_e32 v28, s13

View File

@ -203,8 +203,8 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(i32 addrspace(1)* %
; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, 4
; GFX6-NEXT: s_mov_b32 s1, s0
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s1, s0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
@ -213,8 +213,8 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(i32 addrspace(1)* %
; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s0, 4
; GFX7-NEXT: s_mov_b32 s1, s0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s1, s0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
@ -252,11 +252,11 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(i32 addrspace(1)* %ptr) {
define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg %ptr, i32 inreg %soffset) {
; GFX6-LABEL: mubuf_store_sgpr_ptr_sgpr_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x200000
; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x200000
; GFX6-NEXT: s_lshl_b64 s[4:5], s[2:3], 2
; GFX6-NEXT: v_mov_b32_e32 v0, s4
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
@ -266,11 +266,11 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_sgpr_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x200000
; GFX7-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_mov_b32 s0, s2
; GFX7-NEXT: s_mov_b32 s1, s3
; GFX7-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x200000
; GFX7-NEXT: s_lshl_b64 s[4:5], s[2:3], 2
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
@ -286,21 +286,21 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(i32 addrspace(1)* %ptr,
; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: s_lshl_b64 s[4:5], s[0:1], 2
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_lshl_b64 s[4:5], s[0:1], 2
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_endpgm
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
store i32 0, i32 addrspace(1)* %gep
@ -311,21 +311,21 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace(
; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: s_lshl_b64 s[4:5], s[0:1], 2
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 offset:1024
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_lshl_b64 s[4:5], s[0:1], 2
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 offset:1024
; GFX7-NEXT: s_endpgm
%gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
%gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 256
@ -435,25 +435,25 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(i32 addrspace
; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX6-NEXT: s_add_u32 s0, s2, 0x3ffc
; GFX6-NEXT: s_add_u32 s4, s2, 0x3ffc
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GFX6-NEXT: s_addc_u32 s1, s3, 0
; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: s_addc_u32 s5, s3, 0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX7-NEXT: s_add_u32 s0, s2, 0x3ffc
; GFX7-NEXT: s_add_u32 s4, s2, 0x3ffc
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GFX7-NEXT: s_addc_u32 s1, s3, 0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_addc_u32 s5, s3, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; GFX7-NEXT: s_endpgm
%gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 4095
%gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %voffset

View File

@ -1466,9 +1466,9 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]]
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]]
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
@ -1493,9 +1493,9 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]]
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]]
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0