forked from OSchip/llvm-project
[RISCV] Scalarize gather/scatter on RV64 with Zve32* extension.
i64 indices aren't supported on Zve32*. Scalarize gathers and scatters to prevent generating illegal instructions. Since InstCombine will aggressively canonicalize GEP indices to pointer size, we're pretty much always going to have an i64 index. Trying to predict, from the TTI hook used by the ScalarizeMaskedMemIntrinPass, when SelectionDAG will find a smaller index seems fragile. To optimize this we probably need an IR pass to rewrite it earlier. Test RUN lines have also been added to make sure the strided load/store optimization still works. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D127179
This commit is contained in:
parent
3731bbc425
commit
0c66deb498
|
@ -161,6 +161,16 @@ public:
|
|||
return isLegalMaskedGatherScatter(DataType, Alignment);
|
||||
}
|
||||
|
||||
/// \returns true when masked gathers must be scalarized on this subtarget.
///
/// The answer depends only on the subtarget: it is true for a 64-bit target
/// that lacks 64-bit vector integer instructions, where EEW=64 gather
/// indices aren't supported. \p VTy and \p Alignment are not consulted.
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
  // Subtargets that are not 64-bit are never forced to scalarize here.
  if (!ST->is64Bit())
    return false;
  // On RV64, scalarize exactly when EEW=64 indices aren't supported.
  return !ST->hasVInstructionsI64();
}
|
||||
|
||||
/// \returns true when masked scatters must be scalarized on this subtarget.
///
/// The answer depends only on the subtarget: it is true for a 64-bit target
/// that lacks 64-bit vector integer instructions, where EEW=64 scatter
/// indices aren't supported. \p VTy and \p Alignment are not consulted.
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
  // Subtargets that are not 64-bit are never forced to scalarize here.
  if (!ST->is64Bit())
    return false;
  // On RV64, scalarize exactly when EEW=64 indices aren't supported.
  return !ST->hasVInstructionsI64();
}
|
||||
|
||||
/// \returns How the target needs this vector-predicated operation to be
|
||||
/// transformed.
|
||||
TargetTransformInfo::VPLegalization
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefixes=CHECK,V
|
||||
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f,+zve32f -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefixes=CHECK,ZVE32F
|
||||
|
||||
%struct.foo = type { i32, i32, i32, i32 }
|
||||
|
||||
|
@ -54,30 +55,55 @@ for.cond.cleanup: ; preds = %vector.body
|
|||
|
||||
define void @gather_masked(i8* noalias nocapture %A, i8* noalias nocapture readonly %B, <32 x i8> %maskedoff) {
|
||||
;
|
||||
; CHECK-LABEL: gather_masked:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: li a2, 0
|
||||
; CHECK-NEXT: lui a3, 983765
|
||||
; CHECK-NEXT: addiw a3, a3, 873
|
||||
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
|
||||
; CHECK-NEXT: vmv.s.x v0, a3
|
||||
; CHECK-NEXT: li a3, 32
|
||||
; CHECK-NEXT: li a4, 5
|
||||
; CHECK-NEXT: li a5, 1024
|
||||
; CHECK-NEXT: .LBB1_1: # %vector.body
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
|
||||
; CHECK-NEXT: vmv1r.v v9, v8
|
||||
; CHECK-NEXT: vlse8.v v9, (a1), a4, v0.t
|
||||
; CHECK-NEXT: add a6, a0, a2
|
||||
; CHECK-NEXT: vle8.v v10, (a6)
|
||||
; CHECK-NEXT: vadd.vv v9, v10, v9
|
||||
; CHECK-NEXT: vse8.v v9, (a6)
|
||||
; CHECK-NEXT: addi a2, a2, 32
|
||||
; CHECK-NEXT: addi a1, a1, 160
|
||||
; CHECK-NEXT: bne a2, a5, .LBB1_1
|
||||
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
|
||||
; CHECK-NEXT: ret
|
||||
; V-LABEL: gather_masked:
|
||||
; V: # %bb.0: # %entry
|
||||
; V-NEXT: li a2, 0
|
||||
; V-NEXT: lui a3, 983765
|
||||
; V-NEXT: addiw a3, a3, 873
|
||||
; V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
|
||||
; V-NEXT: vmv.s.x v0, a3
|
||||
; V-NEXT: li a3, 32
|
||||
; V-NEXT: li a4, 5
|
||||
; V-NEXT: li a5, 1024
|
||||
; V-NEXT: .LBB1_1: # %vector.body
|
||||
; V-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; V-NEXT: vsetvli zero, a3, e8, m1, ta, mu
|
||||
; V-NEXT: vmv1r.v v9, v8
|
||||
; V-NEXT: vlse8.v v9, (a1), a4, v0.t
|
||||
; V-NEXT: add a6, a0, a2
|
||||
; V-NEXT: vle8.v v10, (a6)
|
||||
; V-NEXT: vadd.vv v9, v10, v9
|
||||
; V-NEXT: vse8.v v9, (a6)
|
||||
; V-NEXT: addi a2, a2, 32
|
||||
; V-NEXT: addi a1, a1, 160
|
||||
; V-NEXT: bne a2, a5, .LBB1_1
|
||||
; V-NEXT: # %bb.2: # %for.cond.cleanup
|
||||
; V-NEXT: ret
|
||||
;
|
||||
; ZVE32F-LABEL: gather_masked:
|
||||
; ZVE32F: # %bb.0: # %entry
|
||||
; ZVE32F-NEXT: li a2, 0
|
||||
; ZVE32F-NEXT: lui a3, 983765
|
||||
; ZVE32F-NEXT: addiw a3, a3, 873
|
||||
; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
|
||||
; ZVE32F-NEXT: vmv.s.x v0, a3
|
||||
; ZVE32F-NEXT: li a3, 32
|
||||
; ZVE32F-NEXT: li a4, 5
|
||||
; ZVE32F-NEXT: li a5, 1024
|
||||
; ZVE32F-NEXT: .LBB1_1: # %vector.body
|
||||
; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, mu
|
||||
; ZVE32F-NEXT: vmv1r.v v9, v8
|
||||
; ZVE32F-NEXT: vlse8.v v9, (a1), a4, v0.t
|
||||
; ZVE32F-NEXT: add a6, a0, a2
|
||||
; ZVE32F-NEXT: vle8.v v10, (a6)
|
||||
; ZVE32F-NEXT: vadd.vv v9, v10, v9
|
||||
; ZVE32F-NEXT: vse8.v v9, (a6)
|
||||
; ZVE32F-NEXT: addi a2, a2, 32
|
||||
; ZVE32F-NEXT: addi a1, a1, 160
|
||||
; ZVE32F-NEXT: bne a2, a5, .LBB1_1
|
||||
; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup
|
||||
; ZVE32F-NEXT: ret
|
||||
entry:
|
||||
br label %vector.body
|
||||
|
||||
|
@ -242,30 +268,55 @@ for.cond.cleanup: ; preds = %vector.body
|
|||
|
||||
define void @scatter_masked(i8* noalias nocapture %A, i8* noalias nocapture readonly %B, <32 x i8> %maskedoff) {
|
||||
;
|
||||
; CHECK-LABEL: scatter_masked:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: li a2, 0
|
||||
; CHECK-NEXT: li a3, 32
|
||||
; CHECK-NEXT: lui a4, 983765
|
||||
; CHECK-NEXT: addiw a4, a4, 873
|
||||
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
|
||||
; CHECK-NEXT: vmv.s.x v0, a4
|
||||
; CHECK-NEXT: li a4, 5
|
||||
; CHECK-NEXT: li a5, 1024
|
||||
; CHECK-NEXT: .LBB5_1: # %vector.body
|
||||
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: add a6, a1, a2
|
||||
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
|
||||
; CHECK-NEXT: vle8.v v9, (a6)
|
||||
; CHECK-NEXT: vmv1r.v v10, v8
|
||||
; CHECK-NEXT: vlse8.v v10, (a0), a4, v0.t
|
||||
; CHECK-NEXT: vadd.vv v9, v10, v9
|
||||
; CHECK-NEXT: vsse8.v v9, (a0), a4, v0.t
|
||||
; CHECK-NEXT: addi a2, a2, 32
|
||||
; CHECK-NEXT: addi a0, a0, 160
|
||||
; CHECK-NEXT: bne a2, a5, .LBB5_1
|
||||
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
|
||||
; CHECK-NEXT: ret
|
||||
; V-LABEL: scatter_masked:
|
||||
; V: # %bb.0: # %entry
|
||||
; V-NEXT: li a2, 0
|
||||
; V-NEXT: li a3, 32
|
||||
; V-NEXT: lui a4, 983765
|
||||
; V-NEXT: addiw a4, a4, 873
|
||||
; V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
|
||||
; V-NEXT: vmv.s.x v0, a4
|
||||
; V-NEXT: li a4, 5
|
||||
; V-NEXT: li a5, 1024
|
||||
; V-NEXT: .LBB5_1: # %vector.body
|
||||
; V-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; V-NEXT: add a6, a1, a2
|
||||
; V-NEXT: vsetvli zero, a3, e8, m1, ta, mu
|
||||
; V-NEXT: vle8.v v9, (a6)
|
||||
; V-NEXT: vmv1r.v v10, v8
|
||||
; V-NEXT: vlse8.v v10, (a0), a4, v0.t
|
||||
; V-NEXT: vadd.vv v9, v10, v9
|
||||
; V-NEXT: vsse8.v v9, (a0), a4, v0.t
|
||||
; V-NEXT: addi a2, a2, 32
|
||||
; V-NEXT: addi a0, a0, 160
|
||||
; V-NEXT: bne a2, a5, .LBB5_1
|
||||
; V-NEXT: # %bb.2: # %for.cond.cleanup
|
||||
; V-NEXT: ret
|
||||
;
|
||||
; ZVE32F-LABEL: scatter_masked:
|
||||
; ZVE32F: # %bb.0: # %entry
|
||||
; ZVE32F-NEXT: li a2, 0
|
||||
; ZVE32F-NEXT: li a3, 32
|
||||
; ZVE32F-NEXT: lui a4, 983765
|
||||
; ZVE32F-NEXT: addiw a4, a4, 873
|
||||
; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
|
||||
; ZVE32F-NEXT: vmv.s.x v0, a4
|
||||
; ZVE32F-NEXT: li a4, 5
|
||||
; ZVE32F-NEXT: li a5, 1024
|
||||
; ZVE32F-NEXT: .LBB5_1: # %vector.body
|
||||
; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; ZVE32F-NEXT: add a6, a1, a2
|
||||
; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, mu
|
||||
; ZVE32F-NEXT: vle8.v v9, (a6)
|
||||
; ZVE32F-NEXT: vmv1r.v v10, v8
|
||||
; ZVE32F-NEXT: vlse8.v v10, (a0), a4, v0.t
|
||||
; ZVE32F-NEXT: vadd.vv v9, v10, v9
|
||||
; ZVE32F-NEXT: vsse8.v v9, (a0), a4, v0.t
|
||||
; ZVE32F-NEXT: addi a2, a2, 32
|
||||
; ZVE32F-NEXT: addi a0, a0, 160
|
||||
; ZVE32F-NEXT: bne a2, a5, .LBB5_1
|
||||
; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup
|
||||
; ZVE32F-NEXT: ret
|
||||
entry:
|
||||
br label %vector.body
|
||||
|
||||
|
@ -554,24 +605,51 @@ declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32 immar
|
|||
; Make sure we don't crash in getTgtMemIntrinsic for a vector of pointers.
|
||||
define void @gather_of_pointers(i32** noalias nocapture %0, i32** noalias nocapture readonly %1) {
|
||||
;
|
||||
; CHECK-LABEL: gather_of_pointers:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: li a2, 1024
|
||||
; CHECK-NEXT: li a3, 40
|
||||
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
|
||||
; CHECK-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vlse64.v v8, (a1), a3
|
||||
; CHECK-NEXT: addi a4, a1, 80
|
||||
; CHECK-NEXT: vlse64.v v9, (a4), a3
|
||||
; CHECK-NEXT: vse64.v v8, (a0)
|
||||
; CHECK-NEXT: addi a4, a0, 16
|
||||
; CHECK-NEXT: vse64.v v9, (a4)
|
||||
; CHECK-NEXT: addi a2, a2, -4
|
||||
; CHECK-NEXT: addi a0, a0, 32
|
||||
; CHECK-NEXT: addi a1, a1, 160
|
||||
; CHECK-NEXT: bnez a2, .LBB10_1
|
||||
; CHECK-NEXT: # %bb.2:
|
||||
; CHECK-NEXT: ret
|
||||
; V-LABEL: gather_of_pointers:
|
||||
; V: # %bb.0:
|
||||
; V-NEXT: li a2, 1024
|
||||
; V-NEXT: li a3, 40
|
||||
; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
|
||||
; V-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
|
||||
; V-NEXT: vlse64.v v8, (a1), a3
|
||||
; V-NEXT: addi a4, a1, 80
|
||||
; V-NEXT: vlse64.v v9, (a4), a3
|
||||
; V-NEXT: vse64.v v8, (a0)
|
||||
; V-NEXT: addi a4, a0, 16
|
||||
; V-NEXT: vse64.v v9, (a4)
|
||||
; V-NEXT: addi a2, a2, -4
|
||||
; V-NEXT: addi a0, a0, 32
|
||||
; V-NEXT: addi a1, a1, 160
|
||||
; V-NEXT: bnez a2, .LBB10_1
|
||||
; V-NEXT: # %bb.2:
|
||||
; V-NEXT: ret
|
||||
;
|
||||
; ZVE32F-LABEL: gather_of_pointers:
|
||||
; ZVE32F: # %bb.0:
|
||||
; ZVE32F-NEXT: li a2, 0
|
||||
; ZVE32F-NEXT: li a3, 1
|
||||
; ZVE32F-NEXT: li a4, 1024
|
||||
; ZVE32F-NEXT: li a5, 40
|
||||
; ZVE32F-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
|
||||
; ZVE32F-NEXT: mul a6, a3, a5
|
||||
; ZVE32F-NEXT: add a6, a1, a6
|
||||
; ZVE32F-NEXT: mul a7, a2, a5
|
||||
; ZVE32F-NEXT: add a7, a1, a7
|
||||
; ZVE32F-NEXT: ld t0, 0(a6)
|
||||
; ZVE32F-NEXT: ld t1, 0(a7)
|
||||
; ZVE32F-NEXT: ld a6, 80(a6)
|
||||
; ZVE32F-NEXT: ld a7, 80(a7)
|
||||
; ZVE32F-NEXT: sd t0, 8(a0)
|
||||
; ZVE32F-NEXT: sd t1, 0(a0)
|
||||
; ZVE32F-NEXT: sd a6, 24(a0)
|
||||
; ZVE32F-NEXT: sd a7, 16(a0)
|
||||
; ZVE32F-NEXT: addi a2, a2, 4
|
||||
; ZVE32F-NEXT: addi a3, a3, 4
|
||||
; ZVE32F-NEXT: addi a4, a4, -4
|
||||
; ZVE32F-NEXT: addi a0, a0, 32
|
||||
; ZVE32F-NEXT: bnez a4, .LBB10_1
|
||||
; ZVE32F-NEXT: # %bb.2:
|
||||
; ZVE32F-NEXT: ret
|
||||
br label %3
|
||||
|
||||
3: ; preds = %3, %2
|
||||
|
@ -604,24 +682,51 @@ declare <2 x i32*> @llvm.masked.gather.v2p0i32.v2p0p0i32(<2 x i32**>, i32 immarg
|
|||
; Make sure we don't crash in getTgtMemIntrinsic for a vector of pointers.
|
||||
define void @scatter_of_pointers(i32** noalias nocapture %0, i32** noalias nocapture readonly %1) {
|
||||
;
|
||||
; CHECK-LABEL: scatter_of_pointers:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: li a2, 1024
|
||||
; CHECK-NEXT: li a3, 40
|
||||
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
|
||||
; CHECK-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: vle64.v v8, (a1)
|
||||
; CHECK-NEXT: addi a4, a1, 16
|
||||
; CHECK-NEXT: vle64.v v9, (a4)
|
||||
; CHECK-NEXT: addi a4, a0, 80
|
||||
; CHECK-NEXT: vsse64.v v8, (a0), a3
|
||||
; CHECK-NEXT: vsse64.v v9, (a4), a3
|
||||
; CHECK-NEXT: addi a2, a2, -4
|
||||
; CHECK-NEXT: addi a1, a1, 32
|
||||
; CHECK-NEXT: addi a0, a0, 160
|
||||
; CHECK-NEXT: bnez a2, .LBB11_1
|
||||
; CHECK-NEXT: # %bb.2:
|
||||
; CHECK-NEXT: ret
|
||||
; V-LABEL: scatter_of_pointers:
|
||||
; V: # %bb.0:
|
||||
; V-NEXT: li a2, 1024
|
||||
; V-NEXT: li a3, 40
|
||||
; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
|
||||
; V-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
|
||||
; V-NEXT: vle64.v v8, (a1)
|
||||
; V-NEXT: addi a4, a1, 16
|
||||
; V-NEXT: vle64.v v9, (a4)
|
||||
; V-NEXT: addi a4, a0, 80
|
||||
; V-NEXT: vsse64.v v8, (a0), a3
|
||||
; V-NEXT: vsse64.v v9, (a4), a3
|
||||
; V-NEXT: addi a2, a2, -4
|
||||
; V-NEXT: addi a1, a1, 32
|
||||
; V-NEXT: addi a0, a0, 160
|
||||
; V-NEXT: bnez a2, .LBB11_1
|
||||
; V-NEXT: # %bb.2:
|
||||
; V-NEXT: ret
|
||||
;
|
||||
; ZVE32F-LABEL: scatter_of_pointers:
|
||||
; ZVE32F: # %bb.0:
|
||||
; ZVE32F-NEXT: li a2, 0
|
||||
; ZVE32F-NEXT: li a3, 1
|
||||
; ZVE32F-NEXT: li a4, 1024
|
||||
; ZVE32F-NEXT: li a5, 40
|
||||
; ZVE32F-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
|
||||
; ZVE32F-NEXT: ld a6, 8(a1)
|
||||
; ZVE32F-NEXT: ld a7, 0(a1)
|
||||
; ZVE32F-NEXT: ld t0, 24(a1)
|
||||
; ZVE32F-NEXT: ld t1, 16(a1)
|
||||
; ZVE32F-NEXT: mul t2, a3, a5
|
||||
; ZVE32F-NEXT: add t2, a0, t2
|
||||
; ZVE32F-NEXT: mul t3, a2, a5
|
||||
; ZVE32F-NEXT: add t3, a0, t3
|
||||
; ZVE32F-NEXT: sd a7, 0(t3)
|
||||
; ZVE32F-NEXT: sd a6, 0(t2)
|
||||
; ZVE32F-NEXT: sd t1, 80(t3)
|
||||
; ZVE32F-NEXT: sd t0, 80(t2)
|
||||
; ZVE32F-NEXT: addi a2, a2, 4
|
||||
; ZVE32F-NEXT: addi a3, a3, 4
|
||||
; ZVE32F-NEXT: addi a4, a4, -4
|
||||
; ZVE32F-NEXT: addi a1, a1, 32
|
||||
; ZVE32F-NEXT: bnez a4, .LBB11_1
|
||||
; ZVE32F-NEXT: # %bb.2:
|
||||
; ZVE32F-NEXT: ret
|
||||
br label %3
|
||||
|
||||
3: ; preds = %3, %2
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | FileCheck %s
|
||||
; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefixes=CHECK,V
|
||||
; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+f,+zve32f -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefixes=CHECK,ZVE32F
|
||||
|
||||
%struct.foo = type { i32, i32, i32, i32 }
|
||||
|
||||
|
@ -575,29 +576,54 @@ declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32 immar
|
|||
; Make sure we don't crash in getTgtMemIntrinsic for a vector of pointers.
|
||||
define void @gather_of_pointers(i32** noalias nocapture %0, i32** noalias nocapture readonly %1) {
|
||||
;
|
||||
; CHECK-LABEL: @gather_of_pointers(
|
||||
; CHECK-NEXT: br label [[TMP3:%.*]]
|
||||
; CHECK: 3:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[TMP13:%.*]], [[TMP3]] ]
|
||||
; CHECK-NEXT: [[DOTSCALAR:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[DOTSCALAR1:%.*]], [[TMP3]] ]
|
||||
; CHECK-NEXT: [[DOTSCALAR2:%.*]] = phi i64 [ 10, [[TMP2]] ], [ [[DOTSCALAR3:%.*]], [[TMP3]] ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32*, i32** [[TMP1:%.*]], i64 [[DOTSCALAR]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32*, i32** [[TMP1]], i64 [[DOTSCALAR2]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x i32*> @llvm.riscv.masked.strided.load.v2p0i32.p0p0i32.i64(<2 x i32*> undef, i32** [[TMP5]], i64 40, <2 x i1> <i1 true, i1 true>)
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i32*> @llvm.riscv.masked.strided.load.v2p0i32.p0p0i32.i64(<2 x i32*> undef, i32** [[TMP6]], i64 40, <2 x i1> <i1 true, i1 true>)
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32*, i32** [[TMP0:%.*]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32** [[TMP9]] to <2 x i32*>*
|
||||
; CHECK-NEXT: store <2 x i32*> [[TMP7]], <2 x i32*>* [[TMP10]], align 8
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32*, i32** [[TMP9]], i64 2
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32** [[TMP11]] to <2 x i32*>*
|
||||
; CHECK-NEXT: store <2 x i32*> [[TMP8]], <2 x i32*>* [[TMP12]], align 8
|
||||
; CHECK-NEXT: [[TMP13]] = add nuw i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[DOTSCALAR1]] = add i64 [[DOTSCALAR]], 20
|
||||
; CHECK-NEXT: [[DOTSCALAR3]] = add i64 [[DOTSCALAR2]], 20
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 1024
|
||||
; CHECK-NEXT: br i1 [[TMP14]], label [[TMP15:%.*]], label [[TMP3]]
|
||||
; CHECK: 15:
|
||||
; CHECK-NEXT: ret void
|
||||
; V-LABEL: @gather_of_pointers(
|
||||
; V-NEXT: br label [[TMP3:%.*]]
|
||||
; V: 3:
|
||||
; V-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[TMP13:%.*]], [[TMP3]] ]
|
||||
; V-NEXT: [[DOTSCALAR:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[DOTSCALAR1:%.*]], [[TMP3]] ]
|
||||
; V-NEXT: [[DOTSCALAR2:%.*]] = phi i64 [ 10, [[TMP2]] ], [ [[DOTSCALAR3:%.*]], [[TMP3]] ]
|
||||
; V-NEXT: [[TMP5:%.*]] = getelementptr i32*, i32** [[TMP1:%.*]], i64 [[DOTSCALAR]]
|
||||
; V-NEXT: [[TMP6:%.*]] = getelementptr i32*, i32** [[TMP1]], i64 [[DOTSCALAR2]]
|
||||
; V-NEXT: [[TMP7:%.*]] = call <2 x i32*> @llvm.riscv.masked.strided.load.v2p0i32.p0p0i32.i64(<2 x i32*> undef, i32** [[TMP5]], i64 40, <2 x i1> <i1 true, i1 true>)
|
||||
; V-NEXT: [[TMP8:%.*]] = call <2 x i32*> @llvm.riscv.masked.strided.load.v2p0i32.p0p0i32.i64(<2 x i32*> undef, i32** [[TMP6]], i64 40, <2 x i1> <i1 true, i1 true>)
|
||||
; V-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32*, i32** [[TMP0:%.*]], i64 [[TMP4]]
|
||||
; V-NEXT: [[TMP10:%.*]] = bitcast i32** [[TMP9]] to <2 x i32*>*
|
||||
; V-NEXT: store <2 x i32*> [[TMP7]], <2 x i32*>* [[TMP10]], align 8
|
||||
; V-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32*, i32** [[TMP9]], i64 2
|
||||
; V-NEXT: [[TMP12:%.*]] = bitcast i32** [[TMP11]] to <2 x i32*>*
|
||||
; V-NEXT: store <2 x i32*> [[TMP8]], <2 x i32*>* [[TMP12]], align 8
|
||||
; V-NEXT: [[TMP13]] = add nuw i64 [[TMP4]], 4
|
||||
; V-NEXT: [[DOTSCALAR1]] = add i64 [[DOTSCALAR]], 20
|
||||
; V-NEXT: [[DOTSCALAR3]] = add i64 [[DOTSCALAR2]], 20
|
||||
; V-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 1024
|
||||
; V-NEXT: br i1 [[TMP14]], label [[TMP15:%.*]], label [[TMP3]]
|
||||
; V: 15:
|
||||
; V-NEXT: ret void
|
||||
;
|
||||
; ZVE32F-LABEL: @gather_of_pointers(
|
||||
; ZVE32F-NEXT: br label [[TMP3:%.*]]
|
||||
; ZVE32F: 3:
|
||||
; ZVE32F-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[TMP17:%.*]], [[TMP3]] ]
|
||||
; ZVE32F-NEXT: [[TMP5:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[TMP2]] ], [ [[TMP18:%.*]], [[TMP3]] ]
|
||||
; ZVE32F-NEXT: [[TMP6:%.*]] = mul nuw nsw <2 x i64> [[TMP5]], <i64 5, i64 5>
|
||||
; ZVE32F-NEXT: [[TMP7:%.*]] = mul <2 x i64> [[TMP5]], <i64 5, i64 5>
|
||||
; ZVE32F-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP7]], <i64 10, i64 10>
|
||||
; ZVE32F-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32*, i32** [[TMP1:%.*]], <2 x i64> [[TMP6]]
|
||||
; ZVE32F-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32*, i32** [[TMP1]], <2 x i64> [[TMP8]]
|
||||
; ZVE32F-NEXT: [[TMP11:%.*]] = call <2 x i32*> @llvm.masked.gather.v2p0i32.v2p0p0i32(<2 x i32**> [[TMP9]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x i32*> undef)
|
||||
; ZVE32F-NEXT: [[TMP12:%.*]] = call <2 x i32*> @llvm.masked.gather.v2p0i32.v2p0p0i32(<2 x i32**> [[TMP10]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x i32*> undef)
|
||||
; ZVE32F-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32*, i32** [[TMP0:%.*]], i64 [[TMP4]]
|
||||
; ZVE32F-NEXT: [[TMP14:%.*]] = bitcast i32** [[TMP13]] to <2 x i32*>*
|
||||
; ZVE32F-NEXT: store <2 x i32*> [[TMP11]], <2 x i32*>* [[TMP14]], align 8
|
||||
; ZVE32F-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32*, i32** [[TMP13]], i64 2
|
||||
; ZVE32F-NEXT: [[TMP16:%.*]] = bitcast i32** [[TMP15]] to <2 x i32*>*
|
||||
; ZVE32F-NEXT: store <2 x i32*> [[TMP12]], <2 x i32*>* [[TMP16]], align 8
|
||||
; ZVE32F-NEXT: [[TMP17]] = add nuw i64 [[TMP4]], 4
|
||||
; ZVE32F-NEXT: [[TMP18]] = add <2 x i64> [[TMP5]], <i64 4, i64 4>
|
||||
; ZVE32F-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP17]], 1024
|
||||
; ZVE32F-NEXT: br i1 [[TMP19]], label [[TMP20:%.*]], label [[TMP3]]
|
||||
; ZVE32F: 20:
|
||||
; ZVE32F-NEXT: ret void
|
||||
;
|
||||
br label %3
|
||||
|
||||
|
@ -631,29 +657,54 @@ declare <2 x i32*> @llvm.masked.gather.v2p0i32.v2p0p0i32(<2 x i32**>, i32 immarg
|
|||
; Make sure we don't crash in getTgtMemIntrinsic for a vector of pointers.
|
||||
define void @scatter_of_pointers(i32** noalias nocapture %0, i32** noalias nocapture readonly %1) {
|
||||
;
|
||||
; CHECK-LABEL: @scatter_of_pointers(
|
||||
; CHECK-NEXT: br label [[TMP3:%.*]]
|
||||
; CHECK: 3:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[TMP13:%.*]], [[TMP3]] ]
|
||||
; CHECK-NEXT: [[DOTSCALAR:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[DOTSCALAR1:%.*]], [[TMP3]] ]
|
||||
; CHECK-NEXT: [[DOTSCALAR2:%.*]] = phi i64 [ 10, [[TMP2]] ], [ [[DOTSCALAR3:%.*]], [[TMP3]] ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32*, i32** [[TMP1:%.*]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32** [[TMP5]] to <2 x i32*>*
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP6]], align 8
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32*, i32** [[TMP5]], i64 2
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32** [[TMP8]] to <2 x i32*>*
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP9]], align 8
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32*, i32** [[TMP0:%.*]], i64 [[DOTSCALAR]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32*, i32** [[TMP0]], i64 [[DOTSCALAR2]]
|
||||
; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v2p0i32.p0p0i32.i64(<2 x i32*> [[TMP7]], i32** [[TMP11]], i64 40, <2 x i1> <i1 true, i1 true>)
|
||||
; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v2p0i32.p0p0i32.i64(<2 x i32*> [[TMP10]], i32** [[TMP12]], i64 40, <2 x i1> <i1 true, i1 true>)
|
||||
; CHECK-NEXT: [[TMP13]] = add nuw i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: [[DOTSCALAR1]] = add i64 [[DOTSCALAR]], 20
|
||||
; CHECK-NEXT: [[DOTSCALAR3]] = add i64 [[DOTSCALAR2]], 20
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 1024
|
||||
; CHECK-NEXT: br i1 [[TMP14]], label [[TMP15:%.*]], label [[TMP3]]
|
||||
; CHECK: 15:
|
||||
; CHECK-NEXT: ret void
|
||||
; V-LABEL: @scatter_of_pointers(
|
||||
; V-NEXT: br label [[TMP3:%.*]]
|
||||
; V: 3:
|
||||
; V-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[TMP13:%.*]], [[TMP3]] ]
|
||||
; V-NEXT: [[DOTSCALAR:%.*]] = phi i64 [ 0, [[TMP2]] ], [ [[DOTSCALAR1:%.*]], [[TMP3]] ]
|
||||
; V-NEXT: [[DOTSCALAR2:%.*]] = phi i64 [ 10, [[TMP2]] ], [ [[DOTSCALAR3:%.*]], [[TMP3]] ]
|
||||
; V-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32*, i32** [[TMP1:%.*]], i64 [[TMP4]]
|
||||
; V-NEXT: [[TMP6:%.*]] = bitcast i32** [[TMP5]] to <2 x i32*>*
|
||||
; V-NEXT: [[TMP7:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP6]], align 8
|
||||
; V-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32*, i32** [[TMP5]], i64 2
|
||||
; V-NEXT: [[TMP9:%.*]] = bitcast i32** [[TMP8]] to <2 x i32*>*
|
||||
; V-NEXT: [[TMP10:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP9]], align 8
|
||||
; V-NEXT: [[TMP11:%.*]] = getelementptr i32*, i32** [[TMP0:%.*]], i64 [[DOTSCALAR]]
|
||||
; V-NEXT: [[TMP12:%.*]] = getelementptr i32*, i32** [[TMP0]], i64 [[DOTSCALAR2]]
|
||||
; V-NEXT: call void @llvm.riscv.masked.strided.store.v2p0i32.p0p0i32.i64(<2 x i32*> [[TMP7]], i32** [[TMP11]], i64 40, <2 x i1> <i1 true, i1 true>)
|
||||
; V-NEXT: call void @llvm.riscv.masked.strided.store.v2p0i32.p0p0i32.i64(<2 x i32*> [[TMP10]], i32** [[TMP12]], i64 40, <2 x i1> <i1 true, i1 true>)
|
||||
; V-NEXT: [[TMP13]] = add nuw i64 [[TMP4]], 4
|
||||
; V-NEXT: [[DOTSCALAR1]] = add i64 [[DOTSCALAR]], 20
|
||||
; V-NEXT: [[DOTSCALAR3]] = add i64 [[DOTSCALAR2]], 20
|
||||
; V-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 1024
|
||||
; V-NEXT: br i1 [[TMP14]], label [[TMP15:%.*]], label [[TMP3]]
|
||||
; V: 15:
|
||||
; V-NEXT: ret void
|
||||
;
|
||||
; ZVE32F-LABEL: @scatter_of_pointers(
|
||||
; ZVE32F-NEXT: br label [[TMP3:%.*]]
|
||||
; ZVE32F: 3:
|
||||
; ZVE32F-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[TMP17:%.*]], [[TMP3]] ]
|
||||
; ZVE32F-NEXT: [[TMP5:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[TMP2]] ], [ [[TMP18:%.*]], [[TMP3]] ]
|
||||
; ZVE32F-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32*, i32** [[TMP1:%.*]], i64 [[TMP4]]
|
||||
; ZVE32F-NEXT: [[TMP7:%.*]] = bitcast i32** [[TMP6]] to <2 x i32*>*
|
||||
; ZVE32F-NEXT: [[TMP8:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP7]], align 8
|
||||
; ZVE32F-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32*, i32** [[TMP6]], i64 2
|
||||
; ZVE32F-NEXT: [[TMP10:%.*]] = bitcast i32** [[TMP9]] to <2 x i32*>*
|
||||
; ZVE32F-NEXT: [[TMP11:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP10]], align 8
|
||||
; ZVE32F-NEXT: [[TMP12:%.*]] = mul nuw nsw <2 x i64> [[TMP5]], <i64 5, i64 5>
|
||||
; ZVE32F-NEXT: [[TMP13:%.*]] = mul <2 x i64> [[TMP5]], <i64 5, i64 5>
|
||||
; ZVE32F-NEXT: [[TMP14:%.*]] = add <2 x i64> [[TMP13]], <i64 10, i64 10>
|
||||
; ZVE32F-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32*, i32** [[TMP0:%.*]], <2 x i64> [[TMP12]]
|
||||
; ZVE32F-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32*, i32** [[TMP0]], <2 x i64> [[TMP14]]
|
||||
; ZVE32F-NEXT: call void @llvm.masked.scatter.v2p0i32.v2p0p0i32(<2 x i32*> [[TMP8]], <2 x i32**> [[TMP15]], i32 8, <2 x i1> <i1 true, i1 true>)
|
||||
; ZVE32F-NEXT: call void @llvm.masked.scatter.v2p0i32.v2p0p0i32(<2 x i32*> [[TMP11]], <2 x i32**> [[TMP16]], i32 8, <2 x i1> <i1 true, i1 true>)
|
||||
; ZVE32F-NEXT: [[TMP17]] = add nuw i64 [[TMP4]], 4
|
||||
; ZVE32F-NEXT: [[TMP18]] = add <2 x i64> [[TMP5]], <i64 4, i64 4>
|
||||
; ZVE32F-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP17]], 1024
|
||||
; ZVE32F-NEXT: br i1 [[TMP19]], label [[TMP20:%.*]], label [[TMP3]]
|
||||
; ZVE32F: 20:
|
||||
; ZVE32F-NEXT: ret void
|
||||
;
|
||||
br label %3
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue