2014-04-04 00:01:44 +08:00
|
|
|
; RUN: llc -mtriple=arm-eabi -mattr=+neon -print-before=post-RA-sched %s -o - 2>&1 \
|
|
|
|
; RUN: | FileCheck %s
|
PR 18466: Fix ARM Pseudo Expansion
When expanding neon pseudo stores, it may miss the implicit uses of sub
regs, which may cause post RA scheduler reorder instructions that
breakes anti dependency.
For example:
VST1d64QPseudo %R0<kill>, 16, %Q9_Q10, pred:14, pred:%noreg
will be expanded to
VST1d64Q %R0<kill>, 16, %D18, pred:14, pred:%noreg;
An instruction that defines %D20 may be scheduled before the store by
mistake.
This patches adds implicit uses for such case. For the example above, it
emits:
VST1d64Q %R0<kill>, 8, %D18, pred:14, pred:%noreg, %Q9_Q10<imp-use>
llvm-svn: 199282
2014-01-15 09:32:12 +08:00
|
|
|
|
|
|
|
define void @vst(i8* %m, [4 x i64] %v) {
|
|
|
|
entry:
|
|
|
|
; CHECK: vst:
|
2018-02-01 06:04:26 +08:00
|
|
|
; CHECK: VST1d64Q killed $r{{[0-9]+}}, 8, $d{{[0-9]+}}, 14, $noreg, implicit killed $q{{[0-9]+}}_q{{[0-9]+}}
|
PR 18466: Fix ARM Pseudo Expansion
When expanding neon pseudo stores, it may miss the implicit uses of sub
regs, which may cause post RA scheduler reorder instructions that
breakes anti dependency.
For example:
VST1d64QPseudo %R0<kill>, 16, %Q9_Q10, pred:14, pred:%noreg
will be expanded to
VST1d64Q %R0<kill>, 16, %D18, pred:14, pred:%noreg;
An instruction that defines %D20 may be scheduled before the store by
mistake.
This patches adds implicit uses for such case. For the example above, it
emits:
VST1d64Q %R0<kill>, 8, %D18, pred:14, pred:%noreg, %Q9_Q10<imp-use>
llvm-svn: 199282
2014-01-15 09:32:12 +08:00
|
|
|
|
|
|
|
%v0 = extractvalue [4 x i64] %v, 0
|
|
|
|
%v1 = extractvalue [4 x i64] %v, 1
|
|
|
|
%v2 = extractvalue [4 x i64] %v, 2
|
|
|
|
%v3 = extractvalue [4 x i64] %v, 3
|
|
|
|
|
|
|
|
%t0 = bitcast i64 %v0 to <8 x i8>
|
|
|
|
%t1 = bitcast i64 %v1 to <8 x i8>
|
|
|
|
%t2 = bitcast i64 %v2 to <8 x i8>
|
|
|
|
%t3 = bitcast i64 %v3 to <8 x i8>
|
|
|
|
|
|
|
|
%s0 = bitcast <8 x i8> %t0 to <1 x i64>
|
|
|
|
%s1 = bitcast <8 x i8> %t1 to <1 x i64>
|
|
|
|
%s2 = bitcast <8 x i8> %t2 to <1 x i64>
|
|
|
|
%s3 = bitcast <8 x i8> %t3 to <1 x i64>
|
|
|
|
|
|
|
|
%tmp0 = bitcast <1 x i64> %s2 to i64
|
|
|
|
%tmp1 = bitcast <1 x i64> %s3 to i64
|
|
|
|
|
|
|
|
%n0 = insertelement <2 x i64> undef, i64 %tmp0, i32 0
|
|
|
|
%n1 = insertelement <2 x i64> %n0, i64 %tmp1, i32 1
|
|
|
|
|
2015-09-30 18:56:37 +08:00
|
|
|
call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* %m, <1 x i64> %s0, <1 x i64> %s1, <1 x i64> %s2, <1 x i64> %s3, i32 8)
|
PR 18466: Fix ARM Pseudo Expansion
When expanding neon pseudo stores, it may miss the implicit uses of sub
regs, which may cause post RA scheduler reorder instructions that
breakes anti dependency.
For example:
VST1d64QPseudo %R0<kill>, 16, %Q9_Q10, pred:14, pred:%noreg
will be expanded to
VST1d64Q %R0<kill>, 16, %D18, pred:14, pred:%noreg;
An instruction that defines %D20 may be scheduled before the store by
mistake.
This patches adds implicit uses for such case. For the example above, it
emits:
VST1d64Q %R0<kill>, 8, %D18, pred:14, pred:%noreg, %Q9_Q10<imp-use>
llvm-svn: 199282
2014-01-15 09:32:12 +08:00
|
|
|
|
|
|
|
call void @bar(<2 x i64> %n1)
|
|
|
|
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
|
|
|
|
define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
|
|
|
|
; CHECK: vtbx4:
|
2018-02-01 06:04:26 +08:00
|
|
|
; CHECK: VTBX4 {{.*}}, 14, $noreg, implicit $q{{[0-9]+}}_q{{[0-9]+}}
|
2015-02-28 05:17:42 +08:00
|
|
|
%tmp1 = load <8 x i8>, <8 x i8>* %A
|
|
|
|
%tmp2 = load %struct.__neon_int8x8x4_t, %struct.__neon_int8x8x4_t* %B
|
PR 18466: Fix ARM Pseudo Expansion
When expanding neon pseudo stores, it may miss the implicit uses of sub
regs, which may cause post RA scheduler reorder instructions that
breakes anti dependency.
For example:
VST1d64QPseudo %R0<kill>, 16, %Q9_Q10, pred:14, pred:%noreg
will be expanded to
VST1d64Q %R0<kill>, 16, %D18, pred:14, pred:%noreg;
An instruction that defines %D20 may be scheduled before the store by
mistake.
This patches adds implicit uses for such case. For the example above, it
emits:
VST1d64Q %R0<kill>, 8, %D18, pred:14, pred:%noreg, %Q9_Q10<imp-use>
llvm-svn: 199282
2014-01-15 09:32:12 +08:00
|
|
|
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
|
|
|
|
%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
|
|
|
|
%tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
|
|
|
|
%tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
|
2015-02-28 05:17:42 +08:00
|
|
|
%tmp7 = load <8 x i8>, <8 x i8>* %C
|
PR 18466: Fix ARM Pseudo Expansion
When expanding neon pseudo stores, it may miss the implicit uses of sub
regs, which may cause post RA scheduler reorder instructions that
breakes anti dependency.
For example:
VST1d64QPseudo %R0<kill>, 16, %Q9_Q10, pred:14, pred:%noreg
will be expanded to
VST1d64Q %R0<kill>, 16, %D18, pred:14, pred:%noreg;
An instruction that defines %D20 may be scheduled before the store by
mistake.
This patches adds implicit uses for such case. For the example above, it
emits:
VST1d64Q %R0<kill>, 8, %D18, pred:14, pred:%noreg, %Q9_Q10<imp-use>
llvm-svn: 199282
2014-01-15 09:32:12 +08:00
|
|
|
%tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
|
|
|
|
call void @bar2(%struct.__neon_int8x8x4_t %tmp2, <8 x i8> %tmp8)
|
|
|
|
ret <8 x i8> %tmp8
|
|
|
|
}
|
|
|
|
|
2015-09-30 18:56:37 +08:00
|
|
|
declare void @llvm.arm.neon.vst4.p0i8.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
|
PR 18466: Fix ARM Pseudo Expansion
When expanding neon pseudo stores, it may miss the implicit uses of sub
regs, which may cause post RA scheduler reorder instructions that
breakes anti dependency.
For example:
VST1d64QPseudo %R0<kill>, 16, %Q9_Q10, pred:14, pred:%noreg
will be expanded to
VST1d64Q %R0<kill>, 16, %D18, pred:14, pred:%noreg;
An instruction that defines %D20 may be scheduled before the store by
mistake.
This patches adds implicit uses for such case. For the example above, it
emits:
VST1d64Q %R0<kill>, 8, %D18, pred:14, pred:%noreg, %Q9_Q10<imp-use>
llvm-svn: 199282
2014-01-15 09:32:12 +08:00
|
|
|
declare <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
|
|
|
|
declare void @bar2(%struct.__neon_int8x8x4_t, <8 x i8>)
|
|
|
|
declare void @bar(<2 x i64> %arg)
|