diff --git a/llvm/lib/Target/Hexagon/HexagonCallingConv.td b/llvm/lib/Target/Hexagon/HexagonCallingConv.td index 93e17e608dd1..cc41b569e490 100644 --- a/llvm/lib/Target/Hexagon/HexagonCallingConv.td +++ b/llvm/lib/Target/Hexagon/HexagonCallingConv.td @@ -126,16 +126,16 @@ def CC_Hexagon_HVX: CallingConv<[ // HVX 128-byte mode CCIfHvx128< - CCIfType<[v32i32,v64i16,v128i8], + CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16], CCAssignToReg<[V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15]>>>, CCIfHvx128< - CCIfType<[v64i32,v128i16,v256i8], + CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16], CCAssignToReg<[W0,W1,W2,W3,W4,W5,W6,W7]>>>, CCIfHvx128< - CCIfType<[v32i32,v64i16,v128i8], + CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16], CCAssignToStack<128,128>>>, CCIfHvx128< - CCIfType<[v64i32,v128i16,v256i8], + CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16], CCAssignToStack<256,128>>>, CCDelegateTo @@ -152,10 +152,10 @@ def RetCC_Hexagon_HVX: CallingConv<[ // HVX 128-byte mode CCIfHvx128< - CCIfType<[v32i32,v64i16,v128i8], + CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16], CCAssignToReg<[V0]>>>, CCIfHvx128< - CCIfType<[v64i32,v128i16,v256i8], + CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16], CCAssignToReg<[W0]>>>, CCDelegateTo diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 28f7c5414a2a..a3a9097378e7 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -59,6 +59,7 @@ HexagonTargetLowering::initializeHVXLowering() { addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass); + addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass); } } @@ -104,6 +105,9 @@ HexagonTargetLowering::initializeHVXLowering() { // independent) handling of it would convert it to a load, which is // not always the optimal choice. setOperationAction(ISD::BUILD_VECTOR, MVT::v64f32, Custom); + // Make concat-vectors custom to handle concats of more than 2 vectors. + setOperationAction(ISD::CONCAT_VECTORS, MVT::v128f16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64f32, Custom); } for (MVT T : LegalV) { diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index ad8029687770..8d94a9978831 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -265,9 +265,7 @@ let Predicates = [UseHVX] in { // These should be preferred over a vsplat of 0. def: Pat<(VecI8 vzero), (V6_vd0)>; def: Pat<(VecI16 vzero), (V6_vd0)>; - def: Pat<(VecF16 vzero), (V6_vd0)>; def: Pat<(VecI32 vzero), (V6_vd0)>; - def: Pat<(VecF32 vzero), (V6_vd0)>; def: Pat<(VecPI8 vzero), (PS_vdd0)>; def: Pat<(VecPI16 vzero), (PS_vdd0)>; def: Pat<(VecPI32 vzero), (PS_vdd0)>; @@ -303,7 +301,22 @@ let Predicates = [UseHVX] in { (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; } -let Predicates = [UseHVXFloatingPoint] in { +let Predicates = [UseHVX, UseHVXFloatingPoint] in { + let AddedComplexity = 100 in { + def: Pat<(VecF16 vzero), (V6_vd0)>; + def: Pat<(VecF32 vzero), (V6_vd0)>; + def: Pat<(VecPF16 vzero), (PS_vdd0)>; + def: Pat<(VecPF32 vzero), (PS_vdd0)>; + + def: Pat<(concat_vectors (VecF16 vzero), (VecF16 vzero)), (PS_vdd0)>; + def: Pat<(concat_vectors (VecF32 vzero), (VecF32 vzero)), (PS_vdd0)>; + } + + def: Pat<(VecPF16 (concat_vectors HVF16:$Vs, HVF16:$Vt)), + (Combinev HvxVR:$Vt, HvxVR:$Vs)>; + def: Pat<(VecPF32 (concat_vectors HVF32:$Vs, HVF32:$Vt)), + (Combinev HvxVR:$Vt, HvxVR:$Vs)>; + def: Pat<(HexagonVINSERTW0 HVF16:$Vu, I32:$Rt), (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; def: Pat<(HexagonVINSERTW0 HVF32:$Vu, I32:$Rt), diff --git a/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll b/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll new file mode 100644 index 000000000000..884eb6e7ac75 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/calling-conv.ll @@ -0,0 +1,1528 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +define void @f0(<128 x i8> %a0, <128 x i8>* %a1) #0 { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a1, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + ret void +} + +define void @f1(<128 x i8> %a0, <128 x i8> %a1, <128 x i8>* %a2) #0 { +; CHECK-LABEL: f1: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a2, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a2, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + ret void +} + +define void @f2(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8>* %a3) #0 { +; CHECK-LABEL: f2: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a3, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a3, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a3, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + ret void +} + +define void @f3(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8>* %a4) #0 { +; CHECK-LABEL: f3: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a4, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a4, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a4, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a4, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + ret void +} + +define void @f4(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8>* %a5) #0 { +; CHECK-LABEL: f4: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a5, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + ret void +} + +define void @f5(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8>* %a6) #0 { +; CHECK-LABEL: f5: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a6, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + ret void +} + +define void @f6(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8>* %a7) #0 { +; CHECK-LABEL: f6: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a7, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + ret void +} + +define void @f7(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8>* %a8) #0 { +; CHECK-LABEL: f7: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a8, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + ret void +} + +define void @f8(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8>* %a9) #0 { +; CHECK-LABEL: f8: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a9, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + ret void +} + +define void @f9(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8>* %a10) #0 { +; CHECK-LABEL: f9: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a10, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + ret void +} + +define void @f10(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8>* %a11) #0 { +; CHECK-LABEL: f10: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + %v10 = getelementptr <128 x i8>, <128 x i8>* %a11, i32 10 + store <128 x i8> %a10, <128 x i8>* %v10, align 128 + ret void +} + +define void @f11(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8>* %a12) #0 { +; CHECK-LABEL: f11: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + %v10 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 10 + store <128 x i8> %a10, <128 x i8>* %v10, align 128 + %v11 = getelementptr <128 x i8>, <128 x i8>* %a12, i32 11 + store <128 x i8> %a11, <128 x i8>* %v11, align 128 + ret void +} + +define void @f12(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8>* %a13) #0 { +; CHECK-LABEL: f12: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = add(r0,#1536) +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r5+#0) = v12 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + %v10 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 10 + store <128 x i8> %a10, <128 x i8>* %v10, align 128 + %v11 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 11 + store <128 x i8> %a11, <128 x i8>* %v11, align 128 + %v12 = getelementptr <128 x i8>, <128 x i8>* %a13, i32 12 + store <128 x i8> %a12, <128 x i8>* %v12, align 128 + ret void +} + +define void @f13(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8> %a13, <128 x i8>* %a14) #0 { +; CHECK-LABEL: f13: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1664) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r3 = add(r0,#1536) +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r2+#0) = v13 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + %v10 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 10 + store <128 x i8> %a10, <128 x i8>* %v10, align 128 + %v11 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 11 + store <128 x i8> %a11, <128 x i8>* %v11, align 128 + %v12 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 12 + store <128 x i8> %a12, <128 x i8>* %v12, align 128 + %v13 = getelementptr <128 x i8>, <128 x i8>* %a14, i32 13 + store <128 x i8> %a13, <128 x i8>* %v13, align 128 + ret void +} + +define void @f14(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8> %a13, <128 x i8> %a14, <128 x i8>* %a15) #0 { +; CHECK-LABEL: f14: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1792) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r3 = add(r0,#1664) +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = add(r0,#1536) +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v13 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r2+#0) = v14 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + %v10 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 10 + store <128 x i8> %a10, <128 x i8>* %v10, align 128 + %v11 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 11 + store <128 x i8> %a11, <128 x i8>* %v11, align 128 + %v12 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 12 + store <128 x i8> %a12, <128 x i8>* %v12, align 128 + %v13 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 13 + store <128 x i8> %a13, <128 x i8>* %v13, align 128 + %v14 = getelementptr <128 x i8>, <128 x i8>* %a15, i32 14 + store <128 x i8> %a14, <128 x i8>* %v14, align 128 + ret void +} + +define void @f15(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8> %a13, <128 x i8> %a14, <128 x i8> %a15, <128 x i8>* %a16) #0 { +; CHECK-LABEL: f15: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r1 = add(r0,#1024) +; CHECK-NEXT: r6 = add(r0,#1408) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = add(r0,#1536) +; CHECK-NEXT: r4 = add(r0,#1664) +; CHECK-NEXT: r2 = add(r0,#1920) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r3 = add(r0,#1792) +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r1+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r6+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r5+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v13 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v14 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: vmem(r2+#0) = v15 +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + %v10 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 10 + store <128 x i8> %a10, <128 x i8>* %v10, align 128 + %v11 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 11 + store <128 x i8> %a11, <128 x i8>* %v11, align 128 + %v12 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 12 + store <128 x i8> %a12, <128 x i8>* %v12, align 128 + %v13 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 13 + store <128 x i8> %a13, <128 x i8>* %v13, align 128 + %v14 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 14 + store <128 x i8> %a14, <128 x i8>* %v14, align 128 + %v15 = getelementptr <128 x i8>, <128 x i8>* %a16, i32 15 + store <128 x i8> %a15, <128 x i8>* %v15, align 128 + ret void +} + +define void @f16(<128 x i8> %a0, <128 x i8> %a1, <128 x i8> %a2, <128 x i8> %a3, <128 x i8> %a4, <128 x i8> %a5, <128 x i8> %a6, <128 x i8> %a7, <128 x i8> %a8, <128 x i8> %a9, <128 x i8> %a10, <128 x i8> %a11, <128 x i8> %a12, <128 x i8> %a13, <128 x i8> %a14, <128 x i8> %a15, <128 x i8> %a16, <128 x i8>* %a17) #0 { +; CHECK-LABEL: f16: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: allocframe(r29,#0):raw +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r30,#8) +; CHECK-NEXT: r7 = add(r0,#1024) +; CHECK-NEXT: r6 = add(r0,#1536) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = add(r0,#1664) +; CHECK-NEXT: r3 = add(r0,#1920) +; CHECK-NEXT: r2 = add(r0,#2048) +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = add(r0,#1792) +; CHECK-NEXT: r29 = and(r29,#-128) +; CHECK-NEXT: v16 = vmem(r1+#0) +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r7+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r6+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r5+#0) = v13 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v14 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v15 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v16 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r31:30 = dealloc_return(r30):raw +; CHECK-NEXT: } +b0: + %v0 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 0 + store <128 x i8> %a0, <128 x i8>* %v0, align 128 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 1 + store <128 x i8> %a1, <128 x i8>* %v1, align 128 + %v2 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 2 + store <128 x i8> %a2, <128 x i8>* %v2, align 128 + %v3 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 3 + store <128 x i8> %a3, <128 x i8>* %v3, align 128 + %v4 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 4 + store <128 x i8> %a4, <128 x i8>* %v4, align 128 + %v5 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 5 + store <128 x i8> %a5, <128 x i8>* %v5, align 128 + %v6 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 6 + store <128 x i8> %a6, <128 x i8>* %v6, align 128 + %v7 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 7 + store <128 x i8> %a7, <128 x i8>* %v7, align 128 + %v8 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 8 + store <128 x i8> %a8, <128 x i8>* %v8, align 128 + %v9 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 9 + store <128 x i8> %a9, <128 x i8>* %v9, align 128 + %v10 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 10 + store <128 x i8> %a10, <128 x i8>* %v10, align 128 + %v11 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 11 + store <128 x i8> %a11, <128 x i8>* %v11, align 128 + %v12 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 12 + store <128 x i8> %a12, <128 x i8>* %v12, align 128 + %v13 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 13 + store <128 x i8> %a13, <128 x i8>* %v13, align 128 + %v14 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 14 + store <128 x i8> %a14, <128 x i8>* %v14, align 128 + %v15 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 15 + store <128 x i8> %a15, <128 x i8>* %v15, align 128 + %v16 = getelementptr <128 x i8>, <128 x i8>* %a17, i32 16 + store <128 x i8> %a16, <128 x i8>* %v16, align 128 + ret void +} + +define void @f17(<64 x i16> %a0, <64 x i16> %a1, <64 x i16> %a2, <64 x i16> %a3, <64 x i16> %a4, <64 x i16> %a5, <64 x i16> %a6, <64 x i16> %a7, <64 x i16> %a8, <64 x i16> %a9, <64 x i16> %a10, <64 x i16> %a11, <64 x i16> %a12, <64 x i16> %a13, <64 x i16> %a14, <64 x i16> %a15, <64 x i16> %a16, <64 x i16>* %a17) #0 { +; CHECK-LABEL: f17: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: allocframe(r29,#0):raw +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r30,#8) +; CHECK-NEXT: r7 = add(r0,#1024) +; CHECK-NEXT: r6 = add(r0,#1536) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = add(r0,#1664) +; CHECK-NEXT: r3 = add(r0,#1920) +; CHECK-NEXT: r2 = add(r0,#2048) +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = add(r0,#1792) +; CHECK-NEXT: r29 = and(r29,#-128) +; CHECK-NEXT: v16 = vmem(r1+#0) +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r7+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r6+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r5+#0) = v13 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v14 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v15 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v16 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r31:30 = dealloc_return(r30):raw +; CHECK-NEXT: } +b0: + %v0 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 0 + store <64 x i16> %a0, <64 x i16>* %v0, align 128 + %v1 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 1 + store <64 x i16> %a1, <64 x i16>* %v1, align 128 + %v2 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 2 + store <64 x i16> %a2, <64 x i16>* %v2, align 128 + %v3 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 3 + store <64 x i16> %a3, <64 x i16>* %v3, align 128 + %v4 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 4 + store <64 x i16> %a4, <64 x i16>* %v4, align 128 + %v5 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 5 + store <64 x i16> %a5, <64 x i16>* %v5, align 128 + %v6 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 6 + store <64 x i16> %a6, <64 x i16>* %v6, align 128 + %v7 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 7 + store <64 x i16> %a7, <64 x i16>* %v7, align 128 + %v8 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 8 + store <64 x i16> %a8, <64 x i16>* %v8, align 128 + %v9 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 9 + store <64 x i16> %a9, <64 x i16>* %v9, align 128 + %v10 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 10 + store <64 x i16> %a10, <64 x i16>* %v10, align 128 + %v11 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 11 + store <64 x i16> %a11, <64 x i16>* %v11, align 128 + %v12 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 12 + store <64 x i16> %a12, <64 x i16>* %v12, align 128 + %v13 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 13 + store <64 x i16> %a13, <64 x i16>* %v13, align 128 + %v14 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 14 + store <64 x i16> %a14, <64 x i16>* %v14, align 128 + %v15 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 15 + store <64 x i16> %a15, <64 x i16>* %v15, align 128 + %v16 = getelementptr <64 x i16>, <64 x i16>* %a17, i32 16 + store <64 x i16> %a16, <64 x i16>* %v16, align 128 + ret void +} + +define void @f18(<32 x i32> %a0, <32 x i32> %a1, <32 x i32> %a2, <32 x i32> %a3, <32 x i32> %a4, <32 x i32> %a5, <32 x i32> %a6, <32 x i32> %a7, <32 x i32> %a8, <32 x i32> %a9, <32 x i32> %a10, <32 x i32> %a11, <32 x i32> %a12, <32 x i32> %a13, <32 x i32> %a14, <32 x i32> %a15, <32 x i32> %a16, <32 x i32>* %a17) #0 { +; CHECK-LABEL: f18: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: allocframe(r29,#0):raw +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r30,#8) +; CHECK-NEXT: r7 = add(r0,#1024) +; CHECK-NEXT: r6 = add(r0,#1536) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = add(r0,#1664) +; CHECK-NEXT: r3 = add(r0,#1920) +; CHECK-NEXT: r2 = add(r0,#2048) +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = add(r0,#1792) +; CHECK-NEXT: r29 = and(r29,#-128) +; CHECK-NEXT: v16 = vmem(r1+#0) +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r7+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r6+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r5+#0) = v13 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v14 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v15 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v16 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r31:30 = dealloc_return(r30):raw +; CHECK-NEXT: } +b0: + %v0 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 0 + store <32 x i32> %a0, <32 x i32>* %v0, align 128 + %v1 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 1 + store <32 x i32> %a1, <32 x i32>* %v1, align 128 + %v2 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 2 + store <32 x i32> %a2, <32 x i32>* %v2, align 128 + %v3 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 3 + store <32 x i32> %a3, <32 x i32>* %v3, align 128 + %v4 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 4 + store <32 x i32> %a4, <32 x i32>* %v4, align 128 + %v5 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 5 + store <32 x i32> %a5, <32 x i32>* %v5, align 128 + %v6 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 6 + store <32 x i32> %a6, <32 x i32>* %v6, align 128 + %v7 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 7 + store <32 x i32> %a7, <32 x i32>* %v7, align 128 + %v8 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 8 + store <32 x i32> %a8, <32 x i32>* %v8, align 128 + %v9 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 9 + store <32 x i32> %a9, <32 x i32>* %v9, align 128 + %v10 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 10 + store <32 x i32> %a10, <32 x i32>* %v10, align 128 + %v11 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 11 + store <32 x i32> %a11, <32 x i32>* %v11, align 128 + %v12 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 12 + store <32 x i32> %a12, <32 x i32>* %v12, align 128 + %v13 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 13 + store <32 x i32> %a13, <32 x i32>* %v13, align 128 + %v14 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 14 + store <32 x i32> %a14, <32 x i32>* %v14, align 128 + %v15 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 15 + store <32 x i32> %a15, <32 x i32>* %v15, align 128 + %v16 = getelementptr <32 x i32>, <32 x i32>* %a17, i32 16 + store <32 x i32> %a16, <32 x i32>* %v16, align 128 + ret void +} + +define void @f19(<64 x half> %a0, <64 x half> %a1, <64 x half> %a2, <64 x half> %a3, <64 x half> %a4, <64 x half> %a5, <64 x half> %a6, <64 x half> %a7, <64 x half> %a8, <64 x half> %a9, <64 x half> %a10, <64 x half> %a11, <64 x half> %a12, <64 x half> %a13, <64 x half> %a14, <64 x half> %a15, <64 x half> %a16, <64 x half>* %a17) #0 { +; CHECK-LABEL: f19: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: allocframe(r29,#0):raw +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r30,#8) +; CHECK-NEXT: r7 = add(r0,#1024) +; CHECK-NEXT: r6 = add(r0,#1536) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = add(r0,#1664) +; CHECK-NEXT: r3 = add(r0,#1920) +; CHECK-NEXT: r2 = add(r0,#2048) +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = add(r0,#1792) +; CHECK-NEXT: r29 = and(r29,#-128) +; CHECK-NEXT: v16 = vmem(r1+#0) +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r7+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r6+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r5+#0) = v13 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v14 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v15 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v16 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r31:30 = dealloc_return(r30):raw +; CHECK-NEXT: } +b0: + %v0 = getelementptr <64 x half>, <64 x half>* %a17, i32 0 + store <64 x half> %a0, <64 x half>* %v0, align 128 + %v1 = getelementptr <64 x half>, <64 x half>* %a17, i32 1 + store <64 x half> %a1, <64 x half>* %v1, align 128 + %v2 = getelementptr <64 x half>, <64 x half>* %a17, i32 2 + store <64 x half> %a2, <64 x half>* %v2, align 128 + %v3 = getelementptr <64 x half>, <64 x half>* %a17, i32 3 + store <64 x half> %a3, <64 x half>* %v3, align 128 + %v4 = getelementptr <64 x half>, <64 x half>* %a17, i32 4 + store <64 x half> %a4, <64 x half>* %v4, align 128 + %v5 = getelementptr <64 x half>, <64 x half>* %a17, i32 5 + store <64 x half> %a5, <64 x half>* %v5, align 128 + %v6 = getelementptr <64 x half>, <64 x half>* %a17, i32 6 + store <64 x half> %a6, <64 x half>* %v6, align 128 + %v7 = getelementptr <64 x half>, <64 x half>* %a17, i32 7 + store <64 x half> %a7, <64 x half>* %v7, align 128 + %v8 = getelementptr <64 x half>, <64 x half>* %a17, i32 8 + store <64 x half> %a8, <64 x half>* %v8, align 128 + %v9 = getelementptr <64 x half>, <64 x half>* %a17, i32 9 + store <64 x half> %a9, <64 x half>* %v9, align 128 + %v10 = getelementptr <64 x half>, <64 x half>* %a17, i32 10 + store <64 x half> %a10, <64 x half>* %v10, align 128 + %v11 = getelementptr <64 x half>, <64 x half>* %a17, i32 11 + store <64 x half> %a11, <64 x half>* %v11, align 128 + %v12 = getelementptr <64 x half>, <64 x half>* %a17, i32 12 + store <64 x half> %a12, <64 x half>* %v12, align 128 + %v13 = getelementptr <64 x half>, <64 x half>* %a17, i32 13 + store <64 x half> %a13, <64 x half>* %v13, align 128 + %v14 = getelementptr <64 x half>, <64 x half>* %a17, i32 14 + store <64 x half> %a14, <64 x half>* %v14, align 128 + %v15 = getelementptr <64 x half>, <64 x half>* %a17, i32 15 + store <64 x half> %a15, <64 x half>* %v15, align 128 + %v16 = getelementptr <64 x half>, <64 x half>* %a17, i32 16 + store <64 x half> %a16, <64 x half>* %v16, align 128 + ret void +} + +define void @f20(<32 x float> %a0, <32 x float> %a1, <32 x float> %a2, <32 x float> %a3, <32 x float> %a4, <32 x float> %a5, <32 x float> %a6, <32 x float> %a7, <32 x float> %a8, <32 x float> %a9, <32 x float> %a10, <32 x float> %a11, <32 x float> %a12, <32 x float> %a13, <32 x float> %a14, <32 x float> %a15, <32 x float> %a16, <32 x float>* %a17) #0 { +; CHECK-LABEL: f20: +; CHECK: // %bb.0: // %b0 +; CHECK-NEXT: { +; CHECK-NEXT: r2 = add(r0,#1152) +; CHECK-NEXT: r3 = add(r0,#1280) +; CHECK-NEXT: r4 = add(r0,#1408) +; CHECK-NEXT: allocframe(r29,#0):raw +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r1 = add(r30,#8) +; CHECK-NEXT: r7 = add(r0,#1024) +; CHECK-NEXT: r6 = add(r0,#1536) +; CHECK-NEXT: vmem(r2+#0) = v9 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r5 = add(r0,#1664) +; CHECK-NEXT: r3 = add(r0,#1920) +; CHECK-NEXT: r2 = add(r0,#2048) +; CHECK-NEXT: vmem(r3+#0) = v10 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r4 = add(r0,#1792) +; CHECK-NEXT: r29 = and(r29,#-128) +; CHECK-NEXT: v16 = vmem(r1+#0) +; CHECK-NEXT: vmem(r4+#0) = v11 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#0) = v0 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#1) = v1 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r7+#0) = v8 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#2) = v2 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#3) = v3 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#4) = v4 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r6+#0) = v12 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#5) = v5 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r5+#0) = v13 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#6) = v6 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r4+#0) = v14 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r0+#7) = v7 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r3+#0) = v15 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: vmem(r2+#0) = v16 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: r31:30 = dealloc_return(r30):raw +; CHECK-NEXT: } +b0: + %v0 = getelementptr <32 x float>, <32 x float>* %a17, i32 0 + store <32 x float> %a0, <32 x float>* %v0, align 128 + %v1 = getelementptr <32 x float>, <32 x float>* %a17, i32 1 + store <32 x float> %a1, <32 x float>* %v1, align 128 + %v2 = getelementptr <32 x float>, <32 x float>* %a17, i32 2 + store <32 x float> %a2, <32 x float>* %v2, align 128 + %v3 = getelementptr <32 x float>, <32 x float>* %a17, i32 3 + store <32 x float> %a3, <32 x float>* %v3, align 128 + %v4 = getelementptr <32 x float>, <32 x float>* %a17, i32 4 + store <32 x float> %a4, <32 x float>* %v4, align 128 + %v5 = getelementptr <32 x float>, <32 x float>* %a17, i32 5 + store <32 x float> %a5, <32 x float>* %v5, align 128 + %v6 = getelementptr <32 x float>, <32 x float>* %a17, i32 6 + store <32 x float> %a6, <32 x float>* %v6, align 128 + %v7 = getelementptr <32 x float>, <32 x float>* %a17, i32 7 + store <32 x float> %a7, <32 x float>* %v7, align 128 + %v8 = getelementptr <32 x float>, <32 x float>* %a17, i32 8 + store <32 x float> %a8, <32 x float>* %v8, align 128 + %v9 = getelementptr <32 x float>, <32 x float>* %a17, i32 9 + store <32 x float> %a9, <32 x float>* %v9, align 128 + %v10 = getelementptr <32 x float>, <32 x float>* %a17, i32 10 + store <32 x float> %a10, <32 x float>* %v10, align 128 + %v11 = getelementptr <32 x float>, <32 x float>* %a17, i32 11 + store <32 x float> %a11, <32 x float>* %v11, align 128 + %v12 = getelementptr <32 x float>, <32 x float>* %a17, i32 12 + store <32 x float> %a12, <32 x float>* %v12, align 128 + %v13 = getelementptr <32 x float>, <32 x float>* %a17, i32 13 + store <32 x float> %a13, <32 x float>* %v13, align 128 + %v14 = getelementptr <32 x float>, <32 x float>* %a17, i32 14 + store <32 x float> %a14, <32 x float>* %v14, align 128 + %v15 = getelementptr <32 x float>, <32 x float>* %a17, i32 15 + store <32 x float> %a15, <32 x float>* %v15, align 128 + %v16 = getelementptr <32 x float>, <32 x float>* %a17, i32 16 + store <32 x float> %a16, <32 x float>* %v16, align 128 + ret void +} + +define <128 x i8> @f21() #0 { +; CHECK-LABEL: f21: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vxor(v0,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <128 x i8> undef, i8 0, i32 0 + %v1 = shufflevector <128 x i8> %v0, <128 x i8> undef, <128 x i32> zeroinitializer + ret <128 x i8> %v1 +} + +define <256 x i8> @f22() #0 { +; CHECK-LABEL: f22: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <256 x i8> undef, i8 0, i32 0 + %v1 = shufflevector <256 x i8> %v0, <256 x i8> undef, <256 x i32> zeroinitializer + ret <256 x i8> %v1 +} + +define <64 x i16> @f23() #0 { +; CHECK-LABEL: f23: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vxor(v0,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <64 x i16> undef, i16 0, i32 0 + %v1 = shufflevector <64 x i16> %v0, <64 x i16> undef, <64 x i32> zeroinitializer + ret <64 x i16> %v1 +} + +define <128 x i16> @f24() #0 { +; CHECK-LABEL: f24: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <128 x i16> undef, i16 0, i32 0 + %v1 = shufflevector <128 x i16> %v0, <128 x i16> undef, <128 x i32> zeroinitializer + ret <128 x i16> %v1 +} + +define <32 x i32> @f25() #0 { +; CHECK-LABEL: f25: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vxor(v0,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <32 x i32> undef, i32 0, i32 0 + %v1 = shufflevector <32 x i32> %v0, <32 x i32> undef, <32 x i32> zeroinitializer + ret <32 x i32> %v1 +} + +define <64 x i32> @f26() #0 { +; CHECK-LABEL: f26: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <64 x i32> undef, i32 0, i32 0 + %v1 = shufflevector <64 x i32> %v0, <64 x i32> undef, <64 x i32> zeroinitializer + ret <64 x i32> %v1 +} + +define <64 x half> @f27() #0 { +; CHECK-LABEL: f27: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vxor(v0,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <64 x half> undef, half 0xH0, i32 0 + %v1 = shufflevector <64 x half> %v0, <64 x half> undef, <64 x i32> zeroinitializer + ret <64 x half> %v1 +} + +define <128 x half> @f28() #0 { +; CHECK-LABEL: f28: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <128 x half> undef, half 0xH0, i32 0 + %v1 = shufflevector <128 x half> %v0, <128 x half> undef, <128 x i32> zeroinitializer + ret <128 x half> %v1 +} + +define <32 x float> @f29() #0 { +; CHECK-LABEL: f29: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vxor(v0,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <32 x float> undef, float 0.0, i32 0 + %v1 = shufflevector <32 x float> %v0, <32 x float> undef, <32 x i32> zeroinitializer + ret <32 x float> %v1 +} + +define <64 x float> @f30() #0 { +; CHECK-LABEL: f30: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v1:0.w = vsub(v1:0.w,v1:0.w) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %v0 = insertelement <64 x float> undef, float 0.0, i32 0 + %v1 = shufflevector <64 x float> %v0, <64 x float> undef, <64 x i32> zeroinitializer + ret <64 x float> %v1 +} + +attributes #0 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/splat.ll b/llvm/test/CodeGen/Hexagon/autohvx/splat.ll index bbea3a21270c..eea089851e9c 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/splat.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/splat.ll @@ -402,9 +402,8 @@ define <64 x half> @f24(i16 %a0) #2 { ; CHECK-LABEL: f24: ; CHECK: // %bb.0: ; CHECK-NEXT: { -; CHECK-NEXT: v0.h = vsplat(r1) +; CHECK-NEXT: v0.h = vsplat(r0) ; CHECK-NEXT: jumpr r31 -; CHECK-NEXT: vmem(r0+#0) = v0.new ; CHECK-NEXT: } %v0 = bitcast i16 %a0 to half %v1 = insertelement <64 x half> undef, half %v0, i32 0 @@ -417,9 +416,8 @@ define <32 x float> @f25(float %a0) #2 { ; CHECK-LABEL: f25: ; CHECK: // %bb.0: ; CHECK-NEXT: { -; CHECK-NEXT: v0 = vsplat(r1) +; CHECK-NEXT: v0 = vsplat(r0) ; CHECK-NEXT: jumpr r31 -; CHECK-NEXT: vmem(r0+#0) = v0.new ; CHECK-NEXT: } %v0 = insertelement <32 x float> undef, float %a0, i32 0 %v1 = shufflevector <32 x float> %v0, <32 x float> undef, <32 x i32> zeroinitializer