Implement a subtarget feature (vector-unaligned-mem, enabled with
-mattr=vector-unaligned-mem) that allows targets to ignore alignment
requirements for SIMD memory operands.  This is useful on architectures
such as the AMD Family 10h, which do not trap on unaligned references
once a status bit has been set at startup time.

llvm-svn: 93151
This commit is contained in:
David Greene 2010-01-11 16:29:42 +00:00
parent 9a8dd0db89
commit 206351a1ff
5 changed files with 418 additions and 4 deletions

View File

@ -23,6 +23,7 @@ include "llvm/Target/Target.td"
// "cmov" feature: sets the subtarget's HasCMov field to "true".
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
"Enable conditional move instructions">;
// "mmx" feature: sets the subtarget's X86SSELevel field to MMX.
def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
"Enable MMX instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
@ -66,6 +67,9 @@ def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
// Description string for the "fma3" feature (FMA = fused multiply-add).
"Enable three-operand fused multiply-add">;
// "fma4" feature: sets the subtarget's HasFMA4 field to "true".
// The description reads "multiply-add" (FMA = fused multiply-add).
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
"Enable four-operand fused multiply-add">;
// "vector-unaligned-mem" feature: sets the subtarget's HasVectorUAMem field
// to "true".  The SSE `memop` pattern fragment queries this flag (via
// hasVectorUAMem()) and, when it is set, accepts loads that are not 16-byte
// aligned as instruction memory operands.  The processor may need a feature
// bit set (for example at startup) for such accesses not to trap.
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"HasVectorUAMem", "true",
"Allow unaligned memory operands on vector/SIMD instructions">;
//===----------------------------------------------------------------------===//
// X86 processors supported.

View File

@ -131,11 +131,13 @@ def alignedloadv2i64 : PatFrag<(ops node:$ptr),
// Like 'load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
// be naturally aligned on some targets but not on others.
// FIXME: Actually implement support for targets that don't require the
// alignment. This probably wants a subtarget predicate.
// be naturally aligned on some targets but not on others. If the subtarget
// allows unaligned accesses, match any load, though this may require
// setting a feature bit in the processor (on startup, for example).
// Opteron 10h and later implement such a feature.
def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 16;
return Subtarget->hasVectorUAMem()
|| cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
// memop constrained to an f32 result; inherits memop's alignment predicate.
def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;

View File

@ -286,6 +286,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, HasFMA3(false)
, HasFMA4(false)
, IsBTMemSlow(false)
, HasVectorUAMem(false)
, DarwinVers(0)
, stackAlignment(8)
// FIXME: this is a known good value for Yonah. How about others?

View File

@ -78,6 +78,10 @@ protected:
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
/// HasVectorUAMem - True if SIMD operations can have unaligned memory operands.
/// This may require setting a feature bit in the processor.
bool HasVectorUAMem;
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
unsigned char DarwinVers; // Is any darwin-x86 platform.
@ -142,6 +146,7 @@ public:
/// hasFMA3 - Returns the HasFMA3 flag.
bool hasFMA3() const { return HasFMA3; }
/// hasFMA4 - Returns the HasFMA4 flag.
bool hasFMA4() const { return HasFMA4; }
/// isBTMemSlow - Returns the IsBTMemSlow flag (BT of memory is slow).
bool isBTMemSlow() const { return IsBTMemSlow; }
/// hasVectorUAMem - Returns the HasVectorUAMem flag: true if SIMD operations
/// can have unaligned memory operands on this subtarget.
bool hasVectorUAMem() const { return HasVectorUAMem; }
/// isTargetDarwin - True when TargetType is isDarwin.
bool isTargetDarwin() const { return TargetType == isDarwin; }
/// isTargetELF - True when TargetType is isELF.
bool isTargetELF() const { return TargetType == isELF; }

View File

@ -0,0 +1,402 @@
; Regression test for the vector-unaligned-mem subtarget feature.
; The <4 x float> loads in @foo are only 4-byte aligned; with the feature
; enabled, the expected output contains an addps whose first operand begins
; with "(", i.e. a memory operand in AT&T syntax.
; RUN: llc -mattr=vector-unaligned-mem < %s | FileCheck %s
; CHECK: addps{{[ \t]+}}(
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
define i32 @foo(i32 %n1, float* %A2, float* %B3, float* %C4) {
"file loop.c, line 1, bb1": ; srcLine 1
%n = alloca i32, align 4 ; <i32*> [#uses=2] ; [oox.12 : sln.1]
%A = alloca float*, align 8 ; <float**> [#uses=2] ; [oox.13 : sln.1]
%B = alloca float*, align 8 ; <float**> [#uses=2] ; [oox.14 : sln.1]
%C = alloca float*, align 8 ; <float**> [#uses=2] ; [oox.15 : sln.1]
%i = alloca i32, align 4 ; <i32*> [#uses=0] ; [oox.24 : sln.1]
%"$CSVL_V0" = alloca i64, align 8 ; <i64*> [#uses=2] ; [oox.38 : sln.1]
%"$TC_1" = alloca i64, align 8 ; <i64*> [#uses=3] ; [oox.43 : sln.1]
%"$LIS_S5" = alloca i64, align 8 ; <i64*> [#uses=2] ; [oox.54 : sln.1]
%"$LIS_S7" = alloca i64, align 8 ; <i64*> [#uses=2] ; [oox.56 : sln.1]
%"$LIS_S8" = alloca i64, align 8 ; <i64*> [#uses=2] ; [oox.57 : sln.1]
%"$LIS_S9" = alloca i64, align 8 ; <i64*> [#uses=2] ; [oox.58 : sln.1]
%"$LIS_S15" = alloca i64, align 8 ; <i64*> [#uses=3] ; [oox.64 : sln.1]
%"$LIS_S17" = alloca i64, align 8 ; <i64*> [#uses=3] ; [oox.66 : sln.1]
%"$LIS_S18" = alloca i64, align 8 ; <i64*> [#uses=3] ; [oox.67 : sln.1]
%"$LIS_S19" = alloca i64, align 8 ; <i64*> [#uses=2] ; [oox.68 : sln.1]
%"$LIS_S20" = alloca i64, align 8 ; <i64*> [#uses=2] ; [oox.69 : sln.1]
%"$LIS_S21" = alloca i64, align 8 ; <i64*> [#uses=2] ; [oox.70 : sln.1]
%"$MR_n_0" = alloca i32, align 4 ; <i32*> [#uses=7] ; [oox.72 : sln.1]
%"$MR_C_1" = alloca float*, align 8 ; <float**> [#uses=5] ; [oox.73 : sln.1]
%"$MR_A_2" = alloca float*, align 8 ; <float**> [#uses=6] ; [oox.74 : sln.1]
%"$MR_B_3" = alloca float*, align 8 ; <float**> [#uses=5] ; [oox.75 : sln.1]
%"$LCS_0" = alloca i64, align 8 ; <i64*> [#uses=6] ; [oox.82 : sln.1]
%"$LCS_1" = alloca i64, align 8 ; <i64*> [#uses=5] ; [oox.83 : sln.1]
%"$LCS_2" = alloca i64, align 8 ; <i64*> [#uses=3] ; [oox.84 : sln.1]
%"$LCS_1_3" = alloca i64, align 8 ; <i64*> [#uses=3] ; [oox.85 : sln.1]
%"$LCS_4" = alloca i64, align 8 ; <i64*> [#uses=5] ; [oox.86 : sln.1]
%"$LCS_5" = alloca i64, align 8 ; <i64*> [#uses=5] ; [oox.87 : sln.1]
%"$LCS_6" = alloca i64, align 8 ; <i64*> [#uses=5] ; [oox.88 : sln.1]
%"$LCS_n_7" = alloca i64, align 8 ; <i64*> [#uses=3] ; [oox.89 : sln.1]
%"$i_S23" = alloca i64, align 8 ; <i64*> [#uses=15] ; [oox.90 : sln.1]
%"$LC_S24" = alloca i64, align 8 ; <i64*> [#uses=9] ; [oox.91 : sln.1]
%"$SI_S25" = alloca i64, align 8 ; <i64*> [#uses=11] ; [oox.92 : sln.1]
store i32 %n1, i32* %n, align 4 ; [oox.12 : sln.1]
store float* %A2, float** %A, align 8 ; [oox.13 : sln.1]
store float* %B3, float** %B, align 8 ; [oox.14 : sln.1]
store float* %C4, float** %C, align 8 ; [oox.15 : sln.1]
br label %"file loop.c, line 1, bb69" ; [oox.0 : sln.0]
"file loop.c, line 1, bb69": ; srcLine 1 ; preds = %"file loop.c, line 1, bb1"
%r = load i32* %n, align 4 ; <i32> [#uses=1] ; [oox.190 : sln.5]
store i32 %r, i32* %"$MR_n_0", align 4 ; [oox.190 : sln.5]
%r5 = load i32* %"$MR_n_0", align 4 ; <i32> [#uses=1] ; [oox.191 : sln.5]
%r6 = icmp sge i32 0, %r5 ; <i1> [#uses=1] ; [oox.191 : sln.5]
%r7 = zext i1 %r6 to i32 ; <i32> [#uses=1] ; [oox.191 : sln.5]
%r8 = icmp ne i32 %r7, 0 ; <i1> [#uses=1] ; [oox.191 : sln.5]
br i1 %r8, label %"file loop.c, line 5, bb6", label %"file loop.c, line 1, bb3" ; [oox.191 : sln.5]
"file loop.c, line 1, bb3": ; srcLine 1 ; preds = %"file loop.c, line 1, bb69"
br label %"file loop.c, line 5, bb28" ; [oox.0 : sln.0]
"file loop.c, line 5, bb28": ; srcLine 5 ; preds = %"file loop.c, line 1, bb3"
store i64 0, i64* %"$i_S23", align 8 ; [oox.189 : sln.5]
%r9 = load float** %C, align 8 ; <float*> [#uses=1] ; [oox.190 : sln.5]
store float* %r9, float** %"$MR_C_1", align 8 ; [oox.190 : sln.5]
%r10 = load float** %A, align 8 ; <float*> [#uses=1] ; [oox.191 : sln.5]
store float* %r10, float** %"$MR_A_2", align 8 ; [oox.191 : sln.5]
%r11 = load float** %B, align 8 ; <float*> [#uses=1] ; [oox.192 : sln.5]
store float* %r11, float** %"$MR_B_3", align 8 ; [oox.192 : sln.5]
%r12 = load float** %"$MR_A_2", align 8 ; <float*> [#uses=1] ; [oox.193 : sln.5]
%r13 = load float** %"$MR_C_1", align 8 ; <float*> [#uses=1] ; [oox.193 : sln.5]
%r14 = ptrtoint float* %r12 to i64 ; <i64> [#uses=1] ; [oox.193 : sln.5]
%r15 = ptrtoint float* %r13 to i64 ; <i64> [#uses=1] ; [oox.193 : sln.5]
%r16 = sub i64 %r14, %r15 ; <i64> [#uses=1] ; [oox.193 : sln.5]
%r17 = sdiv i64 %r16, 4 ; <i64> [#uses=1] ; [oox.193 : sln.5]
store i64 %r17, i64* %"$LCS_0", align 8 ; [oox.193 : sln.5]
%r18 = load float** %"$MR_A_2", align 8 ; <float*> [#uses=1] ; [oox.194 : sln.5]
%r19 = load float** %"$MR_B_3", align 8 ; <float*> [#uses=1] ; [oox.194 : sln.5]
%r20 = ptrtoint float* %r18 to i64 ; <i64> [#uses=1] ; [oox.194 : sln.5]
%r21 = ptrtoint float* %r19 to i64 ; <i64> [#uses=1] ; [oox.194 : sln.5]
%r22 = sub i64 %r20, %r21 ; <i64> [#uses=1] ; [oox.194 : sln.5]
%r23 = sdiv i64 %r22, 4 ; <i64> [#uses=1] ; [oox.194 : sln.5]
store i64 %r23, i64* %"$LCS_1", align 8 ; [oox.194 : sln.5]
%r24 = load i32* %"$MR_n_0", align 4 ; <i32> [#uses=1] ; [oox.195 : sln.5]
%r25 = sext i32 %r24 to i64 ; <i64> [#uses=1] ; [oox.195 : sln.5]
%r26 = add i64 -1, %r25 ; <i64> [#uses=1] ; [oox.195 : sln.5]
store i64 %r26, i64* %"$LCS_1_3", align 8 ; [oox.195 : sln.5]
%r27 = load i64* %"$LCS_0", align 8 ; <i64> [#uses=1] ; [oox.196 : sln.5]
%r28 = icmp sgt i64 %r27, 0 ; <i1> [#uses=1] ; [oox.196 : sln.5]
%r29 = zext i1 %r28 to i64 ; <i64> [#uses=1] ; [oox.196 : sln.5]
%r30 = load i64* %"$LCS_0", align 8 ; <i64> [#uses=1] ; [oox.196 : sln.5]
%r31 = load i64* %"$LCS_1_3", align 8 ; <i64> [#uses=1] ; [oox.196 : sln.5]
%r32 = icmp sle i64 %r30, %r31 ; <i1> [#uses=1] ; [oox.196 : sln.5]
%r33 = zext i1 %r32 to i32 ; <i32> [#uses=1] ; [oox.196 : sln.5]
%r34 = sext i32 %r33 to i64 ; <i64> [#uses=1] ; [oox.196 : sln.5]
%r35 = and i64 %r29, %r34 ; <i64> [#uses=1] ; [oox.196 : sln.5]
%r36 = load i64* %"$LCS_0", align 8 ; <i64> [#uses=1] ; [oox.196 : sln.5]
%r37 = icmp sle i64 %r36, 4 ; <i1> [#uses=1] ; [oox.196 : sln.5]
%r38 = zext i1 %r37 to i32 ; <i32> [#uses=1] ; [oox.196 : sln.5]
%r39 = sext i32 %r38 to i64 ; <i64> [#uses=1] ; [oox.196 : sln.5]
%r40 = and i64 %r35, %r39 ; <i64> [#uses=1] ; [oox.196 : sln.5]
store i64 %r40, i64* %"$LCS_2", align 8 ; [oox.196 : sln.5]
%r41 = load i64* %"$LCS_1", align 8 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r42 = load i64* %"$LCS_0", align 8 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r43 = load i64* %"$LCS_2", align 8 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r44 = icmp ne i64 %r43, 0 ; <i1> [#uses=1] ; [oox.197 : sln.5]
%r45 = select i1 %r44, i64 %r42, i64 4 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r46 = load i64* %"$LCS_1", align 8 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r47 = icmp sgt i64 %r46, 0 ; <i1> [#uses=1] ; [oox.197 : sln.5]
%r48 = zext i1 %r47 to i64 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r49 = load i64* %"$LCS_1", align 8 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r50 = load i64* %"$LCS_0", align 8 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r51 = load i64* %"$LCS_2", align 8 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r52 = icmp ne i64 %r51, 0 ; <i1> [#uses=1] ; [oox.197 : sln.5]
%r53 = select i1 %r52, i64 %r50, i64 4 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r54 = icmp sle i64 %r49, %r53 ; <i1> [#uses=1] ; [oox.197 : sln.5]
%r55 = zext i1 %r54 to i32 ; <i32> [#uses=1] ; [oox.197 : sln.5]
%r56 = load i64* %"$LCS_1", align 8 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r57 = load i64* %"$LCS_1_3", align 8 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r58 = icmp sle i64 %r56, %r57 ; <i1> [#uses=1] ; [oox.197 : sln.5]
%r59 = zext i1 %r58 to i32 ; <i32> [#uses=1] ; [oox.197 : sln.5]
%r60 = and i32 %r55, %r59 ; <i32> [#uses=1] ; [oox.197 : sln.5]
%r61 = sext i32 %r60 to i64 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r62 = and i64 %r48, %r61 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r63 = icmp ne i64 %r62, 0 ; <i1> [#uses=1] ; [oox.197 : sln.5]
%r64 = select i1 %r63, i64 %r41, i64 %r45 ; <i64> [#uses=1] ; [oox.197 : sln.5]
store i64 %r64, i64* %"$CSVL_V0", align 8 ; [oox.197 : sln.5]
%r65 = load i64* %"$CSVL_V0", align 8 ; <i64> [#uses=1] ; [oox.198 : sln.5]
%r66 = icmp sgt i64 %r65, 4 ; <i1> [#uses=1] ; [oox.198 : sln.5]
%r67 = zext i1 %r66 to i32 ; <i32> [#uses=1] ; [oox.198 : sln.5]
%r68 = icmp ne i32 %r67, 0 ; <i1> [#uses=1] ; [oox.198 : sln.5]
br i1 %r68, label %"file loop.c, line 1, bb26", label %"file loop.c, line 1, bb27" ; [oox.198 : sln.5]
"file loop.c, line 1, bb27": ; srcLine 1 ; preds = %"file loop.c, line 5, bb28"
br label %"file loop.c, line 5, bb55" ; [oox.0 : sln.0]
"file loop.c, line 5, bb55": ; srcLine 5 ; preds = %"file loop.c, line 1, bb27"
%r69 = load i32* %"$MR_n_0", align 4 ; <i32> [#uses=1] ; [oox.189 : sln.5]
%r70 = sext i32 %r69 to i64 ; <i64> [#uses=1] ; [oox.189 : sln.5]
store i64 %r70, i64* %"$LIS_S9", align 8 ; [oox.189 : sln.5]
%r71 = load float** %"$MR_B_3", align 8 ; <float*> [#uses=1] ; [oox.190 : sln.6]
%r72 = ptrtoint float* %r71 to i64 ; <i64> [#uses=1] ; [oox.190 : sln.6]
store i64 %r72, i64* %"$LIS_S5", align 8 ; [oox.190 : sln.6]
%r73 = load float** %"$MR_C_1", align 8 ; <float*> [#uses=1] ; [oox.191 : sln.6]
%r74 = ptrtoint float* %r73 to i64 ; <i64> [#uses=1] ; [oox.191 : sln.6]
store i64 %r74, i64* %"$LIS_S7", align 8 ; [oox.191 : sln.6]
%r75 = load float** %"$MR_A_2", align 8 ; <float*> [#uses=1] ; [oox.192 : sln.6]
%r76 = ptrtoint float* %r75 to i64 ; <i64> [#uses=1] ; [oox.192 : sln.6]
store i64 %r76, i64* %"$LIS_S8", align 8 ; [oox.192 : sln.6]
br label %"file loop.c, line 1, in inner loop at depth 0, bb29" ; [oox.0 : sln.0]
"file loop.c, line 1, in inner loop at depth 0, bb29": ; srcLine 1 ; preds = %"file loop.c, line 5, in inner loop at depth 0, bb32", %"file loop.c, line 5, bb55"
br label %"file loop.c, line 5, in inner loop at depth 0, bb32" ; [oox.0 : sln.0]
"file loop.c, line 5, in inner loop at depth 0, bb32": ; srcLine 5 ; preds = %"file loop.c, line 1, in inner loop at depth 0, bb29"
%r77 = load i64* %"$LIS_S7", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r78 = inttoptr i64 %r77 to float* ; <float*> [#uses=1] ; [oox.189 : sln.6]
%r79 = load i64* %"$i_S23", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r80 = getelementptr float* %r78, i64 %r79 ; <float*> [#uses=1] ; [oox.189 : sln.6]
%r81 = load float* %r80, align 4 ; <float> [#uses=1] ; [oox.189 : sln.6]
%r82 = load i64* %"$LIS_S5", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r83 = inttoptr i64 %r82 to float* ; <float*> [#uses=1] ; [oox.189 : sln.6]
%r84 = load i64* %"$i_S23", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r85 = getelementptr float* %r83, i64 %r84 ; <float*> [#uses=1] ; [oox.189 : sln.6]
%r86 = load float* %r85, align 4 ; <float> [#uses=1] ; [oox.189 : sln.6]
%r87 = add float %r81, %r86 ; <float> [#uses=1] ; [oox.189 : sln.6]
%r88 = load i64* %"$LIS_S8", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r89 = inttoptr i64 %r88 to float* ; <float*> [#uses=1] ; [oox.189 : sln.6]
%r90 = load i64* %"$i_S23", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r91 = getelementptr float* %r89, i64 %r90 ; <float*> [#uses=1] ; [oox.189 : sln.6]
store float %r87, float* %r91, align 4 ; [oox.189 : sln.6]
%r92 = load i64* %"$i_S23", align 8 ; <i64> [#uses=1] ; [oox.190 : sln.5]
%r93 = add i64 1, %r92 ; <i64> [#uses=1] ; [oox.190 : sln.5]
store i64 %r93, i64* %"$i_S23", align 8 ; [oox.190 : sln.5]
%r94 = load i64* %"$i_S23", align 8 ; <i64> [#uses=1] ; [oox.191 : sln.5]
%r95 = load i64* %"$LIS_S9", align 8 ; <i64> [#uses=1] ; [oox.191 : sln.5]
%r96 = icmp slt i64 %r94, %r95 ; <i1> [#uses=1] ; [oox.191 : sln.5]
%r97 = zext i1 %r96 to i64 ; <i64> [#uses=1] ; [oox.191 : sln.5]
%r98 = icmp ne i64 %r97, 0 ; <i1> [#uses=1] ; [oox.191 : sln.5]
br i1 %r98, label %"file loop.c, line 1, in inner loop at depth 0, bb29", label %"file loop.c, line 5, bb6" ; [oox.191 : sln.5]
"file loop.c, line 1, bb26": ; srcLine 1 ; preds = %"file loop.c, line 5, bb28"
br label %"file loop.c, line 5, bb48" ; [oox.0 : sln.0]
"file loop.c, line 5, bb48": ; srcLine 5 ; preds = %"file loop.c, line 1, bb26"
%r99 = load i32* %"$MR_n_0", align 4 ; <i32> [#uses=1] ; [oox.189 : sln.5]
%r100 = sext i32 %r99 to i64 ; <i64> [#uses=1] ; [oox.189 : sln.5]
%r101 = icmp slt i64 %r100, 4 ; <i1> [#uses=1] ; [oox.189 : sln.5]
%r102 = zext i1 %r101 to i32 ; <i32> [#uses=1] ; [oox.189 : sln.5]
%r103 = icmp ne i32 %r102, 0 ; <i1> [#uses=1] ; [oox.189 : sln.5]
br i1 %r103, label %"file loop.c, line 5, bb50", label %"file loop.c, line 1, bb47" ; [oox.189 : sln.5]
"file loop.c, line 1, bb47": ; srcLine 1 ; preds = %"file loop.c, line 5, bb48"
br label %"file loop.c, line 5, bb60" ; [oox.0 : sln.0]
"file loop.c, line 5, bb60": ; srcLine 5 ; preds = %"file loop.c, line 1, bb47"
%r104 = load i32* %"$MR_n_0", align 4 ; <i32> [#uses=1] ; [oox.189 : sln.5]
%r105 = sext i32 %r104 to i64 ; <i64> [#uses=1] ; [oox.189 : sln.5]
%r106 = and i64 -4, %r105 ; <i64> [#uses=1] ; [oox.189 : sln.5]
store i64 %r106, i64* %"$TC_1", align 8 ; [oox.189 : sln.5]
%r107 = load i64* %"$TC_1", align 8 ; <i64> [#uses=1] ; [oox.190 : sln.5]
%r108 = sub i64 0, %r107 ; <i64> [#uses=1] ; [oox.190 : sln.5]
store i64 %r108, i64* %"$LC_S24", align 8 ; [oox.190 : sln.5]
store i64 0, i64* %"$SI_S25", align 8 ; [oox.191 : sln.5]
%r109 = load float** %"$MR_C_1", align 8 ; <float*> [#uses=1] ; [oox.192 : sln.6]
%r110 = ptrtoint float* %r109 to i64 ; <i64> [#uses=1] ; [oox.192 : sln.6]
store i64 %r110, i64* %"$LIS_S15", align 8 ; [oox.192 : sln.6]
%r111 = load float** %"$MR_B_3", align 8 ; <float*> [#uses=1] ; [oox.193 : sln.6]
%r112 = ptrtoint float* %r111 to i64 ; <i64> [#uses=1] ; [oox.193 : sln.6]
store i64 %r112, i64* %"$LIS_S17", align 8 ; [oox.193 : sln.6]
%r113 = load float** %"$MR_A_2", align 8 ; <float*> [#uses=1] ; [oox.194 : sln.6]
%r114 = ptrtoint float* %r113 to i64 ; <i64> [#uses=1] ; [oox.194 : sln.6]
store i64 %r114, i64* %"$LIS_S18", align 8 ; [oox.194 : sln.6]
%r115 = load i64* %"$LC_S24", align 8 ; <i64> [#uses=1] ; [oox.195 : sln.5]
%r116 = icmp sge i64 %r115, -15 ; <i1> [#uses=1] ; [oox.195 : sln.5]
%r117 = zext i1 %r116 to i32 ; <i32> [#uses=1] ; [oox.195 : sln.5]
%r118 = icmp ne i32 %r117, 0 ; <i1> [#uses=1] ; [oox.195 : sln.5]
br i1 %r118, label %"file loop.c, line 5, bb64", label %"file loop.c, line 1, bb61" ; [oox.195 : sln.5]
"file loop.c, line 1, bb61": ; srcLine 1 ; preds = %"file loop.c, line 5, bb60"
br label %"file loop.c, line 1, in inner vector loop at depth 0, bb58" ; [oox.0 : sln.0]
"file loop.c, line 1, in inner vector loop at depth 0, bb58": ; srcLine 1 ; preds = %"file loop.c, line 6, in inner vector loop at depth 0, bb59", %"file loop.c, line 1, bb61"
br label %"file loop.c, line 6, in inner vector loop at depth 0, bb59" ; [oox.0 : sln.0]
"file loop.c, line 6, in inner vector loop at depth 0, bb59": ; srcLine 6 ; preds = %"file loop.c, line 1, in inner vector loop at depth 0, bb58"
%r119 = load i64* %"$LIS_S15", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r120 = load i64* %"$SI_S25", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r121 = add i64 %r119, %r120 ; <i64> [#uses=1] ; [oox.189 : sln.6]
store i64 %r121, i64* %"$LCS_4", align 8 ; [oox.189 : sln.6]
%r122 = load i64* %"$LIS_S17", align 8 ; <i64> [#uses=1] ; [oox.190 : sln.6]
%r123 = load i64* %"$SI_S25", align 8 ; <i64> [#uses=1] ; [oox.190 : sln.6]
%r124 = add i64 %r122, %r123 ; <i64> [#uses=1] ; [oox.190 : sln.6]
store i64 %r124, i64* %"$LCS_5", align 8 ; [oox.190 : sln.6]
%r125 = load i64* %"$LIS_S18", align 8 ; <i64> [#uses=1] ; [oox.191 : sln.6]
%r126 = load i64* %"$SI_S25", align 8 ; <i64> [#uses=1] ; [oox.191 : sln.6]
%r127 = add i64 %r125, %r126 ; <i64> [#uses=1] ; [oox.191 : sln.6]
store i64 %r127, i64* %"$LCS_6", align 8 ; [oox.191 : sln.6]
%r128 = load i64* %"$LCS_4", align 8 ; <i64> [#uses=1] ; [oox.192 : sln.6]
%r129 = inttoptr i64 %r128 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.192 : sln.6]
%r130 = load <4 x float>* %r129, align 4 ; <<4 x float>> [#uses=1] ; [oox.192 : sln.6]
%r131 = load i64* %"$LCS_5", align 8 ; <i64> [#uses=1] ; [oox.192 : sln.6]
%r132 = inttoptr i64 %r131 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.192 : sln.6]
%r133 = load <4 x float>* %r132, align 4 ; <<4 x float>> [#uses=1] ; [oox.192 : sln.6]
%r134 = add <4 x float> %r130, %r133 ; <<4 x float>> [#uses=1] ; [oox.192 : sln.6]
%r135 = load i64* %"$LCS_6", align 8 ; <i64> [#uses=1] ; [oox.192 : sln.6]
%r136 = inttoptr i64 %r135 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.192 : sln.6]
store <4 x float> %r134, <4 x float>* %r136, align 4 ; [oox.192 : sln.6]
%r137 = load i64* %"$LCS_4", align 8 ; <i64> [#uses=1] ; [oox.193 : sln.6]
%r138 = add i64 16, %r137 ; <i64> [#uses=1] ; [oox.193 : sln.6]
%r139 = inttoptr i64 %r138 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.193 : sln.6]
%r140 = load <4 x float>* %r139, align 4 ; <<4 x float>> [#uses=1] ; [oox.193 : sln.6]
%r141 = load i64* %"$LCS_5", align 8 ; <i64> [#uses=1] ; [oox.193 : sln.6]
%r142 = add i64 16, %r141 ; <i64> [#uses=1] ; [oox.193 : sln.6]
%r143 = inttoptr i64 %r142 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.193 : sln.6]
%r144 = load <4 x float>* %r143, align 4 ; <<4 x float>> [#uses=1] ; [oox.193 : sln.6]
%r145 = add <4 x float> %r140, %r144 ; <<4 x float>> [#uses=1] ; [oox.193 : sln.6]
%r146 = load i64* %"$LCS_6", align 8 ; <i64> [#uses=1] ; [oox.193 : sln.6]
%r147 = add i64 16, %r146 ; <i64> [#uses=1] ; [oox.193 : sln.6]
%r148 = inttoptr i64 %r147 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.193 : sln.6]
store <4 x float> %r145, <4 x float>* %r148, align 4 ; [oox.193 : sln.6]
%r149 = load i64* %"$LCS_4", align 8 ; <i64> [#uses=1] ; [oox.194 : sln.6]
%r150 = add i64 32, %r149 ; <i64> [#uses=1] ; [oox.194 : sln.6]
%r151 = inttoptr i64 %r150 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.194 : sln.6]
%r152 = load <4 x float>* %r151, align 4 ; <<4 x float>> [#uses=1] ; [oox.194 : sln.6]
%r153 = load i64* %"$LCS_5", align 8 ; <i64> [#uses=1] ; [oox.194 : sln.6]
%r154 = add i64 32, %r153 ; <i64> [#uses=1] ; [oox.194 : sln.6]
%r155 = inttoptr i64 %r154 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.194 : sln.6]
%r156 = load <4 x float>* %r155, align 4 ; <<4 x float>> [#uses=1] ; [oox.194 : sln.6]
%r157 = add <4 x float> %r152, %r156 ; <<4 x float>> [#uses=1] ; [oox.194 : sln.6]
%r158 = load i64* %"$LCS_6", align 8 ; <i64> [#uses=1] ; [oox.194 : sln.6]
%r159 = add i64 32, %r158 ; <i64> [#uses=1] ; [oox.194 : sln.6]
%r160 = inttoptr i64 %r159 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.194 : sln.6]
store <4 x float> %r157, <4 x float>* %r160, align 4 ; [oox.194 : sln.6]
%r161 = load i64* %"$LCS_4", align 8 ; <i64> [#uses=1] ; [oox.195 : sln.6]
%r162 = add i64 48, %r161 ; <i64> [#uses=1] ; [oox.195 : sln.6]
%r163 = inttoptr i64 %r162 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.195 : sln.6]
%r164 = load <4 x float>* %r163, align 4 ; <<4 x float>> [#uses=1] ; [oox.195 : sln.6]
%r165 = load i64* %"$LCS_5", align 8 ; <i64> [#uses=1] ; [oox.195 : sln.6]
%r166 = add i64 48, %r165 ; <i64> [#uses=1] ; [oox.195 : sln.6]
%r167 = inttoptr i64 %r166 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.195 : sln.6]
%r168 = load <4 x float>* %r167, align 4 ; <<4 x float>> [#uses=1] ; [oox.195 : sln.6]
%r169 = add <4 x float> %r164, %r168 ; <<4 x float>> [#uses=1] ; [oox.195 : sln.6]
%r170 = load i64* %"$LCS_6", align 8 ; <i64> [#uses=1] ; [oox.195 : sln.6]
%r171 = add i64 48, %r170 ; <i64> [#uses=1] ; [oox.195 : sln.6]
%r172 = inttoptr i64 %r171 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.195 : sln.6]
store <4 x float> %r169, <4 x float>* %r172, align 4 ; [oox.195 : sln.6]
%r173 = load i64* %"$SI_S25", align 8 ; <i64> [#uses=1] ; [oox.196 : sln.5]
%r174 = add i64 64, %r173 ; <i64> [#uses=1] ; [oox.196 : sln.5]
store i64 %r174, i64* %"$SI_S25", align 8 ; [oox.196 : sln.5]
%r175 = load i64* %"$LC_S24", align 8 ; <i64> [#uses=1] ; [oox.197 : sln.5]
%r176 = add i64 16, %r175 ; <i64> [#uses=1] ; [oox.197 : sln.5]
store i64 %r176, i64* %"$LC_S24", align 8 ; [oox.197 : sln.5]
%r177 = load i64* %"$LC_S24", align 8 ; <i64> [#uses=1] ; [oox.198 : sln.5]
%r178 = icmp slt i64 %r177, -15 ; <i1> [#uses=1] ; [oox.198 : sln.5]
%r179 = zext i1 %r178 to i32 ; <i32> [#uses=1] ; [oox.198 : sln.5]
%r180 = icmp ne i32 %r179, 0 ; <i1> [#uses=1] ; [oox.198 : sln.5]
br i1 %r180, label %"file loop.c, line 1, in inner vector loop at depth 0, bb58", label %"file loop.c, line 5, bb64" ; [oox.198 : sln.5]
"file loop.c, line 5, bb64": ; srcLine 5 ; preds = %"file loop.c, line 6, in inner vector loop at depth 0, bb59", %"file loop.c, line 5, bb60"
%r181 = load i64* %"$LC_S24", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.5]
%r182 = icmp sge i64 %r181, 0 ; <i1> [#uses=1] ; [oox.189 : sln.5]
%r183 = zext i1 %r182 to i32 ; <i32> [#uses=1] ; [oox.189 : sln.5]
%r184 = icmp ne i32 %r183, 0 ; <i1> [#uses=1] ; [oox.189 : sln.5]
br i1 %r184, label %"file loop.c, line 5, bb45", label %"file loop.c, line 1, bb65" ; [oox.189 : sln.5]
"file loop.c, line 1, bb65": ; srcLine 1 ; preds = %"file loop.c, line 5, bb64"
br label %"file loop.c, line 1, in inner vector loop at depth 0, bb62" ; [oox.0 : sln.0]
"file loop.c, line 1, in inner vector loop at depth 0, bb62": ; srcLine 1 ; preds = %"file loop.c, line 6, in inner vector loop at depth 0, bb63", %"file loop.c, line 1, bb65"
br label %"file loop.c, line 6, in inner vector loop at depth 0, bb63" ; [oox.0 : sln.0]
"file loop.c, line 6, in inner vector loop at depth 0, bb63": ; srcLine 6 ; preds = %"file loop.c, line 1, in inner vector loop at depth 0, bb62"
%r185 = load i64* %"$LIS_S17", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r186 = load i64* %"$SI_S25", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r187 = add i64 %r185, %r186 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r188 = inttoptr i64 %r187 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.189 : sln.6]
%r189 = load <4 x float>* %r188, align 4 ; <<4 x float>> [#uses=1] ; [oox.189 : sln.6]
%r190 = load i64* %"$LIS_S15", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r191 = load i64* %"$SI_S25", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r192 = add i64 %r190, %r191 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r193 = inttoptr i64 %r192 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.189 : sln.6]
%r194 = load <4 x float>* %r193, align 4 ; <<4 x float>> [#uses=1] ; [oox.189 : sln.6]
%r195 = add <4 x float> %r189, %r194 ; <<4 x float>> [#uses=1] ; [oox.189 : sln.6]
%r196 = load i64* %"$LIS_S18", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r197 = load i64* %"$SI_S25", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r198 = add i64 %r196, %r197 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r199 = inttoptr i64 %r198 to <4 x float>* ; <<4 x float>*> [#uses=1] ; [oox.189 : sln.6]
store <4 x float> %r195, <4 x float>* %r199, align 4 ; [oox.189 : sln.6]
%r200 = load i64* %"$SI_S25", align 8 ; <i64> [#uses=1] ; [oox.190 : sln.5]
%r201 = add i64 16, %r200 ; <i64> [#uses=1] ; [oox.190 : sln.5]
store i64 %r201, i64* %"$SI_S25", align 8 ; [oox.190 : sln.5]
%r202 = load i64* %"$LC_S24", align 8 ; <i64> [#uses=1] ; [oox.191 : sln.5]
%r203 = add i64 4, %r202 ; <i64> [#uses=1] ; [oox.191 : sln.5]
store i64 %r203, i64* %"$LC_S24", align 8 ; [oox.191 : sln.5]
%r204 = load i64* %"$LC_S24", align 8 ; <i64> [#uses=1] ; [oox.192 : sln.5]
%r205 = icmp slt i64 %r204, 0 ; <i1> [#uses=1] ; [oox.192 : sln.5]
%r206 = zext i1 %r205 to i64 ; <i64> [#uses=1] ; [oox.192 : sln.5]
%r207 = icmp ne i64 %r206, 0 ; <i1> [#uses=1] ; [oox.192 : sln.5]
br i1 %r207, label %"file loop.c, line 1, in inner vector loop at depth 0, bb62", label %"file loop.c, line 5, bb45" ; [oox.192 : sln.5]
"file loop.c, line 5, bb45": ; srcLine 5 ; preds = %"file loop.c, line 6, in inner vector loop at depth 0, bb63", %"file loop.c, line 5, bb64"
%r208 = load i64* %"$TC_1", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.5]
store i64 %r208, i64* %"$i_S23", align 8 ; [oox.189 : sln.5]
br label %"file loop.c, line 5, bb50" ; [oox.0 : sln.0]
"file loop.c, line 5, bb50": ; srcLine 5 ; preds = %"file loop.c, line 5, bb45", %"file loop.c, line 5, bb48"
%r209 = load i32* %"$MR_n_0", align 4 ; <i32> [#uses=1] ; [oox.189 : sln.5]
%r210 = sext i32 %r209 to i64 ; <i64> [#uses=1] ; [oox.189 : sln.5]
store i64 %r210, i64* %"$LCS_n_7", align 8 ; [oox.189 : sln.5]
%r211 = load i64* %"$LCS_n_7", align 8 ; <i64> [#uses=1] ; [oox.190 : sln.5]
%r212 = load i64* %"$i_S23", align 8 ; <i64> [#uses=1] ; [oox.190 : sln.5]
%r213 = icmp sle i64 %r211, %r212 ; <i1> [#uses=1] ; [oox.190 : sln.5]
%r214 = zext i1 %r213 to i32 ; <i32> [#uses=1] ; [oox.190 : sln.5]
%r215 = icmp ne i32 %r214, 0 ; <i1> [#uses=1] ; [oox.190 : sln.5]
br i1 %r215, label %"file loop.c, line 5, bb6", label %"file loop.c, line 1, bb49" ; [oox.190 : sln.5]
"file loop.c, line 1, bb49": ; srcLine 1 ; preds = %"file loop.c, line 5, bb50"
br label %"file loop.c, line 6, bb57" ; [oox.0 : sln.0]
"file loop.c, line 6, bb57": ; srcLine 6 ; preds = %"file loop.c, line 1, bb49"
%r216 = load float** %"$MR_B_3", align 8 ; <float*> [#uses=1] ; [oox.189 : sln.6]
%r217 = ptrtoint float* %r216 to i64 ; <i64> [#uses=1] ; [oox.189 : sln.6]
store i64 %r217, i64* %"$LIS_S19", align 8 ; [oox.189 : sln.6]
%r218 = load float** %"$MR_C_1", align 8 ; <float*> [#uses=1] ; [oox.190 : sln.6]
%r219 = ptrtoint float* %r218 to i64 ; <i64> [#uses=1] ; [oox.190 : sln.6]
store i64 %r219, i64* %"$LIS_S20", align 8 ; [oox.190 : sln.6]
%r220 = load float** %"$MR_A_2", align 8 ; <float*> [#uses=1] ; [oox.191 : sln.6]
%r221 = ptrtoint float* %r220 to i64 ; <i64> [#uses=1] ; [oox.191 : sln.6]
store i64 %r221, i64* %"$LIS_S21", align 8 ; [oox.191 : sln.6]
br label %"file loop.c, line 1, in inner loop at depth 0, bb51" ; [oox.0 : sln.0]
"file loop.c, line 1, in inner loop at depth 0, bb51": ; srcLine 1 ; preds = %"file loop.c, line 5, in inner loop at depth 0, bb54", %"file loop.c, line 6, bb57"
br label %"file loop.c, line 5, in inner loop at depth 0, bb54" ; [oox.0 : sln.0]
"file loop.c, line 5, in inner loop at depth 0, bb54": ; srcLine 5 ; preds = %"file loop.c, line 1, in inner loop at depth 0, bb51"
%r222 = load i64* %"$LIS_S20", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r223 = inttoptr i64 %r222 to float* ; <float*> [#uses=1] ; [oox.189 : sln.6]
%r224 = load i64* %"$i_S23", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r225 = getelementptr float* %r223, i64 %r224 ; <float*> [#uses=1] ; [oox.189 : sln.6]
%r226 = load float* %r225, align 4 ; <float> [#uses=1] ; [oox.189 : sln.6]
%r227 = load i64* %"$LIS_S19", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r228 = inttoptr i64 %r227 to float* ; <float*> [#uses=1] ; [oox.189 : sln.6]
%r229 = load i64* %"$i_S23", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r230 = getelementptr float* %r228, i64 %r229 ; <float*> [#uses=1] ; [oox.189 : sln.6]
%r231 = load float* %r230, align 4 ; <float> [#uses=1] ; [oox.189 : sln.6]
%r232 = add float %r226, %r231 ; <float> [#uses=1] ; [oox.189 : sln.6]
%r233 = load i64* %"$LIS_S21", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r234 = inttoptr i64 %r233 to float* ; <float*> [#uses=1] ; [oox.189 : sln.6]
%r235 = load i64* %"$i_S23", align 8 ; <i64> [#uses=1] ; [oox.189 : sln.6]
%r236 = getelementptr float* %r234, i64 %r235 ; <float*> [#uses=1] ; [oox.189 : sln.6]
store float %r232, float* %r236, align 4 ; [oox.189 : sln.6]
%r237 = load i64* %"$i_S23", align 8 ; <i64> [#uses=1] ; [oox.190 : sln.5]
%r238 = add i64 1, %r237 ; <i64> [#uses=1] ; [oox.190 : sln.5]
store i64 %r238, i64* %"$i_S23", align 8 ; [oox.190 : sln.5]
%r239 = load i64* %"$i_S23", align 8 ; <i64> [#uses=1] ; [oox.191 : sln.5]
%r240 = load i64* %"$LCS_n_7", align 8 ; <i64> [#uses=1] ; [oox.191 : sln.5]
%r241 = icmp slt i64 %r239, %r240 ; <i1> [#uses=1] ; [oox.191 : sln.5]
%r242 = zext i1 %r241 to i64 ; <i64> [#uses=1] ; [oox.191 : sln.5]
%r243 = icmp ne i64 %r242, 0 ; <i1> [#uses=1] ; [oox.191 : sln.5]
br i1 %r243, label %"file loop.c, line 1, in inner loop at depth 0, bb51", label %"file loop.c, line 5, bb6" ; [oox.191 : sln.5]
"file loop.c, line 5, bb6": ; srcLine 5 ; preds = %"file loop.c, line 5, in inner loop at depth 0, bb54", %"file loop.c, line 5, bb50", %"file loop.c, line 5, in inner loop at depth 0, bb32", %"file loop.c, line 1, bb69"
ret i32 0 ; [oox.189 : sln.10]
}