[SystemZ] Enable unaligned accesses

The code to distinguish between unaligned and aligned addresses was
already there, so this is mostly just a switch-on-and-test process.

llvm-svn: 182920
This commit is contained in:
Richard Sandiford 2013-05-30 09:45:42 +00:00
parent 2c14269883
commit 46af5a2cdc
14 changed files with 397 additions and 5 deletions

View File

@ -253,6 +253,16 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return Imm.isZero() || Imm.isNegZero(); return Imm.isZero() || Imm.isNegZero();
} }
// Hook called by the target-independent lowering code to ask whether an
// unaligned access of type VT is legal. On SystemZ the normal load/store
// instructions tolerate unaligned addresses, so this returns true for all
// types and, via *Fast, reports that such accesses are not slow.
bool SystemZTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
bool *Fast) const {
// Unaligned accesses should never be slower than the expanded version.
// We check specifically for aligned accesses in the few cases where
// they are required.
if (Fast)
*Fast = true;
return true;
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// Inline asm support // Inline asm support
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//

View File

@ -125,6 +125,7 @@ public:
return true; return true;
} }
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE; virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE;
virtual std::pair<unsigned, const TargetRegisterClass *> virtual std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const std::string &Constraint, getRegForInlineAsmConstraint(const std::string &Constraint,

View File

@ -34,15 +34,15 @@ define i64 @f1(i64 %length, i64 %index) {
; CHECK-E: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]]) ; CHECK-E: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]])
; CHECK-E: mviy 4096([[TMP]]), 4 ; CHECK-E: mviy 4096([[TMP]]), 4
%a = alloca i8, i64 %length %a = alloca i8, i64 %length
store i8 0, i8 *%a store volatile i8 0, i8 *%a
%b = getelementptr i8 *%a, i64 4095 %b = getelementptr i8 *%a, i64 4095
store i8 1, i8 *%b store volatile i8 1, i8 *%b
%c = getelementptr i8 *%a, i64 %index %c = getelementptr i8 *%a, i64 %index
store i8 2, i8 *%c store volatile i8 2, i8 *%c
%d = getelementptr i8 *%c, i64 4095 %d = getelementptr i8 *%c, i64 4095
store i8 3, i8 *%d store volatile i8 3, i8 *%d
%e = getelementptr i8 *%d, i64 1 %e = getelementptr i8 *%d, i64 1
store i8 4, i8 *%e store volatile i8 4, i8 *%e
%count = call i64 @bar(i8 *%a) %count = call i64 @bar(i8 *%a)
%res = add i64 %count, 1 %res = add i64 %count, 1
ret i64 %res ret i64 %res

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i16 1 @g = global i16 1
@h = global i16 1, align 1, section "foo"
; Check signed comparison. ; Check signed comparison.
define i32 @f1(i32 %src1) { define i32 @f1(i32 %src1) {
@ -79,3 +80,23 @@ exit:
%res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
ret i32 %res ret i32 %res
} }
; Repeat f1 with an unaligned address.
define i32 @f5(i32 %src1) {
; CHECK: f5:
; CHECK: lgrl [[REG:%r[0-5]]], h@GOT
; CHECK: ch %r2, 0([[REG]])
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
%val = load i16 *@h, align 1
%src2 = sext i16 %val to i32
%cond = icmp slt i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
%mul = mul i32 %src1, %src1
br label %exit
exit:
%res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
ret i32 %res
}

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i16 1 @g = global i16 1
@h = global i16 1, align 1, section "foo"
; Check unsigned comparison. ; Check unsigned comparison.
define i32 @f1(i32 %src1) { define i32 @f1(i32 %src1) {
@ -79,3 +80,24 @@ exit:
%res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
ret i32 %res ret i32 %res
} }
; Repeat f1 with an unaligned address.
define i32 @f5(i32 %src1) {
; CHECK: f5:
; CHECK: lgrl [[REG:%r[0-5]]], h@GOT
; CHECK: llh [[VAL:%r[0-5]]], 0([[REG]])
; CHECK: clr %r2, [[VAL]]
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
%val = load i16 *@h, align 1
%src2 = zext i16 %val to i32
%cond = icmp ult i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
%mul = mul i32 %src1, %src1
br label %exit
exit:
%res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
ret i32 %res
}

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i32 1 @g = global i32 1
@h = global i32 1, align 2, section "foo"
; Check signed comparisons. ; Check signed comparisons.
define i32 @f1(i32 %src1) { define i32 @f1(i32 %src1) {
@ -76,3 +77,41 @@ exit:
%res = phi i32 [ %src1, %entry ], [ %mul, %mulb ] %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
ret i32 %res ret i32 %res
} }
; Repeat f1 with an unaligned address.
define i32 @f5(i32 %src1) {
; CHECK: f5:
; CHECK: larl [[REG:%r[0-5]]], h
; CHECK: c %r2, 0([[REG]])
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
%src2 = load i32 *@h, align 2
%cond = icmp slt i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
%mul = mul i32 %src1, %src1
br label %exit
exit:
%res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
ret i32 %res
}
; Repeat f2 with an unaligned address.
define i32 @f6(i32 %src1) {
; CHECK: f6:
; CHECK: larl [[REG:%r[0-5]]], h
; CHECK: cl %r2, 0([[REG]])
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
%src2 = load i32 *@h, align 2
%cond = icmp ult i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
%mul = mul i32 %src1, %src1
br label %exit
exit:
%res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
ret i32 %res
}

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i16 1 @g = global i16 1
@h = global i16 1, align 1, section "foo"
; Check signed comparison. ; Check signed comparison.
define i64 @f1(i64 %src1) { define i64 @f1(i64 %src1) {
@ -79,3 +80,23 @@ exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res ret i64 %res
} }
; Repeat f1 with an unaligned address.
define i64 @f5(i64 %src1) {
; CHECK: f5:
; CHECK: lgrl [[REG:%r[0-5]]], h@GOT
; CHECK: cgh %r2, 0([[REG]])
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
%val = load i16 *@h, align 1
%src2 = sext i16 %val to i64
%cond = icmp slt i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
%mul = mul i64 %src1, %src1
br label %exit
exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res
}

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i16 1 @g = global i16 1
@h = global i16 1, align 1, section "foo"
; Check unsigned comparison. ; Check unsigned comparison.
define i64 @f1(i64 %src1) { define i64 @f1(i64 %src1) {
@ -79,3 +80,24 @@ exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res ret i64 %res
} }
; Repeat f1 with an unaligned address.
define i64 @f5(i64 %src1) {
; CHECK: f5:
; CHECK: lgrl [[REG:%r[0-5]]], h@GOT
; CHECK: llgh [[VAL:%r[0-5]]], 0([[REG]])
; CHECK: clgr %r2, [[VAL]]
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
%val = load i16 *@h, align 1
%src2 = zext i16 %val to i64
%cond = icmp ult i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
%mul = mul i64 %src1, %src1
br label %exit
exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res
}

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i32 1 @g = global i32 1
@h = global i32 1, align 2, section "foo"
; Check signed comparison. ; Check signed comparison.
define i64 @f1(i64 %src1) { define i64 @f1(i64 %src1) {
@ -79,3 +80,23 @@ exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res ret i64 %res
} }
; Repeat f1 with an unaligned address.
define i64 @f5(i64 %src1) {
; CHECK: f5:
; CHECK: larl [[REG:%r[0-5]]], h
; CHECK: cgf %r2, 0([[REG]])
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
%val = load i32 *@h, align 2
%src2 = sext i32 %val to i64
%cond = icmp slt i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
%mul = mul i64 %src1, %src1
br label %exit
exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res
}

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i32 1 @g = global i32 1
@h = global i32 1, align 2, section "foo"
; Check unsigned comparison. ; Check unsigned comparison.
define i64 @f1(i64 %src1) { define i64 @f1(i64 %src1) {
@ -79,3 +80,23 @@ exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res ret i64 %res
} }
; Repeat f1 with an unaligned address.
define i64 @f5(i64 %src1) {
; CHECK: f5:
; CHECK: larl [[REG:%r[0-5]]], h
; CHECK: clgf %r2, 0([[REG]])
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
%val = load i32 *@h, align 2
%src2 = zext i32 %val to i64
%cond = icmp ult i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
%mul = mul i64 %src1, %src1
br label %exit
exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res
}

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
@g = global i64 1 @g = global i64 1
@h = global i64 1, align 4, section "foo"
; Check signed comparisons. ; Check signed comparisons.
define i64 @f1(i64 %src1) { define i64 @f1(i64 %src1) {
@ -76,3 +77,22 @@ exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ] %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res ret i64 %res
} }
; Repeat f1 with an unaligned address.
define i64 @f5(i64 %src1) {
; CHECK: f5:
; CHECK: larl [[REG:%r[0-5]]], h
; CHECK: cg %r2, 0([[REG]])
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
%src2 = load i64 *@h, align 4
%cond = icmp slt i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
%mul = mul i64 %src1, %src1
br label %exit
exit:
%res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
ret i64 %res
}

View File

@ -6,6 +6,10 @@
@gsrc32 = global i32 1 @gsrc32 = global i32 1
@gdst16 = global i16 2 @gdst16 = global i16 2
@gdst32 = global i32 2 @gdst32 = global i32 2
@gsrc16u = global i16 1, align 1, section "foo"
@gsrc32u = global i32 1, align 2, section "foo"
@gdst16u = global i16 2, align 1, section "foo"
@gdst32u = global i32 2, align 2, section "foo"
; Check sign-extending loads from i16. ; Check sign-extending loads from i16.
define i32 @f1() { define i32 @f1() {
@ -47,3 +51,49 @@ define void @f4() {
store i32 %val, i32 *@gdst32 store i32 %val, i32 *@gdst32
ret void ret void
} }
; Repeat f1 with an unaligned variable.
define i32 @f5() {
; CHECK: f5:
; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u
; CHECK: lh %r2, 0([[REG]])
; CHECK: br %r14
%val = load i16 *@gsrc16u, align 1
%ext = sext i16 %val to i32
ret i32 %ext
}
; Repeat f2 with an unaligned variable.
define i32 @f6() {
; CHECK: f6:
; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u
; CHECK: llh %r2, 0([[REG]])
; CHECK: br %r14
%val = load i16 *@gsrc16u, align 1
%ext = zext i16 %val to i32
ret i32 %ext
}
; Repeat f3 with an unaligned variable.
define void @f7(i32 %val) {
; CHECK: f7:
; CHECK: lgrl [[REG:%r[0-5]]], gdst16u
; CHECK: sth %r2, 0([[REG]])
; CHECK: br %r14
%half = trunc i32 %val to i16
store i16 %half, i16 *@gdst16u, align 1
ret void
}
; Repeat f4 with unaligned variables.
define void @f8() {
; CHECK: f8:
; CHECK: larl [[REG:%r[0-5]]], gsrc32u
; CHECK: l [[VAL:%r[0-5]]], 0([[REG]])
; CHECK: larl [[REG:%r[0-5]]], gdst32u
; CHECK: st [[VAL]], 0([[REG]])
; CHECK: br %r14
%val = load i32 *@gsrc32u, align 2
store i32 %val, i32 *@gdst32u, align 2
ret void
}

View File

@ -8,6 +8,12 @@
@gdst16 = global i16 2 @gdst16 = global i16 2
@gdst32 = global i32 2 @gdst32 = global i32 2
@gdst64 = global i64 2 @gdst64 = global i64 2
@gsrc16u = global i16 1, align 1, section "foo"
@gsrc32u = global i32 1, align 2, section "foo"
@gsrc64u = global i64 1, align 4, section "foo"
@gdst16u = global i16 2, align 1, section "foo"
@gdst32u = global i32 2, align 2, section "foo"
@gdst64u = global i64 2, align 4, section "foo"
; Check sign-extending loads from i16. ; Check sign-extending loads from i16.
define i64 @f1() { define i64 @f1() {
@ -79,3 +85,82 @@ define void @f7() {
store i64 %val, i64 *@gdst64 store i64 %val, i64 *@gdst64
ret void ret void
} }
; Repeat f1 with an unaligned variable.
define i64 @f8() {
; CHECK: f8:
; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u@GOT
; CHECK: lgh %r2, 0([[REG]])
; CHECK: br %r14
%val = load i16 *@gsrc16u, align 1
%ext = sext i16 %val to i64
ret i64 %ext
}
; Repeat f2 with an unaligned variable.
define i64 @f9() {
; CHECK: f9:
; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u@GOT
; CHECK: llgh %r2, 0([[REG]])
; CHECK: br %r14
%val = load i16 *@gsrc16u, align 1
%ext = zext i16 %val to i64
ret i64 %ext
}
; Repeat f3 with an unaligned variable.
define i64 @f10() {
; CHECK: f10:
; CHECK: larl [[REG:%r[0-5]]], gsrc32u
; CHECK: lgf %r2, 0([[REG]])
; CHECK: br %r14
%val = load i32 *@gsrc32u, align 2
%ext = sext i32 %val to i64
ret i64 %ext
}
; Repeat f4 with an unaligned variable.
define i64 @f11() {
; CHECK: f11:
; CHECK: larl [[REG:%r[0-5]]], gsrc32u
; CHECK: llgf %r2, 0([[REG]])
; CHECK: br %r14
%val = load i32 *@gsrc32u, align 2
%ext = zext i32 %val to i64
ret i64 %ext
}
; Repeat f5 with an unaligned variable.
define void @f12(i64 %val) {
; CHECK: f12:
; CHECK: lgrl [[REG:%r[0-5]]], gdst16u@GOT
; CHECK: sth %r2, 0([[REG]])
; CHECK: br %r14
%half = trunc i64 %val to i16
store i16 %half, i16 *@gdst16u, align 1
ret void
}
; Repeat f6 with an unaligned variable.
define void @f13(i64 %val) {
; CHECK: f13:
; CHECK: larl [[REG:%r[0-5]]], gdst32u
; CHECK: st %r2, 0([[REG]])
; CHECK: br %r14
%word = trunc i64 %val to i32
store i32 %word, i32 *@gdst32u, align 2
ret void
}
; Repeat f7 with unaligned variables.
define void @f14() {
; CHECK: f14:
; CHECK: larl [[REG:%r[0-5]]], gsrc64u
; CHECK: lg [[VAL:%r[0-5]]], 0([[REG]])
; CHECK: larl [[REG:%r[0-5]]], gdst64u
; CHECK: stg [[VAL]], 0([[REG]])
; CHECK: br %r14
%val = load i64 *@gsrc64u, align 4
store i64 %val, i64 *@gdst64u, align 4
ret void
}

View File

@ -0,0 +1,59 @@
; Check that unaligned accesses are allowed in general. We check the
; few exceptions (like CRL) in their respective test files.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; Check that these four byte stores become a single word store.
define void @f1(i8 *%ptr) {
; CHECK: f1
; CHECK: iilf [[REG:%r[0-5]]], 66051
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
; Four adjacent i8 stores of 0,1,2,3: with unaligned accesses enabled these
; should be merged into a single 32-bit store of 0x00010203 (= 66051).
  %off1 = getelementptr i8 *%ptr, i64 1
  %off2 = getelementptr i8 *%ptr, i64 2
  %off3 = getelementptr i8 *%ptr, i64 3
  store i8 0, i8 *%ptr
  store i8 1, i8 *%off1
  store i8 2, i8 *%off2
  store i8 3, i8 *%off3
  ret void
}
; Check that unaligned 2-byte accesses are allowed.
define i16 @f2(i16 *%src, i16 *%dst) {
; CHECK: f2:
; CHECK: lh %r2, 0(%r2)
; CHECK: sth %r2, 0(%r3)
; CHECK: br %r14
; The align-1 load and store must still use the plain LH/STH instructions
; rather than being expanded into byte operations.
  %val = load i16 *%src, align 1
  store i16 %val, i16 *%dst, align 1
  ret i16 %val
}
; Check that unaligned 4-byte accesses are allowed.
define i32 @f3(i32 *%src1, i32 *%src2, i32 *%dst) {
; CHECK: f3:
; CHECK: l %r2, 0(%r2)
; CHECK: s %r2, 0(%r3)
; CHECK: st %r2, 0(%r4)
; CHECK: br %r14
; Mixed alignments (1 and 2) on 4-byte accesses: all should use the normal
; word load/subtract/store instructions.
  %val1 = load i32 *%src1, align 1
  %val2 = load i32 *%src2, align 2
  %sub = sub i32 %val1, %val2
  store i32 %sub, i32 *%dst, align 1
  ret i32 %sub
}
; Check that unaligned 8-byte accesses are allowed.
define i64 @f4(i64 *%src1, i64 *%src2, i64 *%dst) {
; CHECK: f4:
; CHECK: lg %r2, 0(%r2)
; CHECK: sg %r2, 0(%r3)
; CHECK: stg %r2, 0(%r4)
; CHECK: br %r14
; Underaligned 8-byte accesses (align 1, 2 and 4): all should still use the
; 64-bit LG/SG/STG instructions.
  %val1 = load i64 *%src1, align 1
  %val2 = load i64 *%src2, align 2
  %sub = sub i64 %val1, %val2
  store i64 %sub, i64 *%dst, align 4
  ret i64 %sub
}