2013-08-12 18:28:10 +08:00
|
|
|
; Test memcmp using CLC.
|
|
|
|
;
|
|
|
|
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
|
|
|
|
|
|
|
; memcmp is only declared in this file; every function below calls it with a
; constant length.
declare signext i32 @memcmp(i8 *%src1, i8 *%src2, i64 %size)
|
|
|
|
|
|
|
|
; Zero-length comparisons should be optimized away.
|
|
|
|
define i32 @f1(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f1:
; CHECK: lhi %r2, 0
; CHECK: br %r14
  ; The length is the constant 0, so the expected output is only a load of
  ; 0 into %r2 followed by the return.
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 0)
  ret i32 %res
}
|
|
|
|
|
|
|
|
; Check a case where the result is used as an integer.
|
|
|
|
define i32 @f2(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f2:
; CHECK: clc 0(2,%r2), 0(%r3)
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: rll %r2, [[REG]], 31
; CHECK: br %r14
  ; The result is returned as an integer, so the CC produced by CLC has to be
  ; converted with IPM.  The expected sequence is IPM + SRL 28 + RLL 31: it
  ; works with the natural CLC operand order (CC 1 "less" becomes a negative
  ; integer, CC 2 "greater" a positive one) and does not clobber CC.  The
  ; earlier IPM + SLL 2 + SRA 30 sequence inverted the sign of the result and
  ; must not reappear here.  Any sign extension to 64 bits is a separate step.
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2)
  ret i32 %res
}
|
|
|
|
|
|
|
|
; Check a case where the result is tested for equality.
|
|
|
|
define void @f3(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f3:
; CHECK: clc 0(3,%r2), 0(%r3)
; CHECK-NEXT: je {{\..*}}
; CHECK: br %r14
entry:
  ; The memcmp result is only compared against zero, so the branch is expected
  ; to come immediately after CLC and test its CC directly.
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 3)
  %cmp = icmp eq i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  ; Reached only when the two buffers differ.
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}
|
|
|
|
|
|
|
|
; Check a case where the result is tested for inequality.
|
|
|
|
define void @f4(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f4:
; CHECK: clc 0(4,%r2), 0(%r3)
; CHECK-NEXT: jlh {{\..*}}
; CHECK: br %r14
entry:
  ; An inequality test against zero is expected to become a "jlh" branch
  ; placed directly after CLC.
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 4)
  %cmp = icmp ne i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  ; Reached only when the two buffers compare equal.
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}
|
|
|
|
|
|
|
|
; Check a case where the result is tested for slt.
|
|
|
|
define void @f5(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f5:
; CHECK: clc 0(5,%r2), 0(%r3)
; CHECK-NEXT: jl {{\..*}}
; CHECK: br %r14
entry:
  ; A signed "less than zero" test on the result is expected to become a "jl"
  ; branch placed directly after CLC.
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 5)
  %cmp = icmp slt i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  ; Reached only when the result is not negative.
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}
|
|
|
|
|
|
|
|
; Check a case where the result is tested for sgt.
|
|
|
|
define void @f6(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f6:
; CHECK: clc 0(6,%r2), 0(%r3)
; CHECK-NEXT: jh {{\..*}}
; CHECK: br %r14
entry:
  ; A signed "greater than zero" test on the result is expected to become a
  ; "jh" branch placed directly after CLC.
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 6)
  %cmp = icmp sgt i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  ; Reached only when the result is not positive.
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret void
}
|
|
|
|
|
|
|
|
; Check the upper end of the CLC range. Here the result is used both as
; an integer and for branching.
|
|
|
define i32 @f7(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f7:
; CHECK: clc 0(256,%r2), 0(%r3)
; CHECK: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: rll %r2, [[REG]], 31
; CHECK: jl {{\..*}}
; CHECK: br %r14
entry:
  ; %res is both returned and tested.  The integer value is expected to be
  ; built with IPM + SRL 28 + RLL 31, which works with the natural CLC operand
  ; order (CC 1 "less" becomes negative, CC 2 "greater" positive) and does not
  ; clobber CC.  The "jl" that follows is matched as a branch to a local
  ; label, using the same pattern as the other branch tests in this file.
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 256)
  %cmp = icmp slt i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  ; Reached only when the result is not negative.
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret i32 %res
}
|
|
|
|
|
|
|
|
; 257 bytes is too big for a single CLC. For now expect a call instead.
|
|
|
|
define i32 @f8(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f8:
; CHECK: brasl %r14, memcmp@PLT
; CHECK: br %r14
  ; A length of 257 is expected to stay a real call to memcmp through the PLT
  ; rather than being expanded inline.
  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257)
  ret i32 %res
}
|