[X86] Don't zero/sign-extend i1, i8, or i16 return values to 32 bits (PR22532)

This matches GCC and MSVC's behaviour, and saves on code size.

We were already not extending i1 return values on x86_64 after r127766. This
takes that patch further by applying it to the 32-bit x86 target as well, and
to i8 and i16 return values in addition to i1.

The ABI docs have been unclear about the required behaviour here. The new i386
psABI [1] clearly states (Table 2.4, page 14) that i1, i8, and i16 return
values do not need to be extended beyond 8 bits. The x86_64 ABI doc is being
updated to say the same [2].

Differential Revision: http://reviews.llvm.org/D16907

 [1]. https://01.org/sites/default/files/file_attach/intel386-psabi-1.0.pdf
 [2]. https://groups.google.com/d/msg/x86-64-abi/E8O33onbnGQ/_RFWw_ixDQAJ

llvm-svn: 260133
This commit is contained in:
Hans Wennborg 2016-02-08 19:34:30 +00:00
parent bc130af434
commit 850ec6ca18
19 changed files with 207 additions and 66 deletions

View File

@ -907,8 +907,7 @@ Currently, only the following parameter attributes are defined:
``zeroext``
This indicates to the code generator that the parameter or return
value should be zero-extended to the extent required by the target's
ABI (which is usually 32-bits, but is 8-bits for a i1 on x86-64) by
the caller (for a parameter) or the callee (for a return value).
ABI by the caller (for a parameter) or the callee (for a return value).
``signext``
This indicates to the code generator that the parameter or return
value should be sign-extended to the extent required by the target's

View File

@ -2537,12 +2537,12 @@ public:
}
/// Return the type that should be used to zero or sign extend a
/// zeroext/signext integer argument or return value. FIXME: Most C calling
/// convention requires the return type to be promoted, but this is not true
/// all the time, e.g. i1 on x86-64. It is also not necessary for non-C
/// calling conventions. The frontend should handle this and include all of
/// the necessary information.
virtual EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
/// zeroext/signext integer return value. FIXME: Some C calling conventions
/// require the return type to be promoted, but this is not true all the time,
/// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling
/// conventions. The frontend should handle this and include all of the
/// necessary information.
virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType /*ExtendKind*/) const {
EVT MinVT = getRegisterType(Context, MVT::i32);
return VT.bitsLT(MinVT) ? MinVT : VT;

View File

@ -1381,7 +1381,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
EVT VT = ValueVTs[j];
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
VT = TLI.getTypeForExtArgOrReturn(Context, VT, ExtendKind);
VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
unsigned NumParts = TLI.getNumRegisters(Context, VT);
MVT PartVT = TLI.getRegisterType(Context, VT);

View File

@ -2374,15 +2374,14 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return true;
}
EVT
X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT;
// TODO: Is this also valid on 32-bit?
if (Subtarget.is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT = MVT::i32;
if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) {
// The ABI does not require i1, i8 or i16 to be extended.
ReturnMVT = MVT::i8;
else
ReturnMVT = MVT::i32;
}
EVT MinVT = getRegisterType(Context, ReturnMVT);
return VT.bitsLT(MinVT) ? MinVT : VT;

View File

@ -1092,8 +1092,8 @@ namespace llvm {
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const override;
EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const override;
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,

View File

@ -2,9 +2,10 @@
@X = global i32 0 ; <i32*> [#uses=1]
define signext i8 @_Z3fooi(i32 %x) {
define i32 @_Z3fooi(i32 %x) {
entry:
store i32 %x, i32* @X, align 4
%retval67 = trunc i32 %x to i8 ; <i8> [#uses=1]
ret i8 %retval67
%retval = sext i8 %retval67 to i32
ret i32 %retval
}

View File

@ -12,7 +12,7 @@ entry:
; 64BIT-LABEL: t1:
; 64BIT-NOT: movw %si, %ax
; 64BIT: leal 1(%rsi), %eax
; 64BIT: leal 1(%rsi), %ebx
%0 = icmp eq i16 %k, %c ; <i1> [#uses=1]
%1 = add i16 %k, 1 ; <i16> [#uses=3]
br i1 %0, label %bb, label %bb1
@ -34,8 +34,8 @@ entry:
; 64BIT-LABEL: t2:
; 64BIT-NOT: movw %si, %ax
; 64BIT: leal -1(%rsi), %eax
; 64BIT: movzwl %ax
; 64BIT: leal -1(%rsi), %ebx
; 64BIT: movzwl %bx
%0 = icmp eq i16 %k, %c ; <i1> [#uses=1]
%1 = add i16 %k, -1 ; <i16> [#uses=3]
br i1 %0, label %bb, label %bb1
@ -59,7 +59,7 @@ entry:
; 64BIT-LABEL: t3:
; 64BIT-NOT: movw %si, %ax
; 64BIT: leal 2(%rsi), %eax
; 64BIT: leal 2(%rsi), %ebx
%0 = add i16 %k, 2 ; <i16> [#uses=3]
%1 = icmp eq i16 %k, %c ; <i1> [#uses=1]
br i1 %1, label %bb, label %bb1
@ -82,7 +82,7 @@ entry:
; 64BIT-LABEL: t4:
; 64BIT-NOT: movw %si, %ax
; 64BIT: leal (%rsi,%rdi), %eax
; 64BIT: leal (%rsi,%rdi), %ebx
%0 = add i16 %k, %c ; <i16> [#uses=3]
%1 = icmp eq i16 %k, %c ; <i1> [#uses=1]
br i1 %1, label %bb, label %bb1

View File

@ -1,10 +1,15 @@
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s -check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64
; X64: @bar1
; Check that the argument gets zero-extended before calling.
; X86-LABEL: bar1
; X86: movzbl
; X86: calll
; X64-LABEL: bar1
; X64: movzbl
; X64: jmp
; WIN64: @bar1
; WIN64-LABEL: bar1
; WIN64: movzbl
; WIN64: callq
define void @bar1(i1 zeroext %v1) nounwind ssp {
@ -14,10 +19,11 @@ entry:
ret void
}
; X64: @bar2
; Check that on x86-64 the arguments are simply forwarded.
; X64-LABEL: bar2
; X64-NOT: movzbl
; X64: jmp
; WIN64: @bar2
; WIN64-LABEL: bar2
; WIN64-NOT: movzbl
; WIN64: callq
define void @bar2(i8 zeroext %v1) nounwind ssp {
@ -27,16 +33,19 @@ entry:
ret void
}
; X64: @bar3
; X64: callq
; X64-NOT: movzbl
; X64-NOT: and
; X64: ret
; WIN64: @bar3
; WIN64: callq
; WIN64-NOT: movzbl
; WIN64-NOT: and
; WIN64: ret
; Check that i1 return values are not zero-extended.
; X86-LABEL: bar3
; X86: call
; X86-NEXT: {{add|pop}}
; X86-NEXT: ret
; X64-LABEL: bar3
; X64: call
; X64-NEXT: {{add|pop}}
; X64-NEXT: ret
; WIN64-LABEL: bar3
; WIN64: call
; WIN64-NEXT: {{add|pop}}
; WIN64-NEXT: ret
define zeroext i1 @bar3() nounwind ssp {
entry:
%call = call i1 @foo2() nounwind

View File

@ -6,9 +6,9 @@ target triple = "x86_64-apple-macosx10.10.0"
define zeroext i8 @test_udivrem_zext_ah(i8 %x, i8 %y) {
; CHECK-LABEL: test_udivrem_zext_ah
; CHECK: divb
; CHECK: movzbl %ah, [[REG_REM:%[a-z0-9]+]]
; CHECK: movzbl %ah, %e[[REG_REM:[a-z]]]x
; CHECK: movb %al, ([[REG_ZPTR:%[a-z0-9]+]])
; CHECK: movl [[REG_REM]], %eax
; CHECK: movb %[[REG_REM]]l, %al
; CHECK: ret
%div = udiv i8 %x, %y
store i8 %div, i8* @z
@ -51,9 +51,9 @@ define signext i8 @test_sdivrem_sext_ah(i8 %x, i8 %y) {
; CHECK-LABEL: test_sdivrem_sext_ah
; CHECK: cbtw
; CHECK: idivb
; CHECK: movsbl %ah, [[REG_REM:%[a-z0-9]+]]
; CHECK: movsbl %ah, %e[[REG_REM:[a-z]]]x
; CHECK: movb %al, ([[REG_ZPTR]])
; CHECK: movl [[REG_REM]], %eax
; CHECK: movb %[[REG_REM]]l, %al
; CHECK: ret
%div = sdiv i8 %x, %y
store i8 %div, i8* @z

View File

@ -21,7 +21,7 @@ define i32 @foo2(i8 %a) #0 {
; CHECK-LABEL: bar
; CHECK-NOT: cvt
; CHECK: movl
; CHECK: movb
define zeroext i8 @bar(i8 zeroext %a) #0 {
%conv = uitofp i8 %a to float
%conv1 = fptoui float %conv to i8

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 | grep mov | count 1
; RUN: llc < %s -march=x86-64 | grep mov | count 1
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | grep mov | count 1
; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X86
; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s -check-prefix=X32
define zeroext i8 @foo() nounwind ssp {
entry:
@ -8,6 +8,24 @@ entry:
%1 = lshr i16 %0, 8
%2 = trunc i16 %1 to i8
ret i8 %2
; X86-LABEL: foo
; X86: calll
; X86-NEXT: movb %ah, %al
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
; X64-LABEL: foo
; X64: callq
; X64-NEXT: shrl $8, %eax
; X64-NEXT: popq
; X64-NEXT: retq
; X32-LABEL: foo
; X32: callq
; X32-NEXT: shrl $8, %eax
; X32-NEXT: popq
; X32-NEXT: retq
}
declare zeroext i16 @bar(...)

View File

@ -3,9 +3,9 @@
define signext i16 @foo(i16 signext %x) nounwind {
entry:
; CHECK-LABEL: foo:
; CHECK-NOT: movzwl
; CHECK: movswl 4(%esp), %eax
; CHECK: xorl $21998, %eax
; CHECK: movzwl 4(%esp), %eax
; CHECK-NEXT: xorl $21998, %eax
; CHECK-NEXT: retl
%0 = xor i16 %x, 21998
ret i16 %0
}
@ -13,9 +13,9 @@ entry:
define signext i16 @bar(i16 signext %x) nounwind {
entry:
; CHECK-LABEL: bar:
; CHECK-NOT: movzwl
; CHECK: movswl 4(%esp), %eax
; CHECK: xorl $-10770, %eax
; CHECK: movzwl 4(%esp), %eax
; CHECK-NEXT: xorl $54766, %eax
; CHECK-NEXT: retl
%0 = xor i16 %x, 54766
ret i16 %0
}

View File

@ -0,0 +1,105 @@
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
@x = common global i32 0, align 4
define zeroext i1 @unsigned_i1() {
entry:
%0 = load i32, i32* @x
%cmp = icmp eq i32 %0, 42
ret i1 %cmp
; Unsigned i1 return values are not extended.
; CHECK-LABEL: unsigned_i1:
; CHECK: cmp
; CHECK-NEXT: sete
; CHECK-NEXT: ret
}
define zeroext i8 @unsigned_i8() {
entry:
%0 = load i32, i32* @x
%cmp = icmp eq i32 %0, 42
%retval = zext i1 %cmp to i8
ret i8 %retval
; Unsigned i8 return values are not extended.
; CHECK-LABEL: unsigned_i8:
; CHECK: cmp
; CHECK-NEXT: sete
; CHECK-NEXT: ret
}
define signext i8 @signed_i8() {
entry:
%0 = load i32, i32* @x
%cmp = icmp eq i32 %0, 42
%retval = zext i1 %cmp to i8
ret i8 %retval
; Signed i8 return values are not extended.
; CHECK-LABEL: signed_i8:
; CHECK: cmp
; CHECK-NEXT: sete
; CHECK-NEXT: ret
}
@a = common global i16 0
@b = common global i16 0
define zeroext i16 @unsigned_i16() {
entry:
%0 = load i16, i16* @a
%1 = load i16, i16* @b
%add = add i16 %1, %0
ret i16 %add
; i16 return values are not extended.
; CHECK-LABEL: unsigned_i16:
; CHECK: movw
; CHECK-NEXT: addw
; CHECK-NEXT: ret
}
define i32 @use_i1() {
entry:
%0 = call i1 @unsigned_i1();
%1 = zext i1 %0 to i32
ret i32 %1
; The high 24 bits of %eax from a function returning i1 are undefined.
; CHECK-LABEL: use_i1:
; CHECK: call
; CHECK-NEXT: movzbl
; CHECK-NEXT: {{pop|add}}
; CHECK-NEXT: ret
}
define i32 @use_i8() {
entry:
%0 = call i8 @unsigned_i8();
%1 = zext i8 %0 to i32
ret i32 %1
; The high 24 bits of %eax from a function returning i8 are undefined.
; CHECK-LABEL: use_i8:
; CHECK: call
; CHECK-NEXT: movzbl
; CHECK-NEXT: {{pop|add}}
; CHECK-NEXT: ret
}
define i32 @use_i16() {
entry:
%0 = call i16 @unsigned_i16();
%1 = zext i16 %0 to i32
ret i32 %1
; The high 16 bits of %eax from a function returning i16 are undefined.
; CHECK-LABEL: use_i16:
; CHECK: call
; CHECK-NEXT: movzwl
; CHECK-NEXT: {{pop|add}}
; CHECK-NEXT: ret
}

View File

@ -66,10 +66,10 @@ entry:
%2 = load i8, i8* %1, align 1 ; <i8> [#uses=1]
ret i8 %2
; CHECK-LABEL: test4:
; CHECK: movsbl ({{.*}},4), %eax
; CHECK: movb ({{.*}},4), %al
; ATOM-LABEL: test4:
; ATOM: movsbl ({{.*}},4), %eax
; ATOM: movb ({{.*}},4), %al
}
define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind {

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 | grep movzbl | count 1
; RUN: llc < %s -march=x86 | FileCheck %s
; rdar://6699246
define signext i8 @t1(i8* %A) nounwind readnone ssp {
@ -6,6 +6,11 @@ entry:
%0 = icmp ne i8* %A, null
%1 = zext i1 %0 to i8
ret i8 %1
; CHECK-LABEL: t1:
; CHECK: cmpl
; CHECK-NEXT: setne
; CHECK-NEXT: retl
}
define i8 @t2(i8* %A) nounwind readnone ssp {
@ -13,4 +18,9 @@ entry:
%0 = icmp ne i8* %A, null
%1 = zext i1 %0 to i8
ret i8 %1
; CHECK-LABEL: t2:
; CHECK: cmpl
; CHECK-NEXT: setne
; CHECK-NEXT: retl
}

View File

@ -1,9 +1,10 @@
; RUN: llc < %s -march=x86 > %t
; RUN: grep movsbl %t
; RUN: not grep movz %t
; RUN: not grep and %t
; RUN: llc < %s -march=x86 | FileCheck %s
define signext i8 @foo(i16 signext %x) nounwind {
define signext i8 @foo(i16 signext %x) nounwind {
%retval56 = trunc i16 %x to i8
ret i8 %retval56
; CHECK-LABEL: foo:
; CHECK: movb
; CHECK-NEXT: retl
}

View File

@ -13,11 +13,11 @@ define zeroext i1 @test_bool() {
; Here, there's more zero extension to be done between the call and the return,
; so a tail call is impossible (well, according to current Clang practice
; anyway. The AMD64 ABI isn't crystal clear on the matter).
; FIXME: The high 24 bits returned from test_i32 are undefined; do tail call!
declare zeroext i32 @give_i32()
define zeroext i8 @test_i32() {
; CHECK-LABEL: test_i32:
; CHECK: callq _give_i32
; CHECK: movzbl %al, %eax
; CHECK: ret
%call = tail call zeroext i32 @give_i32()
@ -27,11 +27,11 @@ define zeroext i8 @test_i32() {
; Here, one function is zeroext and the other is signext. To the extent that
; these both mean something they are incompatible so no tail call is possible.
; FIXME: The high 16 bits returned are undefined; do tail call!
declare zeroext i16 @give_unsigned_i16()
define signext i16 @test_incompatible_i16() {
; CHECK-LABEL: test_incompatible_i16:
; CHECK: callq _give_unsigned_i16
; CHECK: cwtl
; CHECK: ret
%call = tail call zeroext i16 @give_unsigned_i16()

View File

@ -8,7 +8,7 @@ define zeroext i1 @test1(i32 %X) nounwind {
ret i1 %Y
}
; CHECK-LABEL: test1:
; CHECK: andl $1, %eax
; CHECK: andb $1, %al
define i1 @test2(i32 %val, i32 %mask) nounwind {
entry:

View File

@ -9,7 +9,6 @@ define zeroext i1 @a(i32 %x) nounwind {
; CHECK-LABEL: a:
; CHECK: mull
; CHECK: seto %al
; CHECK: movzbl %al, %eax
; CHECK: ret
}