2019-08-30 05:53:58 +08:00
|
|
|
; RUN: llc -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s
|
|
|
|
; RUN: llc -global-isel -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s --check-prefix=GISEL
|
|
|
|
|
|
|
|
; FIXME: GISel only knows how to handle explicit G_SEXT instructions. So when
|
|
|
|
; G_SEXT is lowered to anything else, it won't fold in a sxt*.
|
|
|
|
; FIXME: GISel doesn't currently handle folding the addressing mode into a cmp.
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
@var8 = global i8 0
|
|
|
|
@var16 = global i16 0
|
|
|
|
@var32 = global i32 0
|
|
|
|
@var64 = global i64 0
|
|
|
|
|
2014-04-14 20:50:50 +08:00
|
|
|
define void @addsub_i8rhs() minsize {
|
2013-07-14 14:24:09 +08:00
|
|
|
; CHECK-LABEL: addsub_i8rhs:
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL-LABEL: addsub_i8rhs:
|
2015-02-28 05:17:42 +08:00
|
|
|
%val8_tmp = load i8, i8* @var8
|
|
|
|
%lhs32 = load i32, i32* @var32
|
|
|
|
%lhs64 = load i64, i64* @var64
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
; Need this to prevent extension upon load and give a vanilla i8 operand.
|
|
|
|
%val8 = add i8 %val8_tmp, 123
|
|
|
|
|
|
|
|
|
|
|
|
; Zero-extending to 32-bits
|
|
|
|
%rhs32_zext = zext i8 %val8 to i32
|
|
|
|
%res32_zext = add i32 %lhs32, %rhs32_zext
|
|
|
|
store volatile i32 %res32_zext, i32* @var32
|
|
|
|
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
%rhs32_zext_shift = shl i32 %rhs32_zext, 3
|
|
|
|
%res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
|
|
|
|
store volatile i32 %res32_zext_shift, i32* @var32
|
|
|
|
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
; Zero-extending to 64-bits
|
|
|
|
%rhs64_zext = zext i8 %val8 to i64
|
|
|
|
%res64_zext = add i64 %lhs64, %rhs64_zext
|
|
|
|
store volatile i64 %res64_zext, i64* @var64
|
|
|
|
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
%rhs64_zext_shift = shl i64 %rhs64_zext, 1
|
|
|
|
%res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
|
|
|
|
store volatile i64 %res64_zext_shift, i64* @var64
|
|
|
|
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
; Sign-extending to 32-bits
|
|
|
|
%rhs32_sext = sext i8 %val8 to i32
|
|
|
|
%res32_sext = add i32 %lhs32, %rhs32_sext
|
|
|
|
store volatile i32 %res32_sext, i32* @var32
|
|
|
|
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb
|
|
|
|
|
|
|
|
%rhs32_sext_shift = shl i32 %rhs32_sext, 1
|
|
|
|
%res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
|
|
|
|
store volatile i32 %res32_sext_shift, i32* @var32
|
|
|
|
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1
|
|
|
|
|
|
|
|
; Sign-extending to 64-bits
|
|
|
|
%rhs64_sext = sext i8 %val8 to i64
|
|
|
|
%res64_sext = add i64 %lhs64, %rhs64_sext
|
|
|
|
store volatile i64 %res64_sext, i64* @var64
|
|
|
|
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb
|
|
|
|
|
|
|
|
%rhs64_sext_shift = shl i64 %rhs64_sext, 4
|
|
|
|
%res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
|
|
|
|
store volatile i64 %res64_sext_shift, i64* @var64
|
|
|
|
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4
|
|
|
|
|
|
|
|
|
|
|
|
; CMP variants
|
|
|
|
%tst = icmp slt i32 %lhs32, %rhs32_zext
|
|
|
|
br i1 %tst, label %end, label %test2
|
|
|
|
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxtb
|
|
|
|
|
|
|
|
test2:
|
|
|
|
%cmp_sext = sext i8 %val8 to i64
|
|
|
|
%tst2 = icmp eq i64 %lhs64, %cmp_sext
|
|
|
|
br i1 %tst2, label %other, label %end
|
|
|
|
; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxtb
|
|
|
|
|
|
|
|
other:
|
|
|
|
store volatile i32 %lhs32, i32* @var32
|
|
|
|
ret void
|
|
|
|
|
|
|
|
end:
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2015-07-31 23:55:54 +08:00
|
|
|
define void @sub_i8rhs() minsize {
|
|
|
|
; CHECK-LABEL: sub_i8rhs:
|
|
|
|
%val8_tmp = load i8, i8* @var8
|
|
|
|
%lhs32 = load i32, i32* @var32
|
|
|
|
%lhs64 = load i64, i64* @var64
|
|
|
|
|
|
|
|
; Need this to prevent extension upon load and give a vanilla i8 operand.
|
|
|
|
%val8 = add i8 %val8_tmp, 123
|
|
|
|
|
|
|
|
|
|
|
|
; Zero-extending to 32-bits
|
|
|
|
%rhs32_zext = zext i8 %val8 to i32
|
|
|
|
%res32_zext = sub i32 %lhs32, %rhs32_zext
|
|
|
|
store volatile i32 %res32_zext, i32* @var32
|
|
|
|
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
|
2015-07-31 23:55:54 +08:00
|
|
|
|
|
|
|
%rhs32_zext_shift = shl i32 %rhs32_zext, 3
|
|
|
|
%res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift
|
|
|
|
store volatile i32 %res32_zext_shift, i32* @var32
|
|
|
|
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
|
2015-07-31 23:55:54 +08:00
|
|
|
|
|
|
|
; Zero-extending to 64-bits
|
|
|
|
%rhs64_zext = zext i8 %val8 to i64
|
|
|
|
%res64_zext = sub i64 %lhs64, %rhs64_zext
|
|
|
|
store volatile i64 %res64_zext, i64* @var64
|
|
|
|
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
|
2015-07-31 23:55:54 +08:00
|
|
|
|
|
|
|
%rhs64_zext_shift = shl i64 %rhs64_zext, 1
|
|
|
|
%res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
|
|
|
|
store volatile i64 %res64_zext_shift, i64* @var64
|
|
|
|
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
|
2015-07-31 23:55:54 +08:00
|
|
|
|
|
|
|
; Sign-extending to 32-bits
|
|
|
|
%rhs32_sext = sext i8 %val8 to i32
|
|
|
|
%res32_sext = sub i32 %lhs32, %rhs32_sext
|
|
|
|
store volatile i32 %res32_sext, i32* @var32
|
|
|
|
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb
|
|
|
|
|
|
|
|
%rhs32_sext_shift = shl i32 %rhs32_sext, 1
|
|
|
|
%res32_sext_shift = sub i32 %lhs32, %rhs32_sext_shift
|
|
|
|
store volatile i32 %res32_sext_shift, i32* @var32
|
|
|
|
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1
|
|
|
|
|
|
|
|
; Sign-extending to 64-bits
|
|
|
|
%rhs64_sext = sext i8 %val8 to i64
|
|
|
|
%res64_sext = sub i64 %lhs64, %rhs64_sext
|
|
|
|
store volatile i64 %res64_sext, i64* @var64
|
|
|
|
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb
|
|
|
|
|
|
|
|
%rhs64_sext_shift = shl i64 %rhs64_sext, 4
|
|
|
|
%res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
|
|
|
|
store volatile i64 %res64_sext_shift, i64* @var64
|
|
|
|
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4
|
|
|
|
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2014-04-14 20:50:50 +08:00
|
|
|
define void @addsub_i16rhs() minsize {
|
2013-07-14 14:24:09 +08:00
|
|
|
; CHECK-LABEL: addsub_i16rhs:
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL-LABEL: addsub_i16rhs:
|
2015-02-28 05:17:42 +08:00
|
|
|
%val16_tmp = load i16, i16* @var16
|
|
|
|
%lhs32 = load i32, i32* @var32
|
|
|
|
%lhs64 = load i64, i64* @var64
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
; Need this to prevent extension upon load and give a vanilla i16 operand.
|
|
|
|
%val16 = add i16 %val16_tmp, 123
|
|
|
|
|
|
|
|
|
|
|
|
; Zero-extending to 32-bits
|
|
|
|
%rhs32_zext = zext i16 %val16 to i32
|
|
|
|
%res32_zext = add i32 %lhs32, %rhs32_zext
|
|
|
|
store volatile i32 %res32_zext, i32* @var32
|
|
|
|
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
%rhs32_zext_shift = shl i32 %rhs32_zext, 3
|
|
|
|
%res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift
|
|
|
|
store volatile i32 %res32_zext_shift, i32* @var32
|
|
|
|
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
; Zero-extending to 64-bits
|
|
|
|
%rhs64_zext = zext i16 %val16 to i64
|
|
|
|
%res64_zext = add i64 %lhs64, %rhs64_zext
|
|
|
|
store volatile i64 %res64_zext, i64* @var64
|
|
|
|
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
%rhs64_zext_shift = shl i64 %rhs64_zext, 1
|
|
|
|
%res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
|
|
|
|
store volatile i64 %res64_zext_shift, i64* @var64
|
|
|
|
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
; Sign-extending to 32-bits
|
|
|
|
%rhs32_sext = sext i16 %val16 to i32
|
|
|
|
%res32_sext = add i32 %lhs32, %rhs32_sext
|
|
|
|
store volatile i32 %res32_sext, i32* @var32
|
|
|
|
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth
|
|
|
|
|
|
|
|
%rhs32_sext_shift = shl i32 %rhs32_sext, 1
|
|
|
|
%res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift
|
|
|
|
store volatile i32 %res32_sext_shift, i32* @var32
|
|
|
|
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1
|
|
|
|
|
|
|
|
; Sign-extending to 64-bits
|
|
|
|
%rhs64_sext = sext i16 %val16 to i64
|
|
|
|
%res64_sext = add i64 %lhs64, %rhs64_sext
|
|
|
|
store volatile i64 %res64_sext, i64* @var64
|
|
|
|
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth
|
|
|
|
|
|
|
|
%rhs64_sext_shift = shl i64 %rhs64_sext, 4
|
|
|
|
%res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
|
|
|
|
store volatile i64 %res64_sext_shift, i64* @var64
|
|
|
|
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4
|
|
|
|
|
|
|
|
|
|
|
|
; CMP variants
|
|
|
|
%tst = icmp slt i32 %lhs32, %rhs32_zext
|
|
|
|
br i1 %tst, label %end, label %test2
|
|
|
|
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxth
|
|
|
|
|
|
|
|
test2:
|
|
|
|
%cmp_sext = sext i16 %val16 to i64
|
|
|
|
%tst2 = icmp eq i64 %lhs64, %cmp_sext
|
|
|
|
br i1 %tst2, label %other, label %end
|
|
|
|
; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxth
|
|
|
|
|
|
|
|
other:
|
|
|
|
store volatile i32 %lhs32, i32* @var32
|
|
|
|
ret void
|
|
|
|
|
|
|
|
end:
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2015-07-31 23:55:54 +08:00
|
|
|
define void @sub_i16rhs() minsize {
|
|
|
|
; CHECK-LABEL: sub_i16rhs:
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL-LABEL: sub_i16rhs:
|
2015-07-31 23:55:54 +08:00
|
|
|
%val16_tmp = load i16, i16* @var16
|
|
|
|
%lhs32 = load i32, i32* @var32
|
|
|
|
%lhs64 = load i64, i64* @var64
|
|
|
|
|
|
|
|
; Need this to prevent extension upon load and give a vanilla i16 operand.
|
|
|
|
%val16 = add i16 %val16_tmp, 123
|
|
|
|
|
|
|
|
|
|
|
|
; Zero-extending to 32-bits
|
|
|
|
%rhs32_zext = zext i16 %val16 to i32
|
|
|
|
%res32_zext = sub i32 %lhs32, %rhs32_zext
|
|
|
|
store volatile i32 %res32_zext, i32* @var32
|
|
|
|
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
|
2015-07-31 23:55:54 +08:00
|
|
|
|
|
|
|
%rhs32_zext_shift = shl i32 %rhs32_zext, 3
|
|
|
|
%res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift
|
|
|
|
store volatile i32 %res32_zext_shift, i32* @var32
|
|
|
|
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
|
2015-07-31 23:55:54 +08:00
|
|
|
|
|
|
|
; Zero-extending to 64-bits
|
|
|
|
%rhs64_zext = zext i16 %val16 to i64
|
|
|
|
%res64_zext = sub i64 %lhs64, %rhs64_zext
|
|
|
|
store volatile i64 %res64_zext, i64* @var64
|
|
|
|
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
|
2015-07-31 23:55:54 +08:00
|
|
|
|
|
|
|
%rhs64_zext_shift = shl i64 %rhs64_zext, 1
|
|
|
|
%res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
|
|
|
|
store volatile i64 %res64_zext_shift, i64* @var64
|
|
|
|
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
|
2015-07-31 23:55:54 +08:00
|
|
|
|
|
|
|
; Sign-extending to 32-bits
|
|
|
|
%rhs32_sext = sext i16 %val16 to i32
|
|
|
|
%res32_sext = sub i32 %lhs32, %rhs32_sext
|
|
|
|
store volatile i32 %res32_sext, i32* @var32
|
|
|
|
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth
|
|
|
|
|
|
|
|
%rhs32_sext_shift = shl i32 %rhs32_sext, 1
|
|
|
|
%res32_sext_shift = sub i32 %lhs32, %rhs32_sext_shift
|
|
|
|
store volatile i32 %res32_sext_shift, i32* @var32
|
|
|
|
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1
|
|
|
|
|
|
|
|
; Sign-extending to 64-bits
|
|
|
|
%rhs64_sext = sext i16 %val16 to i64
|
|
|
|
%res64_sext = sub i64 %lhs64, %rhs64_sext
|
|
|
|
store volatile i64 %res64_sext, i64* @var64
|
|
|
|
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth
|
|
|
|
|
|
|
|
%rhs64_sext_shift = shl i64 %rhs64_sext, 4
|
|
|
|
%res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
|
|
|
|
store volatile i64 %res64_sext_shift, i64* @var64
|
|
|
|
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4
|
|
|
|
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2013-01-31 20:12:40 +08:00
|
|
|
; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for
|
|
|
|
; example), but the remaining instructions are probably not idiomatic
|
|
|
|
; in the face of "add/sub (shifted register)" so I don't intend to.
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
define void @addsub_i32rhs(i32 %in32) minsize {
|
2013-07-14 14:24:09 +08:00
|
|
|
; CHECK-LABEL: addsub_i32rhs:
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL-LABEL: addsub_i32rhs:
|
2015-02-28 05:17:42 +08:00
|
|
|
%val32_tmp = load i32, i32* @var32
|
|
|
|
%lhs64 = load i64, i64* @var64
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
%val32 = add i32 %val32_tmp, 123
|
|
|
|
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
%rhs64_zext = zext i32 %in32 to i64
|
2013-01-31 20:12:40 +08:00
|
|
|
%res64_zext = add i64 %lhs64, %rhs64_zext
|
|
|
|
store volatile i64 %res64_zext, i64* @var64
|
|
|
|
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
|
2013-01-31 20:12:40 +08:00
|
|
|
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
%rhs64_zext2 = zext i32 %val32 to i64
|
|
|
|
%rhs64_zext_shift = shl i64 %rhs64_zext2, 2
|
2013-01-31 20:12:40 +08:00
|
|
|
%res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift
|
|
|
|
store volatile i64 %res64_zext_shift, i64* @var64
|
|
|
|
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
%rhs64_sext = sext i32 %val32 to i64
|
|
|
|
%res64_sext = add i64 %lhs64, %rhs64_sext
|
|
|
|
store volatile i64 %res64_sext, i64* @var64
|
|
|
|
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
%rhs64_sext_shift = shl i64 %rhs64_sext, 2
|
|
|
|
%res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift
|
|
|
|
store volatile i64 %res64_sext_shift, i64* @var64
|
|
|
|
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2
|
2013-01-31 20:12:40 +08:00
|
|
|
|
|
|
|
ret void
|
2013-07-14 04:38:47 +08:00
|
|
|
}
|
2015-07-31 23:55:54 +08:00
|
|
|
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
define void @sub_i32rhs(i32 %in32) minsize {
|
2015-07-31 23:55:54 +08:00
|
|
|
; CHECK-LABEL: sub_i32rhs:
|
|
|
|
%val32_tmp = load i32, i32* @var32
|
|
|
|
%lhs64 = load i64, i64* @var64
|
|
|
|
|
|
|
|
%val32 = add i32 %val32_tmp, 123
|
|
|
|
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
%rhs64_zext = zext i32 %in32 to i64
|
2015-07-31 23:55:54 +08:00
|
|
|
%res64_zext = sub i64 %lhs64, %rhs64_zext
|
|
|
|
store volatile i64 %res64_zext, i64* @var64
|
|
|
|
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
|
2015-07-31 23:55:54 +08:00
|
|
|
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
%rhs64_zext2 = zext i32 %val32 to i64
|
|
|
|
%rhs64_zext_shift = shl i64 %rhs64_zext2, 2
|
2015-07-31 23:55:54 +08:00
|
|
|
%res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
|
|
|
|
store volatile i64 %res64_zext_shift, i64* @var64
|
|
|
|
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
|
2015-07-31 23:55:54 +08:00
|
|
|
|
|
|
|
%rhs64_sext = sext i32 %val32 to i64
|
|
|
|
%res64_sext = sub i64 %lhs64, %rhs64_sext
|
|
|
|
store volatile i64 %res64_sext, i64* @var64
|
|
|
|
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
|
|
|
|
|
|
|
|
%rhs64_sext_shift = shl i64 %rhs64_sext, 2
|
|
|
|
%res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
|
|
|
|
store volatile i64 %res64_sext_shift, i64* @var64
|
|
|
|
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2
|
|
|
|
|
|
|
|
ret void
|
|
|
|
}
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
|
|
|
|
; Check that implicit zext from w reg write is used instead of uxtw form of add.
|
|
|
|
define i64 @add_fold_uxtw(i32 %x, i64 %y) {
|
|
|
|
; CHECK-LABEL: add_fold_uxtw:
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL-LABEL: add_fold_uxtw:
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
entry:
|
|
|
|
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: and w[[TMP:[0-9]+]], w0, #0x3
|
|
|
|
; FIXME: Global ISel produces an unnecessary ubfx here.
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
%m = and i32 %x, 3
|
|
|
|
%ext = zext i32 %m to i64
|
|
|
|
; CHECK-NEXT: add x0, x1, x[[TMP]]
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: add x0, x1, x[[TMP]]
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
%ret = add i64 %y, %ext
|
|
|
|
ret i64 %ret
|
|
|
|
}
|
|
|
|
|
|
|
|
; Check that implicit zext from w reg write is used instead of uxtw
|
|
|
|
; form of sub and that mov WZR is folded to form a neg instruction.
|
|
|
|
define i64 @sub_fold_uxtw_xzr(i32 %x) {
|
|
|
|
; CHECK-LABEL: sub_fold_uxtw_xzr:
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL-LABEL: sub_fold_uxtw_xzr:
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
entry:
|
|
|
|
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: and w[[TMP:[0-9]+]], w0, #0x3
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
%m = and i32 %x, 3
|
|
|
|
%ext = zext i32 %m to i64
|
|
|
|
; CHECK-NEXT: neg x0, x[[TMP]]
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: negs x0, x[[TMP]]
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
%ret = sub i64 0, %ext
|
|
|
|
ret i64 %ret
|
|
|
|
}
|
|
|
|
|
|
|
|
; Check that implicit zext from w reg write is used instead of uxtw form of subs/cmp.
|
|
|
|
define i1 @cmp_fold_uxtw(i32 %x, i64 %y) {
; Returns whether %y equals the zero-extension of (%x & 3).
|
|
|
|
; CHECK-LABEL: cmp_fold_uxtw:
|
|
|
|
entry:
|
|
|
|
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
|
|
|
|
; The mask is computed as a 32-bit operation, so the expected output is an
; 'and' on w registers; its destination register number is captured as TMP.
%m = and i32 %x, 3
|
|
|
|
; Explicit i32 -> i64 zero-extension of the masked value.
%ext = zext i32 %m to i64
|
|
|
|
; The very next instruction is expected to be a plain register compare that
; reads the same register number through its x name, i.e. no uxtw operand.
; CHECK-NEXT: cmp x1, x[[TMP]]
|
|
|
|
; CHECK-NEXT: cset
|
|
|
|
%ret = icmp eq i64 %y, %ext
|
|
|
|
ret i1 %ret
|
|
|
|
}
|
|
|
|
|
|
|
|
; Check that implicit zext from w reg write is used instead of uxtw
|
|
|
|
; form of add, leading to madd selection.
|
|
|
|
define i64 @madd_fold_uxtw(i32 %x, i64 %y) {
|
|
|
|
; CHECK-LABEL: madd_fold_uxtw:
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL-LABEL: madd_fold_uxtw:
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
entry:
|
|
|
|
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: and w[[TMP:[0-9]+]], w0, #0x3
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
%m = and i32 %x, 3
|
|
|
|
%ext = zext i32 %m to i64
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: madd x0, x1, x1, x[[TMP]]
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
; CHECK-NEXT: madd x0, x1, x1, x[[TMP]]
|
|
|
|
%mul = mul i64 %y, %y
|
|
|
|
%ret = add i64 %mul, %ext
|
|
|
|
ret i64 %ret
|
|
|
|
}
|
|
|
|
|
|
|
|
; Check that implicit zext from w reg write is used instead of uxtw
|
|
|
|
; form of sub, leading to sub/cmp folding.
|
|
|
|
; The zero-extended value should then feed a plain register-register cmp rather than a uxtw form of subs/cmp.
|
|
|
|
; Returns true when %z equals the zero-extended low two bits of %x, written as
; a 64-bit subtract followed by a compare with zero. %y is not referenced.
; NOTE(review): only default-prefix FileCheck directives appear in this
; function; presumably GlobalISel is left unchecked because of the cmp-folding
; limitation mentioned in the FIXME at the top of the file - confirm.
; NOTE(review): the bare '|' lines below are neither LLVM IR nor FileCheck
; directives; they look like residue from a blame view - confirm before use.
define i1 @cmp_sub_fold_uxtw(i32 %x, i64 %y, i64 %z) {
|
|
|
|
; CHECK-LABEL: cmp_sub_fold_uxtw:
|
|
|
|
entry:
|
|
|
|
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
|
|
|
|
; Keep only the low two bits of %x, as a 32-bit operation.
%m = and i32 %x, 3
|
|
|
|
; Widen the masked value to 64 bits; the directive below expects the compare
; to read the X view of the same register, with no uxtw operand.
%ext = zext i32 %m to i64
|
|
|
|
; The first register of the expected cmp is captured as a FileCheck variable
; but is not reused by any later directive in this function.
; CHECK-NEXT: cmp x[[TMP2:[0-9]+]], x[[TMP]]
|
|
|
|
; CHECK-NEXT: cset
|
|
|
|
; The subtract and the compare with zero are expected to become the single cmp
; matched above, followed by cset.
%sub = sub i64 %z, %ext
|
|
|
|
%ret = icmp eq i64 %sub, 0
|
|
|
|
ret i1 %ret
|
|
|
|
}
|
|
|
|
|
|
|
|
; Check that implicit zext from w reg write is used instead of uxtw
|
|
|
|
; form of add and add of -1 gets selected as sub.
|
|
|
|
; Computes zext(%x & 3) + (-1) as a 64-bit value. Both instruction selectors
; are expected to subtract #1 from the X view of the masked register instead
; of using an add with a uxtw operand.
; NOTE(review): the bare '|' lines, timestamps and commit-message text
; interleaved below are neither LLVM IR nor FileCheck directives; they look
; like residue from a blame view - confirm before use.
define i64 @add_imm_fold_uxtw(i32 %x) {
|
|
|
|
; CHECK-LABEL: add_imm_fold_uxtw:
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL-LABEL: add_imm_fold_uxtw:
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
entry:
|
|
|
|
; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: and w[[TMP:[0-9]+]], w0, #0x3
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
; Keep only the low two bits of %x, as a 32-bit operation.
%m = and i32 %x, 3
|
|
|
|
; Widen the masked value to 64 bits before the 64-bit add.
%ext = zext i32 %m to i64
|
|
|
|
; The two selectors are matched differently here: the default-prefix directive
; expects a plain sub on the line right after the and, while the GlobalISel
; directive expects the flag-setting subs and does not require adjacency.
; CHECK-NEXT: sub x0, x[[TMP]], #1
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: subs x0, x[[TMP]], #1
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
; Adding -1 is the operation the directives above expect to appear as a
; subtract of #1.
%ret = add i64 %ext, -1
|
|
|
|
ret i64 %ret
|
|
|
|
}
|
|
|
|
|
|
|
|
; Check that implicit zext from w reg write is used instead of uxtw
|
|
|
|
; form of add and add lsl form gets selected.
|
|
|
|
; Computes zext(%x | 3) + (%y << 3) as a 64-bit value. Both instruction
; selectors are expected to emit one add that reads the X view of the or
; result and folds the shift in as an "lsl #3" operand, with no uxtw operand.
; NOTE(review): the bare '|' lines, timestamps and commit-message text
; interleaved below are neither LLVM IR nor FileCheck directives; they look
; like residue from a blame view - confirm before use.
define i64 @add_lsl_fold_uxtw(i32 %x, i64 %y) {
|
|
|
|
; CHECK-LABEL: add_lsl_fold_uxtw:
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL-LABEL: add_lsl_fold_uxtw:
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
entry:
|
|
|
|
; CHECK: orr w[[TMP:[0-9]+]], w0, #0x3
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: orr w[[TMP:[0-9]+]], w0, #0x3
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
; Set the low two bits of %x, as a 32-bit operation.
%m = or i32 %x, 3
|
|
|
|
; Widen the or result to 64 bits before the 64-bit add.
%ext = zext i32 %m to i64
|
|
|
|
; Multiply %y by 8; the directives below expect this shift to be absorbed into
; the add rather than emitted as a separate instruction.
%shift = shl i64 %y, 3
|
|
|
|
; The default-prefix directive requires the add on the line right after the
; orr; the GlobalISel directive matches the same add without that adjacency.
; CHECK-NEXT: add x0, x[[TMP]], x1, lsl #3
|
2019-08-30 05:53:58 +08:00
|
|
|
; GISEL: add x0, x[[TMP]], x1, lsl #3
|
[AArch64] Improve add/sub/cmp isel of uxtw forms.
Don't match the UXTW extended reg forms of ADD/ADDS/SUB/SUBS if the
32-bit to 64-bit zero-extend can be done for free by taking advantage
of the 32-bit defining instruction zeroing the upper 32-bits of the X
register destination. This enables better instruction selection in a
few cases, such as:
sub x0, xzr, x8
instead of:
mov x8, xzr
sub x0, x8, w9, uxtw
madd x0, x1, x1, x8
instead of:
mul x9, x1, x1
add x0, x9, w8, uxtw
cmp x2, x8
instead of:
sub x8, x2, w8, uxtw
cmp x8, #0
add x0, x8, x1, lsl #3
instead of:
lsl x9, x1, #3
add x0, x9, w8, uxtw
Reviewers: t.p.northover, jmolloy
Subscribers: mcrosier, aemerson, llvm-commits, rengolin
Differential Revision: https://reviews.llvm.org/D24747
llvm-svn: 282413
2016-09-26 23:34:47 +08:00
|
|
|
%ret = add i64 %ext, %shift
|
|
|
|
ret i64 %ret
|
|
|
|
}
|