[RISCV] Add more test cases for absolute value. NFC

This adds tests for i8 through i128 with intrinsic and select forms.

Covering rv32 and rv64 with the base ISA, Zbb, and Zbt. Some
Zbb tests already covered part of this, but not all.

FIXMEs have been added for some obviously suboptimal codegen.
This commit is contained in:
Craig Topper 2022-03-01 11:50:43 -08:00
parent a494ae43be
commit 626ecef1fc
1 changed files with 762 additions and 0 deletions

View File

@ -0,0 +1,762 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefix=RV32I
; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefix=RV32ZBB
; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefix=RV32ZBT
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefix=RV64I
; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefix=RV64ZBB
; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefix=RV64ZBT
; Declarations of the llvm.abs intrinsic for every integer width exercised
; below. The trailing i1 operand must be an immediate (immarg); the calls in
; this file pass true. Per the LLVM LangRef that operand is the
; is_int_min_poison flag.
declare i8 @llvm.abs.i8(i8, i1 immarg)
declare i16 @llvm.abs.i16(i16, i1 immarg)
declare i32 @llvm.abs.i32(i32, i1 immarg)
declare i64 @llvm.abs.i64(i64, i1 immarg)
declare i128 @llvm.abs.i128(i128, i1 immarg)
; abs8 - i8 absolute value through the llvm.abs.i8 intrinsic (flag operand
; true). The expected output for the base ISA and Zbt runs is a slli/srai
; pair followed by xor and sub; the Zbb runs expect sext.b, neg and max.
; FIXME: Sign-extending the value that feeds the xor isn't needed and
; causes an extra srai.
define i8 @abs8(i8 %x) {
; RV32I-LABEL: abs8:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a1, a0, 24
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: xor a1, a1, a0
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: abs8:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sext.b a0, a0
; RV32ZBB-NEXT: neg a1, a0
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV32ZBT-LABEL: abs8:
; RV32ZBT: # %bb.0:
; RV32ZBT-NEXT: slli a0, a0, 24
; RV32ZBT-NEXT: srai a1, a0, 24
; RV32ZBT-NEXT: srai a0, a0, 31
; RV32ZBT-NEXT: xor a1, a1, a0
; RV32ZBT-NEXT: sub a0, a1, a0
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: abs8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a1, a0, 56
; RV64I-NEXT: srai a0, a0, 63
; RV64I-NEXT: xor a1, a1, a0
; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: abs8:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.b a0, a0
; RV64ZBB-NEXT: neg a1, a0
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: ret
;
; RV64ZBT-LABEL: abs8:
; RV64ZBT: # %bb.0:
; RV64ZBT-NEXT: slli a0, a0, 56
; RV64ZBT-NEXT: srai a1, a0, 56
; RV64ZBT-NEXT: srai a0, a0, 63
; RV64ZBT-NEXT: xor a1, a1, a0
; RV64ZBT-NEXT: sub a0, a1, a0
; RV64ZBT-NEXT: ret
; Intrinsic form of the operation; the result is returned unchanged.
%abs = tail call i8 @llvm.abs.i8(i8 %x, i1 true)
ret i8 %abs
}
; select_abs8 - i8 absolute value written as an explicit icmp slt / sub nsw /
; select sequence instead of the intrinsic. The expected output is a
; slli/srai pair plus xor and sub for the base ISA and Zbt runs, and
; sext.b, neg and max for the Zbb runs.
; FIXME: Sign-extending the value that feeds the xor isn't needed and
; causes an extra srai.
define i8 @select_abs8(i8 %x) {
; RV32I-LABEL: select_abs8:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a1, a0, 24
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: xor a1, a1, a0
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: select_abs8:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sext.b a0, a0
; RV32ZBB-NEXT: neg a1, a0
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV32ZBT-LABEL: select_abs8:
; RV32ZBT: # %bb.0:
; RV32ZBT-NEXT: slli a0, a0, 24
; RV32ZBT-NEXT: srai a1, a0, 24
; RV32ZBT-NEXT: srai a0, a0, 31
; RV32ZBT-NEXT: xor a1, a1, a0
; RV32ZBT-NEXT: sub a0, a1, a0
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: select_abs8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a1, a0, 56
; RV64I-NEXT: srai a0, a0, 63
; RV64I-NEXT: xor a1, a1, a0
; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: select_abs8:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.b a0, a0
; RV64ZBB-NEXT: neg a1, a0
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: ret
;
; RV64ZBT-LABEL: select_abs8:
; RV64ZBT: # %bb.0:
; RV64ZBT-NEXT: slli a0, a0, 56
; RV64ZBT-NEXT: srai a1, a0, 56
; RV64ZBT-NEXT: srai a0, a0, 63
; RV64ZBT-NEXT: xor a1, a1, a0
; RV64ZBT-NEXT: sub a0, a1, a0
; RV64ZBT-NEXT: ret
; %1 = x is negative, %2 = 0 - x, %3 = negated value if negative, else x.
%1 = icmp slt i8 %x, 0
%2 = sub nsw i8 0, %x
%3 = select i1 %1, i8 %2, i8 %x
ret i8 %3
}
; abs16 - i16 absolute value through the llvm.abs.i16 intrinsic (flag operand
; true). The expected output for the base ISA and Zbt runs is a slli/srai
; pair followed by xor and sub; the Zbb runs expect sext.h, neg and max.
; FIXME: Sign-extending the value that feeds the xor isn't needed and
; causes an extra srai.
define i16 @abs16(i16 %x) {
; RV32I-LABEL: abs16:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a1, a0, 16
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: xor a1, a1, a0
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: abs16:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sext.h a0, a0
; RV32ZBB-NEXT: neg a1, a0
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV32ZBT-LABEL: abs16:
; RV32ZBT: # %bb.0:
; RV32ZBT-NEXT: slli a0, a0, 16
; RV32ZBT-NEXT: srai a1, a0, 16
; RV32ZBT-NEXT: srai a0, a0, 31
; RV32ZBT-NEXT: xor a1, a1, a0
; RV32ZBT-NEXT: sub a0, a1, a0
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: abs16:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a1, a0, 48
; RV64I-NEXT: srai a0, a0, 63
; RV64I-NEXT: xor a1, a1, a0
; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: abs16:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.h a0, a0
; RV64ZBB-NEXT: neg a1, a0
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: ret
;
; RV64ZBT-LABEL: abs16:
; RV64ZBT: # %bb.0:
; RV64ZBT-NEXT: slli a0, a0, 48
; RV64ZBT-NEXT: srai a1, a0, 48
; RV64ZBT-NEXT: srai a0, a0, 63
; RV64ZBT-NEXT: xor a1, a1, a0
; RV64ZBT-NEXT: sub a0, a1, a0
; RV64ZBT-NEXT: ret
; Intrinsic form of the operation; the result is returned unchanged.
%abs = tail call i16 @llvm.abs.i16(i16 %x, i1 true)
ret i16 %abs
}
; select_abs16 - i16 absolute value written as an explicit icmp slt / sub nsw
; / select sequence instead of the intrinsic. The expected output is a
; slli/srai pair plus xor and sub for the base ISA and Zbt runs, and
; sext.h, neg and max for the Zbb runs.
; FIXME: Sign-extending the value that feeds the xor isn't needed and
; causes an extra srai.
define i16 @select_abs16(i16 %x) {
; RV32I-LABEL: select_abs16:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a1, a0, 16
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: xor a1, a1, a0
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: select_abs16:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sext.h a0, a0
; RV32ZBB-NEXT: neg a1, a0
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV32ZBT-LABEL: select_abs16:
; RV32ZBT: # %bb.0:
; RV32ZBT-NEXT: slli a0, a0, 16
; RV32ZBT-NEXT: srai a1, a0, 16
; RV32ZBT-NEXT: srai a0, a0, 31
; RV32ZBT-NEXT: xor a1, a1, a0
; RV32ZBT-NEXT: sub a0, a1, a0
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: select_abs16:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srai a1, a0, 48
; RV64I-NEXT: srai a0, a0, 63
; RV64I-NEXT: xor a1, a1, a0
; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: select_abs16:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.h a0, a0
; RV64ZBB-NEXT: neg a1, a0
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: ret
;
; RV64ZBT-LABEL: select_abs16:
; RV64ZBT: # %bb.0:
; RV64ZBT-NEXT: slli a0, a0, 48
; RV64ZBT-NEXT: srai a1, a0, 48
; RV64ZBT-NEXT: srai a0, a0, 63
; RV64ZBT-NEXT: xor a1, a1, a0
; RV64ZBT-NEXT: sub a0, a1, a0
; RV64ZBT-NEXT: ret
; %1 = x is negative, %2 = 0 - x, %3 = negated value if negative, else x.
%1 = icmp slt i16 %x, 0
%2 = sub nsw i16 0, %x
%3 = select i1 %1, i16 %2, i16 %x
ret i16 %3
}
; abs32 - i32 absolute value through the llvm.abs.i32 intrinsic (flag operand
; true). The base ISA and Zbt runs expect srai + xor + sub (sraiw and subw on
; rv64); the Zbb runs expect neg + max, preceded by sext.w on rv64.
define i32 @abs32(i32 %x) {
; RV32I-LABEL: abs32:
; RV32I: # %bb.0:
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: abs32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: neg a1, a0
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV32ZBT-LABEL: abs32:
; RV32ZBT: # %bb.0:
; RV32ZBT-NEXT: srai a1, a0, 31
; RV32ZBT-NEXT: xor a0, a0, a1
; RV32ZBT-NEXT: sub a0, a0, a1
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: abs32:
; RV64I: # %bb.0:
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: abs32:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.w a0, a0
; RV64ZBB-NEXT: neg a1, a0
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: ret
;
; RV64ZBT-LABEL: abs32:
; RV64ZBT: # %bb.0:
; RV64ZBT-NEXT: sraiw a1, a0, 31
; RV64ZBT-NEXT: xor a0, a0, a1
; RV64ZBT-NEXT: subw a0, a0, a1
; RV64ZBT-NEXT: ret
; Intrinsic form of the operation; the result is returned unchanged.
%abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
ret i32 %abs
}
; select_abs32 - i32 absolute value written as an explicit icmp slt / sub nsw
; / select sequence instead of the intrinsic. The base ISA and Zbt runs
; expect srai + xor + sub (sraiw and subw on rv64); the Zbb runs expect
; neg + max, preceded by sext.w on rv64.
define i32 @select_abs32(i32 %x) {
; RV32I-LABEL: select_abs32:
; RV32I: # %bb.0:
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: select_abs32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: neg a1, a0
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: ret
;
; RV32ZBT-LABEL: select_abs32:
; RV32ZBT: # %bb.0:
; RV32ZBT-NEXT: srai a1, a0, 31
; RV32ZBT-NEXT: xor a0, a0, a1
; RV32ZBT-NEXT: sub a0, a0, a1
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: select_abs32:
; RV64I: # %bb.0:
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: select_abs32:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sext.w a0, a0
; RV64ZBB-NEXT: neg a1, a0
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: ret
;
; RV64ZBT-LABEL: select_abs32:
; RV64ZBT: # %bb.0:
; RV64ZBT-NEXT: sraiw a1, a0, 31
; RV64ZBT-NEXT: xor a0, a0, a1
; RV64ZBT-NEXT: subw a0, a0, a1
; RV64ZBT-NEXT: ret
; %1 = x is negative, %2 = 0 - x, %3 = negated value if negative, else x.
%1 = icmp slt i32 %x, 0
%2 = sub nsw i32 0, %x
%3 = select i1 %1, i32 %2, i32 %x
ret i32 %3
}
; abs64 - i64 absolute value through the llvm.abs.i64 intrinsic (flag operand
; true). The intrinsic result is returned directly, matching the other
; intrinsic-form tests in this file; no negation is applied.
; The rv32 base ISA and Zbb runs expect a branch on a1 followed by negation of
; the a0/a1 pair, the rv32 Zbt run expects the cmov form, the rv64 base ISA
; and Zbt runs expect srai + xor + sub, and the rv64 Zbb run expects neg + max.
; NOTE(review): the assertions below mirror the select_abs64 output; rerun
; utils/update_llc_test_checks.py to confirm them against the llc in use.
define i64 @abs64(i64 %x) {
; RV32I-LABEL: abs64:
; RV32I: # %bb.0:
; RV32I-NEXT: bgez a1, .LBB6_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: snez a2, a0
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: .LBB6_2:
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: abs64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: bgez a1, .LBB6_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: snez a2, a0
; RV32ZBB-NEXT: neg a0, a0
; RV32ZBB-NEXT: add a1, a1, a2
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: .LBB6_2:
; RV32ZBB-NEXT: ret
;
; RV32ZBT-LABEL: abs64:
; RV32ZBT: # %bb.0:
; RV32ZBT-NEXT: neg a2, a0
; RV32ZBT-NEXT: slti a3, a1, 0
; RV32ZBT-NEXT: cmov a2, a3, a2, a0
; RV32ZBT-NEXT: snez a0, a0
; RV32ZBT-NEXT: add a0, a1, a0
; RV32ZBT-NEXT: neg a0, a0
; RV32ZBT-NEXT: cmov a1, a3, a0, a1
; RV32ZBT-NEXT: mv a0, a2
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: abs64:
; RV64I: # %bb.0:
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: abs64:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: neg a1, a0
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: ret
;
; RV64ZBT-LABEL: abs64:
; RV64ZBT: # %bb.0:
; RV64ZBT-NEXT: srai a1, a0, 63
; RV64ZBT-NEXT: xor a0, a0, a1
; RV64ZBT-NEXT: sub a0, a0, a1
; RV64ZBT-NEXT: ret
; Intrinsic form of the operation; the result is returned unchanged.
%abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true)
ret i64 %abs
}
; select_abs64 - i64 absolute value written as an explicit icmp slt / sub nsw
; / select sequence instead of the intrinsic. The rv32 base ISA and Zbb runs
; expect a branch on a1 that skips a negation of the a0/a1 pair; the rv32 Zbt
; run expects the same selection done with cmov. The rv64 base ISA and Zbt
; runs expect srai + xor + sub, and the rv64 Zbb run expects neg + max.
define i64 @select_abs64(i64 %x) {
; RV32I-LABEL: select_abs64:
; RV32I: # %bb.0:
; RV32I-NEXT: bgez a1, .LBB7_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: snez a2, a0
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: .LBB7_2:
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: select_abs64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: bgez a1, .LBB7_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: snez a2, a0
; RV32ZBB-NEXT: neg a0, a0
; RV32ZBB-NEXT: add a1, a1, a2
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: .LBB7_2:
; RV32ZBB-NEXT: ret
;
; RV32ZBT-LABEL: select_abs64:
; RV32ZBT: # %bb.0:
; RV32ZBT-NEXT: neg a2, a0
; RV32ZBT-NEXT: slti a3, a1, 0
; RV32ZBT-NEXT: cmov a2, a3, a2, a0
; RV32ZBT-NEXT: snez a0, a0
; RV32ZBT-NEXT: add a0, a1, a0
; RV32ZBT-NEXT: neg a0, a0
; RV32ZBT-NEXT: cmov a1, a3, a0, a1
; RV32ZBT-NEXT: mv a0, a2
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: select_abs64:
; RV64I: # %bb.0:
; RV64I-NEXT: srai a1, a0, 63
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: select_abs64:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: neg a1, a0
; RV64ZBB-NEXT: max a0, a0, a1
; RV64ZBB-NEXT: ret
;
; RV64ZBT-LABEL: select_abs64:
; RV64ZBT: # %bb.0:
; RV64ZBT-NEXT: srai a1, a0, 63
; RV64ZBT-NEXT: xor a0, a0, a1
; RV64ZBT-NEXT: sub a0, a0, a1
; RV64ZBT-NEXT: ret
; %1 = x is negative, %2 = 0 - x, %3 = negated value if negative, else x.
%1 = icmp slt i64 %x, 0
%2 = sub nsw i64 0, %x
%3 = select i1 %1, i64 %2, i64 %x
ret i64 %3
}
; abs128 - i128 absolute value through the llvm.abs.i128 intrinsic (flag
; operand true). In the rv32 runs the expected code loads the four words of
; the value from the address in a1 and stores the result through a0; the base
; ISA and Zbb runs use branches while the Zbt run uses cmov. In the rv64 runs
; the expected code works on the a0/a1 pair: the base ISA and Zbb runs branch
; on a1, and the Zbt run uses cmov.
define i128 @abs128(i128 %x) {
; RV32I-LABEL: abs128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a2, 4(a1)
; RV32I-NEXT: lw a4, 12(a1)
; RV32I-NEXT: snez a5, a3
; RV32I-NEXT: mv a6, a5
; RV32I-NEXT: bnez a2, .LBB8_5
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: bnez a4, .LBB8_6
; RV32I-NEXT: .LBB8_2:
; RV32I-NEXT: lw a1, 8(a1)
; RV32I-NEXT: beqz a7, .LBB8_4
; RV32I-NEXT: .LBB8_3:
; RV32I-NEXT: neg a7, a1
; RV32I-NEXT: snez a1, a1
; RV32I-NEXT: sltu t0, a7, a6
; RV32I-NEXT: add a1, a4, a1
; RV32I-NEXT: add a1, a1, t0
; RV32I-NEXT: neg a4, a1
; RV32I-NEXT: sub a1, a7, a6
; RV32I-NEXT: add a2, a2, a5
; RV32I-NEXT: neg a2, a2
; RV32I-NEXT: neg a3, a3
; RV32I-NEXT: .LBB8_4:
; RV32I-NEXT: sw a3, 0(a0)
; RV32I-NEXT: sw a1, 8(a0)
; RV32I-NEXT: sw a2, 4(a0)
; RV32I-NEXT: sw a4, 12(a0)
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB8_5:
; RV32I-NEXT: snez a6, a2
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: beqz a4, .LBB8_2
; RV32I-NEXT: .LBB8_6:
; RV32I-NEXT: slti a7, a4, 0
; RV32I-NEXT: lw a1, 8(a1)
; RV32I-NEXT: bnez a7, .LBB8_3
; RV32I-NEXT: j .LBB8_4
;
; RV32ZBB-LABEL: abs128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a1)
; RV32ZBB-NEXT: lw a2, 4(a1)
; RV32ZBB-NEXT: lw a4, 12(a1)
; RV32ZBB-NEXT: snez a5, a3
; RV32ZBB-NEXT: mv a6, a5
; RV32ZBB-NEXT: bnez a2, .LBB8_5
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: li a7, 0
; RV32ZBB-NEXT: bnez a4, .LBB8_6
; RV32ZBB-NEXT: .LBB8_2:
; RV32ZBB-NEXT: lw a1, 8(a1)
; RV32ZBB-NEXT: beqz a7, .LBB8_4
; RV32ZBB-NEXT: .LBB8_3:
; RV32ZBB-NEXT: neg a7, a1
; RV32ZBB-NEXT: snez a1, a1
; RV32ZBB-NEXT: sltu t0, a7, a6
; RV32ZBB-NEXT: add a1, a4, a1
; RV32ZBB-NEXT: add a1, a1, t0
; RV32ZBB-NEXT: neg a4, a1
; RV32ZBB-NEXT: sub a1, a7, a6
; RV32ZBB-NEXT: add a2, a2, a5
; RV32ZBB-NEXT: neg a2, a2
; RV32ZBB-NEXT: neg a3, a3
; RV32ZBB-NEXT: .LBB8_4:
; RV32ZBB-NEXT: sw a3, 0(a0)
; RV32ZBB-NEXT: sw a1, 8(a0)
; RV32ZBB-NEXT: sw a2, 4(a0)
; RV32ZBB-NEXT: sw a4, 12(a0)
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB8_5:
; RV32ZBB-NEXT: snez a6, a2
; RV32ZBB-NEXT: li a7, 0
; RV32ZBB-NEXT: beqz a4, .LBB8_2
; RV32ZBB-NEXT: .LBB8_6:
; RV32ZBB-NEXT: slti a7, a4, 0
; RV32ZBB-NEXT: lw a1, 8(a1)
; RV32ZBB-NEXT: bnez a7, .LBB8_3
; RV32ZBB-NEXT: j .LBB8_4
;
; RV32ZBT-LABEL: abs128:
; RV32ZBT: # %bb.0:
; RV32ZBT-NEXT: lw a2, 12(a1)
; RV32ZBT-NEXT: lw a3, 8(a1)
; RV32ZBT-NEXT: lw a4, 0(a1)
; RV32ZBT-NEXT: lw a1, 4(a1)
; RV32ZBT-NEXT: slti a5, a2, 0
; RV32ZBT-NEXT: cmov a5, a2, a5, zero
; RV32ZBT-NEXT: snez a6, a4
; RV32ZBT-NEXT: snez a7, a1
; RV32ZBT-NEXT: cmov a7, a1, a7, a6
; RV32ZBT-NEXT: neg t0, a3
; RV32ZBT-NEXT: sltu t1, t0, a7
; RV32ZBT-NEXT: snez t2, a3
; RV32ZBT-NEXT: add t2, a2, t2
; RV32ZBT-NEXT: add t1, t2, t1
; RV32ZBT-NEXT: neg t1, t1
; RV32ZBT-NEXT: cmov a2, a5, t1, a2
; RV32ZBT-NEXT: sub a7, t0, a7
; RV32ZBT-NEXT: cmov a3, a5, a7, a3
; RV32ZBT-NEXT: add a6, a1, a6
; RV32ZBT-NEXT: neg a6, a6
; RV32ZBT-NEXT: cmov a1, a5, a6, a1
; RV32ZBT-NEXT: neg a6, a4
; RV32ZBT-NEXT: cmov a4, a5, a6, a4
; RV32ZBT-NEXT: sw a4, 0(a0)
; RV32ZBT-NEXT: sw a3, 8(a0)
; RV32ZBT-NEXT: sw a1, 4(a0)
; RV32ZBT-NEXT: sw a2, 12(a0)
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: abs128:
; RV64I: # %bb.0:
; RV64I-NEXT: bgez a1, .LBB8_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: snez a2, a0
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: .LBB8_2:
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: abs128:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: bgez a1, .LBB8_2
; RV64ZBB-NEXT: # %bb.1:
; RV64ZBB-NEXT: snez a2, a0
; RV64ZBB-NEXT: neg a0, a0
; RV64ZBB-NEXT: add a1, a1, a2
; RV64ZBB-NEXT: neg a1, a1
; RV64ZBB-NEXT: .LBB8_2:
; RV64ZBB-NEXT: ret
;
; RV64ZBT-LABEL: abs128:
; RV64ZBT: # %bb.0:
; RV64ZBT-NEXT: neg a2, a0
; RV64ZBT-NEXT: slti a3, a1, 0
; RV64ZBT-NEXT: cmov a2, a3, a2, a0
; RV64ZBT-NEXT: snez a0, a0
; RV64ZBT-NEXT: add a0, a1, a0
; RV64ZBT-NEXT: neg a0, a0
; RV64ZBT-NEXT: cmov a1, a3, a0, a1
; RV64ZBT-NEXT: mv a0, a2
; RV64ZBT-NEXT: ret
; Intrinsic form of the operation; the result is returned unchanged.
%abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true)
ret i128 %abs
}
; select_abs128 - i128 absolute value written as an explicit icmp slt /
; sub nsw / select sequence instead of the intrinsic. In the rv32 runs the
; expected code loads the four words of the value from the address in a1 and
; stores the result through a0; the base ISA and Zbb runs use branches while
; the Zbt run uses cmov. In the rv64 runs the expected code works on the
; a0/a1 pair: the base ISA and Zbb runs branch on a1, and the Zbt run uses
; cmov.
define i128 @select_abs128(i128 %x) {
; RV32I-LABEL: select_abs128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a2, 4(a1)
; RV32I-NEXT: lw a4, 12(a1)
; RV32I-NEXT: snez a5, a3
; RV32I-NEXT: mv a6, a5
; RV32I-NEXT: bnez a2, .LBB9_5
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: bnez a4, .LBB9_6
; RV32I-NEXT: .LBB9_2:
; RV32I-NEXT: lw a1, 8(a1)
; RV32I-NEXT: beqz a7, .LBB9_4
; RV32I-NEXT: .LBB9_3:
; RV32I-NEXT: neg a7, a1
; RV32I-NEXT: snez a1, a1
; RV32I-NEXT: sltu t0, a7, a6
; RV32I-NEXT: add a1, a4, a1
; RV32I-NEXT: add a1, a1, t0
; RV32I-NEXT: neg a4, a1
; RV32I-NEXT: sub a1, a7, a6
; RV32I-NEXT: add a2, a2, a5
; RV32I-NEXT: neg a2, a2
; RV32I-NEXT: neg a3, a3
; RV32I-NEXT: .LBB9_4:
; RV32I-NEXT: sw a3, 0(a0)
; RV32I-NEXT: sw a1, 8(a0)
; RV32I-NEXT: sw a2, 4(a0)
; RV32I-NEXT: sw a4, 12(a0)
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB9_5:
; RV32I-NEXT: snez a6, a2
; RV32I-NEXT: li a7, 0
; RV32I-NEXT: beqz a4, .LBB9_2
; RV32I-NEXT: .LBB9_6:
; RV32I-NEXT: slti a7, a4, 0
; RV32I-NEXT: lw a1, 8(a1)
; RV32I-NEXT: bnez a7, .LBB9_3
; RV32I-NEXT: j .LBB9_4
;
; RV32ZBB-LABEL: select_abs128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a1)
; RV32ZBB-NEXT: lw a2, 4(a1)
; RV32ZBB-NEXT: lw a4, 12(a1)
; RV32ZBB-NEXT: snez a5, a3
; RV32ZBB-NEXT: mv a6, a5
; RV32ZBB-NEXT: bnez a2, .LBB9_5
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: li a7, 0
; RV32ZBB-NEXT: bnez a4, .LBB9_6
; RV32ZBB-NEXT: .LBB9_2:
; RV32ZBB-NEXT: lw a1, 8(a1)
; RV32ZBB-NEXT: beqz a7, .LBB9_4
; RV32ZBB-NEXT: .LBB9_3:
; RV32ZBB-NEXT: neg a7, a1
; RV32ZBB-NEXT: snez a1, a1
; RV32ZBB-NEXT: sltu t0, a7, a6
; RV32ZBB-NEXT: add a1, a4, a1
; RV32ZBB-NEXT: add a1, a1, t0
; RV32ZBB-NEXT: neg a4, a1
; RV32ZBB-NEXT: sub a1, a7, a6
; RV32ZBB-NEXT: add a2, a2, a5
; RV32ZBB-NEXT: neg a2, a2
; RV32ZBB-NEXT: neg a3, a3
; RV32ZBB-NEXT: .LBB9_4:
; RV32ZBB-NEXT: sw a3, 0(a0)
; RV32ZBB-NEXT: sw a1, 8(a0)
; RV32ZBB-NEXT: sw a2, 4(a0)
; RV32ZBB-NEXT: sw a4, 12(a0)
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB9_5:
; RV32ZBB-NEXT: snez a6, a2
; RV32ZBB-NEXT: li a7, 0
; RV32ZBB-NEXT: beqz a4, .LBB9_2
; RV32ZBB-NEXT: .LBB9_6:
; RV32ZBB-NEXT: slti a7, a4, 0
; RV32ZBB-NEXT: lw a1, 8(a1)
; RV32ZBB-NEXT: bnez a7, .LBB9_3
; RV32ZBB-NEXT: j .LBB9_4
;
; RV32ZBT-LABEL: select_abs128:
; RV32ZBT: # %bb.0:
; RV32ZBT-NEXT: lw a2, 12(a1)
; RV32ZBT-NEXT: lw a3, 8(a1)
; RV32ZBT-NEXT: lw a4, 0(a1)
; RV32ZBT-NEXT: lw a1, 4(a1)
; RV32ZBT-NEXT: slti a5, a2, 0
; RV32ZBT-NEXT: cmov a5, a2, a5, zero
; RV32ZBT-NEXT: snez a6, a4
; RV32ZBT-NEXT: snez a7, a1
; RV32ZBT-NEXT: cmov a7, a1, a7, a6
; RV32ZBT-NEXT: neg t0, a3
; RV32ZBT-NEXT: sltu t1, t0, a7
; RV32ZBT-NEXT: snez t2, a3
; RV32ZBT-NEXT: add t2, a2, t2
; RV32ZBT-NEXT: add t1, t2, t1
; RV32ZBT-NEXT: neg t1, t1
; RV32ZBT-NEXT: cmov a2, a5, t1, a2
; RV32ZBT-NEXT: sub a7, t0, a7
; RV32ZBT-NEXT: cmov a3, a5, a7, a3
; RV32ZBT-NEXT: add a6, a1, a6
; RV32ZBT-NEXT: neg a6, a6
; RV32ZBT-NEXT: cmov a1, a5, a6, a1
; RV32ZBT-NEXT: neg a6, a4
; RV32ZBT-NEXT: cmov a4, a5, a6, a4
; RV32ZBT-NEXT: sw a4, 0(a0)
; RV32ZBT-NEXT: sw a3, 8(a0)
; RV32ZBT-NEXT: sw a1, 4(a0)
; RV32ZBT-NEXT: sw a2, 12(a0)
; RV32ZBT-NEXT: ret
;
; RV64I-LABEL: select_abs128:
; RV64I: # %bb.0:
; RV64I-NEXT: bgez a1, .LBB9_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: snez a2, a0
; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: .LBB9_2:
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: select_abs128:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: bgez a1, .LBB9_2
; RV64ZBB-NEXT: # %bb.1:
; RV64ZBB-NEXT: snez a2, a0
; RV64ZBB-NEXT: neg a0, a0
; RV64ZBB-NEXT: add a1, a1, a2
; RV64ZBB-NEXT: neg a1, a1
; RV64ZBB-NEXT: .LBB9_2:
; RV64ZBB-NEXT: ret
;
; RV64ZBT-LABEL: select_abs128:
; RV64ZBT: # %bb.0:
; RV64ZBT-NEXT: neg a2, a0
; RV64ZBT-NEXT: slti a3, a1, 0
; RV64ZBT-NEXT: cmov a2, a3, a2, a0
; RV64ZBT-NEXT: snez a0, a0
; RV64ZBT-NEXT: add a0, a1, a0
; RV64ZBT-NEXT: neg a0, a0
; RV64ZBT-NEXT: cmov a1, a3, a0, a1
; RV64ZBT-NEXT: mv a0, a2
; RV64ZBT-NEXT: ret
; %1 = x is negative, %2 = 0 - x, %3 = negated value if negative, else x.
%1 = icmp slt i128 %x, 0
%2 = sub nsw i128 0, %x
%3 = select i1 %1, i128 %2, i128 %x
ret i128 %3
}