forked from OSchip/llvm-project
[AMDGPU] Fix width check for signed mul24 generation.
This change fixes a case in which the highest set bit of the original result is at bit 31, so sign-extending the mul24 result would make it negative. Differential Revision: https://reviews.llvm.org/D111823
This commit is contained in:
parent
6069a6a504
commit
0379263f23
|
@ -509,10 +509,10 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
|
|||
|
||||
Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
|
||||
|
||||
// TODO: Should this try to match mulhi24?
|
||||
if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) {
|
||||
// The 24-bit mul intrinsics yields the low-order 32 bits. The result's bit
|
||||
// width should not exceed 32 if `Size` > 32.
|
||||
// The mul24 instruction yields the low-order 32 bits. If the original
|
||||
// result and the destination is wider than 32 bits, the mul24 would
|
||||
// truncate the result.
|
||||
if (Size > 32 &&
|
||||
numBitsUnsigned(LHS, Size) + numBitsUnsigned(RHS, Size) > 32) {
|
||||
return false;
|
||||
|
@ -520,7 +520,10 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
|
|||
|
||||
IntrID = Intrinsic::amdgcn_mul_u24;
|
||||
} else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) {
|
||||
if (Size > 32 && numBitsSigned(LHS, Size) + numBitsSigned(RHS, Size) > 31) {
|
||||
// The original result is positive if its destination is wider than 32 bits
|
||||
// and its highest set bit is at bit 31. Generating mul24 and sign-extending
|
||||
// it would yield a negative value.
|
||||
if (Size > 32 && numBitsSigned(LHS, Size) + numBitsSigned(RHS, Size) > 30) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -239,6 +239,39 @@ define i64 @smul24_i64_2(i64 %lhs, i64 %rhs) {
|
|||
ret i64 %mul
|
||||
}
|
||||
|
||||
define i64 @smul24_i64_3(i64 %lhs, i64 %rhs) {
|
||||
; SI-LABEL: @smul24_i64_3(
|
||||
; SI-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16
|
||||
; SI-NEXT: [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64
|
||||
; SI-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17
|
||||
; SI-NEXT: [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64
|
||||
; SI-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
|
||||
; SI-NEXT: ret i64 [[MUL]]
|
||||
;
|
||||
; VI-LABEL: @smul24_i64_3(
|
||||
; VI-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16
|
||||
; VI-NEXT: [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64
|
||||
; VI-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17
|
||||
; VI-NEXT: [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64
|
||||
; VI-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
|
||||
; VI-NEXT: ret i64 [[MUL]]
|
||||
;
|
||||
; DISABLED-LABEL: @smul24_i64_3(
|
||||
; DISABLED-NEXT: [[LHS_TRUNC:%.*]] = trunc i64 [[LHS:%.*]] to i16
|
||||
; DISABLED-NEXT: [[LHS24:%.*]] = sext i16 [[LHS_TRUNC]] to i64
|
||||
; DISABLED-NEXT: [[RHS_TRUNC:%.*]] = trunc i64 [[RHS:%.*]] to i17
|
||||
; DISABLED-NEXT: [[RHS24:%.*]] = sext i17 [[RHS_TRUNC]] to i64
|
||||
; DISABLED-NEXT: [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
|
||||
; DISABLED-NEXT: ret i64 [[MUL]]
|
||||
;
|
||||
%lhs.trunc = trunc i64 %lhs to i16
|
||||
%lhs24 = sext i16 %lhs.trunc to i64
|
||||
%rhs.trunc = trunc i64 %rhs to i17
|
||||
%rhs24 = sext i17 %rhs.trunc to i64
|
||||
%mul = mul i64 %lhs24, %rhs24
|
||||
ret i64 %mul
|
||||
}
|
||||
|
||||
define i64 @umul24_i64(i64 %lhs, i64 %rhs) {
|
||||
; SI-LABEL: @umul24_i64(
|
||||
; SI-NEXT: [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
|
||||
|
|
Loading…
Reference in New Issue