forked from OSchip/llvm-project
[Support] make countLeadingZeros() and countTrailingZeros() return unsigned
This matches countLeadingOnes() and countTrailingOnes(), and APInt's countLeadingZeros() and countTrailingZeros(), as well as __builtin_clzll(). llvm-svn: 361724
This commit is contained in:
parent
d0f13e618f
commit
b7cc093db2
|
@ -51,14 +51,14 @@ enum ZeroBehavior {
|
|||
|
||||
namespace detail {
|
||||
template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
|
||||
static std::size_t count(T Val, ZeroBehavior) {
|
||||
static unsigned count(T Val, ZeroBehavior) {
|
||||
if (!Val)
|
||||
return std::numeric_limits<T>::digits;
|
||||
if (Val & 0x1)
|
||||
return 0;
|
||||
|
||||
// Bisection method.
|
||||
std::size_t ZeroBits = 0;
|
||||
unsigned ZeroBits = 0;
|
||||
T Shift = std::numeric_limits<T>::digits >> 1;
|
||||
T Mask = std::numeric_limits<T>::max() >> Shift;
|
||||
while (Shift) {
|
||||
|
@ -75,7 +75,7 @@ template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
|
|||
|
||||
#if __GNUC__ >= 4 || defined(_MSC_VER)
|
||||
template <typename T> struct TrailingZerosCounter<T, 4> {
|
||||
static std::size_t count(T Val, ZeroBehavior ZB) {
|
||||
static unsigned count(T Val, ZeroBehavior ZB) {
|
||||
if (ZB != ZB_Undefined && Val == 0)
|
||||
return 32;
|
||||
|
||||
|
@ -91,7 +91,7 @@ template <typename T> struct TrailingZerosCounter<T, 4> {
|
|||
|
||||
#if !defined(_MSC_VER) || defined(_M_X64)
|
||||
template <typename T> struct TrailingZerosCounter<T, 8> {
|
||||
static std::size_t count(T Val, ZeroBehavior ZB) {
|
||||
static unsigned count(T Val, ZeroBehavior ZB) {
|
||||
if (ZB != ZB_Undefined && Val == 0)
|
||||
return 64;
|
||||
|
||||
|
@ -116,7 +116,7 @@ template <typename T> struct TrailingZerosCounter<T, 8> {
|
|||
/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
|
||||
/// valid arguments.
|
||||
template <typename T>
|
||||
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
|
||||
unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
|
||||
static_assert(std::numeric_limits<T>::is_integer &&
|
||||
!std::numeric_limits<T>::is_signed,
|
||||
"Only unsigned integral types are allowed.");
|
||||
|
@ -125,12 +125,12 @@ std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
|
|||
|
||||
namespace detail {
|
||||
template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
|
||||
static std::size_t count(T Val, ZeroBehavior) {
|
||||
static unsigned count(T Val, ZeroBehavior) {
|
||||
if (!Val)
|
||||
return std::numeric_limits<T>::digits;
|
||||
|
||||
// Bisection method.
|
||||
std::size_t ZeroBits = 0;
|
||||
unsigned ZeroBits = 0;
|
||||
for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
|
||||
T Tmp = Val >> Shift;
|
||||
if (Tmp)
|
||||
|
@ -144,7 +144,7 @@ template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
|
|||
|
||||
#if __GNUC__ >= 4 || defined(_MSC_VER)
|
||||
template <typename T> struct LeadingZerosCounter<T, 4> {
|
||||
static std::size_t count(T Val, ZeroBehavior ZB) {
|
||||
static unsigned count(T Val, ZeroBehavior ZB) {
|
||||
if (ZB != ZB_Undefined && Val == 0)
|
||||
return 32;
|
||||
|
||||
|
@ -160,7 +160,7 @@ template <typename T> struct LeadingZerosCounter<T, 4> {
|
|||
|
||||
#if !defined(_MSC_VER) || defined(_M_X64)
|
||||
template <typename T> struct LeadingZerosCounter<T, 8> {
|
||||
static std::size_t count(T Val, ZeroBehavior ZB) {
|
||||
static unsigned count(T Val, ZeroBehavior ZB) {
|
||||
if (ZB != ZB_Undefined && Val == 0)
|
||||
return 64;
|
||||
|
||||
|
@ -185,7 +185,7 @@ template <typename T> struct LeadingZerosCounter<T, 8> {
|
|||
/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
|
||||
/// valid arguments.
|
||||
template <typename T>
|
||||
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
|
||||
unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
|
||||
static_assert(std::numeric_limits<T>::is_integer &&
|
||||
!std::numeric_limits<T>::is_signed,
|
||||
"Only unsigned integral types are allowed.");
|
||||
|
@ -458,7 +458,7 @@ inline uint64_t ByteSwap_64(uint64_t Value) {
|
|||
/// \param ZB the behavior on an input of all ones. Only ZB_Width and
|
||||
/// ZB_Undefined are valid arguments.
|
||||
template <typename T>
|
||||
std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
|
||||
unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
|
||||
static_assert(std::numeric_limits<T>::is_integer &&
|
||||
!std::numeric_limits<T>::is_signed,
|
||||
"Only unsigned integral types are allowed.");
|
||||
|
@ -474,7 +474,7 @@ std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
|
|||
/// \param ZB the behavior on an input of all ones. Only ZB_Width and
|
||||
/// ZB_Undefined are valid arguments.
|
||||
template <typename T>
|
||||
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
|
||||
unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
|
||||
static_assert(std::numeric_limits<T>::is_integer &&
|
||||
!std::numeric_limits<T>::is_signed,
|
||||
"Only unsigned integral types are allowed.");
|
||||
|
|
|
@ -1147,7 +1147,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
|
|||
|
||||
// These alignment values are specified in powers of two, so alignment =
|
||||
// 2^n. The minimum alignment is 2^4 = 16.
|
||||
Out.kernarg_segment_alignment = std::max((size_t)4,
|
||||
Out.kernarg_segment_alignment = std::max<size_t>(4,
|
||||
countTrailingZeros(MaxKernArgAlign));
|
||||
}
|
||||
|
||||
|
|
|
@ -5377,8 +5377,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
|
|||
if (MinCaseVal->isNullValue())
|
||||
TableIndex = SI->getCondition();
|
||||
else
|
||||
TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
|
||||
"switch.tableidx");
|
||||
TableIndex =
|
||||
Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx");
|
||||
|
||||
// Compute the maximum table size representable by the integer type we are
|
||||
// switching upon.
|
||||
|
@ -5512,7 +5512,8 @@ static bool isSwitchDense(ArrayRef<int64_t> Values) {
|
|||
uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
|
||||
uint64_t Range = Diff + 1;
|
||||
uint64_t NumCases = Values.size();
|
||||
// 40% is the default density for building a jump table in optsize/minsize mode.
|
||||
// 40% is the default density for building a jump table in optsize/minsize
|
||||
// mode.
|
||||
uint64_t MinDensity = 40;
|
||||
|
||||
return NumCases * 100 >= Range * MinDensity;
|
||||
|
@ -5538,11 +5539,11 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
|
|||
if (SI->getNumCases() < 4)
|
||||
return false;
|
||||
|
||||
// This transform is agnostic to the signedness of the input or case values. We
|
||||
// can treat the case values as signed or unsigned. We can optimize more common
|
||||
// cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
|
||||
// as signed.
|
||||
SmallVector<int64_t,4> Values;
|
||||
// This transform is agnostic to the signedness of the input or case values.
|
||||
// We can treat the case values as signed or unsigned. We can optimize more
|
||||
// common cases such as a sequence crossing zero {-4,0,4,8} if we interpret
|
||||
// case values as signed.
|
||||
SmallVector<int64_t, 4> Values;
|
||||
for (auto &C : SI->cases())
|
||||
Values.push_back(C.getCaseValue()->getValue().getSExtValue());
|
||||
llvm::sort(Values);
|
||||
|
@ -5563,9 +5564,9 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
|
|||
for (auto &V : Values)
|
||||
GCD = GreatestCommonDivisor64(GCD, (uint64_t)V);
|
||||
|
||||
// This transform can be done speculatively because it is so cheap - it results
|
||||
// in a single rotate operation being inserted. This can only happen if the
|
||||
// factor extracted is a power of 2.
|
||||
// This transform can be done speculatively because it is so cheap - it
|
||||
// results in a single rotate operation being inserted. This can only happen
|
||||
// if the factor extracted is a power of 2.
|
||||
// FIXME: If the GCD is an odd number we can multiply by the multiplicative
|
||||
// inverse of GCD and then perform this transform.
|
||||
// FIXME: It's possible that optimizing a switch on powers of two might also
|
||||
|
|
Loading…
Reference in New Issue