!2102 合并部分PG使用内建位操作指令的优化

Merge pull request !2102 from wanghao19920907/expr
This commit is contained in:
opengauss-bot 2022-08-31 07:13:15 +00:00 committed by Gitee
commit 8ab353cce1
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
8 changed files with 229 additions and 52 deletions

View File

@ -145,6 +145,33 @@ fi])# PGAC_C_COMPUTED_GOTO
# PGAC_CHECK_BUILTIN_FUNC
# -----------------------
# This is similar to AC_CHECK_FUNCS(), except that it will work for compiler
# builtin functions, as that usually fails to.
# The first argument is the function name, eg [__builtin_clzl], and the
# second is its argument list, eg [unsigned long x]. The current coding
# works only for a single argument named x; we might generalize that later.
# It's assumed that the function's result type is coercible to int.
# On success, we define "HAVEfuncname" (there's usually more than enough
# underscores already, so we don't add another one).
AC_DEFUN([PGAC_CHECK_BUILTIN_FUNC],
[AC_CACHE_CHECK(for $1, pgac_cv$1,
[AC_LINK_IFELSE([AC_LANG_PROGRAM([
int
call$1($2)
{
return $1(x);
}], [])],
[pgac_cv$1=yes],
[pgac_cv$1=no])])
if test x"${pgac_cv$1}" = xyes ; then
AC_DEFINE_UNQUOTED(AS_TR_CPP([HAVE$1]), 1,
[Define to 1 if your compiler understands $1.])
fi])# PGAC_CHECK_BUILTIN_FUNC
# PGAC_PROG_CC_CFLAGS_OPT
# -----------------------
# Given a string, check if the compiler supports the string as a

40
configure vendored
View File

@ -18296,6 +18296,46 @@ _ACEOF
;;
esac
# We assume that we needn't test all widths of these explicitly:
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5
$as_echo_n "checking for __builtin_clz... " >&6; }
if ${pgac_cv__builtin_clz+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
call__builtin_clz(unsigned int x)
{
return __builtin_clz(x);
}
int
main ()
{
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
pgac_cv__builtin_clz=yes
else
pgac_cv__builtin_clz=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_clz" >&5
$as_echo "$pgac_cv__builtin_clz" >&6; }
if test x"${pgac_cv__builtin_clz}" = xyes ; then
cat >>confdefs.h <<_ACEOF
#define HAVE__BUILTIN_CLZ 1
_ACEOF
fi
# Check largefile support. You might think this is a system service not a
# compiler characteristic, but you'd be wrong. We must check this before
# probing existence of related functions such as fseeko, since the largefile

View File

@ -1221,6 +1221,9 @@ LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes waitpid wcstombs wcstombs_l])
# We assume that we needn't test all widths of these explicitly:
PGAC_CHECK_BUILTIN_FUNC([__builtin_clz], [unsigned int x])
AC_REPLACE_FUNCS(fseeko)
case $host_os in
# NetBSD uses a custom fseeko/ftello built on fsetpos/fgetpos

View File

@ -67,6 +67,7 @@
#include "postgres.h"
#include "knl/knl_variable.h"
#include "port/pg_bitutils.h"
#include "utils/dynahash.h"
#include "utils/memutils.h"
#include "utils/mmpool.h"
@ -157,43 +158,6 @@ typedef struct AllocMagicData {
#define AllocPointerGetChunk(ptr) ((AllocChunk)(((char*)(ptr)) - ALLOC_CHUNKHDRSZ))
#define AllocChunkGetPointer(chk) ((AllocPointer)(((char*)(chk)) + ALLOC_CHUNKHDRSZ))
/*
* Table for AllocSetFreeIndex
*/
#define LT16(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
static const unsigned char LogTable256[256] = {0,
1,
2,
2,
3,
3,
3,
3,
4,
4,
4,
4,
4,
4,
4,
4,
LT16(5),
LT16(6),
LT16(6),
LT16(7),
LT16(7),
LT16(7),
LT16(7),
LT16(8),
LT16(8),
LT16(8),
LT16(8),
LT16(8),
LT16(8),
LT16(8),
LT16(8)};
/* ----------
* Debug macros
* ----------
@ -229,20 +193,38 @@ static inline int AllocSetFreeIndex(Size size)
unsigned int t, tsize;
if (size > (1 << ALLOC_MINBITS)) {
tsize = (size - 1) >> ALLOC_MINBITS;
/*----------
* At this point we must compute ceil(log2(size >> ALLOC_MINBITS)).
* This is the same as
* pg_leftmost_one_pos32((size - 1) >> ALLOC_MINBITS) + 1
* or equivalently
* pg_leftmost_one_pos32(size - 1) - ALLOC_MINBITS + 1
*
* However, rather than just calling that function, we duplicate the
* logic here, allowing an additional optimization. It's reasonable
* to assume that ALLOC_CHUNK_LIMIT fits in 16 bits, so we can unroll
* the byte-at-a-time loop in pg_leftmost_one_pos32 and just handle
* the last two bytes.
*
* Yes, this function is enough of a hot-spot to make it worth this
* much trouble.
*----------
*/
#ifdef HAVE__BUILTIN_CLZ
idx = 31 - __builtin_clz((uint32) size - 1) - ALLOC_MINBITS + 1;
#else
uint32 t,
tsize;
/*
* At this point we need to obtain log2(tsize)+1, ie, the number of
* not-all-zero bits at the right. We used to do this with a
* shift-and-count loop, but this function is enough of a hotspot to
* justify micro-optimization effort. The best approach seems to be
* to use a lookup table. Note that this code assumes that
* ALLOCSET_NUM_FREELISTS <= 17, since we only cope with two bytes of
* the tsize value.
*/
t = tsize >> 8;
idx = t ? (LogTable256[t] + 8) : LogTable256[tsize];
/* Statically assert that we only have a 16-bit input value. */
StaticAssertStmt(ALLOC_CHUNK_LIMIT < (1 << 16),
"ALLOC_CHUNK_LIMIT must be less than 64kB");
tsize = size - 1;
t = tsize >> 8;
idx = t ? pg_leftmost_one_pos[t] + 8 : pg_leftmost_one_pos[tsize];
idx -= ALLOC_MINBITS - 1;
#endif
Assert(idx < ALLOCSET_NUM_FREELISTS);
} else
idx = 0;

View File

@ -44,7 +44,7 @@ ifneq "$(MAKECMDGOALS)" "clean"
endif
endif
OBJS = $(LIBOBJS) pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o chklocale.o dirmod.o erand48.o exec.o fls.o inet_net_ntop.o \
noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o \
noblock.o path.o pg_bitutils.o pgcheckdir.o pgmkdirp.o pgsleep.o \
pgstrcasecmp.o qsort.o qsort_arg.o sprompt.o thread.o flock.o pgstrcasestr.o\
gs_thread.o gs_env_r.o gs_getopt_r.o \
gs_readdir.o gs_strerror.o gs_syscall_lock.o \
@ -53,7 +53,7 @@ OBJS = $(LIBOBJS) pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o chklocale
ifeq "${host_cpu}" "aarch64"
OBJS = $(LIBOBJS) pg_crc32c_choose.o chklocale.o dirmod.o erand48.o exec.o fls.o inet_net_ntop.o \
noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o \
noblock.o path.o pg_bitutils.o pgcheckdir.o pgmkdirp.o pgsleep.o \
pgstrcasecmp.o qsort.o qsort_arg.o sprompt.o thread.o flock.o pgstrcasestr.o\
gs_thread.o gs_env_r.o gs_getopt_r.o \
gs_readdir.o gs_strerror.o gs_syscall_lock.o \
@ -70,7 +70,7 @@ ifeq "${host_cpu}" "aarch64"
endif
OBJS_TOOL = fls.o strlcat.o strlcpy.o getpeereid.o chklocale.o dirmod.o erand48.o exec.o inet_net_ntop.o \
noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o pgstrcasecmp.o qsort.o qsort_arg.o sprompt.o thread.o flock.o pgstrcasestr.o \
noblock.o path.o pg_bitutils.o pgcheckdir.o pgmkdirp.o pgsleep.o pgstrcasecmp.o qsort.o qsort_arg.o sprompt.o thread.o flock.o pgstrcasestr.o \
gs_thread.o gs_env_r.o gs_getopt_r.o gs_readdir.o gs_strerror.o gs_syscall_lock.o gs_system.o cipher.o
all: libpgport.a libpgport_srv.a libpgport_tool.so

View File

@ -0,0 +1,96 @@
/*-------------------------------------------------------------------------
*
* pg_bitutils.c
* Miscellaneous functions for bit-wise operations.
*
* Copyright (c) 2019-2021, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/port/pg_bitutils.c
*
*-------------------------------------------------------------------------
*/
#include "c.h"
#include "port/pg_bitutils.h"
/*
* Array giving the position of the left-most set bit for each possible
* byte value. We count the right-most position as the 0th bit, and the
* left-most the 7th bit. The 0th entry of the array should not be used.
*
* Note: this is not used by the functions in pg_bitutils.h when
* HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that
* extensions possibly compiled with a different compiler can use it.
*/
const uint8 pg_leftmost_one_pos[256] = {
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
};
/*
* Array giving the position of the right-most set bit for each possible
* byte value. We count the right-most position as the 0th bit, and the
* left-most the 7th bit. The 0th entry of the array should not be used.
*
* Note: this is not used by the functions in pg_bitutils.h when
* HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that
* extensions possibly compiled with a different compiler can use it.
*/
const uint8 pg_rightmost_one_pos[256] = {
0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
/*
* Array giving the number of 1-bits in each possible byte value.
*
* Note: we export this for use by functions in which explicit use
* of the popcount functions seems unlikely to be a win.
*/
const uint8 pg_number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};

View File

@ -673,6 +673,9 @@
/* Define to build with Kerberos 5 support. (--with-krb5) */
#undef KRB5
/* Define to 1 if your compiler understands __builtin_clz. */
#undef HAVE__BUILTIN_CLZ
/* Define to 1 if you have __cpuid. */
#undef HAVE__CPUID

View File

@ -0,0 +1,26 @@
/*-------------------------------------------------------------------------
*
* pg_bitutils.h
* Miscellaneous functions for bit-wise operations.
*
*
* Copyright (c) 2019-2021, PostgreSQL Global Development Group
*
* src/include/port/pg_bitutils.h
*
*-------------------------------------------------------------------------
*/
#ifndef PG_BITUTILS_H
#define PG_BITUTILS_H
#ifndef FRONTEND
extern PGDLLIMPORT const uint8 pg_leftmost_one_pos[256];
extern PGDLLIMPORT const uint8 pg_rightmost_one_pos[256];
extern PGDLLIMPORT const uint8 pg_number_of_ones[256];
#else
extern const uint8 pg_leftmost_one_pos[256];
extern const uint8 pg_rightmost_one_pos[256];
extern const uint8 pg_number_of_ones[256];
#endif
#endif /* PG_BITUTILS_H */