diff --git a/build/script/aarch64_opengauss_list b/build/script/aarch64_opengauss_list index ba6b9d3e1..675ed2704 100644 --- a/build/script/aarch64_opengauss_list +++ b/build/script/aarch64_opengauss_list @@ -1,4 +1,10 @@ [server] +./bin/dsscmd +./bin/dssserver +./bin/perctrl +./bin/dms_contrl.sh +./bin/dss_clear.sh +./bin/dss_contrl.sh ./bin/gsql ./bin/gaussdb ./bin/gstrace @@ -824,6 +830,8 @@ ./lib/libzstd.so* ./lib/libxgboost.so ./lib/libpagecompression.so* +./lib/libdssapi.so +./lib/libdms.so ./include/postgresql/server/postgres_ext.h ./include/postgresql/server/pg_config_os.h diff --git a/build/script/utils/common.sh b/build/script/utils/common.sh index 7960937fa..98b655871 100644 --- a/build/script/utils/common.sh +++ b/build/script/utils/common.sh @@ -59,7 +59,9 @@ select_package_command ####################################################################### ##get os dist version ####################################################################### -if [[ -f "/etc/euleros-release" ]]; then +if [[ -f "/etc/openEuler-release" ]]; then + os_name="openEuler" +elif [[ -f "/etc/euleros-release" ]]; then os_name="EulerOS" elif [[ -f "/etc/centos-release" ]]; then os_name="CentOS" diff --git a/build/script/x86_64_opengauss_list b/build/script/x86_64_opengauss_list index 452587788..c50961ad2 100644 --- a/build/script/x86_64_opengauss_list +++ b/build/script/x86_64_opengauss_list @@ -1,4 +1,10 @@ [server] +./bin/dsscmd +./bin/dssserver +./bin/perctrl +./bin/dms_contrl.sh +./bin/dss_clear.sh +./bin/dss_contrl.sh ./bin/gsql ./bin/gaussdb ./bin/gstrace @@ -821,6 +827,9 @@ ./lib/libzstd.so* ./lib/libxgboost.so ./lib/libpagecompression.so* +./lib/libdssapi.so +./lib/libdms.so + ./include/postgresql/server/postgres_ext.h ./include/postgresql/server/pg_config_os.h ./include/postgresql/server/pgtime.h diff --git a/cmake/src/build_options.cmake b/cmake/src/build_options.cmake index f561970f6..d701704bf 100755 --- a/cmake/src/build_options.cmake +++ 
b/cmake/src/build_options.cmake @@ -17,6 +17,12 @@ if(NOT ${PGPORT}) set(PGPORT 5432) endif() +#this is the default WAL segment size (MB), the old is --with-wal-segsize=16, now -DWAL_SEGSIZE=16 +option(WAL_SEGSIZE 16) +if(NOT ${WAL_SEGSIZE}) + set(WAL_SEGSIZE 16) +endif() + GET_VERSIONSTR_FROMGIT(GET_PG_VERSION_STR) #CMake does not allow g++ to compile C files. They are two different languages. You should understand what you are doing. @@ -58,6 +64,7 @@ option(ENABLE_MYSQL_FDW "enable export or import data with mysql,the old is --en option(ENABLE_ORACLE_FDW "enable export or import data with oracle,the old is --enable-oracle-fdw" OFF) option(BUILD_BY_CMAKE "the BUILD_BY_CMAKE is new,used in distribute pg_regress.cpp" ON) option(DEBUG_UHEAP "collect USTORE statistics" OFF) +option(MAX_ALLOC_SEGNUM "max alloc xlog seg num in extreme_rto" 4) #No matter what to set, the old mppdb aways use ENABLE_THREAD_SAFETY=yes by default defined. option(ENABLE_THREAD_SAFETY "enable thread safety, the old is --enable-thread-safety" ON) @@ -140,6 +147,17 @@ set(OPTIMIZE_OPTIONS -pipe -pthread -fno-aggressive-loop-optimizations -fno-expe set(CHECK_OPTIONS -Wmissing-format-attribute -Wno-attributes -Wno-unused-but-set-variable -Wno-write-strings -Wpointer-arith) set(MACRO_OPTIONS -D_GLIBCXX_USE_CXX11_ABI=0 -DENABLE_GSTRACE -D_GNU_SOURCE -DPGXC -D_POSIX_PTHREAD_SEMANTICS -D_REENTRANT -DSTREAMPLAN -D_THREAD_SAFE ${DB_COMMON_DEFINE}) +# Set MAX_ALLOC_SEGNUM size in extreme_rto +if(${WAL_SEGSIZE} LESS 512) + set(MAX_ALLOC_SEGNUM 4) +elseif(${WAL_SEGSIZE} GREATER_EQUAL 512 AND ${WAL_SEGSIZE} LESS 1024) + set(MAX_ALLOC_SEGNUM 2) +elseif(${WAL_SEGSIZE} GREATER_EQUAL 1024) + set(MAX_ALLOC_SEGNUM 1) +else() + message(FATAL_ERROR "error: Invalid WAL segment size. 
Allowed values are 1,2,4,8,16,32,64,128,256,512,1024.") +endif() + # libraries need secure options during compling set(LIB_SECURE_OPTIONS -fPIC -fno-common -fstack-protector) # libraries need link options during linking @@ -275,6 +293,7 @@ if(${ENABLE_LLVM_COMPILE} STREQUAL "ON") # LLVM version execute_process(COMMAND ${LLVM_CONFIG} --version OUTPUT_VARIABLE LLVM_VERSION_STR OUTPUT_STRIP_TRAILING_WHITESPACE) string(REPLACE "." ";" LLVM_VERSION_LIST ${LLVM_VERSION_STR}) + message(STATUS "status ENV{LLVM_VERSION_STR}" $ENV{LLVM_VERSION_STR}) list(GET LLVM_VERSION_LIST 0 LLVM_MAJOR_VERSION) list(GET LLVM_VERSION_LIST 1 LLVM_MINOR_VERSION) endif() diff --git a/cmake/src/config-in/pg_config.h.in b/cmake/src/config-in/pg_config.h.in index 90dd4387c..0ab50738b 100755 --- a/cmake/src/config-in/pg_config.h.in +++ b/cmake/src/config-in/pg_config.h.in @@ -870,7 +870,10 @@ /* XLOG_SEG_SIZE is the size of a single WAL file. This must be a power of 2 * * and larger than XLOG_BLCKSZ (preferably, a great deal larger than * * XLOG_BLCKSZ). Changing XLOG_SEG_SIZE requires an initdb. */ -#define XLOG_SEG_SIZE (16 * 1024 * 1024) +#define XLOG_SEG_SIZE (@WAL_SEGSIZE@ * 1024 * 1024) + +/* Number of max alloc xlog segment in extreme_rto, default 4 */ +#define MAX_ALLOC_SEGNUM @MAX_ALLOC_SEGNUM@ /* Number of bits in a file offset, on hosts where this is settable. */ #cmakedefine _FILE_OFFSET_BITS diff --git a/cmake/src/config-in/pg_config.h.in.nocheck.aarch64 b/cmake/src/config-in/pg_config.h.in.nocheck.aarch64 index f13f06361..b892f2404 100755 --- a/cmake/src/config-in/pg_config.h.in.nocheck.aarch64 +++ b/cmake/src/config-in/pg_config.h.in.nocheck.aarch64 @@ -878,6 +878,9 @@ XLOG_BLCKSZ). Changing XLOG_SEG_SIZE requires an initdb. */ #define XLOG_SEG_SIZE (16 * 1024 * 1024) +/* Number of max alloc xlog segment in extreme_rto, default 4 */ +#define MAX_ALLOC_SEGNUM 4 + /* Number of bits in a file offset, on hosts where this is settable. 
*/ /* #undef _FILE_OFFSET_BITS */ diff --git a/cmake/src/config-in/pg_config.h.in.nocheck.x86_64 b/cmake/src/config-in/pg_config.h.in.nocheck.x86_64 index 9859dbad9..bdc972d39 100755 --- a/cmake/src/config-in/pg_config.h.in.nocheck.x86_64 +++ b/cmake/src/config-in/pg_config.h.in.nocheck.x86_64 @@ -878,6 +878,9 @@ XLOG_BLCKSZ). Changing XLOG_SEG_SIZE requires an initdb. */ #define XLOG_SEG_SIZE (16 * 1024 * 1024) +/* Maximum number of xlog segments allocated in extreme_rto (default 4) */ +#define MAX_ALLOC_SEGNUM 4 + /* Number of bits in a file offset, on hosts where this is settable. */ /* #undef _FILE_OFFSET_BITS */ diff --git a/cmake/src/config-in/pg_config_nocheck.h.in b/cmake/src/config-in/pg_config_nocheck.h.in index fc50b6c8a..3d54679ce 100755 --- a/cmake/src/config-in/pg_config_nocheck.h.in +++ b/cmake/src/config-in/pg_config_nocheck.h.in @@ -870,6 +870,9 @@ * XLOG_BLCKSZ). Changing XLOG_SEG_SIZE requires an initdb. */ #define XLOG_SEG_SIZE (16 * 1024 * 1024) +/* Maximum number of xlog segments allocated in extreme_rto (default 4) */ +#define MAX_ALLOC_SEGNUM 4 + /* Number of bits in a file offset, on hosts where this is settable.
*/ /* #undef _FILE_OFFSET_BITS */ diff --git a/cmake/src/set_thirdparty_path.cmake b/cmake/src/set_thirdparty_path.cmake index 1064da5cb..0c7aedfbf 100755 --- a/cmake/src/set_thirdparty_path.cmake +++ b/cmake/src/set_thirdparty_path.cmake @@ -76,6 +76,8 @@ set(HOTPATCH_HOME ${PLATFORM_PATH}/hotpatch) set(SECURE_HOME ${PLATFORM_PATH}/Huawei_Secure_C/${LIB_UNIFIED_SUPPORT}) set(SECUREDYNAMICLIB_HOME ${PLATFORM_PATH}/Huawei_Secure_C/Dynamic_Lib) set(DCF_HOME ${COMPONENT_PATH}/dcf) +set(DMS_HOME ${COMPONENT_PATH}/dms) +set(DSS_HOME ${COMPONENT_PATH}/dss) set(MOCKCPP_HOME ${BUILDTOOLS_PATH}/mockcpp/${LIB_UNIFIED_SUPPORT}) set(GTEST_HOME ${BUILDTOOLS_PATH}/gtest/${LIB_UNIFIED_SUPPORT}) @@ -263,6 +265,17 @@ set(ZSTD_LIB_PATH ${ZSTD_HOME}/lib) set(DCF_INCLUDE_PATH ${DCF_HOME}/include) set(DCF_LIB_PATH ${DCF_HOME}/lib) +############################################################################# +# dms component +############################################################################# +set(DMS_LIB_PATH ${DMS_HOME}/lib) + +############################################################################# +# dss component +############################################################################# +set(DSS_LIB_PATH ${DSS_HOME}/lib) +set(DSS_BIN_PATH ${DSS_HOME}/bin) + ############################################################################# # license manager compnent ############################################################################# diff --git a/configure b/configure index 858b65d90..7d9f7744d 100755 --- a/configure +++ b/configure @@ -3710,8 +3710,12 @@ case ${wal_segsize} in 16) ;; 32) ;; 64) ;; - *) { { $as_echo "$as_me:$LINENO: error: Invalid WAL segment size. Allowed values are 1,2,4,8,16,32,64." >&5 -$as_echo "$as_me: error: Invalid WAL segment size. Allowed values are 1,2,4,8,16,32,64." >&2;} + 128) ;; + 256) ;; + 512) ;; + 1024) ;; + *) { { $as_echo "$as_me:$LINENO: error: Invalid WAL segment size. 
Allowed values are 1,2,4,8,16,32,64,128,256,512,1024." >&5 +$as_echo "$as_me: error: Invalid WAL segment size. Allowed values are 1,2,4,8,16,32,64,128,256,512,1024." >&2;} { (exit 1); exit 1; }; } esac { $as_echo "$as_me:$LINENO: result: ${wal_segsize}MB" >&5 @@ -3722,6 +3726,18 @@ cat >>confdefs.h <<_ACEOF #define XLOG_SEG_SIZE (${wal_segsize} * 1024 * 1024) _ACEOF +# Set number of MAX_ALLOC_SEGNUM in extreme_rto +if test "${wal_segsize}" -lt 512; then + MAX_ALLOC_SEGNUM=4 +elif test "${wal_segsize}" -ge 512 -a "${wal_segsize}" -lt 1024; then + MAX_ALLOC_SEGNUM=2 +else + MAX_ALLOC_SEGNUM=1 +fi + +cat >>confdefs.h <<_ACEOF +#define MAX_ALLOC_SEGNUM ${MAX_ALLOC_SEGNUM} +_ACEOF # # C compiler diff --git a/configure.in b/configure.in index fcd2fc151..249d16d65 100644 --- a/configure.in +++ b/configure.in @@ -381,7 +381,11 @@ case ${wal_segsize} in 16) ;; 32) ;; 64) ;; - *) AC_MSG_ERROR([Invalid WAL segment size. Allowed values are 1,2,4,8,16,32,64.]) + 128) ;; + 256) ;; + 512) ;; + 1024) ;; + *) AC_MSG_ERROR([Invalid WAL segment size. Allowed values are 1,2,4,8,16,32,64,128,256,512,1024.]) esac AC_MSG_RESULT([${wal_segsize}MB]) @@ -393,6 +397,16 @@ AC_DEFINE_UNQUOTED([XLOG_SEG_SIZE], [(${wal_segsize} * 1024 * 1024)], [ Changing XLOG_SEG_SIZE requires an initdb. ]) +# Set number of MAX_ALLOC_SEGNUM in extreme_rto +if test "${wal_segsize}" -lt 512; then + MAX_ALLOC_SEGNUM=4 +elif test "${wal_segsize}" -ge 512 -a "${wal_segsize}" -lt 1024; then + MAX_ALLOC_SEGNUM=2 +else + MAX_ALLOC_SEGNUM=1 +fi +AC_DEFINE_UNQUOTED(MAX_ALLOC_SEGNUM, $MAX_ALLOC_SEGNUM, [Define the max alloc segnum in extreme_rto.]) + # # C compiler # diff --git a/contrib/pagehack/CMakeLists.txt b/contrib/pagehack/CMakeLists.txt index ed712470b..b15bee10d 100644 --- a/contrib/pagehack/CMakeLists.txt +++ b/contrib/pagehack/CMakeLists.txt @@ -1,5 +1,8 @@ #This is the main CMAKE for build all pagehack. 
# pagehack +execute_process( + COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/dss/dss_adaptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dss_adaptor.cpp +) AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_pagehack_SRC) set(TGT_pagehack_INC ${TGT_pq_INC} ${ZSTD_INCLUDE_PATH} ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SRC_DIR}/lib/gstrace ${PROJECT_SRC_DIR}/lib/page_compression diff --git a/contrib/pagehack/Makefile b/contrib/pagehack/Makefile index f80d82b8a..8dd753521 100644 --- a/contrib/pagehack/Makefile +++ b/contrib/pagehack/Makefile @@ -1,6 +1,7 @@ # contrib/pagehack/Makefile MODULE_big = pagehack -OBJS = pagehack.o +OBJS = pagehack.o \ + $(top_builddir)/src/gausskernel/storage/dss/dss_adaptor.o # executable program, even there is no database server/client PROGRAM = pagehack diff --git a/contrib/pagehack/pagehack.cpp b/contrib/pagehack/pagehack.cpp index f6b1eefd1..fb353e68e 100644 --- a/contrib/pagehack/pagehack.cpp +++ b/contrib/pagehack/pagehack.cpp @@ -80,6 +80,8 @@ #include "storage/smgr/relfilenode.h" #include "storage/sinval.h" #include "storage/smgr/segment.h" +#include "storage/dss/dss_adaptor.h" +#include "storage/file/fio_device.h" #include "replication/bcm.h" #include "utils/datetime.h" #include "utils/memutils.h" @@ -88,6 +90,7 @@ #include "cstore.h" #include "common/build_query/build_query.h" #include +#include "tool_common.h" #ifdef ENABLE_MULTIPLE_NODES #include "tsdb/utils/constant_def.h" #endif @@ -100,6 +103,7 @@ #define CLASS_TYPE_NUM 512 #define TEN 10 + typedef unsigned char* binary; static const char* indents[] = { // 10 tab is enough to used. "", @@ -192,6 +196,7 @@ static ParseHeapTupleData PgIndexRelTupleParser[] = {ParseToastIndexTupleData}; static int PgHeapRelTupleParserCursor = -1; static int PgIndexRelTupleParserCursor = -1; + /* For Assert(...) macros. 
*/ THR_LOCAL bool assert_enabled = true; @@ -200,6 +205,7 @@ bool only_vm = false; bool only_bcm = false; bool write_back = false; bool dirty_page = false; +bool enable_dss = false; int start_item = 1; int num_item = 0; int SegNo = 0; @@ -1014,6 +1020,9 @@ static void usage(const char* progname) " -d only for test, use 0xFF to fill the last half page[4k]\n" " -z only for undo space/group meta, dump the specified space/group\n" " -S heap file segment number\n" + "\nDss options:\n" + " -D enable shared storage mode\n" + " -c SOCKETPATH dss connect socket file path\n" "\nCommon options:\n" " --help, -h show this help, then exit\n" " --version, -V output version information, then exit\n"); @@ -3195,7 +3204,8 @@ static int parse_uncompressed_page_file(const char *filename, SegmentType type, BlockNumber number = number_read; size_t result; - if (NULL == (fd = fopen(filename, "rb+"))) { + fd = fopen(filename, "rb+"); + if (fd == NULL) { fprintf(stderr, "%s: %s\n", filename, strerror(errno)); return false; } @@ -3422,7 +3432,8 @@ static bool parse_internal_init_file(char* filename) return false; } - if (NULL == (fp = fopen(filename, "rb"))) { + fp = fopen(filename, "rb"); + if (fp == NULL) { result = false; fprintf(stderr, "IO error when opening %s: %s\n", filename, strerror(errno)); goto read_failed; @@ -3653,7 +3664,8 @@ static int parse_filenodemap_file(char* filename) } fill_filenode_map(pg_class_map); - if (NULL == (fd = fopen(filename, "rb"))) { + fd = fopen(filename, "rb"); + if (fd == NULL) { fprintf(stderr, "%s: %s\n", filename, strerror(errno)); return false; } @@ -3738,7 +3750,8 @@ static int parse_cu_file(char* filename, uint64 offset) errno_t rc = snprintf_s(fullpath, sizeof(fullpath), sizeof(fullpath) - 1, "%s.%d", filename, seg_num); securec_check_ss_c(rc, "\0", "\0"); - if (NULL == (fd = fopen(fullpath, "rb"))) { + fd = fopen(fullpath, "rb"); + if (fd == NULL) { fprintf(stderr, "%s: %s\n", fullpath, strerror(errno)); return false; } @@ -3797,7 +3810,8 @@ 
static int parse_slot_file(char* filename) size_t readBytes = 0; pg_crc32 checksum = 0; - if (NULL == (fd = fopen(filename, "rb"))) { + fd = fopen(filename, "rb"); + if (fd == NULL) { fprintf(stderr, "%s: %s\n", filename, strerror(errno)); return false; } @@ -3894,7 +3908,8 @@ static int parse_gaussdb_state_file(char* filename) char* BuildModeStr[] = {"node", "auto", "full", "incremental"}; XLogRecPtr lsn; uint32 term; - if (NULL == (fd = fopen(filename, "rb"))) { + fd = fopen(filename, "rb"); + if (fd == NULL) { fprintf(stderr, "%s: %s\n", filename, strerror(errno)); return false; } @@ -3956,7 +3971,8 @@ static int parse_pg_control_file(char* filename) char sysident_str[32]; const char* strftime_fmt = "%c"; - if (NULL == (fd = fopen(filename, "rb"))) { + fd = fopen(filename, "rb"); + if (fd == NULL) { fprintf(stderr, "%s: %s\n", filename, strerror(errno)); return false; } @@ -4091,7 +4107,8 @@ static int parse_clog_file(char* filename) xid = (uint64)segnum * segnum_xid; - if (NULL == (fd = fopen(filename, "rb"))) { + fd = fopen(filename, "rb"); + if (fd == NULL) { fprintf(stderr, "%s: %s\n", filename, strerror(errno)); return false; } @@ -4146,7 +4163,8 @@ static int parse_csnlog_file(char* filename) xid = (uint64)segnum * segnum_xid; - if (NULL == (fd = fopen(filename, "rb"))) { + fd = fopen(filename, "rb"); + if (fd == NULL) { fprintf(stderr, "%s: %s\n", filename, gs_strerror(errno)); return false; } @@ -4403,7 +4421,7 @@ static bool parse_dw_file(const char* file_name, uint32 start_page, uint32 page_ (void)dirname(meta_full_path); /* extract the meta name from DW_META_FILE */ - rc = strcpy_s(meta_name_tmp, PATH_MAX, DW_META_FILE); + rc = strcpy_s(meta_name_tmp, PATH_MAX, T_DW_META_FILE); securec_check(rc, "", ""); meta_name = basename(meta_name_tmp); @@ -4414,7 +4432,6 @@ static bool parse_dw_file(const char* file_name, uint32 start_page, uint32 page_ securec_check(rc, "", ""); fd = fopen(meta_full_path, "rb+"); - if (fd == NULL) { fprintf(stderr, "%s: %s\n", 
meta_full_path, strerror(errno)); return false; @@ -4580,8 +4597,7 @@ static bool parse_segment_head(char *filename, uint32 start_page) } bool result; - - int fd = open(filename, O_RDONLY); + int fd = open(filename, O_RDONLY, S_IRUSR | S_IWUSR); if (fd < 0) { fprintf(stderr, "Failed to open %s: %s\n", filename, strerror(errno)); free(buf); @@ -4652,7 +4668,8 @@ static bool parse_dw_single_flush_file(const char* file_name) char *dw_block = (char *)TYPEALIGN(BLCKSZ, unaligned_buf2); PageHeader pghr = NULL; - if (NULL == (fd = fopen(file_name, "rb"))) { + fd = fopen(file_name, "rb"); + if (fd == NULL) { fprintf(stderr, "%s: %s\n", file_name, gs_strerror(errno)); free(item); free(unaligned_buf); @@ -5569,6 +5586,7 @@ int main(int argc, char** argv) char* filename = NULL; char* env = NULL; const char* progname = NULL; + const char* socketpath = NULL; uint32 start_point = 0; uint32 num_block = 0; uint64 cu_offset = 0; @@ -5593,7 +5611,7 @@ int main(int argc, char** argv) setvbuf(stderr, NULL, _IONBF, 0); #endif - while ((c = getopt(argc, argv, "bf:o:t:vs:z:n:r:i:I:N:uwdS:")) != -1) { + while ((c = getopt(argc, argv, "bc:Df:o:t:vs:z:n:r:i:I:N:uwdS:")) != -1) { switch (c) { case 'f': filename = optarg; @@ -5699,6 +5717,15 @@ int main(int argc, char** argv) SegNo = (unsigned int)strtolSafe(optarg, 0); break; + case 'D': + enable_dss = true; + break; + + case 'c': + socketpath = optarg; + enable_dss = true; + break; + default: fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); @@ -5721,6 +5748,16 @@ int main(int argc, char** argv) exit(1); } + if (enable_dss && socketpath == NULL) { + fprintf(stderr, "Socketpath cannot be NULL when enable dss.\n"); + exit(1); + } + + if (enable_dss && (filename[0] != '+' || strstr(filename, "/") == NULL)) { + fprintf(stderr, "Filepath should be absolutely when enable dss.\n"); + exit(1); + } + if (((start_point != 0) || (num_block != 0)) && /* only heap/index/undo/dw */ (hackingtype > HACKING_UNDO && 
hackingtype != HACKING_DW && hackingtype != HACKING_SEGMENT)) { @@ -5749,6 +5786,13 @@ int main(int argc, char** argv) pgdata = env; } + // dss device init + if (dss_device_init(socketpath, enable_dss) != DSS_SUCCESS) { + exit(1); + } + + initDataPathStruct(false); + // if heap relation name is given (-r), force hackingtype to be HACKING_HEAP if (PgHeapRelTupleParserCursor >= 0) { hackingtype = HACKING_HEAP; diff --git a/contrib/pg_xlogdump/CMakeLists.txt b/contrib/pg_xlogdump/CMakeLists.txt index ac48381d1..a2b68491d 100644 --- a/contrib/pg_xlogdump/CMakeLists.txt +++ b/contrib/pg_xlogdump/CMakeLists.txt @@ -27,6 +27,7 @@ execute_process( COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/access/rmgrdesc/undologdesc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/undologdesc.cpp COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/access/rmgrdesc/replorigindesc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/replorigindesc.cpp COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/smgr/cfs/cfs_mddesc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cfs_mddesc.cpp + COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/dss/dss_adaptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dss_adaptor.cpp ) AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_xlogdump_SRC) diff --git a/contrib/pg_xlogdump/Makefile b/contrib/pg_xlogdump/Makefile index 7f32268e1..61a89991d 100644 --- a/contrib/pg_xlogdump/Makefile +++ b/contrib/pg_xlogdump/Makefile @@ -7,7 +7,8 @@ PROGRAM = pg_xlogdump OBJS = pg_xlogdump.o compat.o xlogreader.o xlogreader_common.o rmgrdesc.o \ $(top_builddir)/src/gausskernel/storage/smgr/cfs/cfs_mddesc.o \ $(top_builddir)/src/lib/pgcommon/libpgcommon.a \ - $(RMGRDESCOBJS) $(WIN32RES) + $(RMGRDESCOBJS) $(WIN32RES) \ + $(top_builddir)/src/gausskernel/storage/dss/dss_adaptor.o RMGRDESCSOURCES = $(notdir $(wildcard $(top_srcdir)/src/gausskernel/storage/access/rmgrdesc/*desc.cpp)) RMGRDESCOBJS = $(patsubst %.cpp,%.o,$(RMGRDESCSOURCES)) diff --git a/contrib/pg_xlogdump/pg_xlogdump.cpp b/contrib/pg_xlogdump/pg_xlogdump.cpp 
index 2cd3a237a..ca6d96c1c 100644 --- a/contrib/pg_xlogdump/pg_xlogdump.cpp +++ b/contrib/pg_xlogdump/pg_xlogdump.cpp @@ -33,6 +33,8 @@ #include "rmgrdesc.h" #include "storage/smgr/segment.h" #include "storage/page_compression.h" +#include "storage/dss/dss_adaptor.h" +#include "storage/file/fio_device.h" static const char* progname; @@ -44,6 +46,8 @@ typedef struct XLogDumpPrivate { bool endptr_reached; char* shareStorageXlogFilePath; long shareStorageXlogSize; + bool enable_dss; + char* socketpath; } XLogDumpPrivate; typedef struct XLogDumpConfig { @@ -77,7 +81,7 @@ typedef struct XLogDumpStats { } XLogDumpStats; static void XLogDumpTablePage(XLogReaderState* record, int block_id, RelFileNode rnode, BlockNumber blk); -static void XLogDumpXLogRead(const char* directory, TimeLineID timeline_id, XLogRecPtr startptr, char* buf, Size count); +static void XLogDumpXLogRead(char* directory, TimeLineID timeline_id, XLogRecPtr startptr, char* buf, Size count); static int XLogDumpReadPage(XLogReaderState* state, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetPtr, char* readBuff, TimeLineID* curFileTLI, char* xlog_path = NULL); static void XLogDumpCountRecord(XLogDumpConfig* config, XLogDumpStats* stats, XLogReaderState* record); @@ -88,6 +92,7 @@ static void XLogDumpDisplayStats(XLogDumpConfig* config, XLogDumpStats* stats); static void usage(void); static int fuzzy_open_file(const char* directory, const char* fname); +static int open_dss_file(char* directory, const char* fname); static void split_path(char* path, char** dir, char** fname); static bool verify_directory(const char* directory); static void print_rmgr_list(void); @@ -224,6 +229,21 @@ static int fuzzy_open_file(const char* directory, const char* fname) return -1; } +// for dss storage +static int open_dss_file(char* directory, const char* fname) +{ + int fd = -1; + char fpath[MAXPGPATH]; + + /* directory / fname */ + canonicalize_path(directory); + snprintf(fpath, MAXPGPATH, "%s/%s", directory, fname); + 
if (dss_open_file(fpath, O_RDONLY | PG_BINARY, 0, &fd) == GS_SUCCESS) { + return fd; + } + return -1; +} + /* * write the full page to disk. */ @@ -327,7 +347,7 @@ static void XLogDumpReadSharedStorage(char* directory, XLogRecPtr startptr, long * given timeline, containing the specified record pointer; store the data in * the passed buffer. */ -static void XLogDumpXLogRead(const char* directory, TimeLineID timeline_id, XLogRecPtr startptr, char* buf, Size count) +static void XLogDumpXLogRead(char* directory, TimeLineID timeline_id, XLogRecPtr startptr, char* buf, Size count) { char* p = NULL; XLogRecPtr recptr; @@ -359,7 +379,11 @@ static void XLogDumpXLogRead(const char* directory, TimeLineID timeline_id, XLog XLogFileName(fname, MAXFNAMELEN, timeline_id, sendSegNo); - sendFile = fuzzy_open_file(directory, fname); + if (is_dss_file(directory)) { + sendFile = open_dss_file(directory, fname); + } else { + sendFile = fuzzy_open_file(directory, fname); + } if (sendFile < 0) fatal_error("could not find file \"%s\": %s", fname, strerror(errno)); @@ -807,6 +831,8 @@ static void usage(void) printf(" -z, --stats show statistics instead of records\n"); printf(" -v, --verbose show detailed information\n"); printf(" -?, --help show this help, then exit\n"); + printf(" --enable-dss enable shared storage mode\n"); + printf(" --socketpath=SOCKETPATH dss connect socket file path\n"); } int main(int argc, char** argv) @@ -823,12 +849,14 @@ int main(int argc, char** argv) static struct option long_options[] = {{"bkp-details", no_argument, NULL, 'b'}, {"end", required_argument, NULL, 'e'}, + {"enable-dss", no_argument, NULL, 1}, {"follow", no_argument, NULL, 'f'}, {"help", no_argument, NULL, '?'}, {"limit", required_argument, NULL, 'n'}, {"path", required_argument, NULL, 'p'}, {"rmgr", required_argument, NULL, 'r'}, {"start", required_argument, NULL, 's'}, + {"socketpath", required_argument, NULL, 2}, {"timeline", required_argument, NULL, 't'}, {"write-fpw", no_argument, NULL, 'w'}, 
{"xid", required_argument, NULL, 'x'}, @@ -852,6 +880,8 @@ int main(int argc, char** argv) dumpprivate.endptr = InvalidXLogRecPtr; dumpprivate.endptr_reached = false; dumpprivate.shareStorageXlogFilePath = NULL; + dumpprivate.enable_dss = false; + dumpprivate.socketpath = NULL; const long defaultShareStorageXlogSize = 512 * 1024 * 1024 * 1024L; dumpprivate.shareStorageXlogSize = defaultShareStorageXlogSize; @@ -959,6 +989,13 @@ int main(int argc, char** argv) case 'z': config.stats = true; break; + case 1: + dumpprivate.enable_dss = true; + break; + case 2: + dumpprivate.enable_dss = true; + dumpprivate.socketpath = strdup(optarg); + break; default: goto bad_argument; } @@ -969,6 +1006,30 @@ int main(int argc, char** argv) goto bad_argument; } + // dss device init + if (dss_device_init(dumpprivate.socketpath, dumpprivate.enable_dss) != DSS_SUCCESS) { + fatal_error("failed to init dss device"); + } + + if (dumpprivate.enable_dss) { + if (dumpprivate.socketpath == NULL) { + fprintf(stderr, "%s: socketpath cannot be NULL when enable dss\n", progname); + goto bad_argument; + } + + char* directory = dumpprivate.inpath == NULL ? 
argv[optind] : dumpprivate.inpath; + if (directory != NULL) { + if (directory[0] != '+' || strstr(directory, "/") == NULL) { + fprintf(stderr, "%s: xlog file path should be absolutely when enable dss\n", progname); + goto bad_argument; + } + } else { + fprintf(stderr, "%s: xlog file path must be specified when enable dss\n", progname); + goto bad_argument; + } + + } + if (dumpprivate.inpath != NULL) { /* validate path points to directory */ if (!verify_directory(dumpprivate.inpath)) { @@ -998,7 +1059,12 @@ int main(int argc, char** argv) fatal_error("cannot open directory \"%s\": %s", dumpprivate.inpath, strerror(errno)); } - fd = fuzzy_open_file(dumpprivate.inpath, fname); + if (is_dss_file(dumpprivate.inpath)) { + fd = open_dss_file(dumpprivate.inpath, fname); + } else { + fd = fuzzy_open_file(dumpprivate.inpath, fname); + } + if (fd < 0) fatal_error("could not open file \"%s\"", fname); close(fd); @@ -1030,7 +1096,12 @@ int main(int argc, char** argv) /* ignore directory, already have that */ split_path(argv[optind + 1], &directory, &fname); - fd = fuzzy_open_file(dumpprivate.inpath, fname); + if (is_dss_file(dumpprivate.inpath)) { + fd = open_dss_file(dumpprivate.inpath, fname); + } else { + fd = fuzzy_open_file(dumpprivate.inpath, fname); + } + if (fd < 0) fatal_error("could not open file \"%s\"", fname); close(fd); diff --git a/doc/src/sgml/installation.sgmlin b/doc/src/sgml/installation.sgmlin index 97b027d9b..cd20ea98b 100644 --- a/doc/src/sgml/installation.sgmlin +++ b/doc/src/sgml/installation.sgmlin @@ -1186,7 +1186,7 @@ su - gaussdb the size of each individual file in the WAL log. It may be useful to adjust this size to control the granularity of WAL log shipping. The default size is 16 megabytes. - The value must be a power of 2 between 1 and 64 (megabytes). + The value must be a power of 2 between 1 and 1024 (megabytes). Note that changing this value requires an gs_initdb. 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 601bdb7a8..ec682ea4b 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -193,6 +193,18 @@ endif() if("${ENABLE_MULTIPLE_NODES}" STREQUAL "OFF") install(DIRECTORY ${DCF_LIB_PATH} DESTINATION .) endif() +if(${ENABLE_MULTIPLE_NODES}_${ENABLE_PRIVATEGAUSS} STREQUAL OFF_OFF) + if(EXISTS ${DMS_LIB_PATH}) + install(DIRECTORY ${DMS_LIB_PATH} DESTINATION .) + endif() + if(EXISTS ${DSS_LIB_PATH}) + install(DIRECTORY ${DSS_LIB_PATH} DESTINATION .) + endif() + if(EXISTS ${DSS_BIN_PATH}) + install(DIRECTORY ${DSS_BIN_PATH} DESTINATION . FILE_PERMISSIONS OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE OWNER_READ GROUP_READ WORLD_READ OWNER_WRITE) + endif() +endif() + install(DIRECTORY ${ZSTD_LIB_PATH} DESTINATION . PATTERN "*.a" EXCLUDE) if(NOT "${ENABLE_LITE_MODE}" STREQUAL "ON") install(DIRECTORY ${LIBOBS_LIB_PATH} DESTINATION .) diff --git a/src/bin/gs_guc/cluster_guc.conf b/src/bin/gs_guc/cluster_guc.conf index 0e5443190..efaff2028 100755 --- a/src/bin/gs_guc/cluster_guc.conf +++ b/src/bin/gs_guc/cluster_guc.conf @@ -697,6 +697,22 @@ logical_decode_options_default|string|0,0|NULL|NULL| logical_sender_timeout|int|0,2147483647|ms|NULL| var_eq_const_selectivity|bool|0,0|NULL|NULL| enable_save_confirmed_lsn|bool|0,0|NULL|NULL| +ss_enable_dss|bool|0,0|NULL|NULL| +ss_dss_vg_name|string|0,0|NULL|NULL| +ss_dss_conn_path|string|0,0|NULL|NULL| +ss_enable_dms|bool|0,0|NULL|NULL| +ss_enable_catalog_centralized|bool|0,0|NULL|NULL| +ss_enable_reform|bool|0,0|NULL|NULL| +ss_enable_ssl|bool|0,0|NULL|NULL| +ss_enable_log_level|bool|0,0|NULL|NULL| +ss_interconnect_channel_count|int|1,32|NULL|NULL| +ss_work_thread_count|int|16,128|NULL|NULL| +ss_recv_msg_pool_size|int|1024,1048576|kB|NULL| +ss_interconnect_type|string|0,0|NULL|NULL| +ss_instance_id|int|0,63|NULL|NULL| +ss_interconnect_url|string|0,0|NULL|NULL| +ss_rdma_work_config|string|0,0|NULL|NULL| +ss_ock_log_path|string|0,0|NULL|NULL| [cmserver] log_dir|string|0,0|NULL|NULL| 
log_file_size|int|0,2047|MB|NULL| diff --git a/src/bin/gs_guc/pg_guc.cpp b/src/bin/gs_guc/pg_guc.cpp index 53a5d48f6..545f2ced2 100644 --- a/src/bin/gs_guc/pg_guc.cpp +++ b/src/bin/gs_guc/pg_guc.cpp @@ -1297,6 +1297,11 @@ do_gucset(const char *action_type, const char *data_dir) int func_status = -1; int result_status = SUCCESS; + int ss_lines_index = 0; + int ss_optvalue_off = 0; + int ss_optvalue_len = 0; + char ss_enable_dss[MAX_VALUE_LEN] = {0x00}; + FileLock filelock = {NULL, 0}; UpdateOrAddParameter updateoradd = UPDATE_PARAMETER; @@ -1324,6 +1329,13 @@ do_gucset(const char *action_type, const char *data_dir) if (NULL == opt_lines) return FAILURE; + + ss_lines_index = find_gucoption(opt_lines, "ss_enable_dss", NULL, NULL, &ss_optvalue_off, &ss_optvalue_len); + if (INVALID_LINES_IDX != ss_lines_index) { + rc = strncpy_s(ss_enable_dss, MAX_VALUE_LEN, + (opt_lines[ss_lines_index] + ss_optvalue_off), (size_t)ss_optvalue_len); + securec_check_c(rc, "\0", "\0"); + } for (i = 0; i < config_param_number; i++) { @@ -1335,6 +1347,18 @@ do_gucset(const char *action_type, const char *data_dir) return FAILURE; } + if (strncmp(ss_enable_dss, "on", strlen("on")) == 0 && + ((strncmp(config_param[i], "archive_mode", strlen("archive_mode")) == 0 && + strncmp(config_value[i], "on", strlen("on")) == 0) || + (strncmp(config_param[i], "archive_command", strlen("archive_command")) == 0 && + config_value[i] != NULL))) { + release_file_lock(&filelock); + freefile(opt_lines); + GS_FREE(tmpAZStr); + write_stderr(_("%s: Not support archive function while DMS and DSS enabled\n"), progname); + return FAILURE; + } + // only when the parameter is synchronous_standby_names, this branch can be reached. if (g_need_changed && 0 == strncmp(config_param[i], "synchronous_standby_names", strlen(config_param[i]) > strlen("synchronous_standby_names") ? 
strlen(config_param[i]) : strlen("synchronous_standby_names"))) { diff --git a/src/bin/initdb/CMakeLists.txt b/src/bin/initdb/CMakeLists.txt index e8c381fdb..5a5aa668e 100755 --- a/src/bin/initdb/CMakeLists.txt +++ b/src/bin/initdb/CMakeLists.txt @@ -5,6 +5,7 @@ execute_process( COMMAND ln -fs ${PROJECT_SRC_DIR}/common/backend/utils/mb/encnames.cpp ${CMAKE_CURRENT_SOURCE_DIR}/encnames.cpp COMMAND ln -fs ${PROJECT_SRC_DIR}/common/timezone/localtime.cpp ${CMAKE_CURRENT_SOURCE_DIR}/localtime.cpp COMMAND ln -fs ${PROJECT_SRC_DIR}/common/interfaces/libpq/pqsignal.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pqsignal.cpp + COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/dss/dss_adaptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dss_adaptor.cpp ) set(TGT_initdb_SRC @@ -13,6 +14,8 @@ set(TGT_initdb_SRC ${CMAKE_CURRENT_SOURCE_DIR}/initdb.cpp ${CMAKE_CURRENT_SOURCE_DIR}/localtime.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pqsignal.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ss_initdb.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dss_adaptor.cpp ) set(TGT_initdb_INC diff --git a/src/bin/initdb/Makefile b/src/bin/initdb/Makefile index 07123069e..45fde1e8a 100644 --- a/src/bin/initdb/Makefile +++ b/src/bin/initdb/Makefile @@ -30,7 +30,8 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif endif -OBJS= initdb.o findtimezone.o localtime.o encnames.o pqsignal.o $(WIN32RES) $(top_builddir)/src/lib/elog/elog.a +OBJS= initdb.o findtimezone.o ss_initdb.o localtime.o encnames.o pqsignal.o $(WIN32RES) $(top_builddir)/src/lib/elog/elog.a \ + $(top_builddir)/src/gausskernel/storage/dss/dss_adaptor.o all: gs_initdb gs_initdb: $(OBJS) | submake-libpgport diff --git a/src/bin/initdb/initdb.cpp b/src/bin/initdb/initdb.cpp index 179d8eea6..e8fe4f081 100644 --- a/src/bin/initdb/initdb.cpp +++ b/src/bin/initdb/initdb.cpp @@ -59,6 +59,11 @@ #include "getopt_long.h" #include "miscadmin.h" #include "bin/elog.h" +#include "catalog/pg_control.h" +#include "storage/dss/dss_adaptor.h" +#include "ss_initdb.h" +#include "storage/smgr/smgr.h" +#include 
"storage/file/fio_device.h" #ifdef ENABLE_MULTIPLE_NODES #include "distribute_core.h" @@ -75,6 +80,7 @@ * Note that this macro must be the same to fd.h */ #define PG_TEMP_FILES_DIR "pgsql_tmp" +#define SS_PG_TEMP_FILES_DIR "ss_pgsql_tmp" #define RESULT_LENGTH 20 #define BUF_LENGTH 64 #define PG_CAST_CHAR_LENGTH 1024 @@ -148,6 +154,11 @@ static char* lc_time = ""; static char* lc_messages = ""; static const char* default_text_search_config = ""; static char* username = ""; +static char* vgname = ""; +static char* vgdata = ""; +static char* vglog = ""; +static char* socketpath = NULL; +static bool enable_dss = false; static bool pwprompt = false; static char* pwfilename = NULL; static const char* authmethodhost = ""; @@ -262,9 +273,9 @@ static const char* raw_backend_options = "--single " static char bin_path[MAXPGPATH]; static char backend_exec[MAXPGPATH]; -static void* pg_malloc(size_t size); +void* pg_malloc(size_t size); static char* xstrdup(const char* s); -static char** replace_token(char** lines, const char* token, const char* replacement); +char** replace_token(char** lines, const char* token, const char* replacement); #ifndef HAVE_UNIX_SOCKETS static char** filter_lines_with_token(char** lines, const char* token); @@ -277,7 +288,7 @@ static void pre_sync_fname(char *fname, bool isdir); static void fsync_fname(char *fname, bool isdir); #endif static FILE* popen_check(const char* command, const char* mode); -static void exit_nicely(void); +void exit_nicely(void); static char* get_id(void); static char* get_encoding_id(char* encoding_name); static void set_input(char** dest, const char* filename); @@ -473,7 +484,7 @@ void check_env_value(const char* input_env_value) * Note that we can't call exit_nicely() on a memory failure, as it calls * rmtree() which needs memory allocation. So we just exit with a bang. 
*/ -static void* pg_malloc(size_t size) +void* pg_malloc(size_t size) { void* result = NULL; @@ -507,7 +518,7 @@ static char* xstrdup(const char* s) * This does most of what sed was used for in the shell script, but * doesn't need any regexp stuff. */ -static char** replace_token(char** lines, const char* token, const char* replacement) +char** replace_token(char** lines, const char* token, const char* replacement) { int numlines = 1; int i; @@ -802,7 +813,7 @@ pre_sync_fname(char *fname, bool isdir) #if defined(HAVE_SYNC_FILE_RANGE) || (defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)) int fd; - fd = open(fname, O_RDONLY | PG_BINARY); + fd = open(fname, O_RDONLY | PG_BINARY, 0); /* * Some OSs don't allow us to open directories at all (Windows returns @@ -851,9 +862,9 @@ fsync_fname(char *fname, bool isdir) * cases here */ if (!isdir) - fd = open(fname, O_RDWR | PG_BINARY); + fd = open(fname, O_RDWR | PG_BINARY, 0); else - fd = open(fname, O_RDONLY | PG_BINARY); + fd = open(fname, O_RDONLY | PG_BINARY, 0); /* * Some OSs don't allow us to open directories at all (Windows returns @@ -906,11 +917,42 @@ static FILE* popen_check(const char* command, const char* mode) return cmdfd; } +static void rm_dss_dir(char *path) +{ + char filepath[MAXPGPATH]; + dirent *ent = NULL; + struct stat statbuf; + DIR *dir = opendir(path); + if (dir == NULL) { + return; + } + + while ((ent = gs_readdir(dir)) != NULL) { + if (strcmp(ent->d_name, ".") != 0 && strcmp(ent->d_name, "..") != 0 && + strcmp(ent->d_name, ".recycle") != 0) { + int nRet = snprintf_s(filepath, MAXPGPATH, MAXPGPATH - 1, "%s/%s", path, ent->d_name); + securec_check_ss_c(nRet, "\0", "\0"); + + if (lstat(filepath, &statbuf) != 0) { + continue; + } + + if (S_ISDIR(statbuf.st_mode)) { + rm_dss_dir(filepath); + } else { + (void)unlink(filepath); + } + } + } + + (void)closedir(dir); +} + /* * clean up any files we created on failure * if we created the data directory remove it too */ -static void exit_nicely(void) 
+void exit_nicely(void) { if (!noclean) { if (made_new_pgdata) { @@ -932,6 +974,17 @@ static void exit_nicely(void) if (!rmtree(xlog_dir, false)) write_stderr(_("%s: failed to remove contents of transaction log directory\n"), progname); } + + if (enable_dss) { + write_stderr(_("%s: removing dss directory \"%s\"\n"), progname, vgname); + if (strlen(vgdata) > 0) { + rm_dss_dir(vgdata); + } + if (strlen(vglog) > 0) { + rm_dss_dir(vglog); + } + } + /* otherwise died during startup, do nothing! */ } else { if (made_new_pgdata || found_existing_pgdata) @@ -1228,6 +1281,35 @@ static void CreatePGDefaultTempDir() exit_nicely(); } FREE_AND_RESET(path); + + /* Create specific pgsql_tmp dir in shared storage mode + * + * Files in pgsql_tmp are temporary and may be big in some complicated + * condition, so it's best to store it in the file system. Therefore we + * will create a specific dir in PGDATA when we enable dss mode. The + * path can not be customized now, but we may remove this limit in the
+ */ + if (enable_dss) { + size_t length = strlen(pg_data) + strlen(SS_PG_TEMP_FILES_DIR) + 13; + char *ss_path = (char*)pg_malloc(length); + rc = sprintf_s(ss_path, length, + "%s/%s", + pg_data, + SS_PG_TEMP_FILES_DIR); + securec_check_ss_c(rc, ss_path, "\0"); + + if (mkdir(ss_path, S_IRWXU) < 0) { + char errBuffer[ERROR_LIMIT_LEN]; + write_stderr(_("%s: could not mkdir \"%s\": %s\n"), + progname, + ss_path, + pqStrerror(errno, errBuffer, ERROR_LIMIT_LEN)); + FREE_AND_RESET(ss_path); + exit_nicely(); + } + FREE_AND_RESET(ss_path); + } } /* @@ -1280,13 +1362,16 @@ static void test_config_settings(void) static const int trial_conns[] = {100, 50, 40, 30, 20, 10}; static const int trial_bufs[] = { - 4096, 3584, 3072, 2560, 2048, 1536, 1000, 900, 800, 700, 600, 500, 400, 300, 200, 100, 50}; + 131072, 4096, 3584, 3072, 2560, 2048, 1536, 1000, 900, 800, 700, 600, 500, 400, 300, 200, 100, 50}; char cmd[MAXPGPATH]; const int connslen = sizeof(trial_conns) / sizeof(int); const int bufslen = sizeof(trial_bufs) / sizeof(int); int nRet = 0; int i, status, test_conns, test_buffs, ok_buffers = 0; + char** conflines; + char path[MAXPGPATH]; + char repltok[TZ_STRLEN_MAX + 100]; printf(_("selecting default max_connections ... ")); (void)fflush(stdout); @@ -1323,6 +1408,15 @@ static void test_config_settings(void) printf("%d\n", n_connections); + nRet = sprintf_s(path, sizeof(path), "%s/postgresql.conf", pg_data); + securec_check_ss_c(nRet, "\0", "\0"); + + /* read postgresql.conf in pg_data, so the config setting can be retained */ + conflines = readfile(path); + nRet = sprintf_s(repltok, sizeof(repltok), "max_connections = %d", n_connections); + securec_check_ss_c(nRet, "\0", "\0"); + conflines = replace_token(conflines, "#max_connections = 100", repltok); + printf(_("selecting default shared_buffers ... 
")); (void)fflush(stdout); @@ -1354,10 +1448,19 @@ static void test_config_settings(void) } n_buffers = test_buffs; - if ((n_buffers * (BLCKSZ / 1024)) % 1024 == 0) + if ((n_buffers * (BLCKSZ / 1024)) % 1024 == 0) { printf("%dMB\n", (n_buffers * (BLCKSZ / 1024)) / 1024); - else + nRet = sprintf_s(repltok, sizeof(repltok), "shared_buffers = %dMB", (n_buffers * (BLCKSZ / 1024)) / 1024); + } else { printf("%dkB\n", n_buffers * (BLCKSZ / 1024)); + nRet = sprintf_s(repltok, sizeof(repltok), "shared_buffers = %dkB", n_buffers * (BLCKSZ / 1024)); + } + securec_check_ss_c(nRet, "\0", "\0"); + conflines = replace_token(conflines, "#shared_buffers = 32MB", repltok); + + writefile(path, conflines); + (void)chmod(path, S_IRUSR | S_IWUSR); + FREE_AND_RESET(conflines); } /* @@ -1378,25 +1481,14 @@ static void setup_config(void) char* buf_default_text_search_config = NULL; char* buf_nodename = NULL; char* buf_default_timezone = NULL; + char* buf_socketpath = NULL; fputs(_("creating configuration files ... 
"), stdout); (void)fflush(stdout); /* postgresql.conf */ - conflines = readfile(conf_file); - nRet = sprintf_s(repltok, sizeof(repltok), "max_connections = %d", n_connections); - securec_check_ss_c(nRet, "\0", "\0"); - conflines = replace_token(conflines, "#max_connections = 100", repltok); - - if ((n_buffers * (BLCKSZ / 1024)) % 1024 == 0) - nRet = sprintf_s(repltok, sizeof(repltok), "shared_buffers = %dMB", (n_buffers * (BLCKSZ / 1024)) / 1024); - else - nRet = sprintf_s(repltok, sizeof(repltok), "shared_buffers = %dkB", n_buffers * (BLCKSZ / 1024)); - securec_check_ss_c(nRet, "\0", "\0"); - conflines = replace_token(conflines, "#shared_buffers = 32MB", repltok); - #if DEF_PGPORT != 5432 nRet = sprintf_s(repltok, sizeof(repltok), "#port = %d", DEF_PGPORT); securec_check_ss_c(nRet, "\0", "\0"); @@ -1481,6 +1573,28 @@ static void setup_config(void) FREE_AND_RESET(buf_xlog_file_path); } + if (strlen(vgdata) != 0) { + nRet = sprintf_s(repltok, sizeof(repltok), "ss_dss_vg_name = '%s'", vgdata); + securec_check_ss_c(nRet, "\0", "\0"); + conflines = replace_token(conflines, "#ss_dss_vg_name = ''", repltok); + } + + if (socketpath != NULL) { + buf_socketpath = escape_quotes(socketpath); + nRet = sprintf_s(repltok, sizeof(repltok), "ss_dss_conn_path = '%s'", buf_socketpath); + securec_check_ss_c(nRet, "\0", "\0"); + conflines = replace_token(conflines, "#ss_dss_conn_path = ''", repltok); + FREE_AND_RESET(buf_socketpath); + } + + if (enable_dss) { + nRet = strcpy_s(repltok, sizeof(repltok), "ss_enable_dss = on"); + securec_check_c(nRet, "\0", "\0"); + conflines = replace_token(conflines, "#ss_enable_dss = off", repltok); + + conflines = ss_addnodeparmater(conflines); + } + nRet = sprintf_s(path, sizeof(path), "%s/postgresql.conf", pg_data); securec_check_ss_c(nRet, "\0", "\0"); @@ -1722,9 +1836,11 @@ static void bootstrap_template1(void) securec_check_ss_c(nRet, "\0", "\0"); PG_CMD_OPEN; - + for (line = bki_lines; *line != NULL; line++) { - PG_CMD_PUTS(*line); + if 
(ss_need_mkclusterdir) { + PG_CMD_PUTS(*line); + } FREE_AND_RESET(*line); } @@ -3681,6 +3797,7 @@ static void usage(const char* prog_name) #ifndef ENABLE_MULTIPLE_NODES printf(_(" -c, --enable-dcf enable DCF mode\n")); #endif + printf(_(" --enable-dss enable shared storage mode\n")); printf(_(" [-D, --pgdata=]DATADIR location for this database cluster\n")); #ifdef ENABLE_MULTIPLE_NODES printf(_(" --nodename=NODENAME name of openGauss node initialized\n")); @@ -3688,6 +3805,9 @@ static void usage(const char* prog_name) #else printf(_(" --nodename=NODENAME name of single node initialized\n")); #endif + printf(_(" --vgname=VGNAME name of dss volume group\n")); + printf(_(" --socketpath=SOCKETPATH\n" + " dss connect socket file path\n")); printf(_(" -E, --encoding=ENCODING set default encoding for new databases\n")); printf(_(" --locale=LOCALE set default locale for new databases\n")); printf(_(" --dbcompatibility=DBCOMPATIBILITY set default dbcompatibility for new database\n")); @@ -3771,6 +3891,36 @@ static bool is_file_exist(const char* path) return isExist; } +static void parse_vgname_args(char* args) +{ + vgname = xstrdup(args); + enable_dss = true; + if (strstr(vgname, "/") != NULL) { + fprintf(stderr, "invalid token \"/\" in vgname"); + exit(1); + } + + char *comma = strstr(vgname, ","); + if (comma == NULL) { + vgdata = vgname; + vglog = (char *)""; + return; + } + + vgdata = xstrdup(vgname); + comma = strstr(vgdata, ","); + comma[0] = '\0'; + vglog = comma + 1; + if (strstr(vgdata, ",") != NULL) { + fprintf(stderr, "invalid vgname args, should be two volume group names, example: \"+data,+log\""); + exit(1); + } + if (strstr(vglog, ",") != NULL) { + fprintf(stderr, "invalid vgname args, should be two volume group names, example: \"+data,+log\""); + exit(1); + } +} + int main(int argc, char* argv[]) { /* @@ -3815,6 +3965,10 @@ int main(int argc, char* argv[]) #endif {"dbcompatibility", required_argument, NULL, 13}, {"bucketlength", required_argument, NULL, 14}, 
+ {"vgname", required_argument, NULL, 15}, + {"socketpath", required_argument, NULL, 16}, + {"enable-dss", no_argument, NULL, 17}, + {"dms_url", required_argument, NULL, 18}, {NULL, 0, NULL, 0}}; int c, i, ret; @@ -3886,9 +4040,9 @@ int main(int argc, char* argv[]) /* process command-line options */ #ifdef ENABLE_LITE_MODE - while ((c = getopt_long(argc, argv, "cdD:E:L:nNU:WA:SsT:X:C:w:H:g:", long_options, &option_index)) != -1) { + while ((c = getopt_long(argc, argv, "cdD:E:L:nNU:WA:SsT:X:C:w:H:g:I:", long_options, &option_index)) != -1) { #else - while ((c = getopt_long(argc, argv, "cdD:E:L:nU:WA:SsT:X:C:w:H:g:", long_options, &option_index)) != -1) { + while ((c = getopt_long(argc, argv, "cdD:E:L:nU:WA:SsT:X:C:w:H:g:I:", long_options, &option_index)) != -1) { #endif #define FREE_NOT_STATIC_ZERO_STRING(s) \ do { \ @@ -4073,6 +4227,14 @@ int main(int argc, char* argv[]) check_input_spec_char(optarg); host_ip = xstrdup(optarg); break; + case 'I': + if (atoi(optarg) < MIN_INSTANCEID || atoi(optarg) > MAX_INSTANCEID) { + write_stderr(_("unexpected node id specified, valid range is %d - %d.\n"), + MIN_INSTANCEID, MAX_INSTANCEID); + exit(1); + } + ss_nodeid = atoi(optarg); + break; #ifdef PGXC case 12: FREE_NOT_STATIC_ZERO_STRING(nodename); @@ -4093,7 +4255,25 @@ int main(int argc, char* argv[]) } g_bucket_len = atoi(optarg); break; - + case 15: + FREE_NOT_STATIC_ZERO_STRING(vgname); + FREE_NOT_STATIC_ZERO_STRING(vgdata); + FREE_NOT_STATIC_ZERO_STRING(vglog); + parse_vgname_args(optarg); + break; + case 16: + FREE_NOT_STATIC_ZERO_STRING(socketpath); + socketpath = xstrdup(optarg); + enable_dss = true; + break; + case 17: + enable_dss = true; + break; + case 18: + FREE_NOT_STATIC_ZERO_STRING(ss_nodedatainfo); + check_input_spec_char(optarg); + ss_nodedatainfo = xstrdup(optarg); + break; default: /* getopt_long already emitted a complaint */ write_stderr(_("Try \"%s --help\" for more information.\n"), progname); @@ -4101,7 +4281,10 @@ int main(int argc, char* argv[]) } 
#undef FREE_NOT_STATIC_ZERO_STRING } - + + /* check nodedata.cfg and node_id */ + ss_issharedstorage = ss_check_nodedatainfo(); + if (default_text_search_config_tmp != NULL) default_text_search_config = default_text_search_config_tmp; if (authmethodhost_tmp != NULL) @@ -4460,6 +4643,12 @@ int main(int argc, char* argv[]) (void)umask(S_IRWXG | S_IRWXO); + // dss device init + if (dss_device_init(socketpath, enable_dss) != DSS_SUCCESS) { + write_stderr(_("failed to init dss device")); + exit_nicely(); + } + // log output redirect init_log(PROG_NAME); @@ -4577,10 +4766,21 @@ int main(int argc, char* argv[]) } } + if (ss_issharedstorage && (ss_check_shareddir(vgdata))) { + ss_need_mkclusterdir = false; + } + /* Create transaction log symlink, if required */ if (strcmp(xlog_dir, "") != 0) { char linkloc[MAXPGPATH] = {'\0'}; + /* check if specify xlog directory in shared storage mode */ + if (enable_dss) { + write_stderr(_("%s: can not specify transaction log directory " + "location in shared storage mode\n"), progname); + exit_nicely(); + } + /* clean up xlog directory name, check it's absolute */ canonicalize_path(xlog_dir); if (!is_absolute_path(xlog_dir)) { @@ -4664,46 +4864,50 @@ int main(int argc, char* argv[]) exit_nicely(); #endif } + + if (enable_dss && ss_issharedstorage) { + ss_mkdirdir(ss_nodeid, pg_data, vgdata, vglog, ss_need_mkclusterdir); + } else { + /* Create required subdirectories */ + printf(_("creating subdirectories ... in ordinary occasion")); + (void)fflush(stdout); - /* Create required subdirectories */ - printf(_("creating subdirectories ... 
")); - (void)fflush(stdout); + for (i = 0; (unsigned int)(i) < lengthof(subdirs); i++) { + char* path = NULL; + errno_t sret = 0; + size_t len = 0; - for (i = 0; (unsigned int)(i) < lengthof(subdirs); i++) { - char* path = NULL; - errno_t sret = 0; - size_t len = 0; + /* + * -X means using user define xlog directory, and will create symbolic pg_xlog + * under pg_data directory, So no need to create these sub-directories again. + */ + if (pg_strcasecmp(xlog_dir, "") != 0 && (pg_strcasecmp(subdirs[i], "pg_xlog") == 0)) { + continue; + } - /* - * -X means using user define xlog directory, and will create symbolic pg_xlog - * under pg_data directory, So no need to create these sub-directories again. - */ - if (pg_strcasecmp(xlog_dir, "") != 0 && (pg_strcasecmp(subdirs[i], "pg_xlog") == 0)) { - continue; - } + len = strlen(pg_data) + strlen(subdirs[i]) + 2; + path = (char*)pg_malloc(len); - len = strlen(pg_data) + strlen(subdirs[i]) + 2; - path = (char*)pg_malloc(len); + sret = sprintf_s(path, len, "%s/%s", pg_data, subdirs[i]); + securec_check_ss_c(sret, path, "\0"); - sret = sprintf_s(path, len, "%s/%s", pg_data, subdirs[i]); - securec_check_ss_c(sret, path, "\0"); - - /* - * The parent directory already exists, so we only need mkdir() not - * pg_mkdir_p() here, which avoids some failure modes; cf bug #13853. - */ - if (mkdir(path, S_IRWXU) < 0) { - char errBuffer[ERROR_LIMIT_LEN]; - fprintf(stderr, - _("%s: could not create directory \"%s\": %s\n"), - progname, - path, - pqStrerror(errno, errBuffer, ERROR_LIMIT_LEN)); + /* + * The parent directory already exists, so we only need mkdir() not + * pg_mkdir_p() here, which avoids some failure modes; cf bug #13853. 
+ */ + if (mkdir(path, S_IRWXU) < 0) { + char errBuffer[ERROR_LIMIT_LEN]; + fprintf(stderr, + _("%s: could not create directory \"%s\": %s\n"), + progname, + path, + pqStrerror(errno, errBuffer, ERROR_LIMIT_LEN)); + FREE_AND_RESET(path); + exit_nicely(); + } FREE_AND_RESET(path); - exit_nicely(); } - FREE_AND_RESET(path); } if (strlen(new_xlog_file_path) == 0) { @@ -4738,87 +4942,103 @@ int main(int argc, char* argv[]) } } } - - /* create or check pg_location path */ - mkdirForPgLocationDir(); - check_ok(); + + if (enable_dss) { + size_t total_len = strlen(boot_options) + BUF_LENGTH; + char *options = (char*)pg_malloc(total_len); + errno_t sret = sprintf_s(options, total_len, "%s -c segment_buffers=128MB -G", boot_options); + securec_check_ss_c(sret, options, "\0"); + boot_options = options; + + total_len = strlen(backend_options) + BUF_LENGTH; + options = (char*)pg_malloc(total_len); + sret = sprintf_s(options, total_len, "%s -c segment_buffers=128MB -G", backend_options); + securec_check_ss_c(sret, options, "\0"); + backend_options = options; + } + + if (ss_need_mkclusterdir) { + /* create or check pg_location path */ + mkdirForPgLocationDir(); + check_ok(); + } /* Top level PG_VERSION is checked by bootstrapper, so make it first */ write_version_file(NULL); create_pg_lockfiles(); - /* Select suitable configuration settings */ + /* Create all the text config files and select suitable configuration setting */ set_null_conf(); - test_config_settings(); - - /* Now create all the text config files */ setup_config(); + test_config_settings(); /* Init undo subsystem meta. 
*/ InitUndoSubsystemMeta(); /* Bootstrap template1 */ bootstrap_template1(); + + if (ss_need_mkclusterdir) { + /* + * Make the per-database PG_VERSION for template1 only after init'ing it + */ + write_version_file("base/1"); - /* - * Make the per-database PG_VERSION for template1 only after init'ing it - */ - write_version_file("base/1"); + CreatePGDefaultTempDir(); - CreatePGDefaultTempDir(); + /* Create the stuff we don't need to use bootstrap mode for */ - /* Create the stuff we don't need to use bootstrap mode for */ + setup_auth(); + get_set_pwd(); - setup_auth(); - get_set_pwd(); - - setup_depend(); - load_plpgsql(); - setup_sysviews(); + setup_depend(); + load_plpgsql(); + setup_sysviews(); #ifdef ENABLE_PRIVATEGAUSS - setup_privsysviews(); + setup_privsysviews(); #endif - setup_perfviews(); + setup_perfviews(); #ifdef PGXC - /* Initialize catalog information about the node self */ - setup_nodeself(); + /* Initialize catalog information about the node self */ + setup_nodeself(); #endif - setup_description(); + setup_description(); - setup_collation(); + setup_collation(); - setup_conversion(); + setup_conversion(); - setup_dictionary(); + setup_dictionary(); - setup_privileges(); + setup_privileges(); - setup_bucketmap_len(); + setup_bucketmap_len(); - setup_schema(); + setup_schema(); - load_supported_extension(); + load_supported_extension(); - setup_update(); + setup_update(); #ifndef ENABLE_MULTIPLE_NODES - setup_snapshots(); + setup_snapshots(); #endif - vacuum_db(); + vacuum_db(); - make_template0(); + make_template0(); - make_postgres(); + make_postgres(); #ifdef PGXC - vacuumfreeze("template0"); - vacuumfreeze("template1"); - vacuumfreeze("postgres"); + vacuumfreeze("template0"); + vacuumfreeze("template1"); + vacuumfreeze("postgres"); #endif + } #ifdef ENABLE_LITE_MODE if (do_sync) @@ -4907,10 +5127,10 @@ int main(int argc, char* argv[]) static bool isDirectory(const char* basepath, const char* name) { - struct stat buf; char* path = NULL; int nRet 
= 0; size_t len = 0; + struct stat buf; len = strlen(basepath) + strlen(name) + 2; path = (char*)pg_malloc(len); @@ -4946,7 +5166,7 @@ static bool isMountDirCorrect(const char* basepath, const char* name) securec_check_ss_c(nRet, path, "\0"); if ((chk_mount_dir = opendir(path)) != NULL) { - while ((de_mount = gs_readdir(chk_mount_dir)) != NULL) { + while ((de_mount = readdir(chk_mount_dir)) != NULL) { if (strcmp(".", de_mount->d_name) == 0 || strcmp("..", de_mount->d_name) == 0) { /* skip this and parent directory */ continue; @@ -4977,17 +5197,22 @@ static void mkdirForPgLocationDir() char* path = NULL; int nRet = 0; size_t len = 0; + char* datadir = pg_data; - len = strlen(pg_data) + strlen("pg_location") + 2; + if (enable_dss) { + datadir = vgdata; + } + + len = strlen(datadir) + 1 + strlen("pg_location") + 1; path = (char*)pg_malloc(len); - nRet = sprintf_s(path, len, "%s/pg_location", pg_data); + nRet = sprintf_s(path, len, "%s/pg_location", datadir); securec_check_ss_c(nRet, "\0", "\0"); switch (pg_check_dir(path)) { case 0: /* directory not there, must create it */ - if (pg_mkdir_p(path, S_IRWXU) != 0) { + if (mkdir(path, S_IRWXU) < 0) { char errBuffer[ERROR_LIMIT_LEN]; write_stderr(_("%s: could not create directory \"%s\": %s\n"), progname, @@ -5001,7 +5226,7 @@ static void mkdirForPgLocationDir() case 1: /* Present but empty, fix permissions and use it */ - if (chmod(path, S_IRWXU) != 0) { + if (is_dss_file(path) != 0 && chmod(path, S_IRWXU) != 0) { char errBuffer[ERROR_LIMIT_LEN]; write_stderr(_("%s: could not change permissions of directory \"%s\": %s\n"), progname, @@ -5016,7 +5241,7 @@ static void mkdirForPgLocationDir() case 2: /* Present and not empty */ { - DIR* chk_pg_location_dir = NULL; + DIR *chk_pg_location_dir = NULL; struct dirent* de_pg_location = NULL; if ((chk_pg_location_dir = opendir(path)) != NULL) { diff --git a/src/bin/initdb/ss_initdb.cpp b/src/bin/initdb/ss_initdb.cpp new file mode 100644 index 000000000..4b0994a01 --- /dev/null +++ 
b/src/bin/initdb/ss_initdb.cpp @@ -0,0 +1,363 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * ss_initdb.cpp + * + * + * IDENTIFICATION + * src/bin/initdb/ss_initdb.cpp + * + * --------------------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include + +#include "access/ustore/undo/knl_uundoapi.h" +#include "access/ustore/undo/knl_uundotxn.h" +#include "libpq/pqsignal.h" +#include "mb/pg_wchar.h" +#include "getaddrinfo.h" +#include "getopt_long.h" +#include "miscadmin.h" +#include "bin/elog.h" +#include "ss_initdb.h" +#include "storage/file/fio_device.h" + + +static const char* ss_clusterdirs[] = {"+global", + "+base", + "+pg_tblspc", + "+pg_clog", + "+pg_csnlog", + "+pg_multixact", + "+pg_multixact/members", + "+pg_multixact/offsets", + "+pg_twophase", + "+pg_serial"}; + +static const char* ss_instancedirs[] = {"+pg_xlog", + "+pg_doublewrite" + }; + +static const char* ss_instanceowndirs[] = {"base", + "base/1", + "global", + "pg_xlog", + "pg_xlog/archive_status", + "undo", + "pg_replslot", + "pg_stat_tmp", + "pg_errorinfo", + "pg_logical", + "pg_llog", + "pg_llog/snapshots", + "pg_llog/mappings", + "pg_clog", + "pg_notify", + "pg_csnlog", + "pg_multixact", + "pg_multixact/members", + "pg_multixact/offsets", + "pg_snapshots"}; + +static const char* ss_xlogsubdirs[] = {"archive_status"}; + +/* num of every directory 
type */ +#define SS_CLUSTERDIRS_NUM ARRAY_NUM(ss_clusterdirs) +#define SS_INSTANCEDIRS_NUM ARRAY_NUM(ss_instancedirs) +#define SS_INSTANCEOENDIRS_NUM ARRAY_NUM(ss_instanceowndirs) +#define SS_XLOGSUBIRS_NUM ARRAY_NUM(ss_xlogsubdirs) + +char *ss_nodedatainfo = NULL; +int32 ss_nodeid = INVALID_INSTANCEID; +bool ss_issharedstorage = false; +bool ss_need_mkclusterdir = true; +static const char *ss_progname = "ss_initdb"; + +/* + * pg_ltoa: converts a signed 32-bit integer to its string representation + * + * Caller must ensure that 'a' points to enough memory to hold the result + * (at least 12 bytes, counting a leading sign and trailing NUL). + */ +void pg_ltoa(int32 value, char *a) +{ + char *start = a; + bool neg = false; + errno_t ss_rc; + + if (a == NULL) { + return; + } + + /* + * Avoid problems with the most negative integer not being representable + * as a positive integer. + */ + if (value == (-2147483647 - 1)) { + const int a_len = 12; + ss_rc = memcpy_s(a, a_len, "-2147483648", a_len); + securec_check(ss_rc, "\0", "\0"); + return; + } else if (value < 0) { + value = -value; + neg = true; + } + + /* Compute the result string backwards. */ + do { + int32 remainder; + int32 oldval = value; + + value /= 10; + remainder = oldval - value * 10; + *a++ = (char)('0' + remainder); + } while (value != 0); + + if (neg) { + *a++ = '-'; + } + + /* Add trailing NUL byte, and back up 'a' to the last character. */ + *a-- = '\0'; + + /* Reverse string. 
*/ + while (start < a) { + char swap = *start; + + *start++ = *a; + *a-- = swap; + } +} + +/* check dms url when gs_initdb */ +bool ss_check_nodedatainfo() +{ + bool issharedstorage = false; + + if ((ss_nodeid == INVALID_INSTANCEID && ss_nodedatainfo != NULL) || + (ss_nodeid != INVALID_INSTANCEID && ss_nodedatainfo == NULL)) { + issharedstorage = false; + fprintf(stderr, _("ss_nodeid is invalid or nodedatainfo file not exist or nodedatainfo file is empty.\n")); + exit(1); + } + + if (ss_nodeid != INVALID_INSTANCEID && ss_nodedatainfo != NULL) { + issharedstorage = true; + } + + return issharedstorage; +} + +bool ss_check_existclusterdir(const char *path) +{ + for (uint32 i = 0; i < SS_CLUSTERDIRS_NUM; i++) { + if (strcmp(ss_clusterdirs[i] + 1, path) == 0) { + /* skip this and parent directory */ + return true; + } + } + return false; +} + +bool ss_check_shareddir(char *path) +{ + char *datadir = path; + DIR *chk_pg_data_dir = NULL; + struct dirent *file = NULL; + + if ((chk_pg_data_dir = opendir(datadir)) != NULL) { + while ((file = readdir(chk_pg_data_dir)) != NULL) { + if (strcmp(".", file->d_name) == 0 || strcmp("..", file->d_name) == 0) { + /* skip this and parent directory */ + continue; + } else if (ss_check_existclusterdir(file->d_name)) { + (void)closedir(chk_pg_data_dir); + return true; + } + } + (void)closedir(chk_pg_data_dir); + } + + return false; +} + +void ss_mkdirdir(int32 node_id, const char *pg_data, const char *vgdata_dir, const char *vglog_dir, + bool need_mkclusterdir) +{ + if (node_id == 0 && !need_mkclusterdir) { + printf(_("The dss file needs to be cleared before node 0 init db.\n")); + (void)fflush(stdout); + exit_nicely(); + } + + /* Create required subdirectories */ + printf(_("creating subdirectories ... in shared storage mode ... 
")); + (void)fflush(stdout); + + /* unshared and instance one copy */ + ss_createdir(ss_instanceowndirs, SS_INSTANCEOENDIRS_NUM, INVALID_INSTANCEID, pg_data, vgdata_dir, vglog_dir); + + /* shared and instance one copy */ + ss_createdir(ss_instancedirs, SS_INSTANCEDIRS_NUM, node_id, pg_data, vgdata_dir, vglog_dir); + + /* shared and cluster one copy */ + if (need_mkclusterdir) { + ss_createdir(ss_clusterdirs, SS_CLUSTERDIRS_NUM, INVALID_INSTANCEID, pg_data, vgdata_dir, vglog_dir); + } +} + +void ss_makedirectory(char *path) +{ + /* + * The parent directory already exists, so we only need mkdir() not + * pg_mkdir_p() here, which avoids some failure modes; cf bug #13853. + */ + if (mkdir(path, S_IRWXU) < 0) { + char errBuffer[ERROR_LIMIT_LEN]; + fprintf(stderr, _("%s: could not create directory \"%s\": %s\n"), ss_progname, path, + pqStrerror(errno, errBuffer, ERROR_LIMIT_LEN)); + (void)fflush(stdout); + exit_nicely(); + } +} + +void ss_makesubdir(char *path, const char **subdir, uint num) +{ + size_t len = 0; + errno_t sret = 0; + + for (int i = 0; (unsigned int)i < num; i++) { + len = strlen(path) + strlen(subdir[i]) + 1 + 1; + char *subpath = NULL; + subpath = (char *)pg_malloc(len); + sret = sprintf_s(subpath, len, "%s/%s", path, subdir[i]); + securec_check_ss_c(sret, subpath, "\0"); + ss_makedirectory(subpath); + FREE_AND_RESET(subpath); + } +} + +static char *ss_concat_path(int32 node_id, const char *parent_dir, const char *dir) +{ + char *path = NULL; + char *prepath = NULL; + char nodeid_str[MAXPGPATH]; + size_t len = strlen(parent_dir) + 2 + strlen(dir); + + /* prepared path by connecting vgname and subdir */ + prepath = (char *)pg_malloc(len); + errno_t sret = sprintf_s(prepath, len, "%s/%s", parent_dir, dir + 1); + securec_check_ss_c(sret, prepath, "\0"); + + if (node_id != INVALID_INSTANCEID) { + pg_ltoa(node_id, nodeid_str); + len = len + strlen(nodeid_str); + path = (char *)pg_malloc(len); + + /* full path by connecting prepared path and node id */ + sret 
= sprintf_s(path, len, "%s%d", prepath, node_id); + securec_check_ss_c(sret, path, "\0"); + } else { + path = (char *)pg_malloc(len); + sret = sprintf_s(path, len, "%s", prepath); + securec_check_ss_c(sret, path, "\0"); + } + + FREE_AND_RESET(prepath); + return path; +} + +void ss_createdir(const char **ss_dirs, int32 num, int32 node_id, const char *pg_data, const char *vgdata_dir, + const char *vglog_dir) +{ + int i; + for (i = 0; i < num; i++) { + char *path = NULL; + errno_t sret = 0; + size_t len = 0; + bool is_dss = is_dss_file(ss_dirs[i]); + bool is_xlog = false; + char *link_path = NULL; + + if (vglog_dir[0] != '\0' && (pg_strcasecmp(ss_dirs[i], "+pg_xlog") == 0 || + pg_strcasecmp(ss_dirs[i], "+pg_doublewrite") == 0 || + pg_strcasecmp(ss_dirs[i], "+pg_notify") == 0 || + pg_strcasecmp(ss_dirs[i], "+pg_snapshots") == 0)) { + is_xlog = true; + } + + if (is_dss) { + if (is_xlog) { + path = ss_concat_path(node_id, vglog_dir, ss_dirs[i]); + link_path = ss_concat_path(node_id, vgdata_dir, ss_dirs[i]); + } else { + path = ss_concat_path(node_id, vgdata_dir, ss_dirs[i]); + } + } else { + len = strlen(pg_data) + strlen(ss_dirs[i]) + 1 + 1; + path = (char *)pg_malloc(len); + sret = sprintf_s(path, len, "%s/%s", pg_data, ss_dirs[i]); + securec_check_ss_c(sret, path, "\0"); + } + + ss_makedirectory(path); + if (is_xlog) { + symlink(path, link_path); + } + + /* create subdirectory of pg_xlog/pg_multixact/pg_llog */ + if (is_dss && pg_strcasecmp(ss_dirs[i] + 1, "pg_xlog") == 0) { + ss_makesubdir(path, ss_xlogsubdirs, SS_XLOGSUBIRS_NUM); + } + + FREE_AND_RESET(path); + FREE_AND_RESET(link_path); + } +} + +/* ss_addnodeparmater + * function: add the extra parameter for share storage for dms during gs_Initdb + * input: conflines char** parameter of postgresql.conf had been added previously + * output: conflines char** parameter of postgresql.conf to be added in this function + */ +char **ss_addnodeparmater(char **conflines) +{ + if (!ss_issharedstorage) { + return conflines;
+ } + + int nRet = 0; + char repltok[TZ_STRLEN_MAX + 100]; + + fputs(_("adding dms parameters to configuration files ... "), stdout); + (void)fflush(stdout); + + nRet = sprintf_s(repltok, sizeof(repltok), "ss_instance_id = %d", ss_nodeid); + securec_check_ss_c(nRet, "\0", "\0"); + conflines = replace_token(conflines, "#ss_instance_id = 0", repltok); + + nRet = strcpy_s(repltok, sizeof(repltok), "ss_enable_dms = on"); + securec_check_ss_c(nRet, "\0", "\0"); + conflines = replace_token(conflines, "#ss_enable_dms = off", repltok); + + nRet = sprintf_s(repltok, sizeof(repltok), "ss_interconnect_url = '%s'", ss_nodedatainfo); + securec_check_ss_c(nRet, "\0", "\0"); + conflines = replace_token(conflines, "#ss_interconnect_url = '0:127.0.0.1:1611'", repltok); + + return conflines; +} diff --git a/src/bin/initdb/ss_initdb.h b/src/bin/initdb/ss_initdb.h new file mode 100644 index 000000000..45b56bb59 --- /dev/null +++ b/src/bin/initdb/ss_initdb.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * --------------------------------------------------------------------------------------- + * + * ss_initdb.h + * + * + * IDENTIFICATION + * src/bin/initdb/ss_initdb.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef SS_INITDB_H_ +#define SS_INITTDB_H_ + +#ifndef ERROR_LIMIT_LEN +#define ERROR_LIMIT_LEN 256 +#endif + +#define ARRAY_NUM(a) (sizeof(a) / sizeof((a)[0])) + +extern char **replace_token(char **lines, const char *token, const char *replacement); +extern void exit_nicely(void); +extern void *pg_malloc(size_t size); + +extern char* ss_nodedatainfo; +extern int32 ss_nodeid; + +extern bool ss_issharedstorage; +extern bool ss_need_mkclusterdir; + +/* check dms url when gs_initdb */ +extern bool ss_check_nodedatainfo(); +extern bool ss_check_existclusterdir(const char* path); +extern bool ss_check_shareddir(char* path); +extern void ss_createdir(const char** ss_dirs, int32 num, int32 node_id, const char* pg_data, const char* vgdata_dir, const char* vglog_dir); +extern void ss_mkdirdir(int32 node_id, const char* pg_data, const char* vgdata_dir, const char* vglog_dir, bool need_mkclusterdir); +extern char** ss_addnodeparmater(char** conflines); + + +#define FREE_AND_RESET(ptr) \ + do { \ + if (NULL != (ptr) && reinterpret_cast(ptr) != static_cast("")) { \ + free(ptr); \ + (ptr) = NULL; \ + } \ + } while (0) + +#endif /* SS_INITDB_H */ \ No newline at end of file diff --git a/src/bin/pg_basebackup/CMakeLists.txt b/src/bin/pg_basebackup/CMakeLists.txt index 4682dd01a..499b77306 100755 --- a/src/bin/pg_basebackup/CMakeLists.txt +++ b/src/bin/pg_basebackup/CMakeLists.txt @@ -80,6 +80,9 @@ target_link_directories(pg_receivexlog PUBLIC ${LIBOPENSSL_LIB_PATH} ${LIBCGROUP_LIB_PATH} ${LIBCURL_LIB_PATH} ${ZLIB_LIB_PATH} ${LIBOBS_LIB_PATH} ${LIBCGROUP_LIB_PATH} ${LIBEDIT_LIB_PATH} ${LZ4_LIB_PATH} ${SECURE_LIB_PATH} ${KERBEROS_LIB_PATH} ${CMAKE_BINARY_DIR}/lib ) +# BIND_NOW +target_link_options(pg_receivexlog 
PRIVATE "-Wl,-z,relro,-z,now") + install(TARGETS pg_receivexlog RUNTIME DESTINATION bin) # pg_recvlogical bin diff --git a/src/bin/pg_basebackup/Makefile b/src/bin/pg_basebackup/Makefile index 71615fdf1..dfeeba74a 100644 --- a/src/bin/pg_basebackup/Makefile +++ b/src/bin/pg_basebackup/Makefile @@ -18,7 +18,7 @@ subdir = src/bin/pg_basebackup top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -override CPPFLAGS := -I$(libpq_srcdir) -I$(ZLIB_INCLUDE_PATH) $(CPPFLAGS) -DHAVE_LIBZ -fPIC -fPIE -DFRONTEND -I$(top_builddir)/src/bin/pg_rewind -I$(top_builddir)/src/bin/pg_ctl -I$(LZ4_INCLUDE_PATH) -I$(ZSTD_INCLUDE_PATH) -I${top_builddir}/src/lib/page_compression +override CPPFLAGS := -I$(libpq_srcdir) -I$(ZLIB_INCLUDE_PATH) $(CPPFLAGS) -DHAVE_LIBZ -fPIC -fPIE -DFRONTEND -I$(top_builddir)/src/bin/pg_rewind -I$(top_builddir)/src/bin/pg_ctl -I$(LZ4_INCLUDE_PATH) -I$(ZSTD_INCLUDE_PATH) -I${top_builddir}/src/lib/page_compression -I${top_builddir}/src/include LDFLAGS += -Wl,-z,relro,-z,now -L$(LZ4_LIB_PATH) -L$(ZSTD_LIB_PATH) -L${top_builddir}/src/lib/page_compression LIBS += -llz4 -lzstd -lpagecompression diff --git a/src/bin/pg_basebackup/pg_basebackup.cpp b/src/bin/pg_basebackup/pg_basebackup.cpp index 1e5661d7e..b2933d575 100644 --- a/src/bin/pg_basebackup/pg_basebackup.cpp +++ b/src/bin/pg_basebackup/pg_basebackup.cpp @@ -32,6 +32,7 @@ #include #include +#include "tool_common.h" #include "getopt_long.h" #include "receivelog.h" #include "streamutil.h" @@ -83,6 +84,7 @@ extern int tblspaceIndex; extern int standby_message_timeout; /* 10 sec = default */ + /* Progress counters */ static uint64 totalsize; static uint64 totaldone; @@ -1513,13 +1515,13 @@ static void backup_dw_file(const char *target_dir) char *unaligned_buf = NULL; /* Delete the dw file, if it exists. 
*/ - remove_dw_file(OLD_DW_FILE_NAME, target_dir, real_file_path); + remove_dw_file(T_OLD_DW_FILE_NAME, target_dir, real_file_path); rc = memset_s(real_file_path, (PATH_MAX + 1), 0, (PATH_MAX + 1)); securec_check_c(rc, "\0", "\0"); /* Delete the dw build file, if it exists. */ - remove_dw_file(DW_BUILD_FILE_NAME, target_dir, real_file_path); + remove_dw_file(T_DW_BUILD_FILE_NAME, target_dir, real_file_path); /* Create the dw build file. */ if ((fd = open(real_file_path, (DW_FILE_FLAG | O_CREAT), DW_FILE_PERM)) < 0) { diff --git a/src/bin/pg_basebackup/pg_receivexlog.cpp b/src/bin/pg_basebackup/pg_receivexlog.cpp index e1008bd7b..85b21030b 100644 --- a/src/bin/pg_basebackup/pg_receivexlog.cpp +++ b/src/bin/pg_basebackup/pg_receivexlog.cpp @@ -167,7 +167,7 @@ static XLogRecPtr FindStreamingStart(XLogRecPtr currentpos, uint32 currenttimeli disconnect_and_exit(1); } - if (statbuf.st_size == XLOG_SEG_SIZE) { + if (statbuf.st_size == (off_t)XLogSegSize) { /* Completed segment */ if (segno > high_segno) { high_segno = segno; @@ -277,7 +277,7 @@ static void StreamLog(void) /* * Always start streaming at the beginning of a segment */ - startpos -= startpos % XLOG_SEG_SIZE; + startpos -= startpos % XLogSegSize; /* * Start the replication diff --git a/src/bin/pg_basebackup/receivelog.cpp b/src/bin/pg_basebackup/receivelog.cpp index c3accc749..36a8d3f2c 100644 --- a/src/bin/pg_basebackup/receivelog.cpp +++ b/src/bin/pg_basebackup/receivelog.cpp @@ -130,8 +130,8 @@ static int open_walfile(XLogRecPtr startpoint, uint32 timeline, const char* base MAXFNAMELEN - 1, "%08X%08X%08X", timeline, - (uint32)((startpoint / XLOG_SEG_SIZE) / XLogSegmentsPerXLogId), - (uint32)((startpoint / XLOG_SEG_SIZE) % XLogSegmentsPerXLogId)); + (uint32)((startpoint / XLogSegSize) / XLogSegmentsPerXLogId), + (uint32)((startpoint / XLogSegSize) % XLogSegmentsPerXLogId)); securec_check_ss_c(nRet, "", ""); nRet = snprintf_s(fn, sizeof(fn), sizeof(fn) - 1, "%s/%s.partial", basedir, namebuf); @@ -160,11 
+160,11 @@ static int open_walfile(XLogRecPtr startpoint, uint32 timeline, const char* base f = -1; return -1; } - if (statbuf.st_size == XLogSegSize) + if (statbuf.st_size == (off_t)XLogSegSize) return f; /* File is open and ready to use */ if (statbuf.st_size != 0) { pg_log(PG_PRINT, - _("%s: WAL segment %s is %d bytes, should be 0 or %d\n"), + _("%s: WAL segment %s is %d bytes, should be 0 or %lu\n"), progname, Lrealpath, (int)statbuf.st_size, @@ -302,7 +302,7 @@ static bool close_walfile(int walfile, const char* basedir, char* walname, bool * Rename the .partial file only if we've completed writing the * whole segment or segment_complete is true. */ - if (currpos == XLOG_SEG_SIZE || segment_complete) { + if (currpos == (off_t)XLogSegSize || segment_complete) { char oldfn[MAXPGPATH]; char newfn[MAXPGPATH]; errno_t nRet; @@ -438,7 +438,7 @@ static bool checkForReceiveTimeout(PGconn* conn) static int DoWALWrite(const char* wal_buf, int len, XLogRecPtr& block_pos, const char* basedir, char* cur_wal_file, uint32 wal_file_timeline, int& walfile, stream_stop_callback stream_stop, PGconn* conn) { - int xlogoff = block_pos % XLOG_SEG_SIZE; + int xlogoff = block_pos % XLogSegSize; int bytes_left = len; int bytes_to_write = 0; @@ -470,8 +470,8 @@ static int DoWALWrite(const char* wal_buf, int len, XLogRecPtr& block_pos, const while (bytes_left) { /* If crossing a WAL boundary, only write up until we reach XLOG_SEG_SIZE. */ - if (xlogoff + bytes_left > XLOG_SEG_SIZE) - bytes_to_write = XLOG_SEG_SIZE - xlogoff; + if (xlogoff + bytes_left > (int)XLogSegSize) + bytes_to_write = (int)XLogSegSize - xlogoff; else bytes_to_write = bytes_left; @@ -502,7 +502,7 @@ static int DoWALWrite(const char* wal_buf, int len, XLogRecPtr& block_pos, const xlogoff += bytes_to_write; /* Did we reach the end of a WAL segment? 
*/ - if (block_pos % XLOG_SEG_SIZE == 0) { + if (block_pos % XLogSegSize == 0) { if (!close_walfile(walfile, basedir, cur_wal_file, false, block_pos)) { suspendHeartBeatTimer(); /* Error message written in close_walfile() */ diff --git a/src/bin/pg_controldata/CMakeLists.txt b/src/bin/pg_controldata/CMakeLists.txt index 61d30c0f4..0da485c35 100755 --- a/src/bin/pg_controldata/CMakeLists.txt +++ b/src/bin/pg_controldata/CMakeLists.txt @@ -1,6 +1,10 @@ #This is the main CMAKE for build all components. +execute_process( + COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/dss/dss_adaptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dss_adaptor.cpp +) set(TGT_controldata_SRC ${CMAKE_CURRENT_SOURCE_DIR}/pg_controldata.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dss_adaptor.cpp ) set(TGT_controldata_INC diff --git a/src/bin/pg_controldata/Makefile b/src/bin/pg_controldata/Makefile index 7ebdf2dc5..ae67b57d5 100644 --- a/src/bin/pg_controldata/Makefile +++ b/src/bin/pg_controldata/Makefile @@ -22,7 +22,8 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif endif -OBJS= pg_controldata.o $(WIN32RES) $(top_builddir)/src/lib/elog/elog.a +OBJS= pg_controldata.o $(WIN32RES) $(top_builddir)/src/lib/elog/elog.a \ + $(top_builddir)/src/gausskernel/storage/dss/dss_adaptor.o override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -fPIC -fPIE diff --git a/src/bin/pg_controldata/pg_controldata.cpp b/src/bin/pg_controldata/pg_controldata.cpp index d9486f660..6d1dc64cc 100644 --- a/src/bin/pg_controldata/pg_controldata.cpp +++ b/src/bin/pg_controldata/pg_controldata.cpp @@ -27,17 +27,29 @@ #include "access/xlog.h" #include "catalog/pg_control.h" #include "bin/elog.h" +#include "getopt_long.h" +#include "storage/dss/dss_adaptor.h" +#include "storage/file/fio_device.h" + #define FirstNormalTransactionId ((TransactionId)3) #define TransactionIdIsNormal(xid) ((xid) >= FirstNormalTransactionId) -static void usage(const char* progname) +static const char *progname; +static bool enable_dss = false; + +static void 
usage(const char* prog_name) { - printf(_("%s displays control information of a openGauss database cluster.\n\n"), progname); + printf(_("%s displays control information of a openGauss database cluster.\n\n"), prog_name); printf(_("Usage:\n")); - printf(_(" %s [OPTION] [DATADIR]\n"), progname); + printf(_(" %s [OPTION] [DATADIR]\n"), prog_name); printf(_("\nOptions:\n")); - printf(_(" -V, --version output version information, then exit\n")); - printf(_(" -?, --help show this help, then exit\n")); + printf(_(" -I, --instance-id=INSTANCE_ID\n")); + printf(_(" display information of specified instance (default all)\n")); + printf(_(" --enable-dss enable shared storage mode\n")); + printf(_(" --socketpath=SOCKETPATH\n")); + printf(_(" dss connect socket file path\n")); + printf(_(" -V, --version output version information, then exit\n")); + printf(_(" -?, --help show this help, then exit\n")); printf(_("\nIf no data directory (DATADIR) is specified, " "the environment variable PGDATA\nis used.\n")); #if ((defined(ENABLE_MULTIPLE_NODES)) || (defined(ENABLE_PRIVATEGAUSS))) @@ -87,87 +99,30 @@ static const char* wal_level_str(WalLevel wal_level) return _("unrecognized wal_level"); } -int main(int argc, char* argv[]) +static void exit_safely(int returnCode) +{ + if (progname != NULL) { + free((char*)progname); + progname = NULL; + } + exit(returnCode); +} + +static void display_data(ControlFileData ControlFile, int instance_id) { - ControlFileData ControlFile; - int fd = -1; - char ControlFilePath[MAXPGPATH]; - char* DataDir = NULL; pg_crc32c crc; /* pg_crc32c as same as pg_crc32 */ time_t time_tmp; char pgctime_str[128]; char ckpttime_str[128]; char sysident_str[32]; const char* strftime_fmt = "%c"; - const char* progname = NULL; int sret = 0; - set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_controldata")); - - progname = get_progname(argv[0]); - - if (argc > 1) { - if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { - usage(progname); - if (progname != 
NULL) { - free((char*)progname); - progname = NULL; - } - exit(0); - } - if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { -#ifdef ENABLE_MULTIPLE_NODES - puts("pg_controldata (PostgreSQL) " PG_VERSION); -#else - puts("pg_controldata (openGauss) " PG_VERSION); -#endif - if (progname != NULL) { - free((char*)progname); - progname = NULL; - } - exit(0); - } + /* display instance id in shared storage mode */ + if (enable_dss) { + printf(_("\npg_control data (instance id %d)\n\n"), instance_id); } - if (argc > 1) { - DataDir = argv[1]; - } else { - DataDir = getenv("PGDATA"); - } - if (DataDir == NULL) { - fprintf(stderr, _("%s: no data directory specified\n"), progname); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); - if (progname != NULL) { - free((char*)progname); - progname = NULL; - } - exit(1); - } - check_env_value_c(DataDir); - sret = snprintf_s(ControlFilePath, MAXPGPATH, MAXPGPATH - 1, "%s/global/pg_control", DataDir); - securec_check_ss_c(sret, "\0", "\0"); - - if ((fd = open(ControlFilePath, O_RDONLY | PG_BINARY, 0)) == -1) { - fprintf( - stderr, _("%s: could not open file \"%s\" for reading: %s\n"), progname, ControlFilePath, strerror(errno)); - if (progname != NULL) { - free((char*)progname); - progname = NULL; - } - exit(2); - } - - if (read(fd, &ControlFile, sizeof(ControlFileData)) != sizeof(ControlFileData)) { - fprintf(stderr, _("%s: could not read file \"%s\": %s\n"), progname, ControlFilePath, strerror(errno)); - close(fd); - if (progname != NULL) { - free((char*)progname); - progname = NULL; - } - exit(2); - } - close(fd); - /* Check the CRC. 
*/ /* using CRC32C since 923 */ INIT_CRC32C(crc); @@ -231,7 +186,7 @@ int main(int argc, char* argv[]) printf(_("Latest checkpoint's oldestXID: " XID_FMT "\n"), ControlFile.checkPointCopy.oldestXid); printf(_("Latest checkpoint's oldestXID's DB: %u\n"), ControlFile.checkPointCopy.oldestXidDB); printf(_("Latest checkpoint's oldestActiveXID: " XID_FMT "\n"), ControlFile.checkPointCopy.oldestActiveXid); - printf(_("Latest checkpoint's remove lsn: %X/%X\n"), + printf(_("Latest checkpoint's remove lsn: %X/%X\n"), (uint32)(ControlFile.checkPointCopy.remove_seg >> 32), (uint32)ControlFile.checkPointCopy.remove_seg); printf(_("Time of latest checkpoint: %s\n"), ckpttime_str); @@ -265,6 +220,151 @@ int main(int argc, char* argv[]) printf( _("Float8 argument passing: %s\n"), (ControlFile.float8ByVal ? _("by value") : _("by reference"))); printf(_("Database system TimeLine: %u\n"), ControlFile.timeline); +} + +int main(int argc, char* argv[]) +{ + ControlFileData ControlFile; + int fd = -1; + bool display_all = true; + char ControlFilePath[MAXPGPATH]; + char* DataDir = NULL; + char* socketpath = NULL; + int sret = 0; + int seekpos; + int option_value; + int option_index; + int display_id; + int ss_nodeid = MIN_INSTANCEID; + off_t ControlFileSize; + + static struct option long_options[] = {{"enable-dss", no_argument, NULL, 1}, + {"socketpath", required_argument, NULL, 2}}; + + set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_controldata")); + + progname = get_progname(argv[0]); + + if (argc > 1) { + if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { + usage(progname); + exit_safely(0); + } + if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { +#ifdef ENABLE_MULTIPLE_NODES + puts("pg_controldata (PostgreSQL) " PG_VERSION); +#else + puts("pg_controldata (openGauss) " PG_VERSION); +#endif + exit_safely(0); + } + } + + while ((option_value = getopt_long(argc, argv, "I:V", long_options, &option_index)) != -1) { + switch (option_value) { + case 
'I': + if (atoi(optarg) < MIN_INSTANCEID || atoi(optarg) > MAX_INSTANCEID) { + fprintf(stderr, _("%s: unexpected node id specified, valid range is %d - %d\n"), + progname, MIN_INSTANCEID, MAX_INSTANCEID); + exit_safely(1); + } + ss_nodeid = atoi(optarg); + display_all = false; + break; + case 1: + enable_dss = true; + break; + case 2: + enable_dss = true; + socketpath = strdup(optarg); + break; + default: + /* getopt_long already emitted a complaint */ + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + exit_safely(1); + } + } + + if (optind < argc) { + DataDir = argv[optind]; + } else { + DataDir = getenv("PGDATA"); + } + if (DataDir == NULL) { + fprintf(stderr, _("%s: no data directory specified\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); + exit_safely(1); + } + check_env_value_c(DataDir); + + if (enable_dss) { + if (socketpath == NULL) { + fprintf(stderr, _("%s: socketpath cannot be NULL when enable dss\n"), progname); + exit_safely(1); + } + + if (DataDir[0] != '+') { + fprintf(stderr, _("%s: DATADIR should be specified when enable dss\n"), progname); + exit_safely(1); + } + } + + // dss device init + if (dss_device_init(socketpath, enable_dss) != DSS_SUCCESS) { + fprintf(stderr, _("failed to init dss device\n")); + exit_safely(1); + } + + if (enable_dss) { + // in shared storage mode, the cluster contains only one pg_control file + sret = snprintf_s(ControlFilePath, MAXPGPATH, MAXPGPATH - 1, "%s/pg_control", DataDir); + } else { + sret = snprintf_s(ControlFilePath, MAXPGPATH, MAXPGPATH - 1, "%s/global/pg_control", DataDir); + } + securec_check_ss_c(sret, "\0", "\0"); + + fd = open(ControlFilePath, O_RDONLY | PG_BINARY, 0); + if (fd < 0) { + fprintf( + stderr, _("%s: could not open file \"%s\" for reading: %s\n"), progname, ControlFilePath, strerror(errno)); + exit_safely(2); + } + + if ((ControlFileSize = lseek(fd, 0, SEEK_END)) < 0) { + fprintf(stderr, _("%s: could not get \"%s\" 
size: %s\n"), progname, ControlFilePath, strerror(errno)); + close(fd); + exit_safely(2); + } + + display_id = ss_nodeid; + seekpos = (int)BLCKSZ * ss_nodeid; + if (seekpos >= ControlFileSize) { + fprintf(stderr, _("%s: cound not read beyond end of file \"%s\", file_size: %ld, instance_id: %d\n"), + progname, ControlFilePath, ControlFileSize, ss_nodeid); + close(fd); + exit_safely(2); + } + + do { + if (lseek(fd, (off_t)seekpos, SEEK_SET) < 0) { + fprintf(stderr, _("%s: could not seek in \"%s\" to offset %d: %s\n"), + progname, ControlFilePath, seekpos, strerror(errno)); + close(fd); + exit_safely(2); + } + + if (read(fd, &ControlFile, sizeof(ControlFileData)) != sizeof(ControlFileData)) { + fprintf(stderr, _("%s: could not read file \"%s\": %s\n"), progname, ControlFilePath, strerror(errno)); + close(fd); + exit_safely(2); + } + + display_data(ControlFile, display_id); + seekpos += BLCKSZ; + display_id = display_id + 1; + } while (display_all && seekpos < ControlFileSize); + + close(fd); if (progname != NULL) { free((char*)progname); progname = NULL; diff --git a/src/bin/pg_ctl/Makefile b/src/bin/pg_ctl/Makefile index 3cff126b3..c72eb61e4 100644 --- a/src/bin/pg_ctl/Makefile +++ b/src/bin/pg_ctl/Makefile @@ -16,7 +16,7 @@ subdir = src/bin/pg_ctl top_builddir = ../../.. 
include $(top_builddir)/src/Makefile.global -override CPPFLAGS := -I$(libpq_srcdir) -I$(ZLIB_INCLUDE_PATH) $(CPPFLAGS) -DHAVE_LIBZ -DFRONTEND -I$(top_builddir)/src/bin/pg_rewind -I${top_builddir}/src/lib/page_compression +override CPPFLAGS := -I$(libpq_srcdir) -I$(ZLIB_INCLUDE_PATH) $(CPPFLAGS) -DHAVE_LIBZ -DFRONTEND -I$(top_builddir)/src/bin/pg_rewind -I${top_builddir}/src/lib/page_compression -I${top_builddir}/src/include override LDFLAGS += -L$(LZ4_LIB_PATH) -L$(ZSTD_LIB_PATH) -L${top_builddir}/src/lib/page_compression ifeq ($(enable_lite_mode), no) LIBS += -lgssapi_krb5_gauss -lgssrpc_gauss -lkrb5_gauss -lkrb5support_gauss -lk5crypto_gauss -lcom_err_gauss -lpagecompression -lzstd -llz4 diff --git a/src/bin/pg_ctl/backup.cpp b/src/bin/pg_ctl/backup.cpp index e7928eb8f..2a3bb91f4 100755 --- a/src/bin/pg_ctl/backup.cpp +++ b/src/bin/pg_ctl/backup.cpp @@ -41,6 +41,7 @@ #include "backup.h" #include "logging.h" +#include "tool_common.h" #include "bin/elog.h" #include "file_ops.h" #include "catalog/catalog.h" @@ -52,6 +53,7 @@ #include "fetchmot.h" #endif + /* Maximum number of digit in integer. Used to allocate memory to copy int to string */ #define MAX_INT_SIZE 20 /* set build receive timeout during master getting in backup mode */ @@ -518,7 +520,7 @@ bool StartLogStreamer( Assert(!XLogRecPtrIsInvalid(param->startptr)); /* Round off to even segment position */ - param->startptr -= param->startptr % XLOG_SEG_SIZE; + param->startptr -= param->startptr % XLogSegSize; #ifndef WIN32 /* Create our background pipe */ @@ -2343,7 +2345,7 @@ static bool backup_dw_file(const char* target_dir) char* unaligned_buf = NULL; /* Delete the dw file, if it exists. 
*/ - rc = snprintf_s(dw_file_path, PATH_MAX, PATH_MAX - 1, "%s/%s", target_dir, OLD_DW_FILE_NAME); + rc = snprintf_s(dw_file_path, PATH_MAX, PATH_MAX - 1, "%s/%s", target_dir, T_OLD_DW_FILE_NAME); securec_check_ss_c(rc, "\0", "\0"); if (realpath(dw_file_path, real_file_path) == NULL) { if (real_file_path[0] == '\0') { @@ -2357,7 +2359,7 @@ static bool backup_dw_file(const char* target_dir) securec_check_c(rc, "\0", "\0"); /* Delete the dw build file, if it exists. */ - rc = snprintf_s(dw_file_path, PATH_MAX, PATH_MAX - 1, "%s/%s", target_dir, DW_BUILD_FILE_NAME); + rc = snprintf_s(dw_file_path, PATH_MAX, PATH_MAX - 1, "%s/%s", target_dir, T_DW_BUILD_FILE_NAME); securec_check_ss_c(rc, "\0", "\0"); if (realpath(dw_file_path, real_file_path) == NULL) { if (real_file_path[0] == '\0') { diff --git a/src/bin/pg_ctl/pg_ctl.cpp b/src/bin/pg_ctl/pg_ctl.cpp index bf0f87de3..8dc296a37 100755 --- a/src/bin/pg_ctl/pg_ctl.cpp +++ b/src/bin/pg_ctl/pg_ctl.cpp @@ -54,6 +54,7 @@ #include "fetch.h" #include "common/fe_memutils.h" #include "logging.h" +#include "tool_common.h" #ifdef ENABLE_MOT #include "fetchmot.h" @@ -80,6 +81,7 @@ #define static #endif + /* PID can be negative for standalone backend */ typedef long pgpid_t; @@ -6720,6 +6722,7 @@ int main(int argc, char** argv) do_wait = false; } + initDataPathStruct(false); SetConfigFilePath(); pg_host = getenv("PGHOST"); diff --git a/src/bin/pg_ctl/receivelog.cpp b/src/bin/pg_ctl/receivelog.cpp index cb2696f93..b6460e4cb 100644 --- a/src/bin/pg_ctl/receivelog.cpp +++ b/src/bin/pg_ctl/receivelog.cpp @@ -92,8 +92,8 @@ static int open_walfile(XLogRecPtr startpoint, uint32 timeline, const char* base MAXFNAMELEN - 1, "%08X%08X%08X", timeline, - (uint32)((startpoint / XLOG_SEG_SIZE) / XLogSegmentsPerXLogId), - (uint32)((startpoint / XLOG_SEG_SIZE) % XLogSegmentsPerXLogId)); + (uint32)((startpoint / XLogSegSize) / XLogSegmentsPerXLogId), + (uint32)((startpoint / XLogSegSize) % XLogSegmentsPerXLogId)); securec_check_ss_c(nRet, "", ""); 
nRet = snprintf_s(fn, sizeof(fn), sizeof(fn) - 1, "%s/%s.partial", basedir, namebuf); @@ -122,11 +122,11 @@ static int open_walfile(XLogRecPtr startpoint, uint32 timeline, const char* base f = -1; return -1; } - if (statbuf.st_size == XLogSegSize) + if (statbuf.st_size == (off_t)XLogSegSize) return f; /* File is open and ready to use */ if (statbuf.st_size != 0) { pg_log(PG_PRINT, - _("%s: WAL segment %s is %d bytes, should be 0 or %d\n"), + _("%s: WAL segment %s is %d bytes, should be 0 or %lu\n"), progname, Lrealpath, (int)statbuf.st_size, @@ -263,7 +263,7 @@ static bool close_walfile(int walfile, const char* basedir, char* walname, bool * Rename the .partial file only if we've completed writing the * whole segment or segment_complete is true. */ - if (currpos == XLOG_SEG_SIZE || segment_complete) { + if (currpos == (off_t)XLogSegSize || segment_complete) { char oldfn[MAXPGPATH]; char newfn[MAXPGPATH]; errno_t nRet; @@ -398,7 +398,7 @@ static bool checkForReceiveTimeout(PGconn* conn) static int DoWALWrite(const char* wal_buf, int len, XLogRecPtr& block_pos, const char* basedir, char* cur_wal_file, uint32 wal_file_timeline, int& walfile, stream_stop_callback stream_stop, PGconn* conn) { - int xlogoff = block_pos % XLOG_SEG_SIZE; + int xlogoff = block_pos % XLogSegSize; int bytes_left = len; int bytes_to_write = 0; @@ -430,8 +430,8 @@ static int DoWALWrite(const char* wal_buf, int len, XLogRecPtr& block_pos, const while (bytes_left) { /* If crossing a WAL boundary, only write up until we reach XLOG_SEG_SIZE. */ - if (xlogoff + bytes_left > XLOG_SEG_SIZE) - bytes_to_write = XLOG_SEG_SIZE - xlogoff; + if (xlogoff + bytes_left > (int)XLogSegSize) + bytes_to_write = (int)XLogSegSize - xlogoff; else bytes_to_write = bytes_left; @@ -462,7 +462,7 @@ static int DoWALWrite(const char* wal_buf, int len, XLogRecPtr& block_pos, const xlogoff += bytes_to_write; /* Did we reach the end of a WAL segment? 
*/ - if (block_pos % XLOG_SEG_SIZE == 0) { + if (block_pos % XLogSegSize == 0) { if (!close_walfile(walfile, basedir, cur_wal_file, false, block_pos)) { suspendHeartBeatTimer(); /* Error message written in close_walfile() */ diff --git a/src/bin/pg_probackup/CMakeLists.txt b/src/bin/pg_probackup/CMakeLists.txt index 3bfae5045..de199c6d7 100755 --- a/src/bin/pg_probackup/CMakeLists.txt +++ b/src/bin/pg_probackup/CMakeLists.txt @@ -9,6 +9,7 @@ execute_process( COMMAND ln -fs ${PROJECT_SRC_DIR}/bin/pg_basebackup/receivelog.cpp ${CMAKE_CURRENT_SOURCE_DIR}/receivelog.cpp COMMAND ln -fs ${PROJECT_SRC_DIR}/bin/pg_basebackup/streamutil.cpp ${CMAKE_CURRENT_SOURCE_DIR}/streamutil.cpp COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/access/transam/xlogreader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/xlogreader.cpp + COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/dss/dss_adaptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dss_adaptor.cpp ) AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_probackup_SRC) diff --git a/src/bin/pg_probackup/Makefile b/src/bin/pg_probackup/Makefile index 1b17edea8..fd31eb030 100644 --- a/src/bin/pg_probackup/Makefile +++ b/src/bin/pg_probackup/Makefile @@ -21,7 +21,8 @@ OBJS += backup.o catalog.o configure.o data.o \ OBJS += datapagemap.o receivelog.o streamutil.o \ xlogreader.o pg_lzcompress.o -OBJS += $(top_builddir)/src/lib/pgcommon/libpgcommon.a +OBJS += $(top_builddir)/src/lib/pgcommon/libpgcommon.a \ + $(top_builddir)/src/gausskernel/storage/dss/dss_adaptor.o EXTRA_CLEAN = datapagemap.cpp datapagemap.h \ receivelog.cpp receivelog.h streamutil.cpp streamutil.h \ @@ -40,7 +41,7 @@ endif PG_CPPFLAGS = -I$(libpq_srcdir) ${PTHREAD_CFLAGS} -Isrc -I$(top_builddir)/$(subdir) -I$(LZ4_INCLUDE_PATH) -I$(ZLIB_INCLUDE_PATH) -I$(ZSTD_INCLUDE_PATH) # add page_compression so .h LDFLAGS += -L../../lib/page_compression -PG_CPPFLAGS = -I../../lib/page_compression +PG_CPPFLAGS = -I../../lib/page_compression -I../../src/common/port LIBS += -lpagecompression -lzstd 
override CPPFLAGS := -DFRONTEND $(CPPFLAGS) $(PG_CPPFLAGS) -DHAVE_LIBZ PG_LIBS_INTERNAL = $(libpq_pgport) ${PTHREAD_CFLAGS} diff --git a/src/bin/pg_probackup/backup.cpp b/src/bin/pg_probackup/backup.cpp index 8ddeb63e9..886983735 100644 --- a/src/bin/pg_probackup/backup.cpp +++ b/src/bin/pg_probackup/backup.cpp @@ -23,9 +23,12 @@ #include #include +#include "tool_common.h" #include "thread.h" #include "file.h" #include "common/fe_memutils.h" +#include "storage/file/fio_device.h" + /* list of dirs which will not to be backuped it will be backuped up in external dirs */ @@ -134,9 +137,9 @@ backup_stopbackup_callback(bool fatal, void *userdata) } } -static void run_backup_threads(char *external_prefix, char *database_path, - parray *prev_backup_filelist, parray *external_dirs, - PGNodeInfo *nodeInfo, XLogRecPtr prev_backup_start_lsn) +static void run_backup_threads(char *external_prefix, char *database_path, char *dssdata_path, + parray *prev_backup_filelist, parray *external_dirs, + PGNodeInfo *nodeInfo, XLogRecPtr prev_backup_start_lsn) { int i; int nRet = 0; @@ -163,6 +166,8 @@ static void run_backup_threads(char *external_prefix, char *database_path, securec_check_ss_c(nRet, "\0", "\0"); join_path_components(dirpath, temp, file->rel_path); } + else if (is_dss_type(file->type)) + join_path_components(dirpath, dssdata_path, file->rel_path); else join_path_components(dirpath, database_path, file->rel_path); @@ -205,6 +210,8 @@ static void run_backup_threads(char *external_prefix, char *database_path, arg->nodeInfo = nodeInfo; arg->from_root = instance_config.pgdata; arg->to_root = database_path; + arg->src_dss = instance_config.dss.vgdata; + arg->dst_dss = dssdata_path; arg->external_prefix = external_prefix; arg->external_dirs = external_dirs; arg->files_list = backup_files_list; @@ -412,16 +419,47 @@ static void calc_pgdata_bytes() elog(INFO, "PGDATA size: %s", pretty_bytes); } -static void add_xlog_files_into_backup_list(const char *database_path) +static void 
add_xlog_files_into_backup_list(const char *database_path, const char *dssdata_path, + int instance_id, bool enable_dss) { int i; parray *xlog_files_list; char pg_xlog_path[MAXPGPATH]; char wal_full_path[MAXPGPATH]; + const char *parent_path; /* Scan backup PG_XLOG_DIR */ xlog_files_list = parray_new(); - join_path_components(pg_xlog_path, database_path, PG_XLOG_DIR); + + /* link dssdata's pg_xlog to database's pg_xlog */ + if (enable_dss) { + char database_xlog[MAXPGPATH]; + char dssdata_xlog[MAXPGPATH]; + errno_t rc; + + rc = snprintf_s(dssdata_xlog, MAXPGPATH, MAXPGPATH - 1, "%s/%s%d", dssdata_path, PG_XLOG_DIR, instance_id); + securec_check_ss_c(rc, "\0", "\0"); + join_path_components(database_xlog, database_path, PG_XLOG_DIR); + + /* dssdata_xlog is already exist, destory it and recreate */ + if (rmdir(dssdata_xlog) != 0) { + elog(ERROR, "can not remove xlog dir \"%s\" : %s", dssdata_xlog, strerror(errno)); + } + + if (symlink(database_xlog, dssdata_xlog) < 0) { + elog(ERROR, "can not link dss xlog dir \"%s\" to database xlog dir \"%s\": %s", dssdata_xlog, database_xlog, + strerror(errno)); + } + + rc = strcpy_s(pg_xlog_path, MAXPGPATH, dssdata_xlog); + securec_check_c(rc, "\0", "\0"); + parent_path = dssdata_path; + } else { + join_path_components(pg_xlog_path, database_path, PG_XLOG_DIR); + parent_path = database_path; + } + + dir_list_file(xlog_files_list, pg_xlog_path, false, true, false, false, true, 0, FIO_BACKUP_HOST); @@ -435,6 +473,11 @@ static void add_xlog_files_into_backup_list(const char *database_path) if (!S_ISREG(file->mode)) continue; + /* refresh file type */ + if (enable_dss) { + file->type = DEV_TYPE_DSS; + } + file->crc = pgFileGetCRC(wal_full_path, true, false); file->write_size = file->size; @@ -444,7 +487,7 @@ static void add_xlog_files_into_backup_list(const char *database_path) pg_free(file->rel_path); /* Now it is relative to /backup_dir/backups/instance_name/backup_id/database/ */ - file->rel_path = 
pgut_strdup(GetRelativePath(wal_full_path, database_path)); + file->rel_path = pgut_strdup(GetRelativePath(wal_full_path, parent_path)); file->name = last_dir_separator(file->rel_path); @@ -461,7 +504,7 @@ static void add_xlog_files_into_backup_list(const char *database_path) static void sync_files(parray *database_map, const char *database_path, parray *external_dirs, - const char *external_prefix, bool no_sync) + const char *dssdata_path, const char *external_prefix, bool no_sync) { time_t start_time, end_time; char pretty_time[20]; @@ -472,19 +515,28 @@ static void sync_files(parray *database_map, const char *database_path, parray * if (current.from_replica && !exclusive_backup) { pgFile *pg_control = NULL; + char fullpath[MAXPGPATH]; for (unsigned int i = 0; i < parray_num(backup_files_list); i++) { pgFile *tmp_file = (pgFile *)parray_get(backup_files_list, (size_t)i); if (tmp_file->external_dir_num == 0 && - (strcmp(tmp_file->rel_path, XLOG_CONTROL_FILE) == 0)) + (strcmp(tmp_file->name, PG_XLOG_CONTROL_FILE) == 0)) { pg_control = tmp_file; break; } } - if (!pg_control) - elog(ERROR, "Failed to find file \"%s\" in backup filelist.", XLOG_CONTROL_FILE); - set_min_recovery_point(pg_control, database_path, current.stop_lsn); + if (!pg_control) { + elog(ERROR, "Failed to find file \"%s\" in backup filelist.", T_XLOG_CONTROL_FILE); + } + + if (is_dss_type(pg_control->type)) { + join_path_components(fullpath, dssdata_path, pg_control->rel_path); + } else { + join_path_components(fullpath, database_path, pg_control->rel_path); + } + + set_min_recovery_point(pg_control, fullpath, current.stop_lsn); } /* close and sync page header map */ @@ -502,7 +554,7 @@ static void sync_files(parray *database_map, const char *database_path, parray * /* Add archived xlog files into the list of files of this backup */ if (stream_wal) { - add_xlog_files_into_backup_list(database_path); + add_xlog_files_into_backup_list(database_path, dssdata_path, instance_config.dss.instance_id, 
IsDssMode()); } /* write database map to file and add it to control file */ @@ -541,9 +593,7 @@ static void sync_files(parray *database_map, const char *database_path, parray * continue; /* construct fullpath */ - if (file->external_dir_num == 0) - join_path_components(to_fullpath, database_path, file->rel_path); - else + if (file->external_dir_num != 0) { char external_dst[MAXPGPATH]; @@ -551,6 +601,10 @@ static void sync_files(parray *database_map, const char *database_path, parray * file->external_dir_num); join_path_components(to_fullpath, external_dst, file->rel_path); } + else if (is_dss_type(file->type)) + join_path_components(to_fullpath, dssdata_path, file->rel_path); + else + join_path_components(to_fullpath, database_path, file->rel_path); if (fio_sync(to_fullpath, FIO_BACKUP_HOST) != 0) elog(ERROR, "Cannot sync file \"%s\": %s", to_fullpath, strerror(errno)); @@ -572,6 +626,7 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool { int i; char database_path[MAXPGPATH]; + char dssdata_path[MAXPGPATH]; char external_prefix[MAXPGPATH]; /* Temp value. 
Used as template */ char label[1024]; XLogRecPtr prev_backup_start_lsn = InvalidXLogRecPtr; @@ -619,6 +674,8 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool pgBackupGetPath(¤t, database_path, lengthof(database_path), DATABASE_DIR); + pgBackupGetPath(¤t, dssdata_path, lengthof(dssdata_path), + DSSDATA_DIR); pgBackupGetPath(¤t, external_prefix, lengthof(external_prefix), EXTERNAL_DIR); @@ -639,6 +696,12 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool dir_list_file(backup_files_list, instance_config.pgdata, true, true, false, backup_logs, true, 0, FIO_LOCAL_HOST, backup_replslots); + /* some files are storage in dss server, list them */ + if (IsDssMode()) { + dir_list_file(backup_files_list, instance_config.dss.vgdata, + true, true, false, backup_logs, true, 0, FIO_DSS_HOST); + } + /* * Get database_map (name to oid) for use in partial restore feature. * It's possible that we fail and database_map will be NULL. @@ -726,7 +789,7 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool /* * Make directories before backup and setup threads at the same time */ - run_backup_threads(external_prefix, database_path, prev_backup_filelist, + run_backup_threads(external_prefix, database_path, dssdata_path, prev_backup_filelist, external_dirs, nodeInfo, prev_backup_start_lsn); /* clean previous backup file list */ @@ -739,7 +802,7 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool /* Notify end of backup */ pg_stop_backup(¤t, backup_conn, nodeInfo); - sync_files(database_map, database_path, external_dirs, external_prefix, no_sync); + sync_files(database_map, database_path, external_dirs, dssdata_path, external_prefix, no_sync); /* be paranoid about instance been from the past */ if (current.backup_mode != BACKUP_MODE_FULL && @@ -874,10 +937,14 @@ do_backup(time_t start_time, pgSetBackupParams *set_backup_params, /* Initialize PGInfonode */ 
pgNodeInit(&nodeInfo); + /* vgname of dss is already checked in previous step */ if (!instance_config.pgdata) elog(ERROR, "required parameter not specified: PGDATA " "(-D, --pgdata)"); + if (IsDssMode() && current.backup_mode != BACKUP_MODE_FULL) + elog(ERROR, "only support full backup when enable dss."); + /* Update backup status and other metainfo. */ current.status = BACKUP_STATUS_RUNNING; current.start_time = start_time; @@ -889,6 +956,8 @@ do_backup(time_t start_time, pgSetBackupParams *set_backup_params, current.compress_alg = instance_config.compress_alg; current.compress_level = instance_config.compress_level; + current.storage_type = IsDssMode() ? DEV_TYPE_DSS : DEV_TYPE_FILE; + /* Save list of external directories */ if (instance_config.external_dir_str && (pg_strcasecmp(instance_config.external_dir_str, "none") != 0)) @@ -2054,12 +2123,7 @@ backup_files(void *arg) } /* construct destination filepath */ - if (file->external_dir_num == 0) - { - join_path_components(from_fullpath, arguments->from_root, file->rel_path); - join_path_components(to_fullpath, arguments->to_root, file->rel_path); - } - else + if (file->external_dir_num != 0) { char external_dst[MAXPGPATH]; char *external_path = (char *)parray_get(arguments->external_dirs, @@ -2072,6 +2136,16 @@ backup_files(void *arg) join_path_components(to_fullpath, external_dst, file->rel_path); join_path_components(from_fullpath, external_path, file->rel_path); } + else if (is_dss_type(file->type)) + { + join_path_components(from_fullpath, arguments->src_dss, file->rel_path); + join_path_components(to_fullpath, arguments->dst_dss, file->rel_path); + } + else + { + join_path_components(from_fullpath, arguments->from_root, file->rel_path); + join_path_components(to_fullpath, arguments->to_root, file->rel_path); + } /* Encountered some strange beast */ if (!S_ISREG(file->mode)) @@ -2480,6 +2554,16 @@ check_external_for_tablespaces(parray *external_list, PGconn *backup_conn) res = pgut_execute(backup_conn, query, 
0, NULL); + /* Check that external directories do not contain dsspath */ + for (i = 0; i < (int)parray_num(external_list); i++) { + char *external_path = (char *)parray_get(external_list, i); + if (is_dss_file(external_path)) + elog(ERROR, + "External directory path (-E option) \"%s\" " + "contains dss path, which is not allow now", + external_path); + } + /* Check successfull execution of query */ if (!res) elog(ERROR, "Failed to get list of tablespaces"); diff --git a/src/bin/pg_probackup/catalog.cpp b/src/bin/pg_probackup/catalog.cpp index 18206f375..6963b5bf9 100644 --- a/src/bin/pg_probackup/catalog.cpp +++ b/src/bin/pg_probackup/catalog.cpp @@ -877,6 +877,16 @@ pgBackupCreateDir(pgBackup *backup) backup->database_dir = (char *)pgut_malloc(MAXPGPATH); join_path_components(backup->database_dir, backup->root_dir, DATABASE_DIR); + if (IsDssMode()) + { + /* prepare dssdata_dir */ + backup->dssdata_dir = (char *)pgut_malloc(MAXPGPATH); + join_path_components(backup->dssdata_dir, backup->root_dir, DSSDATA_DIR); + + /* add into subdirs array, which will be create later */ + parray_append(subdirs, pg_strdup(DSSDATA_DIR)); + } + /* block header map */ init_header_map(backup); @@ -1907,6 +1917,9 @@ pgBackupWriteControl(FILE *out, pgBackup *backup) if (backup->content_crc != 0) fio_fprintf(out, "content-crc = %u\n", backup->content_crc); + + fio_fprintf(out, "\n#Database Storage type\n"); + fio_fprintf(out, "storage-type = %s\n", dev2str(backup->storage_type)); } @@ -2069,14 +2082,15 @@ write_backup_filelist(pgBackup *backup, parray *files, const char *root, "\"mode\":\"%u\", \"is_datafile\":\"%u\", " "\"is_cfs\":\"%u\", \"crc\":\"%u\", " "\"compress_alg\":\"%s\", \"external_dir_num\":\"%d\", " - "\"dbOid\":\"%u\"", + "\"dbOid\":\"%u\", \"file_type\":\"%d\"", file->rel_path, file->write_size, file->mode, file->is_datafile ? 1 : 0, file->is_cfs ? 
1 : 0, file->crc, deparse_compress_alg(file->compress_alg), file->external_dir_num, - file->dbOid); + file->dbOid, + (int)file->type); securec_check_ss_c(nRet, "\0", "\0"); len = nRet; @@ -2195,6 +2209,7 @@ readBackupControlFile(const char *path) char *compress_alg = NULL; char *recovery_name = NULL; int parsed_options; + char *storage_type = NULL; errno_t rc = 0; ConfigOption options[] = @@ -2229,6 +2244,7 @@ readBackupControlFile(const char *path) {'s', 0, "note", &backup->note, SOURCE_FILE_STRICT}, {'s', 0, "recovery-name", &recovery_name, SOURCE_FILE_STRICT}, {'u', 0, "content-crc", &backup->content_crc, SOURCE_FILE_STRICT}, + {'s', 0, "storage-type", &storage_type, SOURCE_FILE_STRICT}, {0} }; @@ -2327,6 +2343,9 @@ readBackupControlFile(const char *path) if (compress_alg) backup->compress_alg = parse_compress_alg(compress_alg); + if (storage_type) + backup->storage_type = str2dev(storage_type); + return backup; } diff --git a/src/bin/pg_probackup/configure.cpp b/src/bin/pg_probackup/configure.cpp index bf49c5c42..9210f3626 100644 --- a/src/bin/pg_probackup/configure.cpp +++ b/src/bin/pg_probackup/configure.cpp @@ -14,6 +14,7 @@ #include "configuration.h" #include "json.h" +#include "catalog/pg_control.h" static void assign_log_level_console(ConfigOption *opt, const char *arg); @@ -40,6 +41,7 @@ static void show_configure_json(ConfigOption *opt); #define OPTION_RETENTION_GROUP "Retention parameters" #define OPTION_COMPRESS_GROUP "Compression parameters" #define OPTION_REMOTE_GROUP "Remote access parameters" +#define OPTION_DSS_GROUP "DSS connect parameters" /* * Short name should be non-printable ASCII character. 
@@ -220,6 +222,27 @@ ConfigOption instance_options[] = &instance_config.remote.libpath, SOURCE_CMD, (OptionSource)0, OPTION_REMOTE_GROUP, 0, option_get_value }, + /* DSS options */ + { + 'b', 232, "enable-dss", + &instance_config.dss.enable_dss, SOURCE_CMD, (OptionSource)0, + OPTION_DSS_GROUP, 0, option_get_value + }, + { + 's', 233, "vgname", + &instance_config.dss.vgname, SOURCE_CMD, (OptionSource)0, + OPTION_DSS_GROUP, 0, option_get_value + }, + { + 's', 234, "socketpath", + &instance_config.dss.socketpath, SOURCE_CMD, (OptionSource)0, + OPTION_DSS_GROUP, 0, option_get_value + }, + { + 'i', 235, "instance-id", + &instance_config.dss.instance_id, SOURCE_CMD, (OptionSource)0, + OPTION_DSS_GROUP, 0, option_get_value + }, { 0 } }; @@ -333,7 +356,7 @@ init_config(InstanceConfig *config, const char *instance_name) #if PG_VERSION_NUM >= 110000 config->xlog_seg_size = 0; #else - config->xlog_seg_size = XLOG_SEG_SIZE; + config->xlog_seg_size = XLogSegSize; #endif config->archive_timeout = ARCHIVE_TIMEOUT_DEFAULT; @@ -349,6 +372,9 @@ init_config(InstanceConfig *config, const char *instance_name) config->compress_level = COMPRESS_LEVEL_DEFAULT; config->remote.proto = (const char*)"ssh"; + + config->dss.enable_dss = false; + config->dss.instance_id = INVALID_INSTANCEID; } /* @@ -515,6 +541,23 @@ readInstanceConfigFile(const char *instance_name) 's', 231, "remote-libpath", &instance->remote.libpath, SOURCE_CMD, (OptionSource)0, OPTION_REMOTE_GROUP, 0, option_get_value }, + /* DSS connect options */ + { + 'b', 232, "enable-dss", &instance->dss.enable_dss, SOURCE_CMD, (OptionSource)0, + OPTION_DSS_GROUP, 0, option_get_value + }, + { + 's', 233, "vgname", &instance->dss.vgname, SOURCE_CMD, (OptionSource)0, + OPTION_DSS_GROUP, 0, option_get_value + }, + { + 's', 234, "socketpath", &instance->dss.socketpath, SOURCE_CMD, (OptionSource)0, + OPTION_DSS_GROUP, 0, option_get_value + }, + { + 'i', 235, "instance-id", &instance->dss.instance_id, SOURCE_CMD, (OptionSource)0, + 
OPTION_DSS_GROUP, 0, option_get_value + }, { 0 } }; diff --git a/src/bin/pg_probackup/data.cpp b/src/bin/pg_probackup/data.cpp index 48726b83f..adb6962bf 100644 --- a/src/bin/pg_probackup/data.cpp +++ b/src/bin/pg_probackup/data.cpp @@ -24,10 +24,12 @@ #include #endif +#include "tool_common.h" #include "thread.h" #include "common/fe_memutils.h" #include "lz4.h" #include "zstd.h" +#include "storage/file/fio_device.h" /* Union to ease operations on relation pages */ typedef struct DataPage @@ -502,6 +504,10 @@ prepare_page(ConnectionArgs *conn_arg, blknum, from_fullpath, read_len, BLCKSZ); else { + /* If it is in DSS mode, the validation is skipped */ + if (IsDssMode()) + return PageIsOk; + /* We have BLCKSZ of raw data, validate it */ rc = validate_one_page(page, absolute_blknum, InvalidXLogRecPtr, page_st, @@ -800,10 +806,11 @@ backup_non_data_file(pgFile *file, pgFile *prev_file, BackupMode backup_mode, time_t parent_backup_time, bool missing_ok) { + fio_location from_location = is_dss_file(from_fullpath) ? FIO_DSS_HOST : FIO_DB_HOST; /* special treatment for global/pg_control */ - if (file->external_dir_num == 0 && strcmp(file->rel_path, XLOG_CONTROL_FILE) == 0) + if (file->external_dir_num == 0 && strcmp(file->name, PG_XLOG_CONTROL_FILE) == 0) { - copy_pgcontrol_file(from_fullpath, FIO_DB_HOST, + copy_pgcontrol_file(from_fullpath, from_location, to_fullpath, FIO_BACKUP_HOST, file); return; } @@ -815,7 +822,7 @@ backup_non_data_file(pgFile *file, pgFile *prev_file, file->mtime <= parent_backup_time) { - file->crc = fio_get_crc32(from_fullpath, FIO_DB_HOST, false); + file->crc = fio_get_crc32(from_fullpath, from_location, false); /* ...and checksum is the same... 
*/ if (EQ_TRADITIONAL_CRC32(file->crc, prev_file->crc)) @@ -825,8 +832,7 @@ backup_non_data_file(pgFile *file, pgFile *prev_file, } } - backup_non_data_file_internal(from_fullpath, FIO_DB_HOST, - to_fullpath, file, true); + backup_non_data_file_internal(from_fullpath, from_location, to_fullpath, file, true); } /* @@ -901,8 +907,16 @@ restore_data_file(parray *parent_chain, pgFile *dest_file, FILE *out, * At this point we are sure, that something is going to be copied * Open source file. */ - join_path_components(from_root, backup->root_dir, DATABASE_DIR); - join_path_components(from_fullpath, from_root, tmp_file->rel_path); + if (is_dss_type(tmp_file->type)) + { + join_path_components(from_root, backup->root_dir, DSSDATA_DIR); + join_path_components(from_fullpath, from_root, tmp_file->rel_path); + } + else + { + join_path_components(from_root, backup->root_dir, DATABASE_DIR); + join_path_components(from_fullpath, from_root, tmp_file->rel_path); + } in = fopen(from_fullpath, PG_BINARY_R); if (in == NULL) @@ -915,8 +929,8 @@ restore_data_file(parray *parent_chain, pgFile *dest_file, FILE *out, /* get headers for this file */ if (use_headers && tmp_file->n_headers > 0) headers = get_data_file_headers(&(backup->hdr_map), tmp_file, - parse_program_version(backup->program_version), - true); + parse_program_version(backup->program_version), + true); if (use_headers && !headers && tmp_file->n_headers > 0) elog(ERROR, "Failed to get page headers for file \"%s\"", from_fullpath); @@ -928,13 +942,13 @@ restore_data_file(parray *parent_chain, pgFile *dest_file, FILE *out, * copy the file from backup. */ total_write_len += restore_data_file_internal(in, out, tmp_file, - parse_program_version(backup->program_version), - from_fullpath, to_fullpath, dest_file->n_blocks, - use_bitmap ? &(dest_file)->pagemap : NULL, - checksum_map, backup->checksum_version, - /* shiftmap can be used only if backup state precedes the shift */ - backup->stop_lsn <= shift_lsn ? 
lsn_map : NULL, - headers); + parse_program_version(backup->program_version), + from_fullpath, to_fullpath, dest_file->n_blocks, + use_bitmap ? &(dest_file)->pagemap : NULL, + checksum_map, backup->checksum_version, + /* shiftmap can be used only if backup state precedes the shift */ + backup->stop_lsn <= shift_lsn ? lsn_map : NULL, + headers); if (fclose(in) != 0) elog(ERROR, "Cannot close file \"%s\": %s", from_fullpath, @@ -984,8 +998,8 @@ restore_data_file_internal(FILE *in, FILE *out, pgFile *file, uint32 backup_vers * but should never happen in case of blocks from FULL backup. */ if (fio_fseek(out, cur_pos_out) < 0) - elog(ERROR, "Cannot seek block %u of \"%s\": %s", - blknum, to_fullpath, strerror(errno)); + elog(ERROR, "Cannot seek block %u of \"%s\": %s", + blknum, to_fullpath, strerror(errno)); for (;;) { @@ -1136,8 +1150,8 @@ restore_data_file_internal(FILE *in, FILE *out, pgFile *file, uint32 backup_vers cur_pos_in != headers[n_hdr].pos) { if (fseek(in, headers[n_hdr].pos, SEEK_SET) != 0) - elog(ERROR, "Cannot seek to offset %u of \"%s\": %s", - headers[n_hdr].pos, from_fullpath, strerror(errno)); + elog(ERROR, "Cannot seek to offset %u of \"%s\": %s", + headers[n_hdr].pos, from_fullpath, strerror(errno)); cur_pos_in = headers[n_hdr].pos; } @@ -1224,7 +1238,7 @@ restore_non_data_file_internal(FILE *in, FILE *out, pgFile *file, const char *from_fullpath, const char *to_fullpath) { size_t read_len = 0; - char *buf = (char *)pgut_malloc(STDIO_BUFSIZE); /* 64kB buffer */ + char buf[STDIO_BUFSIZE] __attribute__((__aligned__(ALIGNOF_BUFFER))); /* 64kB buffer, need to be aligned */ /* copy content */ for (;;) @@ -1251,10 +1265,6 @@ restore_non_data_file_internal(FILE *in, FILE *out, pgFile *file, if (feof(in)) break; } - - pg_free(buf); - - } size_t @@ -1355,15 +1365,17 @@ restore_non_data_file(parray *parent_chain, pgBackup *dest_backup, to_fullpath, strerror(errno)); } - if (tmp_file->external_dir_num == 0) - join_path_components(from_root, 
tmp_backup->root_dir, DATABASE_DIR); - else + if (tmp_file->external_dir_num != 0) { char external_prefix[MAXPGPATH]; join_path_components(external_prefix, tmp_backup->root_dir, EXTERNAL_DIR); makeExternalDirPathByNum(from_root, external_prefix, tmp_file->external_dir_num); } + else if (is_dss_type(tmp_file->type)) + join_path_components(from_root, tmp_backup->root_dir, DSSDATA_DIR); + else + join_path_components(from_root, tmp_backup->root_dir, DATABASE_DIR); join_path_components(from_fullpath, from_root, dest_file->rel_path); @@ -1424,10 +1436,8 @@ bool backup_remote_file(const char *from_fullpath, const char *to_fullpath, pgFi * it is either small control file or already compressed cfs file. */ void -backup_non_data_file_internal(const char *from_fullpath, - fio_location from_location, - const char *to_fullpath, pgFile *file, - bool missing_ok) +backup_non_data_file_internal(const char *from_fullpath, fio_location from_location, + const char *to_fullpath, pgFile *file, bool missing_ok) { FILE *in = NULL; FILE *out = NULL; @@ -1448,9 +1458,12 @@ backup_non_data_file_internal(const char *from_fullpath, to_fullpath, strerror(errno)); /* update file permission */ - if (chmod(to_fullpath, file->mode) == -1) - elog(ERROR, "Cannot change mode of \"%s\": %s", to_fullpath, - strerror(errno)); + if (!is_dss_file(from_fullpath)) + { + if (chmod(to_fullpath, file->mode) == -1) + elog(ERROR, "Cannot change mode of \"%s\": %s", to_fullpath, + strerror(errno)); + } /* backup remote file */ if (fio_is_remote(FIO_DB_HOST)) @@ -1466,7 +1479,7 @@ backup_non_data_file_internal(const char *from_fullpath, if (in == NULL) { /* maybe deleted, it's not error in case of backup */ - if (errno == ENOENT) + if (is_file_delete(errno)) { if (missing_ok) { @@ -1648,7 +1661,7 @@ check_data_file(ConnectionArgs *arguments, pgFile *file, * If file is not found, this is not en error. * It could have been deleted by concurrent openGauss transaction. 
*/ - if (errno == ENOENT) + if (is_file_delete(errno)) { elog(LOG, "File \"%s\" is not found", from_fullpath); return true; @@ -1709,15 +1722,12 @@ validate_file_pages(pgFile *file, const char *fullpath, XLogRecPtr stop_lsn, int n_hdr = -1; off_t cur_pos_in = 0; - - /* should not be possible */ Assert(!(backup_version >= 20400 && file->n_headers <= 0)); in = fopen(fullpath, PG_BINARY_R); if (in == NULL) - elog(ERROR, "Cannot open file \"%s\": %s", - fullpath, strerror(errno)); + elog(ERROR, "Cannot open file \"%s\": %s", fullpath, strerror(errno)); headers = get_data_file_headers(hdr_map, file, backup_version, false); @@ -1732,7 +1742,6 @@ validate_file_pages(pgFile *file, const char *fullpath, XLogRecPtr stop_lsn, /* calc CRC of backup file */ INIT_FILE_CRC32(use_crc32c, crc); - /* read and validate pages one by one */ while (true) { @@ -1837,10 +1846,10 @@ validate_file_pages(pgFile *file, const char *fullpath, XLogRecPtr stop_lsn, const char *errormsg = NULL; uncompressed_size = do_decompress(page.data, BLCKSZ, - compressed_page.data, - compressed_size, - file->compress_alg, - &errormsg); + compressed_page.data, + compressed_size, + file->compress_alg, + &errormsg); if (uncompressed_size < 0 && errormsg != NULL) { elog(WARNING, "An error occured during decompressing block %u of file \"%s\": %s", @@ -1862,14 +1871,12 @@ validate_file_pages(pgFile *file, const char *fullpath, XLogRecPtr stop_lsn, return false; } - rc = validate_one_page(page.data, - file->segno * RELSEG_SIZE + blknum, - stop_lsn, &page_st, checksum_version); + rc = validate_one_page(page.data, file->segno * RELSEG_SIZE + blknum, + stop_lsn, &page_st, checksum_version); } else - rc = validate_one_page(compressed_page.data, - file->segno * RELSEG_SIZE + blknum, - stop_lsn, &page_st, checksum_version); + rc = validate_one_page(compressed_page.data, file->segno * RELSEG_SIZE + blknum, + stop_lsn, &page_st, checksum_version); switch (rc) { diff --git a/src/bin/pg_probackup/dir.cpp 
b/src/bin/pg_probackup/dir.cpp index 383f2de9c..fb4a766b8 100644 --- a/src/bin/pg_probackup/dir.cpp +++ b/src/bin/pg_probackup/dir.cpp @@ -25,6 +25,7 @@ #include "configuration.h" #include "common/fe_memutils.h" #include "PageCompression.h" +#include "storage/file/fio_device.h" /* * The contents of these directories are removed or recreated during server @@ -126,6 +127,7 @@ static char check_in_tablespace(pgFile *file, bool in_tablespace); static char check_db_dir(pgFile *file); static char check_digit_file(pgFile *file); static char check_nobackup_dir(pgFile *file); +static char check_in_dss(pgFile *file, int include_id); static void dir_list_file_internal(parray *files, pgFile *parent, const char *parent_dir, bool exclude, bool follow_symlink, bool backup_logs, bool skip_hidden, int external_dir_num, fio_location location, @@ -160,7 +162,7 @@ dir_create_dir(const char *dir, mode_t mode) /* Create directory */ if (mkdir(dir, mode) == -1) { - if (errno == EEXIST) /* already exist */ + if (is_file_exist(errno)) /* already exist */ return 0; elog(ERROR, "cannot create directory \"%s\": %s", dir, strerror(errno)); } @@ -179,13 +181,14 @@ pgFileNew(const char *path, const char *rel_path, bool follow_symlink, if (fio_stat(path, &st, follow_symlink, location) < 0) { /* file not found is not an error case */ - if (errno == ENOENT) + if (is_file_delete(errno)) return NULL; elog(ERROR, "cannot stat file \"%s\": %s", path, strerror(errno)); } file = pgFileInit(rel_path); + file->type = fio_device_type(path); file->size = st.st_size; file->mode = st.st_mode; file->mtime = st.st_mtime; @@ -244,7 +247,7 @@ pgFileDelete(mode_t mode, const char *full_path) { if (rmdir(full_path) == -1) { - if (errno == ENOENT) + if (is_file_delete(errno)) return; else if (errno == ENOTDIR) /* could be symbolic link */ goto delete_file; @@ -258,7 +261,7 @@ pgFileDelete(mode_t mode, const char *full_path) delete_file: if (remove(full_path) == -1) { - if (errno == ENOENT) + if 
(is_file_delete(errno)) return; elog(ERROR, "Cannot remove file \"%s\": %s", full_path, strerror(errno)); @@ -274,7 +277,7 @@ pgFileDelete(mode_t mode, const char *full_path) pg_crc32 pgFileGetCRC(const char *file_path, bool use_crc32c, bool missing_ok) { - FILE *fp; + FILE *fp = NULL; pg_crc32 crc = 0; char *buf; size_t len = 0; @@ -285,7 +288,7 @@ pgFileGetCRC(const char *file_path, bool use_crc32c, bool missing_ok) fp = fopen(file_path, PG_BINARY_R); if (fp == NULL) { - if (errno == ENOENT) + if (is_file_delete(errno)) { if (missing_ok) { @@ -294,8 +297,7 @@ pgFileGetCRC(const char *file_path, bool use_crc32c, bool missing_ok) } } - elog(ERROR, "Cannot open file \"%s\": %s", - file_path, strerror(errno)); + elog(ERROR, "Cannot open file \"%s\": %s", file_path, strerror(errno)); } /* disable stdio buffering */ @@ -591,6 +593,7 @@ dir_check_file(pgFile *file, bool backup_logs, bool backup_replslots) int sscanf_res; char ret; bool in_tablespace = false; + char check_res; in_tablespace = path_is_prefix_of_path(PG_TBLSPC_DIR, file->rel_path); @@ -700,6 +703,12 @@ dir_check_file(pgFile *file, bool backup_logs, bool backup_replslots) return CHECK_FALSE; } + /* skip other instance files in dss mode */ + check_res = check_in_dss(file, instance_config.dss.instance_id); + if (check_res != CHECK_TRUE) { + return check_res; + } + ret = check_in_tablespace(file, in_tablespace); if (ret != -1) { return ret; @@ -708,6 +717,47 @@ dir_check_file(pgFile *file, bool backup_logs, bool backup_replslots) return check_db_dir(file); } +static char check_in_dss(pgFile *file, int include_id) +{ + char instance_id[MAX_INSTANCEID_LEN]; + char top_path[MAXPGPATH]; + errno_t rc = EOK; + int move = 0; + + if (!is_dss_type(file->type)) { + return CHECK_TRUE; + } + + /* step1 : skip other instance owner file or dir */ + strlcpy(top_path, file->rel_path, sizeof(top_path)); + get_top_path(top_path); + + rc = snprintf_s(instance_id, sizeof(instance_id), sizeof(instance_id) - 1, "%d", include_id); + 
securec_check_ss_c(rc, "\0", "\0"); + + move = (int)strlen(top_path) - (int)strlen(instance_id); + if (move > 0 && move < MAXPGPATH && strcmp(top_path + move, instance_id) != 0) { + char tail = top_path[strlen(top_path) - 1]; + /* Is this file or dir belongs to other instance? */ + if (tail >= '0' && tail <= '9') { + return CHECK_FALSE; + } + } + + /* step2: recheck dir is in the exclude list, include id will be considered */ + if (S_ISDIR(file->mode)) { + for (int i = 0; pgdata_exclude_dir[i]; i++) { + int len = (int)strlen(pgdata_exclude_dir[i]); + if (strncmp(top_path, pgdata_exclude_dir[i], len) == 0 && + strcmp(top_path + len, instance_id) == 0) { + return CHECK_EXCLUDE_FALSE; + } + } + } + + return CHECK_TRUE; +} + static char check_in_tablespace(pgFile *file, bool in_tablespace) { if (in_tablespace) @@ -741,11 +791,15 @@ if (in_tablespace) { file->tblspcOid = DEFAULTTABLESPACE_OID; - int ret = sscanf_s(file->rel_path, "base/%u/", &(file->dbOid)); - if (ret == -1) - elog(INFO, "Cannot parse path \"%s\"", file->rel_path); - if (S_ISDIR(file->mode) && strcmp(file->name, "base") != 0) - file->is_database = true; + /* skip "base" itself */ + if (strcmp(file->name, "base") != 0) + { + int ret = sscanf_s(file->rel_path, "base/%u/", &(file->dbOid)); + if (ret == -1) + elog(INFO, "Cannot parse path \"%s\"", file->rel_path); + if (S_ISDIR(file->mode)) + file->is_database = true; + } } return -1; @@ -949,6 +1003,12 @@ bool SkipSomeDirFile(pgFile *file, struct dirent *dent, bool skipHidden) if (S_ISDIR(file->mode) && (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0)) { return false; } + /* Skip recycle in dss mode */ + if (is_dss_type(file->type) && strcmp(dent->d_name, ".recycle") == 0) + { + elog(WARNING, "Skip .recycle"); + return false; + } /* skip hidden files and directories */ if (skipHidden && file->name[0] == '.') { elog(WARNING, "Skip hidden file: '%s'", file->name); @@ -958,7 +1018,7 @@ bool SkipSomeDirFile(pgFile *file, struct dirent *dent, 
bool skipHidden) * Add only files, directories and links. Skip sockets and other * unexpected file formats. */ - if (!S_ISDIR(file->mode) && !S_ISREG(file->mode)) { + if (!S_ISDIR(file->mode) && !S_ISREG(file->mode) && !S_ISLNK(file->mode)) { elog(WARNING, "Skip '%s': unexpected file format", file->name); return false; } @@ -986,7 +1046,7 @@ dir_list_file_internal(parray *files, pgFile *parent, const char *parent_dir, dir = fio_opendir(parent_dir, location); if (dir == NULL) { - if (errno == ENOENT) + if (is_file_delete(errno)) { /* Maybe the directory was removed */ return; @@ -1046,7 +1106,7 @@ dir_list_file_internal(parray *files, pgFile *parent, const char *parent_dir, backup_logs, skip_hidden, external_dir_num, location, backup_replslots); } - if (errno && errno != ENOENT) + if (errno && !is_file_delete(errno)) { int errno_tmp = errno; fio_closedir(dir); @@ -1233,7 +1293,7 @@ create_data_directories(parray *dest_files, const char *data_dir, const char *ba * original directory created as symlink to it. */ - elog(LOG, "Restore directories and symlinks..."); + elog(LOG, "Restore directories and symlinks... 
in %s", data_dir); /* create directories */ for (i = 0; i < parray_num(dest_files); i++) @@ -1241,6 +1301,10 @@ create_data_directories(parray *dest_files, const char *data_dir, const char *ba char parent_dir[MAXPGPATH]; pgFile *dir = (pgFile *) parray_get(dest_files, i); + /* skip undesirable type */ + if (is_dss_type(dir->type) != fio_is_dss(location)) + continue; + if (!S_ISDIR(dir->mode)) continue; @@ -1248,6 +1312,12 @@ create_data_directories(parray *dest_files, const char *data_dir, const char *ba if (dir->external_dir_num != 0) continue; + if (is_dss_type(dir->type) && is_ss_xlog(dir->rel_path)) { + ss_createdir(dir->rel_path, instance_config.dss.vgdata, + instance_config.dss.vglog); + continue; + } + /* tablespace_map exists */ if (links) { @@ -1702,7 +1772,8 @@ dir_read_file_list(const char *root, const char *external_prefix, dbOid, /* used for partial restore */ hdr_crc, hdr_off, - hdr_size; + hdr_size, + file_type; pgFile *file = nullptr; COMP_FILE_CRC32(true, content_crc, buf, strlen(buf)); @@ -1716,6 +1787,7 @@ dir_read_file_list(const char *root, const char *external_prefix, get_control_value(buf, "compress_alg", compress_alg_string, NULL, false); get_control_value(buf, "external_dir_num", NULL, &external_dir_num, false); get_control_value(buf, "dbOid", NULL, &dbOid, false); + get_control_value(buf, "file_type", NULL, &file_type, true); file = pgFileInit(path); file->write_size = (int64) write_size; @@ -1729,6 +1801,7 @@ dir_read_file_list(const char *root, const char *external_prefix, file->compress_alg = parse_compress_alg(compress_alg_string); file->external_dir_num = external_dir_num; file->dbOid = dbOid ? 
dbOid : 0; + file->type = (device_type_t)file_type; /* * Optional fields @@ -1805,7 +1878,7 @@ dir_is_empty(const char *path, fio_location location) if (dir == NULL) { /* Directory in path doesn't exist */ - if (errno == ENOENT) + if (is_file_delete(errno)) return true; elog(ERROR, "cannot open directory \"%s\": %s", path, strerror(errno)); } @@ -1818,6 +1891,10 @@ dir_is_empty(const char *path, fio_location location) strcmp(dir_ent->d_name, "..") == 0) continue; + /* Skip recycle in dss mode */ + if (fio_is_dss(location) && strcmp(dir_ent->d_name, ".recycle") == 0) + continue; + /* Directory is not empty */ fio_closedir(dir); return false; @@ -1838,7 +1915,7 @@ fileExists(const char *path, fio_location location) { struct stat buf; - if (fio_stat(path, &buf, true, location) == -1 && errno == ENOENT) + if (fio_stat(path, &buf, true, location) == -1 && is_file_delete(errno)) return false; else if (!S_ISREG(buf.st_mode)) return false; @@ -1846,7 +1923,7 @@ fileExists(const char *path, fio_location location) return true; } -size_t +off_t pgFileSize(const char *path) { struct stat buf; diff --git a/src/bin/pg_probackup/fetch.cpp b/src/bin/pg_probackup/fetch.cpp index 8e6f87d84..021e06173 100644 --- a/src/bin/pg_probackup/fetch.cpp +++ b/src/bin/pg_probackup/fetch.cpp @@ -27,16 +27,13 @@ * */ char * -slurpFile(const char *datadir, const char *path, size_t *filesize, bool safe, fio_location location) +slurpFile(const char *fullpath, size_t *filesize, bool safe, fio_location location) { int fd; char *buffer; struct stat statbuf; - char fullpath[MAXPGPATH]; int len; - join_path_components(fullpath, datadir, path); - if ((fd = fio_open(fullpath, O_RDONLY | PG_BINARY, location)) == -1) { if (safe) diff --git a/src/bin/pg_probackup/file.cpp b/src/bin/pg_probackup/file.cpp index 98003d1ee..436212932 100644 --- a/src/bin/pg_probackup/file.cpp +++ b/src/bin/pg_probackup/file.cpp @@ -15,6 +15,7 @@ #include "file.h" #include "storage/checksum.h" +#include 
"storage/file/fio_device.h" #include "common/fe_memutils.h" #define PRINTF_BUF_SIZE 1024 @@ -211,6 +212,12 @@ bool fio_is_remote_simple(fio_location location) return is_remote; } +/* Check if specified location is for current node */ +bool fio_is_dss(fio_location location) +{ + return location == FIO_DSS_HOST; +} + /* Try to read specified amount of bytes unless error or EOF are encountered */ ssize_t fio_read_all(int fd, void* buf, size_t size) { @@ -546,7 +553,7 @@ int fio_fprintf(FILE* f, char const* format, ...) return rc; } -/* Flush stream data (does nothing for remote file) */ +/* Flush stream data (does nothing for remote file and dss file) */ int fio_fflush(FILE* f) { int rc = 0; @@ -697,9 +704,22 @@ int fio_seek(int fd, off_t offs) /* Write data to stdio file */ size_t fio_fwrite(FILE* f, void const* buf, size_t size) { - return fio_is_remote_file(f) - ? fio_write(fio_fileno(f), buf, size) - : fwrite(buf, 1, size, f); + if (fio_is_remote_file(f)) + { + return (size_t)fio_write(fio_fileno(f), buf, size); + } + else if (is_dss_file_dec(f)) + { + /* size must be multiples of ALIGNOF_BUFFER in dss */ + char align_buf[size] __attribute__((__aligned__(ALIGNOF_BUFFER))); /* need to be aligned */ + errno_t rc = memcpy_s(align_buf, size, buf, size); + securec_check_c(rc, "\0", "\0"); + return dss_fwrite_file(align_buf, 1, size, f); + } + else + { + return fwrite(buf, 1, size, f); + } } /* Write data to the file */ @@ -728,10 +748,7 @@ int32 fio_decompress(void* dst, void const* src, size_t size, int compress_alg) { const char *errormsg = NULL; - int32 uncompressed_size = do_decompress(dst, BLCKSZ, - src, - size, - (CompressAlg)compress_alg, &errormsg); + int32 uncompressed_size = do_decompress(dst, BLCKSZ, src, size, (CompressAlg)compress_alg, &errormsg); if (uncompressed_size < 0 && errormsg != NULL) { elog(WARNING, "An error occured during decompressing block: %s", errormsg); @@ -766,12 +783,13 @@ ssize_t fio_fwrite_compressed(FILE* f, void const* buf, size_t 
size, int compres } else { + /* operate is same in local mode and dss mode */ char uncompressed_buf[BLCKSZ]; int32 uncompressed_size = fio_decompress(uncompressed_buf, buf, size, compress_alg); return (uncompressed_size < 0) ? uncompressed_size - : fwrite(uncompressed_buf, 1, uncompressed_size, f); + : fio_fwrite(f, uncompressed_buf, uncompressed_size); } } @@ -983,6 +1001,11 @@ int fio_sync(char const* path, fio_location location) return 0; } + else if (is_dss_file(path)) + { + /* nothing to do in dss mode, data are already sync to disk */ + return 0; + } else { int fd; @@ -1027,7 +1050,7 @@ pg_crc32 fio_get_crc32(const char *file_path, fio_location location, bool decomp else { #ifdef HAVE_LIBZ - if (decompress) + if (decompress && !IsDssMode()) return pgFileGetCRCgz(file_path, true, true); else #endif @@ -1080,6 +1103,7 @@ int fio_mkdir(const char* path, int mode, fio_location location) } else { + /* operate is same in local mode and dss mode */ return dir_create_dir(path, mode); } } @@ -1113,7 +1137,7 @@ int fio_chmod(char const* path, int mode, fio_location location) */ static void fio_load_file(int out, char const* path) { - int fd = open(path, O_RDONLY); + int fd = open(path, O_RDONLY, 0); fio_header hdr; void* buf = NULL; @@ -2098,8 +2122,8 @@ fio_get_lsn_map(const char *fullpath, uint32 checksum_version, } else { - lsn_map = get_lsn_map(fullpath, checksum_version, n_blocks, - shift_lsn, segmentno); + /* operate is same in local mode and dss mode */ + lsn_map = get_lsn_map(fullpath, checksum_version, n_blocks, shift_lsn, segmentno); } return lsn_map; @@ -2152,6 +2176,7 @@ pid_t fio_check_postmaster(const char *pgdata, fio_location location) return hdr.arg; } else + /* operate is same in local mode and dss mode */ return check_postmaster(pgdata); } @@ -2188,6 +2213,7 @@ fio_delete(mode_t mode, const char *fullpath, fio_location location) } else + /* operate is same in local mode and dss mode */ pgFileDelete(mode, fullpath); } diff --git 
a/src/bin/pg_probackup/file.h b/src/bin/pg_probackup/file.h index 61f9f8996..aa465e604 100644 --- a/src/bin/pg_probackup/file.h +++ b/src/bin/pg_probackup/file.h @@ -74,7 +74,8 @@ typedef enum FIO_LOCAL_HOST, /* data is locate at local host */ FIO_DB_HOST, /* data is located at Postgres server host */ FIO_BACKUP_HOST, /* data is located at backup host */ - FIO_REMOTE_HOST /* date is located at remote host */ + FIO_REMOTE_HOST, /* date is located at remote host */ + FIO_DSS_HOST /* data is located at dss storage, it can be visit in local host */ } fio_location; #define FIO_FDMAX 64 @@ -101,6 +102,7 @@ extern __thread int fio_stdin; /* Check if FILE handle is local or remote (created by FIO) */ #define fio_is_remote_file(file) ((size_t)(file) <= FIO_FDMAX) +extern bool fio_is_dss(fio_location location); extern ssize_t fio_read_all(int fd, void* buf, size_t size); extern ssize_t fio_write_all(int fd, void const* buf, size_t size); extern void fio_redirect(int in, int out, int err); diff --git a/src/bin/pg_probackup/file_gz.cpp b/src/bin/pg_probackup/file_gz.cpp index b1cec9e7f..8d7b3d4ec 100644 --- a/src/bin/pg_probackup/file_gz.cpp +++ b/src/bin/pg_probackup/file_gz.cpp @@ -158,7 +158,7 @@ fio_gzread(gzFile f, void *buf, unsigned size) gz->strm.next_in = gz->buf; } rc = fio_read(gz->fd, gz->strm.next_in + gz->strm.avail_in, - gz->buf + ZLIB_BUFFER_SIZE - gz->strm.next_in - gz->strm.avail_in); + gz->buf + ZLIB_BUFFER_SIZE - gz->strm.next_in - gz->strm.avail_in); if (rc > 0) { gz->strm.avail_in += rc; diff --git a/src/bin/pg_probackup/help.cpp b/src/bin/pg_probackup/help.cpp index ac453633c..28852c527 100644 --- a/src/bin/pg_probackup/help.cpp +++ b/src/bin/pg_probackup/help.cpp @@ -86,6 +86,8 @@ void help_pg_probackup(void) printf(_(" [--remote-path=path] [--remote-user=username]\n")); printf(_(" [--remote-port=port] [--ssh-options=ssh_options]\n")); printf(_(" [--remote-libpath=libpath]\n")); + printf(_(" [--enable-dss] [--instance-id=instance_id]\n")); + printf(_(" 
[--vgname=\"vgdata,vglog\"] [--socketpath=socketpath]\n")); printf(_(" [--help]\n")); printf(_("\n %s del-instance -B backup-path --instance=instance_name\n"), PROGRAM_NAME); @@ -111,6 +113,8 @@ void help_pg_probackup(void) printf(_(" [--remote-path=path] [--remote-user=username]\n")); printf(_(" [--remote-port=port] [--ssh-options=ssh_options]\n")); printf(_(" [--remote-libpath=libpath]\n")); + printf(_(" [--enable-dss] [--instance-id=instance_id]\n")); + printf(_(" [--vgname=\"vgdata,vglog\"] [--socketpath=socketpath]\n")); printf(_(" [--help]\n")); printf(_("\n %s set-backup -B backup-path --instance=instance_name -i backup-id\n"), PROGRAM_NAME); @@ -153,6 +157,8 @@ void help_pg_probackup(void) printf(_(" [--remote-path=path] [--remote-user=username]\n")); printf(_(" [--remote-port=port] [--ssh-options=ssh_options]\n")); printf(_(" [--remote-libpath=libpath]\n")); + printf(_(" [--enable-dss] [--instance-id=instance_id]\n")); + printf(_(" [--vgname=\"vgdata,vglog\"] [--socketpath=socketpath]\n")); printf(_(" [--ttl=interval] [--expire-time=time]\n")); printf(_(" [--backup-pg-replslot]\n")); printf(_(" [--help]\n")); @@ -169,6 +175,8 @@ void help_pg_probackup(void) printf(_(" [--remote-path=path] [--remote-user=username]\n")); printf(_(" [--remote-port=port] [--ssh-options=ssh_options]\n")); printf(_(" [--remote-libpath=libpath]\n")); + printf(_(" [--enable-dss] [--instance-id=instance_id]\n")); + printf(_(" [--vgname=\"vgdata,vglog\"] [--socketpath=socketpath]\n")); printf(_(" [--log-level-console=log-level-console]\n")); printf(_(" [--log-level-file=log-level-file]\n")); printf(_(" [--log-filename=log-filename]\n")); @@ -237,6 +245,9 @@ static void help_add_instance(void) printf(_(" [--remote-path=path] [--remote-user=username]\n")); printf(_(" [--remote-port=port] [--ssh-options=ssh_options]\n\n")); printf(_(" [--remote-libpath=libpath]\n")); + printf(_(" [--remote-port=port] [--ssh-options=ssh_options]\n")); + printf(_(" [--enable-dss] 
[--instance-id=instance_id]\n")); + printf(_(" [--vgname=\"vgdata,vglog\"] [--socketpath=socketpath]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" -D, --pgdata=pgdata-path location of the database storage area\n")); @@ -255,7 +266,13 @@ static void help_add_instance(void) printf(_(" --remote-user=username user name for ssh connection (default: current user)\n")); printf(_(" --remote-libpath=libpath library path on remote host\n")); printf(_(" --ssh-options=ssh_options additional ssh options (default: none)\n")); - printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n\n")); + printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n")); + + printf(_("\n DSS options:\n")); + printf(_(" --enable-dss enable shared storage mode\n")); + printf(_(" --vgname=\"vgdata,vglog\" name of dss volume group\n")); + printf(_(" --instance-id=instance_id instance id of backup node\n")); + printf(_(" --socketpath=socketpath dss connect socket file path\n\n")); } static void help_del_instance(void) @@ -288,6 +305,9 @@ static void help_set_config(void) printf(_(" [--remote-path=path] [--remote-user=username]\n")); printf(_(" [--remote-port=port] [--ssh-options=ssh_options]\n\n")); printf(_(" [--remote-libpath=libpath]\n")); + printf(_(" [--remote-port=port] [--ssh-options=ssh_options]\n")); + printf(_(" [--enable-dss] [--instance-id=instance_id]\n")); + printf(_(" [--vgname=\"vgdata,vglog\"] [--socketpath=socketpath]\n\n")); printf(_(" -B, --backup-path=backup-path location of the backup storage area\n")); printf(_(" --instance=instance_name name of the instance\n")); @@ -348,7 +368,13 @@ static void help_set_config(void) printf(_(" --remote-user=username user name for ssh connection (default: current user)\n")); printf(_(" --remote-libpath=libpath library path on remote host\n")); printf(_(" --ssh-options=ssh_options additional ssh options (default: none)\n")); - printf(_(" (example: --ssh-options='-c 
cipher_spec -F configfile')\n\n")); + printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n")); + + printf(_("\n DSS options:\n")); + printf(_(" --enable-dss enable shared storage mode\n")); + printf(_(" --vgname=\"vgdata,vglog\" name of dss volume group\n")); + printf(_(" --instance-id=instance_id instance id of backup node\n")); + printf(_(" --socketpath=socketpath dss connect socket file path\n\n")); } static void help_set_backup(void) @@ -420,6 +446,8 @@ static void help_backup(void) printf(_(" [--remote-path=path] [--remote-user=username]\n")); printf(_(" [--remote-port=port] [--ssh-options=ssh_options]\n")); printf(_(" [--remote-libpath=libpath]\n")); + printf(_(" [--enable-dss] [--instance-id=instance_id]\n")); + printf(_(" [--vgname=\"vgdata,vglog\"] [--socketpath=socketpath]\n")); printf(_(" [--ttl=interval] [--expire-time=time]\n\n")); printf(_(" [--backup-pg-replslot]\n")); @@ -507,6 +535,12 @@ static void help_backup(void) printf(_(" --ssh-options=ssh_options additional ssh options (default: none)\n")); printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n")); + printf(_("\n DSS options:\n")); + printf(_(" --enable-dss enable shared storage mode\n")); + printf(_(" --vgname=\"vgdata,vglog\" name of dss volume group\n")); + printf(_(" --instance-id=instance_id instance id of backup node\n")); + printf(_(" --socketpath=socketpath dss connect socket file path\n")); + printf(_("\n Pinning options:\n")); printf(_(" --ttl=interval pin backup for specified amount of time; 0 unpin\n")); printf(_(" available units: 'ms', 's', 'min', 'h', 'd' (default: s)\n")); @@ -529,6 +563,8 @@ static void help_restore(void) printf(_(" [--remote-path=path] [--remote-user=username]\n")); printf(_(" [--remote-port=port] [--ssh-options=ssh_options]\n")); printf(_(" [--remote-libpath=libpath]\n")); + printf(_(" [--enable-dss] [--instance-id=instance_id]\n")); + printf(_(" [--vgname=\"vgdata,vglog\"] [--socketpath=socketpath]\n")); printf(_(" 
[--log-level-console=log-level-console]\n")); printf(_(" [--log-level-file=log-level-file]\n")); printf(_(" [--log-filename=log-filename]\n")); @@ -577,6 +613,12 @@ static void help_restore(void) printf(_(" --ssh-options=ssh_options additional ssh options (default: none)\n")); printf(_(" (example: --ssh-options='-c cipher_spec -F configfile')\n")); + printf(_("\n DSS options:\n")); + printf(_(" --enable-dss enable shared storage mode\n")); + printf(_(" --vgname=\"vgdata,vglog\" name of dss volume group\n")); + printf(_(" --instance-id=instance_id instance id of backup node\n")); + printf(_(" --socketpath=socketpath dss connect socket file path\n")); + printf(_("\n Logging options:\n")); printf(_(" --log-level-console=log-level-console\n")); printf(_(" level for console logging (default: info)\n")); diff --git a/src/bin/pg_probackup/init.cpp b/src/bin/pg_probackup/init.cpp index 2fb8fd8e1..70f43a085 100644 --- a/src/bin/pg_probackup/init.cpp +++ b/src/bin/pg_probackup/init.cpp @@ -125,7 +125,7 @@ do_add_instance(InstanceConfig *instance) config_set_opt(instance_options, &instance_config.remote.ssh_config, SOURCE_DEFAULT); - /* pgdata was set through command line */ + /* pgdata and vgname were set through command line */ do_set_config(true); elog(INFO, "Instance '%s' successfully inited", instance_name); diff --git a/src/bin/pg_probackup/merge.cpp b/src/bin/pg_probackup/merge.cpp index 22a932e8c..d60b59d6e 100644 --- a/src/bin/pg_probackup/merge.cpp +++ b/src/bin/pg_probackup/merge.cpp @@ -15,6 +15,7 @@ #include "thread.h" #include "common/fe_memutils.h" +#include "storage/file/fio_device.h" typedef struct { @@ -1393,7 +1394,7 @@ merge_data_file(parray *parent_chain, pgBackup *full_backup, if (out == NULL) elog(ERROR, "Cannot open merge target file \"%s\": %s", to_fullpath_tmp1, strerror(errno)); - setvbuf(out, buffer, _IOFBF, STDIO_BUFSIZE); + setvbuf(out, buffer, _IOFBF, STDIO_BUFSIZE); /* restore file into temp file */ restore_data_file(parent_chain, dest_file, 
out, to_fullpath_tmp1, diff --git a/src/bin/pg_probackup/parsexlog.cpp b/src/bin/pg_probackup/parsexlog.cpp index 63ddb6aa5..ae896195f 100644 --- a/src/bin/pg_probackup/parsexlog.cpp +++ b/src/bin/pg_probackup/parsexlog.cpp @@ -1123,7 +1123,7 @@ static int read_requested_page(XLogReaderData *reader_data, char *readBuf, } } #ifdef HAVE_LIBZ - else + else if (!IsDssMode()) { if (fio_gzseek(reader_data->gz_xlogfile, (z_off_t) targetPageOff, SEEK_SET) == -1) { @@ -1733,7 +1733,7 @@ CleanupXLogPageRead(XLogReaderState *xlogreader) reader_data->xlogfile = -1; } #ifdef HAVE_LIBZ - else if (reader_data->gz_xlogfile != NULL) + else if (reader_data->gz_xlogfile != NULL && !IsDssMode()) { fio_gzclose(reader_data->gz_xlogfile); reader_data->gz_xlogfile = NULL; diff --git a/src/bin/pg_probackup/pg_probackup.cpp b/src/bin/pg_probackup/pg_probackup.cpp index 00411b2a9..8d84f937d 100644 --- a/src/bin/pg_probackup/pg_probackup.cpp +++ b/src/bin/pg_probackup/pg_probackup.cpp @@ -17,10 +17,13 @@ #include +#include "tool_common.h" #include "configuration.h" #include "thread.h" #include #include "common/fe_memutils.h" +#include "storage/file/fio_device.h" +#include "storage/dss/dss_adaptor.h" const char *PROGRAM_NAME = NULL; /* PROGRAM_NAME_FULL without .exe suffix * if any */ @@ -149,6 +152,7 @@ static void opt_backup_mode(ConfigOption *opt, const char *arg); static void opt_show_format(ConfigOption *opt, const char *arg); static void compress_init(void); +static void dss_init(void); /* * Short name should be non-printable ASCII character. 
@@ -755,7 +759,7 @@ int main(int argc, char *argv[]) /* Initialize logger */ init_logger(backup_path, &instance_config.logger); - + /* command was initialized for a few commands */ if (command) { @@ -782,6 +786,13 @@ int main(int argc, char *argv[]) !is_absolute_path(instance_config.pgdata)) elog(ERROR, "-D, --pgdata must be an absolute path"); + /* prepare pgdata of g_datadir struct */ + if (instance_config.pgdata != NULL) + { + errno_t rc = strcpy_s(g_datadir.pg_data, strlen(instance_config.pgdata) + 1, instance_config.pgdata); + securec_check_c(rc, "\0", "\0"); + } + #if PG_VERSION_NUM >= 110000 /* Check xlog-seg-size option */ if (instance_name && @@ -814,6 +825,10 @@ int main(int argc, char *argv[]) /* compress_init */ compress_init(); + dss_init(); + + initDataPathStruct(IsDssMode()); + /* do actual operation */ return do_actual_operate(); } @@ -909,3 +924,70 @@ compress_init(void) elog(ERROR, "Multithread backup does not support pglz compression"); } } + +static void dss_init(void) +{ + if (instance_config.dss.enable_dss) { + /* skip in some special backup modes */ + if (backup_subcmd == DELETE_CMD || backup_subcmd == DELETE_INSTANCE_CMD) { + return; + } + + /* register for dssapi */ + if (dss_device_init(instance_config.dss.socketpath, instance_config.dss.enable_dss) != DSS_SUCCESS) { + elog(ERROR, "fail to init dss device"); + return; + } + + if (IsSshProtocol()) { + elog(ERROR, "Remote operations on dss mode are not supported"); + } + + parse_vgname_args(instance_config.dss.vgname); + + /* Check dss connect */ + if (!dss_exist_dir(instance_config.dss.vgdata)) { + elog(ERROR, "Could not connect dssserver, vgdata: \"%s\", socketpath: \"%s\", check and retry later.", + instance_config.dss.vgdata, instance_config.dss.socketpath); + } + + if (strlen(instance_config.dss.vglog) && !dss_exist_dir(instance_config.dss.vglog)) { + elog(ERROR, "Could not connect dssserver, vglog: \"%s\", socketpath: \"%s\", check and retry later.", + instance_config.dss.vglog, 
instance_config.dss.socketpath); + } + + /* Check backup instance id in shared storage mode */ + int id = instance_config.dss.instance_id; + if (id < MIN_INSTANCEID || id > MAX_INSTANCEID) { + elog(ERROR, "Instance id must be specified in dss mode, valid range is %d - %d.", + MIN_INSTANCEID, MAX_INSTANCEID); + } + + if (backup_subcmd != RESTORE_CMD) { + off_t size = 0; + char xlog_control_path[MAXPGPATH]; + + join_path_components(xlog_control_path, instance_config.dss.vgdata, PG_XLOG_CONTROL_FILE); + if ((size = dss_get_file_size(xlog_control_path)) == INVALID_DEVICE_SIZE) { + elog(ERROR, "Could not get \"%s\" size: %s", xlog_control_path, strerror(errno)); + } + + if (size < (off_t)BLCKSZ * id) { + elog(ERROR, "Cound not read beyond end of file \"%s\", file_size: %ld, instance_id: %d\n", + xlog_control_path, size, id); + } + } + + /* Prepare some g_datadir parameters */ + g_datadir.instance_id = id; + + errno_t rc = strcpy_s(g_datadir.dss_data, strlen(instance_config.dss.vgdata) + 1, instance_config.dss.vgdata); + securec_check_c(rc, "\0", "\0"); + + rc = strcpy_s(g_datadir.dss_log, strlen(instance_config.dss.vglog) + 1, instance_config.dss.vglog); + securec_check_c(rc, "\0", "\0"); + + XLogSegmentSize = DSS_XLOG_SEG_SIZE; + instance_config.xlog_seg_size = DSS_XLOG_SEG_SIZE; + } +} \ No newline at end of file diff --git a/src/bin/pg_probackup/pg_probackupa.h b/src/bin/pg_probackup/pg_probackupa.h index 79f8d7787..938d452fa 100644 --- a/src/bin/pg_probackup/pg_probackupa.h +++ b/src/bin/pg_probackup/pg_probackupa.h @@ -53,11 +53,13 @@ extern const char *PROGRAM_FULL_PATH; /* Directory/File names */ #define DATABASE_DIR "database" +#define DSSDATA_DIR "dssdata" #define BACKUPS_DIR "backups" #define PG_XLOG_DIR "pg_xlog" #define PG_LOG_DIR "pg_log" #define PG_TBLSPC_DIR "pg_tblspc" #define PG_GLOBAL_DIR "global" +#define PG_XLOG_CONTROL_FILE "pg_control" #define BACKUP_CONTROL_FILE "backup.control" #define BACKUP_CATALOG_CONF_FILE "pg_probackup.conf" #define 
BACKUP_CATALOG_PID "backup.pid" diff --git a/src/bin/pg_probackup/pg_probackupb.h b/src/bin/pg_probackup/pg_probackupb.h index d023bdb44..54ddc6fe7 100644 --- a/src/bin/pg_probackup/pg_probackupb.h +++ b/src/bin/pg_probackup/pg_probackupb.h @@ -56,6 +56,7 @@ typedef struct pgFile_t pg_crc32 hdr_crc; /* CRC value of header file: name_hdr */ off_t hdr_off; /* offset in header map */ int hdr_size; /* offset in header map */ + device_type_t type; /* file device type */ } pgFile; typedef struct page_map_entry @@ -128,6 +129,16 @@ typedef struct ArchiveOptions const char *user; } ArchiveOptions; +typedef struct DssOptions +{ + bool enable_dss; + int instance_id; + const char *vgname; + char *vglog; + char *vgdata; + char *socketpath; +} DssOptions; + /* * An instance configuration. It can be stored in a configuration file or passed * from command line. @@ -168,6 +179,9 @@ typedef struct InstanceConfig /* Archive description */ ArchiveOptions archive; + + /* DSS conntct parameters */ + DssOptions dss; } InstanceConfig; extern ConfigOption instance_options[]; @@ -268,6 +282,8 @@ struct pgBackup backup_path/instance_name/backup_id */ char *database_dir; /* Full path to directory with data files: backup_path/instance_name/backup_id/database */ + char *dssdata_dir; /* Full path to directory with dss data files: + backup_path/instance_name/backup_id/database/dssdata */ parray *files; /* list of files belonging to this backup * must be populated explicitly */ char *note; @@ -278,6 +294,9 @@ struct pgBackup /* map used for access to page headers */ HeaderMap hdr_map; + + /* device type */ + device_type_t storage_type; }; /* Recovery target for restore and validate subcommands */ @@ -334,6 +353,8 @@ typedef struct const char *from_root; const char *to_root; + const char *src_dss; + const char *dst_dss; const char *external_prefix; parray *files_list; diff --git a/src/bin/pg_probackup/pg_probackupc.h b/src/bin/pg_probackup/pg_probackupc.h index 3d2b68676..dcad6c37e 100644 --- 
a/src/bin/pg_probackup/pg_probackupc.h +++ b/src/bin/pg_probackup/pg_probackupc.h @@ -26,6 +26,7 @@ strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \ strcmp((fname) + XLOG_FNAME_LEN, ".gz.part") == 0) +#define IsDssMode() (instance_config.dss.enable_dss == true) #define IsSshProtocol() (instance_config.remote.host && strcmp(instance_config.remote.proto, "ssh") == 0) /* directory options */ @@ -145,8 +146,7 @@ extern int do_delete_instance(void); extern void do_delete_status(InstanceConfig *instance_config, const char *status); /* in fetch.c */ -extern char *slurpFile(const char *datadir, - const char *path, +extern char *slurpFile(const char *fullpath, size_t *filesize, bool safe, fio_location location); @@ -280,7 +280,7 @@ extern int dir_create_dir(const char *path, mode_t mode); extern bool dir_is_empty(const char *path, fio_location location); extern bool fileExists(const char *path, fio_location location); -extern size_t pgFileSize(const char *path); +extern off_t pgFileSize(const char *path); extern pgFile *pgFileNew(const char *path, const char *rel_path, bool follow_symlink, int external_dir_num, @@ -327,9 +327,8 @@ extern size_t restore_data_file_internal(FILE *in, FILE *out, pgFile *file, uint const char *from_fullpath, const char *to_fullpath, int nblocks, datapagemap_t *map, PageState *checksum_map, int checksum_version, datapagemap_t *lsn_map, BackupPageHeader2 *headers); -extern size_t restore_non_data_file(parray *parent_chain, pgBackup *dest_backup, - pgFile *dest_file, FILE *out, const char *to_fullpath, - bool already_exists); +extern size_t restore_non_data_file(parray *parent_chain, pgBackup *dest_backup, pgFile *dest_file, FILE *out, + const char *to_fullpath, bool already_exists); extern void restore_non_data_file_internal(FILE *in, FILE *out, pgFile *file, const char *from_fullpath, const char *to_fullpath); extern bool create_empty_file(fio_location from_location, const char *to_root, @@ -381,10 +380,14 @@ extern XLogRecPtr 
get_checkpoint_location(PGconn *conn); extern uint64 get_system_identifier(const char *pgdata_path); extern uint64 get_remote_system_identifier(PGconn *conn); extern uint32 get_data_checksum_version(bool safe); -extern pg_crc32c get_pgcontrol_checksum(const char *pgdata_path); +extern pg_crc32c get_pgcontrol_checksum(const char *fullpath); extern uint32 get_xlog_seg_size(char *pgdata_path); extern void get_redo(const char *pgdata_path, RedoParams *redo); -extern void set_min_recovery_point(pgFile *file, const char *backup_path, +extern void parse_vgname_args(const char* args); +extern bool is_ss_xlog(const char *ss_dir); +extern void ss_createdir(const char *ss_dir, const char *vgdata, const char *vglog); +extern char* xstrdup(const char* s); +extern void set_min_recovery_point(pgFile *file, const char *fullpath, XLogRecPtr stop_backup_lsn); extern void copy_pgcontrol_file(const char *from_fullpath, fio_location from_location, const char *to_fullpath, fio_location to_location, pgFile *file); @@ -392,6 +395,8 @@ extern void copy_pgcontrol_file(const char *from_fullpath, fio_location from_loc extern void time2iso(char *buf, size_t len, time_t time); extern const char *status2str(BackupStatus status); extern BackupStatus str2status(const char *status); +extern const char *dev2str(device_type_t type); +extern device_type_t str2dev(const char *dev); extern const char *base36enc(long unsigned int value); extern char *base36enc_dup(long unsigned int value); extern long unsigned int base36dec(const char *text); diff --git a/src/bin/pg_probackup/restore.cpp b/src/bin/pg_probackup/restore.cpp index b13b7acfe..19058ef54 100644 --- a/src/bin/pg_probackup/restore.cpp +++ b/src/bin/pg_probackup/restore.cpp @@ -19,6 +19,7 @@ #include "thread.h" #include "common/fe_memutils.h" #include "catalog/catalog.h" +#include "storage/file/fio_device.h" #define RESTORE_ARRAY_LEN 100 @@ -31,6 +32,7 @@ typedef struct parray *parent_chain; bool skip_external_dirs; const char *to_root; + const 
char *to_dss; size_t restored_bytes; bool use_bitmap; IncrRestoreMode incremental_mode; @@ -55,8 +57,8 @@ static void set_orphan_status(parray *backups, pgBackup *parent_backup); static void pg12_recovery_config(pgBackup *backup, bool add_include); static void restore_chain(pgBackup *dest_backup, parray *parent_chain, - pgRestoreParams *params, - const char *pgdata_path, bool no_sync); + pgRestoreParams *params, const char *pgdata_path, + const char *dssdata_path, bool no_sync); static void check_incremental_compatibility(const char *pgdata, uint64 system_identifier, IncrRestoreMode incremental_mode); static pgBackup *find_backup_range(parray *backups, @@ -83,6 +85,7 @@ static void threads_handle(pthread_t *threads, parray *parent_chain, pgRestoreParams *params, const char *pgdata_path, + const char *dssdata_path, bool use_bitmap, size_t total_bytes); static void sync_restored_files(parray *dest_files, @@ -159,6 +162,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, const char *action = (const char *)(params->is_restore ? 
"Restore":"Validate"); parray *parent_chain = NULL; bool pgdata_is_empty = true; + bool dssdata_is_empty = true; bool tblspaces_are_empty = true; if (params->is_restore) @@ -166,6 +170,29 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, if (instance_config.pgdata == NULL) elog(ERROR, "required parameter not specified: PGDATA (-D, --pgdata)"); + + if (IsDssMode()) + { + /* do not support increment restore in dss mode */ + if (params->incremental_mode != INCR_NONE) + { + elog(ERROR, "Incremental restore is not support when enable dss"); + } + + if (!dir_is_empty(instance_config.dss.vgdata, FIO_DSS_HOST)) + { + dssdata_is_empty = false; + elog(ERROR, "Restore destination is not empty: \"%s\"", + instance_config.dss.vgdata); + } + if (!dir_is_empty(instance_config.dss.vglog, FIO_DSS_HOST)) + { + dssdata_is_empty = false; + elog(ERROR, "Restore destination is not empty: \"%s\"", + instance_config.dss.vglog); + } + } + /* Check if restore destination empty */ if (!dir_is_empty(instance_config.pgdata, FIO_DB_HOST)) { @@ -228,7 +255,7 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, { check_tablespace_mapping(dest_backup, params->incremental_mode != INCR_NONE, &tblspaces_are_empty); - if (params->incremental_mode != INCR_NONE && pgdata_is_empty && tblspaces_are_empty) + if (params->incremental_mode != INCR_NONE && pgdata_is_empty && tblspaces_are_empty && dssdata_is_empty) { elog(INFO, "Destination directory and tablespace directories are empty, " "disable incremental restore"); @@ -393,8 +420,8 @@ do_restore_or_validate(time_t target_backup_id, pgRecoveryTarget *rt, */ if (params->is_restore) { - restore_chain(dest_backup, parent_chain, - params, instance_config.pgdata, no_sync); + restore_chain(dest_backup, parent_chain, params, instance_config.pgdata, + instance_config.dss.vgdata, no_sync); /* Create recovery.conf with given recovery target parameters */ create_recovery_conf(target_backup_id, rt, dest_backup, params); @@ 
-684,8 +711,8 @@ static XLogRecPtr determine_shift_lsn(pgBackup *dest_backup) */ void restore_chain(pgBackup *dest_backup, parray *parent_chain, - pgRestoreParams *params, - const char *pgdata_path, bool no_sync) + pgRestoreParams *params, const char *pgdata_path, + const char *dssdata_path, bool no_sync) { int i; char timestamp[100]; @@ -780,6 +807,13 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain, params->incremental_mode != INCR_NONE, FIO_DB_HOST); + /* some file is in dssserver */ + if (IsDssMode()) + create_data_directories(dest_files, instance_config.dss.vgdata, + dest_backup->root_dir, true, + params->incremental_mode != INCR_NONE, + FIO_DSS_HOST); + /* * Restore dest_backup external directories. */ @@ -847,9 +881,9 @@ restore_chain(pgBackup *dest_backup, parray *parent_chain, threads_args = (restore_files_arg *) palloc(sizeof(restore_files_arg) * num_threads); - threads_handle(threads, threads_args, dest_backup, - dest_files, pgdata_files, external_dirs, parent_chain, - params, pgdata_path, use_bitmap, total_bytes); + threads_handle(threads, threads_args, dest_backup, dest_files, + pgdata_files, external_dirs, parent_chain, params, + pgdata_path, dssdata_path, use_bitmap, total_bytes); /* Close page header maps */ for (i = parray_num(parent_chain) - 1; i >= 0; i--) @@ -1003,6 +1037,7 @@ static void threads_handle(pthread_t *threads, parray *parent_chain, pgRestoreParams *params, const char *pgdata_path, + const char *dssdata_path, bool use_bitmap, size_t total_bytes) { @@ -1036,6 +1071,7 @@ static void threads_handle(pthread_t *threads, arg->parent_chain = parent_chain; arg->skip_external_dirs = params->skip_external_dirs; arg->to_root = pgdata_path; + arg->to_dss = dssdata_path; arg->use_bitmap = use_bitmap; arg->incremental_mode = params->incremental_mode; arg->shift_lsn = params->shift_lsn; @@ -1102,6 +1138,10 @@ static void sync_restored_files(parray *dest_files, params->skip_external_dirs) continue; + /* skip dss files, which do not need 
sync */ + if (is_dss_type(dest_file->type)) + continue; + /* construct fullpath */ if (dest_file->external_dir_num == 0) { @@ -1160,6 +1200,7 @@ restore_files(void *arg) char to_fullpath[MAXPGPATH]; FILE *out = NULL; char *out_buf = (char *)pgut_malloc(STDIO_BUFSIZE); + fio_location out_location; restore_files_arg *arguments = (restore_files_arg *) arg; @@ -1208,14 +1249,18 @@ restore_files(void *arg) continue; /* set fullpath of destination file */ - if (dest_file->external_dir_num == 0) - join_path_components(to_fullpath, arguments->to_root, dest_file->rel_path); - else + if (dest_file->external_dir_num != 0) { char *external_path = (char *)parray_get(arguments->dest_external_dirs, dest_file->external_dir_num - 1); join_path_components(to_fullpath, external_path, dest_file->rel_path); } + else if (is_dss_type(dest_file->type)) + { + join_path_components(to_fullpath, arguments->to_dss, dest_file->rel_path); + } + else + join_path_components(to_fullpath, arguments->to_root, dest_file->rel_path); if (arguments->incremental_mode != INCR_NONE && parray_bsearch(arguments->pgdata_files, dest_file, pgFileCompareRelPathWithExternalDesc)) @@ -1223,6 +1268,7 @@ restore_files(void *arg) already_exists = true; } + out_location = is_dss_type(dest_file->type) ? FIO_DSS_HOST : FIO_DB_HOST; /* * Handle incremental restore case for data files. 
* If file is already exists in pgdata, then @@ -1237,13 +1283,13 @@ restore_files(void *arg) { lsn_map = fio_get_lsn_map(to_fullpath, arguments->dest_backup->checksum_version, dest_file->n_blocks, arguments->shift_lsn, - dest_file->segno * RELSEG_SIZE, FIO_DB_HOST); + dest_file->segno * RELSEG_SIZE, out_location); } else if (arguments->incremental_mode == INCR_CHECKSUM) { checksum_map = fio_get_checksum_map(to_fullpath, arguments->dest_backup->checksum_version, dest_file->n_blocks, arguments->dest_backup->stop_lsn, - dest_file->segno * RELSEG_SIZE, FIO_DB_HOST); + dest_file->segno * RELSEG_SIZE, out_location); } } @@ -1253,20 +1299,20 @@ restore_files(void *arg) * if file do not exist */ if ((already_exists && dest_file->write_size == 0) || !already_exists) - out = fio_fopen(to_fullpath, PG_BINARY_W, FIO_DB_HOST); + out = fio_fopen(to_fullpath, PG_BINARY_W, out_location); /* * If file already exists and dest size is not zero, * then open it for reading and writing. */ else - out = fio_fopen(to_fullpath, PG_BINARY_R "+", FIO_DB_HOST); + out = fio_fopen(to_fullpath, PG_BINARY_R "+", out_location); if (out == NULL) elog(ERROR, "Cannot open restore target file \"%s\": %s", to_fullpath, strerror(errno)); /* update file permission */ - if (fio_chmod(to_fullpath, dest_file->mode, FIO_DB_HOST) == -1) + if (fio_chmod(to_fullpath, dest_file->mode, out_location) == -1) elog(ERROR, "Cannot change mode of \"%s\": %s", to_fullpath, strerror(errno)); @@ -1283,8 +1329,8 @@ restore_files(void *arg) if (!fio_is_remote_file(out)) setvbuf(out, out_buf, _IOFBF, STDIO_BUFSIZE); /* Destination file is data file */ - arguments->restored_bytes += restore_data_file(arguments->parent_chain, - dest_file, out, to_fullpath, + arguments->restored_bytes += restore_data_file(arguments->parent_chain, dest_file, + out, to_fullpath, arguments->use_bitmap, checksum_map, arguments->shift_lsn, lsn_map, true); } @@ -1295,8 +1341,9 @@ restore_files(void *arg) setvbuf(out, NULL, _IONBF, BUFSIZ); /* 
Destination file is nonedata file */ arguments->restored_bytes += restore_non_data_file(arguments->parent_chain, - arguments->dest_backup, dest_file, out, to_fullpath, - already_exists); + arguments->dest_backup, + dest_file, out, + to_fullpath, already_exists); } done: @@ -2041,3 +2088,4 @@ check_incremental_compatibility(const char *pgdata, uint64 system_identifier, if (!success) elog(ERROR, "Incremental restore is impossible"); } + diff --git a/src/bin/pg_probackup/show.cpp b/src/bin/pg_probackup/show.cpp index d262b2433..5acefda58 100644 --- a/src/bin/pg_probackup/show.cpp +++ b/src/bin/pg_probackup/show.cpp @@ -36,6 +36,7 @@ typedef struct ShowBackendRow char zratio[20]; char start_lsn[20]; char stop_lsn[20]; + char type[20]; const char *status; } ShowBackendRow; @@ -56,11 +57,12 @@ typedef struct ShowArchiveRow static void show_instance_start(void); static void show_instance_end(void); -static void show_instance(const char *instance_name, time_t requested_backup_id, bool show_name); +static void show_instance(InstanceConfig *instance, time_t requested_backup_id, bool show_name); static void print_backup_json_object(PQExpBuffer buf, pgBackup *backup); static int show_backup(const char *instance_name, time_t requested_backup_id); -static void show_instance_plain(const char *instance_name, parray *backup_list, bool show_name); +static void show_instance_plain(const char *instance_name, device_type_t instance_type, + parray *backup_list, bool show_name); static void show_instance_json(const char *instance_name, parray *backup_list); static void show_instance_archive(InstanceConfig *instance); @@ -110,7 +112,7 @@ do_show(const char *instance_name, time_t requested_backup_id, bool show_archive if (show_archive) show_instance_archive(instance); else - show_instance(instance->name, INVALID_BACKUP_ID, true); + show_instance(instance, INVALID_BACKUP_ID, true); } show_instance_end(); @@ -124,17 +126,17 @@ do_show(const char *instance_name, time_t requested_backup_id, 
bool show_archive requested_backup_id == INVALID_BACKUP_ID) { show_instance_start(); + InstanceConfig *instance = readInstanceConfigFile(instance_name); if (show_archive) { - InstanceConfig *instance = readInstanceConfigFile(instance_name); if (instance == NULL) { return 0; } show_instance_archive(instance); } else - show_instance(instance_name, requested_backup_id, false); + show_instance(instance, requested_backup_id, false); show_instance_end(); @@ -324,14 +326,18 @@ show_instance_end(void) * Show brief meta information about all backups in the backup instance. */ static void -show_instance(const char *instance_name, time_t requested_backup_id, bool show_name) +show_instance(InstanceConfig *instance, time_t requested_backup_id, bool show_name) { parray *backup_list; + const char *instance_name; + device_type_t instance_type; + instance_name = instance->name; + instance_type = instance->dss.enable_dss ? DEV_TYPE_DSS : DEV_TYPE_FILE; backup_list = catalog_get_backup_list(instance_name, requested_backup_id); if (show_format == SHOW_PLAIN) - show_instance_plain(instance_name, backup_list, show_name); + show_instance_plain(instance_name, instance_type, backup_list, show_name); else if (show_format == SHOW_JSON) show_instance_json(instance_name, backup_list); else @@ -553,18 +559,18 @@ static void process_time(pgBackup *backup, ShowBackendRow *row) * Show instance backups in plain format. 
*/ static void -show_instance_plain(const char *instance_name, parray *backup_list, bool show_name) +show_instance_plain(const char *instance_name, device_type_t instance_type, parray *backup_list, bool show_name) { -#define SHOW_FIELDS_COUNT 14 +#define SHOW_FIELDS_COUNT 15 int i; const char *names[SHOW_FIELDS_COUNT] = { "Instance", "Version", "ID", "Recovery Time", "Mode", "WAL Mode", "TLI", "Time", "Data", "WAL", - "Zratio", "Start LSN", "Stop LSN", "Status" }; + "Zratio", "Start LSN", "Stop LSN", "Type", "Status" }; const char *field_formats[SHOW_FIELDS_COUNT] = { " %-*s ", " %-*s ", " %-*s ", " %-*s ", " %-*s ", " %-*s ", " %-*s ", " %*s ", " %*s ", " %*s ", - " %*s ", " %-*s ", " %-*s ", " %-*s "}; + " %*s ", " %-*s ", " %-*s ", " %-*s ", " %-*s "}; uint32 widths[SHOW_FIELDS_COUNT]; uint32 widths_sum = 0; ShowBackendRow *rows = NULL; @@ -690,6 +696,12 @@ show_instance_plain(const char *instance_name, parray *backup_list, bool show_na widths[cur] = Max(widths[cur], strlen(row->stop_lsn)); cur++; + /* Type (FILE OR DSS) */ + rc = snprintf_s(row->type, lengthof(row->type), lengthof(row->type) - 1, "%s", dev2str(instance_type)); + securec_check_ss_c(rc, "\0", "\0"); + widths[cur] = Max(widths[cur], (uint32)strlen(row->type)); + cur++; + /* Status */ row->status = status2str(backup->status); widths[cur] = Max(widths[cur], strlen(row->status)); @@ -778,6 +790,10 @@ show_instance_plain(const char *instance_name, parray *backup_list, bool show_na row->stop_lsn); cur++; + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], + row->type); + cur++; + appendPQExpBuffer(&show_buf, field_formats[cur], widths[cur], row->status); cur++; diff --git a/src/bin/pg_probackup/util.cpp b/src/bin/pg_probackup/util.cpp index 64528b2ca..223e24236 100644 --- a/src/bin/pg_probackup/util.cpp +++ b/src/bin/pg_probackup/util.cpp @@ -19,7 +19,9 @@ #include #include +#include "tool_common.h" #include "common/fe_memutils.h" +#include "storage/file/fio_device.h" static const char 
*statusName[] = { @@ -36,6 +38,14 @@ static const char *statusName[] = "CORRUPT" }; +static const char *devTypeName[] = +{ + "FILE", + "DSS", + "UNKNOWN", + "UNKNOWN" +}; + uint32 NUM_65536 = 65536; uint32 NUM_10000 = 10000; @@ -84,12 +94,10 @@ static void checkControlFile(ControlFileData *ControlFile) { pg_crc32c crc; - /* Calculate CRC */ INIT_CRC32C(crc); COMP_CRC32C(crc, (char *) ControlFile, offsetof(ControlFileData, crc)); FIN_CRC32C(crc); - /* Then compare it */ if (!EQ_CRC32C(crc, ControlFile->crc)) elog(ERROR, "Calculated CRC checksum does not match value stored in file.\n" @@ -104,21 +112,75 @@ checkControlFile(ControlFileData *ControlFile) "the PostgreSQL installation would be incompatible with this data directory."); } +static void checkSSControlFile(ControlFileData* ControlFile, char* last, size_t size) +{ + pg_crc32c crc; + /* Calculate CRC */ + INIT_CRC32C(crc); + COMP_CRC32C(crc, (char *) ControlFile, offsetof(ControlFileData, crc)); + COMP_CRC32C(crc, last, size); + FIN_CRC32C(crc); + ControlFile->crc = crc; + + if ((ControlFile->pg_control_version % NUM_65536 == 0 || ControlFile->pg_control_version % NUM_65536 > NUM_10000) && + ControlFile->pg_control_version / NUM_65536 != 0) + elog(ERROR, "possible byte ordering mismatch\n" + "The byte ordering used to store the pg_control file might not match the one\n" + "used by this program. In that case the results below would be incorrect, and\n" + "the PostgreSQL installation would be incompatible with this data directory."); +} + /* * Verify control file contents in the buffer src, and copy it to *ControlFile. 
*/ static void digestControlFile(ControlFileData *ControlFile, char *src, size_t size) { - if (size != PG_CONTROL_SIZE) - elog(ERROR, "unexpected control file size %d, expected %d", - (int) size, PG_CONTROL_SIZE); + errno_t rc; + char* oldSrc = src; + char* tmpDssSrc; + size_t clearSize = 0; + bool dssMode = IsDssMode(); + int instanceId = instance_config.dss.instance_id; + int64 instanceId64 = (int64) instanceId; + size_t instanceIdSize = (size_t) instanceId; + size_t compareSize = PG_CONTROL_SIZE; + /* control file contents need special handle in dss mode */ + if (dssMode) { + // dms support (MAX_INSTANCEID + 1) instance, and last page for all control. + compareSize = 1 + MAX_INSTANCEID - MIN_INSTANCEID; + compareSize = (compareSize + 1) * PG_CONTROL_SIZE; + src += instanceId64 * PG_CONTROL_SIZE; + // in here, we clear all control page except instance page and last page. + if (instanceId != MIN_INSTANCEID) { + clearSize = instanceIdSize * PG_CONTROL_SIZE; + rc = memset_s(oldSrc, clearSize, 0, clearSize); + securec_check_c(rc, "\0", "\0"); + } + if (instanceId != MAX_INSTANCEID) { + clearSize = (size_t) (MAX_INSTANCEID - instanceIdSize) * PG_CONTROL_SIZE; + tmpDssSrc = oldSrc; + tmpDssSrc += (instanceId64 + 1) * PG_CONTROL_SIZE; + rc = memset_s(tmpDssSrc, clearSize, 0, clearSize); + securec_check_c(rc, "\0", "\0"); + } + } + + if (size != compareSize) + elog(ERROR, "unexpected control file size %d, expected %d", + (int) size, (int) compareSize); - errno_t rc = memcpy_s(ControlFile, sizeof(ControlFileData), src, sizeof(ControlFileData)); + rc = memcpy_s(ControlFile, sizeof(ControlFileData), src, sizeof(ControlFileData)); securec_check_c(rc, "\0", "\0"); /* Additional checks on control file */ - checkControlFile(ControlFile); + if (dssMode) { + tmpDssSrc = oldSrc; + tmpDssSrc += (MAX_INSTANCEID + 1) * PG_CONTROL_SIZE; + checkSSControlFile(ControlFile, tmpDssSrc, PG_CONTROL_SIZE); + } else { + checkControlFile(ControlFile); + } } /* @@ -136,7 +198,6 @@
writeControlFile(ControlFileData *ControlFile, const char *path, fio_location lo /* Write pg_control */ fd = fio_open(path, O_RDWR | O_CREAT | O_TRUNC | PG_BINARY, location); - if (fd < 0) elog(ERROR, "Failed to open file: %s", path); @@ -150,6 +211,30 @@ writeControlFile(ControlFileData *ControlFile, const char *path, fio_location lo pg_free(buffer); } +/* + * Write Dss buffer to pg_control + */ +static void +writeDssControlFile(char* src, size_t srcLen, const char *path, fio_location location) +{ + int fd; + /* Write pg_control */ + fd = fio_open(path, + O_RDWR | O_CREAT | O_TRUNC | PG_BINARY, location); + if (fd < 0) + elog(ERROR, "Failed to open file: %s", path); + + if (fio_write(fd, src, srcLen) != (ssize_t)srcLen) + elog(ERROR, "Failed to overwrite file: %s", path); + + if (fio_flush(fd) != 0) + elog(ERROR, "Failed to sync file: %s", path); + + if (fio_close(fd) != 0) { + elog(ERROR, "Failed to close file: %s", path); + } +} + /* * Utility shared by backup and restore to fetch the current timeline * used by a node. @@ -189,10 +274,17 @@ get_current_timeline_from_control(bool safe) ControlFileData ControlFile; char *buffer; size_t size; + fio_location location; /* First fetch file... */ - buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, - safe, FIO_DB_HOST); + location = is_dss_file(T_XLOG_CONTROL_FILE) ? 
FIO_DSS_HOST : FIO_DB_HOST; + if (IsDssMode()) { + buffer = slurpFile(T_XLOG_CONTROL_FILE, &size, false, location); + } else { + char xlog_ctl_file_path[MAXPGPATH] = {'\0'}; + join_path_components(xlog_ctl_file_path, instance_config.pgdata, T_XLOG_CONTROL_FILE); + buffer = slurpFile(xlog_ctl_file_path, &size, false, location); + } if (safe && buffer == NULL) return 0; @@ -233,8 +325,16 @@ get_checkpoint_location(PGconn *conn) char *buffer; size_t size; ControlFileData ControlFile; + fio_location location; - buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + location = is_dss_file(T_XLOG_CONTROL_FILE) ? FIO_DSS_HOST : FIO_DB_HOST; + if (IsDssMode()) { + buffer = slurpFile(T_XLOG_CONTROL_FILE, &size, false, location); + } else { + char xlog_ctl_file_path[MAXPGPATH] = {'\0'}; + join_path_components(xlog_ctl_file_path, instance_config.pgdata, T_XLOG_CONTROL_FILE); + buffer = slurpFile(xlog_ctl_file_path, &size, false, location); + } digestControlFile(&ControlFile, buffer, size); pg_free(buffer); @@ -248,9 +348,18 @@ get_system_identifier(const char *pgdata_path) ControlFileData ControlFile; char *buffer; size_t size; + fio_location location; /* First fetch file... */ - buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + location = is_dss_file(T_XLOG_CONTROL_FILE) ? 
FIO_DSS_HOST : FIO_DB_HOST; + if (IsDssMode()) { + buffer = slurpFile(T_XLOG_CONTROL_FILE, &size, false, location); + } else { + char xlog_ctl_file_path[MAXPGPATH] = {'\0'}; + join_path_components(xlog_ctl_file_path, pgdata_path, T_XLOG_CONTROL_FILE); + buffer = slurpFile(xlog_ctl_file_path, &size, false, location); + } + if (buffer == NULL) return 0; digestControlFile(&ControlFile, buffer, size); @@ -283,8 +392,17 @@ get_remote_system_identifier(PGconn *conn) char *buffer; size_t size; ControlFileData ControlFile; + fio_location location; - buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + location = is_dss_file(T_XLOG_CONTROL_FILE) ? FIO_DSS_HOST : FIO_DB_HOST; + /* control file location resolved above */ + if (IsDssMode()) { + buffer = slurpFile(T_XLOG_CONTROL_FILE, &size, false, location); + } else { + char xlog_ctl_file_path[MAXPGPATH] = {'\0'}; + join_path_components(xlog_ctl_file_path, instance_config.pgdata, T_XLOG_CONTROL_FILE); + buffer = slurpFile(xlog_ctl_file_path, &size, false, location); + } digestControlFile(&ControlFile, buffer, size); pg_free(buffer); @@ -299,15 +417,24 @@ get_xlog_seg_size(char *pgdata_path) ControlFileData ControlFile; char *buffer; size_t size; + fio_location location; /* First fetch file... */ - buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + /* resolve control file location (FILE vs DSS) */ + location = is_dss_file(T_XLOG_CONTROL_FILE) ?
FIO_DSS_HOST : FIO_DB_HOST; + if (IsDssMode()) { + buffer = slurpFile(T_XLOG_CONTROL_FILE, &size, false, location); + } else { + char xlog_ctl_file_path[MAXPGPATH] = {'\0'}; + join_path_components(xlog_ctl_file_path, pgdata_path, T_XLOG_CONTROL_FILE); + buffer = slurpFile(xlog_ctl_file_path, &size, false, location); + } digestControlFile(&ControlFile, buffer, size); pg_free(buffer); return ControlFile.xlog_seg_size; #else - return (uint32) XLOG_SEG_SIZE; + return (uint32) XLogSegSize; #endif } @@ -317,10 +444,17 @@ get_data_checksum_version(bool safe) ControlFileData ControlFile; char *buffer; size_t size; + fio_location location; /* First fetch file... */ - buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, - safe, FIO_DB_HOST); + location = is_dss_file(T_XLOG_CONTROL_FILE) ? FIO_DSS_HOST : FIO_DB_HOST; + if (IsDssMode()) { + buffer = slurpFile(T_XLOG_CONTROL_FILE, &size, false, location); + } else { + char xlog_ctl_file_path[MAXPGPATH] = {'\0'}; + join_path_components(xlog_ctl_file_path, instance_config.pgdata, T_XLOG_CONTROL_FILE); + buffer = slurpFile(xlog_ctl_file_path, &size, false, location); + } if (buffer == NULL) return 0; digestControlFile(&ControlFile, buffer, size); @@ -330,15 +464,14 @@ get_data_checksum_version(bool safe) } pg_crc32c -get_pgcontrol_checksum(const char *pgdata_path) +get_pgcontrol_checksum(const char *fullpath) { ControlFileData ControlFile; char *buffer; size_t size; - /* First fetch file... */ - buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, false, FIO_BACKUP_HOST); - + /* First fetch file in backup dir ... */ + buffer = slurpFile(fullpath, &size, false, FIO_BACKUP_HOST); digestControlFile(&ControlFile, buffer, size); pg_free(buffer); @@ -351,9 +484,17 @@ get_redo(const char *pgdata_path, RedoParams *redo) ControlFileData ControlFile; char *buffer; size_t size; + fio_location location; /* First fetch file... 
*/ - buffer = slurpFile(pgdata_path, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + location = is_dss_file(T_XLOG_CONTROL_FILE) ? FIO_DSS_HOST : FIO_DB_HOST; + if (IsDssMode()) { + buffer = slurpFile(T_XLOG_CONTROL_FILE, &size, false, location); + } else { + char xlog_ctl_file_path[MAXPGPATH] = {'\0'}; + join_path_components(xlog_ctl_file_path, pgdata_path, T_XLOG_CONTROL_FILE); + buffer = slurpFile(xlog_ctl_file_path, &size, false, location); + } digestControlFile(&ControlFile, buffer, size); pg_free(buffer); @@ -376,21 +517,103 @@ get_redo(const char *pgdata_path, RedoParams *redo) } +void +parse_vgname_args(const char* args) +{ + char *vgname = xstrdup(args); + if (strstr(vgname, "/") != NULL) + elog(ERROR, "invalid token \"/\" in vgname"); + + char *comma = strstr(vgname, ","); + if (comma == NULL) { + instance_config.dss.vgdata = vgname; + instance_config.dss.vglog = const_cast(""); + return; + } + + instance_config.dss.vgdata = xstrdup(vgname); + comma = strstr(instance_config.dss.vgdata, ","); + comma[0] = '\0'; + instance_config.dss.vglog = comma + 1; + if (strstr(instance_config.dss.vgdata, ",") != NULL) + elog(ERROR, "invalid vgname args, should be two volume group names, example: \"+data,+log\""); + if (strstr(instance_config.dss.vglog, ",") != NULL) + elog(ERROR, "invalid vgname args, should be two volume group names, example: \"+data,+log\""); +} + +bool +is_ss_xlog(const char *ss_dir) +{ + char ss_xlog[MAXPGPATH] = {0}; + char ss_doublewrite[MAXPGPATH] = {0}; + char ss_notify[MAXPGPATH] = {0}; + char ss_snapshots[MAXPGPATH] = {0}; + int rc = EOK; + int instance_id = instance_config.dss.instance_id; + + rc = sprintf_s(ss_xlog, sizeof(ss_xlog), "%s%d", "pg_xlog", instance_id); + securec_check_ss_c(rc, "\0", "\0"); + + rc = sprintf_s(ss_doublewrite, sizeof(ss_doublewrite), "%s%d", "pg_doublewrite", instance_id); + securec_check_ss_c(rc, "\0", "\0"); + + rc = sprintf_s(ss_notify, sizeof(ss_notify), "%s%d", "pg_notify", instance_id); + 
 securec_check_ss_c(rc, "\0", "\0"); + + rc = sprintf_s(ss_snapshots, sizeof(ss_snapshots), "%s%d", "pg_snapshots", instance_id); + securec_check_ss_c(rc, "\0", "\0"); + + if (IsDssMode() && strlen(instance_config.dss.vglog) && + (pg_strcasecmp(ss_dir, ss_xlog) == 0 || + pg_strcasecmp(ss_dir, ss_doublewrite) == 0 || + pg_strcasecmp(ss_dir, ss_notify) == 0 || + pg_strcasecmp(ss_dir, ss_snapshots) == 0)) { + return true; + } + return false; +} + +void +ss_createdir(const char *ss_dir, const char *vgdata, const char *vglog) +{ + char path[MAXPGPATH] = {0}; + char link_path[MAXPGPATH] = {0}; + int rc = EOK; + + rc = sprintf_s(link_path, sizeof(link_path), "%s/%s", vgdata, ss_dir); + securec_check_ss_c(rc, "\0", "\0"); + rc = sprintf_s(path, sizeof(path), "%s/%s", vglog, ss_dir); + securec_check_ss_c(rc, "\0", "\0"); + + dir_create_dir(path, DIR_PERMISSION); + if (symlink(path, link_path) < 0) { + elog(ERROR, "can not link dss xlog dir \"%s\" to dss xlog dir \"%s\": %s", link_path, path, + strerror(errno)); + } +} + /* * Rewrite minRecoveryPoint of pg_control in backup directory. minRecoveryPoint * 'as-is' is not to be trusted. */ void -set_min_recovery_point(pgFile *file, const char *backup_path, +set_min_recovery_point(pgFile *file, const char *fullpath, XLogRecPtr stop_backup_lsn) { ControlFileData ControlFile; char *buffer; size_t size; - char fullpath[MAXPGPATH]; + fio_location location; /* First fetch file content */ - buffer = slurpFile(instance_config.pgdata, XLOG_CONTROL_FILE, &size, false, FIO_DB_HOST); + location = is_dss_file(T_XLOG_CONTROL_FILE) ?
FIO_DSS_HOST : FIO_DB_HOST; + if (IsDssMode()) { + buffer = slurpFile(T_XLOG_CONTROL_FILE, &size, false, location); + } else { + char xlog_ctl_file_path[MAXPGPATH] = {'\0'}; + join_path_components(xlog_ctl_file_path, instance_config.pgdata, T_XLOG_CONTROL_FILE); + buffer = slurpFile(xlog_ctl_file_path, &size, false, location); + } digestControlFile(&ControlFile, buffer, size); elog(LOG, "Current minRecPoint %X/%X", @@ -410,9 +633,6 @@ set_min_recovery_point(pgFile *file, const char *backup_path, FIN_CRC32C(ControlFile.crc); /* overwrite pg_control */ - errno_t rc = snprintf_s(fullpath, sizeof(fullpath), sizeof(fullpath) - 1, - "%s/%s", backup_path, XLOG_CONTROL_FILE); - securec_check_ss_c(rc, "\0", "\0"); writeControlFile(&ControlFile, fullpath, FIO_LOCAL_HOST); /* Update pg_control checksum in backup_list */ @@ -432,17 +652,19 @@ copy_pgcontrol_file(const char *from_fullpath, fio_location from_location, char *buffer; size_t size; - buffer = slurpFile(from_fullpath, "", &size, false, from_location); - + buffer = slurpFile(from_fullpath, &size, false, from_location); digestControlFile(&ControlFile, buffer, size); - file->crc = ControlFile.crc; file->read_size = size; file->write_size = size; file->uncompressed_size = size; - - writeControlFile(&ControlFile, to_fullpath, to_location); - + + /* Write pg_control */ + if (is_dss_type(file->type)) { + writeDssControlFile(buffer, size, to_fullpath, to_location); + } else { + writeControlFile(&ControlFile, to_fullpath, to_location); + } pg_free(buffer); } @@ -522,6 +744,20 @@ str2status(const char *status) return BACKUP_STATUS_INVALID; } +const char *dev2str(device_type_t type) +{ + return devTypeName[type]; +} + +device_type_t str2dev(const char *dev) +{ + for (int i = 0; i < (int)DEV_TYPE_NUM; i++) { + if (pg_strcasecmp(dev, devTypeName[i]) == 0) + return (device_type_t)i; + } + return DEV_TYPE_INVALID; +} + bool datapagemap_is_set(datapagemap_t *map, BlockNumber blkno) { diff --git a/src/bin/pg_probackup/validate.cpp 
b/src/bin/pg_probackup/validate.cpp index ad50b9ee3..37b3c028b 100644 --- a/src/bin/pg_probackup/validate.cpp +++ b/src/bin/pg_probackup/validate.cpp @@ -16,6 +16,7 @@ #include "thread.h" #include "common/fe_memutils.h" +#include "storage/file/fio_device.h" static void *pgBackupValidateFiles(void *arg); static void do_validate_instance(void); @@ -26,6 +27,7 @@ static bool skipped_due_to_lock = false; typedef struct { const char *base_path; + const char *dss_path; parray *files; bool corrupted; XLogRecPtr stop_lsn; @@ -88,6 +90,16 @@ bool pre_check_backup(pgBackup *backup) return false; } + /* Check backup storage mode suitable */ + if (IsDssMode() != is_dss_type(backup->storage_type)) + { + elog(WARNING, "Backup %s is not suit for instance %s, because they have different " + "storage type. Change it to CORRUPT and skip validation.", + base36enc((long unsigned int)backup->start_time), instance_name); + write_backup_status(backup, BACKUP_STATUS_CORRUPT, instance_name, true); + return false; + } + if (backup->status == BACKUP_STATUS_OK || backup->status == BACKUP_STATUS_DONE || backup->status == BACKUP_STATUS_MERGING) elog(INFO, "Validating backup %s", base36enc(backup->start_time)); @@ -109,6 +121,7 @@ void pgBackupValidate(pgBackup *backup, pgRestoreParams *params) { char base_path[MAXPGPATH]; + char dss_path[MAXPGPATH]; char external_prefix[MAXPGPATH]; parray *files = NULL; bool corrupted = false; @@ -122,6 +135,7 @@ pgBackupValidate(pgBackup *backup, pgRestoreParams *params) return; join_path_components(base_path, backup->root_dir, DATABASE_DIR); + join_path_components(dss_path, backup->root_dir, DSSDATA_DIR); join_path_components(external_prefix, backup->root_dir, EXTERNAL_DIR); files = get_backup_filelist(backup, false); @@ -158,6 +172,7 @@ pgBackupValidate(pgBackup *backup, pgRestoreParams *params) validate_files_arg *arg = &(threads_args[i]); arg->base_path = base_path; + arg->dss_path = dss_path; arg->files = files; arg->corrupted = false; arg->backup_mode = 
backup->backup_mode; @@ -257,7 +272,7 @@ void check_crc(pgFile *file, char *file_fullpath, validate_files_arg *arguments) if (arguments->backup_version >= 20025 && strcmp(file->name, "pg_control") == 0 && !file->external_dir_num) - crc = get_pgcontrol_checksum(arguments->base_path); + crc = get_pgcontrol_checksum(file_fullpath); else crc = pgFileGetCRC(file_fullpath, arguments->backup_version <= 20021 || @@ -277,6 +292,9 @@ void check_crc(pgFile *file, char *file_fullpath, validate_files_arg *arguments) * check page headers, checksums (if enabled) * and compute checksum of the file */ + if (IsDssMode()) { + return; + } if (!validate_file_pages(file, file_fullpath, arguments->stop_lsn, arguments->checksum_version, arguments->backup_version, @@ -348,6 +366,8 @@ pgBackupValidateFiles(void *arg) makeExternalDirPathByNum(temp, arguments->external_prefix, file->external_dir_num); join_path_components(file_fullpath, temp, file->rel_path); } + else if (is_dss_type(file->type)) + join_path_components(file_fullpath, arguments->dss_path, file->rel_path); else join_path_components(file_fullpath, arguments->base_path, file->rel_path); diff --git a/src/bin/pg_resetxlog/Makefile b/src/bin/pg_resetxlog/Makefile index e575478dd..25d9810e3 100644 --- a/src/bin/pg_resetxlog/Makefile +++ b/src/bin/pg_resetxlog/Makefile @@ -27,7 +27,7 @@ OBJS= pg_resetxlog.o $(WIN32RES) all: pg_resetxlog override CXXFLAGS += -fPIE -override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) +override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -I${top_builddir}/src/include CFLAGS += -Wl,-z,relro,-z,now -fPIE LDFLAGS += -pie diff --git a/src/bin/pg_resetxlog/pg_resetxlog.cpp b/src/bin/pg_resetxlog/pg_resetxlog.cpp index f01594b92..96434ad8d 100644 --- a/src/bin/pg_resetxlog/pg_resetxlog.cpp +++ b/src/bin/pg_resetxlog/pg_resetxlog.cpp @@ -49,6 +49,7 @@ #include #endif +#include "tool_common.h" #include "access/transam.h" #include "access/tuptoaster.h" #include "access/multixact.h" @@ -239,6 +240,8 @@ int main(int 
argc, char* argv[]) exit(1); } + initDataPathStruct(false); + /* * Check for a postmaster lock file --- if there is one, refuse to * proceed, on grounds we might be interfering with a live installation. @@ -364,7 +367,7 @@ static bool ReadControlFile(void) pg_crc32 crc; errno_t rc = 0; - if ((fd = open(XLOG_CONTROL_FILE, O_RDONLY | PG_BINARY, 0)) < 0) { + if ((fd = open(T_XLOG_CONTROL_FILE, O_RDONLY | PG_BINARY, 0)) < 0) { /* * If pg_control is not there at all, or we can't read it, the odds * are we've been handed a bad DataDir path, so give up. User can do @@ -373,14 +376,14 @@ static bool ReadControlFile(void) fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"), progname, - XLOG_CONTROL_FILE, + T_XLOG_CONTROL_FILE, strerror(errno)); if (errno == ENOENT) fprintf(stderr, _("If you are sure the data directory path is correct, execute\n" " touch %s\n" "and try again.\n"), - XLOG_CONTROL_FILE); + T_XLOG_CONTROL_FILE); exit(1); } @@ -394,7 +397,7 @@ static bool ReadControlFile(void) } len = read(fd, buffer, PG_CONTROL_SIZE); if (len < 0) { - fprintf(stderr, _("%s: could not read file \"%s\": %s\n"), progname, XLOG_CONTROL_FILE, strerror(errno)); + fprintf(stderr, _("%s: could not read file \"%s\": %s\n"), progname, T_XLOG_CONTROL_FILE, strerror(errno)); free(buffer); buffer = NULL; close(fd); @@ -494,7 +497,7 @@ static void GuessControlValues(void) ControlFile.blcksz = BLCKSZ; ControlFile.relseg_size = RELSEG_SIZE; ControlFile.xlog_blcksz = XLOG_BLCKSZ; - ControlFile.xlog_seg_size = XLOG_SEG_SIZE; + ControlFile.xlog_seg_size = XLogSegSize; ControlFile.nameDataLen = NAMEDATALEN; ControlFile.indexMaxKeys = INDEX_MAX_KEYS; ControlFile.toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE; @@ -637,9 +640,9 @@ static void RewriteControlFile(void) rc = memcpy_s(buffer, PG_CONTROL_SIZE, &ControlFile, sizeof(ControlFileData)); securec_check_c(rc, "", ""); - unlink(XLOG_CONTROL_FILE); + unlink(T_XLOG_CONTROL_FILE); - fd = open(XLOG_CONTROL_FILE, O_RDWR | O_CREAT | 
O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR); + fd = open(T_XLOG_CONTROL_FILE, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) { fprintf(stderr, _("%s: could not create pg_control file: %s\n"), progname, strerror(errno)); exit(1); diff --git a/src/bin/pg_rewind/Makefile b/src/bin/pg_rewind/Makefile index b557a32a8..d644ce1fd 100644 --- a/src/bin/pg_rewind/Makefile +++ b/src/bin/pg_rewind/Makefile @@ -18,7 +18,7 @@ include $(top_builddir)/src/Makefile.global PG_CPPFLAGS = -I$(libpq_srcdir) PG_LIBS = $(libpq_pgport) -override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -I$(top_builddir)/src/bin/pg_ctl -I${top_builddir}/src/lib/page_compression +override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -I$(top_builddir)/src/bin/pg_ctl -I${top_builddir}/src/lib/page_compression -I${top_builddir}/src/include override LDFLAGS := -L../../src/lib/page_compression ifneq "$(MAKECMDGOALS)" "clean" ifneq "$(MAKECMDGOALS)" "distclean" diff --git a/src/bin/pg_rewind/parsexlog.cpp b/src/bin/pg_rewind/parsexlog.cpp index 266cedea9..e9ead584d 100644 --- a/src/bin/pg_rewind/parsexlog.cpp +++ b/src/bin/pg_rewind/parsexlog.cpp @@ -463,7 +463,7 @@ XLogRecPtr getValidCommonLSN(XLogRecPtr checkLsn, XLogRecPtr maxLsn) XLByteToPrevSeg(checkLsn, checkLogSegNo); if (maxLogSegNo > (checkLogSegNo + 1)) { for (loopLogSegNo = (checkLogSegNo + 1); loopLogSegNo <= maxLogSegNo; loopLogSegNo++) { - startLsn = loopLogSegNo * XLOG_SEG_SIZE; + startLsn = loopLogSegNo * XLogSegSize; curLsn = InvalidXLogRecPtr; curLsn = XLogFindNextRecord(xlogreader, startLsn); if (!XLogRecPtrIsInvalid(curLsn)) { diff --git a/src/bin/pg_rewind/pg_rewind.cpp b/src/bin/pg_rewind/pg_rewind.cpp index aadf227b0..da0e1f953 100755 --- a/src/bin/pg_rewind/pg_rewind.cpp +++ b/src/bin/pg_rewind/pg_rewind.cpp @@ -34,6 +34,8 @@ #include "common/build_query/build_query.h" #include "bin/elog.h" #include "pg_build.h" +#include "tool_common.h" + #define FORMATTED_TS_LEN 128 #define BUILD_PID "gs_build.pid" @@ -718,7 +720,7 @@ 
static void rewind_dw_file() char* unaligned_buf = NULL; /* Delete the dw file, if it exists. */ - rc = snprintf_s(dw_file_path, MAXPGPATH, MAXPGPATH - 1, "%s/%s", datadir_target, OLD_DW_FILE_NAME); + rc = snprintf_s(dw_file_path, MAXPGPATH, MAXPGPATH - 1, "%s/%s", datadir_target, T_OLD_DW_FILE_NAME); securec_check_ss_c(rc, "\0", "\0"); if (realpath(dw_file_path, real_file_path) == NULL) { if (real_file_path[0] == '\0') { @@ -734,7 +736,7 @@ static void rewind_dw_file() securec_check_c(rc, "\0", "\0"); /* Delete the dw build file, if it exists. */ - rc = snprintf_s(dw_file_path, MAXPGPATH, MAXPGPATH - 1, "%s/%s", datadir_target, DW_BUILD_FILE_NAME); + rc = snprintf_s(dw_file_path, MAXPGPATH, MAXPGPATH - 1, "%s/%s", datadir_target, T_DW_BUILD_FILE_NAME); securec_check_ss_c(rc, "\0", "\0"); if (realpath(dw_file_path, real_file_path) == NULL) { if (real_file_path[0] == '\0') { diff --git a/src/bin/psql/print.cpp b/src/bin/psql/print.cpp index 1cafae333..886f0f16a 100644 --- a/src/bin/psql/print.cpp +++ b/src/bin/psql/print.cpp @@ -1951,7 +1951,8 @@ void printTableInit( content->ncolumns = ncolumns; content->nrows = nrows; - if (ncolumns * nrows + 1 <= 0) { + int64 res = (int64)ncolumns * (int64)nrows + 1L; + if (res >= (int64)PG_INT32_MAX) { fprintf(stderr, _("Error: Integer overflow when select execution.\n")); exit(EXIT_FAILURE); } diff --git a/src/common/backend/catalog/catalog.cpp b/src/common/backend/catalog/catalog.cpp index df2c99e7c..147b39e3d 100644 --- a/src/common/backend/catalog/catalog.cpp +++ b/src/common/backend/catalog/catalog.cpp @@ -203,6 +203,15 @@ char* relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum) { int pathlen; char* path = NULL; + char* datadir = (char *)palloc(MAXPGPATH); + + if (ENABLE_DSS && rnode.dbNode != 1) { + errno_t rc = snprintf_s(datadir, MAXPGPATH, MAXPGPATH - 1, "%s/", + g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name); + securec_check_ss(rc, "\0", "\0"); + } else { + datadir[0] = '\0'; + } // Column 
store if (IsValidColForkNum(forknum)) { @@ -217,56 +226,117 @@ char* relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum) if (rnode.spcNode == GLOBALTABLESPACE_OID) { /* Shared system relations live in {datadir}/global */ - Assert(rnode.dbNode == 0); - Assert(IsHeapFileNode(rnode)); + if (!ENABLE_DSS) { + Assert(rnode.dbNode == 0); + } Assert(backend == InvalidBackendId); - pathlen = 7 + OIDCHARS + 1 + FORKNAMECHARS + 1; + pathlen = (int)strlen(GLOTBSDIR) + OIDCHARS + 1 + FORKNAMECHARS + 1; path = (char*)palloc(pathlen); if (forknum != MAIN_FORKNUM) { - rc = snprintf_s(path, pathlen, pathlen - 1, "global/%u_%s", rnode.relNode, forkNames[forknum]); + rc = snprintf_s(path, pathlen, pathlen - 1, "%s/%u_%s", GLOTBSDIR, rnode.relNode, forkNames[forknum]); } else { - rc = snprintf_s(path, pathlen, pathlen - 1, "global/%u", rnode.relNode); + rc = snprintf_s(path, pathlen, pathlen - 1, "%s/%u", GLOTBSDIR, rnode.relNode); } securec_check_ss(rc, "\0", "\0"); } else if (rnode.spcNode == DEFAULTTABLESPACE_OID) { /* The default tablespace is {datadir}/base */ if (backend == InvalidBackendId) { - pathlen = 5 + OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS + 2; + pathlen = strlen(datadir) + 1 + 5 + OIDCHARS + 1 + OIDCHARS + 1 + + OIDCHARS + 1 + FORKNAMECHARS + 1 + OIDCHARS + 2; path = (char*)palloc(pathlen); if (forknum != MAIN_FORKNUM) { if (!IsBucketFileNode(rnode)) { - rc = snprintf_s(path, pathlen, pathlen - 1, "base/%u/%u_%s", - rnode.dbNode, rnode.relNode, forkNames[forknum]); + rc = snprintf_s(path, pathlen, pathlen - 1, "%sbase/%u/%u_%s", + datadir, rnode.dbNode, rnode.relNode, forkNames[forknum]); } else { - rc = snprintf_s(path, pathlen, pathlen - 1, "base/%u/%u_b%d_%s", - rnode.dbNode, rnode.relNode, rnode.bucketNode, forkNames[forknum]); + rc = snprintf_s(path, pathlen, pathlen - 1, "%sbase/%u/%u_b%d_%s", + datadir, rnode.dbNode, rnode.relNode, rnode.bucketNode, forkNames[forknum]); } } else { if 
(!IsBucketFileNode(rnode)) { - rc = snprintf_s(path, pathlen, pathlen - 1, "base/%u/%u", rnode.dbNode, rnode.relNode); + rc = snprintf_s(path, pathlen, pathlen - 1, "%sbase/%u/%u", + datadir, rnode.dbNode, rnode.relNode); } else { - rc = snprintf_s(path, pathlen, pathlen - 1, "base/%u/%u_b%d", - rnode.dbNode, rnode.relNode, rnode.bucketNode); + rc = snprintf_s(path, pathlen, pathlen - 1, "%sbase/%u/%u_b%d", + datadir, rnode.dbNode, rnode.relNode, rnode.bucketNode); } } securec_check_ss(rc, "\0", "\0"); } else { /* OIDCHARS will suffice for an integer, too */ Assert(!IsBucketFileNode(rnode)); - pathlen = 5 + OIDCHARS + 2 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1; + pathlen = (int)strlen(datadir) + 1 + 5 + OIDCHARS + 2 + OIDCHARS + 1 + + OIDCHARS + 1 + FORKNAMECHARS + 1; path = (char*)palloc(pathlen); if (forknum != MAIN_FORKNUM) { - rc = snprintf_s(path, pathlen, pathlen - 1, "base/%u/t%d_%u_%s", - rnode.dbNode, backend, rnode.relNode, forkNames[forknum]); + rc = snprintf_s(path, pathlen, pathlen - 1, "%sbase/%u/t%d_%u_%s", + datadir, rnode.dbNode, backend, rnode.relNode, forkNames[forknum]); } else { - rc = snprintf_s(path, pathlen, pathlen - 1, "base/%u/t%d_%u", rnode.dbNode, backend, rnode.relNode); + rc = snprintf_s(path, pathlen, pathlen - 1, "%sbase/%u/t%d_%u", + datadir, rnode.dbNode, backend, rnode.relNode); } securec_check_ss(rc, "\0", "\0"); } + } else if (ENABLE_DSS) { + if (backend == InvalidBackendId) { + pathlen = strlen(TBLSPCDIR) + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + + OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 2 + OIDCHARS + 1 + FORKNAMECHARS + 1; + path = (char*)palloc(pathlen); + if (forknum != MAIN_FORKNUM) + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "%s/%u/%s/%u/%u_%s", + TBLSPCDIR, + rnode.spcNode, + TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, + rnode.relNode, + forkNames[forknum]); + else + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "%s/%u/%s/%u/%u", + TBLSPCDIR, + rnode.spcNode, + 
TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, + rnode.relNode); + securec_check_ss(rc, "\0", "\0"); + } else { + pathlen = 9 + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + OIDCHARS + 2 + + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1; + path = (char*)palloc(pathlen); + if (forknum != MAIN_FORKNUM) + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "%s/%u/%s/%u/t%d_%u_%s", + TBLSPCDIR, + rnode.spcNode, + TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, + backend, + rnode.relNode, + forkNames[forknum]); + else + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "%s/%u/%s/%u/t%d_%u", + TBLSPCDIR, + rnode.spcNode, + TABLESPACE_VERSION_DIRECTORY, + rnode.dbNode, + backend, + rnode.relNode); + securec_check_ss(rc, "\0", "\0"); + } } else { /* All other tablespaces are accessed via symlinks */ if (backend == InvalidBackendId) { - pathlen = 9 + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + + pathlen = strlen(TBLSPCDIR) + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + #ifdef PGXC /* Postgres-XC tablespaces include node name */ strlen(g_instance.attr.attr_common.PGXCNodeName) + 1 + @@ -276,20 +346,23 @@ char* relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum) #ifdef PGXC if (forknum != MAIN_FORKNUM) { if (!IsBucketFileNode(rnode)) { - rc = snprintf_s(path, pathlen, pathlen - 1, "pg_tblspc/%u/%s_%s/%u/%u_%s", - rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName, - rnode.dbNode, rnode.relNode, forkNames[forknum]); + rc = snprintf_s(path, pathlen, pathlen - 1, "%s/%u/%s_%s/%u/%u_%s", + TBLSPCDIR, rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName, rnode.dbNode, + rnode.relNode, forkNames[forknum]); } else { - rc = snprintf_s(path, pathlen, pathlen - 1, "pg_tblspc/%u/%s_%s/%u/%u_b%d_%s", - rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName, - rnode.dbNode, rnode.relNode, rnode.bucketNode, 
forkNames[forknum]); + rc = snprintf_s(path, pathlen, pathlen - 1, "%s/%u/%s_%s/%u/%u_b%d_%s", + TBLSPCDIR, rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName, rnode.dbNode, + rnode.relNode, rnode.bucketNode, forkNames[forknum]); } } else { if (!IsBucketFileNode(rnode)) { rc = snprintf_s(path, pathlen, pathlen - 1, - "pg_tblspc/%u/%s_%s/%u/%u", + "%s/%u/%s_%s/%u/%u", + TBLSPCDIR, rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName, @@ -299,7 +372,8 @@ char* relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum) rc = snprintf_s(path, pathlen, pathlen - 1, - "pg_tblspc/%u/%s_%s/%u/%u_b%d", + "%s/%u/%s_%s/%u/%u_b%d", + TBLSPCDIR, rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName, @@ -314,7 +388,8 @@ char* relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum) rc = snprintf_s(path, pathlen, pathlen - 1, - "pg_tblspc/%u/%s/%u/%u_%s", + "%s/%u/%s/%u/%u_%s", + TBLSPCDIR, rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, rnode.dbNode, @@ -324,7 +399,8 @@ char* relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum) rc = snprintf_s(path, pathlen, pathlen - 1, - "pg_tblspc/%u/%s/%u/%u", + "%s/%u/%s/%u/%u", + TBLSPCDIR, rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, rnode.dbNode, @@ -344,7 +420,8 @@ char* relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum) rc = snprintf_s(path, pathlen, pathlen - 1, - "pg_tblspc/%u/%s_%s/%u/t%d_%u_%s", + "%s/%u/%s_%s/%u/t%d_%u_%s", + TBLSPCDIR, rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName, @@ -356,7 +433,8 @@ char* relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum) rc = snprintf_s(path, pathlen, pathlen - 1, - "pg_tblspc/%u/%s_%s/%u/t%d_%u", + "%s/%u/%s_%s/%u/t%d_%u", + TBLSPCDIR, rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName, @@ -369,7 +447,8 @@ char* 
relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum) rc = snprintf_s(path, pathlen, pathlen - 1, - "pg_tblspc/%u/%s/%u/t%d_%u_%s", + "%s/%u/%s/%u/t%d_%u_%s", + TBLSPCDIR, rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, rnode.dbNode, @@ -380,7 +459,8 @@ char* relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum) rc = snprintf_s(path, pathlen, pathlen - 1, - "pg_tblspc/%u/%s/%u/t%d_%u", + "%s/%u/%s/%u/t%d_%u", + TBLSPCDIR, rnode.spcNode, TABLESPACE_VERSION_DIRECTORY, rnode.dbNode, @@ -391,6 +471,8 @@ char* relpathbackend(RelFileNode rnode, BackendId backend, ForkNumber forknum) } } } + + pfree(datadir); return path; } @@ -479,8 +561,24 @@ RelFileNodeForkNum relpath_to_filenode(char* path) char* tmptoken = NULL; parsepath = pstrdup(path); - token = strtok_r(parsepath, "/", &tmptoken); + if (ENABLE_DSS && parsepath[0] == '+') { + char *tmppath = NULL; + char *newpath = parsepath; + uint32 pathsize = (uint32)strlen(parsepath); + uint32 homesize = (uint32)strlen(g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name); + if ((pathsize <= homesize + 1) || + (strncmp(path, g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name, homesize) != 0)) { + pfree(parsepath); + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid relation file path %s.", path))); + } + newpath += homesize + 1; + tmppath = pstrdup(newpath); + pfree(parsepath); + parsepath = tmppath; + } + + token = strtok_r(parsepath, "/", &tmptoken); if (NULL == tmptoken || '\0' == *tmptoken) { pfree(parsepath); ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid relation file path %s.", path))); @@ -541,13 +639,22 @@ RelFileNodeForkNum relpath_to_filenode(char* path) } char tblspcversiondir[MAXPGPATH]; - int errorno = snprintf_s(tblspcversiondir, - MAXPGPATH, - MAXPGPATH - 1, - "%s_%s", - TABLESPACE_VERSION_DIRECTORY, - g_instance.attr.attr_common.PGXCNodeName); - securec_check_ss(errorno, "\0", "\0"); + if (ENABLE_DSS) { + int errorno = 
snprintf_s(tblspcversiondir, + MAXPGPATH, + MAXPGPATH - 1, + "%s", + TABLESPACE_VERSION_DIRECTORY); + securec_check_ss(errorno, "\0", "\0"); + } else { + int errorno = snprintf_s(tblspcversiondir, + MAXPGPATH, + MAXPGPATH - 1, + "%s_%s", + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName); + securec_check_ss(errorno, "\0", "\0"); + } /* skip tablespaces which not belong to us. */ if (0 != strncmp(token, tblspcversiondir, strlen(tblspcversiondir) + 1)) { pfree(parsepath); @@ -576,26 +683,49 @@ char* GetDatabasePath(Oid dbNode, Oid spcNode) int pathlen; char* path = NULL; errno_t rc = EOK; + char* datadir = (char *)palloc(MAXPGPATH); + + if (ENABLE_DSS && dbNode != 1) { + rc = snprintf_s(datadir, MAXPGPATH, MAXPGPATH - 1, "%s/", + g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name); + securec_check_ss(rc, "\0", "\0"); + } else { + datadir[0] = '\0'; + } if (spcNode == GLOBALTABLESPACE_OID) { /* Shared system relations live in {datadir}/global */ Assert(dbNode == 0); - pathlen = 6 + 1; + pathlen = (int)strlen(GLOTBSDIR) + 1; path = (char*)palloc(pathlen); - rc = snprintf_s(path, pathlen, pathlen - 1, "global"); + rc = snprintf_s(path, pathlen, pathlen - 1, "%s", GLOTBSDIR); securec_check_ss(rc, "\0", "\0"); } else if (spcNode == DEFAULTTABLESPACE_OID) { /* The default tablespace is {datadir}/base */ - pathlen = 5 + OIDCHARS + 1; + pathlen = (int)strlen(datadir) + 1 + 5 + OIDCHARS + 1; path = (char*)palloc(pathlen); - rc = snprintf_s(path, pathlen, pathlen - 1, "base/%u", dbNode); + rc = snprintf_s(path, pathlen, pathlen - 1, "%sbase/%u", datadir, dbNode); + securec_check_ss(rc, "\0", "\0"); + } else if (ENABLE_DSS) { + /* All other tablespaces are accessed via symlinks */ + pathlen = (int)strlen(TBLSPCDIR) + 1 + OIDCHARS + 1 + (int)strlen(TABLESPACE_VERSION_DIRECTORY) + + 1 + OIDCHARS + 1; + path = (char*)palloc(pathlen); + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "%s/%u/%s/%u", + TBLSPCDIR, + spcNode, + TABLESPACE_VERSION_DIRECTORY, 
+ dbNode); securec_check_ss(rc, "\0", "\0"); } else { /* All other tablespaces are accessed via symlinks */ - pathlen = 9 + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + + pathlen = (int)strlen(TBLSPCDIR) + 1 + OIDCHARS + 1 + (int)strlen(TABLESPACE_VERSION_DIRECTORY) + #ifdef PGXC /* Postgres-XC tablespaces include node name in path */ - strlen(g_instance.attr.attr_common.PGXCNodeName) + 1 + + (int)strlen(g_instance.attr.attr_common.PGXCNodeName) + 1 + #endif 1 + OIDCHARS + 1; path = (char*)palloc(pathlen); @@ -603,17 +733,26 @@ char* GetDatabasePath(Oid dbNode, Oid spcNode) rc = snprintf_s(path, pathlen, pathlen - 1, - "pg_tblspc/%u/%s_%s/%u", + "%s/%u/%s_%s/%u", + TBLSPCDIR, spcNode, TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName, dbNode); #else - rc = - snprintf_s(path, pathlen, pathlen - 1, "pg_tblspc/%u/%s/%u", spcNode, TABLESPACE_VERSION_DIRECTORY, dbNode); + rc = snprintf_s(path, + pathlen, + pathlen - 1, + "%s/%u/%s/%u", + TBLSPCDIR, + spcNode, + TABLESPACE_VERSION_DIRECTORY, + dbNode); #endif securec_check_ss(rc, "\0", "\0"); } + + pfree(datadir); return path; } diff --git a/src/common/backend/catalog/heap.cpp b/src/common/backend/catalog/heap.cpp index a74a89282..3883eefbc 100644 --- a/src/common/backend/catalog/heap.cpp +++ b/src/common/backend/catalog/heap.cpp @@ -525,6 +525,13 @@ Relation heap_create(const char* relname, Oid relnamespace, Oid reltablespace, O Relation rel; bool isbucket = false; + if (IsInitdb && EnableInitDBSegment) { + /* store tables in segment storage as all possible while initdb */ + if (relpersistence == RELPERSISTENCE_PERMANENT) { + storage_type = SEGMENT_PAGE; + } + } + /* The caller must have provided an OID for the relation. 
*/ Assert(OidIsValid(relid)); @@ -578,7 +585,17 @@ Relation heap_create(const char* relname, Oid relnamespace, Oid reltablespace, O } break; } - + + if (ENABLE_DSS && !partitioned_relation) { + /* + * when we store systable to segment, we should allocate segment header page + * for all objects, to avoid some issues, like: pg_table_size for view. + * if the view has no segment header page, the seg_totalblocks' call will crash, + * because of read_head_buffer's magic number check fail. + */ + create_storage = true; + } + /* * Never allow a pg_class entry to explicitly specify the database's * default tablespace in reltablespace; force it to zero instead. This @@ -599,17 +616,21 @@ Relation heap_create(const char* relname, Oid relnamespace, Oid reltablespace, O if (u_sess->proc_cxt.IsBinaryUpgrade) { if (!partitioned_relation && storage_type == SEGMENT_PAGE) { isbucket = BUCKET_OID_IS_VALID(bucketOid) && !newcbi; + Oid database_id = (ConvertToRelfilenodeTblspcOid(reltablespace) == GLOBALTABLESPACE_OID) ? + InvalidOid : u_sess->proc_cxt.MyDatabaseId; relfilenode = seg_alloc_segment(ConvertToRelfilenodeTblspcOid(reltablespace), - u_sess->proc_cxt.MyDatabaseId, isbucket, relfilenode); + database_id, isbucket, relfilenode); } } else { create_storage = false; } - } else if (storage_type == SEGMENT_PAGE && !partitioned_relation) { - Assert(reltablespace != GLOBALTABLESPACE_OID); + } else if ((storage_type == SEGMENT_PAGE && !partitioned_relation) || + (storage_type == SEGMENT_PAGE && ENABLE_DSS && create_storage)) { isbucket = BUCKET_OID_IS_VALID(bucketOid) && !newcbi; + Oid database_id = (ConvertToRelfilenodeTblspcOid(reltablespace) == GLOBALTABLESPACE_OID) ? 
+ InvalidOid : u_sess->proc_cxt.MyDatabaseId; relfilenode = (Oid)seg_alloc_segment(ConvertToRelfilenodeTblspcOid(reltablespace), - u_sess->proc_cxt.MyDatabaseId, isbucket, InvalidBlockNumber); + database_id, isbucket, InvalidBlockNumber); ereport(LOG, (errmsg("Segment Relation %s(%u) set relfilenode %u xid %lu", relname, relid, relfilenode, GetCurrentTransactionIdIfAny()))); } else { @@ -2479,6 +2500,15 @@ static Oid AddNewRelationType(const char* typname, Oid typeNamespace, Oid new_re InvalidOid); /* rowtypes never have a collation */ } +static Datum AddSegmentOption(Datum relOptions) +{ + DefElem *def = makeDefElem(pstrdup("segment"), (Node *)makeString((char *)"on")); + List* optsList = untransformRelOptions(relOptions); + optsList = lappend(optsList, def); + + return transformRelOptions((Datum)0, optsList, NULL, NULL, false, false); +} + /* -------------------------------- * heap_create_with_catalog * @@ -2532,6 +2562,21 @@ Oid heap_create_with_catalog(const char *relname, Oid relnamespace, Oid reltable bool relhasbucket = false; bool relhasuids = false; + if (IsInitdb && EnableInitDBSegment) { + if (relpersistence == RELPERSISTENCE_UNLOGGED) { + relpersistence = RELPERSISTENCE_PERMANENT; + ereport(WARNING, + (errmsg("Store unlogged table in segment when enable system table segment"))); + } + + /* store tables in segment storage as all possible while initdb */ + if (relpersistence == RELPERSISTENCE_PERMANENT && + (relkind != RELKIND_SEQUENCE && relkind != RELKIND_LARGE_SEQUENCE)) { + storage_type = SEGMENT_PAGE; + reloptions = AddSegmentOption(reloptions); + } + } + pg_class_desc = heap_open(RelationRelationId, RowExclusiveLock); /* diff --git a/src/common/backend/catalog/namespace.cpp b/src/common/backend/catalog/namespace.cpp index 36cb9a936..651353c7e 100644 --- a/src/common/backend/catalog/namespace.cpp +++ b/src/common/backend/catalog/namespace.cpp @@ -4444,6 +4444,10 @@ static void InitTempTableNamespace(void) ereport(ERROR, 
(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), errmsg("cannot create temporary tables during recovery"))); + if (SSIsServerModeReadOnly()) { + ereport(ERROR, (errmsg("cannot create temporary tables at Standby with DMS enabled"))); + } + timeLineId = get_controlfile_timeline(); tempID = __sync_add_and_fetch(>_tempID_seed, 1); diff --git a/src/common/backend/catalog/objectaddress.cpp b/src/common/backend/catalog/objectaddress.cpp index 627be57d3..62060c6c3 100644 --- a/src/common/backend/catalog/objectaddress.cpp +++ b/src/common/backend/catalog/objectaddress.cpp @@ -185,6 +185,11 @@ static const ObjectPropertyType* get_object_property_data(Oid class_id); ObjectAddress get_object_address( ObjectType objtype, List* objname, List* objargs, Relation* relp, LOCKMODE lockmode, bool missing_ok) { + if (ENABLE_DMS && (objtype == OBJECT_PUBLICATION || objtype == OBJECT_PUBLICATION_NAMESPACE || + objtype == OBJECT_PUBLICATION_REL || objtype == OBJECT_SUBSCRIPTION)) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Not support Publication and Subscription while DMS and DSS enabled"))); + } ObjectAddress address; ObjectAddress old_address = {InvalidOid, InvalidOid, 0}; Relation relation = NULL; diff --git a/src/common/backend/catalog/pg_hashbucket.cpp b/src/common/backend/catalog/pg_hashbucket.cpp index ac588effa..5679cc7ce 100755 --- a/src/common/backend/catalog/pg_hashbucket.cpp +++ b/src/common/backend/catalog/pg_hashbucket.cpp @@ -1161,8 +1161,10 @@ HeapTuple HbktModifyRelationRelfilenode(HeapTuple reltup, DataTransferType trans newrelfilenode = GetNewRelFileNode(indexrel->rd_rel->reltablespace, NULL, indexrel->rd_rel->relpersistence); bucketNode = InvalidBktId; } else { + Oid database_id = (ConvertToRelfilenodeTblspcOid(indexrel->rd_rel->reltablespace) == GLOBALTABLESPACE_OID) ? 
+ InvalidOid : u_sess->proc_cxt.MyDatabaseId; newrelfilenode = seg_alloc_segment(ConvertToRelfilenodeTblspcOid(indexrel->rd_rel->reltablespace), - u_sess->proc_cxt.MyDatabaseId, isBucket, InvalidBlockNumber); + database_id, isBucket, InvalidBlockNumber); bucketNode = SegmentBktId; } rnode = indexrel->rd_node; diff --git a/src/common/backend/catalog/storage.cpp b/src/common/backend/catalog/storage.cpp index 9fe10a8e8..e9a6a8236 100644 --- a/src/common/backend/catalog/storage.cpp +++ b/src/common/backend/catalog/storage.cpp @@ -51,6 +51,7 @@ #include "utils/rel_gs.h" #include "utils/snapmgr.h" #include "utils/syscache.h" +#include "ddes/dms/ss_transaction.h" #ifdef ENABLE_MULTIPLE_NODES #include "tsdb/cache/part_cachemgr.h" @@ -843,6 +844,10 @@ void smgrDoDropBufferUsingScan(bool isCommit) } } DropRelFileNodeAllBuffersUsingScan(rnodes, rnode_len); + + if (ENABLE_DMS && SS_PRIMARY_MODE && rnode_len > 0) { + SSBCastDropRelAllBuffer(rnodes, rnode_len); + } } void smgrDoDropBufferUsingHashTbl(bool isCommit) @@ -850,6 +855,9 @@ void smgrDoDropBufferUsingHashTbl(bool isCommit) PendingRelDelete* pending = NULL; PendingRelDelete* next = NULL; + int rnode_len = 0; + RelFileNode rnodes[DROP_BUFFER_USING_HASH_DEL_REL_NUM_THRESHOLD]; + int nestLevel = GetCurrentTransactionNestLevel(); HTAB* relfilenode_hashtbl = relfilenode_hashtbl_create(); int enter_cnt = 0; @@ -863,11 +871,23 @@ void smgrDoDropBufferUsingHashTbl(bool isCommit) (void)hash_search(relfilenode_hashtbl, &(pending->relnode), HASH_ENTER, &found); if (!found) { enter_cnt++; + + if (ENABLE_DMS && SS_PRIMARY_MODE) { + if (rnode_len >= DROP_BUFFER_USING_HASH_DEL_REL_NUM_THRESHOLD) { + SSBCastDropRelAllBuffer(rnodes, rnode_len); + rnode_len = 0; + } + rnodes[rnode_len++] = pending->relnode; + } } } } } } + + if (ENABLE_DMS && SS_PRIMARY_MODE && rnode_len > 0) { + SSBCastDropRelAllBuffer(rnodes, rnode_len); + } /* At least one relnode founded */ if (enter_cnt > 0) { diff --git a/src/common/backend/catalog/toasting.cpp 
b/src/common/backend/catalog/toasting.cpp index 622b3c789..fae972e41 100644 --- a/src/common/backend/catalog/toasting.cpp +++ b/src/common/backend/catalog/toasting.cpp @@ -678,6 +678,10 @@ static void InitLobTempToastNamespace(void) ereport(ERROR, (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), errmsg("cannot create temporary tables during recovery"))); + if (SSIsServerModeReadOnly()) { + ereport(ERROR, (errmsg("cannot create temporary tables at Standby with DMS enabled"))); + } + timeLineId = get_controlfile_timeline(); tempID = __sync_add_and_fetch(>_tempID_seed, 1); diff --git a/src/common/backend/libpq/auth.cpp b/src/common/backend/libpq/auth.cpp index c5646fb8b..b03c9a7f5 100644 --- a/src/common/backend/libpq/auth.cpp +++ b/src/common/backend/libpq/auth.cpp @@ -706,7 +706,8 @@ void ClientAuthentication(Port* port) if (IsRoleExist(port->user_name) && GetRoleOid(port->user_name) != INITIAL_USER_ID) { Oid roleid = GetRoleOid(port->user_name); USER_STATUS rolestatus; - if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE) { + if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE || + (ENABLE_DMS && !SS_MY_INST_IS_MASTER)) { rolestatus = GetAccountLockedStatusFromHashTable(roleid); } else { rolestatus = GetAccountLockedStatus(roleid); @@ -714,7 +715,8 @@ void ClientAuthentication(Port* port) if (UNLOCK_STATUS != rolestatus) { errno_t errorno = EOK; bool unlocked = false; - if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE) { + if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE || + (ENABLE_DMS && !SS_MY_INST_IS_MASTER)) { unlocked = UnlockAccountToHashTable(roleid, false, false); } else { unlocked = TryUnlockAccount(roleid, false, false); @@ -738,7 +740,8 @@ void ClientAuthentication(Port* port) (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), errmsg("The account has been locked."))); } } else if (status == STATUS_OK) { - if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE) { + if 
(t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE || + (ENABLE_DMS && !SS_MY_INST_IS_MASTER)) { (void)UnlockAccountToHashTable(roleid, false, true); } else { (void)TryUnlockAccount(roleid, false, true); @@ -747,7 +750,8 @@ void ClientAuthentication(Port* port) /* if password is not right, send signal to try lock the account*/ if (status == STATUS_WRONG_PASSWORD) { - if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE) { + if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE || + (ENABLE_DMS && !SS_MY_INST_IS_MASTER)) { UpdateFailCountToHashTable(roleid, 1, false); } else { TryLockAccount(roleid, 1, false); diff --git a/src/common/backend/parser/gram.y b/src/common/backend/parser/gram.y index 669ff0670..aa5b3a00b 100644 --- a/src/common/backend/parser/gram.y +++ b/src/common/backend/parser/gram.y @@ -2027,7 +2027,17 @@ OptSchemaEltList: ; OptBlockchainWith: - WITH BLOCKCHAIN { $$ = true; } + WITH BLOCKCHAIN + { + if (ENABLE_DMS) { + const char* message = "BLOCKCHAIN is not supported while DMS and DSS enabled"; + InsertErrorMessage(message, u_sess->plsql_cxt.plpgsql_yylloc); + ereport(errstate, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("BLOCKCHAIN is not supported while DMS and DSS enabled"))); + } + + $$ = true; + } | /* EMPTY */ { $$ = false; } ; @@ -2049,7 +2059,17 @@ AlterSchemaStmt: ; OptAlterToBlockchain: - WITH BLOCKCHAIN { $$ = true; } + WITH BLOCKCHAIN + { + if (ENABLE_DMS) { + const char* message = "BLOCKCHAIN is not supported while DMS and DSS enabled"; + InsertErrorMessage(message, u_sess->plsql_cxt.plpgsql_yylloc); + ereport(errstate, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("BLOCKCHAIN is not supported while DMS and DSS enabled"))); + } + + $$ = true; + } | WITHOUT BLOCKCHAIN { $$ = false; } ; @@ -8656,6 +8676,11 @@ CreateMatViewStmt: errmsg("It's not supported to specify distribute key on incremental materialized views"))); } #endif + if (ENABLE_DMS) { + ereport(errstate, 
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("matview is not supported while DMS and DSS enabled."))); + } + $6->ivm = $3; $$ = (Node *) ctas; } @@ -16641,14 +16666,28 @@ TransactionStmt: $$ = (Node *)n; } | PREPARE TRANSACTION Sconst - { + { + if (ENABLE_DMS) { + const char* message = "PREPARE TRANSACTION is not supported while DMS and DSS enabled"; + InsertErrorMessage(message, u_sess->plsql_cxt.plpgsql_yylloc); + ereport(errstate, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("PREPARE TRANSACTION is not supported while DMS and DSS enabled"))); + } + TransactionStmt *n = makeNode(TransactionStmt); n->kind = TRANS_STMT_PREPARE; n->gid = $3; $$ = (Node *)n; } | COMMIT PREPARED Sconst - { + { + if (ENABLE_DMS) { + const char* message = "COMMIT TRANSACTION is not supported while DMS and DSS enabled"; + InsertErrorMessage(message, u_sess->plsql_cxt.plpgsql_yylloc); + ereport(errstate, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COMMIT TRANSACTION is not supported while DMS and DSS enabled"))); + } + TransactionStmt *n = makeNode(TransactionStmt); n->kind = TRANS_STMT_COMMIT_PREPARED; n->gid = $3; @@ -16656,7 +16695,14 @@ TransactionStmt: $$ = (Node *)n; } | COMMIT PREPARED Sconst WITH Sconst - { + { + if (ENABLE_DMS) { + const char* message = "COMMIT TRANSACTION is not supported while DMS and DSS enabled"; + InsertErrorMessage(message, u_sess->plsql_cxt.plpgsql_yylloc); + ereport(errstate, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("COMMIT TRANSACTION is not supported while DMS and DSS enabled"))); + } + TransactionStmt *n = makeNode(TransactionStmt); n->kind = TRANS_STMT_COMMIT_PREPARED; n->gid = $3; @@ -16664,7 +16710,14 @@ TransactionStmt: $$ = (Node *)n; } | ROLLBACK PREPARED Sconst - { + { + if (ENABLE_DMS) { + const char* message = "ROLLBACK TRANSACTION is not supported while DMS and DSS enabled"; + InsertErrorMessage(message, u_sess->plsql_cxt.plpgsql_yylloc); + ereport(errstate, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("ROLLBACK 
TRANSACTION is not supported while DMS and DSS enabled"))); + } + TransactionStmt *n = makeNode(TransactionStmt); n->kind = TRANS_STMT_ROLLBACK_PREPARED; n->gid = $3; diff --git a/src/common/backend/utils/adt/dbsize.cpp b/src/common/backend/utils/adt/dbsize.cpp index fc2e1e191..76b2cc899 100644 --- a/src/common/backend/utils/adt/dbsize.cpp +++ b/src/common/backend/utils/adt/dbsize.cpp @@ -71,6 +71,7 @@ #include "storage/cstore/cstore_compress.h" #include "storage/page_compression.h" #include "vecexecutor/vecnodes.h" +#include "storage/file/fio_device.h" #ifdef PGXC static Datum pgxc_database_size(Oid dbOid); @@ -159,6 +160,7 @@ static int64 calculate_database_size(Oid dbOid) { int64 totalsize; DIR* dirdesc = NULL; + char* dssdir = NULL; struct dirent* direntry = NULL; char dirpath[MAXPGPATH] = {'\0'}; char pathname[MAXPGPATH] = {'\0'}; @@ -170,16 +172,30 @@ static int64 calculate_database_size(Oid dbOid) if (aclresult != ACLCHECK_OK) aclcheck_error(aclresult, ACL_KIND_DATABASE, get_and_check_db_name(dbOid)); + /* Get the vgname in DSS mode */ + if (ENABLE_DSS) + dssdir = g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name; + /* Shared storage in pg_global is not counted */ /* Include pg_default storage */ - rc = snprintf_s(pathname, MAXPGPATH, MAXPGPATH - 1, "base/%u", dbOid); - securec_check_ss(rc, "\0", "\0"); + if (ENABLE_DSS && dbOid != 1) { + rc = snprintf_s(pathname, MAXPGPATH, MAXPGPATH - 1, "%s/base/%u", dssdir, dbOid); + securec_check_ss(rc, "", ""); + } else { + rc = snprintf_s(pathname, MAXPGPATH, MAXPGPATH - 1, "base/%u", dbOid); + securec_check_ss(rc, "\0", "\0"); + } totalsize = db_dir_size(pathname); /* Scan the non-default tablespaces */ - rc = snprintf_s(dirpath, MAXPGPATH, MAXPGPATH - 1, "pg_tblspc"); - securec_check_ss(rc, "\0", "\0"); + if (ENABLE_DSS) { + rc = snprintf_s(dirpath, MAXPGPATH, MAXPGPATH - 1, "%s/pg_tblspc", dssdir); + securec_check_ss(rc, "", ""); + } else { + rc = snprintf_s(dirpath, MAXPGPATH, MAXPGPATH - 1, "pg_tblspc"); + 
securec_check_ss(rc, "\0", "\0"); + } dirdesc = AllocateDir(dirpath); if (NULL == dirdesc) ereport(ERROR, (errcode_for_file_access(), errmsg("could not open tablespace directory \"%s\": %m", dirpath))); @@ -192,16 +208,26 @@ static int64 calculate_database_size(Oid dbOid) #ifdef PGXC /* openGauss tablespaces include node name in path */ - rc = snprintf_s(pathname, - MAXPGPATH, - MAXPGPATH - 1, - "pg_tblspc/%s/%s_%s/%u", - direntry->d_name, - TABLESPACE_VERSION_DIRECTORY, - g_instance.attr.attr_common.PGXCNodeName, - dbOid); - securec_check_ss(rc, "\0", "\0"); - + if (ENABLE_DSS) { + rc = snprintf_s(pathname, + MAXPGPATH, + MAXPGPATH - 1, + "pg_tblspc/%s/%s/%u", + direntry->d_name, + TABLESPACE_VERSION_DIRECTORY, + dbOid); + securec_check_ss(rc, "\0", "\0"); + } else { + rc = snprintf_s(pathname, + MAXPGPATH, + MAXPGPATH - 1, + "pg_tblspc/%s/%s_%s/%u", + direntry->d_name, + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName, + dbOid); + securec_check_ss(rc, "\0", "\0"); + } #else rc = snprintf_s(pathname, MAXPGPATH, @@ -212,6 +238,13 @@ static int64 calculate_database_size(Oid dbOid) dbOid); securec_check_ss(rc, "\0", "\0"); #endif + /* Get the path in DSS mode */ + if (ENABLE_DSS) { + char temp_path[MAXPGPATH]; + rc = snprintf_s(temp_path, MAXPGPATH, MAXPGPATH - 1, "%s", pathname); + rc = snprintf_s(pathname, MAXPGPATH, MAXPGPATH - 1, "%s/%s", dssdir, temp_path); + securec_check_ss(rc, "", ""); + } totalsize += db_dir_size(pathname); } @@ -592,21 +625,37 @@ static int64 calculate_tablespace_size(Oid tblspcOid) errdetail("Please calculate size of DFS tablespace \"%s\" on coordinator node.", get_tablespace_name(tblspcOid)))); } - if (tblspcOid == DEFAULTTABLESPACE_OID) - rc = snprintf_s(tblspcPath, MAXPGPATH, MAXPGPATH - 1, "base"); - - else if (tblspcOid == GLOBALTABLESPACE_OID) - rc = snprintf_s(tblspcPath, MAXPGPATH, MAXPGPATH - 1, "global"); - else + if (tblspcOid == DEFAULTTABLESPACE_OID) { + if (ENABLE_DSS) { + rc = snprintf_s(tblspcPath, 
MAXPGPATH, MAXPGPATH - 1, "%s/base", + g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name); + } else { + rc = snprintf_s(tblspcPath, MAXPGPATH, MAXPGPATH - 1, "base"); + } + } else if (tblspcOid == GLOBALTABLESPACE_OID) { + if (ENABLE_DSS) { + rc = snprintf_s(tblspcPath, MAXPGPATH, MAXPGPATH - 1, "%s/global", + g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name); + } else { + rc = snprintf_s(tblspcPath, MAXPGPATH, MAXPGPATH - 1, "global"); + } + } else #ifdef PGXC /* openGauss tablespaces include node name in path */ - rc = snprintf_s(tblspcPath, - MAXPGPATH, - MAXPGPATH - 1, - "pg_tblspc/%u/%s_%s", - tblspcOid, - TABLESPACE_VERSION_DIRECTORY, - g_instance.attr.attr_common.PGXCNodeName); + if (ENABLE_DSS) { + rc = snprintf_s( + tblspcPath, MAXPGPATH, MAXPGPATH - 1, "%s/pg_tblspc/%u/%s", + g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name, + tblspcOid, TABLESPACE_VERSION_DIRECTORY); + } else { + rc = snprintf_s(tblspcPath, + MAXPGPATH, + MAXPGPATH - 1, + "pg_tblspc/%u/%s_%s", + tblspcOid, + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName); + } #else rc = snprintf_s( tblspcPath, MAXPGPATH, MAXPGPATH - 1, "pg_tblspc/%u/%s", tblspcOid, TABLESPACE_VERSION_DIRECTORY); diff --git a/src/common/backend/utils/adt/genfile.cpp b/src/common/backend/utils/adt/genfile.cpp index 484270630..b022f5876 100644 --- a/src/common/backend/utils/adt/genfile.cpp +++ b/src/common/backend/utils/adt/genfile.cpp @@ -38,6 +38,7 @@ #include "utils/lsyscache.h" #include "catalog/pg_partition_fn.h" #include "storage/cfs/cfs_buffers.h" +#include "storage/file/fio_device.h" typedef struct { char* location; @@ -1402,7 +1403,7 @@ Datum pg_ls_waldir(PG_FUNCTION_ARGS) if (!superuser() && !isMonitoradmin(GetUserId())) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("only system/monitor admin can check WAL directory!"))); - return pg_ls_dir_files(fcinfo, XLOGDIR, false); + return pg_ls_dir_files(fcinfo, SS_XLOGDIR, false); } /* @@ -1415,8 +1416,11 @@ static 
Datum pg_ls_tmpdir(FunctionCallInfo fcinfo, Oid tblspc) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("only system/monitor admin can check pgsql_tmp directory!"))); - if (!SearchSysCacheExists1(TABLESPACEOID, ObjectIdGetDatum(tblspc))) - ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("tablespace with OID %u does not exist", tblspc))); + if (OidIsValid(tblspc)) { + if (!SearchSysCacheExists1(TABLESPACEOID, ObjectIdGetDatum(tblspc))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("tablespace with OID %u does not exist", tblspc))); + } TempTablespacePath(path, tblspc); return pg_ls_dir_files(fcinfo, path, true); @@ -1428,7 +1432,11 @@ static Datum pg_ls_tmpdir(FunctionCallInfo fcinfo, Oid tblspc) */ Datum pg_ls_tmpdir_noargs(PG_FUNCTION_ARGS) { - return pg_ls_tmpdir(fcinfo, DEFAULTTABLESPACE_OID); + if (!ENABLE_DSS) { + return pg_ls_tmpdir(fcinfo, DEFAULTTABLESPACE_OID); + } else { + return pg_ls_tmpdir(fcinfo, InvalidOid); + } } /* diff --git a/src/common/backend/utils/adt/misc.cpp b/src/common/backend/utils/adt/misc.cpp index 9a2d24bdc..8fdb0bfad 100644 --- a/src/common/backend/utils/adt/misc.cpp +++ b/src/common/backend/utils/adt/misc.cpp @@ -38,6 +38,7 @@ #include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/file/fio_device.h" #include "utils/lsyscache.h" #include "tcop/tcopprot.h" #include "utils/acl.h" @@ -626,37 +627,50 @@ Datum pg_tablespace_databases(PG_FUNCTION_ARGS) fctx = (ts_db_fctx*)palloc(sizeof(ts_db_fctx)); - /* - * size = tablespace dirname length + dir sep char + oid + terminator - */ -#ifdef PGXC - /* openGauss tablespaces also include node name in path */ - location_len = 9 + 1 + OIDCHARS + 1 + strlen(g_instance.attr.attr_common.PGXCNodeName) + 1 + - strlen(TABLESPACE_VERSION_DIRECTORY) + 1; - fctx->location = (char*)palloc(location_len); -#else - location_len = 9 + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 fctx->location = - 
(char*)palloc(location_len); -#endif if (tablespaceOid == GLOBALTABLESPACE_OID) { fctx->dirdesc = NULL; ereport(WARNING, (errmsg("global tablespace never has databases"))); } else { - if (tablespaceOid == DEFAULTTABLESPACE_OID) - ss_rc = sprintf_s(fctx->location, location_len, "base"); - else -#ifdef PGXC - /* openGauss tablespaces also include node name in path */ + if (tablespaceOid == DEFAULTTABLESPACE_OID) { + location_len = (int)strlen(DEFTBSDIR) + 1; + fctx->location = (char*)palloc(location_len); + ss_rc = sprintf_s(fctx->location, (size_t)location_len, "%s", DEFTBSDIR); + } else if (ENABLE_DSS) { + location_len = (int)strlen(TBLSPCDIR) + 1 + OIDCHARS + 1 + + (int)strlen(TABLESPACE_VERSION_DIRECTORY) + 1; + fctx->location = (char*)palloc(location_len); ss_rc = sprintf_s(fctx->location, location_len, - "pg_tblspc/%u/%s_%s", + "%s/%u/%s", + TBLSPCDIR, + tablespaceOid, + TABLESPACE_VERSION_DIRECTORY); + } else { +#ifdef PGXC + /* openGauss tablespaces also include node name in path */ + location_len = (int)strlen(TBLSPCDIR) + 1 + OIDCHARS + 1 + + (int)strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + + (int)strlen(g_instance.attr.attr_common.PGXCNodeName) + 1; + fctx->location = (char*)palloc(location_len); + ss_rc = sprintf_s(fctx->location, + location_len, + "%s/%u/%s_%s", + TBLSPCDIR, tablespaceOid, TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName); #else - ss_rc = sprintf_s( - fctx->location, location_len, "pg_tblspc/%u/%s", tablespaceOid, TABLESPACE_VERSION_DIRECTORY); + location_len = (int)strlen(TBLSPCDIR) + 1 + OIDCHARS + 1 + + (int)strlen(TABLESPACE_VERSION_DIRECTORY) + 1; + fctx->location = (char*)palloc(location_len); + ss_rc = sprintf_s(fctx->location, + location_len, + "%s/%u/%s", + TBLSPCDIR, + tablespaceOid, + TABLESPACE_VERSION_DIRECTORY); #endif + } securec_check_ss(ss_rc, "\0", "\0"); fctx->dirdesc = AllocateDir(fctx->location); @@ -690,7 +704,7 @@ Datum pg_tablespace_databases(PG_FUNCTION_ARGS) /* if database subdir is empty, 
don't report tablespace as used */ /* size = path length + dir sep char + file name + terminator */ - int sub_len = strlen(fctx->location) + 1 + strlen(de->d_name) + 1; + int sub_len = (int)strlen(fctx->location) + 1 + (int)strlen(de->d_name) + 1; subdir = (char*)palloc(sub_len); ss_rc = sprintf_s(subdir, sub_len, "%s/%s", fctx->location, de->d_name); securec_check_ss(ss_rc, "\0", "\0"); @@ -743,7 +757,8 @@ Datum pg_tablespace_location(PG_FUNCTION_ARGS) * Find the location of the tablespace by reading the symbolic link that * is in pg_tblspc/. */ - errno_t ss_rc = snprintf_s(sourcepath, sizeof(sourcepath), sizeof(sourcepath) - 1, "pg_tblspc/%u", tablespaceOid); + errno_t ss_rc = snprintf_s(sourcepath, sizeof(sourcepath), sizeof(sourcepath) - 1, + "%s/%u", TBLSPCDIR, tablespaceOid); securec_check_ss(ss_rc, "\0", "\0"); rllen = readlink(sourcepath, targetpath, sizeof(targetpath)); diff --git a/src/common/backend/utils/adt/pgundostatfuncs.cpp b/src/common/backend/utils/adt/pgundostatfuncs.cpp index 73e76fa71..26c262fb0 100644 --- a/src/common/backend/utils/adt/pgundostatfuncs.cpp +++ b/src/common/backend/utils/adt/pgundostatfuncs.cpp @@ -2438,6 +2438,11 @@ Datum gs_undo_dump_parsepage_mv(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("unsupported view in multiple nodes mode."))); PG_RETURN_VOID(); #else + if (ENABLE_DSS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("unsupported view when enable dss."))); + PG_RETURN_VOID(); + } + /* check user's right */ const char fName[MAXFNAMELEN] = "gs_undo_dump_parsepage_mv"; CheckUser(fName); diff --git a/src/common/backend/utils/cache/catcache.cpp b/src/common/backend/utils/cache/catcache.cpp index 0bdc6b678..a8491bdfd 100644 --- a/src/common/backend/utils/cache/catcache.cpp +++ b/src/common/backend/utils/cache/catcache.cpp @@ -1163,6 +1163,10 @@ void InitCatCachePhase2(CatCache* cache, bool touch_index) */ bool IndexScanOK(int cache_id) { + if (SS_STANDBY_MODE) { + return 
false; + } + switch (cache_id) { case INDEXRELID: diff --git a/src/common/backend/utils/cache/knl_localsysdbcache.cpp b/src/common/backend/utils/cache/knl_localsysdbcache.cpp index 3b3e46174..4196ff2ac 100644 --- a/src/common/backend/utils/cache/knl_localsysdbcache.cpp +++ b/src/common/backend/utils/cache/knl_localsysdbcache.cpp @@ -43,7 +43,8 @@ public: } ~LSCCloseCheck() { - Assert(!EnableGlobalSysCache() || m_lsc_closed || g_instance.distribute_test_param_instance->elevel == PANIC); + Assert(!EnableGlobalSysCache() || m_lsc_closed || t_thrd.role == DMS_WORKER || + g_instance.distribute_test_param_instance->elevel == PANIC); } void setCloseFlag(bool value) { @@ -213,6 +214,14 @@ void CreateLocalSysDBCache() #endif } } + +#if defined(USE_ASSERT_CHECKING) && !defined(ENABLE_MEMORY_CHECK) +void CloseLSCCheck() +{ + lsc_close_check.setCloseFlag(true); +} +#endif + static void ReleaseBadPtrList(bool isCommit); static void ThreadNodeGroupCallback(Datum arg, int cacheid, uint32 hashvalue) { diff --git a/src/common/backend/utils/cache/partcache.cpp b/src/common/backend/utils/cache/partcache.cpp index eaefb842f..a1f45020f 100644 --- a/src/common/backend/utils/cache/partcache.cpp +++ b/src/common/backend/utils/cache/partcache.cpp @@ -1769,8 +1769,10 @@ void PartitionSetNewRelfilenode(Relation parent, Partition part, TransactionId f /* segment storage */ Assert(parent->storage_type == SEGMENT_PAGE); isbucket = BUCKET_OID_IS_VALID(parent->rd_bucketoid) && !RelationIsCrossBucketIndex(parent); + Oid database_id = (ConvertToRelfilenodeTblspcOid(part->pd_part->reltablespace) == GLOBALTABLESPACE_OID) ? 
+ InvalidOid : u_sess->proc_cxt.MyDatabaseId; newrelfilenode = seg_alloc_segment(ConvertToRelfilenodeTblspcOid(part->pd_part->reltablespace), - u_sess->proc_cxt.MyDatabaseId, isbucket, InvalidBlockNumber); + database_id, isbucket, InvalidBlockNumber); } diff --git a/src/common/backend/utils/cache/relcache.cpp b/src/common/backend/utils/cache/relcache.cpp index e852b7402..d88634a69 100644 --- a/src/common/backend/utils/cache/relcache.cpp +++ b/src/common/backend/utils/cache/relcache.cpp @@ -181,6 +181,7 @@ #include "storage/page_compression.h" #include "storage/smgr/smgr.h" #include "storage/smgr/segment.h" +#include "storage/file/fio_device.h" #include "threadpool/threadpool.h" #include "storage/tcap.h" #include "utils/array.h" @@ -3077,7 +3078,11 @@ extern void formrdesc(const char* relationName, Oid relationReltype, bool isshar if (IsBootstrapProcessingMode()) RelationMapUpdateMap(RelationGetRelid(relation), RelationGetRelid(relation), isshared, true); - relation->storage_type = HEAP_DISK; + if (t_thrd.shemem_ptr_cxt.ControlFile->bootstrap_segment) { + relation->storage_type = SEGMENT_PAGE; + } else { + relation->storage_type = HEAP_DISK; + } /* * initialize the relation lock manager information @@ -4724,8 +4729,10 @@ void RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid, Multi } else { /* segment storage */ isbucket = BUCKET_OID_IS_VALID(relation->rd_bucketoid) && !RelationIsCrossBucketIndex(relation); + Oid database_id = (ConvertToRelfilenodeTblspcOid(relation->rd_rel->reltablespace) == GLOBALTABLESPACE_OID) ? 
+ InvalidOid : u_sess->proc_cxt.MyDatabaseId; newrelfilenode = seg_alloc_segment(ConvertToRelfilenodeTblspcOid(relation->rd_rel->reltablespace), - u_sess->proc_cxt.MyDatabaseId, isbucket, InvalidBlockNumber); + database_id, isbucket, InvalidBlockNumber); } // We must consider cudesc relation and delta relation when it is a CStore relation @@ -7022,6 +7029,10 @@ struct PublicationActions* GetRelationPublicationActions(Relation relation) */ static bool load_relcache_init_file(bool shared) { + if (ENABLE_DMS) { + return false; + } + FILE* fp = NULL; char initfilename[MAXPGPATH]; Relation* rels = NULL; @@ -7033,7 +7044,8 @@ static bool load_relcache_init_file(bool shared) rc = snprintf_s(initfilename, sizeof(initfilename), sizeof(initfilename) - 1, - "global/%s.%u", + "%s/%s.%u", + GLOTBSDIR, RELCACHE_INIT_FILENAME, GRAND_VERSION_NUM); else @@ -7416,6 +7428,10 @@ read_failed: */ static void write_relcache_init_file(bool shared) { + if (ENABLE_DSS || SS_STANDBY_MODE) { + return; + } + FILE* fp = NULL; char tempfilename[MAXPGPATH]; char finalfilename[MAXPGPATH]; @@ -7444,7 +7460,8 @@ static void write_relcache_init_file(bool shared) rc = snprintf_s(tempfilename, sizeof(tempfilename), sizeof(tempfilename) - 1, - "global/%s.%u.%lu", + "%s/%s.%u.%lu", + GLOTBSDIR, RELCACHE_INIT_FILENAME, GRAND_VERSION_NUM, t_thrd.proc_cxt.MyProcPid); @@ -7452,7 +7469,8 @@ static void write_relcache_init_file(bool shared) rc = snprintf_s(finalfilename, sizeof(finalfilename), sizeof(finalfilename) - 1, - "global/%s.%u", + "%s/%s.%u", + GLOTBSDIR, RELCACHE_INIT_FILENAME, GRAND_VERSION_NUM); securec_check_ss(rc, "\0", "\0"); @@ -7675,12 +7693,12 @@ void RelationCacheInitFilePreInvalidate(void) if (unlink(initfilename) < 0) { /* - * The file might not be there if no backend has been started since - * the last removal. But complain about failures other than ENOENT. - * Fortunately, it's not too late to abort the transaction if we can't - * get rid of the would-be-obsolete init file. 
- */ - if (errno != ENOENT) + * The file might not be there if no backend has been started since + * the last removal. But complain about failures other than ENOENT. + * Fortunately, it's not too late to abort the transaction if we can't + * get rid of the would-be-obsolete init file. + */ + if (!FILE_POSSIBLY_DELETED(errno)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove cache file \"%s\": %m", initfilename))); } } @@ -7711,17 +7729,18 @@ void RelationCacheInitFileRemove(void) * We zap the shared cache file too. In theory it can't get out of sync * enough to be a problem, but in data-corruption cases, who knows ... */ - rc = snprintf_s(path, sizeof(path), sizeof(path) - 1, "global/%s.%u", RELCACHE_INIT_FILENAME, GRAND_VERSION_NUM); + rc = snprintf_s(path, sizeof(path), sizeof(path) - 1, "%s/%s.%u", + GLOTBSDIR, RELCACHE_INIT_FILENAME, GRAND_VERSION_NUM); securec_check_ss(rc, "\0", "\0"); unlink_initfile(path); /* Scan everything in the default tablespace */ - RelationCacheInitFileRemoveInDir("base"); + RelationCacheInitFileRemoveInDir(DEFTBSDIR); /* Scan the tablespace link directory to find non-default tablespaces */ - dir = AllocateDir(tblspcdir); + dir = AllocateDir(TBLSPCDIR); if (dir == NULL) { - ereport(LOG, (errmsg("could not open tablespace link directory \"%s\": %m", tblspcdir))); + ereport(LOG, (errmsg("could not open tablespace link directory \"%s\": %m", TBLSPCDIR))); return; } @@ -7730,17 +7749,32 @@ void RelationCacheInitFileRemove(void) /* Scan the tablespace dir for per-database dirs */ #ifdef PGXC /* Postgres-XC tablespaces include node name in path */ + if (ENABLE_DSS) { + rc = snprintf_s(path, + sizeof(path), + sizeof(path) - 1, + "%s/%s/%s", + TBLSPCDIR, + de->d_name, + TABLESPACE_VERSION_DIRECTORY); + } else { + rc = snprintf_s(path, + sizeof(path), + sizeof(path) - 1, + "%s/%s/%s_%s", + TBLSPCDIR, + de->d_name, + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName); + } +#else rc = snprintf_s(path, 
sizeof(path), sizeof(path) - 1, - "%s/%s/%s_%s", - tblspcdir, + "%s/%s/%s", + TBLSPCDIR, de->d_name, - TABLESPACE_VERSION_DIRECTORY, - g_instance.attr.attr_common.PGXCNodeName); -#else - rc = snprintf_s( - path, sizeof(path), sizeof(path) - 1, "%s/%s/%s", tblspcdir, de->d_name, TABLESPACE_VERSION_DIRECTORY); + TABLESPACE_VERSION_DIRECTORY); #endif securec_check_ss(rc, "\0", "\0"); RelationCacheInitFileRemoveInDir(path); @@ -7788,7 +7822,7 @@ static void unlink_initfile(const char* initfilename) { if (unlink(initfilename) < 0) { /* It might not be there, but log any error other than ENOENT */ - if (errno != ENOENT) + if (!FILE_POSSIBLY_DELETED(errno)) ereport(LOG, (errmsg("could not remove cache file \"%s\": %m", initfilename))); } } diff --git a/src/common/backend/utils/cache/relmapper.cpp b/src/common/backend/utils/cache/relmapper.cpp index 314bcf88d..4fe12d60a 100644 --- a/src/common/backend/utils/cache/relmapper.cpp +++ b/src/common/backend/utils/cache/relmapper.cpp @@ -53,6 +53,7 @@ #include "miscadmin.h" #include "storage/smgr/fd.h" #include "storage/lock/lwlock.h" +#include "storage/file/fio_device.h" #include "utils/inval.h" #include "utils/relmapper.h" @@ -486,6 +487,17 @@ void RelationMapFinishBootstrap(void) { Assert(IsBootstrapProcessingMode()); + if (ENABLE_DSS) { + char map_file_name[MAXPGPATH]; + int rc = snprintf_s(map_file_name, sizeof(map_file_name), sizeof(map_file_name) - 1, "%s/global/%s", + g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name, RELMAPPER_FILENAME); + securec_check_ss_c(rc, "\0", "\0"); + + if (dss_exist_file(map_file_name)) { + return; + } + } + knl_u_relmap_context *relmap_cxt = GetRelMapCxt(); /* Shouldn't be anything "pending" ... 
*/ Assert(relmap_cxt->active_shared_updates->num_mappings == 0); @@ -604,10 +616,18 @@ void load_relmap_file(bool shared, RelMapFile *map) bool isNewMap; if (shared) { - rc = snprintf_s( - map_file_name[0], sizeof(map_file_name[0]), sizeof(map_file_name[0]) - 1, "global/%s", RELMAPPER_FILENAME); + rc = snprintf_s(map_file_name[0], + sizeof(map_file_name[0]), + sizeof(map_file_name[0]) - 1, + "%s/%s", + GLOTBSDIR, + RELMAPPER_FILENAME); securec_check_ss(rc, "\0", "\0"); - rc = snprintf_s(map_file_name[1], sizeof(map_file_name[1]), sizeof(map_file_name[1]) - 1, "global/%s", + rc = snprintf_s(map_file_name[1], + sizeof(map_file_name[1]), + sizeof(map_file_name[1]) - 1, + "%s/%s", + GLOTBSDIR, RELMAPPER_FILENAME_BAK); securec_check_ss(rc, "\0", "\0"); } else { @@ -630,10 +650,9 @@ void load_relmap_file(bool shared, RelMapFile *map) // check backup file if (stat(map_file_name[1], &stat_buf) != 0) { - if (ENOENT != errno) { + if (!FILE_POSSIBLY_DELETED(errno)) { ereport(LOG, (errmsg("can not stat file \"%s\", ignore backup file", map_file_name[1]))); - } - else { + } else { fix_backup = true; // switch to exclusive lock to do backup map file recovery LWLockRelease(RelationMappingLock); @@ -761,11 +780,21 @@ static void write_relmap_file(bool shared, RelMapFile* newmap, bool write_wal, b knl_u_relmap_context *relmap_cxt = GetRelMapCxt(); for (int i = 0; i < 2; i++) { if (shared) { - rc = snprintf_s(map_file_name, sizeof(map_file_name), sizeof(map_file_name) - 1, "global/%s", fname[i]); + rc = snprintf_s(map_file_name, + sizeof(map_file_name), + sizeof(map_file_name) - 1, + "%s/%s", + GLOTBSDIR, + fname[i]); securec_check_ss_c(rc, "\0", "\0"); real_map = relmap_cxt->shared_map; } else { - rc = snprintf_s(map_file_name, sizeof(map_file_name), sizeof(map_file_name) - 1, "%s/%s", dbpath, fname[i]); + rc = snprintf_s(map_file_name, + sizeof(map_file_name), + sizeof(map_file_name) - 1, + "%s/%s", + dbpath, + fname[i]); securec_check_ss_c(rc, "\0", "\0"); real_map = 
relmap_cxt->local_map; } @@ -962,7 +991,12 @@ static void recover_relmap_file(bool shared, bool backupfile, RelMapFile* real_m } if (shared) { - rc = snprintf_s(map_file_name, sizeof(map_file_name), sizeof(map_file_name) - 1, "global/%s", file_name); + rc = snprintf_s(map_file_name, + sizeof(map_file_name), + sizeof(map_file_name) - 1, + "%s/%s", + GLOTBSDIR, + file_name); securec_check_ss(rc, "\0", "\0"); } else { rc = snprintf_s(map_file_name, @@ -1076,14 +1110,25 @@ void relmap_redo(XLogReaderState* record) static int WriteOldVersionRelmap(RelMapFile* map, int fd) { errno_t rc; - char* mapCache = (char*)palloc0(RELMAP_SIZE_OLD); + char* mapCache_ori = NULL; + char* mapCache = NULL; + if (ENABLE_DSS) { + mapCache_ori = (char*)palloc0(RELMAP_SIZE_OLD + ALIGNOF_BUFFER); + mapCache = (char *)BUFFERALIGN(mapCache_ori); + } else { + mapCache = (char*)palloc0(RELMAP_SIZE_OLD); + } rc = memcpy_s(mapCache, RELMAP_SIZE_OLD, map, MAPPING_LEN_OLDMAP_HEAD); securec_check(rc, "\0", "\0"); rc = memcpy_s( mapCache + MAPPING_LEN_OLDMAP_HEAD, RELMAP_SIZE_OLD - MAPPING_LEN_OLDMAP_HEAD, &(map->crc), MAPPING_LEN_TAIL); securec_check(rc, "\0", "\0"); int writeBytes = write(fd, mapCache, RELMAP_SIZE_OLD); - pfree_ext(mapCache); + if (ENABLE_DSS) { + pfree_ext(mapCache_ori); + } else { + pfree_ext(mapCache); + } return writeBytes; } @@ -1176,10 +1221,25 @@ static int32 ReadRelMapFile(RelMapFile* map, int fd, bool isNewMap) static int32 WriteRelMapFile(RelMapFile* map, int fd) { int32 writeBytes = 0; + char* unalignRelMap = NULL; + RelMapFile* relMap = NULL; + if (IS_NEW_RELMAP(map->magic)) { - writeBytes = write(fd, map, sizeof(RelMapFile)); + if (ENABLE_DSS) { + unalignRelMap = (char*)palloc0(sizeof(RelMapFile) + ALIGNOF_BUFFER); + relMap = (RelMapFile*)BUFFERALIGN(unalignRelMap); + errno_t err = memcpy_s(relMap, sizeof(RelMapFile), map, sizeof(RelMapFile)); + securec_check(err, "\0", "\0"); + } else { + relMap = map; + } + writeBytes = write(fd, relMap, sizeof(RelMapFile)); } else { 
writeBytes = WriteOldVersionRelmap(map, fd); } + + if (unalignRelMap != NULL) { + pfree(unalignRelMap); + } return writeBytes; } diff --git a/src/common/backend/utils/error/be_module.cpp b/src/common/backend/utils/error/be_module.cpp index 7149f2d6c..b96ed1193 100755 --- a/src/common/backend/utils/error/be_module.cpp +++ b/src/common/backend/utils/error/be_module.cpp @@ -130,6 +130,9 @@ const module_data module_map[] = {{MOD_ALL, "ALL"}, {MOD_DISASTER_READ, "DISASTER_READ"}, {MODE_REPSYNC, "REPSYNC"}, {MOD_SQLPATCH, "SQLPATCH"}, + {MOD_DMS, "DMS"}, + {MOD_DSS, "DSS_API"}, + {MOD_GPI, "GPI"}, /* add your module name above */ {MOD_MAX, "BACKEND"}}; diff --git a/src/common/backend/utils/error/elog.cpp b/src/common/backend/utils/error/elog.cpp index 43a59725e..4cc9095eb 100644 --- a/src/common/backend/utils/error/elog.cpp +++ b/src/common/backend/utils/error/elog.cpp @@ -309,7 +309,7 @@ bool errstart(int elevel, const char* filename, int lineno, const char* funcname */ for (i = 0; i <= t_thrd.log_cxt.errordata_stack_depth; i++) elevel = Max(elevel, t_thrd.log_cxt.errordata[i].elevel); - if (elevel == FATAL && t_thrd.role == JOB_WORKER) { + if (elevel == FATAL && (t_thrd.role == JOB_WORKER || t_thrd.role == DMS_WORKER)) { elevel = ERROR; } } @@ -851,11 +851,14 @@ int errcode_for_file_access(void) /* File not found */ case ENOENT: /* No such file or directory */ + case ERR_DSS_FILE_NOT_EXIST: /* No such file in dss */ + case ERR_DSS_DIR_NOT_EXIST: /* No such directory in dss */ edata->sqlerrcode = ERRCODE_UNDEFINED_FILE; break; /* Duplicate file */ case EEXIST: /* File exists */ + case ERR_DSS_DIR_CREATE_DUPLICATED: /* File or directory already existed in DSS */ edata->sqlerrcode = ERRCODE_DUPLICATE_FILE; break; @@ -870,6 +873,7 @@ int errcode_for_file_access(void) /* Insufficient resources */ case ENOSPC: /* No space left on device */ + case ERR_DSS_NO_SPACE: /* No space left on dss */ edata->sqlerrcode = ERRCODE_DISK_FULL; break; diff --git 
a/src/common/backend/utils/fmgr/dfmgr.cpp b/src/common/backend/utils/fmgr/dfmgr.cpp index 71599ca7e..dac5218ab 100644 --- a/src/common/backend/utils/fmgr/dfmgr.cpp +++ b/src/common/backend/utils/fmgr/dfmgr.cpp @@ -28,6 +28,7 @@ #include "utils/hsearch.h" #include "utils/memutils.h" #include "utils/syscall_lock.h" +#include "storage/file/fio_device.h" /* Max size of error message of dlopen */ #define DLERROR_MSG_MAX_LEN 512 diff --git a/src/common/backend/utils/init/globals.cpp b/src/common/backend/utils/init/globals.cpp index ab6b37510..68f397579 100644 --- a/src/common/backend/utils/init/globals.cpp +++ b/src/common/backend/utils/init/globals.cpp @@ -164,6 +164,9 @@ const uint32 STANDBY_STMTHIST_VERSION_NUM = 92777; bool useLocalXid = false; #endif +/* allow to store tables in segment storage while initdb */ +bool EnableInitDBSegment = false; + /* * EarlyBindingTLSVariables * Bind static variables to another static TLS variable's address. diff --git a/src/common/backend/utils/init/miscinit.cpp b/src/common/backend/utils/init/miscinit.cpp index 144c2b932..69ae99992 100644 --- a/src/common/backend/utils/init/miscinit.cpp +++ b/src/common/backend/utils/init/miscinit.cpp @@ -62,6 +62,7 @@ #include "utils/inval.h" #include "utils/lsyscache.h" #include "gs_policy/policy_common.h" +#include "storage/file/fio_device.h" #ifdef ENABLE_MULTIPLE_NODES #include "tsdb/compaction/compaction_entry.h" @@ -1775,6 +1776,11 @@ void ValidatePgVersion(const char* path) const char* version_string = PG_VERSION; errno_t rc; + // skip in dss mode + if (ENABLE_DSS) { + return; + } + my_major = strtol(version_string, &endptr, 10); if (*endptr == '.') @@ -1984,3 +1990,110 @@ bool contain_backend_version(uint32 version_number) { version_number < V5R2C00_START_VERSION_NUM) || (version_number >= V5R2C00_BACKEND_VERSION_NUM)); } + +void ss_initdwsubdir(char *dssdir, int instance_id) +{ + int rc; + + /* file correspanding to double write directory */ + rc = 
snprintf_s(g_instance.datadir_cxt.dw_subdir_cxt.dwOldPath, MAXPGPATH, MAXPGPATH - 1, + "%s/pg_doublewrite%d/pg_dw", dssdir, instance_id); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.dw_subdir_cxt.dwPathPrefix, MAXPGPATH, MAXPGPATH - 1, + "%s/pg_doublewrite%d/pg_dw_", dssdir, instance_id); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.dw_subdir_cxt.dwSinglePath, MAXPGPATH, MAXPGPATH - 1, + "%s/pg_doublewrite%d/pg_dw_single", dssdir, instance_id); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.dw_subdir_cxt.dwBuildPath, MAXPGPATH, MAXPGPATH - 1, + "%s/pg_doublewrite%d/pg_dw.build", dssdir, instance_id); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.dw_subdir_cxt.dwUpgradePath, MAXPGPATH, MAXPGPATH - 1, + "%s/pg_doublewrite%d/dw_upgrade", dssdir, instance_id); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.dw_subdir_cxt.dwBatchUpgradeMetaPath, MAXPGPATH, MAXPGPATH - 1, + "%s/pg_doublewrite%d/dw_batch_upgrade_meta", dssdir, instance_id); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.dw_subdir_cxt.dwBatchUpgradeFilePath, MAXPGPATH, MAXPGPATH - 1, + "%s/pg_doublewrite%d/dw_batch_upgrade_files", dssdir, instance_id); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.dw_subdir_cxt.dwMetaPath, MAXPGPATH, MAXPGPATH - 1, + "%s/pg_doublewrite%d/pg_dw_meta", dssdir, instance_id); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.dw_subdir_cxt.dwExtChunkPath, MAXPGPATH, MAXPGPATH - 1, + "%s/pg_doublewrite%d/pg_dw_ext_chunk", dssdir, instance_id); + securec_check_ss(rc, "", ""); + + g_instance.datadir_cxt.dw_subdir_cxt.dwStorageType = (uint8)DEV_TYPE_DSS; +} + +/* + * Check whether dss connect is successful. 
+ */ +void initDSSConf(void) +{ + if (!ENABLE_DSS) { + return; + } + if (!dss_exist_dir(g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name)) { + ereport(FATAL, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("Could not connect dssserver, vgname: \"%s\", socketpath: \"%s\"", + g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name, + g_instance.attr.attr_storage.dss_attr.ss_dss_conn_path), + errhint("Check vgname and socketpath and restart later."))); + } else { + errno_t rc = EOK; + char *dssdir = g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name; + + rc = snprintf_s(g_instance.datadir_cxt.baseDir, MAXPGPATH, MAXPGPATH - 1, "%s/base", dssdir); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.globalDir, MAXPGPATH, MAXPGPATH - 1, "%s/global", dssdir); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.locationDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_location", dssdir); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.tblspcDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_tblspc", dssdir); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.clogDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_clog", dssdir); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.csnlogDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_csnlog", dssdir); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.serialDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_serial", dssdir); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.twophaseDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_twophase", dssdir); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.multixactDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_multixact", dssdir); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.xlogDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_xlog%d", dssdir, + g_instance.attr.attr_storage.dms_attr.instance_id); + securec_check_ss(rc, "", ""); + + rc = 
snprintf_s(g_instance.datadir_cxt.controlPath, MAXPGPATH, MAXPGPATH - 1, "%s/pg_control", dssdir); + securec_check_ss(rc, "", ""); + + rc = snprintf_s(g_instance.datadir_cxt.controlBakPath, MAXPGPATH, MAXPGPATH - 1, "%s/pg_control.backup", + dssdir); + securec_check_ss(rc, "", ""); + + ss_initdwsubdir(dssdir, g_instance.attr.attr_storage.dms_attr.instance_id); + } + + /* set xlog seg size to 1GB */ + XLogSegmentSize = DSS_XLOG_SEG_SIZE; +} diff --git a/src/common/backend/utils/init/postinit.cpp b/src/common/backend/utils/init/postinit.cpp index 32586eded..fddce1fc6 100644 --- a/src/common/backend/utils/init/postinit.cpp +++ b/src/common/backend/utils/init/postinit.cpp @@ -72,6 +72,7 @@ #include "storage/procsignal.h" #include "storage/sinvaladt.h" #include "storage/smgr/smgr.h" +#include "storage/file/fio_device.h" #include "tcop/tcopprot.h" #include "threadpool/threadpool.h" #include "utils/acl.h" @@ -609,7 +610,9 @@ void BaseInit(void) InitSync(); smgrinit(); InitBufferPoolAccess(); - undo::UndoLogInit(); + if (!ENABLE_DSS) { + undo::UndoLogInit(); + } } /* ------------------------------------- @@ -2657,7 +2660,7 @@ void PostgresInitializer::SetDatabasePath() securec_check_ss(rcs, "\0", "\0"); pgaudit_user_login(FALSE, (char*)m_username, m_details); - if (errno == ENOENT) + if (FILE_POSSIBLY_DELETED(errno)) ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE), errmsg("database \"%s\" does not exist", m_dbname), @@ -2915,5 +2918,3 @@ void PostgresInitializer::InitBarrierCreator() return; } - - diff --git a/src/common/backend/utils/misc/guc.cpp b/src/common/backend/utils/misc/guc.cpp index 4f002d910..98ad0c4b7 100755 --- a/src/common/backend/utils/misc/guc.cpp +++ b/src/common/backend/utils/misc/guc.cpp @@ -188,8 +188,10 @@ #define WRITE_CONFIG_LOCK_LEN (1024 * 1024) #ifdef EXEC_BACKEND -#define CONFIG_EXEC_PARAMS "global/config_exec_params" -#define CONFIG_EXEC_PARAMS_NEW "global/config_exec_params.new" +#define CONFIG_EXEC_PARAMS 
(g_instance.attr.attr_storage.dss_attr.ss_enable_dss ? \ + ((char*)"config_exec_params") : ((char*)"global/config_exec_params")) +#define CONFIG_EXEC_PARAMS_NEW (g_instance.attr.attr_storage.dss_attr.ss_enable_dss ? \ + ((char*)"config_exec_params.new") : ((char*)"global/config_exec_params.new")) #endif /* upper limit for GUC variables measured in kilobytes of memory */ @@ -445,6 +447,8 @@ static void assign_syslog_facility(int newval, void* extra); static void assign_syslog_ident(const char* newval, void* extra); static void assign_session_replication_role(int newval, void* extra); static bool check_client_min_messages(int* newval, void** extra, GucSource source); +static bool check_default_transaction_isolation(int* newval, void** extra, GucSource source); +static bool check_enable_stmt_track(bool* newval, void** extra, GucSource source); static bool check_debug_assertions(bool* newval, void** extra, GucSource source); #ifdef USE_BONJOUR static bool check_bonjour(bool* newval, void** extra, GucSource source); @@ -999,6 +1003,8 @@ const char* const config_group_names[] = { gettext_noop("Instruments Options"), gettext_noop("Column Encryption"), gettext_noop("Compress Options"), + /* SHARED_STORAGE_OPTIONS */ + gettext_noop("Shared Storage Options"), #ifdef PGXC /* DATA_NODES */ gettext_noop("Datanodes and Connection Pooling"), @@ -1125,7 +1131,7 @@ static void InitConfigureNamesBool() gettext_noop("Enable full/slow sql feature"), NULL}, &u_sess->attr.attr_common.enable_stmt_track, true, - NULL, + check_enable_stmt_track, NULL, NULL}, {{"track_stmt_parameter", @@ -4056,7 +4062,7 @@ static void InitConfigureNamesEnum() &u_sess->attr.attr_common.DefaultXactIsoLevel, XACT_READ_COMMITTED, isolation_level_options, - NULL, + check_default_transaction_isolation, NULL, NULL}, @@ -5058,6 +5064,32 @@ static int guc_name_compare(const char* namea, const char* nameb) return 0; } +static void parseDmsInstanceCount() +{ + if (!ENABLE_DMS) { + return; + } + + char *dms_url = 
g_instance.attr.attr_storage.dms_attr.interconnect_url; + List *l = NULL; + char *url = pstrdup(dms_url); + if (!SplitIdentifierString(url, ',', &l)) { + pfree(url); + g_instance.attr.attr_storage.dms_attr.inst_count = 1; + return; + } + + if (list_length(l) == 0 || list_length(l) > DMS_MAX_INSTANCE) { + pfree(url); + g_instance.attr.attr_storage.dms_attr.inst_count = 1; + return; + } + + g_instance.attr.attr_storage.dms_attr.inst_count = list_length(l); + pfree(url); + return; +} + /* * Initiaize Postmaster level GUC options during postmaster proc. * @@ -5072,6 +5104,11 @@ void InitializePostmasterGUC() g_instance.attr.attr_storage.enable_gtm_free = true; #endif g_instance.attr.attr_network.PoolerPort = g_instance.attr.attr_network.PostPortNumber + 1; + parseDmsInstanceCount(); +#ifndef USE_ASSERT_CHECKING + /* in Release, this param is ON and undisclosed */ + g_instance.attr.attr_storage.dms_attr.enable_reform = true; +#endif } /* @@ -11196,6 +11233,23 @@ static bool check_client_min_messages(int* newval, void** extra, GucSource sourc return true; } +static bool check_default_transaction_isolation(int *newval, void **extra, GucSource source) +{ + if (ENABLE_DMS && *newval != XACT_READ_COMMITTED) { + ereport(ERROR, (errmsg("Only support read committed transaction isolation level while DMS and DSS enabled"))); + return false; + } + return true; +} + +static bool check_enable_stmt_track(bool *newval, void **extra, GucSource source) +{ + if (ENABLE_DMS && !SS_MY_INST_IS_MASTER) { + *newval = false; + } + return true; +} + static bool check_debug_assertions(bool* newval, void** extra, GucSource source) { #ifndef USE_ASSERT_CHECKING diff --git a/src/common/backend/utils/misc/guc/guc_storage.cpp b/src/common/backend/utils/misc/guc/guc_storage.cpp index 9c7c5e936..aeca4efcd 100755 --- a/src/common/backend/utils/misc/guc/guc_storage.cpp +++ b/src/common/backend/utils/misc/guc/guc_storage.cpp @@ -204,6 +204,12 @@ static bool check_and_assign_namespace_oids(List* elemlist); 
static bool check_and_assign_general_oids(List* elemlist); static int GetLengthAndCheckReplConn(const char* ConnInfoList); +static bool check_ss_interconnect_type(char **newval, void **extra, GucSource source); +static bool check_ss_rdma_work_config(char** newval, void** extra, GucSource source); +static bool check_ss_dss_vg_name(char** newval, void** extra, GucSource source); +static bool check_ss_dss_conn_path(char** newval, void** extra, GucSource source); +static bool check_ss_enable_ssl(bool* newval, void** extra, GucSource source); + #ifndef ENABLE_MULTIPLE_NODES static void assign_dcf_election_timeout(int newval, void* extra); static void assign_dcf_auto_elc_priority_en(int newval, void* extra); @@ -975,6 +981,83 @@ static void InitStorageConfigureNamesBool() NULL, NULL}, + {{"ss_enable_dss", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Whether use dss"), + NULL, + GUC_SUPERUSER_ONLY}, + &g_instance.attr.attr_storage.dss_attr.ss_enable_dss, + false, + NULL, + NULL, + NULL}, + + {{"ss_enable_dms", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Whether use dms"), + NULL, + GUC_SUPERUSER_ONLY}, + &g_instance.attr.attr_storage.dms_attr.enable_dms, + false, + NULL, + NULL, + NULL}, + + {{"ss_enable_catalog_centralized", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Whether dms catalog stored centralized or distributed"), + NULL, + GUC_SUPERUSER_ONLY}, + &g_instance.attr.attr_storage.dms_attr.enable_catalog_centralized, + true, + NULL, + NULL, + NULL}, +#ifdef USE_ASSERT_CHECKING + {{"ss_enable_reform", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Whether use dms reform"), + NULL, + GUC_SUPERUSER_ONLY}, + &g_instance.attr.attr_storage.dms_attr.enable_reform, + true, + NULL, + NULL, + NULL}, +#endif + {{"ss_enable_ssl", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Whether use dms ssl"), + NULL, + 
GUC_SUPERUSER_ONLY}, + &g_instance.attr.attr_storage.dms_attr.enable_ssl, + true, + check_ss_enable_ssl, + NULL, + NULL}, + + {{"ss_enable_log_level", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Set dms and dss log level to LOG"), + NULL}, + &g_instance.attr.attr_storage.dms_attr.enable_log_level, + false, + NULL, + NULL, + NULL}, + #ifdef USE_ASSERT_CHECKING {{"enable_hashbucket", PGC_SUSET, @@ -3289,6 +3372,62 @@ static void InitStorageConfigureNamesInt() NULL, NULL, NULL}, + {{"ss_instance_id", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Sets the instance id."), + NULL, + GUC_SUPERUSER_ONLY}, + &g_instance.attr.attr_storage.dms_attr.instance_id, + 0, + 0, + 63, + NULL, + NULL, + NULL}, + {{"ss_interconnect_channel_count", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Sets ss mes interconnect channel count"), + NULL, + GUC_SUPERUSER_ONLY}, + &g_instance.attr.attr_storage.dms_attr.channel_count, + 16, + 1, + 32, + NULL, + NULL, + NULL}, + {{"ss_work_thread_count", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Sets ss mes work thread count"), + NULL, + GUC_SUPERUSER_ONLY}, + &g_instance.attr.attr_storage.dms_attr.work_thread_count, + 32, + 16, + 128, + NULL, + NULL, + NULL}, + {{"ss_recv_msg_pool_size", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Sets the ss receive message pool size (KB)"), + NULL, + GUC_SUPERUSER_ONLY | GUC_UNIT_KB}, + &g_instance.attr.attr_storage.dms_attr.recv_msg_pool_size, + 16 * 1024, + 1024, + 1024 * 1024, + NULL, + NULL, + NULL}, /* End-of-list marker */ {{NULL, (GucContext)0, @@ -4089,6 +4228,76 @@ static void InitStorageConfigureNamesString() check_logical_decode_options_default, assign_logical_decode_options_default, NULL}, + {{"ss_dss_vg_name", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Sets the vg name of DSS node."), + NULL}, + 
&g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name, + "", + check_ss_dss_vg_name, + NULL, + NULL}, + {{"ss_dss_conn_path", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Sets the socket file path of DSS node."), + NULL}, + &g_instance.attr.attr_storage.dss_attr.ss_dss_conn_path, + "UDS:/tmp/.dss_unix_d_socket", + check_ss_dss_conn_path, + NULL, + NULL}, + {{"ss_interconnect_url", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Sets the url to connect to ss."), + NULL, + GUC_SUPERUSER_ONLY}, + &g_instance.attr.attr_storage.dms_attr.interconnect_url, + "0:127.0.0.1:1611", + NULL, + NULL, + NULL}, + {{"ss_interconnect_type", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Sets the type of connect to ss, range: TCP, RDMA."), + NULL, + GUC_SUPERUSER_ONLY}, + &g_instance.attr.attr_storage.dms_attr.interconnect_type, + "TCP", + check_ss_interconnect_type, + NULL, + NULL}, + {{"ss_rdma_work_config", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Sets config with digit number: A B."), + NULL, + GUC_SUPERUSER_ONLY}, + &g_instance.attr.attr_storage.dms_attr.rdma_work_config, + "", + check_ss_rdma_work_config, + NULL, + NULL}, + {{"ss_ock_log_path", + PGC_POSTMASTER, + NODE_SINGLENODE, + SHARED_STORAGE_OPTIONS, + gettext_noop("Sets config with string file path."), + NULL, + GUC_SUPERUSER_ONLY}, + &g_instance.attr.attr_storage.dms_attr.ock_log_path, + "", + NULL, + NULL, + NULL}, {{NULL, (GucContext)0, (GucNodeType)0, @@ -5380,6 +5589,119 @@ static int GetLengthAndCheckReplConn(const char* ConnInfoList) return repl_len; } +static bool check_ss_interconnect_type(char **newval, void **extra, GucSource source) +{ + return (strcmp("TCP", *newval) == 0 || strcmp("RDMA", *newval) == 0); +} + +static inline bool check_digit_text(char *str, uint32* len) +{ + uint32 idx = 0; + if (str == NULL) { + *len = 0; + return true; + } + + while (*str != '\0' && *str == 
' ') { + idx++; + ++str; + } + + while (*str != '\0') { + if (*str == ' ') { + break; + } + + if (*str >= '0' && *str <= '9') { + ++str; + ++idx; + } else { + *len = 0; + return false; + } + } + + *len = idx; + return true; +} + +static bool check_ss_rdma_work_config(char** newval, void** extra, GucSource source) +{ + uint32 idx1 = 0; + uint32 idx2 = 0; + if (!check_digit_text(*newval, &idx1)) { + return false; + } + if (!check_digit_text(*newval + idx1, &idx2)) { + return false; + } + return true; +} + +static bool check_ss_dss_vg_name(char** newval, void** extra, GucSource source) +{ + char *ReplStr = NULL; + char *ptr = NULL; + if (newval == NULL || *newval == NULL || **newval == '\0') { + return true; + } + + ReplStr = pstrdup(*newval); + if (*ReplStr == '+') { + ptr = ReplStr; + while (*ptr != '\0') { + if (*ptr == '/') { + break; + } + ptr++; + } + if (*ptr == '\0') { + pfree(ReplStr); + return true; + } + } + ereport(ERROR, (errmsg("DSS vg name must start with '+' and not comtain '\\'."))); + pfree(ReplStr); + return false; +} + +static bool check_ss_dss_conn_path(char** newval, void** extra, GucSource source) +{ + char *ReplStr = NULL; + int strlen = sizeof("UDS:") - 1; + if (newval == NULL || *newval == NULL || **newval == '\0') { + ereport(ERROR, (errmsg("DSS conn path can not be NULL."))); + return false; + } + + canonicalize_path(*newval); + ReplStr = pstrdup(*newval); + if (strncmp(ReplStr, "UDS:", strlen) != 0) { + ereport(ERROR, (errmsg("DSS conn path format: \"UDS:socket_domain\""))); + return false; + } + + if (!ENABLE_DSS) { + return true; + } + + ReplStr = ReplStr + strlen; + if (is_absolute_path(ReplStr)) { + return true; + } else { + ereport(ERROR, (errmsg("DSS conn path must be absolute path."))); + } + return false; +} + +static bool check_ss_enable_ssl(bool *newval, void **extra, GucSource source) +{ + if (!*newval) { + ereport(WARNING, (errmsg("The SSL connection will be disabled during build, which brings security risks."))); + } + return 
true; +} + #ifndef ENABLE_MULTIPLE_NODES static void assign_dcf_election_timeout(int newval, void* extra) diff --git a/src/common/backend/utils/misc/pg_controldata.cpp b/src/common/backend/utils/misc/pg_controldata.cpp index 91f18c8d8..8c53e4061 100644 --- a/src/common/backend/utils/misc/pg_controldata.cpp +++ b/src/common/backend/utils/misc/pg_controldata.cpp @@ -140,7 +140,7 @@ ControlFileData* GetControlfile(const char *dataDir, bool *crc_ok_p) AssertArg(crc_ok_p); controlFile = (ControlFileData*)palloc(sizeof(ControlFileData)); - errno_t rc = snprintf_s(controlFilePath, MAXPGPATH, MAXPGPATH - 1, "%s/global/pg_control", dataDir); + errno_t rc = snprintf_s(controlFilePath, MAXPGPATH, MAXPGPATH - 1, "%s/%s", dataDir, XLOG_CONTROL_FILE); securec_check_ss_c(rc, "\0", "\0"); fd = OpenControlFile(controlFilePath); diff --git a/src/common/backend/utils/misc/postgresql_single.conf.sample b/src/common/backend/utils/misc/postgresql_single.conf.sample index 994c891aa..cd6d4501f 100644 --- a/src/common/backend/utils/misc/postgresql_single.conf.sample +++ b/src/common/backend/utils/misc/postgresql_single.conf.sample @@ -810,3 +810,22 @@ job_queue_processes = 10 # Number of concurrent jobs, optional: [0..1000] #plsql_show_all_error=off #enable_seqscan_fusion = off #enable_cachedplan_mgr=on + +#------------------------------------------------------------------------------ +# SHARED STORAGE OPTIONS +#------------------------------------------------------------------------------ +#ss_enable_dms = off +#ss_enable_dss = off +#ss_enable_reform = on +#ss_enable_ssl = on +#ss_enable_catalog_centralized = on +#ss_instance_id = 0 +#ss_dss_vg_name = '' +#ss_dss_conn_path = '' +#ss_interconnect_channel_count = 16 +#ss_work_thread_count = 32 +#ss_recv_msg_pool_size = 16MB +#ss_interconnect_type = 'TCP' +#ss_interconnect_url = '0:127.0.0.1:1611' +#ss_rdma_work_config = '' +#ss_ock_log_path = '' diff --git a/src/common/backend/utils/mmgr/memprot.cpp 
b/src/common/backend/utils/mmgr/memprot.cpp index 4dbd2013d..7947c26fc 100755 --- a/src/common/backend/utils/mmgr/memprot.cpp +++ b/src/common/backend/utils/mmgr/memprot.cpp @@ -956,6 +956,7 @@ void gs_memprot_reserved_backend(int avail_mem) int reserved_thread_count = g_instance.attr.attr_network.ReservedBackends + NUM_CMAGENT_PROCS + wal_thread_count + NUM_DCF_CALLBACK_PROCS + + NUM_DMS_CALLBACK_PROCS + g_instance.attr.attr_storage.max_wal_senders; /* reserve 10MB per-thread for sysadmin user */ reserved_mem += reserved_thread_count * 10; diff --git a/src/common/backend/utils/time/snapmgr.cpp b/src/common/backend/utils/time/snapmgr.cpp index 6631a3e41..c14d14d27 100644 --- a/src/common/backend/utils/time/snapmgr.cpp +++ b/src/common/backend/utils/time/snapmgr.cpp @@ -59,6 +59,10 @@ #include "utils/memutils.h" #include "utils/snapmgr.h" #include "utils/syscache.h" +#include "ddes/dms/ss_common_attr.h" +#include "ddes/dms/ss_transaction.h" +#include "storage/file/fio_device.h" + #ifdef PGXC #include "pgxc/pgxc.h" #endif @@ -82,19 +86,21 @@ typedef struct ActiveSnapshotElt { static THR_LOCAL bool RegisterStreamSnapshot = false; /* Define pathname of exported-snapshot files */ -#define SNAPSHOT_EXPORT_DIR "pg_snapshots" +#define SNAPSHOT_EXPORT_DIR (g_instance.datadir_cxt.snapshotsDir) /* Structure holding info about exported snapshot. 
*/ typedef struct ExportedSnapshot { char *snapfile; Snapshot snapshot; } ExportedSnapshot; + #define XactExportFilePath(path, xid, num, suffix) \ { \ int rc = snprintf_s(path, \ sizeof(path), \ sizeof(path) - 1, \ - SNAPSHOT_EXPORT_DIR "/%08X%08X-%d%s", \ + "%s/%08X%08X-%d%s", \ + (g_instance.datadir_cxt.snapshotsDir), \ (uint32)((xid) >> 32), \ (uint32)(xid), \ (num), \ @@ -175,7 +181,7 @@ bool IsXidVisibleInGtmLiteLocalSnapshot(TransactionId xid, Snapshot snapshot, return false; } -static void RecheckXidFinish(TransactionId xid, CommitSeqNo csn) +void RecheckXidFinish(TransactionId xid, CommitSeqNo csn) { if (TransactionIdIsInProgress(xid)) { ereport(defence_errlevel(), (errmsg("transaction id %lu is still running, " @@ -208,7 +214,11 @@ bool XidVisibleInSnapshot(TransactionId xid, Snapshot snapshot, TransactionIdSta #endif loop: - csn = TransactionIdGetCommitSeqNo(xid, false, true, false, snapshot); + if (SS_STANDBY_MODE) { + csn = SSTransactionIdGetCommitSeqNo(xid, false, true, false, snapshot, sync); + } else { + csn = TransactionIdGetCommitSeqNo(xid, false, true, false, snapshot); + } #ifdef XIDVIS_DEBUG ereport(DEBUG1, @@ -228,6 +238,10 @@ loop: else return false; } else if (COMMITSEQNO_IS_COMMITTING(csn)) { + /* SS master node would've already sync-waited, so this should never happen */ + if (SS_STANDBY_MODE) { + ereport(FATAL, (errmsg("SS xid %lu's csn %lu is still COMMITTING after Master txn waited.", xid, csn))); + } if (looped) { ereport(DEBUG1, (errmsg("transaction id %lu's csn %ld is changed to ABORT after lockwait.", xid, csn))); /* recheck if transaction id is finished */ @@ -346,9 +360,17 @@ bool CommittedXidVisibleInSnapshot(TransactionId xid, Snapshot snapshot, Buffer } loop: - csn = TransactionIdGetCommitSeqNo(xid, true, true, false, snapshot); + if (SS_STANDBY_MODE) { + csn = SSTransactionIdGetCommitSeqNo(xid, true, true, false, snapshot, NULL); + } else { + csn = TransactionIdGetCommitSeqNo(xid, true, true, false, snapshot); + } if 
(COMMITSEQNO_IS_COMMITTING(csn)) { + /* SS master node would've already sync-waited, so this should never happen */ + if (SS_STANDBY_MODE) { + ereport(FATAL, (errmsg("SS xid %lu's csn %lu is still COMMITTING after Master txn waited.", xid, csn))); + } if (looped) { ereport(WARNING, (errmsg("committed transaction id %lu's csn %lu" @@ -1544,7 +1566,7 @@ void ImportSnapshot(const char* idstr) ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid snapshot identifier: \"%s\"", idstr))); /* OK, read the file */ - int rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, SNAPSHOT_EXPORT_DIR "/%s", idstr); + int rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%s", SNAPSHOT_EXPORT_DIR, idstr); securec_check_ss(rc, "", ""); f = AllocateFile(path, PG_BINARY_R); @@ -1668,7 +1690,7 @@ void DeleteAllExportedSnapshotFiles(void) if (strcmp(s_de->d_name, ".") == 0 || strcmp(s_de->d_name, "..") == 0) continue; - rc = snprintf_s(buf, MAXPGPATH, MAXPGPATH - 1, SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name); + rc = snprintf_s(buf, MAXPGPATH, MAXPGPATH - 1, "%s/%s", SNAPSHOT_EXPORT_DIR, s_de->d_name); securec_check_ss(rc, "", ""); /* Again, unlink failure is not worthy of FATAL */ if (unlink(buf)) diff --git a/src/common/interfaces/libpq/CMakeLists.txt b/src/common/interfaces/libpq/CMakeLists.txt index 1528af3b3..420c94075 100755 --- a/src/common/interfaces/libpq/CMakeLists.txt +++ b/src/common/interfaces/libpq/CMakeLists.txt @@ -50,6 +50,8 @@ execute_process( COMMAND ln -fs ${PROJECT_SRC_DIR}/common/backend/libpq/md5.cpp ${CMAKE_CURRENT_SOURCE_DIR}/md5.cpp COMMAND ln -fs ${PROJECT_SRC_DIR}/common/backend/libpq/sha2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sha2.cpp COMMAND ln -fs ${PROJECT_SRC_DIR}/common/port/pgstrcasecmp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pgstrcasecmp.cpp + COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/file/fio_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fio_device.cpp + COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/dss/fio_dss.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/fio_dss.cpp ) # libpq.a @@ -87,6 +89,9 @@ list(APPEND TGT_pq_SRC ${CMAKE_CURRENT_SOURCE_DIR}/frontend_parser/fe-wchar.cpp ${CMAKE_CURRENT_SOURCE_DIR}/frontend_parser/frontend_mbutils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/frontend_parser/wstrncmp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/fio_device.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/fio_dss.cpp + ) set(TGT_pq_INC @@ -98,6 +103,7 @@ set(TGT_pq_INC ${LIBEDIT_INCLUDE_PATH} ${ZLIB_INCLUDE_PATH} ${PROJECT_SRC_DIR}/include/libpq + ${PROJECT_SRC_DIR}/include/storage/file if(NOT "${ENABLE_LITE_MODE}" STREQUAL "ON") ${JAVA_HOME}/include ${JAVA_HOME}/include/linux @@ -142,6 +148,8 @@ execute_process( COMMAND ln -fs ${PROJECT_SRC_DIR}/common/interfaces/libpq/fe-protocol2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libpq_ce/fe-protocol2.cpp COMMAND ln -fs ${PROJECT_SRC_DIR}/common/interfaces/libpq/fe-protocol3.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libpq_ce/fe-protocol3.cpp COMMAND ln -fs ${PROJECT_SRC_DIR}/common/interfaces/libpq/fe-secure.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libpq_ce/fe-secure.cpp + COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/file/fio_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fio_device.cpp + COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/dss/fio_dss.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fio_dss.cpp ) SET(TGT_pq_ce_INC @@ -187,6 +195,9 @@ list(APPEND TGT_pq_ce_SRC ${CMAKE_CURRENT_SOURCE_DIR}/gs_syscall_lock.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gs_readdir.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cl_state.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/fio_device.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/fio_dss.cpp + ) set(pq_ce_DEF_OPTIONS ${MACRO_OPTIONS} -DFRONTEND -DFRONTEND_PARSER -DUNSAFE_STAT_OK -DHAVE_CE -DWORDS_BIGENDIAN -DSO_MAJOR_VERSION=5) set(pq_ce_COMPILE_OPTIONS ${OPTIMIZE_OPTIONS} ${OS_OPTIONS} ${PROTECT_OPTIONS} ${WARNING_OPTIONS} ${LIB_SECURE_OPTIONS} ${CHECK_OPTIONS} -fstack-protector-all) diff --git a/src/common/interfaces/libpq/Makefile b/src/common/interfaces/libpq/Makefile index 58ce13ee7..2ed81c9fa 100644 --- 
a/src/common/interfaces/libpq/Makefile +++ b/src/common/interfaces/libpq/Makefile @@ -63,7 +63,8 @@ override CFLAGS := $(filter-out -fPIE, $(CFLAGS)) -fPIC -fstack-protector-all # OBJS from this file. OBJS= fe-auth.o fe-connect.o fe-exec.o fe-misc.o fe-print.o fe-lobj.o \ fe-protocol2.o fe-protocol3.o pqexpbuffer.o pqsignal.o fe-secure.o \ - libpq-events.o + libpq-events.o \ + $(top_builddir)/src/gausskernel/storage/dss/fio_dss.o $(top_builddir)/src/gausskernel/storage/file/fio_device.o # libpgport C files we always use OBJS += chklocale.o inet_net_ntop.o noblock.o pgstrcasecmp.o thread.o cipher.o path.o pgsleep.o # libpgport C files that are needed if identified by configure diff --git a/src/common/pl/plpgsql/src/pl_handler.cpp b/src/common/pl/plpgsql/src/pl_handler.cpp index 64d1e0c2e..b47a211ea 100755 --- a/src/common/pl/plpgsql/src/pl_handler.cpp +++ b/src/common/pl/plpgsql/src/pl_handler.cpp @@ -209,6 +209,7 @@ void InsertGsSource(Oid objId, Oid nspid, const char* name, const char* type, bo "EXCEPTION WHEN OTHERS THEN NULL; \n"); appendStringInfoString(&str, "end;"); List* rawParseList = raw_parser(str.data); + pfree_ext(str.data); DoStmt* stmt = (DoStmt *)linitial(rawParseList); int save_compile_status = getCompileStatus(); int save_compile_list_length = list_length(u_sess->plsql_cxt.compile_context_list); @@ -242,7 +243,6 @@ void InsertGsSource(Oid objId, Oid nspid, const char* name, const char* type, bo if (temp != NULL) { MemoryContextSwitchTo(temp); } - pfree_ext(str.data); } static void PkgInsertGsSource(Oid pkgOid, bool isSpec, bool status) { diff --git a/src/common/port/CMakeLists.txt b/src/common/port/CMakeLists.txt index 92f207efd..735339d33 100755 --- a/src/common/port/CMakeLists.txt +++ b/src/common/port/CMakeLists.txt @@ -39,6 +39,11 @@ execute_process( COMMAND ln -fs ${CMAKE_CURRENT_SOURCE_DIR}/crc32_arm_parallel.S ${CMAKE_CURRENT_SOURCE_DIR}/port_srv/crc32_arm_parallel.S ) +execute_process( + COMMAND ln -fs 
${PROJECT_SRC_DIR}/gausskernel/storage/file/fio_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fio_device.cpp + COMMAND ln -fs ${PROJECT_SRC_DIR}/gausskernel/storage/dss/fio_dss.cpp ${CMAKE_CURRENT_SOURCE_DIR}/fio_dss.cpp +) + list(APPEND TGT_port_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fls.cpp ${CMAKE_CURRENT_SOURCE_DIR}/strlcat.cpp @@ -71,7 +76,10 @@ list(APPEND TGT_port_SRC ${CMAKE_CURRENT_SOURCE_DIR}/gs_strerror.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gs_syscall_lock.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gs_system.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/cipher.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cipher.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/tool_common.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/fio_device.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/fio_dss.cpp ) # aarch64 @@ -138,6 +146,8 @@ list(APPEND TGT_pgport_srv_SRC ${CMAKE_CURRENT_SOURCE_DIR}/port_srv/gs_syscall_lock.cpp ${CMAKE_CURRENT_SOURCE_DIR}/port_srv/gs_system.cpp ${CMAKE_CURRENT_SOURCE_DIR}/port_srv/cipher.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/fio_device.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/fio_dss.cpp ) # aarch64 @@ -199,6 +209,8 @@ set(TGT_pgport_tool_SRC ${CMAKE_CURRENT_SOURCE_DIR}/gs_syscall_lock.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gs_system.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cipher.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/fio_device.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/fio_dss.cpp ) SET(TGT_pgport_tool_INC ${PROJECT_SRC_DIR}/common/backend diff --git a/src/common/port/Makefile b/src/common/port/Makefile index 51767b7e3..8911d6f8c 100644 --- a/src/common/port/Makefile +++ b/src/common/port/Makefile @@ -43,13 +43,15 @@ ifneq "$(MAKECMDGOALS)" "clean" endif endif endif + OBJS = $(LIBOBJS) pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o chklocale.o dirmod.o erand48.o exec.o fls.o inet_net_ntop.o \ noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o \ pgstrcasecmp.o qsort.o qsort_arg.o sprompt.o thread.o flock.o pgstrcasestr.o\ gs_thread.o gs_env_r.o gs_getopt_r.o \ gs_readdir.o gs_strerror.o gs_syscall_lock.o \ gs_system.o \ - cipher.o + cipher.o tool_common.o \ + 
$(top_builddir)/src/gausskernel/storage/file/fio_device.o $(top_builddir)/src/gausskernel/storage/dss/fio_dss.o ifeq "${host_cpu}" "aarch64" OBJS = $(LIBOBJS) pg_crc32c_choose.o chklocale.o dirmod.o erand48.o exec.o fls.o inet_net_ntop.o \ @@ -58,7 +60,8 @@ OBJS = $(LIBOBJS) pg_crc32c_choose.o chklocale.o dirmod.o erand48.o exec.o fls.o gs_thread.o gs_env_r.o gs_getopt_r.o \ gs_readdir.o gs_strerror.o gs_syscall_lock.o \ gs_system.o \ - cipher.o + cipher.o tool_common.o \ + $(top_builddir)/src/gausskernel/storage/file/fio_device.o $(top_builddir)/src/gausskernel/storage/dss/fio_dss.o endif # foo_srv.o and foo.o are both built from foo.c, but only foo.o has -DFRONTEND OBJS_SRV = $(OBJS:%.o=%_srv.o) @@ -71,7 +74,8 @@ endif OBJS_TOOL = fls.o strlcat.o strlcpy.o getpeereid.o chklocale.o dirmod.o erand48.o exec.o inet_net_ntop.o \ noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o pgstrcasecmp.o qsort.o qsort_arg.o sprompt.o thread.o flock.o pgstrcasestr.o \ - gs_thread.o gs_env_r.o gs_getopt_r.o gs_readdir.o gs_strerror.o gs_syscall_lock.o gs_system.o cipher.o + gs_thread.o gs_env_r.o gs_getopt_r.o gs_readdir.o gs_strerror.o gs_syscall_lock.o gs_system.o cipher.o \ + $(top_builddir)/src/gausskernel/storage/file/fio_device.o $(top_builddir)/src/gausskernel/storage/dss/fio_dss.o tool_common.o all: libpgport.a libpgport_srv.a libpgport_tool.so diff --git a/src/common/port/dirmod.cpp b/src/common/port/dirmod.cpp index 35e096634..392305b35 100644 --- a/src/common/port/dirmod.cpp +++ b/src/common/port/dirmod.cpp @@ -42,6 +42,7 @@ #endif #endif +#include "storage/file/fio_device.h" #include "securec_check.h" #ifndef FRONTEND @@ -462,7 +463,7 @@ bool rmtree(const char* path, bool rmtopdir, bool noent_ok) * the bgwriter receives the message in time. 
*/ if (lstat(pathbuf, &statbuf) != 0) { - if (errno != ENOENT) { + if (!is_file_delete(errno)) { #ifndef FRONTEND ereport(WARNING, (errmsg("could not stat file or directory \"%s\": %m", pathbuf))); #else diff --git a/src/common/port/gs_readdir.cpp b/src/common/port/gs_readdir.cpp index 29d5f5293..d28464085 100644 --- a/src/common/port/gs_readdir.cpp +++ b/src/common/port/gs_readdir.cpp @@ -28,6 +28,8 @@ #include #include #include +#include "storage/file/fio_device.h" + #ifndef LEN_D_NAME #define LEN_D_NAME offsetof(struct dirent, d_name) diff --git a/src/common/port/path.cpp b/src/common/port/path.cpp index 92c319db8..15ff610fb 100644 --- a/src/common/port/path.cpp +++ b/src/common/port/path.cpp @@ -429,6 +429,21 @@ bool path_is_prefix_of_path(const char* path1, const char* path2) return false; } +void get_top_path(char *path) +{ + char *p = NULL; + + if (path == NULL || path[0] == '\0') { + return; + } + + /* fetch first dir sep */ + for (p = path; !IS_DIR_SEP(*p) && p[0] != '\0'; p++) + ; + + *p = '\0'; +} + /* * Extracts the actual name of the program as called - * stripped of .exe suffix if any diff --git a/src/common/port/pgcheckdir.cpp b/src/common/port/pgcheckdir.cpp index 0eb72c851..206f85066 100644 --- a/src/common/port/pgcheckdir.cpp +++ b/src/common/port/pgcheckdir.cpp @@ -15,6 +15,7 @@ #include "c.h" #include +#include "storage/file/fio_device.h" /* * Test to see if a directory exists and is empty or not. @@ -32,11 +33,11 @@ int pg_check_dir(const char* dir) struct dirent* file = NULL; errno = 0; - chkdir = opendir(dir); - if (chkdir == NULL) - return (errno == ENOENT) ? 0 : -1; + if (chkdir == NULL) { + return (is_file_delete(errno) ? 
0 : -1); + } while ((file = gs_readdir(chkdir)) != NULL) { if (strcmp(".", file->d_name) == 0 || strcmp("..", file->d_name) == 0) { diff --git a/src/common/port/pgmkdirp.cpp b/src/common/port/pgmkdirp.cpp index 5635c7520..d1989cc4f 100644 --- a/src/common/port/pgmkdirp.cpp +++ b/src/common/port/pgmkdirp.cpp @@ -34,6 +34,7 @@ #include #include +#include "storage/file/fio_device.h" /* * main function to make dir diff --git a/src/common/port/tool_common.cpp b/src/common/port/tool_common.cpp new file mode 100644 index 000000000..888d80888 --- /dev/null +++ b/src/common/port/tool_common.cpp @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * --------------------------------------------------------------------------------------- + * + * tool_common.cpp + * + * IDENTIFICATION + * src/common/port/tool_common.cpp + * + * --------------------------------------------------------------------------------------- + */ + +#include "securec.h" +#include "securec_check.h" +#include "tool_common.h" + +datadir_t g_datadir; /* need init when used in first time */ + +static void initFileDataPathStruct(datadir_t *dataDir); +static void initDSSDataPathStruct(datadir_t *dataDir); + +void initDataPathStruct(bool enable_dss) +{ + if (enable_dss) { + initDSSDataPathStruct(&g_datadir); + } else { + initFileDataPathStruct(&g_datadir); + } +} + +static void initFileDataPathStruct(datadir_t *dataDir) +{ + errno_t rc = EOK; + + // dir path + rc = snprintf_s(dataDir->baseDir, MAXPGPATH, MAXPGPATH - 1, "%s/base", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->globalDir, MAXPGPATH, MAXPGPATH - 1, "%s/global", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->locationDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_location", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->tblspcDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_tblspc", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->clogDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_clog", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->csnlogDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_csnlog", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->notifyDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_notify", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->serialDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_serial", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->snapshotsDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_snapshots", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); 
+ + rc = snprintf_s(dataDir->twophaseDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_twophase", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->multixactDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_multixact", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->xlogDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_xlog", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + // sub-file path + rc = snprintf_s(dataDir->controlPath, MAXPGPATH, MAXPGPATH - 1, "%s/global/pg_control", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->controlBakPath, MAXPGPATH, MAXPGPATH - 1, "%s/global/pg_control.backup", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwOldPath, MAXPGPATH, MAXPGPATH - 1, "%s/global/pg_dw", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwPathPrefix, MAXPGPATH, MAXPGPATH - 1, "%s/global/pg_dw_", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwSinglePath, MAXPGPATH, MAXPGPATH - 1, "%s/global/pg_dw_single", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwBuildPath, MAXPGPATH, MAXPGPATH - 1, "%s/global/pg_dw.build", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwMetaPath, MAXPGPATH, MAXPGPATH - 1, "%s/global/pg_dw_meta", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwUpgradePath, MAXPGPATH, MAXPGPATH - 1, "%s/global/dw_upgrade", dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwBatchUpgradeMetaPath, MAXPGPATH, MAXPGPATH - 1, "%s/global/dw_batch_upgrade_meta", + dataDir->pg_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwBatchUpgradeFilePath, MAXPGPATH, MAXPGPATH - 1, "%s/global/dw_batch_upgrade_files", + dataDir->pg_data); + securec_check_ss_c(rc, "", ""); +} + +static void 
initDSSDataPathStruct(datadir_t *dataDir) +{ + errno_t rc = EOK; + + // DSS file directory (cluster owner) + rc = snprintf_s(dataDir->baseDir, MAXPGPATH, MAXPGPATH - 1, "%s/base", dataDir->dss_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->globalDir, MAXPGPATH, MAXPGPATH - 1, "%s/global", dataDir->dss_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->locationDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_location", dataDir->dss_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->tblspcDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_tblspc", dataDir->dss_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->controlPath, MAXPGPATH, MAXPGPATH - 1, "%s/pg_control", dataDir->dss_data); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->controlBakPath, MAXPGPATH, MAXPGPATH - 1, "%s/pg_control.backup", dataDir->dss_data); + securec_check_ss_c(rc, "", ""); + + // DSS file directory (instance owner) + rc = snprintf_s(dataDir->clogDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_clog%d", dataDir->dss_data, + dataDir->instance_id); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->csnlogDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_csnlog%d", dataDir->dss_data, + dataDir->instance_id); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->serialDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_serial%d", dataDir->dss_data, + dataDir->instance_id); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->snapshotsDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_snapshots%d", dataDir->dss_data, + dataDir->instance_id); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->twophaseDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_twophase%d", dataDir->dss_data, + dataDir->instance_id); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->multixactDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_multixact%d", dataDir->dss_data, + dataDir->instance_id); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->xlogDir, 
MAXPGPATH, MAXPGPATH - 1, "%s/pg_xlog%d", dataDir->dss_data, + dataDir->instance_id); + securec_check_ss_c(rc, "", ""); + + // Unix file directory (instance owner) + rc = snprintf_s(dataDir->dwDir.dwOldPath, MAXPGPATH, MAXPGPATH - 1, "%s/pg_doublewrite%d/pg_dw", + dataDir->pg_data, dataDir->instance_id); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwPathPrefix, MAXPGPATH, MAXPGPATH - 1, "%s/pg_doublewrite%d/pg_dw_", + dataDir->pg_data, dataDir->instance_id); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwSinglePath, MAXPGPATH, MAXPGPATH - 1, "%s/pg_doublewrite%d/pg_dw_single", + dataDir->pg_data, dataDir->instance_id); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwBuildPath, MAXPGPATH, MAXPGPATH - 1, "%s/pg_doublewrite%d/pg_dw.build", + dataDir->pg_data, dataDir->instance_id); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwUpgradePath, MAXPGPATH, MAXPGPATH - 1, "%s/pg_doublewrite%d/dw_upgrade", + dataDir->pg_data, dataDir->instance_id); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwMetaPath, MAXPGPATH, MAXPGPATH - 1, "%s/pg_doublewrite%d/pg_dw_meta", + dataDir->pg_data, dataDir->instance_id); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwBatchUpgradeMetaPath, MAXPGPATH, MAXPGPATH - 1, + "%s/pg_doublewrite%d/dw_batch_upgrade_meta", dataDir->pg_data, dataDir->instance_id); + securec_check_ss_c(rc, "", ""); + + rc = snprintf_s(dataDir->dwDir.dwBatchUpgradeFilePath, MAXPGPATH, MAXPGPATH - 1, + "%s/pg_doublewrite%d/dw_batch_upgrade_files", dataDir->pg_data, dataDir->instance_id); + securec_check_ss_c(rc, "", ""); +} diff --git a/src/gausskernel/CMakeLists.txt b/src/gausskernel/CMakeLists.txt index 94698e82b..5c98c8ac3 100755 --- a/src/gausskernel/CMakeLists.txt +++ b/src/gausskernel/CMakeLists.txt @@ -7,6 +7,7 @@ set(CMAKE_SKIP_RPATH TRUE) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/bootstrap 
${CMAKE_CURRENT_SOURCE_DIR}/storage + ${CMAKE_CURRENT_SOURCE_DIR}/ddes ${CMAKE_CURRENT_SOURCE_DIR}/cbb ${CMAKE_CURRENT_SOURCE_DIR}/dbmind ${CMAKE_CURRENT_SOURCE_DIR}/optimizer @@ -23,6 +24,7 @@ add_subdirectory(bootstrap) add_subdirectory(process) add_subdirectory(runtime) add_subdirectory(security) +add_subdirectory(ddes) # gaussdb SET(llvm_libs_var "") @@ -133,6 +135,7 @@ set(gaussdb_objects $ $ $ + $ ) if("${ENABLE_MULTIPLE_NODES}" STREQUAL "OFF") @@ -194,6 +197,7 @@ list(APPEND gaussdb_objects $ $ $ + $ $ $ $ diff --git a/src/gausskernel/Makefile b/src/gausskernel/Makefile index 719452f51..a22a6a162 100755 --- a/src/gausskernel/Makefile +++ b/src/gausskernel/Makefile @@ -22,7 +22,7 @@ override CXXFLAGS += $(PTHREAD_CFLAGS) # these directories: catalog lib libpq nodes parser po port regex snowball tsearch utils pgxc # under ../common # -SUBDIRS = ../common/backend bootstrap cbb optimizer process dbmind runtime security storage \ +SUBDIRS = ../common/backend bootstrap cbb optimizer process dbmind runtime security storage ddes \ $(top_builddir)/src/common/timezone $(top_builddir)/src/common/interfaces/libpq \ $(top_builddir)/contrib/file_fdw $(top_builddir)/contrib/log_fdw \ $(top_builddir)/contrib/test_decoding $(top_builddir)/contrib/mppdb_decoding \ diff --git a/src/gausskernel/bootstrap/bootstrap.cpp b/src/gausskernel/bootstrap/bootstrap.cpp index f9bb0b14d..c995d2791 100755 --- a/src/gausskernel/bootstrap/bootstrap.cpp +++ b/src/gausskernel/bootstrap/bootstrap.cpp @@ -30,6 +30,7 @@ #include "catalog/pg_collation.h" #include "catalog/pg_proc.h" #include "catalog/pg_type.h" +#include "catalog/pg_class.h" #include "libpq/pqsignal.h" #include "miscadmin.h" #include "pgstat.h" @@ -69,6 +70,9 @@ #endif #include "gssignal/gs_signal.h" +#include "storage/file/fio_device.h" +#include "storage/dss/dss_adaptor.h" +#include "storage/dss/dss_log.h" #define ALLOC(t, c) ((t*)selfpalloc0((unsigned)(c) * sizeof(t))) @@ -248,7 +252,7 @@ void BootStrapProcessMain(int argc, 
char* argv[]) t_thrd.bootstrap_cxt.MyAuxProcType = CheckerProcess; initOptParseContext(&optCtxt); - while ((flag = getopt_r(argc, argv, "B:c:d:D:Fr:x:g:-:", &optCtxt)) != -1) { + while ((flag = getopt_r(argc, argv, "B:c:d:D:FGr:x:g:-:", &optCtxt)) != -1) { switch (flag) { case 'B': SetConfigOption("shared_buffers", optCtxt.optarg, PGC_POSTMASTER, PGC_S_ARGV); @@ -270,6 +274,9 @@ void BootStrapProcessMain(int argc, char* argv[]) case 'F': SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV); break; + case 'G': + EnableInitDBSegment = true; + break; case 'g': SetConfigOption("xlog_file_path", optCtxt.optarg, PGC_POSTMASTER, PGC_S_ARGV); break; @@ -334,6 +341,17 @@ void BootStrapProcessMain(int argc, char* argv[]) /* If standalone, create lockfile for data directory */ if (!IsUnderPostmaster) CreateDataDirLockFile(false); + + /* Callback function for dss operator */ + if (dss_device_init(g_instance.attr.attr_storage.dss_attr.ss_dss_conn_path, + g_instance.attr.attr_storage.dss_attr.ss_enable_dss) != DSS_SUCCESS) { + ereport(PANIC, (errmsg("failed to init dss device"))); + proc_exit(1); + } + if (ENABLE_DSS) { + dss_log_init(); + } + initDSSConf(); SetProcessingMode(BootstrapProcessing); u_sess->attr.attr_common.IgnoreSystemIndexes = true; @@ -687,6 +705,16 @@ void DefineAttr(const char* name, char* type, int attnum) } +/* ---------------- + * ChangePgClassBucketValueForSegment + * ---------------- + */ +static inline void ChangePgClassBucketValueForSegment() +{ + values[Anum_pg_class_relbucket - 1] = ObjectIdGetDatum(VirtualSegmentOid); + Nulls[Anum_pg_class_relbucket - 1] = false; +} + /* ---------------- * InsertOneTuple * @@ -705,6 +733,10 @@ void InsertOneTuple(Oid objectid) if (IsBootingPgProc(t_thrd.bootstrap_cxt.boot_reldesc)) { ereport(FATAL, (errmsg("Built-in functions should not be added into pg_proc"))); } + + if (IsBootingPgClass(t_thrd.bootstrap_cxt.boot_reldesc) && EnableInitDBSegment) { + ChangePgClassBucketValueForSegment(); + } tupDesc = 
CreateTupleDesc(t_thrd.bootstrap_cxt.numattr, RelationGetForm(t_thrd.bootstrap_cxt.boot_reldesc)->relhasoids, t_thrd.bootstrap_cxt.attrtypes, diff --git a/src/gausskernel/cbb/instruments/ash/ash.cpp b/src/gausskernel/cbb/instruments/ash/ash.cpp index c2eed5f76..a4b7ed910 100755 --- a/src/gausskernel/cbb/instruments/ash/ash.cpp +++ b/src/gausskernel/cbb/instruments/ash/ash.cpp @@ -1212,6 +1212,9 @@ NON_EXEC_STATIC void ActiveSessionCollectMain() pgstat_report_appname("Asp"); ereport(LOG, ( errmsg("ASP thread start"))); pgstat_report_activity(STATE_IDLE, NULL); + if (g_instance.attr.attr_storage.dms_attr.enable_dms && !SS_MY_INST_IS_MASTER) { + u_sess->attr.attr_common.enable_asp = false; + } SubAspWorker(); } diff --git a/src/gausskernel/cbb/instruments/wdr/snapshot.cpp b/src/gausskernel/cbb/instruments/wdr/snapshot.cpp index d5ff492ea..29a520671 100755 --- a/src/gausskernel/cbb/instruments/wdr/snapshot.cpp +++ b/src/gausskernel/cbb/instruments/wdr/snapshot.cpp @@ -1029,18 +1029,29 @@ static void CreateStatTable(const char* query, const char* tablename) void SnapshotNameSpace::CreateSnapStatTables(void) { - const char* createTs = "create table snapshot.tables_snap_timestamp(snapshot_id bigint not null, db_name text, " - "tablename text, start_ts timestamp with time zone, end_ts timestamp with time zone)"; + StringInfoData createTs; + StringInfoData createSnapshot; const char* tablename1 = "tables_snap_timestamp"; - - CreateStatTable(createTs, tablename1); - - const char* createSnapshot = - "create table snapshot.snapshot(snapshot_id bigint not null, " - "start_ts timestamp with time zone, end_ts timestamp with time zone, primary key (snapshot_id))"; const char* tablename2 = "snapshot"; - CreateStatTable(createSnapshot, tablename2); + initStringInfo(&createTs); + appendStringInfo( + &createTs, "create table snapshot.tables_snap_timestamp(snapshot_id bigint not null, db_name text, " + "tablename text, start_ts timestamp with time zone, end_ts timestamp with time 
zone)"); + + initStringInfo(&createSnapshot); + appendStringInfo( + &createSnapshot, "create table snapshot.snapshot(snapshot_id bigint not null, " + "start_ts timestamp with time zone, end_ts timestamp with time zone, primary key (snapshot_id))"); + + // only allow create segment storage table when enable dss + if (ENABLE_DSS) { + appendStringInfo(&createTs, " with (segment = on)"); + appendStringInfo(&createSnapshot, " with (segment = on)"); + } + + CreateStatTable(createTs.data, tablename1); + CreateStatTable(createSnapshot.data, tablename2); } static void DropIndexes(const char* indexName) @@ -1104,6 +1115,10 @@ void SnapshotNameSpace::CreateTable(const char** views, int numViews, bool isSha views[i], snapColAttrType); } + /* only allow create segment storage table when enable dss */ + if (ENABLE_DSS) { + appendStringInfo(&query, " with (segment = on)"); + } if (!SnapshotNameSpace::ExecuteQuery(query.data, SPI_OK_UTILITY)) { ereport(ERROR, (errmodule(MOD_WDR_SNAPSHOT), errcode(ERRCODE_DATA_EXCEPTION), errmsg("create WDR snapshot data table failed"), diff --git a/src/gausskernel/cbb/utils/partition/partitionmap.cpp b/src/gausskernel/cbb/utils/partition/partitionmap.cpp index 0a2da537a..db00f36d3 100644 --- a/src/gausskernel/cbb/utils/partition/partitionmap.cpp +++ b/src/gausskernel/cbb/utils/partition/partitionmap.cpp @@ -781,6 +781,11 @@ void RelationInitPartitionMap(Relation relation, bool isSubPartition) (errcode(ERRCODE_RUN_TRANSACTION_DURING_RECOVERY), errmsg("Can not run transaction to remote nodes during recovery."))); } + + if (SSIsServerModeReadOnly()) { + ereport(ERROR, (errmsg("Can not run transaction to remote nodes at Standby with DMS enabled"))); + } + Assert(0); ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), diff --git a/src/gausskernel/ddes/CMakeLists.txt b/src/gausskernel/ddes/CMakeLists.txt new file mode 100644 index 000000000..b61b3c527 --- /dev/null +++ b/src/gausskernel/ddes/CMakeLists.txt @@ -0,0 +1,14 @@ +#This is the main CMAKE for 
build bin. + +set(CMAKE_VERBOSE_MAKEFILE ON) +set(CMAKE_RULE_MESSAGES OFF) +set(CMAKE_SKIP_RPATH TRUE) + +set(CMAKE_MODULE_PATH + ${PROJECT_OPENGS_DIR}/src/gausskernel/ddes/adapter +) + +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/script/dms_contrl.sh DESTINATION bin) +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/script/dss_contrl.sh DESTINATION bin) + +add_subdirectory(adapter) \ No newline at end of file diff --git a/src/gausskernel/ddes/Makefile b/src/gausskernel/ddes/Makefile new file mode 100644 index 000000000..00d826a7c --- /dev/null +++ b/src/gausskernel/ddes/Makefile @@ -0,0 +1,14 @@ +#--------------------------------------------------------------------------------------- +# +# IDENTIFICATION +# src/gausskernel/ddes/Makefile +# +# --------------------------------------------------------------------------------------- + +subdir = src/gausskernel/ddes +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +SUBDIRS = adapter + +include $(top_srcdir)/src/gausskernel/common.mk diff --git a/src/gausskernel/ddes/adapter/CMakeLists.txt b/src/gausskernel/ddes/adapter/CMakeLists.txt new file mode 100644 index 000000000..93592d99d --- /dev/null +++ b/src/gausskernel/ddes/adapter/CMakeLists.txt @@ -0,0 +1,11 @@ +#This is the main CMAKE for build bin. 
+AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_dms_adapter_SRC) + +set(TGT_dms_adapter_INC + ${PROJECT_SRC_DIR}/include +) + +set(dms_adapter_DEF_OPTIONS ${MACRO_OPTIONS} -DOPENGAUSS) +set(dms_adapter_COMPILE_OPTIONS ${OPTIMIZE_OPTIONS} ${OS_OPTIONS} ${PROTECT_OPTIONS} ${WARNING_OPTIONS} ${BIN_SECURE_OPTIONS} ${CHECK_OPTIONS}) +set(dms_adapter_LINK_OPTIONS ${BIN_LINK_OPTIONS}) +add_static_objtarget(gausskernel_ddes_dms_adapter TGT_dms_adapter_SRC TGT_dms_adapter_INC "${dms_adapter_DEF_OPTIONS}" "${dms_adapter_COMPILE_OPTIONS}" "${dms_adapter_LINK_OPTIONS}") \ No newline at end of file diff --git a/src/gausskernel/ddes/adapter/Makefile b/src/gausskernel/ddes/adapter/Makefile new file mode 100644 index 000000000..9ab198f83 --- /dev/null +++ b/src/gausskernel/ddes/adapter/Makefile @@ -0,0 +1,28 @@ +#--------------------------------------------------------------------------------------- +# +# IDENTIFICATION +# src/gausskernel/ddes/adapter/Makefile +# +# --------------------------------------------------------------------------------------- + + +subdir = src/gausskernel/ddes/adapter +top_builddir = ../../../.. + +include $(top_builddir)/src/Makefile.global + +override CPPFLAGS := -I$(top_srcdir)/src/include/ddes/dms -DOPENGAUSS $(CPPFLAGS) + +ifneq "$(MAKECMDGOALS)" "clean" + ifneq "$(MAKECMDGOALS)" "distclean" + ifneq "$(shell which g++ |grep hutaf_llt |wc -l)" "1" + -include $(DEPEND) + endif + endif +endif + +OBJS = ss_dms_bufmgr.o ss_dms_callback.o ss_dms_log_output.o ss_dms_recovery.o ss_dms.o ss_init.o \ + ss_reform_common.o ss_switchover.o ss_transaction.o + +include $(top_srcdir)/src/gausskernel/common.mk + diff --git a/src/gausskernel/ddes/adapter/ss_dms.cpp b/src/gausskernel/ddes/adapter/ss_dms.cpp new file mode 100644 index 000000000..4bf74c3cc --- /dev/null +++ b/src/gausskernel/ddes/adapter/ss_dms.cpp @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * ss_dms.cpp + * Dynamic loading of the DMS + * + * IDENTIFICATION + * src/gausskernel/ddes/adapter/ss_dms.cpp + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef WIN32 +#include "dlfcn.h" +#endif + +#include "ddes/dms/ss_dms.h" +#include "utils/elog.h" + +ss_dms_func_t g_ss_dms_func; + +// return DMS_ERROR if error occurs +#define SS_RETURN_IFERR(ret) \ + do { \ + int _status_ = (ret); \ + if (SECUREC_UNLIKELY(_status_ != DMS_SUCCESS)) { \ + return _status_; \ + } \ + } while (0) + +int dms_load_symbol(char *symbol, void **sym_lib_handle) +{ +#ifndef WIN32 + const char *dlsym_err = NULL; + + *sym_lib_handle = dlsym(g_ss_dms_func.handle, symbol); + dlsym_err = dlerror(); + if (dlsym_err != NULL) { + ereport(FATAL, (errcode(ERRCODE_INVALID_OPERATION), + errmsg("incompatible library \"%s\", load %s failed, %s", SS_LIBDMS_NAME, symbol, dlsym_err))); + return DMS_ERROR; + } +#endif // !WIN32 + return DMS_SUCCESS; +} + +int dms_open_dl(void **lib_handle, char *symbol) +{ +#ifdef WIN32 + return DMS_ERROR; +#else + *lib_handle = dlopen(symbol, RTLD_LAZY); + if (*lib_handle == NULL) { + ereport(ERROR, (errcode_for_file_access(), errmsg("could not load library %s, %s", SS_LIBDMS_NAME, dlerror()))); + return DMS_ERROR; + } + return DMS_SUCCESS; +#endif +} + +void dms_close_dl(void *lib_handle) +{ +#ifndef WIN32 + (void)dlclose(lib_handle); +#endif +} + +#define 
DMS_LOAD_SYMBOL_FUNC(func) dms_load_symbol(#func, (void **)&g_ss_dms_func.func) + +int ss_dms_func_init() +{ + SS_RETURN_IFERR(dms_open_dl(&g_ss_dms_func.handle, (char *)SS_LIBDMS_NAME)); + + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_show_version)); + char version[DMS_VERSION_MAX_LEN] = { 0 }; + g_ss_dms_func.dms_show_version((char *)version); + ereport(LOG, (errmsg("Dynamically loading the DMS library, version: \n%s", version))); + + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_get_version)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_init)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_get_error)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_uninit)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_request_page)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_broadcast_msg)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_request_opengauss_update_xid)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_request_opengauss_xid_csn)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_request_opengauss_txn_status)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_request_opengauss_txn_snapshot)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_register_thread_init)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_release_owner)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_wait_reform)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_buf_res_rebuild_drc)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_is_recovery_session)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(drc_get_page_master_id)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_release_page_batch)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_register_ssl_decrypt_pwd)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_set_ssl_param)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_get_ssl_param)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_recovery_page_need_skip)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_reform_failed)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_switchover)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_drc_accessible)); + 
SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_broadcast_opengauss_ddllock)); + SS_RETURN_IFERR(DMS_LOAD_SYMBOL_FUNC(dms_reform_last_failed)); + + return DMS_SUCCESS; +} + +void ss_dms_func_uninit() +{ + if (g_ss_dms_func.handle != NULL) { + dms_close_dl(g_ss_dms_func.handle); + g_ss_dms_func.handle = NULL; + } +} + +static int dms_get_lib_version() +{ + return g_ss_dms_func.dms_get_version(); +} + +static int dms_get_my_version() +{ + return DMS_LOCAL_MAJOR_VERSION * DMS_LOCAL_MAJOR_VER_WEIGHT + DMS_LOCAL_MINOR_VERSION * DMS_LOCAL_MINOR_VER_WEIGHT + + DMS_LOCAL_VERSION; +} + +int dms_init(dms_profile_t *dms_profile) +{ + int my_version = dms_get_my_version(); + int lib_version = dms_get_lib_version(); + + if (my_version != lib_version) { + ereport(FATAL, (errmsg("dms library version is not matched, expected = %d, actual = %d", + my_version, lib_version))); + } + + int ret = g_ss_dms_func.dms_init(dms_profile); + if (ret != DMS_SUCCESS) { + return ret; + } + + return DMS_SUCCESS; +} + +void dms_uninit(void) +{ + g_ss_dms_func.dms_uninit(); + ss_dms_func_uninit(); +} + +void dms_get_error(int *errcode, const char **errmsg) +{ + g_ss_dms_func.dms_get_error(errcode, errmsg); +} + +int dms_request_page(dms_context_t *dms_ctx, dms_buf_ctrl_t *ctrl, dms_lock_mode_t mode) +{ + return g_ss_dms_func.dms_request_page(dms_ctx, ctrl, mode); +} + +int dms_broadcast_msg(dms_context_t *dms_ctx, char *data, unsigned int len, unsigned char handle_recv_msg, + unsigned int timeout) +{ + return g_ss_dms_func.dms_broadcast_msg(dms_ctx, data, len, handle_recv_msg, timeout); +} + +int dms_request_opengauss_update_xid(dms_context_t *dms_ctx, unsigned short t_infomask, unsigned short t_infomask2, + unsigned long long *uxid) +{ + return g_ss_dms_func.dms_request_opengauss_update_xid(dms_ctx, t_infomask, t_infomask2, uxid); +} +int dms_request_opengauss_xid_csn(dms_context_t *dms_ctx, dms_opengauss_xid_csn_t *dms_txn_info, + dms_opengauss_csn_result_t *xid_csn_result) +{ + return 
g_ss_dms_func.dms_request_opengauss_xid_csn(dms_ctx, dms_txn_info, xid_csn_result); +} +int dms_request_opengauss_txn_status(dms_context_t *dms_ctx, unsigned char request, unsigned char *result) +{ + return g_ss_dms_func.dms_request_opengauss_txn_status(dms_ctx, request, result); +} +int dms_request_opengauss_txn_snapshot(dms_context_t *dms_ctx, dms_opengauss_txn_snapshot_t *dms_txn_snapshot) +{ + return g_ss_dms_func.dms_request_opengauss_txn_snapshot(dms_ctx, dms_txn_snapshot); +} + +int dms_broadcast_opengauss_ddllock(dms_context_t *dms_ctx, char *data, unsigned int len, unsigned char handle_recv_msg, + unsigned int timeout, unsigned char resend_after_reform) +{ + return g_ss_dms_func.dms_broadcast_opengauss_ddllock(dms_ctx, data, len, handle_recv_msg, timeout, + resend_after_reform); +} + +int dms_register_thread_init(dms_thread_init_t thrd_init) +{ + return g_ss_dms_func.dms_register_thread_init(thrd_init); +} + +int dms_release_owner(dms_context_t *dms_ctx, dms_buf_ctrl_t *ctrl, unsigned char *released) +{ + return g_ss_dms_func.dms_release_owner(dms_ctx, ctrl, released); +} + +int dms_wait_reform(unsigned int *has_offline) +{ + return g_ss_dms_func.dms_wait_reform(has_offline); +} + +int dms_buf_res_rebuild_drc(dms_context_t *dms_ctx, dms_buf_ctrl_t *ctrl, unsigned long long lsn, + unsigned char is_dirty) +{ + return g_ss_dms_func.dms_buf_res_rebuild_drc(dms_ctx, ctrl, lsn, is_dirty); +} + +int dms_is_recovery_session(unsigned int sid) +{ + return g_ss_dms_func.dms_is_recovery_session(sid); +} + +int drc_get_page_master_id(char pageid[DMS_PAGEID_SIZE], unsigned char *master_id) +{ + return g_ss_dms_func.drc_get_page_master_id(pageid, master_id); +} + +int dms_release_page_batch(dms_context_t *dms_ctx, dcs_batch_buf_t *owner_map, unsigned int *owner_count) +{ + return g_ss_dms_func.dms_release_page_batch(dms_ctx, owner_map, owner_count); +} + +int dms_register_ssl_decrypt_pwd(dms_decrypt_pwd_t cb_func) +{ + return 
g_ss_dms_func.dms_register_ssl_decrypt_pwd(cb_func); +} + +int dms_set_ssl_param(const char *param_name, const char *param_value) +{ + return g_ss_dms_func.dms_set_ssl_param(param_name, param_value); +} + +int dms_get_ssl_param(const char *param_name, char *param_value, unsigned int size) +{ + return g_ss_dms_func.dms_get_ssl_param(param_name, param_value, size); +} + +int dms_recovery_page_need_skip(char pageid[DMS_PAGEID_SIZE], unsigned char *skip) +{ + return g_ss_dms_func.dms_recovery_page_need_skip(pageid, skip); +} + +int dms_reform_failed(void) +{ + return g_ss_dms_func.dms_reform_failed(); +} + +int dms_switchover(unsigned int sess_id) +{ + return g_ss_dms_func.dms_switchover(sess_id); +} + +int dms_drc_accessible(void) +{ + return g_ss_dms_func.dms_drc_accessible(); +} + +int dms_reform_last_failed(void) +{ + return g_ss_dms_func.dms_reform_last_failed(); +} \ No newline at end of file diff --git a/src/gausskernel/ddes/adapter/ss_dms_bufmgr.cpp b/src/gausskernel/ddes/adapter/ss_dms_bufmgr.cpp new file mode 100644 index 000000000..b49b05251 --- /dev/null +++ b/src/gausskernel/ddes/adapter/ss_dms_bufmgr.cpp @@ -0,0 +1,510 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * ss_dms_bufmgr.cpp + * Provide common interface for read page within DMS process. 
+ * + * IDENTIFICATION + * src/gausskernel/ddes/adapter/ss_dms_bufmgr.cpp + * + * --------------------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "storage/proc.h" +#include "storage/buf/bufmgr.h" +#include "storage/smgr/segment.h" +#include "replication/shared_storage_walreceiver.h" +#include "ddes/dms/ss_dms_bufmgr.h" +#include "securec_check.h" +#include "miscadmin.h" + +void InitDmsBufCtrl(void) +{ + bool found_dms_buf = false; + t_thrd.storage_cxt.dmsBufCtl = (dms_buf_ctrl_t *)CACHELINEALIGN(ShmemInitStruct( + "dms buffer ctrl", TOTAL_BUFFER_NUM * sizeof(dms_buf_ctrl_t) + PG_CACHE_LINE_SIZE, &found_dms_buf)); + + if (!found_dms_buf) { + for (int i = 0; i < TOTAL_BUFFER_NUM; i++) { + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(i); + buf_ctrl->buf_id = i; + buf_ctrl->state = 0; + buf_ctrl->is_remote_dirty = 0; + buf_ctrl->lock_mode = (uint8)DMS_LOCK_NULL; + buf_ctrl->is_edp = 0; + buf_ctrl->force_request = 0; + buf_ctrl->edp_scn = 0; + buf_ctrl->edp_map = 0; + buf_ctrl->pblk_relno = InvalidOid; + buf_ctrl->pblk_blkno = InvalidBlockNumber; + buf_ctrl->pblk_lsn = InvalidXLogRecPtr; + } + } +} + +void InitDmsContext(dms_context_t *dmsContext) +{ + /* Proc threads id range: [0, TotalProcs - 1]. Non-proc threads id range: [TotalProcs + 1, TotalProcs + 4] */ + uint32 TotalProcs = (uint32)(GLOBAL_ALL_PROCS); + dmsContext->inst_id = (unsigned int)SS_MY_INST_ID; + dmsContext->sess_id = (unsigned int)(t_thrd.proc ? 
t_thrd.proc->logictid : t_thrd.myLogicTid + TotalProcs); + dmsContext->db_handle = t_thrd.proc; + dmsContext->sess_rcy = (unsigned int)AmPageRedoProcess() || (unsigned int)AmStartupProcess() + || (unsigned int)AmDmsReformProcProcess(); + dmsContext->is_try = 0; +} + +void InitDmsBufContext(dms_context_t* dmsBufCxt, BufferTag buftag) +{ + InitDmsContext(dmsBufCxt); + dmsBufCxt->len = DMS_PAGEID_SIZE; + dmsBufCxt->type = (unsigned char)DRC_RES_PAGE_TYPE; + errno_t err = memcpy_s(dmsBufCxt->resid, DMS_PAGEID_SIZE, &buftag, sizeof(BufferTag)); + securec_check_c(err, "\0", "\0"); +} + +static void CalcSegDmsPhysicalLoc(BufferDesc* buf_desc, Buffer buffer) +{ + if (IsSegmentFileNode(buf_desc->tag.rnode)) { + SegmentCheck(!IsSegmentPhysicalRelNode(buf_desc->tag.rnode)); + SegPageLocation loc = seg_get_physical_location(buf_desc->tag.rnode, buf_desc->tag.forkNum, + buf_desc->tag.blockNum); + SegmentCheck(loc.blocknum != InvalidBlockNumber); + + ereport(DEBUG1, (errmsg("buffer:%d is segdata page, bufdesc seginfo is empty, calc segfileno:%d, segblkno:%u", + buffer, (int32)loc.extent_size, loc.blocknum))); + + buf_desc->seg_fileno = (uint8)EXTENT_SIZE_TO_TYPE((int)loc.extent_size); + buf_desc->seg_blockno = loc.blocknum; + } +} + +bool LockModeCompatible(dms_buf_ctrl_t *buf_ctrl, LWLockMode mode) +{ + bool compatible = false; + + if (mode == LW_SHARED) { + switch (buf_ctrl->lock_mode) { + case DMS_LOCK_SHARE: + case DMS_LOCK_EXCLUSIVE: + compatible = true; + break; + default: + break; + } + } else if (mode == LW_EXCLUSIVE) { + if (buf_ctrl->lock_mode == (uint8)DMS_LOCK_EXCLUSIVE) { + compatible = true; + } + } else { + AssertEreport(0, MOD_DMS, "lock mode value is wrong"); + } + + return compatible; +} + +void MarkReadPblk(int buf_id, const XLogPhyBlock *pblk) +{ + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(buf_id); + if (pblk) { + buf_ctrl->pblk_relno = pblk->relNode; + buf_ctrl->pblk_blkno = pblk->block; + buf_ctrl->pblk_lsn = pblk->lsn; + } else { + buf_ctrl->pblk_relno = 
InvalidOid; + buf_ctrl->pblk_blkno = InvalidBlockNumber; + buf_ctrl->pblk_lsn = InvalidXLogRecPtr; + } +} + +void MarkReadHint(int buf_id, char persistence, bool extend, const XLogPhyBlock *pblk) +{ + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(buf_id); + if (persistence == 'p') { + buf_ctrl->state |= BUF_IS_RELPERSISTENT; + } else if (persistence == 't') { + buf_ctrl->state |= BUF_IS_RELPERSISTENT_TEMP; + } + + if (extend) { + buf_ctrl->state |= BUF_IS_EXTEND; + } + + MarkReadPblk(buf_id, pblk); +} + +void ClearReadHint(int buf_id, bool buf_deleted) +{ + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(buf_id); + buf_ctrl->state &= ~(BUF_NEED_LOAD | BUF_IS_LOADED | BUF_LOAD_FAILED | BUF_NEED_TRANSFER | BUF_IS_EXTEND); + if (buf_deleted) { + buf_ctrl->state = 0; + } +} + +/* + * true: the page is transferred successfully by dms, + * false: the page request is rejected or error, if hold the content_lock, + * should release the content_lock and io_in_process lock and retry. + */ +bool StartReadPage(BufferDesc *buf_desc, LWLockMode mode) +{ + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(buf_desc->buf_id); + dms_lock_mode_t req_mode = (mode == LW_SHARED) ? DMS_LOCK_SHARE : DMS_LOCK_EXCLUSIVE; + + dms_context_t dms_ctx; + InitDmsBufContext(&dms_ctx, buf_desc->tag); + + int ret = dms_request_page(&dms_ctx, buf_ctrl, req_mode); + return (ret == DMS_SUCCESS); +} + +Buffer TerminateReadPage(BufferDesc* buf_desc, ReadBufferMode read_mode, const XLogPhyBlock *pblk) +{ + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(buf_desc->buf_id); + Buffer buffer; + bool isExtend = (buf_ctrl->state & BUF_IS_EXTEND) ? 
true: false; + if (buf_ctrl->state & BUF_NEED_LOAD) { + buffer = ReadBuffer_common_for_dms(read_mode, buf_desc, pblk); + } else { + Block bufBlock = BufHdrGetBlock(buf_desc); + if (isExtend && !PageIsNew((Page)bufBlock)) { + ForkNumber forkNum = buf_desc->tag.forkNum; + SMgrRelation smgr = smgropen(buf_desc->tag.rnode, InvalidBackendId); + BlockNumber blockNum = buf_desc->tag.blockNum; + if (blockNum != smgrnblocks(smgr, forkNum)) { + ereport(PANIC, (errmsg("For DMS Master it can't extend a page already exits!"))); + } + ereport(DEBUG1, (errmodule(MOD_DMS), errmsg("The page is from standby but it is not empty!"))); + errno_t er = memset_s((char *)bufBlock, BLCKSZ, 0, BLCKSZ); + securec_check(er, "", ""); + } + + Page page = (Page)(bufBlock); + PageSetChecksumInplace(page, buf_desc->tag.blockNum); + +#ifdef USE_ASSERT_CHECKING + if (SS_NORMAL_PRIMARY && read_mode == RBM_NORMAL && !isExtend && + (!(pg_atomic_read_u32(&buf_desc->state) & BM_VALID))) { + Block aux_block = palloc(8192); + ReadBuffer_common_for_check(read_mode, buf_desc, pblk, aux_block); + XLogRecPtr disk_lsn = PageGetLSN(aux_block); + XLogRecPtr mem_lsn = PageGetLSN(page); + if (disk_lsn > mem_lsn) { + ereport(PANIC, (errmsg("[%d/%d/%d/%d %d-%d] memory lsn(0x%llx) is less than disk lsn(0x%llx).", + buf_desc->tag.rnode.spcNode, buf_desc->tag.rnode.dbNode, buf_desc->tag.rnode.relNode, + buf_desc->tag.rnode.bucketNode, buf_desc->tag.forkNum, buf_desc->tag.blockNum, + (unsigned long long)mem_lsn, (unsigned long long)disk_lsn))); + } + pfree(aux_block); + } +#endif + + TerminateBufferIO(buf_desc, false, BM_VALID); + buffer = BufferDescriptorGetBuffer(buf_desc); + if (!SSFAILOVER_TRIGGER && !RecoveryInProgress()) { + CalcSegDmsPhysicalLoc(buf_desc, buffer); + } + } + + if ((read_mode == RBM_ZERO_AND_LOCK || read_mode == RBM_ZERO_AND_CLEANUP_LOCK) && + !LWLockHeldByMe(buf_desc->content_lock)) { + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + } + + ClearReadHint(buf_desc->buf_id); + return buffer; +} + +static 
bool DmsStartBufferIO(BufferDesc *buf_desc, LWLockMode mode) +{ + uint32 buf_state; + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(buf_desc->buf_id); + + if (IsSegmentBufferID(buf_desc->buf_id)) { + Assert(!HasInProgressBuf()); + } else { + Assert(!t_thrd.storage_cxt.InProgressBuf || t_thrd.storage_cxt.InProgressBuf == buf_desc); + } + + if (LWLockHeldByMe(buf_desc->io_in_progress_lock)) { + return false; + } + + if (LockModeCompatible(buf_ctrl, mode)) { + if (!(pg_atomic_read_u32(&buf_desc->state) & BM_IO_IN_PROGRESS)) { + return false; + } + } + + for (;;) { + (void)LWLockAcquire(buf_desc->io_in_progress_lock, LW_EXCLUSIVE); + + buf_state = LockBufHdr(buf_desc); + if (!(buf_state & BM_IO_IN_PROGRESS)) { + break; + } + + UnlockBufHdr(buf_desc, buf_state); + LWLockRelease(buf_desc->io_in_progress_lock); + WaitIO(buf_desc); + } + + if (LockModeCompatible(buf_ctrl, mode)) { + UnlockBufHdr(buf_desc, buf_state); + LWLockRelease(buf_desc->io_in_progress_lock); + return false; + } + + buf_state |= BM_IO_IN_PROGRESS; + UnlockBufHdr(buf_desc, buf_state); + + if (IsSegmentBufferID(buf_desc->buf_id)) { + SetInProgressFlags(buf_desc, true); + } else { + t_thrd.storage_cxt.InProgressBuf = buf_desc; + t_thrd.storage_cxt.IsForInput = true; + } + return true; +} + +Buffer TerminateReadSegPage(BufferDesc *buf_desc, ReadBufferMode read_mode, SegSpace *spc) +{ + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(buf_desc->buf_id); + Buffer buffer; + if (buf_ctrl->state & BUF_NEED_LOAD) { + buffer = ReadSegBufferForDMS(buf_desc, read_mode, spc); + } else { + Page page = (Page)BufHdrGetBlock(buf_desc); + PageSetChecksumInplace(page, buf_desc->tag.blockNum); + +#ifdef USE_ASSERT_CHECKING + if (SS_NORMAL_PRIMARY && read_mode == RBM_NORMAL && + (!(pg_atomic_read_u32(&buf_desc->state) & BM_VALID))) { + Block aux_block = palloc(8192); + ReadSegBufferForCheck(buf_desc, read_mode, spc, aux_block); + XLogRecPtr disk_lsn = PageGetLSN(aux_block); + XLogRecPtr mem_lsn = PageGetLSN(page); + if (disk_lsn > 
mem_lsn) { + ereport(PANIC, (errmsg("[%d/%d/%d/%d %d-%d] memory lsn(0x%llx) is less than disk lsn(0x%llx).", + buf_desc->tag.rnode.spcNode, buf_desc->tag.rnode.dbNode, buf_desc->tag.rnode.relNode, + buf_desc->tag.rnode.bucketNode, buf_desc->tag.forkNum, buf_desc->tag.blockNum, + (unsigned long long)mem_lsn, (unsigned long long)disk_lsn))); + } + pfree(aux_block); + } +#endif + + SegTerminateBufferIO(buf_desc, false, BM_VALID); + buffer = BufferDescriptorGetBuffer(buf_desc); + } + + if ((read_mode == RBM_ZERO_AND_LOCK || read_mode == RBM_ZERO_AND_CLEANUP_LOCK) && + !LWLockHeldByMe(buf_desc->content_lock)) { + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + } + + ClearReadHint(buf_desc->buf_id); + return buffer; +} + +Buffer DmsReadSegPage(Buffer buffer, LWLockMode mode, ReadBufferMode read_mode) +{ + BufferDesc *buf_desc = GetBufferDescriptor(buffer - 1); + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(buf_desc->buf_id); + + if (buf_ctrl->state & BUF_IS_RELPERSISTENT_TEMP) { + return buffer; + } + + if (!DmsStartBufferIO(buf_desc, mode)) { + return buffer; + } + + if (!StartReadPage(buf_desc, mode)) { + return 0; + } + return TerminateReadSegPage(buf_desc, read_mode); +} + +Buffer DmsReadPage(Buffer buffer, LWLockMode mode, ReadBufferMode read_mode) +{ + BufferDesc *buf_desc = GetBufferDescriptor(buffer - 1); + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(buf_desc->buf_id); + + if (buf_ctrl->state & BUF_IS_RELPERSISTENT_TEMP) { + return buffer; + } + + XLogPhyBlock pblk = {0, 0, 0}; + if (OidIsValid(buf_ctrl->pblk_relno)) { + Assert(ExtentTypeIsValid(buf_ctrl->pblk_relno)); + Assert(buf_ctrl->pblk_blkno != InvalidBlockNumber); + pblk.relNode = buf_ctrl->pblk_relno; + pblk.block = buf_ctrl->pblk_blkno; + pblk.lsn = buf_ctrl->pblk_lsn; + } + + if (!DmsStartBufferIO(buf_desc, mode)) { + return buffer; + } + + if (!StartReadPage(buf_desc, mode)) { + return 0; + } + return TerminateReadPage(buf_desc, read_mode, OidIsValid(buf_ctrl->pblk_relno) ? 
&pblk : NULL); +} + +bool DmsReleaseOwner(BufferTag buf_tag, int buf_id) +{ + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(buf_id); + if (buf_ctrl->state & BUF_IS_RELPERSISTENT_TEMP) { + return true; + } + unsigned char released = 0; + dms_context_t dms_ctx; + InitDmsBufContext(&dms_ctx, buf_tag); + + return ((dms_release_owner(&dms_ctx, buf_ctrl, &released) == DMS_SUCCESS) && (released != 0)); +} + +int32 CheckBuf4Rebuild(BufferDesc *buf_desc) +{ + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(buf_desc->buf_id); + Assert(buf_ctrl != NULL); + Assert(buf_ctrl->is_edp != 1); + + if (buf_ctrl->lock_mode == (unsigned char)DMS_LOCK_NULL) { + if (g_instance.dms_cxt.SSRecoveryInfo.in_failover) { + InvalidateBuffer(buf_desc); + } + return DMS_SUCCESS; + } + + dms_context_t dms_ctx; + InitDmsBufContext(&dms_ctx, buf_desc->tag); + bool is_dirty = (buf_desc->state & (BM_DIRTY | BM_JUST_DIRTIED)) > 0 ? true : false; + int ret = dms_buf_res_rebuild_drc(&dms_ctx, buf_ctrl, (unsigned long long)BufferGetLSN(buf_desc), is_dirty); + if (ret != DMS_SUCCESS) { + ereport(DEBUG1, (errmsg("Failed to rebuild page, rel:%u/%u/%u/%d, forknum:%d, blocknum:%u.", + buf_desc->tag.rnode.spcNode, buf_desc->tag.rnode.dbNode, buf_desc->tag.rnode.relNode, + buf_desc->tag.rnode.bucketNode, buf_desc->tag.forkNum, buf_desc->tag.blockNum))); + return ret; + } + return DMS_SUCCESS; +} + +int SSLockAcquire(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock, bool dontWait, + dms_opengauss_lock_req_type_t reqType) +{ + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + SSBroadcastDDLLock ssmsg; + ssmsg.type = BCAST_DDLLOCK; + errno_t rc = memcpy_s(&(ssmsg.locktag), sizeof(LOCKTAG), locktag, sizeof(LOCKTAG)); + securec_check(rc, "\0", "\0"); + ssmsg.lockmode = lockmode; + ssmsg.sessionlock = sessionLock; + ssmsg.dontWait = dontWait; + unsigned int count = SS_BROADCAST_FAILED_RETRYCOUNTS; + int ret = DMS_ERROR; + + int output_backup = t_thrd.postgres_cxt.whereToSendOutput; + 
t_thrd.postgres_cxt.whereToSendOutput = DestNone; + /* retry 3 times to get the lock (22seconds) */ + while (ret != DMS_SUCCESS && !dontWait && count) { + ret = dms_broadcast_opengauss_ddllock(&dms_ctx, (char *)&ssmsg, sizeof(SSBroadcastDDLLock), + (unsigned char)false, dontWait ? SS_BROADCAST_WAIT_FIVE_MICROSECONDS : SS_BROADCAST_WAIT_FIVE_SECONDS, + (unsigned char)reqType); + if (ret == DMS_SUCCESS) { + break; + } + pg_usleep(5000L); + count--; + } + + t_thrd.postgres_cxt.whereToSendOutput = output_backup; + return ret; +} + +int SSLockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) +{ + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + SSBroadcastDDLLock ssmsg; + ssmsg.type = BCAST_DDLLOCKRELEASE; + errno_t rc = memcpy_s(&(ssmsg.locktag), sizeof(LOCKTAG), locktag, sizeof(LOCKTAG)); + securec_check(rc, "\0", "\0"); + ssmsg.lockmode = lockmode; + ssmsg.sessionlock = sessionLock; + ssmsg.dontWait = false; + + int output_backup = t_thrd.postgres_cxt.whereToSendOutput; + t_thrd.postgres_cxt.whereToSendOutput = DestNone; + int ret = dms_broadcast_opengauss_ddllock(&dms_ctx, (char *)&ssmsg, sizeof(SSBroadcastDDLLock), + (unsigned char)false, SS_BROADCAST_WAIT_FIVE_SECONDS, (unsigned char)false); + if (ret != DMS_SUCCESS) { + ereport(WARNING, (errmsg("SS broadcast DDLLockRelease request failed!"))); + } + + t_thrd.postgres_cxt.whereToSendOutput = output_backup; + return ret; +} + +void SSLockReleaseAll() +{ + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + SSBroadcastCmdOnly ssmsg; + ssmsg.type = BCAST_DDLLOCKRELEASE_ALL; + + int output_backup = t_thrd.postgres_cxt.whereToSendOutput; + t_thrd.postgres_cxt.whereToSendOutput = DestNone; + int ret = dms_broadcast_opengauss_ddllock(&dms_ctx, (char *)&ssmsg, sizeof(SSBroadcastCmdOnly), + (unsigned char)false, SS_BROADCAST_WAIT_FIVE_SECONDS, (unsigned char)LOCK_RELEASE_SELF); + if (ret != DMS_SUCCESS) { + ereport(DEBUG1, (errmsg("SS broadcast DDLLockReleaseAll request failed!"))); + } + + 
t_thrd.postgres_cxt.whereToSendOutput = output_backup; +} + +void SSLockAcquireAll() +{ + PROCLOCK *proclock = NULL; + HASH_SEQ_STATUS seqstat; + int i; + for (i = 0; i < NUM_LOCK_PARTITIONS; i++) { + (void)LWLockAcquire(GetMainLWLockByIndex(FirstLockMgrLock + i), LW_SHARED); + } + + hash_seq_init(&seqstat, t_thrd.storage_cxt.LockMethodProcLockHash); + while ((proclock = (PROCLOCK *)hash_seq_search(&seqstat))) { + if ((proclock->tag.myLock->tag.locktag_type < (uint8)LOCKTAG_PAGE || + proclock->tag.myLock->tag.locktag_type == (uint8)LOCKTAG_OBJECT) && + (proclock->holdMask & LOCKBIT_ON(AccessExclusiveLock))) { + LOCK *lock = proclock->tag.myLock; + int ret = SSLockAcquire(&(lock->tag), AccessExclusiveLock, false, false, LOCK_REACQUIRE); + if (ret) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("SS Broadcast LockAcquire when reform finished failed"))); + } + } + } + + for (i = NUM_LOCK_PARTITIONS; --i >= 0;) { + LWLockRelease(GetMainLWLockByIndex(FirstLockMgrLock + i)); + } +} diff --git a/src/gausskernel/ddes/adapter/ss_dms_callback.cpp b/src/gausskernel/ddes/adapter/ss_dms_callback.cpp new file mode 100644 index 000000000..44ce00361 --- /dev/null +++ b/src/gausskernel/ddes/adapter/ss_dms_callback.cpp @@ -0,0 +1,1494 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * --------------------------------------------------------------------------------------- + * + * ss_dms_callback.cpp + * Provide callback interface for called inside DMS API + * + * IDENTIFICATION + * src/gausskernel/ddes/adapter/ss_dms_callback.cpp + * + * --------------------------------------------------------------------------------------- + */ +#include "ddes/dms/ss_dms_callback.h" +#include "postgres.h" +#include "miscadmin.h" +#include "postmaster/postmaster.h" +#include "utils/memutils.h" +#include "utils/palloc.h" +#include "utils/resowner.h" +#include "utils/postinit.h" +#include "storage/procarray.h" +#include "access/xact.h" +#include "access/transam.h" +#include "access/csnlog.h" +#include "ddes/dms/ss_dms_bufmgr.h" +#include "storage/buf/buf_internals.h" +#include "ddes/dms/ss_transaction.h" +#include "storage/smgr/segment.h" +#include "storage/sinvaladt.h" +#include "replication/walsender_private.h" +#include "replication/walreceiver.h" +#include "ddes/dms/ss_switchover.h" +#include "ddes/dms/ss_dms_log_output.h" +#include "ddes/dms/ss_reform_common.h" +#include "ddes/dms/ss_dms_bufmgr.h" +#include "storage/file/fio_device.h" +#include "storage/buf/bufmgr.h" + +static void DMSWriteNormalLog(dms_log_id_t dms_log_id, dms_log_level_t dms_log_level, const char *code_file_name, + uint32 code_line_num, const char *module_name, const char *format, ...) 
+{ + int32 errcode; + uint32 log_level; + const char *last_file = NULL; + int32 ret = DMSLogLevelCheck(dms_log_id, dms_log_level, &log_level); + if (ret == -1) { + return; + } + +#ifdef WIN32 + last_file = strrchr(code_file_name, '\\'); +#else + last_file = strrchr(code_file_name, '/'); +#endif + if (last_file == NULL) { + last_file = code_file_name; + } else { + last_file++; + } + + va_list args; + va_start(args, format); + char buf[DMS_LOGGER_BUFFER_SIZE]; + errcode = vsnprintf_s(buf, DMS_LOGGER_BUFFER_SIZE, DMS_LOGGER_BUFFER_SIZE, format, args); + if (errcode < 0) { + va_end(args); + return; + } + va_end(args); + + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + MemoryContext old_context = MemoryContextSwitchTo(ErrorContext); + PG_TRY(); + { + DMSLogOutput(log_level, last_file, code_line_num, buf); + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + if (t_thrd.role == DMS_WORKER) { + FlushErrorState(); + } + } + PG_END_TRY(); + (void)MemoryContextSwitchTo(old_context); +} + +static int CBGetUpdateXid(void *db_handle, unsigned long long xid, unsigned int t_infomask, unsigned int t_infomask2, + unsigned long long *uxid) +{ + int result = DMS_SUCCESS; + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + + PG_TRY(); + { + *uxid = + (unsigned long long)MultiXactIdGetUpdateXid((TransactionId)xid, (uint16)t_infomask, (uint16)t_infomask2); + } + PG_CATCH(); + { + result = DMS_ERROR; + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + if (t_thrd.role == DMS_WORKER) { + FlushErrorState(); + } + } + PG_END_TRY(); + return result; +} + +static CommitSeqNo TransactionWaitCommittingCSN(dms_opengauss_xid_csn_t *xid_csn_ctx, bool *sync) +{ + bool looped = false; + bool isCommit = (bool)xid_csn_ctx->is_committed; + bool isMvcc = (bool)xid_csn_ctx->is_mvcc; + bool isNest = (bool)xid_csn_ctx->is_nest; + TransactionId xid = xid_csn_ctx->xid; + CommitSeqNo snapshotcsn = 
xid_csn_ctx->snapshotcsn; + TransactionId parentXid = InvalidTransactionId; + SnapshotData snapshot = {SNAPSHOT_MVCC}; + snapshot.xmin = xid_csn_ctx->snapshotxmin; + snapshot.snapshotcsn = snapshotcsn; + CommitSeqNo csn = TransactionIdGetCommitSeqNo(xid, isCommit, isMvcc, isNest, &snapshot); + + while (COMMITSEQNO_IS_COMMITTING(csn)) { + if (looped && isCommit) { + ereport(DEBUG1, + (errmodule(MOD_DMS), errmsg("committed SS xid %lu's csn %lu" + "is changed to FROZEN after lockwait.", xid, csn))); + CSNLogSetCommitSeqNo(xid, 0, NULL, COMMITSEQNO_FROZEN); + SetLatestFetchState(xid, COMMITSEQNO_FROZEN); + /* in this case, SS tuple is visible on standby, as we already compared and waited */ + return COMMITSEQNO_FROZEN; + } else if (looped && !isCommit) { + ereport(DEBUG1, (errmodule(MOD_DMS), + errmsg("SS XID %lu's csn %lu is changed to ABORT after lockwait.", xid, csn))); + /* recheck if transaction id is finished */ + RecheckXidFinish(xid, csn); + CSNLogSetCommitSeqNo(xid, 0, NULL, COMMITSEQNO_ABORTED); + SetLatestFetchState(xid, COMMITSEQNO_ABORTED); + /* in this case, SS tuple is not visible on standby */ + return COMMITSEQNO_ABORTED; + } else { + if (!COMMITSEQNO_IS_SUBTRANS(csn)) { + /* If snapshotcsn lower than csn stored in csn log, don't need to wait. 
*/ + CommitSeqNo latestCSN = GET_COMMITSEQNO(csn); + if (latestCSN >= snapshotcsn) { + ereport(DEBUG1, + (errmodule(MOD_DMS), errmsg( + "snapshotcsn %lu < csn %lu stored in CSNLog, TXN invisible, no need to sync wait, XID %lu", + snapshotcsn, + latestCSN, + xid))); + /* in this case, SS tuple is not visible; to return ABORT is inappropriate, so let standby judge */ + return latestCSN; + } + } else { + parentXid = (TransactionId)GET_PARENTXID(csn); + } + + if (u_sess->attr.attr_common.xc_maintenance_mode || t_thrd.xact_cxt.bInAbortTransaction) { + return COMMITSEQNO_ABORTED; + } + + // standby does not need buf lock or validation + if (TransactionIdIsValid(parentXid)) { + SyncLocalXidWait(parentXid); + } else { + SyncLocalXidWait(xid); + } + + looped = true; + *sync = true; + parentXid = InvalidTransactionId; + csn = TransactionIdGetCommitSeqNo(xid, isCommit, isMvcc, isNest, &snapshot); + } + } + return csn; +} + +static int CBGetTxnCSN(void *db_handle, dms_opengauss_xid_csn_t *csn_req, dms_opengauss_csn_result_t *csn_res) +{ + int ret; + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + PG_TRY(); + { + bool sync = false; + CLogXidStatus clogstatus = CLOG_XID_STATUS_IN_PROGRESS; + XLogRecPtr lsn = InvalidXLogRecPtr; + CommitSeqNo csn = TransactionWaitCommittingCSN(csn_req, &sync); + clogstatus = CLogGetStatus(csn_req->xid, &lsn); + csn_res->csn = csn; + csn_res->sync = (unsigned char)sync; + csn_res->clogstatus = (unsigned int)clogstatus; + csn_res->lsn = lsn; + ret = DMS_SUCCESS; + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + if (t_thrd.role == DMS_WORKER) { + FlushErrorState(); + } + ret = DMS_ERROR; + } + PG_END_TRY(); + return ret; +} + +static int CBGetSnapshotData(void *db_handle, dms_opengauss_txn_snapshot_t *txn_snapshot) +{ + if (SS_IN_REFORM) { + return DMS_ERROR; + } + + if (RecoveryInProgress()) { + return DMS_ERROR; + } + + int retCode = DMS_ERROR; + SnapshotData snapshot = 
{SNAPSHOT_MVCC}; + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + + PG_TRY(); + { + (void)GetSnapshotData(&snapshot, false); + if (snapshot.xmin != InvalidTransactionId) { + txn_snapshot->xmin = snapshot.xmin; + txn_snapshot->xmax = snapshot.xmax; + txn_snapshot->snapshotcsn = snapshot.snapshotcsn; + txn_snapshot->localxmin = u_sess->utils_cxt.RecentGlobalXmin; + retCode = DMS_SUCCESS; + } + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + if (t_thrd.role == DMS_WORKER) { + FlushErrorState(); + } + } + PG_END_TRY(); + + return retCode; +} + +static int CBGetTxnStatus(void *db_handle, unsigned long long xid, unsigned char type, unsigned char *result) +{ + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + PG_TRY(); + { + switch (type) { + case XID_INPROGRESS: + *result = (unsigned char)TransactionIdIsInProgress(xid); + break; + case XID_COMMITTED: + *result = (unsigned char)TransactionIdDidCommit(xid); + break; + default: + return DMS_ERROR; + } + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + if (t_thrd.role == DMS_WORKER) { + FlushErrorState(); + } + } + PG_END_TRY(); + return DMS_SUCCESS; +} + +static int CBGetCurrModeAndLockBuffer(void *db_handle, int buffer, unsigned char lock_mode, + unsigned char *curr_mode) +{ + Assert((buffer - 1) >= 0); + BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1); + *curr_mode = (unsigned char)GetHeldLWLockMode(bufHdr->content_lock); // LWLockMode + Assert(*curr_mode == LW_EXCLUSIVE || *curr_mode == LW_SHARED); + LockBuffer((Buffer)buffer, lock_mode); // BUFFER_LOCK_UNLOCK, BUFFER_LOCK_SHARE or BUFFER_LOCK_EXCLUSIVE + ereport(LOG, (errmodule(MOD_DMS), + errmsg("SS lock buf success, buffer=%d, mode=%hhu, curr_mode=%hhu", buffer, lock_mode, *curr_mode))); + return DMS_SUCCESS; +} + +static int CBSwitchoverDemote(void *db_handle) +{ + DemoteMode demote_mode = FastDemote; + + /* borrows walsender 
lock */ + SpinLockAcquire(&t_thrd.walsender_cxt.WalSndCtl->mutex); + if (t_thrd.walsender_cxt.WalSndCtl->demotion > NoDemote) { + SpinLockRelease(&t_thrd.walsender_cxt.WalSndCtl->mutex); + ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS switchover] master is doing switchover," + " probably standby already requested switchover."))); + return DMS_SUCCESS; + } + Assert(g_instance.dms_cxt.SSClusterState == NODESTATE_NORMAL); + Assert(SS_MY_INST_IS_MASTER); + + t_thrd.walsender_cxt.WalSndCtl->demotion = demote_mode; + g_instance.dms_cxt.SSClusterState = NODESTATE_PRIMARY_DEMOTING; + g_instance.dms_cxt.SSRecoveryInfo.new_primary_reset_walbuf_flag = true; + SpinLockRelease(&t_thrd.walsender_cxt.WalSndCtl->mutex); + + ereport(LOG, + (errmodule(MOD_DMS), errmsg("[SS switchover] Recv %s demote request from DMS reformer.", + DemoteModeDesc(demote_mode)))); + + SendPostmasterSignal(PMSIGNAL_DEMOTE_PRIMARY); + + const int WAIT_DEMOTE = 6000; /* wait up to 10 min in case of too many dirty pages to be flushed */ + for (int ntries = 0;; ntries++) { + if (pmState == PM_RUN && g_instance.dms_cxt.SSClusterState == NODESTATE_PROMOTE_APPROVE) { + SpinLockAcquire(&t_thrd.walsender_cxt.WalSndCtl->mutex); + t_thrd.walsender_cxt.WalSndCtl->demotion = NoDemote; + SpinLockRelease(&t_thrd.walsender_cxt.WalSndCtl->mutex); + + if (dss_set_server_status_wrapper(false) != GS_SUCCESS) { + ereport(PANIC, + (errmodule(MOD_DMS), + errmsg("[SS switchover] set dssserver standby failed, vgname: \"%s\", socketpath: \"%s\"", + g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name, + g_instance.attr.attr_storage.dss_attr.ss_dss_conn_path), + errhint("Check vgname and socketpath and restart later."))); + } + ereport(LOG, + (errmodule(MOD_DMS), errmsg("[SS switchover] Success in %s primary demote, running as standby," + " waiting for reformer setting new role.", DemoteModeDesc(demote_mode)))); + return DMS_SUCCESS; + } else { + if (ntries >= WAIT_DEMOTE) { + ereport(WARNING, + (errmodule(MOD_DMS), errmsg("[SS 
switchover] Failure in %s primary demote, need reform recovery.", + DemoteModeDesc(demote_mode)))); + return DMS_ERROR; + } + } + + CHECK_FOR_INTERRUPTS(); + pg_usleep(100000L); /* wait 0.1 sec, then retry */ + } + return DMS_ERROR; +} + +static int CBDbIsPrimary(void *db_handle) +{ + return g_instance.dms_cxt.SSReformerControl.primaryInstId == SS_MY_INST_ID ? 1 : 0; +} + +static int CBSwitchoverPromote(void *db_handle, unsigned char origPrimaryId) +{ + g_instance.dms_cxt.SSClusterState = NODESTATE_STANDBY_PROMOTING; + g_instance.dms_cxt.SSRecoveryInfo.new_primary_reset_walbuf_flag = true; + ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS switchover] Starting to promote standby."))); + + /* since original primary must have demoted, it is safe to allow promting standby write */ + if (dss_set_server_status_wrapper(true) != GS_SUCCESS) { + ereport(PANIC, (errmodule(MOD_DMS), errmsg("Could not set dssserver flag, vgname: \"%s\", socketpath: \"%s\"", + g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name, + g_instance.attr.attr_storage.dss_attr.ss_dss_conn_path), + errhint("Check vgname and socketpath and restart later."))); + } + + SSNotifySwitchoverPromote(); + + const int WAIT_PROMOTE = 1200; /* wait 120 sec */ + for (int ntries = 0;; ntries++) { + if (pmState == PM_RUN && g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_PROMOTED) { + /* flush control file primary id in advance to save new standby's waiting time */ + SSSavePrimaryInstId(SS_MY_INST_ID); + + SSReadControlFile(REFORM_CTRL_PAGE); + Assert(SSGetPrimaryInstId() == SS_MY_INST_ID); + ereport(LOG, (errmodule(MOD_DMS), + errmsg("[SS switchover] Standby promote: success, set new primary:%d.", SS_MY_INST_ID))); + return DMS_SUCCESS; + } else { + if (ntries >= WAIT_PROMOTE) { + ereport(WARNING, (errmodule(MOD_DMS), + errmsg("[SS switchover] Standby promote timeout, please try again later."))); + } + } + + CHECK_FOR_INTERRUPTS(); + pg_usleep(100000L); /* wait 0.1 sec, then retry */ + } + return DMS_ERROR; +} + 
+/* only sets switchover errno, everything else set in setPrimaryId */ +static void CBSwitchoverResult(void *db_handle, int result) +{ + if (result == DMS_SUCCESS) { + ereport(LOG, (errmodule(MOD_DMS), + errmsg("[SS switchover] Switchover success, letting reformer update roles."))); + return; + } + ereport(WARNING, (errmodule(MOD_DMS), errmsg("[SS switchover] Switchover failed, errno: %d.", result))); +} + +static int SetPrimaryIdOnStandby(int primary_id) +{ + g_instance.dms_cxt.SSReformerControl.primaryInstId = primary_id; + + for (int ntries = 0;; ntries++) { + SSReadControlFile(REFORM_CTRL_PAGE); /* need to double check */ + if (g_instance.dms_cxt.SSReformerControl.primaryInstId == primary_id) { + ereport(LOG, (errmodule(MOD_DMS), + errmsg("[SS %s] Reform success, this is a standby:%d confirming new primary:%d.", + SS_PERFORMING_SWITCHOVER ? "switchover" : "reform", SS_MY_INST_ID, primary_id))); + return DMS_SUCCESS; + } else { + if (ntries >= WAIT_REFORM_CTRL_REFRESH_TRIES) { + ereport(ERROR, + (errmodule(MOD_DMS), errmsg("[SS %s] Failed to confirm new primary: %d," + " control file indicates primary is %d; wait timeout.", + SS_PERFORMING_SWITCHOVER ? 
"switchover" : "reform", (int)primary_id, + g_instance.dms_cxt.SSReformerControl.primaryInstId))); + return DMS_ERROR; + } + } + + CHECK_FOR_INTERRUPTS(); + pg_usleep(REFORM_WAIT_TIME); /* wait 0.01 sec, then retry */ + } + + return DMS_ERROR; +} + +/* called on both new primary and all standby nodes to refresh status */ +static int CBSaveStableList(void *db_handle, unsigned long long list_stable, unsigned char reformer_id, + unsigned int save_ctrl) +{ + int primary_id = (int)reformer_id; + g_instance.dms_cxt.SSReformerControl.primaryInstId = primary_id; + g_instance.dms_cxt.SSReformerControl.list_stable = list_stable; + int ret = DMS_ERROR; + SSLockReleaseAll(); + if ((int)primary_id == SS_MY_INST_ID) { + if (g_instance.dms_cxt.SSClusterState > NODESTATE_NORMAL) { + Assert(g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_PROMOTED || + g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_FAILOVER_PROMOTING); + } + SSSaveReformerCtrl(); + Assert(g_instance.dms_cxt.SSReformerControl.primaryInstId == (int)primary_id); + ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS %s] set current instance:%d as primary.", + SS_PERFORMING_SWITCHOVER ? 
"switchover" : "reform", primary_id))); + if (g_instance.dms_cxt.SSClusterState == NODESTATE_NORMAL) { + /* only send to standby recoveried or new joined */ + SSLockAcquireAll(); + } + ret = DMS_SUCCESS; + } else { /* we are on standby */ + ret = SetPrimaryIdOnStandby(primary_id); + } + + /* SSClusterState and in_reform must be set atomically */ + g_instance.dms_cxt.SSClusterState = NODESTATE_NORMAL; + g_instance.dms_cxt.SSReformInfo.in_reform = false; + g_instance.dms_cxt.SSRecoveryInfo.startup_reform = false; + ereport(LOG, + (errmodule(MOD_DMS), + errmsg("[SS reform/SS switchover/SS failover] Reform success, instance:%d is running.", + g_instance.attr.attr_storage.dms_attr.instance_id))); + return ret; +} + +/* currently not used in switchover, everything set in setPrimaryId */ +static void CBSetDbStandby(void *db_handle) +{ + /* nothing to do now, but need to implements callback interface */ +} + +static void ReleaseResource() +{ + LWLockReleaseAll(); + AbortBufferIO(); + UnlockBuffers(); + /* buffer pins are released here: */ + ResourceOwnerRelease(t_thrd.utils_cxt.CurrentResourceOwner, RESOURCE_RELEASE_BEFORE_LOCKS, false, true); + FlushErrorState(); +} + +static unsigned int CBPageHashCode(const char pageid[DMS_PAGEID_SIZE]) +{ + BufferTag *tag = (BufferTag *)pageid; + return BufTableHashCode(tag); +} + +static unsigned long long CBGetPageLSN(const dms_buf_ctrl_t *buf_ctrl) +{ + Assert(buf_ctrl->buf_id < TOTAL_BUFFER_NUM); + if (buf_ctrl->buf_id >= TOTAL_BUFFER_NUM) { + return 0; + } + BufferDesc* buf_desc = GetBufferDescriptor(buf_ctrl->buf_id); + XLogRecPtr lsn = BufferGetLSN(buf_desc); + return lsn; +} + +static unsigned long long CBGetGlobalLSN(void *db_handle) +{ + return GetInsertRecPtr(); +} + +static void DmsReleaseBuffer(int buffer, bool is_seg) +{ + if (is_seg) { + SegReleaseBuffer(buffer); + } else { + ReleaseBuffer(buffer); + } +} + +static int tryEnterLocalPage(BufferTag *tag, dms_lock_mode_t mode, dms_buf_ctrl_t **buf_ctrl) +{ + bool is_seg; + 
int buf_id = -1; + uint32 hash; + LWLock *partition_lock = NULL; + BufferDesc *buf_desc = NULL; + RelFileNode relfilenode = tag->rnode; + +#ifdef USE_ASSERT_CHECKING + if (IsSegmentPhysicalRelNode(relfilenode)) { + SegSpace *spc = spc_open(relfilenode.spcNode, relfilenode.dbNode, false, false); + BlockNumber spc_nblocks = spc_size(spc, relfilenode.relNode, tag->forkNum); + if (tag->blockNum >= spc_nblocks) { + ereport(PANIC, (errmodule(MOD_DMS), + errmsg("unexpected blocknum %u >= spc nblocks %u", tag->blockNum, spc_nblocks))); + } + } +#endif + + *buf_ctrl = NULL; + hash = BufTableHashCode(tag); + partition_lock = BufMappingPartitionLock(hash); + + int buffer; + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + PG_TRY(); + { + do { + (void)LWLockAcquire(partition_lock, LW_SHARED); + buf_id = BufTableLookup(tag, hash); + if (buf_id < 0) { + LWLockRelease(partition_lock); + buffer = 0; + break; + } + + buf_desc = GetBufferDescriptor(buf_id); + if (IsSegmentBufferID(buf_id)) { + (void)SegPinBuffer(buf_desc); + is_seg = true; + } else { + ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); + (void)PinBuffer(buf_desc, NULL); + is_seg = false; + } + LWLockRelease(partition_lock); + + WaitIO(buf_desc); + + if (!(pg_atomic_read_u32(&buf_desc->state) & BM_VALID)) { + ereport(WARNING, (errmodule(MOD_DMS), + errmsg("[%d/%d/%d/%d %d-%d] try enter page failed, buffer is not valid, state = 0x%x", + tag->rnode.spcNode, tag->rnode.dbNode, tag->rnode.relNode, tag->rnode.bucketNode, + tag->forkNum, tag->blockNum, buf_desc->state))); + DmsReleaseBuffer(buf_desc->buf_id + 1, is_seg); + buffer = 0; + break; + } + + if (pg_atomic_read_u32(&buf_desc->state) & BM_IO_ERROR) { + ereport(WARNING, (errmodule(MOD_DMS), + errmsg("[%d/%d/%d/%d %d-%d] try enter page failed, buffer is io error, state = 0x%x", + tag->rnode.spcNode, tag->rnode.dbNode, tag->rnode.relNode, tag->rnode.bucketNode, + tag->forkNum, tag->blockNum, buf_desc->state))); + 
DmsReleaseBuffer(buf_desc->buf_id + 1, is_seg); + buffer = 0; + break; + } + + LWLockMode content_mode = (mode == DMS_LOCK_SHARE) ? LW_SHARED : LW_EXCLUSIVE; + (void)LWLockAcquire(buf_desc->content_lock, content_mode); + *buf_ctrl = GetDmsBufCtrl(buf_id); + (*buf_ctrl)->lsn_on_disk = BufferGetLSN(buf_desc); + (*buf_ctrl)->seg_fileno = buf_desc->seg_fileno; + (*buf_ctrl)->seg_blockno = buf_desc->seg_blockno; + Assert(buf_id >= 0); + buffer = buf_id + 1; + } while (0); + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + ReleaseResource(); + } + PG_END_TRY(); + + return buffer; +} + +static int CBTryEnterLocalPage(void *db_handle, char pageid[DMS_PAGEID_SIZE], dms_lock_mode_t mode, + dms_buf_ctrl_t **buf_ctrl) +{ + BufferTag *tag = (BufferTag *)pageid; + Buffer buffer = tryEnterLocalPage(tag, mode, buf_ctrl); + if (buffer <= 0) { + if (*buf_ctrl != NULL) { + ereport(PANIC, (errmsg("CBTryEnterLocalPage failed"))); + } + } else { + if (*buf_ctrl == NULL) { + ereport(PANIC, (errmsg("CBTryEnterLocalPage failed"))); + } + } + + return DMS_SUCCESS; +} + +static int CBEnterLocalPage(void *db_handle, char pageid[DMS_PAGEID_SIZE], dms_lock_mode_t mode, + dms_buf_ctrl_t **buf_ctrl) +{ + BufferTag *tag = (BufferTag *)pageid; + Buffer buffer = tryEnterLocalPage(tag, mode, buf_ctrl); + return (buffer > 0) ? 
DMS_SUCCESS : DMS_ERROR; +} + +static unsigned char CBPageDirty(dms_buf_ctrl_t *buf_ctrl) +{ + Assert(buf_ctrl->buf_id < TOTAL_BUFFER_NUM); + if (buf_ctrl->buf_id >= TOTAL_BUFFER_NUM) { + return 0; + } + BufferDesc *buf_desc = GetBufferDescriptor(buf_ctrl->buf_id); + return pg_atomic_read_u32(&buf_desc->state) & (BM_DIRTY | BM_JUST_DIRTIED); +} + +static void CBLeaveLocalPage(void *db_handle, dms_buf_ctrl_t *buf_ctrl) +{ + Assert(buf_ctrl->buf_id < TOTAL_BUFFER_NUM); + if (buf_ctrl->buf_id >= TOTAL_BUFFER_NUM) { + return; + } + + if (IsSegmentBufferID(buf_ctrl->buf_id)) { + SegUnlockReleaseBuffer(buf_ctrl->buf_id + 1); + } else { + UnlockReleaseBuffer(buf_ctrl->buf_id + 1); + } +} + +static char* CBGetPage(dms_buf_ctrl_t *buf_ctrl) +{ + Assert(buf_ctrl->buf_id < TOTAL_BUFFER_NUM); + if (buf_ctrl->buf_id >= TOTAL_BUFFER_NUM) { + return NULL; + } + BufferDesc *buf_desc = GetBufferDescriptor(buf_ctrl->buf_id); + return (char *)BufHdrGetBlock(buf_desc); +} + +static void CBInvalidatePage(void *db_handle, char pageid[DMS_PAGEID_SIZE]) +{ + bool valid = false; + int buf_id; + BufferTag* tag = (BufferTag *)pageid; + uint32 hash; + LWLock *partition_lock = NULL; + BufferDesc *buf_desc = NULL; + dms_buf_ctrl_t *buf_ctrl = NULL; + + hash = BufTableHashCode(tag); + partition_lock = BufMappingPartitionLock(hash); + (void)LWLockAcquire(partition_lock, LW_SHARED); + buf_id = BufTableLookup(tag, hash); + if (buf_id < 0) { + /* not found in shared buffer */ + LWLockRelease(partition_lock); + return; + } + + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + PG_TRY(); + { + buf_desc = GetBufferDescriptor(buf_id); + if (IsSegmentBufferID(buf_id)) { + valid = SegPinBuffer(buf_desc); + } else { + ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); + valid = PinBuffer(buf_desc, NULL); + } + LWLockRelease(partition_lock); + + if (valid) { + (void)LWLockAcquire(buf_desc->content_lock, LW_EXCLUSIVE); + buf_ctrl = GetDmsBufCtrl(buf_id); + 
buf_ctrl->lock_mode = (unsigned char)DMS_LOCK_NULL; + LWLockRelease(buf_desc->content_lock); + } + if (IsSegmentBufferID(buf_id)) { + SegReleaseBuffer(buf_id + 1); + } else { + ReleaseBuffer(buf_id + 1); + } + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + ReleaseResource(); + } + PG_END_TRY(); +} + +static void CBXLogFlush(void *db_handle, unsigned long long *lsn) +{ + (void)LWLockAcquire(WALWriteLock, LW_EXCLUSIVE); + (void)XLogBackgroundFlush(); + *lsn = GetFlushRecPtr(); + LWLockRelease(WALWriteLock); +} + +static char *CBDisplayBufferTag(char *displayBuf, unsigned int count, char *pageid) +{ + BufferTag pagetag = *(BufferTag *)pageid; + int ret = sprintf_s(displayBuf, count, "spc/db/rel/bucket fork-block: %u/%u/%u/%d %d-%u", + pagetag.rnode.spcNode, pagetag.rnode.dbNode, pagetag.rnode.relNode, pagetag.rnode.bucketNode, + pagetag.forkNum, pagetag.blockNum); + securec_check_ss(ret, "", ""); + return displayBuf; +} + +static int CBRemoveBufLoadStatus(dms_buf_ctrl_t *buf_ctrl, dms_buf_load_status_t dms_buf_load_status) +{ + switch (dms_buf_load_status) { + case DMS_BUF_NEED_LOAD: + buf_ctrl->state &= ~BUF_NEED_LOAD; + break; + case DMS_BUF_IS_LOADED: + buf_ctrl->state &= ~BUF_IS_LOADED; + break; + case DMS_BUF_LOAD_FAILED: + buf_ctrl->state &= ~BUF_LOAD_FAILED; + break; + case DMS_BUF_NEED_TRANSFER: + buf_ctrl->state &= ~BUF_NEED_TRANSFER; + break; + default: + Assert(0); + } + return DMS_SUCCESS; +} + +static int CBSetBufLoadStatus(dms_buf_ctrl_t *buf_ctrl, dms_buf_load_status_t dms_buf_load_status) +{ + switch (dms_buf_load_status) { + case DMS_BUF_NEED_LOAD: + buf_ctrl->state |= BUF_NEED_LOAD; + break; + case DMS_BUF_IS_LOADED: + buf_ctrl->state |= BUF_IS_LOADED; + break; + case DMS_BUF_LOAD_FAILED: + buf_ctrl->state |= BUF_LOAD_FAILED; + break; + case DMS_BUF_NEED_TRANSFER: + buf_ctrl->state |= BUF_NEED_TRANSFER; + break; + default: + Assert(0); + } + return DMS_SUCCESS; +} + +static void *CBGetHandle(unsigned int 
*db_handle_index) +{ + void *db_handle = g_instance.proc_base->allProcs[g_instance.dms_cxt.dmsProcSid]; + *db_handle_index = pg_atomic_fetch_add_u32(&g_instance.dms_cxt.dmsProcSid, 1); + return db_handle; +} + +static char *CBMemAlloc(void *context, unsigned int size) +{ + char *ptr = NULL; + MemoryContext old_cxt = MemoryContextSwitchTo(t_thrd.dms_cxt.msgContext); + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + + PG_TRY(); + { + ptr = (char *)palloc(size); + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + if (t_thrd.role == DMS_WORKER) { + FlushErrorState(); + } + } + PG_END_TRY(); + + (void)MemoryContextSwitchTo(old_cxt); + return ptr; +} + +static void CBMemFree(void *context, void *pointer) +{ + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + + PG_TRY(); + { + pfree(pointer); + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + if (t_thrd.role == DMS_WORKER) { + FlushErrorState(); + } + } + PG_END_TRY(); +} + +static void CBMemReset(void *context) +{ + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + PG_TRY(); + { + MemoryContextReset(t_thrd.dms_cxt.msgContext); + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + if (t_thrd.role == DMS_WORKER) { + FlushErrorState(); + } + } + PG_END_TRY(); +} + +static int32 CBProcessLockAcquire(char *data, uint32 len) +{ + if (unlikely(len != sizeof(SSBroadcastDDLLock))) { + ereport(DEBUG1, (errmsg("invalid broadcast ddl lock message"))); + return DMS_ERROR; + } + + SSBroadcastDDLLock *ssmsg = (SSBroadcastDDLLock *)data; + LockAcquireResult res; + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + PG_TRY(); + { + res = LockAcquire(&(ssmsg->locktag), ssmsg->lockmode, false, ssmsg->dontWait); + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + res = 
LOCKACQUIRE_NOT_AVAIL; + ereport(WARNING, (errmsg("SS Standby process DDLLockAcquire got in PG_CATCH"))); + if (t_thrd.role == DMS_WORKER) { + FlushErrorState(); + } + } + PG_END_TRY(); + + if (!(ssmsg->dontWait) && res == LOCKACQUIRE_NOT_AVAIL) { + ereport(WARNING, (errmsg("SS process DDLLockAcquire request failed!"))); + return DMS_ERROR; + } + return DMS_SUCCESS; +} + +static int32 CBProcessLockRelease(char *data, uint32 len) +{ + if (unlikely(len != sizeof(SSBroadcastDDLLock))) { + ereport(DEBUG1, (errmsg("invalid lock release message"))); + return DMS_ERROR; + } + + SSBroadcastDDLLock *ssmsg = (SSBroadcastDDLLock *)data; + int res = DMS_SUCCESS; + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + PG_TRY(); + { + (void)LockRelease(&(ssmsg->locktag), ssmsg->lockmode, ssmsg->sessionlock); + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + res = DMS_ERROR; + ereport(WARNING, (errmsg("SS process DDLLockRelease request failed!"))); + if (t_thrd.role == DMS_WORKER) { + FlushErrorState(); + } + } + PG_END_TRY(); + + return res; +} + +static int32 CBProcessReleaseAllLock(uint32 len) +{ + if (unlikely(len != sizeof(SSBroadcastCmdOnly))) { + return DMS_ERROR; + } + + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + int res = DMS_SUCCESS; + PG_TRY(); + { + LockErrorCleanup(); + LockReleaseAll(DEFAULT_LOCKMETHOD, true); + LockReleaseAll(USER_LOCKMETHOD, true); + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + res = DMS_ERROR; + ereport(WARNING, (errmsg("SS process DDLLockReleaseAll request failed!"))); + if (t_thrd.role == DMS_WORKER) { + FlushErrorState(); + } + } + PG_END_TRY(); + + return res; +} + +static int32 CBProcessBroadcast(void *db_handle, char *data, unsigned int len, char *output_msg, + uint32 *output_msg_len) +{ + int32 ret = DMS_SUCCESS; + SSBroadcastOp bcast_op = *(SSBroadcastOp *)data; + + *output_msg_len = 0; + uint32
saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + + PG_TRY(); + { + switch (bcast_op) { + case BCAST_GET_XMIN: + ret = SSGetOldestXmin(data, len, output_msg, output_msg_len); + break; + case BCAST_SI: + ret = SSProcessSharedInvalMsg(data, len); + break; + case BCAST_SEGDROPTL: + ret = SSProcessSegDropTimeline(data, len); + break; + case BCAST_DROP_REL_ALL_BUFFER: + ret = SSProcessDropRelAllBuffer(data, len); + break; + case BCAST_DROP_REL_RANGE_BUFFER: + ret = SSProcessDropRelRangeBuffer(data, len); + break; + case BCAST_DROP_DB_ALL_BUFFER: + ret = SSProcessDropDBAllBuffer(data, len); + break; + case BCAST_DROP_SEG_SPACE: + ret = SSProcessDropSegSpace(data, len); + break; + case BCAST_DDLLOCK: + ret = CBProcessLockAcquire(data, len); + break; + case BCAST_DDLLOCKRELEASE: + ret = CBProcessLockRelease(data, len); + break; + case BCAST_DDLLOCKRELEASE_ALL: + ret = CBProcessReleaseAllLock(len); + break; + case BCAST_CHECK_DB_BACKENDS: + ret = SSCheckDbBackends(data, len, output_msg, output_msg_len); + break; + default: + ereport(WARNING, (errmodule(MOD_DMS), errmsg("invalid broadcast operate type"))); + ret = DMS_ERROR; + break; + } + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + if (t_thrd.role == DMS_WORKER) { + FlushErrorState(); + } + } + PG_END_TRY(); + + return ret; +} + +static int32 CBProcessBroadcastAck(void *db_handle, char *data, unsigned int len) +{ + int32 ret = DMS_SUCCESS; + SSBroadcastOpAck bcast_op = *(SSBroadcastOpAck *)data; + + switch (bcast_op) { + case BCAST_GET_XMIN_ACK: + ret = SSGetOldestXminAck((SSBroadcastXminAck *)data); + break; + case BCAST_CHECK_DB_BACKENDS_ACK: + ret = SSCheckDbBackendsAck(data, len); + break; + default: + ereport(WARNING, (errmodule(MOD_DMS), errmsg("invalid broadcast ack type"))); + ret = DMS_ERROR; + } + return ret; +} + +static int CBGetDmsStatus(void *db_handle) +{ + return (int)g_instance.dms_cxt.dms_status; +} + +static void CBSetDmsStatus(void 
*db_handle, int dms_status) +{ + g_instance.dms_cxt.dms_status = (dms_status_t)dms_status; +} + +static int32 CBDrcBufRebuild(void *db_handle) +{ + uint32 buf_state; + for (int i = 0; i < TOTAL_BUFFER_NUM; i++) { + BufferDesc *buf_desc = GetBufferDescriptor(i); + buf_state = LockBufHdr(buf_desc); + if ((buf_state & BM_VALID) || (buf_state & BM_TAG_VALID)) { + int ret = CheckBuf4Rebuild(buf_desc); + if (ret != DMS_SUCCESS) { + UnlockBufHdr(buf_desc, buf_state); + return ret; + } + } + UnlockBufHdr(buf_desc, buf_state); + } + return GS_SUCCESS; +} + +// used for find bufferdesc in dms +static void SSGetBufferDesc(char *pageid, bool *is_valid, BufferDesc** ret_buf_desc) +{ + bool valid; + int buf_id; + uint32 hash; + LWLock *partition_lock = NULL; + BufferTag *tag = (BufferTag *)pageid; + BufferDesc *buf_desc; + + RelFileNode relfilenode = tag->rnode; + +#ifdef USE_ASSERT_CHECKING + if (IsSegmentPhysicalRelNode(relfilenode)) { + SegSpace *spc = spc_open(relfilenode.spcNode, relfilenode.dbNode, false, false); + BlockNumber spc_nblocks = spc_size(spc, relfilenode.relNode, tag->forkNum); + if (tag->blockNum >= spc_nblocks) { + ereport(PANIC, (errmodule(MOD_DMS), + errmsg("unexpected blocknum %u >= spc nblocks %u", tag->blockNum, spc_nblocks))); + } + } +#endif + + hash = BufTableHashCode(tag); + partition_lock = BufMappingPartitionLock(hash); + + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + PG_TRY(); + { + (void)LWLockAcquire(partition_lock, LW_SHARED); + buf_id = BufTableLookup(tag, hash); + Assert(buf_id >= 0); + if (buf_id >= 0) { + buf_desc = GetBufferDescriptor(buf_id); + ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); + if (IsSegmentBufferID(buf_id)) { + valid = SegPinBuffer(buf_desc); + } else { + valid = PinBuffer(buf_desc, NULL); + } + LWLockRelease(partition_lock); + *is_valid = valid; + *ret_buf_desc = buf_desc; + } else { + *ret_buf_desc = NULL; + } + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount 
= saveInterruptHoldoffCount; + ReleaseResource(); + } + PG_END_TRY(); +} + +void SSUnPinBuffer(BufferDesc* buf_desc) +{ + if (IsSegmentBufferID(buf_desc->buf_id)) { + SegUnpinBuffer(buf_desc); + } else { + UnpinBuffer(buf_desc, true); + } +} + +static int CBConfirmOwner(void *db_handle, char *pageid, unsigned char *lock_mode, unsigned char *is_edp, + unsigned long long *edp_lsn) +{ + BufferDesc *buf_desc = NULL; + bool valid; + dms_buf_ctrl_t *buf_ctrl = NULL; + + SSGetBufferDesc(pageid, &valid, &buf_desc); + if (buf_desc == NULL) { + return DMS_ERROR; + } + + if (!valid) { + *lock_mode = (uint8)DMS_LOCK_NULL; + *is_edp = (unsigned char)false; + SSUnPinBuffer(buf_desc); + return GS_SUCCESS; + } + + /* + * not acquire buf_desc->content_lock + * consistency guaranteed by reform phase + */ + buf_ctrl = GetDmsBufCtrl(buf_desc->buf_id); + *lock_mode = buf_ctrl->lock_mode; + // opengauss currently no edp + Assert(buf_ctrl->is_edp == 0); + *is_edp = (unsigned char)false; + SSUnPinBuffer(buf_desc); + + return GS_SUCCESS; +} + +static int CBConfirmConverting(void *db_handle, char *pageid, unsigned char smon_chk, + unsigned char *lock_mode, unsigned long long *edp_map, unsigned long long *lsn) +{ + BufferDesc *buf_desc = NULL; + bool valid; + dms_buf_ctrl_t *buf_ctrl = NULL; + bool timeout = false; + + *lsn = 0; + *edp_map = 0; + + SSGetBufferDesc(pageid, &valid, &buf_desc); + if (buf_desc == NULL) { + return DMS_ERROR; + } + + if (!valid) { + *lock_mode = (uint8)DMS_LOCK_NULL; + SSUnPinBuffer(buf_desc); + return GS_SUCCESS; + } + + struct timeval begin_tv; + struct timeval now_tv; + (void)gettimeofday(&begin_tv, NULL); + long begin = GET_MS(begin_tv); + long now; + + while (true) { + (void)gettimeofday(&now_tv, NULL); + now = GET_MS(now_tv); + if (now - begin > REFORM_CONFIRM_TIMEOUT) { + timeout = true; + break; + } + + bool is_locked = LWLockConditionalAcquire(buf_desc->content_lock, LW_EXCLUSIVE); + if (is_locked) { + buf_ctrl = GetDmsBufCtrl(buf_desc->buf_id); + 
*lock_mode = buf_ctrl->lock_mode; + LWLockRelease(buf_desc->content_lock); + break; + } + pg_usleep(REFORM_CONFIRM_INTERVAL); /* sleep 5ms */ + } + + if (!timeout) { + SSUnPinBuffer(buf_desc); + return GS_SUCCESS; + } + + if (smon_chk) { + SSUnPinBuffer(buf_desc); + return GS_TIMEDOUT; + } + + // without lock + buf_ctrl = GetDmsBufCtrl(buf_desc->buf_id); + *lock_mode = buf_ctrl->lock_mode; + + SSUnPinBuffer(buf_desc); + return GS_SUCCESS; +} + +static int CBGetStableList(void *db_handle, unsigned long long *list_stable, unsigned char *reformer_id) +{ + *list_stable = g_instance.dms_cxt.SSReformerControl.list_stable; + *reformer_id = (uint8)g_instance.dms_cxt.SSReformerControl.primaryInstId; + return GS_SUCCESS; +} + +static int CBStartup(void *db_handle) +{ + g_instance.dms_cxt.SSRecoveryInfo.ready_to_startup = true; + return GS_SUCCESS; +} + +static int CBRecoveryStandby(void *db_handle, int inst_id) +{ + Assert(inst_id == g_instance.attr.attr_storage.dms_attr.instance_id); + ereport(LOG, (errmsg("[SS reform] Recovery as standby"))); + + g_instance.dms_cxt.SSRecoveryInfo.skip_redo_replay = true; + if (!SSRecoveryNodes()) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("Recovery failed in startup first"))); + return GS_ERROR; + } + g_instance.dms_cxt.SSRecoveryInfo.skip_redo_replay = false; + + return GS_SUCCESS; +} + +static int CBRecoveryPrimary(void *db_handle, int inst_id) +{ + Assert(g_instance.dms_cxt.SSReformerControl.primaryInstId == inst_id || + g_instance.dms_cxt.SSReformerControl.primaryInstId == -1); + g_instance.dms_cxt.SSRecoveryInfo.skip_redo_replay = false; + ereport(LOG, (errmsg("[SS reform] Recovery as primary, will replay xlog from inst:%d", + g_instance.dms_cxt.SSReformerControl.primaryInstId))); + + if (!SSRecoveryNodes()) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("Recovery failed in startup first"))); + return GS_ERROR; + } + + return GS_SUCCESS; +} + +static int CBFlushCopy(void *db_handle, char *pageid) +{ + /* + * 1. 
request page from remote + * 2. mark page need flush + */ + while (!g_instance.dms_cxt.SSRecoveryInfo.reclsn_updated) { + pg_usleep(100L); /* sleep 0.1ms */ + } + + BufferTag* tag = (BufferTag*)pageid; + Buffer buffer; + + uint32 saveInterruptHoldoffCount = t_thrd.int_cxt.InterruptHoldoffCount; + PG_TRY(); + { + if (IsSegmentPhysicalRelNode(tag->rnode)) { + SegSpace *spc = spc_open(tag->rnode.spcNode, tag->rnode.dbNode, true, false); + buffer = ReadBufferFast(spc, tag->rnode, tag->forkNum, tag->blockNum, RBM_NORMAL); + } else { + buffer = ReadBufferWithoutRelcache(tag->rnode, tag->forkNum, tag->blockNum, RBM_NORMAL, NULL, NULL); + } + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; + if (t_thrd.role == DMS_WORKER) { + FlushErrorState(); + } + } + PG_END_TRY(); + + /** + * when remote DB instance reboot, this round reform fail + * primary node may fail to get page from remote node which reboot, this phase should return fail + */ + if (BufferIsInvalid(buffer)) { + if (dms_reform_failed()) { + return GS_ERROR; + } else { + Assert(0); + } + } + + LockBuffer(buffer, BUFFER_LOCK_SHARE); + ereport(LOG, (errmsg("[SS] ready to flush copy, spc/db/rel/bucket fork-block: %u/%u/%u/%d %d-%u", + tag->rnode.spcNode, tag->rnode.dbNode, tag->rnode.relNode, tag->rnode.bucketNode, + tag->forkNum, tag->blockNum))); + MarkBufferDirty(buffer); + UnlockReleaseBuffer(buffer); + return GS_SUCCESS; +} + +static int CBFailoverPromote(void *db_handle) +{ + Assert(g_instance.dms_cxt.SSClusterState == NODESTATE_NORMAL); + g_instance.dms_cxt.SSRecoveryInfo.failover_triggered = true; + g_instance.dms_cxt.SSClusterState = NODESTATE_STANDBY_FAILOVER_PROMOTING; + ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS failover] failover trigger."))); + + SSTriggerFailover(); + while (true) { + if (SSFAILOVER_TRIGGER && g_instance.pid_cxt.StartupPID != 0) { + ereport(LOG, (errmodule(MOD_DMS), errmsg("startup thread success."))); + return GS_SUCCESS; + } + 
pg_usleep(REFORM_WAIT_TIME); + } +} + +static int CBGetDBPrimaryId(void *db_handle, unsigned int *primary_id) +{ + *primary_id = (unsigned int)g_instance.dms_cxt.SSReformerControl.primaryInstId; + return GS_SUCCESS; +} + +static void CBReformStartNotify(void *db_handle, dms_role_t role) +{ + ss_reform_info_t *reform_info = &g_instance.dms_cxt.SSReformInfo; + reform_info->dms_role = role; + reform_info->in_reform = true; + g_instance.dms_cxt.SSClusterState = NODESTATE_NORMAL; + g_instance.dms_cxt.SSRecoveryInfo.reform_ready = false; + ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS reform] dms reform start, role:%d", role))); + if (reform_info->dms_role == DMS_ROLE_REFORMER) { + if (dss_set_server_status_wrapper(true) != GS_SUCCESS) { + ereport(PANIC, (errmodule(MOD_DMS), errmsg("[SS reform] Could not set dssserver flag=read_write"))); + } + if (!SS_MY_INST_IS_MASTER) { + // means failover + g_instance.dms_cxt.SSRecoveryInfo.reclsn_updated = false; + g_instance.dms_cxt.SSRecoveryInfo.in_failover = true; + } + } else { + if (dss_set_server_status_wrapper(false) != GS_SUCCESS) { + ereport(PANIC, (errmodule(MOD_DMS), errmsg("[SS reform] Could not set dssserver flag=read_only"))); + } + } + + /* cluster has no transactions during startup reform */ + if (!g_instance.dms_cxt.SSRecoveryInfo.startup_reform) { + SendPostmasterSignal(PMSIGNAL_DMS_REFORM); + } + + while (true) { + if (dms_reform_failed()) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("reform failed during canceling backends"))); + return; + } + if (g_instance.dms_cxt.SSRecoveryInfo.reform_ready || g_instance.dms_cxt.SSRecoveryInfo.startup_reform) { + ereport(LOG, (errmodule(MOD_DMS), errmsg("reform ready, backends have been terminated"))); + return; + } + pg_usleep(REFORM_WAIT_TIME); + } +} + +static int CBSetBufInfo(dms_buf_ctrl_t* buf_ctrl) +{ + Assert(buf_ctrl->buf_id < TOTAL_BUFFER_NUM); + if (buf_ctrl->buf_id >= TOTAL_BUFFER_NUM) { + return DMS_ERROR; + } + + BufferDesc *buf_desc =
GetBufferDescriptor(buf_ctrl->buf_id); + buf_desc->lsn_on_disk = buf_ctrl->lsn_on_disk; + buf_desc->seg_fileno = buf_ctrl->seg_fileno; + buf_desc->seg_blockno = buf_ctrl->seg_blockno; + return GS_SUCCESS; +} + +void DmsCallbackThreadShmemInit(unsigned char need_startup, char **reg_data) +{ + IsUnderPostmaster = true; + // to add cnt, avoid postmain execute proc_exit to free shmem now + (void)pg_atomic_add_fetch_u32(&g_instance.dms_cxt.inDmsThreShmemInitCnt, 1); + + // postmain execute proc_exit now, share mem maybe shdmt, exit this thread now. + if (pg_atomic_read_u32(&g_instance.dms_cxt.inProcExitCnt) > 0) { + (void)pg_atomic_sub_fetch_u32(&g_instance.dms_cxt.inDmsThreShmemInitCnt, 1); + ThreadExitCXX(0); + } + EarlyBindingTLSVariables(); + MemoryContextInit(); + knl_thread_init(DMS_WORKER); + *reg_data = (char *)&t_thrd; + t_thrd.fake_session = create_session_context(t_thrd.top_mem_cxt, 0); + t_thrd.fake_session->status = KNL_SESS_FAKE; + u_sess = t_thrd.fake_session; + t_thrd.proc_cxt.MyProcPid = gs_thread_self(); + if (!need_startup) { + t_thrd.proc_cxt.MyProgName = "DMS WORKER"; + } else { + t_thrd.proc_cxt.MyProgName = "DMS REFORM PROC"; + } + t_thrd.proc_cxt.MyStartTime = time(NULL); + + SelfMemoryContext = THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_DEFAULT); + /* memory context will be used by DMS message process functions */ + t_thrd.dms_cxt.msgContext = AllocSetContextCreate(TopMemoryContext, + "DMSWorkerContext", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + /* create timer with thread safe */ + if (gs_signal_createtimer() < 0) { + ereport(FATAL, (errmsg("create timer fail at thread : %lu", t_thrd.proc_cxt.MyProcPid))); + } + CreateLocalSysDBCache(); + InitShmemForDmsCallBack(); + Assert(t_thrd.utils_cxt.CurrentResourceOwner == NULL); + t_thrd.utils_cxt.CurrentResourceOwner = + ResourceOwnerCreate(NULL, "dms worker", THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE)); + SharedInvalBackendInit(false, false); + 
pgstat_initialize(); + u_sess->attr.attr_common.Log_line_prefix = "\%m \%u \%d \%h \%p \%S "; + log_timezone = g_instance.dms_cxt.log_timezone; + (void)pg_atomic_sub_fetch_u32(&g_instance.dms_cxt.inDmsThreShmemInitCnt, 1); + t_thrd.postgres_cxt.whereToSendOutput = (int)DestNone; +} + +void DmsInitCallback(dms_callback_t *callback) +{ + // used in reform + callback->get_list_stable = CBGetStableList; + callback->save_list_stable = CBSaveStableList; + callback->opengauss_startup = CBStartup; + callback->opengauss_recovery_standby = CBRecoveryStandby; + callback->opengauss_recovery_primary = CBRecoveryPrimary; + callback->get_dms_status = CBGetDmsStatus; + callback->set_dms_status = CBSetDmsStatus; + callback->dms_reform_rebuild_buf_res = CBDrcBufRebuild; + callback->dms_thread_init = DmsCallbackThreadShmemInit; + callback->confirm_owner = CBConfirmOwner; + callback->confirm_converting = CBConfirmConverting; + callback->flush_copy = CBFlushCopy; + callback->get_db_primary_id = CBGetDBPrimaryId; + callback->failover_promote_opengauss = CBFailoverPromote; + callback->reform_start_notify = CBReformStartNotify; + callback->set_buf_info = CBSetBufInfo; + + callback->get_page_hash_val = CBPageHashCode; + callback->read_local_page4transfer = CBEnterLocalPage; + callback->try_read_local_page = CBTryEnterLocalPage; + callback->leave_local_page = CBLeaveLocalPage; + callback->page_is_dirty = CBPageDirty; + callback->get_page = CBGetPage; + callback->set_buf_load_status = CBSetBufLoadStatus; + callback->remove_buf_load_status = CBRemoveBufLoadStatus; + callback->invld_share_copy = CBInvalidatePage; + callback->get_db_handle = CBGetHandle; + callback->display_pageid = CBDisplayBufferTag; + + callback->mem_alloc = CBMemAlloc; + callback->mem_free = CBMemFree; + callback->mem_reset = CBMemReset; + + callback->get_page_lsn = CBGetPageLSN; + callback->get_global_lsn = CBGetGlobalLSN; + callback->log_flush = CBXLogFlush; + callback->process_broadcast = CBProcessBroadcast; + 
callback->process_broadcast_ack = CBProcessBroadcastAck; + + callback->get_opengauss_xid_csn = CBGetTxnCSN; + callback->get_opengauss_update_xid = CBGetUpdateXid; + callback->get_opengauss_txn_status = CBGetTxnStatus; + callback->opengauss_lock_buffer = CBGetCurrModeAndLockBuffer; + callback->get_opengauss_txn_snapshot = CBGetSnapshotData; + + callback->log_output = DMSWriteNormalLog; + + callback->switchover_demote = CBSwitchoverDemote; + callback->switchover_promote_opengauss = CBSwitchoverPromote; + callback->set_switchover_result = CBSwitchoverResult; + callback->set_db_standby = CBSetDbStandby; + callback->db_is_primary = CBDbIsPrimary; +} diff --git a/src/gausskernel/ddes/adapter/ss_dms_log_output.cpp b/src/gausskernel/ddes/adapter/ss_dms_log_output.cpp new file mode 100644 index 000000000..f0fa94664 --- /dev/null +++ b/src/gausskernel/ddes/adapter/ss_dms_log_output.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * --------------------------------------------------------------------------------------- + * + * ss_dms_log_output.cpp + * write log for dms + * + * + * IDENTIFICATION + * src/gausskernel/ddes/adapter/ss_dms_log_output.cpp + * + * --------------------------------------------------------------------------------------- + */ + +#include +#include +#include + +#include "utils/elog.h" +#include "knl/knl_thread.h" +#include "ddes/dms/ss_dms_log_output.h" + +void DMSLogOutput(uint32 ss_log_level, const char *code_file_name, uint32 code_line_num, char buf[]) +{ + int saved_log_output = t_thrd.postgres_cxt.whereToSendOutput; + if (t_thrd.role == WORKER || t_thrd.role == THREADPOOL_WORKER) { + t_thrd.postgres_cxt.whereToSendOutput = (int)DestNone; + } + int32 log_level; + switch (ss_log_level) { + case LOG_RUN_ERR_LEVEL: + case LOG_DEBUG_ERR_LEVEL: + /* avoid error->fatal, proc_exit() infinite loop and deadlocks */ + log_level = WARNING; + break; + case LOG_RUN_WAR_LEVEL: + case LOG_DEBUG_WAR_LEVEL: + log_level = WARNING; + break; + case LOG_RUN_INF_LEVEL: + case LOG_DEBUG_INF_LEVEL: + log_level = ENABLE_SS_LOG ? 
LOG : DEBUG1; + break; + default: + log_level = DEBUG1; // it will be DEBUG level later + break; + } + ereport(log_level, (errmodule(MOD_DMS), errmsg("%s:%u %s", code_file_name, code_line_num, buf))); + if (t_thrd.role == WORKER || t_thrd.role == THREADPOOL_WORKER) { + t_thrd.postgres_cxt.whereToSendOutput = saved_log_output; + } +} + +int32 DMSLogLevelCheck(dms_log_id_t dms_log_id, dms_log_level_t dms_log_level, uint32 *log_level) +{ + static uint32 db_log_map[DMS_LOG_ID_COUNT][DMS_LOG_LEVEL_COUNT] = { + {LOG_RUN_ERR_LEVEL, LOG_RUN_WAR_LEVEL, LOG_RUN_INF_LEVEL}, + {LOG_DEBUG_ERR_LEVEL, LOG_DEBUG_WAR_LEVEL, LOG_DEBUG_INF_LEVEL} + }; + + if (dms_log_id >= DMS_LOG_ID_COUNT || dms_log_level >= DMS_LOG_LEVEL_COUNT) { + return -1; + } + *log_level = db_log_map[dms_log_id][dms_log_level]; + return 0; +} \ No newline at end of file diff --git a/src/gausskernel/ddes/adapter/ss_dms_recovery.cpp b/src/gausskernel/ddes/adapter/ss_dms_recovery.cpp new file mode 100644 index 000000000..00f43ee8a --- /dev/null +++ b/src/gausskernel/ddes/adapter/ss_dms_recovery.cpp @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * ss_dms_recovery.cpp + * Provide common interface for recovery within DMS reform process. 
+ * + * IDENTIFICATION + * src/gausskernel/ddes/adapter/ss_dms_recovery.cpp + * + * --------------------------------------------------------------------------------------- + */ +#include "postgres.h" +#include "access/xlog.h" +#include "access/xact.h" +#include "access/multi_redo_api.h" +#include "storage/standby.h" +#include "storage/pmsignal.h" +#include "storage/buf/bufmgr.h" +#include "storage/dss/fio_dss.h" +#include "storage/smgr/fd.h" +#include "storage/smgr/segment.h" +#include "postmaster/postmaster.h" +#include "storage/file/fio_device.h" +#include "ddes/dms/ss_dms_bufmgr.h" +#include "ddes/dms/ss_dms_recovery.h" +#include "ddes/dms/ss_reform_common.h" +#include "access/double_write.h" +#include +#include +#include +#include +#include + +int SSGetPrimaryInstId() +{ + return g_instance.dms_cxt.SSReformerControl.primaryInstId; +} + +void SSSavePrimaryInstId(int id) +{ + g_instance.dms_cxt.SSReformerControl.primaryInstId = id; + SSSaveReformerCtrl(); +} + +/* + * Wake up startup process to replay WAL, or to notice that + * failover has been requested. 
+ */ +void SSWakeupRecovery(void) +{ + g_instance.dms_cxt.SSRecoveryInfo.recovery_pause_flag = false; +} + +bool SSRecoveryNodes() +{ + bool result = false; + + if (t_thrd.shemem_ptr_cxt.XLogCtl->IsRecoveryDone && + t_thrd.shemem_ptr_cxt.ControlFile->state == DB_IN_PRODUCTION) { + result = true; + } else { + /* Release my own lock before recovery */ + SSLockReleaseAll(); + SSWakeupRecovery(); + while (true) { + if (dms_reform_failed()) { + result = false; + break; + } + if (t_thrd.shemem_ptr_cxt.XLogCtl->IsRecoveryDone && + t_thrd.shemem_ptr_cxt.ControlFile->state == DB_IN_PRODUCTION) { + result = true; + break; + } + pg_usleep(REFORM_WAIT_TIME); + } + } + + return result; +} + +bool SSRecoveryApplyDelay(const XLogReaderState *record) +{ + if (!ENABLE_REFORM) { + return false; + } + + while (g_instance.dms_cxt.SSRecoveryInfo.recovery_pause_flag) { + /* might change the trigger file's location */ + RedoInterruptCallBack(); + + pg_usleep(REFORM_WAIT_TIME); + } + + return true; +} + +void SSReadControlFile(int id) +{ + pg_crc32c crc; + errno_t rc = EOK; + int fd = -1; + char *fname = NULL; + bool retry = false; + int read_size = 0; + int len = 0; + fname = XLOG_CONTROL_FILE; + +loop: + fd = BasicOpenFile(fname, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); + if (fd < 0) { + ereport(FATAL, (errcode_for_file_access(), errmsg("could not open control file \"%s\": %m", fname))); + } + + off_t seekpos = (off_t)BLCKSZ * id; + + if (id == REFORM_CTRL_PAGE) { + len = sizeof(ss_reformer_ctrl_t); + } else { + len = sizeof(ControlFileData); + } + + read_size = (int)BUFFERALIGN(len); + char buffer[read_size] __attribute__((__aligned__(ALIGNOF_BUFFER))); + if (pread(fd, buffer, read_size, seekpos) != read_size) { + ereport(PANIC, (errcode_for_file_access(), errmsg("could not read from control file: %m"))); + } + + if (id == REFORM_CTRL_PAGE) { + rc = memcpy_s(&g_instance.dms_cxt.SSReformerControl, len, buffer, len); + securec_check(rc, "", ""); + if (close(fd) < 0) { + ereport(PANIC, 
(errcode_for_file_access(), errmsg("could not close control file: %m"))); + } + + /* Now check the CRC. */ + INIT_CRC32C(crc); + COMP_CRC32C(crc, (char *)&g_instance.dms_cxt.SSReformerControl, offsetof(ss_reformer_ctrl_t, crc)); + FIN_CRC32C(crc); + + if (!EQ_CRC32C(crc, g_instance.dms_cxt.SSReformerControl.crc)) { + if (retry == false) { + ereport(WARNING, (errmsg("control file \"%s\" contains incorrect checksum, try backup file", fname))); + fname = XLOG_CONTROL_FILE_BAK; + retry = true; + goto loop; + } else { + ereport(FATAL, (errmsg("incorrect checksum in control file"))); + } + } + } else { + rc = memcpy_s(t_thrd.shemem_ptr_cxt.ControlFile, (size_t)len, buffer, (size_t)len); + securec_check(rc, "", ""); + if (close(fd) < 0) { + ereport(PANIC, (errcode_for_file_access(), errmsg("could not close control file: %m"))); + } + + /* Now check the CRC. */ + INIT_CRC32C(crc); + COMP_CRC32C(crc, (char *)t_thrd.shemem_ptr_cxt.ControlFile, offsetof(ControlFileData, crc)); + FIN_CRC32C(crc); + + if (!EQ_CRC32C(crc, t_thrd.shemem_ptr_cxt.ControlFile->crc)) { + if (retry == false) { + ereport(WARNING, (errmsg("control file \"%s\" contains incorrect checksum, try backup file", fname))); + fname = XLOG_CONTROL_FILE_BAK; + retry = true; + goto loop; + } else { + ereport(FATAL, (errmsg("incorrect checksum in control file"))); + } + } + } +} + +/* initialize reformer ctrl parameter when initdb */ +void SSWriteReformerControlPages(void) +{ + /* + * If already exists control file, reformer page must have been initialized + */ + if (dss_exist_file(XLOG_CONTROL_FILE)) { + SSReadControlFile(REFORM_CTRL_PAGE); + if (g_instance.dms_cxt.SSReformerControl.list_stable != 0 || + g_instance.dms_cxt.SSReformerControl.primaryInstId == SS_MY_INST_ID) { + (void)printf("[SS] ERROR: files from last install must be cleared.\n"); + ereport(PANIC, (errmsg("Files from last initdb not cleared"))); + } + (void)printf("[SS] Current node:%d acknowledges cluster PRIMARY node:%d.\n", + SS_MY_INST_ID, 
g_instance.dms_cxt.SSReformerControl.primaryInstId); + return; + } + + int fd = -1; + char buffer[PG_CONTROL_SIZE] __attribute__((__aligned__(ALIGNOF_BUFFER))); /* need to be aligned */ + errno_t errorno = EOK; + + /* + * Initialize list_stable and primaryInstId + * First node to initdb is chosen as primary for now, and for first-time cluster startup. + */ + Assert(!dss_exist_file(XLOG_CONTROL_FILE)); + g_instance.dms_cxt.SSReformerControl.list_stable = 0; + g_instance.dms_cxt.SSReformerControl.primaryInstId = SS_MY_INST_ID; + (void)printf("[SS] Current node:%d initdb first, will become PRIMARY for first-time SS cluster startup.\n", + SS_MY_INST_ID); + + /* Contents are protected with a CRC */ + INIT_CRC32C(g_instance.dms_cxt.SSReformerControl.crc); + COMP_CRC32C(g_instance.dms_cxt.SSReformerControl.crc, (char *)&g_instance.dms_cxt.SSReformerControl, + offsetof(ss_reformer_ctrl_t, crc)); + FIN_CRC32C(g_instance.dms_cxt.SSReformerControl.crc); + + if (sizeof(ss_reformer_ctrl_t) > PG_CONTROL_SIZE) { + ereport(PANIC, (errmsg("sizeof(ControlFileData) is larger than PG_CONTROL_SIZE; fix either one"))); + } + + errorno = memset_s(buffer, PG_CONTROL_SIZE, 0, PG_CONTROL_SIZE); + securec_check(errorno, "", ""); + + errorno = memcpy_s(buffer, PG_CONTROL_SIZE, &g_instance.dms_cxt.SSReformerControl, sizeof(ss_reformer_ctrl_t)); + securec_check(errorno, "", ""); + + fd = BasicOpenFile(XLOG_CONTROL_FILE, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR); + if (fd < 0) { + ereport(PANIC, + (errcode_for_file_access(), errmsg("could not create control file \"%s\": %m", XLOG_CONTROL_FILE))); + } + + SSWriteInstanceControlFile(fd, buffer, REFORM_CTRL_PAGE, PG_CONTROL_SIZE); + if (close(fd)) { + ereport(PANIC, (errcode_for_file_access(), errmsg("could not close control file: %m"))); + } +} + +void SSTriggerFailover() +{ + if (g_instance.dms_cxt.SSRecoveryInfo.startup_reform) { + g_instance.dms_cxt.SSRecoveryInfo.restart_failover_flag = true; + ereport(LOG, (errmodule(MOD_DMS), 
errmsg("[SS failover] do failover when DB restart."))); + } else { + SendPostmasterSignal(PMSIGNAL_DMS_TRIGGERFAILOVER); + ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS failover] do failover when DB alive"))); + } +} + +void SShandle_promote_signal() +{ + volatile XLogCtlData *xlogctl = t_thrd.shemem_ptr_cxt.XLogCtl; + SpinLockAcquire(&xlogctl->info_lck); + xlogctl->IsRecoveryDone = false; + xlogctl->SharedRecoveryInProgress = true; + SpinLockRelease(&xlogctl->info_lck); + + t_thrd.shemem_ptr_cxt.ControlFile->state = DB_IN_CRASH_RECOVERY; + pg_memory_barrier(); + + if (pmState == PM_WAIT_BACKENDS) { + g_instance.pid_cxt.StartupPID = initialize_util_thread(STARTUP); + Assert(g_instance.pid_cxt.StartupPID != 0); + pmState = PM_STARTUP; + } + + ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS failover] begin startup."))); +} + + +void ss_failover_dw_init_internal() +{ + /* + * step 1: remove self dw file dw_exit close self dw + * step 2: load old primary dw ,and finish dw recovery, exit + * step 3: rebuild dw file and init self dw + */ + + char *dssdir = g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name; + int old_primary_id = g_instance.dms_cxt.SSReformerControl.primaryInstId; + int self_id = g_instance.attr.attr_storage.dms_attr.instance_id; + if (!g_instance.dms_cxt.SSRecoveryInfo.startup_reform) { + dw_exit(true); + dw_exit(false); + } + + ss_initdwsubdir(dssdir, old_primary_id); + dw_ext_init(); + dw_init(); + dw_exit(true); + dw_exit(false); + + ss_initdwsubdir(dssdir, self_id); + dw_ext_init(); + dw_init(); + ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS failover] dw init finish"))); +} + +void ss_failover_dw_init() +{ + for (int i = 0; i < g_instance.ckpt_cxt_ctl->pgwr_procs.num; i++) { + if (g_instance.pid_cxt.PageWriterPID[i] != 0) { + signal_child(g_instance.pid_cxt.PageWriterPID[i], SIGTERM, -1); + } + } + ckpt_shutdown_pagewriter(); + + ss_failover_dw_init_internal(); +} + diff --git a/src/gausskernel/ddes/adapter/ss_init.cpp 
b/src/gausskernel/ddes/adapter/ss_init.cpp new file mode 100644 index 000000000..615feb882 --- /dev/null +++ b/src/gausskernel/ddes/adapter/ss_init.cpp @@ -0,0 +1,423 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * ss_init.cpp + * initialize for DMS shared storage. + * + * + * IDENTIFICATION + * src/gausskernel/ddes/adapter/ss_init.cpp + * + * --------------------------------------------------------------------------------------- + */ + +#include "utils/builtins.h" +#include +#include +#include "utils/palloc.h" +#include "utils/memutils.h" +#include "utils/elog.h" +#include "knl/knl_instance.h" +#include "securec.h" +#include "nodes/pg_list.h" +#include "storage/buf/bufmgr.h" +#include "ddes/dms/ss_init.h" +#include "ddes/dms/ss_dms_callback.h" +#include "ddes/dms/ss_dms.h" +#include "ddes/dms/ss_reform_common.h" +#include "postmaster/postmaster.h" + +#define FIXED_NUM_OF_INST_IP_PORT 3 +#define BYTES_PER_KB 1024 + + +const int MAX_CPU_STR_LEN = 5; +const int DEFAULT_DIGIT_RADIX = 10; +static void scanURL(dms_profile_t* profile, char* ipportstr, int index) +{ + List* l = NULL; + /* syntax: inst_id:ip:port */ + if (!SplitIdentifierString(ipportstr, ':', &l) || list_length(l) != FIXED_NUM_OF_INST_IP_PORT) { + ereport(FATAL, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid ip:port syntax %s", ipportstr))); + } + + errno_t ret; + char* ipstr = (char*)lsecond(l); + 
char* portstr = (char*)lthird(l); + ret = strncpy_s(profile->inst_net_addr[index].ip, DMS_MAX_IP_LEN, ipstr, strlen(ipstr)); + if (ret != EOK) { + ereport(FATAL, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid ip string: %s", ipstr))); + } + profile->inst_net_addr[index].port = (uint16)pg_strtoint16(portstr); + profile->inst_map |= ((uint64)1 << index); + + return; +} + +static void scanURLList(dms_profile_t* profile, List* l) +{ + char* ipport = NULL; + ListCell* cell = NULL; + int i = 0; + + foreach(cell, l) { + ipport = (char*)lfirst(cell); + scanURL(profile, ipport, i++); + } + profile->inst_cnt = (unsigned int)i; +} + +static void parseInternalURL(dms_profile_t *profile) +{ + List* l = NULL; + char* rawstring = g_instance.attr.attr_storage.dms_attr.interconnect_url; + char* copystring = pstrdup(rawstring); + /* syntax: inst_id0:ip0:port0, inst_id1:ip1:port1, ... */ + if (!SplitIdentifierString(copystring, ',', &l)) { + ereport(FATAL, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid list syntax for \"ss_interconnect_url\""))); + } + + if (list_length(l) == 0 || list_length(l) > DMS_MAX_INSTANCE) { + ereport(FATAL, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("too many ip:port list for \"ss_interconnect_url\""))); + } + + scanURLList(profile, l); +} + +static inline dms_conn_mode_t convertInterconnectType() +{ + if (!strcasecmp(g_instance.attr.attr_storage.dms_attr.interconnect_type, "TCP")) { + return DMS_CONN_MODE_TCP; + } else { + return DMS_CONN_MODE_RDMA; + } +} + +static void SetDmsParam(const char *dmsParamName, const char *dmsParamValue) +{ + if (dms_set_ssl_param(dmsParamName, dmsParamValue) != 0) + ereport(WARNING, (errmsg("Failed to set DMS %s: %s.", dmsParamName, dmsParamValue))); +} + +static void InitDmsSSL() +{ + char *parentdir = NULL; + KeyMode keymode = SERVER_MODE; + if (is_absolute_path(g_instance.attr.attr_security.ssl_key_file)) { + parentdir = pstrdup(g_instance.attr.attr_security.ssl_key_file); + 
get_parent_directory(parentdir); + decode_cipher_files(keymode, NULL, parentdir, u_sess->libpq_cxt.server_key); + } else { + decode_cipher_files(keymode, NULL, t_thrd.proc_cxt.DataDir, u_sess->libpq_cxt.server_key); + parentdir = pstrdup(t_thrd.proc_cxt.DataDir); + } + pfree_ext(parentdir); + + /* never give a chance to log it */ + dms_set_ssl_param("SSL_PWD_PLAINTEXT", reinterpret_cast(u_sess->libpq_cxt.server_key)); + /* clear the sensitive info in server_key */ + errno_t errorno = EOK; + errorno = memset_s(u_sess->libpq_cxt.server_key, CIPHER_LEN + 1, 0, CIPHER_LEN + 1); + securec_check(errorno, "\0", "\0"); + + char ssl_file_path[PATH_MAX + 1] = {0}; + if (NULL != realpath(g_instance.attr.attr_security.ssl_ca_file, ssl_file_path)) { + SetDmsParam("SSL_CA", ssl_file_path); + } + + errorno = memset_s(ssl_file_path, PATH_MAX + 1, 0, PATH_MAX + 1); + securec_check(errorno, "\0", "\0"); + + if (NULL != realpath(g_instance.attr.attr_security.ssl_key_file, ssl_file_path)) { + SetDmsParam("SSL_KEY", ssl_file_path); + } + + errorno = memset_s(ssl_file_path, PATH_MAX + 1, 0, PATH_MAX + 1); + securec_check(errorno, "\0", "\0"); + + if (NULL != realpath(g_instance.attr.attr_security.ssl_crl_file, ssl_file_path)) { + SetDmsParam("SSL_CRL", ssl_file_path); + } + + errorno = memset_s(ssl_file_path, PATH_MAX + 1, 0, PATH_MAX + 1); + securec_check(errorno, "\0", "\0"); + + if (NULL != realpath(g_instance.attr.attr_security.ssl_cert_file, ssl_file_path)) { + SetDmsParam("SSL_CERT", ssl_file_path); + } + + /* to limit line width */ + int dms_guc_param = u_sess->attr.attr_security.ssl_cert_notify_time; + SetDmsParam("SSL_CERT_NOTIFY_TIME", std::to_string(dms_guc_param).c_str()); +} + +static void splitDigitNumber(char *str, char *output, uint32 outputLen, uint32* len) +{ + if (str == NULL) { + output[0] = '\0'; + return; + } + + uint32_t start = 0; + while (*str != '\0' && *str == ' ') { + ++start; + ++str; + } + + if (*str == '\0') { + output[0] = '\0'; + return; + } + + char*
startPtr = str; + uint32 idx = 0; + while (*str != '\0' && *str != ' ') { + if (*str >= '0' && *str <= '9') { + ++str; + ++idx; + } else { + output[0] = '\0'; + return; + } + } + + // if the count of digit number is larger than outputLen, it is out of range + if (idx >= outputLen) { + output[0] = '\0'; + return; + } + int ret = strncpy_s(output, outputLen, startPtr, idx); + securec_check_c(ret, "\0", "\0"); + *len = start + idx; +} + +bool is_err(char *err) +{ + if (err == NULL) { + return false; + } + + while (*err != '\0') { + if (*err != ' ') { + return true; + } + err++; + } + + return false; +} + +static void setRdmaWorkConfig(dms_profile_t *profile) +{ + knl_instance_attr_dms *dms_attr = &g_instance.attr.attr_storage.dms_attr; + char lowStr[MAX_CPU_STR_LEN] = {0}; + char highStr[MAX_CPU_STR_LEN] = {0}; + uint32_t offset = 0; + profile->rdma_rpc_use_busypoll = false; + profile->rdma_rpc_is_bind_core = false; + if (dms_attr->rdma_work_config == NULL || dms_attr->rdma_work_config[0] == '\0') { + return; + } + + // if number >= MAX_CPU_STR_LEN, it exceeded the number of CPUs. + splitDigitNumber(dms_attr->rdma_work_config, lowStr, MAX_CPU_STR_LEN, &offset); + splitDigitNumber(dms_attr->rdma_work_config + offset, highStr, MAX_CPU_STR_LEN, &offset); + if (lowStr[0] != '\0' && highStr[0] != '\0') { + // if number of decimal digits is less than DEFAULT_DIGIT_RADIX(5), The number range must be within Int64. 
+ char *err = NULL; + int64 lowCpu = strtoll(lowStr, &err, DEFAULT_DIGIT_RADIX); + int64 highCpu = strtoll(highStr, &err, DEFAULT_DIGIT_RADIX); + if (lowCpu > highCpu) { + return; + } + + // get cpu count + int64 cpuCount = get_nprocs_conf(); + if (lowCpu >= cpuCount || highCpu >= cpuCount) { + return; + } + + profile->rdma_rpc_use_busypoll = true; + profile->rdma_rpc_is_bind_core = true; + profile->rdma_rpc_bind_core_start = (uint8)lowCpu; + profile->rdma_rpc_bind_core_end = (uint8)highCpu; + } +} + +static void SetOckLogPath(knl_instance_attr_dms* dms_attr, char *ock_log_path) +{ + int ret = memset_s(ock_log_path, DMS_OCK_LOG_PATH_LEN, 0, DMS_OCK_LOG_PATH_LEN); + securec_check_c(ret, "\0", "\0"); + int len = strlen(dms_attr->ock_log_path); + char realPath[PATH_MAX + 1] = {0}; + if (len == 0) { + char* loghome = gs_getenv_r("GAUSSLOG"); + if (loghome && '\0' != loghome[0]) { + check_backend_env(loghome); + if (realpath(loghome, realPath) == NULL) { + ereport(FATAL, (errmsg("failed to realpath $GAUSSLOG/pg_log"))); + ock_log_path[0] = '.'; + return; + } + ret = snprintf_s(ock_log_path, DMS_OCK_LOG_PATH_LEN, DMS_OCK_LOG_PATH_LEN - 1, "%s/pg_log", realPath); + securec_check_ss(ret, "", ""); + // ock_log_path not exist, create ock_log_path path + if (0 != pg_mkdir_p(ock_log_path, S_IRWXU) && errno != EEXIST) { + ereport(FATAL, (errmsg("failed to mkdir $GAUSSLOG/pg_log"))); + return; + } + return; + } else { + ock_log_path[0] = '.'; + } + } else { + check_backend_env(dms_attr->ock_log_path); + if (realpath(dms_attr->ock_log_path, realPath) == NULL) { + ereport(FATAL, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Failed to realpath config param ss_ock_log_path"))); + ock_log_path[0] = '.'; + return; + } + ret = strncpy_s(ock_log_path, DMS_OCK_LOG_PATH_LEN, realPath, strlen(realPath)); + securec_check_c(ret, "\0", "\0"); + + // ock_log_path not exist, create ock_log_path path + if (0 != pg_mkdir_p(ock_log_path, S_IRWXU) && errno != EEXIST) { + ereport(FATAL, 
(errmsg("failed to mkdir ss_ock_log_path"))); + return; + } + } +} + +static void setDMSProfile(dms_profile_t* profile) +{ + knl_instance_attr_dms* dms_attr = &g_instance.attr.attr_storage.dms_attr; + profile->resource_catalog_centralized = (unsigned int)dms_attr->enable_catalog_centralized; + profile->inst_id = (uint32)dms_attr->instance_id; + profile->page_size = BLCKSZ; + profile->data_buffer_size = (unsigned long long)((int64)TOTAL_BUFFER_NUM * BLCKSZ); + profile->recv_msg_buf_size = (unsigned long long)((int64)dms_attr->recv_msg_pool_size * BYTES_PER_KB); + profile->channel_cnt = (uint32)dms_attr->channel_count; + profile->work_thread_cnt = (uint32)dms_attr->work_thread_count; + profile->max_session_cnt = DMS_MAX_SESSIONS; + profile->time_stat_enabled = FALSE; + profile->pipe_type = convertInterconnectType(); + profile->conn_created_during_init = TRUE; + setRdmaWorkConfig(profile); + SetOckLogPath(dms_attr, profile->ock_log_path); + profile->inst_map = 0; + profile->enable_reform = (unsigned char)dms_attr->enable_reform; + profile->load_balance_mode = 1; /* primary-standby */ + + if (dms_attr->enable_ssl && g_instance.attr.attr_security.EnableSSL) { + InitDmsSSL(); + } + parseInternalURL(profile); + + /* some callback initialize */ + DmsInitCallback(&profile->callback); +} + +void DMSInit() +{ + if (ss_dms_func_init() != DMS_SUCCESS) { + ereport(FATAL, (errmsg("failed to init dms library"))); + } + if (dms_register_thread_init(DmsCallbackThreadShmemInit)) { + ereport(FATAL, (errmsg("failed to register dms memcxt callback!"))); + } + + dms_profile_t profile; + errno_t rc = memset_s(&profile, sizeof(dms_profile_t), 0, sizeof(dms_profile_t)); + securec_check(rc, "\0", "\0"); + setDMSProfile(&profile); + + g_instance.dms_cxt.log_timezone = u_sess->attr.attr_common.log_timezone; + + if (dms_init(&profile) != DMS_SUCCESS) { + int32 err; + const char *msg = NULL; + dms_get_error(&err, &msg); + ereport(FATAL, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + 
errmsg("failed to initialize dms, errno: %d, reason: %s", err, msg))); + } + g_instance.dms_cxt.dmsInited = true; +} + +void DMSUninit() +{ + if (!ENABLE_DMS || !g_instance.dms_cxt.dmsInited) { + return; + } + + g_instance.dms_cxt.dmsInited = false; + ereport(LOG, (errmsg("DMS uninit worker threads, DRC, errdesc and DL"))); + dms_uninit(); +} + +int32 DMSWaitReform() +{ + uint32 has_offline; /* currently not used in openGauss */ + return dms_wait_reform(&has_offline); +} + + +static bool DMSReformCheckStartup() +{ + if (g_instance.dms_cxt.SSRecoveryInfo.ready_to_startup) { + g_instance.pid_cxt.StartupPID = initialize_util_thread(STARTUP); + Assert(g_instance.pid_cxt.StartupPID != 0); + pmState = PM_STARTUP; + g_instance.dms_cxt.SSRecoveryInfo.ready_to_startup = false; + return true; + } + + if (g_instance.dms_cxt.SSRecoveryInfo.restart_failover_flag) { + g_instance.dms_cxt.SSRecoveryInfo.restart_failover_flag = false; + SSRestartFailoverPromote(); + return true; + } + return false; +} + +bool DMSWaitInitStartup() +{ + ereport(LOG, (errmsg("[SS reform] Node:%d first-round reform wait to initialize startup thread.", SS_MY_INST_ID))); + g_instance.dms_cxt.dms_status = (dms_status_t)DMS_STATUS_JOIN; + + while (g_instance.pid_cxt.StartupPID == 0) { + (void)DMSReformCheckStartup(); + if (dms_reform_last_failed()) { + return false; + } + pg_usleep(REFORM_WAIT_TIME); + } + + if (g_instance.pid_cxt.StartupPID != 0) { + ereport(LOG, (errmsg("[SS reform] Node:%d initialize startup thread success.", SS_MY_INST_ID))); + } + + return true; +} \ No newline at end of file diff --git a/src/gausskernel/ddes/adapter/ss_reform_common.cpp b/src/gausskernel/ddes/adapter/ss_reform_common.cpp new file mode 100644 index 000000000..3b5e5293f --- /dev/null +++ b/src/gausskernel/ddes/adapter/ss_reform_common.cpp @@ -0,0 +1,272 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021-2022. All rights reserved. + * + * openGauss is licensed under Mulan PSL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * ss_reform_common.cpp + * common methods for crash recovery, switchover and failover. + * + * + * IDENTIFICATION + * src/gausskernel/ddes/adapter/ss_reform_common.cpp + * + * --------------------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "access/xlog.h" +#include "postmaster/postmaster.h" +#include "storage/smgr/fd.h" +#include "storage/dss/fio_dss.h" +#include "ddes/dms/ss_dms.h" +#include "ddes/dms/ss_common_attr.h" +#include "ddes/dms/ss_dms_bufmgr.h" +#include "ddes/dms/ss_reform_common.h" +#include "storage/file/fio_device.h" +#include "storage/smgr/segment_internal.h" + +/* + * Add xlog reader private structure for page read. + */ +typedef struct XLogPageReadPrivate { + int emode; + bool fetching_ckpt; /* are we fetching a checkpoint record?
*/ + bool randAccess; +} XLogPageReadPrivate; + +static int SSXLogFileReadAnyTLI(XLogSegNo segno, int emode, uint32 sources, char* xlog_path) +{ + char path[MAXPGPATH]; + ListCell *cell = NULL; + int fd = -1; + errno_t errorno = EOK; + + foreach (cell, t_thrd.xlog_cxt.expectedTLIs) { + TimeLineID tli = (TimeLineID)lfirst_int(cell); + if (tli < t_thrd.xlog_cxt.curFileTLI) { + break; /* don't bother looking at too-old TLIs */ + } + + errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X", xlog_path, tli, + (uint32)((segno) / XLogSegmentsPerXLogId), (uint32)((segno) % XLogSegmentsPerXLogId)); + securec_check_ss(errorno, "", ""); + t_thrd.xlog_cxt.restoredFromArchive = false; + + fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0); + if (fd >= 0) { + /* Success! */ + t_thrd.xlog_cxt.curFileTLI = tli; + + /* Track source of data in assorted state variables */ + t_thrd.xlog_cxt.readSource = sources; + t_thrd.xlog_cxt.XLogReceiptSource = (int)sources; + + /* In FROM_STREAM case, caller tracks receipt time, not me */ + if (sources != XLOG_FROM_STREAM) { + t_thrd.xlog_cxt.XLogReceiptTime = GetCurrentTimestamp(); + } + + return fd; + } + if (FILE_POSSIBLY_DELETED(errno)) { /* unexpected failure? 
*/ + ereport(PANIC, (errcode_for_file_access(), errmsg("could not open file \"%s\" (log segment %s): %m", path, + XLogFileNameP(t_thrd.xlog_cxt.ThisTimeLineID, segno)))); + } + } + + return -1; +} + +static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr) +{ + if (t_thrd.xlog_cxt.readSource == XLOG_FROM_PG_XLOG && emode == LOG) { + if (XLByteEQ(RecPtr, t_thrd.xlog_cxt.lastComplaint)) { + emode = DEBUG1; + } else { + t_thrd.xlog_cxt.lastComplaint = RecPtr; + } + } + return emode; +} + +static int SSReadXLog(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int expectReadLen, + char *buf, TimeLineID *readTLI, char* xlog_path) +{ + /* Load reader private data */ + XLogPageReadPrivate *readprivate = (XLogPageReadPrivate *)xlogreader->private_data; + int emode = readprivate->emode; + bool randAccess = readprivate->randAccess; + uint32 targetPageOff; + +#ifdef USE_ASSERT_CHECKING + XLogSegNo targetSegNo; + + XLByteToSeg(targetPagePtr, targetSegNo); +#endif + targetPageOff = targetPagePtr % XLogSegSize; + + /* + * See if we need to switch to a new segment because the requested record + * is not in the currently open one. + */ + if (t_thrd.xlog_cxt.readFile >= 0 && !XLByteInSeg(targetPagePtr, t_thrd.xlog_cxt.readSegNo)) { + close(t_thrd.xlog_cxt.readFile); + t_thrd.xlog_cxt.readFile = -1; + t_thrd.xlog_cxt.readSource = 0; + } + + XLByteToSeg(targetPagePtr, t_thrd.xlog_cxt.readSegNo); + + /* In archive or crash recovery. */ + if (t_thrd.xlog_cxt.readFile < 0) { + uint32 sources; + + /* Reset curFileTLI if random fetch. 
*/ + if (randAccess) { + t_thrd.xlog_cxt.curFileTLI = 0; + } + + sources = XLOG_FROM_PG_XLOG; + if (t_thrd.xlog_cxt.InArchiveRecovery) { + sources |= XLOG_FROM_ARCHIVE; + } + + t_thrd.xlog_cxt.readFile = SSXLogFileReadAnyTLI(t_thrd.xlog_cxt.readSegNo, emode, sources, xlog_path); + + if (t_thrd.xlog_cxt.readFile < 0) { + return -1; + } + } + + /* + * At this point, we have the right segment open and if we're streaming we + * know the requested record is in it. + */ + Assert(t_thrd.xlog_cxt.readFile != -1); + + /* read size for XLOG_FROM_PG_XLOG */ + t_thrd.xlog_cxt.readLen = XLOG_BLCKSZ; + + /* Read the requested page */ + t_thrd.xlog_cxt.readOff = targetPageOff; + +try_again: + ssize_t actualBytes = pread(t_thrd.xlog_cxt.readFile, buf, XLOG_BLCKSZ, t_thrd.xlog_cxt.readOff); + if (actualBytes != XLOG_BLCKSZ) { + ereport(LOG, (errcode_for_file_access(), errmsg("read xlog(start:%X/%X, pos:%u len:%d) failed : %m", + static_cast(targetPagePtr >> BIT_NUM_INT32), + static_cast(targetPagePtr), targetPageOff, + expectReadLen))); + + ereport(emode_for_corrupt_record(emode, targetPagePtr), + (errcode_for_file_access(), + errmsg("could not read from log file %s to offset %u: %m", + XLogFileNameP(t_thrd.xlog_cxt.ThisTimeLineID, t_thrd.xlog_cxt.readSegNo), + t_thrd.xlog_cxt.readOff))); + if (errno == EINTR) { + errno = 0; + pg_usleep(REFORM_WAIT_TIME); + goto try_again; + } + + goto next_record_is_invalid; + } + + Assert(targetSegNo == t_thrd.xlog_cxt.readSegNo); + Assert(targetPageOff == t_thrd.xlog_cxt.readOff); + Assert((uint32)expectReadLen <= t_thrd.xlog_cxt.readLen); + + *readTLI = t_thrd.xlog_cxt.curFileTLI; + + return (int)t_thrd.xlog_cxt.readLen; + +next_record_is_invalid: + t_thrd.xlog_cxt.failedSources |= t_thrd.xlog_cxt.readSource; + + if (t_thrd.xlog_cxt.readFile >= 0) { + close(t_thrd.xlog_cxt.readFile); + } + t_thrd.xlog_cxt.readFile = -1; + t_thrd.xlog_cxt.readLen = 0; + t_thrd.xlog_cxt.readSource = 0; + + return -1; +} + +int SSXLogPageRead(XLogReaderState 
*xlogreader, XLogRecPtr targetPagePtr, int reqLen, + XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *readTLI, char* xlog_path) +{ + int read_len = SSReadXLog(xlogreader, targetPagePtr, Max(XLOG_BLCKSZ, reqLen), readBuf, + readTLI, g_instance.dms_cxt.SSRecoveryInfo.recovery_xlogDir); + return read_len; +} + +void SSGetXlogPath() +{ + int primaryId = -1; + errno_t rc = EOK; + char *dssdir = g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name; + + /* get primary inst id */ + primaryId = SSGetPrimaryInstId(); + + rc = snprintf_s(g_instance.dms_cxt.SSRecoveryInfo.recovery_xlogDir, MAXPGPATH, MAXPGPATH - 1, "%s/pg_xlog%d", + dssdir, primaryId); + securec_check_ss(rc, "", ""); +} + +void SSSaveReformerCtrl() +{ + int fd = -1; + int len; + errno_t err = EOK; + char *fname[2]; + len = sizeof(ss_reformer_ctrl_t); + + int write_size = (int)BUFFERALIGN(len); + char buffer[write_size] __attribute__((__aligned__(ALIGNOF_BUFFER))) = { 0 }; + + err = memcpy_s(&buffer, write_size, &g_instance.dms_cxt.SSReformerControl, len); + securec_check(err, "\0", "\0"); + + INIT_CRC32C(((ss_reformer_ctrl_t *)buffer)->crc); + COMP_CRC32C(((ss_reformer_ctrl_t *)buffer)->crc, (char *)buffer, offsetof(ss_reformer_ctrl_t, crc)); + FIN_CRC32C(((ss_reformer_ctrl_t *)buffer)->crc); + + fname[0] = XLOG_CONTROL_FILE_BAK; + fname[1] = XLOG_CONTROL_FILE; + + for (int i = 0; i < BAK_CTRL_FILE_NUM; i++) { + if (i == 0) { + fd = BasicOpenFile(fname[i], O_CREAT | O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); + } else { + fd = BasicOpenFile(fname[i], O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); + } + + if (fd < 0) { + ereport(FATAL, (errcode_for_file_access(), errmsg("could not open control file \"%s\": %m", fname[i]))); + } + + SSWriteInstanceControlFile(fd, buffer, REFORM_CTRL_PAGE, write_size); + if (close(fd)) { + ereport(PANIC, (errcode_for_file_access(), errmsg("could not close control file: %m"))); + } + } +} + +void SSClearSegCache() +{ + (void)LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE); + 
HeapMemResetHash(t_thrd.storage_cxt.SegSpcCache, "Shared Seg Spc hash by request"); + LWLockRelease(ShmemIndexLock); +} diff --git a/src/gausskernel/ddes/adapter/ss_switchover.cpp b/src/gausskernel/ddes/adapter/ss_switchover.cpp new file mode 100644 index 000000000..6e712f7f1 --- /dev/null +++ b/src/gausskernel/ddes/adapter/ss_switchover.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021-2022. All rights reserved. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * ss_switchover.cpp + * Shared storage switchover routines. 
+ * + * + * IDENTIFICATION + * src/gausskernel/ddes/adapter/ss_switchover.cpp + * + * --------------------------------------------------------------------------------------- + */ +#include "ddes/dms/ss_switchover.h" + +#include +#include +#include +#include +#include "miscadmin.h" + +#include "port/pg_crc32c.h" +#include "utils/elog.h" +#include "utils/atomic.h" +#include "access/xlog.h" +#include "knl/knl_instance.h" +#include "securec.h" +#include "storage/procarray.h" +#include "replication/replicainternal.h" +#include "storage/smgr/fd.h" +#include "access/csnlog.h" +#include "access/twophase.h" +#include "access/htup.h" +#include "access/multixact.h" +#include "catalog/pg_database.h" +#include "access/xlog_internal.h" + +#include "ddes/dms/ss_dms_callback.h" +#include "ddes/dms/ss_dms_log_output.h" +#include "ddes/dms/ss_dms_bufmgr.h" +#include "ddes/dms/ss_transaction.h" +#include "ddes/dms/ss_reform_common.h" +#include "storage/file/fio_device.h" + +void SSDoSwitchover() +{ + /* SSClusterState and in_reform should be set atomically for role judgement */ + Assert(g_instance.dms_cxt.SSClusterState == NODESTATE_NORMAL); + ereport(LOG, (errmsg("[SS switchover] Starting switchover, " + "current inst:%d will be promoted.", SS_MY_INST_ID))); + (void)dms_switchover((unsigned int)t_thrd.myLogicTid); +} + +void SSNotifySwitchoverPromote() +{ + SendPostmasterSignal(PMSIGNAL_DMS_SWITCHOVER_PROMOTE); +} + +void SSHandleSwitchoverPromote() +{ + ereport(LOG, (errmsg("[SS switchover] Standby promote: begin StartupThread."))); + Assert(g_instance.dms_cxt.SSReformerControl.primaryInstId != SS_MY_INST_ID); + + /* allow recovery in switchover to keep LSN in order */ + t_thrd.shemem_ptr_cxt.XLogCtl->IsRecoveryDone = false; + t_thrd.shemem_ptr_cxt.XLogCtl->SharedRecoveryInProgress = true; + t_thrd.shemem_ptr_cxt.ControlFile->state = DB_IN_CRASH_RECOVERY; + pg_memory_barrier(); + + /* let StartupXLOG do the rest of switchover standby promotion */ + if (pmState == PM_WAIT_BACKENDS) 
{ + g_instance.pid_cxt.StartupPID = initialize_util_thread(STARTUP); + Assert(g_instance.pid_cxt.StartupPID != 0); + pmState = PM_STARTUP; + } + return; +} \ No newline at end of file diff --git a/src/gausskernel/ddes/adapter/ss_transaction.cpp b/src/gausskernel/ddes/adapter/ss_transaction.cpp new file mode 100644 index 000000000..eda70944c --- /dev/null +++ b/src/gausskernel/ddes/adapter/ss_transaction.cpp @@ -0,0 +1,549 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * ss_transaction.cpp + * ss transaction related + * + * + * IDENTIFICATION + * src/gausskernel/ddes/adapter/ss_transaction.cpp + * + * --------------------------------------------------------------------------------------- + */ +#include "utils/snapshot.h" +#include "utils/postinit.h" +#include "storage/procarray.h" +#include "storage/buf/bufmgr.h" +#include "storage/smgr/segment_internal.h" +#include "ddes/dms/ss_transaction.h" +#include "ddes/dms/ss_dms_bufmgr.h" + +Snapshot SSGetSnapshotData(Snapshot snapshot) +{ + dms_opengauss_txn_snapshot_t dms_snapshot; + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + dms_ctx.xmap_ctx.dest_id = (unsigned int)SS_MASTER_ID; + + if (dms_request_opengauss_txn_snapshot(&dms_ctx, &dms_snapshot) != DMS_SUCCESS) { + ereport(ERROR, (errmsg("failed to request snapshot from master through dms"))); + return NULL; + } + + snapshot->xmin = dms_snapshot.xmin; + snapshot->xmax = 
dms_snapshot.xmax; + snapshot->snapshotcsn = dms_snapshot.snapshotcsn; + if (!TransactionIdIsValid(t_thrd.pgxact->xmin)) { + t_thrd.pgxact->xmin = u_sess->utils_cxt.TransactionXmin = snapshot->xmin; + } + + if (!TransactionIdIsNormal(u_sess->utils_cxt.RecentGlobalXmin)) { + u_sess->utils_cxt.RecentGlobalXmin = FirstNormalTransactionId; + } + u_sess->utils_cxt.RecentGlobalDataXmin = u_sess->utils_cxt.RecentGlobalXmin; + u_sess->utils_cxt.RecentXmin = snapshot->xmin; + return snapshot; +} + +static int SSTransactionIdGetCSN(dms_opengauss_xid_csn_t *dms_txn_info, dms_opengauss_csn_result_t *xid_csn_result) +{ + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + dms_ctx.xid_ctx.inst_id = (unsigned char)SS_MASTER_ID; + + return dms_request_opengauss_xid_csn(&dms_ctx, dms_txn_info, xid_csn_result); +} + +/* + * xid -> csnlog status + * is_committed: if true, then no need to fetch xid status from clog + */ +CommitSeqNo SSTransactionIdGetCommitSeqNo(TransactionId transactionId, bool isCommit, bool isMvcc, bool isNest, + Snapshot snapshot, bool* sync) +{ + if ((snapshot == NULL || !IsVersionMVCCSnapshot(snapshot)) && + TransactionIdEquals(transactionId, t_thrd.xact_cxt.cachedFetchCSNXid)) { + t_thrd.xact_cxt.latestFetchCSNXid = t_thrd.xact_cxt.cachedFetchCSNXid; + t_thrd.xact_cxt.latestFetchCSN = t_thrd.xact_cxt.cachedFetchCSN; + return t_thrd.xact_cxt.cachedFetchCSN; + } + if (!TransactionIdIsNormal(transactionId)) { + t_thrd.xact_cxt.latestFetchCSNXid = InvalidTransactionId; + if (TransactionIdEquals(transactionId, BootstrapTransactionId) || + TransactionIdEquals(transactionId, FrozenTransactionId)) { + return COMMITSEQNO_FROZEN; + } + return COMMITSEQNO_ABORTED; + } + + CommitSeqNo csn = 0; // COMMITSEQNO_INPROGRESS by default + CLogXidStatus clogstatus = CLOG_XID_STATUS_IN_PROGRESS; + XLogRecPtr lsn = InvalidXLogRecPtr; + dms_opengauss_csn_result_t xid_csn_result = { 0 }; + dms_opengauss_xid_csn_t dms_txn_info; + dms_txn_info.xid = transactionId; + 
dms_txn_info.is_committed = (unsigned char)isCommit; + dms_txn_info.is_mvcc = (unsigned char)isMvcc; + dms_txn_info.is_nest = (unsigned char)isNest; + if (snapshot != NULL) { + dms_txn_info.snapshotcsn = snapshot->snapshotcsn; + dms_txn_info.snapshotxmin = snapshot->xmin; + } else { + dms_txn_info.snapshotcsn = InvalidCommitSeqNo; + dms_txn_info.snapshotxmin = InvalidTransactionId; + } + + if (SSTransactionIdGetCSN(&dms_txn_info, &xid_csn_result) == DMS_SUCCESS) { + csn = xid_csn_result.csn; + clogstatus = (int)xid_csn_result.clogstatus; + lsn = xid_csn_result.lsn; + if (sync != NULL && (bool)xid_csn_result.sync) { + *sync = (bool)xid_csn_result.sync; + ereport(DEBUG1, (errmsg("SS primary xid sync success, xid=%lu.", transactionId))); + } + if (snapshot != NULL) { + ereport(DEBUG1, (errmsg("SS get txn info success, xid=%lu, snapshot=%lu-%lu-%lu, csn=%lu.", transactionId, + snapshot->xmin, snapshot->xmax, snapshot->snapshotcsn, csn))); + } else { + ereport(DEBUG1, (errmsg("SS get txn info success, snapshot is NULL"))); + } + } else { + if (snapshot != NULL) { + ereport(ERROR, (errmsg("SS get txn info failed, xid=%lu, snapshot=%lu-%lu-%lu.", transactionId, + snapshot->xmin, snapshot->xmax, snapshot->snapshotcsn))); + } else { + ereport(ERROR, (errmsg("SS get txn info failed, snapshot is NULL"))); + } + } + + if (COMMITSEQNO_IS_COMMITTED(csn) || COMMITSEQNO_IS_ABORTED(csn)) { + t_thrd.xact_cxt.cachedFetchCSNXid = transactionId; + t_thrd.xact_cxt.cachedFetchCSN = csn; + } + + if (clogstatus != CLOG_XID_STATUS_IN_PROGRESS && clogstatus != CLOG_XID_STATUS_SUB_COMMITTED) { + t_thrd.xact_cxt.cachedFetchXid = transactionId; + t_thrd.xact_cxt.cachedFetchXidStatus = clogstatus; + t_thrd.xact_cxt.cachedCommitLSN = lsn; + } + + return csn; +} + +/* + * xid -> clog status + * true if given transaction committed + */ +bool SSTransactionIdDidCommit(TransactionId transactionId) +{ + bool did_commit = false; + bool remote_get = false; + + if (TransactionIdEquals(transactionId, 
t_thrd.xact_cxt.cachedFetchXid)) { + t_thrd.xact_cxt.latestFetchXid = t_thrd.xact_cxt.cachedFetchXid; + t_thrd.xact_cxt.latestFetchXidStatus = t_thrd.xact_cxt.cachedFetchXidStatus; + if (t_thrd.xact_cxt.cachedFetchXidStatus == CLOG_XID_STATUS_COMMITTED) + did_commit = true; + } + + if (!TransactionIdIsNormal(transactionId)) { + t_thrd.xact_cxt.latestFetchXid = InvalidTransactionId; + if (TransactionIdEquals(transactionId, BootstrapTransactionId)) { + did_commit = true; + } else if (TransactionIdEquals(transactionId, FrozenTransactionId)) { + did_commit = true; + } + } + + if (!did_commit) { + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + + dms_ctx.xid_ctx.xid = *(uint64 *)(&transactionId); + dms_ctx.xid_ctx.inst_id = (unsigned char)SS_MASTER_ID; + + if (dms_request_opengauss_txn_status(&dms_ctx, (uint8)XID_COMMITTED, (uint8 *)&did_commit) != DMS_SUCCESS) { + ereport(FATAL, (errmsg("SS get txn did_commit failed, xid=%lu.", transactionId))); + } + remote_get = true; + ereport(DEBUG1, + (errmsg("SS get txn did_commit success, xid=%lu, did_commit=%d.", transactionId, did_commit))); + } + + if (did_commit && remote_get) { + t_thrd.xact_cxt.cachedFetchXid = transactionId; + t_thrd.xact_cxt.cachedFetchXidStatus = CLOG_XID_STATUS_COMMITTED; + t_thrd.xact_cxt.latestFetchXid = transactionId; + t_thrd.xact_cxt.latestFetchXidStatus = CLOG_XID_STATUS_COMMITTED; + } + + return did_commit; +} + +/* xid -> clog status */ +/* true if given transaction in progress */ +bool SSTransactionIdIsInProgress(TransactionId transactionId) +{ + bool in_progress = true; + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + + dms_ctx.xid_ctx.xid = *(uint64 *)(&transactionId); + dms_ctx.xid_ctx.inst_id = (unsigned char)SS_MASTER_ID; + + if (dms_request_opengauss_txn_status(&dms_ctx, (uint8)XID_INPROGRESS, (uint8 *)&in_progress) != DMS_SUCCESS) { + ereport(ERROR, (errmsg("SS get txn in_progress failed, xid=%lu.", transactionId))); + } + ereport(DEBUG1, (errmsg("SS get txn in_progress 
success, xid=%lu, in_progress=%d.", transactionId, in_progress))); + return in_progress; +} + +TransactionId SSMultiXactIdGetUpdateXid(TransactionId xmax, uint16 t_infomask, uint16 t_infomask2) +{ + TransactionId update_xid; + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + + dms_ctx.xid_ctx.xid = *(uint64 *)(&xmax); + dms_ctx.xid_ctx.inst_id = (unsigned char)SS_MASTER_ID; + + int ret = dms_request_opengauss_update_xid(&dms_ctx, t_infomask, t_infomask2, (unsigned long long *)&update_xid); + if (ret != DMS_SUCCESS) { + update_xid = InvalidTransactionId; + ereport(WARNING, + (errmsg("SS get update xid failed, multixact xid=%lu.", xmax))); + } + + ereport(DEBUG1, (errmsg("SS get update xid success, multixact xid=%lu, uxid=%lu.", xmax, update_xid))); + return update_xid; +} + +int SSGetOldestXmin(char *data, uint32 len, char *output_msg, uint32 *output_msg_len) +{ + if (unlikely(len != sizeof(SSBroadcastXmin))) { + ereport(DEBUG1, (errmsg("invalid broadcast xmin message"))); + return DMS_ERROR; + } + + SSBroadcastXminAck* getXminReq = (SSBroadcastXminAck *)output_msg; + getXminReq->type = BCAST_GET_XMIN_ACK; + GetOldestGlobalProcXmin(&(getXminReq->xmin)); + *output_msg_len = sizeof(SSBroadcastXminAck); + return DMS_SUCCESS; +} + +/* Calbulate the oldest xmin during broadcast xmin ack */ +int SSGetOldestXminAck(SSBroadcastXminAck *ack_data) +{ + TransactionId xmin_ack = pg_atomic_read_u64(&g_instance.dms_cxt.xminAck); + if (TransactionIdIsValid(ack_data->xmin) && TransactionIdIsNormal(ack_data->xmin) && + TransactionIdPrecedes(ack_data->xmin, xmin_ack)) { + pg_atomic_write_u64(&g_instance.dms_cxt.xminAck, ack_data->xmin); + } + return DMS_SUCCESS; +} + +bool SSGetOldestXminFromAllStandby() +{ + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + SSBroadcastXmin xmin_data; + xmin_data.type = BCAST_GET_XMIN; + xmin_data.xmin = InvalidTransactionId; + pg_atomic_write_u64(&g_instance.dms_cxt.xminAck, MaxTransactionId); + int ret = dms_broadcast_msg(&dms_ctx, (char 
*)&xmin_data, sizeof(SSBroadcastXmin), + (unsigned char)true, SS_BROADCAST_WAIT_FIVE_SECONDS); + if (ret != DMS_SUCCESS) { + return false; + } + return true; +} + +int SSCheckDbBackends(char *data, uint32 len, char *output_msg, uint32 *output_msg_len) +{ + if (unlikely(len != sizeof(SSBroadcastDbBackends))) { + return DMS_ERROR; + } + + SSBroadcastDbBackends *checkDbBackendsMsg = (SSBroadcastDbBackends *)data; + SSBroadcastDbBackendsAck *checkDbBackendsReq = (SSBroadcasDbBackendsAck *)output_msg; + int notherbackends, npreparedxacts; + + (void)CountOtherDBBackends(checkDbBackendsMsg->dbid, ¬herbackends, &npreparedxacts); + + checkDbBackendsReq->type = BCAST_CHECK_DB_BACKENDS_ACK; + checkDbBackendsReq->count = notherbackends + npreparedxacts; + *output_msg_len = sizeof(SSBroadcastDbBackendsAck); + return DMS_SUCCESS; +} + +int SSCheckDbBackendsAck(char *data, unsigned int len) +{ + SSBroadcastDbBackendsAck *ack_data = (SSBroadcasDbBackendsAck *)data; + + if (len != sizeof(SSBroadcastDbBackendsAck)) { + ereport(WARNING, (errmsg("SS get check other db backends failed."))); + return DMS_ERROR; + } + + // Is other backends running in the given DB? 
+ if (ack_data->count != 0) { + return DMS_EXIST_RUNNING_BACKENDS; + } + + return DMS_NO_RUNNING_BACKENDS; +} + +bool SSCheckDbBackendsFromAllStandby(Oid dbid) +{ + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + SSBroadcastDbBackends backends_data; + backends_data.type = BCAST_CHECK_DB_BACKENDS; + backends_data.dbid = dbid; + + int ret = dms_broadcast_msg(&dms_ctx, (char *)&backends_data, sizeof(SSBroadcastDbBackends), + (unsigned char)true, SS_BROADCAST_WAIT_FIVE_SECONDS); + if (ret != DMS_NO_RUNNING_BACKENDS) { + return true; + } + return false; +} + +void SSSendSharedInvalidMessages(const SharedInvalidationMessage *msgs, int n) +{ + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + for (int i = 0; i < n; i++) { + SharedInvalidationMessage *msg = (SharedInvalidationMessage *)(msgs + i); + SSBroadcastSI ssmsg; + ssmsg.type = BCAST_SI; + if (msg->id >= SHAREDINVALFUNC_ID) { + errno_t rc = + memcpy_s(&(ssmsg.msg), sizeof(SharedInvalidationMessage), msg, sizeof(SharedInvalidationMessage)); + securec_check_c(rc, "", ""); + } else { + ereport(DEBUG1, (errmsg("invalid shared invalidation msg type!"))); + return; + } + int backup_output = t_thrd.postgres_cxt.whereToSendOutput; + t_thrd.postgres_cxt.whereToSendOutput = DestNone; + int ret = dms_broadcast_msg(&dms_ctx, (char *)&ssmsg, sizeof(SSBroadcastSI), (unsigned char)false, + SS_BROADCAST_WAIT_FIVE_SECONDS); + if (ret != DMS_SUCCESS) { + ereport(DEBUG1, (errmsg("SS broadcast SI msg failed!"))); + } + t_thrd.postgres_cxt.whereToSendOutput = backup_output; + } +} + +void SSBCastDropRelAllBuffer(RelFileNode *rnodes, int rnode_len) +{ + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + + if (rnode_len <= 0 || rnode_len > DROP_BUFFER_USING_HASH_DEL_REL_NUM_THRESHOLD) { + return; + } + + uint32 bytes = (uint32)(sizeof(RelFileNode) * rnode_len); + SSBroadcastDropRelAllBuffer *msg = (SSBroadcastDropRelAllBuffer *)palloc( + sizeof(SSBroadcastDropRelAllBuffer) + bytes); + msg->type = BCAST_DROP_REL_ALL_BUFFER; + 
msg->size = rnode_len; + errno_t rc = memcpy_s(msg->rnodes, bytes, rnodes, bytes); + securec_check_c(rc, "", ""); + + int output_backup = t_thrd.postgres_cxt.whereToSendOutput; + t_thrd.postgres_cxt.whereToSendOutput = DestNone; + int ret = dms_broadcast_msg(&dms_ctx, (char *)msg, sizeof(SSBroadcastDropRelAllBuffer) + bytes, + (unsigned char)false, SS_BROADCAST_WAIT_FIVE_SECONDS); + if (ret != DMS_SUCCESS) { + ereport(DEBUG1, (errmsg("SS broadcast drop rel all buffer msg failed, rnode=[%d/%d/%d/%d]", + rnodes->spcNode, rnodes->dbNode, rnodes->relNode, rnodes->bucketNode))); + } + t_thrd.postgres_cxt.whereToSendOutput = output_backup; +} + +void SSBCastDropRelRangeBuffer(RelFileNode node, ForkNumber forkNum, BlockNumber firstDelBlock) +{ + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + + SSBroadcastDropRelRangeBuffer *msg = (SSBroadcastDropRelRangeBuffer *)palloc( + sizeof(SSBroadcastDropRelRangeBuffer)); + msg->type = BCAST_DROP_REL_RANGE_BUFFER; + msg->node = node; + msg->forkNum = forkNum; + msg->firstDelBlock = firstDelBlock; + + int output_backup = t_thrd.postgres_cxt.whereToSendOutput; + t_thrd.postgres_cxt.whereToSendOutput = DestNone; + int ret = dms_broadcast_msg(&dms_ctx, (char *)msg, sizeof(SSBroadcastDropRelRangeBuffer), + (unsigned char)false, SS_BROADCAST_WAIT_FIVE_SECONDS); + if (ret != DMS_SUCCESS) { + ereport(DEBUG1, (errmsg("SS broadcast drop rel range buffer msg failed, rnode=[%d/%d/%d/%d]," + "firstDelBlock=%u", node.spcNode, node.dbNode, node.relNode, node.bucketNode, firstDelBlock))); + } + t_thrd.postgres_cxt.whereToSendOutput = output_backup; +} + +void SSBCastDropDBAllBuffer(Oid dbid) +{ + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + + SSBroadcastDropDBAllBuffer *msg = (SSBroadcastDropDBAllBuffer *)palloc( + sizeof(SSBroadcastDropDBAllBuffer)); + msg->type = BCAST_DROP_DB_ALL_BUFFER; + msg->dbid = dbid; + + int output_backup = t_thrd.postgres_cxt.whereToSendOutput; + t_thrd.postgres_cxt.whereToSendOutput = DestNone; + int ret = 
dms_broadcast_msg(&dms_ctx, (char *)msg, sizeof(SSBroadcastDropDBAllBuffer), + (unsigned char)false, SS_BROADCAST_WAIT_FIVE_SECONDS); + if (ret != DMS_SUCCESS) { + ereport(DEBUG1, (errmsg("SS broadcast drop db all buffer msg failed, db=%d", dbid))); + } + t_thrd.postgres_cxt.whereToSendOutput = output_backup; +} + +void SSBCastDropSegSpace(Oid spcNode, Oid dbNode) +{ + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + + SSBroadcastDropSegSpace *msg = (SSBroadcastDropSegSpace *)palloc( + sizeof(SSBroadcastDropSegSpace)); + msg->type = BCAST_DROP_SEG_SPACE; + msg->spcNode = spcNode; + msg->dbNode = dbNode; + + int output_backup = t_thrd.postgres_cxt.whereToSendOutput; + t_thrd.postgres_cxt.whereToSendOutput = DestNone; + int ret = dms_broadcast_msg(&dms_ctx, (char *)msg, sizeof(SSBroadcastDropSegSpace), + (unsigned char)false, SS_BROADCAST_WAIT_FIVE_SECONDS); + if (ret != DMS_SUCCESS) { + ereport(DEBUG1, (errmsg("SS broadcast drop seg space msg failed, spc=%d, db=%d", spcNode, dbNode))); + } + t_thrd.postgres_cxt.whereToSendOutput = output_backup; +} + +int SSProcessSharedInvalMsg(char *data, uint32 len) +{ + if (unlikely(len != sizeof(SSBroadcastSI))) { + ereport(DEBUG1, (errmsg("invalid broadcast SI message"))); + return DMS_ERROR; + } + + SSBroadcastSI* ssmsg = (SSBroadcastSI *)data; + /* process msg one by one */ + SendSharedInvalidMessages(&(ssmsg->msg), 1); + return DMS_SUCCESS; +} + +void SSUpdateSegDropTimeline(uint32 seg_drop_timeline) +{ + dms_context_t dms_ctx; + InitDmsContext(&dms_ctx); + SSBroadcastSegDropTL ssmsg; + ssmsg.type = BCAST_SEGDROPTL; + ssmsg.seg_drop_timeline = seg_drop_timeline; + int output_backup = t_thrd.postgres_cxt.whereToSendOutput; + t_thrd.postgres_cxt.whereToSendOutput = DestNone; + int ret = dms_broadcast_msg(&dms_ctx, (char *)&ssmsg, sizeof(SSBroadcastSegDropTL), (unsigned char)false, + SS_BROADCAST_WAIT_FIVE_SECONDS); + if (ret != DMS_SUCCESS) { + ereport(DEBUG1, (errmsg("SS broadcast seg_drop_timeline failed!"))); + } + 
t_thrd.postgres_cxt.whereToSendOutput = output_backup; +} + +int SSProcessSegDropTimeline(char *data, uint32 len) +{ + if (unlikely(len != sizeof(SSBroadcastSegDropTL))) { + ereport(DEBUG1, (errmsg("invalid broadcast seg drop tl message"))); + return DMS_ERROR; + } + + SSBroadcastSegDropTL* ssmsg = (SSBroadcastSegDropTL *)data; + pg_atomic_write_u32(&g_instance.segment_cxt.segment_drop_timeline, ssmsg->seg_drop_timeline); + return DMS_SUCCESS; +} + +int SSProcessDropRelAllBuffer(char *data, uint32 len) +{ + if (unlikely(len < sizeof(SSBroadcastDropRelAllBuffer))) { + ereport(DEBUG1, (errmodule(MOD_DMS), errmsg("invalid drop rel buffer message"))); + return DMS_ERROR; + } + + SSBroadcastDropRelAllBuffer *msg = (SSBroadcastDropRelAllBuffer *)data; + int rnode_len = msg->size; + RelFileNode *rnodes = msg->rnodes; + + if (unlikely(rnode_len <= 0)) { + return DMS_SUCCESS; + } + + if (unlikely(rnode_len > DROP_BUFFER_USING_HASH_DEL_REL_NUM_THRESHOLD)) { + ereport(DEBUG1, (errmodule(MOD_DMS), errmsg("invalid buffer message is invalidate"))); + return DMS_ERROR; + } + + if (unlikely(len != (sizeof(SSBroadcastDropRelAllBuffer) + rnode_len * sizeof(RelFileNode)))) { + ereport(DEBUG1, (errmodule(MOD_DMS), errmsg("invalid drop rel buffer message"))); + return DMS_ERROR; + } + + DropRelFileNodeAllBuffersUsingScan(rnodes, rnode_len); + return DMS_SUCCESS; +} + +int SSProcessDropRelRangeBuffer(char *data, uint32 len) +{ + if (unlikely(len != sizeof(SSBroadcastDropRelRangeBuffer))) { + ereport(DEBUG1, (errmodule(MOD_DMS), errmsg("invalid drop rel range buffer message"))); + return DMS_ERROR; + } + SSBroadcastDropRelRangeBuffer *msg = (SSBroadcastDropRelRangeBuffer *)data; + DropRelFileNodeShareBuffers(msg->node, msg->forkNum, msg->firstDelBlock); + return DMS_SUCCESS; +} + +int SSProcessDropDBAllBuffer(char *data, uint32 len) +{ + if (unlikely(len != sizeof(SSBroadcastDropDBAllBuffer))) { + ereport(DEBUG1, (errmodule(MOD_DMS), errmsg("invalid drop db all buffer message"))); + 
return DMS_ERROR; + } + + SSBroadcastDropDBAllBuffer *msg = (SSBroadcastDropDBAllBuffer *)data; + DropDatabaseBuffers(msg->dbid); + return DMS_SUCCESS; +} + +int SSProcessDropSegSpace(char *data, uint32 len) +{ + if (unlikely(len != sizeof(SSBroadcastDropSegSpace))) { + ereport(DEBUG1, (errmodule(MOD_DMS), errmsg("invalid drop seg space message"))); + return DMS_ERROR; + } + + SSBroadcastDropSegSpace *msg = (SSBroadcastDropSegSpace *)data; + SSDrop_seg_space(msg->spcNode, msg->dbNode); + return DMS_SUCCESS; +} + diff --git a/src/gausskernel/ddes/ddes_commit_id b/src/gausskernel/ddes/ddes_commit_id new file mode 100644 index 000000000..39539cdaf --- /dev/null +++ b/src/gausskernel/ddes/ddes_commit_id @@ -0,0 +1,2 @@ +dms_commit_id=c37ea91a52ebdaa0fb6cae2905525030efa9d831 +dss_commit_id=534ef46d718b4c61be50f24b7984f5ec9d0fcc8a \ No newline at end of file diff --git a/src/gausskernel/ddes/script/dms_contrl.sh b/src/gausskernel/ddes/script/dms_contrl.sh new file mode 100644 index 000000000..8028d4a0e --- /dev/null +++ b/src/gausskernel/ddes/script/dms_contrl.sh @@ -0,0 +1,252 @@ +#!/bin/bash +export PATH=${GAUSSHOME}/bin:$PATH +export LD_LIBRARY_PATH=${GAUSSHOME}/lib:${GAUSSHOME}/add-ons:$LD_LIBRARY_PATH + +curr_path=`dirname $(readlink -f $0)` +curr_filename=`basename $(readlink -f $0)` +os_user=`whoami` +file_user=`ls -l ${curr_path}"/${curr_filename}" | awk '{print $3}'` + +if [ ${file_user} != ${os_user} ]; then + echo "Can't run ${curr_filename}, because it does not belong to the current user!" 
+ exit 1 +fi + +GSDB_BIN=${GAUSSHOME}/bin/gaussdb +BIN_PATH=${GAUSSHOME}/bin +SCRIPT_NAME=$0 + +usage() +{ + echo "Usage: $0 [cmd] [gaussdb_id] [GSDB_HOME] [DSS_HOME]" + echo "cmd:" + echo " -start: start ${GSDB_BIN}&delete dss_stop_flag_file" + echo " -stop: stop ${GSDB_BIN}" + echo " -check: check ${GSDB_BIN}" + echo " -clean: clean ${GSDB_BIN}" + echo " -reg: register gaussdb" + echo " -unreg: unregister gaussdb" + echo " -isreg: check whether gaussdb is registered" + echo "gaussdb_id:" + echo " gaussdb id" + echo "GSDB_HOME:" + echo " ${GSDB_BIN} data path" + echo "DSS_HOME:" + echo " dssserver data path" +} + +if [ $# -lt 3 ] +then + echo "parameter numbers not meet, num=$#." + usage + exit 1 +fi + +log() +{ + time=`date "+%Y-%m-%d %H:%M:%S"` + echo "$time $1" +} + +assert_empty() +{ + return +} + +assert_nonempty() +{ + if [[ -z ${2} ]] + then + log "The ${1} parameter is empty." + exit 1 + fi +} + +program_pid() +{ + pid=`ps -f f -u \`whoami\` | grep ${1} | grep ${2} | grep -v grep | grep -v ${SCRIPT_NAME} | awk '{print $2}'` + echo ${pid} +} + +kill_program() +{ + assert_nonempty 1 ${1} + assert_nonempty 2 ${2} + pid=`program_pid $1 $2` + if [[ -z ${pid} ]] + then + log "${1} is already dead." + return + fi + + kill -9 ${pid} + sleep 3 + ps -f -p "${pid}" | grep ${1} + if [ $? = 0 ] + then + log "ERROR! ${1} with pid:${pid} is not killed..." 
+ exit 0 + fi +} + +function clear_script_log +{ + local _log_dir=$1 + local _log_name=$2 + local _max_log_backup=$3 + + if [ -L ${_log_dir} ]; then + typeset log_num=`find -L "${_log_dir}" -maxdepth 1 -type f -name "${_log_name}*" | wc -l` + if [ ${log_num} -ge ${_max_log_backup} ];then + find -L "${_log_dir}" -maxdepth 1 -type f -name "${_log_name}*" | xargs ls -t {} 2>/dev/null | tail -n $(expr ${log_num} - ${_max_log_backup}) | xargs -i rm -f {} + fi + else + typeset log_num=$(find "${_log_dir}" -maxdepth 1 -type f -name "${_log_name}*" | wc -l) + if [ ${log_num} -ge ${_max_log_backup} ];then + find "${_log_dir}" -maxdepth 1 -type f -name "${_log_name}*" | xargs ls -t {} 2>/dev/null | tail -n $(expr ${log_num} - ${_max_log_backup}) | xargs -i rm -f {} + fi + fi +} + +check_log_file() +{ + log_path=$1 + log_file=$2 + operation=$3 + # max log file size 16 * 1024 * 1024 + MAX_LOG_SIZE=16777216 + MAX_LOG_BACKUP=10 + log_file_size=$(ls -l ${log_file} |awk '{print $5}') + if [ -f ${log_file} ];then + if [ ${log_file_size} -ge ${MAX_LOG_SIZE} ];then + mv -f ${log_file} "${log_path}/${operation}-`date +%Y-%m-%d_%H%M%S`.log" 2>/dev/null + clear_script_log "${log_path}" "${operation}-" $MAX_LOG_BACKUP + fi + fi +} + +touch_logfile() +{ + log_file=$1 + if [ ! -f $log_file ] + then + touch $log_file + fi +} + +assert_nonempty 1 ${1} +assert_nonempty 2 ${2} +assert_nonempty 3 ${3} + +CMD=${1} +INSTANCE_ID=${2} +GSDB_HOME=${3} +TMP_DSS_HOME=${4} +if [[ ! 
-z "${TMP_DSS_HOME}" ]] +then + export DSS_HOME=${4} +fi + +# 1st step: if dss_flag_file exists, delete it +# 2nd step: if dssserver exists, start database +# 3nd step: if dssserver no exists, exit +function Start() +{ + db_start_log=${GSDB_HOME}/DBstart.log + check_log_file ${GSDB_HOME} $db_start_log DBstart + + if [[ -z "${GSDB_HOME}" ]] + then + db_start_log=/dev/null + else + touch_logfile $db_start_log + chmod 600 $db_start_log + fi + + dss_flag_file=instance_manual_start_$(expr $INSTANCE_ID + 20001 - 6001) + if [[ -f $GAUSSHOME/bin/$dss_flag_file ]]; + then + rm $GAUSSHOME/bin/$dss_flag_file + fi + + pid=`program_pid dssserver ${DSS_HOME}` + if [[ -z ${pid} ]] + then + log "dssserver not exist in dir ${DSS_HOME}..." + exit 1 + else + log "Starting dn..." + nohup ${GSDB_BIN} -D ${GSDB_HOME} >> $db_start_log 2>&1 & + sleep 3 + log "start dn in ${DSS_HOME} success." + fi +} + +# 1st step: kill database +function Stop() +{ + log "stop ${GSDB_BIN}..." + ${BIN_PATH}/gs_ctl stop -D ${GSDB_HOME} + sleep 5 + + pid=`program_pid ${GSDB_BIN} ${GSDB_HOME}` + if [[ -z ${pid} ]] + then + log "${GSDB_BIN} stopped in dir ${GSDB_HOME}..." + else + log "Killing ${GSDB_BIN} if running..." + kill_program ${GSDB_BIN} ${GSDB_HOME} + fi +} + +# 1st step: check database if exists +function Check() +{ + pid=$(program_pid ${GSDB_BIN} ${GSDB_HOME}) + if [[ -z ${pid} ]] + then + log "check ${GSDB_BIN} in ${GSDB_HOME} fail." + exit 1 + fi + + log "check gaussdb in ${GSDB_HOME} success." +} + +# 1st step: kill database +function Clean() +{ + log "stop ${GSDB_BIN}..." 
+ kill_program ${GSDB_BIN} ${GSDB_HOME} + sleep 3 +} + +function Main() +{ + if [ "$CMD" == "-start" ]; then + Start + exit 0 + elif [ "$CMD" == "-stop" ]; then + Stop + exit 0 + elif [ "$CMD" == "-check" ]; then + Check + exit 0 + elif [ "$CMD" == "-clean" ]; then + Clean + exit 0 + elif [ "$CMD" == "-reg" ]; then + # IO FENCE + exit 0 + elif [ "$CMD" == "-unreg" ]; then + # IO FENCE + exit 0 + elif [ "$CMD" == "-isreg" ]; then + # IO FENCE + exit 11 + else + echo "Please confirm the input parameters." + exit 1 + fi +} + +Main \ No newline at end of file diff --git a/src/gausskernel/ddes/script/dss_contrl.sh b/src/gausskernel/ddes/script/dss_contrl.sh new file mode 100644 index 000000000..597873909 --- /dev/null +++ b/src/gausskernel/ddes/script/dss_contrl.sh @@ -0,0 +1,423 @@ +#!/bin/bash +export PATH=${GAUSSHOME}/bin:$PATH +export LD_LIBRARY_PATH=${GAUSSHOME}/lib:${GAUSSHOME}/add-ons:$LD_LIBRARY_PATH + +curr_path=`dirname $(readlink -f $0)` +curr_filename=`basename $(readlink -f $0)` +os_user=`whoami` +file_user=`ls -l ${curr_path}"/${curr_filename}" | awk '{print $3}'` + +if [ ${file_user} != ${os_user} ]; then + echo "Can't run ${curr_filename}, because it does not belong to the current user!" + exit 1 +fi + +GSDB_BIN=${GAUSSHOME}/bin/gaussdb +BIN_PATH=${GAUSSHOME}/bin +SCRIPT_NAME=$0 + +usage() +{ + echo "Usage: $0 [cmd] [dssserver_id] [DSS_HOME] [GSDB_HOME]" + echo "cmd:" + echo " -start: start dssserver" + echo " -stop: stop dssserver&create dn_stop_flag_file" + echo " -check: check dssserver" + echo " -clean: clean dssserver&${GSDB_BIN}" + echo " -reg: register dssserver" + echo " -unreg: unregister dssserver" + echo " -isreg: check whether dssserver is registered" + echo "dssserver_id:" + echo " dssserver id" + echo "DSS_HOME:" + echo " dssserver data path" + echo "GSDB_HOME:" + echo " ${GSDB_BIN} data path" +} + +if [ $# -lt 4 ] +then + echo "parameter numbers not meet, num=$#." 
+ usage + exit 1 +fi + +log() +{ + time=`date "+%Y-%m-%d %H:%M:%S"` + echo "$time $1" +} + +assert_empty() +{ + return +} + +assert_nonempty() +{ + if [[ -z ${2} ]] + then + log "The ${1} parameter is empty." + exit 1 + fi +} + +program_pid() +{ + pid=`ps -f f -u \`whoami\` | grep ${1} | grep ${2} | grep -v grep | grep -v ${SCRIPT_NAME} | awk '{print $2}'` + echo ${pid} +} + +kill_program() +{ + assert_nonempty 1 ${1} + assert_nonempty 2 ${2} + pid=`program_pid $1 $2` + if [[ -z ${pid} ]] + then + log "${1} is already dead." + return + fi + + kill -9 ${pid} + sleep 3 + ps -f -p "${pid}" | grep ${1} + if [ $? = 0 ] + then + log "ERROR! ${1} with pid:${pid} is not killed..." + exit 0 + fi +} + +check_dss_start() +{ + started=0 + for (( i=1; i<30; i++ )) + do + pid=`program_pid dssserver ${1}` + if [[ ! -z ${pid} ]] + then + started=1 + break + fi + sleep 1 + done + + if [[ ${started} -eq 0 ]] + then + log "ERROR! start dssserver in dir ${1} failed" + exit 1 + fi +} + +function clear_script_log +{ + local _log_dir=$1 + local _log_name=$2 + local _max_log_backup=$3 + + if [ -L ${_log_dir} ]; then + typeset log_num=`find -L "${_log_dir}" -maxdepth 1 -type f -name "${_log_name}*" | wc -l` + if [ ${log_num} -ge ${_max_log_backup} ];then + find -L "${_log_dir}" -maxdepth 1 -type f -name "${_log_name}*" | xargs ls -t {} 2>/dev/null | tail -n $(expr ${log_num} - ${_max_log_backup}) | xargs -i rm -f {} + fi + else + typeset log_num=$(find "${_log_dir}" -maxdepth 1 -type f -name "${_log_name}*" | wc -l) + if [ ${log_num} -ge ${_max_log_backup} ];then + find "${_log_dir}" -maxdepth 1 -type f -name "${_log_name}*" | xargs ls -t {} 2>/dev/null | tail -n $(expr ${log_num} - ${_max_log_backup}) | xargs -i rm -f {} + fi + fi +} + +check_log_file() +{ + log_path=$1 + log_file=$2 + operation=$3 + # max log file size 16 * 1024 * 1024 + MAX_LOG_SIZE=16777216 + MAX_LOG_BACKUP=10 + log_file_size=$(ls -l ${log_file} |awk '{print $5}') + if [ -f ${log_file} ];then + if [ ${log_file_size} 
-ge ${MAX_LOG_SIZE} ];then + mv -f ${log_file} "${log_path}/${operation}-`date +%Y-%m-%d_%H%M%S`.log" 2>/dev/null + clear_script_log "${log_path}" "${operation}-" $MAX_LOG_BACKUP + fi + fi +} + +touch_logfile() +{ + log_file=$1 + if [ ! -f $log_file ] + then + touch $log_file + fi +} + +assert_nonempty 1 ${1} +assert_nonempty 2 ${2} +assert_nonempty 3 ${3} +assert_nonempty 4 ${4} + +CMD=${1} +INSTANCE_ID=${2} +export DSS_HOME=${3} +GSDB_HOME=${4} +CONN_PATH=UDS:${DSS_HOME}/.dss_unix_d_socket + +function check_dss_config() +{ + log "Checking dss_inst.ini before start dss..." + if [[ ! -e ${DSS_HOME}/cfg/dss_inst.ini ]] + then + log "${DSS_HOME}/cfg/dss_inst.ini must exist" + exit 1 + fi + + log "Checking dss_vg_conf.ini before start dss..." + if [[ ! -e ${DSS_HOME}/cfg/dss_vg_conf.ini ]] + then + log "${DSS_HOME}/cfg/dss_vg_conf.ini must exist" + exit 1 + fi + + LSNR_PATH=`awk '/LSNR_PATH/{print}' ${DSS_HOME}/cfg/dss_inst.ini | awk -F= '{print $2}' | xargs` + if [[ -z ${LSNR_PATH} ]] + then + log "can't find lsnr path. Aborting." + exit 1 + fi + CONN_PATH=UDS:${LSNR_PATH}/.dss_unix_d_socket +} + +# 1st step: if database exists, kill it +# 2nd step: if dssserver no exists, start it +function Start() +{ + check_dss_config + + startdss_log=${DSS_HOME}/startdss.log + db_start_log=${GSDB_HOME}/DBstart.log + check_log_file ${DSS_HOME} $startdss_log startdss + check_log_file ${GSDB_HOME} $db_start_log DBstart + if [[ -z "${DSS_HOME}" ]] + then + startdss_log=/dev/null + else + touch_logfile $startdss_log + chmod 600 $startdss_log + fi + + if [[ -z "${GSDB_HOME}" ]] + then + db_start_log=/dev/null + else + touch_logfile $db_start_log + chmod 600 $db_start_log + fi + + pid=`program_pid dssserver ${DSS_HOME}` + if [[ ! -z ${pid} ]] + then + log "dssserver already started in dir ${DSS_HOME}..." + else + log "Starting dssserver..." + pid=`program_pid ${GSDB_BIN} ${GSDB_HOME}` + if [[ ! 
-z ${pid} ]] + then + kill_program ${GSDB_BIN} ${GSDB_HOME} + else + log "${GSDB_BIN} is offline in dir ${GSDB_HOME}..." + fi + nohup dssserver -D ${DSS_HOME} >> ${startdss_log} 2>&1 & + check_dss_start ${DSS_HOME} + log "start dss in ${DSS_HOME} success." + fi +} + +# 1st Whether there is a dn stop tag file +# 2st stop tag file need to be created when there is no dn stop tag file +# 3st step: kill database +# 4nd step: stop dssserver by using dsscmd +# 5rd step: if fail to stop dssserver in 2nd step, then kill dssserver +function Stop() +{ + log "stop ${GSDB_BIN}..." + db_flag_file=instance_manual_start_$(expr $INSTANCE_ID + 6001) + echo "db_flag_file=$db_flag_file" + + if [[ -f $GAUSSHOME/bin/$db_flag_file ]]; + then + log "$GAUSSHOME/bin/$db_flag_file is exist" + else + touch $GAUSSHOME/bin/$db_flag_file + fi + + pid=$(program_pid ${GSDB_BIN} ${GSDB_HOME}) + if [[ -z ${pid} ]] + then + log "stop dssserver if running..." + nohup dsscmd stopdss -U ${CONN_PATH} >> /dev/null 2>&1 + sleep 2 + + pid=`program_pid dssserver ${DSS_HOME}` + if [[ -z ${pid} ]] + then + log "dssserver stopped in dir ${DSS_HOME}..." + exit 0 + fi + log "Killing dssserver if running..." + kill_program dssserver ${DSS_HOME} + else + log "stop ${GSDB_BIN}..." + ${BIN_PATH}/gs_ctl stop -D ${GSDB_HOME} + sleep 5 + + pid=`program_pid ${GSDB_BIN} ${GSDB_HOME}` + if [[ -z ${pid} ]] + then + log "${GSDB_BIN} stopped in dir ${GSDB_HOME}..." + else + log "Killing ${GSDB_BIN} if running..." + kill_program ${GSDB_BIN} ${GSDB_HOME} + fi + + log "stop dssserver if running..." + nohup dsscmd stopdss -U ${CONN_PATH} >> /dev/null 2>&1 + sleep 2 + pid=`program_pid dssserver ${DSS_HOME}` + if [[ -z ${pid} ]] + then + log "dssserver stopped in dir ${DSS_HOME}..." + exit 0 + fi + log "Killing dssserver if running..." 
+ kill_program dssserver ${DSS_HOME} + fi +} + +# 1st step: check dssserver if exists +function Check() +{ + pid=$(program_pid dssserver ${DSS_HOME}) + if [[ -z ${pid} ]] + then + log "check dssserver in ${DSS_HOME} fail." + exit 1 + fi + + log "check dss in ${DSS_HOME} success." +} + +# 1st step: kill database +# 2nd step: stop dssserver by using dsscmd +# 3rd step: if fail to stop dssserver in 2nd step, then kill dssserver +function Clean() +{ + log "stop ${GSDB_BIN}..." + kill_program ${GSDB_BIN} ${GSDB_HOME} + sleep 3 + + log "stop dssserver if running..." + nohup dsscmd stopdss -U ${CONN_PATH} >> /dev/null 2>&1 + sleep 2 + + pid=`program_pid dssserver ${DSS_HOME}` + if [[ -z ${pid} ]] + then + log "dssserver stopped in dir ${DSS_HOME}..." + exit 0 + fi + log "Killing dssserver if running..." + kill_program dssserver ${DSS_HOME} +} + +function Reg() +{ + LOCAL_INSTANCE_ID=`awk '/INST_ID/{print}' ${DSS_HOME}/cfg/dss_inst.ini | awk -F= '{print $2}' | xargs` + if [[ -z ${LOCAL_INSTANCE_ID} ]] + then + log "can't find inst id. Aborting." + exit 1 + fi + dsscmd reghl -i ${LOCAL_INSTANCE_ID} -D ${DSS_HOME} >> /dev/null 2>&1 + if [[ $? != 0 ]] + then + log "dsscmd reghl -i ${LOCAL_INSTANCE_ID} -D ${DSS_HOME} fail." + exit 1 + fi + log "register success." +} + +function Unreg() +{ + LOCAL_INSTANCE_ID=`awk '/INST_ID/{print}' ${DSS_HOME}/cfg/dss_inst.ini | awk -F= '{print $2}' | xargs` + if [[ -z ${LOCAL_INSTANCE_ID} ]] + then + log "can't find inst id. Aborting." + exit 1 + fi + if [[ ${LOCAL_INSTANCE_ID} == ${INSTANCE_ID} ]] + then + dsscmd unreghl -i ${LOCAL_INSTANCE_ID} -D ${DSS_HOME} >> /dev/null 2>&1 + else + pid=$(program_pid dssserver ${DSS_HOME}) + if [[ -z ${pid} ]] + then + log "dssserver is not running." + exit 1 + fi + dsscmd kickh -i ${INSTANCE_ID} -U ${CONN_PATH} >> /dev/null 2>&1 + fi + + if [[ $? != 0 ]] + then + log "dsscmd kickh -i ${INSTANCE_ID} -U ${CONN_PATH} fail, or dsscmd unreghl -i ${LOCAL_INSTANCE_ID} -D ${DSS_HOME} fail." 
+ exit 1 + fi + log "unregister ${INSTANCE_ID} success." +} + +function Isreg() +{ + dsscmd inq_reg -i ${INSTANCE_ID} -D ${DSS_HOME} >> /dev/null 2>&1 + result=$? + if [[ ${result} == 255 ]] + then + log "dsscmd inq_reg -i ${INSTANCE_ID} -D ${DSS_HOME} fail." + exit -1 + fi + exit ${result} +} + +function Main() +{ + if [ "$CMD" == "-start" ]; then + Start + exit 0 + elif [ "$CMD" == "-stop" ]; then + Stop + exit 0 + elif [ "$CMD" == "-check" ]; then + Check + exit 0 + elif [ "$CMD" == "-clean" ]; then + Clean + exit 0 + elif [ "$CMD" == "-reg" ]; then + Reg + exit 0 + elif [ "$CMD" == "-unreg" ]; then + Unreg + exit 0 + elif [ "$CMD" == "-isreg" ]; then + Isreg + exit 0 + else + echo "Please confirm the input parameters." + exit 1 + fi +} + +Main \ No newline at end of file diff --git a/src/gausskernel/optimizer/commands/async.cpp b/src/gausskernel/optimizer/commands/async.cpp index ee5070a23..6998da1c4 100644 --- a/src/gausskernel/optimizer/commands/async.cpp +++ b/src/gausskernel/optimizer/commands/async.cpp @@ -169,6 +169,8 @@ typedef struct AsyncQueueEntry { #define AsyncQueueEntryEmptySize (offsetof(AsyncQueueEntry, data) + 2) +#define NOTIFYDIR (g_instance.datadir_cxt.notifyDir) + /* * Struct describing a queue position, and assorted macros for working with it */ @@ -389,7 +391,8 @@ void AsyncShmemInit(void) * Set up SLRU management of the pg_notify data. 
*/ SimpleLruInit(AsyncCtl, GetBuiltInTrancheName(LWTRANCHE_ASYNC_CTL), LWTRANCHE_ASYNC_CTL, NUM_ASYNC_BUFFERS, 0, - AsyncCtlLock, "pg_notify"); + AsyncCtlLock, NOTIFYDIR); + /* Override default assumption that writes should be fsync'd */ AsyncCtl->do_fsync = false; diff --git a/src/gausskernel/optimizer/commands/cluster.cpp b/src/gausskernel/optimizer/commands/cluster.cpp index d6c9a6f21..5eaad2488 100755 --- a/src/gausskernel/optimizer/commands/cluster.cpp +++ b/src/gausskernel/optimizer/commands/cluster.cpp @@ -5514,8 +5514,10 @@ static void HbktModifyPartIndexRelnode(Relation indexRel, Partition indexPart, D newrelfilenode = GetNewRelFileNode(indexPart->pd_part->reltablespace, NULL, indexRel->rd_rel->relpersistence); bucketNode = InvalidBktId; } else { + Oid database_id = (ConvertToRelfilenodeTblspcOid(indexPart->pd_part->reltablespace) == GLOBALTABLESPACE_OID) ? + InvalidOid : u_sess->proc_cxt.MyDatabaseId; newrelfilenode = seg_alloc_segment(ConvertToRelfilenodeTblspcOid(indexPart->pd_part->reltablespace), - u_sess->proc_cxt.MyDatabaseId, isBucket, InvalidBlockNumber); + database_id, isBucket, InvalidBlockNumber); bucketNode = SegmentBktId; } diff --git a/src/gausskernel/optimizer/commands/dbcommands.cpp b/src/gausskernel/optimizer/commands/dbcommands.cpp index c5cc2ea35..4d92e76cc 100644 --- a/src/gausskernel/optimizer/commands/dbcommands.cpp +++ b/src/gausskernel/optimizer/commands/dbcommands.cpp @@ -65,6 +65,7 @@ #include "storage/ipc.h" #include "storage/procarray.h" #include "storage/smgr/smgr.h" +#include "storage/file/fio_device.h" #include "utils/acl.h" #include "utils/builtins.h" #include "utils/fmgroids.h" @@ -2236,6 +2237,9 @@ int errdetail_busy_db(int notherbackends, int npreparedxacts) "There are %d other sessions using the database.", notherbackends, notherbackends); + else if (ENABLE_DMS && notherbackends == 0 && npreparedxacts == 0) + errdetail("[SS] There is other session(s) or prepared transaction(s) using the database " + "in other db 
node(s). Or some other node(s) is not connected in the cluster by dms."); else errdetail_plural("There is %d prepared transaction using the database.", "There are %d prepared transactions using the database.", diff --git a/src/gausskernel/optimizer/commands/functioncmds.cpp b/src/gausskernel/optimizer/commands/functioncmds.cpp index 48f773de6..e7bf9df1e 100644 --- a/src/gausskernel/optimizer/commands/functioncmds.cpp +++ b/src/gausskernel/optimizer/commands/functioncmds.cpp @@ -1092,6 +1092,7 @@ void CreateFunction(CreateFunctionStmt* stmt, const char* queryString, Oid pkg_o if (strcasecmp(get_language_name(languageOid), "plpgsql") != 0) { u_sess->plsql_cxt.isCreateFunction = false; } + #ifdef ENABLE_MULTIPLE_NODES if (languageOid == JavalanguageId) { /* @@ -3037,4 +3038,4 @@ static void checkAllowAlter(HeapTuple tup) { errcause("package is one object,not allow alter function in package"), erraction("rebuild package"))); } -} \ No newline at end of file +} diff --git a/src/gausskernel/optimizer/commands/matview.cpp b/src/gausskernel/optimizer/commands/matview.cpp index 80b7c7802..551138cf7 100755 --- a/src/gausskernel/optimizer/commands/matview.cpp +++ b/src/gausskernel/optimizer/commands/matview.cpp @@ -313,21 +313,34 @@ int64 MlogGetMaxSeqno(Oid mlogid) */ void SetRelationIsScannable(Relation relation) { - Page page; + Page page; + char* unaligned_buffer = NULL; - Assert(relation->rd_rel->relkind == RELKIND_MATVIEW); - Assert(relation->rd_isscannable == false); + Assert(relation->rd_rel->relkind == RELKIND_MATVIEW); + Assert(relation->rd_isscannable == false); - RelationOpenSmgr(relation); - page = (Page)palloc(BLCKSZ); - PageInit(page, BLCKSZ, 0, true); - PageSetChecksumInplace(page, 0); - smgrextend(relation->rd_smgr, MAIN_FORKNUM, 0, (char *) page, true); - pfree(page); + RelationOpenSmgr(relation); - smgrimmedsync(relation->rd_smgr, MAIN_FORKNUM); + if (ENABLE_DSS) { + unaligned_buffer = (char*)palloc(BLCKSZ + ALIGNOF_BUFFER); + page = 
(Page)BUFFERALIGN(unaligned_buffer); + } else { + page = (Page)palloc(BLCKSZ); + } - RelationCacheInvalidateEntry(relation->rd_id); + PageInit(page, BLCKSZ, 0, true); + PageSetChecksumInplace(page, 0); + smgrextend(relation->rd_smgr, MAIN_FORKNUM, 0, (char *) page, true); + + if (ENABLE_DSS) { + pfree(unaligned_buffer); + } else { + pfree(page); + } + + smgrimmedsync(relation->rd_smgr, MAIN_FORKNUM); + + RelationCacheInvalidateEntry(relation->rd_id); } static Index get_index_ref(QueryDesc* queryDesc, Oid relid) diff --git a/src/gausskernel/optimizer/commands/tablecmds.cpp b/src/gausskernel/optimizer/commands/tablecmds.cpp index 9f3b0d2a3..f3dc39cd0 100644 --- a/src/gausskernel/optimizer/commands/tablecmds.cpp +++ b/src/gausskernel/optimizer/commands/tablecmds.cpp @@ -2221,6 +2221,10 @@ Oid DefineRelation(CreateStmt* stmt, char relkind, Oid ownerId, bool isCTAS) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("hasuids is not supported in current version!"))); } + if (ENABLE_DMS && relhasuids) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("hasuids is not supported under Shared Storage."))); + } if (std_opt != NULL) { RowTblCheckHashBucketOption(stmt->options, std_opt); if ((std_opt->segment)) { @@ -2736,7 +2740,24 @@ Oid DefineRelation(CreateStmt* stmt, char relkind, Oid ownerId, bool isCTAS) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("The table %s do not support segment storage", stmt->relation->relname))); } - + + if (ENABLE_DMS) { + if ((relkind == RELKIND_RELATION && storage_type != SEGMENT_PAGE) || + relkind == RELKIND_MATVIEW || + pg_strcasecmp(storeChar, ORIENTATION_ROW) != 0 || + relkind == RELKIND_FOREIGN_TABLE || + stmt->relation->relpersistence == RELPERSISTENCE_UNLOGGED || + stmt->relation->relpersistence == RELPERSISTENCE_TEMP || + stmt->relation->relpersistence == RELPERSISTENCE_GLOBAL_TEMP || + pg_strcasecmp(COMPRESSION_NO, StdRdOptionsGetStringData(std_opt, compression, COMPRESSION_NO)) != 0 || + 
IsCompressedByCmprsInPgclass((RelCompressType)stmt->row_compress)) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Only support segment storage type and ASTORE while DMS and DSS enabled.\n" + "Foreign table, matview, temp table or unlogged table is not supported.\nCompression is not " + "supported."))); + } + } + /* * Create the relation. Inherited defaults and constraints are passed in * for immediate handling --- since they don't need parsing, they can be @@ -15314,6 +15335,10 @@ static void ATExecSetRelOptions(Relation rel, List* defList, AlterTableType oper ereport(ERROR, (errcode(ERRCODE_INVALID_OPERATION), errmsg("table with oids cannot add or modify hasuids by ALTER TABLE command."))); } + if (ENABLE_DMS && newRelHasUids) { + ereport(ERROR, (errcode(ERRCODE_INVALID_OPERATION), + errmsg("table under Shared Storage cannot add or modify hasuids by ALTER TABLE command."))); + } if (RelationIsColStore(rel)) { /* un-supported options. dont care its values */ ForbidToSetOptionsForColTbl(defList); @@ -15613,8 +15638,10 @@ static void ATExecSetTableSpaceForPartitionP3(Oid tableOid, Oid partOid, Oid new } else { newcbi = RelationIsCrossBucketIndex(rel); isbucket = BUCKET_OID_IS_VALID(rel->rd_bucketoid) && !newcbi; + Oid database_id = (ConvertToRelfilenodeTblspcOid(newTableSpace) == GLOBALTABLESPACE_OID) ? 
+ InvalidOid : u_sess->proc_cxt.MyDatabaseId; newrelfilenode = seg_alloc_segment(ConvertToRelfilenodeTblspcOid(newTableSpace), - u_sess->proc_cxt.MyDatabaseId, isbucket, InvalidBlockNumber); + database_id, isbucket, InvalidBlockNumber); } partRel = partitionGetRelation(rel, part); /* make sure we create the right underlying storage for cross-bucket index */ @@ -15901,6 +15928,7 @@ static void JudgeSmgrDsync(char relpersistence, bool copying_initfork, SMgrRelat static void copy_relation_data(Relation rel, SMgrRelation* dstptr, ForkNumber forkNum, char relpersistence) { char* buf = NULL; + char* unalign_buffer = NULL; Page page; bool use_wal = false; bool copying_initfork = false; @@ -15925,7 +15953,12 @@ static void copy_relation_data(Relation rel, SMgrRelation* dstptr, ForkNumber fo } ADIO_ELSE() { - buf = (char*)palloc(BLCKSZ); + if (ENABLE_DSS) { + unalign_buffer = (char*)palloc(BLCKSZ + ALIGNOF_BUFFER); + buf = (char*)BUFFERALIGN(unalign_buffer); + } else { + buf = (char*)palloc(BLCKSZ); + } } ADIO_END(); page = (Page)buf; @@ -16062,7 +16095,11 @@ static void copy_relation_data(Relation rel, SMgrRelation* dstptr, ForkNumber fo } ADIO_ELSE() { - pfree_ext(buf); + if (ENABLE_DSS) { + pfree_ext(unalign_buffer); + } else { + pfree_ext(buf); + } } ADIO_END(); @@ -16074,6 +16111,7 @@ static void mergeHeapBlock(Relation src, Relation dest, ForkNumber forkNum, char bool destHasFSM) { char* buf = NULL; + char* unaligned_buffer = NULL; char* bufToWrite = NULL; Page page = NULL; bool use_wal = false; @@ -16102,7 +16140,12 @@ static void mergeHeapBlock(Relation src, Relation dest, ForkNumber forkNum, char } ADIO_ELSE() { - buf = (char*)palloc(BLCKSZ); + if (ENABLE_DSS) { + unaligned_buffer = (char*)palloc(BLCKSZ + ALIGNOF_BUFFER); + buf = (char*)BUFFERALIGN(unaligned_buffer); + } else { + buf = (char*)palloc(BLCKSZ); + } } ADIO_END(); page = (Page)buf; @@ -16285,7 +16328,11 @@ static void mergeHeapBlock(Relation src, Relation dest, ForkNumber forkNum, char } ADIO_ELSE() { - 
pfree_ext(buf); + if (ENABLE_DSS) { + pfree_ext(unaligned_buffer); + } else { + pfree_ext(buf); + } } ADIO_END(); diff --git a/src/gausskernel/optimizer/commands/tablespace.cpp b/src/gausskernel/optimizer/commands/tablespace.cpp index ea2aced3a..6336e1818 100644 --- a/src/gausskernel/optimizer/commands/tablespace.cpp +++ b/src/gausskernel/optimizer/commands/tablespace.cpp @@ -72,6 +72,7 @@ #include "storage/smgr/fd.h" #include "storage/standby.h" #include "storage/smgr/segment.h" +#include "storage/file/fio_device.h" #include "utils/acl.h" #include "utils/builtins.h" #include "utils/fmgroids.h" @@ -140,10 +141,11 @@ void TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo) Assert(OidIsValid(dbNode)); dir = GetDatabasePath(dbNode, spcNode); + errno = 0; if (stat(dir, &st) < 0) { /* Directory does not exist? */ - if (errno == ENOENT) { + if (FILE_POSSIBLY_DELETED(errno)) { /* * Acquire TablespaceCreateLock to ensure that no DROP TABLESPACE * or TablespaceCreateDbspace is running concurrently. @@ -162,7 +164,7 @@ void TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo) char* parentdir = NULL; /* Failure other than not exists or not in WAL replay? */ - if (errno != ENOENT || !isRedo) + if (!FILE_POSSIBLY_DELETED(errno) || !isRedo) ereport( ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", dir))); @@ -179,7 +181,7 @@ void TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo) /* create the second parent */ get_parent_directory(parentdir); /* Can't create parent and it doesn't already exist? 
*/ - if (mkdir(parentdir, S_IRWXU) < 0 && errno != EEXIST) + if (mkdir(parentdir, S_IRWXU) < 0 && !FILE_ALREADY_EXIST(errno)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", parentdir))); pfree_ext(parentdir); @@ -188,13 +190,13 @@ parentdir = pstrdup(dir); get_parent_directory(parentdir); /* Can't create parent and it doesn't already exist? */ - if (mkdir(parentdir, S_IRWXU) < 0 && errno != EEXIST) + if (mkdir(parentdir, S_IRWXU) < 0 && !FILE_ALREADY_EXIST(errno)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", parentdir))); pfree_ext(parentdir); /* Create database directory */ - if (mkdir(dir, S_IRWXU) < 0 && errno != EEXIST) + if (mkdir(dir, S_IRWXU) < 0 && !FILE_ALREADY_EXIST(errno)) ereport( ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", dir))); } @@ -551,6 +553,13 @@ void CreateTableSpace(CreateTableSpaceStmt* stmt) (errmodule(MOD_TBLSPC), errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("Create tablespace with absolute location can't be allowed"))); + + if (!relative && ENABLE_DSS) { + ereport(ERROR, + (errmodule(MOD_TBLSPC), + errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("Can not create tablespace with absolute location in shared storage mode"))); + } /* Must be users with sysadmin privilege or the member of gs_role_tablespace role */ if (!superuser() && !is_member_of_role(GetUserId(), DEFAULT_ROLE_TABLESPACE)) { @@ -612,18 +621,30 @@ void CreateTableSpace(CreateTableSpaceStmt* stmt) errmsg("The relative location can not be null"))); /* We need reform location for relative mode */ + int len; + errno_t rc = EOK; relativeLocation = pstrdup(location); pfree_ext(location); - int len = strlen(t_thrd.proc_cxt.DataDir) + 1 + strlen(relativeLocation) + 1 + strlen(PG_LOCATION_DIR) + 1; - location = (char*)palloc(len); - errno_t rc = EOK; - if 
(t_thrd.proc_cxt.DataDir[strlen(t_thrd.proc_cxt.DataDir)] == '/') - rc = snprintf_s( - location, len, len - 1, "%s%s/%s", t_thrd.proc_cxt.DataDir, PG_LOCATION_DIR, relativeLocation); - else - rc = snprintf_s( - location, len, len - 1, "%s/%s/%s", t_thrd.proc_cxt.DataDir, PG_LOCATION_DIR, relativeLocation); + if (ENABLE_DSS) { + len = (int)strlen(PG_LOCATION_DIR) + 1 + (int)strlen(relativeLocation) + 1; + location = (char*)palloc(len); + rc = snprintf_s(location, len, len - 1, "%s/%s", PG_LOCATION_DIR, relativeLocation); + } else { + if (t_thrd.proc_cxt.DataDir[strlen(t_thrd.proc_cxt.DataDir) - 1] == '/') { + len = (int)strlen(t_thrd.proc_cxt.DataDir) + (int)strlen(PG_LOCATION_DIR) + + 1 + (int)strlen(relativeLocation) + 1; + location = (char*)palloc(len); + rc = snprintf_s( + location, len, len - 1, "%s%s/%s", t_thrd.proc_cxt.DataDir, PG_LOCATION_DIR, relativeLocation); + } else { + len = (int)strlen(t_thrd.proc_cxt.DataDir) + 1 + (int)strlen(PG_LOCATION_DIR) + + 1 + (int)strlen(relativeLocation) + 1; + location = (char*)palloc(len); + rc = snprintf_s( + location, len, len - 1, "%s/%s/%s", t_thrd.proc_cxt.DataDir, PG_LOCATION_DIR, relativeLocation); + } + } securec_check_ss(rc, "\0", "\0"); } @@ -1041,7 +1062,7 @@ static void check_tablespace_symlink(const char* location) Assert(location != NULL); - dir = AllocateDir(tbs_path); + dir = AllocateDir(TBLSPCDIR); if (dir == NULL) { ereport(ERROR, (errmodule(MOD_TBLSPC), @@ -1052,7 +1073,7 @@ static void check_tablespace_symlink(const char* location) if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) continue; - rc = snprintf_s(tmppath, MAXPGPATH + 2, MAXPGPATH + 1, "%s/%s", tbs_path, dent->d_name); + rc = snprintf_s(tmppath, MAXPGPATH + 2, MAXPGPATH + 1, "%s/%s", TBLSPCDIR, dent->d_name); securec_check_ss(rc, "\0", "\0"); /* get file status */ @@ -1069,7 +1090,7 @@ static void check_tablespace_symlink(const char* location) ereport(ERROR, (errmodule(MOD_TBLSPC), errcode(ERRCODE_WRONG_OBJECT_TYPE), - 
errmsg("\"%s\" is not symlink, please check and clean the remains in \"%s\"", tmppath, tbs_path))); + errmsg("\"%s\" is not symlink, please check and clean the remains in \"%s\"", tmppath, TBLSPCDIR))); } /* get target directory */ @@ -1133,18 +1154,23 @@ static void check_tablespace_symlink(const char* location) */ static void create_tablespace_directories(const char* location, const Oid tablespaceoid) { - char* linkloc = (char*)palloc(OIDCHARS + OIDCHARS + 1); + char* linkloc = (char*)palloc(strlen(TBLSPCDIR) + OIDCHARS + 2); char* locationWithTempDir = NULL; int locationWithTempDirLen = 0; #ifdef PGXC - char* location_with_version_dir = - (char*)palloc(strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + PGXC_NODENAME_LENGTH + 1); + char* location_with_version_dir = NULL; + if (ENABLE_DSS) { + location_with_version_dir = (char *)palloc(strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1); + } else { + location_with_version_dir = + (char*)palloc(strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + PGXC_NODENAME_LENGTH + 1); + } #else char* location_with_version_dir = palloc(strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1); #endif int rc = 0; - rc = sprintf_s(linkloc, OIDCHARS + OIDCHARS + 1, "pg_tblspc/%u", tablespaceoid); + rc = sprintf_s(linkloc, strlen(TBLSPCDIR) + 1 + OIDCHARS + 1, "%s/%u", TBLSPCDIR, tablespaceoid); securec_check_ss(rc, "\0", "\0"); #ifdef PGXC /* @@ -1152,13 +1178,22 @@ static void create_tablespace_directories(const char* location, const Oid tables * of TABLESPACE_VERSION_DIRECTORY. Node name unicity in Postgres-XC * cluster insures unicity of tablespace. 
*/ - rc = sprintf_s(location_with_version_dir, - strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + PGXC_NODENAME_LENGTH + 1, - "%s/%s_%s", - location, - TABLESPACE_VERSION_DIRECTORY, - g_instance.attr.attr_common.PGXCNodeName); - securec_check_ss(rc, "\0", "\0"); + if (ENABLE_DSS) { + rc = sprintf_s(location_with_version_dir, + strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1, + "%s/%s", + location, + TABLESPACE_VERSION_DIRECTORY); + securec_check_ss(rc, "\0", "\0"); + } else { + rc = sprintf_s(location_with_version_dir, + strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + PGXC_NODENAME_LENGTH + 1, + "%s/%s_%s", + location, + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName); + securec_check_ss(rc, "\0", "\0"); + } #else rc = sprintf_s(location_with_version_dir, strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1, @@ -1185,7 +1220,7 @@ static void create_tablespace_directories(const char* location, const Oid tables * it doesn't exist or has the wrong owner. */ if (chmod(location, S_IRWXU) != 0) { - if (errno == ENOENT) + if (FILE_POSSIBLY_DELETED(errno)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FILE), errmsg("directory \"%s\" does not exist", location), @@ -1218,7 +1253,7 @@ static void create_tablespace_directories(const char* location, const Oid tables * in a single location. 
*/ if (mkdir(location_with_version_dir, S_IRWXU) < 0) { - if (errno == EEXIST) { + if (FILE_ALREADY_EXIST(errno)) { if (!IsRoachRestore()) ereport(ERROR, (errcode(ERRCODE_OBJECT_IN_USE), @@ -1233,7 +1268,7 @@ static void create_tablespace_directories(const char* location, const Oid tables // Create PG_TEMP_FILES_DIR directory // if (mkdir(locationWithTempDir, S_IRWXU) < 0) { - if (errno == EEXIST) { + if (FILE_ALREADY_EXIST(errno)) { if (!IsRoachRestore()) ereport(ERROR, (errcode(ERRCODE_OBJECT_IN_USE), @@ -1249,12 +1284,12 @@ static void create_tablespace_directories(const char* location, const Oid tables struct stat st; if (lstat(linkloc, &st) < 0) { - if (errno != ENOENT) + if (!FILE_POSSIBLY_DELETED(errno)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", linkloc))); } else if (S_ISDIR(st.st_mode)) { - if (rmdir(linkloc) < 0 && errno != ENOENT) + if (rmdir(linkloc) < 0 && !FILE_POSSIBLY_DELETED(errno)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove directory \"%s\": %m", linkloc))); - } else if (unlink(linkloc) < 0 && errno != ENOENT) { + } else if (unlink(linkloc) < 0 && !FILE_POSSIBLY_DELETED(errno)) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove symbolic link \"%s\": %m", linkloc))); } } @@ -1363,20 +1398,36 @@ static void createtbspc_abort_callback(bool isCommit, const void* arg) char* linkloc = NULL; struct stat st; errno_t rc = EOK; - int len = strlen("pg_tblspc") + 1 + OIDCHARS + 1 + strlen(g_instance.attr.attr_common.PGXCNodeName) + 1 + + int len = 0; + if (ENABLE_DSS) { + len = strlen(TBLSPCDIR) + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1; + } else { + len = strlen(TBLSPCDIR) + 1 + OIDCHARS + 1 + strlen(g_instance.attr.attr_common.PGXCNodeName) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1; + } if (isCommit) return; linkloc_with_version_dir = (char*)palloc(len); - rc = sprintf_s(linkloc_with_version_dir, - len, - "pg_tblspc/%u/%s_%s", - tablespaceoid, - 
TABLESPACE_VERSION_DIRECTORY, - g_instance.attr.attr_common.PGXCNodeName); - securec_check_ss(rc, "\0", "\0"); + if (ENABLE_DSS) { + rc = sprintf_s(linkloc_with_version_dir, + len, + "%s/%u/%s", + TBLSPCDIR, + tablespaceoid, + TABLESPACE_VERSION_DIRECTORY); + securec_check_ss(rc, "\0", "\0"); + } else { + rc = sprintf_s(linkloc_with_version_dir, + len, + "%s/%u/%s_%s", + TBLSPCDIR, + tablespaceoid, + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName); + securec_check_ss(rc, "\0", "\0"); + } /* First, remove version directory */ if (!rmtree(linkloc_with_version_dir, true)) { @@ -1433,20 +1484,39 @@ static bool destroy_tablespace_directories(Oid tablespaceoid, bool redo) errno_t rc = EOK; #ifdef PGXC - int len = strlen("pg_tblspc") + 1 + OIDCHARS + 1 + strlen(g_instance.attr.attr_common.PGXCNodeName) + 1 + - strlen(TABLESPACE_VERSION_DIRECTORY) + 1; + int len = 0; + if (ENABLE_DSS) { + len = strlen(TBLSPCDIR) + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1; + linkloc_with_version_dir = (char*)palloc(len); + rc = sprintf_s(linkloc_with_version_dir, + len, + "%s/%u/%s", + TBLSPCDIR, + tablespaceoid, + TABLESPACE_VERSION_DIRECTORY); + securec_check_ss(rc, "\0", "\0"); + } else { + len = strlen(TBLSPCDIR) + 1 + OIDCHARS + 1 + strlen(g_instance.attr.attr_common.PGXCNodeName) + 1 + + strlen(TABLESPACE_VERSION_DIRECTORY) + 1; + linkloc_with_version_dir = (char*)palloc(len); + rc = sprintf_s(linkloc_with_version_dir, + len, + "%s/%u/%s_%s", + TBLSPCDIR, + tablespaceoid, + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName); + securec_check_ss(rc, "\0", "\0"); + } +#else + int len = strlen(TBLSPCDIR) + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1; linkloc_with_version_dir = (char*)palloc(len); rc = sprintf_s(linkloc_with_version_dir, len, - "pg_tblspc/%u/%s_%s", + "%s/%u/%s", + TBLSPCDIR, tablespaceoid, - TABLESPACE_VERSION_DIRECTORY, - g_instance.attr.attr_common.PGXCNodeName); - securec_check_ss(rc, 
"\0", "\0"); -#else - int len = strlen("pg_tblspc") + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1; - linkloc_with_version_dir = (char*)palloc(len); - rc = sprintf_s(linkloc_with_version_dir, len, "pg_tblspc/%u/%s", tablespaceoid, TABLESPACE_VERSION_DIRECTORY); + TABLESPACE_VERSION_DIRECTORY); securec_check_ss(rc, "\0", "\0"); #endif @@ -1474,7 +1544,7 @@ static bool destroy_tablespace_directories(Oid tablespaceoid, bool redo) */ dirdesc = AllocateDir(linkloc_with_version_dir); if (dirdesc == NULL) { - if (errno == ENOENT) { + if (!FILE_POSSIBLY_DELETED(errno)) { if (!redo) ereport(WARNING, (errcode_for_file_access(), @@ -1558,7 +1628,7 @@ remove_symlink: (errcode_for_file_access(), errmsg("could not remove directory \"%s\": %m", linkloc))); } else { if (unlink(linkloc) < 0) - ereport(redo ? LOG : (errno == ENOENT ? WARNING : ERROR), + ereport(redo ? LOG : (FILE_POSSIBLY_DELETED(errno) ? WARNING : ERROR), (errcode_for_file_access(), errmsg("could not remove symbolic link \"%s\": %m", linkloc))); } @@ -1609,7 +1679,7 @@ void remove_tablespace_symlink(const char* linkloc) struct stat st; if (lstat(linkloc, &st) < 0) { - if (errno == ENOENT) + if (FILE_POSSIBLY_DELETED(errno)) return; ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", linkloc))); } @@ -1619,12 +1689,12 @@ void remove_tablespace_symlink(const char* linkloc) * This will fail if the directory isn't empty, but not if it's a * junction point. 
*/ - if (rmdir(linkloc) < 0 && errno != ENOENT) + if (rmdir(linkloc) < 0 && !FILE_POSSIBLY_DELETED(errno)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove directory \"%s\": %m", linkloc))); } #ifdef S_ISLNK else if (S_ISLNK(st.st_mode)) { - if (unlink(linkloc) < 0 && errno != ENOENT) + if (unlink(linkloc) < 0 && !FILE_POSSIBLY_DELETED(errno)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove symbolic link \"%s\": %m", linkloc))); } #endif @@ -2422,20 +2492,29 @@ recheck: void xlog_create_tblspc(Oid tsId, char* tsPath, bool isRelativePath) { + int len; char* location = tsPath; + errno_t rc = EOK; + if (isRelativePath) { - int len = strlen(t_thrd.proc_cxt.DataDir) + 1 + strlen(tsPath) + 1 + strlen(PG_LOCATION_DIR) + 1; - location = (char*)palloc(len); - errno_t rc = EOK; - if (t_thrd.proc_cxt.DataDir[strlen(t_thrd.proc_cxt.DataDir) - 1] == '/') { - rc = snprintf_s( - location, len, len - 1, "%s%s/%s", t_thrd.proc_cxt.DataDir, PG_LOCATION_DIR, tsPath); - securec_check_ss(rc, "\0", "\0"); + if (ENABLE_DSS) { + len = (int)strlen(PG_LOCATION_DIR) + 1 + (int)strlen(tsPath) + 1; + location = (char*)palloc(len); + rc = snprintf_s(location, len, len - 1, "%s/%s", PG_LOCATION_DIR, tsPath); } else { - rc = snprintf_s( - location, len, len - 1, "%s/%s/%s", t_thrd.proc_cxt.DataDir, PG_LOCATION_DIR, tsPath); - securec_check_ss(rc, "\0", "\0"); + if (t_thrd.proc_cxt.DataDir[strlen(t_thrd.proc_cxt.DataDir) - 1] == '/') { + len = strlen(t_thrd.proc_cxt.DataDir) + strlen(PG_LOCATION_DIR) + 1 + strlen(tsPath) + 1; + location = (char*)palloc(len); + rc = snprintf_s( + location, len, len - 1, "%s%s/%s", t_thrd.proc_cxt.DataDir, PG_LOCATION_DIR, tsPath); + } else { + len = strlen(t_thrd.proc_cxt.DataDir) + 1 + strlen(PG_LOCATION_DIR) + 1 + strlen(tsPath) + 1; + location = (char*)palloc(len); + rc = snprintf_s( + location, len, len - 1, "%s/%s/%s", t_thrd.proc_cxt.DataDir, PG_LOCATION_DIR, tsPath); + } } + securec_check_ss(rc, "\0", "\0"); } 
check_create_dir(location); create_tablespace_directories(location, tsId); @@ -2701,7 +2780,14 @@ void TableSpaceUsageManager::IsExceedMaxsize(Oid tableSpaceOid, uint64 requestSi uint64 currentSize = 0; TableSpaceUsageBucket* bucket = NULL; TableSpaceUsageSlot* slot = NULL; - + + /* skip it while initdb */ + if (IsInitdb) { + u_sess->cmd_cxt.l_tableSpaceOid = tableSpaceOid; + u_sess->cmd_cxt.l_isLimit = false; + return; + } + /* * Segment-page storage calls IsExceedMaxsize is often caused by 'smgrextend', which does physical file * extension. However, smgrextend may be invoked in ReadBuffer_common_ReadBlock that after invoking diff --git a/src/gausskernel/optimizer/commands/user.cpp b/src/gausskernel/optimizer/commands/user.cpp index a62f8fb02..dce160f79 100755 --- a/src/gausskernel/optimizer/commands/user.cpp +++ b/src/gausskernel/optimizer/commands/user.cpp @@ -1738,7 +1738,7 @@ void AlterRole(AlterRoleStmt* stmt) /* Extract options from the statement node tree */ foreach (option, stmt->options) { - DefElem* defel = (DefElem*)lfirst(option); + DefElem *defel = (DefElem *)lfirst(option); if (strcmp(defel->defname, "password") == 0 || strcmp(defel->defname, "encryptedPassword") == 0 || strcmp(defel->defname, "unencryptedPassword") == 0 || strcmp(defel->defname, "expiredPassword") == 0) { @@ -2261,13 +2261,13 @@ void AlterRole(AlterRoleStmt* stmt) CheckLockPrivilege(roleid, tuple, is_opradmin); if (stmt->lockstatus == LOCK_ROLE) { - if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE) { + if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE || SS_STANDBY_MODE) { UpdateFailCountToHashTable(roleid, 0, true); } else { TryLockAccount(roleid, 0, true); } } else { - if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE) { + if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE || SS_STANDBY_MODE) { UnlockAccountToHashTable(roleid, true, false); } else { TryUnlockAccount(roleid, true, false); @@ -2457,7 +2457,7 @@ void 
AlterRole(AlterRoleStmt* stmt) } /* If locked, try unlock to see whether lock time is over. */ - if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE) { + if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE || SS_STANDBY_MODE) { if (UNLOCK_STATUS != GetAccountLockedStatusFromHashTable(roleid)) { UnlockAccountToHashTable(roleid, false, false); rolestatus = GetAccountLockedStatusFromHashTable(roleid); @@ -2780,7 +2780,7 @@ void AlterRole(AlterRoleStmt* stmt) /* the password is not right, and try to lock the account */ if (u_sess->attr.attr_security.Password_lock_time > 0 && u_sess->attr.attr_security.Failed_login_attempts > 0) { - if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE) { + if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE || SS_STANDBY_MODE) { UpdateFailCountToHashTable(roleid, 1, false); } else { TryLockAccount(roleid, 1, false); @@ -2791,7 +2791,7 @@ void AlterRole(AlterRoleStmt* stmt) str_reset(oldPasswd); ereport(ERROR, (errcode(ERRCODE_INVALID_PASSWORD), errmsg("The old password is invalid."))); } else { - if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE) { + if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE || SS_STANDBY_MODE) { UnlockAccountToHashTable(roleid, false, true); } else { TryUnlockAccount(roleid, false, true); @@ -5163,7 +5163,7 @@ void TryLockAccount(Oid roleID, int extrafails, bool superlock) char* rolename = NULL; /* We could not insert new xlog if recovery in process */ - if (RecoveryInProgress()) { + if (RecoveryInProgress() || SSIsServerModeReadOnly()) { return; } @@ -5314,7 +5314,7 @@ bool TryUnlockAccount(Oid roleID, bool superunlock, bool isreset) char* rolename = NULL; /* We could not insert new xlog if recovery in process */ - if (RecoveryInProgress()) { + if (RecoveryInProgress() || SSIsServerModeReadOnly()) { return false; } @@ -5432,7 +5432,7 @@ void TryUnlockAllAccounts(void) bool roleIdIsNull = false; char* rolename = NULL; - if 
(t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE) { + if (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE || SS_STANDBY_MODE) { return; } diff --git a/src/gausskernel/optimizer/commands/vacuum.cpp b/src/gausskernel/optimizer/commands/vacuum.cpp index a5aa2cd4c..b78f21113 100644 --- a/src/gausskernel/optimizer/commands/vacuum.cpp +++ b/src/gausskernel/optimizer/commands/vacuum.cpp @@ -2453,6 +2453,12 @@ static bool vacuum_rel(Oid relid, VacuumStmt* vacstmt, bool do_toast) * warning here; it would just lead to chatter during a database-wide * VACUUM.) */ + if (ENABLE_DSS && RELATION_IS_TEMP(onerel)) { + CloseAllRelationsBeforeReturnFalse(); + proc_snapshot_and_transaction(); + return false; + } + if (RELATION_IS_OTHER_TEMP(onerel)) { CloseAllRelationsBeforeReturnFalse(); diff --git a/src/gausskernel/optimizer/commands/variable.cpp b/src/gausskernel/optimizer/commands/variable.cpp index 1689e372b..921b792e3 100644 --- a/src/gausskernel/optimizer/commands/variable.cpp +++ b/src/gausskernel/optimizer/commands/variable.cpp @@ -524,6 +524,11 @@ bool check_transaction_read_only(bool* newval, void** extra, GucSource source) GUC_check_errmsg("cannot set transaction read-write mode during recovery"); return false; } + if (SSIsServerModeReadOnly()) { + GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED); + GUC_check_errmsg("cannot set transaction read-write mode at Standby while DMS enabled"); + return false; + } } return true; @@ -595,6 +600,18 @@ bool check_XactIsoLevel(char** newval, void** extra, GucSource source) GUC_check_errhint("You can use REPEATABLE READ instead."); return false; } + + do { + /* Supporting the gs_dump in DSS mode */ + if (strcmp(u_sess->attr.attr_common.application_name, "gs_dump") == 0 && SS_PRIMARY_MODE) + break; + /* Only support read committed while DMS enabled */ + if (ENABLE_DMS && newXactIsoLevel != XACT_READ_COMMITTED) { + GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED); + GUC_check_errmsg("Only support read 
committed transaction isolation level while DMS and DSS enabled"); + return false; + } + } while (0); } *extra = MemoryContextAlloc(SESS_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_OPTIMIZER), sizeof(int)); diff --git a/src/gausskernel/optimizer/commands/verify.cpp b/src/gausskernel/optimizer/commands/verify.cpp index e5981d1cc..13526c57c 100644 --- a/src/gausskernel/optimizer/commands/verify.cpp +++ b/src/gausskernel/optimizer/commands/verify.cpp @@ -1277,6 +1277,15 @@ static bool VerifyRowRelFast(Relation rel, VerifyDesc* checkCudesc) /* If we got a cancel signal during the copy of the data, quit */ CHECK_FOR_INTERRUPTS(); SMGR_READ_STATUS rdStatus = smgrread(src, forkNum, blkno, buf); + /* For DMS , try to read from buffer in case the data is not flused to disk */ + if (rdStatus == SMGR_RD_CRC_ERROR && ENABLE_DMS) { + Buffer buffer = ReadBufferWithoutRelcache(src->smgr_rnode.node, forkNum, blkno, RBM_NORMAL, NULL, NULL); + if (buffer != InvalidBuffer) { + ReleaseBuffer(buffer); + continue; + } + } + /* check the page & crc */ if (rdStatus == SMGR_RD_CRC_ERROR) { // Retry 5 times to increase program reliability. @@ -1320,16 +1329,16 @@ static bool VerifyRowRelFast(Relation rel, VerifyDesc* checkCudesc) handle_in_client(true))); /* Add the wye page to the global variable and try to fix it. */ addGlobalRepairBadBlockStat(src->smgr_rnode, forkNum, blkno); - } else if (rdStatus == SMGR_RD_OK) { - /* Ustrore white-box verification adapt to analyze verify. */ - UPageVerifyParams verifyParam; - Page page = (char *) buf; - if (unlikely(ConstructUstoreVerifyParam(USTORE_VERIFY_MOD_UPAGE, USTORE_VERIFY_FAST, - (char *) &verifyParam, rel, page, InvalidBlockNumber, NULL, NULL, InvalidXLogRecPtr, NULL, - NULL, true))) { - ExecuteUstoreVerify(USTORE_VERIFY_MOD_UPAGE, (char *) &verifyParam); - } + } else if (rdStatus == SMGR_RD_OK) { + /* Ustrore white-box verification adapt to analyze verify. 
*/ + UPageVerifyParams verifyParam; + Page page = (char *) buf; + if (unlikely(ConstructUstoreVerifyParam(USTORE_VERIFY_MOD_UPAGE, USTORE_VERIFY_FAST, + (char *) &verifyParam, rel, page, InvalidBlockNumber, NULL, NULL, InvalidXLogRecPtr, NULL, + NULL, true))) { + ExecuteUstoreVerify(USTORE_VERIFY_MOD_UPAGE, (char *) &verifyParam); } + } } pfree_ext(buf); diff --git a/src/gausskernel/optimizer/commands/verifyrepair.cpp b/src/gausskernel/optimizer/commands/verifyrepair.cpp index 7a2498297..7fec15e4f 100644 --- a/src/gausskernel/optimizer/commands/verifyrepair.cpp +++ b/src/gausskernel/optimizer/commands/verifyrepair.cpp @@ -654,6 +654,11 @@ Datum local_clear_bad_block_info(PG_FUNCTION_ARGS) Datum gs_repair_page(PG_FUNCTION_ARGS) { + if (ENABLE_DMS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Not support repair page while DMS and DSS enabled"))); + } + checkInstanceType(); checkSupUserOrOperaMode(); // read in parameters @@ -682,6 +687,10 @@ bool CheckRelDataFilePath(const char* path) Datum gs_repair_file(PG_FUNCTION_ARGS) { + if (ENABLE_DMS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Not support repair file while DMS and DSS enabled"))); + } checkInstanceType(); checkSupUserOrOperaMode(); Oid tableOid = PG_GETARG_UINT32(0); @@ -898,6 +907,10 @@ void gs_tryrepair_compress_extent(SMgrRelation reln, BlockNumber logicBlockNumbe Datum gs_verify_and_tryrepair_page(PG_FUNCTION_ARGS) { + if (ENABLE_DMS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Not support verify and tryrepair page while DMS and DSS enabled"))); + } #define REPAIR_BLOCK_STAT_NATTS 6 checkInstanceType(); checkSupUserOrOperaMode(); @@ -1975,4 +1988,4 @@ static void checkInstanceType() ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("Must be in primary DN.")))); } -} +} \ No newline at end of file diff --git a/src/gausskernel/process/main/main.cpp b/src/gausskernel/process/main/main.cpp index 5a1f1aebd..68dc69c1a 
100755 --- a/src/gausskernel/process/main/main.cpp +++ b/src/gausskernel/process/main/main.cpp @@ -446,6 +446,7 @@ static void help(const char* progname) #endif printf(_(" -r FILENAME send stdout and stderr to given file\n")); printf(_(" -x NUM internal use\n")); + printf(_(" -G store tables in segment page while initdb\n")); #ifdef ENABLE_MULTIPLE_NODES printf(_("\nNode options:\n")); diff --git a/src/gausskernel/process/postmaster/autovacuum.cpp b/src/gausskernel/process/postmaster/autovacuum.cpp index 1e665429f..0756dd811 100755 --- a/src/gausskernel/process/postmaster/autovacuum.cpp +++ b/src/gausskernel/process/postmaster/autovacuum.cpp @@ -3473,7 +3473,8 @@ static void autovac_report_activity(autovac_table* tab) */ bool AutoVacuumingActive(void) { - if (!u_sess->attr.attr_storage.autovacuum_start_daemon || !u_sess->attr.attr_common.pgstat_track_counts) + if (!u_sess->attr.attr_storage.autovacuum_start_daemon || !u_sess->attr.attr_common.pgstat_track_counts || + SSIsServerModeReadOnly()) return false; return true; } diff --git a/src/gausskernel/process/postmaster/bgworker.cpp b/src/gausskernel/process/postmaster/bgworker.cpp index 7ed65df48..bcb5522d0 100644 --- a/src/gausskernel/process/postmaster/bgworker.cpp +++ b/src/gausskernel/process/postmaster/bgworker.cpp @@ -41,6 +41,11 @@ bool IsBgWorkerProcess(void) return t_thrd.role == BGWORKER; } +bool IsDMSWorkerProcess(void) +{ + return t_thrd.role == DMS_WORKER; +} + static inline void BgworkerPutBackToFreeList(BackgroundWorker* bgworker) { BGW_HDR* bgworker_base = (BGW_HDR *)g_instance.bgw_base; diff --git a/src/gausskernel/process/postmaster/cfs_shrinker.cpp b/src/gausskernel/process/postmaster/cfs_shrinker.cpp index 4452a3c31..425c7f05a 100644 --- a/src/gausskernel/process/postmaster/cfs_shrinker.cpp +++ b/src/gausskernel/process/postmaster/cfs_shrinker.cpp @@ -186,6 +186,10 @@ static void CfsShrinkerSigtermHander(SIGNAL_ARGS) ThreadId StartCfsShrinkerCapturer(void) { + if (ENABLE_DMS) { + return 0; + } + 
if (!IsPostmasterEnvironment) { return 0; } diff --git a/src/gausskernel/process/postmaster/fencedudf.cpp b/src/gausskernel/process/postmaster/fencedudf.cpp index 244f2c95f..0206130f4 100644 --- a/src/gausskernel/process/postmaster/fencedudf.cpp +++ b/src/gausskernel/process/postmaster/fencedudf.cpp @@ -65,6 +65,7 @@ #include "utils/fmgrtab.h" #include "utils/postinit.h" #include "utils/relmapper.h" +#include "utils/knl_localsysdbcache.h" #include "catalog/pg_language.h" #include "catalog/pg_proc.h" #include "commands/user.h" @@ -236,6 +237,15 @@ void FencedUDFMasterMain(int argc, char* argv[]) THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_AI)); #endif +#if defined(USE_ASSERT_CHECKING) && !defined(ENABLE_MEMORY_CHECK) + /* + * ignore lsc check in UDF, because UDF call exit() to abort process, + * it will execute object destruction function, + * call proc_exit() to abort UDF will be better. + */ + CloseLSCCheck(); +#endif + /* * Step 2: Create socket for listening coming connection * Create listen socket using unix domain way which don't occupy port diff --git a/src/gausskernel/process/postmaster/pagewriter.cpp b/src/gausskernel/process/postmaster/pagewriter.cpp index 773f36682..14b502c3b 100755 --- a/src/gausskernel/process/postmaster/pagewriter.cpp +++ b/src/gausskernel/process/postmaster/pagewriter.cpp @@ -1008,7 +1008,7 @@ static uint32 calculate_pagewriter_flush_num() * If the xlog generation speed is slower than dirty queue rec lsn update speed and not many dirty pages, * no need to scan too many dirty page, because the dirty page rec lsn is same. 
*/ - if (dirty_percent < HIGH_WATER && avg_lsn_rate < XLOG_SEG_SIZE * UPDATE_REC_XLOG_NUM && + if (dirty_percent < HIGH_WATER && avg_lsn_rate < XLogSegSize * UPDATE_REC_XLOG_NUM && lsn_target_percent < HIGH_WATER) { lsn_scan_factor = 1; } diff --git a/src/gausskernel/process/postmaster/postmaster.cpp b/src/gausskernel/process/postmaster/postmaster.cpp index 0d235cc28..6dafee00f 100644 --- a/src/gausskernel/process/postmaster/postmaster.cpp +++ b/src/gausskernel/process/postmaster/postmaster.cpp @@ -255,6 +255,11 @@ #include "gs_ledger/blockchain.h" #include "communication/commproxy_interface.h" +#include "storage/file/fio_device.h" +#include "storage/dss/dss_adaptor.h" +#include "storage/dss/dss_log.h" +#include "ddes/dms/ss_switchover.h" +#include "ddes/dms/ss_reform_common.h" #ifdef ENABLE_UT #define static @@ -913,7 +918,7 @@ bool SetDBStateFileState(DbState state, bool optional) securec_check_intval(rc, , false); /* Write the new content into a temp file and rename it at last. */ - int fd = open(gaussdb_state_file, O_RDONLY); + int fd = open(gaussdb_state_file, O_RDONLY, 0); if (fd == -1) { if (errno == ENOENT && optional) { write_stderr("gaussdb.state does not exist, and skipt setting since it is optional."); @@ -1570,6 +1575,25 @@ int PostmasterMain(int argc, char* argv[]) InitializeNumLwLockPartitions(); + if (dss_device_init(g_instance.attr.attr_storage.dss_attr.ss_dss_conn_path, + g_instance.attr.attr_storage.dss_attr.ss_enable_dss) != DSS_SUCCESS) { + write_stderr("failed to init dss device\n"); + ExitPostmaster(1); + } + if (ENABLE_DSS) { + if (g_instance.attr.attr_storage.recovery_parse_workers > 1 || + g_instance.attr.attr_storage.recovery_redo_workers_per_paser_worker > 1) { + write_stderr("Not support extreme RTO while DMS and DSS enabled, please cancel rto parameter\n"); + ExitPostmaster(1); + } + + if (u_sess->attr.attr_common.XLogArchiveMode || strlen(u_sess->attr.attr_storage.XLogArchiveCommand) != 0) { + write_stderr("Not support archive 
function while DMS and DSS enabled\n"); + ExitPostmaster(1); + } + dss_log_init(); + } + noProcLogicTid = GLOBAL_ALL_PROCS; if (FencedUDFMasterMode) { @@ -1595,6 +1619,9 @@ int PostmasterMain(int argc, char* argv[]) FencedUDFMasterMain(0, NULL); return 0; } + + /* Check DSS config */ + initDSSConf(); /* Verify that t_thrd.proc_cxt.DataDir looks reasonable */ checkDataDir(); @@ -2320,14 +2347,96 @@ int PostmasterMain(int argc, char* argv[]) /* PostmasterRandom wants its own copy */ gettimeofday(&t_thrd.postmaster_cxt.random_start_time, NULL); + + /* load primary id and reform stable list from control file in shared storage based on dms and dss. + * (1) If the current instance startup in multimaster_primary mode, the condition is as follows: + * (a) primary_id loaded from control file is equel to the current instance id, the current instance id + * will be save straightly into control file. + * (b) primary_id loaded from controlfile is not the current instance id, this indicate that the current node + * node is multimaster_standby previously, then the current node will need to recovery by pg_xlog from + * primary_id, so the current instance id will not be save as primary id until recovery finish. + * (2) If the current instance startup in multimaster_standby mode, get primary_id from control file. 
+ */ + if (g_instance.attr.attr_storage.dms_attr.enable_dms) { + /* load primary id and reform stable list from control file */ + SSReadControlFile(REFORM_CTRL_PAGE); + int src_id = g_instance.dms_cxt.SSReformerControl.primaryInstId; + ereport(LOG, (errmsg("[SS reform] node%d starts, found cluster PRIMARY:%d", + g_instance.attr.attr_storage.dms_attr.instance_id, src_id))); + Assert(src_id >= 0 && src_id <= DMS_MAX_INSTANCE - 1); + + if (!SS_MY_INST_IS_MASTER && g_instance.attr.attr_storage.dms_attr.enable_reform) { + const long SLEEP_ONE_SEC = 1000000L; + while (g_instance.dms_cxt.SSReformerControl.list_stable == 0) { + pg_usleep(SLEEP_ONE_SEC); + SSReadControlFile(REFORM_CTRL_PAGE); + ereport(WARNING, (errmsg("[SS reform] node%d waiting for PRIMARY:%d to finish 1st reform", + g_instance.attr.attr_storage.dms_attr.instance_id, src_id))); + } + ereport(LOG, (errmsg("[SS reform] Success: node:%d wait for PRIMARY:%d to finish 1st reform", + g_instance.attr.attr_storage.dms_attr.instance_id, src_id))); + } + } + + if (SS_PRIMARY_MODE) { + if (dss_set_server_status_wrapper(true) != GS_SUCCESS) { + ereport(FATAL, (errmsg("Could not set dssserver flag, vgname: \"%s\", socketpath: \"%s\"", + g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name, + g_instance.attr.attr_storage.dss_attr.ss_dss_conn_path), + errhint("Check vgname and socketpath and restart later."))); + } + ereport(LOG, (errmsg("set dss server status as primary"))); + } else if (SS_STANDBY_MODE) { + if (dss_set_server_status_wrapper(false) != GS_SUCCESS) { + ereport(FATAL, (errmsg("Could not set dssserver flag, vgname: \"%s\", socketpath: \"%s\"", + g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name, + g_instance.attr.attr_storage.dss_attr.ss_dss_conn_path), + errhint("Check vgname and socketpath and restart later."))); + } + ereport(LOG, (errmsg("set dss server status as standby"))); + } + + /* + * Save backend variables for DCF call back thread, + * the saved backend variables will be restored in + * DCF 
call back thread share memory init function. + */ + if (g_instance.attr.attr_storage.dcf_attr.enable_dcf || g_instance.attr.attr_storage.dms_attr.enable_dms) { + int ss_rc = memset_s(&port, sizeof(port), 0, sizeof(port)); + securec_check(ss_rc, "\0", "\0"); + port.sock = PGINVALID_SOCKET; + BackendVariablesGlobal = static_cast(palloc(sizeof(BackendParameters))); + save_backend_variables(BackendVariablesGlobal, &port); + + if (g_instance.attr.attr_storage.dms_attr.enable_dms) { + /* need to initialize before STARTUP */ + DMSInit(); + } + } /* * We're ready to rock and roll... */ ShareStorageInit(); - g_instance.pid_cxt.StartupPID = initialize_util_thread(STARTUP); - Assert(g_instance.pid_cxt.StartupPID != 0); - pmState = PM_STARTUP; + if (ENABLE_DMS && ENABLE_REFORM) { + if (!DMSWaitInitStartup()) { + if (g_instance.pid_cxt.StartupPID == 0) { + ereport(LOG, (errmsg("[SS reform] Node:%d first startup fail and exit", SS_MY_INST_ID))); + KillGraceThreads(); + WaitGraceThreadsExit(); + + // threading: do not clean sema, maybe other thread is using it. + cancelSemphoreRelease(); + cancelIpcMemoryDetach(); + + ExitPostmaster(0); + } + } + } else { + g_instance.pid_cxt.StartupPID = initialize_util_thread(STARTUP); + Assert(g_instance.pid_cxt.StartupPID != 0); + pmState = PM_STARTUP; + } #ifdef ENABLE_MULTIPLE_NODES @@ -2346,18 +2455,6 @@ int PostmasterMain(int argc, char* argv[]) load_searchserver_library(); #endif - /* - * Save backend variables for DCF call back thread, - * the saved backend variables will be restored in - * DCF call back thread share memory init function. 
- */ - if (g_instance.attr.attr_storage.dcf_attr.enable_dcf) { - int ss_rc = memset_s(&port, sizeof(port), 0, sizeof(port)); - securec_check(ss_rc, "\0", "\0"); - port.sock = PGINVALID_SOCKET; - BackendVariablesGlobal = static_cast(palloc(sizeof(BackendParameters))); - save_backend_variables(BackendVariablesGlobal, &port); - } /* If start with plpython fenced mode, we just startup as fenced mode */ if (PythonFencedMasterModel) { fencedMasterPID = StartUDFMaster(); @@ -2529,8 +2626,13 @@ static void checkDataDir(void) /* Look for PG_VERSION before looking for pg_control */ ValidatePgVersion(t_thrd.proc_cxt.DataDir); - - int ret = snprintf_s(path, sizeof(path), MAXPGPATH - 1, "%s/global/pg_control", t_thrd.proc_cxt.DataDir); + + int ret = 0; + if (ENABLE_DSS) { + ret = snprintf_s(path, sizeof(path), MAXPGPATH - 1, "%s", XLOG_CONTROL_FILE); + } else { + ret = snprintf_s(path, sizeof(path), MAXPGPATH - 1, "%s/%s", t_thrd.proc_cxt.DataDir, XLOG_CONTROL_FILE); + } securec_check_intval(ret, , ); fp = AllocateFile(path, PG_BINARY_R); @@ -2611,7 +2713,7 @@ static void CheckShareStorageConfigConflicts(void) if ((uint64)g_instance.attr.attr_storage.xlog_file_size % XLogSegSize != 0) { ereport(ERROR, (errcode(ERRCODE_SYSTEM_ERROR), - errmsg("value of \"xlog_file_size\" %ld must be an integer multiple of %u", + errmsg("value of \"xlog_file_size\" %ld must be an integer multiple of %lu", g_instance.attr.attr_storage.xlog_file_size, XLogSegSize))); } @@ -2619,6 +2721,16 @@ static void CheckShareStorageConfigConflicts(void) ereport(LOG, (errmsg("use scsi to preempt shared storage"))); } } + + if (g_instance.attr.attr_storage.dss_attr.ss_enable_dss) { + char *temp_tablespaces = u_sess->attr.attr_storage.temp_tablespaces; + if (temp_tablespaces != NULL && strlen(temp_tablespaces) > 0) { + ereport(ERROR, + (errcode(ERRCODE_SYSTEM_ERROR), + errmsg("shared storage mode could not support specifics tablespace(s)."), + errhint("Either set temp_tablespaces to NULL, or turn off 
ss_enable_dss."))); + } + } } /* @@ -2816,7 +2928,7 @@ static int ServerLoop(void) /* Database Security: Support database audit */ char details[PGAUDIT_MAXLENGTH] = {0}; bool threadPoolActivated = g_instance.attr.attr_common.enable_thread_pool; - + bool startup_reform_finish = false; /* make sure gaussdb can receive request */ DISABLE_MEMORY_PROTECT(); @@ -2886,6 +2998,20 @@ static int ServerLoop(void) gs_signal_setmask(&t_thrd.libpq_cxt.UnBlockSig, NULL); (void)gs_signal_unblock_sigusr2(); + if (ENABLE_DMS && ENABLE_REFORM && g_instance.dms_cxt.SSRecoveryInfo.startup_reform + && !startup_reform_finish) { + ereport(LOG, (errmsg("[SS reform] Node:%d first-round reform start wait.", SS_MY_INST_ID))); + if (!DMSWaitReform()) { + ereport(WARNING, (errmsg("[SS reform] Node:%d first-round reform failed, shutdown now", + SS_MY_INST_ID))); + (void)gs_signal_send(PostmasterPid, SIGTERM); + startup_reform_finish = true; + } else { + ereport(LOG, (errmsg("[SS reform] Node:%d first-round reform success.", SS_MY_INST_ID))); + startup_reform_finish = true; + } + } + this_start_poll_time = mc_timers_us(); if ((this_start_poll_time - last_start_loop_time) != 0) { gs_set_libcomm_used_rate( @@ -3174,7 +3300,8 @@ static int ServerLoop(void) if (!u_sess->proc_cxt.IsBinaryUpgrade && g_instance.pid_cxt.AutoVacPID == 0 && (AutoVacuumingActive() || t_thrd.postmaster_cxt.start_autovac_launcher) && pmState == PM_RUN && !dummyStandbyMode && u_sess->attr.attr_common.upgrade_mode != 1 && - !g_instance.streaming_dr_cxt.isInSwitchover) { + !g_instance.streaming_dr_cxt.isInSwitchover && + !SS_STANDBY_MODE && !SS_PERFORMING_SWITCHOVER && !SS_STANDBY_FAILOVER) { g_instance.pid_cxt.AutoVacPID = initialize_util_thread(AUTOVACUUM_LAUNCHER); if (g_instance.pid_cxt.AutoVacPID != 0) @@ -3262,7 +3389,7 @@ static int ServerLoop(void) /* If we have lost the snapshot capturer, try to start a new one */ if ((g_instance.role == VSINGLENODE) && pmState == PM_RUN && - g_instance.pid_cxt.TxnSnapCapturerPID == 0 
&& !dummyStandbyMode) + g_instance.pid_cxt.TxnSnapCapturerPID == 0 && !dummyStandbyMode && !ENABLE_DMS) g_instance.pid_cxt.TxnSnapCapturerPID = StartTxnSnapCapturer(); /* If we have lost the cfs shrinker, try to start a new one */ @@ -3270,26 +3397,31 @@ static int ServerLoop(void) g_instance.pid_cxt.CfsShrinkerPID = StartCfsShrinkerCapturer(); /* If we have lost the rbcleaner, try to start a new one */ - if (ENABLE_TCAP_RECYCLEBIN && (g_instance.role == VSINGLENODE) && pmState == PM_RUN && g_instance.pid_cxt.RbCleanrPID == 0 && !dummyStandbyMode) + if (ENABLE_TCAP_RECYCLEBIN && (g_instance.role == VSINGLENODE) && pmState == PM_RUN && + g_instance.pid_cxt.RbCleanrPID == 0 && !dummyStandbyMode && !ENABLE_DMS) g_instance.pid_cxt.RbCleanrPID = StartRbCleaner(); /* If we have lost the stats collector, try to start a new one */ if ((IS_PGXC_COORDINATOR || (g_instance.role == VSINGLENODE)) && g_instance.pid_cxt.SnapshotPID == 0 && - u_sess->attr.attr_common.enable_wdr_snapshot && pmState == PM_RUN) + u_sess->attr.attr_common.enable_wdr_snapshot && pmState == PM_RUN && !SS_STANDBY_MODE && + !SS_PERFORMING_SWITCHOVER && !SS_STANDBY_FAILOVER) g_instance.pid_cxt.SnapshotPID = snapshot_start(); - if (ENABLE_ASP && g_instance.pid_cxt.AshPID == 0 && pmState == PM_RUN && !dummyStandbyMode) + if (ENABLE_ASP && g_instance.pid_cxt.AshPID == 0 && pmState == PM_RUN && !dummyStandbyMode + && !SS_STANDBY_MODE && !SS_PERFORMING_SWITCHOVER && !SS_STANDBY_FAILOVER) g_instance.pid_cxt.AshPID = initialize_util_thread(ASH_WORKER); /* If we have lost the full sql flush thread, try to start a new one */ - if (ENABLE_STATEMENT_TRACK && g_instance.pid_cxt.StatementPID == 0 && (pmState == PM_RUN || pmState == PM_HOT_STANDBY)) + if (ENABLE_STATEMENT_TRACK && g_instance.pid_cxt.StatementPID == 0 && (pmState == PM_RUN || pmState == PM_HOT_STANDBY) + && !SS_STANDBY_MODE && !SS_PERFORMING_SWITCHOVER && !SS_STANDBY_FAILOVER) g_instance.pid_cxt.StatementPID = initialize_util_thread(TRACK_STMT_WORKER); 
if ((IS_PGXC_COORDINATOR || IS_SINGLE_NODE) && u_sess->attr.attr_common.enable_instr_rt_percentile && g_instance.pid_cxt.PercentilePID == 0 && pmState == PM_RUN) g_instance.pid_cxt.PercentilePID = initialize_util_thread(PERCENTILE_WORKER); - if (g_instance.stat_cxt.stack_perf_start) { + if ((ENABLE_DMS && pmState == PM_RUN && g_instance.stat_cxt.stack_perf_start) + || (!ENABLE_DMS && g_instance.stat_cxt.stack_perf_start)) { g_instance.pid_cxt.StackPerfPID = initialize_util_thread(STACK_PERF_WORKER); g_instance.stat_cxt.stack_perf_start = false; } @@ -3320,8 +3452,10 @@ static int ServerLoop(void) t_thrd.postmaster_cxt.HaShmData->current_mode == PRIMARY_MODE) && #endif u_sess->attr.attr_common.upgrade_mode != 1 && - g_instance.pid_cxt.TwoPhaseCleanerPID == 0 && pmState == PM_RUN) + g_instance.pid_cxt.TwoPhaseCleanerPID == 0 && pmState == PM_RUN + && !SS_STANDBY_MODE && !SS_PERFORMING_SWITCHOVER && !SS_STANDBY_FAILOVER) { g_instance.pid_cxt.TwoPhaseCleanerPID = initialize_util_thread(TWOPASECLEANER); + } /* If we have lost the LWLock monitor, try to start a new one */ if (g_instance.pid_cxt.FaultMonitorPID == 0 && pmState == PM_RUN) @@ -4131,6 +4265,11 @@ int ProcessStartupPacket(Port* port, bool SSLdone) ereport(elevel, (errcode(ERRCODE_CANNOT_CONNECT_NOW), errmsg("can not accept connection if hot standby off"))); } + + if (SS_IN_REFORM) { + ereport(ERROR, (errcode(ERRCODE_CANNOT_CONNECT_NOW), + errmsg("cannot accept connection during SS cluster reform"))); + } #endif } } @@ -5241,7 +5380,6 @@ static void pmdie(SIGNAL_ARGS) if (g_instance.pid_cxt.WalWriterPID != 0) signal_child(g_instance.pid_cxt.WalWriterPID, SIGTERM); - pmState = PM_WAIT_BACKENDS; if (ENABLE_THREAD_POOL) { g_threadPoolControler->EnableAdjustPool(); @@ -5328,6 +5466,15 @@ static void PrepareDemoteResponse(void) if (NoDemote == g_instance.demotion) return; + if (ENABLE_DMS) { + ereport(LOG, + (errmsg("[SS switchover] primary demoting: shutdown ckpt done, demote success. 
restart now"))); + Assert(g_instance.dms_cxt.SSClusterState == NODESTATE_PRIMARY_DEMOTING); + g_instance.dms_cxt.SSClusterState = NODESTATE_PROMOTE_APPROVE; + + allow_immediate_pgstat_restart(); + return; + } SetWalsndsNodeState(NODESTATE_PROMOTE_APPROVE, NODESTATE_STANDBY_REDIRECT); /* @@ -5475,6 +5622,11 @@ static void ProcessDemoteRequest(void) g_instance.demotion = FastDemote; ereport(LOG, (errmsg("received fast demote request"))); + /* Under SS, demotion terminates only backends and ckpt threads. */ + if (ENABLE_DMS) { + goto dms_demote; + } + if (g_instance.pid_cxt.StartupPID != 0) signal_child(g_instance.pid_cxt.StartupPID, SIGTERM); @@ -5596,6 +5748,7 @@ static void ProcessDemoteRequest(void) #endif /* ENABLE_MULTIPLE_NODES */ +dms_demote: if (pmState == PM_RECOVERY) { /* * Only startup, bgwriter, and checkpointer should be active @@ -5612,40 +5765,108 @@ static void ProcessDemoteRequest(void) g_threadPoolControler->ShutDownScheduler(true, true); g_threadPoolControler->ShutDownThreads(); } - /* shut down all backends and autovac workers */ - (void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC); - /* and the autovac launcher too */ - if (g_instance.pid_cxt.AutoVacPID != 0) - signal_child(g_instance.pid_cxt.AutoVacPID, SIGTERM); + /* cancel TXNs on all nodes in SS and kill only backends and autovac threads */ + if (ENABLE_DMS) { + if (g_instance.pid_cxt.AshPID != 0) { + Assert(!dummyStandbyMode); + signal_child(g_instance.pid_cxt.AshPID, SIGTERM); + } - if (g_instance.pid_cxt.UndoLauncherPID != 0) - signal_child(g_instance.pid_cxt.UndoLauncherPID, SIGTERM); -#ifndef ENABLE_MULTIPLE_NODES - if (g_instance.pid_cxt.ApplyLauncerPID != 0) - signal_child(g_instance.pid_cxt.ApplyLauncerPID, SIGTERM); + if (g_instance.pid_cxt.TwoPhaseCleanerPID != 0) + signal_child(g_instance.pid_cxt.TwoPhaseCleanerPID, SIGTERM); + + if (g_instance.pid_cxt.StatementPID!= 0) { + Assert(!dummyStandbyMode); + signal_child(g_instance.pid_cxt.StatementPID, 
SIGTERM); + } + + /* and the walwriter too, to avoid checkpoint hang after ss switchover */ + if (g_instance.pid_cxt.WalWriterPID != 0) + signal_child(g_instance.pid_cxt.WalWriterPID, SIGTERM); + StopAliveBuildSender(); + + if (g_instance.pid_cxt.WalWriterAuxiliaryPID != 0) + signal_child(g_instance.pid_cxt.WalWriterAuxiliaryPID, SIGTERM); + + /* shut down all backends and autovac workers */ + (void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC); + + /* and the autovac launcher too */ + if (g_instance.pid_cxt.AutoVacPID != 0) { + signal_child(g_instance.pid_cxt.AutoVacPID, SIGTERM); + } + + if (g_instance.pid_cxt.UndoRecyclerPID != 0) { + signal_child(g_instance.pid_cxt.UndoRecyclerPID, SIGTERM); + } + + if (g_instance.pid_cxt.WLMCollectPID != 0) { + WLMProcessThreadShutDown(); + signal_child(g_instance.pid_cxt.WLMCollectPID, SIGTERM); + } + + if (g_instance.pid_cxt.WLMMonitorPID != 0) + signal_child(g_instance.pid_cxt.WLMMonitorPID, SIGTERM); + + if (g_instance.pid_cxt.WLMArbiterPID != 0) + signal_child(g_instance.pid_cxt.WLMArbiterPID, SIGTERM); + + /* kill it once again since WLMonitor would start it */ + if (g_instance.pid_cxt.WLMCollectPID != 0) { + Assert(!dummyStandbyMode); + signal_child(g_instance.pid_cxt.WLMCollectPID, SIGTERM); + } + + if (g_instance.pid_cxt.PercentilePID != 0) { + Assert(!dummyStandbyMode); + signal_child(g_instance.pid_cxt.PercentilePID, SIGTERM); + } + + if (g_instance.pid_cxt.SnapshotPID != 0) { + Assert(!dummyStandbyMode); + signal_child(g_instance.pid_cxt.SnapshotPID, SIGTERM); + } + + ereport(LOG, (errmsg("[SS switchover] primary demoting: " + "killed threads, waiting for backends die"))); + pmState = PM_WAIT_BACKENDS; + } else { + /* shut down all backends and autovac workers */ + (void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC); + + /* and the autovac launcher too */ + if (g_instance.pid_cxt.AutoVacPID != 0) + signal_child(g_instance.pid_cxt.AutoVacPID, SIGTERM); + + if 
(g_instance.pid_cxt.UndoLauncherPID != 0) + signal_child(g_instance.pid_cxt.UndoLauncherPID, SIGTERM); +#if !defined(ENABLE_MULTIPLE_NODES) + if (g_instance.pid_cxt.ApplyLauncerPID != 0) + signal_child(g_instance.pid_cxt.ApplyLauncerPID, SIGTERM); #endif - if (g_instance.pid_cxt.GlobalStatsPID != 0) - signal_child(g_instance.pid_cxt.GlobalStatsPID, SIGTERM); + if (g_instance.pid_cxt.GlobalStatsPID != 0) + signal_child(g_instance.pid_cxt.GlobalStatsPID, SIGTERM); - if (g_instance.pid_cxt.UndoRecyclerPID != 0) - signal_child(g_instance.pid_cxt.UndoRecyclerPID, SIGTERM); + if (g_instance.pid_cxt.UndoRecyclerPID != 0) + signal_child(g_instance.pid_cxt.UndoRecyclerPID, SIGTERM); - if (g_instance.pid_cxt.PgJobSchdPID != 0) - signal_child(g_instance.pid_cxt.PgJobSchdPID, SIGTERM); + if (g_instance.pid_cxt.PgJobSchdPID != 0) + signal_child(g_instance.pid_cxt.PgJobSchdPID, SIGTERM); - if ((IS_PGXC_COORDINATOR) && g_instance.pid_cxt.CommPoolerCleanPID != 0) - signal_child(g_instance.pid_cxt.CommPoolerCleanPID, SIGTERM); + if ((IS_PGXC_COORDINATOR) && g_instance.pid_cxt.CommPoolerCleanPID != 0) + signal_child(g_instance.pid_cxt.CommPoolerCleanPID, SIGTERM); - /* and the walwriter too */ - if (g_instance.pid_cxt.WalWriterPID != 0) - signal_child(g_instance.pid_cxt.WalWriterPID, SIGTERM); - StopAliveBuildSender(); + /* and the walwriter too */ + if (g_instance.pid_cxt.WalWriterPID != 0) + signal_child(g_instance.pid_cxt.WalWriterPID, SIGTERM); + StopAliveBuildSender(); - if (g_instance.pid_cxt.WalWriterAuxiliaryPID != 0) - signal_child(g_instance.pid_cxt.WalWriterAuxiliaryPID, SIGTERM); + if (g_instance.pid_cxt.WalWriterAuxiliaryPID != 0) + signal_child(g_instance.pid_cxt.WalWriterAuxiliaryPID, SIGTERM); - pmState = PM_WAIT_BACKENDS; + pmState = PM_WAIT_BACKENDS; + } } break; @@ -5900,7 +6121,8 @@ static void reaper(SIGNAL_ARGS) */ if (!u_sess->proc_cxt.IsBinaryUpgrade && AutoVacuumingActive() && g_instance.pid_cxt.AutoVacPID == 0 && !dummyStandbyMode && 
u_sess->attr.attr_common.upgrade_mode != 1 && - !g_instance.streaming_dr_cxt.isInSwitchover) + !g_instance.streaming_dr_cxt.isInSwitchover && + !SS_STANDBY_MODE && !SS_PERFORMING_SWITCHOVER && !SS_STANDBY_FAILOVER) g_instance.pid_cxt.AutoVacPID = initialize_util_thread(AUTOVACUUM_LAUNCHER); /* Before GRAND VERSION NUM 81000, we do not support scheduled job. */ @@ -5945,27 +6167,31 @@ static void reaper(SIGNAL_ARGS) g_instance.pid_cxt.PgStatPID = pgstat_start(); if ((g_instance.role == VSINGLENODE) && pmState == PM_RUN && - g_instance.pid_cxt.TxnSnapCapturerPID == 0 && !dummyStandbyMode) + g_instance.pid_cxt.TxnSnapCapturerPID == 0 && !dummyStandbyMode && !ENABLE_DMS) g_instance.pid_cxt.TxnSnapCapturerPID = StartTxnSnapCapturer(); /* If we have lost the cfs shrinker, try to start a new one */ if (g_instance.pid_cxt.CfsShrinkerPID == 0 && pmState <= PM_RUN) g_instance.pid_cxt.CfsShrinkerPID = StartCfsShrinkerCapturer(); - if (ENABLE_TCAP_RECYCLEBIN && (g_instance.role == VSINGLENODE) && pmState == PM_RUN && g_instance.pid_cxt.RbCleanrPID== 0 && !dummyStandbyMode) + if (ENABLE_TCAP_RECYCLEBIN && (g_instance.role == VSINGLENODE) && pmState == PM_RUN && + g_instance.pid_cxt.RbCleanrPID == 0 && !dummyStandbyMode && !ENABLE_DMS) g_instance.pid_cxt.RbCleanrPID = StartRbCleaner(); - if ((IS_PGXC_COORDINATOR || (g_instance.role == VSINGLENODE)) && u_sess->attr.attr_common.enable_wdr_snapshot && - g_instance.pid_cxt.SnapshotPID == 0 && !dummyStandbyMode) + if ((IS_PGXC_COORDINATOR || (g_instance.role == VSINGLENODE)) && + u_sess->attr.attr_common.enable_wdr_snapshot && g_instance.pid_cxt.SnapshotPID == 0 && + !dummyStandbyMode && !SS_STANDBY_MODE && !SS_PERFORMING_SWITCHOVER && !SS_STANDBY_FAILOVER) g_instance.pid_cxt.SnapshotPID = snapshot_start(); if ((IS_PGXC_COORDINATOR || IS_SINGLE_NODE) && u_sess->attr.attr_common.enable_instr_rt_percentile && g_instance.pid_cxt.PercentilePID == 0 && !dummyStandbyMode) g_instance.pid_cxt.PercentilePID = 
initialize_util_thread(PERCENTILE_WORKER); - if (ENABLE_ASP && g_instance.pid_cxt.AshPID == 0 && !dummyStandbyMode) + if (ENABLE_ASP && g_instance.pid_cxt.AshPID == 0 && !dummyStandbyMode + && !SS_STANDBY_MODE && !SS_PERFORMING_SWITCHOVER && !SS_STANDBY_FAILOVER) g_instance.pid_cxt.AshPID = initialize_util_thread(ASH_WORKER); - if (ENABLE_STATEMENT_TRACK && g_instance.pid_cxt.StatementPID == 0) + if (ENABLE_STATEMENT_TRACK && g_instance.pid_cxt.StatementPID == 0 + && !SS_STANDBY_MODE && !SS_PERFORMING_SWITCHOVER && !SS_STANDBY_FAILOVER) g_instance.pid_cxt.StatementPID = initialize_util_thread(TRACK_STMT_WORKER); /* Database Security: Support database audit */ @@ -5991,8 +6217,10 @@ static void reaper(SIGNAL_ARGS) t_thrd.postmaster_cxt.HaShmData->current_mode == PRIMARY_MODE) && #endif u_sess->attr.attr_common.upgrade_mode != 1 && - g_instance.pid_cxt.TwoPhaseCleanerPID == 0) + g_instance.pid_cxt.TwoPhaseCleanerPID == 0 && !SS_STANDBY_MODE && !SS_PERFORMING_SWITCHOVER + && !SS_STANDBY_FAILOVER) { g_instance.pid_cxt.TwoPhaseCleanerPID = initialize_util_thread(TWOPASECLEANER); + } if (g_instance.pid_cxt.FaultMonitorPID == 0) g_instance.pid_cxt.FaultMonitorPID = initialize_util_thread(FAULTMONITOR); @@ -6196,8 +6424,10 @@ static void reaper(SIGNAL_ARGS) if (g_instance.pid_cxt.PgStatPID != 0) signal_child(g_instance.pid_cxt.PgStatPID, SIGQUIT); - if (g_instance.pid_cxt.TxnSnapCapturerPID != 0) - signal_child(g_instance.pid_cxt.TxnSnapCapturerPID, SIGQUIT); + if (!SS_PERFORMING_SWITCHOVER) { + if (g_instance.pid_cxt.TxnSnapCapturerPID != 0) + signal_child(g_instance.pid_cxt.TxnSnapCapturerPID, SIGQUIT); + } if (g_instance.pid_cxt.CfsShrinkerPID != 0) signal_child(g_instance.pid_cxt.CfsShrinkerPID, SIGQUIT); @@ -6465,7 +6695,7 @@ static void reaper(SIGNAL_ARGS) if (!EXIT_STATUS_0(exitstatus)) LogChildExit(LOG, _("snapshot collector process"), pid, exitstatus); - if (pmState == PM_RUN) + if (pmState == PM_RUN && !SS_STANDBY_MODE && !SS_PERFORMING_SWITCHOVER && 
!SS_STANDBY_FAILOVER) g_instance.pid_cxt.SnapshotPID = snapshot_start(); continue; } @@ -6477,7 +6707,8 @@ static void reaper(SIGNAL_ARGS) if (!EXIT_STATUS_0(exitstatus)) LogChildExit(LOG, _("Active session history collector process"), pid, exitstatus); - if (pmState == PM_RUN && ENABLE_ASP) + if (pmState == PM_RUN && ENABLE_ASP && !SS_STANDBY_MODE && !SS_PERFORMING_SWITCHOVER + && !SS_STANDBY_FAILOVER) g_instance.pid_cxt.AshPID = initialize_util_thread(ASH_WORKER); continue; } @@ -6489,7 +6720,8 @@ static void reaper(SIGNAL_ARGS) if (!EXIT_STATUS_0(exitstatus)) LogChildExit(LOG, _("full SQL statement flush process"), pid, exitstatus); - if (pmState == PM_RUN && ENABLE_STATEMENT_TRACK) + if (pmState == PM_RUN && ENABLE_STATEMENT_TRACK && !SS_STANDBY_MODE && !SS_PERFORMING_SWITCHOVER + && !SS_STANDBY_FAILOVER) g_instance.pid_cxt.StatementPID = initialize_util_thread(TRACK_STMT_WORKER); continue; } @@ -7279,7 +7511,10 @@ static void PostmasterStateMachine(void) * later after writing the checkpoint record, like the archiver * process. 
*/ - if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0 && g_instance.pid_cxt.StartupPID == 0 && + if ((SS_PERFORMING_SWITCHOVER && CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0 && + g_instance.pid_cxt.AshPID == 0 && g_instance.pid_cxt.TwoPhaseCleanerPID == 0 && + g_instance.pid_cxt.StatementPID == 0 && g_instance.pid_cxt.PercentilePID == 0) || + (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0 && g_instance.pid_cxt.StartupPID == 0 && g_instance.pid_cxt.TwoPhaseCleanerPID == 0 && g_instance.pid_cxt.FaultMonitorPID == 0 && g_instance.pid_cxt.WalReceiverPID == 0 && g_instance.pid_cxt.WalRcvWriterPID == 0 && g_instance.pid_cxt.DataReceiverPID == 0 && g_instance.pid_cxt.DataRcvWriterPID == 0 && @@ -7309,7 +7544,7 @@ static void PostmasterStateMachine(void) #ifndef ENABLE_MULTIPLE_NODES g_instance.pid_cxt.ApplyLauncerPID == 0 && #endif - IsAllPageWorkerExit() && IsAllBuildSenderExit()) { + IsAllPageWorkerExit() && IsAllBuildSenderExit())) { if (g_instance.fatal_error) { /* * Start waiting for dead_end children to die. 
This state @@ -7434,11 +7669,13 @@ static void PostmasterStateMachine(void) */ if (DLGetHead(g_instance.backend_list) == NULL && g_instance.pid_cxt.PgArchPID == 0 && g_instance.pid_cxt.PgStatPID == 0 && AuditAllShutDown() && - ckpt_all_flush_buffer_thread_exit() && ObsArchAllShutDown()) { + ckpt_all_flush_buffer_thread_exit() && ObsArchAllShutDown() && !SS_PERFORMING_SWITCHOVER) { AsssertAllChildThreadExit(); /* syslogger is not considered here */ pmState = PM_NO_CHILDREN; + } else if (SS_PERFORMING_SWITCHOVER) { + pmState = PM_NO_CHILDREN; } } @@ -7536,8 +7773,11 @@ static void PostmasterStateMachine(void) if (g_threadPoolControler && g_threadPoolControler->GetScheduler()->HasShutDown() == false) g_threadPoolControler->ShutDownScheduler(true, false); } - shmem_exit(1); - reset_shared(g_instance.attr.attr_network.PostPortNumber); + + if (!ENABLE_DMS) { + shmem_exit(1); + reset_shared(g_instance.attr.attr_network.PostPortNumber); + } /* after reseting shared memory, we shall reset col-space cache. * all the data of this cache will be out of date after switchover. @@ -7548,7 +7788,7 @@ static void PostmasterStateMachine(void) /* if failed to enter archive-recovery state, then reboot as primary. */ { volatile HaShmemData* hashmdata = t_thrd.postmaster_cxt.HaShmData; - hashmdata->current_mode = STANDBY_MODE; + hashmdata->current_mode = ENABLE_DMS ? 
NORMAL_MODE : STANDBY_MODE; NotifyGscHotStandby(); UpdateOptsFile(); ereport(LOG, (errmsg("archive recovery started"))); @@ -7686,6 +7926,11 @@ bool SignalCancelAllBackEnd() return SignalSomeChildren(SIGINT, BACKEND_TYPE_NORMAL); } +void SignalTermAllBackEnd() +{ + (void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC); +} + static int IsHaWhiteListIp(const Port* port) { const int maxIpAddressLen = 64; @@ -8256,6 +8501,7 @@ void ExitPostmaster(int status) * * MUST -- vadim 05-10-1999 */ + DMSUninit(); CloseGaussPidDir(); @@ -8309,25 +8555,53 @@ static void handle_recovery_started() * Crank up the background tasks. It doesn't matter if this fails, * we'll just try again later. */ - Assert(g_instance.pid_cxt.CheckpointerPID == 0); - g_instance.pid_cxt.CheckpointerPID = initialize_util_thread(CHECKPOINT_THREAD); - Assert(g_instance.pid_cxt.BgWriterPID == 0); - if (!ENABLE_INCRE_CKPT) { - g_instance.pid_cxt.BgWriterPID = initialize_util_thread(BGWRITER); - } - Assert(g_instance.pid_cxt.SpBgWriterPID == 0); - g_instance.pid_cxt.SpBgWriterPID = initialize_util_thread(SPBGWRITER); + if (ENABLE_DMS) { + if (g_instance.pid_cxt.CheckpointerPID == 0) { + g_instance.pid_cxt.CheckpointerPID = initialize_util_thread(CHECKPOINT_THREAD); + } - if (ENABLE_INCRE_CKPT) { - for (int i = 0; i < g_instance.ckpt_cxt_ctl->pgwr_procs.num; i++) { - Assert(g_instance.pid_cxt.PageWriterPID[i] == 0); - g_instance.pid_cxt.PageWriterPID[i] = initialize_util_thread(PAGEWRITER_THREAD); + if (!ENABLE_INCRE_CKPT && g_instance.pid_cxt.BgWriterPID == 0) { + g_instance.pid_cxt.BgWriterPID = initialize_util_thread(BGWRITER); + } + + if (g_instance.pid_cxt.SpBgWriterPID == 0) { + g_instance.pid_cxt.SpBgWriterPID = initialize_util_thread(SPBGWRITER); + } + + if (u_sess->attr.attr_storage.enable_cbm_tracking && g_instance.pid_cxt.CBMWriterPID == 0) { + g_instance.pid_cxt.CBMWriterPID = initialize_util_thread(CBMWRITER); + } + + if (ENABLE_INCRE_CKPT) { + for (int i = 0; i < 
g_instance.ckpt_cxt_ctl->pgwr_procs.num; i++) { + if (g_instance.pid_cxt.PageWriterPID[i] == 0) { + g_instance.pid_cxt.PageWriterPID[i] = initialize_util_thread(PAGEWRITER_THREAD); + } + } + } + } else { + Assert(g_instance.pid_cxt.CheckpointerPID == 0); + g_instance.pid_cxt.CheckpointerPID = initialize_util_thread(CHECKPOINT_THREAD); + Assert(g_instance.pid_cxt.BgWriterPID == 0); + if (!ENABLE_INCRE_CKPT) { + g_instance.pid_cxt.BgWriterPID = initialize_util_thread(BGWRITER); + } + Assert(g_instance.pid_cxt.SpBgWriterPID == 0); + g_instance.pid_cxt.SpBgWriterPID = initialize_util_thread(SPBGWRITER); + + Assert(g_instance.pid_cxt.CBMWriterPID == 0); + if (u_sess->attr.attr_storage.enable_cbm_tracking) { + g_instance.pid_cxt.CBMWriterPID = initialize_util_thread(CBMWRITER); + } + + if (ENABLE_INCRE_CKPT) { + for (int i = 0; i < g_instance.ckpt_cxt_ctl->pgwr_procs.num; i++) { + Assert(g_instance.pid_cxt.PageWriterPID[i] == 0); + g_instance.pid_cxt.PageWriterPID[i] = initialize_util_thread(PAGEWRITER_THREAD); + } } } - Assert(g_instance.pid_cxt.CBMWriterPID == 0); - if (u_sess->attr.attr_storage.enable_cbm_tracking) { - g_instance.pid_cxt.CBMWriterPID = initialize_util_thread(CBMWRITER); - } + pmState = PM_RECOVERY; } } @@ -8898,7 +9172,7 @@ static void sigusr1_handler(SIGNAL_ARGS) /* should not start a worker in shutdown or demotion procedure */ if (CheckPostmasterSignal(PMSIGNAL_START_TXNSNAPWORKER) && g_instance.status == NoShutdown && - g_instance.demotion == NoDemote) { + g_instance.demotion == NoDemote && !ENABLE_DMS) { /* The rbcleaner wants us to start a worker process. 
*/ StartTxnSnapWorker(); } @@ -8936,7 +9210,7 @@ static void sigusr1_handler(SIGNAL_ARGS) if (CheckPostmasterSignal(PMSIGNAL_START_WALRECEIVER) && g_instance.pid_cxt.WalReceiverPID == 0 && (pmState == PM_STARTUP || pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY) && - g_instance.status == NoShutdown) { + g_instance.status == NoShutdown && !ENABLE_DMS) { if (g_instance.pid_cxt.WalRcvWriterPID == 0) { g_instance.pid_cxt.WalRcvWriterPID = initialize_util_thread(WALRECWRITE); SetWalRcvWriterPID(g_instance.pid_cxt.WalRcvWriterPID); @@ -8969,6 +9243,12 @@ static void sigusr1_handler(SIGNAL_ARGS) /* Advance postmaster's state machine */ PostmasterStateMachine(); } + + if (SS_STANDBY_MODE && !SS_PERFORMING_SWITCHOVER && pmState == PM_RUN && + (mode = CheckSwitchoverSignal())) { + SSDoSwitchover(); + } + if ((mode = CheckSwitchoverSignal()) != 0 && WalRcvIsOnline() && DataRcvIsOnline() && (pmState == PM_STARTUP || pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY)) { if (!IS_SHARED_STORAGE_STANDBY_CLUSTER_STANDBY_MODE) { @@ -8993,6 +9273,178 @@ static void sigusr1_handler(SIGNAL_ARGS) if (CheckFinishRedoSignal() && g_instance.comm_cxt.localinfo_cxt.is_finish_redo != 1) { pg_atomic_write_u32(&(g_instance.comm_cxt.localinfo_cxt.is_finish_redo), 1); } + + if (ENABLE_DMS && CheckPostmasterSignal(PMSIGNAL_DMS_SWITCHOVER_PROMOTE)) { + if (ENABLE_THREAD_POOL) { + g_threadPoolControler->CloseAllSessions(); + /* + * before pmState set to wait backends, + * threadpool cannot launch new thread by scheduler during demote. 
+ */ + g_threadPoolControler->ShutDownScheduler(true, true); + g_threadPoolControler->ShutDownThreads(true); + } + /* shut down all backends and autovac workers */ + (void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC); + + /* and the autovac launcher too */ + if (g_instance.pid_cxt.AutoVacPID != 0) + signal_child(g_instance.pid_cxt.AutoVacPID, SIGTERM); + + if (g_instance.pid_cxt.PgJobSchdPID != 0) + signal_child(g_instance.pid_cxt.PgJobSchdPID, SIGTERM); + + /* and the walwriter too */ + if (g_instance.pid_cxt.WalWriterPID != 0) + signal_child(g_instance.pid_cxt.WalWriterPID, SIGTERM); + + if (g_instance.pid_cxt.WalWriterAuxiliaryPID != 0) + signal_child(g_instance.pid_cxt.WalWriterAuxiliaryPID, SIGTERM); + + /* WLM threads need to release resources, such as long-holding table locks */ + if (g_instance.pid_cxt.WLMCollectPID != 0) { + WLMProcessThreadShutDown(); + signal_child(g_instance.pid_cxt.WLMCollectPID, SIGTERM); + } + + if (g_instance.pid_cxt.WLMMonitorPID != 0) + signal_child(g_instance.pid_cxt.WLMMonitorPID, SIGTERM); + + if (g_instance.pid_cxt.WLMArbiterPID != 0) + signal_child(g_instance.pid_cxt.WLMArbiterPID, SIGTERM); + + /* kill it once again since WLMonitor would start it */ + if (g_instance.pid_cxt.WLMCollectPID != 0) { + Assert(!dummyStandbyMode); + signal_child(g_instance.pid_cxt.WLMCollectPID, SIGTERM); + } + + pmState = PM_WAIT_BACKENDS; + if (ENABLE_THREAD_POOL) { + g_threadPoolControler->EnableAdjustPool(); + } + + SSHandleSwitchoverPromote(); + } + + if (ENABLE_DMS && CheckPostmasterSignal(PMSIGNAL_DMS_REFORM)) { + if (ENABLE_THREAD_POOL) { + g_threadPoolControler->CloseAllSessions(); + /* + * before pmState set to wait backends, + * threadpool cannot launch new thread by scheduler during demote. 
+ */ + g_threadPoolControler->ShutDownScheduler(true, true); + g_threadPoolControler->ShutDownThreads(true); + } + /* shut down all backends and autovac workers */ + (void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC); + + /* and the autovac launcher too */ + if (g_instance.pid_cxt.AutoVacPID != 0) + signal_child(g_instance.pid_cxt.AutoVacPID, SIGTERM); + + if (g_instance.pid_cxt.PgJobSchdPID != 0) + signal_child(g_instance.pid_cxt.PgJobSchdPID, SIGTERM); + + /* and the walwriter too */ + if (g_instance.pid_cxt.WalWriterPID != 0) + signal_child(g_instance.pid_cxt.WalWriterPID, SIGTERM); + + if (g_instance.pid_cxt.WalWriterAuxiliaryPID != 0) + signal_child(g_instance.pid_cxt.WalWriterAuxiliaryPID, SIGTERM); + + /* WLM threads need to release resources, such as long-holding table locks */ + if (g_instance.pid_cxt.WLMCollectPID != 0) { + WLMProcessThreadShutDown(); + signal_child(g_instance.pid_cxt.WLMCollectPID, SIGTERM); + } + + if (g_instance.pid_cxt.WLMMonitorPID != 0) + signal_child(g_instance.pid_cxt.WLMMonitorPID, SIGTERM); + + if (g_instance.pid_cxt.WLMArbiterPID != 0) + signal_child(g_instance.pid_cxt.WLMArbiterPID, SIGTERM); + + /* kill it once again since WLMonitor would start it */ + if (g_instance.pid_cxt.WLMCollectPID != 0) { + Assert(!dummyStandbyMode); + signal_child(g_instance.pid_cxt.WLMCollectPID, SIGTERM); + } + + if (g_instance.pid_cxt.AshPID!= 0) { + Assert(!dummyStandbyMode); + signal_child(g_instance.pid_cxt.AshPID, SIGTERM); + } + + if (g_instance.pid_cxt.TwoPhaseCleanerPID != 0) + signal_child(g_instance.pid_cxt.TwoPhaseCleanerPID, SIGTERM); + + if (g_instance.pid_cxt.StatementPID!= 0) { + Assert(!dummyStandbyMode); + signal_child(g_instance.pid_cxt.StatementPID, SIGTERM); + } + + if (g_instance.pid_cxt.UndoRecyclerPID != 0) { + signal_child(g_instance.pid_cxt.UndoRecyclerPID, SIGTERM); + } + + if (g_instance.pid_cxt.PercentilePID != 0) { + Assert(!dummyStandbyMode); + signal_child(g_instance.pid_cxt.PercentilePID, 
SIGTERM); + } + + if (g_instance.pid_cxt.SnapshotPID != 0) { + Assert(!dummyStandbyMode); + signal_child(g_instance.pid_cxt.SnapshotPID, SIGTERM); + } + + if (ENABLE_THREAD_POOL) { + g_threadPoolControler->EnableAdjustPool(); + } + + ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS reform] terminate backends success"))); + g_instance.dms_cxt.SSRecoveryInfo.reform_ready = true; + } + + if (ENABLE_DMS && CheckPostmasterSignal(PMSIGNAL_DMS_TRIGGERFAILOVER)) { + if (ENABLE_THREAD_POOL) { + g_threadPoolControler->CloseAllSessions(); + /* + * before pmState set to wait backends, + * threadpool cannot launch new thread by scheduler during demote. + */ + g_threadPoolControler->ShutDownScheduler(true, true); + g_threadPoolControler->ShutDownThreads(); + } + /* shut down all backends and autovac workers */ + (void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC); + + /* and the autovac launcher too */ + if (g_instance.pid_cxt.AutoVacPID != 0) + signal_child(g_instance.pid_cxt.AutoVacPID, SIGTERM); + + if (g_instance.pid_cxt.PgJobSchdPID != 0) + signal_child(g_instance.pid_cxt.PgJobSchdPID, SIGTERM); + + /* + * before init startup threads, need close WalWriter and WalWriterAuxiliary + * because during StartupXLOG remove_xlogtemp_files() occurs process file concurrently + */ + if (g_instance.pid_cxt.WalWriterPID != 0) + signal_child(g_instance.pid_cxt.WalWriterPID, SIGTERM); + + if (g_instance.pid_cxt.WalWriterAuxiliaryPID != 0) + signal_child(g_instance.pid_cxt.WalWriterAuxiliaryPID, SIGTERM); + + pmState = PM_WAIT_BACKENDS; + if (ENABLE_THREAD_POOL) { + g_threadPoolControler->EnableAdjustPool(); + } + + SShandle_promote_signal(); + } + if (CheckPromoteSignal()) { handle_promote_signal(); } @@ -11147,6 +11599,7 @@ DbState get_local_dbstate_sub(WalRcvData* walrcv, ServerMode mode) IsCascadeStandby())) || dummyStandbyMode || disater_recovery_has_no_build_reason) { has_build_reason = false; } + switch (mode) { case NORMAL_MODE: case PRIMARY_MODE: @@ -11180,6 
+11633,19 @@ DbState get_local_dbstate(void) ServerMode mode = t_thrd.postmaster_cxt.HaShmData->current_mode; DbState db_state = UNKNOWN_STATE; + if (ENABLE_DMS) { + if (SS_PRIMARY_DEMOTING) { + db_state = DEMOTING_STATE; + } else if (SS_STANDBY_WAITING) { + db_state = WAITING_STATE; + } else if (SS_STANDBY_PROMOTING || SS_STANDBY_FAILOVER) { + db_state = PROMOTING_STATE; + } else { + db_state = NORMAL_STATE; + } + return db_state; + } + if (t_thrd.walsender_cxt.WalSndCtl && t_thrd.walsender_cxt.WalSndCtl->demotion > NoDemote) db_state = DEMOTING_STATE; else if (walrcv && NODESTATE_STANDBY_WAITING == walrcv->node_state) @@ -11223,6 +11689,12 @@ const char* wal_get_db_state_string(DbState db_state) static ServerMode get_cur_mode(void) { + if (ENABLE_DMS) { + if (RecoveryInProgress()) { + return RECOVERY_MODE; + } + return SS_STANDBY_MODE ? STANDBY_MODE : PRIMARY_MODE; + } return t_thrd.postmaster_cxt.HaShmData->current_mode; } @@ -12243,6 +12715,14 @@ int GaussDbThreadMain(knl_thread_arg* arg) ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); + if (g_instance.attr.attr_storage.dms_attr.enable_dms) { + t_thrd.dms_cxt.msgContext = AllocSetContextCreate(t_thrd.top_mem_cxt, + "DMSWorkerContext", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + } + /* * Reload any libraries that were preloaded by the postmaster. 
Since we * exec'd this process, those libraries didn't come along with us; but we @@ -13436,3 +13916,60 @@ void ShutdownForDRSwitchover(void) ereport(LOG, (errmsg("Close All Sessions and shutdown AutoVacuum for DR switchover."))); } +void InitShmemForDmsCallBack() +{ + Port port; + errno_t rc = 0; + rc = memset_s(&port, sizeof(port), 0, sizeof(port)); + securec_check(rc, "\0", "\0"); + port.sock = PGINVALID_SOCKET; + InitializeGUCOptions(); + restore_backend_variables(BackendVariablesGlobal, &port); + /* may no need read_nondefault_variables */ + BaseInit(); + InitProcessAndShareMemory(); +} + +const char *GetSSServerMode() +{ + if (SS_STANDBY_MODE) { + return "Standby"; + } + + if (SS_PRIMARY_MODE) { + return "Primary"; + } + + return "Unknown"; +} + +bool SSIsServerModeReadOnly() +{ + return SS_PERFORMING_SWITCHOVER || SS_STANDBY_MODE; +} + +void SSRestartFailoverPromote() +{ + /* shut down all backends and autovac workers */ + (void)SignalSomeChildren(SIGTERM, BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC); + + /* and the autovac launcher too */ + if (g_instance.pid_cxt.AutoVacPID != 0) + signal_child(g_instance.pid_cxt.AutoVacPID, SIGTERM); + + if (g_instance.pid_cxt.PgJobSchdPID != 0) + signal_child(g_instance.pid_cxt.PgJobSchdPID, SIGTERM); + + /* + * before init startup threads, need close WalWriter and WalWriterAuxiliary + * because during StartupXLOG remove_xlogtemp_files() occurs process file concurrently + */ + if (g_instance.pid_cxt.WalWriterPID != 0) + signal_child(g_instance.pid_cxt.WalWriterPID, SIGTERM); + + if (g_instance.pid_cxt.WalWriterAuxiliaryPID != 0) + signal_child(g_instance.pid_cxt.WalWriterAuxiliaryPID, SIGTERM); + + pmState = PM_WAIT_BACKENDS; + SShandle_promote_signal(); +} \ No newline at end of file diff --git a/src/gausskernel/process/tcop/postgres.cpp b/src/gausskernel/process/tcop/postgres.cpp index b69489706..89ceb3955 100755 --- a/src/gausskernel/process/tcop/postgres.cpp +++ b/src/gausskernel/process/tcop/postgres.cpp @@ -190,6 
+190,9 @@ extern int optreset; /* might not be declared by system headers */ #include "storage/mot/jit_exec.h" #endif #include "commands/sqladvisor.h" +#include "storage/file/fio_device.h" +#include "storage/dss/dss_adaptor.h" +#include "storage/dss/dss_log.h" THR_LOCAL VerifyCopyCommandIsReparsed copy_need_to_be_reparse = NULL; @@ -6885,7 +6888,7 @@ void process_postgres_switches(int argc, char* argv[], GucContext ctx, const cha * the common help() function in main/main.c. */ initOptParseContext(&optCtxt); - while ((flag = getopt_r(argc, argv, "A:B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:v:W:g:Q:-:", &optCtxt)) != -1) { + while ((flag = getopt_r(argc, argv, "A:B:bc:C:D:d:EeFf:Gh:ijk:lN:nOo:Pp:r:S:sTt:v:W:g:Q:-:", &optCtxt)) != -1) { switch (flag) { case 'A': SetConfigOption("debug_assertions", optCtxt.optarg, ctx, gucsource); @@ -6932,6 +6935,16 @@ void process_postgres_switches(int argc, char* argv[], GucContext ctx, const cha if (!set_plan_disabling_options(optCtxt.optarg, ctx, gucsource)) errs++; break; + + case 'G': + if (!singleuser) { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("-G can be used only in single user or bootstrap mode"))); + } + EnableInitDBSegment = true; + break; + case 'g': SetConfigOption("xlog_file_path", optCtxt.optarg, ctx, gucsource); break; @@ -7664,6 +7677,17 @@ int PostgresMain(int argc, char* argv[], const char* dbname, const char* usernam CreateDataDirLockFile(false); } + /* Callback function for dss operator */ + if (dss_device_init(g_instance.attr.attr_storage.dss_attr.ss_dss_conn_path, + g_instance.attr.attr_storage.dss_attr.ss_enable_dss) != DSS_SUCCESS) { + ereport(FATAL, (errmsg("failed to init dss device"))); + proc_exit(1); + } + if (ENABLE_DSS) { + dss_log_init(); + } + initDSSConf(); + /* Early initialization */ BaseInit(); diff --git a/src/gausskernel/process/tcop/utility.cpp b/src/gausskernel/process/tcop/utility.cpp index e80cfdd73..21bed188a 100755 --- a/src/gausskernel/process/tcop/utility.cpp +++ 
b/src/gausskernel/process/tcop/utility.cpp @@ -518,8 +518,9 @@ static void check_xact_readonly(Node* parse_tree) break; } case T_AlterRoleStmt: { - AlterRoleStmt* stmt = (AlterRoleStmt*)parse_tree; - if (!(DO_NOTHING != stmt->lockstatus && t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE)) { + AlterRoleStmt *stmt = (AlterRoleStmt *)parse_tree; + if (!(DO_NOTHING != stmt->lockstatus && + (t_thrd.postmaster_cxt.HaShmData->current_mode == STANDBY_MODE || SS_STANDBY_MODE))) { PreventCommandIfReadOnly(CreateCommandTag(parse_tree)); } break; @@ -538,6 +539,13 @@ static void check_xact_readonly(Node* parse_tree) */ void PreventCommandIfReadOnly(const char* cmd_name) { + if (SSIsServerModeReadOnly()) { + ereport(ERROR, + (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), + /* translator: %s is name of a SQL command, eg CREATE */ + errmsg("cannot execute %s at Standby node while DMS enabled", cmd_name))); + } + if (u_sess->attr.attr_common.XactReadOnly && u_sess->attr.attr_storage.replorigin_sesssion_origin == 0) ereport(ERROR, (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), @@ -555,6 +563,13 @@ void PreventCommandIfReadOnly(const char* cmd_name) */ void PreventCommandDuringRecovery(const char* cmd_name) { + if (SSIsServerModeReadOnly()) { + ereport(ERROR, + (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), + /* translator: %s is name of a SQL command, eg CREATE */ + errmsg("cannot execute %s at Standby node while DMS enabled", cmd_name))); + } + if (RecoveryInProgress()) ereport(ERROR, (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION), @@ -7080,7 +7095,11 @@ void standard_ProcessUtility(Node* parse_tree, const char* query_string, ParamLi } case T_CreatePublicationStmt: -#ifdef ENABLE_MULTIPLE_NODES + if (ENABLE_DMS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Not support CreatePublication while DMS and DSS enabled"))); + } +#if defined(ENABLE_MULTIPLE_NODES) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("PUBLICATION is not currently 
supported"), @@ -7089,7 +7108,11 @@ void standard_ProcessUtility(Node* parse_tree, const char* query_string, ParamLi CreatePublication((CreatePublicationStmt *) parse_tree); break; case T_AlterPublicationStmt: -#ifdef ENABLE_MULTIPLE_NODES + if (ENABLE_DMS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Not support AlterPublication while DMS and DSS enabled"))); + } +#if defined(ENABLE_MULTIPLE_NODES) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("PUBLICATION is not currently supported"), @@ -7098,7 +7121,11 @@ void standard_ProcessUtility(Node* parse_tree, const char* query_string, ParamLi AlterPublication((AlterPublicationStmt *) parse_tree); break; case T_CreateSubscriptionStmt: -#ifdef ENABLE_MULTIPLE_NODES + if (ENABLE_DMS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Not support CreateSubscription while DMS and DSS enabled"))); + } +#if defined(ENABLE_MULTIPLE_NODES) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("SUBSCRIPTION is not currently supported"), @@ -7107,7 +7134,11 @@ void standard_ProcessUtility(Node* parse_tree, const char* query_string, ParamLi CreateSubscription((CreateSubscriptionStmt *) parse_tree, is_top_level); break; case T_AlterSubscriptionStmt: -#ifdef ENABLE_MULTIPLE_NODES + if (ENABLE_DMS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Not support AlterSubscription while DMS and DSS enabled"))); + } +#if defined(ENABLE_MULTIPLE_NODES) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("SUBSCRIPTION is not currently supported"), @@ -7116,7 +7147,11 @@ void standard_ProcessUtility(Node* parse_tree, const char* query_string, ParamLi AlterSubscription((AlterSubscriptionStmt *) parse_tree, is_top_level); break; case T_DropSubscriptionStmt: -#ifdef ENABLE_MULTIPLE_NODES + if (ENABLE_DMS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Not support DropSubscription while DMS and DSS enabled"))); + } +#if 
defined(ENABLE_MULTIPLE_NODES) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("SUBSCRIPTION is not currently supported"), diff --git a/src/gausskernel/process/threadpool/knl_instance.cpp b/src/gausskernel/process/threadpool/knl_instance.cpp index def21a4c5..7814d7f7f 100755 --- a/src/gausskernel/process/threadpool/knl_instance.cpp +++ b/src/gausskernel/process/threadpool/knl_instance.cpp @@ -170,6 +170,31 @@ static void knl_g_startup_init(knl_g_startup_context *starup_cxt) starup_cxt->current_record = NULL; } +static void knl_g_dms_init(knl_g_dms_context *dms_cxt) +{ + Assert(dms_cxt != NULL); + dms_cxt->dmsProcSid = 0; + dms_cxt->xminAck = 0; + dms_cxt->SSReformerControl.list_stable = 0; + dms_cxt->SSReformerControl.primaryInstId = -1; + dms_cxt->SSReformInfo.in_reform = false; + dms_cxt->SSReformInfo.dms_role = DMS_ROLE_UNKNOW; + dms_cxt->SSClusterState = NODESTATE_NORMAL; + dms_cxt->SSRecoveryInfo.recovery_pause_flag = true; + dms_cxt->SSRecoveryInfo.failover_triggered = false; + dms_cxt->SSRecoveryInfo.new_primary_reset_walbuf_flag = false; + dms_cxt->SSRecoveryInfo.skip_redo_replay = false; + dms_cxt->SSRecoveryInfo.reclsn_updated = false; + dms_cxt->SSRecoveryInfo.ready_to_startup = false; + dms_cxt->SSRecoveryInfo.startup_reform = true; + dms_cxt->SSRecoveryInfo.restart_failover_flag = false; + dms_cxt->SSRecoveryInfo.reform_ready = false; + dms_cxt->SSRecoveryInfo.in_failover = false; + dms_cxt->log_timezone = NULL; + pg_atomic_init_u32(&dms_cxt->inDmsThreShmemInitCnt, 0); + pg_atomic_init_u32(&dms_cxt->inProcExitCnt, 0); + dms_cxt->dmsInited = false; +} static void knl_g_tests_init(knl_g_tests_context* tests_cxt) { @@ -707,6 +732,93 @@ static void knl_g_roach_init(knl_g_roach_context* roach_cxt) roach_cxt->targetRestoreTimeFromMedia = NULL; } +static void knl_g_dwsubdir_init(knl_g_dwsubdatadir_context* dw_subdir_cxt) +{ + Assert(dw_subdir_cxt != NULL); + errno_t rc = memset_s(dw_subdir_cxt, sizeof(knl_g_dwsubdatadir_context), 0, 
sizeof(knl_g_dwsubdatadir_context)); + securec_check(rc, "\0", "\0"); + + errno_t errorno = EOK; + + errorno = strcpy_s(dw_subdir_cxt->dwOldPath, MAXPGPATH, "global/pg_dw"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(dw_subdir_cxt->dwPathPrefix, MAXPGPATH, "global/pg_dw_"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(dw_subdir_cxt->dwSinglePath, MAXPGPATH, "global/pg_dw_single"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(dw_subdir_cxt->dwBuildPath, MAXPGPATH, "global/pg_dw.build"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(dw_subdir_cxt->dwUpgradePath, MAXPGPATH, "global/dw_upgrade"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(dw_subdir_cxt->dwBatchUpgradeMetaPath, MAXPGPATH, "global/dw_batch_upgrade_meta"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(dw_subdir_cxt->dwBatchUpgradeFilePath, MAXPGPATH, "global/dw_batch_upgrade_files"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(dw_subdir_cxt->dwMetaPath, MAXPGPATH, "global/pg_dw_meta"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(dw_subdir_cxt->dwExtChunkPath, MAXPGPATH, "global/pg_dw_ext_chunk"); + securec_check_c(errorno, "\0", "\0"); + + dw_subdir_cxt->dwStorageType = 0; +} + +static void knl_g_datadir_init(knl_g_datadir_context* datadir_init) +{ + errno_t errorno = EOK; + + errorno = strcpy_s(datadir_init->baseDir, MAXPGPATH, "base"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(datadir_init->globalDir, MAXPGPATH, "global"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(datadir_init->clogDir, MAXPGPATH, "pg_clog"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(datadir_init->csnlogDir, MAXPGPATH, "pg_csnlog"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(datadir_init->locationDir, MAXPGPATH, "pg_location"); + securec_check_c(errorno, "\0", "\0"); + + errorno = 
strcpy_s(datadir_init->notifyDir, MAXPGPATH, "pg_notify"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(datadir_init->serialDir, MAXPGPATH, "pg_serial"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(datadir_init->snapshotsDir, MAXPGPATH, "pg_snapshots"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(datadir_init->tblspcDir, MAXPGPATH, "pg_tblspc"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(datadir_init->twophaseDir, MAXPGPATH, "pg_twophase"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(datadir_init->multixactDir, MAXPGPATH, "pg_multixact"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(datadir_init->xlogDir, MAXPGPATH, "pg_xlog"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(datadir_init->controlPath, MAXPGPATH, "global/pg_control"); + securec_check_c(errorno, "\0", "\0"); + + errorno = strcpy_s(datadir_init->controlBakPath, MAXPGPATH, "global/pg_control.backup"); + securec_check_c(errorno, "\0", "\0"); + + knl_g_dwsubdir_init(&datadir_init->dw_subdir_cxt); +} + static void knl_g_streaming_dr_init(knl_g_streaming_dr_context* streaming_dr_cxt) { streaming_dr_cxt->isInSwitchover = false; @@ -820,6 +932,7 @@ void knl_instance_init() knl_g_bgwriter_init(&g_instance.bgwriter_cxt); knl_g_repair_init(&g_instance.repair_cxt); knl_g_startup_init(&g_instance.startup_cxt); + knl_g_dms_init(&g_instance.dms_cxt); knl_g_shmem_init(&g_instance.shmem_cxt); g_instance.ckpt_cxt_ctl = &g_instance.ckpt_cxt; g_instance.ckpt_cxt_ctl = (knl_g_ckpt_context*)TYPEALIGN(SIZE_OF_TWO_UINT64, g_instance.ckpt_cxt_ctl); @@ -862,6 +975,8 @@ void knl_instance_init() pthread_mutex_init(&g_instance.loadPluginLock[i], NULL); } #endif + + knl_g_datadir_init(&g_instance.datadir_cxt); } void add_numa_alloc_info(void* numaAddr, size_t length) diff --git a/src/gausskernel/process/threadpool/knl_thread.cpp b/src/gausskernel/process/threadpool/knl_thread.cpp index 
7b404c827..e7761d0d8 100755 --- a/src/gausskernel/process/threadpool/knl_thread.cpp +++ b/src/gausskernel/process/threadpool/knl_thread.cpp @@ -1392,6 +1392,8 @@ static void knl_t_storage_init(knl_t_storage_context* storage_cxt) storage_cxt->statement_fin_time = 0; storage_cxt->statement_fin_time2 = 0; storage_cxt->pageCopy = NULL; + storage_cxt->pageCopy_ori = NULL; + storage_cxt->segPageCopyOri = NULL; storage_cxt->isSwitchoverLockHolder = false; storage_cxt->num_held_lwlocks = 0; @@ -1423,6 +1425,7 @@ static void knl_t_storage_init(knl_t_storage_context* storage_cxt) storage_cxt->max_safe_fds = 32; storage_cxt->max_userdatafiles = 8192 - 1000; storage_cxt->timeoutRemoteOpera = 0; + storage_cxt->dmsBufCtl = NULL; } static void knl_t_port_init(knl_t_port_context* port_cxt) diff --git a/src/gausskernel/process/threadpool/threadpool_sessctl.cpp b/src/gausskernel/process/threadpool/threadpool_sessctl.cpp index 1e305f32b..f0908d677 100755 --- a/src/gausskernel/process/threadpool/threadpool_sessctl.cpp +++ b/src/gausskernel/process/threadpool/threadpool_sessctl.cpp @@ -859,7 +859,7 @@ TransactionId ThreadPoolSessControl::ListAllSessionGttFrozenxids(int maxSize, *n = 0; } - if (RecoveryInProgress()) { + if (RecoveryInProgress() || SSIsServerModeReadOnly()) { return InvalidTransactionId; } diff --git a/src/gausskernel/runtime/executor/functions.cpp b/src/gausskernel/runtime/executor/functions.cpp index 251155407..4c6c93fb6 100644 --- a/src/gausskernel/runtime/executor/functions.cpp +++ b/src/gausskernel/runtime/executor/functions.cpp @@ -1576,7 +1576,6 @@ bool check_sql_fn_retval(Oid func_id, Oid ret_type, List* query_tree_list, bool* Oid res_type; ListCell* lc = NULL; bool gs_encrypted_proc_was_created = false; - AssertArg(!IsPolymorphicType(ret_type)); CommandCounterIncrement(); if (modify_target_list != NULL) *modify_target_list = false; /* initialize for no change */ diff --git a/src/gausskernel/storage/CMakeLists.txt b/src/gausskernel/storage/CMakeLists.txt index 
39c052f20..8778d39d9 100755 --- a/src/gausskernel/storage/CMakeLists.txt +++ b/src/gausskernel/storage/CMakeLists.txt @@ -24,6 +24,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/tcap ${CMAKE_CURRENT_SOURCE_DIR}/xlog_share_storage ${CMAKE_CURRENT_SOURCE_DIR}/dorado_operation + ${CMAKE_CURRENT_SOURCE_DIR}/dss ) if("${ENABLE_MOT}" STREQUAL "ON") @@ -53,6 +54,7 @@ add_subdirectory(nvm) add_subdirectory(tcap) add_subdirectory(xlog_share_storage) add_subdirectory(dorado_operation) +add_subdirectory(dss) if("${ENABLE_MOT}" STREQUAL "ON") add_subdirectory(mot) diff --git a/src/gausskernel/storage/Makefile b/src/gausskernel/storage/Makefile index e852740d4..6f40370de 100644 --- a/src/gausskernel/storage/Makefile +++ b/src/gausskernel/storage/Makefile @@ -25,7 +25,7 @@ subdir = src/gausskernel/storage top_builddir = ../../.. include $(top_builddir)/src/Makefile.global -SUBDIRS = access bulkload replication buffer cmgr cstore file freespace ipc large_object lmgr page remote smgr nvm tcap sync dorado_operation xlog_share_storage +SUBDIRS = access bulkload replication buffer cmgr cstore file freespace ipc large_object lmgr page remote smgr nvm tcap sync dorado_operation xlog_share_storage dss ifeq ($(enable_mot), yes) SUBDIRS += mot diff --git a/src/gausskernel/storage/access/heap/heapam.cpp b/src/gausskernel/storage/access/heap/heapam.cpp index 189b99df3..804aca34b 100755 --- a/src/gausskernel/storage/access/heap/heapam.cpp +++ b/src/gausskernel/storage/access/heap/heapam.cpp @@ -100,6 +100,8 @@ #include "catalog/pg_hashbucket_fn.h" #include "gstrace/gstrace_infra.h" #include "gstrace/access_gstrace.h" +#include "ddes/dms/ss_transaction.h" + #ifdef ENABLE_MULTIPLE_NODES #include "tsdb/storage/ts_store_insert.h" #endif /* ENABLE_MULTIPLE_NODES */ @@ -145,7 +147,6 @@ static void ComputeNewXmaxInfomask(TransactionId xmax, uint16 old_infomask, uint uint16 *result_infomask, uint16 *result_infomask2); static void GetMultiXactIdHintBits(MultiXactId multi, uint16 
*new_infomask, uint16 *new_infomask2); -static TransactionId MultiXactIdGetUpdateXid(TransactionId xmax, uint16 t_infomask, uint16 t_infomask2); static bool DoesMultiXactIdConflict(MultiXactId multi, LockTupleMode lockmode); /* ---------------- @@ -7167,6 +7168,7 @@ static TM_Result heap_lock_updated_tuple_rec(Relation rel, ItemPointer tid, Tran uint16 old_infomask2; TransactionId xmax; TransactionId new_xmax; + TransactionId priorXmax = InvalidTransactionId; Buffer vmbuffer = InvalidBuffer; BlockNumber block; TM_Result result; @@ -7195,17 +7197,37 @@ l4: CHECK_FOR_INTERRUPTS(); /* - * Before locking the buffer, pin the visibility map page if it - * appears to be necessary. Since we haven't got the lock yet, - * someone else might be in the middle of changing this, so we'll need - * to recheck after we have the lock. - */ + * Before locking the buffer, pin the visibility map page if it + * appears to be necessary. Since we haven't got the lock yet, + * someone else might be in the middle of changing this, so we'll need + * to recheck after we have the lock. + */ if (PageIsAllVisible(BufferGetPage(buf))) { visibilitymap_pin(rel, block, &vmbuffer); } LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + /* + * Check the tuple XMIN against prior XMAX, if any. If we reached the + * end of the chain, we're done, so return success. + */ + if (TransactionIdIsValid(priorXmax) && + !TransactionIdEquals(HeapTupleGetRawXmin(&mytup), priorXmax)) { + result = TM_Ok; + goto out_locked; + } + + /* + * Also check Xmin: if this tuple was created by an aborted + * (sub)transaction, then we already locked the last live one in the + * chain, thus we're done, so return success. 
+ */ + if (TransactionIdDidAbort(HeapTupleGetRawXmin(&mytup))) { + result = TM_Ok; + goto out_locked; + } + old_infomask = mytup.t_data->t_infomask; old_infomask2 = mytup.t_data->t_infomask2; xmax = HeapTupleGetRawXmax(&mytup); @@ -7369,6 +7391,7 @@ next: } /* tail recursion */ + priorXmax = HeapTupleGetUpdateXid(&mytup); ItemPointerCopy(&(mytup.t_data->t_ctid), &tupid); UnlockReleaseBuffer(buf); if (vmbuffer != InvalidBuffer) @@ -7769,8 +7792,11 @@ static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint * Caller is expected to check the status of the updating transaction, if * necessary. */ -static TransactionId MultiXactIdGetUpdateXid(TransactionId xmax, uint16 t_infomask, uint16 t_infomask2) +TransactionId MultiXactIdGetUpdateXid(TransactionId xmax, uint16 t_infomask, uint16 t_infomask2) { + if (SS_STANDBY_MODE) { + return SSMultiXactIdGetUpdateXid(xmax, t_infomask, t_infomask2); + } TransactionId updateXact = InvalidTransactionId; MultiXactMember *members = NULL; int nmembers; diff --git a/src/gausskernel/storage/access/heap/heapam_visibility.cpp b/src/gausskernel/storage/access/heap/heapam_visibility.cpp index 27bf4103c..c78faefa5 100644 --- a/src/gausskernel/storage/access/heap/heapam_visibility.cpp +++ b/src/gausskernel/storage/access/heap/heapam_visibility.cpp @@ -75,6 +75,7 @@ #include "utils/combocid.h" #include "utils/snapmgr.h" #include "commands/vacuum.h" +#include "ddes/dms/ss_common_attr.h" /* Log SetHintBits() */ static inline void LogSetHintBit(HeapTupleHeader tuple, Buffer buffer, uint16 infomask) @@ -149,6 +150,9 @@ static inline void LogSetHintBit(HeapTupleHeader tuple, Buffer buffer, uint16 in */ static inline void SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid) { + if (SS_STANDBY_MODE) { + return; + } #ifdef PGXC // The following scenario may use local snapshot, so do not set hint bits. // Notice: we don't support two or more bits within infomask. 
diff --git a/src/gausskernel/storage/access/heap/hio.cpp b/src/gausskernel/storage/access/heap/hio.cpp index df5f1f0a7..87007e489 100644 --- a/src/gausskernel/storage/access/heap/hio.cpp +++ b/src/gausskernel/storage/access/heap/hio.cpp @@ -663,9 +663,10 @@ loop: */ page = BufferGetPage(buffer); if (!PageIsNew(page)) { - ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("page %u of relation \"%s\" should be empty but is not", BufferGetBlockNumber(buffer), - RelationGetRelationName(relation)))); + int elevel = ENABLE_DMS ? PANIC : ERROR; + ereport(elevel, + (errcode(ERRCODE_DATA_CORRUPTED), errmsg("page %u of relation \"%s\" should be empty but is not", + BufferGetBlockNumber(buffer), RelationGetRelationName(relation)))); } phdr = (HeapPageHeader)page; @@ -780,9 +781,10 @@ Buffer RelationGetNewBufferForBulkInsert(Relation relation, Size len, Size dict_ page = BufferGetPage(buffer); if (!PageIsNew(page)) { - ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("page %u of relation \"%s\" should be empty but is not", BufferGetBlockNumber(buffer), - RelationGetRelationName(relation)))); + int elevel = ENABLE_DMS ? PANIC : ERROR; + ereport(elevel, + (errcode(ERRCODE_DATA_CORRUPTED), errmsg("page %u of relation \"%s\" should be empty but is not", + BufferGetBlockNumber(buffer), RelationGetRelationName(relation)))); } phdr = (HeapPageHeader)page; diff --git a/src/gausskernel/storage/access/heap/pruneheap.cpp b/src/gausskernel/storage/access/heap/pruneheap.cpp index ae78655cc..fef972f20 100644 --- a/src/gausskernel/storage/access/heap/pruneheap.cpp +++ b/src/gausskernel/storage/access/heap/pruneheap.cpp @@ -80,7 +80,7 @@ void heap_page_prune_opt(Relation relation, Buffer buffer) * clean the page. The master will likely issue a cleaning WAL record soon * anyway, so this is no particular loss. 
*/ - if (RecoveryInProgress()) + if (RecoveryInProgress() || SSIsServerModeReadOnly()) return; oldest_xmin = u_sess->utils_cxt.RecentGlobalXmin; diff --git a/src/gausskernel/storage/access/heap/rewriteheap.cpp b/src/gausskernel/storage/access/heap/rewriteheap.cpp index c45cda9b7..e501edce1 100644 --- a/src/gausskernel/storage/access/heap/rewriteheap.cpp +++ b/src/gausskernel/storage/access/heap/rewriteheap.cpp @@ -241,6 +241,8 @@ RewriteState begin_heap_rewrite(Relation old_heap, Relation new_heap, Transactio MemoryContext old_cxt; HASHCTL hash_ctl; errno_t errorno = EOK; + char* unalign_cmprBuffer = NULL; + char* unalign_rsBuffer = NULL; /* * To ease cleanup, make a separate context that will contain the @@ -273,7 +275,12 @@ RewriteState begin_heap_rewrite(Relation old_heap, Relation new_heap, Transactio } ADIO_ELSE() { - state->rs_buffer = (Page)palloc(BLCKSZ); + if (ENABLE_DSS) { + unalign_rsBuffer = (char*)palloc(BLCKSZ + ALIGNOF_BUFFER); + state->rs_buffer = (Page)BUFFERALIGN(unalign_rsBuffer); + } else { + state->rs_buffer = (Page)palloc(BLCKSZ); + } } ADIO_END(); @@ -300,7 +307,12 @@ RewriteState begin_heap_rewrite(Relation old_heap, Relation new_heap, Transactio } ADIO_ELSE() { - state->rs_cmprBuffer = (Page)palloc0(BLCKSZ); + if (ENABLE_DSS) { + unalign_cmprBuffer = (char*)palloc0(BLCKSZ + ALIGNOF_BUFFER); + state->rs_cmprBuffer = (Page)BUFFERALIGN(unalign_cmprBuffer); + } else { + state->rs_cmprBuffer = (Page)palloc0(BLCKSZ); + } } ADIO_END(); state->rs_tupBuf = (HeapTuple *)palloc(sizeof(HeapTuple) * DEFAULTBUFFEREDTUPLES); diff --git a/src/gausskernel/storage/access/heap/visibilitymap.cpp b/src/gausskernel/storage/access/heap/visibilitymap.cpp index 6eddc6af5..1c56555e5 100644 --- a/src/gausskernel/storage/access/heap/visibilitymap.cpp +++ b/src/gausskernel/storage/access/heap/visibilitymap.cpp @@ -270,6 +270,10 @@ void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRe */ bool visibilitymap_test(Relation rel, BlockNumber 
heapBlk, Buffer *buf) { + if (ENABLE_DMS && !SS_PRIMARY_MODE) { + return false; + } + BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk); uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk); uint8 mapBit = HEAPBLK_TO_MAPBIT(heapBlk); @@ -596,6 +600,7 @@ static void vm_extend(Relation rel, BlockNumber vm_nblocks) { BlockNumber vm_nblocks_now; Page pg; + Page pg_ori = NULL; ADIO_RUN() { @@ -603,7 +608,12 @@ static void vm_extend(Relation rel, BlockNumber vm_nblocks) } ADIO_ELSE() { - pg = (Page)palloc(BLCKSZ); + if (ENABLE_DSS) { + pg_ori = (Page)palloc(BLCKSZ + ALIGNOF_BUFFER); + pg = (Page)BUFFERALIGN(pg_ori); + } else { + pg = (Page)palloc(BLCKSZ); + } } ADIO_END(); @@ -642,7 +652,7 @@ static void vm_extend(Relation rel, BlockNumber vm_nblocks) /* Now extend the file */ while (vm_nblocks_now < vm_nblocks) { if (IsSegmentFileNode(rel->rd_node)) { - Buffer buf = ReadBufferExtended(rel, VISIBILITYMAP_FORKNUM, P_NEW, RBM_NORMAL, NULL); + Buffer buf = ReadBufferExtended(rel, VISIBILITYMAP_FORKNUM, P_NEW, RBM_ZERO, NULL); ReleaseBuffer(buf); #ifdef USE_ASSERT_CHECKING BufferDesc *buf_desc = GetBufferDescriptor(buf - 1); @@ -676,8 +686,13 @@ static void vm_extend(Relation rel, BlockNumber vm_nblocks) } ADIO_ELSE() { - pfree(pg); - pg = NULL; + if (ENABLE_DSS) { + pfree(pg_ori); + pg_ori = NULL; + } else { + pfree(pg); + pg = NULL; + } } ADIO_END(); } diff --git a/src/gausskernel/storage/access/index/genam.cpp b/src/gausskernel/storage/access/index/genam.cpp index a0c200a2a..3ce68686b 100644 --- a/src/gausskernel/storage/access/index/genam.cpp +++ b/src/gausskernel/storage/access/index/genam.cpp @@ -598,7 +598,7 @@ static bool GPIInsertFakeParentRelCacheForSubpartition(GPIScanDesc gpiScan, Memo HTAB* fakeRels = gpiScan->fakeRelationTable; Relation parentRel = gpiScan->parentRelation; Oid parentPartOid = partid_get_parentid(gpiScan->currPartOid); - if (parentPartOid != parentRel->rd_id) { + if (OidIsValid(parentPartOid) && parentPartOid != parentRel->rd_id) { PartRelIdCacheKey 
fakeRelKey = {parentPartOid, InvalidBktId}; Partition parentPartition = NULL; FakeRelationIdCacheLookup(fakeRels, fakeRelKey, parentRel, parentPartition); @@ -741,7 +741,10 @@ bool GPIGetNextPartRelation(GPIScanDesc gpiScan, MemoryContext cxt, LOCKMODE lmo /* Get current partition status in GPI */ currStatus = PartitionGetMetadataStatus(gpiScan->currPartOid, false); /* Just save partition status if current partition metadata is invisible */ - if (currStatus == PART_METADATA_INVISIBLE) { + if (currStatus == PART_METADATA_INVISIBLE || currStatus == PART_METADATA_NOEXIST) { + if (currStatus == PART_METADATA_NOEXIST && module_logging_is_on(MOD_GPI)) { + ereport(LOG, (errmodule(MOD_GPI), errmsg("Partition %u does not exist", gpiScan->currPartOid))); + } /* If current partition metadata is invisible, add current partition oid into invisiblePartMap */ (void)OidRBTreeInsertOid(gpiScan->invisiblePartTree, gpiScan->currPartOid); gpiScan->currPartOid = InvalidOid; diff --git a/src/gausskernel/storage/access/nbtree/nbtree.cpp b/src/gausskernel/storage/access/nbtree/nbtree.cpp index e62239b7f..784c66603 100644 --- a/src/gausskernel/storage/access/nbtree/nbtree.cpp +++ b/src/gausskernel/storage/access/nbtree/nbtree.cpp @@ -148,6 +148,7 @@ Datum btbuildempty(PG_FUNCTION_ARGS) { Relation index = (Relation)PG_GETARG_POINTER(0); Page metapage; + char* unaligned_buffer = NULL; /* Construct metapage. 
*/ ADIO_RUN() @@ -156,7 +157,12 @@ Datum btbuildempty(PG_FUNCTION_ARGS) } ADIO_ELSE() { - metapage = (Page)palloc(BLCKSZ); + if (ENABLE_DSS) { + unaligned_buffer = (char*)palloc(BLCKSZ + ALIGNOF_BUFFER); + metapage = (Page)BUFFERALIGN(unaligned_buffer); + } else { + metapage = (Page)palloc(BLCKSZ); + } } ADIO_END(); @@ -192,7 +198,11 @@ Datum btbuildempty(PG_FUNCTION_ARGS) } ADIO_ELSE() { - pfree(metapage); + if (ENABLE_DSS) { + pfree(unaligned_buffer); + } else { + pfree(metapage); + } } ADIO_END(); diff --git a/src/gausskernel/storage/access/nbtree/nbtsort.cpp b/src/gausskernel/storage/access/nbtree/nbtsort.cpp index f711a2385..93a9801aa 100644 --- a/src/gausskernel/storage/access/nbtree/nbtsort.cpp +++ b/src/gausskernel/storage/access/nbtree/nbtsort.cpp @@ -76,6 +76,7 @@ #include "catalog/pg_partition_fn.h" #include "miscadmin.h" #include "storage/smgr/smgr.h" +#include "storage/file/fio_device.h" #include "storage/proc.h" #include "tcop/tcopprot.h" #include "utils/aiomem.h" @@ -257,7 +258,11 @@ static Page _bt_blnewpage(uint32 level) } ADIO_ELSE() { - page = (Page)palloc(BLCKSZ); + if (ENABLE_DSS) { + page = (Page)mem_align_alloc(SYS_LOGICAL_BLOCK_SIZE, BLCKSZ); + } else { + page = (Page)palloc(BLCKSZ); + } } ADIO_END(); @@ -307,7 +312,11 @@ static void _bt_segment_blwritepage(BTWriteState *wstate, Page page, BlockNumber PageSetLSN(BufferGetPage(buf), xlog_ptr); MarkBufferDirty(buf); UnlockReleaseBuffer(buf); - pfree(page); + if (ENABLE_DSS) { + mem_align_free(page); + } else { + pfree(page); + } page = NULL; } @@ -318,11 +327,12 @@ static void _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) { if (IsSegmentFileNode(wstate->index->rd_node)) { _bt_segment_blwritepage(wstate, page, blkno); - return; + return; } bool need_free = false; errno_t errorno = EOK; + char* unalign_zerobuffer = NULL; /* XLOG stuff */ if (wstate->btws_use_wal) { @@ -355,7 +365,12 @@ static void _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) } 
ADIO_ELSE() { - wstate->btws_zeropage = (Page)palloc0(BLCKSZ); + if (ENABLE_DSS) { + unalign_zerobuffer = (char*)palloc0(BLCKSZ + ALIGNOF_BUFFER); + wstate->btws_zeropage = (Page)BUFFERALIGN(unalign_zerobuffer); + } else { + wstate->btws_zeropage = (Page)palloc0(BLCKSZ); + } } ADIO_END(); need_free = true; @@ -411,10 +426,18 @@ static void _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) ADIO_ELSE() { if (need_free) { - pfree(wstate->btws_zeropage); + if (ENABLE_DSS) { + pfree(unalign_zerobuffer); + } else { + pfree(wstate->btws_zeropage); + } wstate->btws_zeropage = NULL; } - pfree(page); + if (ENABLE_DSS) { + mem_align_free(page); + } else { + pfree(page); + } page = NULL; } ADIO_END(); @@ -805,7 +828,11 @@ void _bt_uppershutdown(BTWriteState *wstate, BTPageState *state) } ADIO_ELSE() { - metapage = (Page)palloc(BLCKSZ); + if (ENABLE_DSS) { + metapage = (Page)mem_align_alloc(SYS_LOGICAL_BLOCK_SIZE, BLCKSZ); + } else { + metapage = (Page)palloc(BLCKSZ); + } } ADIO_END(); _bt_initmetapage(metapage, rootblkno, rootlevel); diff --git a/src/gausskernel/storage/access/transam/cbmfuncs.cpp b/src/gausskernel/storage/access/transam/cbmfuncs.cpp index 4e0f08ea4..d5316afa6 100644 --- a/src/gausskernel/storage/access/transam/cbmfuncs.cpp +++ b/src/gausskernel/storage/access/transam/cbmfuncs.cpp @@ -92,6 +92,10 @@ Datum pg_cbm_get_merged_file(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("recovery is in progress"), errhint("pg_cbm_get_merged_file() cannot be executed during recovery."))); + if (SSIsServerModeReadOnly()) { + ereport(ERROR, (errmsg("pg_cbm_get_merged_file() cannot be executed at Standby with DMS enabled"))); + } + char *start_lsn_str = text_to_cstring(start_lsn_arg); char *end_lsn_str = text_to_cstring(end_lsn_arg); char merged_file_name[MAXPGPATH] = {'\0'}; @@ -279,12 +283,23 @@ Datum pg_cbm_get_changed_block(PG_FUNCTION_ARGS) pfree(db_path); } else { /* tablespace create/drop */ - int len = 
strlen("pg_tblspc") + 1 + OIDCHARS + 1 + strlen(g_instance.attr.attr_common.PGXCNodeName) + 1 + - strlen(TABLESPACE_VERSION_DIRECTORY) + 2; + int len = 0; + if (ENABLE_DSS) { + len = strlen("pg_tblspc") + 1 + OIDCHARS + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 2; + } else { + len = strlen("pg_tblspc") + 1 + OIDCHARS + 1 + strlen(g_instance.attr.attr_common.PGXCNodeName) + + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 2; + } char *tblspc_path = (char *)palloc(len); - rc = snprintf_s(tblspc_path, len, len - 1, "pg_tblspc/%u/%s_%s", cur_array_entry.cbmTag.rNode.spcNode, - TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName); - securec_check_ss(rc, "\0", "\0"); + if (ENABLE_DSS) { + rc = snprintf_s(tblspc_path, len, len - 1, "pg_tblspc/%u/%s", cur_array_entry.cbmTag.rNode.spcNode, + TABLESPACE_VERSION_DIRECTORY); + securec_check_ss(rc, "\0", "\0"); + } else { + rc = snprintf_s(tblspc_path, len, len - 1, "pg_tblspc/%u/%s_%s", cur_array_entry.cbmTag.rNode.spcNode, + TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName); + securec_check_ss(rc, "\0", "\0"); + } values[6] = CStringGetTextDatum(tblspc_path); pfree(tblspc_path); } @@ -377,6 +392,10 @@ Datum pg_cbm_force_track(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("recovery is in progress"), errhint("pg_cbm_force_track() cannot be executed during recovery."))); + if (SSIsServerModeReadOnly()) { + ereport(ERROR, (errmsg("pg_cbm_force_track() cannot be executed at Standby with DMS enabled"))); + } + if (time_out < 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("Negative timeout for force track cbm!"))); diff --git a/src/gausskernel/storage/access/transam/clog.cpp b/src/gausskernel/storage/access/transam/clog.cpp index 358103fa9..b0737890b 100644 --- a/src/gausskernel/storage/access/transam/clog.cpp +++ b/src/gausskernel/storage/access/transam/clog.cpp @@ -47,6 +47,7 @@ #include "pg_trace.h" #include 
"storage/smgr/fd.h" #include "storage/proc.h" +#include "storage/file/fio_device.h" #ifdef USE_ASSERT_CHECKING #include "utils/builtins.h" #endif /* USE_ASSERT_CHECKING */ @@ -534,6 +535,11 @@ static void CLogSetStatusBit(TransactionId xid, CLogXidStatus status, XLogRecPtr if (t_thrd.xlog_cxt.InRecovery && status == CLOG_XID_STATUS_SUB_COMMITTED && curval == CLOG_XID_STATUS_COMMITTED) return; + if (SS_STANDBY_MODE) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("DMS standby can't set clog status"))); + return; + } + /* * Current state change should be from 0 or subcommitted to target state * or we should already be there when replaying changes during recovery. @@ -663,7 +669,7 @@ void CLOGShmemInit(void) rc = sprintf_s(name, SLRU_MAX_NAME_LENGTH, "%s%d", "CLOG Ctl", i); securec_check_ss(rc, "", ""); SimpleLruInit(ClogCtl(i), name, (int)LWTRANCHE_CLOG_CTL, (int)CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE, - CBufMappingPartitionLockByIndex(i), "pg_clog"); + CBufMappingPartitionLockByIndex(i), CLOGDIR); } } @@ -752,6 +758,11 @@ void TrimCLOG(void) return; } + if (SS_STANDBY_MODE && !SS_STANDBY_PROMOTING) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("DMS standby can't trim clog status"))); + return; + } + TransactionId xid = t_thrd.xact_cxt.ShmemVariableCache->nextXid; int64 pageno = (int64)TransactionIdToPage(xid); @@ -841,7 +852,7 @@ int ClogSegCurMaxPageNo(char *path, int64 pageno) */ int fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) { - if (errno != ENOENT) { + if (!FILE_POSSIBLY_DELETED(errno)) { Assert(!t_thrd.xlog_cxt.InRecovery); LWLockRelease(ClogCtl(pageno)->shared->control_lock); ereport(ERROR, (errcode(ERRCODE_IO_ERROR), errmsg("Open file %s failed. 
%s\n", path, strerror(errno)))); @@ -1273,3 +1284,17 @@ Datum gs_fault_inject(PG_FUNCTION_ARGS) PG_RETURN_INT64(0); #endif } + +void SSCLOGShmemClear(void) +{ + int i = 0; + int rc = 0; + char name[SLRU_MAX_NAME_LENGTH]; + + for (i = 0; i < NUM_CLOG_PARTITIONS; i++) { + rc = sprintf_s(name, SLRU_MAX_NAME_LENGTH, "%s%d", "CLOG Ctl", i); + securec_check_ss(rc, "", ""); + SimpleLruSetPageEmpty(ClogCtl(i), name, (int)LWTRANCHE_CLOG_CTL, (int)CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE, + CBufMappingPartitionLockByIndex(i), CLOGDIR); + } +} diff --git a/src/gausskernel/storage/access/transam/csnlog.cpp b/src/gausskernel/storage/access/transam/csnlog.cpp index b4945e731..f2b19fc24 100644 --- a/src/gausskernel/storage/access/transam/csnlog.cpp +++ b/src/gausskernel/storage/access/transam/csnlog.cpp @@ -176,6 +176,11 @@ void CSNLogSetCommitSeqNo(TransactionId xid, int nsubxids, TransactionId *subxid static void CSNLogSetPageStatus(TransactionId xid, int nsubxids, TransactionId *subxids, CommitSeqNo csn, int64 pageno, TransactionId topxid) { + if (SS_STANDBY_MODE) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("DMS standby can't set csnlog status"))); + return; + } + int slotno; int i; bool modified = false; @@ -250,6 +255,11 @@ restart: */ void SubTransSetParent(TransactionId xid, TransactionId parent) { + if (SS_STANDBY_MODE) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("DMS standby can't set csnlog status"))); + return; + } + int64 pageno = TransactionIdToCSNPage(xid); int entryno = TransactionIdToCSNPgIndex(xid); int slotno; @@ -563,7 +573,7 @@ void CSNLOGShmemInit(void) rc = sprintf_s(name, SLRU_MAX_NAME_LENGTH, "%s%d", "CSNLOG Ctl", i); securec_check_ss(rc, "\0", "\0"); SimpleLruInit(CsnlogCtl(i), name, LWTRANCHE_CSNLOG_CTL, CSNLOGShmemBuffers(), 0, - CSNBufMappingPartitionLockByIndex(i), "pg_csnlog", i); + CSNBufMappingPartitionLockByIndex(i), CSNLOGDIR, i); } } @@ -816,3 +826,17 @@ void TruncateCSNLOG(TransactionId oldestXact) elog(LOG, "truncate CSN log oldestXact 
%lu, next xid %lu", oldestXact, t_thrd.xact_cxt.ShmemVariableCache->nextXid); } + +void SSCSNLOGShmemClear(void) +{ + int i; + int rc = 0; + char name[SLRU_MAX_NAME_LENGTH]; + + for (i = 0; i < NUM_CSNLOG_PARTITIONS; i++) { + rc = sprintf_s(name, SLRU_MAX_NAME_LENGTH, "%s%d", "CSNLOG Ctl", i); + securec_check_ss(rc, "\0", "\0"); + SimpleLruSetPageEmpty(CsnlogCtl(i), name, (int)LWTRANCHE_CSNLOG_CTL, (int)CSNLOGShmemBuffers(), 0, + CSNBufMappingPartitionLockByIndex(i), CSNLOGDIR, i); + } +} diff --git a/src/gausskernel/storage/access/transam/double_write.cpp b/src/gausskernel/storage/access/transam/double_write.cpp index 5c2ea8ddf..d320e0e44 100644 --- a/src/gausskernel/storage/access/transam/double_write.cpp +++ b/src/gausskernel/storage/access/transam/double_write.cpp @@ -30,6 +30,8 @@ #include "access/double_write.h" #include "storage/smgr/smgr.h" #include "storage/smgr/segment.h" +#include "storage/dss/dss_adaptor.h" +#include "storage/file/fio_device.h" #include "pgstat.h" #include "utils/palloc.h" #include "gstrace/gstrace_infra.h" @@ -38,6 +40,7 @@ #include "postmaster/bgwriter.h" #include "knl/knl_thread.h" #include "tde_key_management/tde_key_storage.h" +#include "ddes/dms/ss_dms_recovery.h" #ifdef ENABLE_UT #define static @@ -1650,6 +1653,14 @@ void dw_recover_batch_meta_file(int fd, dw_batch_meta_file *batch_meta_file) void dw_remove_batch_meta_file() { + knl_g_dw_context *dw_cxt = &g_instance.dw_batch_cxt; + if (ENABLE_DSS && dw_cxt->fd > 0) { + int rc = close(dw_cxt->fd); + if (rc == -1) { + ereport(ERROR, (errcode_for_file_access(), errmodule(MOD_DW), errmsg("DW file close failed"))); + } + } + ereport(LOG, (errmodule(MOD_DW), errmsg("start remove dw_batch_meta_file."))); dw_remove_file(DW_META_FILE); } @@ -2052,6 +2063,9 @@ void dw_transfer_phybuffer_addr(const BufferDesc *buf_desc, BufferTag *buf_tag) Assert(buf_desc->seg_fileno <= EXTENT_TYPES && buf_desc->seg_fileno > EXTENT_INVALID); buf_tag->rnode.relNode = buf_desc->seg_fileno; buf_tag->blockNum 
= buf_desc->seg_blockno; + } else if (ENABLE_REFORM && SS_BEFORE_RECOVERY) { + buf_tag->rnode.relNode = buf_desc->seg_fileno; + buf_tag->blockNum = buf_desc->seg_blockno; } else { SegPageLocation loc = seg_get_physical_location(buf_desc->tag.rnode, buf_desc->tag.forkNum, buf_desc->tag.blockNum); diff --git a/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp b/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp index 60df839a6..79049a699 100755 --- a/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp +++ b/src/gausskernel/storage/access/transam/extreme_rto/dispatcher.cpp @@ -311,7 +311,7 @@ void AllocRecordReadBuffer(XLogReaderState *xlogreader, uint32 privateLen) g_dispatcher->rtoXlogBufState.targetRecPtr = InvalidXLogRecPtr; g_dispatcher->rtoXlogBufState.expectLsn = InvalidXLogRecPtr; g_dispatcher->rtoXlogBufState.waitRedoDone = 0; - g_dispatcher->rtoXlogBufState.readsegbuf = (char *)palloc0(XLOG_SEG_SIZE * MAX_ALLOC_SEGNUM); + g_dispatcher->rtoXlogBufState.readsegbuf = (char *)palloc0(XLogSegSize * MAX_ALLOC_SEGNUM); g_dispatcher->rtoXlogBufState.readBuf = (char *)palloc0(XLOG_BLCKSZ); g_dispatcher->rtoXlogBufState.readprivate = (void *)palloc0(MAXALIGN(privateLen)); errorno = memset_s(g_dispatcher->rtoXlogBufState.readprivate, MAXALIGN(privateLen), 0, MAXALIGN(privateLen)); @@ -323,7 +323,7 @@ void AllocRecordReadBuffer(XLogReaderState *xlogreader, uint32 privateLen) char *readsegbuf = g_dispatcher->rtoXlogBufState.readsegbuf; for (uint32 i = 0; i < MAX_ALLOC_SEGNUM; i++) { g_dispatcher->rtoXlogBufState.xlogsegarray[i].readsegbuf = readsegbuf; - readsegbuf += XLOG_SEG_SIZE; + readsegbuf += XLogSegSize; g_dispatcher->rtoXlogBufState.xlogsegarray[i].bufState = NONE; } diff --git a/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp b/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp index e57bebcac..6ec226624 100755 --- a/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp +++ 
b/src/gausskernel/storage/access/transam/extreme_rto/page_redo.cpp @@ -69,6 +69,7 @@ #include "replication/walreceiver.h" #include "replication/datareceiver.h" #include "pgxc/barrier.h" +#include "storage/file/fio_device.h" #ifdef ENABLE_MOT #include "storage/mot/mot_fdw.h" #endif @@ -1632,7 +1633,7 @@ void ResetRtoXlogReadBuf(XLogRecPtr targetPagePtr) XLogSegNo segno; XLByteToSeg(targetPagePtr, segno); g_recordbuffer->xlogsegarray[g_recordbuffer->applyindex].segno = segno; - g_recordbuffer->xlogsegarray[g_recordbuffer->applyindex].readlen = targetPagePtr % XLOG_SEG_SIZE; + g_recordbuffer->xlogsegarray[g_recordbuffer->applyindex].readlen = targetPagePtr % XLogSegSize; pg_atomic_write_u32(&(g_recordbuffer->readindex), g_recordbuffer->applyindex); pg_atomic_write_u32(&(g_recordbuffer->xlogsegarray[g_recordbuffer->readindex].bufState), APPLYING); @@ -1652,7 +1653,7 @@ RecordBufferAarray *GetCurrentSegmentBuf(XLogRecPtr targetPagePtr) if (bufState != APPLYING) { return NULL; } - uint32 targetPageOff = (targetPagePtr % XLOG_SEG_SIZE); + uint32 targetPageOff = (targetPagePtr % XLogSegSize); XLogSegNo targetSegNo; XLByteToSeg(targetPagePtr, targetSegNo); if (cursegbuffer->segno == targetSegNo) { @@ -1734,7 +1735,7 @@ void XLogReadWorkerSegFallback(XLogSegNo lastRplSegNo) pg_atomic_write_u32(&(readseg->bufState), APPLIED); applyseg->segno = lastRplSegNo; applyseg->readlen = applyseg->segoffset; - errorno = memset_s(applyseg->readsegbuf, XLOG_SEG_SIZE, 0, XLOG_SEG_SIZE); + errorno = memset_s(applyseg->readsegbuf, XLogSegSize, 0, XLogSegSize); securec_check(errorno, "", ""); } @@ -2072,9 +2073,9 @@ static void XLogReadWorkRun() writeoffset = readseg->readlen; if (targetSegNo != readseg->segno) { - reqlen = XLOG_SEG_SIZE - writeoffset; + reqlen = XLogSegSize - writeoffset; } else { - uint32 targetPageOff = receivedUpto % XLOG_SEG_SIZE; + uint32 targetPageOff = receivedUpto % XLogSegSize; if (targetPageOff <= writeoffset) { pg_usleep(sleepTime); return; @@ -2092,7 +2093,7 @@ 
static void XLogReadWorkRun() waitcount = 0; char *readBuf = readseg->readsegbuf + writeoffset; - XLogRecPtr targetSartPtr = readseg->segno * XLOG_SEG_SIZE + writeoffset; + XLogRecPtr targetSartPtr = readseg->segno * XLogSegSize + writeoffset; uint32 readlen = 0; GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_2]); bool result = XLogReadFromWriteBuffer(targetSartPtr, reqlen, readBuf, &readlen); @@ -2102,7 +2103,7 @@ static void XLogReadWorkRun() } pg_atomic_write_u32(&(readseg->readlen), (writeoffset + readlen)); - if (readseg->readlen == XLOG_SEG_SIZE) { + if (readseg->readlen == XLogSegSize) { GetRedoStartTime(g_redoWorker->timeCostList[TIME_COST_STEP_3]); InitReadBuf(readindex + 1, readseg->segno + 1); CountRedoTime(g_redoWorker->timeCostList[TIME_COST_STEP_3]); @@ -3000,4 +3001,4 @@ void SeqCheckRemoteReadAndRepairPage() } } -} // namespace extreme_rto +} // namespace extreme_rto \ No newline at end of file diff --git a/src/gausskernel/storage/access/transam/multixact.cpp b/src/gausskernel/storage/access/transam/multixact.cpp index eb4c8d685..04782bbde 100644 --- a/src/gausskernel/storage/access/transam/multixact.cpp +++ b/src/gausskernel/storage/access/transam/multixact.cpp @@ -495,6 +495,10 @@ void MultiXactIdSetOldestMember(void) */ static void MultiXactIdSetOldestVisible(void) { + if (ENABLE_DMS && t_thrd.role == DMS_WORKER) { + return; + } + if (!MultiXactIdIsValid(t_thrd.shemem_ptr_cxt.OldestVisibleMXactId[t_thrd.proc_cxt.MyBackendId])) { MultiXactId oldestMXact; int i; @@ -761,6 +765,11 @@ static MultiXactId CreateMultiXactId(int nmembers, MultiXactMember *members) */ static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, int nmembers, TransactionId *xidsWithStatus) { + if (SS_STANDBY_MODE) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("DMS standby can't set newmultixact status"))); + return; + } + int64 pageno; int64 prev_pageno; int entryno; @@ -1177,6 +1186,10 @@ static MultiXactId mXactCacheGetBySet(int nmembers, 
MultiXactMember *members) */ static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members) { + if (ENABLE_DMS && t_thrd.role == DMS_WORKER) { + return -1; + } + mXactCacheEnt *entry = NULL; errno_t rc = EOK; @@ -1209,6 +1222,10 @@ static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members) */ static void mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members) { + if (ENABLE_DMS && t_thrd.role == DMS_WORKER) { + return; + } + mXactCacheEnt *entry = NULL; errno_t rc = EOK; @@ -1422,8 +1439,13 @@ Size MultiXactShmemSize(void) add_size(sizeof(MultiXactStateData), mul_size(sizeof(MultiXactId) * 2, MaxOldestSlot)) size = SHARED_MULTIXACT_STATE_SIZE; - size = add_size(size, SimpleLruShmemSize(NUM_MXACTOFFSET_BUFFERS, 0)); - size = add_size(size, SimpleLruShmemSize(NUM_MXACTMEMBER_BUFFERS, 0)); + if (ENABLE_DSS) { + size = add_size(size, SimpleLruShmemSize(DSS_MAX_MXACTOFFSET, 0)); + size = add_size(size, SimpleLruShmemSize(DSS_MAX_MXACTMEMBER, 0)); + } else { + size = add_size(size, SimpleLruShmemSize(NUM_MXACTOFFSET_BUFFERS, 0)); + size = add_size(size, SimpleLruShmemSize(NUM_MXACTMEMBER_BUFFERS, 0)); + } return size; } @@ -1432,15 +1454,29 @@ void MultiXactShmemInit(void) { bool found = false; errno_t rc = EOK; + char path[MAXPGPATH]; debug_elog2(DEBUG2, "Shared Memory Init for MultiXact"); - SimpleLruInit(t_thrd.shemem_ptr_cxt.MultiXactOffsetCtl, GetBuiltInTrancheName(LWTRANCHE_MULTIXACTOFFSET_CTL), - LWTRANCHE_MULTIXACTOFFSET_CTL, NUM_MXACTOFFSET_BUFFERS, 0, MultiXactOffsetControlLock, - "pg_multixact/offsets"); - SimpleLruInit(t_thrd.shemem_ptr_cxt.MultiXactMemberCtl, GetBuiltInTrancheName(LWTRANCHE_MULTIXACTMEMBER_CTL), - LWTRANCHE_MULTIXACTMEMBER_CTL, NUM_MXACTMEMBER_BUFFERS, 0, MultiXactMemberControlLock, - "pg_multixact/members"); + rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/offsets", MULTIXACTDIR); + securec_check_ss(rc, "\0", "\0"); + if (ENABLE_DSS) { + 
SimpleLruInit(t_thrd.shemem_ptr_cxt.MultiXactOffsetCtl, GetBuiltInTrancheName(LWTRANCHE_MULTIXACTOFFSET_CTL), + LWTRANCHE_MULTIXACTOFFSET_CTL, DSS_MAX_MXACTOFFSET, 0, MultiXactOffsetControlLock, path); + } else { + SimpleLruInit(t_thrd.shemem_ptr_cxt.MultiXactOffsetCtl, GetBuiltInTrancheName(LWTRANCHE_MULTIXACTOFFSET_CTL), + LWTRANCHE_MULTIXACTOFFSET_CTL, NUM_MXACTOFFSET_BUFFERS, 0, MultiXactOffsetControlLock, path); + } + + rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/members", MULTIXACTDIR); + securec_check_ss(rc, "\0", "\0"); + if (ENABLE_DSS) { + SimpleLruInit(t_thrd.shemem_ptr_cxt.MultiXactMemberCtl, GetBuiltInTrancheName(LWTRANCHE_MULTIXACTMEMBER_CTL), + LWTRANCHE_MULTIXACTMEMBER_CTL, DSS_MAX_MXACTMEMBER, 0, MultiXactMemberControlLock, path); + } else { + SimpleLruInit(t_thrd.shemem_ptr_cxt.MultiXactMemberCtl, GetBuiltInTrancheName(LWTRANCHE_MULTIXACTMEMBER_CTL), + LWTRANCHE_MULTIXACTMEMBER_CTL, NUM_MXACTMEMBER_BUFFERS, 0, MultiXactMemberControlLock, path); + } /* Initialize our shared state struct */ t_thrd.shemem_ptr_cxt.MultiXactState = (MultiXactStateData *)ShmemInitStruct("Shared MultiXact State", @@ -1936,6 +1972,11 @@ static bool SlruScanDirCbFindEarliest(SlruCtl ctl, const char* filename, int64 s */ void TruncateMultiXact(MultiXactId oldestMXact) { + if (SS_STANDBY_MODE) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("DMS standby can't truncate multixact status"))); + return; + } + MultiXactOffset oldestOffset; #ifndef ENABLE_MULTIPLE_NODES @@ -2128,3 +2169,23 @@ void multixact_redo(XLogReaderState *record) ereport(PANIC, (errmsg("multixact_redo: unknown op code %u", (uint32)info))); } } + +void SSMultiXactShmemClear(void) +{ + errno_t rc = EOK; + char path[MAXPGPATH]; + + debug_elog2(DEBUG2, "Shared Memory Init for MultiXact"); + + rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/offsets", MULTIXACTDIR); + securec_check_ss(rc, "\0", "\0"); + SimpleLruSetPageEmpty(t_thrd.shemem_ptr_cxt.MultiXactOffsetCtl, + 
GetBuiltInTrancheName(LWTRANCHE_MULTIXACTOFFSET_CTL), LWTRANCHE_MULTIXACTOFFSET_CTL, DSS_MAX_MXACTOFFSET, 0, + MultiXactOffsetControlLock, path); + + rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/members", MULTIXACTDIR); + securec_check_ss(rc, "\0", "\0"); + SimpleLruSetPageEmpty(t_thrd.shemem_ptr_cxt.MultiXactMemberCtl, + GetBuiltInTrancheName(LWTRANCHE_MULTIXACTMEMBER_CTL), LWTRANCHE_MULTIXACTMEMBER_CTL, DSS_MAX_MXACTMEMBER, 0, + MultiXactMemberControlLock, path); +} \ No newline at end of file diff --git a/src/gausskernel/storage/access/transam/slru.cpp b/src/gausskernel/storage/access/transam/slru.cpp index 32ada6e26..8d180c062 100644 --- a/src/gausskernel/storage/access/transam/slru.cpp +++ b/src/gausskernel/storage/access/transam/slru.cpp @@ -57,11 +57,13 @@ #include "access/slru.h" #include "access/transam.h" #include "access/xlog.h" +#include "access/csnlog.h" #include "storage/smgr/fd.h" #include "storage/shmem.h" #include "miscadmin.h" #include "pgstat.h" #include "utils/builtins.h" +#include "storage/file/fio_device.h" /* * During SimpleLruFlush(), we will usually not need to write/fsync more @@ -531,8 +533,10 @@ static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) /* Acquire per-buffer lock (cannot deadlock, see notes at top) */ (void)LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE); - /* Release control lock while doing I/O */ - LWLockRelease(shared->control_lock); + if (!ENABLE_DSS) { + /* Release control lock while doing I/O */ + LWLockRelease(shared->control_lock); + } /* Do the write */ ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata); @@ -544,8 +548,10 @@ static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) (void)close(fdata->fd[i]); } - /* Re-acquire control lock and update page state */ - (void)LWLockAcquire(shared->control_lock, LW_EXCLUSIVE); + if (!ENABLE_DSS) { + /* Re-acquire control lock and update page state */ + (void)LWLockAcquire(shared->control_lock, LW_EXCLUSIVE); + } 
if (!(shared->page_number[slotno] == pageno && shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)) ereport(PANIC, @@ -657,6 +663,41 @@ static bool SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno) return true; } +static bool SSPreAllocSegment(int fd, SlruFlush fdata) +{ + struct stat s; + if (fstat(fd, &s) < 0) { + t_thrd.xact_cxt.slru_errcause = SLRU_OPEN_FAILED; + t_thrd.xact_cxt.slru_errno = errno; + if (fdata == NULL) { + (void)close(fd); + } + return false; + } + + int64 trunc_size = (int64)(SLRU_PAGES_PER_SEGMENT * BLCKSZ); + if (s.st_size < trunc_size) { + /* extend file at once to avoid dss cross-border write issue */ + pgstat_report_waitevent(WAIT_EVENT_SLRU_WRITE); + errno = 0; + if (fallocate(fd, 0, s.st_size, trunc_size) != 0) { + pgstat_report_waitevent(WAIT_EVENT_END); + if (errno == 0) { + errno = ENOSPC; + } + t_thrd.xact_cxt.slru_errcause = SLRU_WRITE_FAILED; + t_thrd.xact_cxt.slru_errno = errno; + if (fdata == NULL) { + (void)close(fd); + } + return false; + } + pgstat_report_waitevent(WAIT_EVENT_END); + } + + return true; +} + /* * Physical write of a page from a buffer slot * @@ -777,11 +818,34 @@ static bool SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruFlu } if (lseek(fd, (off_t)offset, SEEK_SET) < 0) { - t_thrd.xact_cxt.slru_errcause = SLRU_SEEK_FAILED; - t_thrd.xact_cxt.slru_errno = errno; - if (fdata == NULL) - (void)close(fd); - return false; + bool failed = true; + if (ENABLE_DSS && errno == ERR_DSS_FILE_SEEK) { + if (!SSPreAllocSegment(fd, fdata)) { + return false; + } + if (lseek(fd, (off_t)offset, SEEK_SET) >= 0) { + failed = false; + } + } + + if (failed) { + t_thrd.xact_cxt.slru_errcause = SLRU_SEEK_FAILED; + t_thrd.xact_cxt.slru_errno = errno; + if (fdata == NULL) { + (void)close(fd); + } + return false; + } + } + + if (SS_STANDBY_PROMOTING) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("DMS standby can't write slru page for switchover"))); + return true; + } + + if (SS_STANDBY_MODE && 
strlen(shared->page_buffer[slotno])) { + force_backtrace_messages = true; + ereport(PANIC, (errmodule(MOD_DMS), errmsg("DMS standby can't write to disk"))); } errno = 0; @@ -1078,11 +1142,19 @@ int SimpleLruFlush(SlruCtl ctl, bool checkpoint) */ void SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage, int partitionNum) { + if (SS_STANDBY_MODE) { + ereport(WARNING, (errmodule(MOD_DMS), errmsg("DMS standby can't truncate slru page"))); + return; + } + SlruShared shared = NULL; int64 slotno; - bool isCsnLogCtl = strcmp(ctl->dir, "pg_csnlog") == 0; + bool isCsnLogCtl = false; bool isPart = (partitionNum > NUM_SLRU_DEFAULT_PARTITION); + /* check whether the slru file is csnlog */ + isCsnLogCtl = strcmp(ctl->dir, CSNLOGDIR) == 0; + /* * The cutoff point is the start of the segment containing cutoffPage. */ @@ -1245,6 +1317,18 @@ bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, const void *data) return retval; } +void SimpleLruSetPageEmpty(SlruCtl ctl, const char *name, int trancheId, int nslots, int nlsns, LWLock *ctllock, + const char *subdir, int index) +{ + bool found = false; + int slotno; + SlruShared shared = (SlruShared)ShmemInitStruct(name, SimpleLruShmemSize(nslots, nlsns), &found); + + for (slotno = 0; slotno < nslots; slotno++) { + shared->page_status[slotno] = SLRU_PAGE_EMPTY; + } +} + #ifdef ENABLE_UT void ut_SetErrCause(int errcause) diff --git a/src/gausskernel/storage/access/transam/transam.cpp b/src/gausskernel/storage/access/transam/transam.cpp index 1814cb903..21d05d224 100644 --- a/src/gausskernel/storage/access/transam/transam.cpp +++ b/src/gausskernel/storage/access/transam/transam.cpp @@ -33,6 +33,8 @@ #include "utils/snapmgr.h" #include "replication/walreceiver.h" #include "storage/procarray.h" +#include "ddes/dms/ss_transaction.h" +#include "ddes/dms/ss_common_attr.h" #ifdef PGXC #include "utils/builtins.h" @@ -180,7 +182,7 @@ RETRY: (errmsg("TransactionIdGetCommitSeqNo: " "Treat CSN as frozen when csnlog file cannot be found for the 
given xid: %lu csn: %lu", transactionId, result))); - } else if (GTM_LITE_MODE && retry_times == 0) { + } else if ((GTM_LITE_MODE || (ENABLE_DMS && t_thrd.role == DMS_WORKER)) && retry_times == 0) { t_thrd.int_cxt.InterruptHoldoffCount = saveInterruptHoldoffCount; FlushErrorState(); ereport(LOG, (errmsg("recentGlobalXmin has been updated, csn log may be truncated, try clog, xid" @@ -391,6 +393,10 @@ Datum pgxc_get_csn(PG_FUNCTION_ARGS) */ bool TransactionIdDidCommit(TransactionId transactionId) /* true if given transaction committed */ { + if (SS_STANDBY_MODE) { + return SSTransactionIdDidCommit(transactionId); + } + CLogXidStatus xidstatus; xidstatus = TransactionLogFetch(transactionId); diff --git a/src/gausskernel/storage/access/transam/twophase.cpp b/src/gausskernel/storage/access/transam/twophase.cpp index 63ca29730..2f334aee9 100644 --- a/src/gausskernel/storage/access/transam/twophase.cpp +++ b/src/gausskernel/storage/access/transam/twophase.cpp @@ -126,11 +126,12 @@ #include "storage/mot/mot_fdw.h" #endif #include "instruments/instr_statement.h" +#include "storage/file/fio_device.h" /* * Directory where Two-phase commit files reside within PGDATA */ -#define TWOPHASE_DIR "pg_twophase" +#define TWOPHASE_DIR (g_instance.datadir_cxt.twophaseDir) int PendingPreparedXactsCount = 0; @@ -2070,7 +2071,7 @@ static char *ReadTwoPhaseFile(TransactionId xid, bool give_warnings) pg_crc32 calc_crc, file_crc; errno_t rc; - rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, TWOPHASE_DIR "/%08X%08X", (uint32)(xid >> 32), (uint32)xid); + rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X", TWOPHASE_DIR, (uint32)(xid >> 32), (uint32)xid); securec_check_ss(rc, "", ""); fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0); @@ -2864,7 +2865,7 @@ static void RemoveTwoPhaseFile(TransactionId xid, bool giveWarning) char path[MAXPGPATH]; errno_t rc = EOK; - rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, TWOPHASE_DIR "/%08X%08X", (uint32)(xid >> 32), (uint32)xid); + rc = 
snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X", TWOPHASE_DIR, (uint32)(xid >> 32), (uint32)xid); securec_check_ss(rc, "", ""); if (unlink(path)) { if (errno != ENOENT || giveWarning) { @@ -2892,7 +2893,7 @@ static void RecreateTwoPhaseFile(TransactionId xid, void *content, int len) COMP_CRC32(statefile_crc, content, len); FIN_CRC32(statefile_crc); - rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, TWOPHASE_DIR "/%08X%08X", (uint32)(xid >> 32), (uint32)xid); + rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X", TWOPHASE_DIR, (uint32)(xid >> 32), (uint32)xid); securec_check_ss(rc, "", ""); fd = BasicOpenFile(path, O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY, S_IRUSR | S_IWUSR); diff --git a/src/gausskernel/storage/access/transam/varsup.cpp b/src/gausskernel/storage/access/transam/varsup.cpp index 5b39435ff..059f25f56 100644 --- a/src/gausskernel/storage/access/transam/varsup.cpp +++ b/src/gausskernel/storage/access/transam/varsup.cpp @@ -135,6 +135,10 @@ TransactionId GetNewTransactionId(bool isSubXact) ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_INITIATION), errmsg("cannot assign TransactionIds during streaming disaster recovery"))); } + if (SSIsServerModeReadOnly()) { + ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_INITIATION), + errmsg("cannot assign TransactionIds at Standby with DMS enabled"))); + } (void)LWLockAcquire(XidGenLock, LW_EXCLUSIVE); xid = t_thrd.xact_cxt.ShmemVariableCache->nextXid; @@ -346,6 +350,9 @@ Oid GetNewObjectId(bool IsToastRel) if (RecoveryInProgress()) ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_INITIATION), errmsg("cannot assign OIDs during recovery"))); + if (SSIsServerModeReadOnly()) { + ereport(ERROR, (errmsg("cannot assign OIDs at Standby with DMS enabled"))); + } /* * During inplace or online upgrade, if newly added system objects are * to be pinned, we set their oids by GUC parameters, such as diff --git a/src/gausskernel/storage/access/transam/xact.cpp 
b/src/gausskernel/storage/access/transam/xact.cpp index e2c3ea975..0773e66fb 100755 --- a/src/gausskernel/storage/access/transam/xact.cpp +++ b/src/gausskernel/storage/access/transam/xact.cpp @@ -105,6 +105,7 @@ #include "commands/sequence.h" #include "postmaster/bgworker.h" #include "replication/walreceiver.h" +#include "ddes/dms/ss_common_attr.h" #ifdef ENABLE_MULTIPLE_NODES #include "tsdb/cache/queryid_cachemgr.h" #include "tsdb/cache/part_cachemgr.h" @@ -858,7 +859,7 @@ static void AssignTransactionId(TransactionState s) log_unknown_top = true; /* allocate undo zone before generate a new xid. */ - if (!isSubXact && IsUnderPostmaster) { + if (!isSubXact && IsUnderPostmaster && !ENABLE_DSS) { undo::AllocateUndoZone(); pg_memory_barrier(); } @@ -1453,6 +1454,10 @@ void UpdateNextMaxKnownCSN(CommitSeqNo csn) * GTM mode update nextCommitSeqNo in UpdateCSNAtTransactionCommit. * GTM-Lite mode update nextCommitSeqNo in this function. */ + if (ENABLE_DMS) { + return; + } + if (!GTM_LITE_MODE) { return; } @@ -2451,6 +2456,17 @@ static void StartTransaction(bool begin_on_gtm) u_sess->attr.attr_common.XactReadOnly = false; } } + + if (ENABLE_DMS) { + if (u_sess->attr.attr_common.DefaultXactIsoLevel != XACT_READ_COMMITTED) { + ereport(ERROR, + (errmsg("Only support read committed transcation isolation level while DMS and DSS enabled."))); + } + if (!SS_MY_INST_IS_MASTER) { + u_sess->attr.attr_common.XactReadOnly = true; + } + } + u_sess->attr.attr_storage.XactDeferrable = u_sess->attr.attr_storage.DefaultXactDeferrable; #ifdef PGXC /* PGXC - PGXC doesn't support 9.1 serializable transactions. 
They are diff --git a/src/gausskernel/storage/access/transam/xlog.cpp b/src/gausskernel/storage/access/transam/xlog.cpp index 6b209e6ef..aff1badd1 100755 --- a/src/gausskernel/storage/access/transam/xlog.cpp +++ b/src/gausskernel/storage/access/transam/xlog.cpp @@ -13,17 +13,16 @@ * * ------------------------------------------------------------------------- */ +#include "c.h" #include "postgres.h" #include "knl/knl_variable.h" #include #include #include -#include #include #include #include -#include #include #include #include @@ -154,6 +153,10 @@ #include "access/extreme_rto/spsc_blocking_queue.h" #include "access/extreme_rto/page_redo.h" #include "vectorsonic/vsonichash.h" + +#include "ddes/dms/ss_reform_common.h" +#include "ddes/dms/ss_dms_recovery.h" +#include "storage/file/fio_device.h" #ifdef ENABLE_UT #define STATIC #else @@ -175,11 +178,6 @@ #define RecoveryFromDummyStandby() (t_thrd.postmaster_cxt.ReplConnArray[2] != NULL && IS_DN_DUMMY_STANDYS_MODE()) -#define CHECKPOINT_LEN (SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint)) -#define CHECKPOINTNEW_LEN (SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPointNew)) -#define CHECKPOINTPLUS_LEN (SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPointPlus)) -#define CHECKPOINTUNDO_LEN (SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPointUndo)) - /* MaxMacAddrList controls GetMACAddrHash function to get the max mac number */ #define MaxMacAddrList 10 @@ -264,7 +262,6 @@ static void remove_xlogtemp_files(void); static bool validate_parse_delay_ddl_file(DelayDDLRange *delayRange); static bool write_delay_ddl_file(const DelayDDLRange &delayRange, bool onErrDelete); extern void CalculateLocalLatestSnapshot(bool forceCalc); -void update_dirty_page_queue_rec_lsn(XLogRecPtr current_insert_lsn, bool need_immediately_update = false); /* * Calculate the amount of space left on the page after 'endptr'. 
Beware @@ -300,7 +297,7 @@ void update_dirty_page_queue_rec_lsn(XLogRecPtr current_insert_lsn, bool need_im */ #define UsableBytesInPage (XLOG_BLCKSZ - SizeOfXLogShortPHD) #define UsableBytesInSegment \ - ((XLOG_SEG_SIZE / XLOG_BLCKSZ) * UsableBytesInPage - (SizeOfXLogLongPHD - SizeOfXLogShortPHD)) + ((XLogSegSize / XLOG_BLCKSZ) * UsableBytesInPage - (SizeOfXLogLongPHD - SizeOfXLogShortPHD)) /* * Add xlog reader private structure for page read. @@ -379,7 +376,6 @@ static void ValidateXLOGDirectoryStructure(void); static void CleanupBackupHistory(void); static XLogRecord *ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode, bool fetching_ckpt); void CheckRecoveryConsistency(void); -static XLogRecord *ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int whichChkpt); static bool existsTimeLineHistory(TimeLineID probeTLI); static bool rescanLatestTimeLine(void); static TimeLineID findNewestTimeLine(TimeLineID startTLI); @@ -651,6 +647,10 @@ static XLogRecPtr XLogInsertRecordGroup(XLogRecData *rdata, XLogRecPtr fpw_lsn) ereport(ERROR, (errcode(ERRCODE_CASE_NOT_FOUND), errmsg("cannot make new WAL entries during recovery"))); } + if (!SSXLogInsertAllowed()) { + ereport(FATAL, (errmsg("SS standby cannot insert XLOG entries"))); + } + START_CRIT_SECTION(); /* Add ourselves to the list of processes needing a group xlog status update. 
*/ @@ -1083,6 +1083,10 @@ static XLogRecPtr XLogInsertRecordSingle(XLogRecData *rdata, XLogRecPtr fpw_lsn) ereport(ERROR, (errcode(ERRCODE_CASE_NOT_FOUND), errmsg("cannot make new WAL entries during recovery"))); } + if (!SSXLogInsertAllowed()) { + ereport(FATAL, (errmsg("SS standby cannot insert XLOG entries"))); + } + /* ---------- * * We have now done all the preparatory work we can without holding a @@ -1276,7 +1280,7 @@ static XLogRecPtr XLogInsertRecordSingle(XLogRecData *rdata, XLogRecPtr fpw_lsn) if (inserted) { EndPos = StartPos + MAXALIGN(SizeOfXLogRecord); if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ && EndPos % XLOG_BLCKSZ != 0) { - if (EndPos % XLOG_SEG_SIZE == EndPos % XLOG_BLCKSZ) { + if (EndPos % XLogSegSize == EndPos % XLOG_BLCKSZ) { EndPos += SizeOfXLogLongPHD; } else { EndPos += SizeOfXLogShortPHD; @@ -1485,7 +1489,7 @@ loop: startbytepos = compare.u64[0]; ptr = XLogBytePosToEndRecPtr(startbytepos); - if (ptr % XLOG_SEG_SIZE == 0) { + if (ptr % XLogSegSize == 0) { *EndPos = *StartPos = ptr; return false; } @@ -1497,8 +1501,8 @@ loop: *StartPos = XLogBytePosToRecPtr(startbytepos); *EndPos = XLogBytePosToEndRecPtr(endbytepos); - segleft = XLOG_SEG_SIZE - ((*EndPos) % XLOG_SEG_SIZE); - if (segleft != XLOG_SEG_SIZE) { + segleft = XLogSegSize - ((*EndPos) % XLogSegSize); + if (segleft != XLogSegSize) { /* consume the rest of the segment */ *EndPos += segleft; endbytepos = XLogRecPtrToBytePos(*EndPos); @@ -1520,7 +1524,7 @@ loop: startbytepos = Insert->CurrBytePos; ptr = XLogBytePosToEndRecPtr(startbytepos); - if (ptr % XLOG_SEG_SIZE == 0) { + if (ptr % XLogSegSize == 0) { SpinLockRelease(&Insert->insertpos_lck); *EndPos = *StartPos = ptr; return false; @@ -1532,8 +1536,8 @@ loop: *StartPos = XLogBytePosToRecPtr(startbytepos); *EndPos = XLogBytePosToEndRecPtr(endbytepos); - segleft = XLOG_SEG_SIZE - ((*EndPos) % XLOG_SEG_SIZE); - if (segleft != XLOG_SEG_SIZE) { + segleft = XLogSegSize - ((*EndPos) % XLogSegSize); + if (segleft != XLogSegSize) { 
/* consume the rest of the segment */ *EndPos += segleft; endbytepos = XLogRecPtrToBytePos(*EndPos); @@ -1546,7 +1550,7 @@ loop: *PrevPtr = XLogBytePosToRecPtr(startbytepos - prevbytesize); - Assert((*EndPos) % XLOG_SEG_SIZE == 0); + Assert((*EndPos) % XLogSegSize == 0); Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos); Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos); Assert(XLogRecPtrToBytePos(*PrevPtr) == (startbytepos - prevbytesize)); @@ -1776,7 +1780,7 @@ static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rd * allocated and zeroed in the WAL buffers so that when the caller (or * someone else) does XLogWrite(), it can really write out all the zeros. */ - if (isLogSwitch && CurrPos % XLOG_SEG_SIZE != 0) { + if (isLogSwitch && CurrPos % XLogSegSize != 0) { int currlrc = *currlrc_ptr; /* An xlog-switch record doesn't contain any data besides the header */ Assert(write_len == SizeOfXLogRecord); @@ -2060,7 +2064,7 @@ static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr) XLByteToSeg(ptr, fullsegs); - fullpages = (ptr % XLOG_SEG_SIZE) / XLOG_BLCKSZ; + fullpages = (ptr % XLogSegSize) / XLOG_BLCKSZ; offset = ptr % XLOG_BLCKSZ; if (fullpages == 0) { @@ -2098,7 +2102,7 @@ static void XLogArchiveNotify(const char *xlog) errno_t errorno = EOK; /* insert an otherwise empty file called .ready */ - errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/archive_status/%s%s", xlog, ".ready"); + errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, "%s/%s%s", ARCHIVEDIR, xlog, ".ready"); securec_check_ss(errorno, "", ""); fd = AllocateFile(archiveStatusPath, "w"); @@ -2151,7 +2155,7 @@ void XLogArchiveForceDone(const char *xlog) errno_t errorno = EOK; /* Exit if already known done */ - errorno = snprintf_s(archiveDone, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/archive_status/%s%s", xlog, ".done"); + errorno = snprintf_s(archiveDone, MAXPGPATH, MAXPGPATH - 1, "%s/%s%s", ARCHIVEDIR, xlog, ".done"); securec_check_ss(errorno, 
"", ""); if (stat(archiveDone, &stat_buf) == 0) { @@ -2159,7 +2163,7 @@ void XLogArchiveForceDone(const char *xlog) } /* If .ready exists, rename it to .done */ - errorno = snprintf_s(archiveReady, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/archive_status/%s%s", xlog, ".ready"); + errorno = snprintf_s(archiveReady, MAXPGPATH, MAXPGPATH - 1, "%s/%s%s", ARCHIVEDIR, xlog, ".ready"); securec_check_ss(errorno, "", ""); if (stat(archiveReady, &stat_buf) == 0) { @@ -2208,21 +2212,21 @@ static bool XLogArchiveCheckDone(const char *xlog) } /* First check for .done --- this means archiver is done with it */ - errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/archive_status/%s%s", xlog, ".done"); + errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, "%s/%s%s", ARCHIVEDIR, xlog, ".done"); securec_check_ss(errorno, "", ""); if (stat(archiveStatusPath, &stat_buf) == 0) { return true; } /* check for .ready --- this means archiver is still busy with it */ - errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/archive_status/%s%s", xlog, ".ready"); + errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, "%s/%s%s", ARCHIVEDIR, xlog, ".ready"); securec_check_ss(errorno, "", ""); if (stat(archiveStatusPath, &stat_buf) == 0) { return false; } /* Race condition --- maybe archiver just finished, so recheck */ - errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/archive_status/%s%s", xlog, ".done"); + errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, "%s/%s%s", ARCHIVEDIR, xlog, ".done"); securec_check_ss(errorno, "", ""); if (stat(archiveStatusPath, &stat_buf) == 0) { return true; @@ -2250,21 +2254,21 @@ static bool XLogArchiveIsBusy(const char *xlog) errno_t errorno = EOK; /* First check for .done --- this means archiver is done with it */ - errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/archive_status/%s%s", xlog, ".done"); + errorno = 
snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, "%s/%s%s", ARCHIVEDIR, xlog, ".done"); securec_check_ss(errorno, "", ""); if (stat(archiveStatusPath, &stat_buf) == 0) { return false; } /* check for .ready --- this means archiver is still busy with it */ - errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/archive_status/%s%s", xlog, ".ready"); + errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, "%s/%s%s", ARCHIVEDIR, xlog, ".ready"); securec_check_ss(errorno, "", ""); if (stat(archiveStatusPath, &stat_buf) == 0) { return true; } /* Race condition --- maybe archiver just finished, so recheck */ - errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/archive_status/%s%s", xlog, ".done"); + errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, "%s/%s%s", ARCHIVEDIR, xlog, ".done"); securec_check_ss(errorno, "", ""); if (stat(archiveStatusPath, &stat_buf) == 0) { return false; @@ -2296,7 +2300,7 @@ bool XLogArchiveIsReady(const char *xlog) char archiveStatusPath[MAXPGPATH]; struct stat stat_buf; errno_t rc = EOK; - rc = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/archive_status/%s%s", xlog, ".ready"); + rc = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, "%s/%s%s", ARCHIVEDIR, xlog, ".ready"); securec_check_ss(rc, "", ""); if (stat(archiveStatusPath, &stat_buf) == 0) { @@ -2317,12 +2321,12 @@ static void XLogArchiveCleanup(const char *xlog) errno_t errorno = EOK; /* Remove the .done file */ - errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/archive_status/%s%s", xlog, ".done"); + errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, "%s/%s%s", ARCHIVEDIR, xlog, ".done"); securec_check_ss(errorno, "", ""); unlink(archiveStatusPath); /* Remove the .ready file if present --- normally it shouldn't be */ - errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/archive_status/%s%s", xlog, ".ready"); + 
errorno = snprintf_s(archiveStatusPath, MAXPGPATH, MAXPGPATH - 1, "%s/%s%s", ARCHIVEDIR, xlog, ".ready"); securec_check_ss(errorno, "", ""); unlink(archiveStatusPath); } @@ -3204,6 +3208,10 @@ void XLogWaitFlush(XLogRecPtr recptr) return; } + if (!SSXLogInsertAllowed()) { + return; + } + volatile XLogRecPtr flushTo = gs_compare_and_swap_u64(&g_instance.wal_cxt.flushResult, 0, 0); while (XLByteLT(flushTo, recptr)) { @@ -3410,6 +3418,10 @@ bool XLogBackgroundFlush(void) return false; } + if (!SSXLogInsertAllowed()) { + return false; + } + #ifndef ENABLE_MULTIPLE_NODES if (g_instance.attr.attr_storage.dcf_attr.enable_dcf && t_thrd.dcf_cxt.dcfCtxInfo->isDcfStarted) { WriteRqstPtr = (XLogRecPtr)pg_atomic_barrier_read_u64(&curr_entry_ptr->endLSN); @@ -3698,7 +3710,7 @@ int XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) char path[MAXPGPATH]; char tmppath[MAXPGPATH]; char *zbuffer = NULL; - char zbuffer_raw[XLOG_BLCKSZ + MAXIMUM_ALIGNOF]; + char zbuffer_raw[XLOG_BLCKSZ + ALIGNOF_BUFFER]; XLogSegNo installed_segno; int max_advance; int fd; @@ -3706,7 +3718,7 @@ int XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) errno_t rc = EOK; gstrace_entry(GS_TRC_ID_XLogFileInit); - rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%08X%08X%08X", t_thrd.xlog_cxt.ThisTimeLineID, + rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X", SS_XLOGDIR, t_thrd.xlog_cxt.ThisTimeLineID, (uint32)((logsegno) / XLogSegmentsPerXLogId), (uint32)((logsegno) % XLogSegmentsPerXLogId)); securec_check_ss(rc, "", ""); @@ -3715,7 +3727,7 @@ int XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) fd = BasicOpenFile(path, O_RDWR | PG_BINARY | (unsigned int)get_sync_bit(u_sess->attr.attr_storage.sync_method), S_IRUSR | S_IWUSR); if (fd < 0) { - if (errno != ENOENT) { + if (!FILE_POSSIBLY_DELETED(errno)) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not open file \"%s\" (log segment %s): %m", path, 
XLogFileNameP(t_thrd.xlog_cxt.ThisTimeLineID, logsegno)))); @@ -3726,6 +3738,10 @@ int XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) } } + if (!SSXLogInsertAllowed()) { + ereport(FATAL, (errmsg("SS standby cannot init xlog files due to DSS"))); + } + /* * Initialize an empty (all zeroes) segment. NOTE: it is possible that * another process is doing the same thing. If so, we will end up @@ -3734,7 +3750,7 @@ int XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) */ ereport(DEBUG2, (errmsg("creating and filling new WAL file"))); - rc = snprintf_s(tmppath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/xlogtemp.%lu", gs_thread_self()); + rc = snprintf_s(tmppath, MAXPGPATH, MAXPGPATH - 1, "%s/xlogtemp.%lu", SS_XLOGDIR, gs_thread_self()); securec_check_ss(rc, "\0", "\0"); unlink(tmppath); @@ -3757,24 +3773,40 @@ int XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) * Note: ensure the buffer is reasonably well-aligned; this may save a few * cycles transferring data to the kernel. */ - zbuffer = (char *)MAXALIGN(zbuffer_raw); + zbuffer = (char *)BUFFERALIGN(zbuffer_raw); rc = memset_s(zbuffer, XLOG_BLCKSZ, 0, XLOG_BLCKSZ); securec_check(rc, "\0", "\0"); - for (nbytes = 0; (uint32)nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ) { - errno = 0; - pgstat_report_waitevent(WAIT_EVENT_WAL_INIT_WRITE); - if ((int)write(fd, zbuffer, XLOG_BLCKSZ) != (int)XLOG_BLCKSZ) { - int save_errno = errno; - // If we fail to make the file, delete it to release disk space - unlink(tmppath); + if (is_dss_fd(fd)) { + /* extend file and fill space at once to avoid performance issue */ + pgstat_report_waitevent(WAIT_EVENT_WAL_INIT_WRITE); + errno = 0; + if (ftruncate(fd, XLogSegSize) != 0) { + int save_errno = errno; close(fd); + unlink(tmppath); /* if write didn't set errno, assume problem is no disk space */ errno = save_errno ? 
save_errno : ENOSPC; - ereport(ERROR, (errcode_for_file_access(), errmsg("could not write to file \"%s\": %m", tmppath))); } pgstat_report_waitevent(WAIT_EVENT_END); + } else { + for (nbytes = 0; (uint32)nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ) { + errno = 0; + pgstat_report_waitevent(WAIT_EVENT_WAL_INIT_WRITE); + if ((int)write(fd, zbuffer, XLOG_BLCKSZ) != (int)XLOG_BLCKSZ) { + int save_errno = errno; + + // If we fail to make the file, delete it to release disk space + close(fd); + unlink(tmppath); + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; + + ereport(ERROR, (errcode_for_file_access(), errmsg("could not write to file \"%s\": %m", tmppath))); + } + pgstat_report_waitevent(WAIT_EVENT_END); + } } pgstat_report_waitevent(WAIT_EVENT_WAL_INIT_SYNC); @@ -3853,7 +3885,7 @@ bool PreInitXlogFileForStandby(XLogRecPtr requestLsn) (uint32)lastWriteLsn))); lastWriteLsn = requestLsn; nextSegNo = 0; - } else if (requestLsn < lastWriteLsn + XLOG_SEG_SIZE) { + } else if (requestLsn < lastWriteLsn + XLogSegSize) { /* If the requestLsn is not more than one segement, skip! 
*/ return false; } else { @@ -4092,7 +4124,7 @@ static bool InstallXLogFileSegment(XLogSegNo *segno, const char *tmppath, bool f struct stat stat_buf; errno_t errorno = EOK; - errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%08X%08X%08X", t_thrd.xlog_cxt.ThisTimeLineID, + errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X", SS_XLOGDIR, t_thrd.xlog_cxt.ThisTimeLineID, (uint32)((*segno) / XLogSegmentsPerXLogId), (uint32)((*segno) % XLogSegmentsPerXLogId)); securec_check_ss(errorno, "", ""); @@ -4118,7 +4150,7 @@ static bool InstallXLogFileSegment(XLogSegNo *segno, const char *tmppath, bool f } (*segno)++; (*max_advance)--; - errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%08X%08X%08X", + errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X", SS_XLOGDIR, t_thrd.xlog_cxt.ThisTimeLineID, (uint32)((*segno) / XLogSegmentsPerXLogId), (uint32)((*segno) % XLogSegmentsPerXLogId)); securec_check_ss(errorno, "", ""); @@ -4150,7 +4182,7 @@ int XLogFileOpen(XLogSegNo segno) int fd; errno_t errorno = EOK; - errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%08X%08X%08X", t_thrd.xlog_cxt.ThisTimeLineID, + errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X", SS_XLOGDIR, t_thrd.xlog_cxt.ThisTimeLineID, (uint32)((segno) / XLogSegmentsPerXLogId), (uint32)((segno) % XLogSegmentsPerXLogId)); securec_check_ss(errorno, "", ""); @@ -4198,7 +4230,7 @@ static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli, int source, case XLOG_FROM_PG_XLOG: case XLOG_FROM_STREAM: - errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%08X%08X%08X", tli, + errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X", SS_XLOGDIR, tli, (uint32)((segno) / XLogSegmentsPerXLogId), (uint32)((segno) % XLogSegmentsPerXLogId)); securec_check_ss(errorno, "", ""); t_thrd.xlog_cxt.restoredFromArchive = false; @@ -4217,7 +4249,7 @@ static int XLogFileRead(XLogSegNo segno, int emode, 
TimeLineID tli, int source, KeepFileRestoredFromArchive((const char *)path, (const char *)xlogfname); // Set path to point at the new file in pg_xlog. - errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%s", xlogfname); + errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%s", SS_XLOGDIR, xlogfname); securec_check_ss(errorno, "", ""); } @@ -4241,7 +4273,7 @@ static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli, int source, return fd; } - if (errno != ENOENT || !notfoundOk) { /* unexpected failure? */ + if (!FILE_POSSIBLY_DELETED(errno) || !notfoundOk) { /* unexpected failure? */ ereport(PANIC, (errcode_for_file_access(), errmsg("could not open file \"%s\" (log segment %s): %m", path, XLogFileNameP(t_thrd.xlog_cxt.ThisTimeLineID, segno)))); } @@ -4292,8 +4324,9 @@ static int XLogFileReadAnyTLI(XLogSegNo segno, int emode, uint32 sources) } /* Couldn't find it. For simplicity, complain about front timeline */ - errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%08X%08X%08X", t_thrd.xlog_cxt.recoveryTargetTLI, - (uint32)((segno) / XLogSegmentsPerXLogId), (uint32)((segno) % XLogSegmentsPerXLogId)); + errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X", SS_XLOGDIR, + t_thrd.xlog_cxt.recoveryTargetTLI, (uint32)((segno) / XLogSegmentsPerXLogId), + (uint32)((segno) % XLogSegmentsPerXLogId)); securec_check_ss(errorno, "", ""); errno = ENOENT; @@ -4833,7 +4866,7 @@ static void remove_xlogtemp_files(void) struct stat st; errno_t errorno = EOK; - if ((dir = opendir(XLOGDIR)) != NULL) { + if ((dir = opendir(SS_XLOGDIR)) != NULL) { while ((de = readdir(dir)) != NULL) { /* Skip special stuff */ if (strncmp(de->d_name, ".", 1) == 0 || strncmp(de->d_name, "..", 2) == 0) { @@ -4844,11 +4877,11 @@ static void remove_xlogtemp_files(void) continue; } - errorno = snprintf_s(fullpath, sizeof(fullpath), sizeof(fullpath) - 1, XLOGDIR "/%s", de->d_name); + errorno = snprintf_s(fullpath, sizeof(fullpath), sizeof(fullpath) - 1, 
"%s/%s", SS_XLOGDIR, de->d_name); securec_check_ss(errorno, "\0", "\0"); if (lstat(fullpath, &st) != 0) { - if (errno != ENOENT) { + if (!FILE_POSSIBLY_DELETED(errno)) { ereport(WARNING, (errmsg("could not stat file or directory : %s", fullpath))); } /* If the file went away while scanning, it's not an error. */ @@ -4901,15 +4934,18 @@ static void UpdateLastRemovedPtr(const char *filename) */ static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr) { + if (!SSXLogInsertAllowed()) + return; + DIR *xldir = NULL; struct dirent *xlde = NULL; char lastoff[MAXFNAMELEN]; errno_t errorno = EOK; - xldir = AllocateDir(XLOGDIR); + xldir = AllocateDir(SS_XLOGDIR); if (xldir == NULL) { ereport(ERROR, - (errcode_for_file_access(), errmsg("could not open transaction log directory \"%s\": %m", XLOGDIR))); + (errcode_for_file_access(), errmsg("could not open transaction log directory \"%s\": %m", SS_XLOGDIR))); } errorno = snprintf_s(lastoff, MAXFNAMELEN, MAXFNAMELEN - 1, "%08X%08X%08X", t_thrd.xlog_cxt.ThisTimeLineID, @@ -4974,7 +5010,7 @@ static void RemoveXlogFile(const char *segname, XLogRecPtr endptr) XLByteToPrevSeg(endptr, endLogSegNo); max_advance = XLOGfileslop; - errno_t errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%s", segname); + errno_t errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%s", SS_XLOGDIR, segname); securec_check_ss(errorno, "", ""); /* @@ -5050,8 +5086,8 @@ static void ValidateXLOGDirectoryStructure(void) errno_t errorno = EOK; /* Check for pg_xlog; if it doesn't exist, error out */ - if (stat(XLOGDIR, &stat_buf) != 0 || !S_ISDIR(stat_buf.st_mode)) - ereport(FATAL, (errmsg("required WAL directory \"%s\" does not exist", XLOGDIR))); + if (stat(SS_XLOGDIR, &stat_buf) != 0 || !S_ISDIR(stat_buf.st_mode)) + ereport(FATAL, (errmsg("required WAL directory \"%s\" does not exist", SS_XLOGDIR))); /* Check for archive_status */ errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/archive_status"); @@ -6021,6 +6057,56 @@ 
static TimeLineID findNewestTimeLine(TimeLineID startTLI) return newestTLI; } +static off_t GetControlPageByInstanceId() +{ + off_t seekpos = 0; + if (ENABLE_DSS) { + seekpos = (off_t)BLCKSZ * g_instance.attr.attr_storage.dms_attr.instance_id; + } + + return seekpos; +} + +void SSWriteInstanceControlFile(int fd, const char* buffer, int id, off_t wsize) +{ + errno = 0; + off_t seekpos = 0; + off_t actual_size = 0; + + /* find write postion of instance according to noid_id */ + if (id != -1) { + seekpos = (off_t)BLCKSZ * id; + + off_t fileSize = dss_get_file_size(XLOG_CONTROL_FILE); + ereport(LOG, (errmsg_internal("start fileSize = %ld", fileSize))); + if ((off_t)seekpos > fileSize) { + (void)dss_fallocate_file(fd, 0, fileSize, seekpos - fileSize); + } + + actual_size = lseek(fd, seekpos, SEEK_SET); + if (actual_size == -1) { + ereport(PANIC, (errcode_for_file_access(), + errmsg("write position %ld is larger than max write size in dss", seekpos))); + } + } + + pgstat_report_waitevent(WAIT_EVENT_CONTROL_FILE_WRITE); + if (write(fd, buffer, wsize) != wsize) { + /* if write didn't set errno, assume problem is no disk space */ + if (errno == 0) { + errno = ENOSPC; + } + ereport(PANIC, (errcode_for_file_access(), errmsg("could not write to control file: %m"))); + } + pgstat_report_waitevent(WAIT_EVENT_END); + + pgstat_report_waitevent((uint32)WAIT_EVENT_CONTROL_FILE_SYNC); + if (pg_fsync(fd) != 0) { + ereport(PANIC, (errcode_for_file_access(), errmsg("could not fsync control file: %m"))); + } + pgstat_report_waitevent(WAIT_EVENT_END); +} + /* * I/O routines for pg_control * @@ -6038,7 +6124,7 @@ static TimeLineID findNewestTimeLine(TimeLineID startTLI) STATIC void WriteControlFile(void) { int fd = -1; - char buffer[PG_CONTROL_SIZE]; /* need not be aligned */ + char buffer[PG_CONTROL_SIZE] __attribute__((__aligned__(ALIGNOF_BUFFER))); /* need to be aligned */ errno_t errorno = EOK; /* @@ -6053,7 +6139,7 @@ STATIC void WriteControlFile(void) 
t_thrd.shemem_ptr_cxt.ControlFile->blcksz = BLCKSZ; t_thrd.shemem_ptr_cxt.ControlFile->relseg_size = RELSEG_SIZE; t_thrd.shemem_ptr_cxt.ControlFile->xlog_blcksz = XLOG_BLCKSZ; - t_thrd.shemem_ptr_cxt.ControlFile->xlog_seg_size = XLOG_SEG_SIZE; + t_thrd.shemem_ptr_cxt.ControlFile->xlog_seg_size = XLogSegSize; t_thrd.shemem_ptr_cxt.ControlFile->nameDataLen = NAMEDATALEN; t_thrd.shemem_ptr_cxt.ControlFile->indexMaxKeys = INDEX_MAX_KEYS; @@ -6099,29 +6185,34 @@ STATIC void WriteControlFile(void) char current_absolute_path[MAX_SIZE] = {0}; getcwd(current_absolute_path, MAX_SIZE); #endif - + fd = BasicOpenFile(XLOG_CONTROL_FILE, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) { ereport(PANIC, (errcode_for_file_access(), errmsg("could not create control file \"%s\": %m", XLOG_CONTROL_FILE))); } - errno = 0; - pgstat_report_waitevent(WAIT_EVENT_CONTROL_FILE_WRITE); - if (write(fd, buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE) { - /* if write didn't set errno, assume problem is no disk space */ - if (errno == 0) { - errno = ENOSPC; + /* create pg_control file */ + if (ENABLE_DSS) { + SSWriteInstanceControlFile(fd, buffer, g_instance.attr.attr_storage.dms_attr.instance_id, PG_CONTROL_SIZE); + } else { + errno = 0; + pgstat_report_waitevent(WAIT_EVENT_CONTROL_FILE_WRITE); + if (write(fd, buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE) { + /* if write didn't set errno, assume problem is no disk space */ + if (errno == 0) { + errno = ENOSPC; + } + ereport(PANIC, (errcode_for_file_access(), errmsg("could not write to control file: %m"))); } - ereport(PANIC, (errcode_for_file_access(), errmsg("could not write to control file: %m"))); - } - pgstat_report_waitevent(WAIT_EVENT_END); + pgstat_report_waitevent(WAIT_EVENT_END); - pgstat_report_waitevent(WAIT_EVENT_CONTROL_FILE_SYNC); - if (pg_fsync(fd) != 0) { - ereport(PANIC, (errcode_for_file_access(), errmsg("could not fsync control file: %m"))); + pgstat_report_waitevent(WAIT_EVENT_CONTROL_FILE_SYNC); + if 
(pg_fsync(fd) != 0) { + ereport(PANIC, (errcode_for_file_access(), errmsg("could not fsync control file: %m"))); + } + pgstat_report_waitevent(WAIT_EVENT_END); } - pgstat_report_waitevent(WAIT_EVENT_END); if (close(fd)) { ereport(PANIC, (errcode_for_file_access(), errmsg("could not close control file: %m"))); @@ -6134,6 +6225,7 @@ STATIC void ReadControlFile(void) int fd = -1; char *fname = NULL; bool retry = false; + errno_t errorno = EOK; // Read data... #ifdef USE_ASSERT_CHECKING @@ -6149,11 +6241,31 @@ loop: if (fd < 0) { ereport(FATAL, (errcode_for_file_access(), errmsg("could not open control file \"%s\": %m", fname))); } - pgstat_report_waitevent(WAIT_EVENT_CONTROL_FILE_READ); - if (read(fd, t_thrd.shemem_ptr_cxt.ControlFile, sizeof(ControlFileData)) != sizeof(ControlFileData)) { - ereport(PANIC, (errcode_for_file_access(), errmsg("could not read from control file: %m"))); + + off_t seekpos = GetControlPageByInstanceId(); + (void)lseek(fd, seekpos, SEEK_SET); + + if (ENABLE_DSS) { + int read_size = BUFFERALIGN(sizeof(ControlFileData)); + char buffer[read_size] __attribute__((__aligned__(ALIGNOF_BUFFER))); + + pgstat_report_waitevent(WAIT_EVENT_CONTROL_FILE_READ); + if (read(fd, buffer, read_size) != read_size) { + ereport(PANIC, (errcode_for_file_access(), errmsg("could not read from control file: %m"))); + } + pgstat_report_waitevent(WAIT_EVENT_END); + + errorno = memcpy_s(t_thrd.shemem_ptr_cxt.ControlFile, + sizeof(ControlFileData), buffer, sizeof(ControlFileData)); + securec_check_c(errorno, "\0", "\0"); + } else { + pgstat_report_waitevent(WAIT_EVENT_CONTROL_FILE_READ); + if (read(fd, t_thrd.shemem_ptr_cxt.ControlFile, sizeof(ControlFileData)) != + sizeof(ControlFileData)) { + ereport(PANIC, (errcode_for_file_access(), errmsg("could not read from control file: %m"))); + } + pgstat_report_waitevent(WAIT_EVENT_END); } - pgstat_report_waitevent(WAIT_EVENT_END); if (close(fd)) { ereport(PANIC, (errcode_for_file_access(), errmsg("could not close control file: 
%m"))); @@ -6249,11 +6361,11 @@ loop: t_thrd.shemem_ptr_cxt.ControlFile->xlog_blcksz, XLOG_BLCKSZ), errhint("It looks like you need to recompile or gs_initdb."))); } - if (t_thrd.shemem_ptr_cxt.ControlFile->xlog_seg_size != XLOG_SEG_SIZE) { + if (t_thrd.shemem_ptr_cxt.ControlFile->xlog_seg_size != XLogSegSize) { ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with XLOG_SEG_SIZE %u," - " but the server was compiled with XLOG_SEG_SIZE %d.", - t_thrd.shemem_ptr_cxt.ControlFile->xlog_seg_size, XLOG_SEG_SIZE), + " but the server was compiled with XLOG_SEG_SIZE %lu.", + t_thrd.shemem_ptr_cxt.ControlFile->xlog_seg_size, XLogSegSize), errhint("It looks like you need to recompile or gs_initdb."))); } if (t_thrd.shemem_ptr_cxt.ControlFile->nameDataLen != NAMEDATALEN) { @@ -6344,15 +6456,18 @@ void UpdateControlFile(void) int len; errno_t err = EOK; char *fname[2]; - ControlFileData copy_of_ControlFile; - len = sizeof(ControlFileData); - err = memcpy_s(©_of_ControlFile, len, t_thrd.shemem_ptr_cxt.ControlFile, len); + int write_size; + + write_size = (int)BUFFERALIGN(len); + char buffer[write_size] __attribute__((__aligned__(ALIGNOF_BUFFER))) = {0}; + + err = memcpy_s(&buffer, write_size, t_thrd.shemem_ptr_cxt.ControlFile, len); securec_check(err, "\0", "\0"); - INIT_CRC32C(copy_of_ControlFile.crc); - COMP_CRC32C(copy_of_ControlFile.crc, (char *)©_of_ControlFile, offsetof(ControlFileData, crc)); - FIN_CRC32C(copy_of_ControlFile.crc); + INIT_CRC32C(((ControlFileData *)buffer)->crc); + COMP_CRC32C(((ControlFileData *)buffer)->crc, (char *)buffer, offsetof(ControlFileData, crc)); + FIN_CRC32C(((ControlFileData *)buffer)->crc); #ifdef USE_ASSERT_CHECKING #define MAX_SIZE 1024 @@ -6374,9 +6489,12 @@ void UpdateControlFile(void) ereport(FATAL, (errcode_for_file_access(), errmsg("could not open control file \"%s\": %m", fname[i]))); } + off_t seekpos = GetControlPageByInstanceId(); + (void)lseek(fd, seekpos, 
SEEK_SET); + errno = 0; pgstat_report_waitevent(WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE); - if (write(fd, ©_of_ControlFile, len) != len) { + if (write(fd, buffer, write_size) != write_size) { /* if write didn't set errno, assume problem is no disk space */ if (errno == 0) { errno = ENOSPC; @@ -6430,6 +6548,10 @@ static void RecoverControlFile(void) ereport(FATAL, (errcode_for_file_access(), errmsg("recover failed could not open control file \"%s\": %m", XLOG_CONTROL_FILE))); } + + off_t seekpos = GetControlPageByInstanceId(); + (void)lseek(fd, seekpos, SEEK_SET); + errno = 0; /* write the whole block */ if (write(fd, &buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE) { @@ -6492,8 +6614,8 @@ void SetThisTimeID(TimeLineID timelineID) static int XLOGChooseNumBuffers(void) { int xbuffers = g_instance.attr.attr_storage.NBuffers / 32; - if (xbuffers > XLOG_SEG_SIZE / XLOG_BLCKSZ) { - xbuffers = XLOG_SEG_SIZE / XLOG_BLCKSZ; + if (xbuffers > (int)XLogSegSize / XLOG_BLCKSZ) { + xbuffers = (int)XLogSegSize / XLOG_BLCKSZ; } if (xbuffers < 8) { xbuffers = 8; @@ -6616,6 +6738,18 @@ void XLOGShmemInit(void) t_thrd.shemem_ptr_cxt.GlobalWALInsertLocks = t_thrd.shemem_ptr_cxt.XLogCtl->Insert.WALInsertLocks; t_thrd.shemem_ptr_cxt.LocalGroupWALInsertLocks = t_thrd.shemem_ptr_cxt.GlobalWALInsertLocks[t_thrd.proc->nodeno]; + + if ((((SS_STANDBY_PROMOTING && t_thrd.role == STARTUP) || SS_PRIMARY_DEMOTED) && + g_instance.dms_cxt.SSRecoveryInfo.new_primary_reset_walbuf_flag == true) || + SSFAILOVER_TRIGGER) { + g_instance.dms_cxt.SSRecoveryInfo.new_primary_reset_walbuf_flag = false; + errorno = memset_s(t_thrd.shemem_ptr_cxt.XLogCtl->xlblocks, + sizeof(XLogRecPtr) * g_instance.attr.attr_storage.XLOGbuffers, 0, + sizeof(XLogRecPtr) * g_instance.attr.attr_storage.XLOGbuffers); + securec_check(errorno, "", ""); + ereport(LOG, (errmsg("[SS switchover] Successfully reset xlblocks when thrd:%lu with role:%d started", + t_thrd.proc->pid, (int)t_thrd.role))); + } return; } errorno = 
memset_s(t_thrd.shemem_ptr_cxt.XLogCtl, sizeof(XLogCtlData), 0, sizeof(XLogCtlData)); @@ -6732,7 +6866,11 @@ static XLogSegNo GetOldestXLOGSegNo(const char *workingPath) XLogSegNo segno; errno_t rc = EOK; - rc = snprintf_s(xlogDirStr, MAXPGPATH, MAXPGPATH - 1, "%s/%s", workingPath, XLOGDIR); + if (ENABLE_DSS) { + rc = snprintf_s(xlogDirStr, MAXPGPATH, MAXPGPATH - 1, "%s", SS_XLOGDIR); + } else { + rc = snprintf_s(xlogDirStr, MAXPGPATH, MAXPGPATH - 1, "%s/%s", workingPath, XLOGDIR); + } securec_check_ss(rc, "", ""); xlogDir = opendir(xlogDirStr); if (!xlogDir) { @@ -6771,7 +6909,11 @@ XLogSegNo GetNewestXLOGSegNo(const char *workingPath) XLogSegNo segno; errno_t rc = EOK; - rc = snprintf_s(xlogDirStr, MAXPGPATH, MAXPGPATH - 1, "%s/%s", workingPath, XLOGDIR); + if (ENABLE_DSS) { + rc = snprintf_s(xlogDirStr, MAXPGPATH, MAXPGPATH - 1, "%s", SS_XLOGDIR); + } else { + rc = snprintf_s(xlogDirStr, MAXPGPATH, MAXPGPATH - 1, "%s/%s", workingPath, XLOGDIR); + } securec_check_ss(rc, "", ""); xlogDir = opendir(xlogDirStr); if (!xlogDir) { @@ -6877,7 +7019,7 @@ void BootStrapXLOG(void) */ macAddr = GetMACAddr(); sysidentifier = macAddr << 16; - fd = open("/dev/urandom", O_RDONLY); + fd = open("/dev/urandom", O_RDONLY, 0); if (fd == -1) { ereport(ERROR, (errcode_for_file_access(), errmsg("cann't read a random number from file \"/dev/urandom\"."))); } @@ -7006,6 +7148,11 @@ void BootStrapXLOG(void) t_thrd.xlog_cxt.openLogFile = -1; + /* In SS, the first node to create control file is will be primary */ + if (ENABLE_DSS) { + SSWriteReformerControlPages(); + } + /* Now create pg_control */ ret = memset_s(t_thrd.shemem_ptr_cxt.ControlFile, sizeof(ControlFileData), 0, sizeof(ControlFileData)); securec_check(ret, "", ""); @@ -7022,9 +7169,11 @@ void BootStrapXLOG(void) t_thrd.shemem_ptr_cxt.ControlFile->max_prepared_xacts = g_instance.attr.attr_storage.max_prepared_xacts; t_thrd.shemem_ptr_cxt.ControlFile->max_locks_per_xact = g_instance.attr.attr_storage.max_locks_per_xact; 
t_thrd.shemem_ptr_cxt.ControlFile->wal_level = g_instance.attr.attr_storage.wal_level; + t_thrd.shemem_ptr_cxt.ControlFile->bootstrap_segment = EnableInitDBSegment; /* some additional ControlFile fields are set in WriteControlFile() */ WriteControlFile(); + if (IS_SHARED_STORAGE_MODE) { ShareStorageXLogCtl *ctlInfo = g_instance.xlog_cxt.shareStorageXLogCtl; InitShareStorageCtlInfo(ctlInfo, sysidentifier); @@ -9089,7 +9238,17 @@ void StartupXLOG(void) * Note: in most control paths, *ControlFile is already valid and we need * not do ReadControlFile() here, but might as well do it to be sure. */ - ReadControlFile(); + if (ENABLE_DMS) { + int src_id = g_instance.attr.attr_storage.dms_attr.instance_id; + if (SSFAILOVER_TRIGGER || SS_STANDBY_PROMOTING) { + src_id = SSGetPrimaryInstId(); + ereport(LOG, (errmsg("[SS Reform]: Standby:%d promoting, reading control file of original primary:%d", + g_instance.attr.attr_storage.dms_attr.instance_id, src_id))); + } + SSReadControlFile(src_id); + } else { + ReadControlFile(); + } if (FORCE_FINISH_ENABLED) { max_page_flush_lsn = mpfl_read_max_flush_lsn(); /* we can't exit proc here, because init gaussdb will run through here and there must be no LsnInfoFile. 
*/ @@ -9282,7 +9441,23 @@ void StartupXLOG(void) errorno = memset_s(&readprivate, sizeof(XLogPageReadPrivate), 0, sizeof(XLogPageReadPrivate)); securec_check(errorno, "", ""); - xlogreader = XLogReaderAllocate(&XLogPageRead, &readprivate); + if (ENABLE_DMS && ENABLE_DSS) { + if (SSFAILOVER_TRIGGER || SS_STANDBY_PROMOTING) { + SSGetXlogPath(); + xlogreader = XLogReaderAllocate(&SSXLogPageRead, &readprivate, ALIGNOF_BUFFER); + close_readFile_if_open(); + // init shared memory set page empty + SSCSNLOGShmemClear(); + SSCLOGShmemClear(); + SSMultiXactShmemClear(); + SSClearSegCache(); + } else { + xlogreader = XLogReaderAllocate(&XLogPageRead, &readprivate, ALIGNOF_BUFFER); + } + } else { + xlogreader = XLogReaderAllocate(&XLogPageRead, &readprivate); + } + if (xlogreader == NULL) { ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"), errdetail("Failed while allocating an XLog reading processor"))); @@ -9491,11 +9666,20 @@ void StartupXLOG(void) wasCheckpoint = wasShutdown || (record->xl_info == XLOG_CHECKPOINT_ONLINE); } - /* process assist file of chunk recycling */ - dw_ext_init(); + /* + * initialize double write, recover partial write + * in SS Switchover, skip dw init since we didn't do ShutdownXLOG + */ - /* initialize double write, recover partial write */ - dw_init(); + if (!SS_PERFORMING_SWITCHOVER && !SSFAILOVER_TRIGGER) { + /* process assist file of chunk recycling */ + dw_ext_init(); + dw_init(); + } + + if (SSFAILOVER_TRIGGER) { + ss_failover_dw_init(); + } /* Recover meta of undo subsystem. */ undo::RecoveryUndoSystemMeta(); @@ -9543,6 +9727,9 @@ void StartupXLOG(void) /* init dirty page queue rec lsn to checkpoint.redo */ update_dirty_page_queue_rec_lsn(checkPoint.redo, true); + if (ENABLE_DMS) { + g_instance.dms_cxt.SSRecoveryInfo.reclsn_updated = true; + } /* * for gtm environment, we need to set the local csn to next xid to increase. 
@@ -9676,6 +9863,10 @@ void StartupXLOG(void) * have been a clean shutdown and we did not have a recovery.conf file, * then assume no recovery needed. */ + if (SS_STANDBY_PROMOTING) { + ereport(LOG, (errmsg("[SS switchover] Standby promote: redo shutdown checkpoint now"))); + t_thrd.xlog_cxt.InRecovery = true; + } if (XLByteLT(checkPoint.redo, RecPtr)) { #ifdef ENABLE_MULTIPLE_NODES if (wasShutdown) { @@ -9788,7 +9979,9 @@ void StartupXLOG(void) } t_thrd.shemem_ptr_cxt.ControlFile->time = (pg_time_t)time(NULL); /* No need to hold ControlFileLock yet, we aren't up far enough */ - UpdateControlFile(); + if (!SSFAILOVER_TRIGGER) { + UpdateControlFile(); + } /* initialize our local copy of minRecoveryPoint */ t_thrd.xlog_cxt.minRecoveryPoint = t_thrd.shemem_ptr_cxt.ControlFile->minRecoveryPoint; @@ -9834,7 +10027,7 @@ void StartupXLOG(void) * connections, so that read-only backends don't try to read whatever * garbage is left over from before. */ - if (!RecoveryByPending) { + if (!RecoveryByPending && (!SSFAILOVER_TRIGGER && SSModifySharedLunAllowed())) { ResetUnloggedRelations(UNLOGGED_RELATION_CLEANUP); } @@ -9940,6 +10133,7 @@ void StartupXLOG(void) PublishStartupProcessInformation(); EnableSyncRequestForwarding(); SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED); + if (ENABLE_INCRE_CKPT) { t_thrd.xlog_cxt.pagewriter_launched = true; } else { @@ -9966,7 +10160,7 @@ void StartupXLOG(void) ResetXLogStatics(); // Find the first record that logically follows the checkpoint --- it // might physically precede it, though. 
- if (XLByteLT(checkPoint.redo, RecPtr)) { + if (XLByteLT(checkPoint.redo, RecPtr) || SS_STANDBY_PROMOTING) { /* back up to find the record */ record = ReadRecord(xlogreader, checkPoint.redo, PANIC, false); } else { @@ -10095,6 +10289,16 @@ void StartupXLOG(void) #else CountRedoTime(t_thrd.xlog_cxt.timeCost[TIME_COST_STEP_2]); #endif + + if (ENABLE_DMS && !SS_PERFORMING_SWITCHOVER && SSRecoveryApplyDelay(xlogreader)) { + if (xlogctl->recoveryPause) { + recoveryPausesHere(); + } + } + + if (ENABLE_DMS && SSSKIP_REDO_REPLAY) { + break; + } /* * ShmemVariableCache->nextXid must be beyond record's xid. @@ -10306,6 +10510,10 @@ void StartupXLOG(void) EndOfLog = t_thrd.xlog_cxt.EndRecPtr; XLByteToPrevSeg(EndOfLog, endLogSegNo); + if ((ENABLE_DMS && SSFAILOVER_TRIGGER) || SS_STANDBY_PROMOTING) { + bool use_existent = true; + (void)XLogFileInit(endLogSegNo, &use_existent, true); + } uint32 redoReadOff = t_thrd.xlog_cxt.readOff; GetWritePermissionSharedStorage(); @@ -10432,6 +10640,9 @@ void StartupXLOG(void) g_instance.wal_cxt.flushResult = EndOfLog; AdvanceXLInsertBuffer(EndOfLog, false, NULL); g_instance.wal_cxt.sentResult = EndOfLog; + if (ENABLE_DMS) { + ereport(LOG, (errmsg("[SS reform] EndOfLog:%lu.", EndOfLog))); + } /* Pre-scan prepared transactions to find out the range of XIDs present */ oldestActiveXID = PrescanPreparedTransactions(NULL, NULL); @@ -10487,7 +10698,7 @@ void StartupXLOG(void) * AFTER recovery is complete so that any unlogged relations created * during recovery also get picked up. 
*/ - if (t_thrd.xlog_cxt.InRecovery && !RecoveryByPending) { + if (t_thrd.xlog_cxt.InRecovery && !RecoveryByPending && SSModifySharedLunAllowed()) { ResetUnloggedRelations(UNLOGGED_RELATION_INIT); } @@ -10497,11 +10708,17 @@ void StartupXLOG(void) t_thrd.xlog_cxt.InRecovery = false; g_instance.roach_cxt.isRoachRestore = false; - LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); - t_thrd.shemem_ptr_cxt.ControlFile->state = DB_IN_PRODUCTION; - t_thrd.shemem_ptr_cxt.ControlFile->time = (pg_time_t)time(NULL); - UpdateControlFile(); - LWLockRelease(ControlFileLock); + if (ENABLE_DMS && ENABLE_REFORM) { + g_instance.dms_cxt.SSRecoveryInfo.recovery_pause_flag = true; + } + + if (!SSFAILOVER_TRIGGER && !SS_STANDBY_PROMOTING) { + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + t_thrd.shemem_ptr_cxt.ControlFile->state = DB_IN_PRODUCTION; + t_thrd.shemem_ptr_cxt.ControlFile->time = (pg_time_t)time(NULL); + UpdateControlFile(); + LWLockRelease(ControlFileLock); + } /* start the archive_timeout timer running */ t_thrd.shemem_ptr_cxt.XLogCtl->lastSegSwitchTime = (pg_time_t)time(NULL); @@ -10598,6 +10815,25 @@ void StartupXLOG(void) } } + if (SSFAILOVER_TRIGGER || SS_STANDBY_PROMOTING) { + if (SSFAILOVER_TRIGGER) { + g_instance.dms_cxt.SSRecoveryInfo.failover_triggered = false; + g_instance.dms_cxt.SSRecoveryInfo.in_failover = false; + pg_memory_barrier(); + } + ereport(LOG, (errmodule(MOD_DMS), + errmsg("[SS switchover/failover] standby promoting: start full checkpoint."))); + + RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_IMMEDIATE | CHECKPOINT_WAIT); + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + t_thrd.shemem_ptr_cxt.ControlFile->state = DB_IN_PRODUCTION; + t_thrd.shemem_ptr_cxt.ControlFile->time = (pg_time_t)time(NULL); + UpdateControlFile(); + LWLockRelease(ControlFileLock); + ereport(LOG, (errmodule(MOD_DMS), + errmsg("[SS switchover/failover] standby promoting: finished start checkpoint."))); + } + NextXidAfterReovery = t_thrd.xact_cxt.ShmemVariableCache->nextXid; 
OldestXidAfterRecovery = t_thrd.xact_cxt.ShmemVariableCache->oldestXid; PendingPreparedXactsCount = GetPendingXactCount(); @@ -10624,6 +10860,12 @@ void StartupXLOG(void) CalculateLocalLatestSnapshot(true); LWLockRelease(ProcArrayLock); + if (SS_PERFORMING_SWITCHOVER && g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_PROMOTING) { + ereport(LOG, (errmsg("[SS switchover] Standby promote: StartupXLOG finished, promote success"))); + Assert(g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_PROMOTING); + g_instance.dms_cxt.SSClusterState = NODESTATE_STANDBY_PROMOTED; + } + ereport(LOG, (errmsg("redo done, nextXid: " XID_FMT ", startupMaxXid: " XID_FMT ", recentLocalXmin: " XID_FMT ", recentGlobalXmin: %lu, PendingPreparedXacts: %d" ", NextCommitSeqNo: %lu, cutoff_csn_min: %lu.", @@ -10648,11 +10890,11 @@ void CopyXlogForForceFinishRedo(XLogSegNo logSegNo, uint32 termId, XLogReaderSta char dstPath[MAXPGPATH]; errno_t errorno = EOK; - errorno = snprintf_s(srcPath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%08X%08X%08X", xlogreader->readPageTLI, + errorno = snprintf_s(srcPath, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X", SS_XLOGDIR, xlogreader->readPageTLI, (uint32)((logSegNo) / XLogSegmentsPerXLogId), (uint32)((logSegNo) % XLogSegmentsPerXLogId)); securec_check_ss(errorno, "", ""); - errorno = snprintf_s(dstPath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%08X%08X%08X%08X", termId, + errorno = snprintf_s(dstPath, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X%08X", SS_XLOGDIR, termId, t_thrd.xlog_cxt.ThisTimeLineID, (uint32)((logSegNo) / XLogSegmentsPerXLogId), (uint32)((logSegNo) % XLogSegmentsPerXLogId)); securec_check_ss(errorno, "", ""); @@ -10672,10 +10914,10 @@ void RenameXlogForForceFinishRedo(XLogSegNo beginSegNo, TimeLineID tli, uint32 t char dstPath[MAXPGPATH]; errno_t errorno = EOK; - errorno = snprintf_s(srcPath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%08X%08X%08X", tli, + errorno = snprintf_s(srcPath, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X", SS_XLOGDIR, tli, 
(uint32)((sn) / XLogSegmentsPerXLogId), (uint32)((sn) % XLogSegmentsPerXLogId)); securec_check_ss(errorno, "", ""); - errorno = snprintf_s(dstPath, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%08X%08X%08X%08X", termId, tli, + errorno = snprintf_s(dstPath, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X%08X", SS_XLOGDIR, termId, tli, (uint32)((sn) / XLogSegmentsPerXLogId), (uint32)((sn) % XLogSegmentsPerXLogId)); securec_check_ss(errorno, "", ""); if (durable_rename(srcPath, dstPath, WARNING) != 0) { @@ -10979,7 +11221,9 @@ bool RecoveryInProgress(void) * shared variable has once been seen false. */ if (!t_thrd.xlog_cxt.LocalRecoveryInProgress) { - return false; + if (!ENABLE_DMS || (ENABLE_DMS && !SSFAILOVER_TRIGGER && !SS_STANDBY_PROMOTING)) { + return false; + } } /* use volatile pointer to prevent code rearrangement */ @@ -11087,7 +11331,7 @@ static void LocalSetXLogInsertAllowed(void) * whichChkpt identifies the checkpoint (merely for reporting purposes). * 1 for "primary", 2 for "secondary", 0 for "other" (backup_label) */ -static XLogRecord *ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int whichChkpt) +XLogRecord *ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int whichChkpt) { XLogRecord *record = NULL; @@ -11186,7 +11430,9 @@ void InitXLOGAccess(void) /* ThisTimeLineID doesn't change so we need no lock to copy it */ t_thrd.xlog_cxt.ThisTimeLineID = t_thrd.shemem_ptr_cxt.XLogCtl->ThisTimeLineID; - Assert(t_thrd.xlog_cxt.ThisTimeLineID != 0 || IsBootstrapProcessingMode()); + if (!ENABLE_DMS) { + Assert(t_thrd.xlog_cxt.ThisTimeLineID != 0 || IsBootstrapProcessingMode()); + } /* Use GetRedoRecPtr to copy the RedoRecPtr safely */ (void)GetRedoRecPtr(); @@ -11349,6 +11595,23 @@ void DummyStandbySetRecoveryTargetTLI(TimeLineID timeLineID) */ void ShutdownXLOG(int code, Datum arg) { + if (SS_STANDBY_PROMOTING) { + ereport(LOG, (errmsg("[SS switchover] Standby promote: skipping shutdown checkpoint"))); + return; + } + + if 
(SS_PRIMARY_DEMOTING) { + ereport(LOG, (errmsg("[SS switchover] primary demote: doing shutdown checkpoint"))); + CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE); + ckpt_shutdown_pagewriter(); + + if (g_instance.ckpt_cxt_ctl->dirty_page_queue != NULL) { + pfree(g_instance.ckpt_cxt_ctl->dirty_page_queue); + g_instance.ckpt_cxt_ctl->dirty_page_queue = NULL; + } + return; + } + ereport(LOG, (errmsg("shutting down"))); if (RecoveryInProgress()) { @@ -11390,21 +11653,28 @@ void ShutdownXLOG(int code, Datum arg) g_instance.bgwriter_cxt.rel_hashtbl_lock = NULL; g_instance.bgwriter_cxt.rel_one_fork_hashtbl_lock = NULL; - ShutdownCLOG(); - ShutdownCSNLOG(); - ShutdownMultiXact(); + if (!ENABLE_DMS || !SS_STANDBY_MODE) { + ShutdownCLOG(); + ShutdownCSNLOG(); + ShutdownMultiXact(); + } /* Shutdown double write. */ dw_exit(true); dw_exit(false); /* try clear page repair thread mem again */ - ClearPageRepairTheadMem(); - g_instance.repair_cxt.page_repair_hashtbl_lock = NULL; - g_instance.repair_cxt.file_repair_hashtbl_lock = NULL; + if (!ENABLE_DMS || g_instance.dms_cxt.SSClusterState == NODESTATE_NORMAL) { + ClearPageRepairTheadMem(); + g_instance.repair_cxt.page_repair_hashtbl_lock = NULL; + g_instance.repair_cxt.file_repair_hashtbl_lock = NULL; + } if (IsInitdb) { ShutdownShareStorageXLogCopy(); + if (t_thrd.xlog_cxt.openLogFile >= 0) { + XLogFileClose(); + } } ereport(LOG, (errmsg("database system is shut down"))); } @@ -11567,6 +11837,18 @@ void CreateCheckPoint(int flags) (errcode(ERRCODE_INVALID_TRANSACTION_STATE), errmsg("can't create a checkpoint during recovery"))); } + if (!SSXLogInsertAllowed()) { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TRANSACTION_STATE), errmsg("can't create a checkpoint on SS standby node"))); + } + + if (ENABLE_DMS && SS_STANDBY_MODE && !SS_STANDBY_PROMOTING) { + return; + } else if (SSFAILOVER_TRIGGER) { + ereport(LOG, (errmodule(MOD_DMS), errmsg("[SS failover] do not do CreateCheckpoint during failover"))); + return; + } + 
/* CHECKPOINT_IS_SHUTDOWN CHECKPOINT_END_OF_RECOVERY CHECKPOINT_FORCE shuld do full checkpoint */ if (shutdown || ((unsigned int)flags & (CHECKPOINT_FORCE))) { doFullCheckpoint = true; @@ -13762,6 +14044,10 @@ void assign_xlog_sync_method(int new_sync_method, void *extra) */ void issue_xlog_fsync(int fd, XLogSegNo segno) { + if (ENABLE_DSS) { + return; + } + switch (u_sess->attr.attr_storage.sync_method) { case SYNC_METHOD_FSYNC: if (pg_fsync_no_writethrough(fd) != 0) { @@ -13840,7 +14126,11 @@ static void CollectTableSpace(DIR *tblspcdir, List **tablespaces, StringInfo tbl { struct dirent *de = NULL; tablespaceinfo *ti = NULL; - int datadirpathlen = strlen(t_thrd.proc_cxt.DataDir); + const char *dataDir = NULL; + int datadirpathlen = -1; + + dataDir = is_dss_file(TBLSPCDIR) ? g_instance.attr.attr_storage.dss_attr.ss_dss_vg_name : t_thrd.proc_cxt.DataDir; + datadirpathlen = strlen(dataDir); /* Collect information about all tablespaces */ while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL) { char fullpath[MAXPGPATH + PG_TBLSPCS]; @@ -13859,7 +14149,8 @@ static void CollectTableSpace(DIR *tblspcdir, List **tablespaces, StringInfo tbl if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue; - errorno = snprintf_s(fullpath, MAXPGPATH + PG_TBLSPCS, MAXPGPATH + PG_TBLSPCS - 1, "pg_tblspc/%s", de->d_name); + errorno = snprintf_s(fullpath, MAXPGPATH + PG_TBLSPCS, MAXPGPATH + PG_TBLSPCS - 1, + "%s/%s", TBLSPCDIR, de->d_name); securec_check_ss(errorno, "\0", "\0"); #if defined(HAVE_READLINK) || defined(WIN32) @@ -13878,7 +14169,7 @@ static void CollectTableSpace(DIR *tblspcdir, List **tablespaces, StringInfo tbl * when it's located within PGDATA, or NULL if it's located * elsewhere. 
*/ - if (rllen > datadirpathlen && strncmp(linkpath, t_thrd.proc_cxt.DataDir, datadirpathlen) == 0 && + if (rllen > datadirpathlen && strncmp(linkpath, dataDir, datadirpathlen) == 0 && IS_DIR_SEP(linkpath[datadirpathlen])) relpath = linkpath + datadirpathlen + 1; @@ -16282,7 +16573,7 @@ bool XLogReadFromWriteBuffer(XLogRecPtr targetStartPtr, int reqLen, char *readBu * asked for a too old WAL segment that has already been * removed or recycled. */ - if (errno == ENOENT) { + if (FILE_POSSIBLY_DELETED(errno)) { ereport(ERROR, (errcode_for_file_access(), errmsg("requested WAL segment %s has already been removed", XLogFileNameP(t_thrd.xlog_cxt.ThisTimeLineID, t_thrd.xlog_cxt.readSegNo)))); @@ -18535,7 +18826,7 @@ loop: cur_rec_lsn = compare.u64[0]; /* if we already left behind dirty array queue reclsn, do nothing */ if (!XLByteLE(current_insert_lsn, cur_rec_lsn) && - (need_immediately_update || current_insert_lsn - cur_rec_lsn > XLOG_SEG_SIZE * UPDATE_REC_XLOG_NUM)) { + (need_immediately_update || current_insert_lsn - cur_rec_lsn > XLogSegSize * UPDATE_REC_XLOG_NUM)) { exchange.u64[0] = current_insert_lsn; exchange.u64[1] = compare.u64[1]; @@ -18583,7 +18874,7 @@ void update_dirty_page_queue_rec_lsn(XLogRecPtr current_insert_lsn, bool need_im if (!XLByteLE(current_insert_lsn, g_instance.ckpt_cxt_ctl->dirty_page_queue_reclsn) && (need_immediately_update || - current_insert_lsn - g_instance.ckpt_cxt_ctl->dirty_page_queue_reclsn > XLOG_SEG_SIZE * UPDATE_REC_XLOG_NUM)) { + current_insert_lsn - g_instance.ckpt_cxt_ctl->dirty_page_queue_reclsn > XLogSegSize * UPDATE_REC_XLOG_NUM)) { g_instance.ckpt_cxt_ctl->dirty_page_queue_reclsn = current_insert_lsn; is_update = true; } @@ -19078,12 +19369,6 @@ int WriteXlogToShareStorage(XLogRecPtr startLsn, char *buf, int writeLen) return g_instance.xlog_cxt.shareStorageopCtl.opereateIf->WriteXlog(startLsn, buf, writeLen); } -void FsyncXlogToShareStorage() -{ - Assert(g_instance.xlog_cxt.shareStorageopCtl.isInit && 
(g_instance.xlog_cxt.shareStorageopCtl.opereateIf != NULL)); - g_instance.xlog_cxt.shareStorageopCtl.opereateIf->fsync(); -} - pg_crc32c CalShareStorageCtlInfoCrc(const ShareStorageXLogCtl *ctlInfo) { Assert(g_instance.xlog_cxt.shareStorageopCtl.isInit); @@ -19214,7 +19499,7 @@ void FindLastRecordCheckInfoOnShareStorage(XLogRecPtr *lastRecordPtr, pg_crc32 * ShareStorageXLogCtl *ctlInfo = AlignAllocShareStorageCtl(); ReadShareStorageCtlInfo(ctlInfo); XLogRecPtr startLsn = ctlInfo->insertHead - (ctlInfo->insertHead % XLOG_BLCKSZ); - XLogRecPtr endPtr = XLOG_SEG_SIZE; + XLogRecPtr endPtr = XLogSegSize; if (ctlInfo->insertHead > g_instance.xlog_cxt.shareStorageopCtl.xlogFileSize) { endPtr = ctlInfo->insertHead - g_instance.xlog_cxt.shareStorageopCtl.xlogFileSize; } @@ -19368,3 +19653,30 @@ XLogRecPtr GetFlushMainStandby() return flushptr; } + +/* SS bans data write unless current node is: + * 1. normal: running as primary; + * 2. switchover: primary demoting, doing shutdown checkpoint; + * 3. switchover: standby promoting, doing StartupXLOG; + * 4. switchover: standby promoted, running as primary de facto, + * waiting for DMS reformer thread to update its role. + * 5. 
failover: doing StartupXLOG + */ +bool SSXLogInsertAllowed() +{ + /* allow xlog write as long as to different VGs */ + return true; +} + +bool SSModifySharedLunAllowed() +{ + if (!ENABLE_DMS || + SS_PRIMARY_MODE || + g_instance.dms_cxt.SSClusterState == NODESTATE_PRIMARY_DEMOTING || + g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_PROMOTING || + g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_PROMOTED || + SSFAILOVER_TRIGGER) { + return true; + } + return false; +} diff --git a/src/gausskernel/storage/access/transam/xlogfuncs.cpp b/src/gausskernel/storage/access/transam/xlogfuncs.cpp index 4f1b62f18..aaebeba8f 100755 --- a/src/gausskernel/storage/access/transam/xlogfuncs.cpp +++ b/src/gausskernel/storage/access/transam/xlogfuncs.cpp @@ -100,7 +100,7 @@ Datum pg_start_backup(PG_FUNCTION_ARGS) errmsg("a non-exclusive backup is already in progress in this session"))); backupidstr = text_to_cstring(backupid); - dir = AllocateDir("pg_tblspc"); + dir = AllocateDir(TBLSPCDIR); if (!dir) { ereport(ERROR, (errmsg("could not open directory \"%s\": %m", "pg_tblspc"))); } @@ -181,7 +181,7 @@ Datum pg_start_backup_v2(PG_FUNCTION_ARGS) errmsg("a non-exclusive backup is already in progress in this session"))); backupidstr = text_to_cstring(backupid); - dir = AllocateDir("pg_tblspc"); + dir = AllocateDir(TBLSPCDIR); if (!dir) { ereport(ERROR, (errmsg("could not open directory \"%s\": %m", "pg_tblspc"))); } diff --git a/src/gausskernel/storage/access/transam/xloginsert.cpp b/src/gausskernel/storage/access/transam/xloginsert.cpp index 329cf3c94..cd6f53e72 100755 --- a/src/gausskernel/storage/access/transam/xloginsert.cpp +++ b/src/gausskernel/storage/access/transam/xloginsert.cpp @@ -78,6 +78,9 @@ static void XLogResetLogicalPage(void); */ void XLogBeginInsert(void) { + if (SS_PERFORMING_SWITCHOVER) { + XLogResetInsertion(); + } Assert(t_thrd.xlog_cxt.max_registered_block_id == 0); Assert(t_thrd.xlog_cxt.mainrdata_last == (XLogRecData 
*)&t_thrd.xlog_cxt.mainrdata_head); Assert(t_thrd.xlog_cxt.mainrdata_len == 0); @@ -91,6 +94,11 @@ void XLogBeginInsert(void) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("XLogBeginInsert was already called"))); + if (!SSXLogInsertAllowed()) { + ereport(LOG, (errmsg("SS standby cannot insert XLOG entries"))); + return; + } + t_thrd.xlog_cxt.begininsert_called = true; } diff --git a/src/gausskernel/storage/access/transam/xlogreader.cpp b/src/gausskernel/storage/access/transam/xlogreader.cpp index 823b2f799..155ce0878 100644 --- a/src/gausskernel/storage/access/transam/xlogreader.cpp +++ b/src/gausskernel/storage/access/transam/xlogreader.cpp @@ -1400,7 +1400,7 @@ XLogRecPtr FindMaxLSN(char *workingPath, char *returnMsg, int msgLen, pg_crc32 * } /* Start to find the max lsn from a valid xlogfile */ - startLsn = (xlogReadLogSeg * XLOG_SEG_SIZE) + ((XLogRecPtr)xlogReadLogid * XLogSegmentsPerXLogId * XLogSegSize); + startLsn = (xlogReadLogSeg * XLogSegSize) + ((XLogRecPtr)xlogReadLogid * XLogSegmentsPerXLogId * XLogSegSize); while (!XLogRecPtrIsInvalid(startLsn)) { /* find the first valid record from the bigger xlogrecord. 
then break */ curLsn = XLogFindNextRecord(xlogReader, startLsn); @@ -1409,7 +1409,7 @@ XLogRecPtr FindMaxLSN(char *workingPath, char *returnMsg, int msgLen, pg_crc32 * close(xlogreadfd); xlogreadfd = -1; } - startLsn = startLsn - XLOG_SEG_SIZE; + startLsn = startLsn - XLogSegSize; continue; } else { findValidXLogFile = true; @@ -1566,7 +1566,7 @@ XLogRecPtr FindMinLSN(char *workingPath, char *returnMsg, int msgLen, pg_crc32 * } /* Start to find the min lsn from a valid xlogfile */ - startLsn = (xlogReadLogSeg * XLOG_SEG_SIZE) + ((XLogRecPtr)xlogReadLogid * XLogSegmentsPerXLogId * XLogSegSize); + startLsn = (xlogReadLogSeg * XLogSegSize) + ((XLogRecPtr)xlogReadLogid * XLogSegmentsPerXLogId * XLogSegSize); while (!XLogRecPtrIsInvalid(startLsn)) { curLsn = XLogFindNextRecord(xlogReader, startLsn); if (XLogRecPtrIsInvalid(curLsn)) { @@ -1574,7 +1574,7 @@ XLogRecPtr FindMinLSN(char *workingPath, char *returnMsg, int msgLen, pg_crc32 * close(xlogreadfd); xlogreadfd = -1; } - startLsn = startLsn + XLOG_SEG_SIZE; + startLsn = startLsn + XLogSegSize; continue; } else { findValidXLogFile = true; diff --git a/src/gausskernel/storage/access/transam/xlogutils.cpp b/src/gausskernel/storage/access/transam/xlogutils.cpp index b926750ce..8b5f7904d 100644 --- a/src/gausskernel/storage/access/transam/xlogutils.cpp +++ b/src/gausskernel/storage/access/transam/xlogutils.cpp @@ -40,6 +40,7 @@ #include "storage/lmgr.h" #include "storage/smgr/smgr.h" #include "storage/smgr/segment.h" +#include "storage/file/fio_device.h" #include "utils/guc.h" #include "utils/hsearch.h" #include "utils/rel.h" @@ -1550,7 +1551,7 @@ static void XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count) XLByteToSeg(recptr, t_thrd.xlog_cxt.sendSegNo); - errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%08X%08X%08X", tli, + errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X", SS_XLOGDIR, tli, (uint32)((t_thrd.xlog_cxt.sendSegNo) / XLogSegmentsPerXLogId), 
(uint32)((t_thrd.xlog_cxt.sendSegNo) % XLogSegmentsPerXLogId)); securec_check_ss(errorno, "", ""); @@ -1558,7 +1559,7 @@ static void XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count) t_thrd.xlog_cxt.sendFile = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0); if (t_thrd.xlog_cxt.sendFile < 0) { - if (errno == ENOENT) + if (FILE_POSSIBLY_DELETED(errno)) ereport(ERROR, (errcode_for_file_access(), errmsg("requested WAL segment %s has already been removed", path))); else @@ -1573,7 +1574,7 @@ static void XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count) if (lseek(t_thrd.xlog_cxt.sendFile, (off_t)startoff, SEEK_SET) < 0) { char path[MAXPGPATH]; - errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%08X%08X%08X", tli, + errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X", SS_XLOGDIR, tli, (uint32)((t_thrd.xlog_cxt.sendSegNo) / XLogSegmentsPerXLogId), (uint32)((t_thrd.xlog_cxt.sendSegNo) % XLogSegmentsPerXLogId)); securec_check_ss(errorno, "", ""); @@ -1597,7 +1598,7 @@ static void XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count) if (readbytes <= 0) { char path[MAXPGPATH]; - errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, XLOGDIR "/%08X%08X%08X", tli, + errorno = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%08X%08X%08X", SS_XLOGDIR, tli, (uint32)((t_thrd.xlog_cxt.sendSegNo) / XLogSegmentsPerXLogId), (uint32)((t_thrd.xlog_cxt.sendSegNo) % XLogSegmentsPerXLogId)); securec_check_ss(errorno, "", ""); diff --git a/src/gausskernel/storage/buffer/CMakeLists.txt b/src/gausskernel/storage/buffer/CMakeLists.txt index 8feb7facd..d3512a095 100755 --- a/src/gausskernel/storage/buffer/CMakeLists.txt +++ b/src/gausskernel/storage/buffer/CMakeLists.txt @@ -13,7 +13,7 @@ set(TGT_buffer_INC ${ZLIB_INCLUDE_PATH} ) -set(buffer_DEF_OPTIONS ${MACRO_OPTIONS}) +set(buffer_DEF_OPTIONS ${MACRO_OPTIONS} -DOPENGAUSS) set(buffer_COMPILE_OPTIONS ${OPTIMIZE_OPTIONS} ${OS_OPTIONS} ${PROTECT_OPTIONS} 
${WARNING_OPTIONS} ${BIN_SECURE_OPTIONS} ${CHECK_OPTIONS}) set(buffer_LINK_OPTIONS ${BIN_LINK_OPTIONS}) add_static_objtarget(gausskernel_storage_buffer TGT_buffer_SRC TGT_buffer_INC "${buffer_DEF_OPTIONS}" "${buffer_COMPILE_OPTIONS}" "${buffer_LINK_OPTIONS}") diff --git a/src/gausskernel/storage/buffer/buf_init.cpp b/src/gausskernel/storage/buffer/buf_init.cpp index a19c947e1..406712944 100644 --- a/src/gausskernel/storage/buffer/buf_init.cpp +++ b/src/gausskernel/storage/buffer/buf_init.cpp @@ -25,6 +25,8 @@ #include "postmaster/pagewriter.h" #include "postmaster/bgwriter.h" #include "utils/palloc.h" +#include "ddes/dms/ss_dms_bufmgr.h" +#include "ddes/dms/ss_common_attr.h" const int PAGE_QUEUE_SLOT_MULTI_NBUFFERS = 5; @@ -85,8 +87,14 @@ void InitBufferPool(void) t_thrd.storage_cxt.BufferBlocks = (char *)CACHELINEALIGN(ShmemInitStruct("Buffer Blocks", buffer_size, &found_bufs)); #else - buffer_size = (TOTAL_BUFFER_NUM - NVM_BUFFER_NUM) * (Size)BLCKSZ; - t_thrd.storage_cxt.BufferBlocks = (char *)ShmemInitStruct("Buffer Blocks", buffer_size, &found_bufs); + if (ENABLE_DSS) { + buffer_size = (uint64)((TOTAL_BUFFER_NUM - NVM_BUFFER_NUM) * (Size)BLCKSZ + ALIGNOF_BUFFER); + t_thrd.storage_cxt.BufferBlocks = + (char *)BUFFERALIGN(ShmemInitStruct("Buffer Blocks", buffer_size, &found_bufs)); + } else { + buffer_size = (TOTAL_BUFFER_NUM - NVM_BUFFER_NUM) * (Size)BLCKSZ; + t_thrd.storage_cxt.BufferBlocks = (char *)ShmemInitStruct("Buffer Blocks", buffer_size, &found_bufs); + } #endif if (g_instance.attr.attr_storage.nvm_attr.enable_nvm) { @@ -165,6 +173,12 @@ void InitBufferPool(void) g_instance.bgwriter_cxt.rel_one_fork_hashtbl_lock = LWLockAssign(LWTRANCHE_UNLINK_REL_FORK_TBL); } + /* re-assign locks for un-reinited buffers, may delete this */ + if (SS_PERFORMING_SWITCHOVER) { + g_instance.bgwriter_cxt.rel_hashtbl_lock = LWLockAssign(LWTRANCHE_UNLINK_REL_TBL); + g_instance.bgwriter_cxt.rel_one_fork_hashtbl_lock = LWLockAssign(LWTRANCHE_UNLINK_REL_FORK_TBL); + } + /* Init 
other shared buffer-management stuff */ StrategyInitialize(!found_descs); @@ -173,6 +187,10 @@ void InitBufferPool(void) /* Initialize per-backend file flush context */ WritebackContextInit(t_thrd.storage_cxt.BackendWritebackContext, &u_sess->attr.attr_common.backend_flush_after); + + if (ENABLE_DMS) { + InitDmsBufCtrl(); + } } /* @@ -206,6 +224,11 @@ Size BufferShmemSize(void) /* size of candidate free map */ size = add_size(size, mul_size(TOTAL_BUFFER_NUM, sizeof(bool))); + /* size of dms buf ctrl and buffer align */ + if (ENABLE_DMS) { + size = add_size(size, mul_size(TOTAL_BUFFER_NUM, sizeof(dms_buf_ctrl_t))) + ALIGNOF_BUFFER + PG_CACHE_LINE_SIZE; + } + return size; } diff --git a/src/gausskernel/storage/buffer/bufmgr.cpp b/src/gausskernel/storage/buffer/bufmgr.cpp index 5a6e96fb4..864a7297f 100644 --- a/src/gausskernel/storage/buffer/bufmgr.cpp +++ b/src/gausskernel/storage/buffer/bufmgr.cpp @@ -82,6 +82,9 @@ #include "gstrace/storage_gstrace.h" #include "tsan_annotation.h" #include "tde_key_management/tde_key_storage.h" +#include "ddes/dms/ss_dms_bufmgr.h" +#include "ddes/dms/ss_common_attr.h" +#include "ddes/dms/ss_transaction.h" const int ONE_MILLISECOND = 1; const int TEN_MICROSECOND = 10; @@ -127,7 +130,7 @@ static bool ReadBuffer_common_ReadBlock(SMgrRelation smgr, char relpersistence, bool *need_repair); static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit, const XLogPhyBlock *pblk); - +static void TerminateBufferIO_common(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits); /* * Return the PrivateRefCount entry for the passed buffer. 
It is searched @@ -708,7 +711,14 @@ static volatile BufferDesc *PageListBufferAlloc(SMgrRelation smgr, char relpersi /* Everything is fine, the buffer is ours, so break */ old_flags = buf_state & BUF_FLAG_MASK; if (BUF_STATE_GET_REFCOUNT(buf_state) == 1 && !(old_flags & BM_DIRTY) && !(old_flags & BM_IS_META)) { - break; + if (ENABLE_DMS && (old_flags & BM_TAG_VALID)) { + if (DmsReleaseOwner(buf->tag, buf->buf_id)) { + ClearReadHint(buf->buf_id, true); + break; + } + } else { + break; + } } /* @@ -2052,7 +2062,8 @@ static bool ReadBuffer_common_ReadBlock(SMgrRelation smgr, char relpersistence, blockNum, relpath(smgr->smgr_rnode, forkNum)))); return false; } - ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), + int elevel = ENABLE_DMS ? PANIC : ERROR; + ereport(elevel, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("invalid page in block %u of relation %s", blockNum, relpath(smgr->smgr_rnode, forkNum)))); } @@ -2065,6 +2076,85 @@ static bool ReadBuffer_common_ReadBlock(SMgrRelation smgr, char relpersistence, return needputtodirty; } +void ReadBuffer_common_for_check(ReadBufferMode readmode, BufferDesc* buf_desc, + const XLogPhyBlock *pblk, Block bufBlock) +{ + bool need_repair = false; + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(buf_desc->buf_id); + BlockNumber blockNum = InvalidBlockNumber; + ForkNumber forkNum = buf_desc->tag.forkNum; + bool isExtend = (buf_ctrl->state & BUF_IS_EXTEND) ? true: false; + SMgrRelation smgr = smgropen(buf_desc->tag.rnode, InvalidBackendId); + blockNum = buf_desc->tag.blockNum; + char relpersistence = (buf_ctrl->state & BUF_IS_RELPERSISTENT)? 
'p': 0; + + if (pblk != NULL) { + Assert(PhyBlockIsValid(*pblk)); + Assert(OidIsValid(pblk->relNode)); + ereport(DEBUG1, (errmsg("Reading SegPage databuffer %d with pblk%u-%u", + buf_ctrl->buf_id, pblk->relNode, pblk->block))); + (void)ReadBuffer_common_ReadBlock(smgr, relpersistence, forkNum, + blockNum, readmode, isExtend, bufBlock, pblk, &need_repair); + } else { + (void)ReadBuffer_common_ReadBlock(smgr, relpersistence, forkNum, + blockNum, readmode, isExtend, bufBlock, NULL, &need_repair); + } + if (need_repair) { + ereport(PANIC, (errmsg("[%d/%d/%d/%d %d-%d]need_repair.", + buf_desc->tag.rnode.spcNode, buf_desc->tag.rnode.dbNode, buf_desc->tag.rnode.relNode, + buf_desc->tag.rnode.bucketNode, buf_desc->tag.forkNum, buf_desc->tag.blockNum))); + } +} + +/* + * ReadBuffer read block for dms -- fast read block for dms + * + */ +Buffer ReadBuffer_common_for_dms(ReadBufferMode readmode, BufferDesc* buf_desc, const XLogPhyBlock *pblk) +{ + bool needputtodirty = false; + bool need_repair = false; + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(buf_desc->buf_id); + BlockNumber blockNum = InvalidBlockNumber; + ForkNumber forkNum = buf_desc->tag.forkNum; + bool isExtend = (buf_ctrl->state & BUF_IS_EXTEND) ? true: false; + SMgrRelation smgr = smgropen(buf_desc->tag.rnode, InvalidBackendId); + blockNum = buf_desc->tag.blockNum; + char relpersistence = (buf_ctrl->state & BUF_IS_RELPERSISTENT)? 
'p': 0; + Block bufBlock = BufHdrGetBlock(buf_desc); + + if (pblk != NULL) { + Assert(PhyBlockIsValid(*pblk)); + Assert(OidIsValid(pblk->relNode)); + ereport(DEBUG1, (errmsg("Reading SegPage databuffer %d with pblk%u-%u", + buf_ctrl->buf_id, pblk->relNode, pblk->block))); + needputtodirty = ReadBuffer_common_ReadBlock(smgr, relpersistence, forkNum, + blockNum, readmode, isExtend, bufBlock, pblk, &need_repair); + } else { + needputtodirty = ReadBuffer_common_ReadBlock(smgr, relpersistence, forkNum, + blockNum, readmode, isExtend, bufBlock, NULL, &need_repair); + } + if (need_repair) { + LWLockRelease(buf_desc->io_in_progress_lock); + UnpinBuffer(buf_desc, true); + AbortBufferIO(); + return InvalidBuffer; + } + + buf_desc->lsn_on_disk = PageGetLSN(bufBlock); +#ifdef USE_ASSERT_CHECKING + buf_desc->lsn_dirty = InvalidXLogRecPtr; +#endif + /* Set BM_VALID, terminate IO, and wake up any waiters */ + TerminateBufferIO(buf_desc, false, BM_VALID); + + t_thrd.vacuum_cxt.VacuumPageMiss++; + if (t_thrd.vacuum_cxt.VacuumCostActive) + t_thrd.vacuum_cxt.VacuumCostBalance += u_sess->attr.attr_storage.VacuumCostPageMiss; + + return BufferDescriptorGetBuffer(buf_desc); +} + static inline void BufferDescSetPBLK(BufferDesc *buf, const XLogPhyBlock *pblk) { if (pblk != NULL) { @@ -2151,11 +2241,15 @@ static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumb } } +found_branch: /* At this point we do NOT hold any locks. 
* * if it was already in the buffer pool, we're done */ if (found) { + if (ENABLE_DMS) { + MarkReadPblk(bufHdr->buf_id, pblk); + } if (!isExtend) { /* Just need to update stats before we exit */ *hit = true; @@ -2174,7 +2268,12 @@ static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumb */ if (!isLocalBuf) { if (mode == RBM_ZERO_AND_LOCK) { - LWLockAcquire(bufHdr->content_lock, LW_EXCLUSIVE); + if (ENABLE_DMS) { + GetDmsBufCtrl(bufHdr->buf_id)->state |= BUF_READ_MODE_ZERO_LOCK; + LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_EXCLUSIVE); + } else { + LWLockAcquire(bufHdr->content_lock, LW_EXCLUSIVE); + } /* * A corner case in segment-page storage: * a block is moved by segment space shrink, and its physical location is changed. But physical @@ -2185,10 +2284,13 @@ static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumb */ BufferDescSetPBLK(bufHdr, pblk); } else if (mode == RBM_ZERO_AND_CLEANUP_LOCK) { + if (ENABLE_DMS) { + GetDmsBufCtrl(bufHdr->buf_id)->state |= BUF_READ_MODE_ZERO_LOCK; + } LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr)); } } - + return BufferDescriptorGetBuffer(bufHdr); } @@ -2248,6 +2350,55 @@ static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumb } } + /* DMS: Try get page remote */ + if (ENABLE_DMS) { + MarkReadHint(bufHdr->buf_id, relpersistence, isExtend, pblk); + if (mode != RBM_FOR_REMOTE && relpersistence != RELPERSISTENCE_TEMP && !isLocalBuf) { + Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); + + do { + bool startio; + if (LWLockHeldByMe(bufHdr->io_in_progress_lock)) { + startio = true; + } else { + startio = StartBufferIO(bufHdr, true); + } + + if (!startio) { + Assert(pg_atomic_read_u32(&bufHdr->state) & BM_VALID); + found = true; + goto found_branch; + } + + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(bufHdr->buf_id); + LWLockMode req_lock_mode = isExtend ? 
LW_EXCLUSIVE : LW_SHARED; + if (!LockModeCompatible(buf_ctrl, req_lock_mode)) { + if (!StartReadPage(bufHdr, req_lock_mode)) { + TerminateBufferIO(bufHdr, false, 0); + // when reform fail, should return InvalidBuffer to reform proc thread + if (AmDmsReformProcProcess() && dms_reform_failed()) { + return InvalidBuffer; + } + + pg_usleep(5000L); + continue; + } + } else { + /* + * 1. previous attempts to read the buffer must have failed, + * but DRC has been created, so load page directly again + * 2. maybe we have failed previous, and try again in this loop + */ + buf_ctrl->state |= BUF_NEED_LOAD; + } + break; + }while (true); + + return TerminateReadPage(bufHdr, mode, pblk); + } + ClearReadHint(bufHdr->buf_id); + } + /* * if we have gotten to this point, we have allocated a buffer for the * page but its contents are not yet valid. IO_IN_PROGRESS is set for it, @@ -2597,6 +2748,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumbe } else { FlushBuffer(buf, NULL); } + LWLockRelease(buf->content_lock); ScheduleBufferTagForWriteback(t_thrd.storage_cxt.BackendWritebackContext, &buf->tag); @@ -2694,7 +2846,6 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumbe * Need to lock the buffer header too in order to change its tag. */ buf_state = LockBufHdr(buf); - /* * Somebody could have pinned or re-dirtied the buffer while we were * doing the I/O and making the new hashtable entry. If so, we can't @@ -2702,9 +2853,22 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumbe * over with a new victim buffer. */ old_flags = buf_state & BUF_FLAG_MASK; + if (BUF_STATE_GET_REFCOUNT(buf_state) == 1 && !(old_flags & BM_DIRTY) && !(old_flags & BM_IS_META)) { - break; + if (ENABLE_DMS && (old_flags & BM_TAG_VALID)) { + /* + * notify DMS to release drc owner. if failed, can't recycle this buffer. + * release owner procedure is in buf header lock, it's not reasonable, + * need to improve. 
+ */ + if (DmsReleaseOwner(old_tag, buf->buf_id)) { + ClearReadHint(buf->buf_id, true); + break; + } + } else { + break; + } } UnlockBufHdr(buf, buf_state); @@ -2745,6 +2909,10 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumbe UnlockBufHdr(buf, buf_state); + if (ENABLE_DMS) { + GetDmsBufCtrl(buf->buf_id)->lock_mode = DMS_LOCK_NULL; + } + if (old_flags & BM_TAG_VALID) { BufTableDelete(&old_tag, old_hash); if (old_partition_lock != new_partition_lock) { @@ -2757,6 +2925,9 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumbe Assert(PhyBlockIsValid(*pblk)); buf->seg_fileno = pblk->relNode; buf->seg_blockno = pblk->block; + if (ENABLE_DMS) { + MarkReadPblk(buf->buf_id, pblk); + } } else { buf->seg_fileno = EXTENT_INVALID; buf->seg_blockno = InvalidBlockNumber; @@ -2856,6 +3027,24 @@ retry: goto retry; } + if (ENABLE_DMS && (buf_state & BM_TAG_VALID)) { + /* before release owner, request page again using X to ensure other node invalid page */ + if (SS_NORMAL_PRIMARY && GetDmsBufCtrl(buf->buf_id)->lock_mode != DMS_LOCK_EXCLUSIVE && + !(GetDmsBufCtrl(buf->buf_id)->state & BUF_IS_RELPERSISTENT_TEMP)) { + ereport(DEBUG1, (errmodule(MOD_DMS), errmsg("DMS master force invalidate other node's page"))); + (void)StartReadPage(buf, LW_EXCLUSIVE); + } + + if (!DmsReleaseOwner(buf->tag, buf->buf_id)) { + UnlockBufHdr(buf, buf_state); + LWLockRelease(old_partition_lock); + pg_usleep(5000); + goto retry; + } + + ClearReadHint(buf->buf_id, true); + } + /* remove from dirty page list */ if (ENABLE_INCRE_CKPT && (buf_state & BM_DIRTY)) { if (!XLogRecPtrIsInvalid(pg_atomic_read_u64(&buf->rec_lsn))) { @@ -4731,6 +4920,10 @@ void DropRelFileNodeShareBuffers(RelFileNode node, ForkNumber forkNum, BlockNumb else UnlockBufHdr(buf_desc, buf_state); } + + if (ENABLE_DMS && SS_PRIMARY_MODE) { + SSBCastDropRelRangeBuffer(node, forkNum, firstDelBlock); + } } /* @@ -5045,6 +5238,11 @@ void DropDatabaseBuffers(Oid dbid) 
UnlockBufHdr(buf_desc, buf_state); } } + + if (ENABLE_DMS && SS_PRIMARY_MODE) { + SSBCastDropDBAllBuffer(dbid); + } + gstrace_exit(GS_TRC_ID_DropDatabaseBuffers); } @@ -5313,6 +5511,10 @@ void IncrBufferRefCount(Buffer buffer) */ void MarkBufferDirtyHint(Buffer buffer, bool buffer_std) { + if (SS_STANDBY_MODE) { + return; + } + BufferDesc *buf_desc = NULL; Page page = BufferGetPage(buffer); @@ -5526,6 +5728,8 @@ void LockBuffer(Buffer buffer, int mode) if (dw_enabled() && t_thrd.storage_cxt.num_held_lwlocks > 0) { need_update_lockid = true; } + +retry: if (mode == BUFFER_LOCK_UNLOCK) { LWLockRelease(buf->content_lock); } else if (mode == BUFFER_LOCK_SHARE) { @@ -5535,6 +5739,38 @@ void LockBuffer(Buffer buffer, int mode) } else { ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), (errmsg("unrecognized buffer lock mode: %d", mode)))); } + + /* + * need to transfer newest page version by DMS. + */ + if (ENABLE_DMS && mode != BUFFER_LOCK_UNLOCK) { + LWLockMode lock_mode = (mode == BUFFER_LOCK_SHARE) ? 
LW_SHARED : LW_EXCLUSIVE; + Buffer tmp_buffer; + ReadBufferMode read_mode = RBM_NORMAL; + if (lock_mode == LW_EXCLUSIVE && (GetDmsBufCtrl(buffer - 1)->state & BUF_READ_MODE_ZERO_LOCK)) { + read_mode = RBM_ZERO_AND_LOCK; + GetDmsBufCtrl(buffer - 1)->state &= ~BUF_READ_MODE_ZERO_LOCK; + } + + if (IsSegmentBufferID(buf->buf_id)) { + tmp_buffer = DmsReadSegPage(buffer, lock_mode, read_mode); + } else { + tmp_buffer = DmsReadPage(buffer, lock_mode, read_mode); + } + + if (tmp_buffer == 0) { + /* failed to request newest page, release related locks, and retry */ + if (IsSegmentBufferID(buf->buf_id)) { + SegTerminateBufferIO((BufferDesc *)buf, false, 0); + } else { + TerminateBufferIO(buf, false, 0); + } + LWLockRelease(buf->content_lock); + + pg_usleep(5000L); + goto retry; + } + } } /* @@ -5557,17 +5793,39 @@ bool TryLockBuffer(Buffer buffer, int mode, bool must_wait) } volatile BufferDesc *buf = GetBufferDescriptor(buffer - 1); - + bool ret = false; if (mode == BUFFER_LOCK_SHARE) { - return LWLockConditionalAcquire(buf->content_lock, LW_SHARED); + ret = LWLockConditionalAcquire(buf->content_lock, LW_SHARED); } else if (mode == BUFFER_LOCK_EXCLUSIVE) { - return LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE); + ret = LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE); } else { ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), (errmsg("unrecognized buffer lock mode for TryLockBuffer: %d", mode)))); } - return false; + /* transfer newest page version by DMS */ + if (ENABLE_DMS && ret) { + LWLockMode lock_mode = (mode == BUFFER_LOCK_SHARE) ? 
LW_SHARED : LW_EXCLUSIVE; + Buffer tmp_buffer; + if (IsSegmentBufferID(buf->buf_id)) { + tmp_buffer = DmsReadSegPage(buffer, lock_mode, RBM_NORMAL); + } else { + tmp_buffer = DmsReadPage(buffer, lock_mode, RBM_NORMAL); + } + + if (tmp_buffer == 0) { + /* failed to request newest page, release related locks, and retry */ + if (IsSegmentBufferID(buf->buf_id)) { + SegTerminateBufferIO((BufferDesc *)buf, false, 0); + } else { + TerminateBufferIO(buf, false, 0); + } + LWLockRelease(buf->content_lock); + ret = false; + } + } + + return ret; } /* @@ -5586,7 +5844,31 @@ bool ConditionalLockBuffer(Buffer buffer) buf = GetBufferDescriptor(buffer - 1); - return LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE); +retry: + bool ret = LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE); + + if (ENABLE_DMS && ret) { + Buffer tmp_buffer; + if (IsSegmentBufferID(buf->buf_id)) { + tmp_buffer = DmsReadSegPage(buffer, LW_EXCLUSIVE, RBM_NORMAL); + } else { + tmp_buffer = DmsReadPage(buffer, LW_EXCLUSIVE, RBM_NORMAL); + } + + /* failed to request newest page, release related locks, and retry */ + if (tmp_buffer == 0) { + if (IsSegmentBufferID(buf->buf_id)) { + SegTerminateBufferIO((BufferDesc *)buf, false, 0); + } else { + TerminateBufferIO(buf, false, 0); + } + LWLockRelease(buf->content_lock); + + pg_usleep(5000L); + goto retry; + } + } + return ret; } /* @@ -6177,7 +6459,9 @@ void AbortBufferIO_common(BufferDesc *buf, bool isForInput) if (isForInput) { /* When reading we expect the buffer to be invalid but not dirty */ Assert(!(buf_state & BM_DIRTY)); - Assert(!(buf_state & BM_VALID)); + if (!ENABLE_DSS) { + Assert(!(buf_state & BM_VALID)); + } UnlockBufHdr(buf, buf_state); } else { /* When writing we expect the buffer to be valid and dirty */ diff --git a/src/gausskernel/storage/buffer/localbuf.cpp b/src/gausskernel/storage/buffer/localbuf.cpp index c14078840..29b2c0fd5 100644 --- a/src/gausskernel/storage/buffer/localbuf.cpp +++ 
b/src/gausskernel/storage/buffer/localbuf.cpp @@ -497,7 +497,8 @@ static Block GetLocalBufferStorage(void) num_bufs = Min((unsigned int)(num_bufs), MaxAllocSize / BLCKSZ); u_sess->storage_cxt.cur_block = - (char*)MemoryContextAlloc(u_sess->storage_cxt.LocalBufferContext, num_bufs * BLCKSZ); + (char*)BUFFERALIGN(MemoryContextAlloc(u_sess->storage_cxt.LocalBufferContext, + num_bufs * BLCKSZ + ALIGNOF_BUFFER)); u_sess->storage_cxt.next_buf_in_block = 0; u_sess->storage_cxt.num_bufs_in_block = num_bufs; } diff --git a/src/gausskernel/storage/cstore/cstore_am.cpp b/src/gausskernel/storage/cstore/cstore_am.cpp index 41cde3ce2..4dee7879a 100644 --- a/src/gausskernel/storage/cstore/cstore_am.cpp +++ b/src/gausskernel/storage/cstore/cstore_am.cpp @@ -47,6 +47,7 @@ #include "storage/cucache_mgr.h" #include "storage/cstore/cstore_compress.h" #include "storage/smgr/smgr.h" +#include "storage/file/fio_device.h" #include "access/heapam.h" #include "access/sysattr.h" #include "executor/instrument.h" diff --git a/src/gausskernel/storage/cstore/cstore_insert.cpp b/src/gausskernel/storage/cstore/cstore_insert.cpp index 7f7cbff9a..3a88deca9 100644 --- a/src/gausskernel/storage/cstore/cstore_insert.cpp +++ b/src/gausskernel/storage/cstore/cstore_insert.cpp @@ -2982,7 +2982,18 @@ int PartitionValueCache::InternalWrite(const char* buf, int len) void PartitionValueCache::FlushData() { if (likely(m_bufCursor > 0)) { - int retval = FilePWrite(m_fd, m_buffer, m_bufCursor, m_writeOffset); + int retval; + if (ENABLE_DSS) { + char *buffer_ori = (char*)palloc(BLCKSZ + m_bufCursor); + char *buffer_ali = (char*)BUFFERALIGN(buffer_ori); + errno_t rc = memcpy_s(buffer_ali, m_bufCursor, m_buffer, m_bufCursor); + securec_check(rc, "", ""); + retval = FilePWrite(m_fd, buffer_ali, m_bufCursor, (off_t)m_writeOffset); + pfree(buffer_ori); + buffer_ali = NULL; + } else { + retval = FilePWrite(m_fd, m_buffer, m_bufCursor, m_writeOffset); + } if (retval < 0) ereport(ERROR, (errcode_for_file_access(), 
errmsg("could not write cache file \"%s\": %m", FilePathName(m_fd)))); m_writeOffset += retval; diff --git a/src/gausskernel/storage/cstore/custorage.cpp b/src/gausskernel/storage/cstore/custorage.cpp index d9964c5c3..4fdbd62d5 100755 --- a/src/gausskernel/storage/cstore/custorage.cpp +++ b/src/gausskernel/storage/cstore/custorage.cpp @@ -44,6 +44,9 @@ #include "utils/aiomem.h" #include "utils/plog.h" #include "securec_check.h" +#include "storage/dss/fio_dss.h" +#include "storage/file/fio_device.h" +#include "storage/vfd.h" /* * The max size for single data file @@ -222,20 +225,22 @@ void CUStorage::InitFileNamePrefix(_in_ const CFileNode& cFileNode) if (spcoid == GLOBALTABLESPACE_OID) { /* Shared system relations live in {datadir}/global */ Assert(dboid == 0); - pathlen = strlen("global") + 1 + OIDCHARS + 1 + strlen(attr_name) + 1; - rc = snprintf_s(m_fileNamePrefix, sizeof(m_fileNamePrefix), pathlen, "global/%u_%s", reloid, attr_name); + pathlen = strlen(GLOTBSDIR) + 1 + OIDCHARS + 1 + strlen(attr_name) + 1; + rc = snprintf_s(m_fileNamePrefix, sizeof(m_fileNamePrefix), pathlen, "%s/%u_%s", GLOTBSDIR, reloid, attr_name); securec_check_ss(rc, "", ""); } else if (spcoid == DEFAULTTABLESPACE_OID) { /* The default tablespace is {datadir}/base */ - pathlen = strlen("base") + 1 + OIDCHARS + 1 + OIDCHARS + 1 + strlen(attr_name) + 1; - rc = snprintf_s(m_fileNamePrefix, sizeof(m_fileNamePrefix), pathlen, "base/%u/%u_%s", dboid, reloid, attr_name); + pathlen = strlen(DEFTBSDIR) + 1 + OIDCHARS + 1 + OIDCHARS + 1 + strlen(attr_name) + 1; + rc = snprintf_s(m_fileNamePrefix, sizeof(m_fileNamePrefix), pathlen, "%s/%u/%u_%s", DEFTBSDIR, dboid, reloid, + attr_name); securec_check_ss(rc, "", ""); } else { /* All other tablespaces are accessed via symlinks */ rc = snprintf_s(m_fileNamePrefix, sizeof(m_fileNamePrefix), sizeof(m_fileNamePrefix) - 1, - "pg_tblspc/%u/%s_%s/%u/%u_%s", + "%s/%u/%s_%s/%u/%u_%s", + TBLSPCDIR, spcoid, TABLESPACE_VERSION_DIRECTORY, 
g_instance.attr.attr_common.PGXCNodeName, @@ -377,7 +382,32 @@ void CUStorage::SaveCU(char* write_buf, _in_ uint64 offset, _in_ int size, bool } Assert(m_fd != FILE_INVALID); - int writtenBytes = FilePWrite(m_fd, write_buf, write_size, writeOffset); + /* + * DSS pwrite does not allow file offset beyond the end of the file, + * so we need fallocate first. In extend situation, lock is already + * acquire in previous step. + */ + vfd *vfdcache = GetVfdCache(); + int fd = vfdcache[m_fd].fd; + if (is_dss_fd(fd)) { + off_t fileSize = dss_get_file_size(m_fileName); + if ((off_t)writeOffset > fileSize) { + (void)dss_fallocate_file(fd, 0, fileSize, (off_t)writeOffset - fileSize); + } + } + + int writtenBytes; + if (ENABLE_DSS) { + char *buffer_ori = (char*)palloc(BLCKSZ + write_size); + char *buffer_ali = (char*)BUFFERALIGN(buffer_ori); + rc = memcpy_s(buffer_ali, write_size, write_buf, write_size); + securec_check(rc, "", ""); + writtenBytes = FilePWrite(m_fd, buffer_ali, write_size, (off_t)writeOffset); + pfree(buffer_ori); + buffer_ali = NULL; + } else { + writtenBytes = FilePWrite(m_fd, write_buf, write_size, (off_t)writeOffset); + } if (writtenBytes != write_size) { int align_size = is_2byte_align ? 
ALIGNOF_TIMESERIES_CUSIZE : ALIGNOF_CUSIZE; SaveCUReportIOError(tmpFileName, writeOffset, writtenBytes, write_size, size, align_size); @@ -432,7 +462,18 @@ void CUStorage::OverwriteCU( } int nbytes = 0; - if ((nbytes = FilePWrite(m_fd, write_buf, write_size, writeOffset)) != write_size) { + if (ENABLE_DSS) { + char *buffer_ori = (char*)palloc(BLCKSZ + write_size); + char *buffer_ali = (char*)BUFFERALIGN(buffer_ori); + rc = memcpy_s(buffer_ali, write_size, write_buf, write_size); + securec_check(rc, "", ""); + nbytes = FilePWrite(m_fd, buffer_ali, write_size, writeOffset); + pfree(buffer_ori); + buffer_ali = NULL; + } else { + nbytes = FilePWrite(m_fd, write_buf, write_size, writeOffset); + } + if (nbytes != write_size) { // just warning ereport(WARNING, (errcode_for_file_access(), @@ -1088,7 +1129,7 @@ uint64 GetColDataFileSize(Relation rel, int attid) if (stat(pathname, &fst) < 0) { /* pathname file is not exist */ - if (errno == ENOENT) + if (FILE_POSSIBLY_DELETED(errno)) break; else ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", pathname))); diff --git a/src/gausskernel/storage/dss/CMakeLists.txt b/src/gausskernel/storage/dss/CMakeLists.txt new file mode 100644 index 000000000..7e96cd061 --- /dev/null +++ b/src/gausskernel/storage/dss/CMakeLists.txt @@ -0,0 +1,11 @@ +#This is the main CMAKE for build bin. 
+AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_dss_SRC) + +set(TGT_dss_INC + ${PROJECT_SRC_DIR}/include +) + +set(dss_DEF_OPTIONS ${MACRO_OPTIONS}) +set(dss_COMPILE_OPTIONS ${OPTIMIZE_OPTIONS} ${OS_OPTIONS} ${PROTECT_OPTIONS} ${WARNING_OPTIONS} ${BIN_SECURE_OPTIONS} ${CHECK_OPTIONS}) +set(dss_LINK_OPTIONS ${BIN_LINK_OPTIONS}) +add_static_objtarget(gausskernel_storage_dss TGT_dss_SRC TGT_dss_INC "${dss_DEF_OPTIONS}" "${dss_COMPILE_OPTIONS}" "${dss_LINK_OPTIONS}") diff --git a/src/gausskernel/storage/dss/Makefile b/src/gausskernel/storage/dss/Makefile new file mode 100644 index 000000000..b8b04577c --- /dev/null +++ b/src/gausskernel/storage/dss/Makefile @@ -0,0 +1,39 @@ +# +# Copyright (c) 2022 Huawei Technologies Co.,Ltd. +# +# openGauss is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. +# --------------------------------------------------------------------------------------- +# +# Makefile +# Makefile for storage/dss +# +# IDENTIFICATION +# src/gausskernel/storage/dss/Makefile +# +# --------------------------------------------------------------------------------------- + +subdir = src/gausskernel/storage/dss +top_builddir = ../../../.. 
+include $(top_builddir)/src/Makefile.global + +override CPPFLAGS := $(filter-out -fPIE, $(CPPFLAGS)) -fPIC + +ifneq "$(MAKECMDGOALS)" "clean" + ifneq "$(MAKECMDGOALS)" "distclean" + ifneq "$(shell which g++ |grep hutaf_llt |wc -l)" "1" + -include $(DEPEND) + endif + endif +endif +OBJS = dss_adaptor.o dss_log.o fio_dss.o + +include $(top_srcdir)/src/gausskernel/common.mk diff --git a/src/gausskernel/storage/dss/dss_adaptor.cpp b/src/gausskernel/storage/dss/dss_adaptor.cpp new file mode 100644 index 000000000..dc0e7ba75 --- /dev/null +++ b/src/gausskernel/storage/dss/dss_adaptor.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * dss_adaptor.cpp + * DSS Adapter Interface. 
+ * + * + * IDENTIFICATION + * src/gausskernel/storage/dss/dss_adaptor.cpp + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef WIN32 +#include "dlfcn.h" +#endif + +#include "storage/dss/dss_adaptor.h" +#include "storage/dss/fio_dss.h" +#include "utils/elog.h" + +dss_device_op_t device_op = {0}; + +// return DSS_ERROR if error occurs +#define SS_RETURN_IFERR(ret) \ + do { \ + int _status_ = (ret); \ + if (_status_ != DSS_SUCCESS) { \ + return _status_; \ + } \ + } while (0) + +int dss_load_symbol(void *lib_handle, char *symbol, void **sym_lib_handle) +{ +#ifndef WIN32 + const char *dlsym_err = NULL; + + *sym_lib_handle = dlsym(lib_handle, symbol); + dlsym_err = dlerror(); + if (dlsym_err != NULL) { + return DSS_ERROR; + } + return DSS_SUCCESS; +#endif // !WIN32 +} + +int dss_open_dl(void **lib_handle, char *symbol) +{ +#ifdef WIN32 + return DSS_ERROR; +#else + *lib_handle = dlopen(symbol, RTLD_LAZY); + if (*lib_handle == NULL) { + return DSS_ERROR; + } + return DSS_SUCCESS; +#endif +} + +void dss_close_dl(void *lib_handle) +{ +#ifndef WIN32 + (void)dlclose(lib_handle); +#endif +} + +static int dss_get_lib_version() +{ + return device_op.dss_get_version(); +} + +static int dss_get_my_version() +{ + return DSS_LOCAL_MAJOR_VERSION * DSS_LOCAL_MAJOR_VER_WEIGHT + DSS_LOCAL_MINOR_VERSION * DSS_LOCAL_MINOR_VER_WEIGHT + + DSS_LOCAL_VERSION; +} + +int dss_device_init(const char *conn_path, bool enable_dss) +{ + if (!enable_dss) { + // not enable dss, just return success + return DSS_SUCCESS; + } + SS_RETURN_IFERR(dss_open_dl(&device_op.handle, (char *)SS_LIBDSS_NAME)); + + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_fcreate", (void **)&device_op.dss_create)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_fclose", (void **)&device_op.dss_close)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_fread", (void **)&device_op.dss_read)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, 
"dss_pread", (void **)&device_op.dss_pread)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_fopen", (void **)&device_op.dss_open)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_fremove", (void **)&device_op.dss_remove)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_fseek", (void **)&device_op.dss_seek)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_ftruncate", (void **)&device_op.dss_truncate)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_fwrite", (void **)&device_op.dss_write)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_pwrite", (void **)&device_op.dss_pwrite)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_fexist", (void **)&device_op.dss_exist)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_dmake", (void **)&device_op.dss_create_dir)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_dexist", (void **)&device_op.dss_exist_dir)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_dopen", (void **)&device_op.dss_open_dir)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_dread", (void **)&device_op.dss_read_dir)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_dclose", (void **)&device_op.dss_close_dir)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_dremove", (void **)&device_op.dss_remove_dir)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_frename", (void **)&device_op.dss_rename)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_check_size", (void **)&device_op.dss_check_size)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_align_size", (void **)&device_op.dss_align_size)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_fsize_maxwr", (void **)&device_op.dss_fsize)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_get_fname", (void **)&device_op.dss_fname)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_get_error", (void **)&device_op.dss_get_error)); + 
SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_symlink", (void **)&device_op.dss_link)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_unlink", (void **)&device_op.dss_unlink)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_islink", (void **)&device_op.dss_exist_link)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_readlink", (void **)&device_op.dss_read_link)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_stat", (void **)&device_op.dss_stat)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_lstat", (void **)&device_op.dss_lstat)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_fstat", (void **)&device_op.dss_fstat)); + SS_RETURN_IFERR( + dss_load_symbol(device_op.handle, "dss_set_server_status", (void **)&device_op.dss_set_server_status)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_set_svr_path", (void **)&device_op.dss_set_svr_path)); + SS_RETURN_IFERR( + dss_load_symbol(device_op.handle, "dss_register_log_callback", (void **)&device_op.dss_register_log_callback)); + SS_RETURN_IFERR(dss_load_symbol(device_op.handle, "dss_get_lib_version", (void **)&device_op.dss_get_version)); + + int my_version = dss_get_my_version(); + int lib_version = dss_get_lib_version(); + if (my_version != lib_version) { + return DSS_ERROR; + } + + if (device_op.handle == NULL) { + return DSS_ERROR; + } + + dss_device_register(&device_op, enable_dss); + if (conn_path != NULL) { + device_op.dss_set_svr_path(conn_path); + } + + return DSS_SUCCESS; +} + +void dss_register_log_callback(dss_log_output cb_log_output) +{ + device_op.dss_register_log_callback(cb_log_output); +} \ No newline at end of file diff --git a/src/gausskernel/storage/dss/dss_log.cpp b/src/gausskernel/storage/dss/dss_log.cpp new file mode 100644 index 000000000..bd970508c --- /dev/null +++ b/src/gausskernel/storage/dss/dss_log.cpp @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. 
+ * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * dss_log.cpp + * write log for dss + * + * + * IDENTIFICATION + * src/gausskernel/storage/dss/dss_log.cpp + * + * --------------------------------------------------------------------------------------- + */ + +#include "utils/palloc.h" +#include "utils/elog.h" +#include "utils/memutils.h" +#include "knl/knl_thread.h" +#include "storage/dss/dss_api_def.h" +#include "storage/dss/dss_log.h" +#include "storage/dss/dss_adaptor.h" + +static void dss_log_report(uint32 dss_log_level, const char *code_file_name, uint32 code_line_num, const char buf[]) +{ + int saved_log_output = t_thrd.postgres_cxt.whereToSendOutput; + if (t_thrd.role == WORKER || t_thrd.role == THREADPOOL_WORKER) { + t_thrd.postgres_cxt.whereToSendOutput = (int)DestNone; + } + int32 log_level; + switch (dss_log_level) { + /* In view of the differences between DSSAPI and glibc, we record the DSS_ERROR as WARNING */ + case LOG_RUN_ERR_LEVEL: + case LOG_DEBUG_ERR_LEVEL: + log_level = WARNING; + break; + case LOG_RUN_WAR_LEVEL: + case LOG_DEBUG_WAR_LEVEL: + log_level = WARNING; + break; + case LOG_RUN_INF_LEVEL: + case LOG_DEBUG_INF_LEVEL: + log_level = ENABLE_SS_LOG ? LOG : DEBUG1; + break; + default: + log_level = DEBUG1; // it will be DEBUG level later. 
+ break; + } + + ereport(log_level, + (errmodule(MOD_DSS), + errmsg("%s:%u %s", code_file_name, code_line_num, buf))); + if (t_thrd.role == WORKER || t_thrd.role == THREADPOOL_WORKER) { + t_thrd.postgres_cxt.whereToSendOutput = saved_log_output; + } +} + +static int32 dss_log_check(dss_log_id_t dss_log_id, dss_log_level_t dss_log_level, uint32 *log_level) +{ + static uint32 db_log_map[DSS_LOG_ID_COUNT][DSS_LOG_LEVEL_COUNT] = { + {LOG_RUN_ERR_LEVEL, LOG_RUN_WAR_LEVEL, LOG_RUN_INF_LEVEL}, + {LOG_DEBUG_ERR_LEVEL, LOG_DEBUG_WAR_LEVEL, LOG_DEBUG_INF_LEVEL} + }; + + if (dss_log_id >= DSS_LOG_ID_COUNT || dss_log_level >= DSS_LOG_LEVEL_COUNT) { + return -1; + } + + *log_level = db_log_map[dss_log_id][dss_log_level]; + + return 0; +} + +static void dss_write_normal_log(dss_log_id_t dss_log_id, dss_log_level_t dss_log_level, const char *code_file_name, + uint32 code_line_num, const char *module_name, const char *format, ...) +{ + int32 errcode; + uint32 log_level; + const char *last_file = NULL; + + int32 ret = dss_log_check(dss_log_id, dss_log_level, &log_level); + if (ret == -1) { + return; + } + +#ifdef WIN32 + last_file = strrchr(code_file_name, '\\'); +#else + last_file = strrchr(code_file_name, '/'); +#endif + if (last_file == NULL) { + last_file = code_file_name; + } else { + last_file++; + } + + va_list args; + va_start(args, format); + char buf[DMS_LOGGER_BUFFER_SIZE]; + errcode = vsnprintf_s(buf, DMS_LOGGER_BUFFER_SIZE, DMS_LOGGER_BUFFER_SIZE, format, args); + if (errcode < 0) { + va_end(args); + return; + } + va_end(args); + + int saveInterruptHoldoffCount = (int)t_thrd.int_cxt.InterruptHoldoffCount; + MemoryContext old_context = MemoryContextSwitchTo(ErrorContext); + PG_TRY(); + { + dss_log_report(log_level, last_file, code_line_num, buf); + } + PG_CATCH(); + { + t_thrd.int_cxt.InterruptHoldoffCount = (uint32)saveInterruptHoldoffCount; + if (t_thrd.role == DMS_WORKER) { + FlushErrorState(); + } + } + PG_END_TRY(); + (void)MemoryContextSwitchTo(old_context); +} + 
+void dss_log_init(void) +{ + dss_register_log_callback(dss_write_normal_log); +} \ No newline at end of file diff --git a/src/gausskernel/storage/dss/fio_dss.cpp b/src/gausskernel/storage/dss/fio_dss.cpp new file mode 100644 index 000000000..56afa05b3 --- /dev/null +++ b/src/gausskernel/storage/dss/fio_dss.cpp @@ -0,0 +1,710 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * fio_dss.cpp + * DSS File System Adapter Interface. 
+ * + * + * IDENTIFICATION + * src/gausskernel/storage/dss/fio_dss.cpp + * + * --------------------------------------------------------------------------------------- + */ + +#include +#include +#include +#include +#include "securec.h" +#include "securec_check.h" +#include "storage/file/fio_device.h" + +static char zero_area[FILE_EXTEND_STEP_SIZE + ALIGNOF_BUFFER] = { 0 }; + +void dss_set_errno(int *errcode); + +ssize_t buffer_align(char **unalign_buff, char **buff, size_t size); +ssize_t dss_align_read(int handle, void *buf, size_t size, off_t offset, bool use_p); +int dss_pwrite_file_by_zero(int handle, off_t offset, off_t len); +int dss_get_file_name(int handle, char *fname, size_t fname_size); + +// interface for register raw device callback function +dss_device_op_t g_dss_device_op; +bool g_enable_dss = false; + +/* Xlog Segment Size */ +uint64 XLogSegmentSize = XLOG_SEG_SIZE; + +void dss_device_register(dss_device_op_t *dss_device_op, bool enable_dss) +{ + g_dss_device_op = *dss_device_op; + g_enable_dss = enable_dss; +} + +bool is_dss_file(const char *name) +{ + if (g_enable_dss) { + return (name[0] == '+') ? 
true : false; + } + + return false; +} + +bool is_dss_file_dec(FILE *stream) +{ + DSS_STREAM *dss_stream = (DSS_STREAM *)stream; + if (dss_stream->magic_head == DSS_MAGIC_NUMBER) { + return true; + } + return false; +} + +bool is_dss_fd(int handle) +{ + if (handle >= (int)DSS_HANDLE_BASE) { + return true; + } + + return false; +} + +void dss_set_errno(int *errcode) +{ + int errorcode = 0; + const char *errormsg = NULL; + + g_dss_device_op.dss_get_error(&errorcode, &errormsg); + errno = errorcode; + + if (errcode != NULL) { + *errcode = errorcode; + } +} + +bool dss_exist_file(const char *file_name) +{ + bool result = false; + if (g_dss_device_op.dss_exist(file_name, &result) != DSS_SUCCESS) { + dss_set_errno(NULL); + return false; + } + return result; +} + +int dss_access_file(const char *file_name, int mode) +{ + struct stat statbuf = {0}; + return dss_stat_file(file_name, &statbuf); +} + +int dss_create_dir(const char *name, mode_t mode) +{ + if (g_dss_device_op.dss_create_dir(name) != DSS_SUCCESS) { + dss_set_errno(NULL); + return GS_ERROR; + } + return GS_SUCCESS; +} + +bool dss_exist_dir(const char *name) +{ + bool result = false; + if (g_dss_device_op.dss_exist_dir(name, &result) != DSS_SUCCESS) { + dss_set_errno(NULL); + return false; + } + return result; +} + +int dss_open_dir(const char *name, DIR **dir_handle) +{ + DSS_DIR *dss_dir = NULL; + + /* dss_dir_t will be free in dss_close_dir */ + dss_dir = (DSS_DIR*)malloc(sizeof(DSS_DIR)); + dss_dir->dir_handle = g_dss_device_op.dss_open_dir(name); + if (dss_dir->dir_handle == NULL) { + dss_set_errno(NULL); + free(dss_dir); + return GS_ERROR; + } + + dss_dir->magic_head = DSS_MAGIC_NUMBER; + *dir_handle = (DIR*)dss_dir; + return GS_SUCCESS; +} + +int dss_read_dir(DIR *dir_handle, struct dirent **result) +{ + dss_dirent_t dirent_t; + dss_dir_item_t item_t; + errno_t rc; + DSS_DIR *dss_dir = (DSS_DIR*)dir_handle; + + *result = NULL; + if (g_dss_device_op.dss_read_dir(dss_dir->dir_handle, &dirent_t, &item_t) != 
DSS_SUCCESS) { + dss_set_errno(NULL); + return GS_ERROR; + } + + if (item_t == NULL) { + dss_set_errno(NULL); + return GS_SUCCESS; + } + + rc = strcpy_s(dss_dir->filename, MAX_FILE_NAME_LEN, dirent_t.d_name); + securec_check_c(rc, "\0", "\0"); + rc = strcpy_s(dss_dir->ret.d_name, MAX_FILE_NAME_LEN, dirent_t.d_name); + securec_check_c(rc, "\0", "\0"); + + *result = &dss_dir->ret; + + return GS_SUCCESS; +} + +int dss_close_dir(DIR *dir_handle) +{ + DSS_DIR *dss_dir_t = (DSS_DIR*)dir_handle; + int result = GS_SUCCESS; + + if (g_dss_device_op.dss_close_dir(dss_dir_t->dir_handle) != DSS_SUCCESS) { + dss_set_errno(NULL); + result = GS_ERROR; + } + free(dss_dir_t); + return result; +} + +int dss_remove_dir(const char *name) +{ + if (g_dss_device_op.dss_remove_dir(name) != DSS_SUCCESS) { + dss_set_errno(NULL); + return GS_ERROR; + } + return GS_SUCCESS; +} + +int dss_rename_file(const char *src, const char *dst) +{ + if (g_dss_device_op.dss_rename(src, dst) != DSS_SUCCESS) { + dss_set_errno(NULL); + return GS_ERROR; + } + return GS_SUCCESS; +} + +int dss_remove_file(const char *name) +{ + if (g_dss_device_op.dss_remove(name) != DSS_SUCCESS) { + dss_set_errno(NULL); + return GS_ERROR; + } + return GS_SUCCESS; +} + +int dss_open_file(const char *name, int flags, mode_t mode, int *handle) +{ + if ((flags & O_CREAT) != 0 && !dss_exist_file(name)) { + // file not exists, create it first. 
+ if (g_dss_device_op.dss_create(name, flags) != DSS_SUCCESS) { + dss_set_errno(NULL); + return GS_ERROR; + } + } + + if (g_dss_device_op.dss_open(name, flags, handle) != DSS_SUCCESS) { + *handle = -1; + dss_set_errno(NULL); + return GS_ERROR; + } + return GS_SUCCESS; +} + +int dss_fopen_file(const char *name, const char* mode, FILE **stream) +{ + int openmode = 0; + int handle = -1; + off_t fsize = INVALID_DEVICE_SIZE; + DSS_STREAM *dss_fstream = NULL; + + // check open mode + if (strstr(mode, "r+")) { + openmode |= O_RDWR; + } else if (strchr(mode, 'r')) { + openmode |= O_RDONLY; + } + + if (strstr(mode, "w+")) { + openmode |= O_RDWR | O_CREAT | O_TRUNC; + } else if (strchr(mode, 'w')) { + openmode |= O_WRONLY | O_CREAT | O_TRUNC; + } + + if (strstr(mode, "a+")) { + openmode |= O_RDWR | O_CREAT | O_APPEND; + } else if (strchr(mode, 'a')) { + openmode |= O_WRONLY | O_CREAT | O_APPEND; + } + + // get handle and fsize of open file + if (dss_open_file(name, openmode, 0, &handle) != GS_SUCCESS) { + return GS_ERROR; + } + + if ((fsize = dss_get_file_size(name)) == INVALID_DEVICE_SIZE) { + return GS_ERROR; + } + + // init dss stream handle + dss_fstream = (DSS_STREAM*)malloc(sizeof(DSS_STREAM)); + dss_fstream->fsize = fsize; + dss_fstream->errcode = 0; + dss_fstream->handle = handle; + + dss_fstream->magic_head = DSS_MAGIC_NUMBER; + *stream = (FILE*)dss_fstream; + return GS_SUCCESS; +} + +int dss_close_file(int handle) +{ + if (g_dss_device_op.dss_close(handle) != DSS_SUCCESS) { + dss_set_errno(NULL); + return GS_ERROR; + } + return GS_SUCCESS; +} + +int dss_fclose_file(FILE *stream) +{ + // set errcode of stream in close is meaningless + int status = dss_close_file(dss_fileno(stream)); + free(stream); + return status; +} + +ssize_t dss_read_file(int handle, void *buf, size_t size) +{ + return dss_align_read(handle, buf, size, -1, false); +} + +ssize_t dss_pread_file(int handle, void *buf, size_t size, off_t offset) +{ + return dss_align_read(handle, buf, size, offset, 
true); +} + +ssize_t dss_align_read(int handle, void *buf, size_t size, off_t offset, bool use_p) +{ + size_t newSize = size; + char* unalign_buff = NULL; + char* buff = NULL; + bool address_align = false; + int ret = DSS_ERROR; + int r_size = 0; + + if ((((uint64)buf) % ALIGNOF_BUFFER) == 0) { + address_align = true; + } + + if ((!address_align) || ((size % ALIGNOF_BUFFER) != 0)) { + newSize = buffer_align(&unalign_buff, &buff, size); + } else { + buff = (char*)buf; + } + + if (use_p) { + ret = g_dss_device_op.dss_pread(handle, buff, newSize, offset, &r_size); + } else { + ret = g_dss_device_op.dss_read(handle, buff, newSize, &r_size); + } + + if (ret != DSS_SUCCESS) { + dss_set_errno(NULL); + if (unalign_buff != NULL) { + free(unalign_buff); + } + return -1; + } + + if (unalign_buff != NULL) { + int move = (int)size - (int)newSize; + errno_t rc = memcpy_s(buf, size, buff, size); + securec_check_c(rc, "\0", "\0"); + free(unalign_buff); + // change current access position to correct point + if (move < 0 && dss_seek_file(handle, move, SEEK_CUR) < 0) { + return -1; + } + } + + return (((ssize_t)(r_size)) < ((ssize_t)(size)) ? 
((ssize_t)(r_size)) : ((ssize_t)(size))); +} + +size_t dss_fread_file(void *buf, size_t size, size_t nmemb, FILE *stream) +{ + ssize_t r_size = 0; + DSS_STREAM *dss_fstream = (DSS_STREAM*)stream; + if ((r_size = dss_align_read(dss_fstream->handle, buf, size * nmemb, -1, false)) == -1) { + dss_set_errno(&dss_fstream->errcode); + return 0; + } + return (size_t)r_size; +} + +ssize_t dss_write_file(int handle, const void *buf, size_t size) +{ + if (g_dss_device_op.dss_write(handle, buf, size) != DSS_SUCCESS) { + dss_set_errno(NULL); + return -1; + } + return (ssize_t)size; +} + +ssize_t dss_pwrite_file(int handle, const void *buf, size_t size, off_t offset) +{ + if (g_dss_device_op.dss_pwrite(handle, buf, size, offset) != DSS_SUCCESS) { + dss_set_errno(NULL); + return -1; + } + return (ssize_t)size; +} + +size_t dss_fwrite_file(const void *buf, size_t size, size_t count, FILE *stream) +{ + DSS_STREAM *dss_fstream = (DSS_STREAM*)stream; + if (g_dss_device_op.dss_write(dss_fstream->handle, buf, size * count) != DSS_SUCCESS) { + dss_set_errno(&dss_fstream->errcode); + return (size_t)-1; + } + return count; +} + +off_t dss_seek_file(int handle, off_t offset, int origin) +{ + if (origin == SEEK_END) { + origin = DSS_SEEK_MAXWR; + } + off_t size = (off_t)g_dss_device_op.dss_seek(handle, offset, origin); + if (size == -1) { + dss_set_errno(NULL); + } + return size; +} + +int dss_fseek_file(FILE *stream, long offset, int whence) +{ + if (whence == SEEK_END) { + whence = DSS_SEEK_MAXWR; + } + DSS_STREAM *dss_fstream = (DSS_STREAM*)stream; + off_t size = (off_t)g_dss_device_op.dss_seek(dss_fstream->handle, offset, whence); + if (size == -1) { + dss_set_errno(&dss_fstream->errcode); + return -1; + } + return (int)size; +} + +long dss_ftell_file(FILE *stream) +{ + DSS_STREAM *dss_fstream = (DSS_STREAM*)stream; + off_t size = (off_t)g_dss_device_op.dss_seek(dss_fstream->handle, 0, SEEK_CUR); + if (size == -1) { + dss_set_errno(&dss_fstream->errcode); + } + return size; +} + +void 
dss_rewind_file(FILE *stream) +{ + DSS_STREAM *dss_fstream = (DSS_STREAM*)stream; + off_t size = (off_t)g_dss_device_op.dss_seek(dss_fstream->handle, 0, SEEK_SET); + if (size == -1) { + dss_set_errno(&dss_fstream->errcode); + } +} + +int dss_fflush_file(FILE *stream) +{ + /* nothing to do, because DSS will enable O_SYNC and O_DIRECT for all IO */ + return GS_SUCCESS; +} + +int dss_sync_file(int handle) +{ + /* nothing to do, because DSS will enable O_SYNC and O_DIRECT for all IO */ + return GS_SUCCESS; +} + +int dss_truncate_file(int handle, off_t keep_size) +{ + /* not guarantee fill zero */ + if (g_dss_device_op.dss_truncate(handle, keep_size) != DSS_SUCCESS) { + dss_set_errno(NULL); + return GS_ERROR; + } + return GS_SUCCESS; +} + +int dss_ftruncate_file(FILE *stream, off_t keep_size) +{ + /* file stream do not have truncate function, so it's ok we do not set errcode in stream */ + return dss_truncate_file(dss_fileno(stream), keep_size); +} + +int dss_get_file_name(int handle, char *fname, size_t fname_size) +{ + if (g_dss_device_op.dss_fname(handle, fname, fname_size) != DSS_SUCCESS) { + dss_set_errno(NULL); + return GS_ERROR; + } + return GS_SUCCESS; +} + +off_t dss_get_file_size(const char *fname) +{ + off_t fsize = INVALID_DEVICE_SIZE; + + g_dss_device_op.dss_fsize(fname, &fsize); + if (fsize == INVALID_DEVICE_SIZE) { + dss_set_errno(NULL); + } + + return fsize; +} + +int dss_fallocate_file(int handle, int mode, off_t offset, off_t len) +{ + return dss_pwrite_file_by_zero(handle, offset, len); +} + +int dss_link(const char *src, const char *dst) +{ + if (g_dss_device_op.dss_link(src, dst) != DSS_SUCCESS) { + dss_set_errno(NULL); + return GS_ERROR; + } + return GS_SUCCESS; +} + +int dss_unlink_target(const char *name) +{ + if (g_dss_device_op.dss_unlink(name) != DSS_SUCCESS) { + dss_set_errno(NULL); + return GS_ERROR; + } + return GS_SUCCESS; +} + +bool dss_exist_link(const char *name) +{ + bool result = false; + if (g_dss_device_op.dss_exist_link(name, 
&result) != DSS_SUCCESS) { + dss_set_errno(NULL); + return false; + } + return result; +} + +ssize_t dss_read_link(const char *path, char *buf, size_t buf_size) +{ + ssize_t result = (ssize_t)g_dss_device_op.dss_read_link(path, buf, buf_size); + if (result == -1) { + dss_set_errno(NULL); + } + return result; +} + +int dss_setvbuf(FILE *stream, char *buf, int mode, size_t size) +{ + /* nothing to do in dss mode */ + return 0; +} + +int dss_feof(FILE *stream) +{ + DSS_STREAM *dss_fstream = (DSS_STREAM*)stream; + if (dss_ftell_file(stream) < dss_fstream->fsize) { + return 0; + } + return 1; +} + +int dss_ferror(FILE *stream) +{ + DSS_STREAM *dss_fstream = (DSS_STREAM*)stream; + if (dss_fstream->errcode) { + return 1; + } + return 0; +} + +int dss_fileno(FILE *stream) +{ + DSS_STREAM *dss_fstream = (DSS_STREAM*)stream; + return dss_fstream->handle; +} + +int dss_stat_file(const char *path, struct stat *buf) +{ + dss_stat_t st; + if (g_dss_device_op.dss_stat(path, &st) != DSS_SUCCESS) { + dss_set_errno(NULL); + return -1; + } + + // file type and mode + switch (st.type) { + case DSS_PATH: + buf->st_mode = S_IFDIR; + break; + case DSS_FILE: + buf->st_mode = S_IFREG; + break; + case DSS_LINK: + /* fall-through */ + default: + return -1; + } + // total size, in bytes + buf->st_size = (long)st.written_size; + // time of last modification + buf->st_mtime = st.update_time; + + return 0; +} + +int dss_fstat_file(int handle, struct stat *buf) +{ + dss_stat_t st; + if (g_dss_device_op.dss_fstat(handle, &st) != DSS_SUCCESS) { + dss_set_errno(NULL); + return -1; + } + + // file type and mode + switch (st.type) { + case DSS_PATH: + buf->st_mode = S_IFDIR; + break; + case DSS_FILE: + buf->st_mode = S_IFREG; + break; + case DSS_LINK: + /* fall-through */ + default: + return -1; + } + // total size, in bytes + buf->st_size = (long)st.written_size; + // time of last modification + buf->st_mtime = st.update_time; + + return 0; +} + +// return information of link itself when path is link 
+int dss_lstat_file(const char *path, struct stat *buf) +{ + dss_stat_t st; + if (g_dss_device_op.dss_lstat(path, &st) != DSS_SUCCESS) { + dss_set_errno(NULL); + return -1; + } + + // file type and mode + switch (st.type) { + case DSS_PATH: + buf->st_mode = S_IFDIR; + break; + case DSS_FILE: + buf->st_mode = S_IFREG; + break; + case DSS_LINK: + buf->st_mode = S_IFLNK; + break; + default: + return -1; + } + // total size, in bytes + buf->st_size = (long)st.written_size; + // time of last modification + buf->st_mtime = st.update_time; + + return 0; +} + +int dss_chmod_file(const char* path, mode_t mode) +{ + // dss do not have mode + return 0; +} + +ssize_t buffer_align(char **unalign_buff, char **buff, size_t size) +{ + size_t newSize = size; + size_t size_mod = ALIGNOF_BUFFER - (newSize % ALIGNOF_BUFFER); + size_t size_move = 0; + + if ((size % ALIGNOF_BUFFER) != 0) { + newSize = BUFFERALIGN(size); + size_move += size_mod; + } + + size_move += ALIGNOF_BUFFER; + + *unalign_buff = (char*)malloc(size + size_move); + *buff = (char*)BUFFERALIGN(*unalign_buff); + + return (ssize_t)newSize; +} + +int dss_pwrite_file_by_zero(int handle, off_t offset, off_t len) +{ + char *zero_area_aligned = (char *)(((uintptr_t)zero_area + ALIGNOF_BUFFER - 1) & (~(ALIGNOF_BUFFER - 1))); + off_t remain_size = len; + ssize_t write_size; + while (remain_size > 0) { + write_size = (remain_size > FILE_EXTEND_STEP_SIZE) ? 
FILE_EXTEND_STEP_SIZE : (ssize_t)remain_size; + if (dss_pwrite_file(handle, zero_area_aligned, write_size, offset) != write_size) { + return GS_ERROR; + } + + offset += write_size; + remain_size -= write_size; + } + + return GS_SUCCESS; +} + +int dss_set_server_status_wrapper(bool is_master) +{ + if (is_master) { + return g_dss_device_op.dss_set_server_status(DSS_STATUS_READWRITE); + } else { + return g_dss_device_op.dss_set_server_status(DSS_STATUS_READONLY); + } +} + +int dss_remove_dev(const char *name) +{ + if (dss_exist_file(name)) { + return dss_remove_file(name); + } else if (dss_exist_link(name)) { + return dss_unlink_target(name); + } else { + return GS_SUCCESS; + } +} \ No newline at end of file diff --git a/src/gausskernel/storage/file/copydir.cpp b/src/gausskernel/storage/file/copydir.cpp index ef531f002..5f5f3a3ca 100644 --- a/src/gausskernel/storage/file/copydir.cpp +++ b/src/gausskernel/storage/file/copydir.cpp @@ -28,6 +28,7 @@ #include "storage/smgr/segment.h" #include "miscadmin.h" #include "pgstat.h" +#include "storage/file/fio_device.h" /* * On Windows, call non-macro versions of palloc; we can't reference @@ -56,7 +57,8 @@ bool copydir(char* fromdir, char* todir, bool recurse, int elevel) char fromfile[MAXPGPATH]; char tofile[MAXPGPATH]; - if (mkdir(todir, S_IRWXU) != 0 && !(errno == EEXIST && IsRoachRestore())) { + if (mkdir(todir, S_IRWXU) != 0 && + !(FILE_ALREADY_EXIST(errno) && IsRoachRestore())) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", todir))); } @@ -152,12 +154,29 @@ void copy_file_internal(char* fromfile, char* tofile, bool trunc_file) int dstflag; int nbytes; off_t offset; + int buf_size; + char* unalign_buffer = NULL; + int align_nbytes; /* Use palloc to ensure we get a maxaligned buffer */ #define COPY_BUF_SIZE (8 * BLCKSZ) + /* DSS needs large buffer to speed up */ +#define COPY_BUF_SIZE_FOR_DSS (2 * 1024 * 1024) /* add extern BLCKSZ for protect memory overstep the boundary */ - 
buffer = (char*)palloc0(COPY_BUF_SIZE + BLCKSZ); + buf_size = COPY_BUF_SIZE + BLCKSZ; + + if (ENABLE_DSS) { + buf_size = COPY_BUF_SIZE_FOR_DSS + BLCKSZ; + buf_size += ALIGNOF_BUFFER; + } + unalign_buffer = (char*)palloc0(buf_size); + + if (ENABLE_DSS) { + buffer = (char*)BUFFERALIGN(unalign_buffer); + } else { + buffer = unalign_buffer; + } /* * Open the files @@ -178,7 +197,9 @@ void copy_file_internal(char* fromfile, char* tofile, bool trunc_file) /* * Do the data copying. */ - for (offset = 0;; offset += nbytes) { + struct stat stat_buf; + (void)stat(fromfile, &stat_buf); + for (offset = 0;offset < stat_buf.st_size; offset += nbytes) { /* If we got a cancel signal during the copy of the file, quit */ CHECK_FOR_INTERRUPTS(); @@ -194,8 +215,14 @@ void copy_file_internal(char* fromfile, char* tofile, bool trunc_file) break; } errno = 0; + + align_nbytes = nbytes; + if (ENABLE_DSS && ((nbytes % ALIGNOF_BUFFER) != 0)) { + align_nbytes = (int)BUFFERALIGN(nbytes); + } + pgstat_report_waitevent(WAIT_EVENT_COPY_FILE_WRITE); - if ((int)write(dstfd, buffer, nbytes) != nbytes) { + if ((int)write(dstfd, buffer, align_nbytes) != align_nbytes) { pgstat_report_waitevent(WAIT_EVENT_END); (void)close(srcfd); (void)close(dstfd); @@ -222,7 +249,7 @@ void copy_file_internal(char* fromfile, char* tofile, bool trunc_file) (void)close(srcfd); - pfree(buffer); + pfree(unalign_buffer); } void copy_file(char* fromfile, char* tofile) @@ -279,7 +306,7 @@ int durable_rename(const char* oldfile, const char* newfile, int elevel) errno = 0; fd = BasicOpenFile((char*)newfile, PG_BINARY | O_RDWR, 0); if (fd < 0) { - if (errno != ENOENT) { + if (!FILE_POSSIBLY_DELETED(errno)) { ereport(elevel, (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", newfile))); return -1; } @@ -390,6 +417,9 @@ static int fsync_fname_ext(const char* fname, bool isdir, bool ignore_perm, int int flags; int returncode; + if (is_dss_file(fname)) { + return 0; + } /* * Some OSs require directories to be 
opened read-only whereas other * systems don't allow us to fsync files opened read-only; so we need both @@ -451,6 +481,11 @@ static int fsync_parent_path(const char* fname, int elevel) { char parentpath[MAXPGPATH]; errno_t retcode; + + if (is_dss_file(fname)) { + return 0; + } + retcode = strncpy_s(parentpath, MAXPGPATH, fname, strlen(fname)); securec_check(retcode, "\0", "\0"); get_parent_directory(parentpath); diff --git a/src/gausskernel/storage/file/fd.cpp b/src/gausskernel/storage/file/fd.cpp index c60b3fa76..632ec4407 100644 --- a/src/gausskernel/storage/file/fd.cpp +++ b/src/gausskernel/storage/file/fd.cpp @@ -83,6 +83,7 @@ #include "storage/vfd.h" #include "storage/ipc.h" #include "storage/shmem.h" +#include "storage/file/fio_device.h" #include "threadpool/threadpool.h" #include "utils/guc.h" #include "utils/plog.h" @@ -232,7 +233,7 @@ static File AllocateVfd(void); static void FreeVfd(File file); static int FileAccess(File file); -static File OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError); +static File OpenTemporaryFileInTablespaceOrDir(Oid tblspcOid, bool rejectError); static void CleanupTempFiles(bool isProcExit); static void RemovePgTempFilesInDir(const char* tmpdirname, bool unlinkAll); static void RemovePgTempRelationFiles(const char* tsdirname); @@ -367,13 +368,16 @@ RelFileNodeForkNum RelFileNodeForkNumFill(RelFileNode* rnode, */ int pg_fsync(int fd) { + if (is_dss_fd(fd)) { + return 0; + } /* #if is to skip the sync_method test if there's no need for it */ #if defined(HAVE_FSYNC_WRITETHROUGH) && !defined(FSYNC_WRITETHROUGH_IS_FSYNC) if (u_sess->attr.attr_storage.sync_method == SYNC_METHOD_FSYNC_WRITETHROUGH) return pg_fsync_writethrough(fd); else #endif - return pg_fsync_no_writethrough(fd); + return pg_fsync_no_writethrough(fd); } /* @@ -432,6 +436,9 @@ int pg_fdatasync(int fd) */ void pg_flush_data(int fd, off_t offset, off_t nbytes) { + if (is_dss_fd(fd)) { + return; + } /* * Right now file flushing is primarily used to avoid 
making later * fsync()/fdatasync() calls have less impact. Thus don't trigger flushes @@ -1457,7 +1464,7 @@ PathNameDeleteTemporaryDir(const char *dirname) struct stat statbuf; /* Silently ignore missing directory. */ - if (stat(dirname, &statbuf) != 0 && errno == ENOENT) + if (stat(dirname, &statbuf) != 0 && FILE_POSSIBLY_DELETED(errno)) return; /* @@ -1494,30 +1501,35 @@ File OpenTemporaryFile(bool interXact) */ if (!interXact) ResourceOwnerEnlargeFiles(t_thrd.utils_cxt.CurrentResourceOwner); - /* - * If some temp tablespace(s) have been given to us, try to use the next - * one. If a given tablespace can't be found, we silently fall back to - * the database's default tablespace. - * - * BUT: if the temp file is slated to outlive the current transaction, - * force it into the database's default tablespace, so that it will not - * pose a threat to possible tablespace drop attempts. - */ - if (u_sess->storage_cxt.numTempTableSpaces > 0 && !interXact) { - Oid tblspcOid = GetNextTempTableSpace(); - if (OidIsValid(tblspcOid)) - file = OpenTemporaryFileInTablespace(tblspcOid, false); - } + + if (ENABLE_DSS) { + file = OpenTemporaryFileInTablespaceOrDir(InvalidOid, true); + } else { + /* + * If some temp tablespace(s) have been given to us, try to use the next + * one. If a given tablespace can't be found, we silently fall back to + * the database's default tablespace. + * + * BUT: if the temp file is slated to outlive the current transaction, + * force it into the database's default tablespace, so that it will not + * pose a threat to possible tablespace drop attempts. + */ + if (u_sess->storage_cxt.numTempTableSpaces > 0 && !interXact) { + Oid tblspcOid = GetNextTempTableSpace(); + if (OidIsValid(tblspcOid)) + file = OpenTemporaryFileInTablespaceOrDir(tblspcOid, false); + } - /* - * If not, or if tablespace is bad, create in database's default - * tablespace. 
u_sess->proc_cxt.MyDatabaseTableSpace should normally be set before we get - * here, but just in case it isn't, fall back to pg_default tablespace. - */ - if (file <= 0) - file = OpenTemporaryFileInTablespace( - u_sess->proc_cxt.MyDatabaseTableSpace ? u_sess->proc_cxt.MyDatabaseTableSpace : DEFAULTTABLESPACE_OID, - true); + /* + * If not, or if tablespace is bad, create in database's default + * tablespace. u_sess->proc_cxt.MyDatabaseTableSpace should normally be set before we get + * here, but just in case it isn't, fall back to pg_default tablespace. + */ + if (file <= 0) + file = OpenTemporaryFileInTablespaceOrDir( + u_sess->proc_cxt.MyDatabaseTableSpace ? u_sess->proc_cxt.MyDatabaseTableSpace : + DEFAULTTABLESPACE_OID, true); + } vfd *vfdcache = GetVfdCache(); /* Mark it for deletion at close and temporary file size limit */ @@ -1542,12 +1554,19 @@ void TempTablespacePath(char *path, Oid tablespace) * * If someone tries to specify pg_global, use pg_default instead. */ - if (tablespace == InvalidOid || tablespace == DEFAULTTABLESPACE_OID || tablespace == GLOBALTABLESPACE_OID) - err_rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "base/%s", PG_TEMP_FILES_DIR); - else { + if (tablespace == DEFAULTTABLESPACE_OID || tablespace == GLOBALTABLESPACE_OID) { + err_rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%s", DEFTBSDIR, PG_TEMP_FILES_DIR); + } else if (ENABLE_DSS && tablespace == InvalidOid) { + err_rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s", SS_PG_TEMP_FILES_DIR); + } else { /* All other tablespaces are accessed via symlinks */ - err_rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "pg_tblspc/%u/%s_%s/%s", tablespace, - TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName, PG_TEMP_FILES_DIR); + if (ENABLE_DSS) { + err_rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, "%s/%u/%s/%s", TBLSPCDIR, tablespace, + TABLESPACE_VERSION_DIRECTORY, PG_TEMP_FILES_DIR); + } else { + err_rc = snprintf_s(path, MAXPGPATH, MAXPGPATH - 1, 
"%s/%u/%s_%s/%s", TBLSPCDIR, tablespace, + TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName, PG_TEMP_FILES_DIR); + } } securec_check_ss(err_rc, "", ""); } @@ -1616,10 +1635,10 @@ void UnlinkCacheFile(const char* pathname) } /* - * Open a temporary file in a specific tablespace. + * Open a temporary file in a specific tablespace or directory. * Subroutine for OpenTemporaryFile, which see for details. */ -static File OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError) +static File OpenTemporaryFileInTablespaceOrDir(Oid tblspcOid, bool rejectError) { char tempdirpath[MAXPGPATH]; char tempfilepath[MAXPGPATH]; @@ -1633,31 +1652,46 @@ static File OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError) */ if (tblspcOid == DEFAULTTABLESPACE_OID || tblspcOid == GLOBALTABLESPACE_OID) { /* The default tablespace is {datadir}/base */ - rc = snprintf_s(tempdirpath, sizeof(tempdirpath), sizeof(tempdirpath) - 1, "base/%s", PG_TEMP_FILES_DIR); - securec_check_ss(rc, "", ""); + rc = snprintf_s(tempdirpath, sizeof(tempdirpath), sizeof(tempdirpath) - 1, + "%s/%s", DEFTBSDIR, PG_TEMP_FILES_DIR); + } else if (ENABLE_DSS && tblspcOid == InvalidOid) { + rc = snprintf_s(tempdirpath, sizeof(tempdirpath), sizeof(tempdirpath) - 1, "%s", SS_PG_TEMP_FILES_DIR); } else { /* All other tablespaces are accessed via symlinks */ #ifdef PGXC /* Postgres-XC tablespaces include node name in path */ - rc = snprintf_s(tempdirpath, - sizeof(tempdirpath), - sizeof(tempdirpath) - 1, - "pg_tblspc/%u/%s_%s/%s", - tblspcOid, - TABLESPACE_VERSION_DIRECTORY, - g_instance.attr.attr_common.PGXCNodeName, - PG_TEMP_FILES_DIR); + if (ENABLE_DSS) { + rc = snprintf_s(tempdirpath, + sizeof(tempdirpath), + sizeof(tempdirpath) - 1, + "%s/%u/%s/%s", + TBLSPCDIR, + tblspcOid, + TABLESPACE_VERSION_DIRECTORY, + PG_TEMP_FILES_DIR); + } else { + rc = snprintf_s(tempdirpath, + sizeof(tempdirpath), + sizeof(tempdirpath) - 1, + "%s/%u/%s_%s/%s", + TBLSPCDIR, + tblspcOid, + 
TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName, + PG_TEMP_FILES_DIR); + } #else rc = snprintf_s(tempdirpath, sizeof(tempdirpath), sizeof(tempdirpath) - 1, - "pg_tblspc/%u/%s/%s", + "%s/%u/%s/%s", + TBLSPCDIR, tblspcOid, TABLESPACE_VERSION_DIRECTORY, PG_TEMP_FILES_DIR); #endif - securec_check_ss(rc, "", ""); } + securec_check_ss(rc, "", ""); /* * Generate a tempfile name that should be unique within the current @@ -1810,11 +1844,11 @@ bool PathNameDeleteTemporaryFile(const char *path, bool error_on_failure) * non-existence to support BufFileDeleteShared which doesn't know how * many segments it has to delete until it runs out. */ - if (stat_errno == ENOENT) + if (FILE_POSSIBLY_DELETED(stat_errno)) return false; if (unlink(path) < 0) { - if (errno != ENOENT) + if (!FILE_POSSIBLY_DELETED(errno)) ereport(error_on_failure ? ERROR : LOG, (errcode_for_file_access(), errmsg("cannot unlink temporary file \"%s\": %m", path))); return false; @@ -1933,6 +1967,10 @@ int FilePrefetch(File file, off_t offset, int amount, uint32 wait_event_info) Assert(FileIsValid(file)); vfd *vfdcache = GetVfdCache(); + if (is_dss_file(vfdcache[file].fileName)) { + return 0; + } + DO_DB(ereport(LOG, (errmsg("FilePrefetch: %d (%s) " INT64_FORMAT " %d", file, @@ -3473,18 +3511,19 @@ void RemovePgTempFiles(void) DIR* spc_dir = NULL; struct dirent* spc_de = NULL; errno_t rc = EOK; + /* * First process temp files in pg_default ($PGDATA/base) */ - rc = snprintf_s(temp_path, sizeof(temp_path), sizeof(temp_path) - 1, "base/%s", PG_TEMP_FILES_DIR); + rc = snprintf_s(temp_path, sizeof(temp_path), sizeof(temp_path) - 1, "%s/%s", DEFTBSDIR, PG_TEMP_FILES_DIR); securec_check_ss(rc, "", ""); RemovePgTempFilesInDir(temp_path, false); - RemovePgTempRelationFiles("base"); + RemovePgTempRelationFiles(DEFTBSDIR); /* * Cycle through temp directories for all non-default tablespaces. 
*/ - spc_dir = AllocateDir("pg_tblspc"); + spc_dir = AllocateDir(TBLSPCDIR); if (spc_dir == NULL) { ereport(ERROR, (errcode_for_file_access(), errmsg("Allocate dir failed."))); } @@ -3502,20 +3541,34 @@ void RemovePgTempFiles(void) securec_check(rc, "", ""); #ifdef PGXC /* Postgres-XC tablespaces include node name in path */ - rc = snprintf_s(temp_path, - sizeof(temp_path), - sizeof(temp_path) - 1, - "pg_tblspc/%s/%s_%s/%s", - spc_de->d_name, - TABLESPACE_VERSION_DIRECTORY, - g_instance.attr.attr_common.PGXCNodeName, - PG_TEMP_FILES_DIR); - securec_check_ss(rc, "", ""); + if (ENABLE_DSS) { + rc = snprintf_s(temp_path, + sizeof(temp_path), + sizeof(temp_path) - 1, + "%s/%s/%s/%s", + TBLSPCDIR, + spc_de->d_name, + TABLESPACE_VERSION_DIRECTORY, + PG_TEMP_FILES_DIR); + securec_check_ss(rc, "", ""); + } else { + rc = snprintf_s(temp_path, + sizeof(temp_path), + sizeof(temp_path) - 1, + "%s/%s/%s_%s/%s", + TBLSPCDIR, + spc_de->d_name, + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName, + PG_TEMP_FILES_DIR); + securec_check_ss(rc, "", ""); + } #else rc = snprintf_s(temp_path, sizeof(temp_path), sizeof(temp_path) - 1, - "pg_tblspc/%s/%s/%s", + "%s/%s/%s/%s", + TBLSPCDIR, curSubDir, TABLESPACE_VERSION_DIRECTORY, PG_TEMP_FILES_DIR); @@ -3525,19 +3578,32 @@ void RemovePgTempFiles(void) #ifdef PGXC /* Postgres-XC tablespaces include node name in path */ - rc = snprintf_s(temp_path, - sizeof(temp_path), - sizeof(temp_path) - 1, - "pg_tblspc/%s/%s_%s", - spc_de->d_name, - TABLESPACE_VERSION_DIRECTORY, - g_instance.attr.attr_common.PGXCNodeName); - securec_check_ss(rc, "", ""); + if (ENABLE_DSS) { + rc = snprintf_s(temp_path, + sizeof(temp_path), + sizeof(temp_path) - 1, + "%s/%s/%s", + TBLSPCDIR, + spc_de->d_name, + TABLESPACE_VERSION_DIRECTORY); + securec_check_ss(rc, "", ""); + } else { + rc = snprintf_s(temp_path, + sizeof(temp_path), + sizeof(temp_path) - 1, + "%s/%s/%s_%s", + TBLSPCDIR, + spc_de->d_name, + TABLESPACE_VERSION_DIRECTORY, + 
g_instance.attr.attr_common.PGXCNodeName); + securec_check_ss(rc, "", ""); + } #else rc = snprintf_s(temp_path, sizeof(temp_path), sizeof(temp_path) - 1, - "pg_tblspc/%s/%s", + "%s/%s/%s", + TBLSPCDIR, curSubDir, TABLESPACE_VERSION_DIRECTORY); securec_check_ss(rc, "\0", "\0"); @@ -3552,7 +3618,11 @@ void RemovePgTempFiles(void) * t_thrd.proc_cxt.DataDir as well. */ #ifdef EXEC_BACKEND - RemovePgTempFilesInDir(PG_TEMP_FILES_DIR, false); + if (ENABLE_DSS) { + RemovePgTempFilesInDir(SS_PG_TEMP_FILES_DIR, false); + } else { + RemovePgTempFilesInDir(PG_TEMP_FILES_DIR, false); + } #endif } @@ -3567,8 +3637,9 @@ static void RemovePgTempFilesInDir(const char* tmpdirname, bool unlinkAll) temp_dir = AllocateDir(tmpdirname); if (temp_dir == NULL) { /* anything except ENOENT is fishy */ - if (errno != ENOENT) + if (!FILE_POSSIBLY_DELETED(errno)) { ereport(LOG, (errmsg("could not open temporary-files directory \"%s\": %m", tmpdirname))); + } return; } @@ -3991,8 +4062,9 @@ static void Walkdir(const char *path, void (*action)(const char *fname, bool isd static void UnlinkIfExistsFname(const char *fname, bool isdir, int elevel) { if (isdir) { - if (rmdir(fname) != 0 && errno != ENOENT) + if (rmdir(fname) != 0 && !FILE_POSSIBLY_DELETED(errno)) { ereport(elevel, (errcode_for_file_access(), errmsg("could not rmdir directory \"%s\": %m", fname))); + } } else { /* Use PathNameDeleteTemporaryFile to report filesize */ PathNameDeleteTemporaryFile(fname, false); diff --git a/src/gausskernel/storage/file/fio_device.cpp b/src/gausskernel/storage/file/fio_device.cpp new file mode 100644 index 000000000..021351115 --- /dev/null +++ b/src/gausskernel/storage/file/fio_device.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * fio_device.cpp + * Storage Adapter Interface. + * + * + * IDENTIFICATION + * src/gausskernel/storage/file/fio_device.cpp + * + * --------------------------------------------------------------------------------------- + */ + +#include +#include +#include "c.h" +#include "storage/file/fio_device.h" + + +device_type_t fio_device_type(const char *name) +{ + if (is_dss_file(name)) { + return DEV_TYPE_DSS; + } + + return DEV_TYPE_FILE; +} + +bool is_dss_type(device_type_t type) +{ + return type == DEV_TYPE_DSS; +} + +bool is_file_exist(int err) +{ + return (err == EEXIST || err == ERR_DSS_DIR_CREATE_DUPLICATED); +} + +bool is_file_delete(int err) +{ + return (err == ENOENT || err == ERR_DSS_DIR_NOT_EXIST || err == ERR_DSS_FILE_NOT_EXIST); +} \ No newline at end of file diff --git a/src/gausskernel/storage/file/reinit.cpp b/src/gausskernel/storage/file/reinit.cpp index 7709f91a7..a5d937f5c 100644 --- a/src/gausskernel/storage/file/reinit.cpp +++ b/src/gausskernel/storage/file/reinit.cpp @@ -21,6 +21,7 @@ #include "storage/copydir.h" #include "storage/smgr/fd.h" #include "storage/reinit.h" +#include "storage/file/fio_device.h" #include "utils/hsearch.h" #include "utils/memutils.h" #ifdef PGXC @@ -72,13 +73,11 @@ void ResetUnloggedRelations(int op) /* * First process unlogged files in pg_default ($PGDATA/base) */ - ResetUnloggedRelationsInTablespaceDir("base", op); - + ResetUnloggedRelationsInTablespaceDir(DEFTBSDIR, op); /* * Cycle through directories for all non-default tablespaces. 
 */ - spc_dir = AllocateDir("pg_tblspc"); - + spc_dir = AllocateDir(TBLSPCDIR); while ((spc_de = ReadDir(spc_dir, "pg_tblspc")) != NULL) { if (strcmp(spc_de->d_name, ".") == 0 || strcmp(spc_de->d_name, "..") == 0) { continue; } @@ -86,18 +85,30 @@ void ResetUnloggedRelations(int op) #ifdef PGXC /* Postgres-XC tablespaces include the node name in path */ - rc = snprintf_s(temp_path, - sizeof(temp_path), - sizeof(temp_path) - 1, - "pg_tblspc/%s/%s_%s", - spc_de->d_name, - TABLESPACE_VERSION_DIRECTORY, - g_instance.attr.attr_common.PGXCNodeName); + if (ENABLE_DSS) { + rc = snprintf_s(temp_path, + sizeof(temp_path), + sizeof(temp_path) - 1, + "%s/%s/%s", + TBLSPCDIR, + spc_de->d_name, + TABLESPACE_VERSION_DIRECTORY); + } else { + rc = snprintf_s(temp_path, + sizeof(temp_path), + sizeof(temp_path) - 1, + "%s/%s/%s_%s", + TBLSPCDIR, + spc_de->d_name, + TABLESPACE_VERSION_DIRECTORY, + g_instance.attr.attr_common.PGXCNodeName); + } #else rc = snprintf_s(temp_path, sizeof(temp_path), sizeof(temp_path) - 1, - "pg_tblspc/%s/%s", + "%s/%s/%s", + TBLSPCDIR, spc_de->d_name, TABLESPACE_VERSION_DIRECTORY); #endif diff --git a/src/gausskernel/storage/file/sharedfileset.cpp b/src/gausskernel/storage/file/sharedfileset.cpp index 08eb09795..d5749bfc7 100644 --- a/src/gausskernel/storage/file/sharedfileset.cpp +++ b/src/gausskernel/storage/file/sharedfileset.cpp @@ -56,7 +56,11 @@ void SharedFileSetInit(SharedFileSet *fileset) fileset->ntablespaces = GetTempTablespaces(&fileset->tablespaces[0], lengthof(fileset->tablespaces)); if (fileset->ntablespaces == 0) { /* If the GUC is empty, use current database's default tablespace */ - fileset->tablespaces[0] = u_sess->proc_cxt.MyDatabaseTableSpace; + if (ENABLE_DSS) { + fileset->tablespaces[0] = InvalidOid; + } else { + fileset->tablespaces[0] = u_sess->proc_cxt.MyDatabaseTableSpace; + } fileset->ntablespaces = 1; } else { int i; @@ -187,6 +191,10 @@ static void SharedFileSetPath(char *path, const SharedFileSet *fileset, Oid tabl */ static Oid 
ChooseTablespace(const SharedFileSet *fileset, const char *name) { + if (ENABLE_DSS) { + return InvalidOid; + } + uint32 hash = hash_any((const unsigned char *)name, strlen(name)); return fileset->tablespaces[hash % fileset->ntablespaces]; diff --git a/src/gausskernel/storage/freespace/freespace.cpp b/src/gausskernel/storage/freespace/freespace.cpp index 42b087e73..3e92898ef 100644 --- a/src/gausskernel/storage/freespace/freespace.cpp +++ b/src/gausskernel/storage/freespace/freespace.cpp @@ -588,6 +588,7 @@ static void fsm_extend(Relation rel, BlockNumber fsm_nblocks) { BlockNumber fsm_nblocks_now; Page pg; + char* unalign_buffer = NULL; ADIO_RUN() { @@ -595,7 +596,12 @@ static void fsm_extend(Relation rel, BlockNumber fsm_nblocks) } ADIO_ELSE() { - pg = (Page)palloc(BLCKSZ); + if (ENABLE_DSS) { + unalign_buffer = (char*)palloc(BLCKSZ + ALIGNOF_BUFFER); + pg = (Page)BUFFERALIGN(unalign_buffer); + } else { + pg = (char*)palloc(BLCKSZ); + } } ADIO_END(); @@ -633,7 +639,7 @@ static void fsm_extend(Relation rel, BlockNumber fsm_nblocks) while (fsm_nblocks_now < fsm_nblocks) { if (IsSegmentFileNode(rel->rd_node)) { - Buffer buf = ReadBufferExtended(rel, FSM_FORKNUM, P_NEW, RBM_NORMAL, NULL); + Buffer buf = ReadBufferExtended(rel, FSM_FORKNUM, P_NEW, RBM_ZERO, NULL); #ifdef USE_ASSERT_CHECKING BufferDesc *buf_desc = GetBufferDescriptor(buf - 1); Assert(buf_desc->tag.blockNum == fsm_nblocks_now); @@ -658,7 +664,11 @@ static void fsm_extend(Relation rel, BlockNumber fsm_nblocks) } ADIO_ELSE() { - pfree(pg); + if (ENABLE_DSS) { + pfree(unalign_buffer); + } else { + pfree(pg); + } } ADIO_END(); } diff --git a/src/gausskernel/storage/ipc/ipc.cpp b/src/gausskernel/storage/ipc/ipc.cpp index 5a304ce51..9f21760b5 100644 --- a/src/gausskernel/storage/ipc/ipc.cpp +++ b/src/gausskernel/storage/ipc/ipc.cpp @@ -65,6 +65,7 @@ extern "C" { /* postmaster need wait some thread in immediate shutdown */ #define NUMWAITTHREADS 1 #define WAITTIME 15 +#define WAIT_DMS_INIT_TIMEOUT 100 volatile 
unsigned int alive_threads_waitted = NUMWAITTHREADS; @@ -158,6 +159,14 @@ void proc_exit(int code) { DynamicFileList* file_scanner = NULL; + if (ENABLE_DMS && t_thrd.proc_cxt.MyProcPid == PostmasterPid) { + // add cnt to avoid DmsCallbackThreadShmemInit to use UsedShmemSegAddr + (void)pg_atomic_add_fetch_u32(&g_instance.dms_cxt.inProcExitCnt, 1); + while (pg_atomic_read_u32(&g_instance.dms_cxt.inDmsThreShmemInitCnt) > 0) { + // if some threads call DmsCallbackThreadShmemInit, wait until they finish + pg_usleep(WAIT_DMS_INIT_TIMEOUT); + } + } if (t_thrd.utils_cxt.backend_reserved) { ereport(DEBUG2, (errmodule(MOD_MEM), errmsg("[BackendReservedExit] current thread role is: %d, used memory is: %d MB\n", @@ -254,13 +263,13 @@ void proc_exit(int code) /* Clean up Allocated descs */ FreeAllAllocatedDescs(); - /* Clean up everything that must be cleaned up */ - proc_exit_prepare(code); - if (u_sess->SPI_cxt.autonomous_session) { DestoryAutonomousSession(true); } + /* Clean up everything that must be cleaned up */ + proc_exit_prepare(code); + /* * Protect the node group incase the ShutPostgres Callback function * has not been registered diff --git a/src/gausskernel/storage/ipc/procarray.cpp b/src/gausskernel/storage/ipc/procarray.cpp index ce58bee39..f4ec88c83 100755 --- a/src/gausskernel/storage/ipc/procarray.cpp +++ b/src/gausskernel/storage/ipc/procarray.cpp @@ -120,6 +120,8 @@ #include "access/multi_redo_api.h" #include "gstrace/gstrace_infra.h" #include "gstrace/storage_gstrace.h" +#include "ddes/dms/ss_common_attr.h" +#include "ddes/dms/ss_transaction.h" #ifdef ENABLE_UT #define static @@ -1293,6 +1295,10 @@ bool TransactionIdIsInProgress(TransactionId xid, uint32* needSync, bool shortcu return false; } + if (SS_STANDBY_MODE) { + return SSTransactionIdIsInProgress(xid); + } + /* * Also, we can handle our own transaction (and subtransactions) without * any access to shared memory. 
@@ -1901,8 +1907,14 @@ RETRY: SetLocalSnapshotPreparedArray(snapshot); snapshot->gtm_snapshot_type = GTM_SNAPSHOT_TYPE_LOCAL; } - Snapshot result = GetLocalSnapshotData(snapshot); - snapshot->snapshotcsn = pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo); + + Snapshot result; + if (SS_STANDBY_MODE) { + result = SSGetSnapshotData(snapshot); + } else { + result = GetLocalSnapshotData(snapshot); + snapshot->snapshotcsn = pg_atomic_read_u64(&t_thrd.xact_cxt.ShmemVariableCache->nextCommitSeqNo); + } if (result) { if (GTM_LITE_MODE) { @@ -3420,6 +3432,14 @@ bool CountOtherDBBackends(Oid databaseId, int* nbackends, int* nprepared) ThreadId wdrxdb_pids[MAXAUTOVACPIDS]; int tries; + if (ENABLE_DMS && SS_PRIMARY_MODE) { + bool ret = SSCheckDbBackendsFromAllStandby(databaseId); + if (ret) { + *nbackends = *nprepared = 0; + return true; + } + } + /* 50 tries with 100ms sleep between tries makes 5 sec total wait */ for (tries = 0; tries < 50; tries++) { int nworkers = 0; @@ -4764,6 +4784,14 @@ void CalculateLocalLatestSnapshot(bool forceCalc) if (TransactionIdPrecedes(xmin, globalxmin)) globalxmin = xmin; + if (ENABLE_DMS && SS_PRIMARY_MODE && SSGetOldestXminFromAllStandby()) { + TransactionId ss_oldest_xmin = pg_atomic_read_u64(&g_instance.dms_cxt.xminAck); + if (TransactionIdIsValid(ss_oldest_xmin) && TransactionIdIsNormal(ss_oldest_xmin) && + TransactionIdPrecedes(ss_oldest_xmin, globalxmin)) { + globalxmin = ss_oldest_xmin; + } + } + t_thrd.xact_cxt.ShmemVariableCache->xmin = xmin; t_thrd.xact_cxt.ShmemVariableCache->recentLocalXmin = globalxmin; if (GTM_FREE_MODE) { @@ -5038,7 +5066,7 @@ TransactionId ListAllThreadGttFrozenxids(int maxSize, ThreadId *pids, Transactio *n = 0; } - if (RecoveryInProgress()) + if (RecoveryInProgress() || SSIsServerModeReadOnly()) return InvalidTransactionId; flags |= PROC_IS_AUTOVACUUM; @@ -5123,3 +5151,29 @@ void UpdateXLogMaxCSN(CommitSeqNo xlogCSN) LWLockRelease(XLogMaxCSNLock); } } + +/* Get the current oldestxmin, 
as there may be no transaction or no finished one */ +void GetOldestGlobalProcXmin(TransactionId *globalProcXmin) +{ + TransactionId globalxmin = MaxTransactionId; + *globalProcXmin = InvalidTransactionId; + ProcArrayStruct *arrayP = g_instance.proc_array_idx; + int *pgprocnos = arrayP->pgprocnos; + int numProcs = arrayP->numProcs; + (void)LWLockAcquire(ProcArrayLock, LW_SHARED); + for (int index = 0; index < numProcs; index++) { + int pgprocno = pgprocnos[index]; + volatile PGXACT* pgxact = &g_instance.proc_base_all_xacts[pgprocno]; + TransactionId xid; + if (pgxact->vacuumFlags & PROC_IN_VACUUM) + continue; + + xid = pgxact->xmin; + + if (TransactionIdIsNormal(xid) && TransactionIdPrecedesOrEquals(xid, globalxmin)) { + globalxmin = xid; + *globalProcXmin = globalxmin; + } + } + LWLockRelease(ProcArrayLock); +} diff --git a/src/gausskernel/storage/ipc/sinval.cpp b/src/gausskernel/storage/ipc/sinval.cpp index 7cceaddfa..c14ba10d9 100644 --- a/src/gausskernel/storage/ipc/sinval.cpp +++ b/src/gausskernel/storage/ipc/sinval.cpp @@ -26,6 +26,7 @@ #include "utils/inval.h" #include "utils/plancache.h" #include "libcomm/libcomm.h" +#include "ddes/dms/ss_transaction.h" /* * Because backends sitting idle will not be reading sinval events, we @@ -74,6 +75,10 @@ void GlobalExecuteSharedInvalidMessages(const SharedInvalidationMessage* msgs, i */ void SendSharedInvalidMessages(const SharedInvalidationMessage* msgs, int n) { + if (ENABLE_DMS && SS_PRIMARY_MODE && !RecoveryInProgress()) { + SSSendSharedInvalidMessages(msgs, n); + } + /* threads who not support gsc still need invalid global when commit */ if (EnableGlobalSysCache()) { GlobalInvalidSharedInvalidMessages(msgs, n, true); diff --git a/src/gausskernel/storage/ipc/standby.cpp b/src/gausskernel/storage/ipc/standby.cpp index f7b5b1ef7..3b5143e25 100755 --- a/src/gausskernel/storage/ipc/standby.cpp +++ b/src/gausskernel/storage/ipc/standby.cpp @@ -1043,6 +1043,10 @@ static void LogAccessExclusiveLocks(int nlocks, 
xl_standby_lock* locks) */ void LogAccessExclusiveLock(Oid dbOid, Oid relOid) { + if (ENABLE_DMS) { + return; + } + xl_standby_lock xlrec; xlrec.xid = GetTopTransactionId(); @@ -1063,6 +1067,10 @@ void LogAccessExclusiveLock(Oid dbOid, Oid relOid) */ void LogAccessExclusiveLockPrepare(void) { + if (ENABLE_DMS) { + return; + } + /* * Ensure that a TransactionId has been assigned to this transaction, for * two reasons, both related to lock release on the standby. First, we @@ -1093,6 +1101,10 @@ static void LogReleaseAccessExclusiveLocks(int nlocks, xl_standby_lock* locks) void LogReleaseAccessExclusiveLock(TransactionId xid, Oid dbOid, Oid relOid) { + if (ENABLE_DMS) { + return; + } + xl_standby_lock xlrec; xlrec.xid = xid; diff --git a/src/gausskernel/storage/lmgr/lock.cpp b/src/gausskernel/storage/lmgr/lock.cpp index 66cc00bf8..518512ae2 100644 --- a/src/gausskernel/storage/lmgr/lock.cpp +++ b/src/gausskernel/storage/lmgr/lock.cpp @@ -49,6 +49,7 @@ #include "executor/exec/execStream.h" #include "instruments/instr_event.h" #include "instruments/instr_statement.h" +#include "ddes/dms/ss_dms_bufmgr.h" #define NLOCKENTS() \ mul_size(g_instance.attr.attr_storage.max_locks_per_xact, \ @@ -1028,6 +1029,17 @@ static LockAcquireResult LockAcquireExtendedXC(const LOCKTAG *locktag, LOCKMODE } instr_stmt_report_lock(LOCK_END, lockmode); + if (ENABLE_DMS && SS_PRIMARY_MODE && + (locktag->locktag_type < (uint8)LOCKTAG_PAGE || locktag->locktag_type == (uint8)LOCKTAG_OBJECT) && + lockmode >= AccessExclusiveLock && !RecoveryInProgress()) { + int ret = SSLockAcquire(locktag, lockmode, sessionLock, false); + if (ret) { + (void)LockRelease(locktag, lockmode, sessionLock); + ereport(ERROR, + (errmsg("SSBcast LockAcquire failed, release my own local and report error!"))); + } + } + return LOCKACQUIRE_OK; } @@ -1220,6 +1232,13 @@ static void RemoveLocalLock(LOCALLOCK *locallock) } if (!hash_search(t_thrd.storage_cxt.LockMethodLocalHash, (void *)&(locallock->tag), HASH_REMOVE, NULL)) 
ereport(WARNING, (errmsg("locallock table corrupted"))); + + if (ENABLE_DMS && SS_PRIMARY_MODE && + (locallock->tag.lock.locktag_type < (uint8)LOCKTAG_PAGE || + locallock->tag.lock.locktag_type == (uint8)LOCKTAG_OBJECT) && + locallock->tag.mode == AccessExclusiveLock && !RecoveryInProgress()) { + (void)SSLockRelease(&(locallock->tag.lock), locallock->tag.mode, false); + } } bool inline IsInSameLockGroup(const PROCLOCK *proclock1, const PROCLOCK *proclock2) diff --git a/src/gausskernel/storage/lmgr/lwlock.cpp b/src/gausskernel/storage/lmgr/lwlock.cpp index 0003d1803..c5787ad71 100644 --- a/src/gausskernel/storage/lmgr/lwlock.cpp +++ b/src/gausskernel/storage/lmgr/lwlock.cpp @@ -391,7 +391,11 @@ int NumLWLocks(void) numLocks += NUM_CLOG_PARTITIONS * CLOGShmemBuffers(); /* multixact.c needs two SLRU areas */ - numLocks += NUM_MXACTOFFSET_BUFFERS + NUM_MXACTMEMBER_BUFFERS; + if (ENABLE_DSS) { + numLocks += DSS_MAX_MXACTOFFSET + DSS_MAX_MXACTMEMBER; + } else { + numLocks += NUM_MXACTOFFSET_BUFFERS + NUM_MXACTMEMBER_BUFFERS; + } /* async.c needs one per Async buffer */ numLocks += NUM_ASYNC_BUFFERS; diff --git a/src/gausskernel/storage/lmgr/predicate.cpp b/src/gausskernel/storage/lmgr/predicate.cpp index eb154108a..4cb248efe 100644 --- a/src/gausskernel/storage/lmgr/predicate.cpp +++ b/src/gausskernel/storage/lmgr/predicate.cpp @@ -319,6 +319,8 @@ #define OldSerXidPage(xid) ((((uint32)(xid)) / OLDSERXID_ENTRIESPERPAGE) % (OLDSERXID_MAX_PAGE + 1)) +#define SERIALDIR (g_instance.datadir_cxt.serialDir) + typedef struct OldSerXidControlData { int headPage; /* newest initialized page */ TransactionId headXid; /* newest valid Xid in the SLRU */ @@ -645,7 +647,7 @@ static void OldSerXidInit(void) NUM_OLDSERXID_BUFFERS, 0, OldSerXidLock, - "pg_serial"); + SERIALDIR); /* Override default assumption that writes should be fsync'd */ t_thrd.shemem_ptr_cxt.OldSerXidSlruCtl->do_fsync = false; diff --git a/src/gausskernel/storage/lmgr/proc.cpp b/src/gausskernel/storage/lmgr/proc.cpp 
index 03ef6bf9b..4886fe0a8 100755 --- a/src/gausskernel/storage/lmgr/proc.cpp +++ b/src/gausskernel/storage/lmgr/proc.cpp @@ -132,7 +132,8 @@ int ProcGlobalSemas(void) * We need a sema per backend (including autovacuum), plus one for each * auxiliary process. */ - return g_instance.shmem_cxt.MaxBackends + NUM_CMAGENT_PROCS + NUM_AUXILIARY_PROCS + NUM_DCF_CALLBACK_PROCS; + return (g_instance.shmem_cxt.MaxBackends + NUM_CMAGENT_PROCS + \ + NUM_AUXILIARY_PROCS + NUM_DCF_CALLBACK_PROCS + NUM_DMS_CALLBACK_PROCS); } /* @@ -352,7 +353,7 @@ void InitProcGlobal(void) g_instance.proc_base->allProcCount = TotalProcs; g_instance.proc_base->allNonPreparedProcCount = g_instance.shmem_cxt.MaxBackends + NUM_CMAGENT_PROCS + NUM_AUXILIARY_PROCS + - NUM_DCF_CALLBACK_PROCS; + NUM_DCF_CALLBACK_PROCS + NUM_DMS_CALLBACK_PROCS; if (procs == NULL) ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"))); @@ -391,7 +392,7 @@ void InitProcGlobal(void) * with a real process */ if (i < g_instance.shmem_cxt.MaxBackends + NUM_CMAGENT_PROCS + - NUM_AUXILIARY_PROCS + NUM_DCF_CALLBACK_PROCS) { + NUM_AUXILIARY_PROCS + NUM_DCF_CALLBACK_PROCS + NUM_DMS_CALLBACK_PROCS) { PGSemaphoreCreate(&(procs[i]->sem)); InitSharedLatch(&(procs[i]->procLatch)); procs[i]->backendLock = LWLockAssign(LWTRANCHE_PROC); @@ -423,12 +424,17 @@ void InitProcGlobal(void) procs[i]->links.next = (SHM_QUEUE *)g_instance.proc_base->pgjobfreeProcs; g_instance.proc_base->pgjobfreeProcs = procs[i]; } else if (i < g_instance.shmem_cxt.MaxConnections + thread_pool_stream_proc_num + AUXILIARY_BACKENDS + - g_instance.attr.attr_sql.job_queue_processes + 1 + NUM_DCF_CALLBACK_PROCS) { + g_instance.attr.attr_sql.job_queue_processes + 1 + NUM_DCF_CALLBACK_PROCS + \ + NUM_DMS_CALLBACK_PROCS) { /* PGPROC for external thread, add to externalFreeProcs list */ procs[i]->links.next = (SHM_QUEUE *)g_instance.proc_base->externalFreeProcs; g_instance.proc_base->externalFreeProcs = procs[i]; + if 
(!pg_atomic_read_u32(&g_instance.dms_cxt.dmsProcSid)) { + pg_atomic_write_u32(&g_instance.dms_cxt.dmsProcSid, (uint32)(i + NUM_DCF_CALLBACK_PROCS)); + } } else if (i < g_instance.shmem_cxt.MaxConnections + thread_pool_stream_proc_num + AUXILIARY_BACKENDS + - g_instance.attr.attr_sql.job_queue_processes + 1 + NUM_DCF_CALLBACK_PROCS + NUM_CMAGENT_PROCS) { + g_instance.attr.attr_sql.job_queue_processes + 1 + NUM_DCF_CALLBACK_PROCS + NUM_CMAGENT_PROCS + \ + NUM_DMS_CALLBACK_PROCS) { /* * This pointer indicates the first position of cm anget's procs. * In the first time, cmAgentFreeProcs is NULL, so procs[LAST]->links.next is NULL. @@ -438,10 +444,11 @@ void InitProcGlobal(void) g_instance.proc_base->cmAgentFreeProcs = procs[i]; } else if (i < g_instance.shmem_cxt.MaxConnections + thread_pool_stream_proc_num + AUXILIARY_BACKENDS + g_instance.attr.attr_sql.job_queue_processes + 1 + - NUM_CMAGENT_PROCS + g_max_worker_processes + NUM_DCF_CALLBACK_PROCS) { + NUM_CMAGENT_PROCS + g_max_worker_processes + NUM_DCF_CALLBACK_PROCS + NUM_DMS_CALLBACK_PROCS) { procs[i]->links.next = (SHM_QUEUE*)g_instance.proc_base->bgworkerFreeProcs; g_instance.proc_base->bgworkerFreeProcs = procs[i]; - } else if (i < g_instance.shmem_cxt.MaxBackends + NUM_CMAGENT_PROCS + NUM_DCF_CALLBACK_PROCS) { + } else if (i < g_instance.shmem_cxt.MaxBackends + NUM_CMAGENT_PROCS + NUM_DCF_CALLBACK_PROCS + \ + NUM_DMS_CALLBACK_PROCS) { /* * PGPROC for AV launcher/worker, add to autovacFreeProcs list * list size is autovacuum_max_workers + AUTOVACUUM_LAUNCHERS @@ -468,9 +475,10 @@ void InitProcGlobal(void) * processes and prepared transactions. 
*/ g_instance.proc_aux_base = &procs[g_instance.shmem_cxt.MaxBackends + - NUM_CMAGENT_PROCS + NUM_DCF_CALLBACK_PROCS]; + NUM_CMAGENT_PROCS + NUM_DCF_CALLBACK_PROCS + NUM_DMS_CALLBACK_PROCS]; g_instance.proc_preparexact_base = &procs[g_instance.shmem_cxt.MaxBackends + - NUM_CMAGENT_PROCS + NUM_AUXILIARY_PROCS + NUM_DCF_CALLBACK_PROCS]; + NUM_CMAGENT_PROCS + NUM_AUXILIARY_PROCS + NUM_DCF_CALLBACK_PROCS + \ + NUM_DMS_CALLBACK_PROCS]; /* Create &g_instance.proc_base_mutex_lock mutexlock, too */ pthread_mutex_init(&g_instance.proc_base_mutex_lock, NULL); @@ -642,7 +650,7 @@ static void GetProcFromFreeList() t_thrd.proc = g_instance.proc_base->pgjobfreeProcs; } else if (IsBgWorkerProcess()) { t_thrd.proc = g_instance.proc_base->bgworkerFreeProcs; - } else if (t_thrd.dcf_cxt.is_dcf_thread) { + } else if (t_thrd.dcf_cxt.is_dcf_thread || t_thrd.role == DMS_WORKER) { t_thrd.proc = g_instance.proc_base->externalFreeProcs; } else if (u_sess->libpq_cxt.IsConnFromCmAgent) { t_thrd.proc = GetFreeCMAgentProc(); @@ -759,7 +767,7 @@ void InitProcess(void) g_instance.proc_base->pgjobfreeProcs = (PGPROC*)t_thrd.proc->links.next; } else if (IsBgWorkerProcess()) { g_instance.proc_base->bgworkerFreeProcs = (PGPROC*)t_thrd.proc->links.next; - } else if (t_thrd.dcf_cxt.is_dcf_thread) { + } else if (t_thrd.dcf_cxt.is_dcf_thread || t_thrd.role == DMS_WORKER) { g_instance.proc_base->externalFreeProcs = (PGPROC*)t_thrd.proc->links.next; } else if (u_sess->libpq_cxt.IsConnFromCmAgent) { g_instance.proc_base->cmAgentFreeProcs = (PGPROC *)t_thrd.proc->links.next; @@ -841,7 +849,7 @@ void InitProcess(void) */ if (IsUnderPostmaster && !IsAutoVacuumLauncherProcess() && !IsJobSchedulerProcess() && !IsJobWorkerProcess() && !t_thrd.dcf_cxt.is_dcf_thread && !IsBgWorkerProcess() && - !IsFencedProcessingMode()) + !IsFencedProcessingMode() && (t_thrd.role != DMS_WORKER)) MarkPostmasterChildActive(); /* @@ -1360,7 +1368,7 @@ static void ProcPutBackToFreeList() } else if (IsJobSchedulerProcess() || 
IsJobWorkerProcess()) { t_thrd.proc->links.next = (SHM_QUEUE*)g_instance.proc_base->pgjobfreeProcs; g_instance.proc_base->pgjobfreeProcs = t_thrd.proc; - } else if (t_thrd.dcf_cxt.is_dcf_thread) { + } else if (t_thrd.dcf_cxt.is_dcf_thread || t_thrd.role == DMS_WORKER) { t_thrd.proc->links.next = (SHM_QUEUE *)g_instance.proc_base->externalFreeProcs; g_instance.proc_base->externalFreeProcs = t_thrd.proc; } else if (u_sess->libpq_cxt.IsConnFromCmAgent) { @@ -1508,9 +1516,10 @@ static void ProcKill(int code, Datum arg) * This process is no longer present in shared memory in any meaningful * way, so tell the postmaster we've cleaned up acceptably well. (XXX * autovac launcher should be included here someday) + * DMS worker threads does not have shmem resources to clean. */ if (IsUnderPostmaster && !IsAutoVacuumLauncherProcess() && !StreamThreadAmI() && !IsJobSchedulerProcess() && - !IsJobWorkerProcess() && !IsBgWorkerProcess()) + !IsJobWorkerProcess() && !IsBgWorkerProcess() && !IsDMSWorkerProcess()) MarkPostmasterChildInactive(); /* diff --git a/src/gausskernel/storage/page/bufpage.cpp b/src/gausskernel/storage/page/bufpage.cpp index f89eaf940..153ab6bee 100644 --- a/src/gausskernel/storage/page/bufpage.cpp +++ b/src/gausskernel/storage/page/bufpage.cpp @@ -277,8 +277,14 @@ static inline void AllocPageCopyMem() } ADIO_ELSE() { - t_thrd.storage_cxt.pageCopy = (char*)MemoryContextAlloc( - THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), BLCKSZ); + if (ENABLE_DSS) { + t_thrd.storage_cxt.pageCopy_ori = (char*)MemoryContextAlloc( + THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), (BLCKSZ + ALIGNOF_BUFFER)); + t_thrd.storage_cxt.pageCopy = (char*)BUFFERALIGN(t_thrd.storage_cxt.pageCopy_ori); + } else { + t_thrd.storage_cxt.pageCopy = (char*)MemoryContextAlloc( + THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), BLCKSZ); + } } ADIO_END(); } @@ -289,8 +295,14 @@ static inline void AllocPageCopyMem() } ADIO_ELSE() { - t_thrd.storage_cxt.segPageCopy = 
(char*)MemoryContextAlloc( - THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), BLCKSZ); + if (ENABLE_DSS) { + t_thrd.storage_cxt.segPageCopyOri = (char*)MemoryContextAlloc( + THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), (BLCKSZ + ALIGNOF_BUFFER)); + t_thrd.storage_cxt.segPageCopy = (char*)BUFFERALIGN(t_thrd.storage_cxt.segPageCopyOri); + } else { + t_thrd.storage_cxt.segPageCopy = (char*)MemoryContextAlloc( + THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE), BLCKSZ); + } } ADIO_END(); } diff --git a/src/gausskernel/storage/page/gs_xlogdump.cpp b/src/gausskernel/storage/page/gs_xlogdump.cpp index 28fab24c0..f76b087ae 100644 --- a/src/gausskernel/storage/page/gs_xlogdump.cpp +++ b/src/gausskernel/storage/page/gs_xlogdump.cpp @@ -356,6 +356,12 @@ static void XLogDump(XLogRecPtr start_lsn, XLogRecPtr end_lsn, XLogFilter *filte /* There are only two parameters in PG_FUNCTION_ARGS: start_lsn and end_lsn */ Datum gs_xlogdump_lsn(PG_FUNCTION_ARGS) { + if (ENABLE_DSS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unsupported gs_xlogdump_lsn when enable dss."))); + PG_RETURN_VOID(); + } + errno_t rc = EOK; /* check user's right */ const char fName[MAXFNAMELEN] = "gs_xlogdump_lsn"; @@ -395,6 +401,12 @@ Datum gs_xlogdump_lsn(PG_FUNCTION_ARGS) /* There are only one parameter in PG_FUNCTION_ARGS: c_xid */ Datum gs_xlogdump_xid(PG_FUNCTION_ARGS) { + if (ENABLE_DSS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unsupported gs_xlogdump_xid when enable dss."))); + PG_RETURN_VOID(); + } + errno_t rc = EOK; /* check user's right */ const char fName[MAXFNAMELEN] = "gs_xlogdump_xid"; @@ -430,6 +442,12 @@ Datum gs_xlogdump_xid(PG_FUNCTION_ARGS) /* There are only three parameters in PG_FUNCTION_ARGS: path, blocknum, relation_type */ Datum gs_xlogdump_tablepath(PG_FUNCTION_ARGS) { + if (ENABLE_DSS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unsupported gs_xlogdump_tablepath when enable dss."))); + 
PG_RETURN_VOID(); + } + errno_t rc = EOK; /* check user's right */ const char fName[MAXFNAMELEN] = "gs_xlogdump_tablepath"; @@ -472,6 +490,12 @@ Datum gs_xlogdump_tablepath(PG_FUNCTION_ARGS) Datum gs_xlogdump_parsepage_tablepath(PG_FUNCTION_ARGS) { + if (ENABLE_DSS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unsupported gs_xlogdump_parsepage_tablepath when enable dss."))); + PG_RETURN_VOID(); + } + /* check user's right */ const char fName[MAXFNAMELEN] = "gs_xlogdump_parsepage_tablepath"; CheckUser(fName); diff --git a/src/gausskernel/storage/page/pageparse.cpp b/src/gausskernel/storage/page/pageparse.cpp index b973784f6..0521f88aa 100644 --- a/src/gausskernel/storage/page/pageparse.cpp +++ b/src/gausskernel/storage/page/pageparse.cpp @@ -47,6 +47,7 @@ #include "utils/palloc.h" #include "utils/relmapper.h" #include "pageparse.h" +#include "storage/file/fio_device_com.h" #define INVALID_FD (-1) @@ -1625,4 +1626,4 @@ Datum gs_parse_page_bypath(PG_FUNCTION_ARGS) } char *outputFilename = ParsePage(path, blocknum, relation_type, read_memory); PG_RETURN_TEXT_P(cstring_to_text(outputFilename)); -} +} \ No newline at end of file diff --git a/src/gausskernel/storage/replication/basebackup.cpp b/src/gausskernel/storage/replication/basebackup.cpp index 59c22a46f..5e8d7cee6 100755 --- a/src/gausskernel/storage/replication/basebackup.cpp +++ b/src/gausskernel/storage/replication/basebackup.cpp @@ -441,7 +441,7 @@ static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir) XLByteToSeg(startptr, startSegNo); XLogSegNo lastRemovedSegno = XLogGetLastRemovedSegno(); if (startSegNo <= lastRemovedSegno) { - startptr = (lastRemovedSegno + 1) * XLOG_SEG_SIZE; + startptr = (lastRemovedSegno + 1) * XLogSegSize; } SendXlogRecPtrResult(startptr); @@ -584,7 +584,7 @@ static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir) if (fstat(fileno(fp), &statbuf) != 0) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file 
\"%s\": %m", pathbuf))); } - if (statbuf.st_size != XLogSegSize) { + if (statbuf.st_size != (off_t)XLogSegSize) { CheckXLogRemoved(segno, tli); ereport(ERROR, (errcode_for_file_access(), errmsg("unexpected WAL file size \"%s\"", walFiles[i]))); } @@ -601,11 +601,11 @@ static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir) len += cnt; - if (len == XLogSegSize) + if (len == (off_t)XLogSegSize) break; } - if (len != XLogSegSize) { + if (len != (off_t)XLogSegSize) { CheckXLogRemoved(segno, tli); ereport(ERROR, (errcode_for_file_access(), errmsg("unexpected WAL file size \"%s\"", walFiles[i]))); } @@ -1130,9 +1130,15 @@ int64 sendTablespace(const char *path, bool sizeonly) * 'path' points to the tablespace location, but we only want to include * the version directory in it that belongs to us. */ - rc = snprintf_s(relativedirname, sizeof(relativedirname), sizeof(relativedirname) - 1, "%s_%s", - TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName); - securec_check_ss(rc, "", ""); + if (ENABLE_DSS) { + rc = snprintf_s(relativedirname, sizeof(relativedirname), sizeof(relativedirname) - 1, "%s", + TABLESPACE_VERSION_DIRECTORY); + securec_check_ss(rc, "", ""); + } else { + rc = snprintf_s(relativedirname, sizeof(relativedirname), sizeof(relativedirname) - 1, "%s_%s", + TABLESPACE_VERSION_DIRECTORY, g_instance.attr.attr_common.PGXCNodeName); + securec_check_ss(rc, "", ""); + } rc = snprintf_s(pathbuf, sizeof(pathbuf), sizeof(pathbuf) - 1, "%s/%s", path, relativedirname); securec_check_ss(rc, "", ""); diff --git a/src/gausskernel/storage/replication/logical/parallel_reorderbuffer.cpp b/src/gausskernel/storage/replication/logical/parallel_reorderbuffer.cpp index bd3ad7d06..35f4f5b0a 100644 --- a/src/gausskernel/storage/replication/logical/parallel_reorderbuffer.cpp +++ b/src/gausskernel/storage/replication/logical/parallel_reorderbuffer.cpp @@ -769,7 +769,7 @@ static void ParallelReorderBufferRestoreCleanup(ParallelReorderBufferTXN *txn, X 
for (XLogSegNo cur = first; cur <= last; cur++) { char path[MAXPGPATH]; XLogRecPtr recptr; - recptr = (cur * XLOG_SEG_SIZE); + recptr = (cur * XLogSegSize); errno_t rc = sprintf_s(path, sizeof(path), "pg_replslot/%s/snap/xid-%lu-lsn-%X-%X.snap", t_thrd.walsender_cxt.slotname, txn->xid, (uint32)(recptr >> 32), uint32(recptr)); securec_check_ss(rc, "", ""); @@ -1038,7 +1038,7 @@ static Size ParallelReorderBufferRestoreChanges(ParallelReorderBuffer *prb, Para Assert(*segno != 0 || dlist_is_empty(&txn->changes)); - recptr = (*segno * XLOG_SEG_SIZE); + recptr = (*segno * XLogSegSize); /* * No need to care about TLIs here, only used during a single run, diff --git a/src/gausskernel/storage/replication/logical/reorderbuffer.cpp b/src/gausskernel/storage/replication/logical/reorderbuffer.cpp index 3ca59611b..c49b1b2cc 100644 --- a/src/gausskernel/storage/replication/logical/reorderbuffer.cpp +++ b/src/gausskernel/storage/replication/logical/reorderbuffer.cpp @@ -2277,7 +2277,7 @@ static Size ReorderBufferRestoreChanges(ReorderBuffer *rb, ReorderBufferTXN *txn Assert(*segno != 0 || dlist_is_empty(&txn->changes)); - recptr = (*segno * XLOG_SEG_SIZE); + recptr = (*segno * XLogSegSize); /* * No need to care about TLIs here, only used during a single run, @@ -2501,7 +2501,7 @@ static void ReorderBufferRestoreCleanup(ReorderBuffer *rb, ReorderBufferTXN *txn for (cur = first; cur <= last; cur++) { char path[MAXPGPATH]; XLogRecPtr recptr; - recptr = (cur * XLOG_SEG_SIZE); + recptr = (cur * XLogSegSize); rc = sprintf_s(path, sizeof(path), "pg_replslot/%s/snap/xid-%lu-lsn-%X-%X.snap", NameStr(slot->data.name), txn->xid, (uint32)(recptr >> 32), uint32(recptr)); securec_check_ss(rc, "", ""); diff --git a/src/gausskernel/storage/replication/slotfuncs.cpp b/src/gausskernel/storage/replication/slotfuncs.cpp index 910b8eb97..32f640c87 100755 --- a/src/gausskernel/storage/replication/slotfuncs.cpp +++ b/src/gausskernel/storage/replication/slotfuncs.cpp @@ -440,6 +440,11 @@ void 
redo_slot_create(const ReplicationSlotPersistentData *slotInfo, char* extra */ Datum pg_create_logical_replication_slot(PG_FUNCTION_ARGS) { + if (ENABLE_DMS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Not support create logical replication slot while DMS and DSS enabled"))); + } + Name name = PG_GETARG_NAME(0); Name plugin = PG_GETARG_NAME(1); errno_t rc = EOK; @@ -490,6 +495,11 @@ Datum pg_create_logical_replication_slot(PG_FUNCTION_ARGS) */ Datum pg_drop_replication_slot(PG_FUNCTION_ARGS) { + if (ENABLE_DMS) { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Not support drop replication slot while DMS and DSS enabled"))); + } + Name name = PG_GETARG_NAME(0); bool for_backup = false; bool isLogical = false; diff --git a/src/gausskernel/storage/replication/syncrep.cpp b/src/gausskernel/storage/replication/syncrep.cpp index 8df4f0908..1671219a5 100755 --- a/src/gausskernel/storage/replication/syncrep.cpp +++ b/src/gausskernel/storage/replication/syncrep.cpp @@ -213,7 +213,7 @@ SyncWaitRet SyncRepWaitForLSN(XLogRecPtr XactCommitLSN, bool enableHandleCancel) * sync replication standby names defined. Note that those standbys don't * need to be connected. 
*/ - if (!u_sess->attr.attr_storage.enable_stream_replication || !SyncRepRequested() || + if (ENABLE_DMS || !u_sess->attr.attr_storage.enable_stream_replication || !SyncRepRequested() || !SyncStandbysDefined() || (t_thrd.postmaster_cxt.HaShmData->current_mode == NORMAL_MODE)) return NOT_REQUEST; diff --git a/src/gausskernel/storage/replication/walreceiver.cpp b/src/gausskernel/storage/replication/walreceiver.cpp index 33a167857..0fe76df2c 100755 --- a/src/gausskernel/storage/replication/walreceiver.cpp +++ b/src/gausskernel/storage/replication/walreceiver.cpp @@ -453,6 +453,7 @@ void WalReceiverMain(void) int nRet = 0; errno_t rc = 0; + Assert(ENABLE_DSS == false); t_thrd.walreceiver_cxt.last_sendfilereply_timestamp = GetCurrentTimestamp(); t_thrd.walreceiver_cxt.standby_config_modify_time = time(NULL); @@ -2111,7 +2112,7 @@ int GetSyncPercent(XLogRecPtr startLsn, XLogRecPtr totalLsn, XLogRecPtr hasCompl if (segno < WalGetSyncCountWindow()) { startLsn = InvalidXLogRecPtr; } else { - startLsn = totalLsn - (WalGetSyncCountWindow() * XLOG_SEG_SIZE); + startLsn = totalLsn - (WalGetSyncCountWindow() * XLogSegSize); basePercent = STREAMING_START_PERCENT; } } @@ -2432,7 +2433,11 @@ Datum pg_stat_get_stream_replications(PG_FUNCTION_ARGS) ereport(WARNING, (errmsg("server mode is unknown."))); /* local role */ - values[0] = CStringGetTextDatum(wal_get_role_string(local_role)); + if (g_instance.attr.attr_storage.dms_attr.enable_dms) { + values[0] = CStringGetTextDatum(GetSSServerMode()); + } else { + values[0] = CStringGetTextDatum(wal_get_role_string(local_role)); + } /* static connections */ values[1] = Int32GetDatum(static_connnections); /* db state */ @@ -2656,7 +2661,7 @@ void WalRcvSetPercentCountStartLsn(XLogRecPtr startLsn) /* Set start send lsn for current walsender (only called in walsender) */ static void WalRcvRefreshPercentCountStartLsn(XLogRecPtr currentMaxLsn, XLogRecPtr currentDoneLsn) { - uint64 coundWindow = ((uint64)WalGetSyncCountWindow() * XLOG_SEG_SIZE); 
+ uint64 coundWindow = ((uint64)WalGetSyncCountWindow() * XLogSegSize); volatile WalRcvData *walrcv = t_thrd.walreceiverfuncs_cxt.WalRcv; XLogRecPtr baseStartLsn = InvalidXLogRecPtr; diff --git a/src/gausskernel/storage/replication/walsender.cpp b/src/gausskernel/storage/replication/walsender.cpp index b6f159ac5..93b0d7a6e 100755 --- a/src/gausskernel/storage/replication/walsender.cpp +++ b/src/gausskernel/storage/replication/walsender.cpp @@ -95,6 +95,7 @@ #include "storage/procarray.h" #include "storage/lmgr.h" #include "storage/xlog_share_storage/xlog_share_storage.h" +#include "storage/file/fio_device.h" #include "tcop/tcopprot.h" #include "utils/acl.h" #include "utils/builtins.h" @@ -4656,6 +4657,7 @@ retry: uint32 startoff; int segbytes; int readbytes; + bool need_read = true; startoff = recptr % XLogSegSize; @@ -4677,7 +4679,7 @@ retry: * asked for a too old WAL segment that has already been * removed or recycled. */ - if (errno == ENOENT) { + if (FILE_POSSIBLY_DELETED(errno)) { /* we suppose wal segments removed happend when we can't open the xlog file. 
*/ WalSegmemtRemovedhappened = true; ereport(ERROR, @@ -4715,7 +4717,27 @@ retry: } pgstat_report_waitevent(WAIT_EVENT_WAL_READ); - readbytes = read(t_thrd.walsender_cxt.sendFile, p, segbytes); + /* consider O_DIRECT in dss mode */ + if (is_dss_fd(t_thrd.walsender_cxt.sendFile)) { + off_t oldStartPos = dss_seek_file(t_thrd.walsender_cxt.sendFile, 0, SEEK_CUR); + off_t movePos = oldStartPos % ALIGNOF_BUFFER; + off_t newStartPos = oldStartPos - movePos; + /* change current access position to newStartPos for O_DIRECT read */ + if (movePos != 0) { + (void)dss_seek_file(t_thrd.walsender_cxt.sendFile, newStartPos, SEEK_SET); + char *new_buff = (char*)palloc(movePos + segbytes); + int new_read = read(t_thrd.walsender_cxt.sendFile, new_buff, movePos + segbytes); + readbytes = new_read - (int)movePos; + errno_t rc = memcpy_s(p, readbytes, new_buff + movePos, readbytes); + securec_check(rc, "\0", "\0"); + pfree(new_buff); + need_read = false; + } + } + + if (need_read) { + readbytes = read(t_thrd.walsender_cxt.sendFile, p, segbytes); + } pgstat_report_waitevent(WAIT_EVENT_END); if (readbytes <= 0) { (void)close(t_thrd.walsender_cxt.sendFile); @@ -6035,7 +6057,7 @@ Datum gs_paxos_stat_replication(PG_FUNCTION_ARGS) securec_check_ss(ret, "\0", "\0"); values[j++] = CStringGetTextDatum(location); /* sync_percent */ - uint64 coundWindow = ((uint64)WalGetSyncCountWindow() * XLOG_SEG_SIZE); + uint64 coundWindow = ((uint64)WalGetSyncCountWindow() * XLogSegSize); if (XLogDiff(sndFlush, flush) < coundWindow) { syncStart = InvalidXLogRecPtr; } else { @@ -7009,7 +7031,7 @@ static void WalSndSetPercentCountStartLsn(XLogRecPtr startLsn) /* Set start send lsn for current walsender (only called in walsender) */ static void WalSndRefreshPercentCountStartLsn(XLogRecPtr currentMaxLsn, XLogRecPtr currentDoneLsn) { - uint64 coundWindow = ((uint64)WalGetSyncCountWindow() * XLOG_SEG_SIZE); + uint64 coundWindow = ((uint64)WalGetSyncCountWindow() * XLogSegSize); volatile WalSnd *walsnd = 
t_thrd.walsender_cxt.MyWalSnd; XLogRecPtr baseStartLsn = InvalidXLogRecPtr; diff --git a/src/gausskernel/storage/smgr/md.cpp b/src/gausskernel/storage/smgr/md.cpp index 0f62de106..885b60265 100644 --- a/src/gausskernel/storage/smgr/md.cpp +++ b/src/gausskernel/storage/smgr/md.cpp @@ -37,6 +37,7 @@ #include "storage/page_compression.h" #include "storage/smgr/knl_usync.h" #include "storage/smgr/smgr.h" +#include "storage/file/fio_device.h" #include "utils/aiomem.h" #include "utils/hsearch.h" #include "utils/memutils.h" @@ -502,7 +503,7 @@ static void mdunlinkfork(const RelFileNodeBackend& rnode, ForkNumber forkNum, bo */ Assert(IsHeapFileNode(rnode.node)); if (isRedo || u_sess->attr.attr_common.IsInplaceUpgrade || forkNum != MAIN_FORKNUM || - RelFileNodeBackendIsTemp(rnode)) { + RelFileNodeBackendIsTemp(rnode) || ENABLE_DMS) { /* First, forget any pending sync requests for the first segment */ if (!RelFileNodeBackendIsTemp(rnode)) { md_register_forget_request(rnode.node, forkNum, 0 /* first segment */); @@ -510,7 +511,7 @@ static void mdunlinkfork(const RelFileNodeBackend& rnode, ForkNumber forkNum, bo /* Next unlink the file */ ret = unlink(openFilePath); - if (ret < 0 && errno != ENOENT) { + if (ret < 0 && !FILE_POSSIBLY_DELETED(errno)) { ereport(WARNING, (errcode_for_file_access(), errmsg("could not remove file \"%s\": ", openFilePath))); } if (isRedo) { @@ -523,7 +524,6 @@ static void mdunlinkfork(const RelFileNodeBackend& rnode, ForkNumber forkNum, bo fd = BasicOpenFile(openFilePath, O_RDWR | PG_BINARY, 0); if (fd >= 0) { int save_errno; - ret = ftruncate(fd, 0); save_errno = errno; (void)close(fd); @@ -531,7 +531,7 @@ static void mdunlinkfork(const RelFileNodeBackend& rnode, ForkNumber forkNum, bo } else { ret = -1; } - if (ret < 0 && errno != ENOENT) { + if (ret < 0 && !FILE_POSSIBLY_DELETED(errno)) { ereport(WARNING, (errcode_for_file_access(), errmsg("could not truncate file \"%s\": %m", openFilePath))); } @@ -912,6 +912,11 @@ void mdasyncread(SMgrRelation 
reln, ForkNumber forkNum, AioDispatchDesc_t **dLis if (IS_COMPRESSED_MAINFORK(reln, forkNum)) { return; } + + if (ENABLE_DSS) { + return; + } + for (int i = 0; i < dn; i++) { off_t offset; MdfdVec *v = NULL; @@ -1018,6 +1023,11 @@ void mdasyncwrite(SMgrRelation reln, ForkNumber forkNumber, AioDispatchDesc_t ** if (IS_COMPRESSED_MAINFORK(reln, forkNumber)) { return; } + + if (ENABLE_DSS) { + return; + } + for (int i = 0; i < dn; i++) { off_t offset; MdfdVec *v = NULL; @@ -1150,7 +1160,8 @@ static void check_file_stat(char *file_name) struct stat stat_buf; char file_path[MAX_PATH_LEN] = {0}; char strfbuf[FILE_NAME_LEN]; - if (t_thrd.proc_cxt.DataDir == NULL || file_name == NULL) { + + if (t_thrd.proc_cxt.DataDir == NULL || file_name == NULL || is_dss_file(file_name)) { return; } rc = snprintf_s(file_path, MAX_PATH_LEN, MAX_PATH_LEN - 1, "%s/%s", t_thrd.proc_cxt.DataDir, file_name); @@ -1559,6 +1570,9 @@ void mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) prior_blocks = 0; while (v != NULL) { MdfdVec *ov = v; + if (ENABLE_DMS && !RelFileNodeBackendIsTemp(reln->smgr_rnode)) { + md_register_forget_request(reln->smgr_rnode.node, forknum, 0 /* first segment */); + } if (prior_blocks > nblocks) { /* @@ -2010,7 +2024,7 @@ int SyncMdFile(const FileTag *ftag, char *path) */ int UnlinkMdFile(const FileTag *ftag, char *path) { - char *p; + char *p; /* Compute the path. 
*/ p = relpathperm(ftag->rnode, MAIN_FORKNUM); diff --git a/src/gausskernel/storage/smgr/segment/data_file.cpp b/src/gausskernel/storage/smgr/segment/data_file.cpp index 8fd14eac4..be22d2c42 100644 --- a/src/gausskernel/storage/smgr/segment/data_file.cpp +++ b/src/gausskernel/storage/smgr/segment/data_file.cpp @@ -31,7 +31,9 @@ #include "storage/smgr/fd.h" #include "storage/smgr/knl_usync.h" #include "storage/smgr/segment.h" +#include "storage/file/fio_device.h" #include "postmaster/pagerepair.h" +#include "ddes/dms/ss_common_attr.h" static const mode_t SEGMENT_FILE_MODE = S_IWUSR | S_IRUSR; @@ -41,6 +43,7 @@ static void df_open_target_files(SegLogicFile *sf, int targetno); static char *slice_filename(const char *filename, int sliceno); void df_extend_internal(SegLogicFile *sf); +void df_extend_file_vector(SegLogicFile *sf); /* * We can not use virtual fd because space data files are accessed by multi-thread. @@ -137,6 +140,65 @@ void df_create_file(SegLogicFile *sf, bool redo) pfree(filename); } +/* + * Refreshes segfile's total blocks and compare to target block. Returns true if total block num holds target. 
+ */ +bool df_ss_update_segfile_size(SegLogicFile *sf, BlockNumber target_block) +{ + if (!ENABLE_DMS) { + return false; + } + + uint32 flags = O_RDWR | PG_BINARY; + if (sf->file_num == 0) { + char *filename = slice_filename(sf->filename, 0); + int fd = dv_open_file(filename, flags, (int)SEGMENT_FILE_MODE); + if (fd < 0) { + ereport(LOG, + (errmodule(MOD_SEGMENT_PAGE), errmsg("File \"%s\" does not exist, stop read here.", filename))); + pfree(filename); + return false; + } + + sf->file_num++; + sf->segfiles[0].fd = fd; + sf->segfiles[0].sliceno = 0; + } + + int sliceno = sf->file_num - 1; + int fd = sf->segfiles[sliceno].fd; + off_t size = lseek(fd, 0L, SEEK_END); + sf->total_blocks = (uint32)(sliceno * DF_FILE_SLICE_BLOCKS + size / BLCKSZ); /* size of full slices + last slice */ + + while (size == DF_FILE_SLICE_SIZE) { + sliceno = sf->file_num; + char *filename = slice_filename(sf->filename, sf->file_num); /* needed if primary created new slice */ + if (sliceno >= sf->vector_capacity) { + df_extend_file_vector(sf); + } + fd = dv_open_file(filename, flags, (int)SEGMENT_FILE_MODE); + if (fd < 0) { + ereport(LOG, + (errmodule(MOD_SEGMENT_PAGE), errmsg("File \"%s\" does not exist, stop read here.", filename))); + pfree(filename); + break; + } + + sf->segfiles[sliceno].fd = fd; + sf->segfiles[sliceno].sliceno = sliceno; + + size = lseek(fd, 0L, SEEK_END); + sf->total_blocks += (uint32)(size / BLCKSZ); + sf->file_num++; + } + + if (sf->total_blocks <= target_block) { + return false; + } + + return true; +} + static SegPhysicalFile df_get_physical_file(SegLogicFile *sf, int sliceno, BlockNumber target_block) { AutoMutexLock filelock(&sf->filelock); @@ -147,7 +209,7 @@ static SegPhysicalFile df_get_physical_file(SegLogicFile *sf, int sliceno, Block (errcode(ERRCODE_DATA_EXCEPTION), errmsg("df_get_physical_file target_block is InvalidBlockNumber!\n"))); } - if (sf->total_blocks <= target_block) { + if (sf->total_blocks <= target_block && !df_ss_update_segfile_size(sf, 
target_block)) { ereport(LOG, (errmodule(MOD_SEGMENT_PAGE), errmsg("Try to access file %s block %u, exceeds the file total blocks %u", sf->filename, target_block, sf->total_blocks))); @@ -365,7 +427,7 @@ static void df_open_target_files(SegLogicFile *sf, int targetno) } int fd = dv_open_file(filename, flags, SEGMENT_FILE_MODE); if (fd < 0) { - if (errno != ENOENT) { + if (!FILE_POSSIBLY_DELETED(errno)) { ereport(PANIC, (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", filename))); } // The file does not exist, break. @@ -649,10 +711,10 @@ void df_unlink(SegLogicFile *sf) char *path = slice_filename(sf->filename, i); int ret = unlink(path); - pfree(path); if (ret < 0) { - ereport(ERROR, (errmsg("Could not remove file %s", path))); + ereport(ERROR, (errmsg("Could not remove file %s, errno : %d", path, errno))); } + pfree(path); sf->file_num--; } sf->file_num = 0; @@ -777,4 +839,4 @@ void segForgetDatabaseFsyncRequests(Oid dbid) bool seg_filetag_matches(const FileTag *ftag, const FileTag *candidate) { return ftag->rnode.dbNode == candidate->rnode.dbNode; -} +} \ No newline at end of file diff --git a/src/gausskernel/storage/smgr/segment/extent_group.cpp b/src/gausskernel/storage/smgr/segment/extent_group.cpp index d674ff8ff..4554203ef 100644 --- a/src/gausskernel/storage/smgr/segment/extent_group.cpp +++ b/src/gausskernel/storage/smgr/segment/extent_group.cpp @@ -51,12 +51,24 @@ bool eg_df_valid(SegExtentGroup *seg) } /* Read map head, check 1. block checksum; 2. 
bit unit */ - char *buffer = (char *)palloc(BLCKSZ); + char *buffer = NULL; + char *unaligned_buffer = NULL; + if (ENABLE_DSS) { + unaligned_buffer = (char*)palloc(BLCKSZ + ALIGNOF_BUFFER); + buffer = (char *)BUFFERALIGN(unaligned_buffer); + } else { + buffer = (char *)palloc(BLCKSZ); + } + df_pread_block(sf, buffer, DF_MAP_HEAD_PAGE); if (!PageIsVerified((Page)buffer, DF_MAP_HEAD_PAGE)) { ereport(LOG, (errmsg("extent group %s map head block checksum verification failed", sf->filename))); - pfree(buffer); + if (ENABLE_DSS) { + pfree(unaligned_buffer); + } else { + pfree(buffer); + } return false; } @@ -64,11 +76,20 @@ bool eg_df_valid(SegExtentGroup *seg) if (map_head->bit_unit != seg->extent_size) { ereport(LOG, (errmsg("extent group %s, bit unit in head is %u, but its extent size is %u ", sf->filename, map_head->bit_unit, seg->extent_size))); - pfree(buffer); + if (ENABLE_DSS) { + pfree(unaligned_buffer); + } else { + pfree(buffer); + } return false; } - pfree(buffer); + if (ENABLE_DSS) { + pfree(unaligned_buffer); + } else { + pfree(buffer); + } + return true; } @@ -208,7 +229,16 @@ void eg_init_map_head_page_content(Page map_head_page, int extent_size) static void eg_init_map_head(SegExtentGroup *seg, XLogRecPtr rec_ptr) { BlockNumber pageno = DF_MAP_HEAD_PAGE; - Page page = (Page)palloc(BLCKSZ); + Page page = NULL; + char* unaligned_page = NULL; + + if (ENABLE_DSS) { + unaligned_page = (char*)palloc(BLCKSZ + ALIGNOF_BUFFER); + page = (Page)BUFFERALIGN(unaligned_page); + } else { + page = (Page)palloc(BLCKSZ); + } + errno_t er = memset_s((void *)page, BLCKSZ, 0, BLCKSZ); securec_check(er, "", ""); @@ -218,7 +248,11 @@ static void eg_init_map_head(SegExtentGroup *seg, XLogRecPtr rec_ptr) PageSetChecksumInplace(page, pageno); df_pwrite_block(seg->segfile, (char *)page, pageno); - pfree(page); + if (ENABLE_DSS) { + pfree(unaligned_page); + } else { + pfree(page); + } seg->map_head_entry = pageno; seg->map_head = NULL; @@ -425,7 +459,7 @@ bool 
eg_alloc_preassigned_block(SegExtentGroup *seg, BlockNumber preassigned_blo void eg_init_bitmap_page(SegExtentGroup *seg, BlockNumber pageno, BlockNumber first_page) { - Buffer buffer = ReadBufferFast(seg->space, seg->rnode, seg->forknum, pageno, RBM_NORMAL); + Buffer buffer = ReadBufferFast(seg->space, seg->rnode, seg->forknum, pageno, RBM_ZERO); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); START_CRIT_SECTION(); @@ -731,7 +765,7 @@ void eg_create_if_necessary(SegExtentGroup *seg) return; } - TablespaceCreateDbspace(seg->rnode.spcNode, seg->rnode.dbNode, false); + TablespaceCreateDbspace(seg->rnode.spcNode, seg->rnode.dbNode, false); /* Ensure tablespace limits at first. */ uint64 requestSize = DF_FILE_EXTEND_STEP_SIZE; TableSpaceUsageManager::IsExceedMaxsize(seg->rnode.spcNode, requestSize, true); diff --git a/src/gausskernel/storage/smgr/segment/segbuffer.cpp b/src/gausskernel/storage/smgr/segment/segbuffer.cpp index 6d57a29cd..b0ad1fc24 100644 --- a/src/gausskernel/storage/smgr/segment/segbuffer.cpp +++ b/src/gausskernel/storage/smgr/segment/segbuffer.cpp @@ -32,6 +32,7 @@ #include "storage/smgr/smgr.h" #include "utils/resowner.h" #include "pgstat.h" +#include "ddes/dms/ss_dms_bufmgr.h" /* * Segment buffer, used for segment meta data, e.g., segment head, space map head. 
We separate segment @@ -48,12 +49,21 @@ static const int TEN_MICROSECOND = 10; #define SegBufferIsPinned(bufHdr) ((bufHdr)->state & BUF_REFCOUNT_MASK) static BufferDesc *SegStrategyGetBuffer(uint32 *buf_state); -static bool SegStartBufferIO(BufferDesc *buf, bool forInput); -static void SegTerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits); extern PrivateRefCountEntry *GetPrivateRefCountEntry(Buffer buffer, bool create, bool do_move); extern void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref); +void SetInProgressFlags(BufferDesc *bufDesc, bool input) +{ + InProgressBuf = bufDesc; + isForInput = input; +} + +bool HasInProgressBuf(void) +{ + return InProgressBuf != NULL; +} + void AbortSegBufferIO(void) { if (InProgressBuf != NULL) { @@ -99,7 +109,7 @@ static bool SegStartBufferIO(BufferDesc *buf, bool forInput) return true; } -static void SegTerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits) +void SegTerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits) { SegmentCheck(buf == InProgressBuf); @@ -144,6 +154,9 @@ bool SegPinBuffer(BufferDesc *buf) ereport(DEBUG5, (errmodule(MOD_SEGMENT_PAGE), errmsg("[SegPinBuffer] (%u %u %u %d) %d %u ", buf->tag.rnode.spcNode, buf->tag.rnode.dbNode, buf->tag.rnode.relNode, buf->tag.rnode.bucketNode, buf->tag.forkNum, buf->tag.blockNum))); + + ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); + bool result; PrivateRefCountEntry * ref = GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), true, true); SegmentCheck(ref != NULL); @@ -169,8 +182,6 @@ bool SegPinBuffer(BufferDesc *buf) } ref->refcount++; - - ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); ResourceOwnerRememberBuffer(t_thrd.utils_cxt.CurrentResourceOwner, BufferDescriptorGetBuffer(buf)); return result; @@ -371,17 +382,135 @@ void ReportInvalidPage(RepairBlockKey key) return; } +void ReadSegBufferForCheck(BufferDesc* bufHdr, ReadBufferMode mode, SegSpace *spc, 
Block bufBlock) +{ + if (spc == NULL) { + bool found; + SegSpcTag tag = {.spcNode = bufHdr->tag.rnode.spcNode, .dbNode = bufHdr->tag.rnode.dbNode}; + SegmentCheck(t_thrd.storage_cxt.SegSpcCache != NULL); + spc = (SegSpace *)hash_search(t_thrd.storage_cxt.SegSpcCache, (void *)&tag, HASH_FIND, &found); + SegmentCheck(found); + } + + seg_physical_read(spc, bufHdr->tag.rnode, bufHdr->tag.forkNum, bufHdr->tag.blockNum, (char *)bufBlock); + if (!PageIsVerified((char *)bufBlock, bufHdr->tag.blockNum)) { + ereport(PANIC, (errmsg("[%d/%d/%d/%d %d-%d] verified failed", + bufHdr->tag.rnode.spcNode, bufHdr->tag.rnode.dbNode, bufHdr->tag.rnode.relNode, + bufHdr->tag.rnode.bucketNode, bufHdr->tag.forkNum, bufHdr->tag.blockNum))); + } + + if (!PageIsSegmentVersion(bufBlock) && !PageIsNew(bufBlock)) { + ereport(PANIC, (errmsg("[%d/%d/%d/%d %d-%d] page version is %d", + bufHdr->tag.rnode.spcNode, bufHdr->tag.rnode.dbNode, bufHdr->tag.rnode.relNode, + bufHdr->tag.rnode.bucketNode, bufHdr->tag.forkNum, bufHdr->tag.blockNum, + PageGetPageLayoutVersion(bufBlock)))); + } +} + +Buffer ReadSegBufferForDMS(BufferDesc* bufHdr, ReadBufferMode mode, SegSpace *spc) +{ + if (spc == NULL) { + bool found; + SegSpcTag tag = {.spcNode = bufHdr->tag.rnode.spcNode, .dbNode = bufHdr->tag.rnode.dbNode}; + SegmentCheck(t_thrd.storage_cxt.SegSpcCache != NULL); + spc = (SegSpace *)hash_search(t_thrd.storage_cxt.SegSpcCache, (void *)&tag, HASH_FIND, &found); + SegmentCheck(found); + ereport(DEBUG1, (errmsg("Fetch cached SegSpace success, spcNode:%u dbNode:%u.", bufHdr->tag.rnode.spcNode, + bufHdr->tag.rnode.dbNode))); + } + + char *bufBlock = (char *)BufHdrGetBlock(bufHdr); + if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK || mode == RBM_ZERO) { + errno_t er = memset_s((char *)bufBlock, BLCKSZ, 0, BLCKSZ); + securec_check(er, "", ""); + } else { + seg_physical_read(spc, bufHdr->tag.rnode, bufHdr->tag.forkNum, bufHdr->tag.blockNum, bufBlock); + ereport(DEBUG1, + (errmsg("DMS SegPage 
ReadBuffer success, bufid:%d, blockNum:%u of reln:%s mode %d.", + bufHdr->buf_id, bufHdr->tag.blockNum, relpathperm(bufHdr->tag.rnode, bufHdr->tag.forkNum), (int)mode))); + if (!PageIsVerified(bufBlock, bufHdr->tag.blockNum)) { + RepairBlockKey key; + key.relfilenode = bufHdr->tag.rnode; + key.forknum = bufHdr->tag.forkNum; + key.blocknum = bufHdr->tag.blockNum; + ReportInvalidPage(key); + return InvalidBuffer; + } + + if (!PageIsSegmentVersion(bufBlock) && !PageIsNew(bufBlock)) { + ereport(PANIC, (errmsg("Read DMS SegPage buffer, block %u of relation %s, but page version is %d", + bufHdr->tag.blockNum, relpathperm(bufHdr->tag.rnode, bufHdr->tag.forkNum), + PageGetPageLayoutVersion(bufBlock)))); + } + } + + bufHdr->lsn_on_disk = PageGetLSN(bufBlock); +#ifdef USE_ASSERT_CHECKING + bufHdr->lsn_dirty = InvalidXLogRecPtr; +#endif + SegTerminateBufferIO(bufHdr, false, BM_VALID); + SegmentCheck(SegBufferIsPinned(bufHdr)); + return BufferDescriptorGetBuffer(bufHdr); +} + Buffer ReadBufferFast(SegSpace *spc, RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode) { bool found = false; + /* Make sure we will have room to remember the buffer pin */ + ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); + BufferDesc *bufHdr = SegBufferAlloc(spc, rnode, forkNum, blockNum, &found); if (!found) { SegmentCheck(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); char *bufBlock = (char *)BufHdrGetBlock(bufHdr); - if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) { + + if (ENABLE_DMS && mode != RBM_FOR_REMOTE) { + Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); + + do { + bool startio; + if (LWLockHeldByMe(bufHdr->io_in_progress_lock)) { + startio = true; + } else { + startio = SegStartBufferIO(bufHdr, true); + } + + if (!startio) { + Assert(pg_atomic_read_u32(&bufHdr->state) & BM_VALID); + found = true; + goto found_branch; + } + + dms_buf_ctrl_t *buf_ctrl = GetDmsBufCtrl(bufHdr->buf_id); + if 
(!LockModeCompatible(buf_ctrl, LW_SHARED)) { + if (!StartReadPage(bufHdr, LW_SHARED)) { + SegTerminateBufferIO((BufferDesc *)bufHdr, false, 0); + // when reform fail, should return InvalidBuffer to reform proc thread + if (AmDmsReformProcProcess() && dms_reform_failed()) { + return InvalidBuffer; + } + + pg_usleep(5000L); + continue; + } + } else { + /* + * previous attempts to read the buffer must have failed, + * but DRC has been created, so load page directly again + */ + Assert(pg_atomic_read_u32(&bufHdr->state) & BM_IO_ERROR); + buf_ctrl->state |= BUF_NEED_LOAD; + } + + break; + } while (true); + return TerminateReadSegPage(bufHdr, mode, spc); + } + + if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK || mode == RBM_ZERO) { errno_t er = memset_s((char *)bufBlock, BLCKSZ, 0, BLCKSZ); securec_check(er, "", ""); } else { @@ -407,8 +536,14 @@ Buffer ReadBufferFast(SegSpace *spc, RelFileNode rnode, ForkNumber forkNum, Bloc SegTerminateBufferIO(bufHdr, false, BM_VALID); } +found_branch: if (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) { - LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE); + if (ENABLE_DMS) { + GetDmsBufCtrl(bufHdr->buf_id)->state |= BUF_READ_MODE_ZERO_LOCK; + LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_EXCLUSIVE); + } else { + LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE); + } } SegmentCheck(SegBufferIsPinned(bufHdr)); @@ -513,7 +648,19 @@ BufferDesc *SegBufferAlloc(SegSpace *spc, RelFileNode rnode, ForkNumber forkNum, old_flags = buf_state & BUF_FLAG_MASK; if (BUF_STATE_GET_REFCOUNT(buf_state) == 1 && !(old_flags & BM_DIRTY)) { - break; + if (ENABLE_DMS && (old_flags & BM_TAG_VALID)) { + /* + * notify DMS to release drc owner. if failed, can't recycle this buffer. + * release owner procedure is in buf header lock, it's not reasonable, + * need to improve. 
+ */ + if (DmsReleaseOwner(old_tag, buf->buf_id)) { + ClearReadHint(buf->buf_id, true); + break; + } + } else { + break; + } } UnlockBufHdr(buf, buf_state); BufTableDelete(&new_tag, new_hash); @@ -530,6 +677,10 @@ BufferDesc *SegBufferAlloc(SegSpace *spc, RelFileNode rnode, ForkNumber forkNum, buf_state |= BM_TAG_VALID | BM_PERMANENT | BUF_USAGECOUNT_ONE; UnlockBufHdr(buf, buf_state); + if (ENABLE_DMS) { + GetDmsBufCtrl(buf->buf_id)->lock_mode = DMS_LOCK_NULL; + } + if (old_flag_valid) { BufTableDelete(&old_tag, old_hash); if (old_partition_lock != new_partition_lock) { diff --git a/src/gausskernel/storage/smgr/segment/segxlog.cpp b/src/gausskernel/storage/smgr/segment/segxlog.cpp index 05954cd08..fe1b431bf 100644 --- a/src/gausskernel/storage/smgr/segment/segxlog.cpp +++ b/src/gausskernel/storage/smgr/segment/segxlog.cpp @@ -768,7 +768,8 @@ static void redo_space_drop(XLogReaderState *record) void seg_redo_new_page_copy_and_flush(BufferTag *tag, char *data, XLogRecPtr lsn) { - char page[BLCKSZ]; + char page[BLCKSZ] __attribute__((__aligned__(ALIGNOF_BUFFER))) = {0}; + errno_t er = memcpy_s(page, BLCKSZ, data, BLCKSZ); securec_check(er, "\0", "\0"); diff --git a/src/gausskernel/storage/smgr/segment/space.cpp b/src/gausskernel/storage/smgr/segment/space.cpp index e91f79c69..e85cb2eca 100644 --- a/src/gausskernel/storage/smgr/segment/space.cpp +++ b/src/gausskernel/storage/smgr/segment/space.cpp @@ -39,6 +39,7 @@ #include "utils/inval.h" #include "utils/relfilenodemap.h" #include "pgxc/execRemote.h" +#include "ddes/dms/ss_transaction.h" void spc_lock(SegSpace *spc) { @@ -277,6 +278,10 @@ SegSpace *spc_open(Oid spcNode, Oid dbNode, bool create, bool isRedo) void spc_drop_space_node(Oid spcNode, Oid dbNode) { + if (ENABLE_DMS && SS_PRIMARY_MODE) { + SSBCastDropSegSpace(spcNode, dbNode); + } + SegSpace *spc = spc_init_space_node(spcNode, dbNode); SegSpcTag tag = {.spcNode = spcNode, .dbNode = dbNode}; bool found = false; @@ -301,6 +306,10 @@ void spc_drop_space_node(Oid 
spcNode, Oid dbNode) */ SegSpace *spc_drop(Oid spcNode, Oid dbNode, bool redo) { + if (ENABLE_DMS && SS_PRIMARY_MODE) { + SSBCastDropSegSpace(spcNode, dbNode); + } + SegSpace *spc = spc_init_space_node(spcNode, dbNode); AutoMutexLock spc_lock(&spc->lock); spc_lock.lock(); @@ -352,6 +361,44 @@ SegSpace *spc_drop(Oid spcNode, Oid dbNode, bool redo) return spc; } +static void SSClose_seg_files(SegSpace *spc) +{ + for (int egid = 0; egid < EXTENT_TYPES; egid++) { + for (int j = 0; j <= SEGMENT_MAX_FORKNUM; j++) { + SegExtentGroup *eg = &spc->extent_group[egid][j]; + SegLogicFile *sf = eg->segfile; + AutoMutexLock filelock(&sf->filelock); + filelock.lock(); + + for (int i = sf->file_num - 1; i >= 0; i--) { + (void)close(sf->segfiles[i].fd); + sf->segfiles[i].fd = -1; + sf->file_num--; + } + sf->file_num = 0; + filelock.unLock(); + } + } +} + +void SSDrop_seg_space(Oid spcNode, Oid dbNode) +{ + SegSpace *spc = spc_init_space_node(spcNode, dbNode); + AutoMutexLock spc_lock(&spc->lock); + spc_lock.lock(); + + SpaceDataFileStatus dataStatus = spc_status(spc); + if (dataStatus == SpaceDataFileStatus::EMPTY) { + spc_lock.unLock(); + return; + } + + SegDropSpaceMetaBuffers(spcNode, dbNode); + SSClose_seg_files(spc); + spc_lock.unLock(); + return; +} + /* * After shrink, the space's physical size: * 1. 
is aligned to DF_FILE_EXTEND_STEP_BLOCKS (128MB) @@ -431,7 +478,15 @@ Buffer try_get_moved_pagebuf(RelFileNode *rnode, int forknum, BlockNumber logic_ static void copy_extent(SegExtentGroup *seg, RelFileNode logic_rnode, uint32 logic_start_blocknum, BlockNumber nblocks, BlockNumber phy_from_extent, BlockNumber phy_to_extent) { - char *content = (char *)palloc(BLCKSZ); + char *content = NULL; + char *unaligned_content = NULL; + if (ENABLE_DSS) { + unaligned_content = (char*)palloc(BLCKSZ + ALIGNOF_BUFFER); + content = (char*)BUFFERALIGN(unaligned_content); + } else { + content = (char *)palloc(BLCKSZ); + } + char *pagedata = NULL; for (int i = 0; i < seg->extent_size; i++) { /* @@ -525,7 +580,12 @@ static void copy_extent(SegExtentGroup *seg, RelFileNode logic_rnode, uint32 log UnpinBuffer(bufdesc, true); } } - pfree(content); + + if (ENABLE_DSS) { + pfree(unaligned_content); + } else { + pfree(content); + } } /* @@ -1144,6 +1204,10 @@ Datum gs_space_shrink(PG_FUNCTION_ARGS) errmsg("Don't shrink space, for recovery is in progress."))); } + if (SS_STANDBY_MODE) { + ereport(ERROR, (errmsg("SS standby cannot perform gs_space_shrink"))); + } + Oid spaceid = PG_GETARG_OID(0); Oid dbid = PG_GETARG_OID(1); uint32 extent_type = PG_GETARG_UINT32(2); @@ -1159,6 +1223,10 @@ Datum local_space_shrink(PG_FUNCTION_ARGS) errmsg("Don't shrink space locally, for recovery is in progress."))); } + if (SS_STANDBY_MODE) { + ereport(ERROR, (errmsg("SS standby cannot perform local_space_shrink"))); + } + char *tablespacename = text_to_cstring(PG_GETARG_TEXT_PP(0)); char *dbname = text_to_cstring(PG_GETARG_TEXT_PP(1)); Oid spaceid = get_tablespace_oid_by_name(tablespacename); @@ -1180,6 +1248,10 @@ Datum global_space_shrink(PG_FUNCTION_ARGS) errmsg("Don't shrink space globally, for recovery is in progress."))); } + if (SS_STANDBY_MODE) { + ereport(ERROR, (errmsg("SS standby cannot perform global_space_shrink"))); + } + char *tablespacename = text_to_cstring(PG_GETARG_TEXT_PP(0)); char 
*dbname = text_to_cstring(PG_GETARG_TEXT_PP(1)); diff --git a/src/gausskernel/storage/smgr/segment/xlog_atomic_op.cpp b/src/gausskernel/storage/smgr/segment/xlog_atomic_op.cpp index d425bd389..45cdee7e5 100644 --- a/src/gausskernel/storage/smgr/segment/xlog_atomic_op.cpp +++ b/src/gausskernel/storage/smgr/segment/xlog_atomic_op.cpp @@ -395,6 +395,11 @@ void XLogAtomicOpStart() ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("recovery is in progress"), errhint("cannot make new WAL entries during recovery"))); } + + if (!SSXLogInsertAllowed()) { + ereport(FATAL, (errmsg("SS standby cannot make new WAL entries"))); + } + XLogAtomicOpMgr->XLogStart(); } diff --git a/src/gausskernel/storage/smgr/segstore.cpp b/src/gausskernel/storage/smgr/segstore.cpp index 1b99e9331..9a618eab6 100755 --- a/src/gausskernel/storage/smgr/segstore.cpp +++ b/src/gausskernel/storage/smgr/segstore.cpp @@ -38,6 +38,7 @@ #include "utils/resowner.h" #include "vectorsonic/vsonichash.h" #include "storage/procarray.h" +#include "ddes/dms/ss_transaction.h" /* * This code manages relations that reside on segment-page storage. It implements functions used for smgr.cpp. 
* @@ -97,6 +98,9 @@ static void seg_update_timeline() { pg_atomic_add_fetch_u32(&g_instance.segment_cxt.segment_drop_timeline, 1); + if (ENABLE_DMS && SS_PRIMARY_MODE) { + SSUpdateSegDropTimeline(pg_atomic_read_u32(&g_instance.segment_cxt.segment_drop_timeline)); + } } static uint32 seg_get_drop_timeline() @@ -136,6 +140,12 @@ void UnlockSegmentHeadPartition(Oid spcNode, Oid dbNode, BlockNumber head) #define ReadLevel0Buffer(spc, blockno) \ ReadBufferFast((spc), EXTENT_GROUP_RNODE((spc), LEVEL0_PAGE_EXTENT_SIZE), MAIN_FORKNUM, (blockno), RBM_NORMAL) +#define ReadSegmentBuffer_RBM_ZERO(spc, blockno) \ + ReadBufferFast((spc), EXTENT_GROUP_RNODE((spc), SEGMENT_HEAD_EXTENT_SIZE), MAIN_FORKNUM, (blockno), RBM_ZERO) + +#define ReadLevel0Buffer_RBM_ZERO(spc, blockno) \ + ReadBufferFast((spc), EXTENT_GROUP_RNODE((spc), LEVEL0_PAGE_EXTENT_SIZE), MAIN_FORKNUM, (blockno), RBM_ZERO) + /* * Calculate extent id & offset according to logic page id * input: logic_id @@ -227,7 +237,7 @@ void seg_init_new_level0_page(SegSpace *spc, uint32_t new_extent_id, Buffer seg_ { SegmentHead *seg_head = (SegmentHead *)PageGetContents(BufferGetPage(seg_head_buffer)); - Buffer new_level0_buffer = ReadLevel0Buffer(spc, new_level0_page); + Buffer new_level0_buffer = ReadLevel0Buffer_RBM_ZERO(spc, new_level0_page); LockBuffer(new_level0_buffer, BUFFER_LOCK_EXCLUSIVE); Page level0_page = BufferGetPage(new_level0_buffer); @@ -486,6 +496,9 @@ static bool normal_open_segment(SMgrRelation reln, int forknum, bool create) reln->seg_desc[MAIN_FORKNUM]->head_blocknum))); } + if (ENABLE_DMS) { + LockBuffer(main_buffer, BUFFER_LOCK_SHARE); + } /* * For non-main fork, the segment head is stored in the main fork segment head. * The block number being invalid means that the segment has not been created yet. 
@@ -494,6 +507,9 @@ static bool normal_open_segment(SMgrRelation reln, int forknum, bool create) if (fork_head_blocknum == InvalidBlockNumber) { if (create) { + if (ENABLE_DMS) { + LockBuffer(main_buffer, BUFFER_LOCK_UNLOCK); + } LockBuffer(main_buffer, BUFFER_LOCK_EXCLUSIVE); /* @@ -529,10 +545,16 @@ static bool normal_open_segment(SMgrRelation reln, int forknum, bool create) } SegUnlockReleaseBuffer(main_buffer); } else { + if (ENABLE_DMS) { + LockBuffer(main_buffer, BUFFER_LOCK_UNLOCK); + } SegReleaseBuffer(main_buffer); return false; } } else { + if (ENABLE_DMS) { + LockBuffer(main_buffer, BUFFER_LOCK_UNLOCK); + } SegReleaseBuffer(main_buffer); } @@ -709,7 +731,7 @@ static void bucket_ensure_mapblock(SegSpace *spc, Buffer main_buffer, uint32 blo spc_alloc_extent(spc, SEGMENT_HEAD_EXTENT_SIZE, MAIN_FORKNUM, InvalidBlockNumber, iptr); main_head->bkt_map[blockid] = newmap_block; - Buffer map_buffer = ReadSegmentBuffer(spc, newmap_block); + Buffer map_buffer = ReadSegmentBuffer_RBM_ZERO(spc, newmap_block); LockBuffer(map_buffer, BUFFER_LOCK_EXCLUSIVE); bucket_init_map_page(map_buffer, lsn); XLogAtomicOpRegisterBuffer(map_buffer, REGBUF_WILL_INIT, SPCXLOG_BUCKET_INIT_MAPBLOCK, @@ -771,7 +793,7 @@ static BlockNumber bucket_alloc_segment(Oid tablespace_id, Oid database_id, Bloc XLogAtomicOpStart(); BlockNumber main_head_blocknum = spc_alloc_extent(spc, SEGMENT_HEAD_EXTENT_SIZE, MAIN_FORKNUM, InvalidBlockNumber, iptr); - Buffer main_head_buffer = ReadSegmentBuffer(spc, main_head_blocknum); + Buffer main_head_buffer = ReadSegmentBuffer_RBM_ZERO(spc, main_head_blocknum); LockBuffer(main_head_buffer, BUFFER_LOCK_EXCLUSIVE); Page main_head_page = BufferGetPage(main_head_buffer); @@ -1091,10 +1113,16 @@ SegPageLocation seg_get_physical_location(RelFileNode rnode, ForkNumber forknum, reln = smgropen(rnode, InvalidBackendId); Buffer buffer = read_head_buffer(reln, forknum, false); + if (ENABLE_DMS) { + LockBuffer(buffer, BUFFER_LOCK_SHARE); + } 
SegmentCheck(BufferIsValid(buffer)); SegmentHead *head = (SegmentHead *)PageGetContents(BufferGetBlock(buffer)); SegPageLocation loc = seg_logic_to_physic_mapping(reln, head, blocknum); + if (ENABLE_DMS) { + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + } SegReleaseBuffer(buffer); return loc; @@ -1141,7 +1169,7 @@ static bool open_segment(SMgrRelation reln, ForkNumber forknum, bool create, XLo } if (reln->seg_desc[forknum]) { - if (forknum != MAIN_FORKNUM && !CurrentThreadIsWorker() && + if (forknum != MAIN_FORKNUM && (!CurrentThreadIsWorker() || SS_STANDBY_MODE) && reln->seg_desc[forknum]->timeline != seg_get_drop_timeline()) { /* * It's possible that the current smgr cache is invalid. We should close it and reopen. @@ -1461,11 +1489,17 @@ SMGR_READ_STATUS seg_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blo LOG_SMGR_API(reln->smgr_rnode, forknum, blocknum, "seg_read"); Buffer seg_buffer = read_head_buffer(reln, forknum, false); + if (ENABLE_DMS) { + LockBuffer(seg_buffer, BUFFER_LOCK_SHARE); + } SegmentCheck(BufferIsValid(seg_buffer)); SegmentHead *seg_head = (SegmentHead *)PageGetContents(BufferGetBlock(seg_buffer)); SegmentCheck(IsNormalSegmentHead(seg_head)); if (seg_head->nblocks <= blocknum) { + if (ENABLE_DMS) { + LockBuffer(seg_buffer, BUFFER_LOCK_UNLOCK); + } SegReleaseBuffer(seg_buffer); ereport(ERROR, (errmodule(MOD_SEGMENT_PAGE), errcode(ERRCODE_DATA_CORRUPTED), errmsg("seg_read blocknum exceeds segment size"), @@ -1474,6 +1508,10 @@ SMGR_READ_STATUS seg_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blo blocknum, seg_head->nblocks))); } + if (ENABLE_DMS) { + LockBuffer(seg_buffer, BUFFER_LOCK_UNLOCK); + } + LockSegmentHeadPartition(reln->seg_space->spcNode, reln->seg_space->dbNode, reln->seg_desc[forknum]->head_blocknum, LW_SHARED); diff --git a/src/include/Makefile b/src/include/Makefile index 5e6d2e929..be8dbbc48 100644 --- a/src/include/Makefile +++ b/src/include/Makefile @@ -167,6 +167,7 @@ endif $(INSTALL_DATA) 
replication/origin.h '$(DESTDIR)$(includedir_server)/replication/origin.h' $(INSTALL_DATA) parser/scanner.h '$(DESTDIR)$(includedir_server)/parser/scanner.h' $(INSTALL_DATA) parser/keywords.h '$(DESTDIR)$(includedir_server)/parser/keywords.h' + $(INSTALL_DATA) ddes/dms/dms_api.h '$(DESTDIR)$(includedir_server)/ddes/dms/dms_api.h' # We don't use INSTALL_DATA for performance reasons --- there are a lot of files cp $(srcdir)/*.h '$(DESTDIR)$(includedir_server)'/ || exit; \ chmod $(INSTALL_DATA_MODE) '$(DESTDIR)$(includedir_server)'/*.h || exit; \ diff --git a/src/include/access/clog.h b/src/include/access/clog.h index ec8a27b42..c637bf48a 100644 --- a/src/include/access/clog.h +++ b/src/include/access/clog.h @@ -58,6 +58,8 @@ typedef int CLogXidStatus; #define CLOG_XID_STATUS_COMMITTED 0x01 #define CLOG_XID_STATUS_ABORTED 0x02 +#define CLOGDIR (g_instance.datadir_cxt.clogDir) + /* * A "subcommitted" transaction is a committed subtransaction whose parent * hasn't committed or aborted yet. @@ -102,6 +104,7 @@ typedef enum { #include "fmgr.h" extern Datum gs_fault_inject(PG_FUNCTION_ARGS); +extern void SSCLOGShmemClear(void); #endif /* CLOG_H */ diff --git a/src/include/access/csnlog.h b/src/include/access/csnlog.h index 7d93c21d2..d7a96a6b9 100644 --- a/src/include/access/csnlog.h +++ b/src/include/access/csnlog.h @@ -21,6 +21,8 @@ #define CSNBufMappingPartitionLock(hashcode) (&t_thrd.shemem_ptr_cxt.mainLWLockArray[FirstCSNBufMappingLock + CSNBufHashPartition(hashcode)].lock) #define CSNBufMappingPartitionLockByIndex(i) (&t_thrd.shemem_ptr_cxt.mainLWLockArray[FirstCSNBufMappingLock + i].lock) +#define CSNLOGDIR (g_instance.datadir_cxt.csnlogDir) + extern void CSNLogSetCommitSeqNo(TransactionId xid, int nsubxids, TransactionId* subxids, CommitSeqNo csn); extern CommitSeqNo CSNLogGetCommitSeqNo(TransactionId xid); extern CommitSeqNo CSNLogGetNestCommitSeqNo(TransactionId xid); @@ -35,5 +37,6 @@ extern void ShutdownCSNLOG(void); extern void CheckPointCSNLOG(void); extern 
void ExtendCSNLOG(TransactionId newestXact); extern void TruncateCSNLOG(TransactionId oldestXact); +void SSCSNLOGShmemClear(void); #endif /* CSNLOG_H */ diff --git a/src/include/access/double_write_basic.h b/src/include/access/double_write_basic.h index c1e665152..32ff61117 100644 --- a/src/include/access/double_write_basic.h +++ b/src/include/access/double_write_basic.h @@ -33,15 +33,16 @@ static const uint32 HALF_K = 512; -static const char OLD_DW_FILE_NAME[] = "global/pg_dw"; -static const char DW_FILE_NAME_PREFIX[] = "global/pg_dw_"; -static const char SINGLE_DW_FILE_NAME[] = "global/pg_dw_single"; -static const char DW_BUILD_FILE_NAME[] = "global/pg_dw.build"; -static const char DW_UPGRADE_FILE_NAME[] = "global/dw_upgrade"; -static const char DW_BATCH_UPGRADE_META_FILE_NAME[] = "global/dw_batch_upgrade_meta"; -static const char DW_BATCH_UPGRADE_BATCH_FILE_NAME[] = "global/dw_batch_upgrade_files"; -static const char DW_META_FILE[] = "global/pg_dw_meta"; -static const char DW_EXT_DIRECTORY[] = "global/pg_dw_ext_chunk"; +#define OLD_DW_FILE_NAME (g_instance.datadir_cxt.dw_subdir_cxt.dwOldPath) +#define DW_FILE_NAME_PREFIX (g_instance.datadir_cxt.dw_subdir_cxt.dwPathPrefix) +#define SINGLE_DW_FILE_NAME (g_instance.datadir_cxt.dw_subdir_cxt.dwSinglePath) +#define DW_BUILD_FILE_NAME (g_instance.datadir_cxt.dw_subdir_cxt.dwBuildPath) +#define DW_UPGRADE_FILE_NAME (g_instance.datadir_cxt.dw_subdir_cxt.dwUpgradePath) +#define DW_BATCH_UPGRADE_META_FILE_NAME (g_instance.datadir_cxt.dw_subdir_cxt.dwBatchUpgradeMetaPath) +#define DW_BATCH_UPGRADE_BATCH_FILE_NAME (g_instance.datadir_cxt.dw_subdir_cxt.dwBatchUpgradeFilePath) +#define DW_META_FILE (g_instance.datadir_cxt.dw_subdir_cxt.dwMetaPath) +#define DW_EXT_DIRECTORY (g_instance.datadir_cxt.dw_subdir_cxt.dwExtChunkPath) +#define DW_STORAGE_TYPE ((device_type_t)g_instance.datadir_cxt.dw_subdir_cxt.dwStorageType) static const uint32 DW_TRY_WRITE_TIMES = 8; #ifndef WIN32 diff --git 
a/src/include/access/extreme_rto/dispatcher.h b/src/include/access/extreme_rto/dispatcher.h index e78524743..0f24ba42a 100644 --- a/src/include/access/extreme_rto/dispatcher.h +++ b/src/include/access/extreme_rto/dispatcher.h @@ -62,8 +62,6 @@ typedef struct ReadPipeline { #define MAX_XLOG_READ_BUFFER (0xFFFFF) /* 8k uint */ -#define MAX_ALLOC_SEGNUM (4) /* 16* 4 */ - typedef enum { WORKER_STATE_STOP = 0, WORKER_STATE_RUN, diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 68154d875..e30638807 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -348,6 +348,7 @@ extern void heap_inplace_update(Relation relation, HeapTuple tuple, bool waitFlu extern bool heap_freeze_tuple(HeapTuple tuple, TransactionId cutoff_xid, TransactionId cutoff_multi, bool *changedMultiXid = NULL); extern bool heap_tuple_needs_freeze(HeapTuple tuple, TransactionId cutoff_xid, MultiXactId cutoff_multi, Buffer buf); +extern TransactionId MultiXactIdGetUpdateXid(TransactionId xmax, uint16 t_infomask, uint16 t_infomask2); extern Oid simple_heap_insert(Relation relation, HeapTuple tup); extern void simple_heap_delete(Relation relation, ItemPointer tid, int options = 0, bool allow_update_self = false); diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index bb9b991f9..cb76eaeaf 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -23,6 +23,8 @@ #define MaxMultiXactOffset UINT64CONST(0xFFFFFFFFFFFFFFFF) +#define MULTIXACTDIR (g_instance.datadir_cxt.multixactDir) + /* Number of SLRU buffers to use for multixact */ #define NUM_MXACTOFFSET_BUFFERS 8 #define NUM_MXACTMEMBER_BUFFERS 16 @@ -140,5 +142,6 @@ extern void multixact_redo(XLogReaderState* record); extern void multixact_desc(StringInfo buf, XLogReaderState* record); extern const char* multixact_type_name(uint8 subtype); extern void get_multixact_pageno(uint8 info, int64 *pageno, XLogReaderState *record); +extern void 
SSMultiXactShmemClear(void); #endif /* MULTIXACT_H */ diff --git a/src/include/access/slru.h b/src/include/access/slru.h index 7f29c7cb2..b8b1d556a 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -173,4 +173,7 @@ extern void ut_SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid); extern void SimpleLruWaitIO(SlruCtl ctl, int slotno); extern bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, const char* filename, int64 segpage, const void* data); +extern void SimpleLruSetPageEmpty( + SlruCtl ctl, const char* name, int trancheId, int nslots, int nlsns, LWLock* ctllock, const char* subdir, int index = 0); + #endif /* SLRU_H */ diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index e2f965335..a06537486 100755 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -132,7 +132,9 @@ typedef enum WalLevel { #define XLogHintBitIsNeeded() (g_instance.attr.attr_storage.wal_log_hints) /* Do we need to WAL-log information required only for Hot Standby and logical replication? */ -#define XLogStandbyInfoActive() (g_instance.attr.attr_storage.wal_level >= WAL_LEVEL_HOT_STANDBY) +#define XLogStandbyInfoActive() \ + (g_instance.attr.attr_storage.wal_level >= WAL_LEVEL_HOT_STANDBY && \ + !g_instance.attr.attr_storage.dms_attr.enable_dms) /* Do we need to WAL-log information required only for logical replication? 
*/ #define XLogLogicalInfoActive() (g_instance.attr.attr_storage.wal_level >= WAL_LEVEL_LOGICAL) extern const char* DemoteModeDescs[]; @@ -249,6 +251,10 @@ struct WALInitSegLockPadded { #define LAZY_BACKWRITE 0x0400 /* lazy backwrite */ #define PAGERANGE_BACKWRITE 0x0800 /* PageRangeBackWrite */ +#define CHECKPOINT_LEN (SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint)) +#define CHECKPOINTNEW_LEN (SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPointNew)) +#define CHECKPOINTPLUS_LEN (SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPointPlus)) +#define CHECKPOINTUNDO_LEN (SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPointUndo)) /* Checkpoint statistics */ typedef struct CheckpointStatsData { @@ -663,6 +669,8 @@ extern bool RecoveryInProgress(void); extern bool HotStandbyActive(void); extern bool HotStandbyActiveInReplay(void); extern bool XLogInsertAllowed(void); +extern bool SSXLogInsertAllowed(void); +extern bool SSModifySharedLunAllowed(void); extern void GetXLogReceiptTime(TimestampTz* rtime, bool* fromStream); extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID* targetTLI, XLogRecPtr* ReplayReadPtr = NULL); extern void SetXLogReplayRecPtr(XLogRecPtr readRecPtr, XLogRecPtr endRecPtr); @@ -820,7 +828,6 @@ void ReadShareStorageCtlInfo(ShareStorageXLogCtl* ctlInfo); pg_crc32c CalShareStorageCtlInfoCrc(const ShareStorageXLogCtl *ctlInfo); int ReadXlogFromShareStorage(XLogRecPtr startLsn, char *buf, int expectReadLen); int WriteXlogToShareStorage(XLogRecPtr startLsn, char *buf, int writeLen); -void FsyncXlogToShareStorage(); Size SimpleValidatePage(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, char* page); void ShareStorageInit(); void FindLastRecordCheckInfoOnShareStorage(XLogRecPtr *lastRecordPtr, pg_crc32 *lastRecordCrc, @@ -832,6 +839,8 @@ void rename_recovery_conf_for_roach(); bool CheckForFailoverTrigger(void); bool CheckForSwitchoverTrigger(void); void 
HandleCascadeStandbyPromote(XLogRecPtr *recptr); +void update_dirty_page_queue_rec_lsn(XLogRecPtr current_insert_lsn, bool need_immediately_update = false); +XLogRecord *ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int whichChkpt); extern XLogRecPtr XlogRemoveSegPrimary; @@ -885,6 +894,12 @@ static inline void WakeupWalSemaphore(PGSemaphore sema) PGSemaphoreUnlock(sema); } +static inline void FsyncXlogToShareStorage() +{ + Assert(g_instance.xlog_cxt.shareStorageopCtl.isInit && (g_instance.xlog_cxt.shareStorageopCtl.opereateIf != NULL)); + g_instance.xlog_cxt.shareStorageopCtl.opereateIf->fsync(); +} + /* * Options for enum values stored in other modules */ @@ -910,4 +925,7 @@ XLogRecPtr GetFlushMainStandby(); extern bool RecoveryIsSuspend(void); extern void InitUndoCountThreshold(); + +/* for recovery */ +void SSWriteInstanceControlFile(int fd, const char* buffer, int id, off_t size); #endif /* XLOG_H */ diff --git a/src/include/access/xlog_basic.h b/src/include/access/xlog_basic.h index 432fc0870..46ce4afe2 100644 --- a/src/include/access/xlog_basic.h +++ b/src/include/access/xlog_basic.h @@ -34,6 +34,7 @@ #include "port/pg_crc32c.h" #include "utils/pg_crc.h" #include "tde_key_management/data_common.h" +#include "storage/file/fio_device_com.h" /* * These macros encapsulate knowledge about the exact layout of XLog file * names, timeline history file names, and archive-status file names. @@ -53,12 +54,13 @@ * The XLOG is split into WAL segments (physical files) of the size indicated * by XLOG_SEG_SIZE. */ -#define XLogSegSize ((uint32)XLOG_SEG_SIZE) -#define XLogSegmentsPerXLogId (UINT64CONST(0x100000000) / XLOG_SEG_SIZE) +#define XLogSegSize XLogSegmentSize +#define XLogSegmentsPerXLogId (UINT64CONST(0x100000000) / XLogSegmentSize) #define XLogRecordMaxSize ((uint32)0x3fffe000) /* 1 gigabyte - 8 kbyte */ /* Compute XLogRecPtr with segment number and offset. 
*/ -#define XLogSegNoOffsetToRecPtr(segno, offset, dest) (dest) = (segno)*XLOG_SEG_SIZE + (offset) +#define XLogSegNoOffsetToRecPtr(segno, offset, dest) \ + (dest) = (segno) * XLogSegmentSize + (offset) /* * Compute ID and segment from an XLogRecPtr. @@ -88,9 +90,11 @@ /* * The XLog directory and control file (relative to $PGDATA) */ +#define SS_XLOGDIR (g_instance.datadir_cxt.xlogDir) #define XLOGDIR "pg_xlog" -#define XLOG_CONTROL_FILE "global/pg_control" -#define XLOG_CONTROL_FILE_BAK "global/pg_control.backup" +#define ARCHIVEDIR "pg_xlog/archive_status" +#define XLOG_CONTROL_FILE (g_instance.datadir_cxt.controlPath) +#define XLOG_CONTROL_FILE_BAK (g_instance.datadir_cxt.controlBakPath) #define MAX_PAGE_FLUSH_LSN_FILE "global/max_page_flush_lsn" #define PG_LSN_XLOG_FLUSH_CHK_FILE "global/pg_lsnxlogflushchk" diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index f4bde1ee9..776dba2a8 100755 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -57,7 +57,8 @@ nRet = snprintf_s(path, \ len, \ len - 1, \ - XLOGDIR "/%08X%08X%08X", \ + "%s/%08X%08X%08X", \ + SS_XLOGDIR, \ tli, \ (uint32)((logSegNo) / XLogSegmentsPerXLogId), \ (uint32)((logSegNo) % XLogSegmentsPerXLogId)); \ @@ -103,7 +104,8 @@ nRet = snprintf_s(path, \ len, \ len - 1, \ - XLOGDIR "/%08X%08X%08X", \ + "%s/%08X%08X%08X", \ + SS_XLOGDIR, \ tli, \ (uint32)((logSegNo) / XLogSegmentsPerXLogId), \ (uint32)((logSegNo) % XLogSegmentsPerXLogId)); \ @@ -142,7 +144,7 @@ #define StatusFilePath(path, len, xlog, suffix) \ do { \ int nRet = 0; \ - nRet = snprintf_s(path, len, len - 1, XLOGDIR "/archive_status/%s%s", xlog, suffix); \ + nRet = snprintf_s(path, len, len - 1, "%s/%s%s", ARCHIVEDIR, xlog, suffix); \ securec_check_ss(nRet, "\0", "\0"); \ } while (0) @@ -168,7 +170,8 @@ nRet = snprintf_s(path, \ len, \ len - 1, \ - XLOGDIR "/%08X%08X%08X.%08X.backup", \ + "%s/%08X%08X%08X.%08X.backup", \ + SS_XLOGDIR, \ tli, \ (uint32)((logSegNo) / 
XLogSegmentsPerXLogId), \ (uint32)((logSegNo) % XLogSegmentsPerXLogId), \ diff --git a/src/include/catalog/catalog.h b/src/include/catalog/catalog.h index 97c94fc0a..75c08cd34 100644 --- a/src/include/catalog/catalog.h +++ b/src/include/catalog/catalog.h @@ -29,6 +29,10 @@ #define TABLESPACE_VERSION_DIRECTORY "PG_" PG_MAJORVERSION "_" \ CppAsString2(CATALOG_VERSION_NO) +#define DEFTBSDIR (g_instance.datadir_cxt.baseDir) +#define GLOTBSDIR (g_instance.datadir_cxt.globalDir) +#define TBLSPCDIR (g_instance.datadir_cxt.tblspcDir) + /* file name: Cxxxxx.0 * the max length is up to MaxAttrNumber */ diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index bda7e688f..67d1ceb97 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -234,6 +234,9 @@ typedef struct ControlFileData { bool float4ByVal; /* float4 pass-by-value? */ bool float8ByVal; /* float8, int8, etc pass-by-value? */ + /* flag indicating bootstrap relations stored in segment or not */ + bool bootstrap_segment; + /* CRC of all above ... MUST BE LAST! 
*/ pg_crc32c crc; } ControlFileData; @@ -253,6 +256,11 @@ typedef struct LsnXlogFlushData { */ #define PG_CONTROL_SIZE 8192 +#define MIN_INSTANCEID 0 +#define MAX_INSTANCEID 63 +#define INVALID_INSTANCEID -1 +#define MAX_INSTANCEID_LEN 3 /* max string len of instance id */ + #define PG_LSNXLOGFLUSHCHK_FILESIZE 512 extern uint32 get_controlfile_timeline(void); /* diff --git a/src/include/commands/tablespace.h b/src/include/commands/tablespace.h index 5cbfb78f4..2970d4161 100644 --- a/src/include/commands/tablespace.h +++ b/src/include/commands/tablespace.h @@ -30,7 +30,7 @@ #define CRITICA_POINT_VALUE 104857600 /* 100 MB */ #define TABLESPACE_THRESHOLD_RATE 0.9 /* threshold rate */ #define TABLESPACE_UNLIMITED_STRING "unlimited" -#define PG_LOCATION_DIR "pg_location" +#define PG_LOCATION_DIR (g_instance.datadir_cxt.locationDir) typedef struct TableSpaceUsageSlot { uint64 maxSize; diff --git a/src/include/ddes/dms/dms_api.h b/src/include/ddes/dms/dms_api.h new file mode 100644 index 000000000..4a5ef2d52 --- /dev/null +++ b/src/include/ddes/dms/dms_api.h @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * dms_api.h + * Defines the DMS data structure. 
+ * + * IDENTIFICATION + * src/include/ddes/dms/dms_api.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef __DMS_API_H__ +#define __DMS_API_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef OPENGAUSS +#define DMS_SUCCESS 0 +#define DMS_ERROR (-1) +#define DMS_PAGEID_SIZE 24 // openGauss bufferTag size +#else +#define DMS_PAGEID_SIZE 16 +#endif + +#define DMS_XID_SIZE 12 +#define DMS_INSTANCES_SIZE 4 +#define DMS_ROWID_SIZE 16 +#define DMS_INDEX_PROFILE_SIZE 96 +#define DMS_MAX_IP_LEN 64 +#define DMS_MAX_INSTANCES 64 + +#define DMS_VERSION_MAX_LEN 256 +#define DMS_OCK_LOG_PATH_LEN 256 +typedef enum en_dms_online_status { + DMS_ONLINE_STATUS_OUT = 0, + DMS_ONLINE_STATUS_JOIN = 1, + DMS_ONLINE_STATUS_REFORM = 2, + DMS_ONLINE_STATUS_IN = 3, +} dms_online_status_t; + +typedef enum en_dms_dr_type { + DMS_DR_TYPE_INVALID = 0, + DMS_DR_TYPE_DATABASE = 1, + DMS_DR_TYPE_SPACE = 2, + DMS_DR_TYPE_TABLE = 3, + DMS_DR_TYPE_DDL = 4, + DMS_DR_TYPE_SEQENCE = 5, + DMS_DR_TYPE_SERIAL = 6, + DMS_DR_TYPE_ROLE = 7, + DMS_DR_TYPE_USER = 8, + DMS_DR_TYPE_DC = 9, + DMS_DR_TYPE_INDEX = 10, + DMS_DR_TYPE_TRIGGER = 11, + DMS_DR_TYPE_HEAP = 12, + DMS_DR_TYPE_HEAP_PART = 13, + DMS_DR_TYPE_HEAP_LATCH = 14, + DMS_DR_TYPE_HEAP_PART_LATCH = 15, + DMS_DR_TYPE_BTREE_LATCH = 16, + DMS_DR_TYPE_BRTEE_PART_LATCH = 17, + DMS_DR_TYPE_INTERVAL_PART_LATCH = 18, + DMS_DR_TYPE_LOB_LATCH = 19, + DMS_DR_TYPE_LOB_PART_LATCH = 20, + DMS_DR_TYPE_PROFILE = 21, + DMS_DR_TYPE_UNDO = 22, + DMS_DR_TYPE_PROC = 23, + DMS_DR_TYPE_GDV = 24, + DMS_DR_TYPE_MAX, +} dms_dr_type_t; + +// persistent distributed resource id +typedef enum en_dms_persistent_id { + DMS_ID_DATABASE_CTRL = 0, + DMS_ID_DATABASE_SWITCH_CTRL = 1, + DMS_ID_DATABASE_BAKUP = 2, + DMS_ID_DATABASE_LINK = 3, + DMS_ID_SPACE_OP = 10, + DMS_ID_SPACE_BLOCK = 11, + DMS_ID_DDL_OP = 20, + DMS_ID_DC_CTX = 30, + DMS_ID_INDEX_RECYLE = 40, + DMS_ID_UNDO_SET = 50, +}dms_pst_id_t; + +// for smon deadlock 
check +#define DMS_SMON_DLOCK_MSG_MAX_LEN 24 +#define DMS_SMON_TLOCK_MSG_MAX_LEN 24 +#define DMS_SMON_ILOCK_MSG_MAX_LEN 60 +#define DMS_SMON_MAX_SQL_LEN 10240 // The maximum size of a message to be transferred in the MES is 32 KB. +#define MAX_TABLE_LOCK_NUM 2048 + +typedef enum en_dms_smon_req_type { + DMS_SMON_REQ_SID_BY_RMID = 0, + DMS_SMON_REQ_DLOCK_BY_RMID = 1, + DMS_SMON_REQ_ROWID_BY_RMID = 2, +}dms_smon_req_type_t; + +typedef enum en_dms_smon_req_tlock_type { + DMS_SMON_REQ_TABLE_LOCK_SHARED_MSG = 0, + DMS_SMON_REQ_TABLE_LOCK_EXCLU_MSG = 1, + DMS_SMON_REQ_TABLE_LOCK_ALL_MSG = 2, +}dms_smon_req_tlock_type_t; + +typedef enum en_dms_smon_req_rm_type { + DMS_SMON_REQ_TABLE_LOCK_RM = 0, + DMS_SMON_REQ_TABLE_LOCK_WAIT_RM = 1, +}dms_smon_req_rm_type_t; + +typedef enum en_dms_smon_check_tlock_type { + DMS_SMON_CHECK_WAIT_EVENT_STATUS_BY_SID = 0, + DMS_SMON_CHECK_WAIT_TABLE_STATUS_BY_TID = 1, +}dms_smon_check_tlock_type_t; + +/* distributed resource id definition */ +#pragma pack(4) +typedef struct st_dms_drid { + union { + struct { + unsigned long long key1; + unsigned long long key2; + }; + struct { + unsigned short type; // lock type + unsigned short uid; // user id, for table lock resource + unsigned int oid; // lock id + unsigned int index; // index id + unsigned int parent_part; // parent partition id + unsigned int part; // partition id + }; + }; +} dms_drid_t; +#pragma pack() + +typedef enum en_drc_res_type { + DRC_RES_INVALID_TYPE, + DRC_RES_PAGE_TYPE, + DRC_RES_LOCK_TYPE, + DRC_RES_LOCAL_LOCK_TYPE, + DRC_RES_TXN_TYPE, + DRC_RES_LOCAL_TXN_TYPE, + DRC_RES_LOCK_ITEM_TYPE, +} drc_res_type_e; + +#define DMS_RESID_SIZE 32 +#define DMS_DRID_SIZE sizeof(dms_drid_t) + +typedef struct st_dms_drlock { + dms_drid_t drid; +} dms_drlock_t; + +typedef struct st_dms_drlatch { + dms_drid_t drid; +} dms_drlatch_t; + +typedef struct st_dms_xid_ctx { + unsigned long long xid; + unsigned char is_scan; + unsigned char inst_id; + unsigned char unused[2]; + unsigned long long scn; 
+} dms_xid_ctx_t; + +typedef struct st_dms_xmap_ctx { + unsigned int xmap; + unsigned int dest_id; +} dms_xmap_ctx_t; + +typedef struct st_dms_context { + unsigned int inst_id; // current instance id + unsigned int sess_id; // current session id + unsigned int sess_rcy; // request page: recovery session flag + void *db_handle; + unsigned char is_try; + unsigned char type; + unsigned short len; + union { + char resid[DMS_RESID_SIZE]; + dms_drid_t lock_id; + dms_xid_ctx_t xid_ctx; + dms_xmap_ctx_t xmap_ctx; + unsigned char edp_inst; + }; +} dms_context_t; + +typedef struct st_dms_cr { + void *cr_cursor; + unsigned long long query_scn; + unsigned int ssn; + char *page; +} dms_cr_t; + +typedef struct st_dms_opengauss_xid_csn { + unsigned long long xid; + unsigned long long snapshotcsn; + unsigned long long snapshotxmin; + unsigned char is_committed; + unsigned char is_mvcc; + unsigned char is_nest; +} dms_opengauss_xid_csn_t; + +typedef struct st_dms_opengauss_csn_result { + unsigned long long csn; + unsigned char sync; + unsigned int clogstatus; + unsigned long long lsn; +} dms_opengauss_csn_result_t; + +typedef struct dms_opengauss_txn_snapshot { + unsigned long long xmin; + unsigned long long xmax; + unsigned long long snapshotcsn; + unsigned long long localxmin; +} dms_opengauss_txn_snapshot_t; + +typedef enum dms_opengauss_lock_req_type { + LOCK_NORMAL_MODE, + LOCK_RELEASE_SELF, + LOCK_REACQUIRE, +} dms_opengauss_lock_req_type_t; + +typedef struct st_dms_txn_info { + unsigned long long scn; + unsigned char is_owscn; + unsigned char status; + unsigned char unused[2]; +} dms_txn_info_t; + +typedef struct st_dms_txn_snapshot { + unsigned long long scn; + unsigned int xnum; + unsigned short rmid; + unsigned char status; + unsigned char in_process; +} dms_txn_snapshot_t; + +typedef struct st_dms_edp_info { + char page[DMS_PAGEID_SIZE]; + unsigned long long lsn; + union { + unsigned char id; + unsigned long long edp_map; + }; +} dms_edp_info_t; + +typedef struct 
st_dms_buf_ctrl { + volatile unsigned char is_remote_dirty; + volatile unsigned char lock_mode; // used only in DMS, 0: Null, 1: Shared lock, 2: Exclusive lock + // used only in DMS, 0: no, 1: yes, this page is old version, + // can be discard only after latest version in other instance is cleaned + volatile unsigned char is_edp; + volatile unsigned char force_request; // force to request page from remote + volatile unsigned char need_chk_master; // suport owner transfer page again + volatile unsigned char need_flush; // for recovery, owner is abort, copy instance should flush before release + unsigned long long edp_scn; // set when become edp, lastest scn when page becomes edp + unsigned long long edp_map; // records edp instance + long long last_ckpt_time; // last time when local edp page is added to group. +#ifdef OPENGAUSS + int buf_id; + unsigned int state; + unsigned int pblk_relno; + unsigned int pblk_blkno; + unsigned long long pblk_lsn; + unsigned long long lsn_on_disk; + unsigned char seg_fileno; + unsigned int seg_blockno; +#endif +}dms_buf_ctrl_t; + +typedef enum en_dms_page_latch_mode { + DMS_PAGE_LATCH_MODE_S = 1, + DMS_PAGE_LATCH_MODE_X = 2, + DMS_PAGE_LATCH_MODE_FORCE_S = 3, +} dms_page_latch_mode_t; + +#define DMS_ENTER_PAGE_NORMAL (unsigned char)0 // normal access for single page +#define DMS_ENTER_PAGE_RESIDENT (unsigned char)1 // resident in memory, not in LRU +#define DMS_ENTER_PAGE_PINNED (unsigned char)2 // temp pinned for undo rollback +#define DMS_ENTER_PAGE_NO_READ (unsigned char)4 // don't read from disk,caller will initialize +#define DMS_ENTER_PAGE_TRY (unsigned char)8 // try to read from buffer, don't read from disk +#define DMS_ENTER_PAGE_LRU_STATS_SCAN (unsigned char)0x10 // add to stats LRU list +#define DMS_ENTER_PAGE_LRU_HIGH_AGE (unsigned char)0x20 // decrease possibility to be recycled of page +#define DMS_ENTER_PAGE_LOCAL (unsigned char)0x40 // check local page without redo log +#define DMS_ENTER_PAGE_REMOTE (unsigned char)0x80 
// remote access mode + +// pack read page parameters together +typedef struct st_dms_read_page_assist { + char *pageid; + unsigned long long query_scn; // if not invalid, try edp, check edp scn with query_scn + dms_page_latch_mode_t mode; + unsigned char options; // DMS_ENTER_PAGE_XXX + unsigned char try_edp; // check edp if local page not usable + unsigned short read_num; // == 1 no prefetch, > 1 prefetch multiple pages +} dms_read_page_assist_t; + +typedef enum en_dms_buf_load_status { + DMS_BUF_NEED_LOAD = 0x00, + DMS_BUF_IS_LOADED = 0x01, + DMS_BUF_LOAD_FAILED = 0x02, + DMS_BUF_NEED_TRANSFER = 0x04, // used only in DTC, means need ask master/coordinator for latest version +} dms_buf_load_status_t; + +typedef enum en_dms_cr_status { + DMS_CR_TRY_READ = 0, + DMS_CR_LOCAL_READ, + DMS_CR_READ_PAGE, + DMS_CR_CONSTRUCT, + DMS_CR_PAGE_VISIBLE, + DMS_CR_CHECK_MASTER, + DMS_CR_REQ_MASTER, + DMS_CR_REQ_OWNER, +} dms_cr_status_t; + +typedef enum en_dms_log_level { + DMS_LOG_LEVEL_ERROR = 0, // error conditions + DMS_LOG_LEVEL_WARN, // warning conditions + DMS_LOG_LEVEL_INFO, // informational messages + DMS_LOG_LEVEL_COUNT, +} dms_log_level_t; + +typedef enum en_dms_log_id { + DMS_LOG_ID_RUN = 0, + DMS_LOG_ID_DEBUG, + DMS_LOG_ID_COUNT, +} dms_log_id_t; + +/* +* lock mode in DMS, we use it to coordinate concurrent access among different instances. 
+*/ +typedef enum en_dms_lock_mode { + DMS_LOCK_NULL = 0, + DMS_LOCK_SHARE = 1, + DMS_LOCK_EXCLUSIVE = 2, + DMS_LOCK_MODE_MAX = 3, +} dms_lock_mode_t; + +typedef enum en_dms_conn_mode { + DMS_CONN_MODE_TCP = 0, + DMS_CONN_MODE_RDMA = 1, +} dms_conn_mode_t; + +typedef enum en_dms_txn_wait_status { + DMS_REMOTE_TXN_WAIT = 0, + DMS_REMOTE_TXN_END = 1 +} dms_txn_wait_status_t; + +typedef enum en_dms_xact_status { + DMS_XACT_END = 0, + DMS_XACT_BEGIN = 1, + DMS_XACT_PHASE1 = 2, + DMS_XACT_PHASE2 = 3 +} dms_xact_status_t; + +typedef enum en_dms_cm_stat { + DMS_CM_RES_UNKNOWN = 0, + DMS_CM_RES_ONLINE = 1, + DMS_CM_RES_OFFLINE = 2, + /********************/ + DMS_CM_RES_STATE_COUNT = 3, +} dms_cm_stat_t; + +typedef struct st_inst_list { + unsigned char inst_id_list[DMS_MAX_INSTANCES]; + unsigned char inst_id_count; + unsigned char reserve[3]; +} instance_list_t; + +typedef enum en_dms_wait_event { + DMS_EVT_IDLE_WAIT = 0, + + DMS_EVT_GC_BUFFER_BUSY, + DMS_EVT_DCS_REQ_MASTER4PAGE_1WAY, + DMS_EVT_DCS_REQ_MASTER4PAGE_2WAY, + DMS_EVT_DCS_REQ_MASTER4PAGE_3WAY, + DMS_EVT_DCS_REQ_MASTER4PAGE_TRY, + DMS_EVT_DCS_REQ_OWNER4PAGE, + DMS_EVT_DCS_CLAIM_OWNER, + DMS_EVT_DCS_RELEASE_OWNER, + DMS_EVT_DCS_INVLDT_SHARE_COPY_REQ, + DMS_EVT_DCS_INVLDT_SHARE_COPY_PROCESS, + DMS_EVT_DCS_TRANSFER_PAGE_LATCH, + DMS_EVT_DCS_TRANSFER_PAGE_READONLY2X, + DMS_EVT_DCS_TRANSFER_PAGE_FLUSHLOG, + DMS_EVT_DCS_TRANSFER_PAGE, + DMS_EVT_PCR_REQ_BTREE_PAGE, + DMS_EVT_PCR_REQ_HEAP_PAGE, + DMS_EVT_PCR_REQ_MASTER, + DMS_EVT_PCR_REQ_OWNER, + DMS_EVT_PCR_CHECK_CURR_VISIBLE, + DMS_EVT_TXN_REQ_INFO, + DMS_EVT_TXN_REQ_SNAPSHOT, + DMS_EVT_DLS_REQ_LOCK, + DMS_EVT_DLS_REQ_TABLE, + DMS_EVT_DLS_WAIT_TXN, + DMS_EVT_DEAD_LOCK_TXN, + DMS_EVT_DEAD_LOCK_TABLE, + DMS_EVT_DEAD_LOCK_ITL, + DMS_EVT_BROADCAST_BTREE_SPLIT, + DMS_EVT_BROADCAST_ROOT_PAGE, + + DMS_EVT_COUNT, +} dms_wait_event_t; + +typedef enum en_dms_sysstat { + DMS_STAT_BUFFER_GETS = 0, + DMS_STAT_BUFFER_SENDS, + DMS_STAT_CR_READS, + DMS_STAT_CR_GETS, + 
DMS_STAT_CR_SENDS, + DMS_STAT_NET_TIME, + + DMS_STAT_COUNT, +} dms_sysstat_t; + +typedef enum en_dms_role { + DMS_ROLE_UNKNOW = 0, + DMS_ROLE_REFORMER = 1, + DMS_ROLE_PARTNER = 2 +} dms_role_t; + +#define DCS_BATCH_BUF_SIZE (1024 * 30) +#define DCS_RLS_OWNER_BATCH_SIZE (DCS_BATCH_BUF_SIZE / DMS_PAGEID_SIZE) +typedef struct st_dcs_batch_buf { + char buffers[DMS_MAX_INSTANCES][DCS_BATCH_BUF_SIZE]; + unsigned int count[DMS_MAX_INSTANCES]; + unsigned int max_count; +} dcs_batch_buf_t; + +typedef int(*dms_get_list_stable)(void *db_handle, unsigned long long *list_stable, unsigned char *reformer_id); +typedef int(*dms_save_list_stable)(void *db_handle, unsigned long long list_stable, unsigned char reformer_id, + unsigned int save_ctrl); +typedef int(*dms_get_dms_status)(void *db_handle); +typedef void(*dms_set_dms_status)(void *db_handle, int status); +typedef int(*dms_confirm_converting)(void *db_handle, char *pageid, unsigned char smon_chk, + unsigned char *lock_mode, unsigned long long *edp_map, unsigned long long *lsn); +typedef int(*dms_confirm_owner)(void *db_handle, char *pageid, unsigned char *lock_mode, unsigned char *is_edp, + unsigned long long *lsn); +typedef int(*dms_flush_copy)(void *db_handle, char *pageid); +typedef int(*dms_edp_lsn)(void *db_handle, char *pageid, unsigned long long *lsn); +typedef int(*dms_disk_lsn)(void *db_handle, char *pageid, unsigned long long *lsn); +typedef int(*dms_recovery)(void *db_handle, void *recovery_list, void *remove_list, int is_reformer); +typedef int(*dms_opengauss_startup)(void *db_handle); +typedef int(*dms_opengauss_recovery_standby)(void *db_handle, int inst_id); +typedef int(*dms_opengauss_recovery_primary)(void *db_handle, int inst_id); +typedef void(*dms_reform_start_notify)(void *db_handle, dms_role_t role); +typedef int(*dms_undo_init)(void *db_handle, unsigned char inst_id); +typedef int(*dms_tx_area_init)(void *db_handle, unsigned char inst_id); +typedef int(*dms_tx_area_load)(void *db_handle, unsigned char 
inst_id); +typedef unsigned char(*dms_recovery_in_progress)(void *db_handle); +typedef unsigned int(*dms_get_page_hash_val)(const char pageid[DMS_PAGEID_SIZE]); +typedef unsigned long long(*dms_get_page_lsn)(const dms_buf_ctrl_t *buf_ctrl); +typedef int(*dms_set_buf_load_status)(dms_buf_ctrl_t *buf_ctrl, dms_buf_load_status_t dms_buf_load_status); +typedef int(*dms_remove_buf_load_status)(dms_buf_ctrl_t *buf_ctrl, dms_buf_load_status_t dms_buf_load_status); +typedef void(*dms_update_global_lsn)(void *db_handle, unsigned long long lamport_lsn); +typedef void(*dms_update_global_scn)(void *db_handle, unsigned long long lamport_scn); +typedef void(*dms_update_page_lfn)(dms_buf_ctrl_t *buf_ctrl, unsigned long long lastest_lfn); +typedef unsigned long long (*dms_get_page_lfn)(dms_buf_ctrl_t *buf_ctrl); +typedef unsigned long long(*dms_get_global_lfn)(void *db_handle); +typedef unsigned long long(*dms_get_global_scn)(void *db_handle); +typedef unsigned long long(*dms_get_global_lsn)(void *db_handle); +typedef unsigned long long(*dms_get_global_flushed_lfn)(void *db_handle); +typedef int(*dms_read_local_page4transfer)(void *db_handle, char pageid[DMS_PAGEID_SIZE], + dms_lock_mode_t mode, dms_buf_ctrl_t **buf_ctrl); +typedef int(*dms_try_read_local_page)(void *db_handle, char pageid[DMS_PAGEID_SIZE], + dms_lock_mode_t mode, dms_buf_ctrl_t **buf_ctrl); +typedef unsigned char(*dms_page_is_dirty)(dms_buf_ctrl_t *buf_ctrl); +typedef void(*dms_leave_local_page)(void *db_handle, dms_buf_ctrl_t *buf_ctrl); +typedef void(*dms_get_pageid)(dms_buf_ctrl_t *buf_ctrl, char **pageid, unsigned int *size); +typedef char *(*dms_get_page)(dms_buf_ctrl_t *buf_ctrl); +typedef void (*dms_invalidate_page)(void *db_handle, char pageid[DMS_PAGEID_SIZE]); +typedef void *(*dms_get_db_handle)(unsigned int *db_handle_index); +typedef void *(*dms_stack_push_cr_cursor)(void *db_handle); +typedef void (*dms_stack_pop_cr_cursor)(void *db_handle); +typedef void(*dms_init_cr_cursor)(void *cr_cursor, char 
pageid[DMS_PAGEID_SIZE], char xid[DMS_XID_SIZE], + unsigned long long query_scn, unsigned int ssn); +typedef void(*dms_init_index_cr_cursor)(void *cr_cursor, char pageid[DMS_PAGEID_SIZE], char xid[DMS_XID_SIZE], + unsigned long long query_scn, unsigned int ssn, char entry[DMS_PAGEID_SIZE], char *index_profile); +typedef void(*dms_init_check_cr_cursor)(void *cr_cursor, char rowid[DMS_ROWID_SIZE], char xid[DMS_XID_SIZE], + unsigned long long query_scn, unsigned int ssn); +typedef char *(*dms_get_wxid_from_cr_cursor)(void *cr_cursor); +typedef unsigned char(*dms_get_instid_of_xid_from_cr_cursor)(void *db_handle, void *cr_cursor); +typedef int(*dms_get_page_invisible_txn_list)(void *db_handle, void *cr_cursor, void *cr_page, + unsigned char *is_empty_txn_list, unsigned char *exist_waiting_txn); +typedef int(*dms_reorganize_page_with_undo)(void *db_handle, void *cr_cursor, void *cr_page); +typedef int(*dms_check_heap_page_visible_with_undo_snapshot)(void *db_handle, void *cr_cursor, void *page, + unsigned char *is_found); +typedef void(*dms_set_page_force_request)(void *db_handle, char pageid[DMS_PAGEID_SIZE]); +typedef void(*dms_get_entry_pageid_from_cr_cursor)(void *cr_cursor, char index_entry_pageid[DMS_PAGEID_SIZE]); +typedef void(*dms_get_index_profile_from_cr_cursor)(void *cr_cursor, char index_profile[DMS_INDEX_PROFILE_SIZE]); +typedef void(*dms_get_xid_from_cr_cursor)(void *cr_cursor, char xid[DMS_XID_SIZE]); +typedef void(*dms_get_rowid_from_cr_cursor)(void *cr_cursor, char rowid[DMS_ROWID_SIZE]); +typedef int(*dms_read_page)(void *db_handle, dms_read_page_assist_t *assist, char **page_addr); +typedef void(*dms_leave_page)(void *db_handle, unsigned char changed); +typedef char *(*dms_mem_alloc)(void *context, unsigned int size); +typedef void(*dms_mem_free)(void *context, void *ptr); +typedef void(*dms_mem_reset)(void *context); +// The maximum length of output_msg is 128 bytes. 
+typedef int (*dms_process_broadcast)(void *db_handle, char *data, unsigned int len, char *output_msg, + unsigned int *output_msg_len); +typedef int (*dms_process_broadcast_ack)(void *db_handle, char *data, unsigned int len); +typedef int(*dms_get_txn_info)(void *db_handle, unsigned long long xid, + unsigned char is_scan, dms_txn_info_t *txn_info); +typedef int(*dms_get_opengauss_xid_csn)(void *db_handle, dms_opengauss_xid_csn_t *csn_req, + dms_opengauss_csn_result_t *csn_ack); +typedef int(*dms_get_opengauss_update_xid)(void *db_handle, unsigned long long xid, + unsigned int t_infomask, unsigned int t_infomask2, unsigned long long *uxid); +typedef int(*dms_get_opengauss_txn_status)(void *db_handle, unsigned long long xid, unsigned char type, + unsigned char* status); +typedef int(*dms_opengauss_lock_buffer)(void *db_handle, int buffer, unsigned char lock_mode, + unsigned char* curr_mode); +typedef int(*dms_get_txn_snapshot)(void *db_handle, unsigned int xmap, dms_txn_snapshot_t *txn_snapshot); +typedef int(*dms_get_opengauss_txn_snapshot)(void *db_handle, dms_opengauss_txn_snapshot_t *txn_snapshot); +typedef void (*dms_log_output)(dms_log_id_t log_type, dms_log_level_t log_level, const char *code_file_name, + unsigned int code_line_num, const char *module_name, const char *format, ...); +typedef void (*dms_log_flush)(void *db_handle, unsigned long long *lsn); +typedef int(*dms_process_edp)(void *db_handle, dms_edp_info_t *pages, unsigned int count); +typedef void (*dms_clean_ctrl_edp)(void *db_handle, dms_buf_ctrl_t *dms_ctrl); +typedef char *(*dms_display_pageid)(char *display_buf, unsigned int count, char *pageid); +typedef char *(*dms_display_xid)(char *display_buf, unsigned int count, char *xid); +typedef char *(*dms_display_rowid)(char *display_buf, unsigned int count, char *rowid); +typedef int (*dms_drc_buf_res_rebuild)(void *db_handle); +typedef unsigned char(*dms_ckpt_session)(void *db_handle); +typedef void (*dms_check_if_build_complete)(void *db_handle, 
unsigned int *build_complete); +typedef int (*dms_db_is_primary)(void *db_handle); +typedef void (*dms_set_switchover_result)(void *db_handle, int result); +typedef void (*dms_set_db_standby)(void *db_handle); +typedef int (*dms_load_tablespace)(void *db_handle, unsigned int *has_offline); + +// for openGauss +typedef void (*dms_thread_init_t)(unsigned char need_startup, char **reg_data); +typedef int (*dms_get_db_primary_id)(void *db_handle, unsigned int *primary_id); +typedef int (*dms_set_buf_info)(dms_buf_ctrl_t *buf_ctrl); + +// for ssl +typedef int(*dms_decrypt_pwd_t)(const char *cipher, unsigned int len, char *plain, unsigned int size); + +// for smon check deadlock +typedef unsigned short (*dms_get_sid_by_rmid)(void *db_handle, unsigned short rmid); +typedef void (*dms_get_txn_dlock_by_rmid)(void *db_handle, unsigned short rmid, char *dlock, unsigned int dlock_len); +typedef void (*dms_get_rowid_by_rmid)(void *db_handle, unsigned short rmid, char rowid[DMS_ROWID_SIZE]); +typedef void (*dms_get_sql_from_session)(void *db_handle, unsigned short sid, char *sql_str, unsigned int sql_str_len); +typedef void (*dms_get_itl_lock_by_xid)(void *db_handle, char xid[DMS_XID_SIZE], char *ilock, unsigned int ilock_len); +typedef void (*dms_check_tlock_status)(void *db_handle, unsigned int type, unsigned short sid, + unsigned long long tableid, unsigned int *in_use); +typedef void (*dms_get_tlock_msg_by_tid)(void *db_handle, unsigned long long table_id, unsigned int type, char *rsp, + unsigned int rsp_len, unsigned int *tlock_cnt); +typedef void (*dms_get_tlock_msg_by_rm)(void *db_handle, unsigned short sid, unsigned short rmid, int type, char *tlock, + unsigned int tlock_len); + +typedef int (*dms_switchover_demote)(void *db_handle); +typedef int (*dms_switchover_promote)(void *db_handle); +typedef int (*dms_switchover_promote_opengauss)(void *db_handle, unsigned char origPrimaryId); +typedef int (*dms_failover_promote_opengauss)(void *db_handle); +typedef int 
(*dms_refresh_point)(void *db_handle); + +typedef struct st_dms_callback { + // used in reform + dms_get_list_stable get_list_stable; + dms_save_list_stable save_list_stable; + dms_get_dms_status get_dms_status; + dms_set_dms_status set_dms_status; + dms_confirm_owner confirm_owner; + dms_confirm_converting confirm_converting; + dms_flush_copy flush_copy; + dms_edp_lsn edp_lsn; + dms_disk_lsn disk_lsn; + dms_recovery recovery; + dms_db_is_primary db_is_primary; + dms_undo_init undo_init; + dms_tx_area_init tx_area_init; + dms_tx_area_load tx_area_load; + dms_recovery_in_progress recovery_in_progress; + dms_drc_buf_res_rebuild dms_reform_rebuild_buf_res; + dms_check_if_build_complete check_if_build_complete; + + // used in reform for opengauss + dms_thread_init_t dms_thread_init; + dms_get_db_primary_id get_db_primary_id; + dms_opengauss_startup opengauss_startup; + dms_opengauss_recovery_standby opengauss_recovery_standby; + dms_opengauss_recovery_primary opengauss_recovery_primary; + dms_reform_start_notify reform_start_notify; + dms_set_buf_info set_buf_info; + + dms_get_page_hash_val get_page_hash_val; + dms_get_page_lsn get_page_lsn; + dms_set_buf_load_status set_buf_load_status; + dms_remove_buf_load_status remove_buf_load_status; + dms_update_global_scn update_global_scn; + dms_update_global_lsn update_global_lsn; + dms_update_page_lfn update_page_lfn; + dms_get_global_scn get_global_scn; + dms_get_global_lsn get_global_lsn; + dms_get_global_lfn get_global_lfn; + dms_get_page_lfn get_page_lfn; + dms_get_global_flushed_lfn get_global_flushed_lfn; + dms_read_local_page4transfer read_local_page4transfer; + dms_try_read_local_page try_read_local_page; + dms_page_is_dirty page_is_dirty; + dms_leave_local_page leave_local_page; + dms_get_pageid get_pageid; + dms_get_page get_page; + dms_invalidate_page invld_share_copy; + dms_get_db_handle get_db_handle; + dms_stack_push_cr_cursor stack_push_cr_cursor; + dms_stack_pop_cr_cursor stack_pop_cr_cursor; + 
dms_init_cr_cursor init_heap_cr_cursor; + dms_init_index_cr_cursor init_index_cr_cursor; + dms_init_check_cr_cursor init_check_cr_cursor; + dms_get_wxid_from_cr_cursor get_wxid_from_cr_cursor; + dms_get_instid_of_xid_from_cr_cursor get_instid_of_xid_from_cr_cursor; + dms_get_page_invisible_txn_list get_heap_invisible_txn_list; + dms_get_page_invisible_txn_list get_index_invisible_txn_list; + dms_reorganize_page_with_undo reorganize_heap_page_with_undo; + dms_reorganize_page_with_undo reorganize_index_page_with_undo; + dms_check_heap_page_visible_with_undo_snapshot check_heap_page_visible_with_udss; + dms_set_page_force_request set_page_force_request; + dms_get_entry_pageid_from_cr_cursor get_entry_pageid_from_cr_cursor; + dms_get_index_profile_from_cr_cursor get_index_profile_from_cr_cursor; + dms_get_xid_from_cr_cursor get_xid_from_cr_cursor; + dms_get_rowid_from_cr_cursor get_rowid_from_cr_cursor; + dms_read_page read_page; + dms_leave_page leave_page; + + /* memory manager callback functions provided by DB */ + dms_mem_alloc mem_alloc; + dms_mem_free mem_free; + dms_mem_reset mem_reset; + + dms_process_broadcast process_broadcast; + dms_process_broadcast_ack process_broadcast_ack; + dms_get_txn_info get_txn_info; + dms_get_opengauss_xid_csn get_opengauss_xid_csn; + dms_get_opengauss_update_xid get_opengauss_update_xid; + dms_get_opengauss_txn_status get_opengauss_txn_status; + dms_opengauss_lock_buffer opengauss_lock_buffer; + dms_get_txn_snapshot get_txn_snapshot; + dms_get_opengauss_txn_snapshot get_opengauss_txn_snapshot; + dms_log_output log_output; + dms_log_flush log_flush; + dms_process_edp ckpt_edp; + dms_process_edp clean_edp; + dms_ckpt_session ckpt_session; + dms_clean_ctrl_edp clean_ctrl_edp; + dms_display_pageid display_pageid; + dms_display_xid display_xid; + dms_display_rowid display_rowid; + + // for smon deadlock check + dms_get_sid_by_rmid get_sid_by_rmid; + dms_get_txn_dlock_by_rmid get_txn_dlock_by_rmid; + dms_get_rowid_by_rmid 
get_rowid_by_rmid; + dms_get_sql_from_session get_sql_from_session; + dms_get_itl_lock_by_xid get_itl_lock_by_xid; + dms_check_tlock_status check_tlock_status; + dms_get_tlock_msg_by_tid get_tlock_by_tid; + dms_get_tlock_msg_by_rm get_tlock_by_rm; + + // for switchover + dms_switchover_demote switchover_demote; + dms_switchover_promote switchover_promote; + dms_switchover_promote_opengauss switchover_promote_opengauss; + dms_failover_promote_opengauss failover_promote_opengauss; + dms_set_switchover_result set_switchover_result; + dms_set_db_standby set_db_standby; + dms_load_tablespace load_tablespace; + + dms_refresh_point refresh_point; +} dms_callback_t; + +typedef struct st_dms_instance_net_addr { + char ip[DMS_MAX_IP_LEN]; + unsigned short port; + unsigned char reserved[2]; +} dms_instance_net_addr_t; + +typedef struct st_dms_profile { + unsigned int inst_id; + unsigned long long inst_map; + dms_callback_t callback; + unsigned long long data_buffer_size; + unsigned int channel_cnt; // Number of connections between instances + unsigned int work_thread_cnt; // Number of MES working threads + unsigned int max_session_cnt; // Number of client sessions to be supported + unsigned short mfc_tickets; // message flow control, max requests from A instance to B instance + unsigned short mfc_max_wait_ticket_time; // max time to wait for ticket while sending a message + unsigned int page_size; + unsigned long long recv_msg_buf_size; + + dms_conn_mode_t pipe_type; // Inter-instance communication mode. Currently, only TCP and RDMA are supported. + unsigned int inst_cnt; // Number of cluster instances + dms_instance_net_addr_t inst_net_addr[DMS_MAX_INSTANCES]; // Cluster instance ip and port + // Indicates whether to connected to other instances during DMS initialization. 
+ unsigned int conn_created_during_init : 1; + unsigned int resource_catalog_centralized : 1; // 1: centralized, 0: distributed + unsigned int load_balance_mode : 1; // 1: primary&standby + unsigned int time_stat_enabled : 1; + unsigned int reserved : 28; + unsigned int elapsed_switch; + unsigned char rdma_rpc_use_busypoll; // busy poll need to occupy the cpu core + unsigned char rdma_rpc_is_bind_core; + unsigned char rdma_rpc_bind_core_start; + unsigned char rdma_rpc_bind_core_end; + char ock_log_path[DMS_OCK_LOG_PATH_LEN]; + unsigned char enable_reform; +} dms_profile_t; + +#define DMS_BUF_CTRL_IS_OWNER(ctrl) ((ctrl)->lock_mode == DMS_LOCK_EXCLUSIVE || \ + ((ctrl)->lock_mode == DMS_LOCK_SHARE)) +#define DMS_BUF_CTRL_NOT_LOCK(ctrl) ((ctrl)->lock_mode == DMS_LOCK_NULL) + +#define DMS_LOCAL_MAJOR_VER_WEIGHT 1000000 +#define DMS_LOCAL_MINOR_VER_WEIGHT 1000 +#define DMS_LOCAL_MAJOR_VERSION 0 +#define DMS_LOCAL_MINOR_VERSION 0 +#define DMS_LOCAL_VERSION 22 + +#ifdef __cplusplus +} +#endif + +#endif /* __DMS_H__ */ + diff --git a/src/include/ddes/dms/ss_common_attr.h b/src/include/ddes/dms/ss_common_attr.h new file mode 100644 index 000000000..4cbc42a72 --- /dev/null +++ b/src/include/ddes/dms/ss_common_attr.h @@ -0,0 +1,135 @@ + +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * --------------------------------------------------------------------------------------- + * + * ss_common_attr.h + * + * IDENTIFICATION + * src/include/ddes/dms/ss_common_attr.h + * + * --------------------------------------------------------------------------------------- + */ +#ifndef SS_COMMON_ATTR_H +#define SS_COMMON_ATTR_H + +#ifndef OPENGAUSS +#define OPENGAUSS +#endif + +#include "dms_api.h" +#include "ss_init.h" + +#define ENABLE_DMS (g_instance.attr.attr_storage.dms_attr.enable_dms && !IsInitdb) +#define ENABLE_SS_LOG (g_instance.attr.attr_storage.dms_attr.enable_log_level) +#define ENABLE_REFORM (g_instance.attr.attr_storage.dms_attr.enable_reform) + +#define SS_REFORM_REFORMER \ + (ENABLE_DMS && (g_instance.dms_cxt.SSReformInfo.in_reform == true) \ + && (g_instance.dms_cxt.SSReformInfo.dms_role == DMS_ROLE_REFORMER)) + +#define SS_REFORM_PARTNER \ + (ENABLE_DMS && (g_instance.dms_cxt.SSReformInfo.in_reform == true) \ + && (g_instance.dms_cxt.SSReformInfo.dms_role != DMS_ROLE_REFORMER)) + +#define SS_NORMAL_PRIMARY \ + (ENABLE_DMS && (g_instance.dms_cxt.SSClusterState == NODESTATE_NORMAL) \ + && (g_instance.dms_cxt.SSReformerControl.primaryInstId == SS_MY_INST_ID) \ + && (g_instance.dms_cxt.SSReformInfo.in_reform == false)) + +#define SS_NORMAL_STANDBY \ + (ENABLE_DMS && (g_instance.dms_cxt.SSClusterState == NODESTATE_NORMAL) \ + && (g_instance.dms_cxt.SSReformerControl.primaryInstId != SS_MY_INST_ID) \ + && (g_instance.dms_cxt.SSReformInfo.in_reform == false)) + +#define SS_PRIMARY_MODE (SS_NORMAL_PRIMARY || SS_REFORM_REFORMER) + +#define SS_STANDBY_MODE (SS_NORMAL_STANDBY || SS_REFORM_PARTNER) + +#define SS_IN_REFORM (ENABLE_DMS && g_instance.dms_cxt.SSReformInfo.in_reform == true) + +#define SS_STANDBY_FAILOVER (((g_instance.dms_cxt.SSClusterState == NODESTATE_NORMAL) \ + || (g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_FAILOVER_PROMOTING)) \ + && (g_instance.dms_cxt.SSReformerControl.primaryInstId != SS_MY_INST_ID) \ + && 
SS_REFORM_REFORMER) + +#define SS_PERFORMING_SWITCHOVER \ + (ENABLE_DMS && (g_instance.dms_cxt.SSClusterState > NODESTATE_NORMAL && \ + g_instance.dms_cxt.SSClusterState != NODESTATE_STANDBY_FAILOVER_PROMOTING)) + +#define SS_STANDBY_PROMOTING \ + (ENABLE_DMS && (g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_PROMOTING)) + +#define SS_PRIMARY_DEMOTING \ + (ENABLE_DMS && (g_instance.dms_cxt.SSClusterState >= NODESTATE_PRIMARY_DEMOTING) && \ + (g_instance.dms_cxt.SSClusterState <= NODESTATE_PROMOTE_APPROVE)) + +#define SS_PRIMARY_DEMOTED \ + (ENABLE_DMS && (g_instance.dms_cxt.SSClusterState == NODESTATE_PROMOTE_APPROVE)) + +#define SS_STANDBY_WAITING \ + (ENABLE_DMS && (g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_WAITING || \ + g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_REDIRECT)) + +/* DMS_BUF_NEED_LOAD */ +#define BUF_NEED_LOAD 0x1 +/* DMS_BUF_IS_LOADED */ +#define BUF_IS_LOADED 0x2 +/* DMS_BUF_LOAD_FAILED */ +#define BUF_LOAD_FAILED 0x4 +/* DMS_BUF_NEED_TRANSFER */ +#define BUF_NEED_TRANSFER 0x8 +/* mark buffer whether is extended when dms read from disk */ +#define BUF_IS_EXTEND 0x10 + +/* mark buffer whether is persistent when dms read from disk, don't clear */ +#define BUF_IS_RELPERSISTENT 0x20 +#define BUF_IS_RELPERSISTENT_TEMP 0x40 +#define BUF_READ_MODE_ZERO_LOCK 0x80 + +#define SS_BROADCAST_FAILED_RETRYCOUNTS 4 +#define SS_BROADCAST_WAIT_INFINITE (0xFFFFFFFF) +#define SS_BROADCAST_WAIT_FIVE_SECONDS (5000) +#define SS_BROADCAST_WAIT_ONE_SECOND (1000) +#define SS_BROADCAST_WAIT_FIVE_MICROSECONDS (5) + +typedef enum SSBroadcastOp { + BCAST_GET_XMIN = 0, + BCAST_CANCEL_TRX_FOR_SWITCHOVER, + BCAST_SI, + BCAST_SEGDROPTL, + BCAST_DROP_REL_ALL_BUFFER, + BCAST_DROP_REL_RANGE_BUFFER, + BCAST_DROP_DB_ALL_BUFFER, + BCAST_DROP_SEG_SPACE, + BCAST_CANCEL_TRX_FOR_FAILOVER, + BCAST_DDLLOCK, + BCAST_DDLLOCKRELEASE, + BCAST_DDLLOCKRELEASE_ALL, + BCAST_CHECK_DB_BACKENDS, + BCAST_END +} SSBroadcastOp; + +typedef enum SSBroadcastOpAck { + 
BCAST_GET_XMIN_ACK = 0, + BCAST_CANCEL_TRX_ACK, + BCAST_CHECK_DB_BACKENDS_ACK, + BCAST_ACK_END +} SSBroadcastOpAck; + +typedef struct SSBroadcastCmdOnly { + SSBroadcastOp type; // must be first +} SSBroadcastCmdOnly; + +#endif diff --git a/src/include/ddes/dms/ss_dms.h b/src/include/ddes/dms/ss_dms.h new file mode 100644 index 000000000..127d3be4a --- /dev/null +++ b/src/include/ddes/dms/ss_dms.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * ss_dms.h + * Defines the DMS function pointer. 
+ * + * IDENTIFICATION + * src/include/ddes/dms/ss_dms.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef __SS_DMS_H__ +#define __SS_DMS_H__ + +#include "dms_api.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define SS_LIBDMS_NAME "libdms.so" + +typedef struct st_ss_dms_func { + void *handle; + int (*dms_get_version)(void); + void (*dms_show_version)(char *version); + int (*dms_init)(dms_profile_t *dms_profile); + void (*dms_get_error)(int *errcode, const char **errmsg); + void (*dms_uninit)(void); + int (*dms_request_page)(dms_context_t *dms_ctx, dms_buf_ctrl_t *ctrl, dms_lock_mode_t mode); + int (*dms_broadcast_msg)(dms_context_t *dms_ctx, char *data, unsigned int len, unsigned char handle_recv_msg, + unsigned int timeout); + int (*dms_request_opengauss_update_xid)(dms_context_t *dms_ctx, + unsigned short t_infomask, unsigned short t_infomask2, unsigned long long *uxid); + int (*dms_request_opengauss_xid_csn)(dms_context_t *dms_ctx, dms_opengauss_xid_csn_t *dms_txn_info, + dms_opengauss_csn_result_t *xid_csn_result); + int (*dms_request_opengauss_txn_status)(dms_context_t *dms_ctx, unsigned char request, unsigned char *result); + int (*dms_request_opengauss_txn_snapshot)(dms_context_t *dms_ctx, + dms_opengauss_txn_snapshot_t *dms_txn_snapshot); + int (*dms_register_thread_init)(dms_thread_init_t thrd_init); + int (*dms_release_owner)(dms_context_t *dms_ctx, dms_buf_ctrl_t *ctrl, unsigned char *released); + int (*dms_wait_reform)(unsigned int *has_offline); + int (*dms_buf_res_rebuild_drc)(dms_context_t *dms_ctx, dms_buf_ctrl_t *ctrl, unsigned long long lsn, + unsigned char is_dirty); + int (*dms_is_recovery_session)(unsigned int sid); + int (*drc_get_page_master_id)(char pageid[DMS_PAGEID_SIZE], unsigned char *master_id); + int (*dms_release_page_batch)(dms_context_t *dms_ctx, dcs_batch_buf_t *owner_map, unsigned int *owner_count); + int (*dms_register_ssl_decrypt_pwd)(dms_decrypt_pwd_t cb_func); 
+ int (*dms_set_ssl_param)(const char *param_name, const char *param_value); + int (*dms_get_ssl_param)(const char *param_name, char *param_value, unsigned int size); + int (*dms_recovery_page_need_skip)(char pageid[DMS_PAGEID_SIZE], unsigned char *skip); + int (*dms_reform_failed)(void); + int (*dms_switchover)(unsigned int sess_id); + int (*dms_drc_accessible)(void); + int (*dms_broadcast_opengauss_ddllock)(dms_context_t *dms_ctx, char *data, unsigned int len, + unsigned char handle_recv_msg, unsigned int timeout, unsigned char resend_after_reform); + int (*dms_reform_last_failed)(void); +} ss_dms_func_t; + +int ss_dms_func_init(); +int dms_init(dms_profile_t *dms_profile); +void dms_get_error(int *errcode, const char **errmsg); +void dms_uninit(void); +int dms_request_page(dms_context_t *dms_ctx, dms_buf_ctrl_t *ctrl, dms_lock_mode_t mode); +int dms_broadcast_msg(dms_context_t *dms_ctx, char *data, unsigned int len, unsigned char handle_recv_msg, + unsigned int timeout); +int dms_request_opengauss_update_xid(dms_context_t *dms_ctx, + unsigned short t_infomask, unsigned short t_infomask2, unsigned long long *uxid); +int dms_request_opengauss_xid_csn(dms_context_t *dms_ctx, dms_opengauss_xid_csn_t *dms_txn_info, + dms_opengauss_csn_result_t *xid_csn_result); +int dms_request_opengauss_txn_status(dms_context_t *dms_ctx, unsigned char request, unsigned char *result); +int dms_request_opengauss_txn_snapshot(dms_context_t *dms_ctx, + dms_opengauss_txn_snapshot_t *dms_txn_snapshot); +int dms_register_thread_init(dms_thread_init_t thrd_init); +int dms_release_owner(dms_context_t *dms_ctx, dms_buf_ctrl_t *ctrl, unsigned char *released); +int dms_wait_reform(unsigned int *has_offline); +int dms_buf_res_rebuild_drc(dms_context_t *dms_ctx, dms_buf_ctrl_t *ctrl, unsigned long long lsn, + unsigned char is_dirty); +int dms_is_recovery_session(unsigned int sid); +int drc_get_page_master_id(char pageid[DMS_PAGEID_SIZE], unsigned char *master_id); +int 
dms_release_page_batch(dms_context_t *dms_ctx, dcs_batch_buf_t *owner_map, unsigned int *owner_count); +int dms_register_ssl_decrypt_pwd(dms_decrypt_pwd_t cb_func); +int dms_set_ssl_param(const char *param_name, const char *param_value); +int dms_get_ssl_param(const char *param_name, char *param_value, unsigned int size); +int dms_recovery_page_need_skip(char pageid[DMS_PAGEID_SIZE], unsigned char *skip); +int dms_reform_failed(void); +int dms_switchover(unsigned int sess_id); +int dms_drc_accessible(void); +int dms_broadcast_opengauss_ddllock(dms_context_t *dms_ctx, char *data, unsigned int len, unsigned char handle_recv_msg, + unsigned int timeout, unsigned char resend_after_reform); +int dms_reform_last_failed(void); +#ifdef __cplusplus +} +#endif + +#endif /* __SS_DMS_H__ */ \ No newline at end of file diff --git a/src/include/ddes/dms/ss_dms_bufmgr.h b/src/include/ddes/dms/ss_dms_bufmgr.h new file mode 100644 index 000000000..b2cc70ef0 --- /dev/null +++ b/src/include/ddes/dms/ss_dms_bufmgr.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * --------------------------------------------------------------------------------------- + * + * ss_dms_bufmgr.h + * + * IDENTIFICATION + * src/include/ddes/dms/ss_dms_bufmgr.h + * + * --------------------------------------------------------------------------------------- + */ +#ifndef SS_DMS_BUFMGR_H +#define SS_DMS_BUFMGR_H + +#include "ddes/dms/ss_common_attr.h" +#include "ddes/dms/ss_dms.h" +#include "storage/buf/buf_internals.h" + +#define GetDmsBufCtrl(id) (&t_thrd.storage_cxt.dmsBufCtl[(id)]) + +typedef struct SSBroadcastDDLLock { + SSBroadcastOp type; // must be first + LOCKTAG locktag; + LOCKMODE lockmode; + bool sessionlock; + bool dontWait; +} SSBroadcastDDLLock; + +void InitDmsBufCtrl(void); +void InitDmsContext(dms_context_t* dmsContext); + +void MarkReadHint(int buf_id, char persistence, bool extend, const XLogPhyBlock *pblk); +bool LockModeCompatible(dms_buf_ctrl_t *buf_ctrl, LWLockMode mode); +bool StartReadPage(BufferDesc *buf_desc, LWLockMode mode); +void ClearReadHint(int buf_id, bool buf_deleted = false); +Buffer TerminateReadPage(BufferDesc* buf_desc, ReadBufferMode read_mode, const XLogPhyBlock *pblk); +Buffer TerminateReadSegPage(BufferDesc *buf_desc, ReadBufferMode read_mode, SegSpace *spc = NULL); +Buffer DmsReadPage(Buffer buffer, LWLockMode mode, ReadBufferMode read_mode); +Buffer DmsReadSegPage(Buffer buffer, LWLockMode mode, ReadBufferMode read_mode); +bool DmsReleaseOwner(BufferTag buf_tag, int buf_id); +int32 CheckBuf4Rebuild(BufferDesc* buf_desc); +int SSLockAcquire(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock, bool dontWait, + dms_opengauss_lock_req_type_t reqType = LOCK_NORMAL_MODE); +int SSLockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock); +void SSLockReleaseAll(); +void SSLockAcquireAll(); +void MarkReadPblk(int buf_id, const XLogPhyBlock *pblk); + +#endif diff --git a/src/include/ddes/dms/ss_dms_callback.h b/src/include/ddes/dms/ss_dms_callback.h new file mode 100644 index 
000000000..f10919bf0 --- /dev/null +++ b/src/include/ddes/dms/ss_dms_callback.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * ss_dms_callback.h + * + * IDENTIFICATION + * src/include/ddes/dms/ss_dms_callback.h + * + * --------------------------------------------------------------------------------------- + */ +#ifndef SS_DMS_CALLBACK_H +#define SS_DMS_CALLBACK_H + +#include "ss_common_attr.h" + +/* 5 seconds */ +#define REFORM_CONFIRM_TIMEOUT 5000000 +#define REFORM_CONFIRM_INTERVAL 5000 +#define GET_MS(tv) (tv.tv_sec * 1000000 + tv.tv_usec) +#define DMS_LOGGER_BUFFER_SIZE 2048 + +extern void DmsInitCallback(dms_callback_t *callback); +extern void DmsCallbackThreadShmemInit(unsigned char need_startup, char **reg_data); + +#endif diff --git a/src/include/ddes/dms/ss_dms_log_output.h b/src/include/ddes/dms/ss_dms_log_output.h new file mode 100644 index 000000000..bf8041498 --- /dev/null +++ b/src/include/ddes/dms/ss_dms_log_output.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * ss_dms_log_output.h + * log output for DMS shared storage. + * + * + * IDENTIFICATION + * src/include/ddes/dms/ss_dms_log_output.h + * + * --------------------------------------------------------------------------------------- + */ +#ifndef __SS_OUTPUT_LOG_H__ +#define __SS_OUTPUT_LOG_H__ + +#include "ss_common_attr.h" + +#define GS_MAX_PATH_BUFFER_SIZE 1024 +#define GS_MAX_NAME_LEN 64 +typedef bool bool8; +typedef bool bool32; +#define GS_FILE_NAME_BUFFER_SIZE 1024 +/* _log_level */ +#define LOG_NONE 0x00000000 +#define LOG_RUN_ERR_LEVEL 0x00000001 +#define LOG_RUN_WAR_LEVEL 0x00000002 +#define LOG_RUN_INF_LEVEL 0x00000004 +#define LOG_DEBUG_ERR_LEVEL 0x00000010 +#define LOG_DEBUG_WAR_LEVEL 0x00000020 +#define LOG_DEBUG_INF_LEVEL 0x00000040 + +void DMSLogOutput(uint32 ss_log_level, const char *code_file_name, uint32 code_line_num, char buf[]); +int32 DMSLogLevelCheck(dms_log_id_t dms_log_id, dms_log_level_t dms_log_level, uint32 *log_level); + +typedef enum en_log_level { + SS_LEVEL_ERROR = 0, // error conditions + SS_LEVEL_WARN, // warning conditions + SS_LEVEL_INFO, // informational messages +} log_level_t; + +typedef enum en_log_id { + LOG_RUN = 0, + LOG_DEBUG, + LOG_COUNT, // LOG COUNT = 2 +} log_id_t; + +#endif diff --git a/src/include/ddes/dms/ss_dms_recovery.h b/src/include/ddes/dms/ss_dms_recovery.h new file mode 100644 index 000000000..3aa59874b --- /dev/null +++ b/src/include/ddes/dms/ss_dms_recovery.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. 
+ * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * ss_dms_recovery.h + * + * IDENTIFICATION + * src/include/ddes/dms/ss_dms_recovery.h + * + * --------------------------------------------------------------------------------------- + */ +#ifndef SS_DMS_RECOVERY_H +#define SS_DMS_RECOVERY_H + +#include "port.h" +#include "ddes/dms/ss_common_attr.h" + +#define REFORM_CTRL_PAGE DMS_MAX_INSTANCE + +#define RECOVERY_WAIT_TIME 10000 +#define SSFAILOVER_TRIGGER (ENABLE_DMS && g_instance.dms_cxt.SSRecoveryInfo.failover_triggered == true && \ + g_instance.dms_cxt.SSClusterState == NODESTATE_STANDBY_FAILOVER_PROMOTING) +#define SSSKIP_REDO_REPLAY (ENABLE_DMS && g_instance.dms_cxt.SSRecoveryInfo.skip_redo_replay == true) +#define SS_BEFORE_RECOVERY (ENABLE_DMS && g_instance.dms_cxt.SSReformInfo.in_reform == true \ + && g_instance.dms_cxt.SSRecoveryInfo.recovery_pause_flag == true) + +typedef struct st_reformer_ctrl { + uint64 list_stable; // stable instances list + int primaryInstId; + pg_crc32c crc; +} ss_reformer_ctrl_t; + +typedef struct st_reform_info { + bool in_reform; + dms_role_t dms_role; +} ss_reform_info_t; + +typedef struct ss_recovery_info { + bool recovery_pause_flag; + volatile bool failover_triggered; + char recovery_xlogDir[MAXPGPATH]; + bool skip_redo_replay; + LWLock* update_seg_lock; + bool new_primary_reset_walbuf_flag; + bool reclsn_updated; + bool ready_to_startup; // when DB start (except failover), the flag will set true 
+ bool startup_reform; // used to judge DB first start, when first reform finished set false + bool restart_failover_flag; // used to indicate do failover when DB start + bool reform_ready; + bool in_failover; // used to judge this is failover, this tag will combine with failover_triggered later + // in failover scenario, before failover_triggered become true, this node knows itself will become new primary +} ss_recovery_info_t; + +extern bool SSRecoveryNodes(); +extern int SSGetPrimaryInstId(); +extern void SSSavePrimaryInstId(int id); +extern void SSReadControlFile(int id); +extern void SSWriteReformerControlPages(void); +extern bool SSRecoveryApplyDelay(const XLogReaderState *record); +extern void SShandle_promote_signal(); +extern void SSTriggerFailover(); +extern void ss_failover_dw_init(); + + +#endif \ No newline at end of file diff --git a/src/include/ddes/dms/ss_init.h b/src/include/ddes/dms/ss_init.h new file mode 100644 index 000000000..bd692d698 --- /dev/null +++ b/src/include/ddes/dms/ss_init.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * ss_init.h + * include header file for DMS shared storage. 
+ * + * + * IDENTIFICATION + * src/include/ddes/dms/ss_init.h + * + * --------------------------------------------------------------------------------------- + */ +#ifndef SRC_INCLUDE_DDES_SS_INIT_H +#define SRC_INCLUDE_DDES_SS_INIT_H + +#include "c.h" + +#define DMS_MAX_INSTANCE 64 +#define DMS_MAX_SESSIONS (uint32)16320 + +#define SS_MASTER_ID g_instance.dms_cxt.SSReformerControl.primaryInstId // currently master ID is hardcoded as 0 +#define SS_MY_INST_ID g_instance.attr.attr_storage.dms_attr.instance_id +#define SS_MY_INST_IS_MASTER (SS_MY_INST_ID == SS_MASTER_ID) + +void DMSInit(); +void DMSUninit(); +int32 DMSWaitReform(); +bool DMSWaitInitStartup(); + +#endif diff --git a/src/include/ddes/dms/ss_reform_common.h b/src/include/ddes/dms/ss_reform_common.h new file mode 100644 index 000000000..387a6b52a --- /dev/null +++ b/src/include/ddes/dms/ss_reform_common.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * ss_reform_common.h + * include header file for ss reform common. 
+ * + * + * IDENTIFICATION + * src/include/ddes/dms/ss_reform_common.h + * + * --------------------------------------------------------------------------------------- + */ +#include "access/xlog_basic.h" +#include "access/xlogdefs.h" + +#define BAK_CTRL_FILE_NUM 2 +#define BIT_NUM_INT32 32 +#define REFORM_WAIT_TIME 10000 /* 0.01 sec */ +#define WAIT_REFORM_CTRL_REFRESH_TRIES 1000 + +typedef struct SSBroadcastCancelTrx { + SSBroadcastOp type; // must be first +} SSBroadcastCancelTrx; + +int SSXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, + XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *readTLI, char* xlog_path); +void SSGetXlogPath(); +void SSSaveReformerCtrl(); +void SSClearSegCache(); +int SSCancelTransactionOfAllStandby(SSBroadcastOp type); +int SSProcessCancelTransaction(SSBroadcastOp type); \ No newline at end of file diff --git a/src/include/ddes/dms/ss_switchover.h b/src/include/ddes/dms/ss_switchover.h new file mode 100644 index 000000000..817ef8f7b --- /dev/null +++ b/src/include/ddes/dms/ss_switchover.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * --------------------------------------------------------------------------------------- + * + * ss_switchover.h + * ss_switchover + * + * + * IDENTIFICATION + * src/include/ddes/dms/ss_switchover.h + * + * --------------------------------------------------------------------------------------- + */ +#ifndef SRC_INCLUDE_DDES_SS_SWITCHOVER_H +#define SRC_INCLUDE_DDES_SS_SWITCHOVER_H + +#include "ddes/dms/ss_common_attr.h" + +#define CHECKPOINT_RECORD_LOCATION1 1 +#define CHECKPOINT_RECORD_LOCATION2 2 + +void SSDoSwitchover(); +void SSHandleSwitchoverPromote(); +void SSNotifySwitchoverPromote(); + +#endif diff --git a/src/include/ddes/dms/ss_transaction.h b/src/include/ddes/dms/ss_transaction.h new file mode 100644 index 000000000..afeaf83c5 --- /dev/null +++ b/src/include/ddes/dms/ss_transaction.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * --------------------------------------------------------------------------------------- + * + * ss_transaction.h + * ss_transaction + * + * + * IDENTIFICATION + * src/include/ddes/dms/ss_transaction.h + * + * --------------------------------------------------------------------------------------- + */ +#ifndef SRC_INCLUDE_DDES_SS_TRANSACTION_H +#define SRC_INCLUDE_DDES_SS_TRANSACTION_H + +#include "ddes/dms/ss_common_attr.h" +#include "access/transam.h" +#include "storage/sinval.h" + +#define DMS_NO_RUNNING_BACKENDS (DMS_SUCCESS) +#define DMS_EXIST_RUNNING_BACKENDS (DMS_ERROR) + +typedef struct SSBroadcastXmin { + SSBroadcastOp type; // must be first + TransactionId xmin; +} SSBroadcastXmin; + +typedef struct SSBroadcastXminAck { + SSBroadcastOpAck type; // must be first + TransactionId xmin; +} SSBroadcastXminAck; + +typedef struct SSBroadcastSI { + SSBroadcastOp type; // must be first + SharedInvalidationMessage msg; +} SSBroadcastSI; + +typedef struct SSBroadcastSegDropTL { + SSBroadcastOp type; // must be first + uint32 seg_drop_timeline; +} SSBroadcastSegDropTL; + +typedef struct SSBroadcastDropRelAllBuffer { + SSBroadcastOp type; // must be first + int32 size; + RelFileNode rnodes[FLEXIBLE_ARRAY_MEMBER]; +} SSBroadcastDropRelAllBuffer; + +typedef struct SSBroadcastDropRelRangeBuffer { + SSBroadcastOp type; // must be first + RelFileNode node; + ForkNumber forkNum; + BlockNumber firstDelBlock; +} SSBroadcastDropRelRangeBuffer; + +typedef struct SSBroadcastDropDBAllBuffer { + SSBroadcastOp type; // must be first + Oid dbid; +} SSBroadcastDropDBAllBuffer; + +typedef struct SSBroadcastDropSegSpace { + SSBroadcastOp type; // must be first + Oid spcNode; + Oid dbNode; +} SSBroadcastDropSegSpace; + +typedef struct SSBroadcasDbBackends { + SSBroadcastOp type; // must be first + Oid dbid; +} SSBroadcastDbBackends; + +typedef struct SSBroadcasDbBackendsAck { + SSBroadcastOpAck type; // must be first + int count; +} SSBroadcastDbBackendsAck; + +Snapshot 
SSGetSnapshotData(Snapshot snapshot); +CommitSeqNo SSTransactionIdGetCommitSeqNo(TransactionId transactionId, bool isCommit, bool isMvcc, bool isNest, + Snapshot snapshot, bool* sync); +bool SSTransactionIdDidCommit(TransactionId transactionId); +bool SSTransactionIdIsInProgress(TransactionId transactionId); +TransactionId SSMultiXactIdGetUpdateXid(TransactionId xmax, uint16 t_infomask, uint16 t_infomask2); +bool SSGetOldestXminFromAllStandby(); +int SSGetOldestXmin(char *data, uint32 len, char *output_msg, uint32 *output_msg_len); +int SSGetOldestXminAck(SSBroadcastXminAck *ack_data); +void SSSendSharedInvalidMessages(const SharedInvalidationMessage* msgs, int n); +void SSBCastDropRelAllBuffer(RelFileNode *rnodes, int rnode_len); +void SSBCastDropRelRangeBuffer(RelFileNode node, ForkNumber forkNum, BlockNumber firstDelBlock); +void SSBCastDropDBAllBuffer(Oid dbid); +void SSBCastDropSegSpace(Oid spcNode, Oid dbNode); +int SSProcessSharedInvalMsg(char *data, uint32 len); +void SSUpdateSegDropTimeline(uint32 seg_drop_timeline); +int SSProcessSegDropTimeline(char *data, uint32 len); +int SSProcessDropRelAllBuffer(char *data, uint32 len); +int SSProcessDropRelRangeBuffer(char *data, uint32 len); +int SSProcessDropDBAllBuffer(char *data, uint32 len); +int SSProcessDropSegSpace(char *data, uint32 len); +int SSCheckDbBackends(char *data, uint32 len, char *output_msg, uint32 *output_msg_len); +int SSCheckDbBackendsAck(char *data, unsigned int len); +bool SSCheckDbBackendsFromAllStandby(Oid dbid); + +#endif diff --git a/src/include/gs_thread.h b/src/include/gs_thread.h index 1bef17ef9..e9b84951e 100755 --- a/src/include/gs_thread.h +++ b/src/include/gs_thread.h @@ -147,7 +147,8 @@ typedef enum knl_thread_role { WAL_STANDBY_SENDER, /* Am I cascading WAL to another standby ? 
*/ WAL_DB_SENDER, TOP_CONSUMER, - DCF_WORKER + DCF_WORKER, + DMS_WORKER } knl_thread_role; /* diff --git a/src/include/knl/knl_guc/knl_instance_attr_storage.h b/src/include/knl/knl_guc/knl_instance_attr_storage.h index 9b61e27f5..1175d7bd7 100755 --- a/src/include/knl/knl_guc/knl_instance_attr_storage.h +++ b/src/include/knl/knl_guc/knl_instance_attr_storage.h @@ -89,6 +89,29 @@ typedef struct knl_instance_attr_nvm { double bypassNvm; } knl_instance_attr_nvm; +typedef struct knl_instance_attr_dss { + bool ss_enable_dss; + char* ss_dss_vg_name; + char* ss_dss_conn_path; +} knl_instance_attr_dss; + +typedef struct knl_instance_attr_dms { + bool enable_dms; + bool enable_catalog_centralized; + int instance_id; + int recv_msg_pool_size; + char* interconnect_url; + char* interconnect_type; + char* rdma_work_config; + char* ock_log_path; + int channel_count; + int work_thread_count; + bool enable_reform; + bool enable_ssl; + int inst_count; + bool enable_log_level; +} knl_instance_attr_dms; + typedef struct knl_instance_attr_storage { bool wal_log_hints; bool EnableHotStandby; @@ -154,6 +177,8 @@ typedef struct knl_instance_attr_storage { char* available_zone; knl_instance_attr_dcf dcf_attr; knl_instance_attr_nvm nvm_attr; + knl_instance_attr_dss dss_attr; + knl_instance_attr_dms dms_attr; int num_internal_lock_partitions[LWLOCK_PART_KIND]; char* num_internal_lock_partitions_str; int wal_insert_status_entries_power; diff --git a/src/include/knl/knl_instance.h b/src/include/knl/knl_instance.h index f4fa14b4e..293c2e235 100755 --- a/src/include/knl/knl_instance.h +++ b/src/include/knl/knl_instance.h @@ -72,6 +72,7 @@ #endif #include "postmaster/barrier_creator.h" #include "pgxc/barrier.h" +#include "ddes/dms/ss_dms_recovery.h" const int NUM_PERCENTILE_COUNT = 2; const int INIT_NUMA_ALLOC_COUNT = 32; @@ -111,6 +112,16 @@ enum knl_parallel_redo_state { REDO_DONE, }; +/* + * used for dms + */ +typedef enum en_dms_status { + DMS_STATUS_OUT = 0, + DMS_STATUS_JOIN = 1, + 
DMS_STATUS_REFORM = 2, + DMS_STATUS_IN = 3 +} dms_status_t; + /* all process level attribute which expose to user */ typedef struct knl_instance_attr { @@ -1151,6 +1162,52 @@ typedef struct knl_g_abo_context { HTAB *models; } knl_g_abo_context; +typedef struct knl_g_dwsubdatadir_context { + /* share and cluster one copy */ + char dwOldPath[MAXPGPATH]; + char dwPathPrefix[MAXPGPATH]; + char dwSinglePath[MAXPGPATH]; + char dwBuildPath[MAXPGPATH]; + char dwUpgradePath[MAXPGPATH]; + char dwBatchUpgradeMetaPath[MAXPGPATH]; + char dwBatchUpgradeFilePath[MAXPGPATH]; + char dwMetaPath[MAXPGPATH]; + char dwExtChunkPath[MAXPGPATH]; + uint8 dwStorageType; +} knl_g_dwsubdatadir_context; + +typedef struct knl_g_datadir_context { + char baseDir[MAXPGPATH]; + char globalDir[MAXPGPATH]; + char clogDir[MAXPGPATH]; + char csnlogDir[MAXPGPATH]; + char locationDir[MAXPGPATH]; + char notifyDir[MAXPGPATH]; + char serialDir[MAXPGPATH]; + char snapshotsDir[MAXPGPATH]; + char tblspcDir[MAXPGPATH]; + char twophaseDir[MAXPGPATH]; + char multixactDir[MAXPGPATH]; + char xlogDir[MAXPGPATH]; + char controlPath[MAXPGPATH]; + char controlBakPath[MAXPGPATH]; + knl_g_dwsubdatadir_context dw_subdir_cxt; +} knl_g_datadir_context; + +typedef struct knl_g_dms_context { + uint32 dmsProcSid; + uint64 xminAck; + dms_status_t dms_status; + ClusterNodeState SSClusterState; + ss_reformer_ctrl_t SSReformerControl; // saved in disk; saved by primary + ss_reform_info_t SSReformInfo; + ss_recovery_info_t SSRecoveryInfo; + pg_tz* log_timezone; + pg_atomic_uint32 inDmsThreShmemInitCnt; // the count of threads in DmsCallbackThreadShmemInit + pg_atomic_uint32 inProcExitCnt; // Post Main in proc_exit function + bool dmsInited; +}knl_g_dms_context; + typedef struct knl_instance_context { knl_virtual_role role; volatile int status; @@ -1284,9 +1341,10 @@ typedef struct knl_instance_context { #endif pg_atomic_uint32 extensionNum; knl_g_audit_context audit_cxt; - knl_g_abo_context abo_cxt; knl_g_listen_context listen_cxt; 
+ knl_g_datadir_context datadir_cxt; + knl_g_dms_context dms_cxt; } knl_instance_context; extern long random(); diff --git a/src/include/knl/knl_thread.h b/src/include/knl/knl_thread.h index d51d5cd65..4d017164f 100755 --- a/src/include/knl/knl_thread.h +++ b/src/include/knl/knl_thread.h @@ -73,10 +73,10 @@ #include "replication/origin.h" #include "catalog/pg_subscription.h" #include "port/pg_crc32c.h" +#include "ddes/dms/ss_common_attr.h" + #define MAX_PATH_LEN 1024 - extern const int g_reserve_param_num; - #define PARTKEY_VALUE_MAXNUM 64 typedef struct ResourceOwnerData* ResourceOwner; @@ -2735,7 +2735,9 @@ typedef struct knl_t_storage_context { /* global variable */ char* pageCopy; + char* pageCopy_ori; char* segPageCopy; + char* segPageCopyOri; bool isSwitchoverLockHolder; int num_held_lwlocks; @@ -2784,10 +2786,9 @@ typedef struct knl_t_storage_context { int max_safe_fds; /* default if not changed */ /* reserve `1000' for thread-private file id */ int max_userdatafiles; - int timeoutRemoteOpera; - char* PcaBufferBlocks; + dms_buf_ctrl_t* dmsBufCtl; } knl_t_storage_context; typedef struct knl_t_port_context { @@ -3309,6 +3310,10 @@ typedef struct knl_t_publication_context { bool updateConninfoNeeded; } knl_t_publication_context; +typedef struct knl_t_dms_context { + MemoryContext msgContext; +} knl_t_dms_context; + /* thread context. 
*/ typedef struct knl_thrd_context { knl_thread_role role; @@ -3456,6 +3461,7 @@ typedef struct knl_thrd_context { knl_t_page_compression_context page_compression_cxt; knl_t_cfs_shrinker_context cfs_shrinker_cxt; knl_t_sql_patch_context sql_patch_cxt; + knl_t_dms_context dms_cxt; } knl_thrd_context; #ifdef ENABLE_MOT @@ -3498,4 +3504,5 @@ RedoPageRepairCallBackFunc RegisterRedoPageRepairCallBack(RedoPageRepairCallBack void RedoPageRepairCallBack(RepairBlockKey key, XLogPhyBlock pblk); extern void VerifyMemoryContext(); + #endif /* SRC_INCLUDE_KNL_KNL_THRD_H_ */ diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 89c501575..f54d0d3b2 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -340,6 +340,7 @@ extern const uint32 BACKUP_SLOT_VERSION_NUM; #ifdef PGXC extern bool useLocalXid; #endif +extern bool EnableInitDBSegment; #define DEFUALT_STACK_SIZE 16384 @@ -400,6 +401,8 @@ extern void InitializeSessionUserIdStandalone(void); extern void SetSessionAuthorization(Oid userid, bool is_superuser); extern Oid GetCurrentRoleId(void); extern void SetCurrentRoleId(Oid roleid, bool is_superuser); +void ss_initdwsubdir(char *dssdir, int instance_id); +extern void initDSSConf(void); extern Oid get_current_lcgroup_oid(); extern const char* get_current_lcgroup_name(); @@ -459,6 +462,7 @@ extern bool CheckExecDirectPrivilege(const char* query); /* check user have priv u_sess->misc_cxt.Mode = (mode); \ } while (0) +#define ENABLE_DSS (g_instance.attr.attr_storage.dss_attr.ss_enable_dss == true) /* * Auxiliary-process type identifiers. 
@@ -539,6 +543,8 @@ typedef enum { #define AmTsCompactionConsumerProcess() (t_thrd.bootstrap_cxt.MyAuxProcType == TsCompactionConsumerProcess) #define AmTsCompactionAuxiliaryProcess() (t_thrd.bootstrap_cxt.MyAuxProcType == TsCompactionAuxiliaryProcess) #define AmPageRedoWorker() (t_thrd.bootstrap_cxt.MyAuxProcType == PageRedoProcess) +#define AmDmsReformProcProcess() (t_thrd.role == DMS_WORKER && \ + strncmp("DMS REFORM PROC", t_thrd.proc_cxt.MyProgName, 15) == 0) diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index eddb9b8e6..07c4a0fae 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -891,6 +891,9 @@ XLOG_BLCKSZ). Changing XLOG_SEG_SIZE requires an initdb. */ #undef XLOG_SEG_SIZE +/* Number of max alloc xlog segment in extreme_rto, default 4 */ +#undef MAX_ALLOC_SEGNUM + /* Number of bits in a file offset, on hosts where this is settable. */ #undef _FILE_OFFSET_BITS diff --git a/src/include/port.h b/src/include/port.h index f7c9cc21b..8ce5044ee 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -48,6 +48,7 @@ extern void make_native_path(char* path); extern bool path_contains_parent_reference(const char* path); extern bool path_is_relative_and_below_cwd(const char* path); extern bool path_is_prefix_of_path(const char* path1, const char* path2); +extern void get_top_path(char *path); extern const char* get_progname(const char* argv0); extern void get_share_path(const char* my_exec_path, char* ret_path); extern void get_etc_path(const char* my_exec_path, char* ret_path, size_t ret_path_len); diff --git a/src/include/postgres.h b/src/include/postgres.h index b7a1b70f1..e7d7a0ae5 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -1000,6 +1000,7 @@ extern void RemoveTempNamespace(); #endif #define CacheIsProcOid(cc_id) ((cc_id) == PROCOID) #define IsBootingPgProc(rel) IsProcRelation(rel) +#define IsBootingPgClass(rel) (RelationGetRelid(rel) == RelationRelationId) #define BootUsingBuiltinFunc 
true extern int errdetail_abort(void); diff --git a/src/include/postmaster/bgworker.h b/src/include/postmaster/bgworker.h index 4a97332af..3a75c7012 100644 --- a/src/include/postmaster/bgworker.h +++ b/src/include/postmaster/bgworker.h @@ -93,6 +93,7 @@ extern bool RegisterBackgroundWorker(BackgroundWorker *worker); extern int LaunchBackgroundWorkers(int nworkers, void *bgshared, bgworker_main bgmain, bgworker_exit bgexit); extern void BackgroundWorkerMain(void); extern bool IsBgWorkerProcess(void); +extern bool IsDMSWorkerProcess(void); extern void BgworkerListSyncQuit(); extern void BgworkerListWaitFinish(int *nparticipants); extern void InitBgworkerGlobal(void); diff --git a/src/include/postmaster/postmaster.h b/src/include/postmaster/postmaster.h index 7e85505a6..66f76be7a 100755 --- a/src/include/postmaster/postmaster.h +++ b/src/include/postmaster/postmaster.h @@ -218,6 +218,8 @@ extern void set_disable_conn_mode(void); #define IsConnPortFromCoord(port) false #endif +const char *GetSSServerMode(); +bool SSIsServerModeReadOnly(); bool IsFromLocalAddr(Port* port); extern bool IsMatchSocketAddr(const struct sockaddr* sock_addr, int compare_port); extern bool IsHAPort(Port* port); @@ -246,4 +248,7 @@ extern void InitProcessAndShareMemory(); extern void InitShmemForDcfCallBack(); extern void ShutdownForDRSwitchover(void); extern void InitMemoryLogDirectory(); +extern void InitShmemForDmsCallBack(); +extern void SignalTermAllBackEnd(); +extern void SSRestartFailoverPromote(); #endif /* _POSTMASTER_H */ diff --git a/src/include/replication/replicainternal.h b/src/include/replication/replicainternal.h index de1cfe35b..667bb75e7 100755 --- a/src/include/replication/replicainternal.h +++ b/src/include/replication/replicainternal.h @@ -176,7 +176,8 @@ typedef enum ClusterNodeState { NODESTATE_STANDBY_PROMOTING, NODESTATE_STANDBY_FAILOVER_PROMOTING, NODESTATE_PRIMARY_DEMOTING_WAIT_CATCHUP, - NODESTATE_DEMOTE_FAILED + NODESTATE_DEMOTE_FAILED, + NODESTATE_STANDBY_PROMOTED 
} ClusterNodeState; #endif diff --git a/src/include/storage/buf/bufmgr.h b/src/include/storage/buf/bufmgr.h index 547a68b1e..2f8c01ce3 100644 --- a/src/include/storage/buf/bufmgr.h +++ b/src/include/storage/buf/bufmgr.h @@ -412,5 +412,11 @@ extern int getDuplicateRequest(CheckpointerRequest *requests, int num_requests, extern void RemoteReadFile(RemoteReadFileKey *key, char *buf, uint32 size, int timeout, uint32* remote_size); extern int64 RemoteReadFileSize(RemoteReadFileKey *key, int timeout); +extern void WaitIO(BufferDesc *buf); extern bool StartBufferIO(BufferDesc* buf, bool forInput); + +extern Buffer ReadBuffer_common_for_dms(ReadBufferMode readmode, BufferDesc *bufDesc, const XLogPhyBlock *pblk); +extern void ReadBuffer_common_for_check(ReadBufferMode readmode, BufferDesc* buf_desc, + const XLogPhyBlock *pblk, Block bufBlock); + #endif diff --git a/src/include/storage/dss/dss_adaptor.h b/src/include/storage/dss/dss_adaptor.h new file mode 100644 index 000000000..4e79237f1 --- /dev/null +++ b/src/include/storage/dss/dss_adaptor.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * --------------------------------------------------------------------------------------- + * + * dss_adaptor.h + * + * IDENTIFICATION + * src/include/storage/dss/dss_adaptor.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef DSS_ADAPTOR_H +#define DSS_ADAPTOR_H + +#include +#include +#include +#include "dss_api_def.h" + +#define SS_LIBDSS_NAME "libdssapi.so" + +int dss_device_init(const char *conn_path, bool enable_dss); + +// callback for register dssapi +typedef int (*dss_open_device)(const char *name, int flags, int *handle); +typedef int (*dss_read_device)(int handle, void *buf, int size, int *read_size); +typedef int (*dss_write_device)(int handle, const void *buf, int size); +typedef int (*dss_pread_device)(int handle, void *buf, int size, long offset, int *read_size); +typedef int (*dss_pwrite_device)(int handle, const void *buf, int size, long offset); +typedef long (*dss_seek_device)(int handle, long offset, int origin); +typedef int (*dss_trucate_device)(int handle, long keep_size); +typedef int (*dss_create_device)(const char *name, int flags); +typedef int (*dss_remove_device)(const char *name); +typedef int (*dss_close_device)(int handle); +typedef int (*dss_exist_device)(const char *name, bool *result); +typedef int (*dss_create_device_dir)(const char *name); +typedef int (*dss_exist_device_dir)(const char *name, bool *result); +typedef dss_dir_handle (*dss_open_device_dir)(const char *name); +typedef int (*dss_read_device_dir)(dss_dir_handle dir, dss_dirent_t *item, dss_dir_item_t *result); +typedef int (*dss_close_device_dir)(dss_dir_handle dir); +typedef int (*dss_remove_device_dir)(const char *name); +typedef int (*dss_rename_device)(const char *src, const char *dst); +typedef int (*dss_check_device_size)(int size); +typedef int (*dss_align_device_size)(int size); +typedef int (*dss_link_device)(const char *oldpath, const char *newpath); +typedef int (*dss_unlink_device)(const char 
*path); +typedef int (*dss_exist_device_link)(const char *path, bool *result); +typedef int (*dss_device_name)(int handle, char *fname, size_t fname_size); +typedef int (*dss_read_device_link)(const char *path, char *buf, int bufsize); +typedef int (*dss_stat_device)(const char *path, dss_stat_info_t item); +typedef int (*dss_lstat_device)(const char *path, dss_stat_info_t item); +typedef int (*dss_fstat_device)(int handle, dss_stat_info_t item); +typedef int (*dss_set_status)(dss_server_status_t status); +typedef void (*dss_device_size)(const char *fname, long *fsize); +typedef void (*dss_error_info)(int *errorcode, const char **errormsg); +typedef void (*dss_svr_path)(const char *conn_path); +typedef void (*dss_log_callback)(dss_log_output cb_log_output); +typedef int (*dss_version)(void); +typedef struct st_dss_device_op_t { + void *handle; + dss_create_device dss_create; + dss_remove_device dss_remove; + dss_open_device dss_open; + dss_read_device dss_read; + dss_pread_device dss_pread; + dss_write_device dss_write; + dss_pwrite_device dss_pwrite; + dss_seek_device dss_seek; + dss_trucate_device dss_truncate; + dss_close_device dss_close; + dss_exist_device dss_exist; + dss_create_device_dir dss_create_dir; + dss_exist_device_dir dss_exist_dir; + dss_rename_device dss_rename; + dss_check_device_size dss_check_size; + dss_align_device_size dss_align_size; + dss_device_size dss_fsize; + dss_device_name dss_fname; + dss_error_info dss_get_error; + dss_open_device_dir dss_open_dir; + dss_read_device_dir dss_read_dir; + dss_close_device_dir dss_close_dir; + dss_remove_device_dir dss_remove_dir; + dss_link_device dss_link; + dss_unlink_device dss_unlink; + dss_exist_device_link dss_exist_link; + dss_read_device_link dss_read_link; + dss_stat_device dss_stat; + dss_lstat_device dss_lstat; + dss_fstat_device dss_fstat; + dss_set_status dss_set_server_status; + dss_svr_path dss_set_svr_path; + dss_log_callback dss_register_log_callback; + dss_version dss_get_version; +} 
dss_device_op_t; + +void dss_register_log_callback(dss_log_output cb_log_output); + +#endif // DSS_ADAPTOR_H \ No newline at end of file diff --git a/src/include/storage/dss/dss_api_def.h b/src/include/storage/dss/dss_api_def.h new file mode 100644 index 000000000..e326a8d2a --- /dev/null +++ b/src/include/storage/dss/dss_api_def.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * dss_api_def.h + * Defines the DSS data structure. 
+ * + * + * IDENTIFICATION + * src/include/storage/dss/dss_api_def.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef __DSS_API_DEF_H +#define __DSS_API_DEF_H + +#include "time.h" + +struct __dss_dir; +typedef struct __dss_dir *dss_dir_handle; + +typedef enum en_dss_log_level { + DSS_LOG_LEVEL_ERROR = 0, // error conditions + DSS_LOG_LEVEL_WARN, // warning conditions + DSS_LOG_LEVEL_INFO, // informational messages + DSS_LOG_LEVEL_COUNT, +} dss_log_level_t; + +typedef enum en_dss_log_id { + DSS_LOG_ID_RUN = 0, + DSS_LOG_ID_DEBUG, + DSS_LOG_ID_COUNT, +} dss_log_id_t; + +#define DSS_SEEK_MAXWR 3 /* Used for seek actual file size for openGauss */ +#define DSS_MAX_NAME_LEN 64 /* Consistent with dss_defs.h */ +#define DSS_FILE_PATH_MAX_LENGTH (SIZE_K(1) + 1) /* Consistent with dss_defs.h */ +/* make the dss handle start from this value, to be distinguished from file system handle value */ +#define DSS_HANDLE_BASE 0x20000000 +#define DSS_CONN_NEVER_TIMEOUT (-1) +#define DSS_VERSION_MAX_LEN 256 + +typedef enum en_dss_item_type { DSS_PATH, DSS_FILE, DSS_LINK } dss_item_type_t; + +typedef struct st_dss_dirent { + dss_item_type_t d_type; + char d_name[DSS_MAX_NAME_LEN]; +} dss_dirent_t; + +typedef enum en_dss_server_status { + DSS_STATUS_NORMAL = 0, + DSS_STATUS_READONLY, + DSS_STATUS_READWRITE, +} dss_server_status_t; + +typedef struct st_dss_stat { + unsigned long long size; + unsigned long long written_size; + time_t create_time; + time_t update_time; + char name[DSS_MAX_NAME_LEN]; + dss_item_type_t type; +} dss_stat_t; + +typedef struct st_dss_dirent *dss_dir_item_t; +typedef struct st_dss_stat *dss_stat_info_t; + +typedef void (*dss_log_output)(dss_log_id_t log_type, dss_log_level_t log_level, const char *code_file_name, + unsigned int code_line_num, const char *module_name, const char *format, ...); + +#define DSS_LOCAL_MAJOR_VER_WEIGHT 1000000 +#define DSS_LOCAL_MINOR_VER_WEIGHT 1000 +#define 
DSS_LOCAL_MAJOR_VERSION 0 +#define DSS_LOCAL_MINOR_VERSION 0 +#define DSS_LOCAL_VERSION 2 + +#define DSS_SUCCESS 0 +#define DSS_ERROR (-1) + +/** 1.DSS range [2000, 2500] * + * 2.ERR_DSS_SUBMODEL_ACTION_DETAIL, _DETAIL is optional which indicates the error cause. + */ +#define ERR_DSS_FLOOR 2000 + +// vg error [2000, 2060) +#define ERR_DSS_VG_CREATE 2000 +#define ERR_DSS_VG_LOCK 2010 +#define ERR_DSS_VG_REMOVE 2020 +#define ERR_DSS_VG_CHECK 2030 +#define ERR_DSS_VG_CHECK_NOT_INIT 2031 +#define ERR_DSS_VG_NOT_EXIST 2040 + +// volumn error [2060, 2130) +#define ERR_DSS_VOLUME_OPEN 2060 +#define ERR_DSS_VOLUME_READ 2070 +#define ERR_DSS_VOLUME_WRITE 2080 +#define ERR_DSS_VOLUME_SEEK 2090 +#define ERR_DSS_VOLUME_ADD 2100 +#define ERR_DSS_VOLUME_ADD_EXISTED 2101 +#define ERR_DSS_VOLUME_REMOVE 2110 +#define ERR_DSS_VOLUME_REMOVE_NOEXIST 2111 +#define ERR_DSS_VOLUME_REMOVE_NONEMPTY 2112 +#define ERR_DSS_VOLUME_REMOVE_SUPER_BLOCK 2113 + +// file error [2130, 2230) +#define ERR_DSS_FILE_SEEK 2130 +#define ERR_DSS_FILE_REMOVE 2140 +#define ERR_DSS_FILE_REMOVE_OPENING 2141 +#define ERR_DSS_FILE_RENAME 2150 +#define ERR_DSS_FILE_NOT_EXIST 2190 +#define ERR_DSS_FILE_OPENING_REMOTE 2191 +#define ERR_DSS_FILE_TYPE_MISMATCH 2192 +#define ERR_DSS_FILE_PATH_ILL 2193 + +// dir error [2230, 2280) +#define ERR_DSS_DIR_REMOVE 2230 +#define ERR_DSS_DIR_REMOVE_NOT_EMPTY 2231 +#define ERR_DSS_DIR_CREATE 2240 +#define ERR_DSS_DIR_CREATE_DUPLICATED 2241 +#define ERR_DSS_DIR_NOT_EXIST 2270 + +// link error [2280, 2300) +#define ERR_DSS_LINK_READ 2280 +#define ERR_DSS_LINK_READ_NOT_LINK 2281 + +// config error [2300, 2320) +#define ERR_DSS_CONFIG_FILE_OVERSIZED 2300 +#define ERR_DSS_CONFIG_LOAD 2301 +#define ERR_DSS_CONFIG_LINE_OVERLONG 2302 + +// redo error [2320, 2350) +#define ERR_DSS_REDO_ILL 2320 + +// Basic Data Structure error [2350, 2400) +#define ERR_DSS_OAMAP_INSERT 2350 +#define ERR_DSS_OAMAP_INSERT_DUP_KEY 2351 +#define ERR_DSS_OAMAP_FETCH 2352 +#define ERR_DSS_SKLIST_ERR 2360 
+#define ERR_DSS_SKLIST_NOT_INIT 2361 +#define ERR_DSS_SKLIST_NOT_EXIST 2362 +#define ERR_DSS_SKLIST_EXIST 2363 +#define ERR_DSS_SHM_CREATE 2370 +#define ERR_DSS_SHM_CHECK 2371 +#define ERR_DSS_GA_INIT 2380 +#define ERR_DSS_SESSION_INVALID_ID 2390 +#define ERR_DSS_SESSION_CREATE 2391 + +// other error [2400, 2500) +#define ERR_DSS_INVALID_PARAM 2400 +#define ERR_DSS_NO_SPACE 2401 +#define ERR_DSS_ENV_NOT_INITIALIZED 2402 +#define ERR_DSS_CLI_EXEC_FAIL 2403 +#define ERR_DSS_FNODE_CHECK 2404 +#define ERR_DSS_LOCK_TIMEOUT 2405 +#define ERR_DSS_SERVER_IS_DOWN 2406 +#define ERR_DSS_CHECK_SIZE 2407 +#define ERR_DSS_MES_ILL 2408 +#define ERR_DSS_STRING_TOO_LONG 2409 +#define ERR_DSS_TCP_TIMEOUT_REMAIN 2410 +#define ERR_DSS_UDS_INVALID_URL 2411 +#define ERR_DSS_RECV_MSG_FAILED 2412 +#define ERR_DSS_LINK_NOT_EXIST 2413 + +#define ERR_DSS_CEIL 2500 + +#endif // __DSS_API_DEF_H diff --git a/src/include/storage/dss/dss_log.h b/src/include/storage/dss/dss_log.h new file mode 100644 index 000000000..f30b37133 --- /dev/null +++ b/src/include/storage/dss/dss_log.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * --------------------------------------------------------------------------------------- + * + * dss_log.h + * + * IDENTIFICATION + * src/include/storage/dss/dss_log.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef DSS_LOG_H +#define DSS_LOG_H + +#define DMS_LOGGER_BUFFER_SIZE 2048 + +void dss_log_init(void); + +typedef enum dss_log_level { + LOG_RUN_ERR_LEVEL = 0, + LOG_RUN_WAR_LEVEL, + LOG_RUN_INF_LEVEL, + LOG_DEBUG_ERR_LEVEL, + LOG_DEBUG_WAR_LEVEL, + LOG_DEBUG_INF_LEVEL +} dss_log_level; + +#endif /* DSS_LOG_H */ \ No newline at end of file diff --git a/src/include/storage/dss/fio_dss.h b/src/include/storage/dss/fio_dss.h new file mode 100644 index 000000000..7d366eb85 --- /dev/null +++ b/src/include/storage/dss/fio_dss.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * fio_dss.h + * DSS File System Adapter Header File. 
+ * + * + * IDENTIFICATION + * src/include/storage/dss/fio_dss.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef FIO_DSS_H +#define FIO_DSS_H + +#include "c.h" +#include +#include +#include +#include "storage/dss/dss_adaptor.h" + +void dss_device_register(dss_device_op_t *dss_device_op, bool enable_dss); + +void dss_set_errno(int *errcode); +bool dss_exist_file(const char *file_name); +int dss_access_file(const char *file_name, int mode); +int dss_create_dir(const char *name, mode_t mode); +bool dss_exist_dir(const char *name); +int dss_open_dir(const char *name, DIR **dir_handle); +int dss_read_dir(DIR *dir_handle, struct dirent **result); +int dss_close_dir(DIR *dir_handle); +int dss_remove_dir(const char *name); +int dss_rename_file(const char *src, const char *dst); +int dss_remove_file(const char *name); +int dss_open_file(const char *name, int flags, mode_t mode, int *handle); +int dss_fopen_file(const char *name, const char* mode, FILE **stream); +int dss_close_file(int handle); +int dss_fclose_file(FILE *stream); +ssize_t dss_read_file(int handle, void *buf, size_t size); +ssize_t dss_pread_file(int handle, void *buf, size_t size, off_t offset); +size_t dss_fread_file(void *buf, size_t size, size_t nmemb, FILE *stream); +ssize_t dss_write_file(int handle, const void *buf, size_t size); +ssize_t dss_pwrite_file(int handle, const void *buf, size_t size, off_t offset); +size_t dss_fwrite_file(const void *buf, size_t size, size_t count, FILE *stream); +off_t dss_seek_file(int handle, off_t offset, int origin); +int dss_fseek_file(FILE *stream, long offset, int whence); +long dss_ftell_file(FILE *stream); +void dss_rewind_file(FILE *stream); +int dss_fflush_file(FILE *stream); +int dss_sync_file(int handle); +int dss_truncate_file(int handle, off_t keep_size); +int dss_ftruncate_file(FILE *stream, off_t keep_size); +off_t dss_get_file_size(const char *fname); +int dss_fallocate_file(int handle, int 
mode, off_t offset, off_t len); +int dss_link(const char *src, const char *dst); +int dss_unlink_target(const char *name); +bool dss_exist_link(const char *name); +ssize_t dss_read_link(const char *path, char *buf, size_t buf_size); +int dss_setvbuf(FILE *stream, char *buf, int mode, size_t size); +int dss_feof(FILE *stream); +int dss_ferror(FILE *stream); +int dss_fileno(FILE *stream); +int dss_stat_file(const char *path, struct stat *buf); +int dss_lstat_file(const char *path, struct stat *buf); +int dss_fstat_file(int handle, struct stat *buf); +int dss_chmod_file(const char* path, mode_t mode); +int dss_set_server_status_wrapper(bool is_master); +int dss_remove_dev(const char *name); + +#endif // FIO_DSS_H \ No newline at end of file diff --git a/src/include/storage/file/fio_device.h b/src/include/storage/file/fio_device.h new file mode 100644 index 000000000..47a086614 --- /dev/null +++ b/src/include/storage/file/fio_device.h @@ -0,0 +1,539 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * fio_device.h + * Storage Adapter Header File. 
+ * + * + * IDENTIFICATION + * src/include/storage/file/fio_device.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef FIO_DEVICE_H +#define FIO_DEVICE_H + +#include +#include +#include +#include +#include +#include +#include "storage/dss/fio_dss.h" +#include "storage/file/fio_device_com.h" + +bool is_dss_type(device_type_t type); +bool is_dss_file(const char *name); +bool is_dss_file_dec(FILE *stream); +bool is_dss_fd(int handle); +bool is_file_delete(int err); +bool is_file_exist(int err); +device_type_t fio_device_type(const char *name); + +typedef struct { + uint64 magic_head; + char filename[MAX_FILE_NAME_LEN]; + dss_dir_handle dir_handle; + struct dirent ret; /* Used to return to caller */ +} DSS_DIR; + +typedef struct { + uint64 magic_head; + off_t fsize; + int errcode; + int handle; +} DSS_STREAM; + +static inline int rename_dev(const char *oldpath, const char *newpath) +{ + if (is_dss_file(oldpath)) { + if (dss_rename_file(oldpath, newpath) != GS_SUCCESS) { + return -1; + } + return 0; + } else { + return rename(oldpath, newpath); + } +} + +static inline int open_dev(const char *pathname, int flags, mode_t mode) +{ + int handle; + if (is_dss_file(pathname)) { + if (dss_open_file(pathname, flags, mode, &handle) != GS_SUCCESS) { + return -1; + } + return handle; + } else { + return open(pathname, flags, mode); + } +} + +static inline int close_dev(int fd) +{ + if (is_dss_fd(fd)) { + if (dss_close_file(fd) != GS_SUCCESS) { + return -1; + } + return 0; + } else { + return close(fd); + } +} + +static inline ssize_t read_dev(int fd, void *buf, size_t count) +{ + if (is_dss_fd(fd)) { + return dss_read_file(fd, buf, count); + } else { + return read(fd, buf, count); + } +} + +static inline ssize_t pread_dev(int fd, void *buf, size_t count, off_t offset) +{ + if (is_dss_fd(fd)) { + return dss_pread_file(fd, buf, count, offset); + } else { + return pread(fd, buf, count, offset); + } +} + +static inline 
ssize_t write_dev(int fd, const void *buf, size_t count) +{ + if (is_dss_fd(fd)) { + return dss_write_file(fd, buf, count); + } else { + return write(fd, buf, count); + } +} + +static inline ssize_t pwrite_dev(int fd, const void *buf, size_t count, off_t offset) +{ + if (is_dss_fd(fd)) { + return dss_pwrite_file(fd, buf, count, offset); + } else { + return pwrite(fd, buf, count, offset); + } +} + +static inline off_t lseek_dev(int fd, off_t offset, int whence) +{ + if (is_dss_fd(fd)) { + return dss_seek_file(fd, offset, whence); + } else { + return lseek(fd, offset, whence); + } +} + +static inline int fsync_dev(int fd) +{ + if (is_dss_fd(fd)) { + if (dss_sync_file(fd) != GS_SUCCESS) { + return -1; + } + return 0; + } else { + return fsync(fd); + } +} + +static inline int fallocate_dev(int fd, int mode, off_t offset, off_t len) +{ + if (is_dss_fd(fd)) { + if (dss_fallocate_file(fd, mode, offset, len) != GS_SUCCESS) { + return -1; + } + return 0; + } else { + return fallocate(fd, mode, offset, len); + } +} + +static inline int access_dev(const char *pathname, int mode) +{ + if (is_dss_file(pathname)) { + if (!dss_exist_file(pathname) && !dss_exist_dir(pathname)) { + return -1; + } + return dss_access_file(pathname, mode); + } else { + return access(pathname, mode); + } +} + +static inline int mkdir_dev(const char *pathname, mode_t mode) +{ + if (is_dss_file(pathname)) { + if (dss_create_dir(pathname, mode) != GS_SUCCESS) { + return -1; + } + return 0; + } else { + return mkdir(pathname, mode); + } +} + +static inline int rmdir_dev(const char *pathname) +{ + if (is_dss_file(pathname)) { + if (dss_remove_dir(pathname) != GS_SUCCESS) { + return -1; + } + return 0; + } else { + return rmdir(pathname); + } +} + + +static inline int symlink_dev(const char *target, const char *linkpath) +{ + if (is_dss_file(target)) { + if (dss_link(target, linkpath) != GS_SUCCESS) { + return -1; + } + return 0; + } else { + return symlink(target, linkpath); + } +} + +static inline ssize_t 
readlink_dev(const char *pathname, char *buf, size_t bufsiz) +{ + if (is_dss_file(pathname)) { + if (!dss_exist_link(pathname)) { + return -1; + } + + return dss_read_link(pathname, buf, bufsiz); + } else { + return readlink(pathname, buf, bufsiz); + } +} + +static inline int chmod_dev(const char *pathname, mode_t mode) +{ + if (is_dss_file(pathname)) { + return dss_chmod_file(pathname, mode); + } else { + return chmod(pathname, mode); + } +} + +static inline int unlink_dev(const char *pathname) +{ + if (is_dss_file(pathname)) { + if (dss_remove_dev(pathname) != GS_SUCCESS) { + return -1; + } + return 0; + } else { + return unlink(pathname); + } +} + +static inline int lstat_dev(const char * pathname, struct stat * statbuf) +{ + if (is_dss_file(pathname)) { + if (!dss_exist_file(pathname) && !dss_exist_dir(pathname)) { + errno = ENOENT; + return -1; + } + return dss_lstat_file(pathname, statbuf); + } else { + return lstat(pathname, statbuf); + } +} + +static inline int stat_dev(const char *pathname, struct stat *statbuf) +{ + if (is_dss_file(pathname)) { + if (!dss_exist_file(pathname) && !dss_exist_dir(pathname)) { + errno = ENOENT; + return -1; + } + return dss_stat_file(pathname, statbuf); + } else { + return stat(pathname, statbuf); + } +} + +static inline int fstat_dev(int fd, struct stat *statbuf) +{ + if (is_dss_fd(fd)) { + return dss_fstat_file(fd, statbuf); + } else { + return fstat(fd, statbuf); + } +} + +static inline FILE *fopen_dev(const char *pathname, const char *mode) +{ + if (unlikely(is_dss_file(pathname))) { + FILE *stream = NULL; + if (dss_fopen_file(pathname, mode, &stream) != GS_SUCCESS) { + return NULL; + } + return stream; + } else { + return fopen(pathname, mode); + } +} + +static inline int remove_dev(const char *pathname) +{ + if (is_dss_file(pathname)) { + if (dss_remove_dev(pathname) != GS_SUCCESS) { + return -1; + } + return 0; + } else { + return remove(pathname); + } +} + +static inline int fclose_dev(FILE *stream) +{ + DSS_STREAM 
*dss_stream = (DSS_STREAM *)stream; + if (unlikely(dss_stream->magic_head == DSS_MAGIC_NUMBER)) { + if (dss_fclose_file(stream) != GS_SUCCESS) { + return -1; + } + return 0; + } else { + return fclose(stream); + } +} + +static inline size_t fread_dev(void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + DSS_STREAM *dss_stream = (DSS_STREAM *)stream; + if (unlikely(dss_stream->magic_head == DSS_MAGIC_NUMBER)) { + return dss_fread_file(ptr, size, nmemb, stream); + } else { + return fread(ptr, size, nmemb, stream); + } +} + +static inline size_t fwrite_dev(const void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + DSS_STREAM *dss_stream = (DSS_STREAM *)stream; + if (unlikely(dss_stream->magic_head == DSS_MAGIC_NUMBER)) { + return dss_fwrite_file(ptr, size, nmemb, stream); + } else { + return fwrite(ptr, size, nmemb, stream); + } +} + +static inline int fseek_dev(FILE *stream, long offset, int whence) +{ + DSS_STREAM *dss_stream = (DSS_STREAM *)stream; + if (unlikely(dss_stream->magic_head == DSS_MAGIC_NUMBER)) { + return dss_fseek_file(stream, offset, whence); + } else { + return fseek(stream, offset, whence); + } +} + +static inline long ftell_dev(FILE *stream) +{ + DSS_STREAM *dss_stream = (DSS_STREAM *)stream; + if (unlikely(dss_stream->magic_head == DSS_MAGIC_NUMBER)) { + return dss_ftell_file(stream); + } else { + return ftell(stream); + } +} + +static inline int fflush_dev(FILE *stream) +{ + DSS_STREAM *dss_stream = (DSS_STREAM *)stream; + if (unlikely(dss_stream->magic_head == DSS_MAGIC_NUMBER)) { + /* nothing to do, because DSS will enable O_SYNC and O_DIRECT for all IO */ + return 0; + } else { + return fflush(stream); + } +} + +static inline int ftruncate_dev(int fd, off_t length) +{ + if (unlikely(is_dss_fd(fd))) { + return dss_truncate_file(fd, length); + } else { + return ftruncate(fd, length); + } +} + +static inline int feof_dev(FILE *stream) +{ + DSS_STREAM *dss_stream = (DSS_STREAM *)stream; + if (unlikely(dss_stream->magic_head == 
DSS_MAGIC_NUMBER)) { + return dss_feof(stream); + } else { + return feof(stream); + } +} + +static inline int ferror_dev(FILE *stream) +{ + DSS_STREAM *dss_stream = (DSS_STREAM *)stream; + if (unlikely(dss_stream->magic_head == DSS_MAGIC_NUMBER)) { + return dss_ferror(stream); + } else { + return ferror(stream); + } +} + +static inline int fileno_dev(FILE *stream) +{ + DSS_STREAM *dss_stream = (DSS_STREAM *)stream; + if (unlikely(dss_stream->magic_head == DSS_MAGIC_NUMBER)) { + return dss_fileno(stream); + } else { + return fileno(stream); + } +} + +static inline int setvbuf_dev(FILE *stream, char *buf, int type, size_t size) +{ + DSS_STREAM *dss_stream = (DSS_STREAM *)stream; + if (unlikely(dss_stream->magic_head == DSS_MAGIC_NUMBER)) { + /* nothing to do in DSS */ + return 0; + } else { + return setvbuf(stream, buf, type, size); + } +} + +static inline void rewind_dev(FILE *stream) +{ + DSS_STREAM *dss_stream = (DSS_STREAM *)stream; + if (unlikely(dss_stream->magic_head == DSS_MAGIC_NUMBER)) { + dss_rewind_file(stream); + } else { + rewind(stream); + } +} + +static inline DIR *opendir_dev(const char *name) +{ + if (unlikely(is_dss_file(name))) { + DIR *dir = NULL; + if (dss_open_dir(name, &dir) != GS_SUCCESS) { + return NULL; + } + return dir; + } else { + return opendir(name); + } +} + +static inline struct dirent *readdir_dev(DIR *dirp) +{ + DSS_DIR *dss_dir = (DSS_DIR *)dirp; + + if (dirp == NULL) { + return NULL; + } + + if (unlikely(dss_dir->magic_head == DSS_MAGIC_NUMBER)) { + struct dirent *de = NULL; + (void)dss_read_dir(dirp, &de); + return de; + } else { + return readdir(dirp); + } +} + +static inline int readdir_r_dev(DIR *dirp, struct dirent *entry, struct dirent **result) +{ + DSS_DIR *dss_dir = (DSS_DIR *)dirp; + if (unlikely(dss_dir->magic_head == DSS_MAGIC_NUMBER)) { + if (dss_read_dir(dirp, result) != GS_SUCCESS) { + return 1; + } + return 0; + } else { + /** + * In arm environment, readdir_r warning about deprecated-declarations, + * but for 
thread safe keep using readdir_r. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + return readdir_r(dirp, entry, result); +#pragma GCC diagnostic pop + } +} + +static inline int closedir_dev(DIR *dirp) +{ + DSS_DIR *dss_dir = (DSS_DIR *)dirp; + if (unlikely(dss_dir->magic_head == DSS_MAGIC_NUMBER)) { + if (dss_close_dir(dirp) != GS_SUCCESS) { + return -1; + } + return 0; + } else { + return closedir(dirp); + } +} + +/* declare macros to replace file system API */ +#define rename(oldpath, newpath) rename_dev((oldpath), (newpath)) +#define open(pathname, flags, mode) open_dev((pathname), (flags), (mode)) +#define close(fd) close_dev(fd) +#define read(fd, buf, count) read_dev((fd), (buf), (count)) +#define pread(fd, buf, count, offset) pread_dev((fd), (buf), (count), (offset)) +#define pread64(fd, buf, count, offset) pread_dev((fd), (buf), (count), (offset)) +#define write(fd, buf, count) write_dev((fd), (buf), (count)) +#define pwrite(fd, buf, count, offset) pwrite_dev((fd), (buf), (count), (offset)) +#define pwrite64(fd, buf, count, offset) pwrite_dev((fd), (buf), (count), (offset)) +#define lseek(fd, offset, whence) lseek_dev((fd), (offset), (whence)) +#define lseek64(fd, offset, whence) lseek_dev((fd), (offset), (whence)) +#define fsync(fd) fsync_dev(fd) +#define fallocate(fd, mode, offset, len) fallocate_dev((fd), (mode), (offset), (len)) +#define access(pathname, mode) access_dev((pathname), (mode)) +#define mkdir(pathname, mode) mkdir_dev((pathname), (mode)) +#define rmdir(pathname) rmdir_dev(pathname) +#define symlink(target, linkpath) symlink_dev((target), (linkpath)) +#define readlink(pathname, buf, bufsiz) readlink_dev((pathname), (buf), (bufsiz)) +#define chmod(pathname, mode) chmod_dev((pathname), (mode)) +#define unlink(pathname) unlink_dev(pathname) +#define stat(pathname, statbuf) stat_dev(pathname, statbuf) +#define lstat(pathname, statbuf) lstat_dev(pathname, statbuf) +#define ftruncate(fd, length) 
ftruncate_dev((fd), (length)) +#define fopen(pathname, mode) fopen_dev((pathname), (mode)) +#define fclose(stream) fclose_dev((stream)) +#define fread(ptr, size, nmemb, stream) fread_dev((ptr), (size), (nmemb), (stream)) +#define fwrite(ptr, size, nmemb, stream) fwrite_dev((ptr), (size), (nmemb), (stream)) +#define fseek(stream, offset, whence) fseek_dev((stream), (offset), (whence)) +#define ftell(stream) ftell_dev((stream)) +#define fflush(stream) fflush_dev((stream)) +#define feof(stream) feof_dev((stream)) +#define ferror(stream) ferror_dev((stream)) +#define fileno(stream) fileno_dev((stream)) +#define setvbuf(stream, buf, type, size) setvbuf_dev((stream), (buf), (type), (size)) +#define rewind(stream) rewind_dev((stream)) +#define opendir(name) opendir_dev((name)) +#define readdir(dirp) readdir_dev((dirp)) +#define readdir_r(dirp, entry, result) readdir_r_dev((dirp), (entry), (result)) +#define closedir(dirp) closedir_dev((dirp)) +#define fstat(fd, statbuf) fstat_dev(fd, statbuf) +#define remove(pathname) remove_dev(pathname) + +#endif /* FIO_DEVICE_H */ diff --git a/src/include/storage/file/fio_device_com.h b/src/include/storage/file/fio_device_com.h new file mode 100644 index 000000000..007a80ef4 --- /dev/null +++ b/src/include/storage/file/fio_device_com.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2022 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ * --------------------------------------------------------------------------------------- + * + * fio_device_com.h + * Storage Adapter Header File. + * + * + * IDENTIFICATION + * src/include/storage/file/fio_device_com.h + * + * --------------------------------------------------------------------------------------- + */ + +#ifndef FIO_DEVICE_COM_H +#define FIO_DEVICE_COM_H + +#include "c.h" +#include +#include +#include + +typedef enum en_device_type { + DEV_TYPE_FILE = 0, + DEV_TYPE_DSS, + DEV_TYPE_NUM, + DEV_TYPE_INVALID +} device_type_t; + +extern bool g_enable_dss; +extern uint64 XLogSegmentSize; + +#define INVALID_DEVICE_SIZE 0x7FFFFFFFFFFFFFFF +#define FILE_EXTEND_STEP_SIZE 2097152 // 2MB +#define DSS_XLOG_SEG_SIZE 1073741824 // 1GB xlog seg file size for DSS only +#define DSS_BATCH_SIZE 2048 +#define DSS_SLRU_SEGMENT_SZIE 16777216 + +#define DSS_MAGIC_NUMBER 0xFEDCBA9876543210 +#define MAX_FILE_NAME_LEN 64 +#define DSS_MAX_MXACTOFFSET 1024 +#define DSS_MAX_MXACTMEMBER 2048 + +#define GS_SUCCESS 0 +#define GS_ERROR (-1) +#define GS_TIMEDOUT 1 + +#endif /* FIO_DEVICE_COM_H */ diff --git a/src/include/storage/lock/lwlock.h b/src/include/storage/lock/lwlock.h index 229ee502b..0ab2e3a2b 100644 --- a/src/include/storage/lock/lwlock.h +++ b/src/include/storage/lock/lwlock.h @@ -132,7 +132,7 @@ const struct LWLOCK_PARTITION_DESC LWLockPartInfo[] = { #define NUM_SESSION_ROLEID_PARTITIONS 128 #ifdef WIN32 -#define NUM_INDIVIDUAL_LWLOCKS 116 /* num should be same as lwlockname.txt */ +#define NUM_INDIVIDUAL_LWLOCKS 132 /* num should be same as lwlockname.txt */ #endif /* Number of partitions the global package runtime state hashtable */ diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h index 5fbead2d9..04d9257cd 100644 --- a/src/include/storage/pmsignal.h +++ b/src/include/storage/pmsignal.h @@ -50,6 +50,9 @@ typedef enum { PMSIGNAL_START_LOGICAL_READ_WORKER,/* start logical read worker */ PMSIGNAL_START_PARALLEL_DECODE_WORKER,/* start 
parallel decoding worker */ PMSIGNAL_START_APPLY_WORKER, /* start a apply worker */ + PMSIGNAL_DMS_TRIGGERFAILOVER, /* failover for reform */ + PMSIGNAL_DMS_SWITCHOVER_PROMOTE, /* dms standby switchover promote */ + PMSIGNAL_DMS_REFORM, /* dms reform start during PM_RUN */ NUM_PMSIGNALS /* Must be last value of enum! */ } PMSignalReason; diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 4dbfdf708..22b4e9ae6 100755 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -441,9 +441,22 @@ const int MAX_COMPACTION_THREAD_NUM = 10; (g_instance.attr.attr_storage.dcf_attr.enable_dcf ? \ g_instance.attr.attr_storage.dcf_attr.dcf_max_workers : 0) +#define NUM_DMS_REFORM_CALLLBACK_PROCS (3) +#define NUM_DMS_LSNR_CALLBACK_PROC (1) +#define NUM_DMS_RDMA_THREAD_CNT (g_instance.attr.attr_storage.dms_attr.work_thread_count * 2) +#define NUM_DMS_CALLBACK_PROCS \ + (g_instance.attr.attr_storage.dms_attr.enable_dms ? \ + (g_instance.attr.attr_storage.dms_attr.channel_count * g_instance.attr.attr_storage.dms_attr.inst_count + \ + ((!strcasecmp(g_instance.attr.attr_storage.dms_attr.interconnect_type, "TCP"))? 
\ + g_instance.attr.attr_storage.dms_attr.work_thread_count : \ + NUM_DMS_RDMA_THREAD_CNT) + \ + NUM_DMS_LSNR_CALLBACK_PROC + \ + NUM_DMS_REFORM_CALLLBACK_PROCS ) : 0) + #define GLOBAL_ALL_PROCS \ (g_instance.shmem_cxt.MaxBackends + \ NUM_CMAGENT_PROCS + NUM_AUXILIARY_PROCS + NUM_DCF_CALLBACK_PROCS + \ + NUM_DMS_CALLBACK_PROCS + \ (g_instance.attr.attr_storage.max_prepared_xacts * NUM_TWOPHASE_PARTITIONS)) #define GLOBAL_MAX_SESSION_NUM (2 * g_instance.shmem_cxt.MaxBackends) diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index d05013e75..5a8370506 100755 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -190,3 +190,4 @@ extern void ResetProcXidCache(PGPROC* proc, bool needlock); // For GTT extern TransactionId ListAllThreadGttFrozenxids(int maxSize, ThreadId *pids, TransactionId *xids, int *n); extern TransactionId GetReplicationSlotCatalogXmin(); +extern void GetOldestGlobalProcXmin(TransactionId *globalProcXmin); diff --git a/src/include/storage/smgr/fd.h b/src/include/storage/smgr/fd.h index 47b2c292d..e8122e7af 100644 --- a/src/include/storage/smgr/fd.h +++ b/src/include/storage/smgr/fd.h @@ -44,6 +44,7 @@ #include "storage/smgr/relfilenode.h" #include "storage/page_compression.h" #include "postmaster/aiocompleter.h" +#include "storage/file/fio_device_com.h" /* * FileSeek uses the standard UNIX lseek(2) flags. @@ -66,19 +67,6 @@ typedef struct DataFileIdCacheEntry { enum FileExistStatus { FILE_EXIST, FILE_NOT_EXIST, FILE_NOT_REG }; -/* - * On Windows, we have to interpret EACCES as possibly meaning the same as - * ENOENT, because if a file is unlinked-but-not-yet-gone on that platform, - * that's what you get. Ugh. This code is designed so that we don't - * actually believe these cases are okay without further evidence (namely, - * a pending fsync request getting canceled ... see mdsync). 
- */ -#ifndef WIN32 -#define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT) -#else -#define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT || (err) == EACCES) -#endif - /* * prototypes for functions in fd.c */ @@ -198,5 +186,6 @@ extern bool repair_deleted_file_check(RelFileNodeForkNum fileNode, int fd); // #define PG_TEMP_FILES_DIR "pgsql_tmp" #define PG_TEMP_FILE_PREFIX "pgsql_tmp" +#define SS_PG_TEMP_FILES_DIR "ss_pgsql_tmp" #endif /* FD_H */ diff --git a/src/include/storage/smgr/segment.h b/src/include/storage/smgr/segment.h index bdc82438c..a9d2b001a 100644 --- a/src/include/storage/smgr/segment.h +++ b/src/include/storage/smgr/segment.h @@ -127,6 +127,8 @@ DecodedXLogBlockOp XLogAtomicDecodeBlockData(char *data, int len); * APIs used for segment store metadata. */ BufferDesc *SegBufferAlloc(SegSpace *spc, RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr); +Buffer ReadSegBufferForDMS(BufferDesc* bufHdr, ReadBufferMode mode, SegSpace *spc = NULL); +void ReadSegBufferForCheck(BufferDesc* bufHdr, ReadBufferMode mode, SegSpace *spc, Block bufBlock); Buffer ReadBufferFast(SegSpace *spc, RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode); void SegReleaseBuffer(Buffer buffer); void SegUnlockReleaseBuffer(Buffer buffer); @@ -139,6 +141,10 @@ void FlushOneSegmentBuffer(Buffer buffer); void FlushOneBufferIncludeDW(BufferDesc *buf_desc); Buffer try_get_moved_pagebuf(RelFileNode *rnode, int forknum, BlockNumber logic_blocknum); +void SetInProgressFlags(BufferDesc *bufDesc, bool input); +bool HasInProgressBuf(void); +void SegTerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits); + /* Segment Remain API */ enum StatRemainExtentType { ALLOC_SEGMENT = 1, @@ -160,8 +166,10 @@ typedef struct ExtentTag { XLogRecPtr lsn; } ExtentTag; -#define XLOG_REMAIN_SEGS_FILE_PATH "global/pg_remain_segs" -#define XLOG_REMAIN_SEGS_BACKUP_FILE_PATH "global/pg_remain_segs.backup" +#define XLOG_REMAIN_SEGS_FILE_PATH 
(g_instance.attr.attr_storage.dss_attr.ss_enable_dss ? \ + ((char*)"pg_remain_segs") : ((char*)"global/pg_remain_segs")) +#define XLOG_REMAIN_SEGS_BACKUP_FILE_PATH (g_instance.attr.attr_storage.dss_attr.ss_enable_dss ? \ + ((char*)"pg_remain_segs.backup") : ((char*)"global/pg_remain_segs.backup")) #define XLOG_REMAIN_SEGS_SIZE 8192 #define XLOG_REMAIN_SEG_FILE_LEAST_LEN (sizeof(XLogRecPtr) + sizeof(uint32) * 3) #define XLOG_REMAIN_SEGS_BATCH_NUM 20 diff --git a/src/include/storage/smgr/segment_internal.h b/src/include/storage/smgr/segment_internal.h index 3bbb79be3..7d6a9e54a 100644 --- a/src/include/storage/smgr/segment_internal.h +++ b/src/include/storage/smgr/segment_internal.h @@ -33,6 +33,7 @@ #include "storage/buf/bufpage.h" #include "storage/lock/lwlock.h" #include "storage/smgr/smgr.h" +#include "storage/file/fio_device_com.h" #include "utils/segment_test.h" const int DF_MAP_GROUP_RESERVED = 3; @@ -100,6 +101,7 @@ void df_unlink(SegLogicFile *sf); void df_create_file(SegLogicFile *sf, bool redo); void df_shrink(SegLogicFile *sf, BlockNumber target); void df_flush_data(SegLogicFile *sf, BlockNumber blocknum, BlockNumber nblocks); +bool df_ss_update_segfile_size(SegLogicFile *sf, BlockNumber target_block); /* * Data files status in the segment space; @@ -396,6 +398,7 @@ SegSpace *spc_drop(Oid tablespace_id, Oid database_id, bool redo); void spc_drop_space_node(Oid spcNode, Oid dbNode); void spc_lock(SegSpace *spc); void spc_unlock(SegSpace *spc); +void SSDrop_seg_space(Oid spcNode, Oid dbNode); BlockNumber spc_alloc_extent(SegSpace *spc, int extent_size, ForkNumber forknum, BlockNumber designate_block, ExtentInversePointer iptr); @@ -501,6 +504,7 @@ typedef struct SegmentDesc { uint32 timeline; } SegmentDesc; + #define IsNormalForknum(forknum) \ ((forknum) == MAIN_FORKNUM || (forknum) == FSM_FORKNUM || (forknum) == VISIBILITYMAP_FORKNUM) diff --git a/src/include/storage/smgr/smgr.h b/src/include/storage/smgr/smgr.h index 5ef15b985..c55456e00 100644 --- 
a/src/include/storage/smgr/smgr.h +++ b/src/include/storage/smgr/smgr.h @@ -22,6 +22,8 @@ #include "utils/rel_gs.h" #include "vecexecutor/vectorbatch.h" #include "nodes/bitmapset.h" +#include "storage/file/fio_device_com.h" +#include "storage/dss/dss_api_def.h" typedef int File; @@ -137,11 +139,13 @@ enum SMGR_READ_STATUS { * a pending fsync request getting canceled ... see mdsync). */ #ifndef WIN32 -#define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT) +#define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT || (err) == ERR_DSS_DIR_NOT_EXIST || (err) == ERR_DSS_FILE_NOT_EXIST) #else #define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT || (err) == EACCES) #endif +#define FILE_ALREADY_EXIST(err) ((err) == EEXIST || (err) == ERR_DSS_DIR_CREATE_DUPLICATED) + extern void smgrinit(void); extern SMgrRelation smgropen(const RelFileNode& rnode, BackendId backend, int col = 0); extern void smgrshutdown(int code, Datum arg); diff --git a/src/include/storage/vfd.h b/src/include/storage/vfd.h index 1135517da..9ca946d57 100644 --- a/src/include/storage/vfd.h +++ b/src/include/storage/vfd.h @@ -20,6 +20,7 @@ #include "storage/page_compression.h" #include "storage/smgr/relfilenode.h" +#include "storage/file/fio_device_com.h" typedef struct vfd { int fd; /* current FD, or VFD_CLOSED if none */ unsigned short fdstate; /* bitflags for VFD's state */ diff --git a/src/include/tool_common.h b/src/include/tool_common.h new file mode 100644 index 000000000..a7e84ba4c --- /dev/null +++ b/src/include/tool_common.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2020 Huawei Technologies Co.,Ltd. + * + * openGauss is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + * --------------------------------------------------------------------------------------- + * + * tool_common.h + * + * IDENTIFICATION + * src/include/tool_common.h + * + * --------------------------------------------------------------------------------------- + */ +#ifndef TOOL_COMMON_H +#define TOOL_COMMON_H + +#include "storage/file/fio_device_com.h" + +#define MAXPGPATH 1024 + +#define T_SS_XLOGDIR \ + (g_enable_dss ? g_datadir.xlogDir : "pg_xlog") +#define T_DEFTBSDIR \ + (g_enable_dss ? g_datadir.baseDir : "base") +#define T_GLOTBSDIR \ + (g_enable_dss ? g_datadir.globalDir : "global") +#define T_CLOGDIR \ + (g_enable_dss ? g_datadir.clogDir : "pg_clog") +#define T_CSNLOGDIR \ + (g_enable_dss ? g_datadir.csnlogDir : "pg_csnlog") +#define T_PG_LOCATION_DIR \ + (g_enable_dss ? g_datadir.locationDir : "pg_location") +#define T_NOTIFYDIR \ + (g_enable_dss ? g_datadir.notifyDir : "pg_notify") +#define T_SERIALDIR \ + (g_enable_dss ? g_datadir.serialDir : "pg_serial") +#define T_SNAPSHOT_EXPORT_DIR \ + (g_enable_dss ? g_datadir.snapshotsDir : "pg_snapshots") +#define T_TBLSPCDIR \ + (g_enable_dss ? g_datadir.tblspcDir : "pg_tblspc") +#define T_TWOPHASE_DIR \ + (g_enable_dss ? g_datadir.twophaseDir : "pg_twophase") +#define T_MULTIXACTDIR \ + (g_enable_dss ? g_datadir.multixactDir : "pg_multixact") +#define T_XLOG_CONTROL_FILE \ + (g_enable_dss ? g_datadir.controlPath : "global/pg_control") +#define T_XLOG_CONTROL_FILE_BAK \ + (g_enable_dss ? g_datadir.controlBakPath : "global/pg_control.backup") +#define T_OLD_DW_FILE_NAME \ + (g_enable_dss ? 
g_datadir.dwDir.dwOldPath : "global/pg_dw") +#define T_DW_FILE_NAME_PREFIX \ + (g_enable_dss ? g_datadir.dwDir.dwPathPrefix : "global/pg_dw_") +#define T_SINGLE_DW_FILE_NAME \ + (g_enable_dss ? g_datadir.dwDir.dwSinglePath : "global/pg_dw_single") +#define T_DW_BUILD_FILE_NAME \ + (g_enable_dss ? g_datadir.dwDir.dwBuildPath : "global/pg_dw.build") +#define T_DW_UPGRADE_FILE_NAME \ + (g_enable_dss ? g_datadir.dwDir.dwUpgradePath : "global/dw_upgrade") +#define T_DW_BATCH_UPGRADE_META_FILE_NAME \ + (g_enable_dss ? g_datadir.dwDir.dwBatchUpgradeMetaPath : "global/dw_batch_upgrade_meta") +#define T_DW_BATCH_UPGRADE_BATCH_FILE_NAME \ + (g_enable_dss ? g_datadir.dwDir.dwBatchUpgradeFilePath : "global/dw_batch_upgrade_files") +#define T_DW_META_FILE \ + (g_enable_dss ? g_datadir.dwDir.dwMetaPath : "global/pg_dw_meta") + +typedef struct st_dw_subdatadir_t { + char dwOldPath[MAXPGPATH]; + char dwPathPrefix[MAXPGPATH]; + char dwSinglePath[MAXPGPATH]; + char dwBuildPath[MAXPGPATH]; + char dwUpgradePath[MAXPGPATH]; + char dwBatchUpgradeMetaPath[MAXPGPATH]; + char dwBatchUpgradeFilePath[MAXPGPATH]; + char dwMetaPath[MAXPGPATH]; +} dw_subdatadir_t; + +typedef struct st_datadir_t { + char pg_data[MAXPGPATH]; // pg_data path in unix + char dss_data[MAXPGPATH]; // dss vgdata (only in dss mode) + char dss_log[MAXPGPATH]; // dss vglog (only in dss mode) + int instance_id; // instance id of cluster (only in dss mode) + char xlogDir[MAXPGPATH]; + char baseDir[MAXPGPATH]; + char globalDir[MAXPGPATH]; + char clogDir[MAXPGPATH]; + char csnlogDir[MAXPGPATH]; + char locationDir[MAXPGPATH]; + char notifyDir[MAXPGPATH]; + char serialDir[MAXPGPATH]; + char snapshotsDir[MAXPGPATH]; + char tblspcDir[MAXPGPATH]; + char twophaseDir[MAXPGPATH]; + char multixactDir[MAXPGPATH]; + char controlPath[MAXPGPATH]; + char controlBakPath[MAXPGPATH]; + dw_subdatadir_t dwDir; +} datadir_t; + +void initDataPathStruct(bool enable_dss); + +extern datadir_t g_datadir; + +#endif diff --git 
a/src/include/utils/be_module.h b/src/include/utils/be_module.h index d3d10242c..6585940d4 100755 --- a/src/include/utils/be_module.h +++ b/src/include/utils/be_module.h @@ -140,9 +140,12 @@ enum ModuleId { MOD_LOGICAL_DECODE, /* logical decode */ MOD_GPRC, /* global package runtime cache */ MOD_DISASTER_READ, - MODE_REPSYNC, /* debug info for func SyncRepWaitForLSN */ MOD_SQLPATCH, + MOD_DMS, /* DMS */ + MOD_DSS, /* dss api module */ + + MOD_GPI, /* debug info for global partition index */ /* * Add your module id above. diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h index 12b4cad4a..59882982d 100644 --- a/src/include/utils/guc_tables.h +++ b/src/include/utils/guc_tables.h @@ -102,6 +102,7 @@ enum config_group { TSDB, INSTRUMENTS_OPTIONS, CE_OPTIONS, + SHARED_STORAGE_OPTIONS, #ifdef PGXC DATA_NODES, GTM, diff --git a/src/include/utils/knl_localsysdbcache.h b/src/include/utils/knl_localsysdbcache.h index 188d8a40c..ebb146047 100644 --- a/src/include/utils/knl_localsysdbcache.h +++ b/src/include/utils/knl_localsysdbcache.h @@ -56,6 +56,10 @@ extern MemoryContext LocalSmgrStorageMemoryCxt(); extern bool EnableGlobalSysCache(); +#if defined(USE_ASSERT_CHECKING) && !defined(ENABLE_MEMORY_CHECK) +extern void CloseLSCCheck(); +#endif + knl_u_inval_context *GetInvalCxt(); knl_u_relmap_context *GetRelMapCxt(); diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h index 3b9e26b8f..ce1baa4b9 100644 --- a/src/include/utils/snapmgr.h +++ b/src/include/utils/snapmgr.h @@ -50,6 +50,7 @@ extern bool CommittedXidVisibleInSnapshot(TransactionId xid, Snapshot snapshot, extern bool CommittedXidVisibleInDecodeSnapshot(TransactionId xid, Snapshot snapshot, Buffer buffer); extern bool IsXidVisibleInGtmLiteLocalSnapshot(TransactionId xid, Snapshot snapshot, TransactionIdStatus hint_status, bool xmin_equal_xmax, Buffer buffer, bool *sync); +extern void RecheckXidFinish(TransactionId xid, CommitSeqNo csn); /* * We don't provide a static 
SnapshotDirty variable because it would be * non-reentrant. Instead, users of that snapshot type should declare a diff --git a/src/test/ha/ha_schedule_single_ss b/src/test/ha/ha_schedule_single_ss new file mode 100644 index 000000000..4483e0a0e --- /dev/null +++ b/src/test/ha/ha_schedule_single_ss @@ -0,0 +1,3 @@ +dms_reform/ss_recovrey_primary +dms_reform/ss_switchover +dms_reform/ss_failover diff --git a/src/test/ha/run_ha_single_ss.sh b/src/test/ha/run_ha_single_ss.sh new file mode 100644 index 000000000..954e9f118 --- /dev/null +++ b/src/test/ha/run_ha_single_ss.sh @@ -0,0 +1,67 @@ +#!/bin/sh +# run all the test case of ha +CUR_DIR=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) +echo "CUR_DIR : $CUR_DIR" + +#init some variables +loop_num=$1 +if [ -z $1 ]; then + loop_num=1 +fi +count=0 + +source ./../ss/ss_database_build_env.sh + +test -f regression.diffs.hacheck && rm regression.diffs.hacheck + +RESULT_DIR=$CUR_DIR/../ha/results/dms_reform +total_starttime=`date +"%Y-%m-%d %H:%M:%S"` +total_startvalue=`date -d "$total_starttime" +%s` + +if [ -d ${RESULT_DIR} ]; then + echo "${RESULT_DIR} exists, so need to clean and recreate" + rm -rf ${RESULT_DIR} + else + echo "${RESULT_DIR} not exists, so need to recreate" + fi + mkdir ${RESULT_DIR} + +#init and start the database +printf "init and start the database in shared storage based on dms and dss\n" +sh ./../ss/deploy_two_inst_ss.sh > ./results/deploy_two_inst_ss.log 2>&1 + +for((i=1;i<=$loop_num;i++)) +do + printf "run the ha_schedule %d time\n" $i + printf "%-50s%-10s%-10s\n" "testcase" "result" "time(s)" + for line in `cat ha_schedule_single_ss$2 | grep -v ^#` + do + printf "%-50s" $line + starttime=`date +"%Y-%m-%d %H:%M:%S"` + sh ./testcase/$line.sh > ./results/$line.log 2>&1 + count=`expr $count + 1` + endtime=`date +"%Y-%m-%d %H:%M:%S"` + starttime1=`date -d "$starttime" +%s` + endtime1=`date -d "$endtime" +%s` + interval=`expr $endtime1 - $starttime1` + if [ $( grep "$failed_keyword" ./results/$line.log | grep 
-v "the database system is shutting down" | wc -l ) -eq 0 ]; then + printf "%-10s%-10s\n" ".... ok" $interval + else + printf "%-10s%-10s\n" ".... FAILED" $interval + cp ./results/$line.log regression.diffs.hacheck + exit 1 + fi + done +done + +#stop the database +printf "stop the database\n" +sleep 5 +stop_gaussdb ${SS_DATA}/dn0 +stop_gaussdb ${SS_DATA}/dn1 +sleep 3 + +total_endtime=`date +"%Y-%m-%d %H:%M:%S"` +total_endvalue=`date -d "$total_endtime" +%s` +printf "all %d tests passed.\n" $count +printf "total time: %ss\n" $(($total_endvalue - $total_startvalue)) diff --git a/src/test/ha/testcase/dms_reform/ss_failover.sh b/src/test/ha/testcase/dms_reform/ss_failover.sh new file mode 100644 index 000000000..e16407062 --- /dev/null +++ b/src/test/ha/testcase/dms_reform/ss_failover.sh @@ -0,0 +1,66 @@ +#!/bin/sh + +source ./../ss/ss_database_build_env.sh + +# failover test +test_1() +{ + sleep 10 + + # switchover at first + ${GAUSSHOME}/bin/gs_ctl switchover -D ${SS_DATA}/dn1 + if [ $? -eq 0 ]; then + echo "all of success!" + else + echo "$failed_keyword" + exit 1 + fi + + sleep 10 + + # stop database + stop_gaussdb ${SS_DATA}/dn0 + if [ $? -eq 0 ]; then + echo "all of success!" + else + echo "$failed_keyword" + exit 1 + fi + sleep 5 + stop_gaussdb ${SS_DATA}/dn1 + if [ $? -eq 0 ]; then + echo "all of success!" + else + echo "$failed_keyword" + exit 1 + fi + sleep 10 + + # start database0 to simulate failover + start_gaussdb ${SS_DATA}/dn0 ${PGPORT0} + if [ $? -eq 0 ]; then + echo "all of success!" + else + echo "$failed_keyword" + exit 1 + fi + sleep 5 + + start_gaussdb ${SS_DATA}/dn1 ${PGPORT1} + if [ $? -eq 0 ]; then + echo "all of success!" 
+ else + echo "$failed_keyword" + exit 1 + fi + + sleep 5 +} + +tear_down() +{ + sleep 5 +} + +test_1 +tear_down diff --git a/src/test/ha/testcase/dms_reform/ss_recovrey_primary.sh b/src/test/ha/testcase/dms_reform/ss_recovrey_primary.sh new file mode 100644 index 000000000..6b1d7e609 --- /dev/null +++ b/src/test/ha/testcase/dms_reform/ss_recovrey_primary.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +source ./../ss/ss_database_build_env.sh + +# stop database +test_1() +{ + sleep 10 + + # stop database + stop_gaussdb ${SS_DATA}/dn0 + if [ $? -eq 0 ]; then + echo "all of success!" + else + echo "$failed_keyword" + exit 1 + fi + + sleep 15 + + # start database + start_gaussdb ${SS_DATA}/dn0 + if [ $? -eq 0 ]; then + echo "all of success!" + else + echo "$failed_keyword" + exit 1 + fi + + sleep 5 +} + +tear_down() +{ + sleep 2 +} + +test_1 +tear_down \ No newline at end of file diff --git a/src/test/ha/testcase/dms_reform/ss_switchover.sh b/src/test/ha/testcase/dms_reform/ss_switchover.sh new file mode 100644 index 000000000..098ccdb6e --- /dev/null +++ b/src/test/ha/testcase/dms_reform/ss_switchover.sh @@ -0,0 +1,37 @@ +#!/bin/sh + +source ./../ss/ss_database_build_env.sh + +# switchover test +test_1() +{ + sleep 5 + + # switchover + ${GAUSSHOME}/bin/gs_ctl switchover -D ${SS_DATA}/dn1 + if [ $? -eq 0 ]; then + echo "all of success!" + else + echo "$failed_keyword" + exit 1 + fi + + sleep 10 +} + +tear_down() +{ + sleep 10 + #switchover + ${GAUSSHOME}/bin/gs_ctl switchover -D ${SS_DATA}/dn0 + if [ $? -eq 0 ]; then + echo "all of success!" 
+ else + echo "$failed_keyword" + exit 1 + fi +} + +test_1 +tear_down + diff --git a/src/test/regress/CMakeLists.txt b/src/test/regress/CMakeLists.txt index 92cbf35e9..057d3c181 100755 --- a/src/test/regress/CMakeLists.txt +++ b/src/test/regress/CMakeLists.txt @@ -15,7 +15,7 @@ set(pg_regress_single_DEF_OPTIONS ${MACRO_OPTIONS} -DHOST_TUPLE="${HOST_TUPLE}" set(pg_regress_single_main_DEF_OPTIONS ${MACRO_OPTIONS}) set(pg_regress_single_COMPILE_OPTIONS ${OPTIMIZE_OPTIONS} ${OS_OPTIONS} ${PROTECT_OPTIONS} ${WARNING_OPTIONS} ${BIN_SECURE_OPTIONS} ${CHECK_OPTIONS}) set(pg_regress_single_LINK_OPTIONS ${BIN_LINK_OPTIONS}) -set(pg_regress_single_LINK_LIBS pg_regress_single_obj -lpgport -lcrypt -ldl -lm -lssl -lcrypto -l${SECURE_C_CHECK} -pthread -lrt -lz -lminiunz) +set(pg_regress_single_LINK_LIBS pg_regress_single_obj -lpgport -lcrypt -ldl -lm -lssl -lcrypto -l${SECURE_C_CHECK} -pthread -lrt -lz -lminiunz) add_static_objtarget(pg_regress_single_obj TGT_pg_regress_single_SRC TGT_pg_regress_single_INC "${pg_regress_single_DEF_OPTIONS}" "${pg_regress_single_COMPILE_OPTIONS}" "${pg_regress_single_LINK_OPTIONS}") add_bintarget(pg_regress_single TGT_pg_regress_single_main_SRC TGT_pg_regress_single_INC "${pg_regress_single_main_DEF_OPTIONS}" "${pg_regress_single_COMPILE_OPTIONS}" "${pg_regress_single_LINK_OPTIONS}" "${pg_regress_single_LINK_LIBS}") add_dependencies(pg_regress_single pg_regress_single_obj pgport_static) @@ -44,6 +44,9 @@ MESSAGE(STATUS ${PROJECT_TRUNK_DIR} ${CMAKE_BINARY_DIR} ${CMAKE_INSTALL_PREFIX}) add_func_target_withargs_fastcheck(fastcheck_single fastcheck_single) add_func_target_withargs_fastcheck(fastcheck_single_audit fastcheck_single_audit) +add_func_target_withargs_fastcheck(fastcheck_single_ss fastcheck_single_ss) +add_func_target_withargs_fastcheck(fastcheck_gs_probackup fastcheck_gs_probackup) +add_func_target_withargs_fastcheck(fastcheck_ss fastcheck_ss) add_func_target_withargs_fastcheck(fastcheck_lite fastcheck_lite) 
add_func_target_withargs_fastcheck(fastcheck_single_mot fastcheck_single_mot) add_func_target_withargs_fastcheck(fastcheck_ce_single fastcheck_ce_single) @@ -59,6 +62,7 @@ add_func_target_withargs_fastcheck(hacheck_multi_single_mot hacheck_multi_single add_func_target_withargs_fastcheck(hacheck_single_paxos hacheck_single_paxos) add_func_target_withargs_fastcheck(decodecheck_single decodecheck_single) add_func_target_withargs_fastcheck(upgradecheck_single upgradecheck_single) +add_func_target_withargs_fastcheck(hacheck_ss_all hacheck_ss_all) function(add_func_target_withargs_gocheck _name _cmdargs) separate_arguments(cmd_args UNIX_COMMAND "${_cmdargs}") diff --git a/src/test/regress/expected/segment_subpartition_createtable_ss.out b/src/test/regress/expected/segment_subpartition_createtable_ss.out new file mode 100644 index 000000000..173dc5df5 --- /dev/null +++ b/src/test/regress/expected/segment_subpartition_createtable_ss.out @@ -0,0 +1,1539 @@ +--1.create table +--list_list list_hash list_range range_list range_hash range_range +--prepare +DROP SCHEMA segment_subpartition_createtable CASCADE; +ERROR: schema "segment_subpartition_createtable" does not exist +CREATE SCHEMA segment_subpartition_createtable; +SET CURRENT_SCHEMA TO segment_subpartition_createtable; +--1.1 normal table +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201902', '2', '1', 1); +insert into list_list values('201902', '1', '1', 1); +insert into 
list_list values('201903', '2', '1', 1); +insert into list_list values('201903', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +select * from list_list; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 +(6 rows) + +drop table list_list; +CREATE TABLE list_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a, + SUBPARTITION p_201901_b + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a, + SUBPARTITION p_201902_b + ) +); +insert into list_hash values('201902', '1', '1', 1); +insert into list_hash values('201902', '2', '1', 1); +insert into list_hash values('201902', '3', '1', 1); +insert into list_hash values('201903', '4', '1', 1); +insert into list_hash values('201903', '5', '1', 1); +insert into list_hash values('201903', '6', '1', 1); +select * from list_hash; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 4 | 1 | 1 + 201903 | 5 | 1 | 1 + 201903 | 6 | 1 | 1 + 201902 | 2 | 1 | 1 + 201902 | 3 | 1 | 1 + 201902 | 1 | 1 | 1 +(6 rows) + +drop table list_hash; +CREATE TABLE list_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a values less than ('4'), + SUBPARTITION p_201901_b values less than ('6') + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a values less than ('3'), + 
SUBPARTITION p_201902_b values less than ('6') + ) +); +insert into list_range values('201902', '1', '1', 1); +insert into list_range values('201902', '2', '1', 1); +insert into list_range values('201902', '3', '1', 1); +insert into list_range values('201903', '4', '1', 1); +insert into list_range values('201903', '5', '1', 1); +insert into list_range values('201903', '6', '1', 1); +ERROR: inserted partition key does not map to any table partition +select * from list_range; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 4 | 1 | 1 + 201903 | 5 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201902 | 3 | 1 | 1 +(5 rows) + +drop table list_range; +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a values ('1'), + SUBPARTITION p_201901_b values ('2') + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a values ('1'), + SUBPARTITION p_201902_b values ('2') + ) +); +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201902', '2', '1', 1); +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201903', '2', '1', 1); +insert into range_list values('201903', '1', '1', 1); +insert into range_list values('201903', '2', '1', 1); +select * from range_list; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 +(6 rows) + +drop table range_list; +CREATE TABLE range_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL 
, + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a, + SUBPARTITION p_201901_b + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a, + SUBPARTITION p_201902_b + ) +); +insert into range_hash values('201902', '1', '1', 1); +insert into range_hash values('201902', '2', '1', 1); +insert into range_hash values('201902', '1', '1', 1); +insert into range_hash values('201903', '2', '1', 1); +insert into range_hash values('201903', '1', '1', 1); +insert into range_hash values('201903', '2', '1', 1); +select * from range_hash; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 +(6 rows) + +drop table range_hash; +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( '3' ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '3' ) + ) +); +insert into range_range values('201902', '1', '1', 1); +insert into range_range values('201902', '2', '1', 1); +insert into range_range values('201902', '1', '1', 1); +insert into range_range values('201903', '2', '1', 1); +insert into range_range values('201903', '1', '1', 1); +insert into range_range values('201903', '2', '1', 1); +select * from range_range; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 
| 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +drop table range_range; +CREATE TABLE hash_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY hash (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +insert into hash_list values('201901', '1', '1', 1); +insert into hash_list values('201901', '2', '1', 1); +insert into hash_list values('201901', '1', '1', 1); +insert into hash_list values('201903', '2', '1', 1); +insert into hash_list values('201903', '1', '1', 1); +insert into hash_list values('201903', '2', '1', 1); +select * from hash_list; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201901 | 2 | 1 | 1 + 201901 | 1 | 1 | 1 + 201901 | 1 | 1 | 1 +(6 rows) + +drop table hash_list; +CREATE TABLE hash_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY hash (month_code) SUBPARTITION BY hash (dept_code) +( + PARTITION p_201901 + ( + SUBPARTITION p_201901_a, + SUBPARTITION p_201901_b + ), + PARTITION p_201902 + ( + SUBPARTITION p_201902_a, + SUBPARTITION p_201902_b + ) +); +insert into hash_hash values('201901', '1', '1', 1); +insert into hash_hash values('201901', '2', '1', 1); +insert into hash_hash values('201901', '1', '1', 1); +insert into hash_hash values('201903', '2', '1', 1); +insert into hash_hash values('201903', '1', '1', 1); +insert into hash_hash values('201903', '2', '1', 1); +select * from hash_hash; + 
month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201901 | 2 | 1 | 1 + 201901 | 1 | 1 | 1 + 201901 | 1 | 1 | 1 +(6 rows) + +drop table hash_hash; +CREATE TABLE hash_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY hash (month_code) SUBPARTITION BY range (dept_code) +( + PARTITION p_201901 + ( + SUBPARTITION p_201901_a VALUES LESS THAN ( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN ( '3' ) + ), + PARTITION p_201902 + ( + SUBPARTITION p_201902_a VALUES LESS THAN ( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN ( '3' ) + ) +); +insert into hash_range values('201901', '1', '1', 1); +insert into hash_range values('201901', '2', '1', 1); +insert into hash_range values('201901', '1', '1', 1); +insert into hash_range values('201903', '2', '1', 1); +insert into hash_range values('201903', '1', '1', 1); +insert into hash_range values('201903', '2', '1', 1); +select * from hash_range; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201901 | 1 | 1 | 1 + 201901 | 1 | 1 | 1 + 201901 | 2 | 1 | 1 +(6 rows) + +drop table hash_range; +--1.2 table with default subpartition +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +drop table list_list; +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 
( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) +); +drop table list_list; +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ), + PARTITION p_201902 VALUES ( '201903' ) +); +drop table list_list; +CREATE TABLE list_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a, + SUBPARTITION p_201902_b + ) +); +drop table list_hash; +CREATE TABLE list_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a, + SUBPARTITION p_201901_b + ), + PARTITION p_201902 VALUES ( '201903' ) +); +drop table list_hash; +CREATE TABLE list_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ), + PARTITION p_201902 VALUES ( '201903' ) +); +drop table list_hash; +CREATE TABLE list_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no 
VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a values less than ('3'), + SUBPARTITION p_201902_b values less than ('6') + ) +); +drop table list_range; +CREATE TABLE list_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a values less than ('4'), + SUBPARTITION p_201901_b values less than ('6') + ), + PARTITION p_201902 VALUES ( '201903' ) +); +drop table list_range; +CREATE TABLE list_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ), + PARTITION p_201902 VALUES ( '201903' ) +); +drop table list_range; +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a values ('1'), + SUBPARTITION p_201902_b values ('2') + ) +); +drop table range_list; +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a values ('1'), + 
SUBPARTITION p_201901_b values ('2') + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) +); +drop table range_list; +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) +); +drop table range_list; +CREATE TABLE range_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a, + SUBPARTITION p_201902_b + ) +); +drop table range_hash; +CREATE TABLE range_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a, + SUBPARTITION p_201901_b + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) +); +drop table range_hash; +CREATE TABLE range_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) +); +drop table range_hash; +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE 
(dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '3' ) + ) +); +drop table range_range; +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( '3' ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) +); +drop table range_range; +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) +); +drop table range_range; +CREATE TABLE hash_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901, + PARTITION p_201902 + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '3' ) + ) +); +drop table hash_range; +CREATE TABLE hash_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( '3' ) + ), + PARTITION p_201902 +); +drop table hash_range; +CREATE 
TABLE hash_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901, + PARTITION p_201902 +); +drop table hash_range; +CREATE TABLE hash_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901, + PARTITION p_201902 + ( + SUBPARTITION p_201902_a VALUES( '2' ), + SUBPARTITION p_201902_b VALUES( '3' ) + ) +); +drop table hash_range; +CREATE TABLE hash_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 + ( + SUBPARTITION p_201901_a VALUES( '2' ), + SUBPARTITION p_201901_b VALUES( '3' ) + ), + PARTITION p_201902 +); +drop table hash_range; +CREATE TABLE hash_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901, + PARTITION p_201902 +); +drop table hash_range; +CREATE TABLE hash_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901, + PARTITION p_201902 + ( + SUBPARTITION p_201902_a, + SUBPARTITION p_201902_b + ) +); +drop table hash_hash; +CREATE TABLE hash_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH 
(SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 + ( + SUBPARTITION p_201901_a, + SUBPARTITION p_201901_b + ), + PARTITION p_201902 +); +drop table hash_hash; +CREATE TABLE hash_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901, + PARTITION p_201902 +); +drop table hash_hash; +--1.3 subpartition name check +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_a VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +ERROR: duplicate subpartition name: "p_201901_a" +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +ERROR: duplicate subpartition name: "p_201901_a" +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + 
SUBPARTITION p_201901 VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +ERROR: duplicate subpartition name: "p_201901" +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201901_subpartdefault1 VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +drop table list_list; +--1.4 subpartition key check +-- 一级分区和二级分区分区键是同一列 +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (month_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +ERROR: The two partition keys of a subpartition partition table are the same. 
+DETAIL: N/A +--二级分区的键值一样 +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '1' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +ERROR: list partition p_201901_a and p_201901_b has overlapped value +--分区列不存在 +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_codeXXX) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +ERROR: undefined column"month_codexxx" is used as a partitioning column +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_codeXXX) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +ERROR: undefined column"dept_codexxx" is used as a partitioning column +CREATE TABLE list_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION 
BY LIST (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a values less than ('4'), + SUBPARTITION p_201901_b values less than ('4') + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a values less than ('3'), + SUBPARTITION p_201902_b values less than ('6') + ) +); +ERROR: partition bound of partition "p_201901_b" is too low +drop table list_range; +ERROR: table "list_range" does not exist +--1.5 list subpartition whith default +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( default ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201902', '2', '1', 1); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +insert into list_list values('201903', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +select * from list_list partition (p_201901); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 +(3 rows) + +select * from list_list partition (p_201902); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +drop table list_list; +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH 
(SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( default ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +drop table list_list; +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_b VALUES ( default ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +drop table list_list; +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( default ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( default ) + ) +); +drop table list_list; +--1.6 declaration and definition of the subpatiiton type are same. 
+--error +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY hash (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( default ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( default ) + ) +); +ERROR: The syntax format of subpartition is incorrect, the declaration and definition of the subpartition do not match. +DETAIL: The syntax format of subpartition p_201901_a is incorrect. +--1.7 add constraint +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '6' ) + ) +); +alter table range_range add constraint constraint_check CHECK (sales_amt IS NOT NULL); +insert into range_range values(1,1,1); +ERROR: new row for relation "range_range" violates check constraint "constraint_check" +DETAIL: N/A +drop table range_range; +-- drop partition column +CREATE TABLE range_hash_02 +( + col_1 int , + col_2 int, + col_3 VARCHAR2 ( 30 ) , + col_4 int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (col_1) SUBPARTITION BY hash (col_2) +( + PARTITION p_range_1 VALUES LESS THAN( -10 ) + ( + SUBPARTITION p_hash_1_1 , + SUBPARTITION p_hash_1_2 , + SUBPARTITION p_hash_1_3 + ), + PARTITION p_range_2 VALUES LESS THAN( 20 ), + PARTITION p_range_3 VALUES LESS THAN( 30) + ( + SUBPARTITION p_hash_3_1 , 
+ SUBPARTITION p_hash_3_2 , + SUBPARTITION p_hash_3_3 + ), + PARTITION p_range_4 VALUES LESS THAN( 50) + ( + SUBPARTITION p_hash_4_1 , + SUBPARTITION p_hash_4_2 , + SUBPARTITION range_hash_02 + ), + PARTITION p_range_5 VALUES LESS THAN( MAXVALUE ) +) ENABLE ROW MOVEMENT; +alter table range_hash_02 drop column col_1; +ERROR: cannot drop partitioning column "col_1" +alter table range_hash_02 drop column col_2; +ERROR: cannot drop partitioning column "col_2" +drop table range_hash_02; +--1.8 SET ROW MOVEMENT +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1', '2' ), + SUBPARTITION p_201901_b VALUES ( default ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1', '2' ), + SUBPARTITION p_201902_b VALUES ( default ) + ) +); +alter table list_list disable ROW MOVEMENT; +insert into list_list values('201902', '1', '1', 1); +update list_list set month_code = '201903'; +ERROR: fail to update partitioned table "list_list" +DETAIL: disable row movement +update list_list set dept_code = '3'; +ERROR: fail to update partitioned table "list_list" +DETAIL: disable row movement +alter table list_list enable ROW MOVEMENT; +update list_list set month_code = '201903'; +update list_list set dept_code = '3'; +drop table list_list; +--1.9 without subpartition declaration +create table test(a int) WITH (SEGMENT=ON) +partition by range(a) +( +partition p1 values less than(100) +( +subpartition subp1 values less than(50), +subpartition subp2 values less than(100) +), +partition p2 values less than(200), +partition p3 values less than(maxvalue) +); +ERROR: The syntax format of subpartition is incorrect, missing declaration of subpartition. 
+DETAIL: N/A +--1.10 create table like +CREATE TABLE range_range +( + col_1 int primary key, + col_2 int NOT NULL , + col_3 VARCHAR2 ( 30 ) NOT NULL , + col_4 int generated always as(2*col_2) stored , + check (col_4 >= col_2) +) WITH (SEGMENT=ON) +PARTITION BY RANGE (col_1) SUBPARTITION BY RANGE (col_2) +( + PARTITION p_range_1 VALUES LESS THAN( 10 ) + ( + SUBPARTITION p_range_1_1 VALUES LESS THAN( 5 ), + SUBPARTITION p_range_1_2 VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_range_2 VALUES LESS THAN( 20 ) + ( + SUBPARTITION p_range_2_1 VALUES LESS THAN( 5 ), + SUBPARTITION p_range_2_2 VALUES LESS THAN( 10 ) + ) +) ENABLE ROW MOVEMENT; +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "range_range_pkey" for table "range_range" +CREATE TABLE range_range_02 (like range_range INCLUDING ALL ); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "range_range_02_pkey" for table "range_range_02" +drop table range_range; +--ROW LEVEL SECURITY POLICY +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) , + dept_code VARCHAR2 ( 30 ) , + user_no VARCHAR2 ( 30 ) , + sales_amt int, + primary key(month_code, dept_code) +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '3' ) + ) +); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "range_range_pkey" for table "range_range" +CREATE ROW LEVEL SECURITY POLICY range_range_rls ON range_range USING(user_no = CURRENT_USER); +ERROR: Un-support feature +DETAIL: Do not support row level security policy on subpartition table. 
+drop table range_range; +CREATE SCHEMA ledgernsp WITH BLOCKCHAIN; +ERROR: BLOCKCHAIN is not supported while DMS and DSS enabled +CREATE SCHEMA ledgernsp; +ALTER SCHEMA ledgernsp WITH BLOCKCHAIN; +ERROR: BLOCKCHAIN is not supported while DMS and DSS enabled +DROP SCHEMA ledgernsp; +-- create table as +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '3' ) + ) +) ENABLE ROW MOVEMENT; +insert into range_range values(201902,1,1,1),(201902,1,1,1),(201902,3,1,1),(201903,1,1,1),(201903,2,1,1),(201903,2,1,1); +select * from range_range subpartition(p_201901_a) where month_code in(201902,201903) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 +(2 rows) + +create table range_range_copy WITH (SEGMENT=ON) as select * from range_range subpartition(p_201901_a) where month_code in(201902,201903); +select * from range_range_copy order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 +(2 rows) + +drop table range_range; +drop table range_range_copy; +--1.11 create index +create table range_range_03 +( + c_int int, + c_char1 char(3000), + c_char2 char(5000), + c_char3 char(6000), + c_varchar1 varchar(3000), + c_varchar2 varchar(5000), + c_varchar3 varchar, + c_varchar4 varchar, + c_text1 text, + c_text2 text, + c_text3 text, + c int, + primary key(c,c_int) +) with 
(parallel_workers=10, SEGMENT=ON) +partition by range (c_int) subpartition by range (c_char1) +( + partition p1 values less than(50) + ( + subpartition p1_1 values less than('c'), + subpartition p1_2 values less than(maxvalue) + ), + partition p2 values less than(100) + ( + subpartition p2_1 values less than('c'), + subpartition p2_2 values less than(maxvalue) + ), + partition p3 values less than(150) + ( + subpartition p3_1 values less than('c'), + subpartition p3_2 values less than(maxvalue) + ), + partition p4 values less than(200) + ( + subpartition p4_1 values less than('c'), + subpartition p4_2 values less than(maxvalue) + ), + partition p5 values less than(maxvalue)( + subpartition p5_1 values less than('c'), + subpartition p5_2 values less than(maxvalue) + ) +) enable row movement; +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "range_range_03_pkey" for table "range_range_03" +create index range_range_03_idx1 on range_range_03 (c_varchar1) local; --success +create index range_range_03_idx2 on range_range_03 (c_varchar2) local ( + partition cpt7_p1, + partition cpt7_p2, + partition cpt7_p3, + partition cpt7_p4, + partition cpt7_p5 +); --failed +ERROR: Cannot match subpartitions when create subpartition indexes. 
+create index range_range_03_idx3 on range_range_03 (c_varchar3); --success, default global +create index range_range_03_idx4 on range_range_03 (c_varchar4) global; --success +create index range_range_03_idx5 on range_range_03 (c_varchar4) local; --failed, can not be same column with global index +ERROR: Global and local partition index should not be on same column +\d+ range_range_03 + Table "segment_subpartition_createtable.range_range_03" + Column | Type | Modifiers | Storage | Stats target | Description +------------+-------------------------+-----------+----------+--------------+------------- + c_int | integer | not null | plain | | + c_char1 | character(3000) | | extended | | + c_char2 | character(5000) | | extended | | + c_char3 | character(6000) | | extended | | + c_varchar1 | character varying(3000) | | extended | | + c_varchar2 | character varying(5000) | | extended | | + c_varchar3 | character varying | | extended | | + c_varchar4 | character varying | | extended | | + c_text1 | text | | extended | | + c_text2 | text | | extended | | + c_text3 | text | | extended | | + c | integer | not null | plain | | +Indexes: + "range_range_03_pkey" PRIMARY KEY, btree (c, c_int) TABLESPACE pg_default + "range_range_03_idx1" btree (c_varchar1) LOCAL TABLESPACE pg_default + "range_range_03_idx3" btree (c_varchar3) TABLESPACE pg_default + "range_range_03_idx4" btree (c_varchar4) TABLESPACE pg_default +Partition By RANGE(c_int) Subpartition By RANGE(c_char1) +Number of partitions: 5 (View pg_partition to check each partition range.) +Number of subpartitions: 10 (View pg_partition to check each subpartition range.) 
+Has OIDs: no +Options: orientation=row, parallel_workers=10, segment=on, compression=no + +select pg_get_tabledef('range_range_03'); + pg_get_tabledef +---------------------------------------------------------------------------------------------------- + SET search_path = segment_subpartition_createtable; + + CREATE TABLE range_range_03 ( + + c_int integer NOT NULL, + + c_char1 character(3000), + + c_char2 character(5000), + + c_char3 character(6000), + + c_varchar1 character varying(3000), + + c_varchar2 character varying(5000), + + c_varchar3 character varying, + + c_varchar4 character varying, + + c_text1 text, + + c_text2 text, + + c_text3 text, + + c integer NOT NULL + + ) + + WITH (orientation=row, parallel_workers=10, segment=on, compression=no) + + PARTITION BY RANGE (c_int) SUBPARTITION BY RANGE (c_char1) + + ( + + PARTITION p1 VALUES LESS THAN (50) TABLESPACE pg_default + + ( + + SUBPARTITION p1_1 VALUES LESS THAN ('c') TABLESPACE pg_default, + + SUBPARTITION p1_2 VALUES LESS THAN (MAXVALUE) TABLESPACE pg_default + + ), + + PARTITION p2 VALUES LESS THAN (100) TABLESPACE pg_default + + ( + + SUBPARTITION p2_1 VALUES LESS THAN ('c') TABLESPACE pg_default, + + SUBPARTITION p2_2 VALUES LESS THAN (MAXVALUE) TABLESPACE pg_default + + ), + + PARTITION p3 VALUES LESS THAN (150) TABLESPACE pg_default + + ( + + SUBPARTITION p3_1 VALUES LESS THAN ('c') TABLESPACE pg_default, + + SUBPARTITION p3_2 VALUES LESS THAN (MAXVALUE) TABLESPACE pg_default + + ), + + PARTITION p4 VALUES LESS THAN (200) TABLESPACE pg_default + + ( + + SUBPARTITION p4_1 VALUES LESS THAN ('c') TABLESPACE pg_default, + + SUBPARTITION p4_2 VALUES LESS THAN (MAXVALUE) TABLESPACE pg_default + + ), + + PARTITION p5 VALUES LESS THAN (MAXVALUE) TABLESPACE pg_default + + ( + + SUBPARTITION p5_1 VALUES LESS THAN ('c') TABLESPACE pg_default, + + SUBPARTITION p5_2 VALUES LESS THAN (MAXVALUE) TABLESPACE pg_default + + ) + + ) + + ENABLE ROW MOVEMENT; + + CREATE INDEX range_range_03_idx4 ON range_range_03 
USING btree (c_varchar4) TABLESPACE pg_default;+ + CREATE INDEX range_range_03_idx3 ON range_range_03 USING btree (c_varchar3) TABLESPACE pg_default;+ + CREATE INDEX range_range_03_idx1 ON range_range_03 USING btree (c_varchar1) LOCAL( + + PARTITION partition_name( + + SUBPARTITION p1_1_c_varchar1_idx, + + SUBPARTITION p1_2_c_varchar1_idx + + ), + + PARTITION partition_name( + + SUBPARTITION p2_1_c_varchar1_idx, + + SUBPARTITION p2_2_c_varchar1_idx + + ), + + PARTITION partition_name( + + SUBPARTITION p3_1_c_varchar1_idx, + + SUBPARTITION p3_2_c_varchar1_idx + + ), + + PARTITION partition_name( + + SUBPARTITION p4_1_c_varchar1_idx, + + SUBPARTITION p4_2_c_varchar1_idx + + ), + + PARTITION partition_name( + + SUBPARTITION p5_1_c_varchar1_idx, + + SUBPARTITION p5_2_c_varchar1_idx + + ) + + ) TABLESPACE pg_default; + + ALTER TABLE range_range_03 ADD CONSTRAINT range_range_03_pkey PRIMARY KEY (c, c_int); +(1 row) + +drop table range_range_03; +--unique local index columns must contain the partition key +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '3' ) + ) +) ENABLE ROW MOVEMENT; +create unique index idx on range_range(month_code) local; +ERROR: unique local index columns must contain all the partition keys and collation must be default collation +create unique index idx1 on range_range(month_code, user_no) local; +ERROR: unique local index columns must contain all the partition keys and collation must be default collation +drop table range_range; +-- 
partkey has timestampwithzone type +drop table hash_range; +ERROR: table "hash_range" does not exist +CREATE TABLE hash_range +( + col_1 int PRIMARY KEY USING INDEX, + col_2 int NOT NULL , + col_3 int NOT NULL , + col_4 int, + col_19 TIMESTAMP WITH TIME ZONE +) WITH (SEGMENT=ON) +PARTITION BY HASH (col_2) SUBPARTITION BY RANGE (col_19) +( partition p_hash_1 + ( + SUBPARTITION p_range_1_1 VALUES LESS THAN( 5 ), + SUBPARTITION p_range_1_2 VALUES LESS THAN( MAXVALUE ) + ), + partition p_hash_2, + PARTITION p_hash_3, + PARTITION p_hash_4, + PARTITION p_hash_5, + PARTITION p_hash_7 +) ENABLE ROW MOVEMENT; +ERROR: partition key value must be const or const-evaluable expression +CREATE TABLE hash_range +( + col_1 int PRIMARY KEY USING INDEX, + col_2 int NOT NULL , + col_3 int NOT NULL , + col_4 int, + col_19 TIMESTAMP WITH TIME ZONE +) WITH (SEGMENT=ON) +PARTITION BY HASH (col_19) SUBPARTITION BY RANGE (col_2) +( partition p_hash_1 + ( + SUBPARTITION p_range_1_1 VALUES LESS THAN( 5 ), + SUBPARTITION p_range_1_2 VALUES LESS THAN( MAXVALUE ) + ), + partition p_hash_2, + PARTITION p_hash_3, + PARTITION p_hash_4, + PARTITION p_hash_5, + PARTITION p_hash_7 +) ENABLE ROW MOVEMENT; +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "hash_range_pkey" for table "hash_range" +drop table hash_range; +--clean +DROP SCHEMA segment_subpartition_createtable CASCADE; +NOTICE: drop cascades to table range_range_02 +RESET CURRENT_SCHEMA; diff --git a/src/test/regress/expected/show_database_info.out b/src/test/regress/expected/show_database_info.out new file mode 100644 index 000000000..0175ed88e --- /dev/null +++ b/src/test/regress/expected/show_database_info.out @@ -0,0 +1,27 @@ +\l+ +--? .* +--? .* +--? .* +--? music | .* | SQL_ASCII | C | C | | 640 MB | pg_default | +--? music_pg | .* | SQL_ASCII | C | C | | 640 MB | pg_default | +--? mydb | .* | UTF8 | en_US.UTF-8 | en_US.UTF-8 | | 640 MB | pg_default | +--? 
pl_test_pkg_func | .* | UTF8 | en_US.UTF-8 | en_US.UTF-8 | | 640 MB | pg_default | +--? postgres | .* | UTF8 | en_US.UTF-8 | en_US.UTF-8 | | 640 MB | pg_default | default administrative connection database +--? regression | .* | UTF8 | en_US.UTF-8 | en_US.UTF-8 | | 640 MB | pg_default | +--? template0 | .* | UTF8 | en_US.UTF-8 | en_US.UTF-8 | .*| 640 MB | pg_default | default template for new databases +--? | .* | | | | .* | | | +--? template1 | .* | UTF8 | en_US.UTF-8 | en_US.UTF-8 | .*| 640 MB | pg_default | unmodifiable empty database +--? | .* | | | | .* | | | +(8 rows) + +select * from pg_ls_tmpdir(); +--? .* +--? .* +(0 rows) + +select * from pg_ls_waldir(); +--? .* +--? .* +--? 000000010000000000000001 | 1073741824 | .* +(1 row) + diff --git a/src/test/regress/expected/ss_metacmd.out b/src/test/regress/expected/ss_metacmd.out new file mode 100644 index 000000000..12b47c8f6 --- /dev/null +++ b/src/test/regress/expected/ss_metacmd.out @@ -0,0 +1,87 @@ +DROP VIEW IF EXISTS SS_VIEW1 CASCADE; +NOTICE: view "ss_view1" does not exist, skipping +DROP TABLE IF EXISTS SS_METACMD_TAB1 CASCADE; +NOTICE: table "ss_metacmd_tab1" does not exist, skipping +CREATE TABLE SS_METACMD_TAB1 (ID INT NOT NULL PRIMARY KEY, NAME VARCHAR(128)) WITH(SEGMENT = ON); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "ss_metacmd_tab1_pkey" for table "ss_metacmd_tab1" +CREATE VIEW SS_VIEW1 AS + SELECT * from SS_METACMD_TAB1; + +INSERT INTO SS_METACMD_TAB1 VALUES (1, 'PAIR'); +COMMIT; +WARNING: there is no transaction in progress +\dS+ SS_METACMD_TAB1 + Table "public.ss_metacmd_tab1" + Column | Type | Modifiers | Storage | Stats target | Description +--------+------------------------+-----------+----------+--------------+------------- + id | integer | not null | plain | | + name | character varying(128) | | extended | | +Indexes: + "ss_metacmd_tab1_pkey" PRIMARY KEY, btree (id) TABLESPACE pg_default +Has OIDs: no +Options: orientation=row, segment=on, compression=no + +\dS+ 
SS_VIEW1 + View "public.ss_view1" + Column | Type | Modifiers | Storage | Description +--------+------------------------+-----------+----------+------------- + id | integer | | plain | + name | character varying(128) | | extended | +View definition: + SELECT * + FROM ss_metacmd_tab1; + +select pg_catalog.pg_table_size('SS_VIEW1'); + pg_table_size +--------------- + 0 +(1 row) + +select pg_catalog.pg_table_size('SS_METACMD_TAB1'); + pg_table_size +--------------- + 8192 +(1 row) + +select gs_xlogdump_xid('171794'); +ERROR: unsupported gs_xlogdump_xid when enable dss. +CONTEXT: referenced column: gs_xlogdump_xid +select gs_xlogdump_lsn('0/70230830', '0/7023AB80'); +ERROR: unsupported gs_xlogdump_lsn when enable dss. +CONTEXT: referenced column: gs_xlogdump_lsn +select gs_xlogdump_tablepath('+data/base/15114/4600', 0, 'heap'); +ERROR: unsupported gs_xlogdump_tablepath when enable dss. +CONTEXT: referenced column: gs_xlogdump_tablepath +select gs_xlogdump_parsepage_tablepath('+data/base/15114/4600', 0, 'heap', false); +ERROR: unsupported gs_xlogdump_parsepage_tablepath when enable dss. 
+CONTEXT: referenced column: gs_xlogdump_parsepage_tablepath +drop TABLE if exists ss_range_range_ddl_001; +NOTICE: table "ss_range_range_ddl_001" does not exist, skipping +CREATE TABLE ss_range_range_ddl_001 +( + col_1 int primary key USING INDEX TABLESPACE startend_tbs4, -- expected error + col_2 bigint NOT NULL , + col_3 VARCHAR2 ( 30 ) NOT NULL , + col_4 int generated always as(2*col_2) stored , + col_5 bigint, + col_6 bool, + col_7 text, + col_8 decimal, + col_9 numeric(12,6), + col_10 date, + check (col_4 >= col_2) +) +with(FILLFACTOR=80,segment=on) +PARTITION BY range (col_1) SUBPARTITION BY range (col_2) +( + PARTITION p_range_1 values less than (-10 ) + ( + SUBPARTITION p_range_1_1 values less than ( 0), + SUBPARTITION p_range_1_2 values less than ( MAXVALUE ) + ) +) ENABLE ROW MOVEMENT; +ERROR: tablespace "startend_tbs4" does not exist +drop TABLE if exists ss_range_range_ddl_001; +NOTICE: table "ss_range_range_ddl_001" does not exist, skipping +DROP VIEW IF EXISTS SS_VIEW1 CASCADE; +DROP TABLE IF EXISTS SS_METACMD_TAB1 CASCADE; diff --git a/src/test/regress/expected/ss_r/replace_func_with_two_args.out b/src/test/regress/expected/ss_r/replace_func_with_two_args.out new file mode 100644 index 000000000..95123fd59 --- /dev/null +++ b/src/test/regress/expected/ss_r/replace_func_with_two_args.out @@ -0,0 +1,111 @@ +-- +-- replace function with two arguments +-- +select replace('string', ''); + replace +--------- + string +(1 row) + +select replace('string', 'i'); + replace +--------- + strng +(1 row) + +select replace('string', 'in'); + replace +--------- + strg +(1 row) + +select replace('string', 'ing'); + replace +--------- + str +(1 row) + +select replace('', 'ing'); + replace +--------- + +(1 row) + +select replace(NULL, 'ing'); + replace +--------- + +(1 row) + +select replace('ing', ''); + replace +--------- + ing +(1 row) + +select replace('ing', NULL); + replace +--------- + ing +(1 row) + +select replace('', ''); + replace +--------- + +(1 row) + 
+select replace(NULL, NULL); + replace +--------- + +(1 row) + +select replace(123, '1'); + replace +--------- + 23 +(1 row) + +select replace('123', 1); + replace +--------- + 23 +(1 row) + +select replace(123, 1); + replace +--------- + 23 +(1 row) + +select replace('abc\nabc', '\n'); + replace +--------- + abcabc +(1 row) + +select replace('abc\nabc', E'\n'); + replace +---------- + abc\nabc +(1 row) + +select replace(E'abc\nabc', E'\n'); + replace +--------- + abcabc +(1 row) + +select replace('~!@#$%^&*()', '!@'); + replace +----------- + ~#$%^&*() +(1 row) + +select replace('高斯', '高'); + replace +--------- + 斯 +(1 row) + diff --git a/src/test/regress/expected/ss_r/segment_subpartition_analyze_vacuum.out b/src/test/regress/expected/ss_r/segment_subpartition_analyze_vacuum.out new file mode 100644 index 000000000..4927ecd5d --- /dev/null +++ b/src/test/regress/expected/ss_r/segment_subpartition_analyze_vacuum.out @@ -0,0 +1,9 @@ +SET CURRENT_SCHEMA TO segment_subpartition_analyze_vacuum; +select * from range_list order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 2 | 1 +(3 rows) + diff --git a/src/test/regress/expected/ss_r/segment_subpartition_gpi.out b/src/test/regress/expected/ss_r/segment_subpartition_gpi.out new file mode 100644 index 000000000..c0417cc11 --- /dev/null +++ b/src/test/regress/expected/ss_r/segment_subpartition_gpi.out @@ -0,0 +1,11 @@ +SET CURRENT_SCHEMA TO segment_subpartition_gpi; +select * from range_list where user_no = '1' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 +(5 rows) + diff --git a/src/test/regress/expected/ss_r/segment_subpartition_scan.out b/src/test/regress/expected/ss_r/segment_subpartition_scan.out new file mode 100644 index 
000000000..02b7513ef --- /dev/null +++ b/src/test/regress/expected/ss_r/segment_subpartition_scan.out @@ -0,0 +1,39 @@ +SET CURRENT_SCHEMA TO segment_subpartition_scan; +select * from range_list order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_list where month_code = '201902' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 +(3 rows) + +select * from range_list where dept_code = '1' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +select * from range_list where user_no = '1' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + diff --git a/src/test/regress/expected/ss_r/segment_subpartition_select.out b/src/test/regress/expected/ss_r/segment_subpartition_select.out new file mode 100644 index 000000000..49bd7f888 --- /dev/null +++ b/src/test/regress/expected/ss_r/segment_subpartition_select.out @@ -0,0 +1,1028 @@ +SET CURRENT_SCHEMA TO segment_subpartition_select; +select * from range_list order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_list where user_no is not null order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt 
+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_list where user_no is not null and dept_code = user_no order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +select * from range_list where user_no is not null and dept_code in ('2') order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(3 rows) + +select * from range_list where user_no is not null and dept_code <> '2' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +select * from range_list partition (p_201901) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 +(3 rows) + +select * from range_list partition (p_201902) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(3 rows) + +select * from range_list where user_no is not null and dept_code <> '2' UNION ALL select * from range_list partition (p_201902) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_list where user_no is not null and dept_code <> '2' UNION ALL select * from range_list partition (p_201902) where dept_code in ('2') 
order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(5 rows) + +select * from range_hash order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_hash where user_no is not null order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_hash where user_no is not null and dept_code = user_no order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +select * from range_hash where user_no is not null and dept_code in ('2') order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(3 rows) + +select * from range_hash where user_no is not null and dept_code <> '2' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +select * from range_hash partition (p_201901) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 +(3 rows) + +select * from range_hash partition (p_201902) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt 
+------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(3 rows) + +select * from range_hash where user_no is not null and dept_code <> '2' UNION ALL select * from range_hash partition (p_201902) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_hash where user_no is not null and dept_code <> '2' UNION ALL select * from range_hash partition (p_201902) where dept_code in ('2') order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(5 rows) + +select * from range_range order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_range where user_no is not null order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_range where user_no is not null and dept_code = user_no order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +select * from range_range where user_no is not null and dept_code in ('2') order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 
| 1 +(3 rows) + +select * from range_range where user_no is not null and dept_code <> '2' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +select * from range_range partition (p_201901) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 +(3 rows) + +select * from range_range partition (p_201902) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(3 rows) + +select * from range_range where user_no is not null and dept_code <> '2' UNION ALL select * from range_range partition (p_201902) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_range where user_no is not null and dept_code <> '2' UNION ALL select * from range_range partition (p_201902) where dept_code in ('2') order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(5 rows) + +select * from view_temp; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 +(6 rows) + +--error +select * from view_temp partition (p_201901); +ERROR: relation "view_temp" is not partitioned table +DETAIL: N/A. 
+select * from view_temp partition (p_201902); +ERROR: relation "view_temp" is not partitioned table +DETAIL: N/A. +--join normal table +select * from range_list left join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(6 rows) + +select * from range_list left join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_list right join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + | | | | 201901 | 1 + | | | | 201904 | 4 + | | | | 201905 | 5 + | | | | 201906 | 6 + | | | | 201907 | 7 + | | | | 201908 | 8 + | | | | 201909 | 9 + | | | | 201910 | 10 +(14 rows) + +select * from range_list right join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_list full join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | 
dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + | | | | 201901 | 1 + | | | | 201904 | 4 + | | | | 201905 | 5 + | | | | 201906 | 6 + | | | | 201907 | 7 + | | | | 201908 | 8 + | | | | 201909 | 9 + | | | | 201910 | 10 +(14 rows) + +select * from range_list full join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_list inner join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(6 rows) + +select * from range_list inner join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_hash left join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 
1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(6 rows) + +select * from range_hash left join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_hash right join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + | | | | 201901 | 1 + | | | | 201904 | 4 + | | | | 201905 | 5 + | | | | 201906 | 6 + | | | | 201907 | 7 + | | | | 201908 | 8 + | | | | 201909 | 9 + | | | | 201910 | 10 +(14 rows) + +select * from range_hash right join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_hash full join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + | | | | 201901 | 1 + | | | | 201904 | 4 + | | | | 201905 | 5 + | | | | 201906 | 6 + | | | | 201907 | 7 + | | | | 201908 | 8 + | | | | 201909 | 9 + | | | | 201910 | 10 +(14 rows) + +select * from 
range_hash full join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_hash inner join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(6 rows) + +select * from range_hash inner join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_range left join t1 on range_range.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(6 rows) + +select * from range_range left join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_range right join t1 on range_range.month_code = t1.c1 order by 
1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + | | | | 201901 | 1 + | | | | 201904 | 4 + | | | | 201905 | 5 + | | | | 201906 | 6 + | | | | 201907 | 7 + | | | | 201908 | 8 + | | | | 201909 | 9 + | | | | 201910 | 10 +(14 rows) + +select * from range_range right join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_range full join t1 on range_range.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + | | | | 201901 | 1 + | | | | 201904 | 4 + | | | | 201905 | 5 + | | | | 201906 | 6 + | | | | 201907 | 7 + | | | | 201908 | 8 + | | | | 201909 | 9 + | | | | 201910 | 10 +(14 rows) + +select * from range_range full join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_range inner join t1 on range_range.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 
+------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(6 rows) + +select * from range_range inner join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +--join range_list and range_hash +select * from range_list left join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_list left join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | 
sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_list right join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_list right join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 
201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_list full join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_list full join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 
201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_list inner join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_list inner join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +--join 
range_hash and range_range +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + 
month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt 
+------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 
1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +--join range_hash and range_range +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 
1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 
1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 
1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 
1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from pjade subpartition(hrp1_1) union select * from cjade order by 1,2,3; + jid | jn | name +-----+----+------- + 6 | 8 | tom + 8 | 18 | jerry + 16 | 8 | jade + 18 | 20 | jack +(4 rows) + +select * from pjade subpartition(hrp1_1) p union select * from cjade order by 1,2,3; + jid | jn | name +-----+----+------- + 6 | 8 | tom + 8 | 18 | jerry + 16 | 8 | jade + 18 | 20 | jack +(4 rows) + +select * from pjade subpartition(hrp1_1) union select * from cjade order by 1,2,3; + jid | jn | name +-----+----+------- + 6 | 8 | tom + 8 | 18 | jerry + 16 | 8 | jade + 18 | 20 | jack +(4 rows) + +select * from pjade subpartition(hrp1_1) p union select * from cjade order by 1,2,3; + jid | jn | name +-----+----+------- + 6 | 8 | tom + 8 | 18 | jerry + 16 | 8 | jade + 18 | 20 | jack +(4 rows) + diff --git a/src/test/regress/expected/ss_r/segment_subpartition_split.out b/src/test/regress/expected/ss_r/segment_subpartition_split.out new file mode 100644 index 000000000..856a30bbb --- /dev/null +++ b/src/test/regress/expected/ss_r/segment_subpartition_split.out @@ -0,0 +1,59 @@ +SET CURRENT_SCHEMA TO segment_subpartition_split; +select * from list_list subpartition (p_201902_a) order by 
1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 +(1 row) + +select * from list_list subpartition (p_201902_b) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 2 | 1 | 1 + 201903 | 3 | 1 | 1 +(2 rows) + +select * from list_list subpartition (p_201902_c) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- +(0 rows) + +select * from range_range subpartition (p_201901_a) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 +(1 row) + +select * from range_range subpartition (p_201901_b) order by 1,2,3,4; +ERROR: subpartition "p_201901_b" of relation "range_range" does not exist +select * from range_range subpartition (p_201901_c) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 +(1 row) + +select * from range_range subpartition (p_201901_d) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 3 | 1 | 1 +(1 row) + +select * from range_range subpartition (p_201902_a) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 +(1 row) + +select * from range_range subpartition (p_201902_b) order by 1,2,3,4; +ERROR: subpartition "p_201902_b" of relation "range_range" does not exist +select * from range_range subpartition (p_201902_c) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 2 | 1 | 1 +(1 row) + +select * from range_range subpartition (p_201902_d) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 5 | 1 | 1 
+(1 row) + diff --git a/src/test/regress/expected/ss_r/segment_subpartition_truncate.out b/src/test/regress/expected/ss_r/segment_subpartition_truncate.out new file mode 100644 index 000000000..e63165b3a --- /dev/null +++ b/src/test/regress/expected/ss_r/segment_subpartition_truncate.out @@ -0,0 +1,11 @@ +SET CURRENT_SCHEMA TO segment_subpartition_truncate; +select * from list_list partition (p_201901); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- +(0 rows) + +select * from list_list subpartition (p_201902_b); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- +(0 rows) + diff --git a/src/test/regress/expected/ss_r/trunc_func_for_date.out b/src/test/regress/expected/ss_r/trunc_func_for_date.out new file mode 100644 index 000000000..b9028268c --- /dev/null +++ b/src/test/regress/expected/ss_r/trunc_func_for_date.out @@ -0,0 +1,217 @@ +--- +--- data type 1 : timestamp +--- +-- format can recognize +select trunc(timestamp '2021-08-11 20:19:39', 'cc'); -- century + trunc +-------------------------- + Mon Jan 01 00:00:00 2001 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'yyyy'); -- year + trunc +-------------------------- + Fri Jan 01 00:00:00 2021 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'q'); -- quarter + trunc +-------------------------- + Thu Jul 01 00:00:00 2021 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'mm'); -- month + trunc +-------------------------- + Sun Aug 01 00:00:00 2021 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'j'); -- day + trunc +-------------------------- + Wed Aug 11 00:00:00 2021 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'dd'); -- day + trunc +-------------------------- + Wed Aug 11 00:00:00 2021 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'ddd'); -- day + trunc +-------------------------- + Wed Aug 11 00:00:00 2021 +(1 row) + +select trunc(timestamp '2021-08-11 
20:19:39', 'hh'); -- hour + trunc +-------------------------- + Wed Aug 11 20:00:00 2021 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'mi'); -- minute + trunc +-------------------------- + Wed Aug 11 20:19:00 2021 +(1 row) + +-- format can not recognize +select trunc(timestamp '2021-08-11 20:19:39', 'qq'); -- quarter +ERROR: timestamp units "qq" not recognized +CONTEXT: referenced column: trunc +select trunc(timestamp '2021-08-11 20:19:39', 'mmm'); -- month +ERROR: timestamp units "mmm" not recognized +CONTEXT: referenced column: trunc +select trunc(timestamp '2021-08-11 20:19:39', 'dddd'); -- day +ERROR: timestamp units "dddd" not recognized +CONTEXT: referenced column: trunc +select trunc(timestamp '2021-08-11 20:19:39', 'hhh'); -- hour +ERROR: timestamp units "hhh" not recognized +CONTEXT: referenced column: trunc +--- +--- data type 2 : timestamptz +--- +-- format can recognize +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'cc'); -- century + trunc +------------------------------ + Mon Jan 01 00:00:00 2001 PST +(1 row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'yyyy'); -- year + trunc +------------------------------ + Fri Jan 01 00:00:00 2021 PST +(1 row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'q'); -- quarter + trunc +------------------------------ + Thu Jul 01 00:00:00 2021 PDT +(1 row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'mm'); -- month + trunc +------------------------------ + Sun Aug 01 00:00:00 2021 PDT +(1 row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'j'); -- day + trunc +------------------------------ + Wed Aug 11 00:00:00 2021 PDT +(1 row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'dd'); -- day + trunc +------------------------------ + Wed Aug 11 00:00:00 2021 PDT +(1 row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'ddd'); -- day + trunc +------------------------------ + Wed Aug 11 00:00:00 2021 PDT +(1 
row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'hh'); -- hour + trunc +------------------------------ + Wed Aug 11 17:00:00 2021 PDT +(1 row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'mi'); -- minute + trunc +------------------------------ + Wed Aug 11 17:48:00 2021 PDT +(1 row) + +-- format can't recognize +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'qq'); -- quarter +ERROR: timestamp with time zone units "qq" not recognized +CONTEXT: referenced column: trunc +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'mmm'); -- month +ERROR: timestamp with time zone units "mmm" not recognized +CONTEXT: referenced column: trunc +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'dddd'); -- day +ERROR: timestamp with time zone units "dddd" not recognized +CONTEXT: referenced column: trunc +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'hhh'); -- hour +ERROR: timestamp with time zone units "hhh" not recognized +CONTEXT: referenced column: trunc +--- +--- data type 3 : interval +--- +-- format can recognize +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'cc'); -- century + trunc +------- + @ 0 +(1 row) + +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'yyyy'); -- year + trunc +----------- + @ 2 years +(1 row) + +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'q'); -- quarter + trunc +------------------ + @ 2 years 3 mons +(1 row) + +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'mm'); -- month + trunc +------------------ + @ 2 years 3 mons +(1 row) + +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'j'); -- day + trunc +------------------------- + @ 2 years 3 mons 4 days +(1 row) + +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'dd'); -- day + trunc +------------------------- + @ 2 years 3 mons 4 days +(1 row) + 
+select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'ddd'); -- day + trunc +------------------------- + @ 2 years 3 mons 4 days +(1 row) + +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'hh'); -- hour + trunc +--------------------------------- + @ 2 years 3 mons 4 days 5 hours +(1 row) + +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'mi'); -- minute + trunc +---------------------------------------- + @ 2 years 3 mons 4 days 5 hours 6 mins +(1 row) + +-- format can not recognize +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'qq'); -- quarter +ERROR: interval units "qq" not recognized +CONTEXT: referenced column: trunc +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'mmm'); -- month +ERROR: interval units "mmm" not recognized +CONTEXT: referenced column: trunc +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'dddd'); -- day +ERROR: interval units "dddd" not recognized +CONTEXT: referenced column: trunc +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'hhh'); -- hour +ERROR: interval units "hhh" not recognized +CONTEXT: referenced column: trunc +-- not supported +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'w'); -- week +ERROR: interval units "w" not supported +CONTEXT: referenced column: trunc diff --git a/src/test/regress/expected/ss_unsupported_hasuids_table.out b/src/test/regress/expected/ss_unsupported_hasuids_table.out new file mode 100644 index 000000000..175388d5f --- /dev/null +++ b/src/test/regress/expected/ss_unsupported_hasuids_table.out @@ -0,0 +1,10 @@ +drop table if exists hasuids_standby_t1; +NOTICE: table "hasuids_standby_t1" does not exist, skipping +CREATE TABLE hasuids_standby_t1 (id int,num int) with (segment=on); +alter table hasuids_standby_t1 set (hasuids=on); +ERROR: table under Shared Storage cannot add 
or modify hasuids by ALTER TABLE command. +drop table if exists hasuids_standby_t1; +CREATE TABLE hasuids_standby_t1 (id int,num int) with (segment=on,hasuids=on); +ERROR: hasuids is not supported under Shared Storage. +drop table if exists hasuids_standby_t1; +NOTICE: table "hasuids_standby_t1" does not exist, skipping diff --git a/src/test/regress/expected/ss_wr/replace_func_with_two_args.out b/src/test/regress/expected/ss_wr/replace_func_with_two_args.out new file mode 100644 index 000000000..95123fd59 --- /dev/null +++ b/src/test/regress/expected/ss_wr/replace_func_with_two_args.out @@ -0,0 +1,111 @@ +-- +-- replace function with two arguments +-- +select replace('string', ''); + replace +--------- + string +(1 row) + +select replace('string', 'i'); + replace +--------- + strng +(1 row) + +select replace('string', 'in'); + replace +--------- + strg +(1 row) + +select replace('string', 'ing'); + replace +--------- + str +(1 row) + +select replace('', 'ing'); + replace +--------- + +(1 row) + +select replace(NULL, 'ing'); + replace +--------- + +(1 row) + +select replace('ing', ''); + replace +--------- + ing +(1 row) + +select replace('ing', NULL); + replace +--------- + ing +(1 row) + +select replace('', ''); + replace +--------- + +(1 row) + +select replace(NULL, NULL); + replace +--------- + +(1 row) + +select replace(123, '1'); + replace +--------- + 23 +(1 row) + +select replace('123', 1); + replace +--------- + 23 +(1 row) + +select replace(123, 1); + replace +--------- + 23 +(1 row) + +select replace('abc\nabc', '\n'); + replace +--------- + abcabc +(1 row) + +select replace('abc\nabc', E'\n'); + replace +---------- + abc\nabc +(1 row) + +select replace(E'abc\nabc', E'\n'); + replace +--------- + abcabc +(1 row) + +select replace('~!@#$%^&*()', '!@'); + replace +----------- + ~#$%^&*() +(1 row) + +select replace('高斯', '高'); + replace +--------- + 斯 +(1 row) + diff --git a/src/test/regress/expected/ss_wr/segment_subpartition_analyze_vacuum.out 
b/src/test/regress/expected/ss_wr/segment_subpartition_analyze_vacuum.out new file mode 100644 index 000000000..cb82f50cb --- /dev/null +++ b/src/test/regress/expected/ss_wr/segment_subpartition_analyze_vacuum.out @@ -0,0 +1,57 @@ +-- prepare +DROP SCHEMA segment_subpartition_analyze_vacuum CASCADE; +ERROR: schema "segment_subpartition_analyze_vacuum" does not exist +CREATE SCHEMA segment_subpartition_analyze_vacuum; +SET CURRENT_SCHEMA TO segment_subpartition_analyze_vacuum; +-- base function +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a values ('1'), + SUBPARTITION p_201901_b values ('2') + ), + PARTITION p_201902 VALUES LESS THAN( '201910' ) + ( + SUBPARTITION p_201902_a values ('1'), + SUBPARTITION p_201902_b values ('2') + ) +); +create index idx_month_code_local on range_list(month_code) local; +create index idx_dept_code_global on range_list(dept_code) global; +create index idx_user_no_global on range_list(user_no) global; +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201902', '2', '1', 1); +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201903', '2', '2', 1); +insert into range_list values('201903', '1', '1', 1); +insert into range_list values('201903', '2', '1', 1); +select * from range_list order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 2 | 1 +(6 rows) + +delete from range_list where month_code = '201902'; +select * from range_list order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt 
+------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 2 | 1 +(3 rows) + +analyze range_list; +analyze range_list partition (p_201901); diff --git a/src/test/regress/expected/ss_wr/segment_subpartition_gpi.out b/src/test/regress/expected/ss_wr/segment_subpartition_gpi.out new file mode 100644 index 000000000..ff1355e0f --- /dev/null +++ b/src/test/regress/expected/ss_wr/segment_subpartition_gpi.out @@ -0,0 +1,46 @@ +-- prepare +DROP SCHEMA segment_subpartition_gpi CASCADE; +ERROR: schema "segment_subpartition_gpi" does not exist +CREATE SCHEMA segment_subpartition_gpi; +SET CURRENT_SCHEMA TO segment_subpartition_gpi; +-- base function +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a values ('1'), + SUBPARTITION p_201901_b values ('2') + ), + PARTITION p_201902 VALUES LESS THAN( '201910' ) + ( + SUBPARTITION p_201902_a values ('1'), + SUBPARTITION p_201902_b values ('2') + ) +); +create index idx_month_code_local on range_list(month_code) local; +create index idx_dept_code_global on range_list(dept_code) global; +create index idx_user_no_global on range_list(user_no) global; +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201902', '2', '1', 1); +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201903', '2', '2', 1); +insert into range_list values('201903', '1', '1', 1); +insert into range_list values('201903', '2', '1', 1); +select * from range_list order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 
+ 201903 | 2 | 2 | 1 +(6 rows) + diff --git a/src/test/regress/expected/ss_wr/segment_subpartition_scan.out b/src/test/regress/expected/ss_wr/segment_subpartition_scan.out new file mode 100644 index 000000000..a2e5bbbc8 --- /dev/null +++ b/src/test/regress/expected/ss_wr/segment_subpartition_scan.out @@ -0,0 +1,317 @@ +--prepare +DROP SCHEMA segment_subpartition_scan CASCADE; +ERROR: schema "segment_subpartition_scan" does not exist +CREATE SCHEMA segment_subpartition_scan; +SET CURRENT_SCHEMA TO segment_subpartition_scan; +--scan +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a values ('1'), + SUBPARTITION p_201901_b values ('2') + ), + PARTITION p_201902 VALUES LESS THAN( '201910' ) + ( + SUBPARTITION p_201902_a values ('1'), + SUBPARTITION p_201902_b values ('2') + ) +); +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201902', '2', '1', 1); +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201903', '2', '1', 1); +insert into range_list values('201903', '1', '1', 1); +insert into range_list values('201903', '2', '1', 1); +explain(costs off, verbose on) select * from range_list order by 1, 2, 3, 4; + QUERY PLAN +--------------------------------------------------------------------------------------------------- + Sort + Output: month_code, dept_code, user_no, sales_amt + Sort Key: range_list.month_code, range_list.dept_code, range_list.user_no, range_list.sales_amt + -> Partition Iterator + Output: month_code, dept_code, user_no, sales_amt + Iterations: 2, Sub Iterations: 4 + -> Partitioned Seq Scan on segment_subpartition_scan.range_list + Output: month_code, dept_code, user_no, sales_amt + Selected Partitions: 
1..2 + Selected Subpartitions: ALL +(10 rows) + +select * from range_list order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +create index idx_month_code on range_list(month_code) local; +create index idx_dept_code on range_list(dept_code) local; +create index idx_user_no on range_list(user_no) local; +set enable_seqscan = off; +explain(costs off, verbose on) select * from range_list where month_code = '201902' order by 1, 2, 3, 4; + QUERY PLAN +---------------------------------------------------------------------------------- + Sort + Output: month_code, dept_code, user_no, sales_amt + Sort Key: range_list.dept_code, range_list.user_no, range_list.sales_amt + -> Partition Iterator + Output: month_code, dept_code, user_no, sales_amt + Iterations: 1, Sub Iterations: 2 + -> Partitioned Bitmap Heap Scan on segment_subpartition_scan.range_list + Output: month_code, dept_code, user_no, sales_amt + Recheck Cond: ((range_list.month_code)::text = '201902'::text) + Selected Partitions: 1 + Selected Subpartitions: ALL + -> Partitioned Bitmap Index Scan on idx_month_code + Index Cond: ((range_list.month_code)::text = '201902'::text) + Selected Partitions: 1 + Selected Subpartitions: ALL +(15 rows) + +select * from range_list where month_code = '201902' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 +(3 rows) + +explain(costs off, verbose on) select * from range_list where dept_code = '1' order by 1, 2, 3, 4; + QUERY PLAN +---------------------------------------------------------------------------------- + Sort + Output: month_code, dept_code, user_no, sales_amt + Sort Key: range_list.month_code, range_list.user_no, range_list.sales_amt + -> 
Partition Iterator + Output: month_code, dept_code, user_no, sales_amt + Iterations: 2, Sub Iterations: 2 + -> Partitioned Bitmap Heap Scan on segment_subpartition_scan.range_list + Output: month_code, dept_code, user_no, sales_amt + Recheck Cond: ((range_list.dept_code)::text = '1'::text) + Selected Partitions: 1..2 + Selected Subpartitions: 1:1, 2:1 + -> Partitioned Bitmap Index Scan on idx_dept_code + Index Cond: ((range_list.dept_code)::text = '1'::text) + Selected Partitions: 1..2 + Selected Subpartitions: 1:1, 2:1 +(15 rows) + +select * from range_list where dept_code = '1' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +explain(costs off, verbose on) select * from range_list where user_no = '1' order by 1, 2, 3, 4; + QUERY PLAN +---------------------------------------------------------------------------------- + Sort + Output: month_code, dept_code, user_no, sales_amt + Sort Key: range_list.month_code, range_list.dept_code, range_list.sales_amt + -> Partition Iterator + Output: month_code, dept_code, user_no, sales_amt + Iterations: 2, Sub Iterations: 4 + -> Partitioned Bitmap Heap Scan on segment_subpartition_scan.range_list + Output: month_code, dept_code, user_no, sales_amt + Recheck Cond: ((range_list.user_no)::text = '1'::text) + Selected Partitions: 1..2 + Selected Subpartitions: ALL + -> Partitioned Bitmap Index Scan on idx_user_no + Index Cond: ((range_list.user_no)::text = '1'::text) + Selected Partitions: 1..2 + Selected Subpartitions: ALL +(15 rows) + +select * from range_list where user_no = '1' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +set enable_bitmapscan = off; +explain(costs off, verbose on) 
select * from range_list where month_code = '201902' order by 1, 2, 3, 4; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Sort + Output: month_code, dept_code, user_no, sales_amt + Sort Key: range_list.dept_code, range_list.user_no, range_list.sales_amt + -> Partition Iterator + Output: month_code, dept_code, user_no, sales_amt + Iterations: 1, Sub Iterations: 2 + -> Partitioned Index Scan using idx_month_code on segment_subpartition_scan.range_list + Output: month_code, dept_code, user_no, sales_amt + Index Cond: ((range_list.month_code)::text = '201902'::text) + Selected Partitions: 1 + Selected Subpartitions: ALL +(11 rows) + +select * from range_list where month_code = '201902' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 +(3 rows) + +explain(costs off, verbose on) select * from range_list where dept_code = '1' order by 1, 2, 3, 4; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Sort + Output: month_code, dept_code, user_no, sales_amt + Sort Key: range_list.month_code, range_list.user_no, range_list.sales_amt + -> Partition Iterator + Output: month_code, dept_code, user_no, sales_amt + Iterations: 2, Sub Iterations: 2 + -> Partitioned Index Scan using idx_dept_code on segment_subpartition_scan.range_list + Output: month_code, dept_code, user_no, sales_amt + Index Cond: ((range_list.dept_code)::text = '1'::text) + Selected Partitions: 1..2 + Selected Subpartitions: 1:1, 2:1 +(11 rows) + +select * from range_list where dept_code = '1' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +explain(costs off, verbose on) select * from range_list where user_no = '1' order 
by 1, 2, 3, 4; + QUERY PLAN +---------------------------------------------------------------------------------------------- + Sort + Output: month_code, dept_code, user_no, sales_amt + Sort Key: range_list.month_code, range_list.dept_code, range_list.sales_amt + -> Partition Iterator + Output: month_code, dept_code, user_no, sales_amt + Iterations: 2, Sub Iterations: 4 + -> Partitioned Index Scan using idx_user_no on segment_subpartition_scan.range_list + Output: month_code, dept_code, user_no, sales_amt + Index Cond: ((range_list.user_no)::text = '1'::text) + Selected Partitions: 1..2 + Selected Subpartitions: ALL +(11 rows) + +select * from range_list where user_no = '1' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +reset enable_seqscan; +reset enable_bitmapscan; +create table range_range_jade(jid int,jn int,name varchar2) WITH (SEGMENT=ON) partition by range (jid) subpartition by range(jn) +( + partition hrp1 values less than(16)( + subpartition hrp1_1 values less than(16), +subpartition hrp1_2 values less than(26), +subpartition hrp1_3 values less than(36), + subpartition hrp1_4 values less than(maxvalue)), + partition hrp2 values less than(26)( + subpartition hrp2_1 values less than(maxvalue)), + partition hrp3 values less than(36)( + subpartition hrp3_1 values less than(16), +subpartition hrp3_2 values less than(26), + subpartition hrp3_3 values less than(maxvalue)), + partition hrp4 values less than(maxvalue)( + subpartition hrp4_1 values less than(16), + subpartition hrp4_2 values less than(maxvalue)) +)ENABLE ROW MOVEMENT; +-- no errors +set enable_partition_opfusion = on; +insert into range_range_jade values(1,2,'jade'); +reset enable_partition_opfusion; +CREATE TABLE IF NOT EXISTS list_range_02 +( + col_1 int , + col_2 int, +col_3 VARCHAR2 ( 30 ) , 
+ col_4 int +) WITH (SEGMENT=ON) +PARTITION BY list (col_1) SUBPARTITION BY range (col_2) +( + PARTITION p_list_1 VALUES(-1,-2,-3,-4,-5,-6,-7,-8,-9,-10 ) + ( + SUBPARTITION p_range_1_1 VALUES LESS THAN( -10 ), + SUBPARTITION p_range_1_2 VALUES LESS THAN( 0 ), + SUBPARTITION p_range_1_3 VALUES LESS THAN( 10 ), + SUBPARTITION p_range_1_4 VALUES LESS THAN( 20 ), + SUBPARTITION p_range_1_5 VALUES LESS THAN( 50 ) + ), + PARTITION p_list_2 VALUES(1,2,3,4,5,6,7,8,9,10 ), + PARTITION p_list_3 VALUES(11,12,13,14,15,16,17,18,19,20) + ( + SUBPARTITION p_range_3_1 VALUES LESS THAN( 15 ), + SUBPARTITION p_range_3_2 VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_list_4 VALUES(21,22,23,24,25,26,27,28,29,30) + ( + SUBPARTITION p_range_4_1 VALUES LESS THAN( -10 ), + SUBPARTITION p_range_4_2 VALUES LESS THAN( 0 ), + SUBPARTITION p_range_4_3 VALUES LESS THAN( 10 ), + SUBPARTITION p_range_4_4 VALUES LESS THAN( 20 ), + SUBPARTITION p_range_4_5 VALUES LESS THAN( 50 ) + ), + PARTITION p_list_5 VALUES(31,32,33,34,35,36,37,38,39,40) + ( + SUBPARTITION p_range_5_1 VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_list_6 VALUES(41,42,43,44,45,46,47,48,49,50) + ( + SUBPARTITION p_range_6_1 VALUES LESS THAN( -10 ), + SUBPARTITION p_range_6_2 VALUES LESS THAN( 0 ), + SUBPARTITION p_range_6_3 VALUES LESS THAN( 10 ), + SUBPARTITION p_range_6_4 VALUES LESS THAN( 20 ), + SUBPARTITION p_range_6_5 VALUES LESS THAN( 50 ) + ), + PARTITION p_list_7 VALUES(default) +) ENABLE ROW MOVEMENT; +create index index_01 on list_range_02(col_2) local ; +explain (costs off) select * from list_range_02 where col_2 in + (select col_1 from list_range_02 subpartition(p_list_2_subpartdefault1) + where col_1 >10 and col_1 <100) and col_1 +col_2 =50 and col_2 in (100,200,300 ); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Nested Loop Semi Join + Join Filter: (segment_subpartition_scan.list_range_02.col_2 = 
segment_subpartition_scan.list_range_02.col_1) + -> Partition Iterator + Iterations: 4, Sub Iterations: 4 + -> Partitioned Index Scan using index_01 on list_range_02 + Index Cond: ((col_2 = ANY ('{100,200,300}'::integer[])) AND (col_2 > 10) AND (col_2 < 100)) + Filter: ((col_1 + col_2) = 50) + Selected Partitions: 1,3,5..6 + Selected Subpartitions: 1:1, 3:1, 5:1, 6:1 + -> Materialize + -> Partition Iterator + Iterations: 1, Sub Iterations: 1 + -> Partitioned Seq Scan on list_range_02 + Filter: ((col_1 > 10) AND (col_1 < 100) AND (col_1 = ANY ('{100,200,300}'::integer[]))) + Selected Partitions: 6 + Selected Subpartitions: ALL +(16 rows) + diff --git a/src/test/regress/expected/ss_wr/segment_subpartition_select.out b/src/test/regress/expected/ss_wr/segment_subpartition_select.out new file mode 100644 index 000000000..1e557862e --- /dev/null +++ b/src/test/regress/expected/ss_wr/segment_subpartition_select.out @@ -0,0 +1,1221 @@ +--prepare +DROP SCHEMA segment_subpartition_select CASCADE; +ERROR: schema "segment_subpartition_select" does not exist +CREATE SCHEMA segment_subpartition_select; +SET CURRENT_SCHEMA TO segment_subpartition_select; +--select +CREATE TABLE t1 +( + c1 int, + c2 int +) WITH (SEGMENT=ON); +insert into t1 values(generate_series(201901,201910), generate_series(1,10)); +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a values ('1'), + SUBPARTITION p_201901_b values ('2') + ), + PARTITION p_201902 VALUES LESS THAN( '201910' ) + ( + SUBPARTITION p_201902_a values ('1'), + SUBPARTITION p_201902_b values ('2') + ) +); +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201902', '2', '1', 1); +insert into range_list values('201902', '1', '1', 
1); +insert into range_list values('201902', '3', '1', 1); +ERROR: inserted partition key does not map to any table partition +insert into range_list values('201903', '2', '1', 1); +insert into range_list values('201903', '1', '1', 1); +insert into range_list values('201903', '2', '1', 1); +insert into range_list values('201903', '3', '1', 1); +ERROR: inserted partition key does not map to any table partition +select * from range_list order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_list where user_no is not null order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_list where user_no is not null and dept_code = user_no order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +select * from range_list where user_no is not null and dept_code in ('2') order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(3 rows) + +select * from range_list where user_no is not null and dept_code <> '2' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +select * from range_list partition (p_201901) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 
1 + 201902 | 2 | 1 | 1 +(3 rows) + +select * from range_list partition (p_201902) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(3 rows) + +select * from range_list where user_no is not null and dept_code <> '2' UNION ALL select * from range_list partition (p_201902) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_list where user_no is not null and dept_code <> '2' UNION ALL select * from range_list partition (p_201902) where dept_code in ('2') order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(5 rows) + +CREATE TABLE range_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a, + SUBPARTITION p_201901_b + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a, + SUBPARTITION p_201902_b + ) +); +insert into range_hash values('201902', '1', '1', 1); +insert into range_hash values('201902', '2', '1', 1); +insert into range_hash values('201902', '1', '1', 1); +insert into range_hash values('201903', '2', '1', 1); +insert into range_hash values('201903', '1', '1', 1); +insert into range_hash values('201903', '2', '1', 1); +select * from range_hash order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 
201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_hash where user_no is not null order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_hash where user_no is not null and dept_code = user_no order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +select * from range_hash where user_no is not null and dept_code in ('2') order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(3 rows) + +select * from range_hash where user_no is not null and dept_code <> '2' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +select * from range_hash partition (p_201901) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 +(3 rows) + +select * from range_hash partition (p_201902) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(3 rows) + +select * from range_hash where user_no is not null and dept_code <> '2' UNION ALL select * from range_hash partition (p_201902) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 
1 + 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_hash where user_no is not null and dept_code <> '2' UNION ALL select * from range_hash partition (p_201902) where dept_code in ('2') order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(5 rows) + +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( '3' ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '3' ) + ) +); +insert into range_range values('201902', '1', '1', 1); +insert into range_range values('201902', '2', '1', 1); +insert into range_range values('201902', '1', '1', 1); +insert into range_range values('201903', '2', '1', 1); +insert into range_range values('201903', '1', '1', 1); +insert into range_range values('201903', '2', '1', 1); +select * from range_range order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_range where user_no is not null order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from 
range_range where user_no is not null and dept_code = user_no order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +select * from range_range where user_no is not null and dept_code in ('2') order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(3 rows) + +select * from range_range where user_no is not null and dept_code <> '2' order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +select * from range_range partition (p_201901) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 +(3 rows) + +select * from range_range partition (p_201902) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(3 rows) + +select * from range_range where user_no is not null and dept_code <> '2' UNION ALL select * from range_range partition (p_201902) order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +select * from range_range where user_no is not null and dept_code <> '2' UNION ALL select * from range_range partition (p_201902) where dept_code in ('2') order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 
| 1 + 201903 | 2 | 1 | 1 +(5 rows) + +--view +create view view_temp as select * from range_list; +select * from view_temp; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 +(6 rows) + +--error +select * from view_temp partition (p_201901); +ERROR: relation "view_temp" is not partitioned table +DETAIL: N/A. +select * from view_temp partition (p_201902); +ERROR: relation "view_temp" is not partitioned table +DETAIL: N/A. +with tmp1 as (select * from range_list ) select * from tmp1 order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(6 rows) + +with tmp1 as (select * from range_list partition (p_201901)) select * from tmp1 order by 1, 2, 3, 4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 +(3 rows) + +--join normal table +select * from range_list left join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(6 rows) + +select * from range_list left join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * 
from range_list right join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + | | | | 201901 | 1 + | | | | 201904 | 4 + | | | | 201905 | 5 + | | | | 201906 | 6 + | | | | 201907 | 7 + | | | | 201908 | 8 + | | | | 201909 | 9 + | | | | 201910 | 10 +(14 rows) + +select * from range_list right join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_list full join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + | | | | 201901 | 1 + | | | | 201904 | 4 + | | | | 201905 | 5 + | | | | 201906 | 6 + | | | | 201907 | 7 + | | | | 201908 | 8 + | | | | 201909 | 9 + | | | | 201910 | 10 +(14 rows) + +select * from range_list full join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_list inner join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 
5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(6 rows) + +select * from range_list inner join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_hash left join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(6 rows) + +select * from range_hash left join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_hash right join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + | | | | 201901 | 1 + | | | | 201904 | 4 + | | | | 201905 | 5 + | | | | 201906 | 6 + | | | | 
201907 | 7 + | | | | 201908 | 8 + | | | | 201909 | 9 + | | | | 201910 | 10 +(14 rows) + +select * from range_hash right join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_hash full join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + | | | | 201901 | 1 + | | | | 201904 | 4 + | | | | 201905 | 5 + | | | | 201906 | 6 + | | | | 201907 | 7 + | | | | 201908 | 8 + | | | | 201909 | 9 + | | | | 201910 | 10 +(14 rows) + +select * from range_hash full join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_hash inner join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(6 rows) + +select * from range_hash inner join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt 
| c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_range left join t1 on range_range.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(6 rows) + +select * from range_range left join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_range right join t1 on range_range.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + | | | | 201901 | 1 + | | | | 201904 | 4 + | | | | 201905 | 5 + | | | | 201906 | 6 + | | | | 201907 | 7 + | | | | 201908 | 8 + | | | | 201909 | 9 + | | | | 201910 | 10 +(14 rows) + +select * from range_range right join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_range full join t1 on 
range_range.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + | | | | 201901 | 1 + | | | | 201904 | 4 + | | | | 201905 | 5 + | | | | 201906 | 6 + | | | | 201907 | 7 + | | | | 201908 | 8 + | | | | 201909 | 9 + | | | | 201910 | 10 +(14 rows) + +select * from range_range full join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +select * from range_range inner join t1 on range_range.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 1 | 1 | 1 | 201902 | 2 + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 1 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(6 rows) + +select * from range_range inner join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + month_code | dept_code | user_no | sales_amt | c1 | c2 +------------+-----------+---------+-----------+--------+---- + 201902 | 2 | 1 | 1 | 201902 | 2 + 201903 | 2 | 1 | 1 | 201903 | 3 + 201903 | 2 | 1 | 1 | 201903 | 3 +(3 rows) + +--join range_list and range_hash +select * from range_list left join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt 
+------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_list left join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_list right join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 
1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_list right join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_list full join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 
1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_list full join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_list inner join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 
201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_list inner join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +--join range_hash and range_range +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 
2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 
2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code 
= 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt 
+------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +--join range_hash and range_range +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 
1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 
1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 
+(9 rows) + +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 | 201902 | 2 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(18 rows) + +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + month_code | dept_code | user_no | sales_amt | month_code | dept_code | user_no | sales_amt +------------+-----------+---------+-----------+------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 | 201902 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 | 201903 | 2 | 1 | 1 +(9 rows) + +CREATE TABLE IF NOT EXISTS list_range_02 +( + col_1 int , + col_2 int, + col_3 VARCHAR2 ( 30 ) , + col_4 int +) WITH (SEGMENT=ON) +PARTITION BY list 
(col_1) SUBPARTITION BY range (col_2) +( + PARTITION p_list_1 VALUES(-1,-2,-3,-4,-5,-6,-7,-8,-9,-10 ) + ( + SUBPARTITION p_range_1_1 VALUES LESS THAN( -10 ), + SUBPARTITION p_range_1_2 VALUES LESS THAN( 0 ), + SUBPARTITION p_range_1_3 VALUES LESS THAN( 10 ), + SUBPARTITION p_range_1_4 VALUES LESS THAN( 20 ), + SUBPARTITION p_range_1_5 VALUES LESS THAN( 50 ) + ), + PARTITION p_list_2 VALUES(1,2,3,4,5,6,7,8,9,10 ), + PARTITION p_list_3 VALUES(11,12,13,14,15,16,17,18,19,20) + ( + SUBPARTITION p_range_3_1 VALUES LESS THAN( 15 ), + SUBPARTITION p_range_3_2 VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_list_4 VALUES(21,22,23,24,25,26,27,28,29,30) + ( + SUBPARTITION p_range_4_1 VALUES LESS THAN( -10 ), + SUBPARTITION p_range_4_2 VALUES LESS THAN( 0 ), + SUBPARTITION p_range_4_3 VALUES LESS THAN( 10 ), + SUBPARTITION p_range_4_4 VALUES LESS THAN( 20 ), + SUBPARTITION p_range_4_5 VALUES LESS THAN( 50 ) + ), + PARTITION p_list_5 VALUES(31,32,33,34,35,36,37,38,39,40) + ( + SUBPARTITION p_range_5_1 VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_list_6 VALUES(41,42,43,44,45,46,47,48,49,50) + ( + SUBPARTITION p_range_6_1 VALUES LESS THAN( -10 ), + SUBPARTITION p_range_6_2 VALUES LESS THAN( 0 ), + SUBPARTITION p_range_6_3 VALUES LESS THAN( 10 ), + SUBPARTITION p_range_6_4 VALUES LESS THAN( 20 ), + SUBPARTITION p_range_6_5 VALUES LESS THAN( 50 ) + ), + PARTITION p_list_7 VALUES(default) +) ENABLE ROW MOVEMENT; +create index index_01 on list_range_02(col_2) local ; +INSERT INTO list_range_02 VALUES (GENERATE_SERIES(0, 19),GENERATE_SERIES(0, 1000),GENERATE_SERIES(0, 99)); + explain (costs off, verbose on) select * from list_range_02 where col_2 >500 and col_2 <8000 order by col_1; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Sort + Output: col_1, col_2, col_3, col_4 + Sort Key: list_range_02.col_1 + -> Partition Iterator + Output: col_1, col_2, col_3, col_4 + Iterations: 4, Sub Iterations: 4 + -> Partitioned 
Bitmap Heap Scan on segment_subpartition_select.list_range_02 + Output: col_1, col_2, col_3, col_4 + Recheck Cond: ((list_range_02.col_2 > 500) AND (list_range_02.col_2 < 8000)) + Selected Partitions: 1,3,5..6 + Selected Subpartitions: 1:1, 3:1, 5:1, 6:1 + -> Partitioned Bitmap Index Scan on index_01 + Index Cond: ((list_range_02.col_2 > 500) AND (list_range_02.col_2 < 8000)) + Selected Partitions: 1,3,5..6 + Selected Subpartitions: 1:1, 3:1, 5:1, 6:1 +(15 rows) + +create table pjade(jid int,jn int,name varchar2) WITH (SEGMENT=ON) partition by range(jid) subpartition by range(jn) +( + partition hrp1 values less than(16)( + subpartition hrp1_1 values less than(16), + subpartition hrp1_2 values less than(maxvalue)), + partition hrp2 values less than(maxvalue)( + subpartition hrp3_1 values less than(16), + subpartition hrp3_3 values less than(maxvalue)) +); +create table cjade(jid int,jn int,name varchar2) WITH (SEGMENT=ON); +insert into pjade values(6,8,'tom'),(8,18,'jerry'),(16,8,'jade'),(18,20,'jack'); +insert into cjade values(6,8,'tom'),(8,18,'jerry'),(16,8,'jade'),(18,20,'jack'); +select * from pjade subpartition(hrp1_1) union select * from cjade order by 1,2,3; + jid | jn | name +-----+----+------- + 6 | 8 | tom + 8 | 18 | jerry + 16 | 8 | jade + 18 | 20 | jack +(4 rows) + +select * from pjade subpartition(hrp1_1) p union select * from cjade order by 1,2,3; + jid | jn | name +-----+----+------- + 6 | 8 | tom + 8 | 18 | jerry + 16 | 8 | jade + 18 | 20 | jack +(4 rows) + +select * from pjade subpartition(hrp1_1) union select * from cjade order by 1,2,3; + jid | jn | name +-----+----+------- + 6 | 8 | tom + 8 | 18 | jerry + 16 | 8 | jade + 18 | 20 | jack +(4 rows) + +select * from pjade subpartition(hrp1_1) p union select * from cjade order by 1,2,3; + jid | jn | name +-----+----+------- + 6 | 8 | tom + 8 | 18 | jerry + 16 | 8 | jade + 18 | 20 | jack +(4 rows) + diff --git a/src/test/regress/expected/ss_wr/segment_subpartition_split.out 
b/src/test/regress/expected/ss_wr/segment_subpartition_split.out new file mode 100644 index 000000000..9c13c4b75 --- /dev/null +++ b/src/test/regress/expected/ss_wr/segment_subpartition_split.out @@ -0,0 +1,357 @@ +--prepare +DROP SCHEMA segment_subpartition_split CASCADE; +ERROR: schema "segment_subpartition_split" does not exist +CREATE SCHEMA segment_subpartition_split; +SET CURRENT_SCHEMA TO segment_subpartition_split; +--split subpartition +-- list subpartition +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( default ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( default ) + ) +); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201902', '2', '1', 1); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201903', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +insert into list_list values('201903', '3', '1', 1); +select * from list_list order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 3 | 1 | 1 +(6 rows) + +select * from list_list subpartition (p_201901_a) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 +(2 rows) + +select * from list_list subpartition (p_201901_b) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 
+(1 row) + +alter table list_list split subpartition p_201901_b values (2) into +( + subpartition p_201901_b, + subpartition p_201901_c +); +select * from list_list subpartition (p_201901_a) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 +(2 rows) + +select * from list_list subpartition (p_201901_b) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 +(1 row) + +select * from list_list subpartition (p_201901_c) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- +(0 rows) + +select * from list_list partition (p_201901); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 +(3 rows) + +select * from list_list subpartition (p_201902_a) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 +(1 row) + +select * from list_list subpartition (p_201902_b) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 2 | 1 | 1 + 201903 | 3 | 1 | 1 +(2 rows) + +alter table list_list split subpartition p_201902_b values (2, 3) into +( + subpartition p_201902_b, + subpartition p_201902_c +); +select * from list_list subpartition (p_201902_a) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 +(1 row) + +select * from list_list subpartition (p_201902_b) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 2 | 1 | 1 + 201903 | 3 | 1 | 1 +(2 rows) + +select * from list_list subpartition (p_201902_c) order by 1,2,3,4; + month_code | 
dept_code | user_no | sales_amt +------------+-----------+---------+----------- +(0 rows) + +--error +alter table list_list split subpartition p_201902_a values (3) into +( + subpartition p_201902_ab, + subpartition p_201902_ac +); +ERROR: Only the default boundary subpartition can be splited. +-- range subpartition +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '6' ) + ) +); +insert into range_range values('201902', '1', '1', 1); +insert into range_range values('201902', '2', '1', 1); +insert into range_range values('201902', '3', '1', 1); +insert into range_range values('201903', '1', '1', 1); +insert into range_range values('201903', '2', '1', 1); +insert into range_range values('201903', '5', '1', 1); +select * from range_range order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 2 | 1 | 1 + 201902 | 3 | 1 | 1 + 201903 | 1 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 5 | 1 | 1 +(6 rows) + +select * from range_range subpartition (p_201901_a) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 +(1 row) + +select * from range_range subpartition (p_201901_b) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 + 201902 | 3 | 1 | 1 +(2 rows) + +alter table range_range split subpartition p_201901_b at (3) into +( 
+ subpartition p_201901_c, + subpartition p_201901_d +); +select * from range_range subpartition (p_201901_a) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 +(1 row) + +select * from range_range subpartition (p_201901_b) order by 1,2,3,4; +ERROR: subpartition "p_201901_b" of relation "range_range" does not exist +select * from range_range subpartition (p_201901_c) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 +(1 row) + +select * from range_range subpartition (p_201901_d) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 3 | 1 | 1 +(1 row) + +select * from range_range subpartition (p_201902_a) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 +(1 row) + +select * from range_range subpartition (p_201902_b) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 2 | 1 | 1 + 201903 | 5 | 1 | 1 +(2 rows) + +alter table range_range split subpartition p_201902_b at (3) into +( + subpartition p_201902_c, + subpartition p_201902_d +); +select * from range_range subpartition (p_201902_a) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 +(1 row) + +select * from range_range subpartition (p_201902_b) order by 1,2,3,4; +ERROR: subpartition "p_201902_b" of relation "range_range" does not exist +select * from range_range subpartition (p_201902_c) order by 1,2,3,4; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 2 | 1 | 1 +(1 row) + +select * from range_range subpartition (p_201902_d) order by 1,2,3,4; + month_code | dept_code | user_no | 
sales_amt +------------+-----------+---------+----------- + 201903 | 5 | 1 | 1 +(1 row) + +--test syntax +CREATE TABLE IF NOT EXISTS list_hash +( + col_1 int , + col_2 int , + col_3 int , + col_4 int +) WITH (SEGMENT=ON) +PARTITION BY list (col_1) SUBPARTITION BY hash (col_2) +( + PARTITION p_list_1 VALUES (-1,-2,-3,-4,-5,-6,-7,-8,-9,-10 ) + ( + SUBPARTITION p_hash_1_1 , + SUBPARTITION p_hash_1_2 , + SUBPARTITION p_hash_1_3 + ), + PARTITION p_list_2 VALUES (1,2,3,4,5,6,7,8,9,10 ) + ( + SUBPARTITION p_hash_2_1 , + SUBPARTITION p_hash_2_2 , + SUBPARTITION p_hash_2_3 , + SUBPARTITION p_hash_2_4 , + SUBPARTITION p_hash_2_5 + ), + PARTITION p_list_3 VALUES (11,12,13,14,15,16,17,18,19,20), + PARTITION p_list_4 VALUES (21,22,23,24,25,26,27,28,29,30 ) + ( + SUBPARTITION p_hash_4_1 + ), + PARTITION p_list_5 VALUES (default) + ( + SUBPARTITION p_hash_5_1 + ), + PARTITION p_list_6 VALUES (31,32,33,34,35,36,37,38,39,40) + ( + SUBPARTITION p_hash_6_1 , + SUBPARTITION p_hash_6_2 , + SUBPARTITION p_hash_6_3 + ) +) ENABLE ROW MOVEMENT ; +alter table list_hash split subPARTITION p_hash_2_3 at(-10) into ( subPARTITION add_p_01 , subPARTITION add_p_02 ); +ERROR: Hash subpartition does not support split. 
+DETAIL: N/A +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '6' ) + ) +); +ERROR: relation "range_range" already exists in schema "segment_subpartition_split" +DETAIL: creating new table with existing name in the same schema +alter table range_range split subpartition p_201901_b values (3) into +( + subpartition p_201901_c, + subpartition p_201901_d +) update global index; +ERROR: split subpartition "p_201901_b" does not exist. +CREATE TABLE IF NOT EXISTS list_list_02 +( + col_1 int , + col_2 int , + col_3 int , + col_4 int +) WITH (SEGMENT=ON) +PARTITION BY list (col_1) SUBPARTITION BY list (col_2) +( + PARTITION p_list_1 VALUES(-1,-2,-3,-4,-5,-6,-7,-8,-9,-10 ) + ( + SUBPARTITION p_list_1_1 VALUES ( 0,-1,-2,-3,-4,-5,-6,-7,-8,-9 ), + SUBPARTITION p_list_1_2 VALUES ( default ) + ), + PARTITION p_list_2 VALUES(0,1,2,3,4,5,6,7,8,9) + ( + SUBPARTITION p_list_2_1 VALUES ( 0,1,2,3,4,5,6,7,8,9 ), + SUBPARTITION p_list_2_2 VALUES ( default ), + SUBPARTITION p_list_2_3 VALUES ( 10,11,12,13,14,15,16,17,18,19), + SUBPARTITION p_list_2_4 VALUES ( 20,21,22,23,24,25,26,27,28,29 ), + SUBPARTITION p_list_2_5 VALUES ( 30,31,32,33,34,35,36,37,38,39 ) + ), + PARTITION p_list_3 VALUES(10,11,12,13,14,15,16,17,18,19) + ( + SUBPARTITION p_list_3_2 VALUES ( default ) + ), + PARTITION p_list_4 VALUES(default ), + PARTITION p_list_5 VALUES(20,21,22,23,24,25,26,27,28,29) + ( + SUBPARTITION p_list_5_1 VALUES ( 0,1,2,3,4,5,6,7,8,9 ), + SUBPARTITION p_list_5_2 VALUES ( default ), + 
SUBPARTITION p_list_5_3 VALUES ( 10,11,12,13,14,15,16,17,18,19), + SUBPARTITION p_list_5_4 VALUES ( 20,21,22,23,24,25,26,27,28,29 ), + SUBPARTITION p_list_5_5 VALUES ( 30,31,32,33,34,35,36,37,38,39 ) + ), + PARTITION p_list_6 VALUES(30,31,32,33,34,35,36,37,38,39), + PARTITION p_list_7 VALUES(40,41,42,43,44,45,46,47,48,49) + ( + SUBPARTITION p_list_7_1 VALUES ( default ) + ) +) ENABLE ROW MOVEMENT; diff --git a/src/test/regress/expected/ss_wr/segment_subpartition_truncate.out b/src/test/regress/expected/ss_wr/segment_subpartition_truncate.out new file mode 100644 index 000000000..cf0bfc19a --- /dev/null +++ b/src/test/regress/expected/ss_wr/segment_subpartition_truncate.out @@ -0,0 +1,137 @@ +--prepare +DROP SCHEMA segment_subpartition_truncate CASCADE; +ERROR: schema "segment_subpartition_truncate" does not exist +CREATE SCHEMA segment_subpartition_truncate; +SET CURRENT_SCHEMA TO segment_subpartition_truncate; +--truncate partition/subpartition +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( default ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201902', '2', '1', 1); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +insert into list_list values('201903', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +select * from list_list; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 + 201902 | 
2 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 +(6 rows) + +select * from list_list partition (p_201901); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 +(3 rows) + +alter table list_list truncate partition p_201901; +select * from list_list partition (p_201901); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- +(0 rows) + +select * from list_list partition (p_201902); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 + 201903 | 1 | 1 | 1 +(3 rows) + +alter table list_list truncate partition p_201902; +select * from list_list partition (p_201902); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- +(0 rows) + +select * from list_list; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- +(0 rows) + +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201902', '2', '1', 1); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +insert into list_list values('201903', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +select * from list_list subpartition (p_201901_a); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 1 | 1 | 1 + 201902 | 1 | 1 | 1 +(2 rows) + +alter table list_list truncate subpartition p_201901_a; +select * from list_list subpartition (p_201901_a); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- +(0 rows) + +select * from list_list subpartition (p_201901_b); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201902 | 2 | 1 | 1 +(1 row) + +alter table list_list truncate 
subpartition p_201901_b; +select * from list_list subpartition (p_201901_b); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- +(0 rows) + +select * from list_list subpartition (p_201902_a); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 1 | 1 | 1 +(1 row) + +alter table list_list truncate subpartition p_201902_a; +select * from list_list subpartition (p_201902_a); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- +(0 rows) + +select * from list_list subpartition (p_201902_b); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- + 201903 | 2 | 1 | 1 + 201903 | 2 | 1 | 1 +(2 rows) + +alter table list_list truncate subpartition p_201902_b; +select * from list_list subpartition (p_201902_b); + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- +(0 rows) + +select * from list_list; + month_code | dept_code | user_no | sales_amt +------------+-----------+---------+----------- +(0 rows) + diff --git a/src/test/regress/expected/ss_wr/trunc_func_for_date.out b/src/test/regress/expected/ss_wr/trunc_func_for_date.out new file mode 100644 index 000000000..b9028268c --- /dev/null +++ b/src/test/regress/expected/ss_wr/trunc_func_for_date.out @@ -0,0 +1,217 @@ +--- +--- data type 1 : timestamp +--- +-- format can recognize +select trunc(timestamp '2021-08-11 20:19:39', 'cc'); -- century + trunc +-------------------------- + Mon Jan 01 00:00:00 2001 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'yyyy'); -- year + trunc +-------------------------- + Fri Jan 01 00:00:00 2021 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'q'); -- quarter + trunc +-------------------------- + Thu Jul 01 00:00:00 2021 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'mm'); -- month + trunc +-------------------------- + Sun Aug 01 00:00:00 
2021 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'j'); -- day + trunc +-------------------------- + Wed Aug 11 00:00:00 2021 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'dd'); -- day + trunc +-------------------------- + Wed Aug 11 00:00:00 2021 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'ddd'); -- day + trunc +-------------------------- + Wed Aug 11 00:00:00 2021 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'hh'); -- hour + trunc +-------------------------- + Wed Aug 11 20:00:00 2021 +(1 row) + +select trunc(timestamp '2021-08-11 20:19:39', 'mi'); -- minute + trunc +-------------------------- + Wed Aug 11 20:19:00 2021 +(1 row) + +-- format can not recognize +select trunc(timestamp '2021-08-11 20:19:39', 'qq'); -- quarter +ERROR: timestamp units "qq" not recognized +CONTEXT: referenced column: trunc +select trunc(timestamp '2021-08-11 20:19:39', 'mmm'); -- month +ERROR: timestamp units "mmm" not recognized +CONTEXT: referenced column: trunc +select trunc(timestamp '2021-08-11 20:19:39', 'dddd'); -- day +ERROR: timestamp units "dddd" not recognized +CONTEXT: referenced column: trunc +select trunc(timestamp '2021-08-11 20:19:39', 'hhh'); -- hour +ERROR: timestamp units "hhh" not recognized +CONTEXT: referenced column: trunc +--- +--- data type 2 : timestamptz +--- +-- format can recognize +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'cc'); -- century + trunc +------------------------------ + Mon Jan 01 00:00:00 2001 PST +(1 row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'yyyy'); -- year + trunc +------------------------------ + Fri Jan 01 00:00:00 2021 PST +(1 row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'q'); -- quarter + trunc +------------------------------ + Thu Jul 01 00:00:00 2021 PDT +(1 row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'mm'); -- month + trunc +------------------------------ + Sun Aug 01 00:00:00 2021 PDT +(1 row) + 
+select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'j'); -- day + trunc +------------------------------ + Wed Aug 11 00:00:00 2021 PDT +(1 row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'dd'); -- day + trunc +------------------------------ + Wed Aug 11 00:00:00 2021 PDT +(1 row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'ddd'); -- day + trunc +------------------------------ + Wed Aug 11 00:00:00 2021 PDT +(1 row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'hh'); -- hour + trunc +------------------------------ + Wed Aug 11 17:00:00 2021 PDT +(1 row) + +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'mi'); -- minute + trunc +------------------------------ + Wed Aug 11 17:48:00 2021 PDT +(1 row) + +-- format can't recognize +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'qq'); -- quarter +ERROR: timestamp with time zone units "qq" not recognized +CONTEXT: referenced column: trunc +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'mmm'); -- month +ERROR: timestamp with time zone units "mmm" not recognized +CONTEXT: referenced column: trunc +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'dddd'); -- day +ERROR: timestamp with time zone units "dddd" not recognized +CONTEXT: referenced column: trunc +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'hhh'); -- hour +ERROR: timestamp with time zone units "hhh" not recognized +CONTEXT: referenced column: trunc +--- +--- data type 3 : interval +--- +-- format can recognize +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'cc'); -- century + trunc +------- + @ 0 +(1 row) + +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'yyyy'); -- year + trunc +----------- + @ 2 years +(1 row) + +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'q'); -- quarter + trunc +------------------ + @ 2 years 3 mons +(1 row) + +select trunc(interval '2 
years 3 months 4 days 5 hours 6 minutes 7 seconds', 'mm'); -- month + trunc +------------------ + @ 2 years 3 mons +(1 row) + +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'j'); -- day + trunc +------------------------- + @ 2 years 3 mons 4 days +(1 row) + +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'dd'); -- day + trunc +------------------------- + @ 2 years 3 mons 4 days +(1 row) + +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'ddd'); -- day + trunc +------------------------- + @ 2 years 3 mons 4 days +(1 row) + +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'hh'); -- hour + trunc +--------------------------------- + @ 2 years 3 mons 4 days 5 hours +(1 row) + +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'mi'); -- minute + trunc +---------------------------------------- + @ 2 years 3 mons 4 days 5 hours 6 mins +(1 row) + +-- format can not recognize +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'qq'); -- quarter +ERROR: interval units "qq" not recognized +CONTEXT: referenced column: trunc +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'mmm'); -- month +ERROR: interval units "mmm" not recognized +CONTEXT: referenced column: trunc +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'dddd'); -- day +ERROR: interval units "dddd" not recognized +CONTEXT: referenced column: trunc +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'hhh'); -- hour +ERROR: interval units "hhh" not recognized +CONTEXT: referenced column: trunc +-- not supported +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'w'); -- week +ERROR: interval units "w" not supported +CONTEXT: referenced column: trunc diff --git a/src/test/regress/expected/unsupported_immutable_func.out 
b/src/test/regress/expected/unsupported_immutable_func.out new file mode 100644 index 000000000..a3388a6c5 --- /dev/null +++ b/src/test/regress/expected/unsupported_immutable_func.out @@ -0,0 +1,16 @@ +drop procedure procedure_name_immutable; +ERROR: function procedure_name_immutable does not exist +create or replace procedure procedure_name_immutable +IMMUTABLE +NOT SHIPPABLE NOT LEAKPROOF +STRICT EXTERNAL +SECURITY INVOKER PACKAGE +COST 480 ROWS 528 +IS +begin +NULL; +rollback; +end; +/ +NOTICE: Immutable function will be shippable anyway. +ERROR: ROWS is not applicable when function does not return a set diff --git a/src/test/regress/gs_probackup.sh b/src/test/regress/gs_probackup.sh new file mode 100644 index 000000000..0edc1bb3d --- /dev/null +++ b/src/test/regress/gs_probackup.sh @@ -0,0 +1,60 @@ +#!/bin/sh +cur_path=$(cd $(dirname $0); pwd) +bin_path=${GAUSSHOME}/bin/ +SS_DATA=${HOME}/ss_data +echo "cur_path=${cur_path}" +echo "bin_path=${bin_path}" + +build_datebase() +{ + cd ${cur_path}/../ss/ + sh build_ss_database.sh +} + +gs_probackup_pre() +{ + cd ${bin_path} + if [ -d ${HOME}/gauss_bck ];then + rm -rf ${HOME}/gauss_bck + fi + ${bin_path}/gs_probackup init -B ${HOME}/gauss_bck + ${bin_path}/gs_probackup add-instance -B ${HOME}/gauss_bck -D ${SS_DATA}/dn0 --instance backup1 --enable-dss --instance-id 0 --vgname +data --socketpath=UDS:${SS_DATA}/dss_home0/.dss_unix_d_socket + ${bin_path}/gs_probackup backup -B ${HOME}/gauss_bck/ --instance backup1 -b full -d postgres -p 2000 + ${bin_path}/gs_probackup show -B ${HOME}/gauss_bck/ + ${bin_path}/gs_ctl stop -D ${SS_DATA}/dn0 + ${bin_path}/gs_ctl stop -D ${SS_DATA}/dn1 + ps ux | grep dssserver | grep -v grep | grep "${SS_DATA}"| awk '{print $2}' | xargs kill -9 + rm -rf ${SS_DATA}/dn0/* + rm -rf ${SS_DATA}/dss_disk/dss_data.dmp +} + +gs_probackup_start() +{ + truncate -s 10G ${SS_DATA}/dss_disk/dss_data.dmp + chmod 777 ${SS_DATA}/dss_disk/dss_data.dmp + ${bin_path}/dsscmd cv -g data -v 
${SS_DATA}/dss_disk/dss_data.dmp -s 2048 -D ${SS_DATA}/dss_home0/ + ${bin_path}/dssserver -D ${SS_DATA}/dss_home0/ & + ${bin_path}/gs_probackup restore -B ${HOME}/gauss_bck/ --instance backup1 + ${bin_path}/gs_ctl start -D ${SS_DATA}/dn0 + pid=`ps ux | grep gaussdb |grep -v grep |awk '{print $2}'` + if [ ! -n "${pid}" ];then + echo "gs probackup failed" + exit 1 + fi + ${bin_path}/gs_ctl stop -D ${SS_DATA}/dn0 +} + +clean_env() +{ + ps ux | grep dssserver | grep -v grep | grep "${SS_DATA}"| awk '{print $2}' | xargs kill -9 >/dev/null 2>&1 + rm -rf ${HOME}/gauss_bck + echo "gs_probackup ok" +} + + +build_datebase +gs_probackup_pre +gs_probackup_start +clean_env + + diff --git a/src/test/regress/input/single_node_checkguc.source b/src/test/regress/input/single_node_checkguc.source new file mode 100644 index 000000000..9ae2bb564 --- /dev/null +++ b/src/test/regress/input/single_node_checkguc.source @@ -0,0 +1,70 @@ +-- +-- check dms guc parameter and ssl parameter +-- check dms guc + + +\! @abs_bindir@/gs_ctl stop -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! sleep 5 + +-- check ssl guc with value +\! chmod 600 @abs_srcdir@/sslcert_ss/* +\! cp @abs_srcdir@/sslcert_ss/server.* @abs_srcdir@/tmp_check/datanode1/ +\! cp @abs_srcdir@/sslcert_ss/cacert.pem @abs_srcdir@/tmp_check/datanode1/ +\! export PGSSLMODE=verify-ca +\! export PGSSLROOTCERT=@abs_srcdir@/sslcert_ss/cacert.pem + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl = on" + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl_cert_notify_time = 90" + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl_cert_file = 'server.crt'" + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl_key_file = 'server.key'" + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl_ca_file = 'cacert.pem'" + + +\! @abs_bindir@/gs_ctl start -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! 
sleep 10 + +-- check ssl guc without value +\! @abs_bindir@/gs_ctl stop -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! sleep 5 + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl = off" + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl_cert_file = ''" + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl_key_file = ''" + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl_ca_file = ''" + +\! @abs_bindir@/gs_ctl stop -D @abs_srcdir@/tmp_check/datanode1 > /dev/null + +\! rm -rf @abs_srcdir@/tmp_check/datanode1/server.* +\! rm -rf @abs_srcdir@/tmp_check/datanode1/cacert.pem + +\! @abs_bindir@/gs_ctl start -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! sleep 10 + +-- check rdma guc +\! @abs_bindir@/gs_ctl stop -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! sleep 5 + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ss_rdma_work_config='abc123'" + +\! @abs_bindir@/gs_ctl start -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! sleep 10 + +-- clean guc vlue +\! @abs_bindir@/gs_ctl stop -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! sleep 5 + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ss_rdma_work_config=''" + +\! @abs_bindir@/gs_ctl start -D @abs_srcdir@/tmp_check/datanode1 > /dev/null + +\! 
sleep 5 + + + diff --git a/src/test/regress/make_fastcheck_ss_postgresql.conf b/src/test/regress/make_fastcheck_ss_postgresql.conf new file mode 100644 index 000000000..f182af69e --- /dev/null +++ b/src/test/regress/make_fastcheck_ss_postgresql.conf @@ -0,0 +1,41 @@ +shared_buffers = 256MB +autovacuum=false +ss_enable_dms = on +shared_buffers = 256MB +work_mem = 16MB +fsync = off +synchronous_commit = off +archive_mode = off +audit_user_violation = 1 +audit_system_object = 511 +audit_dml_state = 1 +audit_function_exec = 1 +audit_copy_exec = 1 +full_page_writes = off +wal_keep_segments = 50 +checkpoint_segments = 16 +checkpoint_timeout = 30min +enable_bbox_dump = off +bbox_dump_count = 4 +bbox_dump_path = '/tmp/invalidpath' +comm_tcp_mode = on +#comm_cn_dn_logic_conn = false +enable_absolute_tablespace = true +#enable_dynamic_workload = false +max_connections = 1000 +query_mem='256MB' +auth_iteration_count=2048 +enable_sonic_hashagg=on +enable_sonic_hashjoin=on +enable_cbm_tracking = on +enable_opfusion=on +uncontrolled_memory_context='HashCacheContext,TupleHashTable,TupleSort,AggContext,SRF multi-call context,CteScan*,FunctionScan*,RemoteQuery*,VecAgg*,HashContext,TopTransactionContext' +#enable_tsdb = on +enable_thread_pool = on +enable_default_cfunc_libpath = off +enable_stateless_pooler_reuse = on +# enable_ustore = on +sql_beta_feature = 'a_style_coerce' +enable_global_syscache = on +log_min_messages = FATAL +ss_enable_reform = off diff --git a/src/test/regress/output/recovery_2pc_tools.source b/src/test/regress/output/recovery_2pc_tools.source index ed2f46dfb..9943b0c85 100644 --- a/src/test/regress/output/recovery_2pc_tools.source +++ b/src/test/regress/output/recovery_2pc_tools.source @@ -587,6 +587,18 @@ select name,vartype,unit,min_val,max_val from pg_settings where name <> 'qunit_c sql_ignore_strategy | string | | | sql_inheritance | bool | | | sql_use_spacelimit | integer | kB | -1 | 2147483647 + ss_dss_conn_path | string | | | + ss_dss_vg_name | string 
| | | + ss_enable_catalog_centralized | bool | | | + ss_enable_dms | bool | | | + ss_enable_dss | bool | | | + ss_enable_log_level | bool | | | + ss_enable_reform | bool | | | + ss_enable_ssl | bool | | | + ss_instance_id | integer | | 0 | 63 + ss_interconnect_channel_count | integer | | 1 | 32 + ss_interconnect_type | string | | | + ss_interconnect_url | string | | | ssl | bool | | | ssl_ca_file | string | | | ssl_cert_file | string | | | @@ -595,6 +607,10 @@ select name,vartype,unit,min_val,max_val from pg_settings where name <> 'qunit_c ssl_crl_file | string | | | ssl_key_file | string | | | ssl_renegotiation_limit | integer | kB | 0 | 2147483647 + ss_ock_log_path | string | | | + ss_rdma_work_config | string | | | + ss_recv_msg_pool_size | integer | kB | 1024 | 1048576 + ss_work_thread_count | integer | | 16 | 128 standard_conforming_strings | bool | | | standby_shared_buffers_fraction | real | | 0.1 | 1 statement_timeout | integer | ms | 0 | 2147483647 diff --git a/src/test/regress/output/single_node_checkguc.source b/src/test/regress/output/single_node_checkguc.source new file mode 100644 index 000000000..288385677 --- /dev/null +++ b/src/test/regress/output/single_node_checkguc.source @@ -0,0 +1,120 @@ +-- +-- check dms guc parameter and ssl parameter +-- check dms guc +\! @abs_bindir@/gs_ctl stop -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! sleep 5 +-- check ssl guc with value +\! chmod 600 @abs_srcdir@/sslcert_ss/* +\! cp @abs_srcdir@/sslcert_ss/server.* @abs_srcdir@/tmp_check/datanode1/ +\! cp @abs_srcdir@/sslcert_ss/cacert.pem @abs_srcdir@/tmp_check/datanode1/ +\! export PGSSLMODE=verify-ca +\! export PGSSLROOTCERT=@abs_srcdir@/sslcert_ss/cacert.pem +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl = on" +The gs_guc run with the following arguments: [@abs_bindir@/gs_guc -D @abs_srcdir@/tmp_check/datanode1/ -c ssl = on set ]. 
+expected instance path: [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] +gs_guc set: ssl=on: [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] + +Total instances: 1. Failed instances: 0. +Success to perform gs_guc! + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl_cert_notify_time = 90" +The gs_guc run with the following arguments: [@abs_bindir@/gs_guc -D @abs_srcdir@/tmp_check/datanode1/ -c ssl_cert_notify_time = 90 set ]. +NOTICE: Alarm days before ssl cert expires. +expected instance path: [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] +gs_guc set: ssl_cert_notify_time=90: [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] + +Total instances: 1. Failed instances: 0. +Success to perform gs_guc! + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl_cert_file = 'server.crt'" +The gs_guc run with the following arguments: [@abs_bindir@/gs_guc -D @abs_srcdir@/tmp_check/datanode1/ -c ssl_cert_file = 'server.crt' set ]. +expected instance path: [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] +gs_guc set: ssl_cert_file='server.crt': [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] + +Total instances: 1. Failed instances: 0. +Success to perform gs_guc! + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl_key_file = 'server.key'" +The gs_guc run with the following arguments: [@abs_bindir@/gs_guc -D @abs_srcdir@/tmp_check/datanode1/ -c ssl_key_file = 'server.key' set ]. +expected instance path: [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] +gs_guc set: ssl_key_file='server.key': [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] + +Total instances: 1. Failed instances: 0. +Success to perform gs_guc! + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl_ca_file = 'cacert.pem'" +The gs_guc run with the following arguments: [@abs_bindir@/gs_guc -D @abs_srcdir@/tmp_check/datanode1/ -c ssl_ca_file = 'cacert.pem' set ]. 
+expected instance path: [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] +gs_guc set: ssl_ca_file='cacert.pem': [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] + +Total instances: 1. Failed instances: 0. +Success to perform gs_guc! + +\! @abs_bindir@/gs_ctl start -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! sleep 10 +-- check ssl guc without value +\! @abs_bindir@/gs_ctl stop -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! sleep 5 +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl = off" +The gs_guc run with the following arguments: [@abs_bindir@/gs_guc -D @abs_srcdir@/tmp_check/datanode1/ -c ssl = off set ]. +expected instance path: [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] +gs_guc set: ssl=off: [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] + +Total instances: 1. Failed instances: 0. +Success to perform gs_guc! + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl_cert_file = ''" +The gs_guc run with the following arguments: [@abs_bindir@/gs_guc -D @abs_srcdir@/tmp_check/datanode1/ -c ssl_cert_file = '' set ]. +expected instance path: [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] +gs_guc set: ssl_cert_file='': [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] + +Total instances: 1. Failed instances: 0. +Success to perform gs_guc! + +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl_key_file = ''" +The gs_guc run with the following arguments: [@abs_bindir@/gs_guc -D @abs_srcdir@/tmp_check/datanode1/ -c ssl_key_file = '' set ]. +expected instance path: [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] +gs_guc set: ssl_key_file='': [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] + +Total instances: 1. Failed instances: 0. +Success to perform gs_guc! + +\! 
@abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ssl_ca_file = ''" +The gs_guc run with the following arguments: [@abs_bindir@/gs_guc -D @abs_srcdir@/tmp_check/datanode1/ -c ssl_ca_file = '' set ]. +expected instance path: [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] +gs_guc set: ssl_ca_file='': [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] + +Total instances: 1. Failed instances: 0. +Success to perform gs_guc! + +\! @abs_bindir@/gs_ctl stop -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! rm -rf @abs_srcdir@/tmp_check/datanode1/server.* +\! rm -rf @abs_srcdir@/tmp_check/datanode1/cacert.pem +\! @abs_bindir@/gs_ctl start -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! sleep 10 +-- check rdma guc +\! @abs_bindir@/gs_ctl stop -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! sleep 5 +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ss_rdma_work_config='abc123'" +The gs_guc run with the following arguments: [@abs_bindir@/gs_guc -D @abs_srcdir@/tmp_check/datanode1/ -c ss_rdma_work_config='abc123' set ]. +expected instance path: [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] +gs_guc set: ss_rdma_work_config='abc123': [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] + +Total instances: 1. Failed instances: 0. +Success to perform gs_guc! + +\! @abs_bindir@/gs_ctl start -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! sleep 10 +-- clean guc vlue +\! @abs_bindir@/gs_ctl stop -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! sleep 5 +\! @abs_bindir@/gs_guc set -D @abs_srcdir@/tmp_check/datanode1/ -c "ss_rdma_work_config=''" +The gs_guc run with the following arguments: [@abs_bindir@/gs_guc -D @abs_srcdir@/tmp_check/datanode1/ -c ss_rdma_work_config='' set ]. +expected instance path: [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] +gs_guc set: ss_rdma_work_config='': [@abs_srcdir@/tmp_check/datanode1/postgresql.conf] + +Total instances: 1. Failed instances: 0. +Success to perform gs_guc! + +\! 
@abs_bindir@/gs_ctl start -D @abs_srcdir@/tmp_check/datanode1 > /dev/null +\! sleep 5 diff --git a/src/test/regress/parallel_scheduleSS b/src/test/regress/parallel_scheduleSS new file mode 100644 index 000000000..aac944db3 --- /dev/null +++ b/src/test/regress/parallel_scheduleSS @@ -0,0 +1,567 @@ +# ---------- +# src/test/regress/parallel_scheduleSS +# +# By convention, we put no more than twenty tests in any one parallel group; +# this limits the number of connections needed to run the tests. +# ---------- + +# so long time +#test: hw_subpartition_createtable hw_subpartition_scan hw_subpartition_select hw_subpartition_split hw_subpartition_truncate hw_subpartition_update hw_subpartition_gpi hw_subpartition_alter_table hw_subpartition_index hw_subpartition_add_drop_partition hw_subpartition_tablespace hw_subpartition_ddl_index +#test: hw_subpartition_vacuum_partition + +test: replace_func_with_two_args trunc_func_for_date + +#test: analyze_commands +#test: single_node_job +test: single_node_ddl +test: single_node_sqlbypass +test: median + + +# run tablespace by itself, and first, because it forces a checkpoint; +# we'd prefer not to have checkpoints later in the tests because that +# interferes with crash-recovery testing. 
+#test: single_node_tablespace + +#test: large_sequence int16 gs_dump_sequence + +# ---------- +# The first group of parallel tests +# ---------- +test: single_node_boolean single_node_char single_node_name single_node_varchar single_node_text single_node_int2 single_node_int4 single_node_int8 single_node_oid single_node_float4 single_node_float8 single_node_bit single_node_numeric single_node_uuid single_node_enum single_node_money + +# Depends on things setup during char, varchar and text +#test: single_node_strings +# Depends on int2, int4, int8, float4, float8 +test: single_node_numerology + +# ---------- +# The second group of parallel tests +# ---------- +test: single_node_lseg single_node_box single_node_path single_node_polygon single_node_circle single_node_date single_node_time single_node_timetz single_node_timestamp single_node_timestamptz +#test: single_node_interval +test: single_node_abstime single_node_reltime +#test: single_node_tinterval +test: single_node_inet single_node_macaddr single_node_tstypes single_node_comments + +# ---------- +# Another group of parallel tests +# geometry depends on point, lseg, box, path, polygon and circle +# horology depends on interval, timetz, timestamp, timestamptz, reltime and abstime +# ---------- +#test: single_node_geometry single_node_horology +#test: single_node_regex +test: single_node_regex_temp +test: single_node_oidjoins single_node_type_sanity + +# ---------- +# These four each depend on the previous one +# ---------- +#test: single_node_insert xc_rownum +test: single_node_temple +test: single_node_create_function_1 +#test: single_node_create_type +#test: single_node_create_table +#test: single_node_create_function_2 + +# ---------- +# Load huge amounts of data +# We should split the data files into single files and then +# execute two copy tests parallel, to check that copy itself +# is concurrent safe. 
+# ---------- +#test: single_node_copy single_node_copyselect + +# ---------- +# More groups of parallel tests +# ---------- +#test: single_node_create_misc +#test: single_node_create_operator +# These depend on the above two +#test: single_node_create_index +#test: single_node_create_view + +# ---------- +# Another group of parallel tests +# ---------- +test: single_node_create_aggregate +#test: single_node_create_function_3 single_node_create_cast +#test: single_node_constraints single_node_triggers single_node_inherit single_node_create_table_like single_node_typed_table +#test: single_node_vacuum +#test: single_node_drop_if_exists + +# ---------- +# sanity_check does a vacuum, affecting the sort order of SELECT * +# results. So it should not run parallel to other tests. +# ---------- +#test: single_node_sanity_check + +# ---------- +# Believe it or not, select creates a table, subsequent +# tests need. +# ---------- +test: single_node_errors +#test: single_node_select +ignore: single_node_random + +# ---------- +# Another group of parallel tests +# ---------- +#test: single_node_select_into single_node_select_distinct +#test: single_node_select_distinct_on single_node_select_implicit single_node_select_having +test: single_node_select_implicit single_node_select_having +#test: single_node_subselect +#test: single_node_union +#test: single_node_case single_node_join single_node_aggregates +#test: single_node_transactions +test: single_node_random +#test: single_node_portals +#test: single_node_arrays +#test: single_node_btree_index single_node_hash_index single_node_update +test: single_node_update +#test single_node_namespace +#test: single_node_prepared_xacts +#test: single_node_delete + +# ---------- +# Another group of parallel tests +# ---------- +#test: single_node_privileges +#test: single_node_security_label single_node_collate + +#test: single_node_misc +# rules cannot run concurrently with any test that creates a view +#test: single_node_rules + +# 
---------- +# Another group of parallel tests +# ---------- +#test: single_node_select_views +#test: single_node_portals_p2 +#test: single_node_foreign_key +#test: single_node_foreign_key single_node_cluster single_node_dependency +#test: single_node_guc +test: single_node_bitmapops +#test: single_node_tsearch +#test: single_node_tsdicts +#test: single_node_foreign_data +#single_node_window +#test: single_node_xmlmap +#test: single_node_functional_deps single_node_advisory_lock single_node_json single_node_equivclass + +# ---------- +# Another group of parallel tests +# NB: temp.sql does a reconnect which transiently uses 2 connections, +# so keep this parallel group to at most 19 tests +# ---------- +#test: single_node_sequence +#test: single_node_plancache single_node_limit single_node_plpgsql single_node_copy2 single_node_temp single_node_domain single_node_rangefuncs single_node_prepare single_node_without_oid single_node_conversion single_node_truncate single_node_alter_table single_node_sequence single_node_polymorphism +#test: single_node_rowtypes +#test: single_node_returning single_node_largeobject single_node_with single_node_xml + +# run stats by itself because its delay may be insufficient under heavy load +#test: single_node_stats + +# run and check forbidden functions are still forbidden to use in single node +test: single_node_forbidden +#test: single_node_mergeinto merge_subquery merge_subquery3 + +# Trigger tests +#test: single_node_triggers +#test: single_node_xc_trigship + +# Synonym tests +#test: single_node_synonym + +# unsupported view tests +test: single_node_unsupported_view +#test: hw_cstore + +# ---------- +# single_node_commit/rollback tests +# ---------- +# interval partition +#test: hw_partition_interval_index +#test: hw_partition_interval_select +test: hw_partition_interval_compatibility +#openGauss synchronization test cases +#test: partiton_pathkey_col_plan partiton_pathkey_col_randomexec partiton_pathkey_row_plan 
partiton_pathkey_row_randomexec +#test the locales setting expect not affacted each other +#test: pg_session_locale +# ---------- +# These four each depend on the previous one(duplicate) +# duplicated create_function_1 create_type create_table copy +# ---------- +#test: type_sanity +#test: create_function_1 +#test: create_table +#test: temp__4 +#test: copy# + +# ---------- +# More groups of parallel tests +# duplicated create_misc +# ---------- +#test: hw_hashagg_start +#test: create_misc +#test: create_view1 create_view2 create_view3 create_view4 create_view5 +#test: int8# + +#dupliacated select int8 +#test: select +#test: misc +#test: stats +#test: alter_system_set + +# test for hll +#test: hll_hash hll_func hll_para hll_mpp hll_cstore hll_misc + +# test for function pg_get_tabledef +test: function_get_table_def + +#dispatch from 13 +#test: function +#test: aggregates_part1 aggregates_part2 aggregates_part3 count_distinct_part1 count_distinct_part2 count_distinct_part4 +#test: count_distinct_part3# + +test: hw_dfx_thread_status + +test: stable_function_shippable +# ---------- +# data partition +# ---------- +#test: physical_slot + +#test: hw_smp + +# test MERGE INTO + +# test INSERT UPDATE UPSERT +#test: insert_update_002 insert_update_003 insert_update_008 insert_update_009 insert_update_010 +#test: insert_update_001# +#test: delete update namespace case select_having select_implicit +#test: hw_test_operate_user +#test: hw_createtbl_llt +#test: gsqlerr# +#test: sqlLLT +#test: hw_sql_llt# + +test: upsert_where + +#test: upsert_prepare +#test: upsert_001 upsert_002 upsert_003 upsert_008 upsert_009 upsert_010 +#test: upsert_grammer_test_01 upsert_unlog_test upsert_tmp_test +#test: upsert_grammer_test_02 upsert_restriction upsert_composite +#test: upsert_trigger_test upsert_explain upsert_where upsert_where_sublink +#test: upsert_clean + +# all pass +# run tablespace by itself, and first, because it forces a checkpoint; +# we'd prefer not to have checkpoints later 
in the tests because that +# interferes with crash-recovery testing. +#test: hw_alter_session +#test: tablespace +#test: hw_account_lock + +# ---------- +# Another group of parallel tests +# ---------- +#test: hw_independent_user hw_user_basic hw_user_revoke hw_user_privilege hw_user_pguser hw_user_namespace +#test: hw_interval_format hw_function_p_3 hw_function_p_4 hw_current_schema hw_functions +#test: hw_function_p_1 hw_function_p_2# +#test: hw_dba_enable_partition hw_tablespace +#test: hw_procedure_define +#test: hw_anonymous_block +#test: hw_procedure# +#test: hw_grant_all hw_func_return_out +#hw_dynamic_sql +test: hw_package_function + +#show plan +#test: plan_hint + +###split from parallel_schedule4### + +# ---------- +# Another group of parallel tests +# ---------- +# plsql_packages tests + +test: hw_empty_str_to_null +#test: hw_schema + +#test: tpchrush +#test: tpch01 tpch03 tpch04 libcomm_check_status tpch03_querymem +#test: tpch05 tpch06 tpch07 tpch08 +#test: tpch09 tpch10 tpch11 tpch12 +#test: tpch13 tpch14 tpch15 tpch16 +#test: tpch18 tpch19 tpch20 tpch18_querymem +#test: tpch21 tpch22 tpch11_pretty_performance +#test: tpch02 tpch17 + +#test export +#test: temp__2 + +#test: vec_prepare_001 vec_prepare_002 +#test: vec_prepare_003 + +#test sort optimize +#test: sort_optimize_row sort_optimize_column sort_optimize_001 +#test early free +#test: early_free +#test for col tpch with vector engine disabled +#test: tpch_disablevec01 tpch_disablevec03 tpch_disablevec04 +#test: tpch_disablevec05 tpch_disablevec06 tpch_disablevec07 +#test: tpch_disablevec08 tpch_disablevec09 tpch_disablevec12 +#test: tpch_disablevec13 tpch_disablevec14 tpch_disablevec16 +#test: tpch_disablevec18 tpch_disablevec19 tpch_disablevec21 + +# test subpartition with segment=on +test: segment_subpartition_createtable_ss segment_subpartition_scan segment_subpartition_split segment_subpartition_truncate segment_subpartition_update segment_subpartition_gpi segment_subpartition_analyze_vacuum 
segment_subpartition_add_drop_partition segment_subpartition_ddl_index +#test: segment_subpartition_vacuum_partition +# segment_subpartition_alter_table + +# ---------- +# Postgres-XC additional tests +# ---------- + +# This was used by triggers +test: xc_create_function +# Now xc_misc is used by xc_returning_step1 and xc_returning_step2 +#test: xc_misc +# Those ones can be run in parallel +test: xc_groupby xc_distkey xc_having +#test: hw_rewrite_lazyagg hw_light +#test: xc_temp xc_FQS +#test: xc_remote hw_pbe +test: xc_FQS_join xc_copy +#test: xc_alter_table +test: xc_constraints xc_limit xc_sort +#test: xc_params xc_returning_step1 +test: xc_params +test: xc_returning_step2 + +#test row compress +#test: compress compress01 compress02 cmpr_toast_000 cmpr_toast_update cmpr_index_00 cmpr_6bytes cmpr_int cmpr_datetime cmpr_numstr cmpr_numstr01 cmpr_float cmpr_nulls_delta cmpr_nulls_prefix cmpr_copyto cmpr_mode_none00 cmpr_mode_none01 cmpr_references_00 cmpr_references_01 +#test: cmpr_rollback cmpr_drop_column cmpr_drop_column_01 cmpr_drop_column_02 cmpr_drop_column_03 cmpr_dead_loop_00 cmpr_timewithzone cmpr_cluster_00 + +# Cluster setting related test is independant + + +test: xc_dml + +# Postgres-XC : Removed this test from the parallel group of tests since it used to give inconsistent plan outputs. 
+#test: inherit +# ---------- +# Another group of parallel tests +# ---------- +#test: create_function_3 vacuum +#test: constraints drop_if_exists + +#test: errors subplan_base +#test: subplan_new +#test: select +#test: col_subplan_new +#test: col_subplan_base_1 +#test: join +#test: select_into subselect_part2 gs_aggregate +#test: select_distinct subselect_part1 transactions btree_index select_distinct_on arrays hash_index +#test: transactions_control random union +#test: aggregates +#test: holdable_cursor +#test: portals_p2 window tsearch temp__6 col_subplan_base_2 + +#test: alter_table_000 alter_table_002 alter_table_003 +#test: alter_table_001 + +#test: with + +# ---------- +# Database security +# ---------- +#test: hw_pwd_reuse +#test: hw_auditadmin + +#test: performance_enhance +#test: explain_fqs +#test: explain_pbe +# temp__3 create_table copy vec_prepare_001 vec_prepare_002 vec_prepare_003 int4 int8 are duplicated +test: temp__3 +# ---------- +# Another group of parallel tests +# NB: temp.sql does a reconnect which transiently uses 2 connections, +# so keep this parallel group to at most 19 tests +# ---------- +#test: plpgsql +test: plpgsql_multiset +test: plpgsql_array_opengauss +test: plpgsql_table_opengauss +test: arrayinterface_single +#arrayinterface_ted +#test: plancache limit rangefuncs prepare +# arrayinterface_ted +test: plpgsql_assign_value_to_array_attribute +test: plpgsql_cursor_rowtype +test: plpgsql_array_of_record +test: plpgsql_assign_list +#test: plpgsql_package_type +test: plpgsql_record_attrname +test: largeobject +#test: hw_explain_pretty1 hw_explain_pretty2 hw_explain_pretty3 +test: goto +#test: equivalence_class +#test: tsdb_delta2_compress +#test: tsdb_xor_compress +#test: tsdb_aggregate + +test: readline +#test: hw_to_timestamp hw_view_privilege + +#test: hw_identifier +#test: hw_hashint1 hw_smalldatetime_hash hw_rawtype_hash +#test: hw_nvarchar2_hash cmpr_smallint cmpr_prefix_150left cmpr_uint32_oid +test: oidjoins opr_sanity_2 
regex +#test: opr_sanity_1 + +test: pmk +# Cluster setting related test is independant +# ---------- +# Test of changed data type compatible with Oracle + +#test: hw_datatype_2 hw_datatype_3 +#test: hw_datatype +#test: test_regex llt_atc + +# ---------- +# test for set operations +# ---------- +test: select_nest_views +#test: enum +#show plan +#test: col_joinplan col_joinnew +#test: col_limit col_distinct col_prepare +#test: col_function_1 col_function_2 col_count_distinct_1 col_count_distinct_2 col_count_distinct_3 col_count_distinct_4 +#test: directory_test +#test: analyse_verify +#test: create_compositetype +#test: hw_pct_type_and_rowtype +#test: create_basetype +#test: tabletype +#test with recursive +test: recursive_ref_recursive +#test: recursive_prepare +#test: recursive_cte +#test: recursive_cte_col +#test: nohashjoin_recursive_cte +#test: nohashjoin_recursive_cte_col +#test: others +#test: icbc_customer +#test: recursive_unshippable +#test: recursive_finalize +#test: recursive_cte_1 +test: test_relpages + +test: temp__3 +#test: vec_window_pre +#test: gin_test_2 +#test: window1 +#test: vec_window_001 +#test: vec_window_002 +#test: vec_numeric_sop_1 vec_numeric_sop_2 vec_numeric_sop_3 vec_numeric_sop_4 vec_numeric_sop_5 +#test: vec_window_end + +#test: vec_unique_pre vec_bitmap_prepare +#test: vec_unique vec_setop_001 vec_setop_002 vec_setop_003 vec_setop_004 hw_vec_int4 hw_vec_int8 hw_vec_float4 hw_vec_float8 +#test: vec_setop_005 +#test: hw_vec_constrainst vec_numeric vec_numeric_1 vec_numeric_2 vec_bitmap_1 vec_bitmap_2 +#test: disable_vector_engine +#test: hybrid_row_column +#test: retry +#test: hw_replication_slots +test: insert +#test: copy2 temp +test: truncate +#test: temp_table + +#test: b_compatibility +#test: hw_compatibility +#test: hw_groupingsets hw_row_grouping_set +#test: char_truncation_common char_truncation_cast + +#this case is dispatched from schedule10(gin_test) +#test: gin_test1 gin_test2 gin_test3 + +#the fallowing part is dispatched 
from schedule15 + +# FIXME: move me back to the parallel test when the refcnt issue is fixed +# Below two teste are unstable, temporarily ignoring. This is same to distribute_dattistic, relallvisible, Dongwang will solve the problem. + +#test: hw_expression_alias + + +#========================================================================================================================================== + + +# ---------- +# src/test/regress/parallel_schedule.33 +# +# By convention, we put no more than twenty tests in any one parallel group; +# this limits the number of connections needed to run the tests. +# ---------- + + +test: udf_crem + +#test: create_c_function + +#---1. Drop-Column test +#test: cstore_drop_column_replicated +#test: cstore_drop_column + +#split from parallel_schedule2 + +# ---------- +# Advisory lock need to be tested in series in Postgres-XC +# --------- +test: advisory_lock + +# ---------- +# Another group of parallel tests +# ---------- +#test: cluster dependency bitmapops tsdicts functional_deps +#test: json_and_jsonb json jsonb jsonb2 +#test: guc + +# test for vec sonic hash +#test: vec_sonic_hashjoin_number_prepare +#test: vec_sonic_hashjoin_number_nospill + +#test: dml +#test: hashfilter hashfilter_1 +test: reduce_orderby +#test: backtrace_log +#test: bulkload_start +test: bulkload_parallel_test_2 bulkload_parallel_test_3 +#test: bulkload_parallel_test_1 bulkload_parallel_test_4 + +#test: tpchcol05 tpchcol07 tpchcol08 tpchcol09 + +#test: tpchcol01 +#test: tpchcol06 +#test: tpchcol03 tpchcol04 +#test: tpchcol12 tpchcol13 tpchcol14 tpchcol16 tpchcol18 tpchcol19 tpchcol21 + +#test: vec_partition vec_partition_1 vec_material_001 + +#test: llvm_vecsort llvm_vecsort2 + +#test: udf_crem create_c_function + +#test: hw_package +#test: publication +#test: subscription + +test: ss_metacmd +test: show_database_info +test: unsupported_immutable_func ss_unsupported_hasuids_table +test: single_node_checkguc diff --git 
a/src/test/regress/parallel_schedule_ss_read b/src/test/regress/parallel_schedule_ss_read new file mode 100644 index 000000000..01ae99916 --- /dev/null +++ b/src/test/regress/parallel_schedule_ss_read @@ -0,0 +1,8 @@ +test: replace_func_with_two_args trunc_func_for_date + +test: segment_subpartition_scan +test: segment_subpartition_split +test: segment_subpartition_truncate +test: segment_subpartition_gpi +test: segment_subpartition_analyze_vacuum +test: segment_subpartition_select \ No newline at end of file diff --git a/src/test/regress/pg_regress.cpp b/src/test/regress/pg_regress.cpp index b83f58478..e1f181d58 100644 --- a/src/test/regress/pg_regress.cpp +++ b/src/test/regress/pg_regress.cpp @@ -444,6 +444,10 @@ static bool change_password = false; /* Only init database, for inplace upgrade test use */ static bool init_database = false; +/* for shared storage test use */ +static bool enable_ss = false; +static bool ss_standby_read = false; + /* Do inplace upgrade before run regression tests */ static bool inplace_upgrade = false; static bool parallel_initdb = false; @@ -1222,6 +1226,14 @@ static void stop_postmaster(void) #endif postmaster_running = false; + if (enable_ss) { + printf("stop DSS now!\n"); + int ret = system("ps ux | grep dssserver | grep -v grep | awk '{print $2}' | xargs kill -9"); + if (ret != 0) { + fprintf(stderr, _("\n could not stop dss: exit code was %d\n"), ret); + exit(2); /* not exit(), that would be recursive */ + } + } } } @@ -1421,6 +1433,38 @@ static void start_single_node() free(data_folder); } +/* Start single datanode for test */ +static void start_ss_node(int i) +{ + char buf[MAXPGPATH * 4]; + int port_number = myinfo.dn_port[i]; + char* data_folder = get_node_info_name(i, DATANODE, false); + + (void)snprintf(buf, + sizeof(buf), + SYSTEMQUOTE "\"%s/gaussdb\" -p %d -D \"%s/%s\" -c log_statement=all -c logging_collector=true -c " + "\"listen_addresses=%s\" & > \"%s/log/postmaster_%s.log\" 2>&1" SYSTEMQUOTE, + bindir, + port_number, 
+ temp_install, + data_folder, + hostname ? hostname : "*", + outputdir, + data_folder); + header(_("\nstart cmd is : %s\n"), buf); + PID_TYPE datanode_pid = spawn_process(buf); + if (datanode_pid == INVALID_PID) { + fprintf(stderr, _("\n%s: could not spawn postmaster: %s\n"), progname, strerror(errno)); + exit_nicely(2); + } + + printf("start %dth node: node_pid %d.\n", i, datanode_pid); + + myinfo.dn_pid[i] = datanode_pid; + + free(data_folder); +} + /* * Start given node */ @@ -1474,7 +1518,7 @@ static void start_my_node(int i, int type, bool is_main, bool standby, int upgra data_folder, upgrade_from == 0 ? "0" : grayscale_upgrade == -1 ? "1" : "2"); #endif - + FILE* fstream = NULL; char buf[50]; memset(buf, 0, sizeof(buf)); @@ -1892,12 +1936,23 @@ static void initdb_node_info(bool standby) for (i = 0; i < myinfo.dn_num; i++) { char buf[MAXPGPATH * 4]; + char ss_extra_args[MAXPGPATH] = {0}; + + (void)snprintf(ss_extra_args, + sizeof(ss_extra_args), + SYSTEMQUOTE "--vgname=\"+data,+log%i\" --enable-dss --dms_url=\"%s\" -I %d " + "--socketpath=\"UDS:%s/dss_home%d/.dss_unix_d_socket\"" SYSTEMQUOTE, + i, + ss_standby_read ? "0:127.0.0.1:1611,1:127.0.0.1:1711" : "0:127.0.0.1:1611", + i, + temp_install, + i); char* data_folder = get_node_info_name(i, DATANODE, false); char* data_folder2 = get_node_info_name(i, DATANODE, true); (void)snprintf(buf, sizeof(buf), - SYSTEMQUOTE "\"%s/gs_initdb\" --nodename %s %s -w \"gauss@123\" -D \"%s/%s\" -L \"%s\" --noclean%s%s > " + SYSTEMQUOTE "\"%s/gs_initdb\" --nodename %s %s -w \"gauss@123\" -D \"%s/%s\" -L \"%s\" %s --noclean%s%s > " "\"%s/log/initdb.log\" 2>&1" SYSTEMQUOTE, bindir, data_folder2, @@ -1905,6 +1960,7 @@ static void initdb_node_info(bool standby) temp_install, data_folder, datadir, + enable_ss ? ss_extra_args : "", debug ? " --debug" : "", nolocale ? " --no-locale" : "", outputdir); @@ -2303,7 +2359,7 @@ static void psql_command(const char* database, const char* query, ...) 
/* And now we can build and execute the shell command */ (void)snprintf(psql_cmd, sizeof(psql_cmd), - SYSTEMQUOTE "\"%s%sgsql\" -X %s -p %d -c \"%s\" \"%s\"" SYSTEMQUOTE, + SYSTEMQUOTE "\"%s%sgsql\" -X %s -p %d -c \"%s\" -d \"%s\"" SYSTEMQUOTE, psqldir ? psqldir : "", psqldir ? "/" : "", super_user_altered ? "" : (passwd_altered ? "-U upcheck -W Gauss@123" : "-U upcheck -W gauss@123"), @@ -4867,7 +4923,11 @@ static void run_schedule(const char* schedule, test_function tfunc, diag_functio REGR_START_TIMER_TEMP(0); - pids[0] = (tfunc)(tests[0], &resultfiles[0], &expectfiles[0], &tags[0], use_jdbc_client); + if (!ss_standby_read) { + pids[0] = (tfunc)(tests[0], &resultfiles[0], &expectfiles[0], &tags[0], use_jdbc_client); + } else { + pids[0] = (tfunc)(tests[0], &resultfiles[0], &expectfiles[0], &tags[0], false); + } } wait_for_tests(pids, statuses, NULL, 1); @@ -4891,7 +4951,11 @@ static void run_schedule(const char* schedule, test_function tfunc, diag_functio REGR_START_TIMER; /* Invoke the single test file */ - pids[i] = (tfunc)(tests[i], &resultfiles[i], &expectfiles[i], &tags[i], use_jdbc_client); + if (!ss_standby_read) { + pids[i] = (tfunc)(tests[i], &resultfiles[i], &expectfiles[i], &tags[i], use_jdbc_client); + } else { + pids[i] = (tfunc)(tests[i], &resultfiles[i], &expectfiles[i], &tags[i], false); + } i++; } @@ -4942,7 +5006,11 @@ static void run_schedule(const char* schedule, test_function tfunc, diag_functio REGR_START_TIMER_TEMP(i); - pids[i] = (tfunc)(tests[i], &resultfiles[i], &expectfiles[i], &tags[i], use_jdbc_client); + if (!ss_standby_read) { + pids[i] = (tfunc)(tests[i], &resultfiles[i], &expectfiles[i], &tags[i], use_jdbc_client); + } else { + pids[i] = (tfunc)(tests[i], &resultfiles[i], &expectfiles[i], &tags[i], false); + } i++; } @@ -4987,6 +5055,151 @@ static void run_schedule(const char* schedule, test_function tfunc, diag_functio g_uiCurLineBufIdx = 0; } + if (ss_standby_read) { + if (num_tests == 1) { + if (bscript) { + bscript = false; + 
+ status(_("script %-22s .... "), tests[0]); + REGR_START_TIMER_TEMP(0); + + pids[0] = scriptExecute(tests[0], &resultfiles[0], &expectfiles[0], &tags[0]); + } else { + if (isSystemTableDDL) { + status(_("system_table_ddl_test %-24s .... "), tests[0]); + } else if (isPlanAndProto) { + status(_("plan_proto_test %-24s .... "), tests[0]); + } else if (use_jdbc_client) { + status(_("jdbc test %-24s .... "), tests[0]); + } else { + status(_("test %-24s .... "), tests[0]); + } + makeNestedDirectory(tests[0]); + + REGR_START_TIMER_TEMP(0); + + pids[0] = (tfunc)(tests[0], &resultfiles[0], &expectfiles[0], &tags[0], true); + } + + wait_for_tests(pids, statuses, NULL, 1); + REGR_STOP_TIMER_TEMP(0); + REGR_PRINT_ONEGROUP_ELAPSED_TIME; + /* status line is finished below */ + } else if (max_connections > 0 && max_connections < num_tests) { + int oldest = 0; + + i = 0; + + do { + while (i < num_tests) { + if (i - oldest >= max_connections) { + wait_for_tests(pids + oldest, statuses + oldest, tests + oldest, i - oldest); + oldest = i; + } + + makeNestedDirectory(tests[i]); + + REGR_START_TIMER; + + /* Invoke the single test file */ + pids[i] = (tfunc)(tests[i], &resultfiles[i], &expectfiles[i], &tags[i], true); + i++; + } + + if (false == bBuffReloadReq) + break; + + /* Resetting the flag */ + bBuffReloadReq = false; + + iRet = regrReloadAndParseLineBuffer(&bBuffReloadReq, + &bHalfReadTest, + &pcLastSpace, + scf, + &num_tests, + &iMaxParallelTests, + &tests, + &resultfiles, + &expectfiles, + &tags, + &pids, + &statuses); + if (iRet == REGR_EOF_REACHED) { + break; + } + + if (iRet != REGR_SUCCESS) { + goto LB_ERR_HNDL_LVL_4; + } + } while (true); + if (isSystemTableDDL) { + status(_("parallel group (%d system_table_ddl_tests, in groups of %d): "), num_tests, max_connections); + } else if (isPlanAndProto) { + status(_("parallel group (%d plan_proto_tests, in groups of %d): "), num_tests, max_connections); + } else { + status(_("parallel group (%d tests, in groups of %d): "), 
num_tests, max_connections); + } + + wait_for_tests(pids + oldest, statuses + oldest, tests + oldest, i - oldest); + status_end(); + + g_uiCurLineBufIdx = 0; + } else { + i = 0; + + do { + while (i < num_tests) { + makeNestedDirectory(tests[i]); + + REGR_START_TIMER_TEMP(i); + + pids[i] = (tfunc)(tests[i], &resultfiles[i], &expectfiles[i], &tags[i], true); + + i++; + } + + if (false == bBuffReloadReq) + break; + + /* Resetting the flag */ + bBuffReloadReq = false; + + iRet = regrReloadAndParseLineBuffer(&bBuffReloadReq, + &bHalfReadTest, + &pcLastSpace, + scf, + &num_tests, + &iMaxParallelTests, + &tests, + &resultfiles, + &expectfiles, + &tags, + &pids, + &statuses); + if (iRet == REGR_EOF_REACHED) { + break; + } + + if (iRet != REGR_SUCCESS) { + goto LB_ERR_HNDL_LVL_4; + } + } while (true); + if (isSystemTableDDL) { + status(_("parallel group (%d system_table_ddl_tests): "), num_tests); + } else if (isPlanAndProto) { + status(_("parallel group (%d plan_proto_tests): "), num_tests); + } else { + status(_("parallel group (%d tests): "), num_tests); + } + + wait_for_tests(pids, statuses, tests, num_tests); + REGR_PRINT_ONEGROUP_ELAPSED_TIME; + status_end(); + + g_uiCurLineBufIdx = 0; + } + num_tests *= 2; + } /* Check results for all tests */ for (i = 0; i < num_tests; i++) { @@ -5248,7 +5461,7 @@ static void check_global_variables() } } -#define BASE_PGXC_LIKE_MACRO_NUM 1394 +#define BASE_PGXC_LIKE_MACRO_NUM 1393 static void check_pgxc_like_macros() { #ifdef BUILD_BY_CMAKE @@ -5330,6 +5543,18 @@ static void open_result_files(void) if (!directory_exists(file)) { make_directory(file); } + if (ss_standby_read) { + rc = snprintf_s(file, sizeof(file), sizeof(file) - 1, "%s/results/ss_wr", outputdir); + securec_check_ss_c(rc, "", ""); + if (!directory_exists(file)) { + make_directory(file); + } + rc = snprintf_s(file, sizeof(file), sizeof(file) - 1, "%s/results/ss_r", outputdir); + securec_check_ss_c(rc, "", ""); + if (!directory_exists(file)) { + make_directory(file); + 
} + } } /* create jdbc_user & grant all database to it */ @@ -5454,6 +5679,8 @@ static void help(void) printf(_(" --use-existing use an existing installation\n")); printf(_(" --launcher=CMD use CMD as launcher of gsql\n")); printf(_(" --skip_environment_cleanup do not clean generated sql scripts\n")); + printf(_(" --enable_ss test shared storage mode\n")); + printf(_(" --ss_standby_read test standby read in shared storage mode\n")); printf(_("\n")); printf(_("Options for \"temp-install\" mode:\n")); printf(_(" --no-locale use C locale\n")); @@ -6207,11 +6434,22 @@ static void start_postmaster(void) start_my_node(i, DATANODE, false, standby_defined, upgrade_from); } } else { - assert(0 == myinfo.co_num && 1 == myinfo.dn_num); - start_single_node(); + if (!enable_ss) { + assert(0 == myinfo.co_num && 1 == myinfo.dn_num); + start_single_node(); + } else { + start_ss_node(0); + if (ss_standby_read) { + start_ss_node(1); + } + } } - pg_usleep(10000000L); + if (!enable_ss) { + pg_usleep(10000000L); + } else { + pg_usleep(100000000L); + } /* * Wait till postmaster is able to accept connections (normally only a @@ -6377,6 +6615,8 @@ int regression_main(int argc, char* argv[], init_function ifunc, test_function t {"skip_environment_cleanup", no_argument, NULL, 61}, {"ecpg", no_argument, NULL, 62}, {"dbcmpt", required_argument, NULL, 63}, + {"enable_ss", no_argument, NULL, 64}, + {"ss_standby_read", no_argument, NULL, 65}, {NULL, 0, NULL, 0} }; @@ -6665,6 +6905,13 @@ int regression_main(int argc, char* argv[], init_function ifunc, test_function t case 63: g_db_compatibility = strdup(optarg); break; + case 64: + enable_ss = true; + break; + case 65: + ss_standby_read = true; + enable_ss = true; + break; default: /* getopt_long already emitted a complaint */ fprintf(stderr, _("\nTry \"%s -h\" for more information.\n"), progname); @@ -6799,15 +7046,15 @@ int regression_main(int argc, char* argv[], init_function ifunc, test_function t if (myinfo.keep_data == false) { if 
(only_install == false) { #ifndef ENABLE_LLT - if (directory_exists(temp_install)) { - header(_("removing existing temp installation")); - (void)rmtree(temp_install, true); + if (!enable_ss) { + if (directory_exists(temp_install)) { + header(_("removing existing temp installation")); + (void)rmtree(temp_install, true); + } + header(_("creating temporary installation")); + /* make the temp install top directory */ + make_directory(temp_install); } - - header(_("creating temporary installation")); - - /* make the temp install top directory */ - make_directory(temp_install); #endif /* and a directory for log files */ @@ -7241,12 +7488,17 @@ int regression_main(int argc, char* argv[], init_function ifunc, test_function t if (!g_bEnableDiagCollection) dfunc = NULL; + + test_function tmpfunc = tfunc; + if (ss_standby_read) { + tmpfunc = psql_ss_start_test; + } for (ssl = schedulelist; ssl != NULL; ssl = ssl->next) { - run_schedule(ssl->str, tfunc, dfunc); + run_schedule(ssl->str, tmpfunc, dfunc); } for (ssl = extra_tests; ssl != NULL; ssl = ssl->next) { - run_single_test(ssl->str, tfunc, dfunc); + run_single_test(ssl->str, tmpfunc, dfunc); } (void)gettimeofday(&end_time, NULL); diff --git a/src/test/regress/pg_regress.h b/src/test/regress/pg_regress.h index fe6337fc1..921a03c68 100644 --- a/src/test/regress/pg_regress.h +++ b/src/test/regress/pg_regress.h @@ -131,3 +131,7 @@ PID_TYPE spawn_process(const char* cmdline); void exit_nicely(int code); void replace_string(char* string, char* replace, char* replacement); bool file_exists(const char* file); + +PID_TYPE psql_ss_start_test( + const char* testname, _stringlist** resultfiles, _stringlist** expectfiles, _stringlist** tags, + bool is_stanby); diff --git a/src/test/regress/pg_regress_main.cpp b/src/test/regress/pg_regress_main.cpp index 0cfbf0284..47949fbfc 100644 --- a/src/test/regress/pg_regress_main.cpp +++ b/src/test/regress/pg_regress_main.cpp @@ -86,7 +86,7 @@ static void gen_sql_cmd(char * const psql_cmd, int 
psql_size, const char* testna outfile); securec_check_ss_c(rc, "\0", "\0"); } - } else{ + } else { (void)snprintf_s(psql_cmd + offset, (SQL_CMD_LEN - offset), psql_size - offset, @@ -183,6 +183,99 @@ static PID_TYPE psql_start_test( return pid; } +/* generate gsql/jdbc command for specific flag */ +static void gen_ss_sql_cmd(char * const psql_cmd, int psql_size, const char* testname, const char* infile, + const char* outfile, bool is_stanby) +{ + int port = is_stanby ? myinfo.dn_port[1] : myinfo.dn_port[0]; + (void)snprintf_s(psql_cmd, + (SQL_CMD_LEN), + psql_size, + SYSTEMQUOTE "\"%s%sgsql\" -X -p %d -a %s %s -q -d \"%s\" -C " + "< \"%s\" > \"%s\" 2>&1" SYSTEMQUOTE, + psqldir ? psqldir : "", + psqldir ? "/" : "", + port, + (char*)g_stRegrConfItems.acFieldSepForAllText, + (char*)g_stRegrConfItems.acTuplesOnly, + dblist->str, + infile, + outfile); +} + +/* + * start a psql test process for specified file (including redirection), + * and return process ID + */ +PID_TYPE psql_ss_start_test( + const char* testname, _stringlist** resultfiles, _stringlist** expectfiles, _stringlist** tags, + bool is_stanby) + { + PID_TYPE pid; + char infile[MAXPGPATH]; + char outfile[MAXPGPATH]; + char expectfile[MAXPGPATH]; + char psql_cmd[SQL_CMD_LEN]; + /* + * Look for files in the output dir first, consistent with a vpath search. + * This is mainly to create more reasonable error messages if the file is + * not found. It also allows local test overrides when running pg_regress + * outside of the source tree. 
+ */ + + if (!is_stanby) { + snprintf(infile, sizeof(infile), "%s/sql/ss_wr/%s.sql", outputdir, testname); + if (!file_exists(infile)) { + snprintf(infile, sizeof(infile), "%s/sql/ss_wr/%s.sql", inputdir, testname); + } + } else { + snprintf(infile, sizeof(infile), "%s/sql/ss_r/%s.sql", outputdir, testname); + if (!file_exists(infile)) { + snprintf(infile, sizeof(infile), "%s/sql/ss_r/%s.sql", inputdir, testname); + } + } + + + /* If the .sql file does not exist, then record the error in diff summary + * file and cont */ + if (!file_exists(infile)) { + FILE* fp = fopen(difffilename, "a"); + + if (fp) { + (void)fprintf(fp, "\n[%s]: No such file or directory!!\n", infile); + fclose(fp); + } else + fprintf(stderr, _("\n COULD NOT OPEN [%s]!!!!\n"), difffilename); + } + if (!is_stanby) { + (void)snprintf(outfile, sizeof(outfile), "%s/results/ss_wr/%s.out", outputdir, testname); + + snprintf(expectfile, sizeof(expectfile), "%s/expected/ss_wr/%s.out", outputdir, testname); + if (!file_exists(expectfile)) + snprintf(expectfile, sizeof(expectfile), "%s/expected/ss_wr/%s.out", inputdir, testname); + } else { + (void)snprintf(outfile, sizeof(outfile), "%s/results/ss_r/%s.out", outputdir, testname); + + snprintf(expectfile, sizeof(expectfile), "%s/expected/ss_r/%s.out", outputdir, testname); + if (!file_exists(expectfile)) + snprintf(expectfile, sizeof(expectfile), "%s/expected/ss_r/%s.out", inputdir, testname); + } + + add_stringlist_item(resultfiles, outfile); + add_stringlist_item(expectfiles, expectfile); + + gen_ss_sql_cmd(psql_cmd, sizeof(psql_cmd), testname, infile, outfile, is_stanby); + + pid = spawn_process(psql_cmd); + + if (pid == INVALID_PID) { + fprintf(stderr, _("could not start process for test %s\n"), testname); + exit(2); + } + + return pid; +} + static void psql_init(void) { /* set default regression database name */ diff --git a/src/test/regress/single_check.sh b/src/test/regress/single_check.sh index ac36438a1..d78282842 100755 --- 
a/src/test/regress/single_check.sh +++ b/src/test/regress/single_check.sh @@ -116,6 +116,31 @@ function real_regresscheck_single_audit() $pg_regress_check --dlpath=$DL_PATH $EXTRA_REGRESS_OPTS $3 -b $TEMP_INSTALL --abs_gausshome=\'$PREFIX_HOME\' --single_node --schedule=$SCHEDULE -w --keep_last_data=$keep_last_data --temp-config=$TEMP_CONFIG $MAXCONNOPT --regconf=$REGCONF } +function real_regresscheck_single_ss() +{ + set_hotpatch_env + set_common_env $1 $2 + + if [ -d $TEMP_INSTALL ];then + rm -rf $TEMP_INSTALL + fi + sh ${TOP_DIR}/src/test/ss/conf_start_dss_inst.sh 1 $TEMP_INSTALL ${HOME}/ss_fastcheck_disk + echo "regresscheck_ss_single: $pg_regress_check --dlpath=$DL_PATH $EXTRA_REGRESS_OPTS $3 -b $TEMP_INSTALL --abs_gausshome=\'$PREFIX_HOME\' --single_node --schedule=$SCHEDULE -w --keep_last_data=$keep_last_data --temp-config=$TEMP_CONFIG $MAXCONNOPT --regconf=$REGCONF --enable_ss" + $pg_regress_check --dlpath=$DL_PATH $EXTRA_REGRESS_OPTS $3 -b $TEMP_INSTALL --abs_gausshome=\'$PREFIX_HOME\' --single_node --schedule=$SCHEDULE -w --keep_last_data=$keep_last_data --temp-config=$TEMP_CONFIG $MAXCONNOPT --regconf=$REGCONF --enable_ss --enable-segment +} + +function real_regresscheck_ss() +{ + set_hotpatch_env + set_common_env $1 $2 + + if [ -d $TEMP_INSTALL ];then + rm -rf $TEMP_INSTALL + fi + sh ${TOP_DIR}/src/test/ss/conf_start_dss_inst.sh 2 $TEMP_INSTALL ${HOME}/ss_fastcheck_disk + echo "regresscheck_ss: $pg_regress_check --dlpath=$DL_PATH $EXTRA_REGRESS_OPTS $3 -b $TEMP_INSTALL --abs_gausshome=\'$PREFIX_HOME\' --single_node --schedule=$SCHEDULE -w --keep_last_data=$keep_last_data --temp-config=$TEMP_CONFIG $MAXCONNOPT --regconf=$REGCONF --ss_standby_read" + $pg_regress_check --dlpath=$DL_PATH $EXTRA_REGRESS_OPTS $3 -b $TEMP_INSTALL --abs_gausshome=\'$PREFIX_HOME\' --single_node --schedule=$SCHEDULE -w --keep_last_data=$keep_last_data --temp-config=$TEMP_CONFIG $MAXCONNOPT --regconf=$REGCONF --ss_standby_read --enable-segment +} function 
real_regresscheck_single_mot() { @@ -229,11 +254,20 @@ function real_hacheck() sh ./run_ha_multi_single_mot.sh 1 ${part} ;; hacheck_single_paxos) sh ./run_paxos_single.sh ;; + hacheck_ss_all) + sh ./run_ha_single_ss.sh ;; *) echo "module $module is not valid" ;; esac } +function check_gs_probackup() +{ + REGRESS_PATH=$ROOT_CODE_PATH/${OPENGS}/src/test/regress + cd ${REGRESS_PATH}/ + sh ${REGRESS_PATH}/gs_probackup.sh; +} + #These only used for *check cmd. DO_CMD=$1 TRUNK_CODE_PATH=${2} @@ -389,6 +423,15 @@ case $DO_CMD in --fastcheck_lite|fastcheck_lite) args_val="-d 1 -c 0 -p $p -r 1 " real_regresscheck_single parallel_schedule.lite$part make_fastcheck_postgresql.conf "${args_val}" ;; + --fastcheck_single_ss|fastcheck_single_ss) + args_val="-d 1 -c 0 -p $p -r 1 " + real_regresscheck_single_ss parallel_scheduleSS make_fastcheck_ss_postgresql.conf "${args_val}" ;; + --fastcheck_ss|fastcheck_ss) + args_val="-d 2 -c 0 -p $p -r 1 " + real_regresscheck_ss parallel_schedule_ss_read$part make_fastcheck_ss_postgresql.conf "${args_val}" ;; + --fastcheck_gs_probackup|fastcheck_gs_probackup) + args_val=$(echo $DO_CMD | sed 's\--\\g') + check_gs_probackup;; --upgradecheck_single|upgradecheck_single) args_val="-d 1 -c 0 -p $p -r 1 " @@ -406,7 +449,7 @@ case $DO_CMD in --wlmcheck_single|wlmcheck_single) args_val="-d 6 -c 3 -p $p -r ${runtest}" real_wmlcheck parallel_schedule${part}.wlm make_wlmcheck_postgresql.conf "${args_val}" ;; - --hacheck_single_all|hacheck_single_all|--hacheck_single|hacheck_single|--hacheck_multi_single|hacheck_multi_single|--hacheck_multi_single_mot|hacheck_multi_single_mot|--hacheck_decode|hacheck_decode|--hacheck_single_paxos|hacheck_single_paxos) + --hacheck_single_all|hacheck_single_all|--hacheck_single|hacheck_single|--hacheck_multi_single|hacheck_multi_single|--hacheck_multi_single_mot|hacheck_multi_single_mot|--hacheck_decode|hacheck_decode|--hacheck_single_paxos|hacheck_single_paxos|--hacheck_ss_all|hacheck_ss_all) args_val=$(echo $DO_CMD | sed 
's\--\\g') real_hacheck "${args_val}";; --fastcheck_ledger_single|fastcheck_ledger_single) diff --git a/src/test/regress/sql/segment_subpartition_createtable_ss.sql b/src/test/regress/sql/segment_subpartition_createtable_ss.sql new file mode 100644 index 000000000..8960d4e06 --- /dev/null +++ b/src/test/regress/sql/segment_subpartition_createtable_ss.sql @@ -0,0 +1,1383 @@ + +--1.create table +--list_list list_hash list_range range_list range_hash range_range + +--prepare +DROP SCHEMA segment_subpartition_createtable CASCADE; +CREATE SCHEMA segment_subpartition_createtable; +SET CURRENT_SCHEMA TO segment_subpartition_createtable; + +--1.1 normal table +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201902', '2', '1', 1); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +insert into list_list values('201903', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +select * from list_list; +drop table list_list; + +CREATE TABLE list_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a, + SUBPARTITION p_201901_b + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a, + SUBPARTITION 
p_201902_b + ) +); +insert into list_hash values('201902', '1', '1', 1); +insert into list_hash values('201902', '2', '1', 1); +insert into list_hash values('201902', '3', '1', 1); +insert into list_hash values('201903', '4', '1', 1); +insert into list_hash values('201903', '5', '1', 1); +insert into list_hash values('201903', '6', '1', 1); +select * from list_hash; +drop table list_hash; + +CREATE TABLE list_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a values less than ('4'), + SUBPARTITION p_201901_b values less than ('6') + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a values less than ('3'), + SUBPARTITION p_201902_b values less than ('6') + ) +); +insert into list_range values('201902', '1', '1', 1); +insert into list_range values('201902', '2', '1', 1); +insert into list_range values('201902', '3', '1', 1); +insert into list_range values('201903', '4', '1', 1); +insert into list_range values('201903', '5', '1', 1); +insert into list_range values('201903', '6', '1', 1); + +select * from list_range; +drop table list_range; + +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a values ('1'), + SUBPARTITION p_201901_b values ('2') + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a values ('1'), + SUBPARTITION p_201902_b values ('2') + ) +); +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201902', '2', '1', 1); +insert into range_list 
values('201902', '1', '1', 1); +insert into range_list values('201903', '2', '1', 1); +insert into range_list values('201903', '1', '1', 1); +insert into range_list values('201903', '2', '1', 1); + +select * from range_list; +drop table range_list; + +CREATE TABLE range_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a, + SUBPARTITION p_201901_b + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a, + SUBPARTITION p_201902_b + ) +); +insert into range_hash values('201902', '1', '1', 1); +insert into range_hash values('201902', '2', '1', 1); +insert into range_hash values('201902', '1', '1', 1); +insert into range_hash values('201903', '2', '1', 1); +insert into range_hash values('201903', '1', '1', 1); +insert into range_hash values('201903', '2', '1', 1); + +select * from range_hash; +drop table range_hash; + +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( '3' ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '3' ) + ) +); +insert into range_range values('201902', '1', '1', 1); +insert into range_range values('201902', '2', '1', 1); +insert into range_range values('201902', '1', '1', 1); +insert into range_range values('201903', '2', '1', 1); +insert into range_range values('201903', '1', '1', 1); +insert into range_range 
values('201903', '2', '1', 1); + +select * from range_range; +drop table range_range; + +CREATE TABLE hash_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY hash (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +insert into hash_list values('201901', '1', '1', 1); +insert into hash_list values('201901', '2', '1', 1); +insert into hash_list values('201901', '1', '1', 1); +insert into hash_list values('201903', '2', '1', 1); +insert into hash_list values('201903', '1', '1', 1); +insert into hash_list values('201903', '2', '1', 1); + +select * from hash_list; +drop table hash_list; + +CREATE TABLE hash_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY hash (month_code) SUBPARTITION BY hash (dept_code) +( + PARTITION p_201901 + ( + SUBPARTITION p_201901_a, + SUBPARTITION p_201901_b + ), + PARTITION p_201902 + ( + SUBPARTITION p_201902_a, + SUBPARTITION p_201902_b + ) +); +insert into hash_hash values('201901', '1', '1', 1); +insert into hash_hash values('201901', '2', '1', 1); +insert into hash_hash values('201901', '1', '1', 1); +insert into hash_hash values('201903', '2', '1', 1); +insert into hash_hash values('201903', '1', '1', 1); +insert into hash_hash values('201903', '2', '1', 1); + +select * from hash_hash; +drop table hash_hash; + +CREATE TABLE hash_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY hash (month_code) SUBPARTITION BY range (dept_code) +( + PARTITION 
p_201901 + ( + SUBPARTITION p_201901_a VALUES LESS THAN ( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN ( '3' ) + ), + PARTITION p_201902 + ( + SUBPARTITION p_201902_a VALUES LESS THAN ( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN ( '3' ) + ) +); +insert into hash_range values('201901', '1', '1', 1); +insert into hash_range values('201901', '2', '1', 1); +insert into hash_range values('201901', '1', '1', 1); +insert into hash_range values('201903', '2', '1', 1); +insert into hash_range values('201903', '1', '1', 1); +insert into hash_range values('201903', '2', '1', 1); + +select * from hash_range; +drop table hash_range; + + +--1.2 table with default subpartition +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +drop table list_list; + +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) +); +drop table list_list; + +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ), + PARTITION p_201902 VALUES ( '201903' ) +); +drop table list_list; + + +CREATE TABLE list_hash +( + month_code VARCHAR2 ( 
30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a, + SUBPARTITION p_201902_b + ) +); +drop table list_hash; + +CREATE TABLE list_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a, + SUBPARTITION p_201901_b + ), + PARTITION p_201902 VALUES ( '201903' ) +); +drop table list_hash; + +CREATE TABLE list_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ), + PARTITION p_201902 VALUES ( '201903' ) +); +drop table list_hash; + +CREATE TABLE list_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a values less than ('3'), + SUBPARTITION p_201902_b values less than ('6') + ) +); +drop table list_range; + +CREATE TABLE list_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a values less than ('4'), + SUBPARTITION p_201901_b values 
less than ('6') + ), + PARTITION p_201902 VALUES ( '201903' ) +); +drop table list_range; + +CREATE TABLE list_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ), + PARTITION p_201902 VALUES ( '201903' ) +); +drop table list_range; + +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a values ('1'), + SUBPARTITION p_201902_b values ('2') + ) +); +drop table range_list; + +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a values ('1'), + SUBPARTITION p_201901_b values ('2') + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) +); +drop table range_list; + +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) +); +drop table range_list; + +CREATE TABLE range_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION 
BY HASH (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a, + SUBPARTITION p_201902_b + ) +); +drop table range_hash; + +CREATE TABLE range_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a, + SUBPARTITION p_201901_b + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) +); +drop table range_hash; + +CREATE TABLE range_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) +); +drop table range_hash; + +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '3' ) + ) +); +drop table range_range; + +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( '3' ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) +); 
+drop table range_range; + +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) +); +drop table range_range; + +CREATE TABLE hash_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901, + PARTITION p_201902 + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '3' ) + ) +); +drop table hash_range; + +CREATE TABLE hash_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( '3' ) + ), + PARTITION p_201902 +); +drop table hash_range; + +CREATE TABLE hash_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901, + PARTITION p_201902 +); +drop table hash_range; + +CREATE TABLE hash_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901, + PARTITION p_201902 + ( + SUBPARTITION p_201902_a VALUES( '2' ), + SUBPARTITION p_201902_b VALUES( '3' ) + ) +); +drop table 
hash_range; + +CREATE TABLE hash_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 + ( + SUBPARTITION p_201901_a VALUES( '2' ), + SUBPARTITION p_201901_b VALUES( '3' ) + ), + PARTITION p_201902 +); +drop table hash_range; + +CREATE TABLE hash_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901, + PARTITION p_201902 +); +drop table hash_range; + +CREATE TABLE hash_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901, + PARTITION p_201902 + ( + SUBPARTITION p_201902_a, + SUBPARTITION p_201902_b + ) +); +drop table hash_hash; + +CREATE TABLE hash_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 + ( + SUBPARTITION p_201901_a, + SUBPARTITION p_201901_b + ), + PARTITION p_201902 +); +drop table hash_hash; + +CREATE TABLE hash_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY HASH (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901, + PARTITION p_201902 +); +drop table hash_hash; + + +--1.3 subpartition name check +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT 
NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_a VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); + +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); + +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901 VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); + +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201901_subpartdefault1 VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +drop table list_list; + + + +--1.4 subpartition key check +-- 一级分区和二级分区分区键是同一列 + +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) 
NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (month_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); + +--二级分区的键值一样 + +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '1' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); + +--分区列不存在 +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_codeXXX) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); + +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_codeXXX) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' 
), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); + + +CREATE TABLE list_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a values less than ('4'), + SUBPARTITION p_201901_b values less than ('4') + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a values less than ('3'), + SUBPARTITION p_201902_b values less than ('6') + ) +); +drop table list_range; + + +--1.5 list subpartition whith default + +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( '2' ) + ), + PARTITION p_201902 VALUES ( default ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201902', '2', '1', 1); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +insert into list_list values('201903', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +select * from list_list partition (p_201901); +select * from list_list partition (p_201902); +drop table list_list; + +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b 
VALUES ( default ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +drop table list_list; + +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_b VALUES ( default ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +drop table list_list; + +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( default ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( default ) + ) +); +drop table list_list; + +--1.6 declaration and definition of the subpatiiton type are same. 
+--error +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY hash (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( default ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( default ) + ) +); + +--1.7 add constraint +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '6' ) + ) +); + +alter table range_range add constraint constraint_check CHECK (sales_amt IS NOT NULL); +insert into range_range values(1,1,1); +drop table range_range; + +-- drop partition column +CREATE TABLE range_hash_02 +( + col_1 int , + col_2 int, + col_3 VARCHAR2 ( 30 ) , + col_4 int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (col_1) SUBPARTITION BY hash (col_2) +( + PARTITION p_range_1 VALUES LESS THAN( -10 ) + ( + SUBPARTITION p_hash_1_1 , + SUBPARTITION p_hash_1_2 , + SUBPARTITION p_hash_1_3 + ), + PARTITION p_range_2 VALUES LESS THAN( 20 ), + PARTITION p_range_3 VALUES LESS THAN( 30) + ( + SUBPARTITION p_hash_3_1 , + SUBPARTITION p_hash_3_2 , + SUBPARTITION p_hash_3_3 + ), + PARTITION p_range_4 VALUES LESS THAN( 50) + ( + SUBPARTITION p_hash_4_1 , + SUBPARTITION p_hash_4_2 , + SUBPARTITION range_hash_02 + ), + PARTITION p_range_5 VALUES LESS THAN( MAXVALUE ) +) ENABLE ROW MOVEMENT; + +alter 
table range_hash_02 drop column col_1; + +alter table range_hash_02 drop column col_2; + +drop table range_hash_02; +--1.8 SET ROW MOVEMENT +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1', '2' ), + SUBPARTITION p_201901_b VALUES ( default ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1', '2' ), + SUBPARTITION p_201902_b VALUES ( default ) + ) +); +alter table list_list disable ROW MOVEMENT; +insert into list_list values('201902', '1', '1', 1); +update list_list set month_code = '201903'; +update list_list set dept_code = '3'; +alter table list_list enable ROW MOVEMENT; +update list_list set month_code = '201903'; +update list_list set dept_code = '3'; +drop table list_list; + +--1.9 without subpartition declaration +create table test(a int) WITH (SEGMENT=ON) +partition by range(a) +( +partition p1 values less than(100) +( +subpartition subp1 values less than(50), +subpartition subp2 values less than(100) +), +partition p2 values less than(200), +partition p3 values less than(maxvalue) +); + +--1.10 create table like +CREATE TABLE range_range +( + col_1 int primary key, + col_2 int NOT NULL , + col_3 VARCHAR2 ( 30 ) NOT NULL , + col_4 int generated always as(2*col_2) stored , + check (col_4 >= col_2) +) WITH (SEGMENT=ON) +PARTITION BY RANGE (col_1) SUBPARTITION BY RANGE (col_2) +( + PARTITION p_range_1 VALUES LESS THAN( 10 ) + ( + SUBPARTITION p_range_1_1 VALUES LESS THAN( 5 ), + SUBPARTITION p_range_1_2 VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_range_2 VALUES LESS THAN( 20 ) + ( + SUBPARTITION p_range_2_1 VALUES LESS THAN( 5 ), + SUBPARTITION p_range_2_2 VALUES LESS THAN( 10 ) + ) +) ENABLE ROW MOVEMENT; + +CREATE TABLE range_range_02 
(like range_range INCLUDING ALL ); +drop table range_range; + +--ROW LEVEL SECURITY POLICY +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) , + dept_code VARCHAR2 ( 30 ) , + user_no VARCHAR2 ( 30 ) , + sales_amt int, + primary key(month_code, dept_code) +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '3' ) + ) +); +CREATE ROW LEVEL SECURITY POLICY range_range_rls ON range_range USING(user_no = CURRENT_USER); + +drop table range_range; + +CREATE SCHEMA ledgernsp WITH BLOCKCHAIN; +CREATE SCHEMA ledgernsp; +ALTER SCHEMA ledgernsp WITH BLOCKCHAIN; +DROP SCHEMA ledgernsp; + +-- create table as +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '3' ) + ) +) ENABLE ROW MOVEMENT; + +insert into range_range values(201902,1,1,1),(201902,1,1,1),(201902,3,1,1),(201903,1,1,1),(201903,2,1,1),(201903,2,1,1); + +select * from range_range subpartition(p_201901_a) where month_code in(201902,201903) order by 1,2,3,4; + +create table range_range_copy WITH (SEGMENT=ON) as select * from range_range subpartition(p_201901_a) where month_code in(201902,201903); + +select * from range_range_copy order by 1,2,3,4; + +drop table 
range_range; +drop table range_range_copy; + +--1.11 create index +create table range_range_03 +( + c_int int, + c_char1 char(3000), + c_char2 char(5000), + c_char3 char(6000), + c_varchar1 varchar(3000), + c_varchar2 varchar(5000), + c_varchar3 varchar, + c_varchar4 varchar, + c_text1 text, + c_text2 text, + c_text3 text, + c int, + primary key(c,c_int) +) with (parallel_workers=10, SEGMENT=ON) +partition by range (c_int) subpartition by range (c_char1) +( + partition p1 values less than(50) + ( + subpartition p1_1 values less than('c'), + subpartition p1_2 values less than(maxvalue) + ), + partition p2 values less than(100) + ( + subpartition p2_1 values less than('c'), + subpartition p2_2 values less than(maxvalue) + ), + partition p3 values less than(150) + ( + subpartition p3_1 values less than('c'), + subpartition p3_2 values less than(maxvalue) + ), + partition p4 values less than(200) + ( + subpartition p4_1 values less than('c'), + subpartition p4_2 values less than(maxvalue) + ), + partition p5 values less than(maxvalue)( + subpartition p5_1 values less than('c'), + subpartition p5_2 values less than(maxvalue) + ) +) enable row movement; + +create index range_range_03_idx1 on range_range_03 (c_varchar1) local; --success + +create index range_range_03_idx2 on range_range_03 (c_varchar2) local ( + partition cpt7_p1, + partition cpt7_p2, + partition cpt7_p3, + partition cpt7_p4, + partition cpt7_p5 +); --failed + +create index range_range_03_idx3 on range_range_03 (c_varchar3); --success, default global + +create index range_range_03_idx4 on range_range_03 (c_varchar4) global; --success + +create index range_range_03_idx5 on range_range_03 (c_varchar4) local; --failed, can not be same column with global index + +\d+ range_range_03 + +select pg_get_tabledef('range_range_03'); + +drop table range_range_03; + +--unique local index columns must contain the partition key +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 
30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '3' ) + ) +) ENABLE ROW MOVEMENT; +create unique index idx on range_range(month_code) local; +create unique index idx1 on range_range(month_code, user_no) local; +drop table range_range; + +-- partkey has timestampwithzone type +drop table hash_range; +CREATE TABLE hash_range +( + col_1 int PRIMARY KEY USING INDEX, + col_2 int NOT NULL , + col_3 int NOT NULL , + col_4 int, + col_19 TIMESTAMP WITH TIME ZONE +) WITH (SEGMENT=ON) +PARTITION BY HASH (col_2) SUBPARTITION BY RANGE (col_19) +( partition p_hash_1 + ( + SUBPARTITION p_range_1_1 VALUES LESS THAN( 5 ), + SUBPARTITION p_range_1_2 VALUES LESS THAN( MAXVALUE ) + ), + partition p_hash_2, + PARTITION p_hash_3, + PARTITION p_hash_4, + PARTITION p_hash_5, + PARTITION p_hash_7 +) ENABLE ROW MOVEMENT; + +CREATE TABLE hash_range +( + col_1 int PRIMARY KEY USING INDEX, + col_2 int NOT NULL , + col_3 int NOT NULL , + col_4 int, + col_19 TIMESTAMP WITH TIME ZONE +) WITH (SEGMENT=ON) +PARTITION BY HASH (col_19) SUBPARTITION BY RANGE (col_2) +( partition p_hash_1 + ( + SUBPARTITION p_range_1_1 VALUES LESS THAN( 5 ), + SUBPARTITION p_range_1_2 VALUES LESS THAN( MAXVALUE ) + ), + partition p_hash_2, + PARTITION p_hash_3, + PARTITION p_hash_4, + PARTITION p_hash_5, + PARTITION p_hash_7 +) ENABLE ROW MOVEMENT; +drop table hash_range; +--clean +DROP SCHEMA segment_subpartition_createtable CASCADE; +RESET CURRENT_SCHEMA; diff --git a/src/test/regress/sql/show_database_info.sql b/src/test/regress/sql/show_database_info.sql new file mode 100644 index 
000000000..32315eb2d --- /dev/null +++ b/src/test/regress/sql/show_database_info.sql @@ -0,0 +1,5 @@ +\l+ + +select * from pg_ls_tmpdir(); + +select * from pg_ls_waldir(); \ No newline at end of file diff --git a/src/test/regress/sql/ss_metacmd.sql b/src/test/regress/sql/ss_metacmd.sql new file mode 100644 index 000000000..ce06a7efc --- /dev/null +++ b/src/test/regress/sql/ss_metacmd.sql @@ -0,0 +1,48 @@ +DROP VIEW IF EXISTS SS_VIEW1 CASCADE; +DROP TABLE IF EXISTS SS_METACMD_TAB1 CASCADE; +CREATE TABLE SS_METACMD_TAB1 (ID INT NOT NULL PRIMARY KEY, NAME VARCHAR(128)) WITH(SEGMENT = ON); + +CREATE VIEW SS_VIEW1 AS + SELECT * from SS_METACMD_TAB1; + +INSERT INTO SS_METACMD_TAB1 VALUES (1, 'PAIR'); +COMMIT; + +\dS+ SS_METACMD_TAB1 +\dS+ SS_VIEW1 +select pg_catalog.pg_table_size('SS_VIEW1'); +select pg_catalog.pg_table_size('SS_METACMD_TAB1'); + +select gs_xlogdump_xid('171794'); +select gs_xlogdump_lsn('0/70230830', '0/7023AB80'); +select gs_xlogdump_tablepath('+data/base/15114/4600', 0, 'heap'); +select gs_xlogdump_parsepage_tablepath('+data/base/15114/4600', 0, 'heap', false); + +drop TABLE if exists ss_range_range_ddl_001; +CREATE TABLE ss_range_range_ddl_001 +( + col_1 int primary key USING INDEX TABLESPACE startend_tbs4, -- expected error + col_2 bigint NOT NULL , + col_3 VARCHAR2 ( 30 ) NOT NULL , + col_4 int generated always as(2*col_2) stored , + col_5 bigint, + col_6 bool, + col_7 text, + col_8 decimal, + col_9 numeric(12,6), + col_10 date, + check (col_4 >= col_2) +) +with(FILLFACTOR=80,segment=on) +PARTITION BY range (col_1) SUBPARTITION BY range (col_2) +( + PARTITION p_range_1 values less than (-10 ) + ( + SUBPARTITION p_range_1_1 values less than ( 0), + SUBPARTITION p_range_1_2 values less than ( MAXVALUE ) + ) +) ENABLE ROW MOVEMENT; + +drop TABLE if exists ss_range_range_ddl_001; +DROP VIEW IF EXISTS SS_VIEW1 CASCADE; +DROP TABLE IF EXISTS SS_METACMD_TAB1 CASCADE; \ No newline at end of file diff --git 
a/src/test/regress/sql/ss_r/replace_func_with_two_args.sql b/src/test/regress/sql/ss_r/replace_func_with_two_args.sql new file mode 100644 index 000000000..c05dee105 --- /dev/null +++ b/src/test/regress/sql/ss_r/replace_func_with_two_args.sql @@ -0,0 +1,27 @@ +-- +-- replace function with two arguments +-- + +select replace('string', ''); +select replace('string', 'i'); +select replace('string', 'in'); +select replace('string', 'ing'); + +select replace('', 'ing'); +select replace(NULL, 'ing'); +select replace('ing', ''); +select replace('ing', NULL); +select replace('', ''); +select replace(NULL, NULL); + +select replace(123, '1'); +select replace('123', 1); +select replace(123, 1); + +select replace('abc\nabc', '\n'); +select replace('abc\nabc', E'\n'); +select replace(E'abc\nabc', E'\n'); + +select replace('~!@#$%^&*()', '!@'); + +select replace('高斯', '高'); \ No newline at end of file diff --git a/src/test/regress/sql/ss_r/segment_subpartition_analyze_vacuum.sql b/src/test/regress/sql/ss_r/segment_subpartition_analyze_vacuum.sql new file mode 100644 index 000000000..7a858a89b --- /dev/null +++ b/src/test/regress/sql/ss_r/segment_subpartition_analyze_vacuum.sql @@ -0,0 +1,2 @@ +SET CURRENT_SCHEMA TO segment_subpartition_analyze_vacuum; +select * from range_list order by 1, 2, 3, 4; \ No newline at end of file diff --git a/src/test/regress/sql/ss_r/segment_subpartition_gpi.sql b/src/test/regress/sql/ss_r/segment_subpartition_gpi.sql new file mode 100644 index 000000000..0ce92ea46 --- /dev/null +++ b/src/test/regress/sql/ss_r/segment_subpartition_gpi.sql @@ -0,0 +1,2 @@ +SET CURRENT_SCHEMA TO segment_subpartition_gpi; +select * from range_list where user_no = '1' order by 1, 2, 3, 4; \ No newline at end of file diff --git a/src/test/regress/sql/ss_r/segment_subpartition_scan.sql b/src/test/regress/sql/ss_r/segment_subpartition_scan.sql new file mode 100644 index 000000000..b6b87d6e8 --- /dev/null +++ b/src/test/regress/sql/ss_r/segment_subpartition_scan.sql @@ -0,0 
+1,7 @@ +SET CURRENT_SCHEMA TO segment_subpartition_scan; + +select * from range_list order by 1, 2, 3, 4; + +select * from range_list where month_code = '201902' order by 1, 2, 3, 4; +select * from range_list where dept_code = '1' order by 1, 2, 3, 4; +select * from range_list where user_no = '1' order by 1, 2, 3, 4; diff --git a/src/test/regress/sql/ss_r/segment_subpartition_select.sql b/src/test/regress/sql/ss_r/segment_subpartition_select.sql new file mode 100644 index 000000000..342e20ea6 --- /dev/null +++ b/src/test/regress/sql/ss_r/segment_subpartition_select.sql @@ -0,0 +1,124 @@ +SET CURRENT_SCHEMA TO segment_subpartition_select; + +select * from range_list order by 1, 2, 3, 4; + +select * from range_list where user_no is not null order by 1, 2, 3, 4; +select * from range_list where user_no is not null and dept_code = user_no order by 1, 2, 3, 4; +select * from range_list where user_no is not null and dept_code in ('2') order by 1, 2, 3, 4; +select * from range_list where user_no is not null and dept_code <> '2' order by 1, 2, 3, 4; +select * from range_list partition (p_201901) order by 1, 2, 3, 4; +select * from range_list partition (p_201902) order by 1, 2, 3, 4; +select * from range_list where user_no is not null and dept_code <> '2' UNION ALL select * from range_list partition (p_201902) order by 1, 2, 3, 4; +select * from range_list where user_no is not null and dept_code <> '2' UNION ALL select * from range_list partition (p_201902) where dept_code in ('2') order by 1, 2, 3, 4; + +select * from range_hash order by 1, 2, 3, 4; + +select * from range_hash where user_no is not null order by 1, 2, 3, 4; +select * from range_hash where user_no is not null and dept_code = user_no order by 1, 2, 3, 4; +select * from range_hash where user_no is not null and dept_code in ('2') order by 1, 2, 3, 4; +select * from range_hash where user_no is not null and dept_code <> '2' order by 1, 2, 3, 4; +select * from range_hash partition (p_201901) order by 1, 2, 3, 4; 
+select * from range_hash partition (p_201902) order by 1, 2, 3, 4; +select * from range_hash where user_no is not null and dept_code <> '2' UNION ALL select * from range_hash partition (p_201902) order by 1, 2, 3, 4; +select * from range_hash where user_no is not null and dept_code <> '2' UNION ALL select * from range_hash partition (p_201902) where dept_code in ('2') order by 1, 2, 3, 4; + +select * from range_range order by 1, 2, 3, 4; + +select * from range_range where user_no is not null order by 1, 2, 3, 4; +select * from range_range where user_no is not null and dept_code = user_no order by 1, 2, 3, 4; +select * from range_range where user_no is not null and dept_code in ('2') order by 1, 2, 3, 4; +select * from range_range where user_no is not null and dept_code <> '2' order by 1, 2, 3, 4; +select * from range_range partition (p_201901) order by 1, 2, 3, 4; +select * from range_range partition (p_201902) order by 1, 2, 3, 4; +select * from range_range where user_no is not null and dept_code <> '2' UNION ALL select * from range_range partition (p_201902) order by 1, 2, 3, 4; +select * from range_range where user_no is not null and dept_code <> '2' UNION ALL select * from range_range partition (p_201902) where dept_code in ('2') order by 1, 2, 3, 4; + +select * from view_temp; +--error +select * from view_temp partition (p_201901); +select * from view_temp partition (p_201902); +--join normal table +select * from range_list left join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_list left join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_list right join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_list right join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_list full join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_list full 
join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_list inner join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_list inner join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + + +select * from range_hash left join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_hash left join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_hash right join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_hash right join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_hash full join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_hash full join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_hash inner join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_hash inner join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + + +select * from range_range left join t1 on range_range.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_range left join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_range right join t1 on range_range.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_range right join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_range full join t1 on range_range.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_range full join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_range inner join t1 on range_range.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from 
range_range inner join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +--join range_list and range_hash + +select * from range_list left join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_list left join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_list right join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_list right join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_list full join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_list full join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_list inner join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_list inner join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +--join range_hash and range_range + +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_hash full join range_range on 
range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +--join range_hash and range_range + +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from pjade subpartition(hrp1_1) union select * from cjade order by 1,2,3; +select * from pjade subpartition(hrp1_1) p union select * from cjade order by 1,2,3; +select * from pjade subpartition(hrp1_1) union select * from cjade 
order by 1,2,3; +select * from pjade subpartition(hrp1_1) p union select * from cjade order by 1,2,3; \ No newline at end of file diff --git a/src/test/regress/sql/ss_r/segment_subpartition_split.sql b/src/test/regress/sql/ss_r/segment_subpartition_split.sql new file mode 100644 index 000000000..1582e1c8e --- /dev/null +++ b/src/test/regress/sql/ss_r/segment_subpartition_split.sql @@ -0,0 +1,14 @@ +SET CURRENT_SCHEMA TO segment_subpartition_split; +select * from list_list subpartition (p_201902_a) order by 1,2,3,4; +select * from list_list subpartition (p_201902_b) order by 1,2,3,4; +select * from list_list subpartition (p_201902_c) order by 1,2,3,4; + +select * from range_range subpartition (p_201901_a) order by 1,2,3,4; +select * from range_range subpartition (p_201901_b) order by 1,2,3,4; +select * from range_range subpartition (p_201901_c) order by 1,2,3,4; +select * from range_range subpartition (p_201901_d) order by 1,2,3,4; + +select * from range_range subpartition (p_201902_a) order by 1,2,3,4; +select * from range_range subpartition (p_201902_b) order by 1,2,3,4; +select * from range_range subpartition (p_201902_c) order by 1,2,3,4; +select * from range_range subpartition (p_201902_d) order by 1,2,3,4; \ No newline at end of file diff --git a/src/test/regress/sql/ss_r/segment_subpartition_truncate.sql b/src/test/regress/sql/ss_r/segment_subpartition_truncate.sql new file mode 100644 index 000000000..22fc89685 --- /dev/null +++ b/src/test/regress/sql/ss_r/segment_subpartition_truncate.sql @@ -0,0 +1,4 @@ +SET CURRENT_SCHEMA TO segment_subpartition_truncate; +select * from list_list partition (p_201901); + +select * from list_list subpartition (p_201902_b); diff --git a/src/test/regress/sql/ss_r/trunc_func_for_date.sql b/src/test/regress/sql/ss_r/trunc_func_for_date.sql new file mode 100644 index 000000000..11cbcbedd --- /dev/null +++ b/src/test/regress/sql/ss_r/trunc_func_for_date.sql @@ -0,0 +1,65 @@ +--- +--- data type 1 : timestamp +--- + +-- format can 
recognize +select trunc(timestamp '2021-08-11 20:19:39', 'cc'); -- century +select trunc(timestamp '2021-08-11 20:19:39', 'yyyy'); -- year +select trunc(timestamp '2021-08-11 20:19:39', 'q'); -- quarter +select trunc(timestamp '2021-08-11 20:19:39', 'mm'); -- month +select trunc(timestamp '2021-08-11 20:19:39', 'j'); -- day +select trunc(timestamp '2021-08-11 20:19:39', 'dd'); -- day +select trunc(timestamp '2021-08-11 20:19:39', 'ddd'); -- day +select trunc(timestamp '2021-08-11 20:19:39', 'hh'); -- hour +select trunc(timestamp '2021-08-11 20:19:39', 'mi'); -- minute + +-- format can not recognize +select trunc(timestamp '2021-08-11 20:19:39', 'qq'); -- quarter +select trunc(timestamp '2021-08-11 20:19:39', 'mmm'); -- month +select trunc(timestamp '2021-08-11 20:19:39', 'dddd'); -- day +select trunc(timestamp '2021-08-11 20:19:39', 'hhh'); -- hour + +--- +--- data type 2 : timestamptz +--- + +-- format can recognize +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'cc'); -- century +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'yyyy'); -- year +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'q'); -- quarter +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'mm'); -- month +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'j'); -- day +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'dd'); -- day +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'ddd'); -- day +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'hh'); -- hour +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'mi'); -- minute + +-- format can't recognize +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'qq'); -- quarter +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'mmm'); -- month +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'dddd'); -- day +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'hhh'); -- hour + +--- +--- data type 3 : interval +--- + +-- format can 
recognize +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'cc'); -- century +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'yyyy'); -- year +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'q'); -- quarter +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'mm'); -- month +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'j'); -- day +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'dd'); -- day +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'ddd'); -- day +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'hh'); -- hour +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'mi'); -- minute + +-- format can not recognize +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'qq'); -- quarter +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'mmm'); -- month +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'dddd'); -- day +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'hhh'); -- hour + +-- not supported +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'w'); -- week \ No newline at end of file diff --git a/src/test/regress/sql/ss_unsupported_hasuids_table.sql b/src/test/regress/sql/ss_unsupported_hasuids_table.sql new file mode 100644 index 000000000..a97cf03dc --- /dev/null +++ b/src/test/regress/sql/ss_unsupported_hasuids_table.sql @@ -0,0 +1,7 @@ +drop table if exists hasuids_standby_t1; +CREATE TABLE hasuids_standby_t1 (id int,num int) with (segment=on); +alter table hasuids_standby_t1 set (hasuids=on); + +drop table if exists hasuids_standby_t1; +CREATE TABLE hasuids_standby_t1 (id int,num int) with (segment=on,hasuids=on); +drop table if exists hasuids_standby_t1; \ No 
newline at end of file diff --git a/src/test/regress/sql/ss_wr/replace_func_with_two_args.sql b/src/test/regress/sql/ss_wr/replace_func_with_two_args.sql new file mode 100644 index 000000000..c05dee105 --- /dev/null +++ b/src/test/regress/sql/ss_wr/replace_func_with_two_args.sql @@ -0,0 +1,27 @@ +-- +-- replace function with two arguments +-- + +select replace('string', ''); +select replace('string', 'i'); +select replace('string', 'in'); +select replace('string', 'ing'); + +select replace('', 'ing'); +select replace(NULL, 'ing'); +select replace('ing', ''); +select replace('ing', NULL); +select replace('', ''); +select replace(NULL, NULL); + +select replace(123, '1'); +select replace('123', 1); +select replace(123, 1); + +select replace('abc\nabc', '\n'); +select replace('abc\nabc', E'\n'); +select replace(E'abc\nabc', E'\n'); + +select replace('~!@#$%^&*()', '!@'); + +select replace('高斯', '高'); \ No newline at end of file diff --git a/src/test/regress/sql/ss_wr/segment_subpartition_analyze_vacuum.sql b/src/test/regress/sql/ss_wr/segment_subpartition_analyze_vacuum.sql new file mode 100644 index 000000000..85be1a8b8 --- /dev/null +++ b/src/test/regress/sql/ss_wr/segment_subpartition_analyze_vacuum.sql @@ -0,0 +1,44 @@ +-- prepare +DROP SCHEMA segment_subpartition_analyze_vacuum CASCADE; +CREATE SCHEMA segment_subpartition_analyze_vacuum; +SET CURRENT_SCHEMA TO segment_subpartition_analyze_vacuum; + +-- base function + +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a values ('1'), + SUBPARTITION p_201901_b values ('2') + ), + PARTITION p_201902 VALUES LESS THAN( '201910' ) + ( + SUBPARTITION p_201902_a values ('1'), + SUBPARTITION p_201902_b values ('2') + ) +); + +create index 
idx_month_code_local on range_list(month_code) local; +create index idx_dept_code_global on range_list(dept_code) global; +create index idx_user_no_global on range_list(user_no) global; + +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201902', '2', '1', 1); +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201903', '2', '2', 1); +insert into range_list values('201903', '1', '1', 1); +insert into range_list values('201903', '2', '1', 1); +select * from range_list order by 1, 2, 3, 4; +delete from range_list where month_code = '201902'; +select * from range_list order by 1, 2, 3, 4; +analyze range_list; +analyze range_list partition (p_201901); + diff --git a/src/test/regress/sql/ss_wr/segment_subpartition_gpi.sql b/src/test/regress/sql/ss_wr/segment_subpartition_gpi.sql new file mode 100644 index 000000000..c04cc662b --- /dev/null +++ b/src/test/regress/sql/ss_wr/segment_subpartition_gpi.sql @@ -0,0 +1,39 @@ +-- prepare +DROP SCHEMA segment_subpartition_gpi CASCADE; +CREATE SCHEMA segment_subpartition_gpi; +SET CURRENT_SCHEMA TO segment_subpartition_gpi; + +-- base function +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a values ('1'), + SUBPARTITION p_201901_b values ('2') + ), + PARTITION p_201902 VALUES LESS THAN( '201910' ) + ( + SUBPARTITION p_201902_a values ('1'), + SUBPARTITION p_201902_b values ('2') + ) +); + +create index idx_month_code_local on range_list(month_code) local; +create index idx_dept_code_global on range_list(dept_code) global; +create index idx_user_no_global on range_list(user_no) global; + +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201902', 
'2', '1', 1); +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201903', '2', '2', 1); +insert into range_list values('201903', '1', '1', 1); +insert into range_list values('201903', '2', '1', 1); +select * from range_list order by 1, 2, 3, 4; + diff --git a/src/test/regress/sql/ss_wr/segment_subpartition_scan.sql b/src/test/regress/sql/ss_wr/segment_subpartition_scan.sql new file mode 100644 index 000000000..48cd257af --- /dev/null +++ b/src/test/regress/sql/ss_wr/segment_subpartition_scan.sql @@ -0,0 +1,135 @@ +--prepare +DROP SCHEMA segment_subpartition_scan CASCADE; +CREATE SCHEMA segment_subpartition_scan; +SET CURRENT_SCHEMA TO segment_subpartition_scan; + +--scan +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a values ('1'), + SUBPARTITION p_201901_b values ('2') + ), + PARTITION p_201902 VALUES LESS THAN( '201910' ) + ( + SUBPARTITION p_201902_a values ('1'), + SUBPARTITION p_201902_b values ('2') + ) +); + +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201902', '2', '1', 1); +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201903', '2', '1', 1); +insert into range_list values('201903', '1', '1', 1); +insert into range_list values('201903', '2', '1', 1); + +explain(costs off, verbose on) select * from range_list order by 1, 2, 3, 4; +select * from range_list order by 1, 2, 3, 4; + +create index idx_month_code on range_list(month_code) local; +create index idx_dept_code on range_list(dept_code) local; +create index idx_user_no on range_list(user_no) local; + +set enable_seqscan = off; +explain(costs off, verbose on) select * from range_list where month_code = '201902' 
order by 1, 2, 3, 4; +select * from range_list where month_code = '201902' order by 1, 2, 3, 4; +explain(costs off, verbose on) select * from range_list where dept_code = '1' order by 1, 2, 3, 4; +select * from range_list where dept_code = '1' order by 1, 2, 3, 4; +explain(costs off, verbose on) select * from range_list where user_no = '1' order by 1, 2, 3, 4; +select * from range_list where user_no = '1' order by 1, 2, 3, 4; + +set enable_bitmapscan = off; +explain(costs off, verbose on) select * from range_list where month_code = '201902' order by 1, 2, 3, 4; +select * from range_list where month_code = '201902' order by 1, 2, 3, 4; +explain(costs off, verbose on) select * from range_list where dept_code = '1' order by 1, 2, 3, 4; +select * from range_list where dept_code = '1' order by 1, 2, 3, 4; +explain(costs off, verbose on) select * from range_list where user_no = '1' order by 1, 2, 3, 4; +select * from range_list where user_no = '1' order by 1, 2, 3, 4; + +reset enable_seqscan; +reset enable_bitmapscan; + + +create table range_range_jade(jid int,jn int,name varchar2) WITH (SEGMENT=ON) partition by range (jid) subpartition by range(jn) +( + partition hrp1 values less than(16)( + subpartition hrp1_1 values less than(16), +subpartition hrp1_2 values less than(26), +subpartition hrp1_3 values less than(36), + subpartition hrp1_4 values less than(maxvalue)), + partition hrp2 values less than(26)( + subpartition hrp2_1 values less than(maxvalue)), + partition hrp3 values less than(36)( + subpartition hrp3_1 values less than(16), +subpartition hrp3_2 values less than(26), + subpartition hrp3_3 values less than(maxvalue)), + partition hrp4 values less than(maxvalue)( + subpartition hrp4_1 values less than(16), + subpartition hrp4_2 values less than(maxvalue)) +)ENABLE ROW MOVEMENT; +-- no errors +set enable_partition_opfusion = on; +insert into range_range_jade values(1,2,'jade'); +reset enable_partition_opfusion; + +CREATE TABLE IF NOT EXISTS list_range_02 +( + 
col_1 int , + col_2 int, +col_3 VARCHAR2 ( 30 ) , + col_4 int +) WITH (SEGMENT=ON) +PARTITION BY list (col_1) SUBPARTITION BY range (col_2) +( + PARTITION p_list_1 VALUES(-1,-2,-3,-4,-5,-6,-7,-8,-9,-10 ) + ( + SUBPARTITION p_range_1_1 VALUES LESS THAN( -10 ), + SUBPARTITION p_range_1_2 VALUES LESS THAN( 0 ), + SUBPARTITION p_range_1_3 VALUES LESS THAN( 10 ), + SUBPARTITION p_range_1_4 VALUES LESS THAN( 20 ), + SUBPARTITION p_range_1_5 VALUES LESS THAN( 50 ) + ), + PARTITION p_list_2 VALUES(1,2,3,4,5,6,7,8,9,10 ), + PARTITION p_list_3 VALUES(11,12,13,14,15,16,17,18,19,20) + ( + SUBPARTITION p_range_3_1 VALUES LESS THAN( 15 ), + SUBPARTITION p_range_3_2 VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_list_4 VALUES(21,22,23,24,25,26,27,28,29,30) + ( + SUBPARTITION p_range_4_1 VALUES LESS THAN( -10 ), + SUBPARTITION p_range_4_2 VALUES LESS THAN( 0 ), + SUBPARTITION p_range_4_3 VALUES LESS THAN( 10 ), + SUBPARTITION p_range_4_4 VALUES LESS THAN( 20 ), + SUBPARTITION p_range_4_5 VALUES LESS THAN( 50 ) + ), + PARTITION p_list_5 VALUES(31,32,33,34,35,36,37,38,39,40) + ( + SUBPARTITION p_range_5_1 VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_list_6 VALUES(41,42,43,44,45,46,47,48,49,50) + ( + SUBPARTITION p_range_6_1 VALUES LESS THAN( -10 ), + SUBPARTITION p_range_6_2 VALUES LESS THAN( 0 ), + SUBPARTITION p_range_6_3 VALUES LESS THAN( 10 ), + SUBPARTITION p_range_6_4 VALUES LESS THAN( 20 ), + SUBPARTITION p_range_6_5 VALUES LESS THAN( 50 ) + ), + PARTITION p_list_7 VALUES(default) +) ENABLE ROW MOVEMENT; + +create index index_01 on list_range_02(col_2) local ; + +explain (costs off) select * from list_range_02 where col_2 in + (select col_1 from list_range_02 subpartition(p_list_2_subpartdefault1) + where col_1 >10 and col_1 <100) and col_1 +col_2 =50 and col_2 in (100,200,300 ); + diff --git a/src/test/regress/sql/ss_wr/segment_subpartition_select.sql b/src/test/regress/sql/ss_wr/segment_subpartition_select.sql new file mode 100644 index 000000000..7ae8dff66 --- 
/dev/null +++ b/src/test/regress/sql/ss_wr/segment_subpartition_select.sql @@ -0,0 +1,293 @@ +--prepare +DROP SCHEMA segment_subpartition_select CASCADE; +CREATE SCHEMA segment_subpartition_select; +SET CURRENT_SCHEMA TO segment_subpartition_select; + +--select +CREATE TABLE t1 +( + c1 int, + c2 int +) WITH (SEGMENT=ON); +insert into t1 values(generate_series(201901,201910), generate_series(1,10)); + +CREATE TABLE range_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a values ('1'), + SUBPARTITION p_201901_b values ('2') + ), + PARTITION p_201902 VALUES LESS THAN( '201910' ) + ( + SUBPARTITION p_201902_a values ('1'), + SUBPARTITION p_201902_b values ('2') + ) +); + +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201902', '2', '1', 1); +insert into range_list values('201902', '1', '1', 1); +insert into range_list values('201902', '3', '1', 1); +insert into range_list values('201903', '2', '1', 1); +insert into range_list values('201903', '1', '1', 1); +insert into range_list values('201903', '2', '1', 1); +insert into range_list values('201903', '3', '1', 1); + +select * from range_list order by 1, 2, 3, 4; + +select * from range_list where user_no is not null order by 1, 2, 3, 4; +select * from range_list where user_no is not null and dept_code = user_no order by 1, 2, 3, 4; +select * from range_list where user_no is not null and dept_code in ('2') order by 1, 2, 3, 4; +select * from range_list where user_no is not null and dept_code <> '2' order by 1, 2, 3, 4; +select * from range_list partition (p_201901) order by 1, 2, 3, 4; +select * from range_list partition (p_201902) order by 1, 2, 3, 4; +select * from range_list where user_no is not null and dept_code <> '2' 
UNION ALL select * from range_list partition (p_201902) order by 1, 2, 3, 4; +select * from range_list where user_no is not null and dept_code <> '2' UNION ALL select * from range_list partition (p_201902) where dept_code in ('2') order by 1, 2, 3, 4; + + + +CREATE TABLE range_hash +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY HASH (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a, + SUBPARTITION p_201901_b + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a, + SUBPARTITION p_201902_b + ) +); + +insert into range_hash values('201902', '1', '1', 1); +insert into range_hash values('201902', '2', '1', 1); +insert into range_hash values('201902', '1', '1', 1); +insert into range_hash values('201903', '2', '1', 1); +insert into range_hash values('201903', '1', '1', 1); +insert into range_hash values('201903', '2', '1', 1); + +select * from range_hash order by 1, 2, 3, 4; + +select * from range_hash where user_no is not null order by 1, 2, 3, 4; +select * from range_hash where user_no is not null and dept_code = user_no order by 1, 2, 3, 4; +select * from range_hash where user_no is not null and dept_code in ('2') order by 1, 2, 3, 4; +select * from range_hash where user_no is not null and dept_code <> '2' order by 1, 2, 3, 4; +select * from range_hash partition (p_201901) order by 1, 2, 3, 4; +select * from range_hash partition (p_201902) order by 1, 2, 3, 4; +select * from range_hash where user_no is not null and dept_code <> '2' UNION ALL select * from range_hash partition (p_201902) order by 1, 2, 3, 4; +select * from range_hash where user_no is not null and dept_code <> '2' UNION ALL select * from range_hash partition (p_201902) where dept_code in ('2') order by 1, 2, 3, 4; + + +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 
) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( '3' ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '3' ) + ) +); +insert into range_range values('201902', '1', '1', 1); +insert into range_range values('201902', '2', '1', 1); +insert into range_range values('201902', '1', '1', 1); +insert into range_range values('201903', '2', '1', 1); +insert into range_range values('201903', '1', '1', 1); +insert into range_range values('201903', '2', '1', 1); + +select * from range_range order by 1, 2, 3, 4; + +select * from range_range where user_no is not null order by 1, 2, 3, 4; +select * from range_range where user_no is not null and dept_code = user_no order by 1, 2, 3, 4; +select * from range_range where user_no is not null and dept_code in ('2') order by 1, 2, 3, 4; +select * from range_range where user_no is not null and dept_code <> '2' order by 1, 2, 3, 4; +select * from range_range partition (p_201901) order by 1, 2, 3, 4; +select * from range_range partition (p_201902) order by 1, 2, 3, 4; +select * from range_range where user_no is not null and dept_code <> '2' UNION ALL select * from range_range partition (p_201902) order by 1, 2, 3, 4; +select * from range_range where user_no is not null and dept_code <> '2' UNION ALL select * from range_range partition (p_201902) where dept_code in ('2') order by 1, 2, 3, 4; + +--view +create view view_temp as select * from range_list; +select * from view_temp; +--error +select * from view_temp partition (p_201901); +select * from view_temp partition (p_201902); + +with tmp1 as (select * from range_list ) select * from tmp1 order 
by 1, 2, 3, 4; +with tmp1 as (select * from range_list partition (p_201901)) select * from tmp1 order by 1, 2, 3, 4; + +--join normal table +select * from range_list left join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_list left join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_list right join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_list right join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_list full join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_list full join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_list inner join t1 on range_list.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_list inner join t1 on range_list.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + + +select * from range_hash left join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_hash left join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_hash right join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_hash right join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_hash full join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_hash full join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_hash inner join t1 on range_hash.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_hash inner join t1 on range_hash.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + + +select * from range_range left join t1 on range_range.month_code = t1.c1 order by 1, 
2, 3, 4, 5, 6; +select * from range_range left join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_range right join t1 on range_range.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_range right join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_range full join t1 on range_range.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_range full join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +select * from range_range inner join t1 on range_range.month_code = t1.c1 order by 1, 2, 3, 4, 5, 6; +select * from range_range inner join t1 on range_range.month_code = t1.c1 where dept_code = 2 order by 1, 2, 3, 4, 5, 6; + +--join range_list and range_hash + +select * from range_list left join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_list left join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_list right join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_list right join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_list full join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_list full join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_list inner join range_hash on range_list.month_code = range_hash.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_list inner join range_hash on range_list.month_code = range_hash.month_code where range_list.dept_code = 2 order by 1, 2, 
3, 4, 5, 6, 7, 8; + +--join range_hash and range_range + +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +--join range_hash and range_range + +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash left join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash right join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 
8; +select * from range_hash full join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code order by 1, 2, 3, 4, 5, 6, 7, 8; +select * from range_hash inner join range_range on range_hash.month_code = range_range.month_code where range_hash.dept_code = 2 order by 1, 2, 3, 4, 5, 6, 7, 8; + +CREATE TABLE IF NOT EXISTS list_range_02 +( + col_1 int , + col_2 int, + col_3 VARCHAR2 ( 30 ) , + col_4 int +) WITH (SEGMENT=ON) +PARTITION BY list (col_1) SUBPARTITION BY range (col_2) +( + PARTITION p_list_1 VALUES(-1,-2,-3,-4,-5,-6,-7,-8,-9,-10 ) + ( + SUBPARTITION p_range_1_1 VALUES LESS THAN( -10 ), + SUBPARTITION p_range_1_2 VALUES LESS THAN( 0 ), + SUBPARTITION p_range_1_3 VALUES LESS THAN( 10 ), + SUBPARTITION p_range_1_4 VALUES LESS THAN( 20 ), + SUBPARTITION p_range_1_5 VALUES LESS THAN( 50 ) + ), + PARTITION p_list_2 VALUES(1,2,3,4,5,6,7,8,9,10 ), + PARTITION p_list_3 VALUES(11,12,13,14,15,16,17,18,19,20) + ( + SUBPARTITION p_range_3_1 VALUES LESS THAN( 15 ), + SUBPARTITION p_range_3_2 VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_list_4 VALUES(21,22,23,24,25,26,27,28,29,30) + ( + SUBPARTITION p_range_4_1 VALUES LESS THAN( -10 ), + SUBPARTITION p_range_4_2 VALUES LESS THAN( 0 ), + SUBPARTITION p_range_4_3 VALUES LESS THAN( 10 ), + SUBPARTITION p_range_4_4 VALUES LESS THAN( 20 ), + SUBPARTITION p_range_4_5 VALUES LESS THAN( 50 ) + ), + PARTITION p_list_5 VALUES(31,32,33,34,35,36,37,38,39,40) + ( + SUBPARTITION p_range_5_1 VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_list_6 VALUES(41,42,43,44,45,46,47,48,49,50) + ( + SUBPARTITION p_range_6_1 VALUES LESS THAN( -10 ), + SUBPARTITION p_range_6_2 VALUES LESS THAN( 0 ), + SUBPARTITION p_range_6_3 VALUES LESS THAN( 10 ), + SUBPARTITION p_range_6_4 VALUES LESS THAN( 20 ), + SUBPARTITION p_range_6_5 VALUES LESS THAN( 50 ) + ), + PARTITION p_list_7 
VALUES(default) +) ENABLE ROW MOVEMENT; +create index index_01 on list_range_02(col_2) local ; + +INSERT INTO list_range_02 VALUES (GENERATE_SERIES(0, 19),GENERATE_SERIES(0, 1000),GENERATE_SERIES(0, 99)); + explain (costs off, verbose on) select * from list_range_02 where col_2 >500 and col_2 <8000 order by col_1; + +create table pjade(jid int,jn int,name varchar2) WITH (SEGMENT=ON) partition by range(jid) subpartition by range(jn) +( + partition hrp1 values less than(16)( + subpartition hrp1_1 values less than(16), + subpartition hrp1_2 values less than(maxvalue)), + partition hrp2 values less than(maxvalue)( + subpartition hrp3_1 values less than(16), + subpartition hrp3_3 values less than(maxvalue)) +); + +create table cjade(jid int,jn int,name varchar2) WITH (SEGMENT=ON); +insert into pjade values(6,8,'tom'),(8,18,'jerry'),(16,8,'jade'),(18,20,'jack'); +insert into cjade values(6,8,'tom'),(8,18,'jerry'),(16,8,'jade'),(18,20,'jack'); +select * from pjade subpartition(hrp1_1) union select * from cjade order by 1,2,3; +select * from pjade subpartition(hrp1_1) p union select * from cjade order by 1,2,3; +select * from pjade subpartition(hrp1_1) union select * from cjade order by 1,2,3; +select * from pjade subpartition(hrp1_1) p union select * from cjade order by 1,2,3; + diff --git a/src/test/regress/sql/ss_wr/segment_subpartition_split.sql b/src/test/regress/sql/ss_wr/segment_subpartition_split.sql new file mode 100644 index 000000000..d5657c490 --- /dev/null +++ b/src/test/regress/sql/ss_wr/segment_subpartition_split.sql @@ -0,0 +1,231 @@ +--prepare +DROP SCHEMA segment_subpartition_split CASCADE; +CREATE SCHEMA segment_subpartition_split; +SET CURRENT_SCHEMA TO segment_subpartition_split; + +--split subpartition +-- list subpartition +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY 
LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + SUBPARTITION p_201901_b VALUES ( default ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( default ) + ) +); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201902', '2', '1', 1); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201903', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +insert into list_list values('201903', '3', '1', 1); +select * from list_list order by 1,2,3,4; + +select * from list_list subpartition (p_201901_a) order by 1,2,3,4; +select * from list_list subpartition (p_201901_b) order by 1,2,3,4; +alter table list_list split subpartition p_201901_b values (2) into +( + subpartition p_201901_b, + subpartition p_201901_c +); +select * from list_list subpartition (p_201901_a) order by 1,2,3,4; +select * from list_list subpartition (p_201901_b) order by 1,2,3,4; +select * from list_list subpartition (p_201901_c) order by 1,2,3,4; + +select * from list_list partition (p_201901); + +select * from list_list subpartition (p_201902_a) order by 1,2,3,4; +select * from list_list subpartition (p_201902_b) order by 1,2,3,4; +alter table list_list split subpartition p_201902_b values (2, 3) into +( + subpartition p_201902_b, + subpartition p_201902_c +); +select * from list_list subpartition (p_201902_a) order by 1,2,3,4; +select * from list_list subpartition (p_201902_b) order by 1,2,3,4; +select * from list_list subpartition (p_201902_c) order by 1,2,3,4; + +--error +alter table list_list split subpartition p_201902_a values (3) into +( + subpartition p_201902_ab, + subpartition p_201902_ac +); + + +-- range subpartition +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int 
+) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '6' ) + ) +); +insert into range_range values('201902', '1', '1', 1); +insert into range_range values('201902', '2', '1', 1); +insert into range_range values('201902', '3', '1', 1); +insert into range_range values('201903', '1', '1', 1); +insert into range_range values('201903', '2', '1', 1); +insert into range_range values('201903', '5', '1', 1); +select * from range_range order by 1,2,3,4; + +select * from range_range subpartition (p_201901_a) order by 1,2,3,4; +select * from range_range subpartition (p_201901_b) order by 1,2,3,4; +alter table range_range split subpartition p_201901_b at (3) into +( + subpartition p_201901_c, + subpartition p_201901_d +); +select * from range_range subpartition (p_201901_a) order by 1,2,3,4; +select * from range_range subpartition (p_201901_b) order by 1,2,3,4; +select * from range_range subpartition (p_201901_c) order by 1,2,3,4; +select * from range_range subpartition (p_201901_d) order by 1,2,3,4; + +select * from range_range subpartition (p_201902_a) order by 1,2,3,4; +select * from range_range subpartition (p_201902_b) order by 1,2,3,4; +alter table range_range split subpartition p_201902_b at (3) into +( + subpartition p_201902_c, + subpartition p_201902_d +); +select * from range_range subpartition (p_201902_a) order by 1,2,3,4; +select * from range_range subpartition (p_201902_b) order by 1,2,3,4; +select * from range_range subpartition (p_201902_c) order by 1,2,3,4; +select * from range_range subpartition (p_201902_d) order by 1,2,3,4; + +--test syntax +CREATE TABLE IF NOT EXISTS list_hash +( + col_1 int , + col_2 int , + 
col_3 int , + col_4 int +) WITH (SEGMENT=ON) +PARTITION BY list (col_1) SUBPARTITION BY hash (col_2) +( + PARTITION p_list_1 VALUES (-1,-2,-3,-4,-5,-6,-7,-8,-9,-10 ) + ( + SUBPARTITION p_hash_1_1 , + SUBPARTITION p_hash_1_2 , + SUBPARTITION p_hash_1_3 + ), + PARTITION p_list_2 VALUES (1,2,3,4,5,6,7,8,9,10 ) + ( + SUBPARTITION p_hash_2_1 , + SUBPARTITION p_hash_2_2 , + SUBPARTITION p_hash_2_3 , + SUBPARTITION p_hash_2_4 , + SUBPARTITION p_hash_2_5 + ), + PARTITION p_list_3 VALUES (11,12,13,14,15,16,17,18,19,20), + PARTITION p_list_4 VALUES (21,22,23,24,25,26,27,28,29,30 ) + ( + SUBPARTITION p_hash_4_1 + ), + PARTITION p_list_5 VALUES (default) + ( + SUBPARTITION p_hash_5_1 + ), + PARTITION p_list_6 VALUES (31,32,33,34,35,36,37,38,39,40) + ( + SUBPARTITION p_hash_6_1 , + SUBPARTITION p_hash_6_2 , + SUBPARTITION p_hash_6_3 + ) +) ENABLE ROW MOVEMENT ; + +alter table list_hash split subPARTITION p_hash_2_3 at(-10) into ( subPARTITION add_p_01 , subPARTITION add_p_02 ); + +CREATE TABLE range_range +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY RANGE (month_code) SUBPARTITION BY RANGE (dept_code) +( + PARTITION p_201901 VALUES LESS THAN( '201903' ) + ( + SUBPARTITION p_201901_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201901_b VALUES LESS THAN( MAXVALUE ) + ), + PARTITION p_201902 VALUES LESS THAN( '201904' ) + ( + SUBPARTITION p_201902_a VALUES LESS THAN( '2' ), + SUBPARTITION p_201902_b VALUES LESS THAN( '6' ) + ) +); +alter table range_range split subpartition p_201901_b values (3) into +( + subpartition p_201901_c, + subpartition p_201901_d +) update global index; + + +CREATE TABLE IF NOT EXISTS list_list_02 +( + col_1 int , + col_2 int , + col_3 int , + col_4 int +) WITH (SEGMENT=ON) +PARTITION BY list (col_1) SUBPARTITION BY list (col_2) +( + PARTITION p_list_1 VALUES(-1,-2,-3,-4,-5,-6,-7,-8,-9,-10 ) + ( + SUBPARTITION p_list_1_1 VALUES ( 
0,-1,-2,-3,-4,-5,-6,-7,-8,-9 ), + SUBPARTITION p_list_1_2 VALUES ( default ) + ), + PARTITION p_list_2 VALUES(0,1,2,3,4,5,6,7,8,9) + ( + SUBPARTITION p_list_2_1 VALUES ( 0,1,2,3,4,5,6,7,8,9 ), + SUBPARTITION p_list_2_2 VALUES ( default ), + SUBPARTITION p_list_2_3 VALUES ( 10,11,12,13,14,15,16,17,18,19), + SUBPARTITION p_list_2_4 VALUES ( 20,21,22,23,24,25,26,27,28,29 ), + SUBPARTITION p_list_2_5 VALUES ( 30,31,32,33,34,35,36,37,38,39 ) + ), + PARTITION p_list_3 VALUES(10,11,12,13,14,15,16,17,18,19) + ( + SUBPARTITION p_list_3_2 VALUES ( default ) + ), + PARTITION p_list_4 VALUES(default ), + PARTITION p_list_5 VALUES(20,21,22,23,24,25,26,27,28,29) + ( + SUBPARTITION p_list_5_1 VALUES ( 0,1,2,3,4,5,6,7,8,9 ), + SUBPARTITION p_list_5_2 VALUES ( default ), + SUBPARTITION p_list_5_3 VALUES ( 10,11,12,13,14,15,16,17,18,19), + SUBPARTITION p_list_5_4 VALUES ( 20,21,22,23,24,25,26,27,28,29 ), + SUBPARTITION p_list_5_5 VALUES ( 30,31,32,33,34,35,36,37,38,39 ) + ), + PARTITION p_list_6 VALUES(30,31,32,33,34,35,36,37,38,39), + PARTITION p_list_7 VALUES(40,41,42,43,44,45,46,47,48,49) + ( + SUBPARTITION p_list_7_1 VALUES ( default ) + ) +) ENABLE ROW MOVEMENT; diff --git a/src/test/regress/sql/ss_wr/segment_subpartition_truncate.sql b/src/test/regress/sql/ss_wr/segment_subpartition_truncate.sql new file mode 100644 index 000000000..5e3121210 --- /dev/null +++ b/src/test/regress/sql/ss_wr/segment_subpartition_truncate.sql @@ -0,0 +1,67 @@ +--prepare +DROP SCHEMA segment_subpartition_truncate CASCADE; +CREATE SCHEMA segment_subpartition_truncate; +SET CURRENT_SCHEMA TO segment_subpartition_truncate; + +--truncate partition/subpartition +CREATE TABLE list_list +( + month_code VARCHAR2 ( 30 ) NOT NULL , + dept_code VARCHAR2 ( 30 ) NOT NULL , + user_no VARCHAR2 ( 30 ) NOT NULL , + sales_amt int +) WITH (SEGMENT=ON) +PARTITION BY LIST (month_code) SUBPARTITION BY LIST (dept_code) +( + PARTITION p_201901 VALUES ( '201902' ) + ( + SUBPARTITION p_201901_a VALUES ( '1' ), + 
SUBPARTITION p_201901_b VALUES ( default ) + ), + PARTITION p_201902 VALUES ( '201903' ) + ( + SUBPARTITION p_201902_a VALUES ( '1' ), + SUBPARTITION p_201902_b VALUES ( '2' ) + ) +); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201902', '2', '1', 1); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +insert into list_list values('201903', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +select * from list_list; + +select * from list_list partition (p_201901); +alter table list_list truncate partition p_201901; +select * from list_list partition (p_201901); + +select * from list_list partition (p_201902); +alter table list_list truncate partition p_201902; +select * from list_list partition (p_201902); +select * from list_list; + +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201902', '2', '1', 1); +insert into list_list values('201902', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); +insert into list_list values('201903', '1', '1', 1); +insert into list_list values('201903', '2', '1', 1); + +select * from list_list subpartition (p_201901_a); +alter table list_list truncate subpartition p_201901_a; +select * from list_list subpartition (p_201901_a); + +select * from list_list subpartition (p_201901_b); +alter table list_list truncate subpartition p_201901_b; +select * from list_list subpartition (p_201901_b); + +select * from list_list subpartition (p_201902_a); +alter table list_list truncate subpartition p_201902_a; +select * from list_list subpartition (p_201902_a); + +select * from list_list subpartition (p_201902_b); +alter table list_list truncate subpartition p_201902_b; +select * from list_list subpartition (p_201902_b); + +select * from list_list; diff --git a/src/test/regress/sql/ss_wr/trunc_func_for_date.sql b/src/test/regress/sql/ss_wr/trunc_func_for_date.sql new file mode 100644 
index 000000000..11cbcbedd --- /dev/null +++ b/src/test/regress/sql/ss_wr/trunc_func_for_date.sql @@ -0,0 +1,65 @@ +--- +--- data type 1 : timestamp +--- + +-- format can recognize +select trunc(timestamp '2021-08-11 20:19:39', 'cc'); -- century +select trunc(timestamp '2021-08-11 20:19:39', 'yyyy'); -- year +select trunc(timestamp '2021-08-11 20:19:39', 'q'); -- quarter +select trunc(timestamp '2021-08-11 20:19:39', 'mm'); -- month +select trunc(timestamp '2021-08-11 20:19:39', 'j'); -- day +select trunc(timestamp '2021-08-11 20:19:39', 'dd'); -- day +select trunc(timestamp '2021-08-11 20:19:39', 'ddd'); -- day +select trunc(timestamp '2021-08-11 20:19:39', 'hh'); -- hour +select trunc(timestamp '2021-08-11 20:19:39', 'mi'); -- minute + +-- format can not recognize +select trunc(timestamp '2021-08-11 20:19:39', 'qq'); -- quarter +select trunc(timestamp '2021-08-11 20:19:39', 'mmm'); -- month +select trunc(timestamp '2021-08-11 20:19:39', 'dddd'); -- day +select trunc(timestamp '2021-08-11 20:19:39', 'hhh'); -- hour + +--- +--- data type 2 : timestamptz +--- + +-- format can recognize +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'cc'); -- century +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'yyyy'); -- year +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'q'); -- quarter +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'mm'); -- month +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'j'); -- day +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'dd'); -- day +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'ddd'); -- day +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'hh'); -- hour +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'mi'); -- minute + +-- format can't recognize +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'qq'); -- quarter +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'mmm'); -- month +select trunc(timestamptz '2021-08-12 
08:48:26.366526+08', 'dddd'); -- day +select trunc(timestamptz '2021-08-12 08:48:26.366526+08', 'hhh'); -- hour + +--- +--- data type 3 : interval +--- + +-- format can recognize +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'cc'); -- century +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'yyyy'); -- year +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'q'); -- quarter +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'mm'); -- month +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'j'); -- day +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'dd'); -- day +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'ddd'); -- day +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'hh'); -- hour +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'mi'); -- minute + +-- format can not recognize +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'qq'); -- quarter +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'mmm'); -- month +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'dddd'); -- day +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'hhh'); -- hour + +-- not supported +select trunc(interval '2 years 3 months 4 days 5 hours 6 minutes 7 seconds', 'w'); -- week \ No newline at end of file diff --git a/src/test/regress/sql/unsupported_immutable_func.sql b/src/test/regress/sql/unsupported_immutable_func.sql new file mode 100644 index 000000000..d2606cb55 --- /dev/null +++ b/src/test/regress/sql/unsupported_immutable_func.sql @@ -0,0 +1,15 @@ +drop procedure procedure_name_immutable; + +create or replace procedure procedure_name_immutable +IMMUTABLE +NOT SHIPPABLE NOT LEAKPROOF +STRICT EXTERNAL +SECURITY 
INVOKER PACKAGE +COST 480 ROWS 528 +IS +begin +NULL; +rollback; +end; +/ + diff --git a/src/test/ss/build_ss_database.sh b/src/test/ss/build_ss_database.sh new file mode 100644 index 000000000..8f2bf56e8 --- /dev/null +++ b/src/test/ss/build_ss_database.sh @@ -0,0 +1,108 @@ +#!/bin/sh + +echo "check env var" +if [ ${GAUSSHOME} ] && [ -d ${GAUSSHOME}/bin ];then + echo "GAUSSHOME: ${GAUSSHOME}" +else + echo "GAUSSHOME NOT EXIST" + exit 1; +fi + +PGPORT0=2000 +PGPORT1=3000 +SUPER_PASSWORD=Gauss_234 +TPCC_USER=tpcc +TPCC_PASSWORD=Hello@123 +dms_url="0:127.0.0.1:1611,1:127.0.0.1:1711" +SS_DATA=${HOME}/ss_data + +clear_shm() +{ + ipcs -m | grep $USER | awk '{print $2}' | while read shm; do + if [ -n ${shm} ]; then + ipcrm -m ${shm} + fi + done + ipcs -s | grep $USER | awk '{print $2}' | while read shm; do + if [ -n ${shm} ]; then + ipcrm -s ${shm} + fi + done +} + +kill_gaussdb() +{ + ps ux | grep gaussdb | grep -v grep | awk '{print $2}' | xargs kill -9 > /dev/null 2>&1 + ps ux | grep gsql | grep -v grep | awk '{print $2}' | xargs kill -9 > /dev/null 2>&1 + ps ux | grep dssserver | grep -v grep | awk '{print $2}' | xargs kill -9 > /dev/null 2>&1 + clear_shm + sleep 2 +} + +init_ss_data() +{ + rm -rf ${SS_DATA} + mkdir -p ${SS_DATA} +} + +alter_dms_open() +{ + for node in $@ + do + echo -e "\nautovacuum=false" >> ${node}/postgresql.conf + echo -e "\nss_enable_reform = off" >> ${node}/postgresql.conf + echo "${node}:" + cat ${node}/postgresql.conf | grep ss_enable_dms + done +} + +init_gaussdb() +{ + inst_id=$1 + dss_home=$2 + echo "${GAUSSHOME}/bin/gs_initdb -D ${SS_DATA}/dn${inst_id} --nodename=single_node -w ${SUPER_PASSWORD} --vgname=\"+data,+log${inst_id}\" --enable-dss --dms_url=\"${dms_url}\" -I ${inst_id} --socketpath=\"UDS:${dss_home}/.dss_unix_d_socket\"" + + ${GAUSSHOME}/bin/gs_initdb -D ${SS_DATA}/dn${inst_id} --nodename=single_node -w ${SUPER_PASSWORD} --vgname="+data,+log${inst_id}" --enable-dss --dms_url="${dms_url}" -I ${inst_id} 
--socketpath="UDS:${dss_home}/.dss_unix_d_socket" +} + +start_gaussdb() +{ + data_node=$1 + pg_port=$2 + echo "" >> ${data_node}/postgresql.conf + echo "port = ${pg_port}" >> ${data_node}/postgresql.conf + echo "> starting ${data_node}" && ${GAUSSHOME}/bin/gs_ctl start -D ${data_node} + sleep 3 +} + +create_tpcc_user() +{ + pg_port=$1 + user_name=$2 + user_password=$3 + ${GAUSSHOME}/bin/gsql -h 127.0.0.1 -d postgres -p ${pg_port} -U${USER} -W${SUPER_PASSWORD} -c "create user ${user_name} with password \"${user_password}\";grant all privileges to ${user_name};" +} + +main() +{ + # clean env & init conf + kill_gaussdb + init_ss_data + + sh ./conf_start_dss_inst.sh 2 ${SS_DATA} ${SS_DATA}/dss_disk + init_gaussdb 0 ${SS_DATA}/dss_home0 + init_gaussdb 1 ${SS_DATA}/dss_home1 + +# sh ./copy_xlog_to_private_vg.sh ${GAUSSHOME}/bin/dsscmd ${SS_DATA}/dss_home0 2 + + alter_dms_open ${SS_DATA}/dn0 ${SS_DATA}/dn1 + + export DSS_HOME=${SS_DATA}/dss_home0 + start_gaussdb ${SS_DATA}/dn0 ${PGPORT0} + export DSS_HOME=${SS_DATA}/dss_home1 + start_gaussdb ${SS_DATA}/dn1 ${PGPORT1} + + create_tpcc_user ${PGPORT0} ${TPCC_USER} ${TPCC_PASSWORD} +} + +main $@ \ No newline at end of file diff --git a/src/test/ss/conf_start_dss_inst.sh b/src/test/ss/conf_start_dss_inst.sh new file mode 100644 index 000000000..54fbb03c6 --- /dev/null +++ b/src/test/ss/conf_start_dss_inst.sh @@ -0,0 +1,106 @@ +#!/bin/bash +INST_OFFSET=`expr $UID % 64` +SIMULATE_SIZE=50000 # Unit: MB +LOG_SIZE=30000 # Unit: MB +declare inst_count='' +declare last_id='' + +init_dss_conf() +{ + dss_home=$1 + inst_id=`expr $2 + $INST_OFFSET` + simu_path=$3 + lock_path=$4 + echo "init ${dss_home}" + + mkdir -p ${dss_home}/cfg + mkdir -p ${dss_home}/log + + echo "data:${simu_path}/dss_data.dmp" > ${dss_home}/cfg/dss_vg_conf.ini + for i in `seq 0 $last_id` + do + echo "log${i}:${simu_path}/dss_log${i}.dmp" >> ${dss_home}/cfg/dss_vg_conf.ini + done + + echo "INST_ID = ${inst_id}" > ${dss_home}/cfg/dss_inst.ini + echo "_LOG_LEVEL = 
255" >> ${dss_home}/cfg/dss_inst.ini + echo "_LOG_BACKUP_FILE_COUNT = 128" >> ${dss_home}/cfg/dss_inst.ini + echo "_LOG_MAX_FILE_SIZE = 100M" >> ${dss_home}/cfg/dss_inst.ini + echo "LSNR_PATH = ${dss_home}" >> ${dss_home}/cfg/dss_inst.ini + echo "DISK_LOCK_FILE_PATH = ${lock_path}" >> ${dss_home}/cfg/dss_inst.ini +} + +create_vg() +{ + dss_home=$1 + simu_path=$2 + + export DSS_HOME=${dss_home} + rm -rf ${simu_path} + mkdir ${simu_path} + + echo " =========== truncate `expr ${SIMULATE_SIZE} / 1000`G =========== " +# dd if=/dev/zero bs=1048576 count=${SIMULATE_SIZE} of=${simu_path}/dss_data.dmp + truncate -s `expr ${SIMULATE_SIZE} / 1000`G ${simu_path}/dss_data.dmp + chmod 777 ${simu_path}/dss_data.dmp + + for i in `seq 0 $last_id` + do + echo " =========== truncate `expr ${LOG_SIZE} / 1000`G =========== " +# dd if=/dev/zero bs=1048576 count=${LOG_SIZE} of=${simu_path}/dss_log${i}.dmp + truncate -s `expr ${SIMULATE_SIZE} / 1000`G ${simu_path}/dss_log${i}.dmp + chmod 777 ${simu_path}/dss_log${i}.dmp + done + + echo "> creating volume group ${simu_path}/dss_data.dmp" + ${GAUSSHOME}/bin/dsscmd cv -g data -v ${simu_path}/dss_data.dmp -s 2048 -D ${dss_home} + + for i in `seq 0 $last_id` + do + echo "> creating volume group ${simu_path}/dss_log${i}.dmp" + ${GAUSSHOME}/bin/dsscmd cv -g log${i} -v ${simu_path}/dss_log${i}.dmp -s 2048 -D ${dss_home} + done +} + +start_dss() +{ + dsshome_pre=$1 + echo " ================= starting $inst_count dssserver process =================" + for i in `seq 0 $last_id` + do + dss="${dsshome_pre}${i}" + echo "> starting ${dss}" && nohup ${GAUSSHOME}/bin/dssserver -D ${dss} & + sleep 1 + done + + # check start node number equals input value number + dss_pids=`ps ux | grep dssserver | grep -v grep | wc -l` + if [ $inst_count != ${dss_pids} ]; then + echo "dssserver start failed, or parameter error" + exit 1; + else + echo " ================= $inst_count dssserver process started =================" + fi +} + +function main() { + inst_count=$1 + 
last_id=`expr $inst_count - 1` + pre_path=$2 + if [ ! -d ${pre_path} ]; then + mkdir -p ${pre_path} + fi + simu_path=$3 + + echo "init & start $inst_count dss node" + for i in `seq 0 $last_id` + do + echo "init_dss_conf ${pre_path}/dss_home$i" + init_dss_conf ${pre_path}/dss_home$i $i ${simu_path} ${pre_path} + done + + create_vg ${pre_path}/dss_home0 ${simu_path} + start_dss ${pre_path}/dss_home ${inst_count} +} + +main $@ \ No newline at end of file diff --git a/src/test/ss/copy_xlog_to_private_vg.sh b/src/test/ss/copy_xlog_to_private_vg.sh new file mode 100644 index 000000000..a107dbe25 --- /dev/null +++ b/src/test/ss/copy_xlog_to_private_vg.sh @@ -0,0 +1,35 @@ +#!/bin/sh + +main() +{ + dsscmd_bin=$1 + dss_home=$2 + inst_count=$3 + last_id=`expr $inst_count - 1` + + export DSS_HOME=${dss_home} + + for inst_id in `seq 0 $last_id` + do + ${dsscmd_bin} mkdir +log${inst_id} pg_xlog${inst_id} UDS:${dss_home}/.dss_unix_d_socket + ${dsscmd_bin} mkdir +log${inst_id}/pg_xlog${inst_id} archive_status UDS:${dss_home}/.dss_unix_d_socket + ${dsscmd_bin} cp +data/pg_xlog${inst_id}/000000010000000000000001 +log${inst_id}/pg_xlog${inst_id}/000000010000000000000001 UDS:${dss_home}/.dss_unix_d_socket + + ${dsscmd_bin} rm +data/pg_xlog${inst_id}/000000010000000000000001 UDS:${dss_home}/.dss_unix_d_socket + ${dsscmd_bin} rmdir +data/pg_xlog${inst_id}/archive_status UDS:${dss_home}/.dss_unix_d_socket + ${dsscmd_bin} rmdir +data/pg_xlog${inst_id} UDS:${dss_home}/.dss_unix_d_socket + ${dsscmd_bin} ln +log${inst_id}/pg_xlog${inst_id} +data/pg_xlog${inst_id} UDS:${dss_home}/.dss_unix_d_socket + + ${dsscmd_bin} mkdir +log${inst_id} pg_doublewrite${inst_id} UDS:${dss_home}/.dss_unix_d_socket + ${dsscmd_bin} cp +data/pg_doublewrite${inst_id}/pg_dw_meta +log${inst_id}/pg_doublewrite${inst_id}/pg_dw_meta UDS:${dss_home}/.dss_unix_d_socket + ${dsscmd_bin} cp +data/pg_doublewrite${inst_id}/pg_dw_0 +log${inst_id}/pg_doublewrite${inst_id}/pg_dw_0 UDS:${dss_home}/.dss_unix_d_socket + 
${dsscmd_bin} cp +data/pg_doublewrite${inst_id}/pg_dw_single +log${inst_id}/pg_doublewrite${inst_id}/pg_dw_single UDS:${dss_home}/.dss_unix_d_socket + ${dsscmd_bin} rmdir +data/pg_doublewrite${inst_id} -r UDS:${dss_home}/.dss_unix_d_socket + ${dsscmd_bin} ln +log${inst_id}/pg_doublewrite${inst_id} +data/pg_doublewrite${inst_id} UDS:${dss_home}/.dss_unix_d_socket + + ${dsscmd_bin} ls +data/pg_xlog${inst_id} UDS:${dss_home}/.dss_unix_d_socket + ${dsscmd_bin} ls +data/pg_doublewrite${inst_id} UDS:${dss_home}/.dss_unix_d_socket + done +} + +main $@ \ No newline at end of file diff --git a/src/test/ss/deploy_two_inst_ss.sh b/src/test/ss/deploy_two_inst_ss.sh new file mode 100644 index 000000000..96e88f32b --- /dev/null +++ b/src/test/ss/deploy_two_inst_ss.sh @@ -0,0 +1,36 @@ +#!/bin/bash +CUR_DIR=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) +echo "CUR_DIR : $CUR_DIR" + +source $CUR_DIR/../ss/ss_database_build_env.sh + +deploy_two_inst() +{ + # clean env & init conf + kill_gaussdb + clean_database_env + + sh ./../ss/conf_start_dss_inst.sh 2 ${SS_DATA} ${SS_DATA}/dss_disk + init_gaussdb 0 ${SS_DATA}/dss_home0 + init_gaussdb 1 ${SS_DATA}/dss_home1 + + set_gausdb_port ${SS_DATA}/dn0 ${PGPORT0} + set_gausdb_port ${SS_DATA}/dn1 ${PGPORT1} + + #sh ./../ss/copy_xlog_to_private_vg.sh ${GAUSSHOME}/bin/dsscmd ${SS_DATA}/dss_home0 2 + + assign_dms_parameter ${SS_DATA}/dn0 ${SS_DATA}/dn1 + + export DSS_HOME=${SS_DATA}/dss_home0 + start_gaussdb ${SS_DATA}/dn0 + export DSS_HOME=${SS_DATA}/dss_home1 + start_gaussdb ${SS_DATA}/dn1 +} + +main() +{ + deploy_two_inst +} + +main $@ + diff --git a/src/test/ss/ss_database_build_env.sh b/src/test/ss/ss_database_build_env.sh new file mode 100644 index 000000000..afe6e21fc --- /dev/null +++ b/src/test/ss/ss_database_build_env.sh @@ -0,0 +1,112 @@ +#!/bin/sh +CUR_DIR=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) +echo "CUR_DIR : $CUR_DIR" + +echo "check env var" +if [ ${GAUSSHOME} ] && [ -d ${GAUSSHOME}/bin ];then + echo "GAUSSHOME: ${GAUSSHOME}" 
+else + echo "GAUSSHOME NOT EXIST" + exit 1; +fi + +CUR_DIR=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) +echo "CUR_DIR : $CUR_DIR" +SS_SCRIPT_DIR=$CUR_DIR/ + +PGPORT0=4400 +PGPORT1=5500 +SUPER_PASSWORD=Gauss_234 +nodedate_cfg="0:127.0.0.1:1611,1:127.0.0.1:1711" +SS_DATA=${HOME}/ss_hacheck + +failed_keyword="testcase_failed" + +clean_database_env() +{ + if [ -d ${SS_DATA} ]; then + echo "${SS_DATA} exists, so need to clean and recreate" + rm -rf ${SS_DATA} + else + echo "${SS_DATA} not exists, so need to recreate" + fi + mkdir ${SS_DATA} +} + +clear_shm() +{ + ipcs -m | grep $USER | awk '{print $2}' | while read shm; do + if [ -n ${shm} ]; then + ipcrm -m ${shm} + fi + done + ipcs -s | grep $USER | awk '{print $2}' | while read shm; do + if [ -n ${shm} ]; then + ipcrm -s ${shm} + fi + done +} + +kill_gaussdb() +{ + ps ux | grep gaussdb | grep -v grep | awk '{print $2}' | xargs kill -9 > /dev/null 2>&1 + ps ux | grep gsql | grep -v grep | awk '{print $2}' | xargs kill -9 > /dev/null 2>&1 + ps ux | grep dssserver | grep -v grep | awk '{print $2}' | xargs kill -9 > /dev/null 2>&1 + clear_shm + sleep 2 +} + +kill_dss() +{ + ps ux | grep dssserver | grep -v grep | awk '{print $2}' | xargs kill -9 > /dev/null 2>&1 +} + +assign_dms_parameter() +{ + for node in $@ + do + echo -e "\nautovacuum=false" >> ${node}/postgresql.conf + echo -e "\nss_enable_reform=on" >> ${node}/postgresql.conf + echo -e "\nss_enable_ssl = 0" >> ${node}/postgresql.conf + echo -e "\nlog_min_messages = warning" >> ${node}/postgresql.conf + echo -e "\nlogging_module ='on(ALL)'" >> ${node}/postgresql.conf + echo -e "\nincremental_checkpoint_timeout = 120s" >> ${node}/postgresql.conf + echo -e "\npagewriter_sleep = 60ms" >> ${node}/postgresql.conf + echo -e "\nrecovery_max_workers = 6" >> ${node}/postgresql.conf + echo -e "\nenable_double_write = on" >> ${node}/postgresql.conf + echo "${node}:" + cat ${node}/postgresql.conf | grep ss_enable_dms + done +} + +init_gaussdb() +{ + inst_id=$1 + dss_home=$2 + 
echo "${GAUSSHOME}/bin/gs_initdb -D ${SS_DATA}/dn${inst_id} --nodename=single_node -w ${SUPER_PASSWORD} --vgname=\"+data,+log${inst_id}\" --enable-dss --dms_url=\"${nodedate_cfg}\" -I ${inst_id} --socketpath=\"UDS:${dss_home}/.dss_unix_d_socket\"" + + ${GAUSSHOME}/bin/gs_initdb -D ${SS_DATA}/dn${inst_id} --nodename=single_node -w ${SUPER_PASSWORD} --vgname="+data,+log${inst_id}" --enable-dss --dms_url="${nodedate_cfg}" -I ${inst_id} --socketpath="UDS:${dss_home}/.dss_unix_d_socket" +} + +set_gausdb_port() +{ + data_node=$1 + pg_port=$2 + echo "" >> ${data_node}/postgresql.conf + echo "port = ${pg_port}" >> ${data_node}/postgresql.conf +} + +start_gaussdb() +{ + data_node=$1 + echo "> starting ${data_node}" && nohup ${GAUSSHOME}/bin/gaussdb -D ${data_node} & + sleep 10 +} + +stop_gaussdb() +{ + data_node=$1 + echo "> stop ${data_node}" && ${GAUSSHOME}/bin/gs_ctl stop -D ${data_node} + sleep 5 +} + diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm index 36d8a2c09..084754ae6 100644 --- a/src/tools/msvc/Solution.pm +++ b/src/tools/msvc/Solution.pm @@ -52,7 +52,7 @@ sub _new $options->{wal_segsize} = 16 unless $options->{wal_segsize}; # undef or 0 means default die "Bad wal_segsize $options->{wal_segsize}" - unless grep { $_ == $options->{wal_segsize} } (1, 2, 4, 8, 16, 32, 64); + unless grep { $_ == $options->{wal_segsize} } (1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024); $self->DeterminePlatform();