table/index(btree) support compression

This commit is contained in:
wuyuechuan 2021-12-08 21:04:28 +08:00
parent 17a872b1fd
commit 4b35784ea0
101 changed files with 7636 additions and 940 deletions

View File

@ -2,7 +2,7 @@
# pagehack
AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} TGT_pagehack_SRC)
set(TGT_pagehack_INC
${TGT_pq_INC} ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SRC_DIR}/lib/gstrace
${TGT_pq_INC} ${ZSTD_INCLUDE_PATH} ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SRC_DIR}/lib/gstrace
)
set(pagehack_DEF_OPTIONS ${MACRO_OPTIONS})
@ -11,12 +11,13 @@ if(${ENABLE_DEBUG} STREQUAL "ON")
endif()
set(pagehack_COMPILE_OPTIONS ${OS_OPTIONS} ${PROTECT_OPTIONS} ${WARNING_OPTIONS} ${CHECK_OPTIONS} ${BIN_SECURE_OPTIONS} ${OPTIMIZE_OPTIONS})
set(pagehack_LINK_OPTIONS ${BIN_LINK_OPTIONS})
set(pagehack_LINK_LIBS -lpgport -lcrypt -ldl -lm -ledit -lssl -lcrypto -lsecurec -lrt -lz -lminiunz)
set(pagehack_LINK_LIBS -lpgport -lcrypt -ldl -lm -ledit -lssl -lcrypto -lsecurec -lrt -lz -lminiunz -lzstd)
add_bintarget(pagehack TGT_pagehack_SRC TGT_pagehack_INC "${pagehack_DEF_OPTIONS}" "${pagehack_COMPILE_OPTIONS}" "${pagehack_LINK_OPTIONS}" "${pagehack_LINK_LIBS}")
add_dependencies(pagehack pgport_static)
target_link_directories(pagehack PUBLIC
${LIBOPENSSL_LIB_PATH} ${PROTOBUF_LIB_PATH} ${LIBPARQUET_LIB_PATH} ${LIBCURL_LIB_PATH} ${SECURE_LIB_PATH}
${ZLIB_LIB_PATH} ${LIBOBS_LIB_PATH} ${LIBEDIT_LIB_PATH} ${LIBCGROUP_LIB_PATH} ${CMAKE_BINARY_DIR}/lib
${ZSTD_LIB_PATH}
)
install(TARGETS pagehack RUNTIME DESTINATION bin)

View File

@ -1,6 +1,6 @@
# contrib/pagehack/Makefile
MODULE_big = pagehack
OBJS = pagehack.o
OBJS = openGaussCompression.o pagehack.o
# executable program, even there is no database server/client
PROGRAM = pagehack
@ -13,7 +13,7 @@ else
subdir = contrib/pagehack
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
enable_shared = false
override LDFLAGS += -lzstd
ifeq ($(enable_debug), yes)
PG_CPPFLAGS += -DDEBUG

File diff suppressed because it is too large

View File

@ -0,0 +1,177 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2012-2018. All rights reserved.
*/
#include "openGaussCompression.h"
#include "storage/checksum_impl.h"
#include "storage/page_compression_impl.h"
void OpenGaussCompression::SetFilePath(const char *filePath, int segNo)
{
int rc = snprintf_s(pcaFilePath, MAXPGPATH, MAXPGPATH - 1, PCA_SUFFIX, filePath);
securec_check_ss_c(rc, "\0", "\0");
rc = snprintf_s(pcdFilePath, MAXPGPATH, MAXPGPATH - 1, PCD_SUFFIX, filePath);
securec_check_ss_c(rc, "\0", "\0");
this->segmentNo = segNo;
}
OpenGaussCompression::~OpenGaussCompression()
{
if (pcaFd != nullptr) {
fclose(pcaFd);
}
if (pcdFd != nullptr) {
fclose(pcdFd);
}
if (header != nullptr) {
pc_munmap(header);
}
}
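/* open the pca/pcd pair, read chunk_size from the pca header, and mmap the pca file */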
bool OpenGaussCompression::TryOpen()
{
if ((pcaFd = fopen(this->pcaFilePath, "rb+")) == nullptr) {
return false;
}
if ((pcdFd = fopen(this->pcdFilePath, "rb+")) == nullptr) {
return false;
}
if (fseeko(pcaFd, (off_t)offsetof(PageCompressHeader, chunk_size), SEEK_SET) != 0) {
return false;
}
if (fread(&chunkSize, sizeof(chunkSize), 1, this->pcaFd) <= 0) {
return false;
}
header = pc_mmap(fileno(pcaFd), chunkSize, false);
if (header == (PageCompressHeader *)MAP_FAILED) {
header = nullptr;
return false;
}
return true;
}
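/*
* Read all chunks of blockNumber from the pcd file into dst and decompress
* them into decompressedBuffer, retrying until a complete page is obtained.
*/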
bool OpenGaussCompression::ReadChunkOfBlock(char *dst, size_t *dstLen, BlockNumber blockNumber)
{
auto currentAddr = GET_PAGE_COMPRESS_ADDR(header, chunkSize, blockNumber);
do {
*dstLen = 0;
auto chunkNum = currentAddr->nchunks;
for (uint8 i = 0; i < chunkNum; i++) {
off_t seekPos = (off_t)OFFSET_OF_PAGE_COMPRESS_CHUNK(chunkSize, currentAddr->chunknos[i]);
uint8 start = i;
while (i < chunkNum - 1 && currentAddr->chunknos[i + 1] == currentAddr->chunknos[i] + 1) {
i++;
}
if (fseeko(this->pcdFd, seekPos, SEEK_SET) != 0) {
return false;
}
size_t readAmount = chunkSize * (i - start + 1);
if (fread(dst + start * chunkSize, 1, readAmount, this->pcdFd) != readAmount && ferror(this->pcdFd)) {
return false;
}
*dstLen += readAmount;
}
if (chunkNum == 0 || DecompressPage(dst, decompressedBuffer, header->algorithm) == BLCKSZ) {
break;
}
} while (true);
if (PageIs8BXidHeapVersion(dst)) {
byteConvert = ((HeapPageCompressData *)dst)->byte_convert;
diffConvert = ((HeapPageCompressData *)dst)->diff_convert;
} else {
byteConvert = ((PageCompressData *)dst)->byte_convert;
diffConvert = ((PageCompressData *)dst)->diff_convert;
}
this->blockNumber = blockNumber;
return true;
}
bool OpenGaussCompression::WriteBackCompressedData(char *source, size_t sourceLen, BlockNumber blockNumber)
{
auto currentAddr = GET_PAGE_COMPRESS_ADDR(header, chunkSize, blockNumber);
for (size_t i = 0; i < currentAddr->nchunks; ++i) {
off_t seekPos = (off_t)OFFSET_OF_PAGE_COMPRESS_CHUNK(chunkSize, currentAddr->chunknos[i]);
if (fseeko(this->pcdFd, seekPos, SEEK_SET) != 0) {
return false;
}
Assert(sourceLen >= i * chunkSize);
auto writeCount = fwrite(source + i * chunkSize, 1, chunkSize, this->pcdFd);
bool success = chunkSize == writeCount;
if (!success) {
return false;
}
}
fflush(this->pcdFd);
return true;
}
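/* fill the upper half of the decompressed page with 0xFF; used by the dirty-page mode of parse_page_file() below */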
void OpenGaussCompression::MarkUncompressedDirty()
{
constexpr int writeLen = BLCKSZ / 2;
unsigned char fill_byte[writeLen];
for (int i = 0; i < writeLen; i++) {
fill_byte[i] = 0xFF;
}
auto rc = memcpy_s(decompressedBuffer + writeLen, BLCKSZ - writeLen, fill_byte, writeLen);
securec_check(rc, "", "");
}
BlockNumber OpenGaussCompression::GetMaxBlockNumber()
{
return (BlockNumber)pg_atomic_read_u32(&header->nblocks);
}
char *OpenGaussCompression::GetPcdFilePath()
{
return this->pcdFilePath;
}
char *OpenGaussCompression::GetDecompressedPage()
{
return this->decompressedBuffer;
}
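/*
* Recompress the (possibly modified) decompressed page and write it back,
* allocating additional chunks in the pca when the page no longer fits.
*/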
bool OpenGaussCompression::WriteBackUncompressedData()
{
auto algorithm = header->algorithm;
auto workBufferSize = CompressPageBufferBound(decompressedBuffer, algorithm);
if (workBufferSize < 0) {
return false;
}
char *work_buffer = (char *)malloc(workBufferSize);
if (work_buffer == NULL) {
return false;
}
RelFileCompressOption relFileCompressOption;
relFileCompressOption.compressPreallocChunks = 0;
relFileCompressOption.compressLevelSymbol = true;
relFileCompressOption.compressLevel = 1;
relFileCompressOption.compressAlgorithm = algorithm;
relFileCompressOption.byteConvert = byteConvert;
relFileCompressOption.diffConvert = diffConvert;
auto compress_buffer_size = CompressPage(decompressedBuffer, work_buffer, workBufferSize, relFileCompressOption);
if (compress_buffer_size < 0) {
free(work_buffer);
return false;
}
uint8 nchunks = (compress_buffer_size - 1) / chunkSize + 1;
auto bufferSize = chunkSize * nchunks;
if (bufferSize >= BLCKSZ) {
/* store the original page if compression cannot save space */
free(work_buffer);
work_buffer = (char *)decompressedBuffer;
nchunks = BLCKSZ / chunkSize;
} else {
/* fill zero in the last chunk */
if (compress_buffer_size < bufferSize) {
auto leftSize = bufferSize - compress_buffer_size;
errno_t rc = memset_s(work_buffer + compress_buffer_size, leftSize, 0, leftSize);
securec_check(rc, "", "");
}
}
uint8 need_chunks = nchunks;
PageCompressAddr *pcAddr = GET_PAGE_COMPRESS_ADDR(header, chunkSize, blockNumber);
if (pcAddr->allocated_chunks < need_chunks) {
auto chunkno = pg_atomic_fetch_add_u32(&header->allocated_chunks, need_chunks - pcAddr->allocated_chunks);
for (uint8 i = pcAddr->allocated_chunks; i < need_chunks; ++i) {
pcAddr->chunknos[i] = ++chunkno;
}
pcAddr->allocated_chunks = need_chunks;
pcAddr->nchunks = need_chunks;
}
bool ok = this->WriteBackCompressedData(work_buffer, compress_buffer_size, blockNumber);
if (work_buffer != (char *)decompressedBuffer) {
free(work_buffer);
}
return ok;
}
#include "compression_algorithm.ini"

View File

@ -0,0 +1,40 @@
#ifndef OPENGAUSS_SERVER_OPENGAUSSCOMPRESSION_H
#define OPENGAUSS_SERVER_OPENGAUSSCOMPRESSION_H
#define FRONTEND 1
#include <stdio.h>
#include "c.h"
#include "storage/buf/block.h"
#include "storage/page_compression.h"
class OpenGaussCompression {
private:
FILE* pcaFd = nullptr;
FILE* pcdFd = nullptr;
char pcaFilePath[MAXPGPATH];
char pcdFilePath[MAXPGPATH];
PageCompressHeader* header = nullptr;
private:
int segmentNo;
BlockNumber blockNumber;
decltype(PageCompressHeader::chunk_size) chunkSize;
char decompressedBuffer[BLCKSZ];
bool byteConvert;
bool diffConvert;
public:
void SetFilePath(const char* filePath, int segNo);
virtual ~OpenGaussCompression();
bool TryOpen();
bool ReadChunkOfBlock(char* dst, size_t* dstLen, BlockNumber blockNumber);
bool WriteBackCompressedData(char* source, size_t sourceLen, BlockNumber blockNumber);
bool WriteBackUncompressedData();
void MarkUncompressedDirty();
BlockNumber GetMaxBlockNumber();
char* GetPcdFilePath();
char* GetDecompressedPage();
};
#endif // OPENGAUSS_SERVER_OPENGAUSSCOMPRESSION_H
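/*
* A minimal usage sketch of the class above (illustration only, not part of
* the commit). It assumes a hypothetical compressed relation file
* "base/16384/24576" whose _pca/_pcd companions exist, and reads block 0 the
* same way pagehack does below.
*/
OpenGaussCompression compression;
compression.SetFilePath("base/16384/24576", 0);
if (compression.TryOpen()) {
char chunkBuffer[BLCKSZ];
size_t chunkLen = 0;
if (compression.ReadChunkOfBlock(chunkBuffer, &chunkLen, 0)) {
char *page = compression.GetDecompressedPage(); /* decompressed 8K page */
(void)page;
}
}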

View File

@ -90,6 +90,9 @@
#include "tsdb/utils/constant_def.h"
#endif
#include "openGaussCompression.h"
/* Max number of pg_class oid, currently about 4000 */
#define MAX_PG_CLASS_ID 10000
/* Number of pg_class types */
@ -129,6 +132,7 @@ static const char* PgHeapRelName[] = {"pg_class",
"pg_am",
"pg_statistic",
"pg_toast"};
typedef enum SegmentType { SEG_HEAP, SEG_FSM, SEG_UHEAP, SEG_INDEX_BTREE, SEG_UNDO, SEG_UNKNOWN } SegmentType;
static void ParsePgClassTupleData(binary tupdata, int len, binary nullBitmap, int nattrs);
static void ParsePgIndexTupleData(binary tupdata, int len, binary nullBitmap, int nattrs);
@ -146,6 +150,8 @@ static void ParseToastTupleData(binary tupdata, int len, binary nullBitmap, int
static void ParseTDSlot(const char *page);
static void ParseToastIndexTupleData(binary tupdata, int len, binary nullBitmap, int nattrs);
static int parse_uncompressed_page_file(const char *filename, SegmentType type, const uint32 start_point,
const uint32 number_read);
static ParseHeapTupleData PgHeapRelTupleParser[] = {
ParsePgClassTupleData, // pg_class
@ -894,8 +900,6 @@ static const char* HACKINGTYPE[] = {"heap",
"segment"
};
typedef enum SegmentType { SEG_HEAP, SEG_FSM, SEG_UHEAP, SEG_INDEX_BTREE, SEG_UNDO, SEG_UNKNOWN } SegmentType;
const char* PageTypeNames[] = {"DATA", "FSM", "VM"};
#define GETHEAPSTRUCT(TUP) ((unsigned char*)(TUP) + (TUP)->t_hoff)
@ -3093,7 +3097,78 @@ static int parse_a_page(const char* buffer, int blkno, int blknum, SegmentType t
return true;
}
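/* convert (start, number) into an exclusive end block, clamped to blknum */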
static BlockNumber CalculateMaxBlockNumber(BlockNumber blknum, BlockNumber start, BlockNumber number)
{
if (start >= blknum) {
fprintf(stderr, "start point exceeds the total block number of the relation.\n");
return InvalidBlockNumber;
} else if ((start + number) > blknum) {
fprintf(stderr, "don't have %d blocks from block %d in the relation, only %d blocks\n", number, start,
(blknum - start));
number = blknum;
} else if (number == 0) {
number = blknum;
} else {
number += start;
}
return number;
}
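/*
* Parse a heap or btree segment file: try the compressed (_pca/_pcd) form
* first and fall back to the plain page file when it cannot be opened.
*/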
static int parse_page_file(const char* filename, SegmentType type, const uint32 start_point, const uint32 number_read)
{
if (type != SEG_HEAP && type != SEG_INDEX_BTREE) {
return parse_uncompressed_page_file(filename, type, start_point, number_read);
}
auto openGaussCompression = new OpenGaussCompression();
openGaussCompression->SetFilePath(filename, SegNo);
bool success = openGaussCompression->TryOpen();
if (!success) {
delete openGaussCompression;
return parse_uncompressed_page_file(filename, type, start_point, number_read);
}
BlockNumber start = start_point;
BlockNumber blknum = openGaussCompression->GetMaxBlockNumber();
BlockNumber number = CalculateMaxBlockNumber(blknum, start, number_read);
if (number == InvalidBlockNumber) {
delete openGaussCompression;
return false;
}
char compressed[BLCKSZ];
size_t compressedLen = 0;
while (start < number) {
if (!openGaussCompression->ReadChunkOfBlock(compressed, &compressedLen, start)) {
fprintf(stderr, "read block %d failed, filename: %s: %s\n", start, openGaussCompression->GetPcdFilePath(),
strerror(errno));
delete openGaussCompression;
return false;
}
if (!parse_a_page(openGaussCompression->GetDecompressedPage(), start, blknum, type)) {
fprintf(stderr, "Error during parsing block %d/%d\n", start, blknum);
delete openGaussCompression;
return false;
}
if ((write_back && num_item) || dirty_page) {
if (dirty_page) {
openGaussCompression->MarkUncompressedDirty();
}
if (!openGaussCompression->WriteBackUncompressedData()) {
fprintf(stderr, "write back failed, filename: %s: %s\n", openGaussCompression->GetPcdFilePath(),
strerror(errno));
delete openGaussCompression;
return false;
}
}
start++;
}
delete openGaussCompression;
return true;
}
static int parse_uncompressed_page_file(const char *filename, SegmentType type, const uint32 start_point,
const uint32 number_read)
{
char buffer[BLCKSZ];
FILE* fd = NULL;
@ -3121,21 +3196,9 @@ static int parse_page_file(const char* filename, SegmentType type, const uint32
blknum = size / BLCKSZ;
/* parse */
if (start >= blknum) {
fprintf(stderr, "start point exceeds the total block number of relation.\n");
fclose(fd);
number = CalculateMaxBlockNumber(blknum, start, number);
if (number == InvalidBlockNumber) {
return false;
} else if ((start + number) > blknum) {
fprintf(stderr,
"don't have %d blocks from block %d in the relation, only %d blocks\n",
number,
start,
(blknum - start));
number = blknum;
} else if (number == 0) {
number = blknum;
} else {
number += start;
}
Assert((start * BLCKSZ) < size);

View File

@ -717,7 +717,7 @@ else # not PGXS
endif
endif
override CPPFLAGS := $(CPPFLAGS) -I$(LIBODBC_INCLUDE_PATH) -I$(LIBOBS_INCLUDE_PATH) -I$(LIBCGROUP_INCLUDE_PATH) -I$(LIBOPENSSL_INCLUDE_PATH) -I${LIBORC_INCLUDE_PATH} -I${LIBPARQUET_INCLUDE_PATH} -I${PROTOBUF_INCLUDE_PATH} -I${BOOST_INCLUDE_PATH} -I$(LIBLLVM_INCLUDE_PATH) -I$(KERBEROS_INCLUDE_PATH) -I$(CJSON_INCLUDE_PATH) -I$(NUMA_INCLUDE_PATH) -I$(ZLIB_INCLUDE_PATH) -I$(LZ4_INCLUDE_PATH) -I$(LIBCURL_INCLUDE_PATH) -I$(DCF_INCLUDE_PATH)
override CPPFLAGS := $(CPPFLAGS) -I$(LIBODBC_INCLUDE_PATH) -I$(LIBOBS_INCLUDE_PATH) -I$(LIBCGROUP_INCLUDE_PATH) -I$(LIBOPENSSL_INCLUDE_PATH) -I${LIBORC_INCLUDE_PATH} -I${LIBPARQUET_INCLUDE_PATH} -I${PROTOBUF_INCLUDE_PATH} -I${BOOST_INCLUDE_PATH} -I$(LIBLLVM_INCLUDE_PATH) -I$(KERBEROS_INCLUDE_PATH) -I$(CJSON_INCLUDE_PATH) -I$(NUMA_INCLUDE_PATH) -I$(ZLIB_INCLUDE_PATH) -I$(LZ4_INCLUDE_PATH) -I$(LIBCURL_INCLUDE_PATH) -I$(DCF_INCLUDE_PATH) -I$(ZSTD_INCLUDE_PATH)
# GDS links to libevent
ifeq ($(enable_multiple_nodes), yes)
@ -852,6 +852,9 @@ endif
# append zlib for compression: zlib
LDFLAGS += -L$(ZLIB_LIB_PATH) -I$(ZLIB_INCLUDE_PATH)
#append zstd for compression: zstd
LDFLAGS += -L$(ZSTD_LIB_PATH) -I$(ZSTD_INCLUDE_PATH)
LDFLAGS += -L$(SECURE_LIB_PATH)
LDFLAGS += -L$(LIBOPENSSL_LIB_PATH)
LDFLAGS += -L$(LIBSTD_LIB_PATH)

View File

@ -26,7 +26,7 @@ ifneq "$(MAKECMDGOALS)" "clean"
endif
endif
endif
OBJS = file_ops.o datapagemap.o fetch.o filemap.o logging.o parsexlog.o pg_rewind.o
OBJS = file_ops.o datapagemap.o fetch.o filemap.o logging.o parsexlog.o pg_rewind.o compressed_rewind.o
#all:gs_rewind.a

View File

@ -0,0 +1,46 @@
/* -------------------------------------------------------------------------
*
* compressed_common.h
*
* Copyright (c) 2021 Huawei Technologies Co.,Ltd.
*
* -------------------------------------------------------------------------
*/
#ifndef OPENGAUSS_SERVER_COMPRESS_COMPRESSED_COMMON_H
#define OPENGAUSS_SERVER_COMPRESS_COMPRESSED_COMMON_H
#include "utils/atomic.h"
struct RewindCompressInfo {
bool compressed = false; /* compressed table or not */
uint32 oldBlockNumber = 0;
uint32 newBlockNumber = 0;
uint8 algorithm = 0; /* compressed algorithm */
uint16 chunkSize = 0; /* compressed chunk size */
};
struct CompressedPcaInfo {
char *pcaMap = NULL;
int pcaFd = -1;
char path[MAXPGPATH];
int32 chunkSize = 0;
int32 algorithm = 0;
};
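/* reset the entry's compression info, then copy it from infoPointer when that side is compressed */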
#define COPY_REWIND_COMPRESS_INFO(entry, infoPointer, oldBlock, newBlock) \
(entry)->rewindCompressInfo.oldBlockNumber = 0; \
(entry)->rewindCompressInfo.newBlockNumber = 0; \
(entry)->rewindCompressInfo.compressed = false; \
(entry)->rewindCompressInfo.algorithm = 0; \
(entry)->rewindCompressInfo.chunkSize = 0; \
if ((infoPointer) != NULL && (infoPointer)->compressed) { \
(entry)->rewindCompressInfo.oldBlockNumber = (oldBlock); \
(entry)->rewindCompressInfo.newBlockNumber = (newBlock); \
(entry)->rewindCompressInfo.compressed = true; \
(entry)->rewindCompressInfo.algorithm = (infoPointer)->algorithm; \
(entry)->rewindCompressInfo.chunkSize = (infoPointer)->chunkSize; \
}
#endif // OPENGAUSS_SERVER_COMPRESS_COMPRESSED_COMMON_H

View File

@ -0,0 +1,129 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2012-2018. All rights reserved.
*
* openGauss is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
* -------------------------------------------------------------------------
*
* compressed_rewind.cpp
* Functions for fetching compressed table.
*
*
* IDENTIFICATION
* ./src/bin/pg_rewind/compressed_rewind.cpp
*
* -------------------------------------------------------------------------
*/
#include "compressed_rewind.h"
#include "libpq/libpq-fe.h"
#include "lib/string.h"
#include "logging.h"
#include "filemap.h"
#include "utils/elog.h"
#include "file_ops.h"
void FormatPathToPca(const char* path, char* dst, size_t len, bool withPrefix)
{
errno_t rc;
if (withPrefix) {
rc = snprintf_s(dst, len, len - 1, "%s/" PCA_SUFFIX, pg_data, path);
} else {
rc = snprintf_s(dst, len, len - 1, PCA_SUFFIX, path);
}
securec_check_ss_c(rc, "\0", "\0");
}
void FormatPathToPcd(const char* path, char* dst, size_t len, bool withPrefix)
{
errno_t rc;
if (withPrefix) {
rc = snprintf_s(dst, len, len - 1, "%s/" PCD_SUFFIX, pg_data, path);
} else {
rc = snprintf_s(dst, len, len - 1, PCD_SUFFIX, path);
}
securec_check_ss_c(rc, "\0", "\0");
}
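/* read a single field of the pca header at the given file offset */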
template <typename T>
bool ReadCompressedInfo(T& t, off_t offset, FILE* file, char* pcaFilePath, size_t len)
{
if (fseeko(file, offset, SEEK_SET) != 0) {
pg_fatal("could not seek in file \"%s\": \"%lu\": %s\n", pcaFilePath, len, strerror(errno));
return false;
}
if (fread(&t, sizeof(t), 1, file) <= 0) {
pg_fatal("could not read file \"%s\": \"%lu\": %s\n", pcaFilePath, len, strerror(errno));
return false;
}
return true;
}
/**
* read RewindCompressInfo from the pca file header
* @param file file fp
* @param pcaFilePath file path, for error reporting
* @param rewindCompressInfo pointer to the result
* @return success or not
*/
static bool ReadRewindCompressedInfo(FILE* file, char* pcaFilePath, size_t len, RewindCompressInfo* rewindCompressInfo)
{
off_t offset = (off_t)offsetof(PageCompressHeader, chunk_size);
if (!ReadCompressedInfo(rewindCompressInfo->chunkSize, offset, file, pcaFilePath, len)) {
return false;
}
offset = (off_t)offsetof(PageCompressHeader, algorithm);
if (!ReadCompressedInfo(rewindCompressInfo->algorithm, offset, file, pcaFilePath, len)) {
return false;
}
offset = (off_t)offsetof(PageCompressHeader, nblocks);
if (!ReadCompressedInfo(rewindCompressInfo->oldBlockNumber, offset, file, pcaFilePath, len)) {
return false;
}
rewindCompressInfo->compressed = true;
return true;
}
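/*
* Parse the pca header fetched from the source side as an escaped bytea
* (the pchdr column built by fetchSourceFileList() with pg_read_binary_file).
*/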
bool FetchSourcePca(const char* strValue, RewindCompressInfo* rewindCompressInfo)
{
size_t length = 0;
PageCompressHeader* ptr = (PageCompressHeader*)PQunescapeBytea((const unsigned char*)strValue, &length);
rewindCompressInfo->compressed = false;
if (length == sizeof(PageCompressHeader)) {
rewindCompressInfo->compressed = true;
rewindCompressInfo->algorithm = ptr->algorithm;
rewindCompressInfo->newBlockNumber = ptr->nblocks;
rewindCompressInfo->oldBlockNumber = 0;
rewindCompressInfo->chunkSize = ptr->chunk_size;
}
PQfreemem(ptr);
return rewindCompressInfo->compressed;
}
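/* fill rewindCompressInfo from the local pca companion of tablePath, if one exists */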
bool ProcessLocalPca(const char* tablePath, RewindCompressInfo* rewindCompressInfo)
{
rewindCompressInfo->compressed = false;
if (!isRelDataFile(tablePath)) {
return false;
}
char pcaFilePath[MAXPGPATH];
FormatPathToPca(tablePath, pcaFilePath, MAXPGPATH, true);
FILE* file = fopen(pcaFilePath, "rb");
if (file == NULL) {
if (errno == ENOENT) {
return false;
}
pg_fatal("could not open file \"%s\": %s\n", pcaFilePath, strerror(errno));
return false;
}
bool success = ReadRewindCompressedInfo(file, pcaFilePath, MAXPGPATH, rewindCompressInfo);
fclose(file);
return success;
}

View File

@ -0,0 +1,21 @@
/* -------------------------------------------------------------------------
*
* compressed_rewind.h
*
* Copyright (c) 2021 Huawei Technologies Co.,Ltd.
*
* -------------------------------------------------------------------------
*/
#ifndef OPENGAUSS_SERVER_COMPRESS_COMPRESSED_REWIND_H
#define OPENGAUSS_SERVER_COMPRESS_COMPRESSED_REWIND_H
#include "compressed_common.h"
#include "storage/page_compression.h"
#include "storage/smgr/relfilenode.h"
extern bool FetchSourcePca(const char* strValue, RewindCompressInfo* rewindCompressInfo);
extern bool ProcessLocalPca(const char* tablePath, RewindCompressInfo* rewindCompressInfo);
extern void FormatPathToPca(const char* path, char* dst, size_t len, bool withPrefix = false);
extern void FormatPathToPcd(const char* path, char* dst, size_t len, bool withPrefix = false);
#endif // OPENGAUSS_SERVER_COMPRESS_COMPRESSED_REWIND_H

View File

@ -23,6 +23,7 @@
#include "libpq/libpq-fe.h"
#include "libpq/libpq-int.h"
#include "common/fe_memutils.h"
#include "compressed_rewind.h"
#include "catalog/catalog.h"
#include "catalog/pg_type.h"
@ -47,11 +48,11 @@ const uint64 MAX_FILE_SIZE = 0xFFFFFFFF;
#define MAX_PARAM_LEN 1024
static BuildErrorCode receiveFileChunks(const char* sql, FILE* file);
static BuildErrorCode execute_pagemap(datapagemap_t* pagemap, const char* path, FILE* file);
static BuildErrorCode execute_pagemap(file_entry_t* entry, FILE* file);
static char* run_simple_query(const char* sql);
static BuildErrorCode recurse_dir(const char* datadir, const char* path, process_file_callback_t callback);
static void get_slot_name_by_app_name(void);
static BuildErrorCode CheckResultSet(PGresult* pgResult);
BuildErrorCode libpqConnect(const char* connstr)
{
PGresult* res = NULL;
@ -246,10 +247,22 @@ BuildErrorCode fetchSourceFileList()
* general, so if the admin has put any custom symbolic links in the data
* directory, they won't be copied correctly.
*/
sql = "SELECT path, size, isdir, pg_tablespace_location(pg_tablespace.oid) AS link_target \n"
/* skip pca/pcd files and join each pca header onto its table file row */
sql = "WITH tmp_table AS (\n"
"SELECT path, size, isdir, pg_tablespace_location(pg_tablespace.oid) AS link_target \n"
"FROM (SELECT * FROM pg_stat_file_recursive('.')) AS files \n"
"LEFT OUTER JOIN pg_tablespace ON files.path like 'pg_tblspc/%' AND oid::text = files.filename\n";
res = PQexec(conn, sql);
"LEFT OUTER JOIN pg_tablespace ON files.path ~ '^pg_tblspc/' AND oid :: text = files.filename\n"
"),compressed_address AS (SELECT path pca_path, substr(path, 0, length(path) - 4) AS table_path\n"
"FROM pg_stat_file_recursive('.') WHERE path ~ '_pca$' AND length(path) > 4)\n"
"SELECT path, size, isdir, link_target,\n"
"CASE WHEN pca_path IS NOT NULL THEN pg_read_binary_file(pca_path, 0, %d, true)\n"
"ELSE NULL END AS pchdr\n"
"FROM tmp_table LEFT JOIN compressed_address\n"
"ON tmp_table.path = compressed_address.table_path\nWHERE path !~ '_pca$' AND path !~ '_pcd$'\n";
char sqlbuf[1024];
int rc = snprintf_s(sqlbuf, sizeof(sqlbuf), sizeof(sqlbuf) - 1, sql, SIZE_OF_PAGE_COMPRESS_HEADER_DATA);
securec_check_ss_c(rc, "\0", "\0");
res = PQexec(conn, (const char*)sqlbuf);
if (PQresultStatus(res) != PGRES_TUPLES_OK) {
pg_log(PG_ERROR, "could not fetch file list: %s", PQresultErrorMessage(res));
@ -257,7 +270,7 @@ BuildErrorCode fetchSourceFileList()
}
/* sanity check the result set */
if (PQnfields(res) != 4) {
if (PQnfields(res) != 5) {
pg_fatal("unexpected result set while fetching file list\n");
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
}
@ -300,7 +313,13 @@ BuildErrorCode fetchSourceFileList()
}
}
}
process_source_file(path, type, filesize, link_target);
RewindCompressInfo rewindCompressInfo;
RewindCompressInfo *pointer = NULL;
if (!PQgetisnull(res, i, 4) && FetchSourcePca(PQgetvalue(res, i, 4), &rewindCompressInfo)) {
filesize = rewindCompressInfo.newBlockNumber * BLCKSZ;
pointer = &rewindCompressInfo;
}
process_source_file(path, type, filesize, link_target, pointer);
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
}
PQclear(res);
@ -356,7 +375,7 @@ static BuildErrorCode receiveFileChunks(const char* sql, FILE* file)
}
/* sanity check the result set */
if (PQnfields(res) != 4 || PQntuples(res) != 1) {
if (PQnfields(res) != 7 || PQntuples(res) != 1) {
pg_fatal("unexpected result set size while fetching remote files\n");
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
}
@ -385,6 +404,8 @@ static BuildErrorCode receiveFileChunks(const char* sql, FILE* file)
pg_fatal("unexpected result length while fetching remote files\n");
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
}
/* check compressed result set */
CheckResultSet(res);
/* Read result set to local variables */
errorno = memcpy_s(&chunkoff, sizeof(int32), PQgetvalue(res, 0, 1), sizeof(int32));
@ -420,15 +441,37 @@ static BuildErrorCode receiveFileChunks(const char* sql, FILE* file)
continue;
}
pg_log(PG_DEBUG, "received chunk for file \"%s\", offset %d, size %d\n",
filename, chunkoff, chunksize);
fprintf(file, "received chunk for file \"%s\", offset %d, size %d\n",
filename, chunkoff, chunksize);
int32 algorithm;
errorno = memcpy_s(&algorithm, sizeof(int32), PQgetvalue(res, 0, 4), sizeof(int32));
securec_check_c(errorno, "\0", "\0");
algorithm = ntohl(algorithm);
if (algorithm == 0) {
pg_log(PG_DEBUG, "received chunk for file \"%s\", offset %d, size %d\n", filename, chunkoff, chunksize);
fprintf(file, "received chunk for file \"%s\", offset %d, size %d\n", filename, chunkoff, chunksize);
open_target_file(filename, false);
pg_free(filename);
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
write_target_range(chunk, chunkoff, chunksize, chunkspace);
} else {
int32 chunkSize;
int errorno = memcpy_s(&chunkSize, sizeof(int32), PQgetvalue(res, 0, 5), sizeof(int32));
securec_check_c(errorno, "\0", "\0");
chunkSize = ntohl(chunkSize);
bool rebuild = *PQgetvalue(res, 0, 6) != 0;
char dst[MAXPGPATH];
/* open pca */
FormatPathToPca(filename, dst, MAXPGPATH, false);
OpenCompressedPcaFile(dst, chunkSize, algorithm, rebuild);
open_target_file(filename, false);
pg_free(filename);
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
write_target_range(chunk, chunkoff, chunksize, chunkspace);
/* open pcd */
FormatPathToPcd(filename, dst, MAXPGPATH, false);
open_target_file(dst, false);
BlockNumber blockNumber = chunkoff;
size_t blockSize = chunkspace;
/* fetch result */
FetchCompressedFile(chunk, blockNumber, blockSize);
}
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
PQclear(res);
res = NULL;
@ -436,6 +479,32 @@ static BuildErrorCode receiveFileChunks(const char* sql, FILE* file)
return BUILD_SUCCESS;
}
/**
* check result set of compressed tables
* @param res result set
* @return success or not
*/
static BuildErrorCode CheckResultSet(PGresult* res)
{
#define PQ_TYPE(index, type) (PQftype(res, (index)) != (type))
if (PQ_TYPE(4, INT4OID) || PQ_TYPE(5, INT4OID) || PQ_TYPE(6, BOOLOID)) {
pg_fatal(
"CheckResultSet: unexpected data types: %u %u %u\n", PQftype(res, 4), PQftype(res, 5), PQftype(res, 6));
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
}
#define PQ_FORMAT(index) (PQfformat(res, (index)) != 1)
if (PQ_FORMAT(4) || PQ_FORMAT(5) || PQ_FORMAT(6)) {
pg_fatal("unexpected result format while fetching remote files\n");
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
}
#define PQ_ISNULL(index) (PQgetisnull(res, 0, (index)))
if (PQ_ISNULL(4) || PQ_ISNULL(5) || PQ_ISNULL(6)) {
pg_fatal("unexpected null values in result while fetching remote files\n");
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
}
return BUILD_SUCCESS;
}
/*
* Receive a single file as a malloc'd buffer.
*/
@ -489,6 +558,43 @@ error:
return result;
}
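/**
* queue a block range of a compressed file into the fetchchunks temporary
* table; rebuild fetches the whole file starting at block 0
* @param entry file entry with its rewindCompressInfo
* @param rebuild whether the target pca must be recreated
*/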
static void CompressedFileCopy(const file_entry_t* entry, bool rebuild)
{
Assert(!rebuild || entry->rewindCompressInfo.oldBlockNumber == 0);
if (dry_run) {
return;
}
char linebuf[MAXPGPATH + 47];
int ret = snprintf_s(linebuf,
sizeof(linebuf),
sizeof(linebuf) - 1,
"%s\t%u\t%u\t%u\t%u\t%u\n",
entry->path,
entry->rewindCompressInfo.oldBlockNumber,
entry->rewindCompressInfo.newBlockNumber - entry->rewindCompressInfo.oldBlockNumber,
entry->rewindCompressInfo.algorithm,
entry->rewindCompressInfo.chunkSize,
rebuild);
securec_check_ss_c(ret, "\0", "\0");
if (PQputCopyData(conn, linebuf, strlen(linebuf)) != 1) {
pg_fatal("could not send COPY data: %s", PQerrorMessage(conn));
}
pg_log(PG_PROGRESS, "CompressedFileCopy: %s", linebuf);
}
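/* remove the data file together with its _pca and _pcd companions */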
static void CompressedFileRemove(const file_entry_t* entry)
{
remove_target((file_entry_t*) entry);
char* path = entry->path;
char dst[MAXPGPATH];
FormatPathToPca(path, dst, MAXPGPATH);
remove_target_file(dst, false);
FormatPathToPcd(path, dst, MAXPGPATH);
remove_target_file(dst, false);
pg_log(PG_PROGRESS, "CompressedFileRemove: %s\n", path);
}
/*
* Write a file range to a temporary table in the server.
*
@ -498,7 +604,7 @@ error:
*/
static void fetch_file_range(const char* path, unsigned int begin, unsigned int end)
{
char linebuf[MAXPGPATH + 23];
char linebuf[MAXPGPATH + 47];
int ss_c = 0;
/* Split the range into CHUNKSIZE chunks */
@ -510,12 +616,12 @@ static void fetch_file_range(const char* path, unsigned int begin, unsigned int
} else {
len = end - begin;
}
ss_c = snprintf_s(linebuf, sizeof(linebuf), sizeof(linebuf) - 1, "%s\t%u\t%u\n", path, begin, len);
ss_c = snprintf_s(
linebuf, sizeof(linebuf), sizeof(linebuf) - 1, "%s\t%u\t%u\t%u\t%u\t%u\n", path, begin, len, 0, 0, 0);
securec_check_ss_c(ss_c, "\0", "\0");
if (PQputCopyData(conn, linebuf, strlen(linebuf)) != 1)
pg_fatal("could not send COPY data: %s", PQerrorMessage(conn));
begin += len;
}
}
@ -534,7 +640,8 @@ BuildErrorCode executeFileMap(filemap_t* map, FILE* file)
* First create a temporary table, and load it with the blocks that we
* need to fetch.
*/
sql = "CREATE TEMPORARY TABLE fetchchunks(path text, begin int4, len int4);";
sql = "CREATE TEMPORARY TABLE fetchchunks(path text, begin int4, len int4, "
"algorithm int4, chunksize int4, rebuild bool);";
res = PQexec(conn, sql);
if (PQresultStatus(res) != PGRES_COMMAND_OK) {
pg_fatal("could not create temporary table: %s", PQresultErrorMessage(res));
@ -558,11 +665,16 @@ BuildErrorCode executeFileMap(filemap_t* map, FILE* file)
entry = map->array[i];
/* report all the path to check whether it's correct */
if (entry->rewindCompressInfo.compressed) {
pg_log(PG_PROGRESS, "path: %s, type: %d, action: %d\n", entry->path, entry->type, entry->action);
}
pg_log(PG_DEBUG, "path: %s, type: %d, action: %d\n", entry->path, entry->type, entry->action);
fprintf(file, "path: %s, type: %d, action: %d\n", entry->path, entry->type, entry->action);
/* If this is a relation file, copy the modified blocks */
execute_pagemap(&entry->pagemap, entry->path, file);
bool compressed = entry->rewindCompressInfo.compressed;
execute_pagemap(entry, file);
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
switch (entry->action) {
@ -571,29 +683,47 @@ BuildErrorCode executeFileMap(filemap_t* map, FILE* file)
break;
case FILE_ACTION_COPY:
/* Truncate the old file out of the way, if any */
open_target_file(entry->path, true);
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
fetch_file_range(entry->path, 0, entry->newsize);
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
if (compressed) {
CompressedFileCopy(entry, true);
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
} else {
/* Truncate the old file out of the way, if any */
open_target_file(entry->path, true);
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
fetch_file_range(entry->path, 0, entry->newsize);
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
}
break;
case FILE_ACTION_TRUNCATE:
truncate_target_file(entry->path, entry->newsize);
if (compressed) {
CompressedFileTruncate(entry->path, &entry->rewindCompressInfo);
} else {
truncate_target_file(entry->path, entry->newsize);
}
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
break;
case FILE_ACTION_COPY_TAIL:
fetch_file_range(entry->path, entry->oldsize, entry->newsize);
if (compressed) {
CompressedFileCopy(entry, false);
} else {
fetch_file_range(entry->path, entry->oldsize, entry->newsize);
}
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
break;
case FILE_ACTION_REMOVE:
remove_target(entry);
if (compressed) {
CompressedFileRemove(entry);
} else {
remove_target(entry);
}
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
break;
case FILE_ACTION_CREATE:
Assert(!compressed);
create_target(entry);
PG_CHECKBUILD_AND_FREE_PGRESULT_RETURN(res);
break;
@ -625,9 +755,14 @@ BuildErrorCode executeFileMap(filemap_t* map, FILE* file)
* temporary table. Now, actually fetch all of those ranges.
*/
sql = "SELECT path, begin, \n"
" pg_read_binary_file(path, begin, len, true) AS chunk,\n"
" len \n"
"FROM fetchchunks\n";
" pg_read_binary_file(path, begin, len, true) AS chunk, len, algorithm, chunksize,rebuild \n"
"FROM fetchchunks where algorithm =0 \n"
"union all \n"
"select (json->>'path')::text as path, (json->>'blocknum')::int4 as begin, (json->>'data')::bytea as chunk,\n"
"(json->>'len')::int4 as len, algorithm, chunksize,rebuild \n"
"from (select row_to_json(pg_read_binary_file_blocks(path,begin,len)) json, algorithm, chunksize,rebuild \n"
"from fetchchunks where algorithm !=0) \n"
"order by path, begin;";
fprintf(file, "fetch and write file based on temporary table fetchchunks.\n");
return receiveFileChunks(sql, file);
@ -687,7 +822,7 @@ BuildErrorCode backupFileMap(filemap_t* map)
/* to be supported later */
break;
case FILE_ACTION_COPY:
case FILE_ACTION_COPY: {
/* create fake file for restore when file not exist, otherwise, backup file */
file_entry_t statbuf;
if (targetFilemapSearch(entry->path, &statbuf) < 0) {
@ -696,6 +831,7 @@ BuildErrorCode backupFileMap(filemap_t* map)
backup_target_file(entry->path, divergeXlogFileName);
}
break;
}
case FILE_ACTION_COPY_TAIL:
case FILE_ACTION_TRUNCATE:
@ -719,17 +855,60 @@ BuildErrorCode backupFileMap(filemap_t* map)
return BUILD_SUCCESS;
}
static BuildErrorCode execute_pagemap(datapagemap_t* pagemap, const char* path, FILE* file)
/**
* combine runs of contiguous block numbers and copy each run in one request
* @param entry file entry
* @param file dump file for logging
*/
static void CompressedFileCopy(file_entry_t* entry, FILE* file)
{
datapagemap_t* pagemap = &entry->pagemap;
datapagemap_iterator_t* iter = datapagemap_iterate(pagemap);
BlockNumber blkno;
file_entry_t fileEntry;
fileEntry.path = entry->path;
fileEntry.rewindCompressInfo = entry->rewindCompressInfo;
int invalidNumber = -1;
long int before = invalidNumber;
while (datapagemap_next(iter, &blkno)) {
fprintf(file, " block %u\n", blkno);
if (before == -1) {
fileEntry.rewindCompressInfo.oldBlockNumber = blkno;
before = blkno;
} else {
if (before == blkno - 1) {
before = blkno;
} else {
fileEntry.rewindCompressInfo.newBlockNumber = before + 1;
CompressedFileCopy(&fileEntry, false);
fileEntry.rewindCompressInfo.oldBlockNumber = blkno;
before = blkno;
}
}
}
if (before != invalidNumber) {
fileEntry.rewindCompressInfo.newBlockNumber = before + 1;
CompressedFileCopy(&fileEntry, false);
}
}
static BuildErrorCode execute_pagemap(file_entry_t* entry, FILE* file)
{
datapagemap_iterator_t* iter = NULL;
BlockNumber blkno;
off_t offset;
datapagemap_t* pagemap = &entry->pagemap;
char* path = entry->path;
iter = datapagemap_iterate(pagemap);
while (datapagemap_next(iter, &blkno)) {
fprintf(file, " block %u\n", blkno);
offset = blkno * BLCKSZ;
fetch_file_range(path, offset, offset + BLCKSZ);
if (entry->rewindCompressInfo.compressed) {
CompressedFileCopy(entry, file);
} else {
while (datapagemap_next(iter, &blkno)) {
fprintf(file, " block %u\n", blkno);
offset = blkno * BLCKSZ;
fetch_file_range(path, offset, offset + BLCKSZ);
}
}
pg_free(iter);
return BUILD_SUCCESS;
@ -775,9 +954,19 @@ static BuildErrorCode recurse_dir(const char* datadir, const char* parentpath, p
struct stat fst;
char fullpath[MAXPGPATH];
char path[MAXPGPATH];
const size_t MINPCANAMESIZE = 4;
if (strcmp(xlde->d_name, ".") == 0 || strcmp(xlde->d_name, "..") == 0)
continue;
/* Skip compressed page files */
size_t dirNameLen = strlen(xlde->d_name);
if (dirNameLen >= MINPCANAMESIZE) {
const char* suffix = xlde->d_name + dirNameLen - MINPCANAMESIZE;
if (strncmp(suffix, "_pca", MINPCANAMESIZE) == 0 || strncmp(suffix, "_pcd", MINPCANAMESIZE) == 0) {
continue;
}
}
ss_c = snprintf_s(fullpath, MAXPGPATH, MAXPGPATH - 1, "%s/%s", fullparentpath, xlde->d_name);
securec_check_ss_c(ss_c, "\0", "\0");
@ -808,8 +997,15 @@ static BuildErrorCode recurse_dir(const char* datadir, const char* parentpath, p
continue;
if (S_ISREG(fst.st_mode)) {
if ((uint64)fst.st_size <= MAX_FILE_SIZE) {
callback(path, FILE_TYPE_REGULAR, fst.st_size, NULL);
uint64 fileSize = (uint64)fst.st_size;
RewindCompressInfo rewindCompressInfo;
RewindCompressInfo *pointer = NULL;
if (ProcessLocalPca(path, &rewindCompressInfo)) {
fileSize = rewindCompressInfo.oldBlockNumber * BLCKSZ;
pointer = &rewindCompressInfo;
}
if (fileSize <= MAX_FILE_SIZE) {
callback(path, FILE_TYPE_REGULAR, fileSize, NULL, pointer);
if (increment_return_code != BUILD_SUCCESS) {
(void)closedir(xldir);
}
@ -818,7 +1014,7 @@ static BuildErrorCode recurse_dir(const char* datadir, const char* parentpath, p
pg_log(PG_WARNING, "file size of \"%s\" is over %ld\n", fullpath, MAX_FILE_SIZE);
}
} else if (S_ISDIR(fst.st_mode)) {
callback(path, FILE_TYPE_DIRECTORY, 0, NULL);
callback(path, FILE_TYPE_DIRECTORY, 0, NULL, NULL);
if (increment_return_code != BUILD_SUCCESS) {
(void)closedir(xldir);
}
@ -843,7 +1039,7 @@ static BuildErrorCode recurse_dir(const char* datadir, const char* parentpath, p
}
link_target[len] = '\0';
callback(path, FILE_TYPE_SYMLINK, 0, link_target);
callback(path, FILE_TYPE_SYMLINK, 0, link_target, NULL);
/*
* If it's a symlink within pg_tblspc, we need to recurse into it,

View File

@ -42,7 +42,9 @@ extern XLogRecPtr libpqGetCurrentXlogInsertLocation(void);
extern void libpqRequestCheckpoint(void);
typedef void (*process_file_callback_t)(const char* path, file_type_t type, size_t size, const char* link_target);
typedef void (*process_file_callback_t)(const char* path, file_type_t type, size_t oldsize, const char* link_target,
const RewindCompressInfo* rewindCompressInfo);
extern BuildErrorCode traverse_datadir(const char* datadir, process_file_callback_t callback);
extern void get_source_slotname(void);

View File

@ -25,6 +25,8 @@
#include "common/fe_memutils.h"
#include "common/build_query/build_query.h"
#include "compressed_rewind.h"
#include "storage/page_compression_impl.h"
#include "replication/replicainternal.h"
#define BLOCKSIZE (8 * 1024)
@ -36,6 +38,8 @@ static int dstfd = -1;
static char dstpath[MAXPGPATH] = "";
static bool g_isRelDataFile = false;
static CompressedPcaInfo g_compressedPcaInfo;
static void create_target_dir(const char* path);
static void remove_target_dir(const char* path);
static void create_target_symlink(const char* path, const char* slink);
@ -101,7 +105,7 @@ void close_target_file(void)
dstfd = -1;
}
void write_target_range(char* buf, off_t begin, size_t size, int space)
void write_target_range(char* buf, off_t begin, size_t size, int space, bool compressed)
{
int writeleft;
char* p = NULL;
@ -112,7 +116,7 @@ void write_target_range(char* buf, off_t begin, size_t size, int space)
if (dry_run)
return;
if (begin % BLOCKSIZE != 0) {
if (!compressed && begin % BLOCKSIZE != 0) {
(void)close(dstfd);
dstfd = -1;
pg_fatal("seek position %ld in target file \"%s\" is not in BLOCKSIZEs\n", size, dstpath);
@ -1221,3 +1225,142 @@ bool tablespaceDataIsValid(const char* path)
return true;
}
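/*
* Truncate a compressed relation: zero the pca address entries of the removed
* blocks, then shrink the pcd file to the highest chunk still in use.
*/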
void CompressedFileTruncate(const char *path, const RewindCompressInfo *rewindCompressInfo)
{
if (dry_run) {
return;
}
uint16 chunkSize = rewindCompressInfo->chunkSize;
BlockNumber oldBlockNumber = rewindCompressInfo->oldBlockNumber;
BlockNumber newBlockNumber = rewindCompressInfo->newBlockNumber;
Assert(oldBlockNumber > newBlockNumber);
char pcaPath[MAXPGPATH];
FormatPathToPca(path, pcaPath, MAXPGPATH, true);
int pcaFd = open(pcaPath, O_RDWR | PG_BINARY, 0600);
if (pcaFd < 0) {
pg_fatal("CompressedFileTruncate: could not open file \"%s\": %s\n", pcaPath, strerror(errno));
return;
}
PageCompressHeader* map = pc_mmap(pcaFd, chunkSize, false);
if (map == (PageCompressHeader*)MAP_FAILED) {
pg_fatal("CompressedFileTruncate: Failed to mmap file \"%s\": %s\n", pcaPath, strerror(errno));
return;
}
/* zero the address entries of the truncated blocks */
for (BlockNumber blockNumber = newBlockNumber; blockNumber < oldBlockNumber; ++blockNumber) {
PageCompressAddr* addr = GET_PAGE_COMPRESS_ADDR(map, chunkSize, blockNumber);
for (size_t i = 0; i < addr->allocated_chunks; ++i) {
addr->chunknos[i] = 0;
}
addr->nchunks = 0;
addr->allocated_chunks = 0;
addr->checksum = 0;
}
map->last_synced_nblocks = map->nblocks = newBlockNumber;
/* find the max used chunk number */
pc_chunk_number_t beforeUsedChunks = map->allocated_chunks;
pc_chunk_number_t max_used_chunkno = 0;
for (BlockNumber blockNumber = 0; blockNumber < newBlockNumber; ++blockNumber) {
PageCompressAddr* addr = GET_PAGE_COMPRESS_ADDR(map, chunkSize, blockNumber);
for (uint8 i = 0; i < addr->allocated_chunks; i++) {
if (addr->chunknos[i] > max_used_chunkno) {
max_used_chunkno = addr->chunknos[i];
}
}
}
map->allocated_chunks = map->last_synced_allocated_chunks = max_used_chunkno;
/* truncate the pcd file */
if (beforeUsedChunks > max_used_chunkno) {
char pcdPath[MAXPGPATH];
FormatPathToPcd(path, pcdPath, MAXPGPATH, false);
truncate_target_file(pcdPath, max_used_chunkno * chunkSize);
}
pc_munmap(map);
pg_log(PG_PROGRESS, "CompressedFileTruncate: %s\n", path);
}
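/* open (or, when rebuilding, recreate) the pca file and keep it mmapped in g_compressedPcaInfo */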
void OpenCompressedPcaFile(const char* fileName, int32 chunkSize, int32 algorithm, bool rebuild)
{
if (dry_run) {
return;
}
if (g_compressedPcaInfo.pcaFd != -1 && strcmp(fileName, &g_compressedPcaInfo.path[strlen(pg_data) + 1]) == 0) {
/* already open */
return;
}
CloseCompressedPcaFile();
int rc = snprintf_s(g_compressedPcaInfo.path, sizeof(g_compressedPcaInfo.path),
sizeof(g_compressedPcaInfo.path) - 1,
"%s/%s", pg_data, fileName);
securec_check_ss_c(rc, "\0", "\0");
int mode = O_RDWR | PG_BINARY;
mode = rebuild ? (mode | O_TRUNC | O_CREAT) : mode;
g_compressedPcaInfo.pcaFd = open(g_compressedPcaInfo.path, mode, S_IRUSR | S_IWUSR);
if (g_compressedPcaInfo.pcaFd < 0) {
pg_fatal("could not open compressed pca file \"%s\": %s\n", g_compressedPcaInfo.path, strerror(errno));
return;
}
g_compressedPcaInfo.algorithm = algorithm;
g_compressedPcaInfo.chunkSize = chunkSize;
g_compressedPcaInfo.pcaMap = (char*) pc_mmap(g_compressedPcaInfo.pcaFd, chunkSize, false);
if ((void*)g_compressedPcaInfo.pcaMap == MAP_FAILED) {
pg_fatal("OpenCompressedPcaFile: Failed to mmap file \"%s\": %s\n", g_compressedPcaInfo.path, strerror(errno));
return;
}
}
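/* unmap and close the currently open pca file, if any */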
void CloseCompressedPcaFile()
{
if (g_compressedPcaInfo.pcaFd == -1) {
return;
}
pc_munmap((PageCompressHeader*)g_compressedPcaInfo.pcaMap);
if (close(g_compressedPcaInfo.pcaFd) != 0) {
pg_fatal("could not close target file \"%s\": %s\n", g_compressedPcaInfo.path, gs_strerror(errno));
}
g_compressedPcaInfo.pcaFd = -1;
g_compressedPcaInfo.pcaMap = NULL;
g_compressedPcaInfo.chunkSize = 0;
g_compressedPcaInfo.algorithm = 0;
}
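/*
* Write one received block into the pcd file: allocate chunks for the block
* in the mmapped pca when needed, then write each contiguous chunk run.
*/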
void FetchCompressedFile(char* buf, BlockNumber blockNumber, int32 size)
{
int32 chunkSize = g_compressedPcaInfo.chunkSize;
int needChunks = size / chunkSize;
PageCompressHeader* pcMap = (PageCompressHeader*) g_compressedPcaInfo.pcaMap;
PageCompressAddr* pcAddr = GET_PAGE_COMPRESS_ADDR(pcMap, chunkSize, blockNumber);
/* allocate additional chunks for this block if needed */
if (pcAddr->allocated_chunks < needChunks) {
auto chunkno = pg_atomic_fetch_add_u32(&pcMap->allocated_chunks, needChunks - pcAddr->allocated_chunks);
for (int i = pcAddr->allocated_chunks; i < needChunks; i++) {
pcAddr->chunknos[i] = ++chunkno;
}
pcAddr->allocated_chunks = needChunks;
}
for (int32 i = 0; i < needChunks; ++i) {
auto buffer_pos = buf + chunkSize * i;
off_t seekpos = (off_t) OFFSET_OF_PAGE_COMPRESS_CHUNK(chunkSize, pcAddr->chunknos[i]);
int32 start = i;
while (i < needChunks - 1 && pcAddr->chunknos[i + 1] == pcAddr->chunknos[i] + 1) {
i++;
}
int write_amount = chunkSize * (i - start + 1);
/* dstfd was opened by open_target_file() before this call */
write_target_range(buffer_pos, seekpos, write_amount, 0, true);
}
pcAddr->nchunks = pcAddr->allocated_chunks;
pcAddr->checksum = AddrChecksum32(blockNumber, pcAddr);
}

View File

@ -11,10 +11,11 @@
#define FILE_OPS_H
#include "filemap.h"
#include "compressed_common.h"
extern char* pg_data;
extern void open_target_file(const char* path, bool trunc);
extern void write_target_range(char* buf, off_t begin, size_t size, int space);
extern void write_target_range(char* buf, off_t begin, size_t size, int space, bool compressed = false);
extern void close_target_file(void);
extern void truncate_target_file(const char* path, off_t newsize);
extern void create_target(file_entry_t* t);
@ -40,6 +41,9 @@ extern bool restore_target_dir(const char* datadir_target, bool remove_from);
extern void delete_target_file(const char* file);
extern bool isPathInFilemap(const char* path);
extern bool tablespaceDataIsValid(const char* path);
extern void CompressedFileTruncate(const char* path, const RewindCompressInfo* rewindCompressInfo);
void FetchCompressedFile(char* buf, BlockNumber begin, int32 size);
void OpenCompressedPcaFile(const char* fileName, int32 chunkSize, int32 algorithm, bool rebuild);
void CloseCompressedPcaFile();
#endif /* FILE_OPS_H */

View File

@ -19,6 +19,7 @@
#include "catalog/catalog.h"
#include "catalog/pg_tablespace.h"
#include "common/fe_memutils.h"
#include "compressed_rewind.h"
#include "storage/cu.h"
#include "storage/smgr/fd.h"
@ -127,7 +128,8 @@ void filemapInit(void)
filemaptarget = filemap_create();
}
void processTargetFileMap(const char* path, file_type_t type, size_t oldsize, const char* link_target)
void processTargetFileMap(const char* path, file_type_t type, size_t oldsize, const char* link_target,
const RewindCompressInfo* info)
{
file_entry_t* entry = NULL;
filemap_t* map = filemaptarget;
@ -143,6 +145,8 @@ void processTargetFileMap(const char* path, file_type_t type, size_t oldsize, co
entry->pagemap.bitmap = NULL;
entry->pagemap.bitmapsize = 0;
COPY_REWIND_COMPRESS_INFO(entry, info, info == NULL ? 0 : info->oldBlockNumber, 0)
if (map->last != NULL) {
map->last->next = entry;
map->last = entry;
@ -211,7 +215,7 @@ BuildErrorCode targetFilemapProcess(void)
filemap_t* map = filemaptarget;
for (i = 0; i < map->narray; i++) {
entry = map->array[i];
process_target_file(entry->path, entry->type, entry->oldsize, entry->link_target);
process_target_file(entry->path, entry->type, entry->oldsize, entry->link_target, &entry->rewindCompressInfo);
}
return BUILD_SUCCESS;
}
@ -322,7 +326,8 @@ static bool process_source_file_sanity_check(const char* path, file_type_t type)
* action needs to be taken for the file, depending on whether the file
* exists in the target and whether the size matches.
*/
void process_source_file(const char* path, file_type_t type, size_t newsize, const char* link_target)
void process_source_file(const char* path, file_type_t type, size_t newsize, const char* link_target,
RewindCompressInfo* info)
{
bool exists = false;
char localpath[MAXPGPATH];
@ -330,6 +335,7 @@ void process_source_file(const char* path, file_type_t type, size_t newsize, con
filemap_t* map = filemap;
file_action_t action = FILE_ACTION_NONE;
size_t oldsize = 0;
BlockNumber oldBlockNumber = 0;
file_entry_t* entry = NULL;
int ss_c = 0;
bool isreldatafile = false;
@ -480,7 +486,21 @@ void process_source_file(const char* path, file_type_t type, size_t newsize, con
* replayed.
*/
/* mod blocksize 8k to avoid half page write */
oldsize = statbuf.oldsize;
RewindCompressInfo oldRewindCompressInfo;
bool sourceCompressed = info != NULL;
bool targetCompressed = ProcessLocalPca(path, &oldRewindCompressInfo);
if (sourceCompressed && !targetCompressed) {
info->compressed = false;
action = FILE_ACTION_REMOVE;
break;
} else if (!sourceCompressed && targetCompressed) {
info = &oldRewindCompressInfo;
action = FILE_ACTION_REMOVE;
break;
} else if (sourceCompressed && targetCompressed) {
oldBlockNumber = oldRewindCompressInfo.oldBlockNumber;
oldsize = oldBlockNumber * BLCKSZ;
}
if (oldsize % BLOCKSIZE != 0) {
oldsize = oldsize - (oldsize % BLOCKSIZE);
pg_log(PG_PROGRESS, "target file size mod BLOCKSIZE not equal 0 %s %ld \n", path, statbuf.oldsize);
@ -511,6 +531,8 @@ void process_source_file(const char* path, file_type_t type, size_t newsize, con
entry->pagemap.bitmapsize = 0;
entry->isrelfile = isreldatafile;
COPY_REWIND_COMPRESS_INFO(entry, info, oldBlockNumber, info == NULL ? 0 : info->newBlockNumber)
if (map->last != NULL) {
map->last->next = entry;
map->last = entry;
@ -526,7 +548,8 @@ void process_source_file(const char* path, file_type_t type, size_t newsize, con
* marks target data directory's files that didn't exist in the source for
* deletion.
*/
void process_target_file(const char* path, file_type_t type, size_t oldsize, const char* link_target)
void process_target_file(const char* path, file_type_t type, size_t oldsize, const char* link_target,
const RewindCompressInfo* info)
{
bool exists = false;
file_entry_t key;
@ -555,7 +578,7 @@ void process_target_file(const char* path, file_type_t type, size_t oldsize, con
*/
for (int excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++) {
if (strstr(path, excludeFiles[excludeIdx]) != NULL) {
pg_log(PG_DEBUG, "entry \"%s\" excluded from target file list", path);
pg_log(PG_DEBUG, "entry \"%s\" excluded from target file list\n", path);
return;
}
}
@ -607,6 +630,8 @@ void process_target_file(const char* path, file_type_t type, size_t oldsize, con
entry->pagemap.bitmapsize = 0;
entry->isrelfile = isRelDataFile(path);
COPY_REWIND_COMPRESS_INFO(entry, info, info == NULL ? 0 : info->oldBlockNumber, 0)
if (map->last == NULL)
map->first = entry;
else
@ -769,7 +794,8 @@ void process_waldata_change(
entry->pagemap.bitmap = NULL;
entry->pagemap.bitmapsize = 0;
entry->isrelfile = isRelDataFile(path);
RewindCompressInfo *rewindCompressInfo = NULL;
COPY_REWIND_COMPRESS_INFO(entry, rewindCompressInfo, 0, 0)
if (map->last != NULL) {
map->last->next = entry;
map->last = entry;

View File

@ -8,6 +8,7 @@
#ifndef FILEMAP_H
#define FILEMAP_H
#include "compressed_common.h"
#include "storage/smgr/relfilenode.h"
#include "storage/buf/block.h"
@ -42,6 +43,9 @@ typedef struct file_entry_t {
file_action_t action;
/* for compressed table */
RewindCompressInfo rewindCompressInfo;
/* for a regular file */
size_t oldsize;
size_t newsize;
@ -96,8 +100,10 @@ extern void print_filemap(void);
extern void print_filemap_to_file(FILE* file);
/* Functions for populating the filemap */
extern void process_source_file(const char* path, file_type_t type, size_t newsize, const char* link_target);
extern void process_target_file(const char* path, file_type_t type, size_t newsize, const char* link_target);
extern void process_source_file(const char* path, file_type_t type, size_t newsize, const char* link_target,
RewindCompressInfo* rewindCompressInfo = nullptr);
extern void process_target_file(const char* path, file_type_t type, size_t newsize, const char* link_target,
const RewindCompressInfo* rewindCompressInfo = nullptr);
extern void process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno);
extern void process_waldata_change(
ForkNumber forknum, RelFileNode rnode, StorageEngine store, off_t file_offset, size_t data_size);

View File

@ -161,7 +161,7 @@ BuildErrorCode findCommonCheckpoint(const char* datadir, TimeLineID tli, XLogRec
pg_fatal("find max lsn fail, errmsg:%s\n", returnmsg);
return BUILD_FATAL;
}
pg_log(PG_PROGRESS, "find max lsn success, %s\n", returnmsg);
pg_log(PG_PROGRESS, "find max lsn success, %s", returnmsg);
readprivate.datadir = datadir;
readprivate.tli = tli;

View File

@ -3416,8 +3416,9 @@
AddBuiltinFunc(_0(3470), _1("gs_password_notifytime"), _2(0), _3(true), _4(false), _5(gs_password_notifytime), _6(23), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('i'), _19(0), _20(0), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("gs_password_notifytime"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33(NULL), _34('f'), _35(NULL), _36(0), _37(false))
),
AddFuncGroup(
"gs_read_block_from_remote", 1,
AddBuiltinFunc(_0(4767), _1("gs_read_block_from_remote"), _2(9), _3(true), _4(false), _5(gs_read_block_from_remote), _6(17), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(9, 23, 23, 23, 21, 23, 28, 23, 28, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("gs_read_block_from_remote"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33(NULL), _34('f'), _35(NULL), _36(0), _37(false))
"gs_read_block_from_remote", 2,
AddBuiltinFunc(_0(4767), _1("gs_read_block_from_remote"), _2(9), _3(true), _4(false), _5(gs_read_block_from_remote), _6(17), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(9, 23, 23, 23, 21, 23, 28, 23, 28, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("gs_read_block_from_remote"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33(NULL), _34('f'), _35(NULL), _36(0), _37(false)),
AddBuiltinFunc(_0(4768), _1("gs_read_block_from_remote"), _2(10), _3(true), _4(false), _5(gs_read_block_from_remote_compress), _6(17), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(10, 23, 23, 23, 21, 21, 23, 28, 23, 28, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("gs_read_block_from_remote_compress"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33(NULL), _34('f'), _35(NULL), _36(0), _37(false))
),
AddFuncGroup(
"gs_respool_exception_info", 1,
@ -7685,6 +7686,10 @@
AddBuiltinFunc(_0(3827), _1("pg_read_binary_file"), _2(4), _3(true), _4(false), _5(pg_read_binary_file), _6(17), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(4, 25, 20, 20, 16), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("pg_read_binary_file"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33(NULL), _34('f'), _35(NULL), _36(0), _37(false)),
AddBuiltinFunc(_0(3828), _1("pg_read_binary_file"), _2(1), _3(true), _4(false), _5(pg_read_binary_file_all), _6(17), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(1, 25), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("pg_read_binary_file_all"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33(NULL), _34('f'), _35(NULL), _36(0), _37(false))
),
AddFuncGroup(
"pg_read_binary_file_blocks", 1,
AddBuiltinFunc(_0(8413), _1("pg_read_binary_file_blocks"), _2(3), _3(true), _4(true), _5(pg_read_binary_file_blocks), _6(2249), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(100), _11(20), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(3, 25, 20, 20), _21(7, 25, 20, 20, 25, 23, 23, 17), _22(7, 'i', 'i', 'i', 'o', 'o', 'o', 'o'), _23(7, "input", "blocknum", "blockcount", "path", "blocknum", "len", "data"), _24(NULL), _25("pg_read_binary_file_blocks"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33(NULL), _34('f'))
),
AddFuncGroup(
"pg_read_file", 2,
AddBuiltinFunc(_0(2624), _1("pg_read_file"), _2(3), _3(true), _4(false), _5(pg_read_file), _6(25), _7(PG_CATALOG_NAMESPACE), _8(BOOTSTRAP_SUPERUSERID), _9(INTERNALlanguageId), _10(1), _11(0), _12(0), _13(0), _14(false), _15(false), _16(false), _17(false), _18('v'), _19(0), _20(3, 25, 20, 20), _21(NULL), _22(NULL), _23(NULL), _24(NULL), _25("pg_read_file"), _26(NULL), _27(NULL), _28(NULL), _29(0), _30(false), _31(NULL), _32(false), _33(NULL), _34('f'), _35(NULL), _36(0), _37(false)),

View File

@ -83,6 +83,7 @@
#include "pgxc/groupmgr.h"
#include "storage/buf/buf.h"
#include "storage/predicate.h"
#include "storage/page_compression.h"
#include "storage/buf/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/smgr/smgr.h"
@ -453,8 +454,9 @@ static void InitPartitionDef(Partition newPartition, Oid partOid, char strategy)
*/
Relation heap_create(const char* relname, Oid relnamespace, Oid reltablespace, Oid relid, Oid relfilenode,
Oid bucketOid, TupleDesc tupDesc, char relkind, char relpersistence, bool partitioned_relation, bool rowMovement,
bool shared_relation, bool mapped_relation, bool allow_system_table_mods, int8 row_compress, Oid ownerid,
bool skip_create_storage, TableAmType tam_type, int8 relindexsplit, StorageType storage_type, bool newcbi)
bool shared_relation, bool mapped_relation, bool allow_system_table_mods, int8 row_compress, Datum reloptions,
Oid ownerid, bool skip_create_storage, TableAmType tam_type, int8 relindexsplit, StorageType storage_type,
bool newcbi, Oid accessMethodObjectId)
{
bool create_storage = false;
Relation rel;
@ -564,9 +566,11 @@ Relation heap_create(const char* relname, Oid relnamespace, Oid reltablespace, O
relpersistence,
relkind,
row_compress,
reloptions,
tam_type,
relindexsplit,
storage_type
storage_type,
accessMethodObjectId
);
if (partitioned_relation) {
@ -2640,6 +2644,7 @@ Oid heap_create_with_catalog(const char *relname, Oid relnamespace, Oid reltable
mapped_relation,
allow_system_table_mods,
row_compress,
reloptions,
ownerid,
false,
tam,
@ -5167,7 +5172,7 @@ void dropDeltaTableOnPartition(Oid partId)
*
*/
Partition heapCreatePartition(const char* part_name, bool for_partitioned_table, Oid part_tablespace, Oid part_id,
Oid partFileNode, Oid bucketOid, Oid ownerid, StorageType storage_type, bool newcbi)
Oid partFileNode, Oid bucketOid, Oid ownerid, StorageType storage_type, bool newcbi, Datum reloptions)
{
Partition new_part_desc = NULL;
bool createStorage = false;
@ -5220,7 +5225,8 @@ Partition heapCreatePartition(const char* part_name, bool for_partitioned_table,
part_id, /* partition oid */
partFileNode, /* partition's file node, same as partition oid*/
part_tablespace,
for_partitioned_table ? HEAP_DISK : storage_type);
for_partitioned_table ? HEAP_DISK : storage_type,
reloptions);
/*
* Save newcbi as a context indicator to
@ -5619,7 +5625,9 @@ Oid heapAddRangePartition(Relation pgPartRel, Oid partTableOid, Oid partTablespa
newPartrelfileOid,
bucketOid,
ownerid,
storage_type);
storage_type,
false,
reloptions);
Assert(newPartitionOid == PartitionGetPartid(newPartition));
InitPartitionDef(newPartition, partTableOid, PART_STRATEGY_RANGE);
@ -5812,7 +5820,9 @@ Oid HeapAddIntervalPartition(Relation pgPartRel, Relation rel, Oid partTableOid,
partrelfileOid,
bucketOid,
ownerid,
storage_type);
storage_type,
false,
reloptions);
pfree(partName);
Assert(newPartitionOid == PartitionGetPartid(newPartition));
@ -5904,7 +5914,10 @@ Oid HeapAddListPartition(Relation pgPartRel, Oid partTableOid, Oid partTablespac
partrelfileOid,
bucketOid,
ownerid,
storage_type);
storage_type,
false,
reloptions);
Assert(newListPartitionOid == PartitionGetPartid(newListPartition));
InitPartitionDef(newListPartition, partTableOid, PART_STRATEGY_LIST);
@ -6167,7 +6180,9 @@ Oid HeapAddHashPartition(Relation pgPartRel, Oid partTableOid, Oid partTablespac
partrelfileOid,
bucketOid,
ownerid,
storage_type);
storage_type,
false,
reloptions);
Assert(newHashPartitionOid == PartitionGetPartid(newHashPartition));
InitPartitionDef(newHashPartition, partTableOid, PART_STRATEGY_HASH);
@ -6328,7 +6343,9 @@ static void addNewPartitionTupleForTable(Relation pg_partition_rel, const char*
new_partition_rfoid,
InvalidOid,
ownerid,
HEAP_DISK);
HEAP_DISK,
false,
reloptions);
Assert(new_partition_oid == PartitionGetPartid(new_partition));
new_partition->pd_part->parttype = PART_OBJ_TYPE_PARTED_TABLE;

View File

@ -913,9 +913,9 @@ Oid index_create(Relation heapRelation, const char *indexRelationName, Oid index
indexRelation = heap_create(indexRelationName, namespaceId, tableSpaceId, indexRelationId, relFileNode,
RELATION_CREATE_BUCKET(heapRelation) ? heapRelation->rd_bucketoid : InvalidOid, indexTupDesc, relKind,
relpersistence, isLocalPart, false, shared_relation, mapped_relation, allow_system_table_mods,
REL_CMPRS_NOT_SUPPORT, heapRelation->rd_rel->relowner, skip_create_storage,
REL_CMPRS_NOT_SUPPORT, (Datum)reloptions, heapRelation->rd_rel->relowner, skip_create_storage,
isUstore ? TAM_USTORE : TAM_HEAP, /* XXX: Index tables are by default HEAP Table Type */
relindexsplit, storage_type, extra->crossBucket);
relindexsplit, storage_type, extra->crossBucket, accessMethodObjectId);
Assert(indexRelationId == RelationGetRelid(indexRelation));
@ -933,7 +933,6 @@ Oid index_create(Relation heapRelation, const char *indexRelationName, Oid index
* XXX should have a cleaner way to create cataloged indexes
*/
indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
indexRelation->rd_rel->relam = accessMethodObjectId;
indexRelation->rd_rel->relhasoids = false;
if (accessMethodObjectId == PSORT_AM_OID) {
@ -1245,7 +1244,8 @@ Oid partition_index_create(const char* partIndexName, /* the name of partition i
parentIndex->rd_bucketoid,
parentIndex->rd_rel->relowner,
RelationGetStorageType(parentIndex),
extra->crossbucket);
extra->crossbucket,
indexRelOptions);
partitionIndex->pd_part->parttype = PART_OBJ_TYPE_INDEX_PARTITION;
partitionIndex->pd_part->rangenum = 0;
partitionIndex->pd_part->parentid = parentIndexId;
@ -1283,9 +1283,13 @@ Oid partition_index_create(const char* partIndexName, /* the name of partition i
partitionIndex->pd_part->relfrozenxid = (ShortTransactionId)InvalidTransactionId;
/* insert into pg_partition */
#ifndef ENABLE_MULTIPLE_NODES
insertPartitionEntry(pg_partition_rel, partitionIndex, partitionIndex->pd_id, NULL, NULL, 0, 0, 0, indexRelOptions,
PART_OBJ_TYPE_INDEX_PARTITION);
#else
insertPartitionEntry(
pg_partition_rel, partitionIndex, partitionIndex->pd_id, NULL, NULL, 0, 0, 0, 0, PART_OBJ_TYPE_INDEX_PARTITION);
#endif
/* Make the above change visible */
CommandCounterIncrement();

View File

@ -316,17 +316,30 @@ void log_smgrcreate(RelFileNode* rnode, ForkNumber forkNum)
if (IsSegmentFileNode(*rnode)) {
return;
}
xl_smgr_create_compress xlrec;
uint size;
uint8 info = XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE;
/*
* copy compress options into the xlog record if present

*/
if (rnode->opt != 0) {
xlrec.pageCompressOpts = rnode->opt;
size = sizeof(xl_smgr_create_compress);
info |= XLR_REL_COMPRESS;
} else {
size = sizeof(xl_smgr_create);
}
/*
* Make an XLOG entry reporting the file creation.
*/
xl_smgr_create xlrec;
xlrec.forkNum = forkNum;
RelFileNodeRelCopy(xlrec.rnode, *rnode);
xlrec.xlrec.forkNum = forkNum;
RelFileNodeRelCopy(xlrec.xlrec.rnode, *rnode);
XLogBeginInsert();
XLogRegisterData((char*)&xlrec, sizeof(xlrec));
XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE, false, rnode->bucketNode);
XLogRegisterData((char*)&xlrec, size);
XLogInsert(RM_SMGR_ID, info, false, rnode->bucketNode);
}
static void CStoreRelDropStorage(Relation rel, RelFileNode* rnode, Oid ownerid)
@ -688,15 +701,26 @@ void RelationTruncate(Relation rel, BlockNumber nblocks)
* Make an XLOG entry reporting the file truncation.
*/
XLogRecPtr lsn;
xl_smgr_truncate xlrec;
xl_smgr_truncate_compress xlrec;
uint size;
uint8 info = XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE;
xlrec.blkno = nblocks;
RelFileNodeRelCopy(xlrec.rnode, rel->rd_node);
xlrec.xlrec.blkno = nblocks;
if (rel->rd_node.opt != 0) {
xlrec.pageCompressOpts = rel->rd_node.opt;
size = sizeof(xl_smgr_truncate_compress);
info |= XLR_REL_COMPRESS;
} else {
size = sizeof(xl_smgr_truncate);
}
RelFileNodeRelCopy(xlrec.xlrec.rnode, rel->rd_node);
XLogBeginInsert();
XLogRegisterData((char*)&xlrec, sizeof(xlrec));
XLogRegisterData((char*)&xlrec, size);
lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE, false, rel->rd_node.bucketNode);
lsn = XLogInsert(RM_SMGR_ID, info, false, rel->rd_node.bucketNode);
/*
* Flush, because otherwise the truncation of the main relation might
@ -1207,7 +1231,7 @@ void smgr_redo(XLogReaderState* record)
{
XLogRecPtr lsn = record->EndRecPtr;
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
bool compress = XLogRecGetInfo(record) & XLR_REL_COMPRESS;
/* Backup blocks are not used in smgr records */
Assert(!XLogRecHasAnyBlockRefs(record));
@ -1216,14 +1240,14 @@ void smgr_redo(XLogReaderState* record)
RelFileNode rnode;
RelFileNodeCopy(rnode, xlrec->rnode, XLogRecGetBucketId(record));
smgr_redo_create(rnode, xlrec->forkNum, (char *)xlrec);
/* Redo column file, attid is hidden in forkNum */
rnode.opt = compress ? ((xl_smgr_create_compress*)XLogRecGetData(record))->pageCompressOpts : 0;
smgr_redo_create(rnode, xlrec->forkNum, (char *)xlrec);
/* Redo column file, attid is hidden in forkNum */
} else if (info == XLOG_SMGR_TRUNCATE) {
xl_smgr_truncate* xlrec = (xl_smgr_truncate*)XLogRecGetData(record);
RelFileNode rnode;
RelFileNodeCopy(rnode, xlrec->rnode, XLogRecGetBucketId(record));
rnode.opt = compress ? ((xl_smgr_truncate_compress*)XLogRecGetData(record))->pageCompressOpts : 0;
/*
* Forcibly create relation if it doesn't exist (which suggests that
* it was dropped somewhere later in the WAL sequence). As in

View File

@ -69,6 +69,7 @@
#include "storage/custorage.h"
#include "storage/smgr/segment.h"
#include "storage/cstore/cstore_compress.h"
#include "storage/page_compression.h"
#include "vecexecutor/vecnodes.h"
#ifdef PGXC
@ -791,6 +792,7 @@ int64 calculate_relation_size(RelFileNode* rfn, BackendId backend, ForkNumber fo
relationpath = relpathbackend(*rfn, backend, forknum);
bool rowCompress = IS_COMPRESSED_RNODE((*rfn), forknum);
for (segcount = 0;; segcount++) {
struct stat fst;
@ -807,7 +809,7 @@ int64 calculate_relation_size(RelFileNode* rfn, BackendId backend, ForkNumber fo
else
ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", pathname)));
}
totalsize += fst.st_size;
totalsize += rowCompress ? CalculateMainForkSize((char*)pathname, rfn, forknum) : fst.st_size;
}
pfree_ext(relationpath);

View File

@ -316,6 +316,132 @@ Datum pg_read_binary_file_all(PG_FUNCTION_ARGS)
PG_RETURN_BYTEA_P(read_binary_file(filename, 0, -1, false));
}
struct CompressAddressItemState {
uint32 blkno;
int segmentNo;
ReadBlockChunksStruct rbStruct;
FILE *pcaFile;
};
static void ReadBinaryFileBlocksFirstCall(PG_FUNCTION_ARGS, int32 startBlockNum, int32 blockCount)
{
char* path = convert_and_check_filename(PG_GETARG_TEXT_PP(0));
int segmentNo = 0;
UndoFileType undoFileType = UNDO_INVALID;
if (!is_row_data_file(path, &segmentNo, &undoFileType)) {
ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("%s is not a relation file.", path)));
}
/* create a function context for cross-call persistence */
FuncCallContext* fctx = SRF_FIRSTCALL_INIT();
/* switch to memory context appropriate for multiple function calls */
MemoryContext mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
/* initialize file scanning code */
CompressAddressItemState* itemState = (CompressAddressItemState*)palloc(sizeof(CompressAddressItemState));
/* save mmap to inter_call_data->pcMap */
char pcaFilePath[MAXPGPATH];
errno_t rc = snprintf_s(pcaFilePath, MAXPGPATH, MAXPGPATH - 1, PCA_SUFFIX, path);
securec_check_ss(rc, "\0", "\0");
FILE* pcaFile = AllocateFile((const char*)pcaFilePath, "rb");
if (pcaFile == NULL) {
ereport(ERROR, (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", pcaFilePath)));
}
PageCompressHeader* map = pc_mmap(fileno(pcaFile), ReadChunkSize(pcaFile, pcaFilePath, MAXPGPATH), true);
if (map == MAP_FAILED) {
ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("Failed to mmap %s: %m", pcaFilePath)));
}
if ((BlockNumber)startBlockNum + (BlockNumber)blockCount > map->nblocks) {
auto blockNum = map->nblocks;
ReleaseMap(map, pcaFilePath);
ereport(ERROR,
(ERRCODE_INVALID_PARAMETER_VALUE,
errmsg("invalid blocknum \"%d\" and block count \"%d\", the max blocknum is \"%u\"",
startBlockNum,
blockCount,
blockNum)));
}
/* construct ReadBlockChunksStruct */
char* pcdFilePath = (char*)palloc0(MAXPGPATH);
rc = snprintf_s(pcdFilePath, MAXPGPATH, MAXPGPATH - 1, PCD_SUFFIX, path);
securec_check_ss(rc, "\0", "\0");
FILE* fp = AllocateFile(pcdFilePath, "rb");
if (fp == NULL) {
ereport(ERROR, (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", pcdFilePath)));
}
char* pageBuffer = (char*)palloc(BLCKSZ);
itemState->pcaFile = pcaFile;
itemState->rbStruct.header = map;
itemState->rbStruct.pageBuffer = pageBuffer;
itemState->rbStruct.pageBufferLen = BLCKSZ;
itemState->rbStruct.fp = fp;
itemState->rbStruct.segmentNo = segmentNo;
itemState->rbStruct.fileName = pcdFilePath;
/*
* build tupdesc for result tuples. This must match this function's
* pg_proc entry!
*/
TupleDesc tupdesc = CreateTemplateTupleDesc(4, false, TAM_HEAP);
TupleDescInitEntry(tupdesc, (AttrNumber)1, "path", TEXTOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber)2, "blocknum", INT4OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber)3, "len", INT4OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber)4, "data", BYTEAOID, -1, 0);
fctx->tuple_desc = BlessTupleDesc(tupdesc);
itemState->blkno = startBlockNum;
fctx->max_calls = blockCount;
fctx->user_fctx = itemState;
MemoryContextSwitchTo(mctx);
}
Datum pg_read_binary_file_blocks(PG_FUNCTION_ARGS)
{
int32 startBlockNum = PG_GETARG_INT32(1);
int32 blockCount = PG_GETARG_INT32(2);
if (startBlockNum < 0 || blockCount <= 0 || startBlockNum + blockCount > RELSEG_SIZE) {
ereport(ERROR, (ERRCODE_INVALID_PARAMETER_VALUE,
errmsg("invalid blocknum \"%d\" or block count \"%d\"", startBlockNum, blockCount)));
}
/* stuff done only on the first call of the function */
if (SRF_IS_FIRSTCALL()) {
ReadBinaryFileBlocksFirstCall(fcinfo, startBlockNum, blockCount);
}
/* stuff done on every call of the function */
FuncCallContext *fctx = SRF_PERCALL_SETUP();
CompressAddressItemState *itemState = (CompressAddressItemState *)fctx->user_fctx;
if (fctx->call_cntr < fctx->max_calls) {
bytea *buf = (bytea *)palloc(BLCKSZ + VARHDRSZ);
size_t len = ReadAllChunkOfBlock(VARDATA(buf), BLCKSZ, itemState->blkno, itemState->rbStruct);
SET_VARSIZE(buf, len + VARHDRSZ);
Datum values[4];
values[0] = PG_GETARG_DATUM(0);
values[1] = Int32GetDatum(itemState->blkno);
values[2] = Int32GetDatum(len);
values[3] = PointerGetDatum(buf);
/* Build and return the result tuple. */
bool nulls[4];
securec_check(memset_s(nulls, sizeof(nulls), 0, sizeof(nulls)), "\0", "\0");
HeapTuple tuple = heap_form_tuple(fctx->tuple_desc, (Datum*)values, (bool*)nulls);
Datum result = HeapTupleGetDatum(tuple);
itemState->blkno++;
SRF_RETURN_NEXT(fctx, result);
} else {
if (itemState->rbStruct.header != NULL) {
pc_munmap(itemState->rbStruct.header);
}
FreeFile(itemState->pcaFile);
FreeFile(itemState->rbStruct.fp);
SRF_RETURN_DONE(fctx);
}
}
/*
* stat a file

View File

@ -664,3 +664,281 @@ void pglz_decompress(const PGLZ_Header* source, char* dest)
* That's it.
*/
}
/* ----------
* lz_compress -
*
* Compresses source into dest using strategy. Returns the number of
* bytes written in buffer dest, or -1 if compression fails.
* ----------
*/
int32 lz_compress(const char* source, int32 slen, char* dest)
{
unsigned char* bp = (unsigned char*) dest;
unsigned char* bstart = bp;
int hist_next = 0;
bool hist_recycle = false;
const char* dp = source;
const char* dend = source + slen;
unsigned char ctrl_dummy = 0;
unsigned char* ctrlp = &ctrl_dummy;
unsigned char ctrlb = 0;
unsigned char ctrl = 0;
bool found_match = false;
int32 match_len;
int32 match_off;
int32 good_match;
int32 good_drop;
int32 result_size;
int32 result_max;
int32 need_rate;
errno_t rc;
const PGLZ_Strategy* strategy = PGLZ_strategy_always;
/*
* Our fallback strategy is the default.
*/
if (strategy == NULL) {
strategy = PGLZ_strategy_default;
}
/*
* If the strategy forbids compression (at all or if source chunk size out
* of range), fail.
*/
if (strategy->match_size_good <= 0 || slen < strategy->min_input_size || slen > strategy->max_input_size) {
return -1;
}
/*
* Limit the match parameters to the supported range.
*/
good_match = strategy->match_size_good;
if (good_match > PGLZ_MAX_MATCH) {
good_match = PGLZ_MAX_MATCH;
} else if (good_match < 17) {
good_match = 17;
}
good_drop = strategy->match_size_drop;
if (good_drop < 0) {
good_drop = 0;
} else if (good_drop > 100) {
good_drop = 100;
}
need_rate = strategy->min_comp_rate;
if (need_rate < 0) {
need_rate = 0;
} else if (need_rate > 99) {
need_rate = 99;
}
/*
* Compute the maximum result size allowed by the strategy, namely the
* input size minus the minimum wanted compression rate. This had better
* be <= slen, else we might overrun the provided output buffer.
*/
if (slen > (INT_MAX / 100)) {
/* Approximate to avoid overflow */
result_max = (slen / 100) * (100 - need_rate);
} else {
result_max = (slen * (100 - need_rate)) / 100;
}
/*
* Initialize the history lists to empty. We do not need to zero the
* hist_entries[] array; its entries are initialized as they are used.
*/
rc = memset_s(u_sess->utils_cxt.hist_start, HIST_START_LEN, 0, HIST_START_LEN);
securec_check(rc, "\0", "\0");
/*
* Compress the source directly into the output buffer.
*/
while (dp < dend) {
/*
* If we already exceeded the maximum result size, fail.
*
* We check once per loop; since the loop body could emit as many as 4
* bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better
* allow 4 slop bytes.
*/
if (bp - bstart >= result_max) {
return -1;
}
/*
* If we've emitted more than first_success_by bytes without finding
* anything compressible at all, fail. This lets us fall out
* reasonably quickly when looking at incompressible input (such as
* pre-compressed data).
*/
if (!found_match && bp - bstart >= strategy->first_success_by) {
return -1;
}
/*
* Try to find a match in the history
*/
if (pglz_find_match(u_sess->utils_cxt.hist_start, dp, dend, &match_len, &match_off, good_match, good_drop)) {
/*
* Create the tag and add history entries for all matched
* characters.
*/
pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
while (match_len--) {
pglz_hist_add(
u_sess->utils_cxt.hist_start, u_sess->utils_cxt.hist_entries, hist_next, hist_recycle, dp,
dend);
dp++; /* Do not do this ++ in the line above! */
/* The macro would do it four times - Jan. */
}
found_match = true;
} else {
/*
* No match found. Copy one literal byte.
*/
pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
pglz_hist_add(
u_sess->utils_cxt.hist_start, u_sess->utils_cxt.hist_entries, hist_next, hist_recycle, dp, dend);
dp++; /* Do not do this ++ in the line above! */
/* The macro would do it four times - Jan. */
}
}
/*
* Write out the last control byte and check that we haven't overrun the
* output size allowed by the strategy.
*/
*ctrlp = ctrlb;
result_size = bp - bstart;
if (result_size >= result_max) {
return -1;
}
/* success */
return result_size;
}
/* ----------
* lz_decompress -
*
* Decompresses source into dest. Returns the number of bytes
* decompressed in the destination buffer, and *optionally*
* checks that both the source and dest buffers have been
* fully read and written to, respectively.
* ----------
*/
int32 lz_decompress(const char* source, int32 slen, char* dest, int32 rawsize, bool check_complete)
{
const unsigned char* sp;
const unsigned char* srcend;
unsigned char* dp;
unsigned char* destend;
errno_t rc = 0;
sp = (const unsigned char*) source;
srcend = ((const unsigned char*) source) + slen;
dp = (unsigned char*) dest;
destend = dp + rawsize;
while (sp < srcend && dp < destend) {
/*
* Read one control byte and process the next 8 items (or as many as
* remain in the compressed input).
*/
unsigned char ctrl = *sp++;
int ctrlc;
for (ctrlc = 0; ctrlc < 8 && sp < srcend && dp < destend; ctrlc++) {
if (ctrl & 1) {
/*
* Set control bit means we must read a match tag. The match
* is coded with two bytes. First byte uses lower nibble to
* code length - 3. Higher nibble contains upper 4 bits of the
* offset. The next following byte contains the lower 8 bits
* of the offset. If the length is coded as 18, another
* extension tag byte tells how much longer the match really
* was (0-255).
*/
int32 len;
int32 off;
len = (sp[0] & 0x0f) + 3;
off = ((sp[0] & 0xf0) << 4) | sp[1];
sp += 2;
if (len == 18) {
len += *sp++;
}
/*
* Now we copy the bytes specified by the tag from OUTPUT to
* OUTPUT (copy len bytes from dp - off to dp). The copied
* areas could overlap; to prevent possible uncertainty, we
* copy only non-overlapping regions.
*/
len = Min(len, destend - dp);
while (off < len) {
/*---------
* When offset is smaller than length - source and
* destination regions overlap. memmove() is resolving
* this overlap in an incompatible way with pglz. Thus we
* resort to memcpy()-ing non-overlapping regions.
*
* Consider input: 112341234123412341234
* At byte 5 here ^ we have match with length 16 and
* offset 4. 11234M(len=16, off=4)
* We are decoding first period of match and rewrite match
* 112341234M(len=12, off=8)
*
* The same match is now at position 9, it points to the
* same start byte of output, but from another position:
* the offset is doubled.
*
* We iterate through this offset growth until we can
* proceed to usual memcpy(). If we would try to decode
* the match at byte 5 (len=16, off=4) by memmove() we
* would issue memmove(5, 1, 16) which would produce
* 112341234XXXXXXXXXXXX, where series of X is 12
* undefined bytes, that were at bytes [5:17].
* ---------
*/
errno_t rc = memcpy_s(dp, off + 1, dp - off, off);
securec_check(rc, "", "");
len -= off;
dp += off;
off += off;
}
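/* Loop exit guarantees off >= len, so this final copy no longer overlaps. */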
rc = memcpy_s(dp, len + 1, dp - off, len);
securec_check(rc, "", "");
dp += len;
} else {
/*
* An unset control bit means LITERAL BYTE. So we just copy
* one from INPUT to OUTPUT.
*/
*dp++ = *sp++;
}
/*
* Advance the control bit
*/
ctrl >>= 1;
}
}
/*
* Check we decompressed the right amount. If we are slicing, then we
* won't necessarily be at the end of the source or dest buffers when we
* hit a stop, so we don't test them.
*/
if (check_complete && (dp != destend || sp != srcend)) {
return -1;
}
/*
* That's it.
*/
return (char*) dp - dest;
}
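As a sanity check, the two entry points pair up as follows. This is a minimal round-trip sketch, not part of the patch; it assumes an initialized session (lz_compress uses the u_sess history tables) and page-sized inputs, and RoundTripPage is a hypothetical name.
/* Round-trip sketch: compress a page, decompress it, verify the bytes.
 * Assumes pageLen <= BLCKSZ; lz_compress never emits more bytes than its
 * input, so a BLCKSZ-sized destination buffer is enough. */
static bool RoundTripPage(const char* page, int32 pageLen)
{
    char compressed[BLCKSZ];
    int32 clen = lz_compress(page, pageLen, compressed);
    if (clen < 0) {
        return false; /* incompressible input: caller stores the page raw */
    }
    char restored[BLCKSZ];
    int32 rawLen = lz_decompress(compressed, clen, restored, pageLen, true);
    return rawLen == pageLen && memcmp(page, restored, pageLen) == 0;
}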

View File

@ -57,6 +57,7 @@
#include "rewrite/rewriteDefine.h"
#include "rewrite/rewriteHandler.h"
#include "storage/lmgr.h"
#include "storage/page_compression.h"
#include "storage/smgr/smgr.h"
#include "storage/smgr/segment.h"
#include "catalog/storage.h"
@ -233,6 +234,47 @@ static Partition AllocatePartitionDesc(Form_pg_partition partp)
return partition;
}
void SetupPageCompressForPartition(RelFileNode* node, PageCompressOpts* compress_options, const char* relationName)
{
uint1 algorithm = compress_options->compressType;
if (algorithm == COMPRESS_TYPE_NONE) {
node->opt = 0;
} else {
if (!SUPPORT_PAGE_COMPRESSION) {
ereport(ERROR, (errmsg("unsupported page compression on this platform")));
}
uint1 compressLevel;
bool symbol = false;
if (compress_options->compressLevel >= 0) {
symbol = true;
compressLevel = compress_options->compressLevel;
} else {
symbol = false;
compressLevel = -compress_options->compressLevel;
}
bool success = false;
uint1 chunkSize = ConvertChunkSize(compress_options->compressChunkSize, &success);
if (!success) {
ereport(ERROR, (errmsg("invalid compress_chunk_size %d , must be one of %d, %d, %d or %d for %s",
compress_options->compressChunkSize, BLCKSZ / 16, BLCKSZ / 8, BLCKSZ / 4, BLCKSZ / 2,
relationName)));
}
uint1 preallocChunks;
if (compress_options->compressPreallocChunks >= BLCKSZ / compress_options->compressChunkSize) {
preallocChunks = (uint1)(BLCKSZ / compress_options->compressChunkSize - 1);
} else {
preallocChunks = (uint1)(compress_options->compressPreallocChunks);
}
Assert(preallocChunks <= MAX_PREALLOC_CHUNKS);
node->opt = 0;
SET_COMPRESS_OPTION((*node), compress_options->compressByteConvert, compress_options->compressDiffConvert,
preallocChunks, symbol, compressLevel, algorithm, chunkSize);
}
}
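ConvertChunkSize itself is defined elsewhere in the patch; given the error message above and the CHUNK_SIZE_LIST lookup used later in tablecmds, it plausibly encodes the literal chunk size as a small list index. A sketch under that assumption (the list order here is a guess, and the names are illustrative):
/* Hypothetical sketch of ConvertChunkSize: encode a legal chunk size as an
 * index into a chunk-size list; any other value fails validation. */
static uint1 ConvertChunkSizeSketch(int32 chunkSize, bool* success)
{
    static const int32 chunkSizeList[] = {BLCKSZ / 2, BLCKSZ / 4, BLCKSZ / 8, BLCKSZ / 16};
    for (uint1 i = 0; i < sizeof(chunkSizeList) / sizeof(chunkSizeList[0]); i++) {
        if (chunkSize == chunkSizeList[i]) {
            *success = true;
            return i;
        }
    }
    *success = false;
    return 0;
}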
StorageType PartitionGetStorageType(Oid parentOid)
{
HeapTuple pg_class_tuple;
@ -376,6 +418,12 @@ static void PartitionInitPhysicalAddr(Partition partition)
partition->pd_id)));
}
}
partition->pd_node.opt = 0;
if (partition->rd_options) {
SetupPageCompressForPartition(&partition->pd_node, &((StdRdOptions*)(partition->rd_options))->compress,
PartitionGetPartitionName(partition));
}
}
/*
@ -464,7 +512,7 @@ void PartitionClose(Partition partition)
}
Partition PartitionBuildLocalPartition(const char *relname, Oid partid, Oid partfilenode, Oid parttablespace,
StorageType storage_type)
StorageType storage_type, Datum reloptions)
{
Partition part;
MemoryContext oldcxt;
@ -513,6 +561,11 @@ Partition PartitionBuildLocalPartition(const char *relname, Oid partid, Oid part
if (partfilenode != InvalidOid) {
PartitionInitPhysicalAddr(part);
/* compressed option was set by PartitionInitPhysicalAddr if part->rd_options != NULL */
if (part->rd_options == NULL && reloptions) {
StdRdOptions* options = (StdRdOptions*)default_reloptions(reloptions, false, RELOPT_KIND_HEAP);
SetupPageCompressForPartition(&part->pd_node, &options->compress, PartitionGetPartitionName(part));
}
}
if (storage_type == SEGMENT_PAGE) {

View File

@ -167,6 +167,7 @@
#include "rewrite/rewriteDefine.h"
#include "rewrite/rewriteRlsPolicy.h"
#include "storage/lmgr.h"
#include "storage/page_compression.h"
#include "storage/smgr/smgr.h"
#include "storage/smgr/segment.h"
#include "threadpool/threadpool.h"
@ -1232,7 +1233,7 @@ static OpClassCacheEnt* LookupOpclassInfo(Oid operatorClassOid, StrategyNumber n
static void RelationCacheInitFileRemoveInDir(const char* tblspcpath);
static void unlink_initfile(const char* initfilename);
static void SetBackendId(Relation relation);
static void SetupPageCompressForRelation(Relation relation, PageCompressOpts *compress_options);
/*
* ScanPgRelation
*
@ -2420,6 +2421,12 @@ static void RelationInitPhysicalAddr(Relation relation)
if (!RelationIsPartitioned(relation) && relation->storage_type == SEGMENT_PAGE) {
relation->rd_node.bucketNode = SegmentBktId;
}
// setup page compression options
relation->rd_node.opt = 0;
if (relation->rd_options && REL_SUPPORT_COMPRESSED(relation)) {
SetupPageCompressForRelation(relation, &((StdRdOptions*)(relation->rd_options))->compress);
}
}
static void IndexRelationInitKeyNums(Relation relation)
@ -4247,8 +4254,9 @@ void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid, SubTrans
* and enter it into the relcache.
*/
Relation RelationBuildLocalRelation(const char* relname, Oid relnamespace, TupleDesc tupDesc, Oid relid,
Oid relfilenode, Oid reltablespace, bool shared_relation, bool mapped_relation, char relpersistence, char relkind,
int8 row_compress, TableAmType tam_type, int8 relindexsplit, StorageType storage_type)
Oid relfilenode, Oid reltablespace, bool shared_relation, bool mapped_relation, char relpersistence,
char relkind, int8 row_compress, Datum reloptions, TableAmType tam_type, int8 relindexsplit,
StorageType storage_type, Oid accessMethodObjectId)
{
Relation rel;
MemoryContext oldcxt;
@ -4364,6 +4372,7 @@ Relation RelationBuildLocalRelation(const char* relname, Oid relnamespace, Tuple
rel->rd_rel->relowner = BOOTSTRAP_SUPERUSERID;
rel->rd_rel->parttype = PARTTYPE_NON_PARTITIONED_RELATION;
rel->rd_rel->relrowmovement = false;
rel->rd_rel->relam = accessMethodObjectId;
/* set up persistence and relcache fields dependent on it */
rel->rd_rel->relpersistence = relpersistence;
@ -4420,6 +4429,13 @@ Relation RelationBuildLocalRelation(const char* relname, Oid relnamespace, Tuple
RelationInitPhysicalAddr(rel);
/* compressed option was set by RelationInitPhysicalAddr if rel->rd_options != NULL */
if (rel->rd_options == NULL && reloptions && SUPPORT_COMPRESSED(relkind, rel->rd_rel->relam)) {
StdRdOptions *options = (StdRdOptions *) default_reloptions(reloptions, false, RELOPT_KIND_HEAP);
SetupPageCompressForRelation(rel, &options->compress);
}
/* materialized view not initially scannable */
if (relkind == RELKIND_MATVIEW)
rel->rd_isscannable = false;
@ -7758,3 +7774,41 @@ void GetTdeInfoFromRel(Relation rel, TdeInfo *tde_info)
}
}
/* setup page compress options for relation */
static void SetupPageCompressForRelation(Relation relation, PageCompressOpts* compress_options)
{
relation->rd_node.opt = 0;
uint1 algorithm = compress_options->compressType;
if (algorithm != COMPRESS_TYPE_NONE) {
if (!SUPPORT_PAGE_COMPRESSION) {
elog(ERROR, "unsupported page compression on this platform");
}
uint1 compressLevel;
bool symbol = false;
if (compress_options->compressLevel >= 0) {
symbol = true;
compressLevel = compress_options->compressLevel;
} else {
symbol = false;
compressLevel = -compress_options->compressLevel;
}
bool success = false;
uint1 chunkSize = ConvertChunkSize(compress_options->compressChunkSize, &success);
if (!success) {
elog(ERROR, "invalid compress_chunk_size %d , must be one of %d, %d, %d or %d for %s",
compress_options->compressChunkSize, BLCKSZ / 16, BLCKSZ / 8, BLCKSZ / 4, BLCKSZ / 2,
RelationGetRelationName(relation));
}
uint1 preallocChunks;
if (compress_options->compressPreallocChunks >= BLCKSZ / compress_options->compressChunkSize) {
preallocChunks = (uint1)(BLCKSZ / compress_options->compressChunkSize - 1);
} else {
preallocChunks = (uint1)(compress_options->compressPreallocChunks);
}
Assert(preallocChunks <= MAX_PREALLOC_CHUNKS);
SET_COMPRESS_OPTION(relation->rd_node, compress_options->compressByteConvert,
compress_options->compressDiffConvert, preallocChunks,
symbol, compressLevel, algorithm, chunkSize);
}
}

View File

@ -59,7 +59,7 @@ bool open_join_children = true;
bool will_shutdown = false;
/* hard-wired binary version number */
const uint32 GRAND_VERSION_NUM = 92423;
const uint32 GRAND_VERSION_NUM = 92424;
const uint32 HINT_ENHANCEMENT_VERSION_NUM = 92359;
const uint32 MATVIEW_VERSION_NUM = 92213;

View File

@ -961,6 +961,7 @@ const char* const config_group_names[] = {
/* INSTRUMENTS_OPTIONS */
gettext_noop("Instruments Options"),
gettext_noop("Column Encryption"),
gettext_noop("Compress Options"),
#ifdef PGXC
/* DATA_NODES */
gettext_noop("Datanodes and Connection Pooling"),

View File

@ -114,6 +114,7 @@ bool gs_memory_enjection(void)
}
#endif
/*
* Check whether the node is under heavy memory pressure right now.
* If strict is true, we'll do some pre-judgement.
@ -907,6 +908,36 @@ int MemoryProtectFunctions::gs_posix_memalign(void** memptr, Size alignment, Siz
return ENOMEM; /* insufficient memory */
}
/**
* reserve memory for mmap of compressed tables
* @tparam type only MEM_SHRD is supported
* @param sz reserved size (bytes)
* @param needProtect
* @return success or not
*/
template <MemType type>
bool MemoryProtectFunctions::gs_memprot_reserve(Size sz, bool needProtect)
{
if (type != MEM_SHRD) {
return false;
}
return memTracker_ReserveMem<type>(sz, needProtect);
}
/**
* release the memory allocated by gs_memprot_reserve
* @tparam type only MEM_SHRD is supported
* @param sz freed size (bytes)
*/
template <MemType type>
void MemoryProtectFunctions::gs_memprot_release(Size sz)
{
if (type != MEM_SHRD) {
return;
}
memTracker_ReleaseMem<type>(sz);
}
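Callers are expected to pair the reservation with the mmap itself and undo it on failure. A minimal sketch, assuming MEM_SHRD and a hypothetical wrapper name (pc_mmap/pc_munmap are the real users in this patch):
#include <sys/mman.h>
/* Sketch: reserve tracked memory before mmap'ing a compression address map,
 * and release the reservation again if the mmap fails. */
void* MapCompressAddr(int fd, size_t size, bool needProtect)
{
    if (!MemoryProtectFunctions::gs_memprot_reserve<MEM_SHRD>(size, needProtect)) {
        return NULL; /* over the memory budget */
    }
    void* addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (addr == MAP_FAILED) {
        MemoryProtectFunctions::gs_memprot_release<MEM_SHRD>(size); /* undo the reservation */
        return NULL;
    }
    return addr;
}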
/* thread level initialization */
void gs_memprot_thread_init(void)
{

View File

@ -628,6 +628,7 @@ endif
cp $(LIBOBS_LIB_PATH)/libpcre* '$(DESTDIR)$(libdir)/../temp/'
mv $(DESTDIR)$(libdir)/../temp/* '$(DESTDIR)$(libdir)/'
cp $(SECUREDYNAMICLIB_HOME)/libsecurec* '$(DESTDIR)$(libdir)/'
cp $(ZSTD_LIB_PATH)/libzstd* '$(DESTDIR)$(libdir)/'
cp $(LIBOBS_LIB_PATH)/liblog4* '$(DESTDIR)$(libdir)/'
cp $(LIBOBS_LIB_PATH)/libeSDK* '$(DESTDIR)$(libdir)/'
cp $(LIBOBS_LIB_PATH)/libxml2* '$(DESTDIR)$(libdir)/'

View File

@ -233,6 +233,7 @@ Boot_CreateStmt:
mapped_relation,
true,
REL_CMPRS_NOT_SUPPORT,
(Datum)0,
BOOTSTRAP_SUPERUSERID,
false,
TAM_HEAP,

View File

@ -183,6 +183,8 @@ int RemoteGetCU(char* remoteAddress, uint32 spcnode, uint32 dbnode, uint32 relno
* @IN spcnode: tablespace id
* @IN dbnode: database id
* @IN relnode: relfilenode
* @IN bucketnode: bucketnode
* @IN opt: compressed table options
* @IN/OUT forknum: forknum
* @IN/OUT blocknum: block number
* @IN/OUT blocksize: block size
@ -190,7 +192,7 @@ int RemoteGetCU(char* remoteAddress, uint32 spcnode, uint32 dbnode, uint32 relno
* @IN/OUT page_data: pointer of page data
* @Return: remote read error code
*/
extern int RemoteGetPage(char* remoteAddress, uint32 spcnode, uint32 dbnode, uint32 relnode, int4 bucketnode,
int RemoteGetPage(char* remoteAddress, uint32 spcnode, uint32 dbnode, uint32 relnode, int2 bucketnode, uint2 opt,
int32 forknum, uint32 blocknum, uint32 blocksize, uint64 lsn, char* pageData)
{
PGconn* conGet = NULL;
@ -244,8 +246,9 @@ extern int RemoteGetPage(char* remoteAddress, uint32 spcnode, uint32 dbnode, uin
}
tnRet = snprintf_s(sqlCommands, MAX_PATH_LEN, MAX_PATH_LEN - 1,
"SELECT gs_read_block_from_remote(%u, %u, %u, %d, %d, '%lu', %u, '%lu', false);",
spcnode, dbnode, relnode, bucketnode, forknum, blocknum, blocksize, lsn);
"SELECT gs_read_block_from_remote(%u, %u, %u, %d, %d, %d, '%lu', %u, '%lu', false);", spcnode,
dbnode, relnode, bucketnode, opt, forknum, blocknum, blocksize, lsn);
securec_check_ss(tnRet, "", "");
res = PQexecParams(conGet, (const char*)sqlCommands, 0, NULL, NULL, NULL, NULL, 1);

View File

@ -120,6 +120,7 @@
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "storage/lock/lock.h"
#include "storage/page_compression.h"
#include "storage/predicate.h"
#include "storage/remote_read.h"
#include "storage/smgr/segment.h"
@ -1046,10 +1047,10 @@ static bool isOrientationSet(List* options, bool* isCUFormat, bool isDfsTbl)
* @Param [IN] relkind: table's kind (ordinary table or other database object).
* @return: option list with default options added.
*/
static List* AddDefaultOptionsIfNeed(List* options, const char relkind, int8 relcmprs, Oid relnamespace)
static List* AddDefaultOptionsIfNeed(List* options, const char relkind, CreateStmt* stmt, Oid relnamespace)
{
List* res = options;
int8 relcmprs = stmt->row_compress;
ListCell* cell = NULL;
bool isCStore = false;
bool isTsStore = false;
@ -1058,6 +1059,10 @@ static List* AddDefaultOptionsIfNeed(List* options, const char relkind, int8 rel
bool isUstore = false;
bool assignedStorageType = false;
bool hasRowCompressType = false;
bool hasRowCompressChunk = false;
bool hasRowCompressPre = false;
bool hasRowCompressLevel = false;
(void)isOrientationSet(options, NULL, false);
foreach (cell, options) {
DefElem* def = (DefElem*)lfirst(cell);
@ -1087,6 +1092,14 @@ static List* AddDefaultOptionsIfNeed(List* options, const char relkind, int8 rel
ereport(ERROR,
(errcode(ERRCODE_INVALID_OPTION),
errmsg("It is not allowed to assign version option for non-dfs table.")));
} else if (pg_strcasecmp(def->defname, "compresstype") == 0) {
hasRowCompressType = true;
} else if (pg_strcasecmp(def->defname, "compress_chunk_size") == 0) {
hasRowCompressChunk = true;
} else if (pg_strcasecmp(def->defname, "compress_prealloc_chunks") == 0) {
hasRowCompressPre = true;
} else if (pg_strcasecmp(def->defname, "compress_level") == 0) {
hasRowCompressLevel = true;
}
if (pg_strcasecmp(def->defname, "orientation") == 0 && pg_strcasecmp(defGetString(def), ORIENTATION_ORC) == 0) {
@ -1112,6 +1125,25 @@ static List* AddDefaultOptionsIfNeed(List* options, const char relkind, int8 rel
res = lappend(options, def);
}
if ((isCStore || isTsStore || relkind != RELKIND_RELATION ||
stmt->relation->relpersistence == RELPERSISTENCE_UNLOGGED ||
stmt->relation->relpersistence == RELPERSISTENCE_TEMP ||
stmt->relation->relpersistence == RELPERSISTENCE_GLOBAL_TEMP) &&
(hasRowCompressType || hasRowCompressChunk || hasRowCompressPre || hasRowCompressLevel)) {
ereport(ERROR,
(errcode(ERRCODE_INVALID_OPTION),
errmsg("only row orientation table support "
"compresstype/compress_chunk_size/compress_prealloc_chunks/compress_level.")));
}
if (!hasRowCompressType && (hasRowCompressChunk || hasRowCompressPre || hasRowCompressLevel)) {
ereport(ERROR,
(errcode(ERRCODE_INVALID_OPTION),
errmsg("compress_chunk_size/compress_prealloc_chunks/compress_level "
"should be used with compresstype.")));
}
if (isUstore && !isCStore && !hasCompression) {
DefElem* def = makeDefElem("compression", (Node *)makeString(COMPRESSION_NO));
res = lappend(options, def);
@ -1147,7 +1179,7 @@ static List* AddDefaultOptionsIfNeed(List* options, const char relkind, int8 rel
DefElem *def1 = makeDefElem("orientation", (Node *)makeString(ORIENTATION_ROW));
res = lcons(def1, options);
}
if (!hasCompression) {
if (!hasCompression && !hasRowCompressType) {
DefElem *def2 = makeDefElem("compression", (Node *)rowCmprOpt);
res = lappend(options, def2);
}
@ -1986,7 +2018,7 @@ Oid DefineRelation(CreateStmt* stmt, char relkind, Oid ownerId, bool isCTAS)
/* Add default options for relation if need. */
if (!dfsTablespace) {
if (!u_sess->attr.attr_common.IsInplaceUpgrade) {
stmt->options = AddDefaultOptionsIfNeed(stmt->options, relkind, stmt->row_compress, namespaceId);
stmt->options = AddDefaultOptionsIfNeed(stmt->options, relkind, stmt, namespaceId);
}
} else {
checkObjectCreatedinHDFSTblspc(stmt, relkind);
@ -2221,10 +2253,13 @@ Oid DefineRelation(CreateStmt* stmt, char relkind, Oid ownerId, bool isCTAS)
ereport(LOG, (errmodule(MOD_TIMESERIES), errmsg("use implicit distribution column method.")));
}
} else if (pg_strcasecmp(storeChar, TABLE_ACCESS_METHOD_USTORE) == 0) {
if (pg_strcasecmp(COMPRESSION_NO, StdRdOptionsGetStringData(std_opt, compression, COMPRESSION_NO)) != 0 ||
auto compression = StdRdOptionsGetStringData(std_opt, compression, COMPRESSION_NO);
auto orientation = StdRdOptionsGetStringData(std_opt, orientation, ORIENTATION_ROW);
if ((pg_strcasecmp(COMPRESSION_NO, compression) != 0 &&
pg_strcasecmp(ORIENTATION_COLUMN, orientation) == 0) ||
IsCompressedByCmprsInPgclass((RelCompressType)stmt->row_compress)) {
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("UStore tables do not support compression.")));
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("UStore tables do not support compression.")));
}
ForbidToSetOptionsForRowTbl(stmt->options);
ForbidToSetOptionsForUstoreTbl(stmt->options);
@ -14160,6 +14195,50 @@ static void ATExecSetRelOptionsToast(Oid toastid, List* defList, AlterTableType
heap_close(pgclass, RowExclusiveLock);
}
/**
* Forbid modification of compression parameters.
*/
void static CheckSupportModifyCompression(Relation rel, bytea* relOoption)
{
if (!relOoption || !REL_SUPPORT_COMPRESSED(rel)) {
return;
}
PageCompressOpts* newCompressOpt = &(((StdRdOptions*)relOoption)->compress);
RelFileCompressOption current;
TransCompressOptions(rel->rd_node, &current);
if (newCompressOpt) {
int1 algorithm = newCompressOpt->compressType;
if (algorithm != current.compressAlgorithm) {
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("change compresstype OPTION is not supported")));
}
if (current.compressAlgorithm != COMPRESS_TYPE_NONE &&
newCompressOpt->compressChunkSize != CHUNK_SIZE_LIST[current.compressChunkSize]) {
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("change compress_chunk_size OPTION is not supported")));
}
} else {
if (current.compressAlgorithm != COMPRESS_TYPE_NONE) {
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("change compresstype OPTION is not supported")));
}
}
/*
* forbid modifying a partition's compress options
*/
if (HEAP_IS_PARTITIONED(rel)) {
if ((int)current.compressLevel != newCompressOpt->compressLevel) {
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("change partition compressLevel OPTION is not supported")));
}
if ((int)current.compressPreallocChunks != newCompressOpt->compressPreallocChunks) {
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("change partition compress_prealloc_chunks OPTION is not supported")));
}
}
}
/*
* Set, reset, or replace reloptions.
*/
@ -14297,6 +14376,7 @@ static void ATExecSetRelOptions(Relation rel, List* defList, AlterTableType oper
}
/* Validate */
bytea* relOpt = NULL;
switch (rel->rd_rel->relkind) {
case RELKIND_RELATION: {
/* this options only can be used when define a new relation.
@ -14305,6 +14385,7 @@ static void ATExecSetRelOptions(Relation rel, List* defList, AlterTableType oper
ForbidUserToSetDefinedOptions(defList);
bytea* heapRelOpt = heap_reloptions(rel->rd_rel->relkind, newOptions, true);
relOpt = heapRelOpt;
const char* algo = RelationGetAlgo(rel);
if (RelationIsColStore(rel)) {
/* un-supported options. dont care its values */
@ -14338,17 +14419,20 @@ static void ATExecSetRelOptions(Relation rel, List* defList, AlterTableType oper
break;
}
case RELKIND_INDEX:
case RELKIND_GLOBAL_INDEX:
case RELKIND_GLOBAL_INDEX: {
ForbidUserToSetDefinedIndexOptions(defList);
(void)index_reloptions(rel->rd_am->amoptions, newOptions, true);
relOpt = index_reloptions(rel->rd_am->amoptions, newOptions, true);
break;
}
default:
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table, view, materialized view, index, or TOAST table", RelationGetRelationName(rel))));
break;
}
CheckSupportModifyCompression(rel, relOpt);
/*
* All we need do here is update the pg_class row; the new options will be
* propagated into relcaches during post-commit cache inval.
@ -21062,6 +21146,11 @@ static void checkCompressForExchange(Relation partTableRel, Relation ordTableRel
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("tables in ALTER TABLE EXCHANGE PARTITION must have the same type of compress")));
}
if (partTableRel->rd_node.opt != ordTableRel->rd_node.opt) {
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("tables in ALTER TABLE EXCHANGE PARTITION must have the same type of compress")));
}
}
// Description : Check number, type of column
@ -22705,9 +22794,16 @@ static char* GenTemporaryPartitionName(Relation partTableRel, int sequence)
return pstrdup(tmpName);
}
#ifndef ENABLE_MULTIPLE_NODES
static Oid GetNewPartitionOid(Relation pgPartRel, Relation partTableRel, Node *partDef, Oid bucketOid,
bool *isTimestamptz, StorageType stype, Datum new_reloptions)
{
#else
static Oid GetNewPartitionOid(Relation pgPartRel, Relation partTableRel, Node *partDef,
Oid bucketOid, bool *isTimestamptz, StorageType stype)
{
Datum new_reloptions = (Datum)0;
#endif
Oid newPartOid = InvalidOid;
switch (nodeTag(partDef)) {
case T_RangePartitionDefState:
@ -22729,7 +22825,7 @@ static Oid GetNewPartitionOid(Relation pgPartRel, Relation partTableRel, Node *p
bucketOid,
(ListPartitionDefState *)partDef,
partTableRel->rd_rel->relowner,
(Datum)0,
new_reloptions,
isTimestamptz,
stype);
break;
@ -22740,7 +22836,7 @@ static Oid GetNewPartitionOid(Relation pgPartRel, Relation partTableRel, Node *p
bucketOid,
(HashPartitionDefState *)partDef,
partTableRel->rd_rel->relowner,
(Datum)0,
new_reloptions,
isTimestamptz,
stype);
break;
@ -22793,8 +22889,13 @@ static Oid AddTemporaryPartition(Relation partTableRel, Node* partDef)
}
/* Temporary tables do not use segment-page */
#ifndef ENABLE_MULTIPLE_NODES
newPartOid = GetNewPartitionOid(pgPartRel, partTableRel, partDef, bucketOid,
isTimestamptz, RelationGetStorageType(partTableRel), new_reloptions);
#else
newPartOid = GetNewPartitionOid(
pgPartRel, partTableRel, partDef, bucketOid, isTimestamptz, RelationGetStorageType(partTableRel));
#endif
// We must bump the command counter to make the newly-created
// partition tuple visible for opening.
@ -23054,7 +23155,7 @@ static void fastAddPartition(Relation partTableRel, List* destPartDefList, List*
bucketOid = RelationGetBucketOid(partTableRel);
pgPartRel = relation_open(PartitionRelationId, RowExclusiveLock);
#ifndef ENABLE_MULTIPLE_NODES
bool isNull = false;
HeapTuple tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(partTableRel->rd_id));
Datum relOptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions, &isNull);
@ -23062,6 +23163,7 @@ static void fastAddPartition(Relation partTableRel, List* destPartDefList, List*
Datum newRelOptions = transformRelOptions((Datum)0, oldRelOptions, NULL, NULL, false, false);
ReleaseSysCache(tuple);
list_free_ext(oldRelOptions);
#endif
foreach (cell, destPartDefList) {
RangePartitionDefState* partDef = (RangePartitionDefState*)lfirst(cell);
@ -23072,7 +23174,11 @@ static void fastAddPartition(Relation partTableRel, List* destPartDefList, List*
bucketOid,
partDef,
partTableRel->rd_rel->relowner,
#ifndef ENABLE_MULTIPLE_NODES
(Datum)newRelOptions,
#else
(Datum)0,
#endif
isTimestamptz,
RelationGetStorageType(partTableRel),
AccessExclusiveLock);

View File

@ -518,7 +518,7 @@ try_get_buf:
item->bucketNode = buf_desc->tag.rnode.bucketNode;
item->forkNum = buf_desc->tag.forkNum;
item->blockNum = buf_desc->tag.blockNum;
if(IsSegmentFileNode(buf_desc->tag.rnode)) {
if(IsSegmentFileNode(buf_desc->tag.rnode) || buf_desc->tag.rnode.opt != 0) {
*is_new_relfilenode = true;
}
} else {

View File

@ -4470,6 +4470,12 @@ const char* pgstat_get_wait_io(WaitEventIO w)
case WAIT_EVENT_LOGCTRL_SLEEP:
event_name = "LOGCTRL_SLEEP";
break;
case WAIT_EVENT_COMPRESS_ADDRESS_FILE_FLUSH:
event_name = "PCA_FLUSH";
break;
case WAIT_EVENT_COMPRESS_ADDRESS_FILE_SYNC:
event_name = "PCA_SYNC";
break;
/* no default case, so that compiler will warn */
case IO_EVENT_NUM:
break;

View File

@ -2068,6 +2068,8 @@ int PostmasterMain(int argc, char* argv[])
ngroup_info_hash_create();
/*init Role id hash table*/
InitRoleIdHashTable();
/* pcmap */
RealInitialMMapLockArray();
/* init unique sql */
InitUniqueSQL();
/* init hypo index */

View File

@ -115,6 +115,10 @@ static relopt_bool boolRelOpts[] = {
{{ "on_commit_delete_rows", "global temp table on commit options", RELOPT_KIND_HEAP}, true},
{{ "crossbucket", "Enables cross bucket index creation in this index relation", RELOPT_KIND_BTREE}, false },
{{ "enable_tde", "enable table's level transparent data encryption", RELOPT_KIND_HEAP }, false },
{{ "compress_byte_convert", "Whether do byte convert in compression", RELOPT_KIND_HEAP | RELOPT_KIND_BTREE},
false },
{{ "compress_diff_convert", "Whether do diiffer convert in compression", RELOPT_KIND_HEAP | RELOPT_KIND_BTREE},
false },
/* list terminator */
{{NULL}}
};
@ -235,6 +239,16 @@ static relopt_int intRelOpts[] = {
},
-1, 0, 32
},
{{ "compress_level", "Level of page compression.", RELOPT_KIND_HEAP | RELOPT_KIND_BTREE}, 0, -31, 31},
{{ "compresstype", "compress type (none, pglz or zstd).", RELOPT_KIND_HEAP | RELOPT_KIND_BTREE}, 0, 0, 2},
{{ "compress_chunk_size", "Size of chunk to store compressed page.", RELOPT_KIND_HEAP | RELOPT_KIND_BTREE},
BLCKSZ / 2,
BLCKSZ / 16,
BLCKSZ / 2},
{{ "compress_prealloc_chunks", "Number of prealloced chunks for each block.", RELOPT_KIND_HEAP | RELOPT_KIND_BTREE},
0,
0,
7},
/* list terminator */
{{NULL}}
};
@ -1934,6 +1948,18 @@ bytea *default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
{ "cmk_id", RELOPT_TYPE_STRING, offsetof(StdRdOptions, cmk_id)},
{ "encrypt_algo", RELOPT_TYPE_STRING, offsetof(StdRdOptions, encrypt_algo)},
{ "enable_tde", RELOPT_TYPE_BOOL, offsetof(StdRdOptions, enable_tde)},
{ "compresstype", RELOPT_TYPE_INT,
offsetof(StdRdOptions, compress) + offsetof(PageCompressOpts, compressType)},
{ "compress_level", RELOPT_TYPE_INT,
offsetof(StdRdOptions, compress) + offsetof(PageCompressOpts, compressLevel)},
{ "compress_chunk_size", RELOPT_TYPE_INT,
offsetof(StdRdOptions, compress) + offsetof(PageCompressOpts, compressChunkSize)},
{"compress_prealloc_chunks", RELOPT_TYPE_INT,
offsetof(StdRdOptions, compress) + offsetof(PageCompressOpts, compressPreallocChunks)},
{ "compress_byte_convert", RELOPT_TYPE_BOOL,
offsetof(StdRdOptions, compress) + offsetof(PageCompressOpts, compressByteConvert)},
{ "compress_diff_convert", RELOPT_TYPE_BOOL,
offsetof(StdRdOptions, compress) + offsetof(PageCompressOpts, compressDiffConvert)},
};
options = parseRelOptions(reloptions, validate, kind, &numoptions);

View File

@ -177,6 +177,9 @@ bool XLogBlockRefreshRedoBufferInfo(XLogBlockHead *blockhead, RedoBufferInfo *bu
if (bufferinfo->blockinfo.rnode.relNode != XLogBlockHeadGetRelNode(blockhead)) {
return false;
}
if (bufferinfo->blockinfo.rnode.opt != XLogBlockHeadGetCompressOpt(blockhead)) {
return false;
}
if (bufferinfo->blockinfo.forknum != XLogBlockHeadGetForkNum(blockhead)) {
return false;
}
@ -200,6 +203,7 @@ void XLogBlockInitRedoBlockInfo(XLogBlockHead *blockhead, RedoBufferTag *blockin
blockinfo->rnode.dbNode = XLogBlockHeadGetDbNode(blockhead);
blockinfo->rnode.relNode = XLogBlockHeadGetRelNode(blockhead);
blockinfo->rnode.bucketNode = XLogBlockHeadGetBucketId(blockhead);
blockinfo->rnode.opt = XLogBlockHeadGetCompressOpt(blockhead);
blockinfo->forknum = XLogBlockHeadGetForkNum(blockhead);
blockinfo->blkno = XLogBlockHeadGetBlockNum(blockhead);
blockinfo->pblk = XLogBlockHeadGetPhysicalBlock(blockhead);
@ -272,7 +276,7 @@ void XLogRecSetBlockCommonState(XLogReaderState *record, XLogBlockParseEnum bloc
blockparse->blockhead.spcNode = filenode.rnode.node.spcNode;
blockparse->blockhead.dbNode = filenode.rnode.node.dbNode;
blockparse->blockhead.bucketNode = filenode.rnode.node.bucketNode;
blockparse->blockhead.opt = filenode.rnode.node.opt;
blockparse->blockhead.blkno = filenode.segno;
blockparse->blockhead.forknum = filenode.forknumber;
@ -1361,7 +1365,7 @@ void XLogBlockDdlCommonRedo(XLogBlockHead *blockhead, void *blockrecbody, RedoBu
rnode.dbNode = blockhead->dbNode;
rnode.relNode = blockhead->relNode;
rnode.bucketNode = blockhead->bucketNode;
rnode.opt = blockhead->opt;
switch (blockddlrec->blockddltype) {
case BLOCK_DDL_CREATE_RELNODE:
smgr_redo_create(rnode, blockhead->forknum, blockddlrec->mainData);
@ -1430,7 +1434,7 @@ void XLogBlockSegDdlDoRealAction(XLogBlockHead* blockhead, void* blockrecbody, R
rnode.dbNode = blockhead->dbNode;
rnode.relNode = blockhead->relNode;
rnode.bucketNode = blockhead->bucketNode;
rnode.opt = blockhead->opt;
switch (segddlrec->blockddlrec.blockddltype) {
case BLOCK_DDL_TRUNCATE_RELNODE:
xlog_block_segpage_redo_truncate(rnode, blockhead, segddlrec);
@ -1455,7 +1459,7 @@ void XLogBlockDdlDoSmgrAction(XLogBlockHead *blockhead, void *blockrecbody, Redo
rnode.dbNode = blockhead->dbNode;
rnode.relNode = blockhead->relNode;
rnode.bucketNode = blockhead->bucketNode;
rnode.opt = blockhead->opt;
switch (blockddlrec->blockddltype) {
case BLOCK_DDL_CREATE_RELNODE:
smgr_redo_create(rnode, blockhead->forknum, blockddlrec->mainData);

View File

@ -281,7 +281,11 @@ inline void dw_prepare_page(dw_batch_t *batch, uint16 page_num, uint16 page_id,
if (t_thrd.proc->workingVersionNum < DW_SUPPORT_SINGLE_FLUSH_VERSION) {
page_num = page_num | IS_HASH_BKT_SEGPAGE_MASK;
}
batch->buftag_ver = HASHBUCKET_TAG;
if (t_thrd.proc->workingVersionNum < PAGE_COMPRESSION_VERSION) {
batch->buftag_ver = HASHBUCKET_TAG;
} else {
batch->buftag_ver = PAGE_COMPRESS_TAG;
}
} else {
batch->buftag_ver = ORIGIN_TAG;
}
@ -304,7 +308,7 @@ static void dw_prepare_file_head(char *file_head, uint16 start, uint16 dwn)
curr_head->head.page_id = 0;
curr_head->head.dwn = dwn;
curr_head->start = start;
curr_head->buftag_version = HASHBUCKET_TAG;
curr_head->buftag_version = PAGE_COMPRESS_TAG;
curr_head->tail.dwn = dwn;
dw_calc_file_head_checksum(curr_head);
}
@ -430,15 +434,21 @@ static void dw_recover_pages(T1 *batch, T2 *buf_tag, PageHeader data_page, BufTa
for (i = 0; i < GET_REL_PGAENUM(batch->page_num); i++) {
buf_tag = &batch->buf_tag[i];
relnode.dbNode = buf_tag->rnode.dbNode;
relnode.spcNode = buf_tag->rnode.spcNode;
relnode.relNode = buf_tag->rnode.relNode;
if (tag_ver == HASHBUCKET_TAG) {
relnode.dbNode = buf_tag->rnode.dbNode;
relnode.spcNode = buf_tag->rnode.spcNode;
relnode.relNode = buf_tag->rnode.relNode;
relnode.opt = 0;
// 2 bytes are used for bucketNode.
relnode.bucketNode = (int2)((BufferTagSecondVer *)buf_tag)->rnode.bucketNode;
} else if (tag_ver == PAGE_COMPRESS_TAG) {
relnode.opt = ((BufferTag *)buf_tag)->rnode.opt;
relnode.bucketNode = ((BufferTag *)buf_tag)->rnode.bucketNode;
} else {
relnode.dbNode = buf_tag->rnode.dbNode;
relnode.spcNode = buf_tag->rnode.spcNode;
relnode.relNode = buf_tag->rnode.relNode;
relnode.opt = 0;
relnode.bucketNode = InvalidBktId;
}
relation = smgropen(relnode, InvalidBackendId, GetColumnNum(buf_tag->forkNum));
@ -757,7 +767,10 @@ static void dw_recover_partial_write(knl_g_dw_context *cxt)
}
if (t_thrd.proc->workingVersionNum < DW_SUPPORT_SINGLE_FLUSH_VERSION) {
bool is_hashbucket = ((curr_head->page_num & IS_HASH_BKT_SEGPAGE_MASK) != 0);
curr_head->buftag_ver = is_hashbucket ? HASHBUCKET_TAG : ORIGIN_TAG;
curr_head->buftag_ver = is_hashbucket ?
(t_thrd.proc->workingVersionNum < PAGE_COMPRESSION_VERSION ? HASHBUCKET_TAG
: PAGE_COMPRESS_TAG)
: ORIGIN_TAG;
}
remain_pages = read_asst.buf_end - read_asst.buf_start;
@ -1988,9 +2001,9 @@ int buftag_compare(const void *pa, const void *pb)
static inline void dw_log_recovery_page(int elevel, const char *state, BufferTag buf_tag)
{
ereport(elevel, (errmodule(MOD_DW),
errmsg("[single flush] recovery, %s: buf_tag[rel %u/%u/%u blk %u fork %d]",
errmsg("[single flush] recovery, %s: buf_tag[rel %u/%u/%u blk %u fork %d], compress: %u",
state, buf_tag.rnode.spcNode, buf_tag.rnode.dbNode, buf_tag.rnode.relNode, buf_tag.blockNum,
buf_tag.forkNum)));
buf_tag.forkNum, buf_tag.rnode.opt)));
}
void dw_recovery_page_single(const dw_single_flush_item *item, uint16 item_num)

View File

@ -53,6 +53,7 @@ static inline void PRXLogRecGetBlockTag(XLogRecParseState *recordBlockState, Rel
rnode->relNode = blockparse->blockhead.relNode;
rnode->spcNode = blockparse->blockhead.spcNode;
rnode->bucketNode = blockparse->blockhead.bucketNode;
rnode->opt = blockparse->blockhead.opt;
}
if (blknum != NULL) {
*blknum = blockparse->blockhead.blkno;
@ -223,7 +224,7 @@ void PRTrackRelStorageDrop(XLogRecParseState *recordBlockState, HTAB *redoItemHa
rNode.dbNode = blockparse->blockhead.dbNode;
rNode.relNode = blockparse->blockhead.relNode;
rNode.bucketNode = blockparse->blockhead.bucketNode;
rNode.opt = blockparse->blockhead.opt;
#ifdef USE_ASSERT_CHECKING
ereport(LOG, (errmsg("PRTrackRelTruncate:(%X/%X)clear relation %u/%u/%u forknum %u record",
(uint32)(blockparse->blockhead.end_ptr >> 32), (uint32)(blockparse->blockhead.end_ptr),

View File

@ -1364,6 +1364,7 @@ void RedoPageWorkerRedoBcmBlock(XLogRecParseState *procState)
node.dbNode = procState->blockparse.blockhead.dbNode;
node.relNode = procState->blockparse.blockhead.relNode;
node.bucketNode = procState->blockparse.blockhead.bucketNode;
node.opt = procState->blockparse.blockhead.opt;
XLogBlockNewCuParse *newCuParse = &(procState->blockparse.extra_rec.blocknewcu);
uint8 info = XLogBlockHeadGetInfo(&procState->blockparse.blockhead) & ~XLR_INFO_MASK;
switch (info & XLOG_HEAP_OPMASK) {

View File

@ -479,7 +479,8 @@ XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, bool isupgrade, int bucket_id, bo
* The caller can set rmgr bits and XLR_SPECIAL_REL_UPDATE; the rest are
* reserved for use by me.
*/
if ((info & ~(XLR_RMGR_INFO_MASK | XLR_SPECIAL_REL_UPDATE | XLR_BTREE_UPGRADE_FLAG | XLR_IS_TOAST)) != 0) {
if ((info & ~(XLR_RMGR_INFO_MASK | XLR_SPECIAL_REL_UPDATE |
XLR_BTREE_UPGRADE_FLAG | XLR_REL_COMPRESS | XLR_IS_TOAST)) != 0) {
ereport(PANIC, (errmsg("invalid xlog info mask %hhx", info)));
}
@ -682,6 +683,12 @@ static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info, XLogFPWInfo fpw_
bool samerel = false;
bool tde = false;
// must be uncompressed table during upgrade
bool isCompressedTable = regbuf->rnode.opt != 0;
if (t_thrd.proc->workingVersionNum < PAGE_COMPRESSION_VERSION) {
Assert(!isCompressedTable);
}
if (!regbuf->in_use)
continue;
@ -829,7 +836,7 @@ static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info, XLogFPWInfo fpw_
samerel = false;
prev_regbuf = regbuf;
if (!samerel && IsSegmentFileNode(regbuf->rnode)) {
if (!samerel && (IsSegmentFileNode(regbuf->rnode) || isCompressedTable)) {
Assert(bkpb.id <= XLR_MAX_BLOCK_ID);
bkpb.id += BKID_HAS_BUCKET_OR_SEGPAGE;
}
@ -845,9 +852,20 @@ static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info, XLogFPWInfo fpw_
}
if (!samerel) {
if (IsSegmentFileNode(regbuf->rnode)) {
XLOG_ASSEMBLE_ONE_ITEM(scratch, sizeof(RelFileNode), &regbuf->rnode, remained_size);
hashbucket_flag = true;
if (IsSegmentFileNode(regbuf->rnode) || isCompressedTable) {
if (IsSegmentFileNode(regbuf->rnode)) {
XLOG_ASSEMBLE_ONE_ITEM(scratch, sizeof(RelFileNode), &regbuf->rnode, remained_size);
hashbucket_flag = true;
}
if (t_thrd.proc->workingVersionNum < PAGE_COMPRESSION_VERSION) {
Assert(!isCompressedTable);
RelFileNodeV2 relFileNodeV2;
RelFileNodeV2Copy(relFileNodeV2, regbuf->rnode);
XLOG_ASSEMBLE_ONE_ITEM(scratch, sizeof(RelFileNodeV2), &regbuf->rnode, remained_size);
} else {
info |= XLR_REL_COMPRESS;
XLOG_ASSEMBLE_ONE_ITEM(scratch, sizeof(RelFileNode), &regbuf->rnode, remained_size);
}
} else {
XLOG_ASSEMBLE_ONE_ITEM(scratch, sizeof(RelFileNodeOld), &regbuf->rnode, remained_size);
no_hashbucket_flag = true;

View File

@ -1214,6 +1214,18 @@ void ResetDecoder(XLogReaderState *state)
remaining -= sizeof(type); \
} while (0)
/**
* Happens during upgrade, when an old-format RelFileNodeV2 has been copied
* into a RelFileNode: the old int4 bucketNode = -1 leaves 0xFFFF in the new
* uint2 opt field on a little-endian system, so reset opt to 0.
* @param relfileNode the relfilenode decoded from the record
*/
static void CompressTableRecord(RelFileNode* relfileNode)
{
if (relfileNode->bucketNode <= -1 && relfileNode->opt == 0xFFFF) {
relfileNode->opt = 0;
}
}
/*
* Decode the previously read record.
*
@ -1333,8 +1345,11 @@ bool DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errorms
if (remaining < filenodelen)
goto shortdata_err;
blk->rnode.bucketNode = InvalidBktId;
blk->rnode.opt = 0;
errno_t rc = memcpy_s(&blk->rnode, filenodelen, ptr, filenodelen);
securec_check(rc, "\0", "\0");
/* support decode old version of relfileNode */
CompressTableRecord(&blk->rnode);
ptr += filenodelen;
remaining -= filenodelen;

View File

@@ -1139,7 +1139,7 @@ void XLogDropBktRowRelation(XLogRecParseState *redoblockstate)
rnode.spcNode = redoblockstate->blockparse.blockhead.spcNode;
rnode.dbNode = redoblockstate->blockparse.blockhead.dbNode;
rnode.relNode = redoblockstate->blockparse.blockhead.relNode;
rnode.opt = redoblockstate->blockparse.blockhead.opt;
uint32 *bktmap = (uint32 *)redoblockstate->blockparse.extra_rec.blockddlrec.mainData;
for (uint32 bktNode = 0; bktNode < MAX_BUCKETMAPLEN; bktNode++) {
if (!GET_BKT_MAP_BIT(bktmap, bktNode)) {
@@ -1163,6 +1163,7 @@ void XLogForgetDDLRedo(XLogRecParseState *redoblockstate)
relNode.dbNode = redoblockstate->blockparse.blockhead.dbNode;
relNode.relNode = redoblockstate->blockparse.blockhead.relNode;
relNode.bucketNode = redoblockstate->blockparse.blockhead.bucketNode;
relNode.opt = redoblockstate->blockparse.blockhead.opt;
XlogDropRowReation(relNode);
}
} else if (ddlrecparse->blockddltype == BLOCK_DDL_TRUNCATE_RELNODE) {
@@ -1171,6 +1172,7 @@ void XLogForgetDDLRedo(XLogRecParseState *redoblockstate)
relNode.dbNode = redoblockstate->blockparse.blockhead.dbNode;
relNode.relNode = redoblockstate->blockparse.blockhead.relNode;
relNode.bucketNode = redoblockstate->blockparse.blockhead.bucketNode;
relNode.opt = redoblockstate->blockparse.blockhead.opt;
XLogTruncateRelation(relNode, redoblockstate->blockparse.blockhead.forknum,
redoblockstate->blockparse.blockhead.blkno);
}
@@ -1182,7 +1184,8 @@ void XLogDropSpaceShrink(XLogRecParseState *redoblockstate)
.spcNode = redoblockstate->blockparse.blockhead.spcNode,
.dbNode = redoblockstate->blockparse.blockhead.dbNode,
.relNode = redoblockstate->blockparse.blockhead.relNode,
.bucketNode = redoblockstate->blockparse.blockhead.bucketNode
.bucketNode = redoblockstate->blockparse.blockhead.bucketNode,
.opt = redoblockstate->blockparse.blockhead.opt
};
ForkNumber forknum = redoblockstate->blockparse.blockhead.forknum;
BlockNumber target_size = redoblockstate->blockparse.blockhead.blkno;

View File

@@ -1389,6 +1389,7 @@ static void UHeapXlogUpdateBlock(XLogBlockHead *blockhead, XLogBlockDataParse *b
rnode.dbNode = blockhead->dbNode;
rnode.relNode = blockhead->relNode;
rnode.bucketNode = blockhead->bucketNode;
rnode.opt = blockhead->opt;
XLogRecordPageWithFreeSpace(rnode, bufferinfo->blockinfo.blkno, freespace);
}
} else {

View File

@@ -6026,8 +6026,11 @@ void shared_buffer_write_error_callback(void *arg)
/* Buffer is pinned, so we can read the tag without locking the spinlock */
if (buf_desc != NULL) {
char *path = relpathperm(((BufferDesc *)buf_desc)->tag.rnode, ((BufferDesc *)buf_desc)->tag.forkNum);
(void)errcontext("writing block %u of relation %s", buf_desc->tag.blockNum, path);
if (((BufferDesc *)buf_desc)->tag.rnode.opt) {
(void)errcontext("writing block %u of relation %s_pcd", buf_desc->tag.blockNum, path);
} else {
(void)errcontext("writing block %u of relation %s", buf_desc->tag.blockNum, path);
}
pfree(path);
}
}
@@ -6382,7 +6385,7 @@ retry:
PROFILING_REMOTE_START();
int ret_code = RemoteGetPage(remote_address, rnode.node.spcNode, rnode.node.dbNode, rnode.node.relNode,
rnode.node.bucketNode, fork_num, block_num, BLCKSZ, cur_lsn, buf);
rnode.node.bucketNode, rnode.node.opt, fork_num, block_num, BLCKSZ, cur_lsn, buf);
PROFILING_REMOTE_END_READ(BLCKSZ, (ret_code == REMOTE_READ_OK));

View File

@@ -191,6 +191,16 @@ static pthread_mutex_t VFDLockArray[NUM_VFD_PARTITIONS];
#define VFDMappingPartitionLock(hashcode) \
(&VFDLockArray[VFDTableHashPartition(hashcode)])
/*
 * SAFE_MUNMAP: release the compressed-page address map (pcmap) attached
 * to a vfd, if any, before the vfd is closed or recycled.
 */
#define SAFE_MUNMAP(vfdP) \
do { \
if ((vfdP)->with_pcmap && (vfdP)->pcmap != NULL) { \
UnReferenceAddrFile((vfdP)); \
(vfdP)->pcmap = NULL; \
} \
} while (0)
/* --------------------
*
* Private Routines
@@ -344,11 +354,13 @@ RelFileNodeForkNum RelFileNodeForkNumFill(RelFileNode* rnode,
filenode.rnode.node.spcNode = rnode->spcNode;
filenode.rnode.node.dbNode = rnode->dbNode;
filenode.rnode.node.bucketNode = rnode->bucketNode;
filenode.rnode.node.opt = rnode->opt;
} else {
filenode.rnode.node.relNode = InvalidOid;
filenode.rnode.node.spcNode = InvalidOid;
filenode.rnode.node.dbNode = InvalidOid;
filenode.rnode.node.bucketNode = InvalidBktId;
filenode.rnode.node.opt = 0;
}
filenode.rnode.backend = backend;
@@ -898,6 +910,7 @@ static void LruDelete(File file)
vfdP = &u_sess->storage_cxt.VfdCache[file];
SAFE_MUNMAP(vfdP);
/* delete the vfd record from the LRU ring */
Delete(file);
@@ -1669,6 +1682,8 @@ void FileCloseWithThief(File file)
{
Vfd* vfdP = &u_sess->storage_cxt.VfdCache[file];
if (!FileIsNotOpen(file)) {
SAFE_MUNMAP(vfdP);
/* remove the file from the lru ring */
Delete(file);
/* the thief has close the real fd */
@@ -1807,6 +1822,8 @@ void FileClose(File file)
vfdP = &u_sess->storage_cxt.VfdCache[file];
if (!FileIsNotOpen(file)) {
SAFE_MUNMAP(vfdP);
/* remove the file from the lru ring */
Delete(file);
@@ -3917,3 +3934,71 @@ static void UnlinkIfExistsFname(const char *fname, bool isdir, int elevel)
}
}
/*
* initialize page compress memory map.
*
*/
void SetupPageCompressMemoryMap(File file, RelFileNode node, const RelFileNodeForkNum& relFileNodeForkNum)
{
Vfd *vfdP = &u_sess->storage_cxt.VfdCache[file];
auto chunk_size = CHUNK_SIZE_LIST[GET_COMPRESS_CHUNK_SIZE(node.opt)];
int returnCode = FileAccess(file);
if (returnCode < 0) {
ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("Failed to open file %s: %m", vfdP->fileName)));
}
RelFileNodeForkNum newOne(relFileNodeForkNum);
newOne.forknumber = PCA_FORKNUM;
PageCompressHeader *map = GetPageCompressHeader(vfdP, chunk_size, newOne);
if (map == (void *) (-1)) {
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("Failed to mmap page compression address file %s: %m",
vfdP->fileName)));
}
if (map->chunk_size == 0 && map->algorithm == 0) {
map->chunk_size = chunk_size;
map->algorithm = GET_COMPRESS_ALGORITHM(node.opt);
if (pc_msync(map) != 0) {
ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(), errmsg("could not msync file \"%s\": %m", vfdP->fileName)));
}
}
if (RecoveryInProgress()) {
CheckAndRepairCompressAddress(map, chunk_size, map->algorithm, vfdP->fileName);
}
vfdP->with_pcmap = true;
vfdP->pcmap = map;
}
/*
* Return the page compress memory map.
*
*/
PageCompressHeader *GetPageCompressMemoryMap(File file, uint32 chunk_size)
{
int returnCode;
Vfd *vfdP = &u_sess->storage_cxt.VfdCache[file];
PageCompressHeader *map = NULL;
Assert(FileIsValid(file));
returnCode = FileAccess(file);
if (returnCode < 0) {
ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("Failed to open file %s: %m", vfdP->fileName)));
}
Assert(vfdP->with_pcmap);
if (vfdP->pcmap == NULL) {
map = GetPageCompressHeader(vfdP, chunk_size, vfdP->fileNode);
if (map == MAP_FAILED) {
ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg(
"Failed to mmap page compression address file %s: %m", vfdP->fileName)));
}
vfdP->with_pcmap = true;
vfdP->pcmap = map;
}
return vfdP->pcmap;
}

View File

@@ -115,7 +115,7 @@ Datum gs_read_block_from_remote(PG_FUNCTION_ARGS)
/* if request to read CU block, we use forkNum column to replace colid. */
(void)StandbyReadCUforPrimary(spcNode, dbNode, relNode, forkNum, blockNum, blockSize, lsn, &result);
} else {
(void)StandbyReadPageforPrimary(spcNode, dbNode, relNode, bucketNode, forkNum, blockNum, blockSize,
(void)StandbyReadPageforPrimary(spcNode, dbNode, relNode, bucketNode, 0, forkNum, blockNum, blockSize,
lsn, &result);
}
@@ -126,6 +126,53 @@ Datum gs_read_block_from_remote(PG_FUNCTION_ARGS)
}
}
/*
* Read a block from the primary's buffer, returning it as bytea
*/
Datum gs_read_block_from_remote_compress(PG_FUNCTION_ARGS)
{
uint32 spcNode;
uint32 dbNode;
uint32 relNode;
int16 bucketNode;
uint16 opt = 0;
int32 forkNum;
uint64 blockNum;
uint32 blockSize;
uint64 lsn;
bool isForCU = false;
bytea* result = NULL;
if (GetUserId() != BOOTSTRAP_SUPERUSERID) {
ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be initial account to read files"))));
}
/* handle optional arguments */
spcNode = PG_GETARG_UINT32(0);
dbNode = PG_GETARG_UINT32(1);
relNode = PG_GETARG_UINT32(2);
bucketNode = PG_GETARG_INT16(3);
opt = PG_GETARG_UINT16(4);
forkNum = PG_GETARG_INT32(5);
blockNum = (uint64)PG_GETARG_TRANSACTIONID(6);
blockSize = PG_GETARG_UINT32(7);
lsn = (uint64)PG_GETARG_TRANSACTIONID(8);
isForCU = PG_GETARG_BOOL(9);
/* get block from local buffer */
if (isForCU) {
/* if request to read CU block, we use forkNum column to replace colid. */
(void)StandbyReadCUforPrimary(spcNode, dbNode, relNode, forkNum, blockNum, blockSize, lsn, &result);
} else {
(void)StandbyReadPageforPrimary(spcNode, dbNode, relNode, bucketNode, opt, forkNum, blockNum, blockSize,
lsn, &result);
}
if (NULL != result) {
PG_RETURN_BYTEA_P(result);
} else {
PG_RETURN_NULL();
}
}
/*
* @Description: read cu for primary
* @IN spcnode: tablespace id
@@ -203,7 +250,7 @@ int StandbyReadCUforPrimary(uint32 spcnode, uint32 dbnode, uint32 relnode, int32
* @Return: remote read error code
* @See also:
*/
int StandbyReadPageforPrimary(uint32 spcnode, uint32 dbnode, uint32 relnode, int16 bucketnode, int32 forknum,
int StandbyReadPageforPrimary(uint32 spcnode, uint32 dbnode, uint32 relnode, int16 bucketnode, uint2 opt, int32 forknum,
uint32 blocknum, uint32 blocksize, uint64 lsn, bytea** pagedata)
{
Assert(pagedata);
@@ -220,7 +267,7 @@ int StandbyReadPageforPrimary(uint32 spcnode, uint32 dbnode, uint32 relnode, int
return ret_code;
}
RelFileNode relfilenode {spcnode, dbnode, relnode, bucketnode};
RelFileNode relfilenode {spcnode, dbnode, relnode, bucketnode, opt};
{
bytea* pageData = (bytea*)palloc(BLCKSZ + VARHDRSZ);

View File

@@ -33,6 +33,7 @@
#include "access/xlog.h"
#include "storage/smgr/fd.h"
#include "storage/ipc.h"
#include "storage/page_compression.h"
#include "storage/pmsignal.h"
#include "storage/checksum.h"
#ifdef ENABLE_MOT
@@ -112,6 +113,9 @@ static void send_xlog_header(const char *linkpath);
static void save_xlogloc(const char *xloglocation);
static XLogRecPtr GetMinArchiveSlotLSN(void);
/* compressed Function */
static void SendCompressedFile(char* readFileName, int basePathLen, struct stat& statbuf, bool missingOk, int64* size);
/*
* save xlog location
*/
@@ -1101,6 +1105,35 @@ static bool IsDCFPath(const char *pathname)
return false;
}
#define SEND_DIR_ADD_SIZE(size, statbuf) ((size) = (size) + (((statbuf).st_size + 511) & ~511) + BUILD_PATH_LEN)
/**
 * send a regular file or a compressed file
 * @param sizeOnly when true, only account for the size without sending
 * @param pathbuf file path
 * @param pathBufLen length of pathbuf
 * @param basepathlen length of the base-path prefix stripped for the tar name
 * @param statbuf stat of the file
 */
static void SendRealFile(bool sizeOnly, char* pathbuf, size_t pathBufLen, int basepathlen, struct stat* statbuf)
{
int64 size = 0;
// we must ensure page integrity when incremental checkpoint is enabled
if (!sizeOnly && g_instance.attr.attr_storage.enableIncrementalCheckpoint &&
IsCompressedFile(pathbuf, strlen(pathbuf)) != COMPRESSED_TYPE_UNKNOWN) {
SendCompressedFile(pathbuf, basepathlen, (*statbuf), true, &size);
} else {
bool sent = false;
if (!sizeOnly) {
sent = sendFile(pathbuf, pathbuf + basepathlen + 1, statbuf, true);
}
if (sent || sizeOnly) {
/* Add size, rounded up to 512byte block */
SEND_DIR_ADD_SIZE(size, (*statbuf));
}
}
}
/*
* Include all files from the given directory in the output tar stream. If
* 'sizeonly' is true, we just calculate a total length and return it, without
@@ -1393,15 +1426,7 @@ static int64 sendDir(const char *path, int basepathlen, bool sizeonly, List *tab
if (!skip_this_dir)
size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks);
} else if (S_ISREG(statbuf.st_mode)) {
bool sent = false;
if (!sizeonly)
sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf, true);
if (sent || sizeonly) {
/* Add size, rounded up to 512byte block */
size = size + ((statbuf.st_size + 511) & ~511) + BUILD_PATH_LEN;
}
SendRealFile(sizeonly, pathbuf, strlen(pathbuf), basepathlen, &statbuf);
} else
ereport(WARNING, (errmsg("skipping special file \"%s\"", pathbuf)));
}
@@ -1528,6 +1553,15 @@ bool is_row_data_file(const char *path, int *segNo, UndoFileType *undoFileType)
int nmatch;
char *fname = NULL;
/* Skip compressed page files */
size_t pathLen = strlen(path);
if (pathLen >= 4) {
const char* suffix = path + pathLen - 4;
if (strncmp(suffix, "_pca", 4) == 0 || strncmp(suffix, "_pcd", 4) == 0) {
return false;
}
}
if ((fname = strstr((char *)path, "pg_tblspc/")) != NULL) {
nmatch = sscanf_s(fname, "pg_tblspc/%u/%*[^/]/%u/%s", &spcNode, &dbNode, buf, sizeof(buf));
if (nmatch == 3) {
@@ -1645,6 +1679,245 @@ static void SendTableSpaceForBackup(basebackup_options* opt, List* tablespaces,
}
}
/**
 * init buf_block if not yet allocated; repalloc PqSendBuffer if necessary
 */
static void SendFilePreInit(void)
{
if (t_thrd.basebackup_cxt.buf_block == NULL) {
MemoryContext oldcxt = MemoryContextSwitchTo(THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE));
t_thrd.basebackup_cxt.buf_block = (char *)palloc0(TAR_SEND_SIZE);
MemoryContextSwitchTo(oldcxt);
}
/*
* repalloc to `MaxBuildAllocSize' in one time, to avoid many small step repalloc in `pq_putmessage_noblock'
* and low performance.
*/
if (INT2SIZET(t_thrd.libpq_cxt.PqSendBufferSize) < MaxBuildAllocSize) {
t_thrd.libpq_cxt.PqSendBuffer = (char *)repalloc(t_thrd.libpq_cxt.PqSendBuffer, MaxBuildAllocSize);
t_thrd.libpq_cxt.PqSendBufferSize = MaxBuildAllocSize;
}
}
/**
 * check the file size and open the file
 * @param readFileName file to open
 * @param statbuf stat of the file
 * @param missingOk suppress the error if the file is not found
 * @return NULL if the file size exceeds MAX_TAR_MEMBER_FILELEN or the file cannot be found
 */
static FILE *SizeCheckAndAllocate(char *readFileName, const struct stat &statbuf, bool missingOk)
{
/*
* Some compilers will throw a warning knowing this test can never be true
* because pgoff_t can't exceed the compared maximum on their platform.
*/
if (statbuf.st_size > MAX_TAR_MEMBER_FILELEN) {
ereport(WARNING, (errcode(ERRCODE_NAME_TOO_LONG),
errmsg("archive member \"%s\" too large for tar format", readFileName)));
return NULL;
}
FILE *fp = AllocateFile(readFileName, "rb");
if (fp == NULL) {
if (errno == ENOENT && missingOk)
return NULL;
ereport(ERROR, (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", readFileName)));
}
return fp;
}
static void TransferPcaFile(const char *readFileName, int basePathLen, const struct stat &statbuf,
PageCompressHeader *transfer,
size_t len)
{
const char *tarfilename = readFileName + basePathLen + 1;
_tarWriteHeader(tarfilename, NULL, (struct stat*)(&statbuf));
char *data = (char *) transfer;
size_t lenBuffer = len;
while (lenBuffer > 0) {
size_t transferLen = Min(TAR_SEND_SIZE, lenBuffer);
if (pq_putmessage_noblock('d', data, transferLen)) {
ereport(ERROR, (errcode_for_file_access(), errmsg("base backup could not send data, aborting backup")));
}
data = data + transferLen;
lenBuffer -= transferLen;
}
size_t pad = ((len + 511) & ~511) - len;
if (pad > 0) {
securec_check(memset_s(t_thrd.basebackup_cxt.buf_block, pad, 0, pad), "", "");
(void) pq_putmessage_noblock('d', t_thrd.basebackup_cxt.buf_block, pad);
}
}
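The ((len + 511) & ~511) - len expression above computes the zero padding that rounds a tar member up to the next 512-byte block. A minimal sketch of the arithmetic (the helper name is illustrative):

#include <cstddef>
#include <cstdio>

// Round len up to the next 512-byte tar block and return the padding needed.
static std::size_t TarPadding(std::size_t len)
{
    return ((len + 511) & ~static_cast<std::size_t>(511)) - len;
}

int main()
{
    // 1 byte needs 511 bytes of padding; 512 needs none; 513 needs 511 again.
    std::printf("%zu %zu %zu\n", TarPadding(1), TarPadding(512), TarPadding(513));
    return 0;
}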
static void FileStat(char* path, struct stat* fileStat)
{
if (stat(path, fileStat) != 0) {
if (errno != ENOENT) {
ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file or directory \"%s\": %m", path)));
}
}
}
static void SendCompressedFile(char* readFileName, int basePathLen, struct stat& statbuf, bool missingOk, int64* size)
{
char* tarfilename = readFileName + basePathLen + 1;
SendFilePreInit();
FILE* fp = SizeCheckAndAllocate(readFileName, statbuf, missingOk);
if (fp == NULL) {
return;
}
size_t readFileNameLen = strlen(readFileName);
/* only send _pcd files here; the matching _pca file is transferred separately */
if (readFileNameLen < 4 || strncmp(readFileName + readFileNameLen - 4, "_pca", 4) == 0 ||
strncmp(readFileName + readFileNameLen - 4, "_pcd", 4) != 0) {
FreeFile(fp);
return;
}
char tablePath[MAXPGPATH] = {0};
securec_check_c(memcpy_s(tablePath, MAXPGPATH, readFileName, readFileNameLen - 4), "", "");
int segmentNo = 0;
UndoFileType undoFileType = UNDO_INVALID;
if (!is_row_data_file(tablePath, &segmentNo, &undoFileType)) {
ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("%s is not a relation file.", tablePath)));
}
char pcaFilePath[MAXPGPATH];
securec_check_c(memcpy_s(pcaFilePath, MAXPGPATH, readFileName, readFileNameLen), "", "");
pcaFilePath[readFileNameLen - 1] = 'a';
FILE* pcaFile = AllocateFile(pcaFilePath, "rb");
if (pcaFile == NULL) {
if (errno == ENOENT && missingOk) {
FreeFile(fp);
return;
}
ereport(ERROR, (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", pcaFilePath)));
}
uint16 chunkSize = ReadChunkSize(pcaFile, pcaFilePath, MAXPGPATH);
struct stat pcaStruct;
FileStat((char*)pcaFilePath, &pcaStruct);
size_t pcaFileLen = SIZE_OF_PAGE_COMPRESS_ADDR_FILE(chunkSize);
PageCompressHeader* map = pc_mmap_real_size(fileno(pcaFile), pcaFileLen, true);
if (map == MAP_FAILED) {
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
errmsg("Failed to mmap page compression address file %s: %m", pcaFilePath)));
}
PageCompressHeader* transfer = (PageCompressHeader*)palloc0(pcaFileLen);
/* decompressed page buffer, avoid frequent allocation */
BlockNumber blockNum = 0;
size_t chunkIndex = 1;
off_t totalLen = 0;
off_t sendLen = 0;
/* send the tar header carrying metadata such as the file size */
BlockNumber totalBlockNum = (BlockNumber)pg_atomic_read_u32(&map->nblocks);
/* some chunks may have been allocated but not used;
 * reserve extra chunks to avoid errors when a compressed block grows (currently 0) */
auto reservedChunks = 0;
securec_check(memcpy_s(transfer, pcaFileLen, map, pcaFileLen), "", "");
decltype(statbuf.st_size) realSize = (map->allocated_chunks + reservedChunks) * chunkSize;
statbuf.st_size = statbuf.st_size >= realSize ? statbuf.st_size : realSize;
_tarWriteHeader(tarfilename, NULL, (struct stat*)(&statbuf));
bool* onlyExtend = (bool*)palloc0(totalBlockNum * sizeof(bool));
/* allocated in advance to avoid repeated allocation */
char pageBuffer[BLCKSZ];
ReadBlockChunksStruct rbStruct{map, pageBuffer, BLCKSZ, fp, segmentNo, readFileName};
for (blockNum = 0; blockNum < totalBlockNum; blockNum++) {
PageCompressAddr* addr = GET_PAGE_COMPRESS_ADDR(transfer, chunkSize, blockNum);
/* skip blocks that were only extended; their data size is 0 */
if (addr->nchunks == 0) {
onlyExtend[blockNum] = true;
continue;
}
/* read block to t_thrd.basebackup_cxt.buf_block */
size_t bufferSize = TAR_SEND_SIZE - sendLen;
size_t len = ReadAllChunkOfBlock(t_thrd.basebackup_cxt.buf_block + sendLen, bufferSize, blockNum, rbStruct);
/* merge Blocks */
sendLen += len;
if (totalLen + (off_t)len > statbuf.st_size) {
ReleaseMap(map, readFileName);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("some blocks in %s had been changed. Retry backup please. PostBlocks:%u, currentReadBlocks "
":%u, transferSize: %lu. totalLen: %lu, len: %lu",
readFileName,
totalBlockNum,
blockNum,
statbuf.st_size,
totalLen,
len)));
}
if (sendLen > TAR_SEND_SIZE - BLCKSZ) {
if (pq_putmessage_noblock('d', t_thrd.basebackup_cxt.buf_block, sendLen)) {
ReleaseMap(map, readFileName);
ereport(ERROR, (errcode_for_file_access(), errmsg("base backup could not send data, aborting backup")));
}
sendLen = 0;
}
uint8 nchunks = len / chunkSize;
addr->nchunks = addr->allocated_chunks = nchunks;
for (size_t i = 0; i < nchunks; i++) {
addr->chunknos[i] = chunkIndex++;
}
addr->checksum = AddrChecksum32(blockNum, addr);
totalLen += len;
}
ReleaseMap(map, readFileName);
if (sendLen != 0) {
if (pq_putmessage_noblock('d', t_thrd.basebackup_cxt.buf_block, sendLen)) {
ereport(ERROR, (errcode_for_file_access(), errmsg("base backup could not send data, aborting backup")));
}
}
/* If the file was truncated while we were sending it, pad it with zeros */
if (totalLen < statbuf.st_size) {
securec_check(memset_s(t_thrd.basebackup_cxt.buf_block, TAR_SEND_SIZE, 0, TAR_SEND_SIZE), "", "");
while (totalLen < statbuf.st_size) {
size_t cnt = Min(TAR_SEND_SIZE, statbuf.st_size - totalLen);
(void)pq_putmessage_noblock('d', t_thrd.basebackup_cxt.buf_block, cnt);
totalLen += cnt;
}
}
size_t pad = ((totalLen + 511) & ~511) - totalLen;
if (pad > 0) {
securec_check(memset_s(t_thrd.basebackup_cxt.buf_block, pad, 0, pad), "", "");
(void)pq_putmessage_noblock('d', t_thrd.basebackup_cxt.buf_block, pad);
}
SEND_DIR_ADD_SIZE(*size, statbuf);
// allocate chunks for pages that were only extended
for (size_t blockNum = 0; blockNum < totalBlockNum; ++blockNum) {
if (onlyExtend[blockNum]) {
PageCompressAddr* addr = GET_PAGE_COMPRESS_ADDR(transfer, chunkSize, blockNum);
for (size_t i = 0; i < addr->allocated_chunks; i++) {
addr->chunknos[i] = chunkIndex++;
}
}
}
transfer->nblocks = transfer->last_synced_nblocks = blockNum;
transfer->last_synced_allocated_chunks = transfer->allocated_chunks = chunkIndex;
TransferPcaFile(pcaFilePath, basePathLen, pcaStruct, transfer, pcaFileLen);
SEND_DIR_ADD_SIZE(*size, pcaStruct);
FreeFile(pcaFile);
FreeFile(fp);
pfree(transfer);
pfree(onlyExtend);
}
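SendCompressedFile streams each block's chunks back to back, so the copied address map (transfer) is renumbered to consecutive, 1-based chunk numbers in send order before it is shipped by TransferPcaFile. A minimal sketch of that renumbering with simplified types; the real code also recomputes per-address checksums and defers extend-only blocks:

#include <cstdint>
#include <vector>

// Simplified stand-in for PageCompressAddr: the chunk list of one block.
struct BlockAddr { std::vector<uint32_t> chunknos; };

// Renumber every block's chunks consecutively, mirroring chunkIndex above.
static uint32_t RepackAddresses(std::vector<BlockAddr>& addrs)
{
    uint32_t next = 1; // chunk numbers are 1-based
    for (BlockAddr& addr : addrs) {
        for (uint32_t& chunkno : addr.chunknos) {
            chunkno = next++;
        }
    }
    return next - 1; // chunks now allocated in the repacked pcd stream
}

int main()
{
    std::vector<BlockAddr> addrs = {{{9, 4}}, {{2}}}; // arbitrary old chunk numbers
    RepackAddresses(addrs);                           // -> {1, 2} and {3}
    return addrs[1].chunknos[0] == 3 ? 0 : 1;
}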
/*
* Given the member, write the TAR header & send the file.
*
@@ -1669,38 +1942,10 @@ static bool sendFile(char *readfilename, char *tarfilename, struct stat *statbuf
int retryCnt = 0;
UndoFileType undoFileType = UNDO_INVALID;
if (t_thrd.basebackup_cxt.buf_block == NULL) {
MemoryContext oldcxt = NULL;
oldcxt = MemoryContextSwitchTo(THREAD_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_STORAGE));
t_thrd.basebackup_cxt.buf_block = (char *)palloc0(TAR_SEND_SIZE);
MemoryContextSwitchTo(oldcxt);
}
/*
* repalloc to `MaxBuildAllocSize' in one time, to avoid many small step repalloc in `pq_putmessage_noblock'
* and low performance.
*/
if (INT2SIZET(t_thrd.libpq_cxt.PqSendBufferSize) < MaxBuildAllocSize) {
t_thrd.libpq_cxt.PqSendBuffer = (char *)repalloc(t_thrd.libpq_cxt.PqSendBuffer, MaxBuildAllocSize);
t_thrd.libpq_cxt.PqSendBufferSize = MaxBuildAllocSize;
}
/*
* Some compilers will throw a warning knowing this test can never be true
* because pgoff_t can't exceed the compared maximum on their platform.
*/
if (statbuf->st_size > MAX_TAR_MEMBER_FILELEN) {
ereport(WARNING, (errcode(ERRCODE_NAME_TOO_LONG),
errmsg("archive member \"%s\" too large for tar format", tarfilename)));
return false;
}
fp = AllocateFile(readfilename, "rb");
SendFilePreInit();
fp = SizeCheckAndAllocate(readfilename, *statbuf, missing_ok);
if (fp == NULL) {
if (errno == ENOENT && missing_ok)
return false;
ereport(ERROR, (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", readfilename)));
return false;
}
isNeedCheck = is_row_data_file(readfilename, &segNo, &undoFileType);

View File

@@ -9,7 +9,7 @@ ifneq "$(MAKECMDGOALS)" "clean"
endif
endif
endif
OBJS = md.o smgr.o smgrtype.o knl_uundofile.o segstore.o
OBJS = md.o smgr.o smgrtype.o knl_uundofile.o segstore.o page_compression.o mmap_shared.o
SUBDIRS = segment

File diff suppressed because it is too large

View File

@@ -0,0 +1,146 @@
/*
* Copyright (c) 2021 Huawei Technologies Co.,Ltd.
*
* openGauss is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
* ---------------------------------------------------------------------------------------
*
*
*
* IDENTIFICATION
* src/gausskernel/storage/smgr/mmap_shared.cpp
*
* ---------------------------------------------------------------------------------------
*/
#include "postgres.h"
#include "miscadmin.h"
#include "catalog/pg_type.h"
#include "utils/datum.h"
#include "utils/relcache.h"
#include "utils/memutils.h"
#include "utils/memprot.h"
#include "storage/page_compression.h"
#include "executor/executor.h"
#include "storage/vfd.h"
struct MmapEntry {
RelFileNodeForkNum relFileNodeForkNum;
/*
* the following are set at runtime
*/
size_t reference = 0;
PageCompressHeader *pcmap = NULL;
};
constexpr size_t LOCK_ARRAY_SIZE = 1024;
static pthread_mutex_t mmapLockArray[LOCK_ARRAY_SIZE];
static inline uint32 MmapTableHashCode(const RelFileNodeForkNum &relFileNodeForkNum)
{
return tag_hash((void *)&relFileNodeForkNum, sizeof(RelFileNodeForkNum));
}
static inline pthread_mutex_t *MmapPartitionLock(size_t hashCode)
{
return &mmapLockArray[hashCode % LOCK_ARRAY_SIZE];
}
static inline PageCompressHeader *MmapSharedMapFile(Vfd *vfdP, int chunkSize, bool readonly)
{
size_t pcMapSize = SIZE_OF_PAGE_COMPRESS_ADDR_FILE(chunkSize);
PageCompressHeader *map = pc_mmap_real_size(vfdP->fd, pcMapSize, false);
if (map == MAP_FAILED) {
ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
errmsg("Failed to mmap page compression address file %s: %m", vfdP->fileName)));
}
return map;
}
void RealInitialMMapLockArray()
{
for (size_t i = 0; i < LOCK_ARRAY_SIZE; ++i) {
pthread_mutex_init(&mmapLockArray[i], NULL);
}
HASHCTL ctl;
/* hash accessed by database file id */
errno_t rc = memset_s(&ctl, sizeof(ctl), 0, sizeof(ctl));
securec_check(rc, "", "");
ctl.keysize = sizeof(RelFileNodeForkNum);
ctl.entrysize = sizeof(MmapEntry);
ctl.hash = tag_hash;
ctl.num_partitions = LOCK_ARRAY_SIZE;
const size_t initLen = 256;
g_instance.mmapCache = HeapMemInitHash(
"mmap hash", initLen,
(Max(g_instance.attr.attr_common.max_files_per_process, t_thrd.storage_cxt.max_userdatafiles)) / 2, &ctl,
HASH_ELEM | HASH_FUNCTION | HASH_PARTITION);
}
PageCompressHeader *GetPageCompressHeader(void *vfd, int chunkSize, const RelFileNodeForkNum &relFileNodeForkNum)
{
if (IsInitdb && g_instance.mmapCache == NULL) {
RealInitialMMapLockArray();
}
Vfd *currentVfd = (Vfd *)vfd;
uint32 hashCode = MmapTableHashCode(relFileNodeForkNum);
AutoMutexLock mmapLock(MmapPartitionLock(hashCode));
mmapLock.lock();
bool find = false;
MmapEntry *mmapEntry = (MmapEntry *)hash_search_with_hash_value(g_instance.mmapCache, (void *)&relFileNodeForkNum,
hashCode, HASH_ENTER, &find);
if (!find) {
mmapEntry->pcmap = NULL;
mmapEntry->reference = 0;
}
if (mmapEntry->pcmap == NULL) {
mmapEntry->pcmap = MmapSharedMapFile(currentVfd, chunkSize, false);
}
++mmapEntry->reference;
mmapLock.unLock();
return mmapEntry->pcmap;
}
void UnReferenceAddrFile(void *vfd)
{
Vfd *currentVfd = (Vfd *)vfd;
RelFileNodeForkNum relFileNodeForkNum = currentVfd->fileNode;
uint32 hashCode = MmapTableHashCode(relFileNodeForkNum);
AutoMutexLock mmapLock(MmapPartitionLock(hashCode));
mmapLock.lock();
MmapEntry *mmapEntry = (MmapEntry *)hash_search_with_hash_value(g_instance.mmapCache, (void *)&relFileNodeForkNum,
hashCode, HASH_FIND, NULL);
if (mmapEntry == NULL) {
ereport(ERROR, (errcode_for_file_access(),
errmsg("UnReferenceAddrFile failed! mmap not found, filePath: %s", currentVfd->fileName)));
}
/* reference is unsigned; guard against underflow before decrementing */
if (mmapEntry->reference == 0) {
ereport(FATAL, (errcode_for_file_access(),
errmsg("UnReferenceAddrFile failed! reference underflow, filePath: %s", currentVfd->fileName)));
}
--mmapEntry->reference;
if (mmapEntry->reference == 0) {
if (pc_munmap(mmapEntry->pcmap) != 0) {
ereport(ERROR,
(errcode_for_file_access(), errmsg("could not munmap file \"%s\": %m", currentVfd->fileName)));
}
if (hash_search_with_hash_value(g_instance.mmapCache, (void *)&relFileNodeForkNum, hashCode, HASH_REMOVE,
NULL) == NULL) {
ereport(ERROR,
(errcode_for_file_access(),
errmsg("UnReferenceAddrFile failed! remove hash key failed, filePath: %s", currentVfd->fileName)));
}
}
mmapLock.unLock();
}
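Together, GetPageCompressHeader and UnReferenceAddrFile implement a process-wide, reference-counted cache of pca mappings: the first opener mmaps the address file, later openers share that mapping, and the last closer munmaps it. A minimal sketch of the same idea with standard containers; in the kernel the key is RelFileNodeForkNum and the table is a partitioned hash, while an int key and a single mutex stand in here:

#include <cstddef>
#include <map>
#include <mutex>

struct Mapping { void* addr = nullptr; std::size_t refs = 0; };

static std::map<int, Mapping> g_cache; // key stands in for RelFileNodeForkNum
static std::mutex g_lock;

void* AcquireMapping(int key, void* (*doMmap)(int))
{
    std::lock_guard<std::mutex> guard(g_lock);
    Mapping& m = g_cache[key];
    if (m.addr == nullptr) {
        m.addr = doMmap(key); // mmap only on the first reference
    }
    ++m.refs;
    return m.addr;
}

void ReleaseMapping(int key, void (*doMunmap)(void*))
{
    std::lock_guard<std::mutex> guard(g_lock);
    auto it = g_cache.find(key);
    if (it == g_cache.end() || it->second.refs == 0) {
        return; // never acquired; the kernel version reports an error here
    }
    if (--it->second.refs == 0) {
        doMunmap(it->second.addr); // last reference unmaps and drops the entry
        g_cache.erase(it);
    }
}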

View File

@@ -0,0 +1,472 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved.
* Copyright (c) 2020, PostgreSQL Global Development Group
*
* openGauss is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
* -------------------------------------------------------------------------
*
* page_compression.cpp
* Routines for page compression
*
* There are two implementations at the moment: zstd, and the Postgres
* pg_lzcompress(). zstd support requires that the server was compiled
* with --with-zstd.
* IDENTIFICATION
* ./src/gausskernel/storage/smgr/page_compression.cpp
*
* -------------------------------------------------------------------------
*/
#include "postgres.h"
#include "miscadmin.h"
#include "catalog/pg_type.h"
#include "utils/datum.h"
#include "utils/relcache.h"
#include "utils/timestamp.h"
#include "storage/checksum.h"
#include "storage/page_compression.h"
#include "storage/page_compression_impl.h"
static void CheckHeaderOfCompressAddr(PageCompressHeader* pcMap, uint16 chunk_size, uint8 algorithm, const char* path)
{
if (pcMap->chunk_size != chunk_size || pcMap->algorithm != algorithm) {
if (u_sess->attr.attr_security.zero_damaged_pages) {
ereport(WARNING,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("invalid chunk_size %u or algorithm %u in head of compress relation address file \"%s\", "
"and reinitialized it.",
pcMap->chunk_size,
pcMap->algorithm,
path)));
pcMap->algorithm = algorithm;
pg_atomic_write_u32(&pcMap->nblocks, RELSEG_SIZE);
pg_atomic_write_u32(&pcMap->allocated_chunks, 0);
pg_atomic_write_u32(&pcMap->last_synced_allocated_chunks, 0);
pcMap->chunk_size = chunk_size;
} else {
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("invalid chunk_size %u or algorithm %u in head of compress relation address file \"%s\"",
pcMap->chunk_size,
pcMap->algorithm,
path)));
}
}
}
void CheckAndRepairCompressAddress(PageCompressHeader *pcMap, uint16 chunk_size, uint8 algorithm, const char *path)
{
TimestampTz lastRecoveryTime = pcMap->last_recovery_start_time;
TimestampTz pgStartTime = t_thrd.time_cxt.pg_start_time;
errno_t rc;
/* if the relation has already been checked since this startup, skip */
if (lastRecoveryTime == pgStartTime) {
return;
}
/* check head of compress address file */
CheckHeaderOfCompressAddr(pcMap, chunk_size, algorithm, path);
uint32 nblocks = pg_atomic_read_u32(&pcMap->nblocks);
uint32 allocated_chunks = pg_atomic_read_u32(&pcMap->allocated_chunks);
BlockNumber *global_chunknos = (BlockNumber *)palloc0(MAX_CHUNK_NUMBER(chunk_size) * sizeof(BlockNumber));
BlockNumber max_blocknum = (BlockNumber)-1;
BlockNumber max_nonzero_blocknum = (BlockNumber)-1;
BlockNumber max_allocated_chunkno = (pc_chunk_number_t)0;
/* check the compress address of every page */
for (BlockNumber blocknum = 0; blocknum < (BlockNumber)RELSEG_SIZE; ++blocknum) {
PageCompressAddr *pcAddr = GET_PAGE_COMPRESS_ADDR(pcMap, chunk_size, blocknum);
if (pcAddr->checksum != AddrChecksum32(blocknum, pcAddr)) {
ereport(WARNING, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("invalid checkum %u of block %u in file \"%s\"",
pcAddr->checksum, blocknum, path)));
pcAddr->allocated_chunks = pcAddr->nchunks = 0;
for (int i = 0; i < BLCKSZ / chunk_size; ++i) {
pcAddr->chunknos[i] = 0;
}
pcAddr->checksum = 0;
}
/*
* skip when the first zero-filled block after nblocks is found
* if(blocknum >= (BlockNumber)nblocks && pcAddr->allocated_chunks == 0)
* break;
*/
/* check allocated_chunks for one page */
if (pcAddr->allocated_chunks > BLCKSZ / chunk_size) {
if (u_sess->attr.attr_security.zero_damaged_pages) {
rc = memset_s((void *)pcAddr, SIZE_OF_PAGE_COMPRESS_ADDR(chunk_size), 0,
SIZE_OF_PAGE_COMPRESS_ADDR(chunk_size));
securec_check_c(rc, "\0", "\0");
ereport(WARNING, (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("invalid allocated_chunks %u of block %u in file \"%s\", and zero this block",
pcAddr->allocated_chunks, blocknum, path)));
continue;
} else {
pfree(global_chunknos);
ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("invalid allocated_chunks %u of block %u in file \"%s\"",
pcAddr->allocated_chunks, blocknum, path)));
}
}
/* check chunknos for one page */
for (int i = 0; i < pcAddr->allocated_chunks; ++i) {
/* check for invalid chunkno */
if (pcAddr->chunknos[i] == 0 || pcAddr->chunknos[i] > MAX_CHUNK_NUMBER(chunk_size)) {
if (u_sess->attr.attr_security.zero_damaged_pages) {
rc = memset_s((void *)pcAddr, SIZE_OF_PAGE_COMPRESS_ADDR(chunk_size), 0,
SIZE_OF_PAGE_COMPRESS_ADDR(chunk_size));
securec_check_c(rc, "\0", "\0");
ereport(WARNING, (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("invalid chunk number %u of block %u in file \"%s\", and zero this block",
pcAddr->chunknos[i], blocknum, path)));
continue;
} else {
pfree(global_chunknos);
ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("invalid chunk number %u of block %u in file \"%s\"", pcAddr->chunknos[i],
blocknum, path)));
}
}
/* check for duplicate chunkno */
if (global_chunknos[pcAddr->chunknos[i] - 1] != 0) {
if (u_sess->attr.attr_security.zero_damaged_pages) {
rc = memset_s((void *)pcAddr, SIZE_OF_PAGE_COMPRESS_ADDR(chunk_size), 0,
SIZE_OF_PAGE_COMPRESS_ADDR(chunk_size));
securec_check_c(rc, "\0", "\0");
ereport(
WARNING,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg(
"chunk number %u of block %u duplicate with block %u in file \"%s\", and zero this block",
pcAddr->chunknos[i], blocknum, global_chunknos[pcAddr->chunknos[i] - 1], path)));
continue;
} else {
pfree(global_chunknos);
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("chunk number %u of block %u duplicate with block %u in file \"%s\"",
pcAddr->chunknos[i], blocknum, global_chunknos[pcAddr->chunknos[i] - 1], path)));
}
}
}
/* clean chunknos beyond allocated_chunks for one page */
for (int i = pcAddr->allocated_chunks; i < BLCKSZ / chunk_size; ++i) {
if (pcAddr->chunknos[i] != 0) {
pc_chunk_number_t staleChunkno = pcAddr->chunknos[i]; /* capture before clearing for the warning */
pcAddr->chunknos[i] = 0;
ereport(WARNING, (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("clear chunk number %u beyond allocated_chunks %u of block %u in file \"%s\"",
staleChunkno, pcAddr->allocated_chunks, blocknum, path)));
}
}
/* check nchunks for one page */
if (pcAddr->nchunks > pcAddr->allocated_chunks) {
if (u_sess->attr.attr_security.zero_damaged_pages) {
rc = memset_s((void *)pcAddr, SIZE_OF_PAGE_COMPRESS_ADDR(chunk_size), 0,
SIZE_OF_PAGE_COMPRESS_ADDR(chunk_size));
securec_check_c(rc, "\0", "\0");
ereport(
WARNING,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("nchunks %u exceeds allocated_chunks %u of block %u in file \"%s\", and zero this block",
pcAddr->nchunks, pcAddr->allocated_chunks, blocknum, path)));
continue;
} else {
pfree(global_chunknos);
ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("nchunks %u exceeds allocated_chunks %u of block %u in file \"%s\"",
pcAddr->nchunks, pcAddr->allocated_chunks, blocknum, path)));
}
}
max_blocknum = blocknum;
if (pcAddr->nchunks > 0) {
max_nonzero_blocknum = blocknum;
}
for (int i = 0; i < pcAddr->allocated_chunks; ++i) {
global_chunknos[pcAddr->chunknos[i] - 1] = blocknum + 1;
if (pcAddr->chunknos[i] > max_allocated_chunkno) {
max_allocated_chunkno = pcAddr->chunknos[i];
}
}
}
int unused_chunks = 0;
/* check for holes in allocated chunks */
for (BlockNumber i = 0; i < max_allocated_chunkno; i++) {
if (global_chunknos[i] == 0) {
unused_chunks++;
}
}
if (unused_chunks > 0) {
ereport(WARNING, (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("there are %u chunks of total allocated chunks %u can not be use in file \"%s\"",
unused_chunks, max_allocated_chunkno, path),
errhint("You may need to run VACUMM FULL to optimize space allocation.")));
}
/* update nblocks in head of compressed file */
if (nblocks < max_nonzero_blocknum + 1) {
pg_atomic_write_u32(&pcMap->nblocks, max_nonzero_blocknum + 1);
pg_atomic_write_u32(&pcMap->last_synced_nblocks, max_nonzero_blocknum + 1);
ereport(WARNING, (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("update nblocks head of compressed file \"%s\". old: %u, new: %u", path, nblocks,
max_nonzero_blocknum + 1)));
}
/* update allocated_chunks in head of compress file */
if (allocated_chunks != max_allocated_chunkno) {
pg_atomic_write_u32(&pcMap->allocated_chunks, max_allocated_chunkno);
pg_atomic_write_u32(&pcMap->last_synced_allocated_chunks, max_allocated_chunkno);
ereport(WARNING, (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("update allocated_chunks in head of compressed file \"%s\". old: %u, new: %u", path,
allocated_chunks, max_allocated_chunkno)));
}
/* clean compress address after max_blocknum + 1 */
for (BlockNumber blocknum = max_blocknum + 1; blocknum < (BlockNumber)RELSEG_SIZE; blocknum++) {
char buf[128];
char *p = NULL;
PageCompressAddr *pcAddr = GET_PAGE_COMPRESS_ADDR(pcMap, chunk_size, blocknum);
/* skip zero block */
if (pcAddr->allocated_chunks == 0 && pcAddr->nchunks == 0) {
continue;
}
/* clean compress address and output content of the address */
rc = memset_s(buf, sizeof(buf), 0, sizeof(buf));
securec_check_c(rc, "\0", "\0");
p = buf;
for (int i = 0; i < pcAddr->allocated_chunks; i++) {
if (pcAddr->chunknos[i]) {
const char *formatStr = i == 0 ? "%u" : ",%u";
errno_t rc =
snprintf_s(p, sizeof(buf) - (p - buf), sizeof(buf) - (p - buf) - 1, formatStr, pcAddr->chunknos[i]);
securec_check_ss(rc, "\0", "\0");
p += strlen(p);
}
}
/* capture the old values before memset_s zeroes them for the warning below */
uint8 oldAllocatedChunks = pcAddr->allocated_chunks;
uint8 oldNchunks = pcAddr->nchunks;
rc =
memset_s((void *)pcAddr, SIZE_OF_PAGE_COMPRESS_ADDR(chunk_size), 0, SIZE_OF_PAGE_COMPRESS_ADDR(chunk_size));
securec_check_c(rc, "\0", "\0");
ereport(WARNING, (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("clean unused compress address of block %u in file \"%s\", old "
"allocated_chunks/nchunks/chunknos: %u/%u/{%s}",
blocknum, path, oldAllocatedChunks, oldNchunks, buf)));
}
pfree(global_chunknos);
if (pc_msync(pcMap) != 0) {
ereport(ERROR, (errcode_for_file_access(), errmsg("could not msync file \"%s\": %m", path)));
}
pcMap->last_recovery_start_time = pgStartTime;
}
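The repair pass detects chunk-ownership conflicts with a single array indexed by chunk number: global_chunknos[chunkno - 1] records blockno + 1 for the claiming block, so an out-of-range or already-claimed chunk is caught in O(1). A minimal sketch of that bookkeeping (simplified types, without the zero_damaged_pages repair path):

#include <cstdint>
#include <vector>

// owner[chunkno - 1] stores blockno + 1; 0 means the chunk is unclaimed.
static bool ClaimChunks(const std::vector<uint32_t>& chunknos, uint32_t blockno,
                        std::vector<uint32_t>& owner)
{
    for (uint32_t chunkno : chunknos) {
        if (chunkno == 0 || chunkno > owner.size()) {
            return false; // invalid chunk number
        }
        if (owner[chunkno - 1] != 0) {
            return false; // duplicate: already claimed by block owner[chunkno - 1] - 1
        }
        owner[chunkno - 1] = blockno + 1;
    }
    return true;
}

int main()
{
    std::vector<uint32_t> owner(8, 0);
    bool ok = ClaimChunks({1, 2}, 0, owner) && ClaimChunks({2, 3}, 1, owner);
    return ok ? 1 : 0; // the second claim of chunk 2 must fail
}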
int64 CalculateMainForkSize(char* pathName, RelFileNode* rnode, ForkNumber forkNumber)
{
Assert(IS_COMPRESSED_RNODE((*rnode), forkNumber));
Assert(rnode->bucketNode == -1);
return CalculateCompressMainForkSize(pathName);
}
void CopyCompressedPath(char dst[MAXPGPATH], const char* pathName, CompressedFileType compressFileType)
{
int rc;
if (compressFileType == COMPRESSED_TABLE_PCA_FILE) {
rc = snprintf_s(dst, MAXPGPATH, MAXPGPATH - 1, PCA_SUFFIX, pathName);
} else {
rc = snprintf_s(dst, MAXPGPATH, MAXPGPATH - 1, PCD_SUFFIX, pathName);
}
securec_check_ss(rc, "\0", "\0");
}
int64 CalculateCompressMainForkSize(char* pathName, bool suppressedENOENT)
{
int64 totalsize = 0;
char pcFilePath[MAXPGPATH];
CopyCompressedPath(pcFilePath, pathName, COMPRESSED_TABLE_PCA_FILE);
totalsize += CalculateFileSize(pcFilePath, MAXPGPATH, suppressedENOENT);
CopyCompressedPath(pcFilePath, pathName, COMPRESSED_TABLE_PCD_FILE);
totalsize += CalculateFileSize(pcFilePath, MAXPGPATH, suppressedENOENT);
return totalsize;
}
uint16 ReadChunkSize(FILE* pcaFile, char* pcaFilePath, size_t len)
{
uint16 chunkSize;
if (fseeko(pcaFile, (off_t)offsetof(PageCompressHeader, chunk_size), SEEK_SET) != 0) {
ereport(ERROR,
(errcode_for_file_access(), errmsg("could not seek in file \"%s\": \"%lu\": %m", pcaFilePath, len)));
}
if (fread(&chunkSize, sizeof(chunkSize), 1, pcaFile) <= 0) {
ereport(ERROR,
(errcode_for_file_access(), errmsg("could not open file \"%s\": \"%lu\": %m", pcaFilePath, len)));
}
return chunkSize;
}
int64 CalculateFileSize(char* pathName, size_t size, bool suppressedENOENT)
{
struct stat structstat;
if (stat(pathName, &structstat)) {
if (errno == ENOENT) {
if (suppressedENOENT) {
return 0;
}
ereport(ERROR, (errcode_for_file_access(), errmsg("could not FIND file \"%s\": %m", pathName)));
} else {
ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", pathName)));
}
}
return structstat.st_size;
}
uint1 ConvertChunkSize(uint32 compressedChunkSize, bool *success)
{
uint1 chunkSize = INDEX_OF_HALF_BLCKSZ;
switch (compressedChunkSize) {
case BLCKSZ / 2:
chunkSize = INDEX_OF_HALF_BLCKSZ;
break;
case BLCKSZ / 4:
chunkSize = INDEX_OF_QUARTER_BLCKSZ;
break;
case BLCKSZ / 8:
chunkSize = INDEX_OF_EIGHTH_BRICK_BLCKSZ;
break;
case BLCKSZ / 16:
chunkSize = INDEX_OF_SIXTEENTHS_BLCKSZ;
break;
default:
*success = false;
return chunkSize;
}
*success = true;
return chunkSize;
}
constexpr int MAX_RETRY_LIMIT = 60;
constexpr long RETRY_SLEEP_TIME = 1000000L;
size_t ReadAllChunkOfBlock(char *dst, size_t destLen, BlockNumber blockNumber, ReadBlockChunksStruct& rbStruct)
{
PageCompressHeader* header = rbStruct.header;
if (blockNumber >= header->nblocks) {
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("blocknum \"%u\" exceeds max block number", blockNumber)));
}
char* pageBuffer = rbStruct.pageBuffer;
const char* fileName = rbStruct.fileName;
decltype(PageCompressHeader::chunk_size) chunkSize = header->chunk_size;
decltype(ReadBlockChunksStruct::segmentNo) segmentNo = rbStruct.segmentNo;
PageCompressAddr* currentAddr = GET_PAGE_COMPRESS_ADDR(header, chunkSize, blockNumber);
size_t tryCount = 0;
/* for empty chunks write */
uint8 allocatedChunks;
uint8 nchunks;
do {
allocatedChunks = currentAddr->allocated_chunks;
nchunks = currentAddr->nchunks;
for (uint8 i = 0; i < nchunks; ++i) {
off_t seekPos = (off_t)OFFSET_OF_PAGE_COMPRESS_CHUNK(chunkSize, currentAddr->chunknos[i]);
uint8 start = i;
while (i < nchunks - 1 && currentAddr->chunknos[i + 1] == currentAddr->chunknos[i] + 1) {
i++;
}
if (fseeko(rbStruct.fp, seekPos, SEEK_SET) != 0) {
ReleaseMap(header, fileName);
ereport(ERROR, (errcode_for_file_access(), errmsg("could not seek in file \"%s\": %m", fileName)));
}
size_t readAmount = chunkSize * (i - start + 1);
if (fread(dst + start * chunkSize, 1, readAmount, rbStruct.fp) != readAmount && ferror(rbStruct.fp)) {
ReleaseMap(header, fileName);
ereport(ERROR, (errcode_for_file_access(), errmsg("could not read file \"%s\": %m", fileName)));
}
}
if (nchunks == 0) {
break;
}
if (DecompressPage(dst, pageBuffer, header->algorithm) == BLCKSZ) {
PageHeader phdr = PageHeader(pageBuffer);
BlockNumber blkNo = blockNumber + segmentNo * ((BlockNumber)RELSEG_SIZE);
if (PageIsNew(phdr) || pg_checksum_page(pageBuffer, blkNo) == phdr->pd_checksum) {
break;
}
}
if (tryCount < MAX_RETRY_LIMIT) {
++tryCount;
pg_usleep(RETRY_SLEEP_TIME);
} else {
ReleaseMap(header, fileName);
ereport(ERROR,
(errcode_for_file_access(),
errmsg("base backup cheksum or Decompressed blockno %u failed in file \"%s\", aborting backup. "
"nchunks: %u, allocatedChunks: %u, segno: %d.",
blockNumber,
fileName,
nchunks,
allocatedChunks,
segmentNo)));
}
} while (true);
if (allocatedChunks > nchunks) {
auto currentWriteSize = nchunks * chunkSize;
securec_check(
memset_s(dst + currentWriteSize, destLen - currentWriteSize, 0, (allocatedChunks - nchunks) * chunkSize),
"",
"");
}
return allocatedChunks * chunkSize;
}
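ReadAllChunkOfBlock coalesces runs of consecutive chunk numbers so each run costs one fseeko()/fread() pair instead of one read per chunk. A minimal sketch of just the run detection (hypothetical types):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

struct Run { uint32_t firstChunk; uint32_t count; };

// Group consecutive chunk numbers; each run maps to a single seek-and-read.
static std::vector<Run> CoalesceChunks(const std::vector<uint32_t>& chunknos)
{
    std::vector<Run> runs;
    for (std::size_t i = 0; i < chunknos.size(); ++i) {
        std::size_t start = i;
        while (i + 1 < chunknos.size() && chunknos[i + 1] == chunknos[i] + 1) {
            ++i;
        }
        runs.push_back({chunknos[start], static_cast<uint32_t>(i - start + 1)});
    }
    return runs;
}

int main()
{
    // Chunks 7,8,9 are contiguous and read in one shot; chunk 12 needs its own read.
    for (const Run& r : CoalesceChunks({7, 8, 9, 12})) {
        std::printf("read %u chunk(s) starting at chunk %u\n", r.count, r.firstChunk);
    }
    return 0;
}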
CompressedFileType IsCompressedFile(char *fileName, size_t fileNameLen)
{
size_t suffixLen = 4;
if (fileNameLen >= suffixLen) {
const char *suffix = fileName + fileNameLen - suffixLen;
if (strncmp(suffix, "_pca", suffixLen) == 0) {
return COMPRESSED_TABLE_PCA_FILE;
} else if (strncmp(suffix, "_pcd", suffixLen) == 0) {
return COMPRESSED_TABLE_PCD_FILE;
}
}
return COMPRESSED_TYPE_UNKNOWN;
}
void ReleaseMap(PageCompressHeader* map, const char* fileName)
{
if (map != NULL && pc_munmap(map) != 0) {
ereport(WARNING, (errcode_for_file_access(), errmsg("could not munmap file \"%s\": %m", fileName)));
}
}

View File

@@ -31,7 +31,8 @@
typedef enum BufTagVer {
ORIGIN_TAG = 0,
HASHBUCKET_TAG
HASHBUCKET_TAG,
PAGE_COMPRESS_TAG
} BufTagVer;
typedef struct st_dw_batch {

View File

@@ -20,8 +20,10 @@
#include "storage/buf/block.h"
#include "storage/buf/buf.h"
#include "storage/buf/bufpage.h"
#include "storage/page_compression.h"
#include "storage/smgr/relfilenode.h"
struct XLogPhyBlock;
/*
* The minimum size of the WAL construction working area. If you need to
@@ -47,7 +49,7 @@ struct XLogPhyBlock;
* is taken */
/* prototypes for public functions in xloginsert.c: */
extern void XLogBeginInsert(void);
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, bool isupgrade = false, int bucket_id = InvalidBktId,
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, bool isupgrade = false, int bucket_id = InvalidBktId,
bool isSwitchoverBarrier = false);
extern void XLogEnsureRecordSpace(int nbuffers, int ndatas);
extern void XLogRegisterData(char* data, int len);

View File

@@ -59,6 +59,7 @@ typedef void (*relasexlogreadstate)(void* record);
#define XLogBlockHeadGetForkNum(blockhead) ((blockhead)->forknum)
#define XLogBlockHeadGetBlockNum(blockhead) ((blockhead)->blkno)
#define XLogBlockHeadGetBucketId(blockhead) ((blockhead)->bucketNode)
#define XLogBlockHeadGetCompressOpt(blockhead) ((blockhead)->opt)
#define XLogBlockHeadGetValidInfo(blockhead) ((blockhead)->block_valid)
#define XLogBlockHeadGetPhysicalBlock(blockhead) ((blockhead)->pblk)
/* for common blockhead end */
@@ -492,7 +493,8 @@ typedef struct {
TransactionId xl_xid; /* xact id */
Oid spcNode; /* tablespace */
Oid dbNode; /* database */
int4 bucketNode; /* bucket */
int2 bucketNode; /* bucket */
uint2 opt;
XLogPhyBlock pblk;
} XLogBlockHead;

View File

@@ -37,7 +37,8 @@
*/
#define XLR_SPECIAL_REL_UPDATE 0x01
#define XLR_BTREE_UPGRADE_FLAG 0x02
/* If xlog record is the compress table creation */
#define XLR_REL_COMPRESS 0X04
#define XLR_IS_TOAST 0X08
/* If xlog record is from toast page */
@@ -84,7 +85,7 @@ typedef struct XLogRecordBlockHeader {
#define BKID_HAS_TDE_PAGE (0x40)
#define BKID_GET_BKID(id) (id & 0x3F)
/*
/*
* In segment-page storage, RelFileNode and block number are logic for XLog. Thus, we need record
* physical location in xlog. This macro is used to check whether in such situation.
*/

View File

@@ -79,12 +79,14 @@ extern Relation heap_create(const char *relname,
bool mapped_relation,
bool allow_system_table_mods,
int8 row_compress,
Datum reloptions,
Oid ownerid,
bool skip_create_storage,
TableAmType tam_type,
int8 relindexsplit = 0,
StorageType storage_type = HEAP_DISK,
bool newcbi = false);
bool newcbi = false,
Oid accessMethodObjectId = 0);
extern bool heap_is_matview_init_state(Relation rel);
@@ -97,7 +99,9 @@ heapCreatePartition(const char* part_name,
Oid bucketOid,
Oid ownerid,
StorageType storage_type,
bool newcbi = false);
bool newcbi = false,
Datum reloptions = Datum(0));
extern Oid heap_create_with_catalog(const char *relname,
Oid relnamespace,
@@ -119,7 +123,7 @@ extern Oid heap_create_with_catalog(const char *relname,
bool use_user_acl,
bool allow_system_table_mods,
PartitionState *partTableState,
int8 row_compress,
int8 row_compress,
HashBucketInfo *bucketinfo,
bool record_dependce = true,
List* ceLst = NULL,
@@ -192,7 +196,7 @@ extern void CheckAttributeType(const char *attname, Oid atttypid, Oid attcollati
#ifdef PGXC
/* Functions related to distribution data of relations */
extern void AddRelationDistribution(const char *relname, Oid relid, DistributeBy *distributeby,
PGXCSubCluster *subcluster, List *parentOids, TupleDesc descriptor, bool isinstallationgroup,
PGXCSubCluster *subcluster, List *parentOids, TupleDesc descriptor, bool isinstallationgroup,
bool isbucket = false, int bucketmaplen = 0);
extern void GetRelationDistributionItems(Oid relid, DistributeBy *distributeby, TupleDesc descriptor, char *locatortype,
int *hashalgorithm, int *hashbuckets, AttrNumber *attnum);

View File

@@ -38,11 +38,23 @@ typedef struct xl_smgr_create {
ForkNumber forkNum;
} xl_smgr_create;
typedef struct xl_smgr_create_compress {
xl_smgr_create xlrec;
uint2 pageCompressOpts;
} xl_smgr_create_compress;
typedef struct xl_smgr_truncate {
BlockNumber blkno;
RelFileNodeOld rnode;
} xl_smgr_truncate;
typedef struct xl_smgr_truncate_compress {
xl_smgr_truncate xlrec;
uint2 pageCompressOpts;
} xl_smgr_truncate_compress;
extern void log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum);
extern void smgr_redo(XLogReaderState *record);

View File

@@ -0,0 +1,2 @@
DROP FUNCTION IF EXISTS pg_catalog.pg_read_binary_file_blocks(IN inputpath text, IN startblocknum int8, IN count int8) CASCADE;
DROP FUNCTION IF EXISTS pg_catalog.gs_read_block_from_remote(int4, int4, int4, int2, int2, int4, xid, int4, xid, boolean) CASCADE;

View File

@@ -0,0 +1,2 @@
DROP FUNCTION IF EXISTS pg_catalog.pg_read_binary_file_blocks(IN inputpath text, IN startblocknum int8, IN count int8) CASCADE;
DROP FUNCTION IF EXISTS pg_catalog.gs_read_block_from_remote(int4, int4, int4, int2, int2, int4, xid, int4, xid, boolean) CASCADE;

View File

@@ -0,0 +1,22 @@
SET LOCAL inplace_upgrade_next_system_object_oids = IUO_PROC, 4768;
CREATE OR REPLACE FUNCTION pg_catalog.gs_read_block_from_remote
( int4,
int4,
int4,
int2,
int2,
int4,
xid,
int4,
xid,
boolean)
RETURNS SETOF record LANGUAGE INTERNAL ROWS 1 STRICT as 'gs_read_block_from_remote_compress';
-- pg_read_binary_file_blocks()
--
SET LOCAL inplace_upgrade_next_system_object_oids = IUO_PROC, 8413;
CREATE FUNCTION pg_catalog.pg_read_binary_file_blocks(IN inputpath text, IN startblocknum int8, IN count int8,
OUT path text,
OUT blocknum int4,
OUT len int4,
OUT data bytea)
AS 'pg_read_binary_file_blocks' LANGUAGE INTERNAL IMMUTABLE STRICT;

View File

@@ -0,0 +1,22 @@
SET LOCAL inplace_upgrade_next_system_object_oids = IUO_PROC, 4768;
CREATE OR REPLACE FUNCTION pg_catalog.gs_read_block_from_remote
( int4,
int4,
int4,
int2,
int2,
int4,
xid,
int4,
xid,
boolean)
RETURNS SETOF record LANGUAGE INTERNAL ROWS 1 STRICT as 'gs_read_block_from_remote_compress';
-- pg_read_binary_file_blocks()
--
SET LOCAL inplace_upgrade_next_system_object_oids = IUO_PROC, 8413;
CREATE FUNCTION pg_catalog.pg_read_binary_file_blocks(IN inputpath text, IN startblocknum int8, IN count int8,
OUT path text,
OUT blocknum int4,
OUT len int4,
OUT data bytea)
AS 'pg_read_binary_file_blocks' LANGUAGE INTERNAL IMMUTABLE STRICT;

View File

@@ -979,6 +979,7 @@ typedef struct knl_instance_context {
knl_g_archive_obs_context archive_obs_cxt;
knl_g_archive_obs_thread_info archive_obs_thread_info;
struct HTAB* ngroup_hash_table;
struct HTAB* mmapCache;
knl_g_hypo_context hypo_cxt;
knl_g_segment_context segment_cxt;

View File

@@ -1313,6 +1313,8 @@ typedef enum WaitEventIO {
WAIT_EVENT_OBS_READ,
WAIT_EVENT_OBS_WRITE,
WAIT_EVENT_LOGCTRL_SLEEP,
WAIT_EVENT_COMPRESS_ADDRESS_FILE_FLUSH,
WAIT_EVENT_COMPRESS_ADDRESS_FILE_SYNC,
IO_EVENT_NUM = WAIT_EVENT_LOGCTRL_SLEEP - WAIT_EVENT_BUFFILE_READ + 1 // MUST be last, DO NOT use this value.
} WaitEventIO;

View File

@@ -33,7 +33,7 @@
extern int RemoteGetCU(char* remote_address, uint32 spcnode, uint32 dbnode, uint32 relnode, int32 colid,
uint64 offset, int32 size, uint64 lsn, char* cu_data);
extern int RemoteGetPage(char* remote_address, uint32 spcnode, uint32 dbnode, uint32 relnode, int4 bucketnode,
extern int RemoteGetPage(char* remote_address, uint32 spcnode, uint32 dbnode, uint32 relnode, int2 bucketnode, uint2 opt,
int32 forknum, uint32 blocknum, uint32 blocksize, uint64 lsn, char* page_data);
#endif /* REMOTE_READ_CLIENT_H */

View File

@@ -96,6 +96,13 @@ typedef struct buftag {
BlockNumber blockNum; /* blknum relative to begin of reln */
} BufferTag;
typedef struct buftagnocompress {
RelFileNodeV2 rnode;
ForkNumber forkNum;
BlockNumber blockNum; /* blknum relative to begin of reln */
} BufferTagSecondVer;
typedef struct buftagnohbkt {
RelFileNodeOld rnode; /* physical relation identifier */
ForkNumber forkNum;

View File

@@ -180,6 +180,8 @@ typedef HeapPageHeaderData* HeapPageHeader;
#define GetPageHeaderSize(page) (PageIs8BXidHeapVersion(page) ? SizeOfHeapPageHeaderData : SizeOfPageHeaderData)
#define SizeOfHeapPageUpgradeData MAXALIGN(offsetof(HeapPageHeaderData, pd_linp) - offsetof(PageHeaderData, pd_linp))
#define GET_ITEMID_BY_IDX(buf, i) ((ItemIdData *)(buf + GetPageHeaderSize(buf) + (i) * sizeof(ItemIdData)))
#define PageXLogRecPtrGet(val) \
((uint64) (val).xlogid << 32 | (val).xrecoff)
@@ -406,6 +408,7 @@ inline OffsetNumber PageGetMaxOffsetNumber(char* pghr)
#define PageSetLSNInternal(page, lsn) \
(((PageHeader)(page))->pd_lsn.xlogid = (uint32)((lsn) >> 32), ((PageHeader)(page))->pd_lsn.xrecoff = (uint32)(lsn))
#ifndef FRONTEND
inline void PageSetLSN(Page page, XLogRecPtr LSN, bool check = true)
{
if (check && XLByteLT(LSN, PageGetLSN(page))) {
@ -413,6 +416,7 @@ inline void PageSetLSN(Page page, XLogRecPtr LSN, bool check = true)
}
PageSetLSNInternal(page, LSN);
}
#endif
#define PageHasFreeLinePointers(page) (((PageHeader)(page))->pd_flags & PD_HAS_FREE_LINES)
#define PageSetHasFreeLinePointers(page) (((PageHeader)(page))->pd_flags |= PD_HAS_FREE_LINES)

View File

@@ -0,0 +1,336 @@
/*
* page_compression.h
* internal declarations for page compression
*
* Copyright (c) 2020, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/include/storage/page_compression.h
*/
#ifndef PAGE_COMPRESSION_H
#define PAGE_COMPRESSION_H
#include <sys/mman.h>
#include "storage/buf/bufpage.h"
#include "datatype/timestamp.h"
#include "catalog/pg_class.h"
#include "catalog/pg_am.h"
#include "utils/atomic.h"
/* The page compression feature relies on native atomic operation support.
* On platforms that do not support native atomic operations, the members
* of pg_atomic_uint32 contain semaphore objects, which will affect the
* persistence of compressed page address files.
*/
#define SUPPORT_PAGE_COMPRESSION (sizeof(pg_atomic_uint32) == sizeof(uint32))
/* In order to avoid the inconsistency of address metadata data when the server
* is down, it is necessary to prevent the address metadata of one data block
* from crossing two storage device blocks. The block size of ordinary storage
* devices is a multiple of 512, so 512 is used as the block size of the
* compressed address file.
*/
#define COMPRESS_ADDR_BLCKSZ 512
/* COMPRESS_ALGORITHM_XXX must be the same as COMPRESS_TYPE_XXX */
#define COMPRESS_ALGORITHM_PGLZ 1
#define COMPRESS_ALGORITHM_ZSTD 2
constexpr uint32 COMPRESS_ADDRESS_FLUSH_CHUNKS = 5000;
#define SUPPORT_COMPRESSED(relKind, relam) \
((relKind) == RELKIND_RELATION || ((relKind) == RELKIND_INDEX && (relam) == BTREE_AM_OID))
#define REL_SUPPORT_COMPRESSED(relation) \
((relation->rd_rel->relkind) == RELKIND_RELATION || \
((relation->rd_rel->relkind) == RELKIND_INDEX && (relation->rd_rel->relam) == BTREE_AM_OID))
typedef uint32 pc_chunk_number_t;
const uint32 PAGE_COMPRESSION_VERSION = 92424;
enum CompressedFileType {
COMPRESSED_TYPE_UNKNOWN,
COMPRESSED_TABLE_FILE,
COMPRESSED_TABLE_PCA_FILE,
COMPRESSED_TABLE_PCD_FILE
};
/*
* layout of files for Page Compress:
*
* 1. page compression address file(_pca)
* - PageCompressHeader
* - PageCompressAddr[]
*
* 2. page compression data file(_pcd)
* - PageCompressData[]
*
*/
typedef struct PageCompressHeader {
pg_atomic_uint32 nblocks; /* number of total blocks in this segment */
pg_atomic_uint32 allocated_chunks; /* number of total allocated chunks in data area */
uint16 chunk_size; /* size of each chunk, must be 1/2 1/4 or 1/8 of BLCKSZ */
uint8 algorithm; /* compress algorithm, 1=pglz, 2=zstd */
pg_atomic_uint32 last_synced_nblocks; /* last synced nblocks */
pg_atomic_uint32 last_synced_allocated_chunks; /* last synced allocated_chunks */
pg_atomic_uint32 sync;
TimestampTz last_recovery_start_time; /* postmaster start time of last recovery */
} PageCompressHeader;
typedef struct PageCompressAddr {
uint32 checksum;
volatile uint8 nchunks; /* number of chunks for this block */
volatile uint8 allocated_chunks; /* number of allocated chunks for this block */
/* variable-length fields, 1 based chunk no array for this block, size of the array must be 2, 4 or 8 */
pc_chunk_number_t chunknos[FLEXIBLE_ARRAY_MEMBER];
} PageCompressAddr;
struct ReadBlockChunksStruct {
PageCompressHeader* header; // header: pca file
char* pageBuffer; // pageBuffer: decompressed page
size_t pageBufferLen;
FILE* fp; // fp: pcd data file
int segmentNo;
char* fileName; // fileName: for error report
};
typedef struct PageCompressData {
char page_header[SizeOfPageHeaderData]; /* page header */
uint32 size : 16; /* size of compressed data */
uint32 byte_convert : 1;
uint32 diff_convert : 1;
uint32 unused : 14;
char data[FLEXIBLE_ARRAY_MEMBER]; /* compressed page, except for the page header */
} PageCompressData;
typedef struct HeapPageCompressData {
char page_header[SizeOfHeapPageHeaderData]; /* page header */
uint32 size : 16; /* size of compressed data */
uint32 byte_convert : 1;
uint32 diff_convert : 1;
uint32 unused : 14;
char data[FLEXIBLE_ARRAY_MEMBER]; /* compressed page, except for the page header */
} HeapPageCompressData;
const uint4 CHUNK_SIZE_LIST[4] = {BLCKSZ / 2, BLCKSZ / 4, BLCKSZ / 8, BLCKSZ / 16};
constexpr uint4 INDEX_OF_HALF_BLCKSZ = 0;
constexpr uint4 INDEX_OF_QUARTER_BLCKSZ = 1;
constexpr uint4 INDEX_OF_EIGHTH_BRICK_BLCKSZ = 2;
constexpr uint4 INDEX_OF_SIXTEENTHS_BLCKSZ = 3;
#define MAX_PREALLOC_CHUNKS 7
#define PCA_SUFFIX "%s_pca"
#define PCD_SUFFIX "%s_pcd"
#define SIZE_OF_PAGE_COMPRESS_HEADER_DATA sizeof(PageCompressHeader)
#define SIZE_OF_PAGE_COMPRESS_ADDR_HEADER_DATA offsetof(PageCompressAddr, chunknos)
#define SIZE_OF_PAGE_COMPRESS_DATA_HEADER_DATA(heapData) \
((heapData) ? offsetof(HeapPageCompressData, data) : offsetof(PageCompressData, data))
#define SIZE_OF_PAGE_COMPRESS_ADDR(chunk_size) \
(SIZE_OF_PAGE_COMPRESS_ADDR_HEADER_DATA + sizeof(pc_chunk_number_t) * (BLCKSZ / (chunk_size)))
#define NUMBER_PAGE_COMPRESS_ADDR_PER_BLOCK(chunk_size) (COMPRESS_ADDR_BLCKSZ / SIZE_OF_PAGE_COMPRESS_ADDR(chunk_size))
#define OFFSET_OF_PAGE_COMPRESS_ADDR(chunk_size, blockno) \
(COMPRESS_ADDR_BLCKSZ * (1 + (blockno) / NUMBER_PAGE_COMPRESS_ADDR_PER_BLOCK(chunk_size)) + \
SIZE_OF_PAGE_COMPRESS_ADDR(chunk_size) * ((blockno) % NUMBER_PAGE_COMPRESS_ADDR_PER_BLOCK(chunk_size)))
#define GET_PAGE_COMPRESS_ADDR(pcbuffer, chunk_size, blockno) \
(PageCompressAddr*)((char*)(pcbuffer) + OFFSET_OF_PAGE_COMPRESS_ADDR((chunk_size), (blockno) % RELSEG_SIZE))
#define SIZE_OF_PAGE_COMPRESS_ADDR_FILE(chunk_size) OFFSET_OF_PAGE_COMPRESS_ADDR((chunk_size), RELSEG_SIZE)
#define OFFSET_OF_PAGE_COMPRESS_CHUNK(chunk_size, chunkno) ((chunk_size) * ((chunkno)-1))
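/*
 * Worked example (illustrative; assumes BLCKSZ = 8192, COMPRESS_ADDR_BLCKSZ = BLCKSZ
 * and 4-byte alignment of PageCompressAddr): with chunk_size = 2048, each address
 * entry holds BLCKSZ / chunk_size = 4 chunk numbers, so SIZE_OF_PAGE_COMPRESS_ADDR
 * = 8 + 4 * 4 = 24 bytes, and one address block holds 8192 / 24 = 341 entries.
 * Block 0's entry then lives at offset COMPRESS_ADDR_BLCKSZ * 1 = 8192: the first
 * address block directly follows the header block. In the pcd file, chunk number 1
 * starts at offset 0, since chunk numbers are 1-based.
 */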
/* Abnormal scenarios may cause holes in the space allocation of data files,
* causing data file expansion. Usually the holes are not too big, so the definition
* allows a maximum of 10,000 chunks for holes. If allocated_chunks exceeds this value,
* VACUUM FULL needs to be executed to reclaim space.
*/
#define MAX_CHUNK_NUMBER(chunk_size) ((uint32)(RELSEG_SIZE * (BLCKSZ / (chunk_size)) + 10000))
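/*
 * Worked example (illustrative; assumes the usual RELSEG_SIZE of 131072 blocks and
 * BLCKSZ of 8192): with chunk_size = 2048, MAX_CHUNK_NUMBER = 131072 * 4 + 10000
 * = 534288 chunks per segment.
 */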
constexpr unsigned CMP_BYTE_CONVERT_LEN = 1;
constexpr unsigned CMP_DIFF_CONVERT_LEN = 1;
constexpr unsigned CMP_PRE_CHUNK_LEN = 3;
constexpr unsigned CMP_LEVEL_SYMBOL_LEN = 1;
constexpr unsigned CMP_LEVEL_LEN = 5;
constexpr unsigned CMP_ALGORITHM_LEN = 3;
constexpr unsigned CMP_CHUNK_SIZE_LEN = 2;
constexpr unsigned CMP_BYTE_CONVERT_INDEX = 0;
constexpr unsigned CMP_DIFF_CONVERT_INDEX = 1;
constexpr unsigned CMP_PRE_CHUNK_INDEX = 2;
constexpr unsigned CMP_COMPRESS_LEVEL_SYMBOL = 3;
constexpr unsigned CMP_LEVEL_INDEX = 4;
constexpr unsigned CMP_ALGORITHM_INDEX = 5;
constexpr unsigned CMP_CHUNK_SIZE_INDEX = 6;
struct CmpBitStuct {
unsigned int bitLen;
unsigned int mask;
unsigned int moveBit;
};
constexpr CmpBitStuct g_cmpBitStruct[] = {{CMP_BYTE_CONVERT_LEN, 0x01, 15},
{CMP_DIFF_CONVERT_LEN, 0x01, 14},
{CMP_PRE_CHUNK_LEN, 0x07, 11},
{CMP_LEVEL_SYMBOL_LEN, 0x01, 10},
{CMP_LEVEL_LEN, 0x1F, 5},
{CMP_ALGORITHM_LEN, 0x07, 2},
{CMP_CHUNK_SIZE_LEN, 0x03, 0}};
/* RelFileCompressOption: Row-oriented table compress option */
struct RelFileCompressOption {
unsigned byteConvert : g_cmpBitStruct[CMP_BYTE_CONVERT_INDEX].bitLen, /* need byte convert? */
diffConvert : g_cmpBitStruct[CMP_DIFF_CONVERT_INDEX].bitLen, /* need diff convert processed? */
compressPreallocChunks : g_cmpBitStruct[CMP_PRE_CHUNK_INDEX]
.bitLen, /* prealloced chunks to store compressed data */
compressLevelSymbol : g_cmpBitStruct[CMP_COMPRESS_LEVEL_SYMBOL]
.bitLen, /* compress level symbol, true for positive and false for negative */
compressLevel : g_cmpBitStruct[CMP_LEVEL_INDEX].bitLen, /* compress level */
compressAlgorithm : g_cmpBitStruct[CMP_ALGORITHM_INDEX].bitLen, /* compress algorithm */
compressChunkSize : g_cmpBitStruct[CMP_CHUNK_SIZE_INDEX].bitLen; /* chunk size of compressed data */
};
inline void TransCompressOptions(const RelFileNode& node, RelFileCompressOption* opt)
{
unsigned short compressOption = node.opt;
opt->compressChunkSize = compressOption & g_cmpBitStruct[CMP_CHUNK_SIZE_INDEX].mask;
compressOption = compressOption >> g_cmpBitStruct[CMP_CHUNK_SIZE_INDEX].bitLen;
opt->compressAlgorithm = compressOption & g_cmpBitStruct[CMP_ALGORITHM_INDEX].mask;
compressOption = compressOption >> g_cmpBitStruct[CMP_ALGORITHM_INDEX].bitLen;
opt->compressLevel = compressOption & g_cmpBitStruct[CMP_LEVEL_INDEX].mask;
compressOption = compressOption >> g_cmpBitStruct[CMP_LEVEL_INDEX].bitLen;
opt->compressLevelSymbol = compressOption & g_cmpBitStruct[CMP_COMPRESS_LEVEL_SYMBOL].mask;
compressOption = compressOption >> g_cmpBitStruct[CMP_COMPRESS_LEVEL_SYMBOL].bitLen;
opt->compressPreallocChunks = compressOption & g_cmpBitStruct[CMP_PRE_CHUNK_INDEX].mask;
compressOption = compressOption >> g_cmpBitStruct[CMP_PRE_CHUNK_INDEX].bitLen;
opt->diffConvert = compressOption & g_cmpBitStruct[CMP_DIFF_CONVERT_INDEX].mask;
compressOption = compressOption >> g_cmpBitStruct[CMP_DIFF_CONVERT_INDEX].bitLen;
opt->byteConvert = compressOption & g_cmpBitStruct[CMP_BYTE_CONVERT_INDEX].mask;
compressOption = compressOption >> g_cmpBitStruct[CMP_BYTE_CONVERT_INDEX].bitLen;
}
#define SET_COMPRESS_OPTION(node, byteConvert, diffConvert, preChunks, symbol, level, algorithm, chunkSize) \
do { \
(node).opt = (node).opt << g_cmpBitStruct[CMP_BYTE_CONVERT_INDEX].bitLen; \
(node).opt += (byteConvert)&g_cmpBitStruct[CMP_BYTE_CONVERT_INDEX].mask; \
(node).opt = (node).opt << g_cmpBitStruct[CMP_DIFF_CONVERT_INDEX].bitLen; \
(node).opt += (diffConvert)&g_cmpBitStruct[CMP_DIFF_CONVERT_INDEX].mask; \
(node).opt = (node).opt << g_cmpBitStruct[CMP_PRE_CHUNK_INDEX].bitLen; \
(node).opt += (preChunks)&g_cmpBitStruct[CMP_PRE_CHUNK_INDEX].mask; \
(node).opt = (node).opt << g_cmpBitStruct[CMP_COMPRESS_LEVEL_SYMBOL].bitLen; \
(node).opt += (symbol)&g_cmpBitStruct[CMP_COMPRESS_LEVEL_SYMBOL].mask; \
(node).opt = (node).opt << g_cmpBitStruct[CMP_LEVEL_INDEX].bitLen; \
(node).opt += (level)&g_cmpBitStruct[CMP_LEVEL_INDEX].mask; \
(node).opt = (node).opt << g_cmpBitStruct[CMP_ALGORITHM_INDEX].bitLen; \
(node).opt += (algorithm)&g_cmpBitStruct[CMP_ALGORITHM_INDEX].mask; \
(node).opt = (node).opt << g_cmpBitStruct[CMP_CHUNK_SIZE_INDEX].bitLen; \
(node).opt += (chunkSize)&g_cmpBitStruct[CMP_CHUNK_SIZE_INDEX].mask; \
} while (0)
#define GET_ROW_COL_CONVERT(opt) \
(((opt) >> g_cmpBitStruct[CMP_BYTE_CONVERT_INDEX].moveBit) & g_cmpBitStruct[CMP_BYTE_CONVERT_INDEX].mask)
#define GET_DIFF_CONVERT(opt) \
(((opt) >> g_cmpBitStruct[CMP_DIFF_CONVERT_INDEX].moveBit) & g_cmpBitStruct[CMP_DIFF_CONVERT_INDEX].mask)
#define GET_COMPRESS_PRE_CHUNKS(opt) \
(((opt) >> g_cmpBitStruct[CMP_PRE_CHUNK_INDEX].moveBit) & g_cmpBitStruct[CMP_PRE_CHUNK_INDEX].mask)
#define GET_COMPRESS_LEVEL_SYMBOL(opt) \
(((opt) >> g_cmpBitStruct[CMP_COMPRESS_LEVEL_SYMBOL].moveBit) & g_cmpBitStruct[CMP_COMPRESS_LEVEL_SYMBOL].mask)
#define GET_COMPRESS_LEVEL(opt) \
(((opt) >> g_cmpBitStruct[CMP_LEVEL_INDEX].moveBit) & g_cmpBitStruct[CMP_LEVEL_INDEX].mask)
#define GET_COMPRESS_ALGORITHM(opt) \
(((opt) >> g_cmpBitStruct[CMP_ALGORITHM_INDEX].moveBit) & g_cmpBitStruct[CMP_ALGORITHM_INDEX].mask)
#define GET_COMPRESS_CHUNK_SIZE(opt) \
(((opt) >> g_cmpBitStruct[CMP_CHUNK_SIZE_INDEX].moveBit) & g_cmpBitStruct[CMP_CHUNK_SIZE_INDEX].mask)
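/*
 * Layout note (illustrative): the 16-bit reloption word packs, from the most
 * significant bit down,
 *   byteConvert(1) | diffConvert(1) | preallocChunks(3) | levelSymbol(1) |
 *   level(5) | algorithm(3) | chunkSize(2).
 * For example, zstd (algorithm = 2) with chunk-size index 1 and all other fields
 * zero encodes as (2 << 2) | 1 = 0x09, so GET_COMPRESS_ALGORITHM(0x09) == 2 and
 * GET_COMPRESS_CHUNK_SIZE(0x09) == 1.
 */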
#define IS_COMPRESSED_MAINFORK(reln, forkNum) ((reln)->smgr_rnode.node.opt != 0 && (forkNum) == MAIN_FORKNUM)
#define IS_COMPRESSED_RNODE(rnode, forkNum) ((rnode).opt != 0 && (forkNum) == MAIN_FORKNUM)
/* Compress function */
template <bool heapPageData>
extern int TemplateCompressPage(const char* src, char* dst, int dst_size, RelFileCompressOption option);
template <bool heapPageData>
extern int TemplateDecompressPage(const char* src, char* dst, uint8 algorithm);
int CompressPageBufferBound(const char* page, uint8 algorithm);
int CompressPage(const char* src, char* dst, int dst_size, RelFileCompressOption option);
int DecompressPage(const char* src, char* dst, uint8 algorithm);
/* Memory mapping function */
extern PageCompressHeader* pc_mmap(int fd, int chunk_size, bool readonly);
extern PageCompressHeader* pc_mmap_real_size(int fd, int size, bool readonly);
extern int pc_munmap(PageCompressHeader * map);
extern int pc_msync(PageCompressHeader * map);
/**
* format mainfork path name to compressed path
* @param dst destination buffer
* @param pathName uncompressed table name
* @param compressFileType pca or pcd
*/
extern void CopyCompressedPath(char dst[MAXPGPATH], const char* pathName, CompressedFileType compressFileType);
/**
* @param pathName mainFork File path name
* @param relFileNode physically access, for validation
* @param forkNumber for validation
* @return size of mainFork
*/
extern int64 CalculateMainForkSize(char* pathName, RelFileNode* relFileNode, ForkNumber forkNumber);
extern int64 CalculateCompressMainForkSize(char* pathName, bool suppressedENOENT = false);
extern uint16 ReadChunkSize(FILE *pcaFile, char* pcaFilePath, size_t len);
/**
* read compressed chunks into dst, and decompressed page into pageBuffer
* @param dst destination
* @param destLen destination length
* @param blockNumber blockNumber
* @param rbStruct other data needed (see ReadBlockChunksStruct)
*/
size_t ReadAllChunkOfBlock(char *dst, size_t destLen, BlockNumber blockNumber, ReadBlockChunksStruct& rbStruct);
/**
* check if fileName is end with pca or pcd
* @param fileName fileName
* @return filetype
*/
CompressedFileType IsCompressedFile(char *fileName, size_t fileNameLen);
int64 CalculateFileSize(char* pathName, size_t size, bool suppressedENOENT = false);
/**
* release mmap. print warning log if failed
* @param map mmap pointer
* @param fileName mmap filename, for logging
*/
void ReleaseMap(PageCompressHeader* map, const char* fileName);
/**
* convert chunk size to the index of CHUNK_SIZE_LIST
* @param compressedChunkSize {BLCKSZ / 2, BLCKSZ / 4, BLCKSZ / 8, BLCKSZ / 16}
* @param success success or not
* @return index of CHUNK_SIZE_LIST
*/
extern uint1 ConvertChunkSize(uint32 compressedChunkSize, bool* success);
/**
* compute a simple 32-bit checksum over a page compress address entry
* @param blockNumber block number
* @param pageCompressAddr addr of block
* @return checksum uint32
*/
extern uint32 AddrChecksum32(BlockNumber blockNumber, const PageCompressAddr* pageCompressAddr);
#ifndef FRONTEND
extern void CheckAndRepairCompressAddress(PageCompressHeader *pcMap, uint16 chunk_size, uint8 algorithm, const char *path);
PageCompressHeader* GetPageCompressHeader(void* vfd, int chunkSize, const RelFileNodeForkNum &relFileNodeForkNum);
void UnReferenceAddrFile(void* vfd);
void RealInitialMMapLockArray();
#endif
#endif /* PAGE_COMPRESSION_H */

View File

@ -0,0 +1,715 @@
/*
* page_compression_impl.h
*      internal implementation for page compression
*
* Copyright (c) 2020, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/include/storage/page_compression_impl.h
*/
#ifndef RC_INCLUDE_STORAGE_PAGE_COMPRESSION_IMPL_H
#define RC_INCLUDE_STORAGE_PAGE_COMPRESSION_IMPL_H
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <linux/falloc.h>
#include <sys/stat.h>
#include <assert.h>
#include <sys/mman.h>
#include "storage/page_compression.h"
#include "utils/pg_lzcompress.h"
#include <zstd.h>
#define DEFAULT_ZSTD_COMPRESSION_LEVEL (1)
#define MIN_ZSTD_COMPRESSION_LEVEL ZSTD_minCLevel()
#define MAX_ZSTD_COMPRESSION_LEVEL ZSTD_maxCLevel()
#define COMPRESS_DEFAULT_ERROR (-1)
#define COMPRESS_UNSUPPORTED_ERROR (-2)
#define GS_INVALID_ID16 (uint16)0xFFFF
#define MIN_DIFF_SIZE (64)
#define MIN_CONVERT_CNT (4)
#ifndef USE_ASSERT_CHECKING
#define ASSERT(condition)
#else
#define ASSERT(condition) assert(condition)
#endif
#ifndef FRONTEND
/**
* return the compressed-data area of the destination buffer
* @param dst HeapPageCompressData or PageCompressData buffer
* @param heapPageData true for HeapPageCompressData, false for PageCompressData
* @return dst->data
*/
static inline char* GetPageCompressedData(char* dst, bool heapPageData)
{
return heapPageData ? ((HeapPageCompressData*)dst)->data : ((PageCompressData*)dst)->data;
}
static inline void FreePointer(void* pointer)
{
if (pointer != NULL) {
pfree(pointer);
}
}
/*======================================================================================*/
#define COMPRESS ""
void cprs_diff_convert_rows(char *buf, uint32 offset, uint16 min_row_len, uint16 real_row_cnt) {
uint16 row_cnt = real_row_cnt;
uint32 common_size = min_row_len;
uint8 *copy_begin = (uint8 *)(buf + offset);
uint16 i, j;
for (i = 0; i < common_size; i++) {
for (j = row_cnt - 1; j > 0; j--) {
copy_begin[i * row_cnt + j] -= copy_begin[i * row_cnt + (j - 1)];
}
}
return ;
}
void cprs_diff_deconvert_rows(char *buf, uint32 offset, uint16 min_row_len, uint16 real_row_cnt) {
uint16 row_cnt = real_row_cnt;
uint32 common_size = min_row_len;
uint8 *copy_begin = (uint8 *)(buf + offset);
uint16 i, j;
for (i = 0; i < common_size; i++) {
for (j = 1; j < row_cnt; j++) {
copy_begin[i * row_cnt + j] += copy_begin[i * row_cnt + (j - 1)];
}
}
return ;
}
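/*
 * Worked example (illustrative): with row_cnt = 4 and one byte of common size,
 * the column bytes {10, 13, 15, 20} become {10, 3, 2, 5} after
 * cprs_diff_convert_rows (each byte is replaced by its delta to the previous
 * byte), and cprs_diff_deconvert_rows restores {10, 13, 15, 20} by prefix-summing.
 */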
void CompressConvertItemIds(char *buf, char *aux_buf) {
errno_t ret;
HeapPageHeaderData *page = (HeapPageHeaderData *)buf;
uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData);
uint32 total_size = row_cnt * sizeof(ItemIdData);
char *copy_begin = buf + GetPageHeaderSize(page);
uint16 i, j, k;
// clear aux_buf
ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ);
securec_check(ret, "", "");
k = 0;
for (i = 0; i < row_cnt; i++) {
for (j = 0; j < sizeof(ItemIdData); j++) {
aux_buf[j * row_cnt + i] = copy_begin[k++];
}
}
// cp aux_buf to page_buf
ret = memcpy_sp(copy_begin, total_size, aux_buf, total_size);
securec_check(ret, "", "");
return ;
}
void CompressConvertRows(char *buf, char *aux_buf, int16 *real_order, uint16 max_row_len, uint16 real_row_cnt) {
errno_t ret;
HeapPageHeaderData *page = (HeapPageHeaderData *)buf;
uint16 row_cnt = real_row_cnt;
uint32 total_size = page->pd_special - page->pd_upper;
char *copy_begin = buf + page->pd_upper;
char *row;
uint16 i, j, k, cur, up, row_size;
ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ);
securec_check(ret, "", "");
k = 0;
for (i = 0; i < max_row_len; i++) {
for (j = 0; j < row_cnt; j++) {
up = (j == (row_cnt - 1)) ? page->pd_special : GET_ITEMID_BY_IDX(buf, (real_order[j + 1]))->lp_off;
cur = GET_ITEMID_BY_IDX(buf, (real_order[j]))->lp_off;
row_size = up - cur;
row = buf + cur;
if (i < row_size) {
aux_buf[k++] = row[i]; // this part is reshaped
}
}
}
if (k != total_size) {
printf("ERROR!!! convert_rows_2 error...!!!\n");
ASSERT(0);
return;
}
// cp aux_buf to page_buf
ret = memcpy_sp(copy_begin, total_size, aux_buf, total_size);
securec_check(ret, "", "");
return ;
}
// Build real_order: the indexes of all normal itemids, sorted by ascending
// tuple offset. The itemids themselves are not necessarily stored in that
// order, so a linked-list insertion sort is used below.
void CompressConvertItemRealOrder(char *buf, int16 *real_order, uint16 real_row_cnt) {
HeapPageHeaderData *page = (HeapPageHeaderData *)buf;
uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData);
ItemIdData *begin = (ItemIdData *)(buf + GetPageHeaderSize(page));
int16 *link_order = real_order + real_row_cnt;
int16 i, head, curr, prev;
int16 end = -1; // invalid index
head = end;
// itemids are usually stored in descending offset order; invalid itemids are skipped
for (i = 0; i < row_cnt; i++) {
if (!ItemIdIsNormal(begin + i)) {
continue;
}
if (head == end) { // set the head idx, insert the first
link_order[i] = end;
head = i;
continue;
}
if ((begin + i)->lp_off < (begin + head)->lp_off) {
link_order[i] = head; // update the head idx
head = i;
continue;
}
prev = head;
curr = link_order[head];
while ((curr != end) && ((begin + i)->lp_off > (begin + curr)->lp_off)) {
prev = curr;
curr = link_order[curr];
}
link_order[prev] = i;
link_order[i] = curr;
}
// arrange the link to array
curr = head;
for (i = 0; i < real_row_cnt; i++) {
real_order[i] = curr;
curr = link_order[curr];
}
if (curr != end) {
printf("ERROR!!! pre_convert_real_order error...!!!\n");
ASSERT(0);
return;
}
}
// count only normal itemids; some itemids may be invalid
uint16 HeapPageCalcRealRowCnt (char *buf) {
HeapPageHeaderData *page = (HeapPageHeaderData *)buf;
uint16 cnt = 0;
uint16 i;
uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData);
for (i = 0; i < row_cnt; i++) {
if (ItemIdIsNormal(GET_ITEMID_BY_IDX(buf, i))) {
cnt++;
}
}
return cnt;
}
// check that all row sizes differ by no more than MIN_DIFF_SIZE bytes.
bool CompressConvertCheck(char *buf, int16 **real_order, uint16 *max_row_len, uint16 *min_row_len, uint16 *real_row_cnt) {
HeapPageHeaderData *page = (HeapPageHeaderData *)buf;
uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData);
int16 i, row_size;
ItemIdData *ptr = NULL;
uint16 up = page->pd_special;
uint16 min_size = GS_INVALID_ID16;
uint16 max_size = 0;
errno_t ret;
if (page->pd_lower < GetPageHeaderSize(page) || (page->pd_lower > page->pd_upper)) {
return false;
}
uint16 normal_row_cnt = HeapPageCalcRealRowCnt(buf);
if (normal_row_cnt < MIN_CONVERT_CNT) { // no need convert
return false;
}
// Allocate workspace to store the real tuple order:
/*
--------------------------|--------------------------
xxxxxxxxxxxxxxxxxxxxxxxxxx|xxxxxxxxxxxxxxxxxxxxxxxxxx
--------------------------|--------------------------
*/
// the first half holds the resulting array order, the second half holds the link list.
*real_order = (int16 *)palloc(sizeof(uint16) * row_cnt * 2);
if (*real_order == NULL) {
printf("zfunc compress file");
return false;
}
ret = memset_sp(*real_order, sizeof(uint16) * row_cnt * 2, 0, sizeof(uint16) * row_cnt * 2);
securec_check(ret, "", "");
// order the ItemIds by tuple_offset order.
CompressConvertItemRealOrder(buf, *real_order, normal_row_cnt);
// do the check, to check all size of tuples.
for (i = normal_row_cnt - 1; i >= 0; i--) {
ptr = GET_ITEMID_BY_IDX(buf, ((*real_order)[i]));
row_size = up - ptr->lp_off;
if (row_size < MIN_CONVERT_CNT * 2) {
return false;
}
min_size = (row_size < min_size) ? row_size : min_size;
max_size = (row_size > max_size) ? row_size : max_size;
if ((max_size - min_size) > MIN_DIFF_SIZE) { // no need convert
return false;
}
up = ptr->lp_off;
}
// get the min row common size.
*max_row_len = max_size;
*min_row_len = min_size;
*real_row_cnt = normal_row_cnt;
return true;
}
bool CompressConvertOnePage(char *buf, char *aux_buf, bool diff_convert) {
uint16 max_row_len = 0;
uint16 min_row_len = 0;
int16 *real_order = NULL; // itemids are not in order sometimes. we must find the real
uint16 real_row_cnt = 0;
if (!CompressConvertCheck(buf, &real_order, &max_row_len, &min_row_len, &real_row_cnt)) {
FreePointer((void*)real_order);
return false;
}
CompressConvertRows(buf, aux_buf, real_order, max_row_len, real_row_cnt);
CompressConvertItemIds(buf, aux_buf);
if (diff_convert) {
cprs_diff_convert_rows(buf, ((HeapPageHeaderData *)buf)->pd_upper, min_row_len, real_row_cnt);
cprs_diff_convert_rows(buf, GetPageHeaderSize(buf), sizeof(ItemIdData),
(((HeapPageHeaderData *)buf)->pd_lower - GetPageHeaderSize(buf)) / sizeof(ItemIdData));
}
FreePointer((void*)real_order);
return true;
}
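/*
 * Pipeline summary (illustrative): byte conversion reshapes the tuple area (and
 * the itemid array) column-wise, so the i-th bytes of all rows become contiguous;
 * e.g. three 4-byte rows A0A1A2A3, B0B1B2B3, C0C1C2C3 are rewritten as
 * A0B0C0 A1B1C1 A2B2C2 A3B3C3. Similar bytes end up adjacent, which typically
 * improves the compression ratio; diff conversion then optionally delta-encodes
 * each reshaped column (see cprs_diff_convert_rows above).
 */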
void CompressPagePrepareConvert(char *src, bool diff_convert, bool *real_ByteConvert)
{
char *aux_buf = NULL;
errno_t rc;
aux_buf = (char *)palloc(BLCKSZ);
if (aux_buf == NULL) {
// add log
return;
}
rc = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ);
securec_check(rc, "", "");
// do convert
*real_ByteConvert = false;
if (CompressConvertOnePage(src, aux_buf, diff_convert)) {
*real_ByteConvert = true;
}
FreePointer((void*)aux_buf);
}
/**
* CompressPageBufferBound()
* -- Get the destination buffer boundary to compress one page.
* Return needed destination buffer size for compress one page or
* -1 for unrecognized compression algorithm
*/
int CompressPageBufferBound(const char* page, uint8 algorithm)
{
switch (algorithm) {
case COMPRESS_ALGORITHM_PGLZ:
return BLCKSZ + 4;
case COMPRESS_ALGORITHM_ZSTD:
return ZSTD_compressBound(BLCKSZ - GetPageHeaderSize(page));
default:
return -1;
}
}
int CompressPage(const char* src, char* dst, int dst_size, RelFileCompressOption option)
{
if (PageIs8BXidHeapVersion(src)) {
return TemplateCompressPage<true>(src, dst, dst_size, option);
} else {
return TemplateCompressPage<false>(src, dst, dst_size, option);
}
}
int DecompressPage(const char* src, char* dst, uint8 algorithm)
{
if (PageIs8BXidHeapVersion(src)) {
return TemplateDecompressPage<true>(src, dst, algorithm);
} else {
return TemplateDecompressPage<false>(src, dst, algorithm);
}
}
inline size_t GetSizeOfHeadData(bool heapPageData)
{
if (heapPageData) {
return SizeOfHeapPageHeaderData;
} else {
return SizeOfPageHeaderData;
}
}
/**
* CompressPage() -- Compress one page.
*
* Only the parts other than the page header are compressed. The
* compressed data is rounded up to a multiple of chunk_size and the
* unused tail of the last chunk is filled with zeros. Compression must
* save at least one chunk of space, otherwise it fails.
* Returns the size of the compressed data,
* -1 on compression failure, or
* COMPRESS_UNSUPPORTED_ERROR for an unrecognized compression algorithm
*/
template <bool heapPageData>
int TemplateCompressPage(const char* src, char* dst, int dst_size, RelFileCompressOption option)
{
int compressed_size;
int8 level = option.compressLevelSymbol ? option.compressLevel : -option.compressLevel;
size_t sizeOfHeaderData = GetSizeOfHeadData(heapPageData);
char* src_copy = NULL;
bool real_ByteConvert = false;
errno_t rc;
if (option.byteConvert) {
// copy and maybe change it
src_copy = (char*)palloc(BLCKSZ);
if (src_copy == NULL) {
// add log
return -1;
}
rc = memcpy_s(src_copy, BLCKSZ, src, BLCKSZ);
securec_check(rc, "", "");
CompressPagePrepareConvert(src_copy, option.diffConvert, &real_ByteConvert); /* preprocess convert src */
}
char* data = GetPageCompressedData(dst, heapPageData);
switch (option.compressAlgorithm) {
case COMPRESS_ALGORITHM_PGLZ:
if (real_ByteConvert) {
compressed_size = lz_compress(src_copy + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, data);
} else {
compressed_size = lz_compress(src + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, data);
}
break;
case COMPRESS_ALGORITHM_ZSTD: {
if (level == 0 || level < MIN_ZSTD_COMPRESSION_LEVEL || level > MAX_ZSTD_COMPRESSION_LEVEL) {
level = DEFAULT_ZSTD_COMPRESSION_LEVEL;
}
if (real_ByteConvert) {
compressed_size =
ZSTD_compress(data, dst_size, src_copy + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, level);
} else {
compressed_size =
ZSTD_compress(data, dst_size, src + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, level);
}
if (ZSTD_isError(compressed_size)) {
FreePointer((void*)src_copy);
return -1;
}
break;
}
default:
FreePointer((void*)src_copy);
return COMPRESS_UNSUPPORTED_ERROR;
}
if (compressed_size < 0) {
FreePointer((void*)src_copy);
return -1;
}
if (heapPageData) {
HeapPageCompressData* pcdptr = ((HeapPageCompressData*)dst);
rc = memcpy_s(pcdptr->page_header, sizeOfHeaderData, src, sizeOfHeaderData);
securec_check(rc, "", "");
pcdptr->size = compressed_size;
pcdptr->byte_convert = real_ByteConvert;
pcdptr->diff_convert = option.diffConvert;
} else {
PageCompressData* pcdptr = ((PageCompressData*)dst);
rc = memcpy_s(pcdptr->page_header, sizeOfHeaderData, src, sizeOfHeaderData);
securec_check(rc, "", "");
pcdptr->size = compressed_size;
pcdptr->byte_convert = real_ByteConvert;
pcdptr->diff_convert = option.diffConvert;
}
FreePointer((void*)src_copy);
return SIZE_OF_PAGE_COMPRESS_DATA_HEADER_DATA(heapPageData) + compressed_size;
}
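/*
 * Storage note (illustrative): callers round the returned size up to whole chunks,
 * e.g. nchunks = (returned_size + chunk_size - 1) / chunk_size, and the unused
 * tail of the last chunk is zero-filled, matching the contract described above.
 */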
/*======================================================================================*/
#define DECOMPRESS ""
void DecompressDeconvertRows(char *buf, char *aux_buf, int16 *real_order, uint16 max_row_len, uint16 real_row_cnt) {
errno_t ret;
HeapPageHeaderData *page = (HeapPageHeaderData *)buf;
uint16 row_cnt = real_row_cnt;
uint32 total_size = page->pd_special - page->pd_upper;
char *copy_begin = buf + page->pd_upper;
char *row;
uint16 i, j, k, cur, up, row_size;
ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ);
securec_check(ret, "", "");
for (i = 0, k = 0; i < max_row_len; i++) {
for (j = 0; j < row_cnt; j++) {
up = (j == (row_cnt - 1)) ? page->pd_special : GET_ITEMID_BY_IDX(buf, (real_order[j + 1]))->lp_off;
cur = GET_ITEMID_BY_IDX(buf, (real_order[j]))->lp_off;
row_size = up - cur;
row = aux_buf + cur;
if (i < row_size) {
row[i] = copy_begin[k++]; // this part is reshaped
}
}
}
if (k != total_size) {
printf("ERROR!!! pg_deconvert_rows error...!!!\n");
ASSERT(0);
return;
}
// cp aux_buf to page_buf
ret = memcpy_sp(copy_begin, total_size, aux_buf + page->pd_upper, total_size);
securec_check(ret, "", "");
return ;
}
void DecompressDeconvertItemIds(char *buf, char *aux_buf) {
errno_t ret;
HeapPageHeaderData *page = (HeapPageHeaderData *)buf;
uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData);
uint32 total_size = row_cnt * sizeof(ItemIdData);
char* copy_begin = buf + GetPageHeaderSize(page);
uint16 i, j, k;
// clear aux_buf
ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ);
securec_check(ret, "", "");
for (i = 0, k = 0; i < sizeof(ItemIdData); i++) {
for (j = 0; j < row_cnt; j++) {
aux_buf[j * sizeof(ItemIdData) + i] = copy_begin[k++];
}
}
// cp aux_buf to page_buf
ret = memcpy_sp(copy_begin, total_size, aux_buf, total_size);
securec_check(ret, "", "");
return ;
}
void DecompressDeconvertOnePage(char *buf, char *aux_buf, bool diff_convert) {
uint16 max_row_len = 0;
uint16 min_row_len = 0;
int16 *real_order = NULL; // itemids are not in order sometimes. we must find the real
uint16 real_row_cnt = 0;
if (diff_convert) {
cprs_diff_deconvert_rows(buf, GetPageHeaderSize(buf), sizeof(ItemIdData),
(((HeapPageHeaderData *)buf)->pd_lower - GetPageHeaderSize(buf)) / sizeof(ItemIdData));
}
// first, rearrange the itemid array
DecompressDeconvertItemIds(buf, aux_buf);
if (!CompressConvertCheck(buf, &real_order, &max_row_len, &min_row_len, &real_row_cnt)) {
FreePointer((void*)real_order);
ASSERT(0);
return ;
}
// finally, restore the tuple area
if (diff_convert) {
cprs_diff_deconvert_rows(buf, ((HeapPageHeaderData *)buf)->pd_upper, min_row_len, real_row_cnt);
}
DecompressDeconvertRows(buf, aux_buf, real_order, max_row_len, real_row_cnt);
FreePointer((void*)real_order);
}
void DecompressPageDeconvert(char *src, bool diff_convert)
{
char *aux_buf = NULL;
errno_t rc;
aux_buf = (char *)palloc(BLCKSZ);
if (aux_buf == NULL) {
// add log
return;
}
rc = memset_s(aux_buf, BLCKSZ, 0, BLCKSZ);
securec_check(rc, "", "");
// do convert
DecompressDeconvertOnePage(src, aux_buf, diff_convert);
FreePointer((void*)aux_buf);
}
/**
* DecompressPage() -- Decompress one compressed page.
* return size of decompressed page which should be BLCKSZ or
* -1 for decompress error
* -2 for unrecognized compression algorithm
*
*  Note: the size of dst must be greater than or equal to BLCKSZ.
*/
template <bool heapPageData>
int TemplateDecompressPage(const char* src, char* dst, uint8 algorithm)
{
int decompressed_size;
char* data;
uint32 size;
bool byte_convert, diff_convert;
size_t headerSize = GetSizeOfHeadData(heapPageData);
int rc = memcpy_s(dst, headerSize, src, headerSize);
securec_check(rc, "", "");
if (heapPageData) {
data = ((HeapPageCompressData*)src)->data;
size = ((HeapPageCompressData*)src)->size;
byte_convert = ((HeapPageCompressData*)src)->byte_convert;
diff_convert = ((HeapPageCompressData*)src)->diff_convert;
} else {
data = ((PageCompressData*)src)->data;
size = ((PageCompressData*)src)->size;
byte_convert = ((PageCompressData*)src)->byte_convert;
diff_convert = ((PageCompressData*)src)->diff_convert;
}
switch (algorithm) {
case COMPRESS_ALGORITHM_PGLZ:
decompressed_size = lz_decompress(data, size, dst + headerSize, BLCKSZ - headerSize, false);
break;
case COMPRESS_ALGORITHM_ZSTD:
decompressed_size = ZSTD_decompress(dst + headerSize, BLCKSZ - headerSize, data, size);
if (ZSTD_isError(decompressed_size)) {
return -1;
}
break;
default:
return COMPRESS_UNSUPPORTED_ERROR;
}
if (byte_convert) {
DecompressPageDeconvert(dst, diff_convert);
}
return headerSize + decompressed_size;
}
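/*
 * A minimal round-trip sketch (illustrative only, not part of the original API):
 * compress one page with zstd and decompress it back. The option values below
 * are assumptions chosen for the example; real callers derive them from the
 * relation's reloptions, and this helper is hypothetical.
 */
static inline bool PageCompressRoundTripSketch(const char* src /* BLCKSZ-sized page */)
{
    RelFileCompressOption option = {};
    option.compressLevelSymbol = 1; /* positive compression level */
    option.compressLevel = 1;
    option.compressAlgorithm = COMPRESS_ALGORITHM_ZSTD;
    option.compressChunkSize = INDEX_OF_QUARTER_BLCKSZ; /* index for 2048-byte chunks */
    int bound = CompressPageBufferBound(src, option.compressAlgorithm);
    if (bound < 0) {
        return false;
    }
    /* header + worst-case payload; the heap header is the larger of the two */
    char* dst = (char*)palloc(SIZE_OF_PAGE_COMPRESS_DATA_HEADER_DATA(true) + bound);
    int compressedSize = CompressPage(src, dst, bound, option);
    if (compressedSize < 0) { /* not compressible enough: caller stores the page raw */
        FreePointer((void*)dst);
        return false;
    }
    char page[BLCKSZ];
    bool ok = (DecompressPage(dst, page, option.compressAlgorithm) == BLCKSZ);
    FreePointer((void*)dst);
    return ok;
}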
#endif
/**
* pc_mmap() -- create memory map for page compress file's address area.
*
*/
PageCompressHeader* pc_mmap(int fd, int chunk_size, bool readonly)
{
int pc_memory_map_size = SIZE_OF_PAGE_COMPRESS_ADDR_FILE(chunk_size);
return pc_mmap_real_size(fd, pc_memory_map_size, readonly);
}
/**
* pc_mmap_real_size() -- create memory map for page compress file's address area.
*
*/
extern PageCompressHeader* pc_mmap_real_size(int fd, int pc_memory_map_size, bool readonly)
{
PageCompressHeader* map = NULL;
int file_size = lseek(fd, 0, SEEK_END);
if (file_size != pc_memory_map_size) {
if (ftruncate(fd, pc_memory_map_size) != 0) {
return (PageCompressHeader*) MAP_FAILED;
}
}
if (readonly) {
map = (PageCompressHeader*) mmap(NULL, pc_memory_map_size, PROT_READ, MAP_SHARED, fd, 0);
} else {
map = (PageCompressHeader*) mmap(NULL, pc_memory_map_size, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
}
return map;
}
/**
* pc_munmap() -- release memory map of page compress file.
*
*/
int pc_munmap(PageCompressHeader *map)
{
return munmap(map, SIZE_OF_PAGE_COMPRESS_ADDR_FILE(map->chunk_size));
}
/**
* pc_msync() -- sync memory map of page compress file.
*
*/
int pc_msync(PageCompressHeader *map)
{
#ifndef FRONTEND
if (!u_sess->attr.attr_storage.enableFsync) {
return 0;
}
#endif
return msync(map, SIZE_OF_PAGE_COMPRESS_ADDR_FILE(map->chunk_size), MS_SYNC);
}
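/*
 * A minimal usage sketch (illustrative only, with assumed file state): map the
 * address area of a pca file whose chunk_size is already known, read two plain
 * header fields, and unmap. The file is assumed to already have the expected
 * size; error handling is kept to the bare minimum, and this helper is
 * hypothetical.
 */
static inline void PcaInspectSketch(const char* pcaPath, int chunkSize)
{
    int fd = open(pcaPath, O_RDWR, 0);
    if (fd < 0) {
        return;
    }
    PageCompressHeader* map = pc_mmap(fd, chunkSize, true);
    if (map != (PageCompressHeader*)MAP_FAILED) {
        uint16 chunkSizeOnDisk = map->chunk_size; /* should equal chunkSize */
        uint8 algorithm = map->algorithm;         /* 1=pglz, 2=zstd */
        (void)chunkSizeOnDisk;
        (void)algorithm;
        (void)pc_munmap(map);
    }
    close(fd);
}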
uint32 AddrChecksum32(BlockNumber blockNumber, const PageCompressAddr* pageCompressAddr)
{
#define UINT_LEN sizeof(uint32)
uint32 checkSum = 0;
char* addr = ((char*) pageCompressAddr) + UINT_LEN;
size_t len = sizeof(PageCompressAddr) - UINT_LEN;
do {
if (len >= UINT_LEN) {
checkSum += *((uint32*) addr);
addr += UINT_LEN;
len -= UINT_LEN;
} else {
char finalNum[UINT_LEN] = {0};
size_t i = 0;
for (; i < len; ++i) {
finalNum[i] = addr[i];
}
checkSum += *((uint32*) finalNum);
len -= i;
}
} while (len);
return checkSum;
}
#endif

View File

@ -37,7 +37,7 @@
extern int StandbyReadCUforPrimary(uint32 spcnode, uint32 dbnode, uint32 relnode, int32 colid, uint64 offset,
int32 size, uint64 lsn, bytea** cudata);
extern int StandbyReadPageforPrimary(uint32 spcnode, uint32 dbnode, uint32 relnode, int16 bucketnode, int32 forknum, uint32 blocknum,
extern int StandbyReadPageforPrimary(uint32 spcnode, uint32 dbnode, uint32 relnode, int16 bucketnode, uint16 opt, int32 forknum, uint32 blocknum,
uint32 blocksize, uint64 lsn, bytea** pagedata);
#endif /* REMOTE_ADAPTER_H */

View File

@ -42,6 +42,7 @@
#include <dirent.h>
#include "utils/hsearch.h"
#include "storage/smgr/relfilenode.h"
#include "storage/page_compression.h"
#include "postmaster/aiocompleter.h"
/*
@ -182,6 +183,10 @@ extern int data_sync_elevel(int elevel);
extern bool FdRefcntIsZero(SMgrRelation reln, ForkNumber forkNum);
extern FileExistStatus CheckFileExists(const char* path);
/* Page compression support routines */
extern void SetupPageCompressMemoryMap(File file, RelFileNode node, const RelFileNodeForkNum& relFileNodeForkNum);
extern PageCompressHeader *GetPageCompressMemoryMap(File file, uint32 chunk_size);
/* Filename components for OpenTemporaryFile */
// Note that this macro must be the same to macro in initdb.cpp
// If you change it, you must also change initdb.cpp

View File

@ -45,6 +45,9 @@ typedef int ForkNumber;
#define VISIBILITYMAP_FORKNUM 2
#define BCM_FORKNUM 3
#define INIT_FORKNUM 4
// used for data file cache, you can modify them as you like
#define PCA_FORKNUM 5
#define PCD_FORKNUM 6
/*
* NOTE: if you add a new fork, change MAX_FORKNUM below and update the
@ -97,9 +100,18 @@ typedef struct RelFileNode {
Oid spcNode; /* tablespace */
Oid dbNode; /* database */
Oid relNode; /* relation */
int4 bucketNode; /* bucketid */
int2 bucketNode; /* bucketid */
uint2 opt;
} RelFileNode;
typedef struct RelFileNodeV2 {
Oid spcNode; /* tablespace */
Oid dbNode; /* database */
Oid relNode; /* relation */
int4 bucketNode; /* bucketid */
} RelFileNodeV2;
#define IsSegmentFileNode(rnode) ((rnode).bucketNode > InvalidBktId)
#define IsHeapFileNode(rnode) (!IsSegmentFileNode(rnode))
#define IsSegmentPhysicalRelNode(rNode) (IsSegmentFileNode(rNode) && (rNode).relNode <= 5)
@ -130,6 +142,14 @@ typedef struct RelFileNodeOld
(relFileNode).bucketNode = (bucketid); \
} while(0)
#define RelFileNodeV2Copy(relFileNodeV2, relFileNode) \
do { \
(relFileNodeV2).spcNode = (relFileNode).spcNode; \
(relFileNodeV2).dbNode = (relFileNode).dbNode; \
(relFileNodeV2).relNode = (relFileNode).relNode; \
(relFileNodeV2).bucketNode = (relFileNode).bucketNode; \
} while (0)
/*This struct used for remove duplicated file list where we scan part of BCM files*/
typedef struct RelFileNodeKey {
RelFileNode relfilenode; /*relfilenode*/

View File

@ -17,6 +17,7 @@
#include <dirent.h>
#include "utils/resowner.h"
#include "storage/page_compression.h"
#include "storage/smgr/relfilenode.h"
typedef struct vfd {
@ -34,6 +35,8 @@ typedef struct vfd {
int fileFlags; /* open(2) flags for (re)opening the file */
int fileMode; /* mode to pass to open(2) */
RelFileNodeForkNum fileNode; /* current logical file node */
bool with_pcmap; /* is page compression relation */
PageCompressHeader *pcmap; /* memory map of page compression address file */
} Vfd;
#endif /* VFD_H */

View File

@ -221,6 +221,12 @@ public:
template <MemType mem_type>
static int gs_posix_memalign(void** memptr, Size alignment, Size sz, bool needProtect);
template <MemType mem_type>
static bool gs_memprot_reserve(Size sz, bool needProtect);
template <MemType mem_type>
static void gs_memprot_release(Size sz);
};
extern int alloc_trunk_size(int width);

View File

@ -588,6 +588,7 @@ extern Datum pg_read_file(PG_FUNCTION_ARGS);
extern Datum pg_read_file_all(PG_FUNCTION_ARGS);
extern Datum pg_read_binary_file(PG_FUNCTION_ARGS);
extern Datum pg_read_binary_file_all(PG_FUNCTION_ARGS);
extern Datum pg_read_binary_file_blocks(PG_FUNCTION_ARGS);
extern Datum pg_ls_dir(PG_FUNCTION_ARGS);
extern Datum pg_stat_file_recursive(PG_FUNCTION_ARGS);

View File

@ -64,7 +64,7 @@ extern void PartitionCacheInitializePhase3(void);
* Routine to create a partcache entry for an about-to-be-created relation
*/
Partition PartitionBuildLocalPartition(const char *relname, Oid partid, Oid partfilenode, Oid parttablespace,
StorageType storage_type);
StorageType storage_type, Datum reloptions);
/*
* Routines for backend startup
*/

View File

@ -125,6 +125,11 @@ extern const PGLZ_Strategy* const PGLZ_strategy_always;
* ----------
*/
extern bool pglz_compress(const char* source, int32 slen, PGLZ_Header* dest, const PGLZ_Strategy* strategy);
extern void pglz_decompress(const PGLZ_Header* source, char* dest);
extern int32 lz_compress(const char* source, int32 slen, char* dest);
extern int32 lz_decompress(const char* source, int32 slen, char* dest, int32 rawsize, bool check_complete);
#endif /* _PG_LZCOMPRESS_H_ */

View File

@ -94,6 +94,16 @@ typedef struct RelationBucketKey
Oid *bucketKeyType; /*the data type of partition key*/
}RelationBucketKey;
/* page compress related reloptions. */
typedef struct PageCompressOpts {
int compressType; /* compress algorithm */
int compressLevel; /* compress level */
uint32 compressChunkSize; /* chunk size of compressed data */
uint32 compressPreallocChunks; /* prealloced chunks to store compressed data */
bool compressByteConvert; /* byte row-column convert */
bool compressDiffConvert; /* difference (delta) convert */
} PageCompressOpts;
/* describe commit sequence number of object in pg_object */
typedef struct ObjectCSN
{
@ -305,6 +315,12 @@ typedef enum RedisRelAction {
REDIS_REL_RESET_CTID
} RedisHtlAction;
/* PageCompressOpts->compressType values */
typedef enum CompressTypeOption {
COMPRESS_TYPE_NONE = 0, COMPRESS_TYPE_PGLZ = 1, COMPRESS_TYPE_ZSTD = 2
} CompressTypeOption;
typedef struct StdRdOptions {
int32 vl_len_; /* varlena header (do not touch directly!) */
int fillfactor; /* page fill factor in percent (0..100) */
@ -370,6 +386,7 @@ typedef struct StdRdOptions {
char* encrypt_algo;
bool enable_tde; /* switch flag for table-level TDE encryption */
bool on_commit_delete_rows; /* global temp table */
PageCompressOpts compress; /* page compress related reloptions. */
} StdRdOptions;
#define HEAP_MIN_FILLFACTOR 10

View File

@ -636,6 +636,11 @@ extern void PartitionDecrementReferenceCount(Partition part);
((PARTTYPE_VALUE_PARTITIONED_RELATION == (relation)->rd_rel->parttype) && \
(RELKIND_RELATION == (relation)->rd_rel->relkind))
#define HEAP_IS_PARTITIONED(relation) \
((PARTTYPE_PARTITIONED_RELATION == (relation)->rd_rel->parttype || \
PARTTYPE_VALUE_PARTITIONED_RELATION == (relation)->rd_rel->parttype) && \
(RELKIND_RELATION == (relation)->rd_rel->relkind || RELKIND_INDEX == (relation)->rd_rel->relkind))
/*
* type bucketOid bucketKey meaning
* N INV INV relation has no bucket

View File

@ -107,7 +107,8 @@ extern void RelationCacheInitializePhase3(void);
*/
extern Relation RelationBuildLocalRelation(const char* relname, Oid relnamespace, TupleDesc tupDesc, Oid relid,
Oid relfilenode, Oid reltablespace, bool shared_relation, bool mapped_relation, char relpersistence, char relkind,
int8 row_compress, TableAmType tam_type, int8 relindexsplit = 0, StorageType storage_type = HEAP_DISK);
int8 row_compress, Datum reloptions, TableAmType tam_type, int8 relindexsplit = 0, StorageType storage_type = HEAP_DISK,
Oid accessMethodObjectId = 0);
/*
* Routine to manage assignment of new relfilenode to a relation

View File

@ -8,8 +8,6 @@ drop table if exists test_trigger_src_tbl;
NOTICE: table "test_trigger_src_tbl" does not exist, skipping
drop package if exists trigger_test;
NOTICE: package trigger_test() does not exist, skipping
drop table if exists test1;
NOTICE: table "test1" does not exist, skipping
drop table if exists dams_ci.test1;
ERROR: schema "dams_ci" does not exist
drop table if exists dams_ci.DB_LOG;

View File

@ -0,0 +1,183 @@
create schema normal_test;
CREATE TABLE normal_test.tbl_pc(id int, c1 text) WITH(compresstype=1);
\d+ normal_test.tbl_pc
Table "normal_test.tbl_pc"
Column | Type | Modifiers | Storage | Stats target | Description
--------+---------+-----------+----------+--------------+-------------
id | integer | | plain | |
c1 | text | | extended | |
Has OIDs: no
Options: orientation=row, compresstype=1
INSERT INTO normal_test.tbl_pc SELECT id, id::text FROM generate_series(1,1000) id;
select count(*) from normal_test.tbl_pc;
count
-------
1000
(1 row)
select count(*) from normal_test.tbl_pc where id < 100;
count
-------
99
(1 row)
checkpoint;
vacuum normal_test.tbl_pc;
select count(*) from normal_test.tbl_pc;
count
-------
1000
(1 row)
select count(*) from normal_test.tbl_pc where id < 100;
count
-------
99
(1 row)
-- normal index
create index on normal_test.tbl_pc(id) WITH (compresstype=2,compress_chunk_size=1024);
alter index normal_test.tbl_pc_id_idx set (compresstype=1); --failed
ERROR: change compresstype OPTION is not supported
alter index normal_test.tbl_pc_id_idx set (compress_chunk_size=2048); --failed
ERROR: change compress_chunk_size OPTION is not supported
alter index normal_test.tbl_pc_id_idx set (compress_prealloc_chunks=2); --success
alter index normal_test.tbl_pc_id_idx set (compress_level=2); --success
set enable_seqscan = off;
set enable_bitmapscan = off;
select count(*) from normal_test.tbl_pc;
count
-------
1000
(1 row)
CREATE TABLE normal_test.tbl_partition(id int) WITH(compresstype=2,compress_chunk_size=1024) partition by range(id)
(
partition p0 values less than(5000),
partition p1 values less than(10000),
partition p2 values less than(20000),
partition p3 values less than(30000),
partition p4 values less than(40000),
partition p5 values less than(50000),
partition p6 values less than(60000),
partition p7 values less than(70000)
);
insert into normal_test.tbl_partition select generate_series(1,65000);
select count(*) from normal_test.tbl_partition;
count
-------
65000
(1 row)
checkpoint;
vacuum normal_test.tbl_partition;
select count(*) from normal_test.tbl_partition;
count
-------
65000
(1 row)
-- exchange
select relname, reloptions from pg_partition where parentid in (Select relfilenode from pg_class where relname like 'tbl_partition') order by relname;
relname | reloptions
---------------+----------------------------------------------------------------------------
p0 | {orientation=row,compresstype=2,compress_chunk_size=1024}
p1 | {orientation=row,compresstype=2,compress_chunk_size=1024}
p2 | {orientation=row,compresstype=2,compress_chunk_size=1024}
p3 | {orientation=row,compresstype=2,compress_chunk_size=1024}
p4 | {orientation=row,compresstype=2,compress_chunk_size=1024}
p5 | {orientation=row,compresstype=2,compress_chunk_size=1024}
p6 | {orientation=row,compresstype=2,compress_chunk_size=1024}
p7 | {orientation=row,compresstype=2,compress_chunk_size=1024}
tbl_partition | {orientation=row,compresstype=2,compress_chunk_size=1024,wait_clean_gpi=n}
(9 rows)
create table normal_test.exchange_table(id int) WITH(compresstype=2,compress_chunk_size=1024);
ALTER TABLE normal_test.tbl_partition EXCHANGE PARTITION FOR(2500) WITH TABLE normal_test.exchange_table;
select count(*) from normal_test.tbl_partition;
count
-------
60001
(1 row)
-- split
ALTER TABLE normal_test.tbl_partition SPLIT PARTITION p1 AT (7500) INTO (PARTITION p10, PARTITION p11);
select relname, reloptions from pg_partition where parentid in (Select relfilenode from pg_class where relname like 'tbl_partition') order by relname;
relname | reloptions
---------------+----------------------------------------------------------------------------
p0 | {orientation=row,compresstype=2,compress_chunk_size=1024}
p10 | {orientation=row,compresstype=2,compress_chunk_size=1024,wait_clean_gpi=y}
p11 | {orientation=row,compresstype=2,compress_chunk_size=1024,wait_clean_gpi=y}
p2 | {orientation=row,compresstype=2,compress_chunk_size=1024}
p3 | {orientation=row,compresstype=2,compress_chunk_size=1024}
p4 | {orientation=row,compresstype=2,compress_chunk_size=1024}
p5 | {orientation=row,compresstype=2,compress_chunk_size=1024}
p6 | {orientation=row,compresstype=2,compress_chunk_size=1024}
p7 | {orientation=row,compresstype=2,compress_chunk_size=1024}
tbl_partition | {orientation=row,compresstype=2,compress_chunk_size=1024,wait_clean_gpi=y}
(10 rows)
create index on normal_test.tbl_partition(id) local WITH (compresstype=2,compress_chunk_size=1024);
\d+ normal_test.tbl_partition
Table "normal_test.tbl_partition"
Column | Type | Modifiers | Storage | Stats target | Description
--------+---------+-----------+---------+--------------+-------------
id | integer | | plain | |
Indexes:
"tbl_partition_id_idx" btree (id) LOCAL(PARTITION p0_id_idx, PARTITION p10_id_idx, PARTITION p11_id_idx, PARTITION p2_id_idx, PARTITION p3_id_idx, PARTITION p4_id_idx, PARTITION p5_id_idx, PARTITION p6_id_idx, PARTITION p7_id_idx) WITH (compresstype=2, compress_chunk_size=1024) TABLESPACE pg_default
--?.*
--?.*
Has OIDs: no
Options: orientation=row, compresstype=2, compress_chunk_size=1024
select relname, reloptions from pg_partition where parentid in (Select relfilenode from pg_class where relname like 'tbl_partition_id_idx') order by relname;
relname | reloptions
------------+-------------------------------------------
p0_id_idx | {compresstype=2,compress_chunk_size=1024}
p10_id_idx | {compresstype=2,compress_chunk_size=1024}
p11_id_idx | {compresstype=2,compress_chunk_size=1024}
p2_id_idx | {compresstype=2,compress_chunk_size=1024}
p3_id_idx | {compresstype=2,compress_chunk_size=1024}
p4_id_idx | {compresstype=2,compress_chunk_size=1024}
p5_id_idx | {compresstype=2,compress_chunk_size=1024}
p6_id_idx | {compresstype=2,compress_chunk_size=1024}
p7_id_idx | {compresstype=2,compress_chunk_size=1024}
(9 rows)
-- unsupport
alter index normal_test.tbl_partition_id_idx set (compresstype=1);
ERROR: change compresstype OPTION is not supported
alter index normal_test.tbl_partition_id_idx set (compress_chunk_size=2048);
ERROR: change compress_chunk_size OPTION is not supported
alter index normal_test.tbl_partition_id_idx set (compress_prealloc_chunks=2);
ERROR: change partition compress_prealloc_chunks OPTION is not supported
-- support
alter table normal_test.tbl_pc set (compress_prealloc_chunks=2);
-- new testcase
set search_path=normal_test;
\d+
--?.*
--?.*
--?.*
--?.*
--?.*
--?.*
(3 rows)
reset search_path;
CREATE TABLE normal_test.pre_handle(id int) WITH(compresstype=2, compress_chunk_size=512, compress_byte_convert=true, compress_diff_convert=true);
insert into normal_test.pre_handle select generate_series(1,1000);
checkpoint;
select count(*) from normal_test.pre_handle;
count
-------
1000
(1 row)
drop schema normal_test cascade;
NOTICE: drop cascades to 4 other objects
DETAIL: drop cascades to table normal_test.tbl_pc
drop cascades to table normal_test.tbl_partition
drop cascades to table normal_test.exchange_table
drop cascades to table normal_test.pre_handle

View File

@ -0,0 +1,79 @@
-- row table pg_table_size
create schema table_size_schema;
CREATE TABLE table_size_schema.normal_table(id int);
CREATE TABLE table_size_schema.compressed_table_1024(id int) WITH(compresstype=2, compress_chunk_size=1024);
CREATE TABLE table_size_schema.compressed_table_2048(id int) WITH(compresstype=2, compress_chunk_size=2048);
CREATE TABLE table_size_schema.compressed_table_4096(id int) WITH(compresstype=2, compress_chunk_size=4096);
select pg_table_size('table_size_schema.normal_table');
pg_table_size
---------------
0
(1 row)
select pg_table_size('table_size_schema.compressed_table_1024');
pg_table_size
---------------
5592896
(1 row)
select pg_table_size('table_size_schema.compressed_table_2048');
pg_table_size
---------------
3196168
(1 row)
select pg_table_size('table_size_schema.compressed_table_4096');
pg_table_size
---------------
2097664
(1 row)
drop schema table_size_schema cascade;
NOTICE: drop cascades to 4 other objects
DETAIL: drop cascades to table table_size_schema.normal_table
drop cascades to table table_size_schema.compressed_table_1024
drop cascades to table table_size_schema.compressed_table_2048
drop cascades to table table_size_schema.compressed_table_4096
-- partition table pg_table_size
create schema partition_table_size_schema;
create table partition_table_size_schema.normal_partition(INV_DATE_SK integer)
partition by range(inv_date_sk)(partition p0 values less than(5000),partition p1 values less than(10000));
create table partition_table_size_schema.compressed_partition_1024(INV_DATE_SK integer)
WITH(compresstype=2, compress_chunk_size=1024)
partition by range(inv_date_sk)(partition p0 values less than(5000),partition p1 values less than(10000));
create table partition_table_size_schema.compressed_partition_2048(INV_DATE_SK integer)
WITH(compresstype=2, compress_chunk_size=2048)
partition by range(inv_date_sk)(partition p0 values less than(5000),partition p1 values less than(10000));
create table partition_table_size_schema.compressed_partition_4096(INV_DATE_SK integer)
WITH(compresstype=2, compress_chunk_size=4096)
partition by range(inv_date_sk)(partition p0 values less than(5000),partition p1 values less than(10000));
select pg_table_size('partition_table_size_schema.normal_partition');
pg_table_size
---------------
0
(1 row)
select pg_table_size('partition_table_size_schema.compressed_partition_1024');
pg_table_size
---------------
11185792
(1 row)
select pg_table_size('partition_table_size_schema.compressed_partition_2048');
pg_table_size
---------------
6392336
(1 row)
select pg_table_size('partition_table_size_schema.compressed_partition_4096');
pg_table_size
---------------
4195328
(1 row)
drop schema partition_table_size_schema cascade;
NOTICE: drop cascades to 4 other objects
DETAIL: drop cascades to table partition_table_size_schema.normal_partition
drop cascades to table partition_table_size_schema.compressed_partition_1024
drop cascades to table partition_table_size_schema.compressed_partition_2048
drop cascades to table partition_table_size_schema.compressed_partition_4096

View File

@ -0,0 +1,32 @@
CREATE TABLESPACE normal_tablespace RELATIVE LOCATION 'normal_tablespace';
SELECT pg_tablespace_size('normal_tablespace');
pg_tablespace_size
--------------------
4096
(1 row)
CREATE TABLE normal_table(id int) TABLESPACE normal_tablespace;
SELECT pg_tablespace_size('normal_tablespace');
pg_tablespace_size
--------------------
8192
(1 row)
CREATE TABLESPACE compress_tablespace RELATIVE LOCATION 'compress_tablespace';
SELECT pg_tablespace_size('compress_tablespace');
pg_tablespace_size
--------------------
4096
(1 row)
CREATE TABLE compressed_table_1024(id int) WITH(compresstype=2, compress_chunk_size=1024) TABLESPACE compress_tablespace;
SELECT pg_tablespace_size('compress_tablespace');
pg_tablespace_size
--------------------
5601088
(1 row)
DROP TABLE normal_table;
DROP TABLESPACE normal_tablespace;
DROP TABLE compressed_table_1024;
DROP TABLESPACE compress_tablespace;

View File

@ -0,0 +1,66 @@
create schema unspported_feature;
-- unsupported compressType: 3
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compresstype=3, compress_chunk_size=1024);
ERROR: value 3 out of bounds for option "compresstype"
DETAIL: Valid values are between "0" and "2".
-- unsupported compress_chunk_size: 2000
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compresstype=2, compress_chunk_size=2000);
ERROR: invalid compress_chunk_size 2000 , must be one of 512, 1024, 2048 or 4096 for compressed_table_1024
-- unsupported compress_prealloc_chunks: -1
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compresstype=2, compress_prealloc_chunks=-1);
ERROR: value -1 out of bounds for option "compress_prealloc_chunks"
DETAIL: Valid values are between "0" and "7".
-- unsupported compress_prealloc_chunks: 8
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compresstype=2, compress_prealloc_chunks=8);
ERROR: value 8 out of bounds for option "compress_prealloc_chunks"
DETAIL: Valid values are between "0" and "7".
-- unsupported compress_level: 128
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compresstype=2, compress_level=128);
ERROR: value 128 out of bounds for option "compress_level"
DETAIL: Valid values are between "-31" and "31".
-- compresstype can't be used with column table
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(ORIENTATION = 'column', compresstype=2);
ERROR: only row orientation table support compresstype/compress_chunk_size/compress_prealloc_chunks/compress_level.
-- compresstype can't be used with temp table
CREATE TEMP TABLE compressed_temp_table_1024(id int) WITH(compresstype=2);
ERROR: only row orientation table support compresstype/compress_chunk_size/compress_prealloc_chunks/compress_level.
-- compresstype can't be used with unlogged table
CREATE unlogged TABLE compressed_unlogged_table_1024(id int) WITH(compresstype=2);
ERROR: only row orientation table support compresstype/compress_chunk_size/compress_prealloc_chunks/compress_level.
-- use compress_prealloc_chunks/compress_chunk_size/compress_level without compresstype
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compress_prealloc_chunks=5);
ERROR: compress_chunk_size/compress_prealloc_chunks/compress_level should be used with compresstype.
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compress_chunk_size=1024);
ERROR: compress_chunk_size/compress_prealloc_chunks/compress_level should be used with compresstype.
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compress_level=5);
ERROR: compress_chunk_size/compress_prealloc_chunks/compress_level should be used with compresstype.
-- unsupported exchange
CREATE TABLE unspported_feature.exchange_table(id int) WITH(compresstype=2);
CREATE TABLE unspported_feature.alter_table(id int) partition by range(id)
(
partition p0 values less than(5000),
partition p1 values less than(10000),
partition p2 values less than(20000),
partition p3 values less than(30000),
partition p4 values less than(40000),
partition p5 values less than(50000),
partition p6 values less than(60000),
partition p7 values less than(70000)
);
ALTER TABLE unspported_feature.alter_table EXCHANGE PARTITION FOR(2500) WITH TABLE unspported_feature.exchange_table;
ERROR: tables in ALTER TABLE EXCHANGE PARTITION must have the same type of compress
-- unsupported alter compress_chunk_size
create TABLE unspported_feature.alter_table_option(id int) WITH(compresstype=2);
\d+ unspported_feature.alter_table_option
Table "unspported_feature.alter_table_option"
Column | Type | Modifiers | Storage | Stats target | Description
--------+---------+-----------+---------+--------------+-------------
id | integer | | plain | |
Has OIDs: no
Options: orientation=row, compresstype=2
ALTER TABLE unspported_feature.alter_table_option SET(compresstype=0); -- fail
ERROR: change compresstype OPTION is not supported
ALTER TABLE unspported_feature.alter_table_option SET(compress_chunk_size=2048); -- fail
ERROR: change compress_chunk_size OPTION is not supported
ALTER TABLE unspported_feature.alter_table_option SET(compress_level=2, compress_prealloc_chunks=0);

View File

@ -1,347 +1,347 @@
--
-- RULES TEST
--
--
-- Tables and rules for the view test
--
create table test1 (a int4, b int4);
create view tv1 as select * from test1;
create rule tv1_ins as on insert to tv1 do instead
insert into test1 values (new.a, new.b);
create rule tv1_upd as on update to tv1 do instead
update test1 set a = new.a, b = new.b
where a = old.a;
create rule tv1_del as on delete to tv1 do instead
delete from test1 where a = old.a;
-- insert values
insert into tv1 values (1, 11);
insert into tv1 values (2, 12);
select * from tv1;
a | b
---+----
1 | 11
2 | 12
(2 rows)
-- update values
update tv1 set a = 10 where b = 11;
update tv1 set a = 12 , b = 22 where b = 12;
select * from tv1;
a | b
----+----
10 | 11
12 | 22
(2 rows)
-- delete values
delete from tv1 where a = 10;
select * from tv1;
a | b
----+----
12 | 22
(1 row)
drop rule if exists tv1_ins on tv1;
drop rule if exists tv1_upd on tv1;
drop rule if exists tv1_del on tv1;
drop view if exists tv1;
drop table if exists test1;
--
-- Tables and rules for the constraint update/delete/insert test
--
create table ttsystem (sysname text, sysdesc text);
create table ttadmin (pname text, sysname text);
create table ttperon (pname text, pdesc text);
create table ttinterface (sysname text, ifname text);
create rule usys_ins as on insert to ttsystem do also (
insert into ttinterface values (new.sysname,'');
insert into ttadmin values ('',new.sysname);
);
create rule usys_del as on delete to ttsystem do also (
delete from ttinterface where sysname = old.sysname;
delete from ttadmin where sysname = old.sysname;
);
create rule usys_upd as on update to ttsystem do also (
update ttinterface set sysname = new.sysname
where sysname = old.sysname;
update ttadmin set sysname = new.sysname
where sysname = old.sysname
);
create rule upers_ins as on insert to ttperon do also (
insert into ttadmin values (new.pname,'');
);
create rule upers_del as on delete to ttperon do also
delete from ttadmin where pname = old.pname;
create rule upers_upd as on update to ttperon do also
update ttadmin set pname = new.pname where pname = old.pname;
-- test 1
insert into ttsystem values ('winxi', 'Linux Jan Wieck');
insert into ttsystem values ('notjw', 'Qu Yan');
insert into ttsystem values ('yuyan', 'Fileserver');
insert into ttinterface values ('winxi', 'dola');
insert into ttinterface values ('winxi', 'eth1');
insert into ttinterface values ('notjw', 'dola');
insert into ttinterface values ('yuyan', 'dola');
insert into ttperon values ('jw', 'Jan Wieck');
insert into ttperon values ('bm', 'Bruce Momjian');
insert into ttadmin values ('jw', 'winxi');
insert into ttadmin values ('jw', 'notjw');
insert into ttadmin values ('bm', 'yuyan');
select * from ttsystem;
sysname | sysdesc
---------+-----------------
winxi | Linux Jan Wieck
notjw | Qu Yan
yuyan | Fileserver
(3 rows)
select * from ttinterface;
sysname | ifname
---------+--------
winxi |
notjw |
yuyan |
winxi | dola
winxi | eth1
notjw | dola
yuyan | dola
(7 rows)
select * from ttperon;
pname | pdesc
-------+---------------
jw | Jan Wieck
bm | Bruce Momjian
(2 rows)
select * from ttadmin;
pname | sysname
-------+---------
| winxi
| notjw
| yuyan
jw |
bm |
jw | winxi
jw | notjw
bm | yuyan
(8 rows)
-- test 2
update ttsystem set sysname = 'pluto' where sysname = 'yuyan';
select * from ttinterface;
sysname | ifname
---------+--------
winxi |
notjw |
winxi | dola
winxi | eth1
notjw | dola
pluto |
pluto | dola
(7 rows)
select * from ttadmin;
pname | sysname
-------+---------
| winxi
| notjw
jw |
bm |
jw | winxi
jw | notjw
| pluto
bm | pluto
(8 rows)
update ttperon set pname = 'jwieck' where pdesc = 'Jan Wieck';
select * from ttadmin order by pname, sysname;
pname | sysname
--------+---------
bm | pluto
bm |
jwieck | notjw
jwieck | winxi
jwieck |
| notjw
| pluto
| winxi
(8 rows)
delete from ttsystem where sysname = 'winxi';
select * from ttinterface;
sysname | ifname
---------+--------
notjw |
notjw | dola
pluto |
pluto | dola
(4 rows)
select * from ttadmin;
pname | sysname
--------+---------
| notjw
bm |
| pluto
bm | pluto
jwieck |
jwieck | notjw
(6 rows)
delete from ttperon where pname = 'bm';
select * from ttadmin;
pname | sysname
--------+---------
| notjw
| pluto
jwieck |
jwieck | notjw
(4 rows)
drop rule if exists usys_upd on ttsystem;
drop rule if exists usys_del on ttsystem;
drop rule if exists usys_ins on ttsystem;
drop rule if exists upers_upd on ttperon;
drop rule if exists upers_del on ttperon;
drop rule if exists upers_ins on ttperon;
drop table if exists ttsystem;
drop table if exists ttinterface;
drop table if exists ttperon;
drop table if exists ttadmin;
--
-- Tables and rules for the logging test
--
create table temp (ename char(20), salary money);
create table templog (ename char(20), action char(10), newsal money, oldsal money);
create rule temp_ins as on insert to temp do
insert into templog values (new.ename, 'hired', new.salary, '0.00');
create rule temp_upd as on update to temp where new.salary != old.salary do
insert into templog values (new.ename, 'honored', new.salary, old.salary);
create rule temp_del as on delete to temp do
insert into templog values (old.ename, 'fired', '0.00', old.salary);
insert into temp values ('tyu', '45.00');
insert into temp values ('asd', '90.00');
select * from templog;
ename | action | newsal | oldsal
----------------------+------------+--------+--------
tyu | hired | $45.00 | $0.00
asd | hired | $90.00 | $0.00
(2 rows)
update temp set salary = salary * 2 where ename = 'tyu';
select * from templog;
ename | action | newsal | oldsal
----------------------+------------+--------+--------
tyu | hired | $45.00 | $0.00
asd | hired | $90.00 | $0.00
tyu | honored | $90.00 | $45.00
(3 rows)
delete from temp where ename = 'tyu';
select * from templog;
ename | action | newsal | oldsal
----------------------+------------+--------+--------
tyu | hired | $45.00 | $0.00
asd | hired | $90.00 | $0.00
tyu | honored | $90.00 | $45.00
tyu | fired | $0.00 | $90.00
(4 rows)
select * from temp;
ename | salary
----------------------+--------
asd | $90.00
(1 row)
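One case the log test above does not exercise: temp_upd is qualified with new.salary != old.salary, so an update that leaves the salary untouched writes no log row at all. A hypothetical extra step (not part of the original script, run before the drops below) would look like:
update temp set salary = salary where ename = 'asd';
select count(*) from templog;  -- still 4 rows: the qualified rule does not fire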
drop rule if exists temp_ins on temp;
drop rule if exists temp_upd on temp;
drop rule if exists temp_del on temp;
drop table if exists temp;
drop table if exists templog;
--
-- Conditional rules test
--
create table test4 (a int4, b text);
create table test5 (a int4, b text);
create table test6 (a int4, b text);
create rule test4_ins1 as on insert to test4
where new.a >= 10 and new.a < 20 do instead
insert into test5 values (new.a, new.b);
create rule test4_ins2 as on insert to test4
where new.a >= 20 and new.a < 30 do
insert into test6 values (new.a, new.b);
-- test
insert into test4 values (5, 'huijioa');
insert into test4 values (15, 'afhuvbn');
insert into test4 values (25, 'qwerty');
insert into test4 values (35, 'zxcvbn');
select * from test4;
a | b
----+---------
5 | huijioa
25 | qwerty
35 | zxcvbn
(3 rows)
select * from test5;
a | b
----+---------
15 | afhuvbn
(1 row)
select * from test6;
a | b
----+--------
25 | qwerty
(1 row)
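The asymmetry here is the point of the test: test4_ins1 is DO INSTEAD, so 15 lands only in test5, while test4_ins2 is a plain DO, so 25 is stored in test4 and also copied to test6. A hypothetical boundary insert (not in the original script) makes the non-instead behaviour explicit:
insert into test4 values (20, 'boundary');  -- >= 20 and < 30: kept in test4 and copied to test6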
drop rule if exists test4_ins1 on test4;
drop rule if exists test4_ins2 on test4;
drop table if exists test4;
drop table if exists test5;
drop table if exists test6;
--
-- Tables and rules for select
--
create table ttt1 (a int4, b text);
create table ttt2 (a int4, b text);
create rule "_RETURN" as on select to ttt1 do instead (
select * from ttt2;
);
-- test
insert into ttt1 values (1, 'hello');
insert into ttt2 values (10, 'world');
select * from ttt1;
a | b
----+-------
10 | world
(1 row)
drop table if exists ttt1;
drop table if exists ttt2;
--
-- Tables and rules for the multi-action rule question
--
create table test_statement(id int);
create table escapetest (ts varchar(50));
create rule r1 as on insert to escapetest do (
delete from test_statement;
insert into test_statement values (1);
insert into test_statement values (2);
);
-- test
insert into escapetest(ts) values (NULL);
select * from test_statement;
id
----
1
2
(2 rows)
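Because r1 bundles three actions, every insert into escapetest replays the whole sequence: the delete clears test_statement and the two inserts repopulate it, so the table always ends up holding exactly 1 and 2. A hypothetical second round (not in the original script) would leave the output unchanged:
insert into escapetest(ts) values ('again');
select * from test_statement;  -- still exactly two rows: 1 and 2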
drop rule if exists r1 on escapetest;
drop table if exists test_statement;
drop table if exists escapetest;

View File

@ -2743,6 +2743,7 @@ WHERE d.classoid IS NULL AND p1.oid <= 9999 order by 1;
4764 | ubtoptions
4765 | ubtcostestimate
4767 | gs_read_block_from_remote
4768 | gs_read_block_from_remote
4789 | remote_rto_stat
4800 | job_cancel
4801 | job_finish
@ -2939,6 +2940,7 @@ WHERE d.classoid IS NULL AND p1.oid <= 9999 order by 1;
7998 | set_working_grand_version_num_manually
8001 | get_paxos_replication_info
8050 | datalength
8413 | pg_read_binary_file_blocks
8642 | gs_txid_oldestxmin
9004 | smalldatetime_in
9006 | smalldatetime_out

View File

@ -0,0 +1,6 @@
\! @abs_bindir@/gsql -dpostgres -p @portstring@ -c "create database gs_basebackup;"
\! @abs_bindir@/gsql -dgs_basebackup -p @portstring@ -f "@abs_srcdir@/sql/gs_basebackup/init/compress_data.sql";
\! mkdir @abs_bindir@/../gs_basebackup_node_nstream_np
\! chmod 700 @abs_bindir@/../gs_basebackup_node_nstream_np
\! chmod +x @abs_srcdir@/script/gs_basebackup/gs_basebackup.sh
\! @abs_srcdir@/script/gs_basebackup/gs_basebackup.sh @abs_bindir@ @abs_srcdir@ @portstring@ gs_basebackup_node_nstream_np compress_data.sql

View File

@ -0,0 +1,28 @@
--?.*
CREATE DATABASE
--?.*
CREATE TABLE
CREATE INDEX
INSERT 0 1000
CHECKPOINT
--?.*
--?.*
--?.*
--?.*
--?.*
--?.*
--?.*
--?.*
count
-------
1000
(1 row)
SET
count
-------
1000
(1 row)
--?.*
SHUTDOWN

View File

@ -837,4 +837,5 @@ test: hw_cipher_aes128
test: sequence_cache_test
test: pg_buffercache_pages
test: test_astore_multixact
test: test_astore_multixact
test: row_compression/pg_table_size row_compression/pg_tablespace_size row_compression/unsupported_feature row_compression/normal_test

View File

@ -2,8 +2,9 @@ abs_bindir=$1
abs_srcdir=$2
abs_port=$3
dataNode=$4
x_option=${5-}
format=${6-}
validate_sql=$5
x_option=${6-}
format=${7-}
# backup
if [ 'x'${x_option} == 'x' ]
then
@ -54,9 +55,8 @@ sleep 10s
$abs_bindir/gs_ctl status -D $abs_bindir/../$dataNode
#validate
$abs_bindir/gsql -dgs_basebackup -p$gs_basebackup_port -f "$abs_srcdir/sql/gs_basebackup/validate/tablespace.sql";
$abs_bindir/gsql -dgs_basebackup -p$gs_basebackup_port -f "$abs_srcdir/sql/gs_basebackup/validate/mot.sql";
$abs_bindir/gsql -dgs_basebackup -p$gs_basebackup_port -f "$abs_srcdir/sql/gs_basebackup/validate/$validate_sql";
#stop node
$abs_bindir/gsql -dgs_basebackup -p$gs_basebackup_port -c 'SHUTDOWN IMMEDIATE'
$abs_bindir/gsql -dgs_basebackup -p$gs_basebackup_port -c 'SHUTDOWN IMMEDIATE'

View File

@ -0,0 +1,4 @@
CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=2, compress_chunk_size=512);
create index on tbl_pc(id) WITH (compresstype=2,compress_chunk_size=1024);
INSERT INTO tbl_pc SELECT id, id::text FROM generate_series(1,1000) id;
checkpoint;
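This init script exercises the new storage parameters end to end: a compressed heap with 512-byte chunks, a compressed btree with 1024-byte chunks on the same column, and a checkpoint so the compressed blocks reach disk before the backup runs. As a quick sanity check — a sketch, assuming the options surface through pg_class.reloptions like other storage parameters and that the index gets the default generated name — the settings can be read back with:
select relname, reloptions from pg_class where relname in ('tbl_pc', 'tbl_pc_id_idx');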

View File

@ -0,0 +1,3 @@
select count(*) from tbl_pc;
set enable_seqscan=off;
select count(*) from tbl_pc;
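The validate script counts the rows twice: first over the compressed heap via the default sequential scan, then with enable_seqscan=off so the planner is pushed onto the compressed btree, proving both access paths decompress correctly after restore. A plan check along these lines (a sketch; the exact plan shape depends on the server) would make the index path explicit:
set enable_seqscan = off;
explain (costs off) select count(*) from tbl_pc where id < 100;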

View File

@ -3,7 +3,6 @@ drop trigger if exists insert_trigger on test_trigger_src_tbl;
drop table if exists test_trigger_des_tbl;
drop table if exists test_trigger_src_tbl;
drop package if exists trigger_test;
drop table if exists test1;
drop table if exists dams_ci.test1;
drop table if exists dams_ci.DB_LOG;
drop table if exists au_pkg;

View File

@ -0,0 +1,69 @@
create schema normal_test;
CREATE TABLE normal_test.tbl_pc(id int, c1 text) WITH(compresstype=1);
\d+ normal_test.tbl_pc
INSERT INTO normal_test.tbl_pc SELECT id, id::text FROM generate_series(1,1000) id;
select count(*) from normal_test.tbl_pc;
select count(*) from normal_test.tbl_pc where id < 100;
checkpoint;
vacuum normal_test.tbl_pc;
select count(*) from normal_test.tbl_pc;
select count(*) from normal_test.tbl_pc where id < 100;
-- normal index
create index on normal_test.tbl_pc(id) WITH (compresstype=2,compress_chunk_size=1024);
alter index normal_test.tbl_pc_id_idx set (compresstype=1); --failed
alter index normal_test.tbl_pc_id_idx set (compress_chunk_size=2048); --failed
alter index normal_test.tbl_pc_id_idx set (compress_prealloc_chunks=2); --success
alter index normal_test.tbl_pc_id_idx set (compress_level=2); --success
set enable_seqscan = off;
set enable_bitmapscan = off;
select count(*) from normal_test.tbl_pc;
CREATE TABLE normal_test.tbl_partition(id int) WITH(compresstype=2,compress_chunk_size=1024) partition by range(id)
(
partition p0 values less than(5000),
partition p1 values less than(10000),
partition p2 values less than(20000),
partition p3 values less than(30000),
partition p4 values less than(40000),
partition p5 values less than(50000),
partition p6 values less than(60000),
partition p7 values less than(70000)
);
insert into normal_test.tbl_partition select generate_series(1,65000);
select count(*) from normal_test.tbl_partition;
checkpoint;
vacuum normal_test.tbl_partition;
select count(*) from normal_test.tbl_partition;
-- exchange
select relname, reloptions from pg_partition where parentid in (Select relfilenode from pg_class where relname like 'tbl_partition') order by relname;
create table normal_test.exchange_table(id int) WITH(compresstype=2,compress_chunk_size=1024);
ALTER TABLE normal_test.tbl_partition EXCHANGE PARTITION FOR(2500) WITH TABLE normal_test.exchange_table;
select count(*) from normal_test.tbl_partition;
-- split
ALTER TABLE normal_test.tbl_partition SPLIT PARTITION p1 AT (7500) INTO (PARTITION p10, PARTITION p11);
select relname, reloptions from pg_partition where parentid in (Select relfilenode from pg_class where relname like 'tbl_partition') order by relname;
create index on normal_test.tbl_partition(id) local WITH (compresstype=2,compress_chunk_size=1024);
\d+ normal_test.tbl_partition
select relname, reloptions from pg_partition where parentid in (Select relfilenode from pg_class where relname like 'tbl_partition_id_idx') order by relname;
-- unsupported
alter index normal_test.tbl_partition_id_idx set (compresstype=1);
alter index normal_test.tbl_partition_id_idx set (compress_chunk_size=2048);
alter index normal_test.tbl_partition_id_idx set (compress_prealloc_chunks=2);
-- supported
alter table normal_test.tbl_pc set (compress_prealloc_chunks=2);
-- new testcase
set search_path=normal_test;
\d+
reset search_path;
CREATE TABLE normal_test.pre_handle(id int) WITH(compresstype=2, compress_chunk_size=512, compress_byte_convert=true, compress_diff_convert=true);
insert into normal_test.pre_handle select generate_series(1,1000);
checkpoint;
select count(*) from normal_test.pre_handle;
drop schema normal_test cascade;
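A pattern worth noting in the script above: compresstype and compress_chunk_size are fixed at table or index creation (the ALTER statements marked --failed), while compress_prealloc_chunks and compress_level stay adjustable afterwards. A minimal sketch of the mutable subset, using a hypothetical table and assuming tables behave like the index case shown:
create table t_opts(id int) with(compresstype=2, compress_chunk_size=1024);
alter table t_opts set (compress_prealloc_chunks=2, compress_level=2);  -- expected to succeed
alter table t_opts set (compress_chunk_size=2048);                      -- expected to fail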

View File

@ -0,0 +1,30 @@
-- row table pg_table_size
create schema table_size_schema;
CREATE TABLE table_size_schema.normal_table(id int);
CREATE TABLE table_size_schema.compressed_table_1024(id int) WITH(compresstype=2, compress_chunk_size=1024);
CREATE TABLE table_size_schema.compressed_table_2048(id int) WITH(compresstype=2, compress_chunk_size=2048);
CREATE TABLE table_size_schema.compressed_table_4096(id int) WITH(compresstype=2, compress_chunk_size=4096);
select pg_table_size('table_size_schema.normal_table');
select pg_table_size('table_size_schema.compressed_table_1024');
select pg_table_size('table_size_schema.compressed_table_2048');
select pg_table_size('table_size_schema.compressed_table_4096');
drop schema table_size_schema cascade;
-- partition table pg_table_size
create schema partition_table_size_schema;
create table partition_table_size_schema.normal_partition(INV_DATE_SK integer)
partition by range(inv_date_sk)(partition p0 values less than(5000),partition p1 values less than(10000));
create table partition_table_size_schema.compressed_partition_1024(INV_DATE_SK integer)
WITH(compresstype=2, compress_chunk_size=1024)
partition by range(inv_date_sk)(partition p0 values less than(5000),partition p1 values less than(10000));
create table partition_table_size_schema.compressed_partition_2048(INV_DATE_SK integer)
WITH(compresstype=2, compress_chunk_size=2048)
partition by range(inv_date_sk)(partition p0 values less than(5000),partition p1 values less than(10000));
create table partition_table_size_schema.compressed_partition_4096(INV_DATE_SK integer)
WITH(compresstype=2, compress_chunk_size=4096)
partition by range(inv_date_sk)(partition p0 values less than(5000),partition p1 values less than(10000));
select pg_table_size('partition_table_size_schema.normal_partition');
select pg_table_size('partition_table_size_schema.compressed_partition_1024');
select pg_table_size('partition_table_size_schema.compressed_partition_2048');
select pg_table_size('partition_table_size_schema.compressed_partition_4096');
drop schema partition_table_size_schema cascade;
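The three chunk sizes are the interesting variable: against the default 8K block size, compress_chunk_size=1024 lets a compressed page occupy any multiple of 1K on disk, while 4096 presumably only saves space once a page compresses below half its size, so the pg_table_size results should spread accordingly. A quick check of the block-size assumption (a sketch, relying on the standard preset GUC):
show block_size;  -- 8192 by default, so chunk sizes 1024/2048/4096 allow at most 8/4/2 chunks per page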

View File

@ -0,0 +1,14 @@
CREATE TABLESPACE normal_tablespace RELATIVE LOCATION 'normal_tablespace';
SELECT pg_tablespace_size('normal_tablespace');
CREATE TABLE normal_table(id int) TABLESPACE normal_tablespace;
SELECT pg_tablespace_size('normal_tablespace');
CREATE TABLESPACE compress_tablespace RELATIVE LOCATION 'compress_tablespace';
SELECT pg_tablespace_size('compress_tablespace');
CREATE TABLE compressed_table_1024(id int) WITH(compresstype=2, compress_chunk_size=1024) TABLESPACE compress_tablespace;
SELECT pg_tablespace_size('compress_tablespace');
DROP TABLE normal_table;
DROP TABLESPACE normal_tablespace;
DROP TABLE compressed_table_1024;
DROP TABLESPACE compress_tablespace;

View File

@ -0,0 +1,41 @@
create schema unspported_feature;
-- unsupported compresstype: 3
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compresstype=3, compress_chunk_size=1024);
-- unsupported compress_chunk_size: 2000
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compresstype=2, compress_chunk_size=2000);
-- unsupported compress_prealloc_chunks: -1
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compresstype=2, compress_prealloc_chunks=-1);
-- unsupported compress_prealloc_chunks: 8
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compresstype=2, compress_prealloc_chunks=8);
-- unsupported compress_level: 128
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compresstype=2, compress_level=128);
-- compresstype can't be used with a column-store table
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(ORIENTATION = 'column', compresstype=2);
-- compresstype can't be used with a temp table
CREATE TEMP TABLE compressed_temp_table_1024(id int) WITH(compresstype=2);
-- compresstype can't be used with an unlogged table
CREATE unlogged TABLE compressed_unlogged_table_1024(id int) WITH(compresstype=2);
-- using compress_prealloc_chunks/compress_chunk_size/compress_level without compresstype
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compress_prealloc_chunks=5);
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compress_chunk_size=1024);
CREATE TABLE unspported_feature.compressed_table_1024(id int) WITH(compress_level=5);
-- unsupported exchange
CREATE TABLE unspported_feature.exchange_table(id int) WITH(compresstype=2);
CREATE TABLE unspported_feature.alter_table(id int) partition by range(id)
(
partition p0 values less than(5000),
partition p1 values less than(10000),
partition p2 values less than(20000),
partition p3 values less than(30000),
partition p4 values less than(40000),
partition p5 values less than(50000),
partition p6 values less than(60000),
partition p7 values less than(70000)
);
ALTER TABLE unspported_feature.alter_table EXCHANGE PARTITION FOR(2500) WITH TABLE unspported_feature.exchange_table;
-- unsupported: altering compress_chunk_size
create TABLE unspported_feature.alter_table_option(id int) WITH(compresstype=2);
\d+ unspported_feature.alter_table_option
ALTER TABLE unspported_feature.alter_table_option SET(compresstype=0); -- fail
ALTER TABLE unspported_feature.alter_table_option SET(compress_chunk_size=2048); -- fail
ALTER TABLE unspported_feature.alter_table_option SET(compress_level=2, compress_prealloc_chunks=0);
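Taken together, the negative cases pin down the accepted envelope: compresstype 1 or 2, compress_chunk_size a power of two between 512 and 4096 (2000 is rejected), compress_prealloc_chunks apparently 0 through 7, a bounded compress_level, row-store permanent tables only, and no dependent option without compresstype. A create statement staying inside all of those bounds — a sketch of the presumed valid envelope, not an exhaustive check — would be:
CREATE TABLE unspported_feature.valid_envelope(id int)
WITH(compresstype=2, compress_chunk_size=1024, compress_prealloc_chunks=4, compress_level=2);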

Some files were not shown because too many files have changed in this diff