生成SortGroup计划

This commit is contained in:
ljy 2023-01-31 11:56:36 +08:00
parent d10d4cc3d2
commit fa6df03368
14 changed files with 1097 additions and 25 deletions

View File

@ -3791,8 +3791,7 @@ bool permit_predpush(PlannerInfo *root)
return !predpushHint->negative;
}
const unsigned int G_NUM_SET_HINT_WHITE_LIST = 33;
const char* G_SET_HINT_WHITE_LIST[G_NUM_SET_HINT_WHITE_LIST] = {
const char* G_SET_HINT_WHITE_LIST[] = {
/* keep in the ascending alphabetical order of frequency */
(char*)"best_agg_plan",
(char*)"cost_weight_index",
@ -3818,6 +3817,7 @@ const char* G_SET_HINT_WHITE_LIST[G_NUM_SET_HINT_WHITE_LIST] = {
(char*)"enable_remotesort",
(char*)"enable_seqscan",
(char*)"enable_sort",
(char*)"enable_sortgroup_agg",
(char*)"enable_stream_operator",
(char*)"enable_stream_recursive",
(char*)"enable_tidscan",
@ -3828,6 +3828,8 @@ const char* G_SET_HINT_WHITE_LIST[G_NUM_SET_HINT_WHITE_LIST] = {
(char*)"seq_page_cost",
(char*)"try_vector_engine_strategy"};
const unsigned int G_NUM_SET_HINT_WHITE_LIST = sizeof(G_SET_HINT_WHITE_LIST) / sizeof(G_SET_HINT_WHITE_LIST[0]);
static int param_str_cmp(const void *s1, const void *s2)
{
const char *key = (const char *)s1;

View File

@ -682,6 +682,17 @@ static void InitSqlConfigureNamesBool()
NULL,
NULL,
NULL},
{{"enable_sortgroup_agg",
PGC_USERSET,
NODE_ALL,
QUERY_TUNING_METHOD,
gettext_noop("Enables the planner's use of sort group aggregation plans."),
NULL},
&u_sess->attr.attr_sql.enable_sortgroup_agg,
false,
NULL,
NULL,
NULL},
{{"enable_material",
PGC_USERSET,
NODE_ALL,

View File

@ -2294,6 +2294,120 @@ void cost_sort(Path* path, List* pathkeys, Cost input_cost, double tuples, int w
(g_instance.cost_cxt.disable_cost_enlarge_factor * g_instance.cost_cxt.disable_cost_enlarge_factor);
}
/*
 * cost_groupsort
 *	  Determines and returns the cost of sorting a relation using groupsort,
 *	  not including the cost of reading the input data.
 *
 * 'root' supplies limit_tuples; when it is positive and smaller than
 *		dNumGroups, only that many groups are assumed to be retained
 * 'startup_cost' is an output: it is overwritten with the estimated cost of
 *		performing the group sort (including the disable penalty, if any)
 * 'run_cost' is an output: it is overwritten with the per-tuple extraction cost
 * 'tuples' is in/out: on entry the number of input tuples, on exit the
 *		estimated number of tuples retained by the sort
 * 'width' is the estimated average tuple width in bytes
 * 'comparison_cost' is the extra cost per comparison, on top of the default
 *		2.0 * cpu_operator_cost charged here
 * 'sort_mem' is the memory budget in kilobytes
 * 'dNumGroups' is the estimated number of groups in the input
 */
static void cost_groupsort(PlannerInfo *root, Cost *startup_cost, Cost *run_cost, double *tuples, int width, Cost comparison_cost,
    int sort_mem, double dNumGroups)
{
    double totalTuples = *tuples;
    double input_bytes = relation_byte_size(totalTuples, width, false);
    double output_bytes;
    long sort_mem_bytes = sort_mem * 1024L;
    double remainTuples;
    double remainGroups;
    double maxGroups = (double)sort_mem_bytes / BLCKSZ;
    Cost disable_penalty = 0;
    Cost discard_costs = 0;
    Cost cpu_costs = 0;
    Cost disk_costs = 0;

    /* Include the default cost-per-comparison */
    comparison_cost += 2.0 * u_sess->attr.attr_sql.cpu_operator_cost;

    if (0 < root->limit_tuples && root->limit_tuples < dNumGroups) {
        /* estimate how many tuples are discarded directly */
        remainGroups = root->limit_tuples;
        double ratio = (remainGroups / dNumGroups);
        remainTuples = ratio * totalTuples;
        output_bytes = ratio * input_bytes;
    } else {
        remainGroups = dNumGroups;
        remainTuples = totalTuples;
        output_bytes = input_bytes;
    }

    /* mustn't do log(0) */
    if (remainGroups < 2.0)
        remainGroups = 2.0;
    if (remainTuples < 2.0)
        remainTuples = 2.0;

    if (remainGroups > maxGroups || remainGroups * width > sort_mem_bytes) {
        /*
         * Too many groups, or the required memory exceeds work_mem:
         * don't consider this plan.
         *
         * The penalty is kept in a local and folded into the final sum
         * below, so that it is not lost when *startup_cost is assigned.
         */
        disable_penalty = g_instance.cost_cxt.disable_cost * g_instance.cost_cxt.disable_cost_enlarge_factor;
    }

    if (remainTuples < totalTuples) {
        double discard_tuples = totalTuples - remainTuples;
        /*
         * Assume 0.9 of tuples are discarded directly,
         * 0.1 tuples are inserted into skiplist first, but discarded by LIMIT N later
         */
        discard_costs = 0.9 * discard_tuples * comparison_cost +         /* discarded directly */
            0.1 * discard_tuples * comparison_cost * LOG2(remainGroups); /* discarded by LIMIT N */
    }

    /*
     * CPU costs
     *
     * Assume about NUMBER_TUPLES * log2(NUMBER_GROUPS) comparisons; this
     * term is the same whether or not the sort spills to disk.
     */
    cpu_costs += comparison_cost * remainTuples * LOG2(remainGroups);

    if (output_bytes > sort_mem_bytes) {
        /*
         * We'll have to use a disk-based sort of all the tuples
         */
        double pagesPerGroup = ceil(input_bytes / remainGroups / BLCKSZ);
        double npages = pagesPerGroup * remainGroups;
        double npageaccesses;

        /* Disk costs */
        npageaccesses = 2.0 * npages;
        /* Assume 3/4ths of accesses are sequential, 1/4th are not */
        disk_costs += npageaccesses * (u_sess->attr.attr_sql.seq_page_cost * 0.75
            + u_sess->attr.attr_sql.random_page_cost * 0.25);
    }

    *startup_cost = disable_penalty + discard_costs + cpu_costs + disk_costs;

    /*
     * Also charge a small amount (arbitrarily set equal to operator cost) per
     * extracted tuple.
     */
    *run_cost = u_sess->attr.attr_sql.cpu_operator_cost * remainTuples;
    *tuples = remainTuples;
}
/*
 * cost_sort_group
 *	  Estimate the cost of group-sorting a relation, with the cost of
 *	  reading the input data (input_cost) folded into the startup cost.
 *
 * The estimate is stored into path->rows, path->startup_cost and
 * path->total_cost; no other field of 'path' is written here.
 */
void cost_sort_group(Path *path, PlannerInfo *root, Cost input_cost, double tuples, int width,
    Cost comparison_cost, int sort_mem, double dNumGroups)
{
    Cost sort_startup = 0;
    Cost sort_run = 0;
    double out_rows = tuples;

    /* cost of the sort itself; out_rows is updated to the retained tuple count */
    cost_groupsort(root, &sort_startup, &sort_run, &out_rows, width, comparison_cost, sort_mem, dNumGroups);

    path->rows = out_rows;
    path->startup_cost = sort_startup + input_cost;
    path->total_cost = path->startup_cost + sort_run;
}
/*
* compute_sort_disk_cost
* compute disk spill cost of sort operator
@ -2614,14 +2728,17 @@ void cost_agg(Path* path, PlannerInfo* root, AggStrategy aggstrategy, const AggC
/* we aren't grouping */
total_cost = startup_cost + u_sess->attr.attr_sql.cpu_tuple_cost;
output_tuples = 1;
} else if (aggstrategy == AGG_SORTED) {
} else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_SORT_GROUP) {
/* Here we are able to deliver output on-the-fly */
startup_cost = input_startup_cost;
total_cost = input_total_cost;
/* calcs phrased this way to match HASHED case, see note above */
total_cost += aggcosts->transCost.startup;
total_cost += aggcosts->transCost.per_tuple * input_tuples;
total_cost += (u_sess->attr.attr_sql.cpu_operator_cost * numGroupCols) * input_tuples;
if (aggstrategy != AGG_SORT_GROUP) {
/* AGG_SORT_GROUP does not need to perform grouping comparisons */
total_cost += (u_sess->attr.attr_sql.cpu_operator_cost * numGroupCols) * input_tuples;
}
total_cost += aggcosts->finalCost * numGroups;
total_cost += u_sess->attr.attr_sql.cpu_tuple_cost * numGroups;
output_tuples = numGroups;

View File

@ -7234,6 +7234,48 @@ Sort* make_sort(PlannerInfo* root, Plan* lefttree, int numCols, AttrNumber* sort
return node;
}
/*
 * make_sortgroup --- basic routine to build a SortGroup plan node
 *
 * The node is placed on top of 'lefttree' and reuses its target list.
 * Startup cost, total cost and row count come from cost_sort_group();
 * the sort-key arrays are stored in the node as passed (not copied).
 */
SortGroup* make_sortgroup(PlannerInfo* root, Plan* lefttree, int numCols, AttrNumber* sortColIdx, Oid* sortOperators,
    Oid* collations, bool* nullsFirst, double dNumGroup)
{
    SortGroup* sortgroup = makeNode(SortGroup);
    Plan* base = &sortgroup->plan;
    Path cost_holder; /* scratch Path, only receives cost_sort_group's output */

    /* only care about copying size */
    copy_plan_costsize(base, lefttree);

#ifdef STREAMPLAN
    inherit_plan_locator_info((Plan*)sortgroup, lefttree);
#endif

    cost_sort_group(&cost_holder,
        root,
        lefttree->total_cost,
        lefttree->plan_rows,
        lefttree->plan_width,
        0.0,
        u_sess->opt_cxt.op_work_mem,
        dNumGroup);

    /* hook the node into the plan tree */
    base->lefttree = lefttree;
    base->righttree = NULL;
    base->targetlist = lefttree->targetlist;
    base->qual = NIL;

    /* properties taken over from the input plan */
    base->dop = lefttree->dop;
    base->hasUniqueResults = lefttree->hasUniqueResults;

    /* cost and row estimates of the group sort */
    base->plan_rows = cost_holder.rows;
    base->startup_cost = cost_holder.startup_cost;
    base->total_cost = cost_holder.total_cost;

    /* sort key description */
    sortgroup->numCols = numCols;
    sortgroup->sortColIdx = sortColIdx;
    sortgroup->sortOperators = sortOperators;
    sortgroup->collations = collations;
    sortgroup->nullsFirst = nullsFirst;

    return sortgroup;
}
/*
* prepare_sort_from_pathkeys
* Prepare to sort according to given pathkeys
@ -7672,6 +7714,54 @@ Sort* make_sort_from_groupcols(PlannerInfo* root, List* groupcls, AttrNumber* gr
return make_sort(root, lefttree, numsortkeys, sortColIdx, sortOperators, collations, nullsFirst, -1.0);
}
/*
 * make_sort_group_from_groupcols
 *	  Create SortGroup plan
 *
 * 'groupcls' is the list of SortGroupClauses
 * 'grpColIdx' gives the column numbers to use
 * 'lefttree' is the input plan; its target list is searched for the
 *		grouping columns
 * 'dNumGroup' is passed through to make_sortgroup()
 *
 * Raises an ERROR if a grouping column has no matching TargetEntry in the
 * input plan's target list.
 */
SortGroup* make_sort_group_from_groupcols(PlannerInfo* root, List* groupcls, AttrNumber* grpColIdx, Plan* lefttree, double dNumGroup)
{
    List* input_tlist = lefttree->targetlist;
    int nkeys = list_length(groupcls);

    /* Convert list-ish representation to arrays wanted by executor */
    AttrNumber* keyColIdx = (AttrNumber*)palloc(nkeys * sizeof(AttrNumber));
    Oid* keyOperators = (Oid*)palloc(nkeys * sizeof(Oid));
    Oid* keyCollations = (Oid*)palloc(nkeys * sizeof(Oid));
    bool* keyNullsFirst = (bool*)palloc(nkeys * sizeof(bool));

    int pos = 0;
    ListCell* cell = NULL;

    foreach (cell, groupcls) {
        SortGroupClause* clause = (SortGroupClause*)lfirst(cell);
        TargetEntry* entry = get_tle_by_resno(input_tlist, grpColIdx[pos]);

        if (entry == NULL) {
            /* no TargetEntry for this SortGroupClause: report an error */
            ereport(ERROR,
                (errmodule(MOD_OPT),
                    (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
                        errmsg("fail to find TargetEntry referenced by SortGroupClause"))));
        }

        keyColIdx[pos] = entry->resno;
        keyOperators[pos] = clause->sortop;
        keyCollations[pos] = exprCollation((Node*)entry->expr);
        keyNullsFirst[pos] = clause->nulls_first;
        pos++;
    }

    /* pos now equals the number of sort keys filled in above */
    return make_sortgroup(root, lefttree, pos, keyColIdx, keyOperators, keyCollations, keyNullsFirst, dNumGroup);
}
/*
* make_sort_from_targetlist
* Create sort plan to sort based on input plan's targetlist
@ -7880,6 +7970,12 @@ Agg* make_agg(PlannerInfo* root, List* tlist, List* qual, AggStrategy aggstrateg
node->single_node = true;
}
if (aggstrategy == AGG_SORT_GROUP && lefttree->type != T_SortGroup) {
/*subnode is not SortGroup, fallback strategy to AGG_SORTED */
aggstrategy = AGG_SORTED;
root->consider_sortgroup_agg = false;
}
node->aggstrategy = aggstrategy;
node->numCols = numGroupCols;
node->grpColIdx = grpColIdx;

View File

@ -473,8 +473,8 @@ void update_tuple_fraction(PlannerInfo* root,
!pathkeys_contained_in(root->window_pathkeys, root->group_pathkeys))
tuple_fraction = 0.0;
/* In any case, limit_tuples shouldn't be specified here */
AssertEreport(limit_tuples < 0,
/* if we do not consider sort group agg, limit_tuples shouldn't be specified here */
AssertEreport(root->consider_sortgroup_agg || limit_tuples < 0,
MOD_OPT,
"invalid limit tuples when estimating the number of result groups in grouping process.");
} else if (parse->hasAggs || root->hasHavingQual || parse->groupingSets) {

View File

@ -2623,6 +2623,7 @@ static Plan* grouping_planner(PlannerInfo* root, double tuple_fraction)
Assert(!root->planner_targets);
root->planner_targets = planner_targets;
root->consider_sortgroup_agg = u_sess->attr.attr_sql.enable_sortgroup_agg;
/*
* Apply memory context for generate plan in optimizer.
@ -2649,6 +2650,9 @@ static Plan* grouping_planner(PlannerInfo* root, double tuple_fraction)
limit_tuples = (double)count_est + (double)offset_est;
}
if (limit_tuples < 0)
root->consider_sortgroup_agg = false;
if (parse->setOperations) {
List* set_sortclauses = NIL;
@ -2768,8 +2772,10 @@ static Plan* grouping_planner(PlannerInfo* root, double tuple_fraction)
/* Preprocess GROUP BY clause, if any */
/* Preprocess Grouping set, if any */
if (parse->groupingSets)
if (parse->groupingSets) {
parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
root->consider_sortgroup_agg = false;
}
if (parse->groupClause) {
ListCell* lc = NULL;
@ -2847,6 +2853,7 @@ static Plan* grouping_planner(PlannerInfo* root, double tuple_fraction)
UpsertExpr* upsertClause = parse->upsertClause;
upsertClause->updateTlist =
preprocess_upsert_targetlist(upsertClause->updateTlist, parse->resultRelation, parse->rtable);
root->consider_sortgroup_agg = false;
}
/*
* Locate any window functions in the tlist. (We don't need to look
@ -2864,6 +2871,7 @@ static Plan* grouping_planner(PlannerInfo* root, double tuple_fraction)
}
else
parse->hasWindowFuncs = false;
root->consider_sortgroup_agg = false;
}
/*
@ -2916,14 +2924,18 @@ static Plan* grouping_planner(PlannerInfo* root, double tuple_fraction)
u_sess->opt_cxt.query_dop = dop_tmp;
}
if (parse->distinctClause || parse->havingQual || parse->hasWindowFuncs || root->hasHavingQual)
root->consider_sortgroup_agg = false;
/*
* Figure out whether there's a hard limit on the number of rows that
* query_planner's result subplan needs to return. Even if we know a
* hard limit overall, it doesn't apply if the query has any
* grouping/aggregation operations, or SRFs in the tlist.
* grouping/aggregation operations(except sortgroup_agg), or SRFs in the tlist.
*/
if (parse->groupClause || parse->groupingSets || parse->distinctClause || parse->hasAggs ||
parse->hasWindowFuncs || root->hasHavingQual || parse->hasTargetSRFs)
if (!root->consider_sortgroup_agg &&
(parse->groupClause || parse->groupingSets || parse->distinctClause || parse->hasAggs ||
parse->hasWindowFuncs || root->hasHavingQual || parse->hasTargetSRFs))
sub_limit_tuples = -1.0;
else
sub_limit_tuples = limit_tuples;
@ -3135,6 +3147,7 @@ static Plan* grouping_planner(PlannerInfo* root, double tuple_fraction)
* right tlist, and it has no sort order.
*/
current_pathkeys = NIL;
root->consider_sortgroup_agg = false;
} else {
/*
* Normal case --- create a plan according to query_planner's
@ -3325,6 +3338,9 @@ static Plan* grouping_planner(PlannerInfo* root, double tuple_fraction)
}
#endif
bool grouping_contains_srfs = planner_targets->grouping_contains_srfs;
if (grouping_contains_srfs) {
root->consider_sortgroup_agg = false;
}
/*
* groupColIdx is now cast in stone, so record a mapping from
* tleSortGroupRef to column index. setrefs.c needs this to
@ -3680,11 +3696,19 @@ static Plan* grouping_planner(PlannerInfo* root, double tuple_fraction)
if (need_sort_for_grouping && partial_plan == NULL &&
(IS_STREAM_PLAN || parse->groupingSets == NULL)) {
result_plan =
(Plan*)make_sort_from_groupcols(root, parse->groupClause, groupColIdx, result_plan);
if (root->consider_sortgroup_agg) {
result_plan = (Plan*) make_sort_group_from_groupcols(root, parse->groupClause, groupColIdx, result_plan, dNumGroups[0]);
}
else {
result_plan =
(Plan*)make_sort_from_groupcols(root, parse->groupClause, groupColIdx, result_plan);
}
current_pathkeys = root->group_pathkeys;
}
aggstrategy = AGG_SORTED;
if (root->consider_sortgroup_agg)
aggstrategy = AGG_SORT_GROUP;
else
aggstrategy = AGG_SORTED;
} else {
if (IS_STREAM_PLAN && count_distinct_optimization) {
@ -5808,6 +5832,12 @@ static void compute_hashed_path_cost(PlannerInfo* root, double limit_tuples, int
bool needs_stream = false;
bool need_second_hashagg = false;
if (!u_sess->attr.attr_sql.enable_hashagg) {
copy_path_costsize(hashed_p, cheapest_path);
hashed_p->total_cost = hashed_p->startup_cost = g_instance.cost_cxt.disable_cost;
return;
}
/*
* See if the estimated cost is no more than doing it the other way. While
* avoiding the need for sorted input is usually a win, the fact that the
@ -6307,6 +6337,14 @@ static void compute_sorted_path_cost(PlannerInfo* root, double limit_tuples, int
if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) {
current_pathkeys = root->group_pathkeys;
need_sort_for_grouping = true;
if (!u_sess->attr.attr_sql.enable_sort) {
sorted_p->total_cost = sorted_p->startup_cost = g_instance.cost_cxt.disable_cost;
return;
}
} else {
/* already sorted, never consider group sorting */
root->consider_sortgroup_agg = false;
}
if (is_replicate || !parse->hasAggs) {
@ -6409,6 +6447,57 @@ static void compute_sorted_path_cost(PlannerInfo* root, double limit_tuples, int
ereport(DEBUG1, (errmodule(MOD_OPT_AGG), (errmsg("[final sorted path total cost]: %lf", sorted_p->total_cost))));
}
/*
 * compute_sort_group_path_cost: compute sort group path cost for choose.
 *
 * The estimate is written into 'sorted_p'. When sort group aggregation is
 * disabled, is no longer being considered for this query, or the cheapest
 * path already delivers the grouping order, root->consider_sortgroup_agg is
 * cleared and the path is priced at disable_cost.
 *
 * 'limit_tuples' and 'target_pathkeys' are not used by this function.
 */
static void compute_sort_group_path_cost(PlannerInfo *root, double limit_tuples, int path_width, Path *cheapest_path,
    const double dNumGroup, AggClauseCosts *agg_costs, Size hashentrysize,
    List *target_pathkeys, Path *sorted_p)
{
    int group_col_count = list_length(root->parse->groupClause);

    copy_path_costsize(sorted_p, cheapest_path);

    bool usable = u_sess->attr.attr_sql.enable_sortgroup_agg &&
        root->consider_sortgroup_agg &&
        !pathkeys_contained_in(root->group_pathkeys, cheapest_path->pathkeys);

    if (!usable) {
        /* already sorted, or sort group agg is disabled, never consider group sorting */
        root->consider_sortgroup_agg = false;
        sorted_p->total_cost = sorted_p->startup_cost = g_instance.cost_cxt.disable_cost;
        return;
    }

    /* first the group sort on top of the cheapest path ... */
    cost_sort_group(sorted_p,
        root,
        cheapest_path->total_cost,
        PATH_LOCAL_ROWS(cheapest_path),
        path_width,
        0.0,
        u_sess->opt_cxt.op_work_mem,
        dNumGroup);

    /* ... then the aggregation that consumes the group sort's output */
    cost_agg(sorted_p,
        root,
        AGG_SORT_GROUP,
        agg_costs,
        group_col_count,
        dNumGroup,
        sorted_p->startup_cost,
        sorted_p->total_cost,
        sorted_p->rows,
        path_width,
        path_width,
        hashentrysize);
}
/*
* Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
* aggregates. (Doing so would imply storing *all* the input values in
@ -6460,7 +6549,7 @@ static bool choose_hashed_grouping(PlannerInfo* root, double tuple_fraction, dou
bool can_sort = false;
Size hashentrysize;
List* target_pathkeys = NIL;
Path hashed_p, sorted_p;
Path hashed_p, sorted_p, sort_group_p;
errno_t rc = EOK;
can_hash = grouping_is_can_hash(parse, agg_costs);
@ -6523,14 +6612,9 @@ static bool choose_hashed_grouping(PlannerInfo* root, double tuple_fraction, dou
#endif
}
/* Prefer hashagg or sort when guc is set */
if (!u_sess->attr.attr_sql.enable_hashagg && u_sess->attr.attr_sql.enable_sort)
return false;
if (!u_sess->attr.attr_sql.enable_sort && u_sess->attr.attr_sql.enable_hashagg)
return true;
/* If guc plan_mode_seed is random plan, we should choose random path between AGG_HASHED and AGG_SORTED */
if (u_sess->attr.attr_sql.plan_mode_seed != OPTIMIZE_PLAN) {
root->consider_sortgroup_agg = false;
int random_option = choose_random_option(lengthof(g_agglist));
return (AGG_HASHED == g_agglist[random_option]);
}
@ -6553,6 +6637,8 @@ static bool choose_hashed_grouping(PlannerInfo* root, double tuple_fraction, dou
securec_check(rc, "\0", "\0");
rc = memset_s(&sorted_p, sizeof(sorted_p), 0, sizeof(sorted_p));
securec_check(rc, "\0", "\0");
rc = memset_s(&sort_group_p, sizeof(sort_group_p), 0, sizeof(sort_group_p));
securec_check(rc, "\0", "\0");
/* compute the minimal total cost for hash path. */
Distribution* distribution = ng_get_dest_distribution(cheapest_path);
@ -6579,12 +6665,30 @@ static bool choose_hashed_grouping(PlannerInfo* root, double tuple_fraction, dou
target_pathkeys,
&sorted_p);
compute_sort_group_path_cost(root,
limit_tuples,
path_width,
cheapest_path,
dNumGroups[0],
agg_costs,
hashentrysize,
target_pathkeys,
&sort_group_p);
/*
* Now make the decision using the top-level tuple fraction. First we
* have to convert an absolute count (LIMIT) into fractional form.
*/
tuple_fraction = tuple_fraction >= 1.0 ? tuple_fraction / dNumGroups[0] : tuple_fraction;
if (root->consider_sortgroup_agg &&
compare_fractional_path_costs(&sort_group_p, &sorted_p, tuple_fraction) < 0) {
/*sort group is cheaper, so use it*/
copy_path_costsize(&sorted_p, &sort_group_p);
} else {
root->consider_sortgroup_agg = false;
}
if (compare_fractional_path_costs(&hashed_p, &sorted_p, tuple_fraction) < 0) {
/* Hashed is cheaper, so use it */
return true;
@ -9307,6 +9411,8 @@ static bool vector_engine_walker_internal(Plan* result_plan, bool check_rescan,
break;
case T_Agg: {
if (((Agg*)result_plan)->aggstrategy == AGG_SORT_GROUP)
return true;
/* Check if targetlist contains unsupported feature */
if (vector_engine_expression_walker((Node*)(result_plan->targetlist), NULL))
return true;
@ -9544,6 +9650,7 @@ static Plan* fallback_plan(Plan* result_plan)
case T_BaseResult:
case T_ProjectSet:
case T_Sort:
case T_SortGroup:
case T_Stream:
case T_Material:
case T_StartWithOp:
@ -9729,7 +9836,14 @@ Plan* vectorize_plan(Plan* result_plan, bool ignore_remotequery, bool forceVecto
return make_rowtove_plan(result_plan);
}
break;
case T_SortGroup:
{
result_plan->lefttree = vectorize_plan(result_plan->lefttree, ignore_remotequery, forceVectorEngine);
if (result_plan->lefttree && IsVecOutput(result_plan->lefttree)) {
result_plan->lefttree = (Plan*) make_vectorow(result_plan->lefttree);
}
return result_plan;
}
case T_MergeJoin:
case T_NestLoop:
result_plan->lefttree = vectorize_plan(result_plan->lefttree, ignore_remotequery, forceVectorEngine);

View File

@ -1,5 +1,6 @@
#include "postgres.h"
#include "miscadmin.h"
#include "access/tableam.h"
#include "executor/executor.h"
#include "executor/node/nodeSortGroup.h"
#include "executor/tuptable.h"
@ -350,8 +351,6 @@ static void initGroupIter(SortGroupStatePriv *state)
/*
* Accept one tuple while collecting input data for group sort.
*
* Note that the input data is always copied; the caller need not save it.
*/
static void groupSortPutTupleslot(SortGroupStatePriv *state, TupleTableSlot *slot)
{
@ -360,7 +359,7 @@ static void groupSortPutTupleslot(SortGroupStatePriv *state, TupleTableSlot *slo
SkiplistNode *groupNode;
if (unlikely(slot->tts_nvalid < state->nKeys)) {
heap_slot_getallattrs(slot);
tableam_tslot_getallattrs(slot);
}
if (skiplist->length >= state->max_groups) {

View File

@ -67,6 +67,7 @@ typedef struct knl_session_attr_sql {
bool enable_sort;
bool enable_compress_spill;
bool enable_hashagg;
bool enable_sortgroup_agg;
bool enable_material;
bool enable_nestloop;
bool enable_mergejoin;

View File

@ -106,6 +106,8 @@ extern void cost_recursive_union(Plan* runion, Plan* nrterm, Plan* rterm);
extern void cost_sort(Path* path, List* pathkeys, Cost input_cost, double tuples, int width, Cost comparison_cost,
int sort_mem, double limit_tuples, bool col_store, int dop = 1, OpMemInfo* mem_info = NULL,
bool index_sort = false);
extern void cost_sort_group(Path *path, PlannerInfo *root, Cost input_cost, double tuples, int width,
Cost comparison_cost, int sort_mem, double dNumGroups);
extern void cost_merge_append(Path* path, PlannerInfo* root, List* pathkeys, int n_streams, Cost input_startup_cost,
Cost input_total_cost, double tuples);
extern void cost_material(Path* path, Cost input_startup_cost, Cost input_total_cost, double tuples, int width);

View File

@ -79,9 +79,12 @@ extern Sort* make_sort_from_pathkeys(
PlannerInfo* root, Plan* lefttree, List* pathkeys, double limit_tuples, bool can_parallel = false);
extern Sort* make_sort_from_sortclauses(PlannerInfo* root, List* sortcls, Plan* lefttree);
extern Sort* make_sort_from_groupcols(PlannerInfo* root, List* groupcls, AttrNumber* grpColIdx, Plan* lefttree);
extern SortGroup* make_sort_group_from_groupcols(PlannerInfo* root, List* groupcls, AttrNumber* grpColIdx, Plan* lefttree, double dNumGroup);
extern Sort* make_sort_from_targetlist(PlannerInfo* root, Plan* lefttree, double limit_tuples);
extern Sort* make_sort(PlannerInfo* root, Plan* lefttree, int numCols, AttrNumber* sortColIdx, Oid* sortOperators,
Oid* collations, bool* nullsFirst, double limit_tuples);
extern SortGroup* make_sortgroup(PlannerInfo* root, Plan* lefttree, int numCols, AttrNumber* sortColIdx, Oid* sortOperators,
Oid* collations, bool* nullsFirst, double dNumGroup);
extern Agg* make_agg(PlannerInfo* root, List* tlist, List* qual, AggStrategy aggstrategy,
const AggClauseCosts* aggcosts, int numGroupCols, AttrNumber* grpColIdx, Oid* grpOperators, long numGroups,
Plan* lefttree, WindowLists* wflists, bool need_stream, bool trans_agg, List* groupingSets = NIL,

View File

@ -0,0 +1,433 @@
create schema sortgroupagg;
set search_path=sortgroupagg;
create table tbl_10k(id bigint, v1 numeric, v2 char(150));
insert into tbl_10k select generate_series(1, 10 * 1000), (RANDOM() * 67)::int::numeric + 10e-100, (RANDOM() * 77)::int::numeric+10e-100;
analyze tbl_10k;
set enable_sortgroup_agg=on;
explain (costs off) select sum(id), v1,v2 from tbl_10k group by v1,v2 order by v1,v2 limit 1;
QUERY PLAN
----------------------------------------
Limit
-> GroupAggregate
Group By Key: v1, v2
-> Group Sort
Sorted Group Key: v1, v2
-> Seq Scan on tbl_10k
(6 rows)
-- order keys are not contained in group keys, needs sorts after aggregation
explain (costs off) select sum(id), v1,v2 from tbl_10k group by v1,v2 order by v1,sum(id) limit 10;
QUERY PLAN
----------------------------------------------
Limit
-> Sort
Sort Key: v1, (sum(id))
-> GroupAggregate
Group By Key: v1, v2
-> Group Sort
Sorted Group Key: v1, v2
-> Seq Scan on tbl_10k
(8 rows)
create table agg_1 as
select sum(id), v1,v2 from tbl_10k group by v1,v2 order by v1,sum(id) limit 10 offset 11;
set enable_sortgroup_agg=off;
create table agg_2 as
select sum(id), v1,v2 from tbl_10k group by v1,v2 order by v1,sum(id) limit 10 offset 11;
-- Compare results to hash aggregation results
(select * from agg_1 except select * from agg_2)
union all
(select * from agg_2 except select * from agg_1);
sum | v1 | v2
-----+----+----
(0 rows)
drop table agg_1, agg_2;
set enable_sortgroup_agg=on;
-- In the following cases, we cannot perform sortgroup
-- 1. plain agg
explain (costs off) select count(*) from tbl_10k limit 1;
QUERY PLAN
---------------------------------
Limit
-> Aggregate
-> Seq Scan on tbl_10k
(3 rows)
-- 2. HAVING clauses
explain (costs off) select sum(id), v1,v2 from tbl_10k group by v1,v2 having v1+v2>0 order by v1,v2 limit 1;
QUERY PLAN
-----------------------------------------------------------------
Limit
-> Sort
Sort Key: v1, v2
-> HashAggregate
Group By Key: v1, v2
-> Seq Scan on tbl_10k
Filter: ((v1 + (v2)::numeric) > 0::numeric)
(7 rows)
--3. distinct
explain (costs off) select distinct(v1,v2) from tbl_10k group by v1,v2 limit 1;
QUERY PLAN
---------------------------------------------
Limit
-> Unique
-> Sort
Sort Key: (ROW(v1, v2))
-> HashAggregate
Group By Key: v1, v2
-> Seq Scan on tbl_10k
(7 rows)
--4. grouping sets
explain (costs off) select sum(v1),v1 from tbl_10k group by grouping sets((v1),(v2)) order by v1 desc limit 1;
QUERY PLAN
--------------------------------------------------
Limit
-> Sort
Sort Key: v1 DESC
-> GroupAggregate
Group By Key: v1
Sort Key: v2
Group By Key: v2
-> Sort
Sort Key: v1 DESC NULLS LAST
-> Seq Scan on tbl_10k
(10 rows)
-- 5. winows
explain (costs off) SELECT v1, avg(v1) OVER (PARTITION BY v2) FROM tbl_10k group by v1, v2 order by v1,v2 limit 1;
QUERY PLAN
---------------------------------------------------
Limit
-> Sort
Sort Key: v1, v2
-> WindowAgg
-> Sort
Sort Key: v2
-> HashAggregate
Group By Key: v1, v2
-> Seq Scan on tbl_10k
(9 rows)
--6. no LIMIT cluases
explain (costs off) select sum(id), v1,v2 from tbl_10k group by v1,v2 order by v1,v2;
QUERY PLAN
---------------------------------
Sort
Sort Key: v1, v2
-> HashAggregate
Group By Key: v1, v2
-> Seq Scan on tbl_10k
(5 rows)
set enable_hashagg =off;
set enable_sort=off;
-- GROUP BY single key
explain (costs off) select avg(v2), v2 from tbl_10k group by v2 order by v2 limit 1000 offset 10;
QUERY PLAN
---------------------------------------
Limit
-> GroupAggregate
Group By Key: v2
-> Group Sort
Sorted Group Key: v2
-> Seq Scan on tbl_10k
(6 rows)
create table agg_sortgroup_1 as
select avg(v2), v2 from tbl_10k group by v2 order by v2 limit 1000 offset 10;
create table agg_sortgroup_2 as
select avg(v1), v1 from tbl_10k group by v1 order by v1 desc limit 1000 offset 10;
set work_mem =64;
create table agg_sortgroup_disk_1 as
select avg(v2), v2 from tbl_10k group by v2 order by v2 limit 1000 offset 10;
create table agg_sortgroup_disk_2 as
select avg(v1), v1 from tbl_10k group by v1 order by v1 desc limit 1000 offset 10;
set work_mem =default;
set enable_hashagg =on;
set enable_sortgroup_agg=off;
set enable_sort=on;
create table agg_hashagg_1 as
select avg(v2), v2 from tbl_10k group by v2 order by v2 limit 1000 offset 10;
create table agg_hashagg_2 as
select avg(v1), v1 from tbl_10k group by v1 order by v1 desc limit 1000 offset 10;
-- Compare results to hash aggregation results
(select * from agg_sortgroup_1 except select * from agg_hashagg_1)
union all
(select * from agg_hashagg_1 except select * from agg_sortgroup_1);
avg | v2
-----+----
(0 rows)
(select * from agg_sortgroup_2 except select * from agg_hashagg_2)
union all
(select * from agg_hashagg_2 except select * from agg_sortgroup_2);
avg | v1
-----+----
(0 rows)
(select * from agg_sortgroup_disk_1 except select * from agg_hashagg_1)
union all
(select * from agg_hashagg_1 except select * from agg_sortgroup_disk_1);
avg | v2
-----+----
(0 rows)
(select * from agg_sortgroup_disk_2 except select * from agg_hashagg_2)
union all
(select * from agg_hashagg_2 except select * from agg_sortgroup_disk_2);
avg | v1
-----+----
(0 rows)
drop table agg_sortgroup_1,agg_sortgroup_2,agg_hashagg_1,agg_hashagg_2, agg_sortgroup_disk_1, agg_sortgroup_disk_2;
-- GROUP BY multiple keys
set enable_sortgroup_agg=on;
set enable_hashagg =off;
set enable_sort=off;
explain (costs off) select sum(v2+v1), v2,v1 from tbl_10k group by v2,v1 order by v2 desc ,v1 asc limit 1000 offset 10;
QUERY PLAN
---------------------------------------------
Limit
-> GroupAggregate
Group By Key: v2, v1
-> Group Sort
Sorted Group Key: v2 DESC, v1
-> Seq Scan on tbl_10k
(6 rows)
create table agg_sortgroup_1 as
select sum(v2+v1), v2,v1 from tbl_10k group by v2,v1 order by v2 desc ,v1 asc limit 1000 offset 10;
create table agg_sortgroup_2 as
select sum(v2+v1), v2,v1 from tbl_10k group by v1,v2 order by v1 asc ,v2 desc limit 1000 offset 10;
set work_mem =64;
create table agg_sortgroup_disk_1 as
select sum(v2+v1), v2,v1 from tbl_10k group by v2,v1 order by v2 desc ,v1 asc limit 1000 offset 10;
create table agg_sortgroup_disk_2 as
select sum(v2+v1), v2,v1 from tbl_10k group by v1,v2 order by v1 asc ,v2 desc limit 1000 offset 10;
set work_mem =default;
set enable_sortgroup_agg=off;
set enable_hashagg =on;
set enable_sort=on;
create table agg_hashagg_1 as
select sum(v2+v1), v2,v1 from tbl_10k group by v2,v1 order by v2 desc ,v1 asc limit 1000 offset 10;
create table agg_hashagg_2 as
select sum(v2+v1), v2,v1 from tbl_10k group by v1,v2 order by v1 asc ,v2 desc limit 1000 offset 10;
-- Compare results to hash aggregation results
(select * from agg_sortgroup_1 except select * from agg_hashagg_1)
union all
(select * from agg_hashagg_1 except select * from agg_sortgroup_1);
sum | v2 | v1
-----+----+----
(0 rows)
(select * from agg_sortgroup_2 except select * from agg_hashagg_2)
union all
(select * from agg_hashagg_2 except select * from agg_sortgroup_2);
sum | v2 | v1
-----+----+----
(0 rows)
(select * from agg_sortgroup_disk_1 except select * from agg_hashagg_1)
union all
(select * from agg_hashagg_1 except select * from agg_sortgroup_disk_1);
sum | v2 | v1
-----+----+----
(0 rows)
(select * from agg_sortgroup_disk_2 except select * from agg_hashagg_2)
union all
(select * from agg_hashagg_2 except select * from agg_sortgroup_disk_2);
sum | v2 | v1
-----+----+----
(0 rows)
drop table agg_sortgroup_1,agg_sortgroup_2,agg_hashagg_1,agg_hashagg_2,agg_sortgroup_disk_1, agg_sortgroup_disk_2;
-- already sorted, we don't consider sortgroup aggregation
set enable_sortgroup_agg=on;
set enable_hashagg =off;
set enable_sort=off;
explain (costs off) select avg(v1), v1 from tbl_10k group by v1 order by v1 limit 1;
QUERY PLAN
---------------------------------------
Limit
-> GroupAggregate
Group By Key: v1
-> Group Sort
Sorted Group Key: v1
-> Seq Scan on tbl_10k
(6 rows)
set enable_seqscan=off;
create index v1_index on tbl_10k (v1);
analyze tbl_10k;
explain (costs off) select avg(v1), v1 from tbl_10k group by v1 order by v1 limit 1;
QUERY PLAN
-------------------------------------------------------
Limit
-> GroupAggregate
Group By Key: v1
-> Index Only Scan using v1_index on tbl_10k
(4 rows)
drop index v1_index;
set enable_seqscan=on;
-- test ExecReScanSortGroup
set enable_sortgroup_agg=on;
set enable_hashagg =off;
set enable_sort=off;
set enable_material =off;
explain (costs off)
WITH t1 AS (
SELECT v1::int % 10 as a1, SUM(id) as b1
FROM tbl_10k
GROUP BY v1::int % 10 order by v1::int % 10 limit 100
), t2 AS (
SELECT v2::char(5)::numeric::int % 10 as a2, SUM(id) as b2
FROM tbl_10k
GROUP BY v2::char(5)::numeric::int % 10 order by v2::char(5)::numeric::int % 10 limit 101
)
select a1, a2, b1+b2 from t1 inner join t2 on (b1 + b2 > 10);
QUERY PLAN
--------------------------------------------------------------------------------------------------------------
Nested Loop
Join Filter: (((sum(sortgroupagg.tbl_10k.id)) + (sum(sortgroupagg.tbl_10k.id))) > 10::numeric)
-> Limit
-> GroupAggregate
Group By Key: (((sortgroupagg.tbl_10k.v1)::integer % 10))
-> Group Sort
Sorted Group Key: (((sortgroupagg.tbl_10k.v1)::integer % 10))
-> Seq Scan on tbl_10k
-> Limit
-> GroupAggregate
Group By Key: (((((sortgroupagg.tbl_10k.v2)::character(5))::numeric)::integer % 10))
-> Group Sort
Sorted Group Key: (((((sortgroupagg.tbl_10k.v2)::character(5))::numeric)::integer % 10))
-> Seq Scan on tbl_10k
(14 rows)
create table mem_rescan_1 as
WITH t1 AS (
SELECT v1::int % 10 as a1, SUM(id) as b1
FROM tbl_10k
GROUP BY v1::int % 10 order by v1::int % 10 limit 100
), t2 AS (
SELECT v2::char(5)::numeric::int % 10 as a2, SUM(id) as b2
FROM tbl_10k
GROUP BY v2::char(5)::numeric::int % 10 order by v2::char(5)::numeric::int % 10 limit 101
)
select a1, a2, b1+b2 from t1 inner join t2 on (b1 + b2 > 10);
set work_mem =64;
create table disk_rescan_1 as
WITH t1 AS (
SELECT v1::int % 10 as a1, SUM(id) as b1
FROM tbl_10k
GROUP BY v1::int % 10 order by v1::int % 10 limit 100
), t2 AS (
SELECT v2::char(5)::numeric::int % 10 as a2, SUM(id) as b2
FROM tbl_10k
GROUP BY v2::char(5)::numeric::int % 10 order by v2::char(5)::numeric::int % 10 limit 101
)
select a1, a2, b1+b2 from t1 inner join t2 on (b1 + b2 > 10);
-- Compare results between MEMORY SORT and DISK SORT
(select * from mem_rescan_1 except select * from disk_rescan_1)
union all
(select * from disk_rescan_1 except select * from mem_rescan_1);
a1 | a2 | ?column?
----+----+----------
(0 rows)
set work_mem =default;
set enable_sortgroup_agg=off;
set enable_hashagg =on;
set enable_sort=on;
create table hashagg_rescan_1 as
WITH t1 AS (
SELECT v1::int % 10 as a1, SUM(id) as b1
FROM tbl_10k
GROUP BY v1::int % 10 order by v1::int % 10 limit 100
), t2 AS (
SELECT v2::char(5)::numeric::int % 10 as a2, SUM(id) as b2
FROM tbl_10k
GROUP BY v2::char(5)::numeric::int % 10 order by v2::char(5)::numeric::int % 10 limit 101
)
select a1, a2, b1+b2 from t1 inner join t2 on (b1 + b2 > 10);
-- Compare results to hash aggregation results
-- hashagg_rescan_1 = mem_rescan_1 = disk_rescan_1
(select * from mem_rescan_1 except select * from hashagg_rescan_1)
union all
(select * from hashagg_rescan_1 except select * from mem_rescan_1);
a1 | a2 | ?column?
----+----+----------
(0 rows)
drop table mem_rescan_1,hashagg_rescan_1,disk_rescan_1;
drop table tbl_10k;
create table tbl_cstore_10k(id bigint, v1 numeric, v2 numeric) with (orientation = column);
insert into tbl_cstore_10k select generate_series(1, 10 * 1000), (RANDOM() * 67)::int::numeric, (RANDOM() * 77)::int::numeric;
analyze tbl_cstore_10k;
set enable_sortgroup_agg=on;
set enable_hashagg =off;
set enable_sort=off;
explain (costs off) select sum(id), v1,v2 from tbl_cstore_10k group by v1,v2 order by v1,v2 limit 11 offset 10;
QUERY PLAN
-------------------------------------------------------
Limit
-> GroupAggregate
Group By Key: v1, v2
-> Group Sort
Sorted Group Key: v1, v2
-> Row Adapter
-> CStore Scan on tbl_cstore_10k
(7 rows)
create table agg_sortgroup_1 as
select sum(id), v1,v2 from tbl_cstore_10k group by v1,v2 order by v1,v2 limit 11 offset 10;
set enable_sortgroup_agg=off;
set enable_hashagg =on;
set enable_sort=on;
create table agg_vecagg_1 as
select sum(id), v1,v2 from tbl_cstore_10k group by v1,v2 order by v1,v2 limit 11 offset 10;
(select * from agg_sortgroup_1 except select * from agg_vecagg_1)
union all
(select * from agg_vecagg_1 except select * from agg_sortgroup_1);
sum | v1 | v2
-----+----+----
(0 rows)
drop table tbl_cstore_10k, agg_sortgroup_1,agg_vecagg_1;
create table tbl_ustore_10k(id bigint, v1 numeric, v2 numeric) with (storage_type=ustore);
insert into tbl_ustore_10k select generate_series(1, 10 * 1000), (RANDOM() * 67)::int::numeric, (RANDOM() * 77)::int::numeric;
analyze tbl_ustore_10k;
set enable_sortgroup_agg=on;
set enable_hashagg =off;
set enable_sort=off;
explain (costs off) select sum(id), v1,v2 from tbl_ustore_10k group by v1,v2 order by v1,v2 limit 11 offset 10;
QUERY PLAN
----------------------------------------------
Limit
-> GroupAggregate
Group By Key: v1, v2
-> Group Sort
Sorted Group Key: v1, v2
-> Seq Scan on tbl_ustore_10k
(6 rows)
create table agg_sortgroup_1 as
select sum(id), v1,v2 from tbl_ustore_10k group by v1,v2 order by v1,v2 limit 11 offset 10;
set enable_sortgroup_agg=off;
set enable_hashagg =on;
set enable_sort=on;
create table agg_hashagg_1 as
select sum(id), v1,v2 from tbl_ustore_10k group by v1,v2 order by v1,v2 limit 11 offset 10;
(select * from agg_sortgroup_1 except select * from agg_hashagg_1)
union all
(select * from agg_hashagg_1 except select * from agg_sortgroup_1);
sum | v1 | v2
-----+----+----
(0 rows)
drop table tbl_ustore_10k, agg_sortgroup_1,agg_hashagg_1;
drop schema sortgroupagg cascade;

View File

@ -738,6 +738,7 @@ test: vec_numeric_sop_1 vec_numeric_sop_2 vec_numeric_sop_3 vec_numeric_sop_4 ve
#test: vec_window_end
test: window_srf
test: enable_trace_column
test: sortgroup_agg
test: vec_unique_pre vec_bitmap_prepare
test: vec_unique vec_setop_001 vec_setop_002 vec_setop_003 vec_setop_004 hw_vec_int4 hw_vec_int8 hw_vec_float4 hw_vec_float8

View File

@ -5238,7 +5238,7 @@ static void check_global_variables()
}
}
#define BASE_PGXC_LIKE_MACRO_NUM 1394
#define BASE_PGXC_LIKE_MACRO_NUM 1395
static void check_pgxc_like_macros()
{
#ifdef BUILD_BY_CMAKE

View File

@ -0,0 +1,293 @@
create schema sortgroupagg;
set search_path=sortgroupagg;
create table tbl_10k(id bigint, v1 numeric, v2 char(150));
insert into tbl_10k select generate_series(1, 10 * 1000), (RANDOM() * 67)::int::numeric + 10e-100, (RANDOM() * 77)::int::numeric+10e-100;
analyze tbl_10k;
set enable_sortgroup_agg=on;
explain (costs off) select sum(id), v1,v2 from tbl_10k group by v1,v2 order by v1,v2 limit 1;
-- order keys are not contained in group keys, needs sorts after aggregation
explain (costs off) select sum(id), v1,v2 from tbl_10k group by v1,v2 order by v1,sum(id) limit 10;
create table agg_1 as
select sum(id), v1,v2 from tbl_10k group by v1,v2 order by v1,sum(id) limit 10 offset 11;
set enable_sortgroup_agg=off;
create table agg_2 as
select sum(id), v1,v2 from tbl_10k group by v1,v2 order by v1,sum(id) limit 10 offset 11;
-- Compare results to hash aggregation results
(select * from agg_1 except select * from agg_2)
union all
(select * from agg_2 except select * from agg_1);
drop table agg_1, agg_2;
set enable_sortgroup_agg=on;
-- In the following cases, we cannot perform sortgroup
-- 1. plain agg
explain (costs off) select count(*) from tbl_10k limit 1;
-- 2. HAVING clauses
explain (costs off) select sum(id), v1,v2 from tbl_10k group by v1,v2 having v1+v2>0 order by v1,v2 limit 1;
--3. distinct
explain (costs off) select distinct(v1,v2) from tbl_10k group by v1,v2 limit 1;
--4. grouping sets
explain (costs off) select sum(v1),v1 from tbl_10k group by grouping sets((v1),(v2)) order by v1 desc limit 1;
-- 5. window functions
explain (costs off) SELECT v1, avg(v1) OVER (PARTITION BY v2) FROM tbl_10k group by v1, v2 order by v1,v2 limit 1;
--6. no LIMIT clauses
explain (costs off) select sum(id), v1,v2 from tbl_10k group by v1,v2 order by v1,v2;
set enable_hashagg =off;
set enable_sort=off;
-- GROUP BY single key
explain (costs off) select avg(v2), v2 from tbl_10k group by v2 order by v2 limit 1000 offset 10;
create table agg_sortgroup_1 as
select avg(v2), v2 from tbl_10k group by v2 order by v2 limit 1000 offset 10;
create table agg_sortgroup_2 as
select avg(v1), v1 from tbl_10k group by v1 order by v1 desc limit 1000 offset 10;
set work_mem =64;
create table agg_sortgroup_disk_1 as
select avg(v2), v2 from tbl_10k group by v2 order by v2 limit 1000 offset 10;
create table agg_sortgroup_disk_2 as
select avg(v1), v1 from tbl_10k group by v1 order by v1 desc limit 1000 offset 10;
set work_mem =default;
set enable_hashagg =on;
set enable_sortgroup_agg=off;
set enable_sort=on;
create table agg_hashagg_1 as
select avg(v2), v2 from tbl_10k group by v2 order by v2 limit 1000 offset 10;
create table agg_hashagg_2 as
select avg(v1), v1 from tbl_10k group by v1 order by v1 desc limit 1000 offset 10;
-- Compare results to hash aggregation results
(select * from agg_sortgroup_1 except select * from agg_hashagg_1)
union all
(select * from agg_hashagg_1 except select * from agg_sortgroup_1);
(select * from agg_sortgroup_2 except select * from agg_hashagg_2)
union all
(select * from agg_hashagg_2 except select * from agg_sortgroup_2);
(select * from agg_sortgroup_disk_1 except select * from agg_hashagg_1)
union all
(select * from agg_hashagg_1 except select * from agg_sortgroup_disk_1);
(select * from agg_sortgroup_disk_2 except select * from agg_hashagg_2)
union all
(select * from agg_hashagg_2 except select * from agg_sortgroup_disk_2);
drop table agg_sortgroup_1,agg_sortgroup_2,agg_hashagg_1,agg_hashagg_2, agg_sortgroup_disk_1, agg_sortgroup_disk_2;
-- GROUP BY multiple keys
set enable_sortgroup_agg=on;
set enable_hashagg =off;
set enable_sort=off;
explain (costs off) select sum(v2+v1), v2,v1 from tbl_10k group by v2,v1 order by v2 desc ,v1 asc limit 1000 offset 10;
create table agg_sortgroup_1 as
select sum(v2+v1), v2,v1 from tbl_10k group by v2,v1 order by v2 desc ,v1 asc limit 1000 offset 10;
create table agg_sortgroup_2 as
select sum(v2+v1), v2,v1 from tbl_10k group by v1,v2 order by v1 asc ,v2 desc limit 1000 offset 10;
set work_mem =64;
create table agg_sortgroup_disk_1 as
select sum(v2+v1), v2,v1 from tbl_10k group by v2,v1 order by v2 desc ,v1 asc limit 1000 offset 10;
create table agg_sortgroup_disk_2 as
select sum(v2+v1), v2,v1 from tbl_10k group by v1,v2 order by v1 asc ,v2 desc limit 1000 offset 10;
set work_mem =default;
set enable_sortgroup_agg=off;
set enable_hashagg =on;
set enable_sort=on;
create table agg_hashagg_1 as
select sum(v2+v1), v2,v1 from tbl_10k group by v2,v1 order by v2 desc ,v1 asc limit 1000 offset 10;
create table agg_hashagg_2 as
select sum(v2+v1), v2,v1 from tbl_10k group by v1,v2 order by v1 asc ,v2 desc limit 1000 offset 10;
-- Compare results to hash aggregation results
(select * from agg_sortgroup_1 except select * from agg_hashagg_1)
union all
(select * from agg_hashagg_1 except select * from agg_sortgroup_1);
(select * from agg_sortgroup_2 except select * from agg_hashagg_2)
union all
(select * from agg_hashagg_2 except select * from agg_sortgroup_2);
(select * from agg_sortgroup_disk_1 except select * from agg_hashagg_1)
union all
(select * from agg_hashagg_1 except select * from agg_sortgroup_disk_1);
(select * from agg_sortgroup_disk_2 except select * from agg_hashagg_2)
union all
(select * from agg_hashagg_2 except select * from agg_sortgroup_disk_2);
drop table agg_sortgroup_1,agg_sortgroup_2,agg_hashagg_1,agg_hashagg_2,agg_sortgroup_disk_1, agg_sortgroup_disk_2;
-- already sorted, we don't consider sortgroup aggregation
set enable_sortgroup_agg=on;
set enable_hashagg =off;
set enable_sort=off;
explain (costs off) select avg(v1), v1 from tbl_10k group by v1 order by v1 limit 1;
set enable_seqscan=off;
create index v1_index on tbl_10k (v1);
analyze tbl_10k;
explain (costs off) select avg(v1), v1 from tbl_10k group by v1 order by v1 limit 1;
drop index v1_index;
set enable_seqscan=on;
-- test ExecReScanSortGroup
set enable_sortgroup_agg=on;
set enable_hashagg =off;
set enable_sort=off;
set enable_material =off;
explain (costs off)
WITH t1 AS (
SELECT v1::int % 10 as a1, SUM(id) as b1
FROM tbl_10k
GROUP BY v1::int % 10 order by v1::int % 10 limit 100
), t2 AS (
SELECT v2::char(5)::numeric::int % 10 as a2, SUM(id) as b2
FROM tbl_10k
GROUP BY v2::char(5)::numeric::int % 10 order by v2::char(5)::numeric::int % 10 limit 101
)
select a1, a2, b1+b2 from t1 inner join t2 on (b1 + b2 > 10);
create table mem_rescan_1 as
WITH t1 AS (
SELECT v1::int % 10 as a1, SUM(id) as b1
FROM tbl_10k
GROUP BY v1::int % 10 order by v1::int % 10 limit 100
), t2 AS (
SELECT v2::char(5)::numeric::int % 10 as a2, SUM(id) as b2
FROM tbl_10k
GROUP BY v2::char(5)::numeric::int % 10 order by v2::char(5)::numeric::int % 10 limit 101
)
select a1, a2, b1+b2 from t1 inner join t2 on (b1 + b2 > 10);
set work_mem =64;
create table disk_rescan_1 as
WITH t1 AS (
SELECT v1::int % 10 as a1, SUM(id) as b1
FROM tbl_10k
GROUP BY v1::int % 10 order by v1::int % 10 limit 100
), t2 AS (
SELECT v2::char(5)::numeric::int % 10 as a2, SUM(id) as b2
FROM tbl_10k
GROUP BY v2::char(5)::numeric::int % 10 order by v2::char(5)::numeric::int % 10 limit 101
)
select a1, a2, b1+b2 from t1 inner join t2 on (b1 + b2 > 10);
-- Compare results between MEMORY SORT and DISK SORT
(select * from mem_rescan_1 except select * from disk_rescan_1)
union all
(select * from disk_rescan_1 except select * from mem_rescan_1);
set work_mem =default;
set enable_sortgroup_agg=off;
set enable_hashagg =on;
set enable_sort=on;
create table hashagg_rescan_1 as
WITH t1 AS (
SELECT v1::int % 10 as a1, SUM(id) as b1
FROM tbl_10k
GROUP BY v1::int % 10 order by v1::int % 10 limit 100
), t2 AS (
SELECT v2::char(5)::numeric::int % 10 as a2, SUM(id) as b2
FROM tbl_10k
GROUP BY v2::char(5)::numeric::int % 10 order by v2::char(5)::numeric::int % 10 limit 101
)
select a1, a2, b1+b2 from t1 inner join t2 on (b1 + b2 > 10);
-- Compare results to hash aggregation results
-- hashagg_rescan_1 = mem_rescan_1 = disk_rescan_1
(select * from mem_rescan_1 except select * from hashagg_rescan_1)
union all
(select * from hashagg_rescan_1 except select * from mem_rescan_1);
drop table mem_rescan_1,hashagg_rescan_1,disk_rescan_1;
drop table tbl_10k;
create table tbl_cstore_10k(id bigint, v1 numeric, v2 numeric) with (orientation = column);
insert into tbl_cstore_10k select generate_series(1, 10 * 1000), (RANDOM() * 67)::int::numeric, (RANDOM() * 77)::int::numeric;
analyze tbl_cstore_10k;
set enable_sortgroup_agg=on;
set enable_hashagg =off;
set enable_sort=off;
explain (costs off) select sum(id), v1,v2 from tbl_cstore_10k group by v1,v2 order by v1,v2 limit 11 offset 10;
create table agg_sortgroup_1 as
select sum(id), v1,v2 from tbl_cstore_10k group by v1,v2 order by v1,v2 limit 11 offset 10;
set enable_sortgroup_agg=off;
set enable_hashagg =on;
set enable_sort=on;
create table agg_vecagg_1 as
select sum(id), v1,v2 from tbl_cstore_10k group by v1,v2 order by v1,v2 limit 11 offset 10;
(select * from agg_sortgroup_1 except select * from agg_vecagg_1)
union all
(select * from agg_vecagg_1 except select * from agg_sortgroup_1);
drop table tbl_cstore_10k, agg_sortgroup_1,agg_vecagg_1;
create table tbl_ustore_10k(id bigint, v1 numeric, v2 numeric) with (storage_type=ustore);
insert into tbl_ustore_10k select generate_series(1, 10 * 1000), (RANDOM() * 67)::int::numeric, (RANDOM() * 77)::int::numeric;
analyze tbl_ustore_10k;
set enable_sortgroup_agg=on;
set enable_hashagg =off;
set enable_sort=off;
explain (costs off) select sum(id), v1,v2 from tbl_ustore_10k group by v1,v2 order by v1,v2 limit 11 offset 10;
create table agg_sortgroup_1 as
select sum(id), v1,v2 from tbl_ustore_10k group by v1,v2 order by v1,v2 limit 11 offset 10;
set enable_sortgroup_agg=off;
set enable_hashagg =on;
set enable_sort=on;
create table agg_hashagg_1 as
select sum(id), v1,v2 from tbl_ustore_10k group by v1,v2 order by v1,v2 limit 11 offset 10;
(select * from agg_sortgroup_1 except select * from agg_hashagg_1)
union all
(select * from agg_hashagg_1 except select * from agg_sortgroup_1);
drop table tbl_ustore_10k, agg_sortgroup_1,agg_hashagg_1;
drop schema sortgroupagg cascade;