From 2b09dd1a67f1c01f4843c1e0b882761305dfb2c8 Mon Sep 17 00:00:00 2001 From: openGaussDev Date: Tue, 8 Mar 2022 17:01:08 +0800 Subject: [PATCH] : fix swcb qual push down issues Offering: openGaussDev More detail: push down only join quals and leave filter quals outside of the sw op Match-id-85219d101564ac6e5b2a3b575e9738c3ba1ccdca --- src/common/backend/parser/parse_startwith.cpp | 63 +++++++++++++++++-- src/test/regress/expected/sw_bugfix-1.out | 22 ++++++- src/test/regress/expected/sw_icbc.out | 6 +- src/test/regress/sql/sw_bugfix-1.sql | 10 +++ 4 files changed, 93 insertions(+), 8 deletions(-) diff --git a/src/common/backend/parser/parse_startwith.cpp b/src/common/backend/parser/parse_startwith.cpp index 1ab1ac9fb..551321563 100644 --- a/src/common/backend/parser/parse_startwith.cpp +++ b/src/common/backend/parser/parse_startwith.cpp @@ -1105,12 +1105,24 @@ static void transformStartWithClause(StartWithTransformContext *context, SelectS StartWithWalker(context, connectByExpr); } - /* now handle where quals which could whole push down */ + /* + * now handle where quals which might need to be pushed down. + * 1. this is necessary only for implicitly joined tables + * such as ... FROM t1,t2 WHERE t1.xx = t2.yy ... + * 2. note that only join quals should be pushed down while + * non-join quals such as (t1.xx < n) should be kept in the outer loop + * of the CTE scan, otherwise the end-result will be different from + * those produced by the standard swcb syntax. + * (the issue here is that implicitly joined tables are difficult to handle + * in our implementation of start with .. connect by .. syntax, + * as we don't have a clear cut of join quals from the non-join quals at this stage. + * users should be encouraged to use explicity joined tables whenever + * possible before a clear-cut solution is implemented.) + */ int lens = list_length(context->pstate->p_start_info); if (lens != 1) { Node *whereClause = (Node *)copyObject(stmt->whereClause); context->whereClause = whereClause; - stmt->whereClause = NULL; } @@ -1258,6 +1270,36 @@ static void AddWithClauseToBranch(ParseState *pstate, SelectStmt *stmt, List *re return; } +static bool walker_to_exclude_non_join_quals(Node *node, Node *context_node) +{ + if (node == NULL) { + return false; + } + + if (!IsA(node, A_Expr)) { + return raw_expression_tree_walker(node, (bool (*)()) walker_to_exclude_non_join_quals, (void*)NULL); + } + + A_Expr* expr = (A_Expr*) node; + /* + * this is to achieve consistent result sets with those produced by the original + * start with .. connect by syntax, which does not push filter quals down to connect quals. + * if non-column item appears on any side of an operator, we guess that it is + * not a join qual so should not be filtered in sw op, and force it to be true. + * this rule is not always correct but should work fine most of the time. + * could be improved later on, e.g. find better ways to extract non-join quals + * from the where clause. + */ + if (expr->kind == AEXPR_OP && + (!IsA(expr->lexpr, ColumnRef) || !IsA(expr->rexpr, ColumnRef))) { + expr->lexpr = makeBoolAConst(true, -1); + expr->rexpr = makeBoolAConst(true, -1); + expr->kind = AEXPR_OR; + } + + return false; +} + /* * -------------------------------------------------------------------------------------- * @Brief: Create SWCB's conversion CTE's inner branch, normally we add ConnectByExpr to @@ -1316,6 +1358,10 @@ static SelectStmt *CreateStartWithCTEInnerBranch(ParseState* pstate, JoinExpr *final_join = (JoinExpr *)origin_table; /* pushdown requires deep copying of the quals */ Node *whereCopy = (Node *)copyObject(whereClause); + /* only join quals can be pushed down */ + raw_expression_tree_walker((Node *)whereCopy, + (bool (*)())walker_to_exclude_non_join_quals, (void*)NULL); + if (final_join->quals == NULL) { final_join->quals = whereCopy; } else { @@ -1422,11 +1468,18 @@ static SelectStmt *CreateStartWithCTEOuterBranch(ParseState *pstate, /* push whereClause down to init part, taking care to avoid NULL in expr. */ quals = (Node *)startWithExpr; + Node* whereClauseCopy = (Node *)copyObject(whereClause); + + if (whereClause != NULL) { + /* only join quals can be pushed down */ + raw_expression_tree_walker((Node*)whereClauseCopy, + (bool (*)())walker_to_exclude_non_join_quals, (void*)NULL); + } + if (quals == NULL) { - /* pushdown requires deep copying of the quals */ - quals = (Node *)copyObject(whereClause); + quals = whereClauseCopy; } else if (whereClause != NULL) { - quals = (Node *)makeA_Expr(AEXPR_AND, NULL, (Node *)copyObject(whereClause), + quals = (Node *)makeA_Expr(AEXPR_AND, NULL, whereClauseCopy, (Node*)startWithExpr, -1); } diff --git a/src/test/regress/expected/sw_bugfix-1.out b/src/test/regress/expected/sw_bugfix-1.out index 328ba3c16..8d72667db 100644 --- a/src/test/regress/expected/sw_bugfix-1.out +++ b/src/test/regress/expected/sw_bugfix-1.out @@ -430,6 +430,7 @@ explain (costs off) select t1.ID,t1.VCH,pid,NAME,PTEX from TEST_HCB_FQB t1,TEST_ QUERY PLAN ---------------------------------------------------------------------------- CTE Scan on tmp_reuslt + Filter: ("t1@id" = "t2@id") CTE tmp_reuslt -> StartWith Operator Start With pseudo atts: RUITR, array_key_1 @@ -448,7 +449,7 @@ explain (costs off) select t1.ID,t1.VCH,pid,NAME,PTEX from TEST_HCB_FQB t1,TEST_ -> WorkTable Scan on tmp_reuslt -> Hash -> Seq Scan on test_hcb_fqb t1 -(19 rows) +(20 rows) CREATE OR REPLACE FUNCTION test_hcb_pro1(i_id in int) return int AS @@ -1470,3 +1471,22 @@ DROP TABLE IF EXISTS start; DROP TABLE IF EXISTS connect; DROP TABLE IF EXISTS siblings; DROP TABLE IF EXISTS prior; +-- test where clause pushdown result correctness +create table xt1(id int, lid int, name text); +create table xt2(idd int, lidd int, name text); +insert into xt1 values(1,null,'A'),(2,1,'B'),(3,2,'C'); +insert into xt2 values(1,null,'A'),(2,1,'B'),(3,2,'C'), (4,3,'D'); +select * from xt2,xt1 where xt1.id=xt2.idd and xt1.id!=2 start with id=2 connect by prior id=lid; + idd | lidd | name | id | lid | name +-----+------+------+----+-----+------ + 3 | 2 | C | 3 | 2 | C +(1 row) + +select * from xt2,xt1 where xt1.id=xt2.idd and xt1.id=3 start with id=2 connect by prior id=lid; + idd | lidd | name | id | lid | name +-----+------+------+----+-----+------ + 3 | 2 | C | 3 | 2 | C +(1 row) + +drop table if exists xt1; +drop table if exists xt2; diff --git a/src/test/regress/expected/sw_icbc.out b/src/test/regress/expected/sw_icbc.out index 421b67d4a..82fe61450 100755 --- a/src/test/regress/expected/sw_icbc.out +++ b/src/test/regress/expected/sw_icbc.out @@ -477,6 +477,7 @@ explain (costs off) select * from t1, t2 where t1.id = t2.id start with t1.id = QUERY PLAN ----------------------------------------------------------------------------- CTE Scan on tmp_reuslt + Filter: ("t1@id" = "t2@id") CTE tmp_reuslt -> StartWith Operator Start With pseudo atts: RUITR, array_key_1 @@ -495,7 +496,7 @@ explain (costs off) select * from t1, t2 where t1.id = t2.id start with t1.id = -> WorkTable Scan on tmp_reuslt -> Hash -> Seq Scan on t2 -(19 rows) +(20 rows) explain (costs off) select * from t1 join t2 on t1.id = t2.id start with t1.id = t2.id and t1.id = 1 connect by prior t1.id = t1.pid; QUERY PLAN @@ -525,6 +526,7 @@ explain (costs off) select * from t1, (select * from t2) as test where t1.id = t QUERY PLAN ----------------------------------------------------------------------------- CTE Scan on tmp_reuslt + Filter: ("t1@id" = "test@id") CTE tmp_reuslt -> StartWith Operator Start With pseudo atts: RUITR, array_key_1 @@ -543,7 +545,7 @@ explain (costs off) select * from t1, (select * from t2) as test where t1.id = t -> WorkTable Scan on tmp_reuslt -> Hash -> Seq Scan on t2 -(19 rows) +(20 rows) explain (costs off) select id, (select id from t2 start with t2.id = t1.id connect by t2.id = t1.id limit 1) from t1 where id = 1; ERROR: START WITH CONNECT BY clauses must have at least one prior key. diff --git a/src/test/regress/sql/sw_bugfix-1.sql b/src/test/regress/sql/sw_bugfix-1.sql index 0ff32a29b..cbf505b7a 100644 --- a/src/test/regress/sql/sw_bugfix-1.sql +++ b/src/test/regress/sql/sw_bugfix-1.sql @@ -431,3 +431,13 @@ DROP TABLE IF EXISTS start; DROP TABLE IF EXISTS connect; DROP TABLE IF EXISTS siblings; DROP TABLE IF EXISTS prior; + +-- test where clause pushdown result correctness +create table xt1(id int, lid int, name text); +create table xt2(idd int, lidd int, name text); +insert into xt1 values(1,null,'A'),(2,1,'B'),(3,2,'C'); +insert into xt2 values(1,null,'A'),(2,1,'B'),(3,2,'C'), (4,3,'D'); +select * from xt2,xt1 where xt1.id=xt2.idd and xt1.id!=2 start with id=2 connect by prior id=lid; +select * from xt2,xt1 where xt1.id=xt2.idd and xt1.id=3 start with id=2 connect by prior id=lid; +drop table if exists xt1; +drop table if exists xt2;