Merge branch 'master' into update-jemalloc

This commit is contained in:
Alexey Milovidov 2020-03-09 07:05:53 +03:00
commit 386151a3d7
176 changed files with 2116 additions and 1440 deletions

View File

@ -35,27 +35,6 @@ if (SANITIZE)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libmsan")
endif ()
# Temporarily disable many external libraries that don't work under
# MemorySanitizer yet.
set (ENABLE_HDFS 0 CACHE BOOL "")
set (ENABLE_CAPNP 0 CACHE BOOL "")
set (ENABLE_RDKAFKA 0 CACHE BOOL "")
set (ENABLE_POCO_MONGODB 0 CACHE BOOL "")
set (ENABLE_POCO_NETSSL 0 CACHE BOOL "")
set (ENABLE_POCO_ODBC 0 CACHE BOOL "")
set (ENABLE_ODBC 0 CACHE BOOL "")
set (ENABLE_MYSQL 0 CACHE BOOL "")
set (ENABLE_EMBEDDED_COMPILER 0 CACHE BOOL "")
set (USE_INTERNAL_CAPNP_LIBRARY 0 CACHE BOOL "")
set (USE_SIMDJSON 0 CACHE BOOL "")
set (ENABLE_ORC 0 CACHE BOOL "")
set (ENABLE_PARQUET 0 CACHE BOOL "")
set (USE_CAPNP 0 CACHE BOOL "")
set (USE_INTERNAL_ORC_LIBRARY 0 CACHE BOOL "")
set (USE_ORC 0 CACHE BOOL "")
set (USE_AVRO 0 CACHE BOOL "")
set (ENABLE_SSL 0 CACHE BOOL "")
elseif (SANITIZE STREQUAL "thread")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=thread")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=thread")

2
contrib/libunwind vendored

@ -1 +1 @@
Subproject commit 68cffcbbd1840e14664a5f7f19c5e43f65c525b5
Subproject commit ede00622ff8ecb1848ed22187eabbfaf8b4e9307

2
contrib/openssl vendored

@ -1 +1 @@
Subproject commit debbae80cb44de55fd8040fdfbe4b506601ff2a6
Subproject commit 07e9623064508d15dd61367f960ebe7fc9aecd77

View File

@ -305,22 +305,17 @@ void PerformanceTest::runQueries(
statistics.startWatches();
try
{
executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context, test_info.settings);
if (test_info.exec_type == ExecutionType::Loop)
LOG_INFO(log, "Will run query in loop");
for (size_t iteration = 0; !statistics.got_SIGINT; ++iteration)
{
LOG_INFO(log, "Will run query in loop");
for (size_t iteration = 1; !statistics.got_SIGINT; ++iteration)
stop_conditions.reportIterations(iteration);
if (stop_conditions.areFulfilled())
{
stop_conditions.reportIterations(iteration);
if (stop_conditions.areFulfilled())
{
LOG_INFO(log, "Stop conditions fulfilled");
break;
}
executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context, test_info.settings);
LOG_INFO(log, "Stop conditions fulfilled");
break;
}
executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context, test_info.settings);
}
}
catch (const Exception & e)

View File

@ -54,7 +54,6 @@ PerformanceTestInfo::PerformanceTestInfo(
extractQueries(config);
extractAuxiliaryQueries(config);
processSubstitutions(config);
getExecutionType(config);
getStopConditions(config);
}
@ -141,22 +140,6 @@ void PerformanceTestInfo::processSubstitutions(XMLConfigurationPtr config)
}
}
/// Reads the "type" key from the test config and stores the matching
/// ExecutionType in exec_type.
/// Throws BAD_ARGUMENTS if the key is absent or its value is neither
/// "loop" nor "once".
void PerformanceTestInfo::getExecutionType(XMLConfigurationPtr config)
{
    if (!config->has("type"))
        throw Exception("Missing type property in config: " + test_name,
            ErrorCodes::BAD_ARGUMENTS);

    const std::string type_name = config->getString("type");

    if (type_name == "loop")
    {
        exec_type = ExecutionType::Loop;
        return;
    }

    if (type_name == "once")
    {
        exec_type = ExecutionType::Once;
        return;
    }

    /// Anything else is rejected.
    throw Exception("Unknown type " + type_name + " in :" + test_name,
        ErrorCodes::BAD_ARGUMENTS);
}
void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config)
{

View File

@ -12,11 +12,6 @@
namespace DB
{
enum class ExecutionType
{
Loop,
Once
};
using XMLConfiguration = Poco::Util::XMLConfiguration;
using XMLConfigurationPtr = Poco::AutoPtr<XMLConfiguration>;
@ -34,7 +29,6 @@ public:
Strings queries;
Settings settings;
ExecutionType exec_type;
StringToVector substitutions;
size_t times_to_run;
@ -47,7 +41,6 @@ private:
void applySettings(XMLConfigurationPtr config);
void extractQueries(XMLConfigurationPtr config);
void processSubstitutions(XMLConfigurationPtr config);
void getExecutionType(XMLConfigurationPtr config);
void getStopConditions(XMLConfigurationPtr config);
void extractAuxiliaryQueries(XMLConfigurationPtr config);
};

View File

@ -17,13 +17,6 @@ namespace DB
namespace
{
/// Name of the headline metric for a test: "min_time" when the test runs
/// in Loop mode, "rows_per_second" for any other execution type.
std::string getMainMetric(const PerformanceTestInfo & test_info)
{
    const bool runs_in_loop = (test_info.exec_type == ExecutionType::Loop);
    return runs_in_loop ? "min_time" : "rows_per_second";
}
bool isASCIIString(const std::string & str)
{
@ -120,50 +113,40 @@ std::string ReportBuilder::buildFullReport(
runJSON.set("exception", "Some exception occurred with non ASCII message. This may produce invalid JSON. Try reproduce locally.");
}
if (test_info.exec_type == ExecutionType::Loop)
/// in seconds
runJSON.set("min_time", statistics.min_time / double(1000));
if (statistics.sampler.size() != 0)
{
/// in seconds
runJSON.set("min_time", statistics.min_time / double(1000));
if (statistics.sampler.size() != 0)
JSONString quantiles(4); /// here, 4 is the size of \t padding
for (double percent = 10; percent <= 90; percent += 10)
{
JSONString quantiles(4); /// here, 4 is the size of \t padding
for (double percent = 10; percent <= 90; percent += 10)
{
std::string quantile_key = std::to_string(percent / 100.0);
while (quantile_key.back() == '0')
quantile_key.pop_back();
std::string quantile_key = std::to_string(percent / 100.0);
while (quantile_key.back() == '0')
quantile_key.pop_back();
quantiles.set(quantile_key,
statistics.sampler.quantileInterpolated(percent / 100.0));
}
quantiles.set("0.95",
statistics.sampler.quantileInterpolated(95 / 100.0));
quantiles.set("0.99",
statistics.sampler.quantileInterpolated(99 / 100.0));
quantiles.set("0.999",
statistics.sampler.quantileInterpolated(99.9 / 100.0));
quantiles.set("0.9999",
statistics.sampler.quantileInterpolated(99.99 / 100.0));
runJSON.set("quantiles", quantiles.asString());
quantiles.set(quantile_key,
statistics.sampler.quantileInterpolated(percent / 100.0));
}
quantiles.set("0.95",
statistics.sampler.quantileInterpolated(95 / 100.0));
quantiles.set("0.99",
statistics.sampler.quantileInterpolated(99 / 100.0));
quantiles.set("0.999",
statistics.sampler.quantileInterpolated(99.9 / 100.0));
quantiles.set("0.9999",
statistics.sampler.quantileInterpolated(99.99 / 100.0));
runJSON.set("total_time", statistics.total_time);
if (statistics.total_time != 0)
{
runJSON.set("queries_per_second", static_cast<double>(statistics.queries) / statistics.total_time);
runJSON.set("rows_per_second", static_cast<double>(statistics.total_rows_read) / statistics.total_time);
runJSON.set("bytes_per_second", static_cast<double>(statistics.total_bytes_read) / statistics.total_time);
}
runJSON.set("quantiles", quantiles.asString());
}
else
runJSON.set("total_time", statistics.total_time);
if (statistics.total_time != 0)
{
runJSON.set("max_rows_per_second", statistics.max_rows_speed);
runJSON.set("max_bytes_per_second", statistics.max_bytes_speed);
runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value);
runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value);
runJSON.set("queries_per_second", static_cast<double>(statistics.queries) / statistics.total_time);
runJSON.set("rows_per_second", static_cast<double>(statistics.total_rows_read) / statistics.total_time);
runJSON.set("bytes_per_second", static_cast<double>(statistics.total_bytes_read) / statistics.total_time);
}
runJSON.set("memory_usage", statistics.memory_usage);
@ -197,7 +180,7 @@ std::string ReportBuilder::buildCompactReport(
output << "run " << std::to_string(number_of_launch + 1) << ": ";
std::string main_metric = getMainMetric(test_info);
std::string main_metric = "min_time";
output << main_metric << " = ";
size_t index = number_of_launch * test_info.queries.size() + query_index;

View File

@ -28,8 +28,6 @@ void StopConditionsSet::loadFromConfig(const ConfigurationPtr & stop_conditions_
min_time_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
else if (key == "max_speed_not_changing_for_ms")
max_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
else if (key == "average_speed_not_changing_for_ms")
average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key);
else
throw Exception("Met unknown stop condition: " + key, ErrorCodes::LOGICAL_ERROR);
@ -45,7 +43,6 @@ void StopConditionsSet::reset()
iterations.fulfilled = false;
min_time_not_changing_for_ms.fulfilled = false;
max_speed_not_changing_for_ms.fulfilled = false;
average_speed_not_changing_for_ms.fulfilled = false;
fulfilled_count = 0;
}

View File

@ -30,7 +30,6 @@ struct StopConditionsSet
StopCondition iterations;
StopCondition min_time_not_changing_for_ms;
StopCondition max_speed_not_changing_for_ms;
StopCondition average_speed_not_changing_for_ms;
size_t initialized_count = 0;
size_t fulfilled_count = 0;

View File

@ -67,41 +67,6 @@ void TestStats::update_min_time(UInt64 min_time_candidate)
}
}
/// Raises `max_speed` to `max_speed_candidate` when the candidate is strictly
/// greater, and restarts `max_speed_watch` in that case.
/// Does nothing when the candidate does not exceed the current maximum.
void TestStats::update_max_speed(
    size_t max_speed_candidate,
    Stopwatch & max_speed_watch,
    UInt64 & max_speed)
{
    if (max_speed_candidate <= max_speed)
        return;

    max_speed = max_speed_candidate;
    max_speed_watch.restart();
}
/// Folds `new_speed_info` into the running mean `avg_speed_value`
/// (incrementing `number_of_info_batches`), then compares the new mean with
/// the reference value `avg_speed_first`.
/// If the relative difference between the two is at least `precision`, the
/// reference is moved to the new mean and `avg_speed_watch` is restarted.
/// A reference equal to zero is first initialised with the new mean.
void TestStats::update_average_speed(
    double new_speed_info,
    Stopwatch & avg_speed_watch,
    size_t & number_of_info_batches,
    double precision,
    double & avg_speed_first,
    double & avg_speed_value)
{
    /// Running mean: restore the previous sum, add the sample, divide by the new count.
    const double accumulated = (avg_speed_value * number_of_info_batches) + new_speed_info;
    ++number_of_info_batches;
    avg_speed_value = accumulated / number_of_info_batches;

    if (avg_speed_first == 0)
        avg_speed_first = avg_speed_value;

    const auto bounds = std::minmax(avg_speed_value, avg_speed_first);
    const double relative_change = 1 - bounds.first / bounds.second;

    /// Written as a positive `>=` test on purpose: a NaN ratio must not trigger a restart.
    if (relative_change >= precision)
    {
        avg_speed_first = avg_speed_value;
        avg_speed_watch.restart();
    }
}
void TestStats::add(size_t rows_read_inc, size_t bytes_read_inc)
{
@ -109,26 +74,6 @@ void TestStats::add(size_t rows_read_inc, size_t bytes_read_inc)
total_bytes_read += bytes_read_inc;
last_query_rows_read += rows_read_inc;
last_query_bytes_read += bytes_read_inc;
double new_rows_speed = last_query_rows_read / watch_per_query.elapsedSeconds();
double new_bytes_speed = last_query_bytes_read / watch_per_query.elapsedSeconds();
/// Update rows speed
update_max_speed(new_rows_speed, max_rows_speed_watch, max_rows_speed);
update_average_speed(new_rows_speed,
avg_rows_speed_watch,
number_of_rows_speed_info_batches,
avg_rows_speed_precision,
avg_rows_speed_first,
avg_rows_speed_value);
/// Update bytes speed
update_max_speed(new_bytes_speed, max_bytes_speed_watch, max_bytes_speed);
update_average_speed(new_bytes_speed,
avg_bytes_speed_watch,
number_of_bytes_speed_info_batches,
avg_bytes_speed_precision,
avg_bytes_speed_first,
avg_bytes_speed_value);
}
void TestStats::updateQueryInfo()
@ -144,10 +89,6 @@ TestStats::TestStats()
watch.reset();
watch_per_query.reset();
min_time_watch.reset();
max_rows_speed_watch.reset();
max_bytes_speed_watch.reset();
avg_rows_speed_watch.reset();
avg_bytes_speed_watch.reset();
}
@ -156,10 +97,6 @@ void TestStats::startWatches()
watch.start();
watch_per_query.start();
min_time_watch.start();
max_rows_speed_watch.start();
max_bytes_speed_watch.start();
avg_rows_speed_watch.start();
avg_bytes_speed_watch.start();
}
}

View File

@ -13,10 +13,6 @@ struct TestStats
Stopwatch watch;
Stopwatch watch_per_query;
Stopwatch min_time_watch;
Stopwatch max_rows_speed_watch;
Stopwatch max_bytes_speed_watch;
Stopwatch avg_rows_speed_watch;
Stopwatch avg_bytes_speed_watch;
bool last_query_was_cancelled = false;
std::string query_id;
@ -62,19 +58,6 @@ struct TestStats
void update_min_time(UInt64 min_time_candidate);
void update_average_speed(
double new_speed_info,
Stopwatch & avg_speed_watch,
size_t & number_of_info_batches,
double precision,
double & avg_speed_first,
double & avg_speed_value);
void update_max_speed(
size_t max_speed_candidate,
Stopwatch & max_speed_watch,
UInt64 & max_speed);
void add(size_t rows_read_inc, size_t bytes_read_inc);
void updateQueryInfo();

View File

@ -32,8 +32,6 @@ public:
DEFINE_REPORT_FUNC(reportBytesReadUncompressed, bytes_read_uncompressed)
DEFINE_REPORT_FUNC(reportIterations, iterations)
DEFINE_REPORT_FUNC(reportMinTimeNotChangingFor, min_time_not_changing_for_ms)
DEFINE_REPORT_FUNC(reportMaxSpeedNotChangingFor, max_speed_not_changing_for_ms)
DEFINE_REPORT_FUNC(reportAverageSpeedNotChangingFor, average_speed_not_changing_for_ms)
#undef REPORT

View File

@ -21,8 +21,6 @@ void checkFulfilledConditionsAndUpdate(
stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read);
stop_conditions.reportTotalTime(statistics.watch.elapsed() / (1000 * 1000));
stop_conditions.reportMinTimeNotChangingFor(statistics.min_time_watch.elapsed() / (1000 * 1000));
stop_conditions.reportMaxSpeedNotChangingFor(statistics.max_rows_speed_watch.elapsed() / (1000 * 1000));
stop_conditions.reportAverageSpeedNotChangingFor(statistics.avg_rows_speed_watch.elapsed() / (1000 * 1000));
if (stop_conditions.areFulfilled())
{

View File

@ -4,8 +4,10 @@
#include <Interpreters/CrossToInnerJoinVisitor.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/QueryAliasesVisitor.h>
#include <Interpreters/misc.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
@ -27,41 +29,26 @@ namespace ErrorCodes
namespace
{
struct JoinedTable
struct JoinedElement
{
DatabaseAndTableWithAlias table;
ASTTablesInSelectQueryElement * element = nullptr;
ASTTableJoin * join = nullptr;
ASTPtr array_join = nullptr;
bool has_using = false;
JoinedTable(ASTPtr table_element)
JoinedElement(const ASTTablesInSelectQueryElement & table_element)
: element(table_element)
{
element = table_element->as<ASTTablesInSelectQueryElement>();
if (!element)
throw Exception("Logical error: TablesInSelectQueryElement expected", ErrorCodes::LOGICAL_ERROR);
if (element.table_join)
join = element.table_join->as<ASTTableJoin>();
}
if (element->table_join)
{
join = element->table_join->as<ASTTableJoin>();
if (join->kind == ASTTableJoin::Kind::Cross ||
join->kind == ASTTableJoin::Kind::Comma)
{
if (!join->children.empty())
throw Exception("Logical error: CROSS JOIN has expressions", ErrorCodes::LOGICAL_ERROR);
}
void checkTableName(const DatabaseAndTableWithAlias & table, const String & current_database) const
{
if (!element.table_expression)
throw Exception("Not a table expression in JOIN (ARRAY JOIN?)", ErrorCodes::LOGICAL_ERROR);
if (join->using_expression_list)
has_using = true;
}
ASTTableExpression * table_expression = element.table_expression->as<ASTTableExpression>();
if (!table_expression)
throw Exception("Wrong table expression in JOIN", ErrorCodes::LOGICAL_ERROR);
if (element->table_expression)
{
const auto & expr = element->table_expression->as<ASTTableExpression &>();
table = DatabaseAndTableWithAlias(expr);
}
array_join = element->array_join;
if (!table.same(DatabaseAndTableWithAlias(*table_expression, current_database)))
throw Exception("Inconsistent table names", ErrorCodes::LOGICAL_ERROR);
}
void rewriteCommaToCross()
@ -70,7 +57,24 @@ struct JoinedTable
join->kind = ASTTableJoin::Kind::Cross;
}
/// Turns the stored join into an INNER ALL join that uses `on_expression`
/// as its ON clause; the expression is also appended to the join's children.
/// Dereferences `join` unconditionally, so the element must have a table join.
void rewriteCrossToInner(ASTPtr on_expression)
{
    ASTTableJoin & table_join = *join;

    table_join.kind = ASTTableJoin::Kind::Inner;
    table_join.strictness = ASTTableJoin::Strictness::All;

    table_join.on_expression = on_expression;
    table_join.children.push_back(table_join.on_expression);
}
ASTPtr arrayJoin() const { return element.array_join; }
const ASTTableJoin * tableJoin() const { return join; }
bool canAttachOnExpression() const { return join && !join->on_expression; }
bool hasUsing() const { return join && join->using_expression_list; }
private:
const ASTTablesInSelectQueryElement & element;
ASTTableJoin * join = nullptr;
};
bool isComparison(const String & name)
@ -89,13 +93,14 @@ class CheckExpressionVisitorData
public:
using TypeToVisit = const ASTFunction;
CheckExpressionVisitorData(const std::vector<JoinedTable> & tables_)
CheckExpressionVisitorData(const std::vector<JoinedElement> & tables_,
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns,
Aliases && aliases_)
: joined_tables(tables_)
, tables(tables_with_columns)
, aliases(aliases_)
, ands_only(true)
{
for (auto & joined : joined_tables)
tables.push_back(joined.table);
}
{}
void visit(const ASTFunction & node, const ASTPtr & ast)
{
@ -160,9 +165,10 @@ public:
}
private:
const std::vector<JoinedTable> & joined_tables;
std::vector<DatabaseAndTableWithAlias> tables;
const std::vector<JoinedElement> & joined_tables;
const std::vector<TableWithColumnNamesAndTypes> & tables;
std::map<size_t, std::vector<ASTPtr>> asts_to_join_on;
Aliases aliases;
bool ands_only;
size_t canMoveEqualsToJoinOn(const ASTFunction & node)
@ -177,6 +183,12 @@ private:
if (!left || !right)
return false;
/// Moving expressions that use column aliases is not supported.
if (left->isShort() && aliases.count(left->shortName()))
return false;
if (right->isShort() && aliases.count(right->shortName()))
return false;
return checkIdentifiers(*left, *right);
}
@ -185,15 +197,17 @@ private:
/// @return table position to attach expression to or 0.
size_t checkIdentifiers(const ASTIdentifier & left, const ASTIdentifier & right)
{
size_t left_table_pos = 0;
bool left_match = IdentifierSemantic::chooseTable(left, tables, left_table_pos);
std::optional<size_t> left_table_pos = IdentifierSemantic::getMembership(left);
if (!left_table_pos)
left_table_pos = IdentifierSemantic::chooseTable(left, tables);
size_t right_table_pos = 0;
bool right_match = IdentifierSemantic::chooseTable(right, tables, right_table_pos);
std::optional<size_t> right_table_pos = IdentifierSemantic::getMembership(right);
if (!right_table_pos)
right_table_pos = IdentifierSemantic::chooseTable(right, tables);
if (left_match && right_match && (left_table_pos != right_table_pos))
if (left_table_pos && right_table_pos && (*left_table_pos != *right_table_pos))
{
size_t table_pos = std::max(left_table_pos, right_table_pos);
size_t table_pos = std::max(*left_table_pos, *right_table_pos);
if (joined_tables[table_pos].canAttachOnExpression())
return table_pos;
}
@ -205,7 +219,7 @@ using CheckExpressionMatcher = ConstOneTypeMatcher<CheckExpressionVisitorData, f
using CheckExpressionVisitor = ConstInDepthNodeVisitor<CheckExpressionMatcher, true>;
bool getTables(ASTSelectQuery & select, std::vector<JoinedTable> & joined_tables, size_t & num_comma)
bool getTables(ASTSelectQuery & select, std::vector<JoinedElement> & joined_tables, size_t & num_comma)
{
if (!select.tables())
return false;
@ -224,23 +238,37 @@ bool getTables(ASTSelectQuery & select, std::vector<JoinedTable> & joined_tables
for (auto & child : tables->children)
{
joined_tables.emplace_back(JoinedTable(child));
JoinedTable & t = joined_tables.back();
if (t.array_join)
auto table_element = child->as<ASTTablesInSelectQueryElement>();
if (!table_element)
throw Exception("Logical error: TablesInSelectQueryElement expected", ErrorCodes::LOGICAL_ERROR);
joined_tables.emplace_back(JoinedElement(*table_element));
JoinedElement & t = joined_tables.back();
if (t.arrayJoin())
{
++num_array_join;
continue;
}
if (t.has_using)
if (t.hasUsing())
{
++num_using;
continue;
}
if (auto * join = t.join)
if (auto * join = t.tableJoin())
{
if (join->kind == ASTTableJoin::Kind::Cross ||
join->kind == ASTTableJoin::Kind::Comma)
{
if (!join->children.empty())
throw Exception("Logical error: CROSS JOIN has expressions", ErrorCodes::LOGICAL_ERROR);
}
if (join->kind == ASTTableJoin::Kind::Comma)
++num_comma;
}
}
if (num_using && (num_tables - num_array_join) > 2)
@ -251,12 +279,20 @@ bool getTables(ASTSelectQuery & select, std::vector<JoinedTable> & joined_tables
if (num_array_join || num_using)
return false;
return true;
}
}
/// Returns false when `node` is an ASTSubquery and true for every other node.
/// NOTE(review): presumably this keeps the visitor from descending into subqueries - confirm against the visitor.
bool CrossToInnerJoinMatcher::needChildVisit(ASTPtr & node, const ASTPtr &)
{
    return !node->as<ASTSubquery>();
}
void CrossToInnerJoinMatcher::visit(ASTPtr & ast, Data & data)
{
if (auto * t = ast->as<ASTSelectQuery>())
@ -266,10 +302,19 @@ void CrossToInnerJoinMatcher::visit(ASTPtr & ast, Data & data)
void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & data)
{
size_t num_comma = 0;
std::vector<JoinedTable> joined_tables;
std::vector<JoinedElement> joined_tables;
if (!getTables(select, joined_tables, num_comma))
return;
/// Check if joined_tables are consistent with known tables_with_columns
{
if (joined_tables.size() != data.tables_with_columns.size())
throw Exception("Logical error: inconsistent number of tables", ErrorCodes::LOGICAL_ERROR);
for (size_t i = 0; i < joined_tables.size(); ++i)
joined_tables[i].checkTableName(data.tables_with_columns[i].table, data.current_database);
}
/// COMMA to CROSS
if (num_comma)
@ -283,7 +328,13 @@ void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & da
if (!select.where())
return;
CheckExpressionVisitor::Data visitor_data{joined_tables};
Aliases aliases;
QueryAliasesVisitor::Data query_aliases_data{aliases};
if (ASTPtr with = select.with())
QueryAliasesVisitor(query_aliases_data).visit(with);
QueryAliasesVisitor(query_aliases_data).visit(select.select());
CheckExpressionVisitor::Data visitor_data{joined_tables, data.tables_with_columns, std::move(aliases)};
CheckExpressionVisitor(visitor_data).visit(select.where());
if (visitor_data.complex())
@ -293,12 +344,7 @@ void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr &, Data & da
{
if (visitor_data.matchAny(i))
{
ASTTableJoin & join = *joined_tables[i].join;
join.kind = ASTTableJoin::Kind::Inner;
join.strictness = ASTTableJoin::Strictness::All;
join.on_expression = visitor_data.makeOnExpression(i);
join.children.push_back(join.on_expression);
joined_tables[i].rewriteCrossToInner(visitor_data.makeOnExpression(i));
data.done = true;
}
}

View File

@ -6,6 +6,7 @@ namespace DB
{
class ASTSelectQuery;
struct TableWithColumnNamesAndTypes;
/// AST transformer. It replaces cross joins with equivalent inner joins where possible.
class CrossToInnerJoinMatcher
@ -13,10 +14,12 @@ class CrossToInnerJoinMatcher
public:
struct Data
{
const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns;
const String current_database;
bool done = false;
};
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
static bool needChildVisit(ASTPtr &, const ASTPtr &);
static void visit(ASTPtr & ast, Data & data);
private:

View File

@ -35,6 +35,12 @@ struct DatabaseAndTableWithAlias
/// Check if it satisfies another db_table name. @note The operation is not symmetric.
bool satisfies(const DatabaseAndTableWithAlias & table, bool table_may_be_an_alias);
/// Exact identity check: true only when database, table and alias
/// are all equal to those of `db_table`.
bool same(const DatabaseAndTableWithAlias & db_table) const
{
    if (database != db_table.database)
        return false;
    if (table != db_table.table)
        return false;
    return alias == db_table.alias;
}
};
struct TableWithColumnNames
@ -80,6 +86,19 @@ struct TableWithColumnNamesAndTypes
, columns(columns_)
{}
bool hasColumn(const String & name) const
{
if (names.empty())
{
for (auto & col : columns)
names.insert(col.name);
for (auto & col : hidden_columns)
names.insert(col.name);
}
return names.count(name);
}
void addHiddenColumns(const NamesAndTypesList & addition)
{
hidden_columns.insert(hidden_columns.end(), addition.begin(), addition.end());
@ -99,6 +118,9 @@ struct TableWithColumnNamesAndTypes
return TableWithColumnNames(table, std::move(out_columns), std::move(out_hidden_columns));
}
private:
mutable NameSet names;
};
std::vector<DatabaseAndTableWithAlias> getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database);

View File

@ -50,9 +50,8 @@ void ExpressionInfoMatcher::visit(const ASTIdentifier & identifier, const ASTPtr
}
else
{
size_t best_table_pos = 0;
if (IdentifierSemantic::chooseTable(identifier, data.tables, best_table_pos))
data.unique_reference_tables_pos.emplace(best_table_pos);
if (auto best_table_pos = IdentifierSemantic::chooseTable(identifier, data.tables))
data.unique_reference_tables_pos.emplace(*best_table_pos);
}
}

View File

@ -14,29 +14,18 @@ namespace ErrorCodes
namespace
{
const DatabaseAndTableWithAlias & extractTable(const DatabaseAndTableWithAlias & table)
{
return table;
}
const DatabaseAndTableWithAlias & extractTable(const TableWithColumnNames & table)
{
return table.table;
}
template <typename T>
IdentifierSemantic::ColumnMatch tryChooseTable(const ASTIdentifier & identifier, const std::vector<T> & tables,
size_t & best_table_pos, bool allow_ambiguous)
std::optional<size_t> tryChooseTable(const ASTIdentifier & identifier, const std::vector<T> & tables, bool allow_ambiguous)
{
using ColumnMatch = IdentifierSemantic::ColumnMatch;
best_table_pos = 0;
size_t best_table_pos = 0;
auto best_match = ColumnMatch::NoMatch;
size_t same_match = 0;
for (size_t i = 0; i < tables.size(); ++i)
{
auto match = IdentifierSemantic::canReferColumnToTable(identifier, extractTable(tables[i]));
auto match = IdentifierSemantic::canReferColumnToTable(identifier, tables[i]);
if (match != ColumnMatch::NoMatch)
{
if (match > best_match)
@ -54,9 +43,13 @@ IdentifierSemantic::ColumnMatch tryChooseTable(const ASTIdentifier & identifier,
{
if (!allow_ambiguous)
throw Exception("Ambiguous column '" + identifier.name + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
return ColumnMatch::Ambiguous;
best_match = ColumnMatch::Ambiguous;
return {};
}
return best_match;
if (best_match != ColumnMatch::NoMatch)
return best_table_pos;
return {};
}
}
@ -125,18 +118,22 @@ std::optional<size_t> IdentifierSemantic::getMembership(const ASTIdentifier & id
return identifier.semantic->membership;
}
bool IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector<DatabaseAndTableWithAlias> & tables,
size_t & best_table_pos, bool ambiguous)
std::optional<size_t> IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector<DatabaseAndTableWithAlias> & tables,
bool ambiguous)
{
static constexpr auto no_match = IdentifierSemantic::ColumnMatch::NoMatch;
return tryChooseTable<DatabaseAndTableWithAlias>(identifier, tables, best_table_pos, ambiguous) != no_match;
return tryChooseTable<DatabaseAndTableWithAlias>(identifier, tables, ambiguous);
}
bool IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector<TableWithColumnNames> & tables,
size_t & best_table_pos, bool ambiguous)
std::optional<size_t> IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector<TableWithColumnNames> & tables,
bool ambiguous)
{
static constexpr auto no_match = IdentifierSemantic::ColumnMatch::NoMatch;
return tryChooseTable<TableWithColumnNames>(identifier, tables, best_table_pos, ambiguous) != no_match;
return tryChooseTable<TableWithColumnNames>(identifier, tables, ambiguous);
}
/// Overload for tables that carry column names and types: forwards to
/// tryChooseTable and returns its result (the chosen table position, if any).
std::optional<size_t> IdentifierSemantic::chooseTable(
    const ASTIdentifier & identifier,
    const std::vector<TableWithColumnNamesAndTypes> & tables,
    bool ambiguous)
{
    /// The element type is deduced from `tables`.
    return tryChooseTable(identifier, tables, ambiguous);
}
std::pair<String, String> IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier)
@ -198,6 +195,22 @@ IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const
return ColumnMatch::NoMatch;
}
/// Overload for TableWithColumnNames: only the table name part is consulted.
/// TODO: matching by column name is disabled here because the calling code is not ready for it.
IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(
    const ASTIdentifier & identifier,
    const TableWithColumnNames & db_and_table)
{
    const DatabaseAndTableWithAlias & table_name = db_and_table.table;
    return canReferColumnToTable(identifier, table_name);
}
/// Overload for TableWithColumnNamesAndTypes.
/// A match by table name takes precedence; if there is none, a short
/// identifier that names one of the table's columns yields ColumnName.
/// Otherwise the result is NoMatch.
IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(
    const ASTIdentifier & identifier,
    const TableWithColumnNamesAndTypes & db_and_table)
{
    const ColumnMatch by_table_name = canReferColumnToTable(identifier, db_and_table.table);
    if (by_table_name != ColumnMatch::NoMatch)
        return by_table_name;

    if (identifier.isShort() && db_and_table.hasColumn(identifier.shortName()))
        return ColumnMatch::ColumnName;

    return ColumnMatch::NoMatch;
}
/// Strip qualifiers from the left side of the column name.
/// Example: 'database.table.name' -> 'name'.
void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table)

View File

@ -22,6 +22,7 @@ struct IdentifierSemantic
enum class ColumnMatch
{
NoMatch,
ColumnName, /// column qualified with column names list
AliasedTableName, /// column qualified with table name (but table has an alias so its priority is lower than TableName)
TableName, /// column qualified with table name
DbAndTable, /// column qualified with database and table name
@ -40,6 +41,9 @@ struct IdentifierSemantic
static std::optional<String> extractNestedName(const ASTIdentifier & identifier, const String & table_name);
static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNames & db_and_table);
static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNamesAndTypes & db_and_table);
static void setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static void setColumnLongName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table);
static bool canBeAlias(const ASTIdentifier & identifier);
@ -47,10 +51,12 @@ struct IdentifierSemantic
static void coverName(ASTIdentifier &, const String & alias);
static std::optional<ASTIdentifier> uncover(const ASTIdentifier & identifier);
static std::optional<size_t> getMembership(const ASTIdentifier & identifier);
static bool chooseTable(const ASTIdentifier &, const std::vector<DatabaseAndTableWithAlias> & tables, size_t & best_table_pos,
bool ambiguous = false);
static bool chooseTable(const ASTIdentifier &, const std::vector<TableWithColumnNames> & tables, size_t & best_table_pos,
bool ambiguous = false);
static std::optional<size_t> chooseTable(const ASTIdentifier &, const std::vector<DatabaseAndTableWithAlias> & tables,
bool allow_ambiguous = false);
static std::optional<size_t> chooseTable(const ASTIdentifier &, const std::vector<TableWithColumnNames> & tables,
bool allow_ambiguous = false);
static std::optional<size_t> chooseTable(const ASTIdentifier &, const std::vector<TableWithColumnNamesAndTypes> & tables,
bool allow_ambiguous = false);
private:
static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table);

View File

@ -235,23 +235,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
throw Exception("Too deep subqueries. Maximum: " + settings.max_subquery_depth.toString(),
ErrorCodes::TOO_DEEP_SUBQUERIES);
JoinedTables joined_tables(getSelectQuery());
if (joined_tables.hasJoins())
{
CrossToInnerJoinVisitor::Data cross_to_inner;
CrossToInnerJoinVisitor(cross_to_inner).visit(query_ptr);
JoinToSubqueryTransformVisitor::Data join_to_subs_data{*context};
JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query_ptr);
joined_tables.reset(getSelectQuery());
}
max_streams = settings.max_threads;
ASTSelectQuery & query = getSelectQuery();
const ASTPtr & left_table_expression = joined_tables.leftTableExpression();
bool has_input = input || input_pipe;
if (input)
{
/// Read from prepared input.
@ -262,35 +246,51 @@ InterpreterSelectQuery::InterpreterSelectQuery(
/// Read from prepared input.
source_header = input_pipe->getHeader();
}
else if (joined_tables.isLeftTableSubquery())
{
/// Read from subquery.
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
left_table_expression, getSubqueryContext(*context), options.subquery());
source_header = interpreter_subquery->getSampleBlock();
}
else if (!storage)
{
if (joined_tables.isLeftTableFunction())
{
/// Read from table function. propagate all settings from initSettings(),
/// alternative is to call on current `context`, but that can potentially pollute it.
storage = getSubqueryContext(*context).executeTableFunction(left_table_expression);
}
else
storage = joined_tables.getLeftTableStorage(*context);
}
JoinedTables joined_tables(getSubqueryContext(*context), getSelectQuery());
if (!has_input && !storage)
storage = joined_tables.getLeftTableStorage();
if (storage)
{
table_lock = storage->lockStructureForShare(false, context->getInitialQueryId());
table_id = storage->getStorageID();
joined_tables.resolveTables(getSubqueryContext(*context), storage);
}
else
joined_tables.resolveTables(getSubqueryContext(*context), source_header.getNamesAndTypesList());
if (has_input || !joined_tables.resolveTables())
joined_tables.makeFakeTable(storage, source_header);
/// Rewrite JOINs
if (!has_input && joined_tables.tablesCount() > 1)
{
CrossToInnerJoinVisitor::Data cross_to_inner{joined_tables.tablesWithColumns(), context->getCurrentDatabase()};
CrossToInnerJoinVisitor(cross_to_inner).visit(query_ptr);
JoinToSubqueryTransformVisitor::Data join_to_subs_data{*context};
JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query_ptr);
joined_tables.reset(getSelectQuery());
joined_tables.resolveTables();
if (storage && joined_tables.isLeftTableSubquery())
{
/// Rewritten with subquery. Free the storage and its lock here.
storage = {};
table_lock.release();
table_id = StorageID::createEmpty();
}
}
if (!has_input)
{
interpreter_subquery = joined_tables.makeLeftTableSubquery(options.subquery());
if (interpreter_subquery)
source_header = interpreter_subquery->getSampleBlock();
}
max_streams = settings.max_threads;
ASTSelectQuery & query = getSelectQuery();
auto analyze = [&] (bool try_move_to_prewhere = true)
{
@ -330,11 +330,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
if (syntax_analyzer_result->rewrite_subqueries)
{
/// remake interpreter_subquery when PredicateOptimizer rewrites subqueries and main table is subquery
if (joined_tables.isLeftTableSubquery())
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
left_table_expression,
getSubqueryContext(*context),
options.subquery());
interpreter_subquery = joined_tables.makeLeftTableSubquery(options.subquery());
}
}

View File

@ -147,9 +147,8 @@ struct ColumnAliasesMatcher
{
bool last_table = false;
{
size_t best_table_pos = 0;
if (IdentifierSemantic::chooseTable(*identifier, tables, best_table_pos))
last_table = (best_table_pos + 1 == tables.size());
if (auto best_table_pos = IdentifierSemantic::chooseTable(*identifier, tables))
last_table = (*best_table_pos + 1 == tables.size());
}
if (!last_table)
@ -207,10 +206,9 @@ struct ColumnAliasesMatcher
bool last_table = false;
String long_name;
size_t table_pos = 0;
if (IdentifierSemantic::chooseTable(node, data.tables, table_pos))
if (auto table_pos = IdentifierSemantic::chooseTable(node, data.tables))
{
auto & table = data.tables[table_pos];
auto & table = data.tables[*table_pos];
IdentifierSemantic::setColumnLongName(node, table); /// table_name.column_name -> table_alias.column_name
long_name = node.name;
if (&table == &data.tables.back())

View File

@ -6,6 +6,7 @@
#include <Storages/StorageValues.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
namespace DB
{
@ -33,8 +34,9 @@ void checkTablesWithColumns(const std::vector<T> & tables_with_columns, const Co
}
JoinedTables::JoinedTables(const ASTSelectQuery & select_query)
: table_expressions(getTableExpressions(select_query))
JoinedTables::JoinedTables(Context && context_, const ASTSelectQuery & select_query)
: context(context_)
, table_expressions(getTableExpressions(select_query))
, left_table_expression(extractTableExpression(select_query, 0))
, left_db_and_table(getDatabaseAndTable(select_query, 0))
{}
@ -49,9 +51,20 @@ bool JoinedTables::isLeftTableFunction() const
return left_table_expression && left_table_expression->as<ASTFunction>();
}
StoragePtr JoinedTables::getLeftTableStorage(Context & context)
std::unique_ptr<InterpreterSelectWithUnionQuery> JoinedTables::makeLeftTableSubquery(const SelectQueryOptions & select_options)
{
StoragePtr storage;
if (!isLeftTableSubquery())
return {};
return std::make_unique<InterpreterSelectWithUnionQuery>(left_table_expression, context, select_options);
}
StoragePtr JoinedTables::getLeftTableStorage()
{
if (isLeftTableSubquery())
return {};
if (isLeftTableFunction())
return context.executeTableFunction(left_table_expression);
if (left_db_and_table)
{
@ -75,42 +88,36 @@ StoragePtr JoinedTables::getLeftTableStorage(Context & context)
if (tmp_table_id.database_name == database_name && tmp_table_id.table_name == table_name)
{
/// Read from view source.
storage = context.getViewSource();
return context.getViewSource();
}
}
if (!storage)
{
/// Read from table. Even without table expression (implicit SELECT ... FROM system.one).
storage = context.getTable(database_name, table_name);
}
return storage;
/// Read from table. Even without table expression (implicit SELECT ... FROM system.one).
return context.getTable(database_name, table_name);
}
void JoinedTables::resolveTables(const Context & context, StoragePtr storage)
bool JoinedTables::resolveTables()
{
tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context);
checkTablesWithColumns(tables_with_columns, context);
if (tables_with_columns.empty())
return !tables_with_columns.empty();
}
void JoinedTables::makeFakeTable(StoragePtr storage, const Block & source_header)
{
if (storage)
{
const ColumnsDescription & storage_columns = storage->getColumns();
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, storage_columns.getOrdinary());
auto & table = tables_with_columns.back();
table.addHiddenColumns(storage_columns.getMaterialized());
table.addHiddenColumns(storage_columns.getAliases());
table.addHiddenColumns(storage_columns.getVirtuals());
}
}
void JoinedTables::resolveTables(const Context & context, const NamesAndTypesList & source_columns)
{
tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context);
checkTablesWithColumns(tables_with_columns, context);
if (tables_with_columns.empty())
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, source_columns);
else
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, source_header.getNamesAndTypesList());
}
}

View File

@ -2,6 +2,7 @@
#include <Core/NamesAndTypes.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Storages/IStorage_fwd.h>
namespace DB
@ -9,6 +10,7 @@ namespace DB
class ASTSelectQuery;
class Context;
struct SelectQueryOptions;
/// Joined tables' columns resolver.
/// We want to get each table structure at most once per table occurrence. Or even better once per table.
@ -16,32 +18,30 @@ class Context;
class JoinedTables
{
public:
JoinedTables() = default;
JoinedTables(const ASTSelectQuery & select_query);
JoinedTables(Context && contex, const ASTSelectQuery & select_query);
void reset(const ASTSelectQuery & select_query)
{
*this = JoinedTables(select_query);
*this = JoinedTables(std::move(context), select_query);
}
StoragePtr getLeftTableStorage(Context & context);
/// Resolve columns or get from storage. It assumes storage is not nullptr.
void resolveTables(const Context & context, StoragePtr storage);
/// Resolve columns or get from source list.
void resolveTables(const Context & context, const NamesAndTypesList & source_columns);
StoragePtr getLeftTableStorage();
bool resolveTables();
void makeFakeTable(StoragePtr storage, const Block & source_header);
const std::vector<TableWithColumnNamesAndTypes> & tablesWithColumns() const { return tables_with_columns; }
bool isLeftTableSubquery() const;
bool isLeftTableFunction() const;
bool hasJoins() const { return table_expressions.size() > 1; }
size_t tablesCount() const { return table_expressions.size(); }
const ASTPtr & leftTableExpression() const { return left_table_expression; }
const String & leftTableDatabase() const { return database_name; }
const String & leftTableName() const { return table_name; }
std::unique_ptr<InterpreterSelectWithUnionQuery> makeLeftTableSubquery(const SelectQueryOptions & select_options);
private:
Context context;
std::vector<const ASTTableExpression *> table_expressions;
std::vector<TableWithColumnNamesAndTypes> tables_with_columns;

View File

@ -30,7 +30,7 @@ static String wrongAliasMessage(const ASTPtr & ast, const ASTPtr & prev_ast, con
}
bool QueryAliasesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &)
bool QueryAliasesMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
/// Don't descend into table functions and subqueries and special case for ArrayJoin.
if (node->as<ASTTableExpression>() || node->as<ASTSelectWithUnionQuery>() || node->as<ASTArrayJoin>())
@ -38,7 +38,7 @@ bool QueryAliasesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &)
return true;
}
void QueryAliasesMatcher::visit(ASTPtr & ast, Data & data)
void QueryAliasesMatcher::visit(const ASTPtr & ast, Data & data)
{
if (auto * s = ast->as<ASTSubquery>())
visit(*s, ast, data);
@ -81,8 +81,9 @@ void QueryAliasesMatcher::visit(const ASTArrayJoin &, const ASTPtr & ast, Data &
/// set unique aliases for all subqueries. this is needed, because:
/// 1) content of subqueries could change after recursive analysis, and auto-generated column names could become incorrect
/// 2) result of different scalar subqueries can be cached inside expressions compilation cache and must have different names
void QueryAliasesMatcher::visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data)
void QueryAliasesMatcher::visit(const ASTSubquery & const_subquery, const ASTPtr & ast, Data & data)
{
ASTSubquery & subquery = const_cast<ASTSubquery &>(const_subquery);
Aliases & aliases = data.aliases;
static std::atomic_uint64_t subquery_index = 0;

View File

@ -15,19 +15,19 @@ struct ASTArrayJoin;
class QueryAliasesMatcher
{
public:
using Visitor = InDepthNodeVisitor<QueryAliasesMatcher, false>;
using Visitor = ConstInDepthNodeVisitor<QueryAliasesMatcher, false>;
struct Data
{
Aliases & aliases;
};
static void visit(ASTPtr & ast, Data & data);
static bool needChildVisit(ASTPtr & node, const ASTPtr & child);
static void visit(const ASTPtr & ast, Data & data);
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
private:
static void visit(const ASTSelectQuery & select, const ASTPtr & ast, Data & data);
static void visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data);
static void visit(const ASTSubquery & subquery, const ASTPtr & ast, Data & data);
static void visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data);
static void visitOther(const ASTPtr & ast, Data & data);
};

View File

@ -93,10 +93,10 @@ void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &,
if (IdentifierSemantic::getColumnName(identifier))
{
String short_name = identifier.shortName();
size_t table_pos = 0;
bool allow_ambiguous = data.join_using_columns.count(short_name);
if (IdentifierSemantic::chooseTable(identifier, data.tables, table_pos, allow_ambiguous))
if (auto best_pos = IdentifierSemantic::chooseTable(identifier, data.tables, allow_ambiguous))
{
size_t table_pos = *best_pos;
if (data.unknownColumn(table_pos, identifier))
{
String table_name = data.tables[table_pos].table.getQualifiedNamePrefix(false);

View File

@ -161,7 +161,7 @@ bool MergeTreeIndexConditionBloomFilter::mayBeTrueOnGranule(const MergeTreeIndex
match_rows = maybeTrueOnBloomFilter(&*hash_column, filter, hash_functions);
}
rpn_stack.emplace_back(match_rows, !match_rows);
rpn_stack.emplace_back(match_rows, true);
if (element.function == RPNElement::FUNCTION_NOT_EQUALS || element.function == RPNElement::FUNCTION_NOT_IN)
rpn_stack.back() = !rpn_stack.back();
}

View File

@ -1,491 +0,0 @@
#include <Storages/IStorage.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/StorageGenerate.h>
#include <Storages/StorageFactory.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Pipe.h>
#include <Parsers/ASTLiteral.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeDecimalBase.h>
#include <DataTypes/DataTypeArray.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnTuple.h>
#include <Common/SipHash.h>
#include <Common/randomSeed.h>
#include <pcg_random.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
extern const int BAD_TYPE_OF_FIELD;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
void fillColumnWithRandomData(IColumn & column, const DataTypePtr type, UInt64 limit,
UInt64 max_array_length, UInt64 max_string_length, pcg32 & generator, pcg64_fast & generator64)
{
TypeIndex idx = type->getTypeId();
switch (idx)
{
case TypeIndex::Nothing:
throw Exception("Random Generator not implemented for type 'Nothing'.", ErrorCodes::NOT_IMPLEMENTED);
case TypeIndex::UInt8:
{
auto & data = typeid_cast<ColumnVector<UInt8> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
data[i] = static_cast<UInt8>(generator());
}
break;
}
case TypeIndex::UInt16:
{
auto & data = typeid_cast<ColumnVector<UInt16> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
data[i] = static_cast<UInt16>(generator());
}
break;
}
case TypeIndex::UInt32:
{
auto & data = typeid_cast<ColumnVector<UInt32> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
data[i] = static_cast<UInt32>(generator());
}
break;
}
case TypeIndex::UInt64:
{
auto & data = typeid_cast<ColumnVector<UInt64> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
UInt64 a = static_cast<UInt64>(generator64());
data[i] = static_cast<UInt64>(a);
}
break;
}
case TypeIndex::UInt128:
throw Exception("There is no DataType 'UInt128' support.", ErrorCodes::NOT_IMPLEMENTED);
case TypeIndex::Int8:
{
auto & data = typeid_cast<ColumnVector<Int8> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
data[i] = static_cast<Int8>(generator());
}
break;
}
case TypeIndex::Int16:
{
auto & data = typeid_cast<ColumnVector<Int16> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
data[i] = static_cast<Int16>(generator());
}
break;
}
case TypeIndex::Int32:
{
auto & data = typeid_cast<ColumnVector<Int32> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
data[i] = static_cast<Int32>(generator());
}
break;
}
case TypeIndex::Int64:
{
auto & data = typeid_cast<ColumnVector<Int64> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
data[i] = static_cast<Int64>(generator64());
}
break;
}
case TypeIndex::Int128:
throw Exception("There is no DataType 'Int128' support.", ErrorCodes::NOT_IMPLEMENTED);
case TypeIndex::Float32:
{
auto & data = typeid_cast<ColumnVector<Float32> &>(column).getData();
data.resize(limit);
double d = 1.0;
for (UInt64 i = 0; i < limit; ++i)
{
d = std::numeric_limits<float>::max();
data[i] = (d / pcg32::max()) * generator();
}
break;
}
case TypeIndex::Float64:
{
auto & data = typeid_cast<ColumnVector<Float64> &>(column).getData();
data.resize(limit);
double d = 1.0;
for (UInt64 i = 0; i < limit; ++i)
{
d = std::numeric_limits<double>::max();
data[i] = (d / pcg64::max()) * generator64();
}
break;
}
case TypeIndex::Date:
{
auto & data = typeid_cast<ColumnVector<UInt16> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
data[i] = static_cast<UInt16>(generator());
}
break;
}
case TypeIndex::DateTime:
{
auto & data = typeid_cast<ColumnVector<UInt32> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
data[i] = static_cast<UInt32>(generator());
}
break;
}
case TypeIndex::DateTime64:
{
UInt32 scale;
if (auto * ptype = typeid_cast<const DataTypeDateTime64 *>(type.get()))
scale = ptype->getScale();
else
throw Exception("Static cast to DataTypeDateTime64 failed ", ErrorCodes::BAD_TYPE_OF_FIELD);
auto & data = typeid_cast<ColumnDecimal<Decimal64> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
UInt32 fractional = static_cast<UInt32>(generator()) % intExp10(scale);
UInt32 whole = static_cast<UInt32>(generator());
DateTime64 dt = DecimalUtils::decimalFromComponents<DateTime64>(whole, fractional, scale);
data[i] = dt;
}
break;
}
case TypeIndex::String:
{
auto & column_string = typeid_cast<ColumnString &>(column);
auto & offsets = column_string.getOffsets();
auto & chars = column_string.getChars();
UInt64 offset = 0;
{
offsets.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
offset += 1 + static_cast<UInt64>(generator()) % max_string_length;
offsets[i] = offset;
}
chars.resize(offset);
for (UInt64 i = 0; i < offset; ++i)
{
if (offset - i > 5)
{
UInt32 r = generator();
chars[i] = 32 + (r & 0x7F) % 95;
chars[i + 1] = 32 + ((r >> 7) & 0x7F) % 95;
chars[i + 2] = 32 + ((r >> 14) & 0x7F) % 95;
chars[i + 3] = 32 + ((r >> 21) & 0x7F) % 95;
chars[i + 4] = 32 + (r >> 28);
i += 4;
}
else
{
UInt32 r = generator();
chars[i] = 32 + (r % 95);
}
}
// add terminating zero char
for (auto & i : offsets)
{
chars[i - 1] = 0;
}
}
break;
}
case TypeIndex::FixedString:
{
auto & column_string = typeid_cast<ColumnFixedString &>(column);
const size_t len = column_string.sizeOfValueIfFixed();
auto & chars = column_string.getChars();
UInt64 num_chars = static_cast<UInt64>(len) * limit;
{
chars.resize(num_chars);
for (UInt64 i = 0; i < num_chars; ++i)
{
chars[i] = static_cast<UInt8>(generator());
}
}
break;
}
case TypeIndex::Enum8:
{
auto values = typeid_cast<const DataTypeEnum<Int8> *>(type.get())->getValues();
auto & data = typeid_cast<ColumnVector<Int8> &>(column).getData();
data.resize(limit);
UInt8 size = values.size();
UInt8 off;
for (UInt64 i = 0; i < limit; ++i)
{
off = static_cast<UInt8>(generator()) % size;
data[i] = values[off].second;
}
break;
}
case TypeIndex::Enum16:
{
auto values = typeid_cast<const DataTypeEnum<Int16> *>(type.get())->getValues();
auto & data = typeid_cast<ColumnVector<Int16> &>(column).getData();
data.resize(limit);
UInt16 size = values.size();
UInt8 off;
for (UInt64 i = 0; i < limit; ++i)
{
off = static_cast<UInt16>(generator()) % size;
data[i] = values[off].second;
}
break;
}
case TypeIndex::Decimal32:
{
auto & data = typeid_cast<ColumnDecimal<Decimal32> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
data[i] = static_cast<Int32>(generator());
}
break;
}
case TypeIndex::Decimal64:
{
auto & data = typeid_cast<ColumnDecimal<Decimal64> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
UInt64 a = static_cast<UInt64>(generator()) << 32 | static_cast<UInt64>(generator());
data[i] = a;
}
break;
}
case TypeIndex::Decimal128:
{
auto & data = typeid_cast<ColumnDecimal<Decimal128> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
Int128 x = static_cast<Int128>(generator64()) << 64 | static_cast<Int128>(generator64());
data[i] = x;
}
break;
}
case TypeIndex::UUID:
{
auto & data = typeid_cast<ColumnVector<UInt128> &>(column).getData();
data.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
UInt64 a = static_cast<UInt64>(generator64());
UInt64 b = static_cast<UInt64>(generator64());
auto x = UInt128(a, b);
data[i] = x;
}
break;
}
case TypeIndex::Array:
{
auto & column_array = typeid_cast<ColumnArray &>(column);
auto nested_type = typeid_cast<const DataTypeArray *>(type.get())->getNestedType();
auto & offsets = column_array.getOffsets();
IColumn & data = column_array.getData();
UInt64 offset = 0;
{
offsets.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
offset += static_cast<UInt64>(generator()) % max_array_length;
offsets[i] = offset;
}
}
fillColumnWithRandomData(data, nested_type, offset, max_array_length, max_string_length, generator, generator64);
break;
}
case TypeIndex::Tuple:
{
auto &column_tuple = typeid_cast<ColumnTuple &>(column);
auto elements = typeid_cast<const DataTypeTuple *>(type.get())->getElements();
for (size_t i = 0; i < column_tuple.tupleSize(); ++i)
{
fillColumnWithRandomData(column_tuple.getColumn(i), elements[i], limit, max_array_length, max_string_length, generator, generator64);
}
break;
}
case TypeIndex::Set:
throw Exception("Type 'Set' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR);
case TypeIndex::Interval:
throw Exception("Type 'Interval' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR);
case TypeIndex::Nullable:
{
auto & column_nullable = typeid_cast<ColumnNullable &>(column);
auto nested_type = typeid_cast<const DataTypeNullable *>(type.get())->getNestedType();
auto & null_map = column_nullable.getNullMapData();
IColumn & nested_column = column_nullable.getNestedColumn();
fillColumnWithRandomData(nested_column, nested_type, limit, max_array_length, max_string_length, generator, generator64);
null_map.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
null_map[i] = generator() < 1024; /// No real motivation for this.
}
break;
}
case TypeIndex::Function:
throw Exception("Type 'Function' can not be stored in a table.", ErrorCodes::LOGICAL_ERROR);
case TypeIndex::AggregateFunction:
throw Exception("Random Generator not implemented for type 'AggregateFunction'.", ErrorCodes::NOT_IMPLEMENTED);
case TypeIndex::LowCardinality:
throw Exception("Random Generator not implemented for type 'LowCardinality'.", ErrorCodes::NOT_IMPLEMENTED);
}
}
/// Creates the storage with the given table id, column set and generation limits.
/// A zero `random_seed_` means the seed was not specified: a fresh one is taken from randomSeed().
StorageGenerate::StorageGenerate(const StorageID & table_id_, const ColumnsDescription & columns_,
    UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_)
    : IStorage(table_id_)
    , max_array_length(max_array_length_)
    , max_string_length(max_string_length_)
{
    if (random_seed_)
        random_seed = random_seed_;
    else
        random_seed = randomSeed();

    setColumns(columns_);
}
/// Source that produces chunks of `block_size` rows; every column of the header
/// is filled by fillColumnWithRandomData using the generators owned by the source.
class GenerateSource : public SourceWithProgress
{
public:
    GenerateSource(UInt64 block_size_, UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_, Block block_header_)
        : SourceWithProgress(block_header_)
        , block_size(block_size_)
        , max_array_length(max_array_length_)
        , max_string_length(max_string_length_)
        , block_header(block_header_)
        , r32(random_seed_)
        , r64(random_seed_)
    {
    }

    String getName() const override { return "Generate"; }

protected:
    /// Builds one chunk: empty clones of the header columns, each filled according to its type.
    Chunk generate() override
    {
        auto columns = block_header.cloneEmptyColumns();
        const DataTypes column_types = block_header.getDataTypes();

        for (size_t pos = 0; pos < columns.size(); ++pos)
            fillColumnWithRandomData(
                columns[pos]->assumeMutableRef(), column_types[pos], block_size, max_array_length, max_string_length, r32, r64);

        return {std::move(columns), block_size};
    }

private:
    UInt64 block_size;
    UInt64 max_array_length;
    UInt64 max_string_length;
    Block block_header;

    /// Both generators are seeded with the same value in the constructor.
    pcg32 r32;
    pcg64_fast r64;
};
/// Registers the "Generate" table engine in `factory`.
/// The engine takes up to three optional UInt64 literal arguments, in this order:
/// max_array_length (default 10), max_string_length (default 10), random_seed (default 0).
void registerStorageGenerate(StorageFactory & factory)
{
    factory.registerStorage("Generate", [](const StorageFactory::Arguments & args)
    {
        ASTs & engine_args = args.engine_args;
        const size_t num_args = engine_args.size();

        if (num_args > 3)
            throw Exception("Storage Generate requires at most three arguments: "
                            "max_array_length, max_string_length, random_seed.",
                            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        /// Reads the engine argument at position `pos` as an UInt64 literal.
        auto literal_as_uint64 = [&engine_args](size_t pos) -> UInt64
        {
            return engine_args[pos]->as<ASTLiteral &>().value.safeGet<UInt64>();
        };

        /// Arguments are parsed left to right; the ones that are absent keep their defaults.
        UInt64 max_array_length_ = 10;
        if (num_args >= 1)
            max_array_length_ = literal_as_uint64(0);

        UInt64 max_string_length_ = 10;
        if (num_args >= 2)
            max_string_length_ = literal_as_uint64(1);

        UInt64 random_seed_ = 0; // zero for random
        if (num_args == 3)
            random_seed_ = literal_as_uint64(2);

        return StorageGenerate::create(args.table_id, args.columns, max_array_length_, max_string_length_, random_seed_);
    });
}
/// Creates `num_streams` independent GenerateSource pipes producing the requested columns.
/// The query info, the context and the processing stage are not used.
Pipes StorageGenerate::read(
    const Names & column_names,
    const SelectQueryInfo & /*query_info*/,
    const Context & /*context*/,
    QueryProcessingStage::Enum /*processed_stage*/,
    size_t max_block_size,
    unsigned num_streams)
{
    check(column_names, true);

    /// Header of the result: one empty column per requested name, in the requested order.
    const ColumnsDescription & all_columns = getColumns();
    Block header;
    for (const auto & column_name : column_names)
    {
        const auto & description = all_columns.get(column_name);
        MutableColumnPtr empty_column = description.type->createColumn();
        header.insert({std::move(empty_column), description.type, description.name});
    }

    /// Every source gets its own seed, taken one after another from a generator seeded with the storage seed.
    pcg32 seed_generator(random_seed);

    Pipes pipes;
    pipes.reserve(num_streams);
    for (unsigned stream = 0; stream < num_streams; ++stream)
    {
        auto source = std::make_shared<GenerateSource>(
            max_block_size, max_array_length, max_string_length, seed_generator(), header);
        pipes.emplace_back(std::move(source));
    }

    return pipes;
}
}

View File

@ -0,0 +1,437 @@
#include <Storages/IStorage.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/StorageGenerateRandom.h>
#include <Storages/StorageFactory.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Pipe.h>
#include <Parsers/ASTLiteral.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeDecimalBase.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnTuple.h>
#include <Common/SipHash.h>
#include <Common/randomSeed.h>
#include <common/unaligned.h>
#include <Functions/FunctionFactory.h>
#include <pcg_random.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
void fillBufferWithRandomData(char * __restrict data, size_t size, pcg64 & rng)
{
char * __restrict end = data + size;
while (data < end)
{
/// The loop can be further optimized.
UInt64 number = rng();
unalignedStore<UInt64>(data, number);
data += sizeof(UInt64); /// We assume that data has at least 7-byte padding (see PaddedPODArray)
}
}
ColumnPtr fillColumnWithRandomData(
const DataTypePtr type, UInt64 limit, UInt64 max_array_length, UInt64 max_string_length, pcg64 & rng, const Context & context)
{
TypeIndex idx = type->getTypeId();
switch (idx)
{
case TypeIndex::String:
{
/// Mostly the same as the implementation of randomPrintableASCII function.
auto column = ColumnString::create();
ColumnString::Chars & data_to = column->getChars();
ColumnString::Offsets & offsets_to = column->getOffsets();
offsets_to.resize(limit);
IColumn::Offset offset = 0;
for (size_t row_num = 0; row_num < limit; ++row_num)
{
size_t length = rng() % (max_string_length + 1); /// Slow
IColumn::Offset next_offset = offset + length + 1;
data_to.resize(next_offset);
offsets_to[row_num] = next_offset;
auto * data_to_ptr = data_to.data(); /// avoid assert on array indexing after end
for (size_t pos = offset, end = offset + length; pos < end; pos += 4) /// We have padding in column buffers that we can overwrite.
{
UInt64 rand = rng();
UInt16 rand1 = rand;
UInt16 rand2 = rand >> 16;
UInt16 rand3 = rand >> 32;
UInt16 rand4 = rand >> 48;
/// Printable characters are from range [32; 126].
/// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
data_to_ptr[pos + 0] = 32 + ((rand1 * 95) >> 16);
data_to_ptr[pos + 1] = 32 + ((rand2 * 95) >> 16);
data_to_ptr[pos + 2] = 32 + ((rand3 * 95) >> 16);
data_to_ptr[pos + 3] = 32 + ((rand4 * 95) >> 16);
/// NOTE gcc failed to vectorize this code (aliasing of char?)
/// TODO Implement SIMD optimizations from Danila Kutenin.
}
data_to[offset + length] = 0;
offset = next_offset;
}
return column;
}
case TypeIndex::Enum8:
{
auto column = ColumnVector<Int8>::create();
auto values = typeid_cast<const DataTypeEnum<Int8> *>(type.get())->getValues();
auto & data = column->getData();
data.resize(limit);
UInt8 size = values.size();
UInt8 off;
for (UInt64 i = 0; i < limit; ++i)
{
off = static_cast<UInt8>(rng()) % size;
data[i] = values[off].second;
}
return column;
}
case TypeIndex::Enum16:
{
auto column = ColumnVector<Int16>::create();
auto values = typeid_cast<const DataTypeEnum<Int16> *>(type.get())->getValues();
auto & data = column->getData();
data.resize(limit);
UInt16 size = values.size();
UInt8 off;
for (UInt64 i = 0; i < limit; ++i)
{
off = static_cast<UInt16>(rng()) % size;
data[i] = values[off].second;
}
return column;
}
case TypeIndex::Array:
{
auto nested_type = typeid_cast<const DataTypeArray *>(type.get())->getNestedType();
auto offsets_column = ColumnVector<ColumnArray::Offset>::create();
auto & offsets = offsets_column->getData();
UInt64 offset = 0;
offsets.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
{
offset += static_cast<UInt64>(rng()) % (max_array_length + 1);
offsets[i] = offset;
}
auto data_column = fillColumnWithRandomData(nested_type, offset, max_array_length, max_string_length, rng, context);
return ColumnArray::create(std::move(data_column), std::move(offsets_column));
}
case TypeIndex::Tuple:
{
auto elements = typeid_cast<const DataTypeTuple *>(type.get())->getElements();
const size_t tuple_size = elements.size();
Columns tuple_columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
tuple_columns[i] = fillColumnWithRandomData(elements[i], limit, max_array_length, max_string_length, rng, context);
return ColumnTuple::create(std::move(tuple_columns));
}
case TypeIndex::Nullable:
{
auto nested_type = typeid_cast<const DataTypeNullable *>(type.get())->getNestedType();
auto nested_column = fillColumnWithRandomData(nested_type, limit, max_array_length, max_string_length, rng, context);
auto null_map_column = ColumnUInt8::create();
auto & null_map = null_map_column->getData();
null_map.resize(limit);
for (UInt64 i = 0; i < limit; ++i)
null_map[i] = rng() % 16 == 0; /// No real motivation for this.
return ColumnNullable::create(std::move(nested_column), std::move(null_map_column));
}
case TypeIndex::UInt8:
{
auto column = ColumnUInt8::create();
column->getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column->getData().data()), limit * sizeof(UInt8), rng);
return column;
}
case TypeIndex::UInt16: [[fallthrough]];
case TypeIndex::Date:
{
auto column = ColumnUInt16::create();
column->getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column->getData().data()), limit * sizeof(UInt16), rng);
return column;
}
case TypeIndex::UInt32: [[fallthrough]];
case TypeIndex::DateTime:
{
auto column = ColumnUInt32::create();
column->getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column->getData().data()), limit * sizeof(UInt32), rng);
return column;
}
case TypeIndex::UInt64:
{
auto column = ColumnUInt64::create();
column->getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column->getData().data()), limit * sizeof(UInt64), rng);
return column;
}
case TypeIndex::UInt128: [[fallthrough]];
case TypeIndex::UUID:
{
auto column = ColumnUInt128::create();
column->getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column->getData().data()), limit * sizeof(UInt128), rng);
return column;
}
case TypeIndex::Int8:
{
auto column = ColumnInt8::create();
column->getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column->getData().data()), limit * sizeof(Int8), rng);
return column;
}
case TypeIndex::Int16:
{
auto column = ColumnInt16::create();
column->getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column->getData().data()), limit * sizeof(Int16), rng);
return column;
}
case TypeIndex::Int32:
{
auto column = ColumnInt32::create();
column->getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column->getData().data()), limit * sizeof(Int32), rng);
return column;
}
case TypeIndex::Int64:
{
auto column = ColumnInt64::create();
column->getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column->getData().data()), limit * sizeof(Int64), rng);
return column;
}
case TypeIndex::Float32:
{
auto column = ColumnFloat32::create();
column->getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column->getData().data()), limit * sizeof(Float32), rng);
return column;
}
case TypeIndex::Float64:
{
auto column = ColumnFloat64::create();
column->getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column->getData().data()), limit * sizeof(Float64), rng);
return column;
}
case TypeIndex::Decimal32:
{
auto column = type->createColumn();
auto & column_concrete = typeid_cast<ColumnDecimal<Decimal32> &>(*column);
column_concrete.getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column_concrete.getData().data()), limit * sizeof(Decimal32), rng);
return column;
}
case TypeIndex::Decimal64: /// TODO Decimal may be generated out of range.
{
auto column = type->createColumn();
auto & column_concrete = typeid_cast<ColumnDecimal<Decimal64> &>(*column);
column_concrete.getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column_concrete.getData().data()), limit * sizeof(Decimal64), rng);
return column;
}
case TypeIndex::Decimal128:
{
auto column = type->createColumn();
auto & column_concrete = typeid_cast<ColumnDecimal<Decimal128> &>(*column);
column_concrete.getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column_concrete.getData().data()), limit * sizeof(Decimal128), rng);
return column;
}
case TypeIndex::FixedString:
{
size_t n = typeid_cast<const DataTypeFixedString &>(*type).getN();
auto column = ColumnFixedString::create(n);
column->getChars().resize(limit * n);
fillBufferWithRandomData(reinterpret_cast<char *>(column->getChars().data()), limit * n, rng);
return column;
}
case TypeIndex::DateTime64:
{
auto column = type->createColumn();
auto & column_concrete = typeid_cast<ColumnDecimal<Decimal64> &>(*column);
column_concrete.getData().resize(limit);
UInt64 range = (1ULL << 32) * intExp10(typeid_cast<const DataTypeDateTime64 &>(*type).getScale());
for (size_t i = 0; i < limit; ++i)
column_concrete.getData()[i] = rng() % range; /// Slow
return column;
}
default:
throw Exception("The 'GenerateRandom' is not implemented for type " + type->getName(), ErrorCodes::NOT_IMPLEMENTED);
}
}
/** Source that produces chunks filled with random data.
  * Every call to generate() builds one column per type of the stored header
  * by calling fillColumnWithRandomData() and returns them as a chunk of `block_size` rows.
  */
class GenerateSource : public SourceWithProgress
{
public:
    /// block_size_        - number of rows requested for every generated chunk.
    /// max_array_length_  - forwarded to fillColumnWithRandomData().
    /// max_string_length_ - forwarded to fillColumnWithRandomData().
    /// random_seed_       - seed for this source's own pcg64 generator.
    /// block_header_      - header; its data types define the generated columns.
    /// context_           - kept by reference (presumably must outlive the source - TODO confirm).
    GenerateSource(UInt64 block_size_, UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_, Block block_header_, const Context & context_)
        : SourceWithProgress(block_header_)
        , block_size(block_size_)
        , max_array_length(max_array_length_)
        , max_string_length(max_string_length_)
        , block_header(block_header_)
        , rng(random_seed_)
        , context(context_)
    {
    }

    String getName() const override { return "GenerateRandom"; }

protected:
    /// Builds one random column for each data type of the header, in header order.
    Chunk generate() override
    {
        const DataTypes column_types = block_header.getDataTypes();

        Columns generated;
        generated.reserve(block_header.columns());

        for (const auto & column_type : column_types)
        {
            generated.emplace_back(
                fillColumnWithRandomData(column_type, block_size, max_array_length, max_string_length, rng, context));
        }

        return {std::move(generated), block_size};
    }

private:
    UInt64 block_size;
    UInt64 max_array_length;
    UInt64 max_string_length;
    Block block_header;

    /// The generator state is advanced by every generated column, so consecutive chunks differ.
    pcg64 rng;

    const Context & context;
};
}
/// Creates the storage.
/// If `random_seed_` holds a value, the stored seed is sipHash64 of that value;
/// otherwise the stored seed is obtained from randomSeed().
StorageGenerateRandom::StorageGenerateRandom(const StorageID & table_id_, const ColumnsDescription & columns_,
    UInt64 max_array_length_, UInt64 max_string_length_, std::optional<UInt64> random_seed_)
    : IStorage(table_id_)
    , max_array_length(max_array_length_)
    , max_string_length(max_string_length_)
{
    if (random_seed_)
        random_seed = sipHash64(*random_seed_);
    else
        random_seed = randomSeed();

    setColumns(columns_);
}
/// Registers the "GenerateRandom" table engine in the factory.
/// Engine arguments are all optional and positional: random_seed, max_string_length, max_array_length.
void registerStorageGenerateRandom(StorageFactory & factory)
{
    factory.registerStorage("GenerateRandom", [](const StorageFactory::Arguments & args)
    {
        ASTs & engine_args = args.engine_args;
        const size_t num_args = engine_args.size();

        if (num_args > 3)
            throw Exception("Storage GenerateRandom requires at most three arguments: "
                            "random_seed, max_string_length, max_array_length.",
                            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        /// Values used when the corresponding argument is not given.
        std::optional<UInt64> random_seed;
        UInt64 max_string_length = 10;
        UInt64 max_array_length = 10;

        if (num_args >= 1)
        {
            /// A NULL literal leaves the seed unset.
            const Field & seed_field = engine_args[0]->as<const ASTLiteral &>().value;
            if (!seed_field.isNull())
                random_seed = seed_field.safeGet<UInt64>();
        }

        if (num_args >= 2)
            max_string_length = engine_args[1]->as<const ASTLiteral &>().value.safeGet<UInt64>();

        /// More than three arguments were rejected above, so this is exactly the three-argument case.
        if (num_args >= 3)
            max_array_length = engine_args[2]->as<const ASTLiteral &>().value.safeGet<UInt64>();

        return StorageGenerateRandom::create(args.table_id, args.columns, max_array_length, max_string_length, random_seed);
    });
}
/// Creates `num_streams` pipes, each backed by its own GenerateSource.
/// The header of every source contains only the requested columns, in the requested order.
/// Per-source seeds are successive outputs of a pcg64 generator seeded with the storage's `random_seed`.
Pipes StorageGenerateRandom::read(
    const Names & column_names,
    const SelectQueryInfo & /*query_info*/,
    const Context & context,
    QueryProcessingStage::Enum /*processed_stage*/,
    size_t max_block_size,
    unsigned num_streams)
{
    check(column_names, true);

    /// Build the header from the descriptions of the requested columns.
    const ColumnsDescription & all_columns = getColumns();
    Block header;
    for (const auto & column_name : column_names)
    {
        const auto & description = all_columns.get(column_name);
        MutableColumnPtr empty_column = description.type->createColumn();
        header.insert({std::move(empty_column), description.type, description.name});
    }

    Pipes result;
    result.reserve(num_streams);

    /// Derive a separate seed for every source from the initial seed.
    pcg64 seed_generator(random_seed);
    for (UInt64 stream = 0; stream < num_streams; ++stream)
    {
        auto source = std::make_shared<GenerateSource>(
            max_block_size, max_array_length, max_string_length, seed_generator(), header, context);
        result.emplace_back(std::move(source));
    }

    return result;
}
}

View File

@ -1,5 +1,6 @@
#pragma once
#include <optional>
#include <ext/shared_ptr_helper.h>
#include <Storages/IStorage.h>
@ -8,11 +9,11 @@ namespace DB
{
/* Generates random data for given schema.
*/
class StorageGenerate : public ext::shared_ptr_helper<StorageGenerate>, public IStorage
class StorageGenerateRandom : public ext::shared_ptr_helper<StorageGenerateRandom>, public IStorage
{
friend struct ext::shared_ptr_helper<StorageGenerate>;
friend struct ext::shared_ptr_helper<StorageGenerateRandom>;
public:
std::string getName() const override { return "Generate"; }
std::string getName() const override { return "GenerateRandom"; }
Pipes read(
const Names & column_names,
@ -28,8 +29,8 @@ private:
UInt64 random_seed = 0;
protected:
StorageGenerate(const StorageID & table_id_, const ColumnsDescription & columns_,
UInt64 max_array_length, UInt64 max_string_length, UInt64 random_seed);
StorageGenerateRandom(const StorageID & table_id_, const ColumnsDescription & columns_,
UInt64 max_array_length, UInt64 max_string_length, std::optional<UInt64> random_seed);
};
}

View File

@ -29,7 +29,7 @@ void registerStorages()
registerStorageView(factory);
registerStorageMaterializedView(factory);
registerStorageLiveView(factory);
registerStorageGenerate(factory);
registerStorageGenerateRandom(factory);
#if USE_AWS_S3
registerStorageS3(factory);

View File

@ -23,7 +23,7 @@ void registerStorageJoin(StorageFactory & factory);
void registerStorageView(StorageFactory & factory);
void registerStorageMaterializedView(StorageFactory & factory);
void registerStorageLiveView(StorageFactory & factory);
void registerStorageGenerate(StorageFactory & factory);
void registerStorageGenerateRandom(StorageFactory & factory);
#if USE_AWS_S3
void registerStorageS3(StorageFactory & factory);

View File

@ -2,7 +2,7 @@
#include <Common/Exception.h>
#include <Core/Block.h>
#include <Storages/StorageGenerate.h>
#include <Storages/StorageGenerateRandom.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTLiteral.h>
@ -10,7 +10,7 @@
#include <TableFunctions/ITableFunction.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/TableFunctionGenerate.h>
#include <TableFunctions/TableFunctionGenerateRandom.h>
#include <TableFunctions/parseColumnsListForTableFunction.h>
#include "registerTableFunctions.h"
@ -25,7 +25,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
StoragePtr TableFunctionGenerate::executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const
StoragePtr TableFunctionGenerateRandom::executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const
{
ASTs & args_func = ast_function->children;
@ -36,41 +36,45 @@ StoragePtr TableFunctionGenerate::executeImpl(const ASTPtr & ast_function, const
if (args.size() < 1)
throw Exception("Table function '" + getName() + "' requires at least one argument: "
" structure(, max_array_length, max_string_length, random_seed).",
" structure, [random_seed, max_string_length, max_array_length].",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (args.size() > 4)
throw Exception("Table function '" + getName() + "' requires at most four arguments: "
" structure, max_array_length, max_string_length, random_seed.",
" structure, [random_seed, max_string_length, max_array_length].",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
/// Parsing first argument as table structure and creating a sample block
std::string structure = args[0]->as<ASTLiteral &>().value.safeGet<String>();
std::string structure = args[0]->as<const ASTLiteral &>().value.safeGet<String>();
UInt64 max_array_length = 10;
UInt64 max_string_length = 10;
UInt64 random_seed = 0; // zero for random
UInt64 max_array_length = 10;
std::optional<UInt64> random_seed;
/// Parsing second argument if present
if (args.size() >= 2)
max_array_length = args[1]->as<ASTLiteral &>().value.safeGet<UInt64>();
{
const Field & value = args[1]->as<const ASTLiteral &>().value;
if (!value.isNull())
random_seed = value.safeGet<UInt64>();
}
if (args.size() >= 3)
max_string_length = args[2]->as<ASTLiteral &>().value.safeGet<UInt64>();
max_string_length = args[2]->as<const ASTLiteral &>().value.safeGet<UInt64>();
if (args.size() == 4)
random_seed = args[3]->as<ASTLiteral &>().value.safeGet<UInt64>();
max_array_length = args[3]->as<const ASTLiteral &>().value.safeGet<UInt64>();
ColumnsDescription columns = parseColumnsListFromString(structure, context);
auto res = StorageGenerate::create(StorageID(getDatabaseName(), table_name), columns, max_array_length, max_string_length, random_seed);
auto res = StorageGenerateRandom::create(StorageID(getDatabaseName(), table_name), columns, max_array_length, max_string_length, random_seed);
res->startup();
return res;
}
void registerTableFunctionGenerate(TableFunctionFactory & factory)
{
factory.registerFunction<TableFunctionGenerate>(TableFunctionFactory::CaseInsensitive);
factory.registerFunction<TableFunctionGenerateRandom>();
}
}

View File

@ -4,12 +4,14 @@
namespace DB
{
/* generate(structure, [max_array_length, max_string_length, random_seed]) - creates a temporary storage that generates columns with random data
/* generateRandom(structure, [random_seed, max_string_length, max_array_length])
* - creates a temporary storage that generates columns with random data
*/
class TableFunctionGenerate : public ITableFunction
class TableFunctionGenerateRandom : public ITableFunction
{
public:
static constexpr auto name = "generate";
static constexpr auto name = "generateRandom";
std::string getName() const override { return name; }
private:
StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context, const std::string & table_name) const override;

View File

@ -4,3 +4,12 @@ fun:__gxx_personality_*
# We apply std::tolower to uninitialized padding, but don't use the result, so
# it is OK. Reproduce with "select ngramDistanceCaseInsensitive(materialize(''), '')"
fun:tolower
# Maybe it's not OK, but suppress it to run other tests
# Some functions in OpenSSL:
fun:probable_prime
fun:BN_bin2bn
fun:BN_add_word
fun:bn_div_fixed_top
fun:bn_mul_words
fun:BN_cmp

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -6,17 +6,9 @@ This directory contains `.xml`-files with performance tests for `clickhouse-perf
First of all you should check that existing tests don't cover your case. If there are no such tests then you should write your own.
There two types of performance tests:
* First is executed in loop, and have tag `<type>loop</type>` in config.
* Second one is executed only once and have tag `<type>once</type>` in config.
You have to specify `preconditions`. It contains table names. Only `hits_100m_single`, `hits_10m_single`, `test.hits` are available in CI.
Type `once` should be used only for endless queries. Even if your query really long (10 seconds+), it's better to choose `loop` test.
After you have choosen type, you have to specify `preconditions`. It contains table names. Only `hits_100m_single`, `hits_10m_single`, `test.hits` are available in CI.
The most important part of test is `stop_conditions`. For `loop` test you should always use `min_time_not_changing_for_ms` stop condition. For `once` test you can choose between `average_speed_not_changing_for_ms` and `max_speed_not_changing_for_ms`, but first is preferable. Also you should always specify `total_time_ms` metric. Endless tests will be ignored by CI.
`loop` tests are always compared by `min_time` metric and `once` tests compared by `max_rows_per_second`.
The most important part of test is `stop_conditions`. Also you should always specify `total_time_ms` metric. Endless tests will be ignored by CI.
You can use `substitutions`, `create`, `fill` and `drop` queries to prepare test. You can find examples in this folder.

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>
<iterations>10</iterations>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

View File

@ -1,5 +1,4 @@
<test>
<type>once</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>once</type>
<preconditions>
<table_exists>hits_100m_single</table_exists>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>
@ -22,5 +21,5 @@
</substitution>
</substitutions>
<query>SELECT bitCount({expr}) FROM numbers(1000000) FORMAT Null</query>
<query>SELECT bitCount({expr}) FROM numbers(100000000) FORMAT Null</query>
</test>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>once</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,6 +1,5 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>
<iterations>10</iterations>

View File

@ -1,6 +1,5 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>
<iterations>10</iterations>

View File

@ -1,6 +1,5 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>
<iterations>10</iterations>

View File

@ -1,6 +1,5 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>
<iterations>10</iterations>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -7,7 +7,6 @@
<table_exists>hits_100m_single</table_exists>
</preconditions>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -8,7 +8,6 @@
<table_exists>hits_1000m_single</table_exists>
</preconditions>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

View File

@ -7,7 +7,6 @@
<table_exists>hits_100m_single</table_exists>
</preconditions>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -7,7 +7,6 @@
<table_exists>hits_100m_single</table_exists>
</preconditions>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,6 +1,5 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,6 +1,5 @@
<test>
<type>loop</type>
<tags>
<tag>long</tag>
@ -132,11 +131,8 @@
</substitution>
</substitutions>
<query>SELECT count() FROM numbers(100000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}'))</query>
<query>SELECT count() FROM numbers(100000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t))</query>
<query>SELECT count() FROM numbers(100000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1))</query>
<query>SELECT count() FROM numbers(100000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}'))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month))</query>
</test>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<preconditions>
<table_exists>default.hits_100m_single</table_exists>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<create_query>CREATE TABLE t (x UInt64, d32 Decimal32(3), d64 Decimal64(4), d128 Decimal128(5)) ENGINE = Memory</create_query>
<fill_query>INSERT INTO t SELECT number AS x, x AS d32, x AS d64, x d128 FROM numbers(1000000)</fill_query>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<preconditions>
<table_exists>test.hits</table_exists>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -7,7 +7,6 @@
<table_exists>test.hits</table_exists>
</preconditions>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<tags>
<tag>long</tag>
</tags>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<tags>
<tag>long</tag>
</tags>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<tags>
</tags>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,30 +1,28 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>
<average_speed_not_changing_for_ms>4000</average_speed_not_changing_for_ms>
<total_time_ms>10000</total_time_ms>
</any_of>
</stop_conditions>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('i Enum8(\'hello\' = 1, \'world\' = 5)', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('f32 Float32, f64 Float64', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('i Tuple(Int32, Int64)', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(Int8)', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(Nullable(Int32))', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('i Tuple(Int32, Array(Int64))', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('i Nullable(String)', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(String)', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('i UUID', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('i Array(Nullable(UUID))', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('i FixedString(4)', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT COUNT(*) FROM (SELECT * FROM generate('i String', 10, 10, 1) LIMIT 100000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8') LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Int8)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Int32))', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Array(Int64))', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(String)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(String)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i UUID', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(UUID))', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i FixedString(4)', 0, 10, 10) LIMIT 10000000);</query>
<query>SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i String', 0, 10, 10) LIMIT 10000000);</query>
</test>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

View File

@ -1,6 +1,5 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<preconditions>
<table_exists>hits_100m_single</table_exists>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<settings>
<input_format_values_accurate_types_of_literals>1</input_format_values_accurate_types_of_literals>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>
<iterations>5</iterations>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<all_of>

View File

@ -1,5 +1,4 @@
<test>
<type>loop</type>
<stop_conditions>
<any_of>

Some files were not shown because too many files have changed in this diff Show More